xref: /illumos-gate/usr/src/uts/intel/io/vmm/io/vhpet.c (revision a4955f4fa65e38d70c07d38e657a9aff43fa155f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
5  * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 /*
33  * Copyright 2018 Joyent, Inc.
34  * Copyright 2022 Oxide Computer Company
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/mutex.h>
42 #include <sys/kernel.h>
43 #include <sys/kmem.h>
44 #include <sys/systm.h>
45 
46 #include <dev/acpica/acpi_hpet.h>
47 
48 #include <machine/vmm.h>
49 #include <machine/vmm_dev.h>
50 
51 #include "vmm_lapic.h"
52 #include "vatpic.h"
53 #include "vioapic.h"
54 #include "vhpet.h"
55 
56 
/*
 * HPET_FREQ is the frequency used when converting between host hrtime and
 * main counter ticks.  FS_PER_S is the number of femtoseconds in a second;
 * vhpet_capabilities() divides it by HPET_FREQ to report the tick period.
 */
#define	HPET_FREQ	16777216		/* 16.7 (2^24) MHz */
#define	FS_PER_S	1000000000000000ul

/*
 * Timer N Configuration and Capabilities Register
 *
 * The bits in this mask are restored from the previous register value by
 * vhpet_timer_update_config(), so guest writes cannot change them.
 */
#define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE	|	\
				HPET_TCAP_FSB_INT_DEL	|	\
				HPET_TCAP_SIZE		|	\
				HPET_TCAP_PER_INT)
/*
 * HPET requires at least 3 timers and up to 32 timers per block.
 */
#define	VHPET_NUM_TIMERS	8
CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
70 
/*
 * Argument handed to vhpet_handler() when a timer callout fires.  It names
 * both the owning vhpet and the index of the timer within it.
 */
struct vhpet_callout_arg {
	struct vhpet *vhpet;	/* device the timer belongs to */
	int timer_num;		/* index into vhpet->timer[] */
};
75 
/*
 * State of a single emulated HPET timer.
 */
struct vhpet_timer {
	uint64_t	cap_config;	/* Configuration */
	uint64_t	msireg;		/* FSB interrupt routing */
	uint32_t	compval;	/* Comparator */
	uint32_t	comprate;	/* period in ticks; 0 when one-shot */
	struct callout	callout;	/* fires vhpet_handler() */
	hrtime_t	callout_expire;	/* time when counter==compval */
	struct vhpet_callout_arg arg;	/* argument passed to the callout */
};
85 
/*
 * Per-VM emulated HPET.  The register and timer state is accessed under
 * 'lock' (see VHPET_LOCK) by the MMIO, callout, pause/resume and data
 * import/export paths in this file.
 */
struct vhpet {
	struct vm	*vm;
	kmutex_t	lock;

	uint64_t	config;		/* Configuration */
	uint64_t	isr;		/* Interrupt Status */
	uint32_t	base_count;	/* HPET counter base value */
	hrtime_t	base_time;	/* uptime corresponding to base value */

	struct vhpet_timer timer[VHPET_NUM_TIMERS];
};
97 
/* Acquire, release, and test ownership of the per-device mutex. */
#define	VHPET_LOCK(vhp)		mutex_enter(&((vhp)->lock))
#define	VHPET_UNLOCK(vhp)	mutex_exit(&((vhp)->lock))
#define	VHPET_LOCKED(vhp)	MUTEX_HELD(&((vhp)->lock))

/*
 * Forward declaration: vhpet_handler() re-arms periodic timers through this
 * function, which is defined further down in the file.
 */
static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
    hrtime_t now);
104 
105 static uint64_t
106 vhpet_capabilities(void)
107 {
108 	uint64_t cap = 0;
109 
110 	cap |= 0x8086 << 16;			/* vendor id */
111 	cap |= (VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
112 	cap |= 1;				/* revision */
113 	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
114 
115 	cap &= 0xffffffff;
116 	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
117 
118 	return (cap);
119 }
120 
121 static __inline bool
122 vhpet_counter_enabled(struct vhpet *vhpet)
123 {
124 
125 	return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
126 }
127 
128 static __inline bool
129 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
130 {
131 	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
132 
133 	if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
134 		return (true);
135 	else
136 		return (false);
137 }
138 
139 static __inline int
140 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
141 {
142 	/*
143 	 * If the timer is configured to use MSI then treat it as if the
144 	 * timer is not connected to the ioapic.
145 	 */
146 	if (vhpet_timer_msi_enabled(vhpet, n))
147 		return (0);
148 
149 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
150 }
151 
152 static uint32_t
153 vhpet_counter(struct vhpet *vhpet, hrtime_t *nowptr)
154 {
155 	const hrtime_t now = gethrtime();
156 	uint32_t val = vhpet->base_count;
157 
158 	if (vhpet_counter_enabled(vhpet)) {
159 		const hrtime_t delta = now - vhpet->base_time;
160 
161 		ASSERT3S(delta, >=, 0);
162 		val += hrt_freq_count(delta, HPET_FREQ);
163 	} else {
164 		/* Value of the counter is meaningless when it is disabled */
165 	}
166 
167 	if (nowptr != NULL) {
168 		*nowptr = now;
169 	}
170 	return (val);
171 }
172 
173 static void
174 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
175 {
176 	int pin;
177 
178 	if (vhpet->isr & (1 << n)) {
179 		pin = vhpet_timer_ioapic_pin(vhpet, n);
180 		KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
181 		(void) vioapic_deassert_irq(vhpet->vm, pin);
182 		vhpet->isr &= ~(1 << n);
183 	}
184 }
185 
186 static __inline bool
187 vhpet_periodic_timer(struct vhpet *vhpet, int n)
188 {
189 
190 	return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
191 }
192 
193 static __inline bool
194 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
195 {
196 
197 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
198 }
199 
200 static __inline bool
201 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
202 {
203 
204 	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
205 	    "timer %d is using MSI", n));
206 
207 	if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
208 		return (true);
209 	else
210 		return (false);
211 }
212 
213 static void
214 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
215 {
216 	int pin;
217 
218 	/* If interrupts are not enabled for this timer then just return. */
219 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
220 		return;
221 
222 	/*
223 	 * If a level triggered interrupt is already asserted then just return.
224 	 */
225 	if ((vhpet->isr & (1 << n)) != 0) {
226 		return;
227 	}
228 
229 	if (vhpet_timer_msi_enabled(vhpet, n)) {
230 		(void) lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
231 		    vhpet->timer[n].msireg & 0xffffffff);
232 		return;
233 	}
234 
235 	pin = vhpet_timer_ioapic_pin(vhpet, n);
236 	if (pin == 0) {
237 		/* Interrupt is not routed to IOAPIC */
238 		return;
239 	}
240 
241 	if (vhpet_timer_edge_trig(vhpet, n)) {
242 		(void) vioapic_pulse_irq(vhpet->vm, pin);
243 	} else {
244 		vhpet->isr |= 1 << n;
245 		(void) vioapic_assert_irq(vhpet->vm, pin);
246 	}
247 }
248 
249 static void
250 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
251 {
252 	uint32_t compval, comprate, compnext;
253 
254 	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
255 
256 	compval = vhpet->timer[n].compval;
257 	comprate = vhpet->timer[n].comprate;
258 
259 	/*
260 	 * Calculate the comparator value to be used for the next periodic
261 	 * interrupt.
262 	 *
263 	 * This function is commonly called from the callout handler.
264 	 * In this scenario the 'counter' is ahead of 'compval'. To find
265 	 * the next value to program into the accumulator we divide the
266 	 * number space between 'compval' and 'counter' into 'comprate'
267 	 * sized units. The 'compval' is rounded up such that is "ahead"
268 	 * of 'counter'.
269 	 */
270 	compnext = compval + ((counter - compval) / comprate + 1) * comprate;
271 
272 	vhpet->timer[n].compval = compnext;
273 }
274 
275 static void
276 vhpet_handler(void *arg)
277 {
278 	const struct vhpet_callout_arg *vca = arg;
279 	struct vhpet *vhpet = vca->vhpet;
280 	const int n = vca->timer_num;
281 	struct callout *callout = &vhpet->timer[n].callout;
282 
283 	VHPET_LOCK(vhpet);
284 
285 	if (callout_pending(callout) || !callout_active(callout)) {
286 		VHPET_UNLOCK(vhpet);
287 		return;
288 	}
289 
290 	callout_deactivate(callout);
291 	ASSERT(vhpet_counter_enabled(vhpet));
292 
293 	if (vhpet_periodic_timer(vhpet, n)) {
294 		hrtime_t now;
295 		uint32_t counter = vhpet_counter(vhpet, &now);
296 
297 		vhpet_start_timer(vhpet, n, counter, now);
298 	} else {
299 		/*
300 		 * Zero out the expiration time to distinguish a fired timer
301 		 * from one which is held due to a VM pause.
302 		 */
303 		vhpet->timer[n].callout_expire = 0;
304 	}
305 	vhpet_timer_interrupt(vhpet, n);
306 
307 	VHPET_UNLOCK(vhpet);
308 }
309 
310 static void
311 vhpet_stop_timer(struct vhpet *vhpet, int n, hrtime_t now)
312 {
313 	ASSERT(VHPET_LOCKED(vhpet));
314 
315 	callout_stop(&vhpet->timer[n].callout);
316 
317 	/*
318 	 * If the callout was scheduled to expire in the past but hasn't
319 	 * had a chance to execute yet then trigger the timer interrupt
320 	 * here. Failing to do so will result in a missed timer interrupt
321 	 * in the guest. This is especially bad in one-shot mode because
322 	 * the next interrupt has to wait for the counter to wrap around.
323 	 */
324 	if (vhpet->timer[n].callout_expire < now) {
325 		vhpet_timer_interrupt(vhpet, n);
326 	}
327 	vhpet->timer[n].callout_expire = 0;
328 }
329 
330 static void
331 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, hrtime_t now)
332 {
333 	struct vhpet_timer *timer = &vhpet->timer[n];
334 
335 	ASSERT(VHPET_LOCKED(vhpet));
336 
337 	if (timer->comprate != 0)
338 		vhpet_adjust_compval(vhpet, n, counter);
339 	else {
340 		/*
341 		 * In one-shot mode it is the guest's responsibility to make
342 		 * sure that the comparator value is not in the "past". The
343 		 * hardware doesn't have any belt-and-suspenders to deal with
344 		 * this so we don't either.
345 		 */
346 	}
347 
348 	const hrtime_t delta = hrt_freq_interval(HPET_FREQ,
349 	    timer->compval - counter);
350 	timer->callout_expire = now + delta;
351 	callout_reset_hrtime(&timer->callout, timer->callout_expire,
352 	    vhpet_handler, &timer->arg, C_ABSOLUTE);
353 }
354 
355 static void
356 vhpet_start_counting(struct vhpet *vhpet)
357 {
358 	int i;
359 
360 	vhpet->base_time = gethrtime();
361 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
362 		/*
363 		 * Restart the timers based on the value of the main counter
364 		 * when it stopped counting.
365 		 */
366 		vhpet_start_timer(vhpet, i, vhpet->base_count,
367 		    vhpet->base_time);
368 	}
369 }
370 
371 static void
372 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, hrtime_t now)
373 {
374 	int i;
375 
376 	vhpet->base_count = counter;
377 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
378 		vhpet_stop_timer(vhpet, i, now);
379 }
380 
/*
 * Replace the bits of '*regptr' selected by 'mask' with the corresponding
 * bits of 'data'; bits outside 'mask' are left untouched.
 */
static __inline void
update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
{
	const uint64_t kept = *regptr & ~mask;
	const uint64_t incoming = data & mask;

	*regptr = kept | incoming;
}
388 
389 static void
390 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
391     uint64_t mask)
392 {
393 	bool clear_isr;
394 	int old_pin, new_pin;
395 	uint32_t allowed_irqs;
396 	uint64_t oldval, newval;
397 
398 	if (vhpet_timer_msi_enabled(vhpet, n) ||
399 	    vhpet_timer_edge_trig(vhpet, n)) {
400 		if (vhpet->isr & (1 << n))
401 			panic("vhpet timer %d isr should not be asserted", n);
402 	}
403 	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
404 	oldval = vhpet->timer[n].cap_config;
405 
406 	newval = oldval;
407 	update_register(&newval, data, mask);
408 	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
409 	newval |= oldval & HPET_TCAP_RO_MASK;
410 
411 	if (newval == oldval)
412 		return;
413 
414 	vhpet->timer[n].cap_config = newval;
415 
416 	/*
417 	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
418 	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
419 	 * it to the default value of 0.
420 	 */
421 	allowed_irqs = vhpet->timer[n].cap_config >> 32;
422 	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
423 	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
424 		/* Invalid IRQ configured */
425 		new_pin = 0;
426 		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
427 	}
428 
429 	if (!vhpet_periodic_timer(vhpet, n))
430 		vhpet->timer[n].comprate = 0;
431 
432 	/*
433 	 * If the timer's ISR bit is set then clear it in the following cases:
434 	 * - interrupt is disabled
435 	 * - interrupt type is changed from level to edge or fsb.
436 	 * - interrupt routing is changed
437 	 *
438 	 * This is to ensure that this timer's level triggered interrupt does
439 	 * not remain asserted forever.
440 	 */
441 	if (vhpet->isr & (1 << n)) {
442 		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
443 		    n, old_pin));
444 		if (!vhpet_timer_interrupt_enabled(vhpet, n))
445 			clear_isr = true;
446 		else if (vhpet_timer_msi_enabled(vhpet, n))
447 			clear_isr = true;
448 		else if (vhpet_timer_edge_trig(vhpet, n))
449 			clear_isr = true;
450 		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
451 			clear_isr = true;
452 		else
453 			clear_isr = false;
454 
455 		if (clear_isr) {
456 			(void) vioapic_deassert_irq(vhpet->vm, old_pin);
457 			vhpet->isr &= ~(1 << n);
458 		}
459 	}
460 }
461 
/*
 * Handle a guest MMIO write of 'size' bytes of 'val' at guest-physical
 * address 'gpa' within the HPET register window.
 *
 * Only 4- and 8-byte, naturally aligned accesses are acted upon; anything
 * else is silently ignored.  A 4-byte write to the upper half of a 64-bit
 * register is shifted into place so that every handler below can work on a
 * full-width (data, mask) pair.
 *
 * 'vcpuid' is not used.  The function always returns 0.
 */
int
vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
    int size)
{
	struct vhpet *vhpet;
	uint64_t data, mask, oldval, val64;
	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
	hrtime_t now;
	int i, offset;

	vhpet = vm_hpet(vm);
	offset = gpa - VHPET_BASE;

	VHPET_LOCK(vhpet);

	/* Accesses to the HPET should be 4 or 8 bytes wide */
	switch (size) {
	case 8:
		mask = 0xffffffffffffffff;
		data = val;
		break;
	case 4:
		mask = 0xffffffff;
		data = val;
		/* Upper dword of a register: move data and mask into place */
		if ((offset & 0x4) != 0) {
			mask <<= 32;
			data <<= 32;
		}
		break;
	default:
		/* Invalid MMIO write */
		goto done;
	}

	/* Access to the HPET should be naturally aligned to its width */
	if (offset & (size - 1)) {
		goto done;
	}

	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
		/*
		 * Get the most recent value of the counter before updating
		 * the 'config' register. If the HPET is going to be disabled
		 * then we need to update 'base_count' with the value right
		 * before it is disabled.
		 */
		counter = vhpet_counter(vhpet, &now);
		oldval = vhpet->config;
		update_register(&vhpet->config, data, mask);

		/*
		 * LegacyReplacement Routing is not supported so clear the
		 * bit explicitly.
		 */
		vhpet->config &= ~HPET_CNF_LEG_RT;

		/* Act only when the enable bit actually changed state */
		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
			if (vhpet_counter_enabled(vhpet)) {
				vhpet_start_counting(vhpet);
			} else {
				vhpet_stop_counting(vhpet, counter, now);
			}
		}
		goto done;
	}

	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
		/* Only bits that are both set and written as 1 are cleared */
		isr_clear_mask = vhpet->isr & data;
		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
			if ((isr_clear_mask & (1 << i)) != 0) {
				vhpet_timer_clear_isr(vhpet, i);
			}
		}
		goto done;
	}

	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
		/* Zero-extend the counter to 64-bits before updating it */
		val64 = vhpet_counter(vhpet, NULL);
		update_register(&val64, data, mask);
		vhpet->base_count = val64;
		/* A running counter is restarted from the new base value */
		if (vhpet_counter_enabled(vhpet))
			vhpet_start_counting(vhpet);
		goto done;
	}

	/* Remaining registers are per-timer; find the one being addressed */
	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
		if (offset == HPET_TIMER_CAP_CNF(i) ||
		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
			vhpet_timer_update_config(vhpet, i, data, mask);
			break;
		}

		if (offset == HPET_TIMER_COMPARATOR(i) ||
		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
			old_compval = vhpet->timer[i].compval;
			old_comprate = vhpet->timer[i].comprate;
			if (vhpet_periodic_timer(vhpet, i)) {
				/*
				 * In periodic mode writes to the comparator
				 * change the 'compval' register only if the
				 * HPET_TCNF_VAL_SET bit is set in the config
				 * register.
				 */
				val64 = vhpet->timer[i].comprate;
				update_register(&val64, data, mask);
				vhpet->timer[i].comprate = val64;
				if ((vhpet->timer[i].cap_config &
				    HPET_TCNF_VAL_SET) != 0) {
					vhpet->timer[i].compval = val64;
				}
			} else {
				KASSERT(vhpet->timer[i].comprate == 0,
				    ("vhpet one-shot timer %d has invalid "
				    "rate %u", i, vhpet->timer[i].comprate));
				val64 = vhpet->timer[i].compval;
				update_register(&val64, data, mask);
				vhpet->timer[i].compval = val64;
			}
			/* VAL_SET is consumed by any comparator write */
			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;

			/* Re-arm only if something changed and we are running */
			if (vhpet->timer[i].compval != old_compval ||
			    vhpet->timer[i].comprate != old_comprate) {
				if (vhpet_counter_enabled(vhpet)) {
					counter = vhpet_counter(vhpet, &now);
					vhpet_start_timer(vhpet, i, counter,
					    now);
				}
			}
			break;
		}

		if (offset == HPET_TIMER_FSB_VAL(i) ||
		    offset == HPET_TIMER_FSB_ADDR(i)) {
			update_register(&vhpet->timer[i].msireg, data, mask);
			break;
		}
	}
done:
	VHPET_UNLOCK(vhpet);
	return (0);
}
604 
605 int
606 vhpet_mmio_read(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t *rval,
607     int size)
608 {
609 	int i, offset;
610 	struct vhpet *vhpet;
611 	uint64_t data;
612 
613 	vhpet = vm_hpet(vm);
614 	offset = gpa - VHPET_BASE;
615 
616 	VHPET_LOCK(vhpet);
617 
618 	/* Accesses to the HPET should be 4 or 8 bytes wide */
619 	if (size != 4 && size != 8) {
620 		data = 0;
621 		goto done;
622 	}
623 
624 	/* Access to the HPET should be naturally aligned to its width */
625 	if (offset & (size - 1)) {
626 		data = 0;
627 		goto done;
628 	}
629 
630 	if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
631 		data = vhpet_capabilities();
632 		goto done;
633 	}
634 
635 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
636 		data = vhpet->config;
637 		goto done;
638 	}
639 
640 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
641 		data = vhpet->isr;
642 		goto done;
643 	}
644 
645 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
646 		data = vhpet_counter(vhpet, NULL);
647 		goto done;
648 	}
649 
650 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
651 		if (offset == HPET_TIMER_CAP_CNF(i) ||
652 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
653 			data = vhpet->timer[i].cap_config;
654 			break;
655 		}
656 
657 		if (offset == HPET_TIMER_COMPARATOR(i) ||
658 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
659 			data = vhpet->timer[i].compval;
660 			break;
661 		}
662 
663 		if (offset == HPET_TIMER_FSB_VAL(i) ||
664 		    offset == HPET_TIMER_FSB_ADDR(i)) {
665 			data = vhpet->timer[i].msireg;
666 			break;
667 		}
668 	}
669 
670 	if (i >= VHPET_NUM_TIMERS)
671 		data = 0;
672 done:
673 	VHPET_UNLOCK(vhpet);
674 
675 	if (size == 4) {
676 		if (offset & 0x4)
677 			data >>= 32;
678 	}
679 	*rval = data;
680 	return (0);
681 }
682 
683 struct vhpet *
684 vhpet_init(struct vm *vm)
685 {
686 	int i, pincount;
687 	struct vhpet *vhpet;
688 	uint64_t allowed_irqs;
689 	struct vhpet_callout_arg *arg;
690 
691 	vhpet = kmem_zalloc(sizeof (struct vhpet), KM_SLEEP);
692 	vhpet->vm = vm;
693 	mutex_init(&vhpet->lock, NULL, MUTEX_ADAPTIVE, NULL);
694 
695 	pincount = vioapic_pincount(vm);
696 	if (pincount >= 32)
697 		allowed_irqs = 0xff000000;	/* irqs 24-31 */
698 	else if (pincount >= 20)
699 		allowed_irqs = 0xf << (pincount - 4);	/* 4 upper irqs */
700 	else
701 		allowed_irqs = 0;
702 
703 	/*
704 	 * Initialize HPET timer hardware state.
705 	 */
706 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
707 		vhpet->timer[i].cap_config = allowed_irqs << 32;
708 		vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
709 		vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
710 
711 		vhpet->timer[i].compval = 0xffffffff;
712 		callout_init(&vhpet->timer[i].callout, 1);
713 
714 		arg = &vhpet->timer[i].arg;
715 		arg->vhpet = vhpet;
716 		arg->timer_num = i;
717 	}
718 
719 	return (vhpet);
720 }
721 
722 void
723 vhpet_cleanup(struct vhpet *vhpet)
724 {
725 	int i;
726 
727 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
728 		callout_drain(&vhpet->timer[i].callout);
729 
730 	mutex_destroy(&vhpet->lock);
731 	kmem_free(vhpet, sizeof (*vhpet));
732 }
733 
734 int
735 vhpet_getcap(struct vm_hpet_cap *cap)
736 {
737 
738 	cap->capabilities = vhpet_capabilities();
739 	return (0);
740 }
741 void
742 vhpet_localize_resources(struct vhpet *vhpet)
743 {
744 	for (uint_t i = 0; i < VHPET_NUM_TIMERS; i++) {
745 		vmm_glue_callout_localize(&vhpet->timer[i].callout);
746 	}
747 }
748 
749 void
750 vhpet_pause(struct vhpet *vhpet)
751 {
752 	VHPET_LOCK(vhpet);
753 	for (uint_t i = 0; i < VHPET_NUM_TIMERS; i++) {
754 		struct vhpet_timer *timer = &vhpet->timer[i];
755 
756 		callout_stop(&timer->callout);
757 	}
758 	VHPET_UNLOCK(vhpet);
759 }
760 
761 void
762 vhpet_resume(struct vhpet *vhpet)
763 {
764 	VHPET_LOCK(vhpet);
765 	for (uint_t i = 0; i < VHPET_NUM_TIMERS; i++) {
766 		struct vhpet_timer *timer = &vhpet->timer[i];
767 
768 		if (timer->callout_expire != 0) {
769 			callout_reset_hrtime(&timer->callout,
770 			    timer->callout_expire, vhpet_handler,
771 			    &timer->arg, C_ABSOLUTE);
772 		}
773 	}
774 	VHPET_UNLOCK(vhpet);
775 }
776 
777 static int
778 vhpet_data_read(void *datap, const vmm_data_req_t *req)
779 {
780 	VERIFY3U(req->vdr_class, ==, VDC_HPET);
781 	VERIFY3U(req->vdr_version, ==, 1);
782 	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_hpet_v1));
783 
784 	struct vhpet *vhpet = datap;
785 	struct vdi_hpet_v1 *out = req->vdr_data;
786 
787 	VHPET_LOCK(vhpet);
788 	out->vh_config = vhpet->config;
789 	out->vh_isr = vhpet->isr;
790 	out->vh_count_base = vhpet->base_count;
791 	out->vh_time_base = vm_normalize_hrtime(vhpet->vm, vhpet->base_time);
792 	for (uint_t i = 0; i < 8; i++) {
793 		const struct vhpet_timer *timer = &vhpet->timer[i];
794 		struct vdi_hpet_timer_v1 *timer_out = &out->vh_timers[i];
795 
796 		timer_out->vht_config = timer->cap_config;
797 		timer_out->vht_msi = timer->msireg;
798 		timer_out->vht_comp_val = timer->compval;
799 		timer_out->vht_comp_rate = timer->comprate;
800 		if (timer->callout_expire != 0) {
801 			timer_out->vht_time_target =
802 			    vm_normalize_hrtime(vhpet->vm,
803 			    timer->callout_expire);
804 		} else {
805 			timer_out->vht_time_target = 0;
806 		}
807 	}
808 	VHPET_UNLOCK(vhpet);
809 
810 	return (0);
811 }
812 
/*
 * Outcome of vhpet_data_validate().  Anything other than VVE_OK causes
 * vhpet_data_write() to reject the import with EINVAL.
 */
enum vhpet_validation_error {
	VVE_OK,			/* payload passed all checks */
	VVE_BAD_CONFIG,		/* LegacyReplacement routing requested */
	VVE_BAD_BASE_TIME,	/* counter base time lies in the future */
	VVE_BAD_ISR,		/* ISR bit set for a non-existent timer */
	VVE_BAD_TIMER_CONFIG,	/* bad MSI/level combination or irq route */
	VVE_BAD_TIMER_ISR,	/* ISR bit set for a non-level timer */
	VVE_BAD_TIMER_TIME,	/* timer target precedes the base time */
};
822 
823 static enum vhpet_validation_error
824 vhpet_data_validate(const vmm_data_req_t *req, struct vm *vm)
825 {
826 	ASSERT(req->vdr_version == 1 &&
827 	    req->vdr_len >= sizeof (struct vdi_hpet_v1));
828 	const struct vdi_hpet_v1 *src = req->vdr_data;
829 
830 	/* LegacyReplacement Routing is not supported */
831 	if ((src->vh_config & HPET_CNF_LEG_RT) != 0) {
832 		return (VVE_BAD_CONFIG);
833 	}
834 
835 	/* A base time in the future makes no sense */
836 	const hrtime_t base_time = vm_denormalize_hrtime(vm, src->vh_time_base);
837 	if (base_time > gethrtime()) {
838 		return (VVE_BAD_BASE_TIME);
839 	}
840 
841 	/* All asserted ISRs must be associated with an existing timer */
842 	if ((src->vh_isr & ~(uint64_t)((1 << VHPET_NUM_TIMERS) - 1)) != 0) {
843 		return (VVE_BAD_ISR);
844 	}
845 
846 	for (uint_t i = 0; i < 8; i++) {
847 		const struct vdi_hpet_timer_v1 *timer = &src->vh_timers[i];
848 
849 		const bool msi_enabled =
850 		    (timer->vht_config & HPET_TCNF_FSB_EN) != 0;
851 		const bool level_triggered =
852 		    (timer->vht_config & HPET_TCNF_INT_TYPE) != 0;
853 		const bool irq_asserted = (src->vh_isr & (1 << i)) != 0;
854 		const uint32_t allowed_irqs = (timer->vht_config >> 32);
855 		const uint32_t irq_pin =
856 		    (timer->vht_config & HPET_TCNF_INT_ROUTE) >> 9;
857 
858 		if (msi_enabled) {
859 			if (level_triggered) {
860 				return (VVE_BAD_TIMER_CONFIG);
861 			}
862 		} else {
863 			/*
864 			 * Ensure interrupt route is valid as ensured by the
865 			 * logic in vhpet_timer_update_config.
866 			 */
867 			if (irq_pin != 0 &&
868 			    (allowed_irqs & (1 << irq_pin)) == 0) {
869 				return (VVE_BAD_TIMER_CONFIG);
870 			}
871 		}
872 		if (irq_asserted && !level_triggered) {
873 			return (VVE_BAD_TIMER_ISR);
874 		}
875 
876 		if (timer->vht_time_target != 0) {
877 			/*
878 			 * A timer scheduled earlier than the base time of the
879 			 * entire HPET makes no sense.
880 			 */
881 			const uint64_t timer_target =
882 			    vm_denormalize_hrtime(vm, timer->vht_time_target);
883 			if (timer_target < base_time) {
884 				return (VVE_BAD_TIMER_TIME);
885 			}
886 		}
887 	}
888 
889 	return (VVE_OK);
890 }
891 
892 static int
893 vhpet_data_write(void *datap, const vmm_data_req_t *req)
894 {
895 	VERIFY3U(req->vdr_class, ==, VDC_HPET);
896 	VERIFY3U(req->vdr_version, ==, 1);
897 	VERIFY3U(req->vdr_len, >=, sizeof (struct vdi_hpet_v1));
898 
899 	struct vhpet *vhpet = datap;
900 
901 	if (vhpet_data_validate(req, vhpet->vm) != VVE_OK) {
902 		return (EINVAL);
903 	}
904 	const struct vdi_hpet_v1 *src = req->vdr_data;
905 
906 	VHPET_LOCK(vhpet);
907 	vhpet->config = src->vh_config;
908 	vhpet->isr = src->vh_isr;
909 	vhpet->base_count = src->vh_count_base;
910 	vhpet->base_time = vm_denormalize_hrtime(vhpet->vm, src->vh_time_base);
911 
912 	for (uint_t i = 0; i < 8; i++) {
913 		struct vhpet_timer *timer = &vhpet->timer[i];
914 		const struct vdi_hpet_timer_v1 *timer_src = &src->vh_timers[i];
915 
916 		timer->cap_config = timer_src->vht_config;
917 		timer->msireg = timer_src->vht_msi;
918 		timer->compval = timer_src->vht_comp_val;
919 		timer->comprate = timer_src->vht_comp_rate;
920 
921 		/*
922 		 * For now, any state associating an IOAPIC pin with a given
923 		 * timer is not kept in sync. (We will not increment or
924 		 * decrement a pin level based on the timer state.)  It is left
925 		 * to the consumer to keep those pin levels maintained if
926 		 * modifying either the HPET or the IOAPIC.
927 		 *
928 		 * If both the HPET and IOAPIC are exported and then imported,
929 		 * this will occur naturally, as any asserted IOAPIC pin level
930 		 * from the HPET would come along for the ride.
931 		 */
932 
933 		if (timer_src->vht_time_target != 0) {
934 			timer->callout_expire = vm_denormalize_hrtime(vhpet->vm,
935 			    timer_src->vht_time_target);
936 
937 			if (!vm_is_paused(vhpet->vm)) {
938 				callout_reset_hrtime(&timer->callout,
939 				    timer->callout_expire, vhpet_handler,
940 				    &timer->arg, C_ABSOLUTE);
941 			}
942 		} else {
943 			timer->callout_expire = 0;
944 		}
945 	}
946 	VHPET_UNLOCK(vhpet);
947 	return (0);
948 }
949 
/*
 * Descriptor tying version 1 of the VDC_HPET data class to the read and
 * write handlers above, passed to VMM_DATA_VERSION().
 */
static const vmm_data_version_entry_t hpet_v1 = {
	.vdve_class = VDC_HPET,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_hpet_v1),
	.vdve_readf = vhpet_data_read,
	.vdve_writef = vhpet_data_write,
};
VMM_DATA_VERSION(hpet_v1);
957 VMM_DATA_VERSION(hpet_v1);
958