xref: /illumos-gate/usr/src/uts/i86pc/io/apix/apix_utils.c (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright (c) 2010, Intel Corporation.
27  * All rights reserved.
28  */
29 /*
30  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
31  */
32 
33 #include <sys/processor.h>
34 #include <sys/time.h>
35 #include <sys/psm.h>
36 #include <sys/smp_impldefs.h>
37 #include <sys/cram.h>
38 #include <sys/acpi/acpi.h>
39 #include <sys/acpica.h>
40 #include <sys/psm_common.h>
41 #include <sys/pit.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/ddi_impldefs.h>
45 #include <sys/pci.h>
46 #include <sys/promif.h>
47 #include <sys/x86_archext.h>
48 #include <sys/cpc_impl.h>
49 #include <sys/uadmin.h>
50 #include <sys/panic.h>
51 #include <sys/debug.h>
52 #include <sys/archsystm.h>
53 #include <sys/trap.h>
54 #include <sys/machsystm.h>
55 #include <sys/sysmacros.h>
56 #include <sys/cpuvar.h>
57 #include <sys/rm_platter.h>
58 #include <sys/privregs.h>
59 #include <sys/note.h>
60 #include <sys/pci_intr_lib.h>
61 #include <sys/spl.h>
62 #include <sys/clock.h>
63 #include <sys/dditypes.h>
64 #include <sys/sunddi.h>
65 #include <sys/x_call.h>
66 #include <sys/reboot.h>
67 #include <sys/apix.h>
68 
69 static int apix_get_avail_vector_oncpu(uint32_t, int, int);
70 static apix_vector_t *apix_init_vector(processorid_t, uchar_t);
71 static void apix_cleanup_vector(apix_vector_t *);
72 static void apix_insert_av(apix_vector_t *, void *, avfunc, caddr_t, caddr_t,
73     uint64_t *, int, dev_info_t *);
74 static void apix_remove_av(apix_vector_t *, struct autovec *);
75 static void apix_clear_dev_map(dev_info_t *, int, int);
76 static boolean_t apix_is_cpu_enabled(processorid_t);
77 static void apix_wait_till_seen(processorid_t, int);
78 
/* Interrupt number stored in a DDI interrupt handle; 0 for a NULL handle. */
#define	GET_INTR_INUM(ihdlp)		\
	(((ihdlp) == NULL) ? 0 : ((ddi_intr_handle_impl_t *)(ihdlp))->ih_inum)
81 
82 apix_rebind_info_t apix_rebindinfo = {0, 0, 0, NULL, 0, NULL};
83 
84 /*
85  * Allocate IPI
86  *
87  * Return vector number or 0 on error
88  */
89 uchar_t
90 apix_alloc_ipi(int ipl)
91 {
92 	apix_vector_t *vecp;
93 	uchar_t vector;
94 	int cpun;
95 	int nproc;
96 
97 	APIX_ENTER_CPU_LOCK(0);
98 
99 	vector = apix_get_avail_vector_oncpu(0, APIX_IPI_MIN, APIX_IPI_MAX);
100 	if (vector == 0) {
101 		APIX_LEAVE_CPU_LOCK(0);
102 		cmn_err(CE_WARN, "apix: no available IPI\n");
103 		apic_error |= APIC_ERR_GET_IPIVECT_FAIL;
104 		return (0);
105 	}
106 
107 	nproc = max(apic_nproc, apic_max_nproc);
108 	for (cpun = 0; cpun < nproc; cpun++) {
109 		vecp = xv_vector(cpun, vector);
110 		if (vecp == NULL) {
111 			vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
112 			if (vecp == NULL) {
113 				cmn_err(CE_WARN, "apix: No memory for ipi");
114 				goto fail;
115 			}
116 			xv_vector(cpun, vector) = vecp;
117 		}
118 		vecp->v_state = APIX_STATE_ALLOCED;
119 		vecp->v_type = APIX_TYPE_IPI;
120 		vecp->v_cpuid = vecp->v_bound_cpuid = cpun;
121 		vecp->v_vector = vector;
122 		vecp->v_pri = ipl;
123 	}
124 	APIX_LEAVE_CPU_LOCK(0);
125 	return (vector);
126 
127 fail:
128 	while (--cpun >= 0)
129 		apix_cleanup_vector(xv_vector(cpun, vector));
130 	APIX_LEAVE_CPU_LOCK(0);
131 	return (0);
132 }
133 
134 /*
135  * Add IPI service routine
136  */
137 static int
138 apix_add_ipi(int ipl, avfunc xxintr, char *name, int vector,
139     caddr_t arg1, caddr_t arg2)
140 {
141 	int cpun;
142 	apix_vector_t *vecp;
143 	int nproc;
144 
145 	ASSERT(vector >= APIX_IPI_MIN && vector <= APIX_IPI_MAX);
146 
147 	nproc = max(apic_nproc, apic_max_nproc);
148 	for (cpun = 0; cpun < nproc; cpun++) {
149 		APIX_ENTER_CPU_LOCK(cpun);
150 		vecp = xv_vector(cpun, vector);
151 		apix_insert_av(vecp, NULL, xxintr, arg1, arg2, NULL, ipl, NULL);
152 		vecp->v_state = APIX_STATE_ENABLED;
153 		APIX_LEAVE_CPU_LOCK(cpun);
154 	}
155 
156 	APIC_VERBOSE(IPI, (CE_CONT, "apix: add ipi for %s, vector %x "
157 	    "ipl %x\n", name, vector, ipl));
158 
159 	return (1);
160 }
161 
162 /*
163  * Find and return first free vector in range (start, end)
164  */
165 static int
166 apix_get_avail_vector_oncpu(uint32_t cpuid, int start, int end)
167 {
168 	int i;
169 	apix_impl_t *apixp = apixs[cpuid];
170 
171 	for (i = start; i <= end; i++) {
172 		if (APIC_CHECK_RESERVE_VECTORS(i))
173 			continue;
174 		if (IS_VECT_FREE(apixp->x_vectbl[i]))
175 			return (i);
176 	}
177 
178 	return (0);
179 }
180 
181 /*
182  * Allocate a vector on specified cpu
183  *
184  * Return NULL on error
185  */
186 static apix_vector_t *
187 apix_alloc_vector_oncpu(uint32_t cpuid, dev_info_t *dip, int inum, int type)
188 {
189 	processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
190 	apix_vector_t *vecp;
191 	int vector;
192 
193 	ASSERT(APIX_CPU_LOCK_HELD(tocpu));
194 
195 	/* find free vector */
196 	vector = apix_get_avail_vector_oncpu(tocpu, APIX_AVINTR_MIN,
197 	    APIX_AVINTR_MAX);
198 	if (vector == 0)
199 		return (NULL);
200 
201 	vecp = apix_init_vector(tocpu, vector);
202 	vecp->v_type = (ushort_t)type;
203 	vecp->v_inum = inum;
204 	vecp->v_flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;
205 
206 	if (dip != NULL)
207 		apix_set_dev_map(vecp, dip, inum);
208 
209 	return (vecp);
210 }
211 
212 /*
213  * Allocates "count" contiguous MSI vectors starting at the proper alignment.
214  * Caller needs to make sure that count has to be power of 2 and should not
215  * be < 1.
216  *
217  * Return first vector number
218  */
219 apix_vector_t *
220 apix_alloc_nvectors_oncpu(uint32_t cpuid, dev_info_t *dip, int inum,
221     int count, int type)
222 {
223 	int i, msibits, start = 0, navail = 0;
224 	apix_vector_t *vecp, *startp = NULL;
225 	processorid_t tocpu = cpuid & ~IRQ_USER_BOUND;
226 	uint_t flags;
227 
228 	ASSERT(APIX_CPU_LOCK_HELD(tocpu));
229 
230 	/*
231 	 * msibits is the no. of lower order message data bits for the
232 	 * allocated MSI vectors and is used to calculate the aligned
233 	 * starting vector
234 	 */
235 	msibits = count - 1;
236 
237 	/* It has to be contiguous */
238 	for (i = APIX_AVINTR_MIN; i <= APIX_AVINTR_MAX; i++) {
239 		if (!IS_VECT_FREE(xv_vector(tocpu, i)))
240 			continue;
241 
242 		/*
243 		 * starting vector has to be aligned accordingly for
244 		 * multiple MSIs
245 		 */
246 		if (msibits)
247 			i = (i + msibits) & ~msibits;
248 
249 		for (navail = 0, start = i; i <= APIX_AVINTR_MAX; i++) {
250 			if (!IS_VECT_FREE(xv_vector(tocpu, i)))
251 				break;
252 			if (APIC_CHECK_RESERVE_VECTORS(i))
253 				break;
254 			if (++navail == count)
255 				goto done;
256 		}
257 	}
258 
259 	return (NULL);
260 
261 done:
262 	flags = (cpuid & IRQ_USER_BOUND) ? APIX_VECT_USER_BOUND : 0;
263 
264 	for (i = 0; i < count; i++) {
265 		if ((vecp = apix_init_vector(tocpu, start + i)) == NULL)
266 			goto fail;
267 
268 		vecp->v_type = (ushort_t)type;
269 		vecp->v_inum = inum + i;
270 		vecp->v_flags = flags;
271 
272 		if (dip != NULL)
273 			apix_set_dev_map(vecp, dip, inum + i);
274 
275 		if (i == 0)
276 			startp = vecp;
277 	}
278 
279 	return (startp);
280 
281 fail:
282 	while (i-- > 0) {	/* Free allocated vectors */
283 		vecp = xv_vector(tocpu, start + i);
284 		apix_clear_dev_map(dip, inum + i, type);
285 		apix_cleanup_vector(vecp);
286 	}
287 	return (NULL);
288 }
289 
290 #define	APIX_WRITE_MSI_DATA(_hdl, _cap, _ctrl, _v)\
291 do {\
292 	if ((_ctrl) & PCI_MSI_64BIT_MASK)\
293 		pci_config_put16((_hdl), (_cap) + PCI_MSI_64BIT_DATA, (_v));\
294 	else\
295 		pci_config_put16((_hdl), (_cap) + PCI_MSI_32BIT_DATA, (_v));\
296 _NOTE(CONSTCOND)} while (0)
297 
298 static void
299 apix_pci_msi_enable_vector(apix_vector_t *vecp, dev_info_t *dip, int type,
300     int inum, int count, uchar_t vector, int target_apic_id)
301 {
302 	uint64_t		msi_addr, msi_data;
303 	ushort_t		msi_ctrl;
304 	int			i, cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
305 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
306 	msi_regs_t		msi_regs;
307 	void			*intrmap_tbl[PCI_MSI_MAX_INTRS];
308 
309 	DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: dip=0x%p\n"
310 	    "\tdriver = %s, inum=0x%x vector=0x%x apicid=0x%x\n", (void *)dip,
311 	    ddi_driver_name(dip), inum, vector, target_apic_id));
312 
313 	ASSERT((handle != NULL) && (cap_ptr != 0));
314 
315 	msi_regs.mr_data = vector;
316 	msi_regs.mr_addr = target_apic_id;
317 
318 	for (i = 0; i < count; i++)
319 		intrmap_tbl[i] = xv_intrmap_private(vecp->v_cpuid, vector + i);
320 	apic_vt_ops->apic_intrmap_alloc_entry(intrmap_tbl, dip, type,
321 	    count, 0xff);
322 	for (i = 0; i < count; i++)
323 		xv_intrmap_private(vecp->v_cpuid, vector + i) = intrmap_tbl[i];
324 
325 	apic_vt_ops->apic_intrmap_map_entry(vecp->v_intrmap_private,
326 	    (void *)&msi_regs, type, count);
327 	apic_vt_ops->apic_intrmap_record_msi(vecp->v_intrmap_private,
328 	    &msi_regs);
329 
330 	/* MSI Address */
331 	msi_addr = msi_regs.mr_addr;
332 
333 	/* MSI Data: MSI is edge triggered according to spec */
334 	msi_data = msi_regs.mr_data;
335 
336 	DDI_INTR_IMPLDBG((CE_CONT, "apix_pci_msi_enable_vector: addr=0x%lx "
337 	    "data=0x%lx\n", (long)msi_addr, (long)msi_data));
338 
339 	if (type == APIX_TYPE_MSI) {
340 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
341 
342 		/* Set the bits to inform how many MSIs are enabled */
343 		msi_ctrl |= ((highbit(count) - 1) << PCI_MSI_MME_SHIFT);
344 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
345 
346 		if ((vecp->v_flags & APIX_VECT_MASKABLE) == 0)
347 			APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl,
348 			    APIX_RESV_VECTOR);
349 
350 		pci_config_put32(handle,
351 		    cap_ptr + PCI_MSI_ADDR_OFFSET, msi_addr);
352 		if (msi_ctrl &  PCI_MSI_64BIT_MASK)
353 			pci_config_put32(handle,
354 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, msi_addr >> 32);
355 
356 		APIX_WRITE_MSI_DATA(handle, cap_ptr, msi_ctrl, msi_data);
357 	} else if (type == APIX_TYPE_MSIX) {
358 		uintptr_t	off;
359 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(dip);
360 
361 		/* Offset into the "inum"th entry in the MSI-X table */
362 		off = (uintptr_t)msix_p->msix_tbl_addr +
363 		    (inum * PCI_MSIX_VECTOR_SIZE);
364 
365 		ddi_put32(msix_p->msix_tbl_hdl,
366 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), msi_data);
367 		ddi_put64(msix_p->msix_tbl_hdl,
368 		    (uint64_t *)(off + PCI_MSIX_LOWER_ADDR_OFFSET), msi_addr);
369 	}
370 }
371 
372 static void
373 apix_pci_msi_enable_mode(dev_info_t *dip, int type, int inum)
374 {
375 	ushort_t		msi_ctrl;
376 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
377 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(dip);
378 
379 	ASSERT((handle != NULL) && (cap_ptr != 0));
380 
381 	if (type == APIX_TYPE_MSI) {
382 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
383 		if ((msi_ctrl & PCI_MSI_ENABLE_BIT))
384 			return;
385 
386 		msi_ctrl |= PCI_MSI_ENABLE_BIT;
387 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
388 
389 	} else if (type == DDI_INTR_TYPE_MSIX) {
390 		uintptr_t	off;
391 		uint32_t	mask;
392 		ddi_intr_msix_t	*msix_p;
393 
394 		msix_p = i_ddi_get_msix(dip);
395 
396 		/* Offset into "inum"th entry in the MSI-X table & clear mask */
397 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
398 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
399 
400 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
401 
402 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask & ~1));
403 
404 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
405 
406 		if (!(msi_ctrl & PCI_MSIX_ENABLE_BIT)) {
407 			msi_ctrl |= PCI_MSIX_ENABLE_BIT;
408 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
409 			    msi_ctrl);
410 		}
411 	}
412 }
413 
414 /*
415  * Setup interrupt, pogramming IO-APIC or MSI/X address/data.
416  */
417 void
418 apix_enable_vector(apix_vector_t *vecp)
419 {
420 	int tocpu = vecp->v_cpuid, type = vecp->v_type;
421 	apic_cpus_info_t *cpu_infop;
422 	ulong_t iflag;
423 
424 	ASSERT(tocpu < apic_nproc);
425 
426 	cpu_infop = &apic_cpus[tocpu];
427 	if (vecp->v_flags & APIX_VECT_USER_BOUND)
428 		cpu_infop->aci_bound++;
429 	else
430 		cpu_infop->aci_temp_bound++;
431 
432 	iflag = intr_clear();
433 	lock_set(&apic_ioapic_lock);
434 
435 	if (!DDI_INTR_IS_MSI_OR_MSIX(type)) {	/* fixed */
436 		apix_intx_enable(vecp->v_inum);
437 	} else {
438 		int inum = vecp->v_inum;
439 		dev_info_t *dip = APIX_GET_DIP(vecp);
440 		int count = i_ddi_intr_get_current_nintrs(dip);
441 
442 		if (type == APIX_TYPE_MSI) {	/* MSI */
443 			if (inum == apix_get_max_dev_inum(dip, type)) {
444 				/* last one */
445 				uchar_t start_inum = inum + 1 - count;
446 				uchar_t start_vect = vecp->v_vector + 1 - count;
447 				apix_vector_t *start_vecp =
448 				    xv_vector(vecp->v_cpuid, start_vect);
449 
450 				APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
451 				    "apix_pci_msi_enable_vector\n"));
452 				apix_pci_msi_enable_vector(start_vecp, dip,
453 				    type, start_inum, count, start_vect,
454 				    cpu_infop->aci_local_id);
455 
456 				APIC_VERBOSE(INTR, (CE_CONT, "apix: call "
457 				    "apix_pci_msi_enable_mode\n"));
458 				apix_pci_msi_enable_mode(dip, type, inum);
459 			}
460 		} else {				/* MSI-X */
461 			apix_pci_msi_enable_vector(vecp, dip,
462 			    type, inum, 1, vecp->v_vector,
463 			    cpu_infop->aci_local_id);
464 			apix_pci_msi_enable_mode(dip, type, inum);
465 		}
466 	}
467 	vecp->v_state = APIX_STATE_ENABLED;
468 	apic_redist_cpu_skip &= ~(1 << tocpu);
469 
470 	lock_clear(&apic_ioapic_lock);
471 	intr_restore(iflag);
472 }
473 
474 /*
475  * Disable the interrupt
476  */
477 void
478 apix_disable_vector(apix_vector_t *vecp)
479 {
480 	struct autovec *avp = vecp->v_autovect;
481 	ulong_t iflag;
482 
483 	ASSERT(avp != NULL);
484 
485 	iflag = intr_clear();
486 	lock_set(&apic_ioapic_lock);
487 
488 	switch (vecp->v_type) {
489 	case APIX_TYPE_MSI:
490 		ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
491 		/*
492 		 * Disable the MSI vector
493 		 * Make sure we only disable on the last
494 		 * of the multi-MSI support
495 		 */
496 		if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
497 			apic_pci_msi_disable_mode(avp->av_dip,
498 			    DDI_INTR_TYPE_MSI);
499 		}
500 		break;
501 	case APIX_TYPE_MSIX:
502 		ASSERT(avp->av_vector != NULL && avp->av_dip != NULL);
503 		/*
504 		 * Disable the MSI-X vector
505 		 * needs to clear its mask and addr/data for each MSI-X
506 		 */
507 		apic_pci_msi_unconfigure(avp->av_dip, DDI_INTR_TYPE_MSIX,
508 		    vecp->v_inum);
509 		/*
510 		 * Make sure we only disable on the last MSI-X
511 		 */
512 		if (i_ddi_intr_get_current_nenables(avp->av_dip) == 1) {
513 			apic_pci_msi_disable_mode(avp->av_dip,
514 			    DDI_INTR_TYPE_MSIX);
515 		}
516 		break;
517 	default:
518 		apix_intx_disable(vecp->v_inum);
519 		break;
520 	}
521 
522 	if (!(apic_cpus[vecp->v_cpuid].aci_status & APIC_CPU_SUSPEND))
523 		vecp->v_state = APIX_STATE_DISABLED;
524 	apic_vt_ops->apic_intrmap_free_entry(&vecp->v_intrmap_private);
525 	vecp->v_intrmap_private = NULL;
526 
527 	lock_clear(&apic_ioapic_lock);
528 	intr_restore(iflag);
529 }
530 
531 /*
532  * Mark vector as obsoleted or freed. The vector is marked
533  * obsoleted if there are pending requests on it. Otherwise,
534  * free the vector. The obsoleted vectors get freed after
535  * being serviced.
536  *
537  * Return 1 on being obosoleted and 0 on being freed.
538  */
539 #define	INTR_BUSY(_avp)\
540 	((((volatile ushort_t)(_avp)->av_flags) &\
541 	(AV_PENTRY_PEND | AV_PENTRY_ONPROC)) != 0)
542 #define	LOCAL_WITH_INTR_DISABLED(_cpuid)\
543 	((_cpuid) == psm_get_cpu_id() && !interrupts_enabled())
544 static uint64_t dummy_tick;
545 
546 int
547 apix_obsolete_vector(apix_vector_t *vecp)
548 {
549 	struct autovec *avp = vecp->v_autovect;
550 	int repeats, tries, ipl, busy = 0, cpuid = vecp->v_cpuid;
551 	apix_impl_t *apixp = apixs[cpuid];
552 
553 	ASSERT(APIX_CPU_LOCK_HELD(cpuid));
554 
555 	for (avp = vecp->v_autovect; avp != NULL; avp = avp->av_link) {
556 		if (avp->av_vector == NULL)
557 			continue;
558 
559 		if (LOCAL_WITH_INTR_DISABLED(cpuid)) {
560 			int bit, index, irr;
561 
562 			if (INTR_BUSY(avp)) {
563 				busy++;
564 				continue;
565 			}
566 
567 			/* check IRR for pending interrupts */
568 			index = vecp->v_vector / 32;
569 			bit = vecp->v_vector % 32;
570 			irr = apic_reg_ops->apic_read(APIC_IRR_REG + index);
571 			if ((irr & (1 << bit)) != 0)
572 				busy++;
573 
574 			if (!busy)
575 				apix_remove_av(vecp, avp);
576 
577 			continue;
578 		}
579 
580 		repeats = 0;
581 		do {
582 			repeats++;
583 			for (tries = 0; tries < apic_max_reps_clear_pending;
584 			    tries++)
585 				if (!INTR_BUSY(avp))
586 					break;
587 		} while (INTR_BUSY(avp) &&
588 		    (repeats < apic_max_reps_clear_pending));
589 
590 		if (INTR_BUSY(avp))
591 			busy++;
592 		else {
593 			/*
594 			 * Interrupt is not in pending list or being serviced.
595 			 * However it might be cached in Local APIC's IRR
596 			 * register. It's impossible to check another CPU's
597 			 * IRR register. Then wait till lower levels finish
598 			 * running.
599 			 */
600 			for (ipl = 1; ipl < MIN(LOCK_LEVEL, vecp->v_pri); ipl++)
601 				apix_wait_till_seen(cpuid, ipl);
602 			if (INTR_BUSY(avp))
603 				busy++;
604 		}
605 
606 		if (!busy)
607 			apix_remove_av(vecp, avp);
608 	}
609 
610 	if (busy) {
611 		apix_vector_t *tp = apixp->x_obsoletes;
612 
613 		if (vecp->v_state == APIX_STATE_OBSOLETED)
614 			return (1);
615 
616 		vecp->v_state = APIX_STATE_OBSOLETED;
617 		vecp->v_next = NULL;
618 		if (tp == NULL)
619 			apixp->x_obsoletes = vecp;
620 		else {
621 			while (tp->v_next != NULL)
622 				tp = tp->v_next;
623 			tp->v_next = vecp;
624 		}
625 		return (1);
626 	}
627 
628 	/* interrupt is not busy */
629 	if (vecp->v_state == APIX_STATE_OBSOLETED) {
630 		/* remove from obsoleted list */
631 		apixp->x_obsoletes = vecp->v_next;
632 		vecp->v_next = NULL;
633 	}
634 	apix_cleanup_vector(vecp);
635 	return (0);
636 }
637 
638 /*
639  * Duplicate number of continuous vectors to specified target vectors.
640  */
641 static void
642 apix_dup_vectors(apix_vector_t *oldp, apix_vector_t *newp, int count)
643 {
644 	struct autovec *avp;
645 	apix_vector_t *fromp, *top;
646 	processorid_t oldcpu = oldp->v_cpuid, newcpu = newp->v_cpuid;
647 	uchar_t oldvec = oldp->v_vector, newvec = newp->v_vector;
648 	int i, inum;
649 
650 	ASSERT(oldp->v_type != APIX_TYPE_IPI);
651 
652 	for (i = 0; i < count; i++) {
653 		fromp = xv_vector(oldcpu, oldvec + i);
654 		top = xv_vector(newcpu, newvec + i);
655 		ASSERT(fromp != NULL && top != NULL);
656 
657 		/* copy over original one */
658 		top->v_state = fromp->v_state;
659 		top->v_type = fromp->v_type;
660 		top->v_bound_cpuid = fromp->v_bound_cpuid;
661 		top->v_inum = fromp->v_inum;
662 		top->v_flags = fromp->v_flags;
663 		top->v_intrmap_private = fromp->v_intrmap_private;
664 
665 		for (avp = fromp->v_autovect; avp != NULL; avp = avp->av_link) {
666 			if (avp->av_vector == NULL)
667 				continue;
668 
669 			apix_insert_av(top, avp->av_intr_id, avp->av_vector,
670 			    avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
671 			    avp->av_prilevel, avp->av_dip);
672 
673 			if (fromp->v_type == APIX_TYPE_FIXED &&
674 			    avp->av_dip != NULL) {
675 				inum = GET_INTR_INUM(avp->av_intr_id);
676 				apix_set_dev_map(top, avp->av_dip, inum);
677 			}
678 		}
679 
680 		if (DDI_INTR_IS_MSI_OR_MSIX(fromp->v_type) &&
681 		    fromp->v_devp != NULL)
682 			apix_set_dev_map(top, fromp->v_devp->dv_dip,
683 			    fromp->v_devp->dv_inum);
684 	}
685 }
686 
687 static apix_vector_t *
688 apix_init_vector(processorid_t cpuid, uchar_t vector)
689 {
690 	apix_impl_t *apixp = apixs[cpuid];
691 	apix_vector_t *vecp = apixp->x_vectbl[vector];
692 
693 	ASSERT(IS_VECT_FREE(vecp));
694 
695 	if (vecp == NULL) {
696 		vecp = kmem_zalloc(sizeof (apix_vector_t), KM_NOSLEEP);
697 		if (vecp == NULL) {
698 			cmn_err(CE_WARN, "apix: no memory to allocate vector");
699 			return (NULL);
700 		}
701 		apixp->x_vectbl[vector] = vecp;
702 	}
703 	vecp->v_state = APIX_STATE_ALLOCED;
704 	vecp->v_cpuid = vecp->v_bound_cpuid = cpuid;
705 	vecp->v_vector = vector;
706 
707 	return (vecp);
708 }
709 
710 static void
711 apix_cleanup_vector(apix_vector_t *vecp)
712 {
713 	ASSERT(vecp->v_share == 0);
714 	vecp->v_bound_cpuid = IRQ_UNINIT;
715 	vecp->v_state = APIX_STATE_FREED;
716 	vecp->v_type = 0;
717 	vecp->v_flags = 0;
718 	vecp->v_busy = 0;
719 	vecp->v_intrmap_private = NULL;
720 }
721 
722 static void
723 apix_dprint_vector(apix_vector_t *vecp, dev_info_t *dip, int count)
724 {
725 #ifdef DEBUG
726 	major_t major;
727 	char *name, *drv_name;
728 	int instance, len, t_len;
729 	char mesg[1024] = "apix: ";
730 
731 	t_len = sizeof (mesg);
732 	len = strlen(mesg);
733 	if (dip != NULL) {
734 		name = ddi_get_name(dip);
735 		major = ddi_name_to_major(name);
736 		drv_name = ddi_major_to_name(major);
737 		instance = ddi_get_instance(dip);
738 		(void) snprintf(mesg + len, t_len - len, "%s (%s) instance %d ",
739 		    name, drv_name, instance);
740 	}
741 	len = strlen(mesg);
742 
743 	switch (vecp->v_type) {
744 	case APIX_TYPE_FIXED:
745 		(void) snprintf(mesg + len, t_len - len, "irqno %d",
746 		    vecp->v_inum);
747 		break;
748 	case APIX_TYPE_MSI:
749 		(void) snprintf(mesg + len, t_len - len,
750 		    "msi inum %d (count %d)", vecp->v_inum, count);
751 		break;
752 	case APIX_TYPE_MSIX:
753 		(void) snprintf(mesg + len, t_len - len, "msi-x inum %d",
754 		    vecp->v_inum);
755 		break;
756 	default:
757 		break;
758 
759 	}
760 
761 	APIC_VERBOSE(ALLOC, (CE_CONT, "%s allocated with vector 0x%x on "
762 	    "cpu %d\n", mesg, vecp->v_vector, vecp->v_cpuid));
763 #endif	/* DEBUG */
764 }
765 
766 /*
767  * Operations on avintr
768  */
769 
770 #define	INIT_AUTOVEC(p, intr_id, f, arg1, arg2, ticksp, ipl, dip)	\
771 do { \
772 	(p)->av_intr_id = intr_id;	\
773 	(p)->av_vector = f;		\
774 	(p)->av_intarg1 = arg1;		\
775 	(p)->av_intarg2 = arg2;		\
776 	(p)->av_ticksp = ticksp;	\
777 	(p)->av_prilevel = ipl;		\
778 	(p)->av_dip = dip;		\
779 	(p)->av_flags = 0;		\
780 _NOTE(CONSTCOND)} while (0)
781 
782 /*
783  * Insert an interrupt service routine into chain by its priority from
784  * high to low
785  */
786 static void
787 apix_insert_av(apix_vector_t *vecp, void *intr_id, avfunc f, caddr_t arg1,
788     caddr_t arg2, uint64_t *ticksp, int ipl, dev_info_t *dip)
789 {
790 	struct autovec *p, *prep, *mem;
791 
792 	APIC_VERBOSE(INTR, (CE_CONT, "apix_insert_av: dip %p, vector 0x%x, "
793 	    "cpu %d\n", (void *)dip, vecp->v_vector, vecp->v_cpuid));
794 
795 	mem = kmem_zalloc(sizeof (struct autovec), KM_SLEEP);
796 	INIT_AUTOVEC(mem, intr_id, f, arg1, arg2, ticksp, ipl, dip);
797 	if (vecp->v_type == APIX_TYPE_FIXED && apic_level_intr[vecp->v_inum])
798 		mem->av_flags |= AV_PENTRY_LEVEL;
799 
800 	vecp->v_share++;
801 	vecp->v_pri = (ipl > vecp->v_pri) ? ipl : vecp->v_pri;
802 	if (vecp->v_autovect == NULL) {	/* Nothing on list - put it at head */
803 		vecp->v_autovect = mem;
804 		return;
805 	}
806 
807 	if (DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) {	/* MSI/X */
808 		ASSERT(vecp->v_share == 1);	/* No sharing for MSI/X */
809 
810 		INIT_AUTOVEC(vecp->v_autovect, intr_id, f, arg1, arg2, ticksp,
811 		    ipl, dip);
812 		prep = vecp->v_autovect->av_link;
813 		vecp->v_autovect->av_link = NULL;
814 
815 		/* Free the following autovect chain */
816 		while (prep != NULL) {
817 			ASSERT(prep->av_vector == NULL);
818 
819 			p = prep;
820 			prep = prep->av_link;
821 			kmem_free(p, sizeof (struct autovec));
822 		}
823 
824 		kmem_free(mem, sizeof (struct autovec));
825 		return;
826 	}
827 
828 	/* find where it goes in list */
829 	prep = NULL;
830 	for (p = vecp->v_autovect; p != NULL; p = p->av_link) {
831 		if (p->av_vector && p->av_prilevel <= ipl)
832 			break;
833 		prep = p;
834 	}
835 	if (prep != NULL) {
836 		if (prep->av_vector == NULL) {	/* freed struct available */
837 			INIT_AUTOVEC(prep, intr_id, f, arg1, arg2,
838 			    ticksp, ipl, dip);
839 			prep->av_flags = mem->av_flags;
840 			kmem_free(mem, sizeof (struct autovec));
841 			return;
842 		}
843 
844 		mem->av_link = prep->av_link;
845 		prep->av_link = mem;
846 	} else {
847 		/* insert new intpt at beginning of chain */
848 		mem->av_link = vecp->v_autovect;
849 		vecp->v_autovect = mem;
850 	}
851 }
852 
853 /*
854  * After having made a change to an autovector list, wait until we have
855  * seen specified cpu not executing an interrupt at that level--so we
856  * know our change has taken effect completely (no old state in registers,
857  * etc).
858  */
859 #define	APIX_CPU_ENABLED(_cp) \
860 	(quiesce_active == 0 && \
861 	(((_cp)->cpu_flags & (CPU_QUIESCED|CPU_OFFLINE)) == 0))
862 
863 static void
864 apix_wait_till_seen(processorid_t cpuid, int ipl)
865 {
866 	struct cpu *cp = cpu[cpuid];
867 
868 	if (cp == NULL || LOCAL_WITH_INTR_DISABLED(cpuid))
869 		return;
870 
871 	/*
872 	 * Don't wait if the CPU is quiesced or offlined. This can happen
873 	 * when a CPU is running pause thread but hardware triggered an
874 	 * interrupt and the interrupt gets queued.
875 	 */
876 	for (;;) {
877 		if (!INTR_ACTIVE((volatile struct cpu *)cpu[cpuid], ipl) &&
878 		    (!APIX_CPU_ENABLED(cp) ||
879 		    !INTR_PENDING((volatile apix_impl_t *)apixs[cpuid], ipl)))
880 			return;
881 	}
882 }
883 
884 static void
885 apix_remove_av(apix_vector_t *vecp, struct autovec *target)
886 {
887 	int hi_pri = 0;
888 	struct autovec *p;
889 
890 	if (target == NULL)
891 		return;
892 
893 	APIC_VERBOSE(INTR, (CE_CONT, "apix_remove_av: dip %p, vector 0x%x, "
894 	    "cpu %d\n", (void *)target->av_dip, vecp->v_vector, vecp->v_cpuid));
895 
896 	for (p = vecp->v_autovect; p; p = p->av_link) {
897 		if (p == target || p->av_vector == NULL)
898 			continue;
899 		hi_pri = (p->av_prilevel > hi_pri) ? p->av_prilevel : hi_pri;
900 	}
901 
902 	vecp->v_share--;
903 	vecp->v_pri = hi_pri;
904 
905 	/*
906 	 * This drops the handler from the chain, it can no longer be called.
907 	 * However, there is no guarantee that the handler is not currently
908 	 * still executing.
909 	 */
910 	target->av_vector = NULL;
911 	/*
912 	 * There is a race where we could be just about to pick up the ticksp
913 	 * pointer to increment it after returning from the service routine
914 	 * in av_dispatch_autovect.  Rather than NULL it out let's just point
915 	 * it off to something safe so that any final tick update attempt
916 	 * won't fault.
917 	 */
918 	target->av_ticksp = &dummy_tick;
919 	apix_wait_till_seen(vecp->v_cpuid, target->av_prilevel);
920 }
921 
922 static struct autovec *
923 apix_find_av(apix_vector_t *vecp, void *intr_id, avfunc f)
924 {
925 	struct autovec *p;
926 
927 	for (p = vecp->v_autovect; p; p = p->av_link) {
928 		if ((p->av_vector == f) && (p->av_intr_id == intr_id)) {
929 			/* found the handler */
930 			return (p);
931 		}
932 	}
933 
934 	return (NULL);
935 }
936 
937 static apix_vector_t *
938 apix_find_vector_by_avintr(void *intr_id, avfunc f)
939 {
940 	apix_vector_t *vecp;
941 	processorid_t n;
942 	uchar_t v;
943 
944 	for (n = 0; n < apic_nproc; n++) {
945 		if (!apix_is_cpu_enabled(n))
946 			continue;
947 
948 		for (v = APIX_AVINTR_MIN; v <= APIX_AVINTR_MIN; v++) {
949 			vecp = xv_vector(n, v);
950 			if (vecp == NULL ||
951 			    vecp->v_state <= APIX_STATE_OBSOLETED)
952 				continue;
953 
954 			if (apix_find_av(vecp, intr_id, f) != NULL)
955 				return (vecp);
956 		}
957 	}
958 
959 	return (NULL);
960 }
961 
962 /*
963  * Add interrupt service routine.
964  *
965  * For legacy interrupts (HPET timer, ACPI SCI), the vector is actually
966  * IRQ no. A vector is then allocated. Otherwise, the vector is already
967  * allocated. The input argument virt_vect is virtual vector of format
968  * APIX_VIRTVEC_VECTOR(cpuid, vector).
969  *
970  * Return 1 on success, 0 on failure.
971  */
972 int
973 apix_add_avintr(void *intr_id, int ipl, avfunc xxintr, char *name,
974     int virt_vect, caddr_t arg1, caddr_t arg2, uint64_t *ticksp,
975     dev_info_t *dip)
976 {
977 	int cpuid;
978 	uchar_t v = (uchar_t)APIX_VIRTVEC_VECTOR(virt_vect);
979 	apix_vector_t *vecp;
980 
981 	if (xxintr == NULL) {
982 		cmn_err(CE_WARN, "Attempt to add null for %s "
983 		    "on vector 0x%x,0x%x", name,
984 		    APIX_VIRTVEC_CPU(virt_vect),
985 		    APIX_VIRTVEC_VECTOR(virt_vect));
986 		return (0);
987 	}
988 
989 	if (v >= APIX_IPI_MIN)	/* IPIs */
990 		return (apix_add_ipi(ipl, xxintr, name, v, arg1, arg2));
991 
992 	if (!APIX_IS_VIRTVEC(virt_vect)) {	/* got irq */
993 		int irqno = virt_vect;
994 		int inum = GET_INTR_INUM(intr_id);
995 
996 		/*
997 		 * Senarios include:
998 		 * a. add_avintr() is called before irqp initialized (legacy)
999 		 * b. irqp is initialized, vector is not allocated (fixed)
1000 		 * c. irqp is initialized, vector is allocated (fixed & shared)
1001 		 */
1002 		if ((vecp = apix_alloc_intx(dip, inum, irqno)) == NULL)
1003 			return (0);
1004 
1005 		cpuid = vecp->v_cpuid;
1006 		v = vecp->v_vector;
1007 		virt_vect = APIX_VIRTVECTOR(cpuid, v);
1008 	} else {	/* got virtual vector */
1009 		cpuid = APIX_VIRTVEC_CPU(virt_vect);
1010 		vecp = xv_vector(cpuid, v);
1011 		ASSERT(vecp != NULL);
1012 	}
1013 
1014 	lock_set(&apix_lock);
1015 	if (vecp->v_state <= APIX_STATE_OBSOLETED) {
1016 		vecp = NULL;
1017 
1018 		/*
1019 		 * Basically the allocated but not enabled interrupts
1020 		 * will not get re-targeted. But MSIs in allocated state
1021 		 * could be re-targeted due to group re-targeting.
1022 		 */
1023 		if (intr_id != NULL && dip != NULL) {
1024 			ddi_intr_handle_impl_t *hdlp = intr_id;
1025 			vecp = apix_get_dev_map(dip, hdlp->ih_inum,
1026 			    hdlp->ih_type);
1027 			ASSERT(vecp->v_state == APIX_STATE_ALLOCED);
1028 		}
1029 		if (vecp == NULL) {
1030 			lock_clear(&apix_lock);
1031 			cmn_err(CE_WARN, "Invalid interrupt 0x%x,0x%x "
1032 			    " for %p to add", cpuid, v, intr_id);
1033 			return (0);
1034 		}
1035 		cpuid = vecp->v_cpuid;
1036 		virt_vect = APIX_VIRTVECTOR(cpuid, vecp->v_vector);
1037 	}
1038 
1039 	APIX_ENTER_CPU_LOCK(cpuid);
1040 	apix_insert_av(vecp, intr_id, xxintr, arg1, arg2, ticksp, ipl, dip);
1041 	APIX_LEAVE_CPU_LOCK(cpuid);
1042 
1043 	(void) apix_addspl(virt_vect, ipl, 0, 0);
1044 
1045 	lock_clear(&apix_lock);
1046 
1047 	return (1);
1048 }
1049 
1050 /*
1051  * Remove avintr
1052  *
1053  * For fixed, if it's the last one of shared interrupts, free the vector.
1054  * For msi/x, only disable the interrupt but not free the vector, which
1055  * is freed by PSM_XXX_FREE_XXX.
1056  */
1057 void
1058 apix_rem_avintr(void *intr_id, int ipl, avfunc xxintr, int virt_vect)
1059 {
1060 	avfunc f;
1061 	apix_vector_t *vecp;
1062 	struct autovec *avp;
1063 	processorid_t cpuid;
1064 
1065 	if ((f = xxintr) == NULL)
1066 		return;
1067 
1068 	lock_set(&apix_lock);
1069 
1070 	if (!APIX_IS_VIRTVEC(virt_vect)) {	/* got irq */
1071 		vecp = apix_intx_get_vector(virt_vect);
1072 		virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1073 	} else	/* got virtual vector */
1074 		vecp = xv_vector(APIX_VIRTVEC_CPU(virt_vect),
1075 		    APIX_VIRTVEC_VECTOR(virt_vect));
1076 
1077 	if (vecp == NULL) {
1078 		lock_clear(&apix_lock);
1079 		cmn_err(CE_CONT, "Invalid interrupt 0x%x,0x%x to remove",
1080 		    APIX_VIRTVEC_CPU(virt_vect),
1081 		    APIX_VIRTVEC_VECTOR(virt_vect));
1082 		return;
1083 	}
1084 
1085 	if (vecp->v_state <= APIX_STATE_OBSOLETED ||
1086 	    ((avp = apix_find_av(vecp, intr_id, f)) == NULL)) {
1087 		/*
1088 		 * It's possible that the interrupt is rebound to a
1089 		 * different cpu before rem_avintr() is called. Search
1090 		 * through all vectors once it happens.
1091 		 */
1092 		if ((vecp = apix_find_vector_by_avintr(intr_id, f))
1093 		    == NULL) {
1094 			lock_clear(&apix_lock);
1095 			cmn_err(CE_CONT, "Unknown interrupt 0x%x,0x%x "
1096 			    "for %p to remove", APIX_VIRTVEC_CPU(virt_vect),
1097 			    APIX_VIRTVEC_VECTOR(virt_vect), intr_id);
1098 			return;
1099 		}
1100 		virt_vect = APIX_VIRTVECTOR(vecp->v_cpuid, vecp->v_vector);
1101 		avp = apix_find_av(vecp, intr_id, f);
1102 	}
1103 	cpuid = vecp->v_cpuid;
1104 
1105 	/* disable interrupt */
1106 	(void) apix_delspl(virt_vect, ipl, 0, 0);
1107 
1108 	/* remove ISR entry */
1109 	APIX_ENTER_CPU_LOCK(cpuid);
1110 	apix_remove_av(vecp, avp);
1111 	APIX_LEAVE_CPU_LOCK(cpuid);
1112 
1113 	lock_clear(&apix_lock);
1114 }
1115 
1116 /*
1117  * Device to vector mapping table
1118  */
1119 
1120 static void
1121 apix_clear_dev_map(dev_info_t *dip, int inum, int type)
1122 {
1123 	char *name;
1124 	major_t major;
1125 	apix_dev_vector_t *dvp, *prev = NULL;
1126 	int found = 0;
1127 
1128 	name = ddi_get_name(dip);
1129 	major = ddi_name_to_major(name);
1130 
1131 	mutex_enter(&apix_mutex);
1132 
1133 	for (dvp = apix_dev_vector[major]; dvp != NULL;
1134 	    prev = dvp, dvp = dvp->dv_next) {
1135 		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1136 		    dvp->dv_type == type) {
1137 			found++;
1138 			break;
1139 		}
1140 	}
1141 
1142 	if (!found) {
1143 		mutex_exit(&apix_mutex);
1144 		return;
1145 	}
1146 
1147 	if (prev != NULL)
1148 		prev->dv_next = dvp->dv_next;
1149 
1150 	if (apix_dev_vector[major] == dvp)
1151 		apix_dev_vector[major] = dvp->dv_next;
1152 
1153 	dvp->dv_vector->v_devp = NULL;
1154 
1155 	mutex_exit(&apix_mutex);
1156 
1157 	kmem_free(dvp, sizeof (apix_dev_vector_t));
1158 }
1159 
1160 void
1161 apix_set_dev_map(apix_vector_t *vecp, dev_info_t *dip, int inum)
1162 {
1163 	apix_dev_vector_t *dvp;
1164 	char *name;
1165 	major_t major;
1166 	uint32_t found = 0;
1167 
1168 	ASSERT(dip != NULL);
1169 	name = ddi_get_name(dip);
1170 	major = ddi_name_to_major(name);
1171 
1172 	mutex_enter(&apix_mutex);
1173 
1174 	for (dvp = apix_dev_vector[major]; dvp != NULL;
1175 	    dvp = dvp->dv_next) {
1176 		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1177 		    dvp->dv_type == vecp->v_type) {
1178 			found++;
1179 			break;
1180 		}
1181 	}
1182 
1183 	if (found == 0) {	/* not found */
1184 		dvp = kmem_zalloc(sizeof (apix_dev_vector_t), KM_SLEEP);
1185 		dvp->dv_dip = dip;
1186 		dvp->dv_inum = inum;
1187 		dvp->dv_type = vecp->v_type;
1188 
1189 		dvp->dv_next = apix_dev_vector[major];
1190 		apix_dev_vector[major] = dvp;
1191 	}
1192 	dvp->dv_vector = vecp;
1193 	vecp->v_devp = dvp;
1194 
1195 	mutex_exit(&apix_mutex);
1196 
1197 	DDI_INTR_IMPLDBG((CE_CONT, "apix_set_dev_map: dip=0x%p "
1198 	    "inum=0x%x  vector=0x%x/0x%x\n",
1199 	    (void *)dip, inum, vecp->v_cpuid, vecp->v_vector));
1200 }
1201 
1202 apix_vector_t *
1203 apix_get_dev_map(dev_info_t *dip, int inum, int type)
1204 {
1205 	char *name;
1206 	major_t major;
1207 	apix_dev_vector_t *dvp;
1208 	apix_vector_t *vecp;
1209 
1210 	name = ddi_get_name(dip);
1211 	if ((major = ddi_name_to_major(name)) == DDI_MAJOR_T_NONE)
1212 		return (NULL);
1213 
1214 	mutex_enter(&apix_mutex);
1215 	for (dvp = apix_dev_vector[major]; dvp != NULL;
1216 	    dvp = dvp->dv_next) {
1217 		if (dvp->dv_dip == dip && dvp->dv_inum == inum &&
1218 		    dvp->dv_type == type) {
1219 			vecp = dvp->dv_vector;
1220 			mutex_exit(&apix_mutex);
1221 			return (vecp);
1222 		}
1223 	}
1224 	mutex_exit(&apix_mutex);
1225 
1226 	return (NULL);
1227 }
1228 
1229 /*
1230  * Get minimum inum for specified device, used for MSI
1231  */
1232 int
1233 apix_get_min_dev_inum(dev_info_t *dip, int type)
1234 {
1235 	char *name;
1236 	major_t major;
1237 	apix_dev_vector_t *dvp;
1238 	int inum = -1;
1239 
1240 	name = ddi_get_name(dip);
1241 	major = ddi_name_to_major(name);
1242 
1243 	mutex_enter(&apix_mutex);
1244 	for (dvp = apix_dev_vector[major]; dvp != NULL;
1245 	    dvp = dvp->dv_next) {
1246 		if (dvp->dv_dip == dip && dvp->dv_type == type) {
1247 			if (inum == -1)
1248 				inum = dvp->dv_inum;
1249 			else
1250 				inum = (dvp->dv_inum < inum) ?
1251 				    dvp->dv_inum : inum;
1252 		}
1253 	}
1254 	mutex_exit(&apix_mutex);
1255 
1256 	return (inum);
1257 }
1258 
1259 int
1260 apix_get_max_dev_inum(dev_info_t *dip, int type)
1261 {
1262 	char *name;
1263 	major_t major;
1264 	apix_dev_vector_t *dvp;
1265 	int inum = -1;
1266 
1267 	name = ddi_get_name(dip);
1268 	major = ddi_name_to_major(name);
1269 
1270 	mutex_enter(&apix_mutex);
1271 	for (dvp = apix_dev_vector[major]; dvp != NULL;
1272 	    dvp = dvp->dv_next) {
1273 		if (dvp->dv_dip == dip && dvp->dv_type == type) {
1274 			if (inum == -1)
1275 				inum = dvp->dv_inum;
1276 			else
1277 				inum = (dvp->dv_inum > inum) ?
1278 				    dvp->dv_inum : inum;
1279 		}
1280 	}
1281 	mutex_exit(&apix_mutex);
1282 
1283 	return (inum);
1284 }
1285 
1286 /*
1287  * Major to cpu binding, for INTR_ROUND_ROBIN_WITH_AFFINITY cpu
1288  * binding policy
1289  */
1290 
1291 static uint32_t
1292 apix_get_dev_binding(dev_info_t *dip)
1293 {
1294 	major_t major;
1295 	char *name;
1296 	uint32_t cpu = IRQ_UNINIT;
1297 
1298 	name = ddi_get_name(dip);
1299 	major = ddi_name_to_major(name);
1300 	if (major < devcnt) {
1301 		mutex_enter(&apix_mutex);
1302 		cpu = apix_major_to_cpu[major];
1303 		mutex_exit(&apix_mutex);
1304 	}
1305 
1306 	return (cpu);
1307 }
1308 
1309 static void
1310 apix_set_dev_binding(dev_info_t *dip, uint32_t cpu)
1311 {
1312 	major_t major;
1313 	char *name;
1314 
1315 	/* setup major to cpu mapping */
1316 	name = ddi_get_name(dip);
1317 	major = ddi_name_to_major(name);
1318 	if (apix_major_to_cpu[major] == IRQ_UNINIT) {
1319 		mutex_enter(&apix_mutex);
1320 		apix_major_to_cpu[major] = cpu;
1321 		mutex_exit(&apix_mutex);
1322 	}
1323 }
1324 
1325 /*
1326  * return the cpu to which this intr should be bound.
1327  * Check properties or any other mechanism to see if user wants it
1328  * bound to a specific CPU. If so, return the cpu id with high bit set.
1329  * If not, use the policy to choose a cpu and return the id.
1330  */
1331 uint32_t
1332 apix_bind_cpu(dev_info_t *dip)
1333 {
1334 	int	instance, instno, prop_len, bind_cpu, count;
1335 	uint_t	i, rc;
1336 	major_t	major;
1337 	char	*name, *drv_name, *prop_val, *cptr;
1338 	char	prop_name[32];
1339 
1340 	lock_set(&apix_lock);
1341 
1342 	if (apic_intr_policy == INTR_LOWEST_PRIORITY) {
1343 		cmn_err(CE_WARN, "apix: unsupported interrupt binding policy "
1344 		    "LOWEST PRIORITY, use ROUND ROBIN instead");
1345 		apic_intr_policy = INTR_ROUND_ROBIN;
1346 	}
1347 
1348 	if (apic_nproc == 1) {
1349 		lock_clear(&apix_lock);
1350 		return (0);
1351 	}
1352 
1353 	drv_name = NULL;
1354 	rc = DDI_PROP_NOT_FOUND;
1355 	major = (major_t)-1;
1356 	if (dip != NULL) {
1357 		name = ddi_get_name(dip);
1358 		major = ddi_name_to_major(name);
1359 		drv_name = ddi_major_to_name(major);
1360 		instance = ddi_get_instance(dip);
1361 		if (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) {
1362 			bind_cpu = apix_get_dev_binding(dip);
1363 			if (bind_cpu != IRQ_UNINIT) {
1364 				lock_clear(&apix_lock);
1365 				return (bind_cpu);
1366 			}
1367 		}
1368 		/*
1369 		 * search for "drvname"_intpt_bind_cpus property first, the
1370 		 * syntax of the property should be "a[,b,c,...]" where
1371 		 * instance 0 binds to cpu a, instance 1 binds to cpu b,
1372 		 * instance 3 binds to cpu c...
1373 		 * ddi_getlongprop() will search /option first, then /
1374 		 * if "drvname"_intpt_bind_cpus doesn't exist, then find
1375 		 * intpt_bind_cpus property.  The syntax is the same, and
1376 		 * it applies to all the devices if its "drvname" specific
1377 		 * property doesn't exist
1378 		 */
1379 		(void) strcpy(prop_name, drv_name);
1380 		(void) strcat(prop_name, "_intpt_bind_cpus");
1381 		rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0, prop_name,
1382 		    (caddr_t)&prop_val, &prop_len);
1383 		if (rc != DDI_PROP_SUCCESS) {
1384 			rc = ddi_getlongprop(DDI_DEV_T_ANY, dip, 0,
1385 			    "intpt_bind_cpus", (caddr_t)&prop_val, &prop_len);
1386 		}
1387 	}
1388 	if (rc == DDI_PROP_SUCCESS) {
1389 		for (i = count = 0; i < (prop_len - 1); i++)
1390 			if (prop_val[i] == ',')
1391 				count++;
1392 		if (prop_val[i-1] != ',')
1393 			count++;
1394 		/*
1395 		 * if somehow the binding instances defined in the
1396 		 * property are not enough for this instno., then
1397 		 * reuse the pattern for the next instance until
1398 		 * it reaches the requested instno
1399 		 */
1400 		instno = instance % count;
1401 		i = 0;
1402 		cptr = prop_val;
1403 		while (i < instno)
1404 			if (*cptr++ == ',')
1405 				i++;
1406 		bind_cpu = stoi(&cptr);
1407 		kmem_free(prop_val, prop_len);
1408 		/* if specific cpu is bogus, then default to cpu 0 */
1409 		if (bind_cpu >= apic_nproc) {
1410 			cmn_err(CE_WARN, "apix: %s=%s: CPU %d not present",
1411 			    prop_name, prop_val, bind_cpu);
1412 			bind_cpu = 0;
1413 		} else {
1414 			/* indicate that we are bound at user request */
1415 			bind_cpu |= IRQ_USER_BOUND;
1416 		}
1417 		/*
1418 		 * no need to check apic_cpus[].aci_status, if specific cpu is
1419 		 * not up, then post_cpu_start will handle it.
1420 		 */
1421 	} else {
1422 		bind_cpu = apic_get_next_bind_cpu();
1423 	}
1424 
1425 	lock_clear(&apix_lock);
1426 
1427 	return ((uint32_t)bind_cpu);
1428 }
1429 
1430 static boolean_t
1431 apix_is_cpu_enabled(processorid_t cpuid)
1432 {
1433 	apic_cpus_info_t *cpu_infop;
1434 
1435 	cpu_infop = &apic_cpus[cpuid];
1436 
1437 	if ((cpu_infop->aci_status & APIC_CPU_INTR_ENABLE) == 0)
1438 		return (B_FALSE);
1439 
1440 	return (B_TRUE);
1441 }
1442 
1443 /*
1444  * Must be called with apix_lock held. This function can be
1445  * called from above lock level by apix_intr_redistribute().
1446  *
1447  * Arguments:
1448  *    vecp  : Vector to be rebound
1449  *    tocpu : Target cpu. IRQ_UNINIT means target is vecp->v_cpuid.
1450  *    count : Number of continuous vectors
1451  *
1452  * Return new vector being bound to
1453  */
1454 apix_vector_t *
1455 apix_rebind(apix_vector_t *vecp, processorid_t newcpu, int count)
1456 {
1457 	apix_vector_t *newp, *oldp;
1458 	processorid_t oldcpu = vecp->v_cpuid;
1459 	uchar_t newvec, oldvec = vecp->v_vector;
1460 	int i;
1461 
1462 	ASSERT(LOCK_HELD(&apix_lock) && count > 0);
1463 
1464 	if (!apix_is_cpu_enabled(newcpu))
1465 		return (NULL);
1466 
1467 	if (vecp->v_cpuid == newcpu) 	/* rebind to the same cpu */
1468 		return (vecp);
1469 
1470 	APIX_ENTER_CPU_LOCK(oldcpu);
1471 	APIX_ENTER_CPU_LOCK(newcpu);
1472 
1473 	/* allocate vector */
1474 	if (count == 1)
1475 		newp = apix_alloc_vector_oncpu(newcpu, NULL, 0, vecp->v_type);
1476 	else {
1477 		ASSERT(vecp->v_type == APIX_TYPE_MSI);
1478 		newp = apix_alloc_nvectors_oncpu(newcpu, NULL, 0, count,
1479 		    vecp->v_type);
1480 	}
1481 	if (newp == NULL) {
1482 		APIX_LEAVE_CPU_LOCK(newcpu);
1483 		APIX_LEAVE_CPU_LOCK(oldcpu);
1484 		return (NULL);
1485 	}
1486 
1487 	newvec = newp->v_vector;
1488 	apix_dup_vectors(vecp, newp, count);
1489 
1490 	APIX_LEAVE_CPU_LOCK(newcpu);
1491 	APIX_LEAVE_CPU_LOCK(oldcpu);
1492 
1493 	if (!DDI_INTR_IS_MSI_OR_MSIX(vecp->v_type)) {
1494 		ASSERT(count == 1);
1495 		if (apix_intx_rebind(vecp->v_inum, newcpu, newvec) != 0) {
1496 			struct autovec *avp;
1497 			int inum;
1498 
1499 			/* undo duplication */
1500 			APIX_ENTER_CPU_LOCK(oldcpu);
1501 			APIX_ENTER_CPU_LOCK(newcpu);
1502 			for (avp = newp->v_autovect; avp != NULL;
1503 			    avp = avp->av_link) {
1504 				if (avp->av_dip != NULL) {
1505 					inum = GET_INTR_INUM(avp->av_intr_id);
1506 					apix_set_dev_map(vecp, avp->av_dip,
1507 					    inum);
1508 				}
1509 				apix_remove_av(newp, avp);
1510 			}
1511 			apix_cleanup_vector(newp);
1512 			APIX_LEAVE_CPU_LOCK(newcpu);
1513 			APIX_LEAVE_CPU_LOCK(oldcpu);
1514 			APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed "
1515 			    "interrupt 0x%x to cpu %d failed\n",
1516 			    vecp->v_inum, newcpu));
1517 			return (NULL);
1518 		}
1519 
1520 		APIX_ENTER_CPU_LOCK(oldcpu);
1521 		(void) apix_obsolete_vector(vecp);
1522 		APIX_LEAVE_CPU_LOCK(oldcpu);
1523 		APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind fixed interrupt"
1524 		    " 0x%x/0x%x to 0x%x/0x%x\n",
1525 		    oldcpu, oldvec, newcpu, newvec));
1526 		return (newp);
1527 	}
1528 
1529 	for (i = 0; i < count; i++) {
1530 		oldp = xv_vector(oldcpu, oldvec + i);
1531 		newp = xv_vector(newcpu, newvec + i);
1532 
1533 		if (newp->v_share > 0) {
1534 			APIX_SET_REBIND_INFO(oldp, newp);
1535 
1536 			apix_enable_vector(newp);
1537 
1538 			APIX_CLR_REBIND_INFO();
1539 		}
1540 
1541 		APIX_ENTER_CPU_LOCK(oldcpu);
1542 		(void) apix_obsolete_vector(oldp);
1543 		APIX_LEAVE_CPU_LOCK(oldcpu);
1544 	}
1545 	APIC_VERBOSE(REBIND, (CE_CONT, "apix: rebind vector 0x%x/0x%x "
1546 	    "to 0x%x/0x%x, count=%d\n",
1547 	    oldcpu, oldvec, newcpu, newvec, count));
1548 
1549 	return (xv_vector(newcpu, newvec));
1550 }
1551 
1552 /*
1553  * Senarios include:
1554  * a. add_avintr() is called before irqp initialized (legacy)
1555  * b. irqp is initialized, vector is not allocated (fixed interrupts)
1556  * c. irqp is initialized, vector is allocated (shared interrupts)
1557  */
1558 apix_vector_t *
1559 apix_alloc_intx(dev_info_t *dip, int inum, int irqno)
1560 {
1561 	apic_irq_t *irqp;
1562 	apix_vector_t *vecp;
1563 
1564 	/*
1565 	 * Allocate IRQ. Caller is later responsible for the
1566 	 * initialization
1567 	 */
1568 	mutex_enter(&airq_mutex);
1569 	if ((irqp = apic_irq_table[irqno]) == NULL) {
1570 		/* allocate irq */
1571 		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
1572 		irqp->airq_mps_intr_index = FREE_INDEX;
1573 		apic_irq_table[irqno] = irqp;
1574 	}
1575 	if (irqp->airq_mps_intr_index == FREE_INDEX) {
1576 		irqp->airq_mps_intr_index = DEFAULT_INDEX;
1577 		irqp->airq_cpu = IRQ_UNINIT;
1578 		irqp->airq_origirq = (uchar_t)irqno;
1579 	}
1580 
1581 	mutex_exit(&airq_mutex);
1582 
1583 	/*
1584 	 * allocate vector
1585 	 */
1586 	if (irqp->airq_cpu == IRQ_UNINIT) {
1587 		uint32_t bindcpu, cpuid;
1588 
1589 		/* select cpu by system policy */
1590 		bindcpu = apix_bind_cpu(dip);
1591 		cpuid = bindcpu & ~IRQ_USER_BOUND;
1592 
1593 		/* allocate vector */
1594 		APIX_ENTER_CPU_LOCK(cpuid);
1595 
1596 		if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum,
1597 		    APIX_TYPE_FIXED)) == NULL) {
1598 			cmn_err(CE_WARN, "No interrupt vector for irq %x",
1599 			    irqno);
1600 			APIX_LEAVE_CPU_LOCK(cpuid);
1601 			return (NULL);
1602 		}
1603 		vecp->v_inum = irqno;
1604 		vecp->v_flags |= APIX_VECT_MASKABLE;
1605 
1606 		apix_intx_set_vector(irqno, vecp->v_cpuid, vecp->v_vector);
1607 
1608 		APIX_LEAVE_CPU_LOCK(cpuid);
1609 	} else {
1610 		vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1611 		ASSERT(!IS_VECT_FREE(vecp));
1612 
1613 		if (dip != NULL)
1614 			apix_set_dev_map(vecp, dip, inum);
1615 	}
1616 
1617 	if ((dip != NULL) &&
1618 	    (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1619 	    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1620 		apix_set_dev_binding(dip, vecp->v_cpuid);
1621 
1622 	apix_dprint_vector(vecp, dip, 1);
1623 
1624 	return (vecp);
1625 }
1626 
1627 int
1628 apix_alloc_msi(dev_info_t *dip, int inum, int count, int behavior)
1629 {
1630 	int i, cap_ptr, rcount = count;
1631 	apix_vector_t *vecp;
1632 	processorid_t bindcpu, cpuid;
1633 	ushort_t msi_ctrl;
1634 	ddi_acc_handle_t handle;
1635 
1636 	DDI_INTR_IMPLDBG((CE_CONT, "apix_alloc_msi_vectors: dip=0x%p "
1637 	    "inum=0x%x  count=0x%x behavior=%d\n",
1638 	    (void *)dip, inum, count, behavior));
1639 
1640 	if (count > 1) {
1641 		if (behavior == DDI_INTR_ALLOC_STRICT &&
1642 		    apic_multi_msi_enable == 0)
1643 			return (0);
1644 		if (apic_multi_msi_enable == 0)
1645 			count = 1;
1646 	}
1647 
1648 	/* Check whether it supports per-vector masking */
1649 	cap_ptr = i_ddi_get_msi_msix_cap_ptr(dip);
1650 	handle = i_ddi_get_pci_config_handle(dip);
1651 	msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1652 
1653 	/* bind to cpu */
1654 	bindcpu = apix_bind_cpu(dip);
1655 	cpuid = bindcpu & ~IRQ_USER_BOUND;
1656 
1657 	/* if not ISP2, then round it down */
1658 	if (!ISP2(rcount))
1659 		rcount = 1 << (highbit(rcount) - 1);
1660 
1661 	APIX_ENTER_CPU_LOCK(cpuid);
1662 	for (vecp = NULL; rcount > 0; rcount >>= 1) {
1663 		vecp = apix_alloc_nvectors_oncpu(bindcpu, dip, inum, rcount,
1664 		    APIX_TYPE_MSI);
1665 		if (vecp != NULL || behavior == DDI_INTR_ALLOC_STRICT)
1666 			break;
1667 	}
1668 	for (i = 0; vecp && i < rcount; i++)
1669 		xv_vector(vecp->v_cpuid, vecp->v_vector + i)->v_flags |=
1670 		    (msi_ctrl & PCI_MSI_PVM_MASK) ? APIX_VECT_MASKABLE : 0;
1671 	APIX_LEAVE_CPU_LOCK(cpuid);
1672 	if (vecp == NULL) {
1673 		APIC_VERBOSE(INTR, (CE_CONT,
1674 		    "apix_alloc_msi: no %d cont vectors found on cpu 0x%x\n",
1675 		    count, bindcpu));
1676 		return (0);
1677 	}
1678 
1679 	/* major to cpu binding */
1680 	if ((apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1681 	    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1682 		apix_set_dev_binding(dip, vecp->v_cpuid);
1683 
1684 	apix_dprint_vector(vecp, dip, rcount);
1685 
1686 	return (rcount);
1687 }
1688 
1689 int
1690 apix_alloc_msix(dev_info_t *dip, int inum, int count, int behavior)
1691 {
1692 	apix_vector_t *vecp;
1693 	processorid_t bindcpu, cpuid;
1694 	int i;
1695 
1696 	for (i = 0; i < count; i++) {
1697 		/* select cpu by system policy */
1698 		bindcpu = apix_bind_cpu(dip);
1699 		cpuid = bindcpu & ~IRQ_USER_BOUND;
1700 
1701 		/* allocate vector */
1702 		APIX_ENTER_CPU_LOCK(cpuid);
1703 		if ((vecp = apix_alloc_vector_oncpu(bindcpu, dip, inum + i,
1704 		    APIX_TYPE_MSIX)) == NULL) {
1705 			APIX_LEAVE_CPU_LOCK(cpuid);
1706 			APIC_VERBOSE(INTR, (CE_CONT, "apix_alloc_msix: "
1707 			    "allocate msix for device dip=%p, inum=%d on"
1708 			    " cpu %d failed", (void *)dip, inum + i, bindcpu));
1709 			break;
1710 		}
1711 		vecp->v_flags |= APIX_VECT_MASKABLE;
1712 		APIX_LEAVE_CPU_LOCK(cpuid);
1713 
1714 		/* major to cpu mapping */
1715 		if ((i == 0) &&
1716 		    (apic_intr_policy == INTR_ROUND_ROBIN_WITH_AFFINITY) &&
1717 		    ((vecp->v_flags & APIX_VECT_USER_BOUND) == 0))
1718 			apix_set_dev_binding(dip, vecp->v_cpuid);
1719 
1720 		apix_dprint_vector(vecp, dip, 1);
1721 	}
1722 
1723 	if (i < count && behavior == DDI_INTR_ALLOC_STRICT) {
1724 		APIC_VERBOSE(INTR, (CE_WARN, "apix_alloc_msix: "
1725 		    "strictly allocate %d vectors failed, got %d\n",
1726 		    count, i));
1727 		apix_free_vectors(dip, inum, i, APIX_TYPE_MSIX);
1728 		i = 0;
1729 	}
1730 
1731 	return (i);
1732 }
1733 
1734 /*
1735  * A rollback free for vectors allocated by apix_alloc_xxx().
1736  */
1737 void
1738 apix_free_vectors(dev_info_t *dip, int inum, int count, int type)
1739 {
1740 	int i, cpuid;
1741 	apix_vector_t *vecp;
1742 
1743 	DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: dip: %p inum: %x "
1744 	    "count: %x type: %x\n",
1745 	    (void *)dip, inum, count, type));
1746 
1747 	lock_set(&apix_lock);
1748 
1749 	for (i = 0; i < count; i++, inum++) {
1750 		if ((vecp = apix_get_dev_map(dip, inum, type)) == NULL) {
1751 			lock_clear(&apix_lock);
1752 			DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
1753 			    "dip=0x%p inum=0x%x type=0x%x apix_find_intr() "
1754 			    "failed\n", (void *)dip, inum, type));
1755 			continue;
1756 		}
1757 
1758 		APIX_ENTER_CPU_LOCK(vecp->v_cpuid);
1759 		cpuid = vecp->v_cpuid;
1760 
1761 		DDI_INTR_IMPLDBG((CE_CONT, "apix_free_vectors: "
1762 		    "dip=0x%p inum=0x%x type=0x%x vector 0x%x (share %d)\n",
1763 		    (void *)dip, inum, type, vecp->v_vector, vecp->v_share));
1764 
1765 		/* tear down device interrupt to vector mapping */
1766 		apix_clear_dev_map(dip, inum, type);
1767 
1768 		if (vecp->v_type == APIX_TYPE_FIXED) {
1769 			if (vecp->v_share > 0) {	/* share IRQ line */
1770 				APIX_LEAVE_CPU_LOCK(cpuid);
1771 				continue;
1772 			}
1773 
1774 			/* Free apic_irq_table entry */
1775 			apix_intx_free(vecp->v_inum);
1776 		}
1777 
1778 		/* free vector */
1779 		apix_cleanup_vector(vecp);
1780 
1781 		APIX_LEAVE_CPU_LOCK(cpuid);
1782 	}
1783 
1784 	lock_clear(&apix_lock);
1785 }
1786 
1787 /*
1788  * Must be called with apix_lock held
1789  */
1790 apix_vector_t *
1791 apix_setup_io_intr(apix_vector_t *vecp)
1792 {
1793 	processorid_t bindcpu;
1794 	int ret;
1795 
1796 	ASSERT(LOCK_HELD(&apix_lock));
1797 
1798 	/*
1799 	 * Interrupts are enabled on the CPU, programme IOAPIC RDT
1800 	 * entry or MSI/X address/data to enable the interrupt.
1801 	 */
1802 	if (apix_is_cpu_enabled(vecp->v_cpuid)) {
1803 		apix_enable_vector(vecp);
1804 		return (vecp);
1805 	}
1806 
1807 	/*
1808 	 * CPU is not up or interrupts are disabled. Fall back to the
1809 	 * first avialable CPU.
1810 	 */
1811 	bindcpu = apic_find_cpu(APIC_CPU_INTR_ENABLE);
1812 
1813 	if (vecp->v_type == APIX_TYPE_MSI)
1814 		return (apix_grp_set_cpu(vecp, bindcpu, &ret));
1815 
1816 	return (apix_set_cpu(vecp, bindcpu, &ret));
1817 }
1818 
1819 /*
1820  * For interrupts which call add_avintr() before apic is initialized.
1821  * ioapix_setup_intr() will
1822  *   - allocate vector
1823  *   - copy over ISR
1824  */
1825 static void
1826 ioapix_setup_intr(int irqno, iflag_t *flagp)
1827 {
1828 	extern struct av_head autovect[];
1829 	apix_vector_t *vecp;
1830 	apic_irq_t *irqp;
1831 	uchar_t ioapicindex, ipin;
1832 	ulong_t iflag;
1833 	struct autovec *avp;
1834 
1835 	ioapicindex = acpi_find_ioapic(irqno);
1836 	ASSERT(ioapicindex != 0xFF);
1837 	ipin = irqno - apic_io_vectbase[ioapicindex];
1838 
1839 	mutex_enter(&airq_mutex);
1840 	irqp = apic_irq_table[irqno];
1841 
1842 	/*
1843 	 * The irq table entry shouldn't exist unless the interrupts are shared.
1844 	 * In that case, make sure it matches what we would initialize it to.
1845 	 */
1846 	if (irqp != NULL) {
1847 		ASSERT(irqp->airq_mps_intr_index == ACPI_INDEX);
1848 		ASSERT(irqp->airq_intin_no == ipin &&
1849 		    irqp->airq_ioapicindex == ioapicindex);
1850 		vecp = xv_vector(irqp->airq_cpu, irqp->airq_vector);
1851 		ASSERT(!IS_VECT_FREE(vecp));
1852 		mutex_exit(&airq_mutex);
1853 	} else {
1854 		irqp = kmem_zalloc(sizeof (apic_irq_t), KM_SLEEP);
1855 
1856 		irqp->airq_cpu = IRQ_UNINIT;
1857 		irqp->airq_origirq = (uchar_t)irqno;
1858 		irqp->airq_mps_intr_index = ACPI_INDEX;
1859 		irqp->airq_ioapicindex = ioapicindex;
1860 		irqp->airq_intin_no = ipin;
1861 		irqp->airq_iflag = *flagp;
1862 		irqp->airq_share++;
1863 
1864 		apic_irq_table[irqno] = irqp;
1865 		mutex_exit(&airq_mutex);
1866 
1867 		vecp = apix_alloc_intx(NULL, 0, irqno);
1868 	}
1869 
1870 	/* copy over autovect */
1871 	for (avp = autovect[irqno].avh_link; avp; avp = avp->av_link)
1872 		apix_insert_av(vecp, avp->av_intr_id, avp->av_vector,
1873 		    avp->av_intarg1, avp->av_intarg2, avp->av_ticksp,
1874 		    avp->av_prilevel, avp->av_dip);
1875 
1876 	/* Program I/O APIC */
1877 	iflag = intr_clear();
1878 	lock_set(&apix_lock);
1879 
1880 	(void) apix_setup_io_intr(vecp);
1881 
1882 	lock_clear(&apix_lock);
1883 	intr_restore(iflag);
1884 
1885 	APIC_VERBOSE_IOAPIC((CE_CONT, "apix: setup ioapic, irqno %x "
1886 	    "(ioapic %x, ipin %x) is bound to cpu %x, vector %x\n",
1887 	    irqno, ioapicindex, ipin, irqp->airq_cpu, irqp->airq_vector));
1888 }
1889 
1890 void
1891 ioapix_init_intr(int mask_apic)
1892 {
1893 	int ioapicindex;
1894 	int i, j;
1895 
1896 	/* mask interrupt vectors */
1897 	for (j = 0; j < apic_io_max && mask_apic; j++) {
1898 		int intin_max;
1899 
1900 		ioapicindex = j;
1901 		/* Bits 23-16 define the maximum redirection entries */
1902 		intin_max = (ioapic_read(ioapicindex, APIC_VERS_CMD) >> 16)
1903 		    & 0xff;
1904 		for (i = 0; i <= intin_max; i++)
1905 			ioapic_write(ioapicindex, APIC_RDT_CMD + 2 * i,
1906 			    AV_MASK);
1907 	}
1908 
1909 	/*
1910 	 * Hack alert: deal with ACPI SCI interrupt chicken/egg here
1911 	 */
1912 	if (apic_sci_vect > 0)
1913 		ioapix_setup_intr(apic_sci_vect, &apic_sci_flags);
1914 
1915 	/*
1916 	 * Hack alert: deal with ACPI HPET interrupt chicken/egg here.
1917 	 */
1918 	if (apic_hpet_vect > 0)
1919 		ioapix_setup_intr(apic_hpet_vect, &apic_hpet_flags);
1920 }
1921