xref: /illumos-gate/usr/src/uts/sun4u/os/mach_ddi_impl.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * sun4u specific DDI implementation
31  */
32 #include <sys/bootconf.h>
33 #include <sys/conf.h>
34 #include <sys/ddi_subrdefs.h>
35 #include <sys/ethernet.h>
36 #include <sys/idprom.h>
37 #include <sys/machsystm.h>
38 #include <sys/modhash.h>
39 #include <sys/promif.h>
40 #include <sys/prom_plat.h>
41 #include <sys/sunndi.h>
42 #include <sys/systeminfo.h>
43 #include <sys/fpu/fpusystm.h>
44 #include <sys/vm.h>
45 #include <sys/fs/dv_node.h>
46 #include <sys/fs/snode.h>
47 
48 /*
49  * Favored drivers of this implementation
50  * architecture.  These drivers MUST be present for
51  * the system to boot at all.
52  */
53 char *impl_module_list[] = {
54 	"rootnex",
55 	"options",
56 	"sad",		/* Referenced via init_tbl[] */
57 	"pseudo",
58 	"clone",
59 	"scsi_vhci",
60 	(char *)0
61 };
62 
63 /*
64  * These strings passed to not_serviced in locore.s
65  */
66 const char busname_ovec[] = "onboard ";
67 const char busname_svec[] = "SBus ";
68 const char busname_vec[] = "";
69 
70 
71 static uint64_t *intr_map_reg[32];
72 
73 /*
74  * Forward declarations
75  */
76 static int getlongprop_buf();
77 static int get_boardnum(int nid, dev_info_t *par);
78 
79 /*
80  * Check the status of the device node passed as an argument.
81  *
82  *	if ((status is OKAY) || (status is DISABLED))
83  *		return DDI_SUCCESS
84  *	else
85  *		print a warning and return DDI_FAILURE
86  */
87 /*ARGSUSED*/
88 int
89 check_status(int id, char *buf, dev_info_t *parent)
90 {
91 	char status_buf[64];
92 	char devtype_buf[OBP_MAXPROPNAME];
93 	char board_buf[32];
94 	char path[OBP_MAXPATHLEN];
95 	int boardnum;
96 	int retval = DDI_FAILURE;
97 	extern int status_okay(int, char *, int);
98 
99 	/*
100 	 * is the status okay?
101 	 */
102 	if (status_okay(id, status_buf, sizeof (status_buf)))
103 		return (DDI_SUCCESS);
104 
105 	/*
106 	 * a status property indicating bad memory will be associated
107 	 * with a node which has a "device_type" property with a value of
108 	 * "memory-controller". in this situation, return DDI_SUCCESS
109 	 */
110 	if (getlongprop_buf(id, OBP_DEVICETYPE, devtype_buf,
111 	    sizeof (devtype_buf)) > 0) {
112 		if (strcmp(devtype_buf, "memory-controller") == 0)
113 			retval = DDI_SUCCESS;
114 	}
115 
116 	/*
117 	 * get the full OBP pathname of this node
118 	 */
119 	if (prom_phandle_to_path((phandle_t)id, path, sizeof (path)) < 0)
120 		cmn_err(CE_WARN, "prom_phandle_to_path(%d) failed", id);
121 
122 	/*
123 	 * get the board number, if one exists
124 	 */
125 	if ((boardnum = get_boardnum(id, parent)) >= 0)
126 		(void) sprintf(board_buf, " on board %d", boardnum);
127 	else
128 		board_buf[0] = '\0';
129 
130 	/*
131 	 * print the status property information
132 	 */
133 	cmn_err(CE_WARN, "status '%s' for '%s'%s",
134 		status_buf, path, board_buf);
135 	return (retval);
136 }
137 
138 /*
139  * determine the board number associated with this nodeid
140  */
141 static int
142 get_boardnum(int nid, dev_info_t *par)
143 {
144 	int board_num;
145 
146 	if (prom_getprop((dnode_t)nid, OBP_BOARDNUM,
147 	    (caddr_t)&board_num) != -1)
148 		return (board_num);
149 
150 	/*
151 	 * Look at current node and up the parent chain
152 	 * till we find a node with an OBP_BOARDNUM.
153 	 */
154 	while (par) {
155 		nid = ddi_get_nodeid(par);
156 
157 		if (prom_getprop((dnode_t)nid, OBP_BOARDNUM,
158 		    (caddr_t)&board_num) != -1)
159 			return (board_num);
160 
161 		par = ddi_get_parent(par);
162 	}
163 	return (-1);
164 }
165 
166 /*
167  * Note that this routine does not take into account the endianness
168  * of the host or the device (or PROM) when retrieving properties.
169  */
170 static int
171 getlongprop_buf(int id, char *name, char *buf, int maxlen)
172 {
173 	int size;
174 
175 	size = prom_getproplen((dnode_t)id, name);
176 	if (size <= 0 || (size > maxlen - 1))
177 		return (-1);
178 
179 	if (-1 == prom_getprop((dnode_t)id, name, buf))
180 		return (-1);
181 
182 	/*
183 	 * Workaround for bugid 1085575 - OBP may return a "name" property
184 	 * without null terminating the string with '\0'.  When this occurs,
185 	 * append a '\0' and return (size + 1).
186 	 */
187 	if (strcmp("name", name) == 0) {
188 		if (buf[size - 1] != '\0') {
189 			buf[size] = '\0';
190 			size += 1;
191 		}
192 	}
193 
194 	return (size);
195 }
196 
197 /*
198  * Routines to set/get UPA slave only device interrupt mapping registers.
199  * set_intr_mapping_reg() is called by the UPA master to register the address
200  * of an interrupt mapping register. The upa id is that of the master. If
201  * this routine is called on behalf of a slave device, the framework
202  * determines the upa id of the slave based on that supplied by the master.
203  *
204  * get_intr_mapping_reg() is called by the UPA nexus driver on behalf
205  * of a child device to get and program the interrupt mapping register of
206  * one of it's child nodes.  It uses the upa id of the child device to
207  * index into a table of mapping registers.  If the routine is called on
208  * behalf of a slave device and the mapping register has not been set,
209  * the framework determines the devinfo node of the corresponding master
210  * nexus which owns the mapping register of the slave and installs that
211  * driver.  The device driver which owns the mapping register must call
212  * set_intr_mapping_reg() in its attach routine to register the slaves
213  * mapping register with the system.
214  */
void
set_intr_mapping_reg(int upaid, uint64_t *addr, int slave)
{
	int affin_upaid;

	/* For UPA master devices, set the mapping reg addr and we're done */
	if (slave == 0) {
		/*
		 * NOTE(review): upaid indexes the 32-entry intr_map_reg[]
		 * unchecked -- assumes callers always pass a valid UPA
		 * port id; confirm against MAX_UPA.
		 */
		intr_map_reg[upaid] = addr;
		return;
	}

	/*
	 * If we get here, we're adding an entry for a UPA slave only device.
	 * The UPA id of the device which has affinity with that requesting,
	 * will be the device with the same UPA id minus the slave number.
	 * If the affin_upaid is negative, silently return to the caller.
	 */
	if ((affin_upaid = upaid - slave) < 0)
		return;

	/*
	 * Load the address of the mapping register in the correct slot
	 * for the slave device.
	 */
	intr_map_reg[affin_upaid] = addr;
}
241 
242 uint64_t *
243 get_intr_mapping_reg(int upaid, int slave)
244 {
245 	int affin_upaid;
246 	dev_info_t *affin_dip;
247 	uint64_t *addr = intr_map_reg[upaid];
248 
249 	/* If we're a UPA master, or we have a valid mapping register. */
250 	if (!slave || addr != NULL)
251 		return (addr);
252 
253 	/*
254 	 * We only get here if we're a UPA slave only device whose interrupt
255 	 * mapping register has not been set.
256 	 * We need to try and install the nexus whose physical address
257 	 * space is where the slaves mapping register resides.  They
258 	 * should call set_intr_mapping_reg() in their xxattach() to register
259 	 * the mapping register with the system.
260 	 */
261 
262 	/*
263 	 * We don't know if a single- or multi-interrupt proxy is fielding
264 	 * our UPA slave interrupt, we must check both cases.
265 	 * Start out by assuming the multi-interrupt case.
266 	 * We assume that single- and multi- interrupters are not
267 	 * overlapping in UPA portid space.
268 	 */
269 
270 	affin_upaid = upaid | 3;
271 
272 	/*
273 	 * We start looking for the multi-interrupter affinity node.
274 	 * We know it's ONLY a child of the root node since the root
275 	 * node defines UPA space.
276 	 */
277 	for (affin_dip = ddi_get_child(ddi_root_node()); affin_dip;
278 	    affin_dip = ddi_get_next_sibling(affin_dip))
279 		if (ddi_prop_get_int(DDI_DEV_T_ANY, affin_dip,
280 		    DDI_PROP_DONTPASS, "upa-portid", -1) == affin_upaid)
281 			break;
282 
283 	if (affin_dip) {
284 		if (i_ddi_attach_node_hierarchy(affin_dip) == DDI_SUCCESS) {
285 			/* try again to get the mapping register. */
286 			addr = intr_map_reg[upaid];
287 		}
288 	}
289 
290 	/*
291 	 * If we still don't have a mapping register try single -interrupter
292 	 * case.
293 	 */
294 	if (addr == NULL) {
295 
296 		affin_upaid = upaid | 1;
297 
298 		for (affin_dip = ddi_get_child(ddi_root_node()); affin_dip;
299 		    affin_dip = ddi_get_next_sibling(affin_dip))
300 			if (ddi_prop_get_int(DDI_DEV_T_ANY, affin_dip,
301 			    DDI_PROP_DONTPASS, "upa-portid", -1) == affin_upaid)
302 				break;
303 
304 		if (affin_dip) {
305 			if (i_ddi_attach_node_hierarchy(affin_dip)
306 			    == DDI_SUCCESS) {
307 				/* try again to get the mapping register. */
308 				addr = intr_map_reg[upaid];
309 			}
310 		}
311 	}
312 	return (addr);
313 }
314 
315 
/*
 * Table of inclusive pfn ranges [lopfn, hipfn] within UPA device
 * register space that are valid DMA targets; see the block comment
 * below.  NOTE(review): access is unlocked -- presumably registration
 * happens only from serialized attach context; confirm.
 */
static struct upa_dma_pfns {
	pfn_t hipfn;	/* highest pfn of the range (inclusive) */
	pfn_t lopfn;	/* lowest pfn of the range (inclusive) */
} upa_dma_pfn_array[MAX_UPA];

/* Number of valid entries in upa_dma_pfn_array. */
static int upa_dma_pfn_ndx = 0;
322 
323 /*
324  * Certain UPA busses cannot accept dma transactions from any other source
325  * except for memory due to livelock conditions in their hardware. (e.g. sbus
326  * and PCI). These routines allow devices or busses on the UPA to register
327  * a physical address block within it's own register space where DMA can be
328  * performed.  Currently, the FFB is the only such device which supports
329  * device DMA on the UPA.
330  */
331 void
332 pf_set_dmacapable(pfn_t hipfn, pfn_t lopfn)
333 {
334 	int i = upa_dma_pfn_ndx;
335 
336 	upa_dma_pfn_ndx++;
337 
338 	upa_dma_pfn_array[i].hipfn = hipfn;
339 	upa_dma_pfn_array[i].lopfn = lopfn;
340 }
341 
342 void
343 pf_unset_dmacapable(pfn_t pfn)
344 {
345 	int i;
346 
347 	for (i = 0; i < upa_dma_pfn_ndx; i++) {
348 		if (pfn <= upa_dma_pfn_array[i].hipfn &&
349 		    pfn >= upa_dma_pfn_array[i].lopfn) {
350 			upa_dma_pfn_array[i].hipfn =
351 			    upa_dma_pfn_array[upa_dma_pfn_ndx - 1].hipfn;
352 			upa_dma_pfn_array[i].lopfn =
353 			    upa_dma_pfn_array[upa_dma_pfn_ndx - 1].lopfn;
354 			upa_dma_pfn_ndx--;
355 			break;
356 		}
357 	}
358 }
359 
360 /*
361  * This routine should only be called using a pfn that is known to reside
362  * in IO space.  The function pf_is_memory() can be used to determine this.
363  */
364 int
365 pf_is_dmacapable(pfn_t pfn)
366 {
367 	int i, j;
368 
369 	/* If the caller passed in a memory pfn, return true. */
370 	if (pf_is_memory(pfn))
371 		return (1);
372 
373 	for (i = upa_dma_pfn_ndx, j = 0; j < i; j++)
374 		if (pfn <= upa_dma_pfn_array[j].hipfn &&
375 		    pfn >= upa_dma_pfn_array[j].lopfn)
376 			return (1);
377 
378 	return (0);
379 }
380 
381 
382 /*
383  * Find cpu_id corresponding to the dip of a CPU device node
384  */
385 int
386 dip_to_cpu_id(dev_info_t *dip, processorid_t *cpu_id)
387 {
388 	dnode_t		nodeid;
389 	int		i;
390 
391 	nodeid = (dnode_t)ddi_get_nodeid(dip);
392 	for (i = 0; i < NCPU; i++) {
393 		if (cpunodes[i].nodeid == nodeid) {
394 			*cpu_id = i;
395 			return (DDI_SUCCESS);
396 		}
397 	}
398 	return (DDI_FAILURE);
399 }
400 
401 /*
402  * Platform independent DR routines
403  */
404 
405 static int
406 ndi2errno(int n)
407 {
408 	int err = 0;
409 
410 	switch (n) {
411 		case NDI_NOMEM:
412 			err = ENOMEM;
413 			break;
414 		case NDI_BUSY:
415 			err = EBUSY;
416 			break;
417 		case NDI_FAULT:
418 			err = EFAULT;
419 			break;
420 		case NDI_FAILURE:
421 			err = EIO;
422 			break;
423 		case NDI_SUCCESS:
424 			break;
425 		case NDI_BADHANDLE:
426 		default:
427 			err = EINVAL;
428 			break;
429 	}
430 	return (err);
431 }
432 
433 /*
434  * Prom tree node list
435  */
436 struct ptnode {
437 	dnode_t		nodeid;
438 	struct ptnode	*next;
439 };
440 
441 /*
442  * Prom tree walk arg
443  */
444 struct pta {
445 	dev_info_t	*pdip;
446 	devi_branch_t	*bp;
447 	uint_t		flags;
448 	dev_info_t	*fdip;
449 	struct ptnode	*head;
450 };
451 
/*
 * Recursively walk the PROM subtree rooted at nodeid, appending every
 * node accepted by the caller's prom_branch_select callback to the list
 * at ap->head.  When DEVI_BRANCH_CHILD is set, children of a visited
 * node are not descended into.
 */
static void
visit_node(dnode_t nodeid, struct pta *ap)
{
	struct ptnode	**nextp;
	int		(*select)(dnode_t, void *, uint_t);

	ASSERT(nodeid != OBP_NONODE && nodeid != OBP_BADNODE);

	select = ap->bp->create.prom_branch_select;

	ASSERT(select);

	if (select(nodeid, ap->bp->arg, 0) == DDI_SUCCESS) {

		/* Walk to the tail of the list, then append. */
		for (nextp = &ap->head; *nextp; nextp = &(*nextp)->next)
			;

		*nextp = kmem_zalloc(sizeof (struct ptnode), KM_SLEEP);

		(*nextp)->nodeid = nodeid;
	}

	/* DEVI_BRANCH_CHILD: do not descend into children. */
	if ((ap->flags & DEVI_BRANCH_CHILD) == DEVI_BRANCH_CHILD)
		return;

	nodeid = prom_childnode(nodeid);
	while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
		visit_node(nodeid, ap);
		nodeid = prom_nextnode(nodeid);
	}
}
483 
484 /*ARGSUSED*/
485 static int
486 set_dip_offline(dev_info_t *dip, void *arg)
487 {
488 	ASSERT(dip);
489 
490 	if (!DEVI_IS_DEVICE_OFFLINE(dip))
491 		DEVI_SET_DEVICE_OFFLINE(dip);
492 
493 	return (DDI_WALK_CONTINUE);
494 }
495 
/*
 * prom_tree_access() callback: create (or find) a devinfo branch for
 * each PROM node under ap->pdip accepted by the caller's
 * prom_branch_select routine.  Each branch is held and marked offline;
 * when DEVI_BRANCH_CONFIGURE is set it is then configured.  Returns 0
 * on success, or the first error encountered (remaining nodes are
 * still processed).
 */
/*ARGSUSED*/
static int
create_prom_branch(void *arg, int has_changed)
{
	int		circ, c;
	int		exists, rv;
	dnode_t		nodeid;
	struct ptnode	*tnp;
	dev_info_t	*dip;
	struct pta	*ap = arg;
	devi_branch_t	*bp;

	ASSERT(ap);
	ASSERT(ap->fdip == NULL);
	ASSERT(ap->pdip && ndi_dev_is_prom_node(ap->pdip));

	bp = ap->bp;

	nodeid = ddi_get_nodeid(ap->pdip);
	if (nodeid == OBP_NONODE || nodeid == OBP_BADNODE) {
		cmn_err(CE_WARN, "create_prom_branch: invalid "
		    "nodeid: 0x%x", nodeid);
		return (EINVAL);
	}

	ap->head = NULL;

	/* Build the list of selected PROM nodes under pdip. */
	nodeid = prom_childnode(nodeid);
	while (nodeid != OBP_NONODE && nodeid != OBP_BADNODE) {
		visit_node(nodeid, ap);
		nodeid = prom_nextnode(nodeid);
	}

	if (ap->head == NULL)
		return (ENODEV);

	rv = 0;
	while ((tnp = ap->head) != NULL) {
		ap->head = tnp->next;

		ndi_devi_enter(ap->pdip, &circ);

		/*
		 * Check if the branch already exists.
		 */
		exists = 0;
		dip = e_ddi_nodeid_to_dip(tnp->nodeid);
		if (dip != NULL) {
			exists = 1;

			/* Parent is held busy, so release hold */
			ndi_rele_devi(dip);
#ifdef	DEBUG
			cmn_err(CE_WARN, "create_prom_branch: dip(%p) exists"
			    " for nodeid 0x%x", (void *)dip, tnp->nodeid);
#endif
		} else {
			dip = i_ddi_create_branch(ap->pdip, tnp->nodeid);
		}

		kmem_free(tnp, sizeof (struct ptnode));

		/* Creation failed: record the error but keep going. */
		if (dip == NULL) {
			ndi_devi_exit(ap->pdip, circ);
			rv = EIO;
			continue;
		}

		ASSERT(ddi_get_parent(dip) == ap->pdip);

		/*
		 * Hold the branch if it is not already held
		 */
		if (!exists)
			e_ddi_branch_hold(dip);

		ASSERT(e_ddi_branch_held(dip));

		/*
		 * Set all dips in the branch offline so that
		 * only a "configure" operation can attach
		 * the branch
		 */
		(void) set_dip_offline(dip, NULL);

		ndi_devi_enter(dip, &c);
		ddi_walk_devs(ddi_get_child(dip), set_dip_offline, NULL);
		ndi_devi_exit(dip, c);

		ndi_devi_exit(ap->pdip, circ);

		if (ap->flags & DEVI_BRANCH_CONFIGURE) {
			int error = e_ddi_branch_configure(dip, &ap->fdip, 0);
			if (error && rv == 0)
				rv = error;
		}

		/*
		 * Invoke devi_branch_callback() (if it exists) only for
		 * newly created branches
		 */
		if (bp->devi_branch_callback && !exists)
			bp->devi_branch_callback(dip, bp->arg, 0);
	}

	return (rv);
}
603 
/*
 * Create one self-identifying (SID) devinfo node under pdip using the
 * caller's sid_branch_create callback, then recursively create its
 * children according to the DDI_WALK_* code the callback returns.  When
 * rdipp is non-NULL the node is the root of a new branch and is
 * returned through rdipp.  Nodes are named from their "name" property,
 * bound (bind failures ignored, as at boot) and marked offline.
 * Returns a DDI_WALK_* code directing the caller's sibling walk;
 * DDI_WALK_ERROR on failure (the node is freed in that case).
 */
static int
sid_node_create(dev_info_t *pdip, devi_branch_t *bp, dev_info_t **rdipp)
{
	int			rv, circ, len;
	int			i, flags;
	dev_info_t		*dip;
	char			*nbuf;
	static const char	*noname = "<none>";

	ASSERT(pdip);
	ASSERT(DEVI_BUSY_OWNED(pdip));

	flags = 0;

	/*
	 * Creating the root of a branch ?
	 */
	if (rdipp) {
		*rdipp = NULL;
		flags = DEVI_BRANCH_ROOT;
	}

	ndi_devi_alloc_sleep(pdip, (char *)noname, DEVI_SID_NODEID, &dip);
	rv = bp->create.sid_branch_create(dip, bp->arg, flags);

	nbuf = kmem_alloc(OBP_MAXDRVNAME, KM_SLEEP);

	if (rv == DDI_WALK_ERROR) {
		cmn_err(CE_WARN, "e_ddi_branch_create: Error setting"
		    " properties on devinfo node %p",  (void *)dip);
		goto fail;
	}

	/* The callback must have set a "name" property on the node. */
	len = OBP_MAXDRVNAME;
	if (ddi_getlongprop_buf(DDI_DEV_T_ANY, dip,
	    DDI_PROP_DONTPASS | DDI_PROP_NOTPROM, "name", nbuf, &len)
	    != DDI_PROP_SUCCESS) {
		cmn_err(CE_WARN, "e_ddi_branch_create: devinfo node %p has"
		    "no name property", (void *)dip);
		goto fail;
	}

	ASSERT(i_ddi_node_state(dip) == DS_PROTO);
	if (ndi_devi_set_nodename(dip, nbuf, 0) != NDI_SUCCESS) {
		cmn_err(CE_WARN, "e_ddi_branch_create: cannot set name (%s)"
		    " for devinfo node %p", nbuf, (void *)dip);
		goto fail;
	}

	kmem_free(nbuf, OBP_MAXDRVNAME);

	/*
	 * Ignore bind failures just like boot does
	 */
	(void) ndi_devi_bind_driver(dip, 0);

	switch (rv) {
	case DDI_WALK_CONTINUE:
	case DDI_WALK_PRUNESIB:
		ndi_devi_enter(dip, &circ);

		/* Create children until one of them prunes or errors. */
		i = DDI_WALK_CONTINUE;
		for (; i == DDI_WALK_CONTINUE; ) {
			i = sid_node_create(dip, bp, NULL);
		}

		ASSERT(i == DDI_WALK_ERROR || i == DDI_WALK_PRUNESIB);
		if (i == DDI_WALK_ERROR)
			rv = i;
		/*
		 * If PRUNESIB stop creating siblings
		 * of dip's child. Subsequent walk behavior
		 * is determined by rv returned by dip.
		 */

		ndi_devi_exit(dip, circ);
		break;
	case DDI_WALK_TERMINATE:
		/*
		 * Don't create children and ask our parent
		 * to not create siblings either.
		 */
		rv = DDI_WALK_PRUNESIB;
		break;
	case DDI_WALK_PRUNECHILD:
		/*
		 * Don't create children, but ask parent to continue
		 * with siblings.
		 */
		rv = DDI_WALK_CONTINUE;
		break;
	default:
		ASSERT(0);
		break;
	}

	if (rdipp)
		*rdipp = dip;

	/*
	 * Set device offline - only the "configure" op should cause an attach
	 */
	(void) set_dip_offline(dip, NULL);

	return (rv);
fail:
	(void) ndi_devi_free(dip);
	kmem_free(nbuf, OBP_MAXDRVNAME);
	return (DDI_WALK_ERROR);
}
714 
/*
 * Repeatedly create SID branches under pdip via sid_node_create() until
 * the callback stops the walk (PRUNESIB) or errors.  Each branch root
 * is held; when DEVI_BRANCH_CONFIGURE is set it is also configured,
 * and the optional devi_branch_callback is invoked on it.  Returns 0
 * on success, EIO if the walk ended in error, or the first configure
 * error.
 */
static int
create_sid_branch(
	dev_info_t	*pdip,
	devi_branch_t	*bp,
	dev_info_t	**dipp,
	uint_t		flags)
{
	int		rv = 0, state = DDI_WALK_CONTINUE;
	dev_info_t	*rdip;

	while (state == DDI_WALK_CONTINUE) {
		int	circ;

		ndi_devi_enter(pdip, &circ);

		state = sid_node_create(pdip, bp, &rdip);
		if (rdip == NULL) {
			ndi_devi_exit(pdip, circ);
			ASSERT(state == DDI_WALK_ERROR);
			break;
		}

		e_ddi_branch_hold(rdip);

		ndi_devi_exit(pdip, circ);

		if (flags & DEVI_BRANCH_CONFIGURE) {
			int error = e_ddi_branch_configure(rdip, dipp, 0);
			if (error && rv == 0)
				rv = error;
		}

		/*
		 * devi_branch_callback() is optional
		 */
		if (bp->devi_branch_callback)
			bp->devi_branch_callback(rdip, bp->arg, 0);
	}

	ASSERT(state == DDI_WALK_ERROR || state == DDI_WALK_PRUNESIB);

	return (state == DDI_WALK_ERROR ? EIO : rv);
}
758 
/*
 * Create a devinfo branch under pdip as described by bp: either from
 * the PROM tree (DEVI_BRANCH_PROM, via the prom_branch_select callback)
 * or self-identifying (DEVI_BRANCH_SID, via the sid_branch_create
 * callback).  DEVI_BRANCH_EVENT is not supported here.  On a configure
 * failure the offending (held) dip is returned through dipp when
 * non-NULL.  Returns 0 on success or an errno value.
 */
int
e_ddi_branch_create(
	dev_info_t	*pdip,
	devi_branch_t	*bp,
	dev_info_t	**dipp,
	uint_t		flags)
{
	int prom_devi, sid_devi, error;

	if (pdip == NULL || bp == NULL || bp->type == 0)
		return (EINVAL);

	prom_devi = (bp->type == DEVI_BRANCH_PROM) ? 1 : 0;
	sid_devi = (bp->type == DEVI_BRANCH_SID) ? 1 : 0;

	/* Exactly one branch type with its matching callback is required. */
	if (prom_devi && bp->create.prom_branch_select == NULL)
		return (EINVAL);
	else if (sid_devi && bp->create.sid_branch_create == NULL)
		return (EINVAL);
	else if (!prom_devi && !sid_devi)
		return (EINVAL);

	if (flags & DEVI_BRANCH_EVENT)
		return (EINVAL);

	if (prom_devi) {
		struct pta pta = {0};

		pta.pdip = pdip;
		pta.bp = bp;
		pta.flags = flags;

		error = prom_tree_access(create_prom_branch, &pta, NULL);

		/* Hand back or release the failed dip, if any. */
		if (dipp)
			*dipp = pta.fdip;
		else if (pta.fdip)
			ndi_rele_devi(pta.fdip);
	} else {
		error = create_sid_branch(pdip, bp, dipp, flags);
	}

	return (error);
}
803 
804 int
805 e_ddi_branch_configure(dev_info_t *rdip, dev_info_t **dipp, uint_t flags)
806 {
807 	int		circ, rv;
808 	char		*devnm;
809 	dev_info_t	*pdip;
810 
811 	if (dipp)
812 		*dipp = NULL;
813 
814 	if (rdip == NULL || flags != 0 || (flags & DEVI_BRANCH_EVENT))
815 		return (EINVAL);
816 
817 	pdip = ddi_get_parent(rdip);
818 
819 	ndi_devi_enter(pdip, &circ);
820 
821 	if (!e_ddi_branch_held(rdip)) {
822 		ndi_devi_exit(pdip, circ);
823 		cmn_err(CE_WARN, "e_ddi_branch_configure: "
824 		    "dip(%p) not held", (void *)rdip);
825 		return (EINVAL);
826 	}
827 
828 	if (i_ddi_node_state(rdip) < DS_INITIALIZED) {
829 		/*
830 		 * First attempt to bind a driver. If we fail, return
831 		 * success (On some platforms, dips for some device
832 		 * types (CPUs) may not have a driver)
833 		 */
834 		if (ndi_devi_bind_driver(rdip, 0) != NDI_SUCCESS) {
835 			ndi_devi_exit(pdip, circ);
836 			return (0);
837 		}
838 
839 		if (ddi_initchild(pdip, rdip) != DDI_SUCCESS) {
840 			rv = NDI_FAILURE;
841 			goto out;
842 		}
843 	}
844 
845 	ASSERT(i_ddi_node_state(rdip) >= DS_INITIALIZED);
846 
847 	devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
848 
849 	(void) ddi_deviname(rdip, devnm);
850 
851 	if ((rv = ndi_devi_config_one(pdip, devnm+1, &rdip,
852 	    NDI_DEVI_ONLINE | NDI_CONFIG)) == NDI_SUCCESS) {
853 		/* release hold from ndi_devi_config_one() */
854 		ndi_rele_devi(rdip);
855 	}
856 
857 	kmem_free(devnm, MAXNAMELEN + 1);
858 out:
859 	if (rv != NDI_SUCCESS && dipp) {
860 		ndi_hold_devi(rdip);
861 		*dipp = rdip;
862 	}
863 	ndi_devi_exit(pdip, circ);
864 	return (ndi2errno(rv));
865 }
866 
867 void
868 e_ddi_branch_hold(dev_info_t *rdip)
869 {
870 	if (e_ddi_branch_held(rdip)) {
871 		cmn_err(CE_WARN, "e_ddi_branch_hold: branch already held");
872 		return;
873 	}
874 
875 	mutex_enter(&DEVI(rdip)->devi_lock);
876 	if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) == 0) {
877 		DEVI(rdip)->devi_flags |= DEVI_BRANCH_HELD;
878 		DEVI(rdip)->devi_ref++;
879 	}
880 	ASSERT(DEVI(rdip)->devi_ref > 0);
881 	mutex_exit(&DEVI(rdip)->devi_lock);
882 }
883 
884 int
885 e_ddi_branch_held(dev_info_t *rdip)
886 {
887 	int rv = 0;
888 
889 	mutex_enter(&DEVI(rdip)->devi_lock);
890 	if ((DEVI(rdip)->devi_flags & DEVI_BRANCH_HELD) &&
891 	    DEVI(rdip)->devi_ref > 0) {
892 		rv = 1;
893 	}
894 	mutex_exit(&DEVI(rdip)->devi_lock);
895 
896 	return (rv);
897 }
898 void
899 e_ddi_branch_rele(dev_info_t *rdip)
900 {
901 	mutex_enter(&DEVI(rdip)->devi_lock);
902 	DEVI(rdip)->devi_flags &= ~DEVI_BRANCH_HELD;
903 	DEVI(rdip)->devi_ref--;
904 	mutex_exit(&DEVI(rdip)->devi_lock);
905 }
906 
907 int
908 e_ddi_branch_unconfigure(
909 	dev_info_t *rdip,
910 	dev_info_t **dipp,
911 	uint_t flags)
912 {
913 	int	circ, rv;
914 	int	destroy;
915 	char	*devnm;
916 	uint_t	nflags;
917 	dev_info_t *pdip;
918 
919 	if (dipp)
920 		*dipp = NULL;
921 
922 	if (rdip == NULL)
923 		return (EINVAL);
924 
925 	pdip = ddi_get_parent(rdip);
926 
927 	ASSERT(pdip);
928 
929 	/*
930 	 * Check if caller holds pdip busy - can cause deadlocks during
931 	 * devfs_clean()
932 	 */
933 	if (DEVI_BUSY_OWNED(pdip)) {
934 		cmn_err(CE_WARN, "e_ddi_branch_unconfigure: failed: parent"
935 		    " devinfo node(%p) is busy held", (void *)pdip);
936 		return (EINVAL);
937 	}
938 
939 	destroy = (flags & DEVI_BRANCH_DESTROY) ? 1 : 0;
940 
941 	devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
942 
943 	ndi_devi_enter(pdip, &circ);
944 	(void) ddi_deviname(rdip, devnm);
945 	ndi_devi_exit(pdip, circ);
946 
947 	/*
948 	 * ddi_deviname() returns a component name with / prepended.
949 	 */
950 	rv = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
951 	if (rv) {
952 		kmem_free(devnm, MAXNAMELEN + 1);
953 		return (rv);
954 	}
955 
956 	ndi_devi_enter(pdip, &circ);
957 
958 	/*
959 	 * Recreate device name as it may have changed state (init/uninit)
960 	 * when parent busy lock was dropped for devfs_clean()
961 	 */
962 	(void) ddi_deviname(rdip, devnm);
963 
964 	if (!e_ddi_branch_held(rdip)) {
965 		kmem_free(devnm, MAXNAMELEN + 1);
966 		ndi_devi_exit(pdip, circ);
967 		cmn_err(CE_WARN, "e_ddi_%s_branch: dip(%p) not held",
968 		    destroy ? "destroy" : "unconfigure", (void *)rdip);
969 		return (EINVAL);
970 	}
971 
972 	/*
973 	 * Release hold on the branch. This is ok since we are holding the
974 	 * parent busy. If rdip is not removed, we must do a hold on the
975 	 * branch before returning.
976 	 */
977 	e_ddi_branch_rele(rdip);
978 
979 	nflags = NDI_DEVI_OFFLINE;
980 	if (destroy || (flags & DEVI_BRANCH_DESTROY)) {
981 		nflags |= NDI_DEVI_REMOVE;
982 		destroy = 1;
983 	} else {
984 		nflags |= NDI_UNCONFIG;		/* uninit but don't remove */
985 	}
986 
987 	if (flags & DEVI_BRANCH_EVENT)
988 		nflags |= NDI_POST_EVENT;
989 
990 	if (i_ddi_node_state(pdip) == DS_READY &&
991 	    i_ddi_node_state(rdip) >= DS_INITIALIZED) {
992 		rv = ndi_devi_unconfig_one(pdip, devnm+1, dipp, nflags);
993 	} else {
994 		rv = e_ddi_devi_unconfig(rdip, dipp, nflags);
995 		if (rv == NDI_SUCCESS) {
996 			ASSERT(!destroy || ddi_get_child(rdip) == NULL);
997 			rv = ndi_devi_offline(rdip, nflags);
998 		}
999 	}
1000 
1001 	if (!destroy || rv != NDI_SUCCESS) {
1002 		/* The dip still exists, so do a hold */
1003 		e_ddi_branch_hold(rdip);
1004 	}
1005 out:
1006 	kmem_free(devnm, MAXNAMELEN + 1);
1007 	ndi_devi_exit(pdip, circ);
1008 	return (ndi2errno(rv));
1009 }
1010 
1011 int
1012 e_ddi_branch_destroy(dev_info_t *rdip, dev_info_t **dipp, uint_t flag)
1013 {
1014 	return (e_ddi_branch_unconfigure(rdip, dipp,
1015 	    flag|DEVI_BRANCH_DESTROY));
1016 }
1017 
1018 /*
1019  * Number of chains for hash table
1020  */
1021 #define	NUMCHAINS	17
1022 
1023 /*
1024  * Devinfo busy arg
1025  */
1026 struct devi_busy {
1027 	int dv_total;
1028 	int s_total;
1029 	mod_hash_t *dv_hash;
1030 	mod_hash_t *s_hash;
1031 	int (*callback)(dev_info_t *, void *, uint_t);
1032 	void *arg;
1033 };
1034 
/*
 * ddi_walk_devs() callback: invoke the caller's callback on dip with
 * that dip's busy count -- the larger of its dv_node vnode count and
 * its snode open count, as gathered in the preceding walks.  Returns
 * whatever the caller's callback returns.
 */
static int
visit_dip(dev_info_t *dip, void *arg)
{
	uintptr_t sbusy, dvbusy, ref;
	struct devi_busy *bsp = arg;

	ASSERT(bsp->callback);

	/*
	 * A dip cannot be busy if its reference count is 0
	 */
	if ((ref = e_ddi_devi_holdcnt(dip)) == 0) {
		return (bsp->callback(dip, bsp->arg, 0));
	}

	/* Lookup failure means no dv_node opens were recorded. */
	if (mod_hash_find(bsp->dv_hash, dip, (mod_hash_val_t *)&dvbusy))
		dvbusy = 0;

	/*
	 * To catch device opens currently maintained on specfs common snodes.
	 */
	if (mod_hash_find(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
		sbusy = 0;

#ifdef	DEBUG
	if (ref < sbusy || ref < dvbusy) {
		cmn_err(CE_WARN, "dip(%p): sopen = %lu, dvopen = %lu "
		    "dip ref = %lu\n", (void *)dip, sbusy, dvbusy, ref);
	}
#endif

	/* Report the larger of the two counts as the busy count. */
	dvbusy = (sbusy > dvbusy) ? sbusy : dvbusy;

	return (bsp->callback(dip, bsp->arg, dvbusy));
}
1070 
/*
 * spec_snode_walk() callback: accumulate the snode's open count into
 * the per-dip total kept in bsp->s_hash, and into bsp->s_total.
 * Always continues the walk.
 */
static int
visit_snode(struct snode *sp, void *arg)
{
	uintptr_t sbusy;
	dev_info_t *dip;
	int count;
	struct devi_busy *bsp = arg;

	ASSERT(sp);

	/*
	 * The stable lock is held. This prevents
	 * the snode and its associated dip from
	 * going away.
	 */
	dip = NULL;
	count = spec_devi_open_count(sp, &dip);

	if (count <= 0)
		return (DDI_WALK_CONTINUE);

	ASSERT(dip);

	/*
	 * Remove any existing total for this dip, add this snode's
	 * count, then re-insert the updated total.
	 */
	if (mod_hash_remove(bsp->s_hash, dip, (mod_hash_val_t *)&sbusy))
		sbusy = count;
	else
		sbusy += count;

	if (mod_hash_insert(bsp->s_hash, dip, (mod_hash_val_t)sbusy)) {
		cmn_err(CE_WARN, "%s: s_hash insert failed: dip=0x%p, "
		    "sbusy = %lu", "e_ddi_branch_referenced",
		    (void *)dip, sbusy);
	}

	bsp->s_total += count;

	return (DDI_WALK_CONTINUE);
}
1109 
/*
 * devfs_walk() callback: accumulate the dv_node's vnode reference count
 * into the per-dip total kept in bsp->dv_hash, and into bsp->dv_total.
 */
static void
visit_dvnode(struct dv_node *dv, void *arg)
{
	uintptr_t dvbusy;
	uint_t count;
	struct vnode *vp;
	struct devi_busy *bsp = arg;

	ASSERT(dv && dv->dv_devi);

	vp = DVTOV(dv);

	/* Snapshot the vnode's reference count under v_lock. */
	mutex_enter(&vp->v_lock);
	count = vp->v_count;
	mutex_exit(&vp->v_lock);

	if (!count)
		return;

	/*
	 * Remove any existing total for this dip, add this dv_node's
	 * count, then re-insert the updated total.
	 */
	if (mod_hash_remove(bsp->dv_hash, dv->dv_devi,
	    (mod_hash_val_t *)&dvbusy))
		dvbusy = count;
	else
		dvbusy += count;

	if (mod_hash_insert(bsp->dv_hash, dv->dv_devi,
	    (mod_hash_val_t)dvbusy)) {
		cmn_err(CE_WARN, "%s: dv_hash insert failed: dip=0x%p, "
		    "dvbusy=%lu", "e_ddi_branch_referenced",
		    (void *)dv->dv_devi, dvbusy);
	}

	bsp->dv_total += count;
}
1144 
1145 /*
1146  * Returns reference count on success or -1 on failure.
1147  */
1148 int
1149 e_ddi_branch_referenced(
1150 	dev_info_t *rdip,
1151 	int (*callback)(dev_info_t *dip, void *arg, uint_t ref),
1152 	void *arg)
1153 {
1154 	int circ;
1155 	char *path;
1156 	dev_info_t *pdip;
1157 	struct devi_busy bsa = {0};
1158 
1159 	ASSERT(rdip);
1160 
1161 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1162 
1163 	ndi_hold_devi(rdip);
1164 
1165 	pdip = ddi_get_parent(rdip);
1166 
1167 	ASSERT(pdip);
1168 
1169 	/*
1170 	 * Check if caller holds pdip busy - can cause deadlocks during
1171 	 * devfs_walk()
1172 	 */
1173 	if (!e_ddi_branch_held(rdip) || DEVI_BUSY_OWNED(pdip)) {
1174 		cmn_err(CE_WARN, "e_ddi_branch_referenced: failed: "
1175 		    "devinfo branch(%p) not held or parent busy held",
1176 		    (void *)rdip);
1177 		ndi_rele_devi(rdip);
1178 		kmem_free(path, MAXPATHLEN);
1179 		return (-1);
1180 	}
1181 
1182 	ndi_devi_enter(pdip, &circ);
1183 	(void) ddi_pathname(rdip, path);
1184 	ndi_devi_exit(pdip, circ);
1185 
1186 	bsa.dv_hash = mod_hash_create_ptrhash("dv_node busy hash", NUMCHAINS,
1187 	    mod_hash_null_valdtor, sizeof (struct dev_info));
1188 
1189 	bsa.s_hash = mod_hash_create_ptrhash("snode busy hash", NUMCHAINS,
1190 	    mod_hash_null_valdtor, sizeof (struct snode));
1191 
1192 	if (devfs_walk(path, visit_dvnode, &bsa)) {
1193 		cmn_err(CE_WARN, "e_ddi_branch_referenced: "
1194 		    "devfs walk failed for: %s", path);
1195 		kmem_free(path, MAXPATHLEN);
1196 		bsa.s_total = bsa.dv_total = -1;
1197 		goto out;
1198 	}
1199 
1200 	kmem_free(path, MAXPATHLEN);
1201 
1202 	/*
1203 	 * Walk the snode table to detect device opens, which are currently
1204 	 * maintained on specfs common snodes.
1205 	 */
1206 	spec_snode_walk(visit_snode, &bsa);
1207 
1208 	if (callback == NULL)
1209 		goto out;
1210 
1211 	bsa.callback = callback;
1212 	bsa.arg = arg;
1213 
1214 	if (visit_dip(rdip, &bsa) == DDI_WALK_CONTINUE) {
1215 		ndi_devi_enter(rdip, &circ);
1216 		ddi_walk_devs(ddi_get_child(rdip), visit_dip, &bsa);
1217 		ndi_devi_exit(rdip, circ);
1218 	}
1219 
1220 out:
1221 	ndi_rele_devi(rdip);
1222 	mod_hash_destroy_ptrhash(bsa.s_hash);
1223 	mod_hash_destroy_ptrhash(bsa.dv_hash);
1224 	return (bsa.s_total > bsa.dv_total ? bsa.s_total : bsa.dv_total);
1225 }
1226