xref: /illumos-gate/usr/src/uts/common/os/ddi.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * UNIX Device Driver Interface functions
34  *
35  * This file contains functions that are to be added to the kernel
36  * to put the interface presented to drivers in conformance with
37  * the DDI standard. Of the functions added to the kernel, 17 are
38  * function equivalents of existing macros in sysmacros.h,
39  * stream.h, and param.h
40  *
41  * 13 additional functions -- drv_getparm(), drv_setparm(),
42  * getrbuf(), freerbuf(),
43  * getemajor(), geteminor(), etoimajor(), itoemajor(), drv_usectohz(),
44  * drv_hztousec(), drv_usecwait(), drv_priv(), and kvtoppid() --
45  * are specified by DDI to exist in the kernel and are implemented here.
46  *
47  * Note that putnext() and put() are not in this file. The C version of
48  * these routines are in uts/common/os/putnext.c and assembly versions
49  * might exist for some architectures.
50  */
51 
52 #include <sys/types.h>
53 #include <sys/param.h>
54 #include <sys/t_lock.h>
55 #include <sys/time.h>
56 #include <sys/systm.h>
57 #include <sys/cpuvar.h>
58 #include <sys/signal.h>
59 #include <sys/pcb.h>
60 #include <sys/user.h>
61 #include <sys/errno.h>
62 #include <sys/buf.h>
63 #include <sys/proc.h>
64 #include <sys/cmn_err.h>
65 #include <sys/stream.h>
66 #include <sys/strsubr.h>
67 #include <sys/uio.h>
68 #include <sys/kmem.h>
69 #include <sys/conf.h>
70 #include <sys/cred.h>
71 #include <sys/vnode.h>
72 #include <sys/file.h>
73 #include <sys/poll.h>
74 #include <sys/session.h>
75 #include <sys/ddi.h>
76 #include <sys/sunddi.h>
77 #include <sys/esunddi.h>
78 #include <sys/mkdev.h>
79 #include <sys/debug.h>
80 #include <sys/vtrace.h>
81 
82 /*
83  * return internal major number corresponding to device
84  * number (new format) argument
85  */
86 major_t
87 getmajor(dev_t dev)
88 {
89 #ifdef _LP64
90 	return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
91 #else
92 	return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
93 #endif
94 }
95 
96 /*
97  * return external major number corresponding to device
98  * number (new format) argument
99  */
100 major_t
101 getemajor(dev_t dev)
102 {
103 #ifdef _LP64
104 	return ((major_t)((dev >> NBITSMINOR64) & MAXMAJ64));
105 #else
106 	return ((major_t)((dev >> NBITSMINOR) & MAXMAJ));
107 #endif
108 }
109 
110 /*
111  * return internal minor number corresponding to device
112  * number (new format) argument
113  */
114 minor_t
115 getminor(dev_t dev)
116 {
117 #ifdef _LP64
118 	return ((minor_t)(dev & MAXMIN64));
119 #else
120 	return ((minor_t)(dev & MAXMIN));
121 #endif
122 }
123 
124 /*
125  * return external minor number corresponding to device
126  * number (new format) argument
127  */
128 minor_t
129 geteminor(dev_t dev)
130 {
131 #ifdef _LP64
132 	return ((minor_t)(dev & MAXMIN64));
133 #else
134 	return ((minor_t)(dev & MAXMIN));
135 #endif
136 }
137 
138 /*
139  * return internal major number corresponding to external
140  * major number.
141  */
142 int
143 etoimajor(major_t emajnum)
144 {
145 #ifdef _LP64
146 	if (emajnum >= devcnt)
147 		return (-1); /* invalid external major */
148 #else
149 	if (emajnum > MAXMAJ || emajnum >= devcnt)
150 		return (-1); /* invalid external major */
151 #endif
152 	return ((int)emajnum);
153 }
154 
155 /*
156  * return external major number corresponding to internal
157  * major number argument or -1 if no external major number
158  * can be found after lastemaj that maps to the internal
159  * major number. Pass a lastemaj val of -1 to start
160  * the search initially. (Typical use of this function is
161  * of the form:
162  *
163  *	lastemaj = -1;
164  *	while ((lastemaj = itoemajor(imag, lastemaj)) != -1)
165  *		{ process major number }
166  */
167 int
168 itoemajor(major_t imajnum, int lastemaj)
169 {
170 	if (imajnum >= devcnt)
171 		return (-1);
172 
173 	/*
174 	 * if lastemaj == -1 then start from beginning of
175 	 * the (imaginary) MAJOR table
176 	 */
177 	if (lastemaj < -1)
178 		return (-1);
179 
180 	/*
181 	 * given that there's a 1-1 mapping of internal to external
182 	 * major numbers, searching is somewhat pointless ... let's
183 	 * just go there directly.
184 	 */
185 	if (++lastemaj < devcnt && imajnum < devcnt)
186 		return (imajnum);
187 	return (-1);
188 }
189 
190 /*
191  * encode external major and minor number arguments into a
192  * new format device number
193  */
194 dev_t
195 makedevice(major_t maj, minor_t minor)
196 {
197 #ifdef _LP64
198 	return (((dev_t)maj << NBITSMINOR64) | (minor & MAXMIN64));
199 #else
200 	return (((dev_t)maj << NBITSMINOR) | (minor & MAXMIN));
201 #endif
202 }
203 
204 /*
205  * cmpdev - compress new device format to old device format
206  */
207 o_dev_t
208 cmpdev(dev_t dev)
209 {
210 	major_t major_d;
211 	minor_t minor_d;
212 
213 #ifdef _LP64
214 	major_d = dev >> NBITSMINOR64;
215 	minor_d = dev & MAXMIN64;
216 #else
217 	major_d = dev >> NBITSMINOR;
218 	minor_d = dev & MAXMIN;
219 #endif
220 	if (major_d > OMAXMAJ || minor_d > OMAXMIN)
221 		return ((o_dev_t)NODEV);
222 	return ((o_dev_t)((major_d << ONBITSMINOR) | minor_d));
223 }
224 
225 dev_t
226 expdev(dev_t dev)
227 {
228 	major_t major_d;
229 	minor_t minor_d;
230 
231 	major_d = ((dev >> ONBITSMINOR) & OMAXMAJ);
232 	minor_d = (dev & OMAXMIN);
233 #ifdef _LP64
234 	return ((((dev_t)major_d << NBITSMINOR64) | minor_d));
235 #else
236 	return ((((dev_t)major_d << NBITSMINOR) | minor_d));
237 #endif
238 }
239 
240 /*
241  * return true (1) if the message type input is a data
242  * message type, 0 otherwise
243  */
244 #undef datamsg
245 int
246 datamsg(unsigned char db_type)
247 {
248 	return (db_type == M_DATA || db_type == M_PROTO ||
249 	    db_type == M_PCPROTO || db_type == M_DELAY);
250 }
251 
252 /*
253  * return a pointer to the other queue in the queue pair of qp
254  */
255 queue_t *
256 OTHERQ(queue_t *q)
257 {
258 	return (_OTHERQ(q));
259 }
260 
261 /*
262  * return a pointer to the read queue in the queue pair of qp.
263  */
264 queue_t *
265 RD(queue_t *q)
266 {
267 		return (_RD(q));
268 
269 }
270 
271 /*
272  * return a pointer to the write queue in the queue pair of qp.
273  */
274 int
275 SAMESTR(queue_t *q)
276 {
277 	return (_SAMESTR(q));
278 }
279 
280 /*
281  * return a pointer to the write queue in the queue pair of qp.
282  */
283 queue_t *
284 WR(queue_t *q)
285 {
286 	return (_WR(q));
287 }
288 
289 /*
290  * store value of kernel parameter associated with parm
291  */
292 int
293 drv_getparm(unsigned int parm, void *valuep)
294 {
295 	proc_t	*p = curproc;
296 	time_t	now;
297 
298 	switch (parm) {
299 	case UPROCP:
300 		*(proc_t **)valuep = p;
301 		break;
302 	case PPGRP:
303 		mutex_enter(&p->p_lock);
304 		*(pid_t *)valuep = p->p_pgrp;
305 		mutex_exit(&p->p_lock);
306 		break;
307 	case LBOLT:
308 		*(clock_t *)valuep = lbolt;
309 		break;
310 	case TIME:
311 		if ((now = gethrestime_sec()) == 0) {
312 			timestruc_t ts;
313 			mutex_enter(&tod_lock);
314 			ts = tod_get();
315 			mutex_exit(&tod_lock);
316 			*(time_t *)valuep = ts.tv_sec;
317 		} else {
318 			*(time_t *)valuep = now;
319 		}
320 		break;
321 	case PPID:
322 		*(pid_t *)valuep = p->p_pid;
323 		break;
324 	case PSID:
325 		mutex_enter(&p->p_splock);
326 		*(pid_t *)valuep = p->p_sessp->s_sid;
327 		mutex_exit(&p->p_splock);
328 		break;
329 	case UCRED:
330 		*(cred_t **)valuep = CRED();
331 		break;
332 	default:
333 		return (-1);
334 	}
335 
336 	return (0);
337 }
338 
339 /*
340  * set value of kernel parameter associated with parm
341  */
342 int
343 drv_setparm(unsigned int parm, unsigned long value)
344 {
345 	switch (parm) {
346 	case SYSRINT:
347 		CPU_STATS_ADDQ(CPU, sys, rcvint, value);
348 		break;
349 	case SYSXINT:
350 		CPU_STATS_ADDQ(CPU, sys, xmtint, value);
351 		break;
352 	case SYSMINT:
353 		CPU_STATS_ADDQ(CPU, sys, mdmint, value);
354 		break;
355 	case SYSRAWC:
356 		CPU_STATS_ADDQ(CPU, sys, rawch, value);
357 		break;
358 	case SYSCANC:
359 		CPU_STATS_ADDQ(CPU, sys, canch, value);
360 		break;
361 	case SYSOUTC:
362 		CPU_STATS_ADDQ(CPU, sys, outch, value);
363 		break;
364 	default:
365 		return (-1);
366 	}
367 
368 	return (0);
369 }
370 
371 /*
372  * allocate space for buffer header and return pointer to it.
373  * preferred means of obtaining space for a local buf header.
374  * returns pointer to buf upon success, NULL for failure
375  */
376 struct buf *
377 getrbuf(int sleep)
378 {
379 	struct buf *bp;
380 
381 	bp = kmem_alloc(sizeof (struct buf), sleep);
382 	if (bp == NULL)
383 		return (NULL);
384 	bioinit(bp);
385 
386 	return (bp);
387 }
388 
389 /*
390  * free up space allocated by getrbuf()
391  */
392 void
393 freerbuf(struct buf *bp)
394 {
395 	biofini(bp);
396 	kmem_free(bp, sizeof (struct buf));
397 }
398 
399 /*
400  * convert byte count input to logical page units
401  * (byte counts that are not a page-size multiple
402  * are rounded down)
403  */
404 pgcnt_t
405 btop(size_t numbytes)
406 {
407 	return (numbytes >> PAGESHIFT);
408 }
409 
410 /*
411  * convert byte count input to logical page units
412  * (byte counts that are not a page-size multiple
413  * are rounded up)
414  */
415 pgcnt_t
416 btopr(size_t numbytes)
417 {
418 	return ((numbytes + PAGEOFFSET) >> PAGESHIFT);
419 }
420 
421 /*
422  * convert size in pages to bytes.
423  */
424 size_t
425 ptob(pgcnt_t numpages)
426 {
427 	return (numpages << PAGESHIFT);
428 }
429 
430 #define	MAXCLOCK_T LONG_MAX
431 
432 /*
433  * Convert from system time units (hz) to microseconds.
434  *
435  * If ticks <= 0, return 0.
436  * If converting ticks to usecs would overflow, return MAXCLOCK_T.
437  * Otherwise, convert ticks to microseconds.
438  */
439 clock_t
440 drv_hztousec(clock_t ticks)
441 {
442 	if (ticks <= 0)
443 		return (0);
444 
445 	if (ticks > MAXCLOCK_T / usec_per_tick)
446 		return (MAXCLOCK_T);
447 
448 	return (TICK_TO_USEC(ticks));
449 }
450 
451 
452 /*
453  * Convert from microseconds to system time units (hz), rounded up.
454  *
455  * If ticks <= 0, return 0.
456  * Otherwise, convert microseconds to ticks, rounding up.
457  */
458 clock_t
459 drv_usectohz(clock_t microsecs)
460 {
461 	if (microsecs <= 0)
462 		return (0);
463 
464 	return (USEC_TO_TICK_ROUNDUP(microsecs));
465 }
466 
467 #ifdef	sun
468 /*
469  * drv_usecwait implemented in each architecture's machine
470  * specific code somewhere. For sparc, it is the alternate entry
471  * to usec_delay (eventually usec_delay goes away). See
472  * sparc/os/ml/sparc_subr.s
473  */
474 #endif
475 
476 /*
477  * bcanputnext, canputnext assume called from timeout, bufcall,
478  * or esballoc free routines.  since these are driven by
479  * clock interrupts, instead of system calls the appropriate plumbing
480  * locks have not been acquired.
481  */
482 int
483 bcanputnext(queue_t *q, unsigned char band)
484 {
485 	int	ret;
486 
487 	claimstr(q);
488 	ret = bcanput(q->q_next, band);
489 	releasestr(q);
490 	return (ret);
491 }
492 
493 int
494 canputnext(queue_t *q)
495 {
496 	queue_t	*qofsq = q;
497 	struct stdata *stp = STREAM(q);
498 	kmutex_t *sdlock;
499 
500 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_IN,
501 	    "canputnext?:%p\n", q);
502 
503 	if (stp->sd_ciputctrl != NULL) {
504 		int ix = CPU->cpu_seqid & stp->sd_nciputctrl;
505 		sdlock = &stp->sd_ciputctrl[ix].ciputctrl_lock;
506 		mutex_enter(sdlock);
507 	} else
508 		mutex_enter(sdlock = &stp->sd_reflock);
509 
510 	/* get next module forward with a service queue */
511 	q = q->q_next->q_nfsrv;
512 	ASSERT(q != NULL);
513 
514 	/* this is for loopback transports, they should not do a canputnext */
515 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(qofsq));
516 
517 	if (!(q->q_flag & QFULL)) {
518 		mutex_exit(sdlock);
519 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
520 		    "canputnext:%p %d", q, 1);
521 		return (1);
522 	}
523 
524 	if (sdlock != &stp->sd_reflock) {
525 		mutex_exit(sdlock);
526 		mutex_enter(&stp->sd_reflock);
527 	}
528 
529 	/* the above is the most frequently used path */
530 	stp->sd_refcnt++;
531 	ASSERT(stp->sd_refcnt != 0);	/* Wraparound */
532 	mutex_exit(&stp->sd_reflock);
533 
534 	mutex_enter(QLOCK(q));
535 	if (q->q_flag & QFULL) {
536 		q->q_flag |= QWANTW;
537 		mutex_exit(QLOCK(q));
538 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT,
539 		    "canputnext:%p %d", q, 0);
540 		releasestr(qofsq);
541 
542 		return (0);
543 	}
544 	mutex_exit(QLOCK(q));
545 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUTNEXT_OUT, "canputnext:%p %d", q, 1);
546 	releasestr(qofsq);
547 
548 	return (1);
549 }
550 
551 
552 /*
553  * Open has progressed to the point where it is safe to send/receive messages.
554  *
555  * "qprocson enables the put and service routines of the driver
556  * or module... Prior to the call to qprocson, the put and service
557  * routines of a newly pushed module or newly opened driver are
558  * disabled.  For the module, messages flow around it as if it
559  * were not present in the stream... qprocson must be called by
560  * the first open of a module or driver after allocation and
561  * initialization of any resource on which the put and service
562  * routines depend."
563  *
564  * Note that before calling qprocson a module/driver could itself cause its
565  * put or service procedures to be run by using put() or qenable().
566  */
567 void
568 qprocson(queue_t *q)
569 {
570 	ASSERT(q->q_flag & QREADR);
571 	/*
572 	 * Do not call insertq() if it is a re-open.  But if _QINSERTING
573 	 * is set, q_next will not be NULL and we need to call insertq().
574 	 */
575 	if ((q->q_next == NULL && WR(q)->q_next == NULL) ||
576 	    (q->q_flag & _QINSERTING))
577 		insertq(STREAM(q), q);
578 }
579 
580 /*
581  * Close has reached a point where it can no longer allow put/service
582  * into the queue.
583  *
584  * "qprocsoff disables the put and service routines of the driver
585  * or module... When the routines are disabled in a module, messages
586  * flow around the module as if it were not present in the stream.
587  * qprocsoff must be called by the close routine of a driver or module
588  * before deallocating any resources on which the driver/module's
589  * put and service routines depend.  qprocsoff will remove the
590  * queue's service routines from the list of service routines to be
591  * run and waits until any concurrent put or service routines are
592  * finished."
593  *
594  * Note that after calling qprocsoff a module/driver could itself cause its
595  * put procedures to be run by using put().
596  */
597 void
598 qprocsoff(queue_t *q)
599 {
600 	ASSERT(q->q_flag & QREADR);
601 	if (q->q_flag & QWCLOSE) {
602 		/* Called more than once */
603 		return;
604 	}
605 	disable_svc(q);
606 	removeq(q);
607 }
608 
609 /*
610  * "freezestr() freezes the state of the entire STREAM  containing
611  *  the  queue  pair  q.  A frozen STREAM blocks any thread
612  *  attempting to enter any open, close, put or service  routine
613  *  belonging  to  any  queue instance in the STREAM, and blocks
614  *  any thread currently within the STREAM if it attempts to put
615  *  messages  onto  or take messages off of any queue within the
616  *  STREAM (with the sole exception  of  the  caller).   Threads
617  *  blocked  by  this  mechanism  remain  so until the STREAM is
618  *  thawed by a call to unfreezestr().
619  *
620  * Use strblock to set SQ_FROZEN in all syncqs in the stream (prevents
621  * further entry into put, service, open, and close procedures) and
622  * grab (and hold) all the QLOCKs in the stream (to block putq, getq etc.)
623  *
624  * Note: this has to be the only code that acquires one QLOCK while holding
625  * another QLOCK (otherwise we would have locking hirarchy/ordering violations.)
626  */
627 void
628 freezestr(queue_t *q)
629 {
630 	struct stdata *stp = STREAM(q);
631 
632 	/*
633 	 * Increment refcnt to prevent q_next from changing during the strblock
634 	 * as well as while the stream is frozen.
635 	 */
636 	claimstr(RD(q));
637 
638 	strblock(q);
639 	ASSERT(stp->sd_freezer == NULL);
640 	stp->sd_freezer = curthread;
641 	for (q = stp->sd_wrq; q != NULL; q = SAMESTR(q) ? q->q_next : NULL) {
642 		mutex_enter(QLOCK(q));
643 		mutex_enter(QLOCK(RD(q)));
644 	}
645 }
646 
647 /*
648  * Undo what freezestr did.
649  * Have to drop the QLOCKs before the strunblock since strunblock will
650  * potentially call other put procedures.
651  */
652 void
653 unfreezestr(queue_t *q)
654 {
655 	struct stdata *stp = STREAM(q);
656 	queue_t	*q1;
657 
658 	for (q1 = stp->sd_wrq; q1 != NULL;
659 	    q1 = SAMESTR(q1) ? q1->q_next : NULL) {
660 		mutex_exit(QLOCK(q1));
661 		mutex_exit(QLOCK(RD(q1)));
662 	}
663 	ASSERT(stp->sd_freezer == curthread);
664 	stp->sd_freezer = NULL;
665 	strunblock(q);
666 	releasestr(RD(q));
667 }
668 
669 /*
670  * Used by open and close procedures to "sleep" waiting for messages to
671  * arrive. Note: can only be used in open and close procedures.
672  *
673  * Lower the gate and let in either messages on the syncq (if there are
674  * any) or put/service procedures.
675  *
676  * If the queue has an outer perimeter this will not prevent entry into this
677  * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
678  * exclusive access to the outer perimeter.)
679  *
680  * Return 0 if the cv_wait_sig was interrupted; otherwise 1.
681  *
682  * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
683  * otherwise put entry points were not blocked in the first place. if this is
684  * SQ_CIOC then qwait is used to wait for service procedure to run since syncq
685  * is always SQ_CIPUT if it is SQ_CIOC.
686  *
687  * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
688  * atomically under sq_putlocks to make sure putnext will not miss a pending
689  * wakeup.
 *
 * Arguments:
 *	q - queue whose syncq (q->q_syncq) is left and then re-entered.
 *
 * Two exits exist: if events were already queued on the syncq they are
 * drained and 1 is returned without sleeping; otherwise the value of the
 * last cv_wait_sig() call is returned.  In both cases the syncq has been
 * re-entered with entersq(sq, SQ_OPENCLOSE) before returning.
690  */
691 int
692 qwait_sig(queue_t *q)
693 {
694 	syncq_t		*sq, *outer;
695 	uint_t		flags;
696 	int		ret = 1;
697 	int		is_sq_cioc;
698 
699 	/*
700 	 * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
701 	 * while detecting all cases where the perimeter is entered
702 	 * so that qwait_sig can return to the caller.
703 	 *
704 	 * Drain the syncq if possible. Otherwise reset SQ_EXCL and
705 	 * wait for a thread to leave the syncq.
706 	 */
707 	sq = q->q_syncq;
708 	ASSERT(sq);
	/* SQ_CIOC syncqs skip all of the sq_putlocks handling below */
709 	is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
710 	ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
711 	outer = sq->sq_outer;
712 	/*
713 	 * XXX this does not work if there is only an outer perimeter.
714 	 * The semantics of qwait/qwait_sig are undefined in this case.
715 	 */
716 	if (outer)
717 		outer_exit(outer);
718 
719 	mutex_enter(SQLOCK(sq));
720 	if (is_sq_cioc == 0) {
721 		SQ_PUTLOCKS_ENTER(sq);
722 	}
	/* work on a local copy; it is stored back to sq_flags below */
723 	flags = sq->sq_flags;
724 	/*
725 	 * Drop SQ_EXCL and sq_count but hold the SQLOCK
726 	 * to prevent any undetected entry and exit into the perimeter.
727 	 */
728 	ASSERT(sq->sq_count > 0);
729 	sq->sq_count--;
730 
731 	if (is_sq_cioc == 0) {
732 		ASSERT(flags & SQ_EXCL);
733 		flags &= ~SQ_EXCL;
734 	}
735 	/*
736 	 * Unblock any thread blocked in an entersq or outer_enter.
737 	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
738 	 * since that could lead to livelock with two threads in
739 	 * qwait for the same (per module) inner perimeter.
740 	 */
741 	if (flags & SQ_WANTWAKEUP) {
742 		cv_broadcast(&sq->sq_wait);
743 		flags &= ~SQ_WANTWAKEUP;
744 	}
745 	sq->sq_flags = flags;
	/* events already queued and draining allowed: no need to sleep */
746 	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
747 		if (is_sq_cioc == 0) {
748 			SQ_PUTLOCKS_EXIT(sq);
749 		}
750 		/* drain_syncq() drops SQLOCK */
751 		drain_syncq(sq);
752 		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
753 		entersq(sq, SQ_OPENCLOSE);
754 		return (1);
755 	}
756 	/*
757 	 * Sleep on sq_exitwait to only be woken up when threads leave the
758 	 * put or service procedures. We can not sleep on sq_wait since an
759 	 * outer_exit in a qwait running in the same outer perimeter would
760 	 * cause a livelock "ping-pong" between two or more qwait'ers.
761 	 */
	/*
	 * The loop ends when cv_wait_sig() returns 0 or when
	 * SQ_WANTEXWAKEUP is found clear after waking (presumably cleared
	 * by the thread that signalled sq_exitwait -- not visible here).
	 * The putlocks are released across the wait and retaken before
	 * sq_flags is examined.
	 */
762 	do {
763 		sq->sq_flags |= SQ_WANTEXWAKEUP;
764 		if (is_sq_cioc == 0) {
765 			SQ_PUTLOCKS_EXIT(sq);
766 		}
767 		ret = cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq));
768 		if (is_sq_cioc == 0) {
769 			SQ_PUTLOCKS_ENTER(sq);
770 		}
771 	} while (ret && (sq->sq_flags & SQ_WANTEXWAKEUP));
772 	if (is_sq_cioc == 0) {
773 		SQ_PUTLOCKS_EXIT(sq);
774 	}
775 	mutex_exit(SQLOCK(sq));
776 
777 	/*
778 	 * Re-enter the perimeters again
779 	 */
780 	entersq(sq, SQ_OPENCLOSE);
781 	return (ret);
782 }
783 
784 /*
785  * Used by open and close procedures to "sleep" waiting for messages to
786  * arrive. Note: can only be used in open and close procedures.
787  *
788  * Lower the gate and let in either messages on the syncq (if there are
789  * any) or put/service procedures.
790  *
791  * If the queue has an outer perimeter this will not prevent entry into this
792  * syncq (since outer_enter does not set SQ_WRITER on the syncq that gets the
793  * exclusive access to the outer perimeter.)
794  *
795  * It only makes sense to grab sq_putlocks for !SQ_CIOC sync queues because
796  * otherwise put entry points were not blocked in the first place. if this is
797  * SQ_CIOC then qwait is used to wait for service procedure to run since syncq
798  * is always SQ_CIPUT if it is SQ_CIOC.
799  *
800  * Note that SQ_EXCL is dropped and SQ_WANTEXWAKEUP set in sq_flags
801  * atomically under sq_putlocks to make sure putnext will not miss a pending
802  * wakeup.
 *
 * Arguments:
 *	q - queue whose syncq (q->q_syncq) is left and then re-entered.
 *
 * This follows the same sequence as qwait_sig() but sleeps with cv_wait(),
 * so the wait is not ended by a signal and no value is returned.  The
 * syncq has been re-entered with entersq(sq, SQ_OPENCLOSE) on return.
803  */
804 void
805 qwait(queue_t *q)
806 {
807 	syncq_t		*sq, *outer;
808 	uint_t		flags;
809 	int		is_sq_cioc;
810 
811 	/*
812 	 * Perform the same operations as a leavesq(sq, SQ_OPENCLOSE)
813 	 * while detecting all cases where the perimeter is entered
814 	 * so that qwait can return to the caller.
815 	 *
816 	 * Drain the syncq if possible. Otherwise reset SQ_EXCL and
817 	 * wait for a thread to leave the syncq.
818 	 */
819 	sq = q->q_syncq;
820 	ASSERT(sq);
	/* SQ_CIOC syncqs skip all of the sq_putlocks handling below */
821 	is_sq_cioc = (sq->sq_type & SQ_CIOC) ? 1 : 0;
822 	ASSERT(sq->sq_outer == NULL || sq->sq_outer->sq_flags & SQ_WRITER);
823 	outer = sq->sq_outer;
824 	/*
825 	 * XXX this does not work if there is only an outer perimeter.
826 	 * The semantics of qwait/qwait_sig are undefined in this case.
827 	 */
828 	if (outer)
829 		outer_exit(outer);
830 
831 	mutex_enter(SQLOCK(sq));
832 	if (is_sq_cioc == 0) {
833 		SQ_PUTLOCKS_ENTER(sq);
834 	}
	/* work on a local copy; it is stored back to sq_flags below */
835 	flags = sq->sq_flags;
836 	/*
837 	 * Drop SQ_EXCL and sq_count but hold the SQLOCK
838 	 * to prevent any undetected entry and exit into the perimeter.
839 	 */
840 	ASSERT(sq->sq_count > 0);
841 	sq->sq_count--;
842 
843 	if (is_sq_cioc == 0) {
844 		ASSERT(flags & SQ_EXCL);
845 		flags &= ~SQ_EXCL;
846 	}
847 	/*
848 	 * Unblock any thread blocked in an entersq or outer_enter.
849 	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
850 	 * since that could lead to livelock with two threads in
851 	 * qwait for the same (per module) inner perimeter.
852 	 */
853 	if (flags & SQ_WANTWAKEUP) {
854 		cv_broadcast(&sq->sq_wait);
855 		flags &= ~SQ_WANTWAKEUP;
856 	}
857 	sq->sq_flags = flags;
	/* events already queued and draining allowed: no need to sleep */
858 	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
859 		if (is_sq_cioc == 0) {
860 			SQ_PUTLOCKS_EXIT(sq);
861 		}
862 		/* drain_syncq() drops SQLOCK */
863 		drain_syncq(sq);
864 		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
865 		entersq(sq, SQ_OPENCLOSE);
866 		return;
867 	}
868 	/*
869 	 * Sleep on sq_exitwait to only be woken up when threads leave the
870 	 * put or service procedures. We can not sleep on sq_wait since an
871 	 * outer_exit in a qwait running in the same outer perimeter would
872 	 * cause a livelock "ping-pong" between two or more qwait'ers.
873 	 */
	/*
	 * The loop ends only when SQ_WANTEXWAKEUP is found clear after
	 * waking (presumably cleared by the thread that signalled
	 * sq_exitwait -- not visible here).  The putlocks are released
	 * across the wait and retaken before sq_flags is examined.
	 */
874 	do {
875 		sq->sq_flags |= SQ_WANTEXWAKEUP;
876 		if (is_sq_cioc == 0) {
877 			SQ_PUTLOCKS_EXIT(sq);
878 		}
879 		cv_wait(&sq->sq_exitwait, SQLOCK(sq));
880 		if (is_sq_cioc == 0) {
881 			SQ_PUTLOCKS_ENTER(sq);
882 		}
883 	} while (sq->sq_flags & SQ_WANTEXWAKEUP);
884 	if (is_sq_cioc == 0) {
885 		SQ_PUTLOCKS_EXIT(sq);
886 	}
887 	mutex_exit(SQLOCK(sq));
888 
889 	/*
890 	 * Re-enter the perimeters again
891 	 */
892 	entersq(sq, SQ_OPENCLOSE);
893 }
894 
895 /*
896  * Used for the synchronous streams entrypoints when sleeping outside
897  * the perimeters. Must never be called from regular put entrypoint.
898  *
899  * There's no need to grab sq_putlocks here (which only exist for CIPUT sync
900  * queues). If it is CIPUT sync queue put entry points were not blocked in the
901  * first place by rwnext/infonext which are treated as put entrypoints for
902  * perimeter synchronization purposes.
903  *
904  * Consolidation private.
 *
 * Arguments:
 *	q - queue whose syncq (q->q_syncq) is left and then re-entered.
 *
 * Returns B_TRUE if the sleep ended because cv_wait_sig() returned a
 * value <= 0; B_FALSE if queued events were drained instead of sleeping
 * or if the wait ended with SQ_WANTEXWAKEUP clear.  In every case the
 * syncq has been re-entered with entersq(sq, SQ_PUT) before returning.
905  */
906 boolean_t
907 qwait_rw(queue_t *q)
908 {
909 	syncq_t		*sq;
910 	ulong_t		flags;
911 	boolean_t	gotsignal = B_FALSE;
912 
913 	/*
914 	 * Perform the same operations as a leavesq(sq, SQ_PUT)
915 	 * while detecting all cases where the perimeter is entered
916 	 * so that qwait_rw can return to the caller.
917 	 *
918 	 * Drain the syncq if possible. Otherwise reset SQ_EXCL and
919 	 * wait for a thread to leave the syncq.
920 	 */
921 	sq = q->q_syncq;
922 	ASSERT(sq);
923 
924 	mutex_enter(SQLOCK(sq));
	/* work on a local copy; it is stored back to sq_flags below */
925 	flags = sq->sq_flags;
926 	/*
927 	 * Drop SQ_EXCL and sq_count but hold the SQLOCK to prevent any
928 	 * undetected entry and exit into the perimeter.
929 	 */
930 	ASSERT(sq->sq_count > 0);
931 	sq->sq_count--;
932 	if (!(sq->sq_type & SQ_CIPUT)) {
933 		ASSERT(flags & SQ_EXCL);
934 		flags &= ~SQ_EXCL;
935 	}
936 	/*
937 	 * Unblock any thread blocked in an entersq or outer_enter.
938 	 * Note: we do not unblock a thread waiting in qwait/qwait_sig,
939 	 * since that could lead to livelock with two threads in
940 	 * qwait for the same (per module) inner perimeter.
941 	 */
942 	if (flags & SQ_WANTWAKEUP) {
943 		cv_broadcast(&sq->sq_wait);
944 		flags &= ~SQ_WANTWAKEUP;
945 	}
946 	sq->sq_flags = flags;
	/* events already queued and draining allowed: no need to sleep */
947 	if ((flags & SQ_QUEUED) && !(flags & SQ_STAYAWAY)) {
948 		/* drain_syncq() drops SQLOCK */
949 		drain_syncq(sq);
950 		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
951 		entersq(sq, SQ_PUT);
952 		return (B_FALSE);
953 	}
954 	/*
955 	 * Sleep on sq_exitwait to only be woken up when threads leave the
956 	 * put or service procedures. We can not sleep on sq_wait since an
957 	 * outer_exit in a qwait running in the same outer perimeter would
958 	 * cause a livelock "ping-pong" between two or more qwait'ers.
959 	 */
960 	do {
961 		sq->sq_flags |= SQ_WANTEXWAKEUP;
962 		if (cv_wait_sig(&sq->sq_exitwait, SQLOCK(sq)) <= 0) {
			/* withdraw our wakeup request before giving up */
963 			sq->sq_flags &= ~SQ_WANTEXWAKEUP;
964 			gotsignal = B_TRUE;
965 			break;
966 		}
967 	} while (sq->sq_flags & SQ_WANTEXWAKEUP);
968 	mutex_exit(SQLOCK(sq));
969 
970 	/*
971 	 * Re-enter the perimeters again
972 	 */
973 	entersq(sq, SQ_PUT);
974 	return (gotsignal);
975 }
976 
977 /*
978  * Asynchronously upgrade to exclusive access at either the inner or
979  * outer perimeter.
980  */
981 void
982 qwriter(queue_t *q, mblk_t *mp, void (*func)(), int perim)
983 {
984 	if (perim == PERIM_INNER)
985 		qwriter_inner(q, mp, func);
986 	else if (perim == PERIM_OUTER)
987 		qwriter_outer(q, mp, func);
988 	else
989 		panic("qwriter: wrong \"perimeter\" parameter");
990 }
991 
992 /*
993  * Schedule a synchronous streams timeout
994  */
995 timeout_id_t
996 qtimeout(queue_t *q, void (*func)(void *), void *arg, clock_t tim)
997 {
998 	syncq_t		*sq;
999 	callbparams_t	*cbp;
1000 	timeout_id_t	tid;
1001 
1002 	sq = q->q_syncq;
1003 	/*
1004 	 * you don't want the timeout firing before its params are set up
1005 	 * callbparams_alloc() acquires SQLOCK(sq)
1006 	 * qtimeout() can't fail and can't sleep, so panic if memory is not
1007 	 * available.
1008 	 */
1009 	cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP | KM_PANIC);
1010 	/*
1011 	 * the callbflags in the sq use the same flags. They get anded
1012 	 * in the callbwrapper to determine if a qun* of this callback type
1013 	 * is required. This is not a request to cancel.
1014 	 */
1015 	cbp->cbp_flags = SQ_CANCEL_TOUT;
1016 	/* check new timeout version return codes */
1017 	tid = timeout(qcallbwrapper, cbp, tim);
1018 	cbp->cbp_id = (callbparams_id_t)tid;
1019 	mutex_exit(SQLOCK(sq));
1020 	/* use local id because the cbp memory could be free by now */
1021 	return (tid);
1022 }
1023 
1024 bufcall_id_t
1025 qbufcall(queue_t *q, size_t size, uint_t pri, void (*func)(void *), void *arg)
1026 {
1027 	syncq_t		*sq;
1028 	callbparams_t	*cbp;
1029 	bufcall_id_t	bid;
1030 
1031 	sq = q->q_syncq;
1032 	/*
1033 	 * you don't want the timeout firing before its params are set up
1034 	 * callbparams_alloc() acquires SQLOCK(sq) if successful.
1035 	 */
1036 	cbp = callbparams_alloc(sq, func, arg, KM_NOSLEEP);
1037 	if (cbp == NULL)
1038 		return ((bufcall_id_t)0);
1039 
1040 	/*
1041 	 * the callbflags in the sq use the same flags. They get anded
1042 	 * in the callbwrapper to determine if a qun* of this callback type
1043 	 * is required. This is not a request to cancel.
1044 	 */
1045 	cbp->cbp_flags = SQ_CANCEL_BUFCALL;
1046 	/* check new timeout version return codes */
1047 	bid = bufcall(size, pri, qcallbwrapper, cbp);
1048 	cbp->cbp_id = (callbparams_id_t)bid;
1049 	if (bid == 0) {
1050 		callbparams_free(sq, cbp);
1051 	}
1052 	mutex_exit(SQLOCK(sq));
1053 	/* use local id because the params memory could be free by now */
1054 	return (bid);
1055 }
1056 
1057 /*
1058  * cancel a timeout callback which enters the inner perimeter.
1059  * cancelling of all callback types on a given syncq is serialized.
1060  * the SQ_CALLB_BYPASSED flag indicates that the callback fn did
1061  * not execute. The quntimeout return value needs to reflect this.
1062  * As with out existing callback programming model - callbacks must
1063  * be cancelled before a close completes - so ensuring that the sq
1064  * is valid when the callback wrapper is executed.
1065  */
1066 clock_t
1067 quntimeout(queue_t *q, timeout_id_t id)
1068 {
1069 	syncq_t *sq = q->q_syncq;
1070 	clock_t ret;
1071 
1072 	mutex_enter(SQLOCK(sq));
1073 	/* callbacks are processed serially on each syncq */
1074 	while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
1075 		sq->sq_flags |= SQ_WANTWAKEUP;
1076 		cv_wait(&sq->sq_wait, SQLOCK(sq));
1077 	}
1078 	sq->sq_cancelid = (callbparams_id_t)id;
1079 	sq->sq_callbflags = SQ_CANCEL_TOUT;
1080 	if (sq->sq_flags & SQ_WANTWAKEUP) {
1081 		cv_broadcast(&sq->sq_wait);
1082 		sq->sq_flags &= ~SQ_WANTWAKEUP;
1083 	}
1084 	mutex_exit(SQLOCK(sq));
1085 	ret = untimeout(id);
1086 	mutex_enter(SQLOCK(sq));
1087 	if (ret != -1) {
1088 		/* The wrapper was never called - need to free based on id */
1089 		callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_TOUT);
1090 	}
1091 	if (sq->sq_callbflags & SQ_CALLB_BYPASSED) {
1092 		ret = 0;	/* this was how much time left */
1093 	}
1094 	sq->sq_callbflags = 0;
1095 	if (sq->sq_flags & SQ_WANTWAKEUP) {
1096 		cv_broadcast(&sq->sq_wait);
1097 		sq->sq_flags &= ~SQ_WANTWAKEUP;
1098 	}
1099 	mutex_exit(SQLOCK(sq));
1100 	return (ret);
1101 }
1102 
1103 
1104 void
1105 qunbufcall(queue_t *q, bufcall_id_t id)
1106 {
1107 	syncq_t *sq = q->q_syncq;
1108 
1109 	mutex_enter(SQLOCK(sq));
1110 	/* callbacks are processed serially on each syncq */
1111 	while (sq->sq_callbflags & SQ_CALLB_CANCEL_MASK) {
1112 		sq->sq_flags |= SQ_WANTWAKEUP;
1113 		cv_wait(&sq->sq_wait, SQLOCK(sq));
1114 	}
1115 	sq->sq_cancelid = (callbparams_id_t)id;
1116 	sq->sq_callbflags = SQ_CANCEL_BUFCALL;
1117 	if (sq->sq_flags & SQ_WANTWAKEUP) {
1118 		cv_broadcast(&sq->sq_wait);
1119 		sq->sq_flags &= ~SQ_WANTWAKEUP;
1120 	}
1121 	mutex_exit(SQLOCK(sq));
1122 	unbufcall(id);
1123 	mutex_enter(SQLOCK(sq));
1124 	/*
1125 	 * No indication from unbufcall if the callback has already run.
1126 	 * Always attempt to free it.
1127 	 */
1128 	callbparams_free_id(sq, (callbparams_id_t)id, SQ_CANCEL_BUFCALL);
1129 	sq->sq_callbflags = 0;
1130 	if (sq->sq_flags & SQ_WANTWAKEUP) {
1131 		cv_broadcast(&sq->sq_wait);
1132 		sq->sq_flags &= ~SQ_WANTWAKEUP;
1133 	}
1134 	mutex_exit(SQLOCK(sq));
1135 }
1136 
1137 /*
1138  * Associate the stream with an instance of the bottom driver.  This
1139  * function is called by APIs that establish or modify the hardware
1140  * association (ppa) of an open stream.  Two examples of such
1141  * post-open(9E) APIs are the dlpi(7p) DL_ATTACH_REQ message, and the
1142  * ndd(1M) "instance=" ioctl(2).  This interface may be called from a
1143  * stream driver's wput procedure and from within syncq perimeters,
1144  * so it can't block.
1145  *
1146  * The qassociate() "model" is that it should drive attach(9E), yet it
1147  * can't really do that because driving attach(9E) is a blocking
1148  * operation.  Instead, the qassociate() implementation has complex
1149  * dependencies on the implementation behavior of other parts of the
1150  * kernel to ensure all appropriate instances (ones that have not been
1151  * made inaccessible by DR) are attached at stream open() time, and
1152  * that they will not autodetach.  The code relies on the fact that an
1153  * open() of a stream that ends up using qassociate() always occurs on
1154  * a minor node created with CLONE_DEV.  The open() comes through
1155  * clnopen() and since clnopen() calls ddi_hold_installed_driver() we
1156  * attach all instances and mark them DN_NO_AUTODETACH (given
1157  * DN_DRIVER_HELD is maintained correctly).
1158  *
1159  * Since qassociate() can't really drive attach(9E), there are corner
1160  * cases where the compromise described above leads to qassociate()
1161  * returning failure.  This can happen when administrative functions
1162  * that cause detach(9E), such as "update_drv" or "modunload -i", are
1163  * performed on the driver between the time the stream was opened and
1164  * the time its hardware association was established.  Although this can
1165  * theoretically be an arbitrary amount of time, in practice the window
1166  * is usually quite small, since applications almost always issue their
1167  * hardware association request immediately after opening the stream,
1168  * and do not typically switch association while open.  When these
1169  * corner cases occur, and qassociate() finds the requested instance
1170  * detached, it will return failure.  This failure should be propagated
1171  * to the requesting administrative application using the appropriate
1172  * post-open(9E) API error mechanism.
1173  *
1174  * All qassociate() callers are expected to check for and gracefully handle
1175  * failure return, propagating errors back to the requesting administrative
1176  * application.
1177  */
1178 int
1179 qassociate(queue_t *q, int instance)
1180 {
1181 	vnode_t *vp;
1182 	major_t major;
1183 	dev_info_t *dip;
1184 
1185 	if (instance == -1) {
1186 		ddi_assoc_queue_with_devi(q, NULL);
1187 		return (0);
1188 	}
1189 
1190 	vp = STREAM(q)->sd_vnode;
1191 	major = getmajor(vp->v_rdev);
1192 	dip = ddi_hold_devi_by_instance(major, instance,
1193 	    E_DDI_HOLD_DEVI_NOATTACH);
1194 	if (dip == NULL)
1195 		return (-1);
1196 
1197 	ddi_assoc_queue_with_devi(q, dip);
1198 	ddi_release_devi(dip);
1199 	return (0);
1200 }
1201 
1202 /*
1203  * This routine is the SVR4MP 'replacement' for
1204  * hat_getkpfnum.  The only major difference is
1205  * the return value for illegal addresses - since
1206  * sunm_getkpfnum() and srmmu_getkpfnum() both
1207  * return '-1' for bogus mappings, we can (more or
1208  * less) return the value directly.
1209  */
1210 ppid_t
1211 kvtoppid(caddr_t addr)
1212 {
1213 	return ((ppid_t)hat_getpfnum(kas.a_hat, addr));
1214 }
1215 
1216 /*
1217  * This is used to set the timeout value for cv_timedwait() or
1218  * cv_timedwait_sig().
1219  */
1220 void
1221 time_to_wait(clock_t *now, clock_t time)
1222 {
1223 	*now = lbolt + time;
1224 }
1225