xref: /illumos-gate/usr/src/uts/common/os/putnext.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1991-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  *		UNIX Device Driver Interface functions
35  *	This file contains the C-versions of putnext() and put().
36  *	Assembly language versions exist for some architectures.
37  */
38 
39 #include <sys/types.h>
40 #include <sys/systm.h>
41 #include <sys/cpuvar.h>
42 #include <sys/debug.h>
43 #include <sys/t_lock.h>
44 #include <sys/stream.h>
45 #include <sys/thread.h>
46 #include <sys/strsubr.h>
47 #include <sys/ddi.h>
48 #include <sys/vtrace.h>
49 #include <sys/cmn_err.h>
50 #include <sys/strft.h>
51 #include <sys/stack.h>
52 #include <sys/archsystm.h>
53 
54 /*
55  * Streams with many modules may create long chains of calls via putnext() which
56  * may exhaust stack space. When putnext detects that the stack space left is
57  * too small (less than put_stack_needed), the call chain is broken and
58  * further processing is delegated to a background thread via a call to
59  * qfill_syncq(). Unfortunately there is no generic solution with fixed stack
60  * size, and putnext() is a recursive function, so this hack is a necessary evil.
61  *
62  * The redzone value is chosen dependent on the default stack size which is 8K
63  * on 32-bit kernels and on x86 and 16K on 64-bit kernels. The values are chosen
64  * empirically. For 64-bit kernels it is 5000 and for 32-bit kernels it is 2500.
65  * Experiments showed that 2500 is not enough for 64-bit kernels and 2048 is not
66  * enough for 32-bit.
67  *
68  * The redzone value is a tuneable rather than a constant to allow adjustments
69  * in the field.
70  *
71  * The check in PUT_STACK_NOTENOUGH is taken from segkp_map_red() function. It
72  * is possible to define it as a generic function exported by seg_kp, but
73  *
74  * a) It may sound like an open invitation to use the facility indiscriminately.
75  * b) It adds extra function call in putnext path.
76  *
77  * We keep a global counter `put_stack_notenough' which keeps track of how many
78  * times the stack switching hack was used.
79  */
80 
81 static ulong_t put_stack_notenough;	/* times PUT_STACK_NOTENOUGH() was true */
82 
83 #ifdef	_LP64
84 #define	PUT_STACK_NEEDED 5000
85 #else
86 #define	PUT_STACK_NEEDED 2500
87 #endif
88 
89 int put_stack_needed = PUT_STACK_NEEDED;	/* threshold compared by PUT_STACK_NOTENOUGH() */
90 
   /*
    * PUT_STACK_NOTENOUGH() compares the distance between the current frame
    * pointer (getfp(), adjusted by STACK_BIAS) and curthread->t_stkbase with
    * put_stack_needed.  The counter increment sits on the right-hand side of
    * the &&, so put_stack_notenough is bumped only when that distance is
    * below the threshold; the macro's value is then the incremented counter
    * (non-zero unless the counter wraps).
    */
91 #if defined(STACK_GROWTH_DOWN)
92 #define	PUT_STACK_NOTENOUGH() 					\
93 	(((STACK_BIAS + (uintptr_t)getfp() -			\
94 	    (uintptr_t)curthread->t_stkbase) < put_stack_needed) && \
95 	++put_stack_notenough)
96 #else
97 #error	"STACK_GROWTH_DOWN undefined"
98 #endif
99 
100 boolean_t	UseFastlocks = B_FALSE;	/* when set, put() may try the sq_ciputctrl fast path */
101 
102 /*
103  * function: putnext()
104  * purpose:  call the put routine of the queue linked to qp
105  *
106  * Note: this function is written to perform well on modern computer
107  * architectures by e.g. preloading values into registers and "smearing" out
108  * code.
109  *
110  * A note on the fastput mechanism.  The most significant bit of a
111  * putcount is considered the "FASTPUT" bit.  If set, then there is
112  * nothing stopping a concurrent put from occurring (note that putcounts
113  * are only allowed on CIPUT perimeters).  If, however, it is cleared,
114  * then we need to take the normal lock path by acquiring the SQLOCK.
115  * This is a slowlock.  When a thread starts exclusiveness, e.g. wants
116  * writer access, it will clear the FASTPUT bit, causing new threads
117  * to take the slowlock path.  This assures that putcounts will not
118  * increase in value, so the want-writer does not need to constantly
119  * acquire the putlocks to sum the putcounts.  This does have the
120  * possibility of having the count drop right after reading, but that
121  * is no different than acquiring, reading and then releasing.  However,
122  * in this mode, it cannot go up, so eventually they will drop to zero
123  * and the want-writer can proceed.
124  *
125  * If the FASTPUT bit is set, or in the slowlock path we see that there
126  * are no writers or want-writers, we make the choice of calling the
127  * putproc, or a "fast-fill_syncq".  The fast-fill is a fill with
128  * immediate intention to drain.  This is done because there are
129  * messages already at the queue waiting to drain.  To preserve message
130  * ordering, we need to put this message at the end, and pickup the
131  * messages at the beginning.  We call the macro that actually
132  * enqueues the message on the queue, and then call qdrain_syncq.  If
133  * there is already a drainer, we just return.  We could make that
134  * check before calling qdrain_syncq, but it is a little more clear
135  * to have qdrain_syncq do this (we might try the above optimization
136  * as this behavior evolves).  qdrain_syncq assumes that SQ_EXCL is set
137  * already if this is a non-CIPUT perimeter, and that an appropriate
138  * claim has been made.  So we do all that work before dropping the
139  * SQLOCK with our claim.
140  *
141  * If we cannot proceed with the putproc/fast-fill, we just fall
142  * through to the qfill_syncq, and then tail processing.  If state
143  * has changed in that cycle, or wakeups are needed, it will occur
144  * there.
145  */
146 void
147 putnext(queue_t *qp, mblk_t *mp)
148 {
    	/*
    	 * Deliver mp to the put procedure of the queue that follows qp
    	 * (qp->q_next) after making a claim on that queue's syncq.  If
    	 * SQ_STAYAWAY, SQ_EXCL or SQ_EVENTS is set on the syncq, or
    	 * sq_needexcl is non-zero, or PUT_STACK_NOTENOUGH() is true, the
    	 * message is handed to qfill_syncq() instead and we return.
    	 *
    	 * mp is expected to be unlinked (b_next and b_prev NULL) and to
    	 * reference a data block with a non-zero db_ref; both are ASSERTed.
    	 */
149 	queue_t		*fqp = qp; /* For strft tracing */
150 	syncq_t		*sq;		/* syncq of the destination queue */
151 	uint16_t	flags;		/* snapshot of sq->sq_flags */
152 	uint16_t	drain_mask;	/* bits hidden from putnext_tail() */
153 	struct qinit	*qi;		/* q_qinfo of the destination queue */
154 	int		(*putproc)();	/* qi->qi_putp */
155 	struct stdata	*stp;		/* STREAM(qp) of the caller's queue */
156 	int		ix;		/* ciputctrl index from cpu_seqid */
157 	boolean_t	queued = B_FALSE; /* Q_SQQUEUED seen on destination */
158 	kmutex_t	*sdlock = NULL;	/* stream-level lock taken below */
159 	kmutex_t	*sqciplock = NULL; /* non-NULL only on the fast path */
160 	ushort_t	*sqcipcount = NULL; /* putcount guarded by sqciplock */
161 
162 	TRACE_2(TR_FAC_STREAMS_FR, TR_PUTNEXT_START,
163 		"putnext_start:(%p, %p)", qp, mp);
164 
165 	ASSERT(mp->b_datap->db_ref != 0);
166 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
167 	stp = STREAM(qp);
168 	ASSERT(stp != NULL);
    	/*
    	 * Take the stream-level lock: the ciputctrl_lock of the
    	 * sd_ciputctrl entry selected by CPU->cpu_seqid when that array
    	 * exists, otherwise sd_lock.  It stays held until a lock on the
    	 * destination syncq (sqciplock or SQLOCK) has been acquired below.
    	 */
169 	if (stp->sd_ciputctrl != NULL) {
170 		ix = CPU->cpu_seqid & stp->sd_nciputctrl;
171 		sdlock = &stp->sd_ciputctrl[ix].ciputctrl_lock;
172 		mutex_enter(sdlock);
173 	} else {
174 		mutex_enter(sdlock = &stp->sd_lock);
175 	}
176 	qp = qp->q_next;	/* from here on qp is the destination queue */
177 	sq = qp->q_syncq;
178 	ASSERT(sq != NULL);
179 	ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
180 	qi = qp->q_qinfo;
181 
182 	if (sq->sq_ciputctrl != NULL) {
183 		/* fastlock: try to make the claim through a putcount */
184 		ASSERT(sq->sq_flags & SQ_CIPUT);
185 		ix = CPU->cpu_seqid & sq->sq_nciputctrl;
186 		sqciplock = &sq->sq_ciputctrl[ix].ciputctrl_lock;
187 		sqcipcount = &sq->sq_ciputctrl[ix].ciputctrl_count;
188 		mutex_enter(sqciplock);
    		/*
    		 * Fall back to the slowlock path when the FASTPUT bit is
    		 * clear in this putcount or any of SQ_STAYAWAY, SQ_EXCL or
    		 * SQ_EVENTS is set.  sdlock is still held across the goto.
    		 */
189 		if (!((*sqcipcount) & SQ_FASTPUT) ||
190 		    (sq->sq_flags & (SQ_STAYAWAY|SQ_EXCL|SQ_EVENTS))) {
191 			mutex_exit(sqciplock);
192 			sqciplock = NULL;
193 			goto slowlock;
194 		}
195 		mutex_exit(sdlock);
196 		(*sqcipcount)++;	/* our claim on the syncq */
197 		ASSERT(*sqcipcount != 0);
198 		queued = qp->q_sqflags & Q_SQQUEUED;
199 		mutex_exit(sqciplock);
200 	} else {
201 	    slowlock:
202 		ASSERT(sqciplock == NULL);
203 		mutex_enter(SQLOCK(sq));
204 		mutex_exit(sdlock);
205 		flags = sq->sq_flags;
206 		/*
207 		 * We are going to drop SQLOCK, so make a claim to prevent syncq
208 		 * from closing.
209 		 */
210 		sq->sq_count++;
211 		ASSERT(sq->sq_count != 0);		/* Wraparound */
212 		/*
213 		 * If there are writers or exclusive waiters, there is not much
214 		 * we can do.  Place the message on the syncq and schedule a
215 		 * background thread to drain it.
216 		 *
217 		 * Also if we are approaching end of stack, fill the syncq and
218 		 * switch processing to a background thread - see comments on
219 		 * top.
220 		 */
221 		if ((flags & (SQ_STAYAWAY|SQ_EXCL|SQ_EVENTS)) ||
222 		    (sq->sq_needexcl != 0) || PUT_STACK_NOTENOUGH()) {
223 
224 			TRACE_3(TR_FAC_STREAMS_FR, TR_PUTNEXT_END,
225 			    "putnext_end:(%p, %p, %p) SQ_EXCL fill",
226 			    qp, mp, sq);
227 
228 			/*
229 			 * NOTE: qfill_syncq will need QLOCK. It is safe to drop
230 			 * SQLOCK because positive sq_count keeps the syncq from
231 			 * closing.
232 			 */
233 			mutex_exit(SQLOCK(sq));
234 
235 			qfill_syncq(sq, qp, mp);
236 			/*
237 			 * NOTE: after the call to qfill_syncq() qp may be
238 			 * closed, both qp and sq should not be referenced at
239 			 * this point.
240 			 *
241 			 * This ASSERT is located here to prevent stack frame
242 			 * consumption in the DEBUG code.
243 			 */
244 			ASSERT(sqciplock == NULL);
245 			return;
246 		}
247 
248 		queued = qp->q_sqflags & Q_SQQUEUED;
249 		/*
250 		 * If not a concurrent perimeter, we need to acquire
251 		 * it exclusively.  It could not have been previously
252 		 * set since we held the SQLOCK before testing
253 		 * SQ_GOAWAY above (which includes SQ_EXCL).
254 		 * We do this here because we hold the SQLOCK, and need
255 		 * to make this state change BEFORE dropping it.
256 		 */
257 		if (!(flags & SQ_CIPUT)) {
258 			ASSERT((sq->sq_flags & SQ_EXCL) == 0);
259 			ASSERT(!(sq->sq_type & SQ_CIPUT));
260 			sq->sq_flags |= SQ_EXCL;
261 		}
262 		mutex_exit(SQLOCK(sq));
263 	}
264 
265 	ASSERT((sq->sq_flags & (SQ_EXCL|SQ_CIPUT)));
266 	ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
267 
268 	/*
269 	 * We now have a claim on the syncq, we are either going to
270 	 * put the message on the syncq and then drain it, or we are
271 	 * going to call the putproc().
272 	 */
273 	putproc = qi->qi_putp;
274 	if (!queued) {
275 		STR_FTEVENT_MSG(mp, fqp, FTEV_PUTNEXT, mp->b_rptr -
276 		    mp->b_datap->db_base);
277 		(*putproc)(qp, mp);
278 		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
279 		ASSERT(MUTEX_NOT_HELD(QLOCK(qp)));
280 	} else {
281 		mutex_enter(QLOCK(qp));
282 		/*
283 		 * If there are no messages in front of us, just call putproc(),
284 		 * otherwise enqueue the message and drain the queue.
285 		 */
286 		if (qp->q_syncqmsgs == 0) {
287 			mutex_exit(QLOCK(qp));
288 			STR_FTEVENT_MSG(mp, fqp, FTEV_PUTNEXT, mp->b_rptr -
289 			    mp->b_datap->db_base);
290 			(*putproc)(qp, mp);
291 			ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
292 		} else {
293 			/*
294 			 * We are doing a fill with the intent to
295 			 * drain (meaning we are filling because
296 			 * there are messages in front of us and we
297 			 * need to preserve message ordering)
298 			 * Therefore, put the message on the queue
299 			 * and call qdrain_syncq (must be done with
300 			 * the QLOCK held).
301 			 */
302 			STR_FTEVENT_MSG(mp, fqp, FTEV_PUTNEXT,
303 			    mp->b_rptr - mp->b_datap->db_base);
304 
305 #ifdef DEBUG
306 			/*
307 			 * These two values were in the original code for
308 			 * all syncq messages.  This is unnecessary in
309 			 * the current implementation, but was retained
310 			 * in debug mode as it is useful to know where
311 			 * problems occur.
312 			 */
313 			mp->b_queue = qp;
314 			mp->b_prev = (mblk_t *)putproc;
315 #endif
316 			SQPUT_MP(qp, mp);
317 			qdrain_syncq(sq, qp);
318 			ASSERT(MUTEX_NOT_HELD(QLOCK(qp)));
319 		}
320 	}
321 	/*
322 	 * Before we release our claim, we need to see if any
323 	 * events were posted. If the syncq is SQ_EXCL && SQ_QUEUED,
324 	 * we were responsible for going exclusive and, therefore,
325 	 * are responsible for draining.
326 	 */
    	/*
    	 * drain_mask holds the flag bits removed from the value passed to
    	 * putnext_tail() below: SQ_QUEUED unless SQ_EXCL is currently set.
    	 */
327 	if (sq->sq_flags & (SQ_EXCL)) {
328 		drain_mask = 0;
329 	} else {
330 		drain_mask = SQ_QUEUED;
331 	}
332 
333 	if (sqciplock != NULL) {	/* the claim was made on the fast path */
334 		mutex_enter(sqciplock);
335 		flags = sq->sq_flags;
336 		ASSERT(flags & SQ_CIPUT);
337 		/* SQ_EXCL could have been set by qwriter_inner */
338 		if ((flags & (SQ_EXCL|SQ_TAIL)) || sq->sq_needexcl) {
339 			/*
340 			 * we need SQLOCK to handle
341 			 * wakeups/drains/flags change.  sqciplock
342 			 * is needed to decrement sqcipcount.
343 			 * SQLOCK has to be grabbed before sqciplock
344 			 * for lock ordering purposes.
345 			 * after sqcipcount is decremented some lock
346 			 * still needs to be held to make sure
347 			 * syncq won't get freed on us.
348 			 *
349 			 * To prevent deadlocks we try to grab SQLOCK and if it
350 			 * is held already we drop sqciplock, acquire SQLOCK and
351 			 * reacquire sqciplock again.
352 			 */
353 			if (mutex_tryenter(SQLOCK(sq)) == 0) {
354 				mutex_exit(sqciplock);
355 				mutex_enter(SQLOCK(sq));
356 				mutex_enter(sqciplock);
357 			}
358 			flags = sq->sq_flags;	/* re-read now that SQLOCK is held */
359 			ASSERT(*sqcipcount != 0);
360 			(*sqcipcount)--;
361 			mutex_exit(sqciplock);
362 		} else {
    			/* Nothing to tail-process: drop the claim and leave. */
363 			ASSERT(*sqcipcount != 0);
364 			(*sqcipcount)--;
365 			mutex_exit(sqciplock);
366 			TRACE_3(TR_FAC_STREAMS_FR, TR_PUTNEXT_END,
367 			"putnext_end:(%p, %p, %p) done", qp, mp, sq);
368 			return;
369 		}
370 	} else {
371 		mutex_enter(SQLOCK(sq));
372 		flags = sq->sq_flags;
373 		ASSERT(sq->sq_count != 0);
374 		sq->sq_count--;		/* release the slow-path claim */
375 	}
    	/*
    	 * SQLOCK is held on every path that reaches this point.
    	 * NOTE(review): we return after putnext_tail() without releasing
    	 * SQLOCK, so putnext_tail() presumably drops it -- confirm.
    	 */
376 	if ((flags & (SQ_TAIL)) || sq->sq_needexcl) {
377 		putnext_tail(sq, qp, (flags & ~drain_mask));
378 		/*
379 		 * The only purpose of this ASSERT is to preserve calling stack
380 		 * in DEBUG kernel.
381 		 */
382 		ASSERT(sq != NULL);
383 		return;
384 	}
385 	ASSERT((sq->sq_flags & (SQ_EXCL|SQ_CIPUT)) || queued);
386 	ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) || queued);
387 	/*
388 	 * Safe to always drop SQ_EXCL:
389 	 *	Not SQ_CIPUT means we set SQ_EXCL above
390 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put
391 	 *	procedure did a qwriter(INNER) in which case
392 	 *	nobody else is in the inner perimeter and we
393 	 *	are exiting.
394 	 *
395 	 * I would like to make the following assertion:
396 	 *
397 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
398 	 * 	sq->sq_count == 0);
399 	 *
400 	 * which indicates that if we are both putshared and exclusive,
401 	 * we became exclusive while executing the putproc, and the only
402 	 * claim on the syncq was the one we dropped a few lines above.
403 	 * But other threads that enter putnext while the syncq is exclusive
404 	 * need to make a claim as they may need to drop SQLOCK in the
405 	 * has_writers case to avoid deadlocks.  If these threads are
406 	 * delayed or preempted, it is possible that the writer thread can
407 	 * find out that there are other claims making the (sq_count == 0)
408 	 * test invalid.
409 	 */
410 
411 	sq->sq_flags = flags & ~SQ_EXCL;
412 	mutex_exit(SQLOCK(sq));
413 	TRACE_3(TR_FAC_STREAMS_FR, TR_PUTNEXT_END,
414 	    "putnext_end:(%p, %p, %p) done", qp, mp, sq);
415 }
416 
417 
418 /*
419  * wrapper for qi_putp entry in module ops vec.
420  * implements asynchronous putnext().
421  * Note, that unlike putnext(), this routine is NOT optimized for the
422  * fastpath.  Calling this routine will grab whatever locks are necessary
423  * to protect the stream head, q_next, and syncq's.
424  * And since it is in the normal locks path, we do not use putlocks if
425  * they exist (though this can be changed by swapping the value of
426  * UseFastlocks).
427  */
428 void
429 put(queue_t *qp, mblk_t *mp)
430 {
    	/*
    	 * Deliver mp to qp's own put procedure (qp->q_qinfo->qi_putp)
    	 * after making a claim on qp's syncq.  Unlike putnext(), qp is
    	 * used as given (q_next is not followed) and the sq_ciputctrl
    	 * fast path is attempted only when UseFastlocks is set.  If
    	 * SQ_STAYAWAY, SQ_EXCL or SQ_EVENTS is set on the syncq, or
    	 * sq_needexcl is non-zero, or PUT_STACK_NOTENOUGH() is true, the
    	 * message is handed to qfill_syncq() instead and we return.
    	 *
    	 * mp is expected to be unlinked (b_next and b_prev NULL) and to
    	 * reference a data block with a non-zero db_ref; both are ASSERTed.
    	 */
431 	queue_t		*fqp = qp; /* For strft tracing */
432 	syncq_t		*sq;		/* qp->q_syncq */
433 	uint16_t	flags;		/* snapshot of sq->sq_flags */
434 	uint16_t	drain_mask;	/* bits hidden from putnext_tail() */
435 	struct qinit	*qi;		/* qp->q_qinfo */
436 	int		(*putproc)();	/* qi->qi_putp */
437 	int		ix;		/* ciputctrl index from cpu_seqid */
438 	boolean_t	queued = B_FALSE; /* Q_SQQUEUED seen on qp */
439 	kmutex_t	*sqciplock = NULL; /* non-NULL only on the fast path */
440 	ushort_t	*sqcipcount = NULL; /* putcount guarded by sqciplock */
441 
442 	TRACE_2(TR_FAC_STREAMS_FR, TR_PUT_START,
443 		"put:(%X, %X)", qp, mp);
444 	ASSERT(mp->b_datap->db_ref != 0);
445 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
446 
447 	sq = qp->q_syncq;
448 	ASSERT(sq != NULL);
449 	qi = qp->q_qinfo;
450 
451 	if (UseFastlocks && sq->sq_ciputctrl != NULL) {
452 		/* fastlock: try to make the claim through a putcount */
453 		ASSERT(sq->sq_flags & SQ_CIPUT);
454 		ix = CPU->cpu_seqid & sq->sq_nciputctrl;
455 		sqciplock = &sq->sq_ciputctrl[ix].ciputctrl_lock;
456 		sqcipcount = &sq->sq_ciputctrl[ix].ciputctrl_count;
457 		mutex_enter(sqciplock);
    		/*
    		 * Fall back to the slowlock path when the FASTPUT bit is
    		 * clear in this putcount or any of SQ_STAYAWAY, SQ_EXCL or
    		 * SQ_EVENTS is set.
    		 */
458 		if (!((*sqcipcount) & SQ_FASTPUT) ||
459 		    (sq->sq_flags & (SQ_STAYAWAY|SQ_EXCL|SQ_EVENTS))) {
460 			mutex_exit(sqciplock);
461 			sqciplock = NULL;
462 			goto slowlock;
463 		}
464 		(*sqcipcount)++;	/* our claim on the syncq */
465 		ASSERT(*sqcipcount != 0);
466 		queued = qp->q_sqflags & Q_SQQUEUED;
467 		mutex_exit(sqciplock);
468 	} else {
469 	    slowlock:
470 		ASSERT(sqciplock == NULL);
471 		mutex_enter(SQLOCK(sq));
472 		flags = sq->sq_flags;
473 		/*
474 		 * We are going to drop SQLOCK, so make a claim to prevent syncq
475 		 * from closing.
476 		 */
477 		sq->sq_count++;
478 		ASSERT(sq->sq_count != 0);		/* Wraparound */
479 		/*
480 		 * If there are writers or exclusive waiters, there is not much
481 		 * we can do.  Place the message on the syncq and schedule a
482 		 * background thread to drain it.
483 		 *
484 		 * Also if we are approaching end of stack, fill the syncq and
485 		 * switch processing to a background thread - see comments on
486 		 * top.
487 		 */
488 		if ((flags & (SQ_STAYAWAY|SQ_EXCL|SQ_EVENTS)) ||
489 		    (sq->sq_needexcl != 0) || PUT_STACK_NOTENOUGH()) {
490 
    			/*
    			 * Note: put() reuses the TR_PUTNEXT_END trace point
    			 * and the "putnext_end" label here and below.
    			 */
491 			TRACE_3(TR_FAC_STREAMS_FR, TR_PUTNEXT_END,
492 			    "putnext_end:(%p, %p, %p) SQ_EXCL fill",
493 			    qp, mp, sq);
494 
495 			/*
496 			 * NOTE: qfill_syncq will need QLOCK. It is safe to drop
497 			 * SQLOCK because positive sq_count keeps the syncq from
498 			 * closing.
499 			 */
500 			mutex_exit(SQLOCK(sq));
501 
502 			qfill_syncq(sq, qp, mp);
503 			/*
504 			 * NOTE: after the call to qfill_syncq() qp may be
505 			 * closed, both qp and sq should not be referenced at
506 			 * this point.
507 			 *
508 			 * This ASSERT is located here to prevent stack frame
509 			 * consumption in the DEBUG code.
510 			 */
511 			ASSERT(sqciplock == NULL);
512 			return;
513 		}
514 
515 		queued = qp->q_sqflags & Q_SQQUEUED;
516 		/*
517 		 * If not a concurrent perimeter, we need to acquire
518 		 * it exclusively.  It could not have been previously
519 		 * set since we held the SQLOCK before testing
520 		 * SQ_GOAWAY above (which includes SQ_EXCL).
521 		 * We do this here because we hold the SQLOCK, and need
522 		 * to make this state change BEFORE dropping it.
523 		 */
524 		if (!(flags & SQ_CIPUT)) {
525 			ASSERT((sq->sq_flags & SQ_EXCL) == 0);
526 			ASSERT(!(sq->sq_type & SQ_CIPUT));
527 			sq->sq_flags |= SQ_EXCL;
528 		}
529 		mutex_exit(SQLOCK(sq));
530 	}
531 
532 	ASSERT((sq->sq_flags & (SQ_EXCL|SQ_CIPUT)));
533 	ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
534 
535 	/*
536 	 * We now have a claim on the syncq, we are either going to
537 	 * put the message on the syncq and then drain it, or we are
538 	 * going to call the putproc().
539 	 */
540 	putproc = qi->qi_putp;
541 	if (!queued) {
542 		STR_FTEVENT_MSG(mp, fqp, FTEV_PUTNEXT, mp->b_rptr -
543 		    mp->b_datap->db_base);
544 		(*putproc)(qp, mp);
545 		ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
546 		ASSERT(MUTEX_NOT_HELD(QLOCK(qp)));
547 	} else {
548 		mutex_enter(QLOCK(qp));
549 		/*
550 		 * If there are no messages in front of us, just call putproc(),
551 		 * otherwise enqueue the message and drain the queue.
552 		 */
553 		if (qp->q_syncqmsgs == 0) {
554 			mutex_exit(QLOCK(qp));
555 			STR_FTEVENT_MSG(mp, fqp, FTEV_PUTNEXT, mp->b_rptr -
556 			    mp->b_datap->db_base);
557 			(*putproc)(qp, mp);
558 			ASSERT(MUTEX_NOT_HELD(SQLOCK(sq)));
559 		} else {
560 			/*
561 			 * We are doing a fill with the intent to
562 			 * drain (meaning we are filling because
563 			 * there are messages in front of us and we
564 			 * need to preserve message ordering)
565 			 * Therefore, put the message on the queue
566 			 * and call qdrain_syncq (must be done with
567 			 * the QLOCK held).
568 			 */
569 			STR_FTEVENT_MSG(mp, fqp, FTEV_PUTNEXT,
570 			    mp->b_rptr - mp->b_datap->db_base);
571 
572 #ifdef DEBUG
573 			/*
574 			 * These two values were in the original code for
575 			 * all syncq messages.  This is unnecessary in
576 			 * the current implementation, but was retained
577 			 * in debug mode as it is useful to know where
578 			 * problems occur.
579 			 */
580 			mp->b_queue = qp;
581 			mp->b_prev = (mblk_t *)putproc;
582 #endif
583 			SQPUT_MP(qp, mp);
584 			qdrain_syncq(sq, qp);
585 			ASSERT(MUTEX_NOT_HELD(QLOCK(qp)));
586 		}
587 	}
588 	/*
589 	 * Before we release our claim, we need to see if any
590 	 * events were posted. If the syncq is SQ_EXCL && SQ_QUEUED,
591 	 * we were responsible for going exclusive and, therefore,
592 	 * are responsible for draining.
593 	 */
    	/*
    	 * drain_mask holds the flag bits removed from the value passed to
    	 * putnext_tail() below: SQ_QUEUED unless SQ_EXCL is currently set.
    	 */
594 	if (sq->sq_flags & (SQ_EXCL)) {
595 		drain_mask = 0;
596 	} else {
597 		drain_mask = SQ_QUEUED;
598 	}
599 
600 	if (sqciplock != NULL) {	/* the claim was made on the fast path */
601 		mutex_enter(sqciplock);
602 		flags = sq->sq_flags;
603 		ASSERT(flags & SQ_CIPUT);
604 		/* SQ_EXCL could have been set by qwriter_inner */
605 		if ((flags & (SQ_EXCL|SQ_TAIL)) || sq->sq_needexcl) {
606 			/*
607 			 * we need SQLOCK to handle
608 			 * wakeups/drains/flags change.  sqciplock
609 			 * is needed to decrement sqcipcount.
610 			 * SQLOCK has to be grabbed before sqciplock
611 			 * for lock ordering purposes.
612 			 * after sqcipcount is decremented some lock
613 			 * still needs to be held to make sure
614 			 * syncq won't get freed on us.
615 			 *
616 			 * To prevent deadlocks we try to grab SQLOCK and if it
617 			 * is held already we drop sqciplock, acquire SQLOCK and
618 			 * reacquire sqciplock again.
619 			 */
620 			if (mutex_tryenter(SQLOCK(sq)) == 0) {
621 				mutex_exit(sqciplock);
622 				mutex_enter(SQLOCK(sq));
623 				mutex_enter(sqciplock);
624 			}
625 			flags = sq->sq_flags;	/* re-read now that SQLOCK is held */
626 			ASSERT(*sqcipcount != 0);
627 			(*sqcipcount)--;
628 			mutex_exit(sqciplock);
629 		} else {
    			/* Nothing to tail-process: drop the claim and leave. */
630 			ASSERT(*sqcipcount != 0);
631 			(*sqcipcount)--;
632 			mutex_exit(sqciplock);
633 			TRACE_3(TR_FAC_STREAMS_FR, TR_PUTNEXT_END,
634 			"putnext_end:(%p, %p, %p) done", qp, mp, sq);
635 			return;
636 		}
637 	} else {
638 		mutex_enter(SQLOCK(sq));
639 		flags = sq->sq_flags;
640 		ASSERT(sq->sq_count != 0);
641 		sq->sq_count--;		/* release the slow-path claim */
642 	}
    	/*
    	 * SQLOCK is held on every path that reaches this point.
    	 * NOTE(review): we return after putnext_tail() without releasing
    	 * SQLOCK, so putnext_tail() presumably drops it -- confirm.
    	 */
643 	if ((flags & (SQ_TAIL)) || sq->sq_needexcl) {
644 		putnext_tail(sq, qp, (flags & ~drain_mask));
645 		/*
646 		 * The only purpose of this ASSERT is to preserve calling stack
647 		 * in DEBUG kernel.
648 		 */
649 		ASSERT(sq != NULL);
650 		return;
651 	}
652 	ASSERT((sq->sq_flags & (SQ_EXCL|SQ_CIPUT)) || queued);
653 	ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) || queued);
654 	/*
655 	 * Safe to always drop SQ_EXCL:
656 	 *	Not SQ_CIPUT means we set SQ_EXCL above
657 	 *	For SQ_CIPUT SQ_EXCL will only be set if the put
658 	 *	procedure did a qwriter(INNER) in which case
659 	 *	nobody else is in the inner perimeter and we
660 	 *	are exiting.
661 	 *
662 	 * I would like to make the following assertion:
663 	 *
664 	 * ASSERT((flags & (SQ_EXCL|SQ_CIPUT)) != (SQ_EXCL|SQ_CIPUT) ||
665 	 * 	sq->sq_count == 0);
666 	 *
667 	 * which indicates that if we are both putshared and exclusive,
668 	 * we became exclusive while executing the putproc, and the only
669 	 * claim on the syncq was the one we dropped a few lines above.
670 	 * But other threads that enter putnext while the syncq is exclusive
671 	 * need to make a claim as they may need to drop SQLOCK in the
672 	 * has_writers case to avoid deadlocks.  If these threads are
673 	 * delayed or preempted, it is possible that the writer thread can
674 	 * find out that there are other claims making the (sq_count == 0)
675 	 * test invalid.
676 	 */
677 
678 	sq->sq_flags = flags & ~SQ_EXCL;
679 	mutex_exit(SQLOCK(sq));
680 	TRACE_3(TR_FAC_STREAMS_FR, TR_PUTNEXT_END,
681 	    "putnext_end:(%p, %p, %p) done", qp, mp, sq);
682 }
683