xref: /illumos-gate/usr/src/uts/common/fs/fifofs/fifovnops.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
23 /*	  All rights reserved.  	*/
24 
25 
26 /*
27  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 /*
34  * FIFOFS file system vnode operations.  This file system
35  * type supports STREAMS-based pipes and FIFOs.
36  */
37 #include <sys/types.h>
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/sysmacros.h>
41 #include <sys/cred.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/kmem.h>
47 #include <sys/uio.h>
48 #include <sys/vfs.h>
49 #include <sys/vnode.h>
50 #include <sys/signal.h>
51 #include <sys/user.h>
52 #include <sys/strsubr.h>
53 #include <sys/stream.h>
54 #include <sys/strsun.h>
55 #include <sys/strredir.h>
56 #include <sys/fs/fifonode.h>
57 #include <sys/fs/namenode.h>
58 #include <sys/stropts.h>
59 #include <sys/proc.h>
60 #include <sys/unistd.h>
61 #include <sys/debug.h>
62 #include <fs/fs_subr.h>
63 #include <sys/filio.h>
64 #include <sys/termio.h>
65 #include <sys/ddi.h>
66 #include <sys/vtrace.h>
67 #include <sys/policy.h>
68 
69 /*
70  * Define the routines/data structures used in this file.
71  */
72 static int fifo_read(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
73 static int fifo_write(vnode_t *, uio_t *, int, cred_t *, caller_context_t *);
74 static int fifo_getattr(vnode_t *, vattr_t *, int, cred_t *);
75 static int fifo_setattr(vnode_t *, vattr_t *, int, cred_t *,
76 	caller_context_t *);
77 static int fifo_realvp(vnode_t *, vnode_t **);
78 static int fifo_access(vnode_t *, int, int, cred_t *);
79 static int fifo_fid(vnode_t *, fid_t *);
80 static int fifo_fsync(vnode_t *, int, cred_t *);
81 static int fifo_seek(vnode_t *, offset_t, offset_t *);
82 static int fifo_ioctl(vnode_t *, int, intptr_t, int, cred_t *, int *);
83 static int fifo_fastioctl(vnode_t *, int, intptr_t, int, cred_t *, int *);
84 static int fifo_strioctl(vnode_t *, int, intptr_t, int, cred_t *, int *);
85 static int fifo_poll(vnode_t *, short, int, short *, pollhead_t **);
86 static int fifo_pathconf(vnode_t *, int, ulong_t *, cred_t *);
87 static void fifo_inactive(vnode_t *, cred_t *);
88 static int fifo_rwlock(vnode_t *, int, caller_context_t *);
89 static void fifo_rwunlock(vnode_t *, int, caller_context_t *);
90 static int fifo_setsecattr(struct vnode *, vsecattr_t *, int, struct cred *);
91 static int fifo_getsecattr(struct vnode *, vsecattr_t *, int, struct cred *);
92 
93 /*
94  * Define the data structures external to this file.
95  */
96 extern	dev_t	fifodev;
97 extern struct qinit fifo_stwdata;
98 extern struct qinit fifo_strdata;
99 extern kmutex_t ftable_lock;
100 
101 struct  streamtab fifoinfo = { &fifo_strdata, &fifo_stwdata, NULL, NULL };
102 
103 struct vnodeops *fifo_vnodeops;
104 
105 const fs_operation_def_t fifo_vnodeops_template[] = {
106 	VOPNAME_OPEN, fifo_open,
107 	VOPNAME_CLOSE, fifo_close,
108 	VOPNAME_READ, fifo_read,
109 	VOPNAME_WRITE, fifo_write,
110 	VOPNAME_IOCTL, fifo_ioctl,
111 	VOPNAME_GETATTR, fifo_getattr,
112 	VOPNAME_SETATTR, fifo_setattr,
113 	VOPNAME_ACCESS, fifo_access,
114 	VOPNAME_FSYNC, fifo_fsync,
115 	VOPNAME_INACTIVE, (fs_generic_func_p) fifo_inactive,
116 	VOPNAME_FID, fifo_fid,
117 	VOPNAME_RWLOCK, fifo_rwlock,
118 	VOPNAME_RWUNLOCK, (fs_generic_func_p) fifo_rwunlock,
119 	VOPNAME_SEEK, fifo_seek,
120 	VOPNAME_REALVP, fifo_realvp,
121 	VOPNAME_POLL, (fs_generic_func_p) fifo_poll,
122 	VOPNAME_PATHCONF, fifo_pathconf,
123 	VOPNAME_DISPOSE, fs_error,
124 	VOPNAME_SETSECATTR, fifo_setsecattr,
125 	VOPNAME_GETSECATTR, fifo_getsecattr,
126 	NULL, NULL
127 };
128 
129 /*
130  * Return the fifoinfo structure.
131  */
132 struct streamtab *
133 fifo_getinfo()
134 {
135 	return (&fifoinfo);
136 }
137 
138 /*
139  * Open and stream a FIFO.
140  * If this is the first open of the file (FIFO is not streaming),
141  * initialize the fifonode and attach a stream to the vnode.
142  *
143  * Each end of a fifo must be synchronized with the other end.
144  * If not, the mated end may complete an open, I/O, close sequence
145  * before the end waiting in open ever wakes up.
146  * Note: namefs pipes come through this routine too.
147  */
148 int
149 fifo_open(vnode_t **vpp, int flag, cred_t *crp)
150 {
151 	vnode_t		*vp		= *vpp;
152 	fifonode_t	*fnp		= VTOF(vp);
153 	fifolock_t	*fn_lock	= fnp->fn_lock;
154 	int		error;
155 
156 	ASSERT(vp->v_type == VFIFO);
157 	ASSERT(vn_matchops(vp, fifo_vnodeops));
158 
159 	mutex_enter(&fn_lock->flk_lock);
160 	/*
161 	 * If we are the first reader, wake up any writers that
162 	 * may be waiting around.  wait for all of them to
163 	 * wake up before proceeding (i.e. fn_wsynccnt == 0)
164 	 */
165 	if (flag & FREAD) {
166 		fnp->fn_rcnt++;		/* record reader present */
167 		if (! (fnp->fn_flag & ISPIPE))
168 			fnp->fn_rsynccnt++;	/* record reader in open */
169 	}
170 
171 	/*
172 	 * If we are the first writer, wake up any readers that
173 	 * may be waiting around.  wait for all of them to
174 	 * wake up before proceeding (i.e. fn_rsynccnt == 0)
175 	 */
176 
177 	if (flag & FWRITE) {
178 		fnp->fn_wcnt++;		/* record writer present */
179 		if (! (fnp->fn_flag & ISPIPE))
180 			fnp->fn_wsynccnt++;	/* record writer in open */
181 	}
182 	/*
183 	 * fifo_stropen will take care of twisting the queues on the first
184 	 * open.  The 1 being passed in means twist the queues on the first
185 	 * open.
186 	 */
187 	error = fifo_stropen(vpp, flag, crp, 1, 1);
188 	/*
189 	 * fifo_stropen() could have replaced vpp
190 	 * since fifo's are the only thing we need to sync up,
191 	 * everything else just returns;
192 	 * Note: don't need to hold lock since ISPIPE can't change
193 	 * and both old and new vp need to be pipes
194 	 */
195 	ASSERT(MUTEX_HELD(&VTOF(*vpp)->fn_lock->flk_lock));
196 	if (fnp->fn_flag & ISPIPE) {
197 		ASSERT(VTOF(*vpp)->fn_flag & ISPIPE);
198 		ASSERT(VTOF(*vpp)->fn_rsynccnt == 0);
199 		ASSERT(VTOF(*vpp)->fn_rsynccnt == 0);
200 		/*
201 		 * XXX note: should probably hold locks, but
202 		 * These values should not be changing
203 		 */
204 		ASSERT(fnp->fn_rsynccnt == 0);
205 		ASSERT(fnp->fn_wsynccnt == 0);
206 		mutex_exit(&VTOF(*vpp)->fn_lock->flk_lock);
207 		return (error);
208 	}
209 	/*
210 	 * vp can't change for FIFOS
211 	 */
212 	ASSERT(vp == *vpp);
213 	/*
214 	 * If we are opening for read (or writer)
215 	 *   indicate that the reader (or writer) is done with open
216 	 *   if there is a writer (or reader) waiting for us, wake them up
217 	 *	and indicate that at least 1 read (or write) open has occured
218 	 *	this is need in the event the read (or write) side closes
219 	 *	before the writer (or reader) has a chance to wake up
220 	 *	i.e. it sees that a reader (or writer) was once there
221 	 */
222 	if (flag & FREAD) {
223 		fnp->fn_rsynccnt--;	/* reader done with open */
224 		if (fnp->fn_flag & FIFOSYNC) {
225 			/*
226 			 * This indicates that a read open has occured
227 			 * Only need to set if writer is actually asleep
228 			 * Flag will be consumed by writer.
229 			 */
230 			fnp->fn_flag |= FIFOROCR;
231 			cv_broadcast(&fnp->fn_wait_cv);
232 		}
233 	}
234 	if (flag & FWRITE) {
235 		fnp->fn_wsynccnt--;	/* writer done with open */
236 		if (fnp->fn_flag & FIFOSYNC) {
237 			/*
238 			 * This indicates that a write open has occured
239 			 * Only need to set if reader is actually asleep
240 			 * Flag will be consumed by reader.
241 			 */
242 			fnp->fn_flag |= FIFOWOCR;
243 			cv_broadcast(&fnp->fn_wait_cv);
244 		}
245 	}
246 
247 	fnp->fn_flag &= ~FIFOSYNC;
248 
249 	/*
250 	 * errors don't wait around.. just return
251 	 * Note: XXX other end will wake up and continue despite error.
252 	 * There is no defined semantic on the correct course of option
253 	 * so we do what we've done in the past
254 	 */
255 	if (error != 0) {
256 		mutex_exit(&fnp->fn_lock->flk_lock);
257 		goto done;
258 	}
259 	ASSERT(fnp->fn_rsynccnt <= fnp->fn_rcnt);
260 	ASSERT(fnp->fn_wsynccnt <= fnp->fn_wcnt);
261 	/*
262 	 * FIFOWOCR (or FIFOROCR) indicates that the writer (or reader)
263 	 * has woken us up and is done with open (this way, if the other
264 	 * end has made it to close, we don't block forever in open)
265 	 * fn_wnct == fn_wsynccnt (or fn_rcnt == fn_rsynccnt) indicates
266 	 * that no writer (or reader) has yet made it through open
267 	 * This has the side benifit of that the first
268 	 * reader (or writer) will wait until the other end finishes open
269 	 */
270 	if (flag & FREAD) {
271 		while ((fnp->fn_flag & FIFOWOCR) == 0 &&
272 		    fnp->fn_wcnt == fnp->fn_wsynccnt) {
273 			if (flag & (FNDELAY|FNONBLOCK)) {
274 				mutex_exit(&fnp->fn_lock->flk_lock);
275 				goto done;
276 			}
277 			fnp->fn_insync++;
278 			fnp->fn_flag |= FIFOSYNC;
279 			if (!cv_wait_sig_swap(&fnp->fn_wait_cv,
280 			    &fnp->fn_lock->flk_lock)) {
281 				/*
282 				 * Last reader to wakeup clear writer
283 				 * Clear both writer and reader open
284 				 * occured flag incase other end is O_RDWR
285 				 */
286 				if (--fnp->fn_insync == 0 &&
287 				    fnp->fn_flag & FIFOWOCR) {
288 					fnp->fn_flag &= ~(FIFOWOCR|FIFOROCR);
289 				}
290 				mutex_exit(&fnp->fn_lock->flk_lock);
291 				(void) fifo_close(*vpp, flag, 1, 0, crp);
292 				error = EINTR;
293 				goto done;
294 			}
295 			/*
296 			 * Last reader to wakeup clear writer open occured flag
297 			 * Clear both writer and reader open occured flag
298 			 * incase other end is O_RDWR
299 			 */
300 			if (--fnp->fn_insync == 0 &&
301 			    fnp->fn_flag & FIFOWOCR) {
302 				fnp->fn_flag &= ~(FIFOWOCR|FIFOROCR);
303 				break;
304 			}
305 		}
306 	} else if (flag & FWRITE) {
307 		while ((fnp->fn_flag & FIFOROCR) == 0 &&
308 		    fnp->fn_rcnt == fnp->fn_rsynccnt) {
309 			if ((flag & (FNDELAY|FNONBLOCK)) && fnp->fn_rcnt == 0) {
310 				mutex_exit(&fnp->fn_lock->flk_lock);
311 				(void) fifo_close(*vpp, flag, 1, 0, crp);
312 				error = ENXIO;
313 				goto done;
314 			}
315 			fnp->fn_flag |= FIFOSYNC;
316 			fnp->fn_insync++;
317 			if (!cv_wait_sig_swap(&fnp->fn_wait_cv,
318 			    &fnp->fn_lock->flk_lock)) {
319 				/*
320 				 * Last writer to wakeup clear
321 				 * Clear both writer and reader open
322 				 * occured flag in case other end is O_RDWR
323 				 */
324 				if (--fnp->fn_insync == 0 &&
325 				    (fnp->fn_flag & FIFOROCR) != 0) {
326 					fnp->fn_flag &= ~(FIFOWOCR|FIFOROCR);
327 				}
328 				mutex_exit(&fnp->fn_lock->flk_lock);
329 				(void) fifo_close(*vpp, flag, 1, 0, crp);
330 				error = EINTR;
331 				goto done;
332 			}
333 			/*
334 			 * Last writer to wakeup clear reader open occured flag
335 			 * Clear both writer and reader open
336 			 * occured flag in case other end is O_RDWR
337 			 */
338 			if (--fnp->fn_insync == 0 &&
339 			    (fnp->fn_flag & FIFOROCR) != 0) {
340 				fnp->fn_flag &= ~(FIFOWOCR|FIFOROCR);
341 				break;
342 			}
343 		}
344 	}
345 	mutex_exit(&fn_lock->flk_lock);
346 done:
347 	return (error);
348 }
349 
350 /*
351  * Close down a stream.
352  * Call cleanlocks() and strclean() on every close.
353  * For last close send hangup message and force
354  * the other end of a named pipe to be unmounted.
355  * Mount guarantees that the mounted end will only call fifo_close()
356  * with a count of 1 when the unmount occurs.
357  * This routine will close down one end of a pipe or FIFO
358  * and free the stream head via strclose()
359  */
360 /*ARGSUSED*/
361 int
362 fifo_close(vnode_t *vp, int flag, int count, offset_t offset, cred_t *crp)
363 {
364 	fifonode_t	*fnp		= VTOF(vp);
365 	fifonode_t	*fn_dest	= fnp->fn_dest;
366 	int		error		= 0;
367 	fifolock_t	*fn_lock	= fnp->fn_lock;
368 	queue_t		*sd_wrq;
369 	vnode_t		*fn_dest_vp;
370 	int		senthang = 0;
371 
372 	ASSERT(vp->v_stream != NULL);
373 	/*
374 	 * clean locks and clear events.
375 	 */
376 	(void) cleanlocks(vp, ttoproc(curthread)->p_pid, 0);
377 	cleanshares(vp, ttoproc(curthread)->p_pid);
378 	strclean(vp);
379 
380 	/*
381 	 * If a file still has the pipe/FIFO open, return.
382 	 */
383 	if (count > 1)
384 		return (0);
385 
386 
387 	sd_wrq = strvp2wq(vp);
388 	mutex_enter(&fn_lock->flk_lock);
389 
390 	/*
391 	 * wait for pending opens to finish up
392 	 * note: this also has the side effect of single threading closes
393 	 */
394 	while (fn_lock->flk_ocsync)
395 		cv_wait(&fn_lock->flk_wait_cv, &fn_lock->flk_lock);
396 
397 	fn_lock->flk_ocsync = 1;
398 
399 	if (flag & FREAD) {
400 		fnp->fn_rcnt--;
401 	}
402 	/*
403 	 * If we are last writer wake up sleeping readers
404 	 * (They'll figure out that there are no more writers
405 	 * and do the right thing)
406 	 * send hangup down stream so that stream head will do the
407 	 * right thing.
408 	 */
409 	if (flag & FWRITE) {
410 		if (--fnp->fn_wcnt == 0 && fn_dest->fn_rcnt > 0) {
411 			if ((fn_dest->fn_flag & (FIFOFAST | FIFOWANTR)) ==
412 			    (FIFOFAST | FIFOWANTR)) {
413 				/*
414 				 * While we're at it, clear FIFOWANTW too
415 				 * Wake up any sleeping readers or
416 				 * writers.
417 				 */
418 				fn_dest->fn_flag &= ~(FIFOWANTR | FIFOWANTW);
419 				cv_broadcast(&fn_dest->fn_wait_cv);
420 			}
421 			/*
422 			 * This is needed incase the other side
423 			 * was opened non-blocking.  It is the
424 			 * only way we can tell that wcnt is 0 because
425 			 * of close instead of never having a writer
426 			 */
427 			if (!(fnp->fn_flag & ISPIPE))
428 				fnp->fn_flag |= FIFOCLOSE;
429 			/*
430 			 * Note: sending hangup effectively shuts down
431 			 * both reader and writer at other end.
432 			 */
433 			(void) putnextctl_wait(sd_wrq, M_HANGUP);
434 			senthang = 1;
435 		}
436 	}
437 
438 	/*
439 	 * For FIFOs we need to indicate to stream head that last reader
440 	 * has gone away so that an error is generated
441 	 * Pipes just need to wake up the other end so that it can
442 	 * notice this end has gone away.
443 	 */
444 
445 	if (fnp->fn_rcnt == 0 && fn_dest->fn_wcnt > 0) {
446 		if ((fn_dest->fn_flag & (FIFOFAST | FIFOWANTW)) ==
447 		    (FIFOFAST | FIFOWANTW)) {
448 			/*
449 			 * wake up any sleeping writers
450 			 */
451 			fn_dest->fn_flag &= ~FIFOWANTW;
452 			cv_broadcast(&fn_dest->fn_wait_cv);
453 		}
454 	}
455 
456 	/*
457 	 * if there are still processes with this FIFO open
458 	 *	clear open/close sync flag
459 	 *	and just return;
460 	 */
461 	if (--fnp->fn_open > 0) {
462 		ASSERT((fnp->fn_rcnt + fnp->fn_wcnt) != 0);
463 		fn_lock->flk_ocsync = 0;
464 		cv_broadcast(&fn_lock->flk_wait_cv);
465 		mutex_exit(&fn_lock->flk_lock);
466 		return (0);
467 	}
468 
469 	/*
470 	 * Need to send HANGUP if other side is still open
471 	 * (fnp->fn_rcnt or fnp->fn_wcnt may not be zero (some thread
472 	 * on this end of the pipe may still be in fifo_open())
473 	 *
474 	 * Note: we can get here with fn_rcnt and fn_wcnt != 0 if some
475 	 * thread is blocked somewhere in the fifo_open() path prior to
476 	 * fifo_stropen() incrementing fn_open.  This can occur for
477 	 * normal FIFOs as well as named pipes.  fn_rcnt and
478 	 * fn_wcnt only indicate attempts to open. fn_open indicates
479 	 * successful opens. Partially opened FIFOs should proceed
480 	 * normally; i.e. they will appear to be new opens.  Partially
481 	 * opened pipes will probably fail.
482 	 */
483 
484 	if (fn_dest->fn_open && senthang == 0)
485 		(void) putnextctl_wait(sd_wrq, M_HANGUP);
486 
487 
488 	/*
489 	 * If this a pipe and this is the first end to close,
490 	 * then we have a bit of cleanup work to do.
491 	 * 	Mark both ends of pipe as closed.
492 	 * 	Wake up anybody blocked at the other end and for named pipes,
493 	 *	Close down this end of the stream
494 	 *	Allow other opens/closes to continue
495 	 * 	force an unmount of other end.
496 	 * Otherwise if this is last close,
497 	 *	flush messages,
498 	 *	close down the stream
499 	 *	allow other opens/closes to continue
500 	 */
501 	fnp->fn_flag &= ~FIFOISOPEN;
502 	if ((fnp->fn_flag & ISPIPE) && !(fnp->fn_flag & FIFOCLOSE)) {
503 		fnp->fn_flag |= FIFOCLOSE;
504 		fn_dest->fn_flag |= FIFOCLOSE;
505 		if (fnp->fn_flag & FIFOFAST)
506 			fifo_fastflush(fnp);
507 		if (vp->v_stream != NULL) {
508 			mutex_exit(&fn_lock->flk_lock);
509 			(void) strclose(vp, flag, crp);
510 			mutex_enter(&fn_lock->flk_lock);
511 		}
512 		cv_broadcast(&fn_dest->fn_wait_cv);
513 		/*
514 		 * allow opens and closes to proceed
515 		 * Since this end is now closed down, any attempt
516 		 * to do anything with this end will fail
517 		 */
518 		fn_lock->flk_ocsync = 0;
519 		cv_broadcast(&fn_lock->flk_wait_cv);
520 		fn_dest_vp = FTOV(fn_dest);
521 		/*
522 		 * if other end of pipe has been opened and it's
523 		 * a named pipe, unmount it
524 		 */
525 		if (fn_dest_vp->v_stream &&
526 		    (fn_dest_vp->v_stream->sd_flag & STRMOUNT)) {
527 			/*
528 			 * We must hold the destination vnode because
529 			 * nm_unmountall() causes close to be called
530 			 * for the other end of named pipe.  This
531 			 * could free the vnode before we are ready.
532 			 */
533 			VN_HOLD(fn_dest_vp);
534 			mutex_exit(&fn_lock->flk_lock);
535 			error = nm_unmountall(fn_dest_vp, crp);
536 			ASSERT(error == 0);
537 			VN_RELE(fn_dest_vp);
538 		} else {
539 			ASSERT(vp->v_count >= 1);
540 			mutex_exit(&fn_lock->flk_lock);
541 		}
542 	} else {
543 		if (fnp->fn_flag & FIFOFAST)
544 			fifo_fastflush(fnp);
545 #if DEBUG
546 		fn_dest_vp = FTOV(fn_dest);
547 		if (fn_dest_vp->v_stream)
548 		    ASSERT((fn_dest_vp->v_stream->sd_flag & STRMOUNT) == 0);
549 #endif
550 		if (vp->v_stream != NULL) {
551 			mutex_exit(&fn_lock->flk_lock);
552 			(void) strclose(vp, flag, crp);
553 			mutex_enter(&fn_lock->flk_lock);
554 		}
555 		fn_lock->flk_ocsync = 0;
556 		cv_broadcast(&fn_lock->flk_wait_cv);
557 		cv_broadcast(&fn_dest->fn_wait_cv);
558 		mutex_exit(&fn_lock->flk_lock);
559 	}
560 	return (error);
561 }
562 
563 /*
564  * Read from a pipe or FIFO.
565  * return 0 if....
566  *    (1) user read request is 0 or no stream
567  *    (2) broken pipe with no data
568  *    (3) write-only FIFO with no data
569  *    (4) no data and FNDELAY flag is set.
570  * Otherwise return
571  *	EAGAIN if FNONBLOCK is set and no data to read
572  *	EINTR if signal recieved while waiting for data
573  *
574  * While there is no data to read....
575  *   -  if the NDELAY/NONBLOCK flag is set, return 0/EAGAIN.
576  *   -  wait for a write.
577  *
578  */
579 /*ARGSUSED*/
580 
581 static int
582 fifo_read(struct vnode *vp, struct uio *uiop, int ioflag, struct cred *crp,
583 	caller_context_t *ct)
584 {
585 	fifonode_t	*fnp		= VTOF(vp);
586 	fifonode_t	*fn_dest;
587 	fifolock_t	*fn_lock	= fnp->fn_lock;
588 	int		error		= 0;
589 	mblk_t		*bp;
590 
591 	ASSERT(vp->v_stream != NULL);
592 	if (uiop->uio_resid == 0)
593 		return (0);
594 
595 	mutex_enter(&fn_lock->flk_lock);
596 
597 	TRACE_2(TR_FAC_FIFO,
598 		TR_FIFOREAD_IN, "fifo_read in:%p fnp %p", vp, fnp);
599 
600 	if (! (fnp->fn_flag & FIFOFAST))
601 		goto stream_mode;
602 
603 	fn_dest	= fnp->fn_dest;
604 	/*
605 	 * Check for data on our input queue
606 	 */
607 
608 	while (fnp->fn_count == 0) {
609 		/*
610 		 * No data on first attempt and no writer, then EOF
611 		 */
612 		if (fn_dest->fn_wcnt == 0 || fn_dest->fn_rcnt == 0) {
613 			mutex_exit(&fn_lock->flk_lock);
614 			return (0);
615 		}
616 		/*
617 		 * no data found.. if non-blocking, return EAGAIN
618 		 * otherwise 0.
619 		 */
620 		if (uiop->uio_fmode & (FNDELAY|FNONBLOCK)) {
621 			mutex_exit(&fn_lock->flk_lock);
622 			if (uiop->uio_fmode & FNONBLOCK)
623 				return (EAGAIN);
624 			return (0);
625 		}
626 
627 		/*
628 		 * Note: FIFOs can get here with FIFOCLOSE set if
629 		 * write side is in the middle of opeining after
630 		 * it once closed. Pipes better not have FIFOCLOSE set
631 		 */
632 		ASSERT((fnp->fn_flag & (ISPIPE|FIFOCLOSE)) !=
633 		    (ISPIPE|FIFOCLOSE));
634 		/*
635 		 * wait for data
636 		 */
637 		fnp->fn_flag |= FIFOWANTR;
638 
639 		TRACE_1(TR_FAC_FIFO, TR_FIFOREAD_WAIT,
640 			"fiforead wait: %p", vp);
641 
642 		if (!cv_wait_sig_swap(&fnp->fn_wait_cv,
643 		    &fn_lock->flk_lock)) {
644 			error = EINTR;
645 			goto done;
646 		}
647 
648 		TRACE_1(TR_FAC_FIFO, TR_FIFOREAD_WAKE,
649 			"fiforead awake: %p", vp);
650 
651 		/*
652 		 * check to make sure we are still in fast mode
653 		 */
654 		if (!(fnp->fn_flag & FIFOFAST))
655 			goto stream_mode;
656 	}
657 
658 	ASSERT(fnp->fn_mp != NULL);
659 
660 	/* For pipes copy should not bypass cache */
661 	uiop->uio_extflg |= UIO_COPY_CACHED;
662 
663 	do {
664 		int bpsize = MBLKL(fnp->fn_mp);
665 		int uiosize = MIN(bpsize, uiop->uio_resid);
666 
667 		error = uiomove(fnp->fn_mp->b_rptr, uiosize, UIO_READ, uiop);
668 		if (error != 0)
669 			break;
670 
671 		fnp->fn_count -= uiosize;
672 
673 		if (bpsize <= uiosize) {
674 			bp = fnp->fn_mp;
675 			fnp->fn_mp = fnp->fn_mp->b_cont;
676 			freeb(bp);
677 
678 			if (uiop->uio_resid == 0)
679 				break;
680 
681 			while (fnp->fn_mp == NULL && fn_dest->fn_wwaitcnt > 0) {
682 				ASSERT(fnp->fn_count == 0);
683 
684 				if (uiop->uio_fmode & (FNDELAY|FNONBLOCK))
685 					goto trywake;
686 
687 				/*
688 				 * We've consumed all available data but there
689 				 * are threads waiting to write more, let them
690 				 * proceed before bailing.
691 				 */
692 
693 				fnp->fn_flag |= FIFOWANTR;
694 				fifo_wakewriter(fn_dest, fn_lock);
695 
696 				if (!cv_wait_sig(&fnp->fn_wait_cv,
697 				    &fn_lock->flk_lock))
698 					goto trywake;
699 
700 				if (!(fnp->fn_flag & FIFOFAST))
701 					goto stream_mode;
702 			}
703 		} else {
704 			fnp->fn_mp->b_rptr += uiosize;
705 			ASSERT(uiop->uio_resid == 0);
706 		}
707 	} while (uiop->uio_resid != 0 && fnp->fn_mp != NULL);
708 
709 trywake:
710 	ASSERT(msgdsize(fnp->fn_mp) == fnp->fn_count);
711 
712 	/*
713 	 * wake up any blocked writers, processes
714 	 * sleeping on POLLWRNORM, or processes waiting for SIGPOLL
715 	 * Note: checking for fn_count < Fifohiwat emulates
716 	 * STREAMS functionality when low water mark is 0
717 	 */
718 	if (fn_dest->fn_flag & (FIFOWANTW | FIFOHIWATW) &&
719 	    fnp->fn_count < Fifohiwat) {
720 		fifo_wakewriter(fn_dest, fn_lock);
721 	}
722 	goto done;
723 
724 	/*
725 	 * FIFO is in streams mode.. let the stream head handle it
726 	 */
727 stream_mode:
728 
729 	mutex_exit(&fn_lock->flk_lock);
730 	TRACE_1(TR_FAC_FIFO,
731 		TR_FIFOREAD_STREAM, "fifo_read stream_mode:%p", vp);
732 
733 	error = strread(vp, uiop, crp);
734 
735 	mutex_enter(&fn_lock->flk_lock);
736 
737 done:
738 	/*
739 	 * vnode update access time
740 	 */
741 	if (error == 0) {
742 		time_t now = gethrestime_sec();
743 
744 		if (fnp->fn_flag & ISPIPE)
745 			fnp->fn_dest->fn_atime = now;
746 		fnp->fn_atime = now;
747 	}
748 	TRACE_2(TR_FAC_FIFO,
749 		TR_FIFOREAD_OUT, "fifo_read out:%p error %d",
750 		vp, error);
751 	mutex_exit(&fn_lock->flk_lock);
752 	return (error);
753 }
754 
755 /*
756  * send SIGPIPE and return EPIPE if ...
757  *   (1) broken pipe (essentially, reader is gone)
758  *   (2) FIFO is not open for reading
759  * return 0 if...
760  *   (1) no stream
761  *   (2) user write request is for 0 bytes and SW_SNDZERO is not set
762  *	Note: SW_SNDZERO can't be set in fast mode
763  * While the stream is flow controlled....
764  *   -  if the NDELAY/NONBLOCK flag is set, return 0/EAGAIN.
765  *   -  unlock the fifonode and sleep waiting for a reader.
766  *   -  if a pipe and it has a mate, sleep waiting for its mate
767  *	to read.
768  */
769 /*ARGSUSED*/
770 static int
771 fifo_write(vnode_t *vp, uio_t *uiop, int ioflag, cred_t *crp,
772 	caller_context_t *ct)
773 {
774 	struct fifonode	*fnp, *fn_dest;
775 	fifolock_t	*fn_lock;
776 	struct stdata	*stp;
777 	int		error	= 0;
778 	int		write_size;
779 	int		size;
780 	int		fmode;
781 	mblk_t		*bp;
782 	boolean_t	hotread;
783 
784 	ASSERT(vp->v_stream);
785 	uiop->uio_loffset = 0;
786 	stp	= vp->v_stream;
787 
788 	/*
789 	 * remember original number of bytes requested. Used to determine if
790 	 * we actually have written anything at all
791 	 */
792 	write_size = uiop->uio_resid;
793 
794 	/*
795 	 * only send zero-length messages if SW_SNDZERO is set
796 	 * Note: we will be in streams mode if SW_SNDZERO is set
797 	 * XXX this streams interface should not be exposed
798 	 */
799 	if ((write_size == 0) && !(stp->sd_wput_opt & SW_SNDZERO))
800 		return (0);
801 
802 	fnp = VTOF(vp);
803 	fn_lock = fnp->fn_lock;
804 	fn_dest = fnp->fn_dest;
805 
806 	mutex_enter(&fn_lock->flk_lock);
807 
808 	TRACE_3(TR_FAC_FIFO,
809 		TR_FIFOWRITE_IN, "fifo_write in:%p fnp %p size %d",
810 		vp, fnp, write_size);
811 
812 	/*
813 	 * oops, no readers, error
814 	 */
815 	if (fn_dest->fn_rcnt == 0 || fn_dest->fn_wcnt == 0) {
816 		goto epipe;
817 	}
818 
819 	/*
820 	 * if we are not in fast mode, let streams handle it
821 	 */
822 	if (!(fnp->fn_flag & FIFOFAST))
823 		goto stream_mode;
824 
825 	fmode = uiop->uio_fmode & (FNDELAY|FNONBLOCK);
826 
827 	/* For pipes copy should not bypass cache */
828 	uiop->uio_extflg |= UIO_COPY_CACHED;
829 
830 	do  {
831 		/*
832 		 * check to make sure we are not over high water mark
833 		 */
834 		while (fn_dest->fn_count >= Fifohiwat) {
835 			/*
836 			 * Indicate that we have gone over high
837 			 * water mark
838 			 */
839 			/*
840 			 * if non-blocking, return
841 			 * only happens first time through loop
842 			 */
843 			if (fmode) {
844 				fnp->fn_flag |= FIFOHIWATW;
845 				if (uiop->uio_resid == write_size) {
846 					mutex_exit(&fn_lock->flk_lock);
847 					if (fmode & FNDELAY)
848 						return (0);
849 					else
850 						return (EAGAIN);
851 				}
852 				goto done;
853 			}
854 
855 			/*
856 			 * wait for things to drain
857 			 */
858 			fnp->fn_flag |= FIFOWANTW;
859 			fnp->fn_wwaitcnt++;
860 			TRACE_1(TR_FAC_FIFO, TR_FIFOWRITE_WAIT,
861 				"fifo_write wait: %p", vp);
862 			if (!cv_wait_sig_swap(&fnp->fn_wait_cv,
863 			    &fn_lock->flk_lock)) {
864 				error = EINTR;
865 				fnp->fn_wwaitcnt--;
866 				fifo_wakereader(fn_dest, fn_lock);
867 				goto done;
868 			}
869 			fnp->fn_wwaitcnt--;
870 
871 			TRACE_1(TR_FAC_FIFO, TR_FIFOWRITE_WAKE,
872 				"fifo_write wake: %p", vp);
873 
874 			/*
875 			 * check to make sure we're still in fast mode
876 			 */
877 			if (!(fnp->fn_flag & FIFOFAST))
878 				goto stream_mode;
879 
880 			/*
881 			 * make sure readers didn't go away
882 			 */
883 			if (fn_dest->fn_rcnt == 0 || fn_dest->fn_wcnt == 0) {
884 				goto epipe;
885 			}
886 		}
887 		/*
888 		 * If the write will put us over the high water mark,
889 		 * then we must break the message up into PIPE_BUF
890 		 * chunks to stay compliant with STREAMS
891 		 */
892 		if (uiop->uio_resid + fn_dest->fn_count > Fifohiwat)
893 			size = MIN(uiop->uio_resid, PIPE_BUF);
894 		else
895 			size = uiop->uio_resid;
896 
897 		/*
898 		 * We don't need to hold flk_lock across the allocb() and
899 		 * uiomove().  However, on a multiprocessor machine where both
900 		 * the reader and writer thread are on cpu's, we must be
901 		 * careful to only drop the lock if there's data to be read.
902 		 * This forces threads entering fifo_read() to spin or block
903 		 * on flk_lock, rather than acquiring flk_lock only to
904 		 * discover there's no data to read and being forced to go
905 		 * back to sleep, only to be woken up microseconds later by
906 		 * this writer thread.
907 		 */
908 		hotread = fn_dest->fn_count > 0;
909 		if (hotread)
910 			mutex_exit(&fn_lock->flk_lock);
911 
912 		ASSERT(size != 0);
913 		/*
914 		 * Align the mblk with the user data so that
915 		 * copying in the data can take advantage of
916 		 * the double word alignment
917 		 */
918 		if ((bp = allocb(size + 8, BPRI_MED)) == NULL) {
919 			if (!hotread)
920 				mutex_exit(&fn_lock->flk_lock);
921 
922 			error = strwaitbuf(size, BPRI_MED);
923 
924 			mutex_enter(&fn_lock->flk_lock);
925 			if (error != 0) {
926 				goto done;
927 			}
928 			/*
929 			 * check to make sure we're still in fast mode
930 			 */
931 			if (!(fnp->fn_flag & FIFOFAST))
932 				goto stream_mode;
933 
934 			/*
935 			 * make sure readers didn't go away
936 			 */
937 			if (fn_dest->fn_rcnt == 0 || fn_dest->fn_wcnt == 0) {
938 				goto epipe;
939 			}
940 			/*
941 			 * some other thread could have gotten in
942 			 * need to go back and check hi water mark
943 			 */
944 			continue;
945 		}
946 		bp->b_rptr += ((uintptr_t)uiop->uio_iov->iov_base & 0x7);
947 		bp->b_wptr = bp->b_rptr + size;
948 		error = uiomove((caddr_t)bp->b_rptr, size, UIO_WRITE, uiop);
949 		if (hotread)
950 			mutex_enter(&fn_lock->flk_lock);
951 		if (error != 0) {
952 			freeb(bp);
953 			goto done;
954 		}
955 
956 		fn_dest->fn_count += size;
957 		if (fn_dest->fn_mp != NULL) {
958 			fn_dest->fn_tail->b_cont = bp;
959 			fn_dest->fn_tail = bp;
960 		} else {
961 			fn_dest->fn_mp = fn_dest->fn_tail = bp;
962 			/*
963 			 * This is the first bit of data; wake up any sleeping
964 			 * readers, processes blocked in poll, and those
965 			 * expecting a SIGPOLL.
966 			 */
967 			fifo_wakereader(fn_dest, fn_lock);
968 		}
969 	} while (uiop->uio_resid != 0);
970 
971 	goto done;
972 
973 stream_mode:
974 	/*
975 	 * streams mode
976 	 *  let the stream head handle the write
977 	 */
978 	ASSERT(MUTEX_HELD(&fn_lock->flk_lock));
979 
980 	mutex_exit(&fn_lock->flk_lock);
981 	TRACE_1(TR_FAC_FIFO,
982 		TR_FIFOWRITE_STREAM, "fifo_write stream_mode:%p", vp);
983 
984 	error = strwrite(vp, uiop, crp);
985 
986 	mutex_enter(&fn_lock->flk_lock);
987 
988 done:
989 	/*
990 	 * update vnode modification and change times
991 	 * make sure there were no errors and some data was transfered
992 	 */
993 	if (error == 0 && write_size != uiop->uio_resid) {
994 		time_t now = gethrestime_sec();
995 
996 		if (fnp->fn_flag & ISPIPE) {
997 			fn_dest->fn_mtime = fn_dest->fn_ctime = now;
998 		}
999 		fnp->fn_mtime = fnp->fn_ctime = now;
1000 	} else if (fn_dest->fn_rcnt == 0 || fn_dest->fn_wcnt == 0) {
1001 		goto epipe;
1002 	}
1003 	TRACE_3(TR_FAC_FIFO, TR_FIFOWRITE_OUT,
1004 		"fifo_write out: vp %p error %d fnp %p", vp, error, fnp);
1005 	mutex_exit(&fn_lock->flk_lock);
1006 	return (error);
1007 epipe:
1008 	error = EPIPE;
1009 	TRACE_3(TR_FAC_FIFO, TR_FIFOWRITE_OUT,
1010 		"fifo_write out: vp %p error %d fnp %p",
1011 		vp, error, fnp);
1012 	mutex_exit(&fn_lock->flk_lock);
1013 	tsignal(curthread, SIGPIPE);
1014 	return (error);
1015 }
1016 
1017 static int
1018 fifo_ioctl(vnode_t *vp, int cmd, intptr_t arg, int mode,
1019 	cred_t *cr, int *rvalp)
1020 {
1021 	/*
1022 	 * Just a quick check
1023 	 * Once we go to streams mode we don't ever revert back
1024 	 * So we do this quick check so as not to incur the overhead
1025 	 * associated with acquiring the lock
1026 	 */
1027 	return ((VTOF(vp)->fn_flag & FIFOFAST) ?
1028 		fifo_fastioctl(vp, cmd, arg, mode, cr, rvalp) :
1029 		fifo_strioctl(vp, cmd, arg, mode, cr, rvalp));
1030 }
1031 
1032 static int
1033 fifo_fastioctl(vnode_t *vp, int cmd, intptr_t arg, int mode,
1034 	cred_t *cr, int *rvalp)
1035 {
1036 	fifonode_t	*fnp		= VTOF(vp);
1037 	fifonode_t	*fn_dest;
1038 	int		error		= 0;
1039 	fifolock_t	*fn_lock	= fnp->fn_lock;
1040 	int		cnt;
1041 
1042 	/*
1043 	 * tty operations not allowed
1044 	 */
1045 	if (((cmd & IOCTYPE) == LDIOC) ||
1046 	    ((cmd & IOCTYPE) == tIOC) ||
1047 	    ((cmd & IOCTYPE) == TIOC)) {
1048 		return (EINVAL);
1049 	}
1050 
1051 	mutex_enter(&fn_lock->flk_lock);
1052 
1053 	if (!(fnp->fn_flag & FIFOFAST)) {
1054 		goto stream_mode;
1055 	}
1056 
1057 	switch (cmd) {
1058 
1059 	/*
1060 	 * Things we can't handle
1061 	 * These will switch us to streams mode.
1062 	 */
1063 	default:
1064 	case I_STR:
1065 	case I_SRDOPT:
1066 	case I_PUSH:
1067 	case I_FDINSERT:
1068 	case I_SENDFD:
1069 	case I_RECVFD:
1070 	case I_E_RECVFD:
1071 	case I_ATMARK:
1072 	case I_CKBAND:
1073 	case I_GETBAND:
1074 	case I_SWROPT:
1075 		goto turn_fastoff;
1076 
1077 	/*
1078 	 * Things that don't do damage
1079 	 * These things don't adjust the state of the
1080 	 * stream head (i_setcltime does, but we don't care)
1081 	 */
1082 	case I_FIND:
1083 	case I_GETSIG:
1084 	case FIONBIO:
1085 	case FIOASYNC:
1086 	case I_GRDOPT:	/* probably should not get this, but no harm */
1087 	case I_GWROPT:
1088 	case I_LIST:
1089 	case I_SETCLTIME:
1090 	case I_GETCLTIME:
1091 		mutex_exit(&fn_lock->flk_lock);
1092 		return (strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp));
1093 
1094 	case I_CANPUT:
1095 		/*
1096 		 * We can only handle normal band canputs.
1097 		 * XXX : We could just always go to stream mode; after all
1098 		 * canput is a streams semantics type thing
1099 		 */
1100 		if (arg != 0) {
1101 			goto turn_fastoff;
1102 		}
1103 		*rvalp = (fnp->fn_dest->fn_count < Fifohiwat) ? 1 : 0;
1104 		mutex_exit(&fn_lock->flk_lock);
1105 		return (0);
1106 
1107 	case I_NREAD:
1108 		/*
1109 		 * This may seem a bit silly for non-streams semantics,
1110 		 * (After all, if they really want a message, they'll
1111 		 * probably use getmsg() anyway). but it doesn't hurt
1112 		 */
1113 		error = copyout((caddr_t)&fnp->fn_count, (caddr_t)arg,
1114 			sizeof (cnt));
1115 		if (error == 0) {
1116 			*rvalp = (fnp->fn_count == 0) ? 0 : 1;
1117 		}
1118 		break;
1119 
1120 	case FIORDCHK:
1121 		*rvalp = fnp->fn_count;
1122 		break;
1123 
1124 	case I_PEEK:
1125 	    {
1126 		STRUCT_DECL(strpeek, strpeek);
1127 		struct uio	uio;
1128 		struct iovec	iov;
1129 		int		count;
1130 		mblk_t		*bp;
1131 
1132 		STRUCT_INIT(strpeek, mode);
1133 
1134 		if (fnp->fn_count == 0) {
1135 			*rvalp = 0;
1136 			break;
1137 		}
1138 
1139 		error = copyin((caddr_t)arg, STRUCT_BUF(strpeek),
1140 		    STRUCT_SIZE(strpeek));
1141 		if (error)
1142 			break;
1143 
1144 		/*
1145 		 * can't have any high priority message when in fast mode
1146 		 */
1147 		if (STRUCT_FGET(strpeek, flags) & RS_HIPRI) {
1148 			*rvalp = 0;
1149 			break;
1150 		}
1151 
1152 		iov.iov_base = STRUCT_FGETP(strpeek, databuf.buf);
1153 		iov.iov_len = STRUCT_FGET(strpeek, databuf.maxlen);
1154 		uio.uio_iov = &iov;
1155 		uio.uio_iovcnt = 1;
1156 		uio.uio_loffset = 0;
1157 		uio.uio_segflg = UIO_USERSPACE;
1158 		uio.uio_fmode = 0;
1159 		/* For pipes copy should not bypass cache */
1160 		uio.uio_extflg = UIO_COPY_CACHED;
1161 		uio.uio_resid = iov.iov_len;
1162 		count = fnp->fn_count;
1163 		bp = fnp->fn_mp;
1164 		while (count > 0 && uio.uio_resid) {
1165 			cnt = MIN(uio.uio_resid, bp->b_wptr - bp->b_rptr);
1166 			if ((error = uiomove((char *)bp->b_rptr, cnt,
1167 			    UIO_READ, &uio)) != 0) {
1168 				break;
1169 			}
1170 			count -= cnt;
1171 			bp = bp->b_cont;
1172 		}
1173 		STRUCT_FSET(strpeek, databuf.len,
1174 		    STRUCT_FGET(strpeek, databuf.maxlen) - uio.uio_resid);
1175 		STRUCT_FSET(strpeek, flags, 0);
1176 		STRUCT_FSET(strpeek, ctlbuf.len,
1177 		    STRUCT_FGET(strpeek, ctlbuf.maxlen));
1178 
1179 		error = copyout(STRUCT_BUF(strpeek), (caddr_t)arg,
1180 		    STRUCT_SIZE(strpeek));
1181 		if (error == 0)
1182 			*rvalp = 1;
1183 		break;
1184 	    }
1185 
1186 	case FIONREAD:
1187 		/*
1188 		 * let user know total number of bytes in message queue
1189 		 */
1190 		error = copyout((caddr_t)&fnp->fn_count, (caddr_t)arg,
1191 			sizeof (fnp->fn_count));
1192 		if (error == 0)
1193 			*rvalp = 0;
1194 		break;
1195 
1196 	case I_SETSIG:
1197 		/*
1198 		 * let streams set up the signal masking for us
1199 		 * we just check to see if it's set
1200 		 * XXX : this interface should not be visible
1201 		 *  i.e. STREAM's framework is exposed.
1202 		 */
1203 		error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
1204 		if (vp->v_stream->sd_sigflags & (S_INPUT|S_RDNORM|S_WRNORM))
1205 			fnp->fn_flag |= FIFOSETSIG;
1206 		else
1207 			fnp->fn_flag &= ~FIFOSETSIG;
1208 		break;
1209 
1210 	case I_FLUSH:
1211 		/*
1212 		 * flush them message queues
1213 		 */
1214 		if (arg & ~FLUSHRW) {
1215 			error = EINVAL;
1216 			break;
1217 		}
1218 		if (arg & FLUSHR) {
1219 			fifo_fastflush(fnp);
1220 		}
1221 		fn_dest = fnp->fn_dest;
1222 		if ((arg & FLUSHW)) {
1223 			fifo_fastflush(fn_dest);
1224 		}
1225 		/*
1226 		 * wake up any sleeping readers or writers
1227 		 * (waking readers probably doesn't make sense, but it
1228 		 *  doesn't hurt; i.e. we just got rid of all the data
1229 		 *  what's to read ?)
1230 		 */
1231 		if (fn_dest->fn_flag & (FIFOWANTW | FIFOWANTR)) {
1232 			fn_dest->fn_flag &= ~(FIFOWANTW | FIFOWANTR);
1233 			cv_broadcast(&fn_dest->fn_wait_cv);
1234 		}
1235 		*rvalp = 0;
1236 		break;
1237 
1238 	/*
1239 	 * Since no band data can ever get on a fifo in fast mode
1240 	 * just return 0.
1241 	 */
1242 	case I_FLUSHBAND:
1243 		error = 0;
1244 		*rvalp = 0;
1245 		break;
1246 
1247 	/*
1248 	 * invalid calls for stream head or fifos
1249 	 */
1250 
1251 	case I_POP:		/* shouldn't happen */
1252 	case I_LOOK:
1253 	case I_LINK:
1254 	case I_PLINK:
1255 	case I_UNLINK:
1256 	case I_PUNLINK:
1257 
1258 	/*
1259 	 * more invalid tty type of ioctls
1260 	 */
1261 
1262 	case SRIOCSREDIR:
1263 	case SRIOCISREDIR:
1264 		error = EINVAL;
1265 		break;
1266 
1267 	}
1268 	mutex_exit(&fn_lock->flk_lock);
1269 	return (error);
1270 
1271 turn_fastoff:
1272 	fifo_fastoff(fnp);
1273 
1274 stream_mode:
1275 	/*
1276 	 * streams mode
1277 	 */
1278 	mutex_exit(&fn_lock->flk_lock);
1279 	return (fifo_strioctl(vp, cmd, arg, mode, cr, rvalp));
1280 
1281 }
1282 
1283 /*
1284  * FIFO is in STREAMS mode; STREAMS framework does most of the work.
1285  */
1286 static int
1287 fifo_strioctl(vnode_t *vp, int cmd, intptr_t arg, int mode,
1288 	cred_t *cr, int *rvalp)
1289 {
1290 	fifonode_t	*fnp = VTOF(vp);
1291 	int		error;
1292 	fifolock_t	*fn_lock;
1293 
1294 	if (cmd == _I_GETPEERCRED) {
1295 		if (mode == FKIOCTL && fnp->fn_pcredp != NULL) {
1296 			k_peercred_t *kp = (k_peercred_t *)arg;
1297 			crhold(fnp->fn_pcredp);
1298 			kp->pc_cr = fnp->fn_pcredp;
1299 			kp->pc_cpid = fnp->fn_cpid;
1300 			return (0);
1301 		} else {
1302 			return (ENOTSUP);
1303 		}
1304 	}
1305 
1306 	error = strioctl(vp, cmd, arg, mode, U_TO_K, cr, rvalp);
1307 
1308 	switch (cmd) {
1309 	/*
1310 	 * The FIFOSEND flag is set to inform other processes that a file
1311 	 * descriptor is pending at the stream head of this pipe.
1312 	 * The flag is cleared and the sending process is awoken when
1313 	 * this process has completed recieving the file descriptor.
1314 	 * XXX This could become out of sync if the process does I_SENDFDs
1315 	 * and opens on connld attached to the same pipe.
1316 	 */
1317 	case I_RECVFD:
1318 	case I_E_RECVFD:
1319 		if (error == 0) {
1320 			fn_lock = fnp->fn_lock;
1321 			mutex_enter(&fn_lock->flk_lock);
1322 			if (fnp->fn_flag & FIFOSEND) {
1323 				fnp->fn_flag &= ~FIFOSEND;
1324 				cv_broadcast(&fnp->fn_dest->fn_wait_cv);
1325 			}
1326 			mutex_exit(&fn_lock->flk_lock);
1327 		}
1328 		break;
1329 	default:
1330 		break;
1331 	}
1332 
1333 	return (error);
1334 }
1335 
1336 /*
1337  * If shadowing a vnode (FIFOs), apply the VOP_GETATTR to the shadowed
1338  * vnode to Obtain the node information. If not shadowing (pipes), obtain
1339  * the node information from the credentials structure.
1340  */
1341 int
1342 fifo_getattr(vnode_t *vp, vattr_t *vap, int flags, cred_t *crp)
1343 {
1344 	int		error		= 0;
1345 	fifonode_t	*fnp		= VTOF(vp);
1346 	queue_t		*qp;
1347 	qband_t		*bandp;
1348 	fifolock_t	*fn_lock	= fnp->fn_lock;
1349 
1350 	if (fnp->fn_realvp) {
1351 		/*
1352 		 * for FIFOs or mounted pipes
1353 		 */
1354 		if (error = VOP_GETATTR(fnp->fn_realvp, vap, flags, crp))
1355 			return (error);
1356 		mutex_enter(&fn_lock->flk_lock);
1357 		/* set current times from fnode, even if older than vnode */
1358 		vap->va_atime.tv_sec = fnp->fn_atime;
1359 		vap->va_atime.tv_nsec = 0;
1360 		vap->va_mtime.tv_sec = fnp->fn_mtime;
1361 		vap->va_mtime.tv_nsec = 0;
1362 		vap->va_ctime.tv_sec = fnp->fn_ctime;
1363 		vap->va_ctime.tv_nsec = 0;
1364 	} else {
1365 		/*
1366 		 * for non-attached/ordinary pipes
1367 		 */
1368 		vap->va_mode = 0;
1369 		mutex_enter(&fn_lock->flk_lock);
1370 		vap->va_atime.tv_sec = fnp->fn_atime;
1371 		vap->va_atime.tv_nsec = 0;
1372 		vap->va_mtime.tv_sec = fnp->fn_mtime;
1373 		vap->va_mtime.tv_nsec = 0;
1374 		vap->va_ctime.tv_sec = fnp->fn_ctime;
1375 		vap->va_ctime.tv_nsec = 0;
1376 		vap->va_uid = crgetuid(crp);
1377 		vap->va_gid = crgetgid(crp);
1378 		vap->va_nlink = 0;
1379 		vap->va_fsid = fifodev;
1380 		vap->va_nodeid = (ino64_t)fnp->fn_ino;
1381 		vap->va_rdev = 0;
1382 	}
1383 	vap->va_type = VFIFO;
1384 	vap->va_blksize = PIPE_BUF;
1385 	/*
1386 	 * Size is number of un-read bytes at the stream head and
1387 	 * nblocks is the unread bytes expressed in blocks.
1388 	 */
1389 	if (vp->v_stream && (fnp->fn_flag & FIFOISOPEN)) {
1390 		if ((fnp->fn_flag & FIFOFAST)) {
1391 			vap->va_size = (u_offset_t)fnp->fn_count;
1392 		} else {
1393 			qp = RD((strvp2wq(vp)));
1394 			vap->va_size = (u_offset_t)qp->q_count;
1395 			if (qp->q_nband != 0) {
1396 				mutex_enter(QLOCK(qp));
1397 				for (bandp = qp->q_bandp; bandp;
1398 				    bandp = bandp->qb_next)
1399 					vap->va_size += bandp->qb_count;
1400 				mutex_exit(QLOCK(qp));
1401 			}
1402 		}
1403 		vap->va_nblocks = (fsblkcnt64_t)btod(vap->va_size);
1404 	} else {
1405 		vap->va_size = (u_offset_t)0;
1406 		vap->va_nblocks = (fsblkcnt64_t)0;
1407 	}
1408 	mutex_exit(&fn_lock->flk_lock);
1409 	vap->va_seq = 0;
1410 	return (0);
1411 }
1412 
1413 /*
1414  * If shadowing a vnode, apply the VOP_SETATTR to it, and to the fnode.
1415  * Otherwise, set the time and return 0.
1416  */
1417 int
1418 fifo_setattr(
1419 	vnode_t			*vp,
1420 	vattr_t			*vap,
1421 	int			flags,
1422 	cred_t			*crp,
1423 	caller_context_t	*ctp)
1424 {
1425 	fifonode_t	*fnp	= VTOF(vp);
1426 	int		error	= 0;
1427 	fifolock_t	*fn_lock;
1428 
1429 	if (fnp->fn_realvp)
1430 		error = VOP_SETATTR(fnp->fn_realvp, vap, flags, crp, ctp);
1431 	if (error == 0) {
1432 		fn_lock = fnp->fn_lock;
1433 		mutex_enter(&fn_lock->flk_lock);
1434 		if (vap->va_mask & AT_ATIME)
1435 			fnp->fn_atime = vap->va_atime.tv_sec;
1436 		if (vap->va_mask & AT_MTIME)
1437 			fnp->fn_mtime = vap->va_mtime.tv_sec;
1438 		fnp->fn_ctime = gethrestime_sec();
1439 		mutex_exit(&fn_lock->flk_lock);
1440 	}
1441 	return (error);
1442 }
1443 
1444 /*
1445  * If shadowing a vnode, apply VOP_ACCESS to it.
1446  * Otherwise, return 0 (allow all access).
1447  */
1448 int
1449 fifo_access(vnode_t *vp, int mode, int flags, cred_t *crp)
1450 {
1451 
1452 	if (VTOF(vp)->fn_realvp)
1453 		return (VOP_ACCESS(VTOF(vp)->fn_realvp, mode, flags, crp));
1454 	else
1455 		return (0);
1456 }
1457 
1458 /*
1459  * If shadowing a vnode, apply the VOP_FSYNC to it.
1460  * Otherwise, return 0.
1461  */
1462 int
1463 fifo_fsync(vnode_t *vp, int syncflag, cred_t *crp)
1464 {
1465 	fifonode_t	*fnp	= VTOF(vp);
1466 	vattr_t		va;
1467 
1468 	if (fnp->fn_realvp == NULL)
1469 		return (0);
1470 
1471 	bzero((caddr_t)&va, sizeof (va));
1472 	va.va_mask = AT_MTIME | AT_ATIME;
1473 	if (VOP_GETATTR(fnp->fn_realvp, &va, 0, crp) == 0) {
1474 		va.va_mask = 0;
1475 		if (fnp->fn_mtime > va.va_mtime.tv_sec) {
1476 			va.va_mtime.tv_sec = fnp->fn_mtime;
1477 			va.va_mask = AT_MTIME;
1478 		}
1479 		if (fnp->fn_atime > va.va_atime.tv_sec) {
1480 			va.va_atime.tv_sec = fnp->fn_atime;
1481 			va.va_mask |= AT_ATIME;
1482 		}
1483 		if (va.va_mask != 0)
1484 			(void) VOP_SETATTR(fnp->fn_realvp, &va, 0, crp, NULL);
1485 	}
1486 	return (VOP_FSYNC(fnp->fn_realvp, syncflag, crp));
1487 }
1488 
1489 /*
1490  * Called when the upper level no longer holds references to the
1491  * vnode. Sync the file system and free the fifonode.
1492  */
1493 void
1494 fifo_inactive(vnode_t *vp, cred_t *crp)
1495 {
1496 	fifonode_t	*fnp;
1497 	fifolock_t	*fn_lock;
1498 
1499 	mutex_enter(&ftable_lock);
1500 	mutex_enter(&vp->v_lock);
1501 	ASSERT(vp->v_count >= 1);
1502 	if (--vp->v_count != 0) {
1503 		/*
1504 		 * Somebody accessed the fifo before we got a chance to
1505 		 * remove it.  They will remove it when they do a vn_rele.
1506 		 */
1507 		mutex_exit(&vp->v_lock);
1508 		mutex_exit(&ftable_lock);
1509 		return;
1510 	}
1511 	mutex_exit(&vp->v_lock);
1512 
1513 	fnp = VTOF(vp);
1514 
1515 	/*
1516 	 * remove fifo from fifo list so that no other process
1517 	 * can grab it.
1518 	 */
1519 	if (fnp->fn_realvp) {
1520 		(void) fiforemove(fnp);
1521 		mutex_exit(&ftable_lock);
1522 		(void) fifo_fsync(vp, FSYNC, crp);
1523 		VN_RELE(fnp->fn_realvp);
1524 		vp->v_vfsp = NULL;
1525 	} else
1526 		mutex_exit(&ftable_lock);
1527 
1528 	fn_lock = fnp->fn_lock;
1529 
1530 	mutex_enter(&fn_lock->flk_lock);
1531 	ASSERT(vp->v_stream == NULL);
1532 	ASSERT(vp->v_count == 0);
1533 	/*
1534 	 * if this is last reference to the lock, then we can
1535 	 * free everything up.
1536 	 */
1537 	if (--fn_lock->flk_ref == 0) {
1538 		mutex_exit(&fn_lock->flk_lock);
1539 		ASSERT(fnp->fn_open == 0);
1540 		ASSERT(fnp->fn_dest->fn_open == 0);
1541 		if (fnp->fn_mp) {
1542 			freemsg(fnp->fn_mp);
1543 			fnp->fn_mp = NULL;
1544 			fnp->fn_count = 0;
1545 		}
1546 		if (fnp->fn_pcredp != NULL) {
1547 			crfree(fnp->fn_pcredp);
1548 			fnp->fn_pcredp = NULL;
1549 		}
1550 		if (fnp->fn_flag & ISPIPE) {
1551 			fifonode_t *fn_dest = fnp->fn_dest;
1552 
1553 			vp = FTOV(fn_dest);
1554 			if (fn_dest->fn_mp) {
1555 				freemsg(fn_dest->fn_mp);
1556 				fn_dest->fn_mp = NULL;
1557 				fn_dest->fn_count = 0;
1558 			}
1559 			if (fn_dest->fn_pcredp != NULL) {
1560 				crfree(fn_dest->fn_pcredp);
1561 				fn_dest->fn_pcredp = NULL;
1562 			}
1563 			kmem_cache_free(pipe_cache, (fifodata_t *)fn_lock);
1564 		} else
1565 			kmem_cache_free(fnode_cache, (fifodata_t *)fn_lock);
1566 	} else {
1567 		mutex_exit(&fn_lock->flk_lock);
1568 	}
1569 }
1570 
1571 /*
1572  * If shadowing a vnode, apply the VOP_FID to it.
1573  * Otherwise, return EINVAL.
1574  */
1575 int
1576 fifo_fid(vnode_t *vp, fid_t *fidfnp)
1577 {
1578 	if (VTOF(vp)->fn_realvp)
1579 		return (VOP_FID(VTOF(vp)->fn_realvp, fidfnp));
1580 	else
1581 		return (EINVAL);
1582 }
1583 
1584 /*
1585  * Lock a fifonode.
1586  */
1587 /* ARGSUSED */
1588 int
1589 fifo_rwlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
1590 {
1591 	return (-1);
1592 }
1593 
1594 /*
1595  * Unlock a fifonode.
1596  */
1597 /* ARGSUSED */
1598 void
1599 fifo_rwunlock(vnode_t *vp, int write_lock, caller_context_t *ctp)
1600 {
1601 }
1602 
1603 /*
1604  * Return error since seeks are not allowed on pipes.
1605  */
1606 /*ARGSUSED*/
1607 int
1608 fifo_seek(vnode_t *vp, offset_t ooff, offset_t *noffp)
1609 {
1610 	return (ESPIPE);
1611 }
1612 
1613 /*
1614  * If there is a realvp associated with vp, return it.
1615  */
1616 int
1617 fifo_realvp(vnode_t *vp, vnode_t **vpp)
1618 {
1619 	vnode_t *rvp;
1620 
1621 	if ((rvp = VTOF(vp)->fn_realvp) != NULL) {
1622 		vp = rvp;
1623 		if (VOP_REALVP(vp, &rvp) == 0)
1624 			vp = rvp;
1625 	}
1626 
1627 	*vpp = vp;
1628 	return (0);
1629 }
1630 
1631 /*
1632  * Poll for interesting events on a stream pipe
1633  */
1634 int
1635 fifo_poll(vnode_t *vp, short events, int anyyet, short *reventsp,
1636 	pollhead_t **phpp)
1637 {
1638 	fifonode_t	*fnp, *fn_dest;
1639 	fifolock_t	*fn_lock;
1640 	int		retevents;
1641 	struct stdata	*stp;
1642 
1643 	ASSERT(vp->v_stream != NULL);
1644 
1645 	stp = vp->v_stream;
1646 	retevents	= 0;
1647 	fnp		= VTOF(vp);
1648 	fn_dest		= fnp->fn_dest;
1649 	fn_lock		= fnp->fn_lock;
1650 
1651 	polllock(&stp->sd_pollist, &fn_lock->flk_lock);
1652 
1653 	/*
1654 	 * see if FIFO/pipe open
1655 	 */
1656 	if ((fnp->fn_flag & FIFOISOPEN) == 0) {
1657 		if (((events & (POLLIN | POLLRDNORM | POLLPRI | POLLRDBAND)) &&
1658 		    fnp->fn_rcnt == 0) ||
1659 		    ((events & (POLLWRNORM | POLLWRBAND)) &&
1660 		    fnp->fn_wcnt == 0)) {
1661 			mutex_exit(&fnp->fn_lock->flk_lock);
1662 			*reventsp = POLLERR;
1663 			return (0);
1664 		}
1665 	}
1666 
1667 	/*
1668 	 * if not in fast mode, let the stream head take care of it
1669 	 */
1670 	if (!(fnp->fn_flag & FIFOFAST)) {
1671 		mutex_exit(&fnp->fn_lock->flk_lock);
1672 		goto stream_mode;
1673 	}
1674 
1675 	/*
1676 	 * If this is a pipe.. check to see if the other
1677 	 * end is gone.  If we are a fifo, check to see
1678 	 * if write end is gone.
1679 	 */
1680 
1681 	if ((fnp->fn_flag & ISPIPE) && (fn_dest->fn_open == 0)) {
1682 		retevents = POLLHUP;
1683 	} else if ((fnp->fn_flag & (FIFOCLOSE | ISPIPE)) == FIFOCLOSE &&
1684 	    (fn_dest->fn_wcnt == 0)) {
1685 		/*
1686 		 * no writer at other end.
1687 		 * it was closed (versus yet to be opened)
1688 		 */
1689 			retevents = POLLHUP;
1690 	} else if (events & (POLLWRNORM | POLLWRBAND)) {
1691 		if (events & POLLWRNORM) {
1692 			if (fn_dest->fn_count < Fifohiwat)
1693 				retevents = POLLWRNORM;
1694 			else
1695 				fnp->fn_flag |= FIFOHIWATW;
1696 		}
1697 		/*
1698 		 * This is always true for fast pipes
1699 		 * (Note: will go to STREAMS mode if band data is written)
1700 		 */
1701 		if (events & POLLWRBAND)
1702 			retevents |= POLLWRBAND;
1703 	}
1704 	if (events & (POLLIN | POLLRDNORM)) {
1705 		if (fnp->fn_count)
1706 			retevents |= (events & (POLLIN | POLLRDNORM));
1707 	}
1708 
1709 	/*
1710 	 * if we happened to get something, return
1711 	 */
1712 
1713 	if ((*reventsp = (short)retevents) != 0) {
1714 		mutex_exit(&fnp->fn_lock->flk_lock);
1715 		return (0);
1716 	}
1717 
1718 	/*
1719 	 * If poll() has not found any events yet, set up event cell
1720 	 * to wake up the poll if a requested event occurs on this
1721 	 * pipe/fifo.
1722 	 */
1723 	if (!anyyet) {
1724 		if (events & POLLWRNORM)
1725 			fnp->fn_flag |= FIFOPOLLW;
1726 		if (events & (POLLIN | POLLRDNORM))
1727 			fnp->fn_flag |= FIFOPOLLR;
1728 		if (events & POLLRDBAND)
1729 			fnp->fn_flag |= FIFOPOLLRBAND;
1730 		/*
1731 		 * XXX Don't like exposing this from streams
1732 		 */
1733 		*phpp = &stp->sd_pollist;
1734 	}
1735 	mutex_exit(&fnp->fn_lock->flk_lock);
1736 	return (0);
1737 stream_mode:
1738 	return (strpoll(stp, events, anyyet, reventsp, phpp));
1739 }
1740 
1741 /*
1742  * POSIX pathconf() support.
1743  */
1744 /* ARGSUSED */
1745 int
1746 fifo_pathconf(vnode_t *vp, int cmd, ulong_t *valp, cred_t *cr)
1747 {
1748 	ulong_t val;
1749 	int error = 0;
1750 
1751 	switch (cmd) {
1752 
1753 	case _PC_LINK_MAX:
1754 		val = MAXLINK;
1755 		break;
1756 
1757 	case _PC_MAX_CANON:
1758 		val = MAX_CANON;
1759 		break;
1760 
1761 	case _PC_MAX_INPUT:
1762 		val = MAX_INPUT;
1763 		break;
1764 
1765 	case _PC_NAME_MAX:
1766 		error = EINVAL;
1767 		break;
1768 
1769 	case _PC_PATH_MAX:
1770 	case _PC_SYMLINK_MAX:
1771 		val = MAXPATHLEN;
1772 		break;
1773 
1774 	case _PC_PIPE_BUF:
1775 		val = PIPE_BUF;
1776 		break;
1777 
1778 	case _PC_NO_TRUNC:
1779 		if (vp->v_vfsp->vfs_flag & VFS_NOTRUNC)
1780 			val = 1;	/* NOTRUNC is enabled for vp */
1781 		else
1782 			val = (ulong_t)-1;
1783 		break;
1784 
1785 	case _PC_VDISABLE:
1786 		val = _POSIX_VDISABLE;
1787 		break;
1788 
1789 	case _PC_CHOWN_RESTRICTED:
1790 		if (rstchown)
1791 			val = rstchown;		/* chown restricted enabled */
1792 		else
1793 			val = (ulong_t)-1;
1794 		break;
1795 
1796 	case _PC_FILESIZEBITS:
1797 		val = (ulong_t)-1;
1798 		break;
1799 
1800 	default:
1801 		if (VTOF(vp)->fn_realvp)
1802 			error = VOP_PATHCONF(VTOF(vp)->fn_realvp, cmd,
1803 			    &val, cr);
1804 		else
1805 			error = EINVAL;
1806 		break;
1807 	}
1808 
1809 	if (error == 0)
1810 		*valp = val;
1811 	return (error);
1812 }
1813 
1814 /*
1815  * If shadowing a vnode, apply VOP_SETSECATTR to it.
1816  * Otherwise, return NOSYS.
1817  */
1818 int
1819 fifo_setsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *crp)
1820 {
1821 	int error;
1822 
1823 	/*
1824 	 * The acl(2) system call tries to grab the write lock on the
1825 	 * file when setting an ACL, but fifofs does not implement
1826 	 * VOP_RWLOCK or VOP_RWUNLOCK, so we do it here instead.
1827 	 */
1828 	if (VTOF(vp)->fn_realvp) {
1829 		(void) VOP_RWLOCK(VTOF(vp)->fn_realvp, V_WRITELOCK_TRUE, NULL);
1830 		error = VOP_SETSECATTR(VTOF(vp)->fn_realvp, vsap, flag, crp);
1831 		VOP_RWUNLOCK(VTOF(vp)->fn_realvp, V_WRITELOCK_TRUE, NULL);
1832 		return (error);
1833 	} else
1834 		return (fs_nosys());
1835 }
1836 
1837 /*
1838  * If shadowing a vnode, apply VOP_GETSECATTR to it. Otherwise, fabricate
1839  * an ACL from the permission bits that fifo_getattr() makes up.
1840  */
1841 int
1842 fifo_getsecattr(struct vnode *vp, vsecattr_t *vsap, int flag, struct cred *crp)
1843 {
1844 	if (VTOF(vp)->fn_realvp)
1845 		return (VOP_GETSECATTR(VTOF(vp)->fn_realvp, vsap, flag, crp));
1846 	else
1847 		return (fs_fab_acl(vp, vsap, flag, crp));
1848 }
1849