xref: /illumos-gate/usr/src/uts/common/fs/sockfs/sockfilter.c (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <sys/systm.h>
26 #include <sys/sysmacros.h>
27 #include <sys/cmn_err.h>
28 #include <sys/disp.h>
29 #include <sys/list.h>
30 #include <sys/mutex.h>
31 #include <sys/note.h>
32 #include <sys/rwlock.h>
33 #include <sys/stropts.h>
34 #include <sys/taskq.h>
35 #include <sys/socketvar.h>
36 #include <fs/sockfs/sockcommon.h>
37 #include <fs/sockfs/sockfilter_impl.h>
38 
39 /*
40  * Socket Filter Framework
41  *
42  * Socket filter entry (sof_entry_t):
43  *
 *   There exists one entry for each configured filter (done via soconfig(1M)),
 *   and they are all in sof_entry_list. In addition to the global list, each
 *   sockparams entry maintains a list of the filters that are interested in
 *   that particular socket type. So a filter entry may be referenced by
 *   multiple sockparams. The set of sockparams referencing a filter may change
 *   as socket types are added and/or removed from the system. Both
 *   sof_entry_list and the sockparams lists are protected by sockconf_lock.
51  *
52  *   Each filter entry has a ref count which is incremented whenever a filter
53  *   is attached to a socket. An entry is marked SOFEF_CONDEMED when it is
54  *   unconfigured, which will result in the entry being freed when its ref
55  *   count reaches zero.
56  *
57  * Socket filter module (sof_module_t):
58  *
59  *   Modules are created by sof_register() and placed in sof_module_list,
60  *   which is protected by sof_module_lock. Each module has a reference count
61  *   that is incremented when a filter entry is using the module. A module
62  *   can be destroyed by sof_unregister() only when its ref count is zero.
63  *
64  * Socket filter instance (sof_instance_t):
65  *
66  *   Whenever a filter is attached to a socket (sonode), a new instance is
67  *   created. The socket is guaranteed to be single threaded when filters are
68  *   being attached/detached. The instance uses the sonode's so_lock for
69  *   protection.
70  *
71  *   The lifetime of an instance is the same as the socket it's attached to.
72  *
73  * How things link together:
74  *
75  *      sockparams.sp_{auto,prog}_filters -> sp_filter_t -> sp_filter_t
76  *      ^                                    |              |
77  *      |                                    |              |
78  *   sonode.so_filter_top -> sof_instance_t  |              |
79  *                                     |     |              |
80  *                                     v     v              v
81  *    sof_entry_list -> sof_entry_t -> sof_entry -> ... -> sof_entry_t
82  *                                     |
83  *                                     v
84  *           sof_module_list -> sof_module_t -> ... -> sof_module_t
85  */
86 
87 static list_t 	sof_entry_list;		/* list of configured filters */
88 
89 static list_t	sof_module_list;	/* list of loaded filter modules */
90 static kmutex_t	sof_module_lock;	/* protect the module list */
91 
92 static sof_kstat_t	sof_stat;
93 static kstat_t 		*sof_stat_ksp;
94 
95 #ifdef DEBUG
96 static int socket_filter_debug = 0;
97 #endif
98 
99 /*
100  * A connection that has been deferred for more than `sof_defer_drop_time'
101  * ticks can be dropped to make room for new connections. A connection that
102  * is to be dropped is moved over to `sof_close_deferred_list' where it will
103  * be closed by sof_close_deferred() (which is running on a taskq). Connections
104  * will not be moved over to the close list if it grows larger than
105  * `sof_close_deferred_max_backlog'.
106  */
107 clock_t		sof_defer_drop_time = 3000;
108 uint_t		sof_close_deferred_max_backlog = 1000;
109 
110 taskq_t		*sof_close_deferred_taskq;
111 boolean_t	sof_close_deferred_running;
112 uint_t		sof_close_deferred_backlog;
113 list_t		sof_close_deferred_list;
114 kmutex_t	sof_close_deferred_lock;
115 
116 static void	sof_close_deferred(void *);
117 
118 static void		sof_module_rele(sof_module_t *);
119 static sof_module_t 	*sof_module_hold_by_name(const char *, const char *);
120 
121 static int		sof_entry_load_module(sof_entry_t *);
122 static void 		sof_entry_hold(sof_entry_t *);
123 static void 		sof_entry_rele(sof_entry_t *);
124 static int 		sof_entry_kstat_create(sof_entry_t *);
125 static void 		sof_entry_kstat_destroy(sof_entry_t *);
126 
127 static sof_instance_t 	*sof_instance_create(sof_entry_t *, struct sonode *);
128 static void		sof_instance_destroy(sof_instance_t *);
129 
130 static int
131 sof_kstat_update(kstat_t *ksp, int rw)
132 {
133 	_NOTE(ARGUNUSED(ksp));
134 
135 	if (rw == KSTAT_WRITE)
136 		return (EACCES);
137 
138 	sof_stat.sofks_defer_close_backlog.value.ui64 =
139 	    sof_close_deferred_backlog;
140 
141 	return (0);
142 }
143 
144 void
145 sof_init(void)
146 {
147 	list_create(&sof_entry_list, sizeof (sof_entry_t),
148 	    offsetof(sof_entry_t, sofe_node));
149 	list_create(&sof_module_list, sizeof (sof_module_t),
150 	    offsetof(sof_module_t, sofm_node));
151 	list_create(&sof_close_deferred_list, sizeof (struct sonode),
152 	    offsetof(struct sonode, so_acceptq_node));
153 
154 	sof_close_deferred_taskq = taskq_create("sof_close_deferred_taskq",
155 	    1, minclsyspri, 1, INT_MAX, TASKQ_PREPOPULATE);
156 	sof_close_deferred_running = B_FALSE;
157 	sof_close_deferred_backlog = 0;
158 
159 	mutex_init(&sof_close_deferred_lock, NULL, MUTEX_DEFAULT, 0);
160 	mutex_init(&sof_module_lock, NULL, MUTEX_DEFAULT, 0);
161 
162 	sof_stat_ksp = kstat_create("sockfs", 0, "sockfilter", "misc",
163 	    KSTAT_TYPE_NAMED, sizeof (sof_kstat_t) / sizeof (kstat_named_t),
164 	    KSTAT_FLAG_VIRTUAL);
165 
166 	if (sof_stat_ksp == NULL)
167 		return;
168 
169 	kstat_named_init(&sof_stat.sofks_defer_closed, "defer_closed",
170 	    KSTAT_DATA_UINT64);
171 	kstat_named_init(&sof_stat.sofks_defer_close_backlog,
172 	    "defer_close_backlog", KSTAT_DATA_UINT64);
173 	kstat_named_init(&sof_stat.sofks_defer_close_failed_backlog_too_big,
174 	    "defer_close_failed_backlog_too_big", KSTAT_DATA_UINT64);
175 
176 	sof_stat_ksp->ks_data = &sof_stat;
177 	sof_stat_ksp->ks_update = sof_kstat_update;
178 	kstat_install(sof_stat_ksp);
179 }
180 
181 /*
182  * Process filter options.
183  */
184 static int
185 sof_setsockopt_impl(struct sonode *so, int option_name,
186     const void *optval, socklen_t optlen, struct cred *cr)
187 {
188 	struct sockparams *sp = so->so_sockparams;
189 	sof_entry_t *ent = NULL;
190 	sp_filter_t *fil;
191 	sof_instance_t *inst;
192 	sof_rval_t rval;
193 	int error;
194 
195 	_NOTE(ARGUNUSED(optlen));
196 
197 	/*
198 	 * Is the filter in a state where filters can be attached?
199 	 */
200 	if (!(so->so_state & SS_FILOP_OK))
201 		return (EINVAL);
202 
203 	if (option_name == FIL_ATTACH) {
204 		/*
205 		 * Make sure there isn't already another instance of the
206 		 * same filter attached to the socket.
207 		 */
208 		for (inst = so->so_filter_top; inst != NULL;
209 		    inst = inst->sofi_next) {
210 			if (strncmp(inst->sofi_filter->sofe_name,
211 			    (const char *)optval, SOF_MAXNAMELEN) == 0)
212 				return (EEXIST);
213 		}
214 		/* Look up the filter. */
215 		rw_enter(&sockconf_lock, RW_READER);
216 		for (fil = list_head(&sp->sp_prog_filters); fil != NULL;
217 		    fil = list_next(&sp->sp_prog_filters, fil)) {
218 			ent = fil->spf_filter;
219 			ASSERT(ent->sofe_flags & SOFEF_PROG);
220 
221 			if (strncmp(ent->sofe_name, (const char *)optval,
222 			    SOF_MAXNAMELEN) == 0)
223 				break;
224 		}
225 		/* No such filter */
226 		if (fil == NULL) {
227 			rw_exit(&sockconf_lock);
228 			return (ENOENT);
229 		}
230 		inst = sof_instance_create(ent, so);
231 		rw_exit(&sockconf_lock);
232 
233 		/* Failed to create an instance; must be out of memory */
234 		if (inst == NULL)
235 			return (ENOMEM);
236 
237 		/*
238 		 * This might be the first time the filter is being used,
239 		 * so try to load the module if it's not already registered.
240 		 */
241 		if (ent->sofe_mod == NULL &&
242 		    (error = sof_entry_load_module(ent)) != 0) {
243 			sof_instance_destroy(inst);
244 			return (error);
245 		}
246 
247 		/* Module loaded OK, so there must be an ops vector */
248 		ASSERT(ent->sofe_mod != NULL);
249 		inst->sofi_ops = &ent->sofe_mod->sofm_ops;
250 
251 		SOF_STAT_ADD(inst, tot_active_attach, 1);
252 		if (inst->sofi_ops->sofop_attach_active != NULL) {
253 			rval = inst->sofi_ops->sofop_attach_active(
254 			    (sof_handle_t)inst, so->so_family, so->so_type,
255 			    so->so_protocol, cr, &inst->sofi_cookie);
256 			if (rval != SOF_RVAL_CONTINUE) {
257 				switch (rval) {
258 				case SOF_RVAL_DETACH:
259 					/*
260 					 * Filter does not want to to attach.
261 					 * An error is returned so the user
262 					 * knows the request did not go
263 					 * through.
264 					 */
265 					error = EINVAL;
266 					break;
267 				default:
268 					SOF_STAT_ADD(inst, attach_failures, 1);
269 					/* Not a valid rval for active attach */
270 					ASSERT(rval != SOF_RVAL_DEFER);
271 					error = sof_rval2errno(rval);
272 					break;
273 				}
274 				sof_instance_destroy(inst);
275 				return (error);
276 			}
277 		}
278 		return (0);
279 	} else if (option_name == FIL_DETACH) {
280 		for (inst = so->so_filter_top; inst != NULL;
281 		    inst = inst->sofi_next) {
282 
283 			ent = inst->sofi_filter;
284 			if (strncmp(ent->sofe_name, (const char *)optval,
285 			    SOF_MAXNAMELEN) == 0)
286 				break;
287 		}
288 		if (inst == NULL)
289 			return (ENXIO);
290 
291 		/* automatic filters cannot be detached */
292 		if (inst->sofi_filter->sofe_flags & SOFEF_AUTO)
293 			return (EINVAL);
294 
295 		if (inst->sofi_ops->sofop_detach != NULL)
296 			inst->sofi_ops->sofop_detach((sof_handle_t)inst,
297 			    inst->sofi_cookie, cr);
298 		sof_instance_destroy(inst);
299 
300 		return (0);
301 	} else {
302 		return (EINVAL);
303 	}
304 }
305 
306 int
307 sof_setsockopt(struct sonode *so, int option_name,
308     const void *optval, socklen_t optlen, struct cred *cr)
309 {
310 	int error;
311 
312 	/*
313 	 * By grabbing the lock as a writer we ensure that no other socket
314 	 * operations can start while the filter stack is being manipulated.
315 	 *
316 	 * We do a tryenter so that in case there is an active thread we
317 	 * ask the caller to try again instead of blocking here until the
318 	 * other thread is done (which could be indefinitely in case of recv).
319 	 */
320 	if (!rw_tryenter(&so->so_fallback_rwlock, RW_WRITER)) {
321 		return (EAGAIN);
322 	}
323 
324 	/* Bail out if a fallback has taken place */
325 	if (so->so_state & SS_FALLBACK_COMP)
326 		error = EINVAL;
327 	else
328 		error = sof_setsockopt_impl(so, option_name, optval,
329 		    optlen, cr);
330 	rw_exit(&so->so_fallback_rwlock);
331 
332 	return (error);
333 }
334 
335 /*
336  * Get filter socket options.
337  */
338 static int
339 sof_getsockopt_impl(struct sonode *so, int option_name,
340     void *optval, socklen_t *optlenp, struct cred *cr)
341 {
342 	sof_instance_t *inst;
343 	struct fil_info *fi;
344 	socklen_t maxsz = *optlenp;
345 	int i;
346 	uint_t cnt;
347 
348 	_NOTE(ARGUNUSED(cr));
349 
350 	if (option_name == FIL_LIST) {
351 		fi = (struct fil_info *)optval;
352 
353 		if (maxsz < sizeof (*fi))
354 			return (EINVAL);
355 
356 		for (inst = so->so_filter_top, cnt = 0; inst != NULL;
357 		    inst = inst->sofi_next)
358 			cnt++;
359 		for (inst = so->so_filter_top, i = 0;
360 		    inst != NULL && (i+1) * sizeof (*fi) <= maxsz;
361 		    inst = inst->sofi_next, i++) {
362 			fi[i].fi_flags =
363 			    (inst->sofi_filter->sofe_flags & SOFEF_AUTO) ?
364 			    FILF_AUTO : FILF_PROG;
365 			if (inst->sofi_flags & SOFIF_BYPASS)
366 				fi[i].fi_flags |= FILF_BYPASS;
367 			(void) strncpy(fi[i].fi_name,
368 			    inst->sofi_filter->sofe_name, FILNAME_MAX);
369 			ASSERT(cnt > 0);
370 			fi[i].fi_pos = --cnt;
371 		}
372 		*optlenp = i * sizeof (*fi);
373 		return (0);
374 	} else {
375 		return (EINVAL);
376 	}
377 }
378 
379 int
380 sof_getsockopt(struct sonode *so, int option_name,
381     void *optval, socklen_t *optlenp, struct cred *cr)
382 {
383 	int error;
384 
385 	/*
386 	 * The fallback lock is used here to serialize set and get
387 	 * filter operations.
388 	 */
389 	rw_enter(&so->so_fallback_rwlock, RW_READER);
390 	if (so->so_state & SS_FALLBACK_COMP)
391 		error = EINVAL;
392 	else
393 		error = sof_getsockopt_impl(so, option_name, optval, optlenp,
394 		    cr);
395 	rw_exit(&so->so_fallback_rwlock);
396 
397 	return (error);
398 }
399 
400 /*
401  * The socket `so' wants to inherit the filter stack from `pso'.
402  * Returns 0 if all went well or an errno otherwise.
403  */
404 int
405 sof_sonode_inherit_filters(struct sonode *so, struct sonode *pso)
406 {
407 	sof_instance_t *inst, *pinst;
408 	sof_rval_t rval;
409 	int error;
410 	struct sockaddr_in6 laddrbuf, faddrbuf;
411 	struct sockaddr_in6 *laddr, *faddr;
412 	socklen_t laddrlen, faddrlen;
413 
414 	/*
415 	 * Make sure there is enough room to retrieve the addresses
416 	 */
417 	if (so->so_proto_props.sopp_maxaddrlen > sizeof (laddrbuf)) {
418 		laddr = kmem_zalloc(so->so_proto_props.sopp_maxaddrlen,
419 		    KM_NOSLEEP);
420 		if (laddr == NULL)
421 			return (ENOMEM);
422 		faddr = kmem_zalloc(so->so_proto_props.sopp_maxaddrlen,
423 		    KM_NOSLEEP);
424 		if (faddr == NULL) {
425 			kmem_free(laddr, so->so_proto_props.sopp_maxaddrlen);
426 			return (ENOMEM);
427 		}
428 		laddrlen = faddrlen = so->so_proto_props.sopp_maxaddrlen;
429 	} else {
430 		laddrlen = faddrlen = sizeof (laddrbuf);
431 		laddr = &laddrbuf;
432 		faddr = &faddrbuf;
433 	}
434 
435 	error = (*so->so_downcalls->sd_getpeername)
436 	    (so->so_proto_handle, (struct sockaddr *)faddr, &faddrlen, kcred);
437 	if (error != 0)
438 		goto out;
439 	error = (*so->so_downcalls->sd_getsockname)
440 	    (so->so_proto_handle, (struct sockaddr *)laddr, &laddrlen, kcred);
441 	if (error != 0)
442 		goto out;
443 
444 	/*
445 	 * The stack is built bottom up. Filters are allowed to modify the
446 	 * the foreign and local addresses during attach.
447 	 */
448 	for (pinst = pso->so_filter_bottom;
449 	    pinst != NULL && !(pinst->sofi_flags & SOFIF_BYPASS);
450 	    pinst = pinst->sofi_prev) {
451 		inst = sof_instance_create(pinst->sofi_filter, so);
452 		if (inst == NULL) {
453 			error = ENOMEM;
454 			goto out;
455 		}
456 		/*
457 		 * The filter module must be loaded since it's already
458 		 * attached to the listener.
459 		 */
460 		ASSERT(pinst->sofi_ops != NULL);
461 		inst->sofi_ops = pinst->sofi_ops;
462 
463 		SOF_STAT_ADD(inst, tot_passive_attach, 1);
464 		if (inst->sofi_ops->sofop_attach_passive != NULL) {
465 			rval = inst->sofi_ops->sofop_attach_passive(
466 			    (sof_handle_t)inst,
467 			    (sof_handle_t)pinst, pinst->sofi_cookie,
468 			    (struct sockaddr *)laddr, laddrlen,
469 			    (struct sockaddr *)faddr, faddrlen,
470 			    &inst->sofi_cookie);
471 			if (rval != SOF_RVAL_CONTINUE) {
472 				if (rval == SOF_RVAL_DEFER) {
473 					mutex_enter(&so->so_lock);
474 					inst->sofi_flags |= SOFIF_DEFER;
475 					so->so_state |= SS_FIL_DEFER;
476 					mutex_exit(&so->so_lock);
477 					so->so_filter_defertime =
478 					    ddi_get_lbolt();
479 					SOF_STAT_ADD(inst, ndeferred, 1);
480 				} else if (rval == SOF_RVAL_DETACH) {
481 					sof_instance_destroy(inst);
482 				} else {
483 					SOF_STAT_ADD(inst, attach_failures, 1);
484 					error = sof_rval2errno(rval);
485 					/*
486 					 * Filters that called attached will be
487 					 * destroyed when the socket goes away,
488 					 * after detach is called.
489 					 */
490 					goto out;
491 				}
492 			}
493 		}
494 	}
495 
496 out:
497 	if (laddr != &laddrbuf) {
498 		kmem_free(laddr, so->so_proto_props.sopp_maxaddrlen);
499 		kmem_free(faddr, so->so_proto_props.sopp_maxaddrlen);
500 	}
501 	return (error);
502 }
503 
504 /*
505  * Attach any automatic filters to sonode `so'. Returns 0 if all went well
506  * and an errno otherwise.
507  */
508 int
509 sof_sonode_autoattach_filters(struct sonode *so, cred_t *cr)
510 {
511 	struct sockparams *sp = so->so_sockparams;
512 	sp_filter_t *fil;
513 	sof_instance_t *inst;
514 	sof_rval_t rval;
515 	int error;
516 
517 	/*
518 	 * A created instance is added to the top of the sonode's filter
519 	 * stack, so traverse the config list in reverse order.
520 	 */
521 	rw_enter(&sockconf_lock, RW_READER);
522 	for (fil = list_tail(&sp->sp_auto_filters);
523 	    fil != NULL; fil = list_prev(&sp->sp_auto_filters, fil)) {
524 		ASSERT(fil->spf_filter->sofe_flags & SOFEF_AUTO);
525 		if (!sof_instance_create(fil->spf_filter, so)) {
526 			rw_exit(&sockconf_lock);
527 			error = ENOMEM; /* must have run out of memory */
528 			goto free_all;
529 		}
530 	}
531 	rw_exit(&sockconf_lock);
532 
533 	/*
534 	 * Notify each filter that it's being attached.
535 	 */
536 	inst = so->so_filter_top;
537 	while (inst != NULL) {
538 		sof_entry_t *ent = inst->sofi_filter;
539 		sof_instance_t *ninst = inst->sofi_next;
540 
541 		/*
542 		 * This might be the first time the filter is being used,
543 		 * so try to load the module if it's not already registered.
544 		 */
545 		if (ent->sofe_mod == NULL &&
546 		    (error = sof_entry_load_module(ent)) != 0)
547 			goto free_detached;
548 
549 		/* Module loaded OK, so there must be an ops vector */
550 		ASSERT(ent->sofe_mod != NULL);
551 		inst->sofi_ops = &ent->sofe_mod->sofm_ops;
552 
553 		SOF_STAT_ADD(inst, tot_active_attach, 1);
554 		if (inst->sofi_ops->sofop_attach_active != NULL) {
555 			rval = inst->sofi_ops->sofop_attach_active(
556 			    (sof_handle_t)inst, so->so_family, so->so_type,
557 			    so->so_protocol, cr, &inst->sofi_cookie);
558 			if (rval != SOF_RVAL_CONTINUE) {
559 				switch (rval) {
560 				case SOF_RVAL_DETACH:
561 					/* filter does not want to attach */
562 					sof_instance_destroy(inst);
563 					break;
564 				default:
565 					SOF_STAT_ADD(inst, attach_failures, 1);
566 					/* Not a valid rval for active attach */
567 					ASSERT(rval != SOF_RVAL_DEFER);
568 					error = sof_rval2errno(rval);
569 					goto free_detached;
570 				}
571 			}
572 		}
573 		inst = ninst;
574 	}
575 	return (0);
576 
577 free_all:
578 	inst = so->so_filter_top;
579 free_detached:
580 	ASSERT(inst != NULL);
581 	/*
582 	 * Destroy all filters for which attach was not called. The other
583 	 * filters will be destroyed (and detach called) when the socket
584 	 * is freed.
585 	 */
586 	do {
587 		sof_instance_t *t = inst->sofi_next;
588 		sof_instance_destroy(inst);
589 		inst = t;
590 	} while (inst != NULL);
591 
592 	return (error);
593 }
594 
595 /*
596  * Detaches and frees all filters attached to sonode `so'.
597  */
598 void
599 sof_sonode_cleanup(struct sonode *so)
600 {
601 	sof_instance_t *inst;
602 
603 	while ((inst = so->so_filter_top) != NULL) {
604 		(inst->sofi_ops->sofop_detach)((sof_handle_t)inst,
605 		    inst->sofi_cookie, kcred);
606 		sof_instance_destroy(inst);
607 	}
608 }
609 
610 /*
611  * Notifies all active filters attached to `so' about the `event' and
612  * where `arg' is an event specific argument.
613  */
614 void
615 sof_sonode_notify_filters(struct sonode *so, sof_event_t event, uintptr_t arg)
616 {
617 	sof_instance_t *inst;
618 
619 	for (inst = so->so_filter_bottom; inst != NULL;
620 	    inst = inst->sofi_prev) {
621 		if (SOF_INTERESTED(inst, notify))
622 			(inst->sofi_ops->sofop_notify)((sof_handle_t)inst,
623 			    inst->sofi_cookie, event, arg);
624 	}
625 }
626 
627 /*
628  * The socket `so' is closing. Notify filters and make sure that there
629  * are no pending tx operations.
630  */
631 void
632 sof_sonode_closing(struct sonode *so)
633 {
634 	/*
635 	 * Notify filters that the socket is being closed. It's OK for
636 	 * filters to inject data.
637 	 */
638 	sof_sonode_notify_filters(so, SOF_EV_CLOSING, (uintptr_t)B_TRUE);
639 
640 	/*
641 	 * Stop any future attempts to inject data, and wait for any
642 	 * pending operations to complete. This has to be done to ensure
643 	 * that no data is sent down to the protocol once a close
644 	 * downcall has been made.
645 	 */
646 	mutex_enter(&so->so_lock);
647 	so->so_state |= SS_FIL_STOP;
648 	while (so->so_filter_tx > 0)
649 		cv_wait(&so->so_closing_cv, &so->so_lock);
650 	mutex_exit(&so->so_lock);
651 }
652 
653 /*
654  * Called when socket `so' wants to get rid of a deferred connection.
655  * Returns TRUE if a connection was dropped.
656  */
657 boolean_t
658 sof_sonode_drop_deferred(struct sonode *so)
659 {
660 	struct sonode *def;
661 	clock_t now = ddi_get_lbolt();
662 
663 	if (sof_close_deferred_backlog > sof_close_deferred_max_backlog) {
664 		SOF_GLOBAL_STAT_BUMP(defer_close_failed_backlog_too_big);
665 		return (B_FALSE);
666 	}
667 	mutex_enter(&so->so_acceptq_lock);
668 	if ((def = list_head(&so->so_acceptq_defer)) != NULL &&
669 	    (now - def->so_filter_defertime) > sof_defer_drop_time) {
670 		list_remove(&so->so_acceptq_defer, def);
671 		so->so_acceptq_len--;
672 		mutex_exit(&so->so_acceptq_lock);
673 		def->so_listener = NULL;
674 	} else {
675 		mutex_exit(&so->so_acceptq_lock);
676 		return (B_FALSE);
677 	}
678 
679 	mutex_enter(&sof_close_deferred_lock);
680 	list_insert_tail(&sof_close_deferred_list, def);
681 	sof_close_deferred_backlog++;
682 	if (!sof_close_deferred_running) {
683 		mutex_exit(&sof_close_deferred_lock);
684 		(void) taskq_dispatch(sof_close_deferred_taskq,
685 		    sof_close_deferred, NULL, TQ_NOSLEEP);
686 	} else {
687 		mutex_exit(&sof_close_deferred_lock);
688 	}
689 	return (B_TRUE);
690 }
691 
692 /*
693  * Called from a taskq to close connections that have been deferred for
694  * too long.
695  */
696 void
697 sof_close_deferred(void *unused)
698 {
699 	struct sonode *drop;
700 
701 	_NOTE(ARGUNUSED(unused));
702 
703 	mutex_enter(&sof_close_deferred_lock);
704 	if (!sof_close_deferred_running) {
705 		sof_close_deferred_running = B_TRUE;
706 		while ((drop =
707 		    list_remove_head(&sof_close_deferred_list)) != NULL) {
708 			sof_close_deferred_backlog--;
709 			mutex_exit(&sof_close_deferred_lock);
710 
711 			SOF_GLOBAL_STAT_BUMP(defer_closed);
712 			(void) socket_close(drop, 0, kcred);
713 			socket_destroy(drop);
714 
715 			mutex_enter(&sof_close_deferred_lock);
716 		}
717 		sof_close_deferred_running = B_FALSE;
718 		ASSERT(sof_close_deferred_backlog == 0);
719 	}
720 	mutex_exit(&sof_close_deferred_lock);
721 }
722 
723 /*
724  * Creates a new filter instance from the entry `ent' and attaches
725  * it to the sonode `so'. On success, return a pointer to the created
726  * instance.
727  *
728  * The new instance will be placed on the top of the filter stack.
729  *
730  * The caller is responsible for assigning the instance's ops vector and
731  * calling the filter's attach callback.
732  *
733  * No locks are held while manipulating the sonode fields because we are
734  * guaranteed that this operation is serialized.
735  *
736  * We can be sure that the entry `ent' will not disappear, because the
737  * caller is either holding sockconf_lock (in case of an active open), or is
738  * already holding a reference (in case of a passive open, the listener has
739  * one).
740  */
741 static sof_instance_t *
742 sof_instance_create(sof_entry_t *ent, struct sonode *so)
743 {
744 	sof_instance_t *inst;
745 
746 	inst = kmem_zalloc(sizeof (sof_instance_t), KM_NOSLEEP);
747 	if (inst == NULL)
748 		return (NULL);
749 	sof_entry_hold(ent);
750 	inst->sofi_filter = ent;
751 	inst->sofi_sonode = so;
752 
753 	inst->sofi_next = so->so_filter_top;
754 	if (so->so_filter_top != NULL)
755 		so->so_filter_top->sofi_prev = inst;
756 	else
757 		so->so_filter_bottom = inst;
758 	so->so_filter_top = inst;
759 	so->so_filter_active++;
760 
761 	return (inst);
762 }
763 /*
764  * Destroys the filter instance `inst' and unlinks it from the sonode.
765  *
766  * Any filter private state must be destroyed (via the detach callback)
767  * before the instance is destroyed.
768  */
769 static void
770 sof_instance_destroy(sof_instance_t *inst)
771 {
772 	struct sonode *so = inst->sofi_sonode;
773 
774 	ASSERT(inst->sofi_sonode != NULL);
775 	ASSERT(inst->sofi_filter != NULL);
776 	ASSERT(inst->sofi_prev != NULL || so->so_filter_top == inst);
777 	ASSERT(inst->sofi_next != NULL || so->so_filter_bottom == inst);
778 
779 	if (inst->sofi_prev != NULL)
780 		inst->sofi_prev->sofi_next = inst->sofi_next;
781 	else
782 		so->so_filter_top = inst->sofi_next;
783 
784 	if (inst->sofi_next != NULL)
785 		inst->sofi_next->sofi_prev = inst->sofi_prev;
786 	else
787 		so->so_filter_bottom = inst->sofi_prev;
788 
789 	if (!(inst->sofi_flags & SOFIF_BYPASS)) {
790 		ASSERT(so->so_filter_active > 0);
791 		so->so_filter_active--;
792 	}
793 	if (inst->sofi_flags & SOFIF_DEFER)
794 		SOF_STAT_ADD(inst, ndeferred, -1);
795 	sof_entry_rele(inst->sofi_filter);
796 	kmem_free(inst, sizeof (sof_instance_t));
797 }
798 
799 static sof_entry_t *
800 sof_entry_find(const char *name)
801 {
802 	sof_entry_t *ent;
803 
804 	for (ent = list_head(&sof_entry_list); ent != NULL;
805 	    ent = list_next(&sof_entry_list, ent)) {
806 		if (strncmp(ent->sofe_name, name, SOF_MAXNAMELEN) == 0)
807 			return (ent);
808 	}
809 	return (NULL);
810 }
811 
812 void
813 sof_entry_free(sof_entry_t *ent)
814 {
815 	ASSERT(ent->sofe_refcnt == 0);
816 	ASSERT(!list_link_active(&ent->sofe_node));
817 
818 	if (ent->sofe_hintarg != NULL) {
819 		ASSERT(ent->sofe_hint == SOF_HINT_BEFORE ||
820 		    ent->sofe_hint == SOF_HINT_AFTER);
821 		kmem_free(ent->sofe_hintarg, strlen(ent->sofe_hintarg) + 1);
822 		ent->sofe_hintarg = NULL;
823 	}
824 	if (ent->sofe_socktuple_cnt > 0) {
825 		ASSERT(ent->sofe_socktuple != NULL);
826 		kmem_free(ent->sofe_socktuple,
827 		    sizeof (sof_socktuple_t) * ent->sofe_socktuple_cnt);
828 		ent->sofe_socktuple = NULL;
829 		ent->sofe_socktuple_cnt = 0;
830 	}
831 	sof_entry_kstat_destroy(ent);
832 
833 	mutex_destroy(&ent->sofe_lock);
834 	kmem_free(ent, sizeof (sof_entry_t));
835 }
836 
837 static int
838 sof_entry_kstat_update(kstat_t *ksp, int rw)
839 {
840 	sof_entry_t *ent = ksp->ks_private;
841 
842 	if (rw == KSTAT_WRITE)
843 		return (EACCES);
844 
845 	ent->sofe_kstat.sofek_nactive.value.ui64 = ent->sofe_refcnt;
846 
847 	return (0);
848 }
849 
850 /*
851  * Create the kstat for filter entry `ent'.
852  */
853 static int
854 sof_entry_kstat_create(sof_entry_t *ent)
855 {
856 	char name[SOF_MAXNAMELEN + 7];
857 
858 	(void) snprintf(name, sizeof (name), "filter_%s", ent->sofe_name);
859 	ent->sofe_ksp = kstat_create("sockfs", 0, name, "misc",
860 	    KSTAT_TYPE_NAMED,
861 	    sizeof (sof_entry_kstat_t) / sizeof (kstat_named_t),
862 	    KSTAT_FLAG_VIRTUAL);
863 
864 	if (ent->sofe_ksp == NULL)
865 		return (ENOMEM);
866 
867 	kstat_named_init(&ent->sofe_kstat.sofek_nactive, "nactive",
868 	    KSTAT_DATA_UINT64);
869 	kstat_named_init(&ent->sofe_kstat.sofek_tot_active_attach,
870 	    "tot_active_attach", KSTAT_DATA_UINT64);
871 	kstat_named_init(&ent->sofe_kstat.sofek_tot_passive_attach,
872 	    "tot_passive_attach", KSTAT_DATA_UINT64);
873 	kstat_named_init(&ent->sofe_kstat.sofek_ndeferred, "ndeferred",
874 	    KSTAT_DATA_UINT64);
875 	kstat_named_init(&ent->sofe_kstat.sofek_attach_failures,
876 	    "attach_failures", KSTAT_DATA_UINT64);
877 
878 	ent->sofe_ksp->ks_data = &ent->sofe_kstat;
879 	ent->sofe_ksp->ks_update = sof_entry_kstat_update;
880 	ent->sofe_ksp->ks_private = ent;
881 	kstat_install(ent->sofe_ksp);
882 
883 	return (0);
884 }
885 
886 /*
887  * Destroys the kstat for filter entry `ent'.
888  */
889 static void
890 sof_entry_kstat_destroy(sof_entry_t *ent)
891 {
892 	if (ent->sofe_ksp != NULL) {
893 		kstat_delete(ent->sofe_ksp);
894 		ent->sofe_ksp = NULL;
895 	}
896 }
897 
898 static void
899 sof_entry_hold(sof_entry_t *ent)
900 {
901 	mutex_enter(&ent->sofe_lock);
902 	ent->sofe_refcnt++;
903 	mutex_exit(&ent->sofe_lock);
904 }
905 
906 /*
907  * Decrement the reference count for `ent'. The entry will
908  * drop its' reference on the filter module whenever its'
909  * ref count reaches zero.
910  */
911 static void
912 sof_entry_rele(sof_entry_t *ent)
913 {
914 	mutex_enter(&ent->sofe_lock);
915 	if (--ent->sofe_refcnt == 0) {
916 		sof_module_t *mod = ent->sofe_mod;
917 		ent->sofe_mod = NULL;
918 		if (ent->sofe_flags & SOFEF_CONDEMED) {
919 			mutex_exit(&ent->sofe_lock);
920 			sof_entry_free(ent);
921 		} else {
922 			mutex_exit(&ent->sofe_lock);
923 		}
924 		if (mod != NULL)
925 			sof_module_rele(mod);
926 	} else {
927 		mutex_exit(&ent->sofe_lock);
928 	}
929 }
930 
931 /*
932  * Loads the module used by `ent'
933  */
934 static int
935 sof_entry_load_module(sof_entry_t *ent)
936 {
937 	sof_module_t *mod = sof_module_hold_by_name(ent->sofe_name,
938 	    ent->sofe_modname);
939 
940 	if (mod == NULL)
941 		return (EINVAL);
942 
943 	mutex_enter(&ent->sofe_lock);
944 	/* Another thread might have already loaded the module */
945 	ASSERT(ent->sofe_mod == mod || ent->sofe_mod == NULL);
946 	if (ent->sofe_mod != NULL) {
947 		mutex_exit(&ent->sofe_lock);
948 		sof_module_rele(mod);
949 	} else {
950 		ent->sofe_mod = mod;
951 		mutex_exit(&ent->sofe_lock);
952 	}
953 
954 	return (0);
955 }
956 
957 /*
958  * Add filter entry `ent' to the global list and attach it to all sockparam
959  * entries which the filter is interested in. Upon successful return the filter
960  * will be available for applications to use.
961  */
962 int
963 sof_entry_add(sof_entry_t *ent)
964 {
965 	int error;
966 
967 	/*
968 	 * We hold sockconf_lock as a WRITER for the whole operation,
969 	 * so all operations must be non-blocking.
970 	 */
971 	rw_enter(&sockconf_lock, RW_WRITER);
972 	if (sof_entry_find(ent->sofe_name) != NULL) {
973 		rw_exit(&sockconf_lock);
974 		return (EEXIST);
975 	}
976 
977 	/* The entry is unique; create the kstats */
978 	if (sof_entry_kstat_create(ent) != 0) {
979 		rw_exit(&sockconf_lock);
980 		return (ENOMEM);
981 	}
982 
983 	/*
984 	 * Attach the filter to sockparams of interest.
985 	 */
986 	if ((error = sockparams_new_filter(ent)) != 0) {
987 		sof_entry_kstat_destroy(ent);
988 		rw_exit(&sockconf_lock);
989 		return (error);
990 	}
991 	/*
992 	 * Everything is OK; insert in global list.
993 	 */
994 	list_insert_tail(&sof_entry_list, ent);
995 	rw_exit(&sockconf_lock);
996 
997 	return (0);
998 }
999 
1000 /*
1001  * Removes the filter entry `ent' from global list and all sockparams.
1002  */
1003 sof_entry_t *
1004 sof_entry_remove_by_name(const char *name)
1005 {
1006 	sof_entry_t *ent;
1007 
1008 	rw_enter(&sockconf_lock, RW_WRITER);
1009 	if ((ent = sof_entry_find(name)) == NULL) {
1010 		rw_exit(&sockconf_lock);
1011 		return (NULL);
1012 	}
1013 	list_remove(&sof_entry_list, ent);
1014 	sockparams_filter_cleanup(ent);
1015 	sof_entry_kstat_destroy(ent);
1016 	rw_exit(&sockconf_lock);
1017 
1018 	return (ent);
1019 }
1020 
1021 /*
1022  * Filter entry `ent' will process sockparams entry `sp' to determine whether
1023  * it should be attached to the sockparams. It should be called whenever a new
1024  * filter or sockparams is being added. Returns zero either if the filter is
1025  * not interested in the sockparams or if it successfully attached to the
1026  * sockparams. On failure an errno is returned.
1027  */
1028 int
1029 sof_entry_proc_sockparams(sof_entry_t *ent, struct sockparams *sp)
1030 {
1031 	uint_t i;
1032 	sof_socktuple_t *t = ent->sofe_socktuple;
1033 	sp_filter_t *new, *fil;
1034 
1035 	/* Only interested in non-TPI sockets */
1036 	if (strcmp(sp->sp_smod_name, SOTPI_SMOD_NAME) == 0)
1037 		return (0);
1038 
1039 	for (i = 0; i < ent->sofe_socktuple_cnt; i++) {
1040 		if (t[i].sofst_family == sp->sp_family &&
1041 		    t[i].sofst_type == sp->sp_type &&
1042 		    t[i].sofst_protocol == sp->sp_protocol)
1043 			break;
1044 	}
1045 	/* This filter is not interested in the sockparams entry */
1046 	if (i == ent->sofe_socktuple_cnt)
1047 		return (0);
1048 
1049 	new = kmem_zalloc(sizeof (sp_filter_t), KM_NOSLEEP);
1050 	if (new == NULL)
1051 		return (ENOMEM);
1052 
1053 	new->spf_filter = ent;
1054 	if (ent->sofe_flags & SOFEF_PROG) {
1055 		/* placement is irrelevant for programmatic filters */
1056 		list_insert_head(&sp->sp_prog_filters, new);
1057 		return (0);
1058 	} else {
1059 		ASSERT(ent->sofe_flags & SOFEF_AUTO);
1060 		/*
1061 		 * If the filter specifies a placement hint, then make sure
1062 		 * it can be satisfied.
1063 		 */
1064 		switch (ent->sofe_hint) {
1065 		case SOF_HINT_TOP:
1066 			if ((fil = list_head(&sp->sp_auto_filters)) != NULL &&
1067 			    fil->spf_filter->sofe_hint == SOF_HINT_TOP)
1068 				break;
1069 			list_insert_head(&sp->sp_auto_filters, new);
1070 			return (0);
1071 		case SOF_HINT_BOTTOM:
1072 			if ((fil = list_tail(&sp->sp_auto_filters)) != NULL &&
1073 			    fil->spf_filter->sofe_hint == SOF_HINT_BOTTOM)
1074 				break;
1075 			list_insert_tail(&sp->sp_auto_filters, new);
1076 			return (0);
1077 		case SOF_HINT_BEFORE:
1078 		case SOF_HINT_AFTER:
1079 			for (fil = list_head(&sp->sp_auto_filters);
1080 			    fil != NULL;
1081 			    fil = list_next(&sp->sp_auto_filters, fil)) {
1082 				if (strncmp(ent->sofe_hintarg,
1083 				    fil->spf_filter->sofe_name,
1084 				    SOF_MAXNAMELEN) == 0)
1085 				break;
1086 			}
1087 
1088 			if (fil != NULL) {
1089 				if (ent->sofe_hint == SOF_HINT_BEFORE) {
1090 					if (fil->spf_filter->sofe_hint ==
1091 					    SOF_HINT_TOP)
1092 						break;
1093 					list_insert_before(&sp->sp_auto_filters,
1094 					    fil, new);
1095 				} else {
1096 					if (fil->spf_filter->sofe_hint ==
1097 					    SOF_HINT_BOTTOM)
1098 						break;
1099 					list_insert_after(&sp->sp_auto_filters,
1100 					    fil, new);
1101 				}
1102 				return (0);
1103 			}
1104 			/*FALLTHRU*/
1105 		case SOF_HINT_NONE:
1106 			/*
1107 			 * Insert the new filter at the beginning as long as it
1108 			 * does not violate a TOP hint, otherwise insert in the
1109 			 * next suitable location.
1110 			 */
1111 			if ((fil = list_head(&sp->sp_auto_filters)) != NULL &&
1112 			    fil->spf_filter->sofe_hint == SOF_HINT_TOP) {
1113 				list_insert_after(&sp->sp_auto_filters, fil,
1114 				    new);
1115 			} else {
1116 				list_insert_head(&sp->sp_auto_filters, new);
1117 			}
1118 			return (0);
1119 		}
1120 		/* Failed to insert the filter */
1121 		kmem_free(new, sizeof (sp_filter_t));
1122 		return (ENOSPC);
1123 	}
1124 }
1125 
1126 /*
1127  * Remove all filter entries attached to the sockparams entry `sp'.
1128  */
1129 void
1130 sof_sockparams_fini(struct sockparams *sp)
1131 {
1132 	sp_filter_t *fil;
1133 
1134 	ASSERT(!list_link_active(&sp->sp_node));
1135 
1136 	while ((fil = list_remove_head(&sp->sp_auto_filters)) != NULL)
1137 		kmem_free(fil, sizeof (sp_filter_t));
1138 	while ((fil = list_remove_head(&sp->sp_prog_filters)) != NULL)
1139 		kmem_free(fil, sizeof (sp_filter_t));
1140 }
1141 
1142 /*
1143  * A new sockparams is being added. Walk all filters and attach those that
1144  * are interested in the entry.
1145  *
1146  * It should be called when the sockparams entry is about to be made available
1147  * for use and while holding the sockconf_lock.
1148  */
1149 int
1150 sof_sockparams_init(struct sockparams *sp)
1151 {
1152 	sof_entry_t *ent;
1153 
1154 	ASSERT(RW_WRITE_HELD(&sockconf_lock));
1155 
1156 	for (ent = list_head(&sof_entry_list); ent != NULL;
1157 	    ent = list_next(&sof_entry_list, ent)) {
1158 		if (sof_entry_proc_sockparams(ent, sp) != 0) {
1159 			sof_sockparams_fini(sp);
1160 			return (ENOMEM);
1161 		}
1162 	}
1163 	return (0);
1164 }
1165 
1166 static sof_module_t *
1167 sof_module_find(const char *name)
1168 {
1169 	sof_module_t *ent;
1170 
1171 	ASSERT(MUTEX_HELD(&sof_module_lock));
1172 
1173 	for (ent = list_head(&sof_module_list); ent != NULL;
1174 	    ent = list_next(&sof_module_list, ent))
1175 		if (strcmp(ent->sofm_name, name) == 0)
1176 			return (ent);
1177 	return (NULL);
1178 }
1179 
1180 /*
1181  * Returns a pointer to a module identified by `name' with its ref count
1182  * bumped. An attempt to load the module is done if it's not found in the
1183  * global list.
1184  */
1185 sof_module_t *
1186 sof_module_hold_by_name(const char *name, const char *modname)
1187 {
1188 	ddi_modhandle_t handle = NULL;
1189 	sof_module_t *mod = NULL;
1190 	char *modpath;
1191 	int error;
1192 
1193 	/*
1194 	 * We'll go through the loop at most two times, which will only
1195 	 * happen if the module needs to be loaded.
1196 	 */
1197 	for (;;) {
1198 		mutex_enter(&sof_module_lock);
1199 		mod = sof_module_find(name);
1200 		if (mod != NULL || handle != NULL)
1201 			break;
1202 		mutex_exit(&sof_module_lock);
1203 
1204 		modpath = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1205 		(void) snprintf(modpath, MAXPATHLEN, "%s/%s", SOF_MODPATH,
1206 		    modname);
1207 		handle = ddi_modopen(modpath, KRTLD_MODE_FIRST, &error);
1208 		kmem_free(modpath, MAXPATHLEN);
1209 		/* Failed to load, then bail */
1210 		if (handle == NULL) {
1211 			cmn_err(CE_WARN,
1212 			    "Failed to load socket filter module: %s (err %d)",
1213 			    modname, error);
1214 			return (NULL);
1215 		}
1216 	}
1217 	if (mod != NULL)
1218 		mod->sofm_refcnt++;
1219 	mutex_exit(&sof_module_lock);
1220 
1221 	if (handle != NULL) {
1222 		(void) ddi_modclose(handle);
1223 		/*
1224 		 * The module was loaded, but the filter module could not be
1225 		 * found. It's likely a misconfigured filter.
1226 		 */
1227 		if (mod == NULL) {
1228 			cmn_err(CE_WARN,
1229 			    "Socket filter module %s was loaded, but did not" \
1230 			    "register. Filter %s is likely misconfigured.",
1231 			    modname, name);
1232 		}
1233 	}
1234 
1235 	return (mod);
1236 }
1237 
1238 void
1239 sof_module_rele(sof_module_t *mod)
1240 {
1241 	mutex_enter(&sof_module_lock);
1242 	mod->sofm_refcnt--;
1243 	mutex_exit(&sof_module_lock);
1244 }
1245 
1246 int
1247 sof_rval2errno(sof_rval_t rval)
1248 {
1249 	if (rval > SOF_RVAL_CONTINUE) {
1250 		return ((int)rval);
1251 	} else {
1252 #ifdef DEBUG
1253 		if (socket_filter_debug)
1254 			printf("sof_rval2errno: invalid rval '%d'\n", rval);
1255 #endif
1256 		return (EINVAL);
1257 	}
1258 }
1259 
1260 /*
1261  * Walk through all the filters attached to `so' and allow each filter
1262  * to process the data using its data_out callback. `mp' is a b_cont chain.
1263  *
1264  * Returns the processed mblk, or NULL if mblk was consumed. The mblk might
1265  * have been consumed as a result of an error, in which case `errp' is set to
1266  * the appropriate errno.
1267  */
1268 mblk_t *
1269 sof_filter_data_out_from(struct sonode *so, sof_instance_t *start,
1270     mblk_t *mp, struct nmsghdr *msg, cred_t *cr, int *errp)
1271 {
1272 	sof_instance_t *inst;
1273 	sof_rval_t rval;
1274 
1275 	_NOTE(ARGUNUSED(so));
1276 
1277 	for (inst = start; inst != NULL; inst = inst->sofi_next) {
1278 		if (!SOF_INTERESTED(inst, data_out))
1279 			continue;
1280 		mp = (inst->sofi_ops->sofop_data_out)((sof_handle_t)inst,
1281 		    inst->sofi_cookie, mp, msg, cr, &rval);
1282 		DTRACE_PROBE2(filter__data, (sof_instance_t), inst,
1283 		    (mblk_t *), mp);
1284 		if (mp == NULL) {
1285 			*errp = sof_rval2errno(rval);
1286 			break;
1287 		}
1288 	}
1289 	return (mp);
1290 }
1291 
1292 /*
1293  * Walk through all the filters attached to `so' and allow each filter
1294  * to process the data using its data_in_proc callback. `mp' is the start of
1295  * a possible b_next chain, and `lastmp' points to the last mblk in the chain.
1296  *
1297  * Returns the processed mblk, or NULL if all mblks in the chain were
1298  * consumed. `lastmp' is updated to point to the last mblk in the processed
1299  * chain.
1300  */
1301 mblk_t *
1302 sof_filter_data_in_proc(struct sonode *so, mblk_t *mp, mblk_t **lastmp)
1303 {
1304 	sof_instance_t *inst;
1305 	size_t len = 0, orig = 0;
1306 	ssize_t diff = 0;
1307 	mblk_t *retmp = NULL, *tailmp, *nextmp;
1308 
1309 	*lastmp = NULL;
1310 	do {
1311 		nextmp = mp->b_next;
1312 		mp->b_next = mp->b_prev = NULL;
1313 		len = orig = msgdsize(mp);
1314 		for (inst = so->so_filter_bottom; inst != NULL;
1315 		    inst = inst->sofi_prev) {
1316 			if (!SOF_INTERESTED(inst, data_in_proc))
1317 				continue;
1318 			mp = (inst->sofi_ops->sofop_data_in_proc)(
1319 			    (sof_handle_t)inst, inst->sofi_cookie, mp,
1320 			    kcred, &len);
1321 			if (mp == NULL)
1322 				break;
1323 		}
1324 		DTRACE_PROBE2(filter__data, (sof_instance_t), inst,
1325 		    (mblk_t *), mp);
1326 		diff += len - orig;
1327 		if (mp == NULL)
1328 			continue;
1329 
1330 		for (tailmp = mp; tailmp->b_cont != NULL;
1331 		    tailmp = tailmp->b_cont)
1332 			;
1333 		mp->b_prev = tailmp;
1334 
1335 		if (*lastmp == NULL)
1336 			retmp = mp;
1337 		else
1338 			(*lastmp)->b_next = mp;
1339 		*lastmp = mp;
1340 	} while ((mp = nextmp) != NULL);
1341 
1342 	/*
1343 	 * The size of the chain has changed; make sure the rcv queue
1344 	 * stays consistent and check if the flow control state should
1345 	 * change.
1346 	 */
1347 	if (diff != 0) {
1348 		DTRACE_PROBE2(filter__data__adjust__qlen,
1349 		    (struct sonode *), so, (size_t), diff);
1350 		mutex_enter(&so->so_lock);
1351 		so->so_rcv_queued += diff;
1352 		/* so_check_flow_control drops so_lock */
1353 		(void) so_check_flow_control(so);
1354 	}
1355 
1356 	return (retmp);
1357 }
1358 
/*
 * Filter hook for the bind operation on `so'. The entire body, including
 * the return statement, is supplied by the __SOF_FILTER_OP macro, which is
 * invoked for the `bind' op with the caller's credentials `cr' and the
 * `addr'/`addrlen' arguments.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1359 int
1360 sof_filter_bind(struct sonode *so, struct sockaddr *addr,
1361     socklen_t *addrlen, cred_t *cr)
1362 {
1363 	__SOF_FILTER_OP(so, bind, cr, addr, addrlen)
1364 }
1365 
/*
 * Filter hook for the listen operation on `so'. The entire body, including
 * the return statement, is supplied by the __SOF_FILTER_OP macro, which is
 * invoked for the `listen' op with the caller's credentials `cr' and a
 * pointer to the backlog value.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1366 int
1367 sof_filter_listen(struct sonode *so, int *backlogp, cred_t *cr)
1368 {
1369 	__SOF_FILTER_OP(so, listen, cr, backlogp)
1370 }
1371 
/*
 * Filter hook for the connect operation on `so'. The entire body, including
 * the return statement, is supplied by the __SOF_FILTER_OP macro, which is
 * invoked for the `connect' op with the caller's credentials `cr' and the
 * `addr'/`addrlen' arguments.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1372 int
1373 sof_filter_connect(struct sonode *so, struct sockaddr *addr,
1374     socklen_t *addrlen, cred_t *cr)
1375 {
1376 	__SOF_FILTER_OP(so, connect, cr, addr, addrlen)
1377 }
1378 
1379 int
1380 sof_filter_accept(struct sonode *so, cred_t *cr)
1381 {
1382 	sof_instance_t *inst;
1383 	sof_rval_t rval;
1384 
1385 	for (inst = so->so_filter_top; inst != NULL; inst = inst->sofi_next) {
1386 		if (!SOF_INTERESTED(inst, accept))
1387 			continue;
1388 		rval = (inst->sofi_ops->sofop_accept)((sof_handle_t)inst,
1389 		    inst->sofi_cookie, cr);
1390 		DTRACE_PROBE2(filter__action, (sof_instance_t), inst,
1391 		    (sof_rval_t), rval);
1392 		if (rval != SOF_RVAL_CONTINUE) {
1393 			ASSERT(rval != SOF_RVAL_RETURN);
1394 			return (sof_rval2errno(rval));
1395 		}
1396 	}
1397 	return (-1);
1398 }
1399 
/*
 * Filter hook for the shutdown operation on `so'. The entire body, including
 * the return statement, is supplied by the __SOF_FILTER_OP macro, which is
 * invoked for the `shutdown' op with the caller's credentials `cr' and a
 * pointer to the `how' value.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1400 int
1401 sof_filter_shutdown(struct sonode *so, int *howp, cred_t *cr)
1402 {
1403 	__SOF_FILTER_OP(so, shutdown, cr, howp)
1404 }
1405 
/*
 * Filter hook for the getsockname operation on `so'. The entire body,
 * including the return statement, is supplied by the __SOF_FILTER_OP macro,
 * which is invoked for the `getsockname' op with the caller's credentials
 * `cr' and the `addr'/`addrlenp' arguments.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1406 int
1407 sof_filter_getsockname(struct sonode *so, struct sockaddr *addr,
1408     socklen_t *addrlenp, cred_t *cr)
1409 {
1410 	__SOF_FILTER_OP(so, getsockname, cr, addr, addrlenp)
1411 }
1412 
/*
 * Filter hook for the getpeername operation on `so'. The entire body,
 * including the return statement, is supplied by the __SOF_FILTER_OP macro,
 * which is invoked for the `getpeername' op with the caller's credentials
 * `cr' and the `addr'/`addrlenp' arguments.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1413 int
1414 sof_filter_getpeername(struct sonode *so, struct sockaddr *addr,
1415     socklen_t *addrlenp, cred_t *cr)
1416 {
1417 	__SOF_FILTER_OP(so, getpeername, cr, addr, addrlenp)
1418 }
1419 
/*
 * Filter hook for the setsockopt operation on `so'. The entire body,
 * including the return statement, is supplied by the __SOF_FILTER_OP macro,
 * which is invoked for the `setsockopt' op with the caller's credentials
 * `cr' and the option level, name, value and length pointer.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1420 int
1421 sof_filter_setsockopt(struct sonode *so, int level, int option_name,
1422     void *optval, socklen_t *optlenp, cred_t *cr)
1423 {
1424 	__SOF_FILTER_OP(so, setsockopt, cr, level, option_name,
1425 	    optval, optlenp)
1426 }
1427 
/*
 * Filter hook for the getsockopt operation on `so'. The entire body,
 * including the return statement, is supplied by the __SOF_FILTER_OP macro,
 * which is invoked for the `getsockopt' op with the caller's credentials
 * `cr' and the option level, name, value buffer and length pointer.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1428 int
1429 sof_filter_getsockopt(struct sonode *so, int level, int option_name,
1430     void *optval, socklen_t *optlenp, cred_t *cr)
1431 {
1432 	__SOF_FILTER_OP(so, getsockopt, cr, level, option_name,
1433 	    optval, optlenp)
1434 }
1435 
/*
 * Filter hook for the ioctl operation on `so'. The entire body, including
 * the return statement, is supplied by the __SOF_FILTER_OP macro, which is
 * invoked for the `ioctl' op with the caller's credentials `cr' and the
 * `cmd', `arg', `mode' and `rvalp' arguments.
 * NOTE(review): the macro is defined outside this part of the file; it
 * presumably walks the filters on `so' much like sof_filter_accept() does --
 * confirm against its definition.
 */
1436 int
1437 sof_filter_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
1438     int32_t *rvalp, cred_t *cr)
1439 {
1440 	__SOF_FILTER_OP(so, ioctl, cr, cmd, arg, mode, rvalp)
1441 }
1442 
1443 /*
1444  * sof_register(version, name, ops, flags)
1445  *
1446  * Register a socket filter identified by name `name' and which should use
1447  * the ops vector `ops' for event notification. `flags' should be set to 0.
1448  * On success 0 is returned, otherwise an errno is returned.
1449  */
1450 int
1451 sof_register(int version, const char *name, const sof_ops_t *ops, int flags)
1452 {
1453 	sof_module_t *mod;
1454 
1455 	_NOTE(ARGUNUSED(flags));
1456 
1457 	if (version != SOF_VERSION)
1458 		return (EINVAL);
1459 
1460 	mod = kmem_zalloc(sizeof (sof_module_t), KM_SLEEP);
1461 	mod->sofm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
1462 	(void) strcpy(mod->sofm_name, name);
1463 	mod->sofm_ops = *ops;
1464 
1465 	mutex_enter(&sof_module_lock);
1466 	if (sof_module_find(name) != NULL) {
1467 		mutex_exit(&sof_module_lock);
1468 		kmem_free(mod->sofm_name, strlen(mod->sofm_name) + 1);
1469 		kmem_free(mod, sizeof (sof_module_t));
1470 		return (EEXIST);
1471 	}
1472 	list_insert_tail(&sof_module_list, mod);
1473 	mutex_exit(&sof_module_lock);
1474 
1475 	return (0);
1476 }
1477 
1478 /*
1479  * sof_unregister(name)
1480  *
1481  * Try to unregister the socket filter identified by `name'. If the filter
1482  * is successfully unregistered, then 0 is returned, otherwise an errno is
1483  * returned.
1484  */
1485 int
1486 sof_unregister(const char *name)
1487 {
1488 	sof_module_t *mod;
1489 
1490 	mutex_enter(&sof_module_lock);
1491 	mod = sof_module_find(name);
1492 	if (mod != NULL) {
1493 		if (mod->sofm_refcnt == 0) {
1494 			list_remove(&sof_module_list, mod);
1495 			mutex_exit(&sof_module_lock);
1496 
1497 			kmem_free(mod->sofm_name, strlen(mod->sofm_name) + 1);
1498 			kmem_free(mod, sizeof (sof_module_t));
1499 			return (0);
1500 		} else {
1501 			mutex_exit(&sof_module_lock);
1502 			return (EBUSY);
1503 		}
1504 	}
1505 	mutex_exit(&sof_module_lock);
1506 
1507 	return (ENXIO);
1508 }
1509 
1510 /*
1511  * sof_newconn_ready(handle)
1512  *
1513  * The filter `handle` no longer wants to defer the socket it is attached
1514  * to. A newconn notification will be generated if there is no other filter
1515  * that wants the socket deferred.
1516  */
1517 void
1518 sof_newconn_ready(sof_handle_t handle)
1519 {
1520 	sof_instance_t *inst = (sof_instance_t *)handle;
1521 	struct sonode *so = inst->sofi_sonode;
1522 	struct sonode *pso = so->so_listener;
1523 
1524 	mutex_enter(&so->so_lock);
1525 	if (!(inst->sofi_flags & SOFIF_DEFER)) {
1526 		mutex_exit(&so->so_lock);
1527 		return;
1528 	}
1529 	ASSERT(so->so_state & SS_FIL_DEFER);
1530 	inst->sofi_flags &= ~SOFIF_DEFER;
1531 	SOF_STAT_ADD(inst, ndeferred, -1);
1532 
1533 	/*
1534 	 * Check if any other filter has deferred the socket. The last
1535 	 * filter to remove its DEFER flag will be the one generating the
1536 	 * wakeup.
1537 	 */
1538 	for (inst = so->so_filter_top; inst != NULL; inst = inst->sofi_next) {
1539 		/* Still deferred; nothing to do */
1540 		if (inst->sofi_flags & SOFIF_DEFER) {
1541 			mutex_exit(&so->so_lock);
1542 			return;
1543 		}
1544 	}
1545 	so->so_state &= ~SS_FIL_DEFER;
1546 	mutex_exit(&so->so_lock);
1547 
1548 	/*
1549 	 * The socket is no longer deferred; move it over to the regular
1550 	 * accept list and notify the user. However, it is possible that
1551 	 * the socket is being dropped by sof_sonode_drop_deferred(), so
1552 	 * first make sure the socket is on the deferred list.
1553 	 */
1554 	mutex_enter(&pso->so_acceptq_lock);
1555 	if (!list_link_active(&so->so_acceptq_node)) {
1556 		mutex_exit(&pso->so_acceptq_lock);
1557 		return;
1558 	}
1559 	list_remove(&pso->so_acceptq_defer, so);
1560 	list_insert_tail(&pso->so_acceptq_list, so);
1561 	cv_signal(&pso->so_acceptq_cv);
1562 	mutex_exit(&pso->so_acceptq_lock);
1563 
1564 	mutex_enter(&pso->so_lock);
1565 	so_notify_newconn(pso);		/* so_notify_newconn drops the lock */
1566 }
1567 
1568 /*
1569  * sof_bypass(handle)
1570  *
1571  * Stop generating callbacks for `handle'.
1572  */
1573 void
1574 sof_bypass(sof_handle_t handle)
1575 {
1576 	sof_instance_t *inst = (sof_instance_t *)handle;
1577 	struct sonode *so = inst->sofi_sonode;
1578 
1579 	mutex_enter(&so->so_lock);
1580 	if (!(inst->sofi_flags & SOFIF_BYPASS)) {
1581 		inst->sofi_flags |= SOFIF_BYPASS;
1582 		ASSERT(so->so_filter_active > 0);
1583 		so->so_filter_active--;
1584 	}
1585 	mutex_exit(&so->so_lock);
1586 }
1587 
1588 /*
1589  * sof_rcv_flowctrl(handle, enable)
1590  *
1591  * If `enable' is TRUE, then recv side flow control will be asserted for
1592  * the socket associated with `handle'. When `enable' is FALSE the filter
1593  * indicates that it no longer wants to assert flow control, however, the
1594  * condition will not be removed until there are no other filters asserting
1595  * flow control and there is space available in the receive buffer.
1596  */
1597 void
1598 sof_rcv_flowctrl(sof_handle_t handle, boolean_t enable)
1599 {
1600 	sof_instance_t *inst = (sof_instance_t *)handle;
1601 	struct sonode *so = inst->sofi_sonode;
1602 
1603 	mutex_enter(&so->so_lock);
1604 	if (enable) {
1605 		inst->sofi_flags |= SOFIF_RCV_FLOWCTRL;
1606 		so->so_flowctrld = B_TRUE;
1607 		so->so_state |= SS_FIL_RCV_FLOWCTRL;
1608 		mutex_exit(&so->so_lock);
1609 	} else {
1610 		inst->sofi_flags &= ~SOFIF_RCV_FLOWCTRL;
1611 		for (inst = so->so_filter_top; inst != NULL;
1612 		    inst = inst->sofi_next) {
1613 			/* another filter is asserting flow control */
1614 			if (inst->sofi_flags & SOFIF_RCV_FLOWCTRL) {
1615 				mutex_exit(&so->so_lock);
1616 				return;
1617 			}
1618 		}
1619 		so->so_state &= ~SS_FIL_RCV_FLOWCTRL;
1620 		/* so_check_flow_control drops so_lock */
1621 		(void) so_check_flow_control(so);
1622 	}
1623 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1624 }
1625 
1626 /*
1627  * sof_snd_flowctrl(handle, enable)
1628  *
1629  * If `enable' is TRUE, then send side flow control will be asserted for
1630  * the socket associated with `handle'. When `enable' is FALSE the filter
1631  * indicates that is no longer wants to assert flow control, however, the
1632  * condition will not be removed until there are no other filters asserting
1633  * flow control and there are tx buffers available.
1634  */
1635 void
1636 sof_snd_flowctrl(sof_handle_t handle, boolean_t enable)
1637 {
1638 	sof_instance_t *inst = (sof_instance_t *)handle;
1639 	struct sonode *so = inst->sofi_sonode;
1640 
1641 	mutex_enter(&so->so_lock);
1642 	if (enable) {
1643 		inst->sofi_flags |= SOFIF_SND_FLOWCTRL;
1644 		so->so_state |= SS_FIL_SND_FLOWCTRL;
1645 	} else {
1646 		inst->sofi_flags &= ~SOFIF_SND_FLOWCTRL;
1647 		for (inst = so->so_filter_top; inst != NULL;
1648 		    inst = inst->sofi_next) {
1649 			if (inst->sofi_flags & SOFIF_SND_FLOWCTRL) {
1650 				mutex_exit(&so->so_lock);
1651 				return;
1652 			}
1653 		}
1654 		so->so_state &= ~SS_FIL_SND_FLOWCTRL;
1655 		/*
1656 		 * Wake up writer if the socket is no longer flow controlled.
1657 		 */
1658 		if (!SO_SND_FLOWCTRLD(so)) {
1659 			/* so_notify_writable drops so_lock */
1660 			so_notify_writable(so);
1661 			return;
1662 		}
1663 	}
1664 	mutex_exit(&so->so_lock);
1665 }
1666 
1667 /*
1668  * sof_get_cookie(handle)
1669  *
1670  * Returns the cookie used by `handle'.
1671  */
1672 void *
1673 sof_get_cookie(sof_handle_t handle)
1674 {
1675 	return (((sof_instance_t *)handle)->sofi_cookie);
1676 }
1677 
1678 /*
1679  * sof_cas_cookie(handle, old, new)
1680  *
1681  * Compare-and-swap the cookie used by `handle'.
1682  */
1683 void *
1684 sof_cas_cookie(sof_handle_t handle, void *old, void *new)
1685 {
1686 	sof_instance_t *inst = (sof_instance_t *)handle;
1687 
1688 	return (atomic_cas_ptr(&inst->sofi_cookie, old, new));
1689 }
1690 
1691 /*
1692  * sof_inject_data_out(handle, mp, msg, flowctrld)
1693  *
1694  * Submit `mp' for transmission. `msg' cannot by NULL, and may contain
1695  * ancillary data and destination address. Returns 0 when successful
1696  * in which case `flowctrld' is updated. If flow controlled, no new data
1697  * should be injected until a SOF_EV_INJECT_DATA_OUT_OK event is observed.
1698  * In case of failure, an errno is returned.
1699  *
1700  * Filters that are lower in the stack than `handle' will see the data
1701  * before it is transmitted and may end up modifying or freeing the data.
1702  */
1703 int
1704 sof_inject_data_out(sof_handle_t handle, mblk_t *mp, struct nmsghdr *msg,
1705     boolean_t *flowctrld)
1706 {
1707 	sof_instance_t *inst = (sof_instance_t *)handle;
1708 	struct sonode *so = inst->sofi_sonode;
1709 	int error;
1710 
1711 	mutex_enter(&so->so_lock);
1712 	if (so->so_state & SS_FIL_STOP) {
1713 		mutex_exit(&so->so_lock);
1714 		freemsg(mp);
1715 		return (EPIPE);
1716 	}
1717 	so->so_filter_tx++;
1718 	mutex_exit(&so->so_lock);
1719 
1720 	error = so_sendmblk_impl(inst->sofi_sonode, msg, FNONBLOCK,
1721 	    kcred, &mp, inst->sofi_next, B_TRUE);
1722 
1723 	mutex_enter(&so->so_lock);
1724 	ASSERT(so->so_filter_tx > 0);
1725 	so->so_filter_tx--;
1726 	if (so->so_state & SS_CLOSING)
1727 		cv_signal(&so->so_closing_cv);
1728 	mutex_exit(&so->so_lock);
1729 
1730 	if (mp != NULL)
1731 		freemsg(mp);
1732 
1733 	if (error == ENOSPC) {
1734 		*flowctrld = B_TRUE;
1735 		error = 0;
1736 	} else {
1737 		*flowctrld = B_FALSE;
1738 	}
1739 
1740 	return (error);
1741 }
1742 
1743 /*
1744  * sof_inject_data_in(handle, mp, len, flag, flowctrld)
1745  *
1746  * Enqueue `mp' which contains `len' bytes of M_DATA onto the socket
1747  * associated with `handle'. `flags' should be set to 0. Returns 0 when
1748  * successful in which case `flowctrld' is updated. If flow controlled,
1749  * no new data should be injected until a SOF_EV_INJECT_DATA_IN_OK event
1750  * is observed.  In case of failure, an errno is returned.
1751  *
1752  * Filters that are higher in the stack than `handle' will see the data
1753  * before it is enqueued on the receive queue and may end up modifying or
1754  * freeing the data.
1755  */
1756 int
1757 sof_inject_data_in(sof_handle_t handle, mblk_t *mp, size_t len, int flags,
1758     boolean_t *flowctrld)
1759 {
1760 	sof_instance_t *inst = (sof_instance_t *)handle;
1761 	ssize_t avail;
1762 	int error = 0;
1763 
1764 	ASSERT(flags == 0);
1765 	avail = so_queue_msg_impl(inst->sofi_sonode, mp, len, flags, &error,
1766 	    NULL, inst->sofi_prev);
1767 	/* fallback should never happen when there is an active filter */
1768 	ASSERT(error != EOPNOTSUPP);
1769 
1770 	*flowctrld = (avail > 0) ? B_FALSE : B_TRUE;
1771 	return (error);
1772 }
1773 
1774 /*
1775  * sof_newconn_move(handle, newparent)
1776  *
1777  * Private interface only to be used by KSSL.
1778  *
1779  * Moves the socket associated with `handle' from its current listening
1780  * socket to the listener associated with `newparent'. The socket being
1781  * moved must be in a deferred state and it is up to the consumer of the
1782  * interface to ensure that the `newparent' does not go away while this
1783  * operation is pending.
1784  */
1785 boolean_t
1786 sof_newconn_move(sof_handle_t handle, sof_handle_t newparent)
1787 {
1788 	sof_instance_t *inst = (sof_instance_t *)handle;
1789 	sof_instance_t *newpinst = (sof_instance_t *)newparent;
1790 	struct sonode *so, *old, *new;
1791 
1792 	so = inst->sofi_sonode;
1793 	ASSERT(so->so_state & SS_FIL_DEFER);
1794 
1795 	if (inst->sofi_next != NULL || inst->sofi_prev != NULL ||
1796 	    !(so->so_state & SS_FIL_DEFER))
1797 		return (B_FALSE);
1798 
1799 	old = so->so_listener;
1800 	mutex_enter(&old->so_acceptq_lock);
1801 	list_remove(&old->so_acceptq_defer, so);
1802 	old->so_acceptq_len--;
1803 	mutex_exit(&old->so_acceptq_lock);
1804 
1805 	new = newpinst->sofi_sonode;
1806 	mutex_enter(&new->so_acceptq_lock);
1807 	list_insert_tail(&new->so_acceptq_defer, so);
1808 	new->so_acceptq_len++;
1809 	mutex_exit(&new->so_acceptq_lock);
1810 
1811 	so->so_listener = new;
1812 
1813 	return (B_TRUE);
1814 }
1815