xref: /illumos-gate/usr/src/uts/common/inet/ipf/ip_fil_solaris.c (revision 56f33205c9ed776c3c909e07d52e94610a675740)
1 /*
2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
3  *
4  * See the IPFILTER.LICENCE file for details on licencing.
5  *
6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
7  * Use is subject to license terms.
8  */
9 
#if !defined(lint)
/*
 * SCCS and RCS version identification strings for this file.  They are
 * compiled out when building under lint.
 */
static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
#endif
14 
15 #include <sys/types.h>
16 #include <sys/errno.h>
17 #include <sys/param.h>
18 #include <sys/cpuvar.h>
19 #include <sys/open.h>
20 #include <sys/ioctl.h>
21 #include <sys/filio.h>
22 #include <sys/systm.h>
23 #include <sys/strsubr.h>
24 #include <sys/cred.h>
25 #include <sys/ddi.h>
26 #include <sys/sunddi.h>
27 #include <sys/ksynch.h>
28 #include <sys/kmem.h>
29 #include <sys/mkdev.h>
30 #include <sys/protosw.h>
31 #include <sys/socket.h>
32 #include <sys/dditypes.h>
33 #include <sys/cmn_err.h>
34 #include <sys/zone.h>
35 #include <net/if.h>
36 #include <net/af.h>
37 #include <net/route.h>
38 #include <netinet/in.h>
39 #include <netinet/in_systm.h>
40 #include <netinet/ip.h>
41 #include <netinet/ip_var.h>
42 #include <netinet/tcp.h>
43 #include <netinet/udp.h>
44 #include <netinet/tcpip.h>
45 #include <netinet/ip_icmp.h>
46 #include "netinet/ip_compat.h"
47 #ifdef	USE_INET6
48 # include <netinet/icmp6.h>
49 #endif
50 #include "netinet/ip_fil.h"
51 #include "netinet/ip_nat.h"
52 #include "netinet/ip_frag.h"
53 #include "netinet/ip_state.h"
54 #include "netinet/ip_auth.h"
55 #include "netinet/ip_proxy.h"
56 #include "netinet/ipf_stack.h"
57 #ifdef	IPFILTER_LOOKUP
58 # include "netinet/ip_lookup.h"
59 #endif
60 #include <inet/ip_ire.h>
61 
62 #include <sys/md5.h>
63 #include <sys/neti.h>
64 
/*
 * Forward declarations.
 *
 * The static entries are helpers and net hook callbacks private to this
 * file; the two extern iterators are provided by another source file.
 *
 * NOTE(review): frzerostats and ipf_geniter are declared here, but
 * iplioctl() in this file calls fr_zerostats() and ipf_genericiter() -
 * confirm whether these two prototypes are stale.
 */
static	int	frzerostats __P((caddr_t, ipf_stack_t *));
static	int	fr_setipfloopback __P((int, ipf_stack_t *));
static	int	fr_enableipf __P((ipf_stack_t *, int));
static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook __P((hook_data_t, int, int, void *));
static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
    void *));
static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
87 
/*
 * Pointers to IP stack tunables, only built for releases before Solaris 10.
 * They start out NULL; iplattach() points them at the matching IP
 * parameters, and they are dereferenced (after a NULL check) by
 * iplattach(), ipldetach() and fr_send_ip().  The pointed-to type differs
 * between releases, hence the nested conditionals.
 */
#if SOLARIS2 < 10
#if SOLARIS2 >= 7
u_int		*ip_ttl_ptr = NULL;
u_int		*ip_mtudisc = NULL;
# if SOLARIS2 >= 8
int		*ip_forwarding = NULL;
u_int		*ip6_forwarding = NULL;
# else
u_int		*ip_forwarding = NULL;
# endif
#else
u_long		*ip_ttl_ptr = NULL;
u_long		*ip_mtudisc = NULL;
u_long		*ip_forwarding = NULL;
#endif
#endif
104 
105 
106 /* ------------------------------------------------------------------------ */
107 /* Function:    ipldetach                                                   */
108 /* Returns:     int - 0 == success, else error.                             */
109 /* Parameters:  Nil                                                         */
110 /*                                                                          */
111 /* This function is responsible for undoing anything that might have been   */
112 /* done in a call to iplattach().  It must be able to clean up from a call  */
113 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
114 /* configures a table to be so large that we cannot allocate enough memory  */
115 /* for it.                                                                  */
116 /* ------------------------------------------------------------------------ */
117 int ipldetach(ifs)
118 ipf_stack_t *ifs;
119 {
120 
121 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
122 
123 #if SOLARIS2 < 10
124 
125 	if (ifs->ifs_fr_control_forwarding & 2) {
126 		if (ip_forwarding != NULL)
127 			*ip_forwarding = 0;
128 #if SOLARIS2 >= 8
129 		if (ip6_forwarding != NULL)
130 			*ip6_forwarding = 0;
131 #endif
132 	}
133 #endif
134 
135 	/*
136 	 * This lock needs to be dropped around the net_hook_unregister calls
137 	 * because we can deadlock here with:
138 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
139 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
140 	 */
141 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
142 
143 #define	UNDO_HOOK(_f, _b, _e, _h)					\
144 	do {								\
145 		if (ifs->_f != NULL) {					\
146 			if (ifs->_b) {					\
147 				ifs->_b = (net_hook_unregister(ifs->_f,	\
148 					   _e, ifs->_h) != 0);		\
149 				if (!ifs->_b) {				\
150 					hook_free(ifs->_h);		\
151 					ifs->_h = NULL;			\
152 				}					\
153 			} else if (ifs->_h != NULL) {			\
154 				hook_free(ifs->_h);			\
155 				ifs->_h = NULL;				\
156 			}						\
157 		}							\
158 		_NOTE(CONSTCOND)					\
159 	} while (0)
160 
161 	/*
162 	 * Remove IPv6 Hooks
163 	 */
164 	if (ifs->ifs_ipf_ipv6 != NULL) {
165 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
166 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
167 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
168 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
169 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
170 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
171 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
172 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
173 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
174 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
175 
176 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
177 			goto detach_failed;
178 		ifs->ifs_ipf_ipv6 = NULL;
179         }
180 
181 	/*
182 	 * Remove IPv4 Hooks
183 	 */
184 	if (ifs->ifs_ipf_ipv4 != NULL) {
185 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
186 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
187 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
188 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
189 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
190 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
191 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
192 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
193 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
194 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
195 
196 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
197 			goto detach_failed;
198 		ifs->ifs_ipf_ipv4 = NULL;
199 	}
200 
201 #undef UNDO_HOOK
202 
203 #ifdef	IPFDEBUG
204 	cmn_err(CE_CONT, "ipldetach()\n");
205 #endif
206 
207 	WRITE_ENTER(&ifs->ifs_ipf_global);
208 	fr_deinitialise(ifs);
209 
210 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
211 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
212 
213 	if (ifs->ifs_ipf_locks_done == 1) {
214 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
215 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
216 		RW_DESTROY(&ifs->ifs_ipf_tokens);
217 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
218 		ifs->ifs_ipf_locks_done = 0;
219 	}
220 
221 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
222 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
223 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
224 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
225 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
226 		return -1;
227 
228 	return 0;
229 
230 detach_failed:
231 	WRITE_ENTER(&ifs->ifs_ipf_global);
232 	return -1;
233 }
234 
235 int iplattach(ifs)
236 ipf_stack_t *ifs;
237 {
238 #if SOLARIS2 < 10
239 	int i;
240 #endif
241 	netid_t id = ifs->ifs_netid;
242 
243 #ifdef	IPFDEBUG
244 	cmn_err(CE_CONT, "iplattach()\n");
245 #endif
246 
247 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
248 	ifs->ifs_fr_flags = IPF_LOGGING;
249 #ifdef _KERNEL
250 	ifs->ifs_fr_update_ipid = 0;
251 #else
252 	ifs->ifs_fr_update_ipid = 1;
253 #endif
254 	ifs->ifs_fr_minttl = 4;
255 	ifs->ifs_fr_icmpminfragmtu = 68;
256 #if defined(IPFILTER_DEFAULT_BLOCK)
257 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
258 #else
259 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
260 #endif
261 
262 	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
263 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
264 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
265 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
266 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
267 	ifs->ifs_ipf_locks_done = 1;
268 
269 	if (fr_initialise(ifs) < 0)
270 		return -1;
271 
272 	HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
273 		  "ipfilter_hook4_nicevents", ifs);
274 	HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
275 		  "ipfilter_hook4_in", ifs);
276 	HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
277 		  "ipfilter_hook4_out", ifs);
278 	HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
279 		  "ipfilter_hook4_loop_in", ifs);
280 	HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
281 		  "ipfilter_hook4_loop_out", ifs);
282 
283 	/*
284 	 * If we hold this lock over all of the net_hook_register calls, we
285 	 * can cause a deadlock to occur with the following lock ordering:
286 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
287 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
288 	 */
289 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
290 
291 	/*
292 	 * Add IPv4 hooks
293 	 */
294 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
295 	if (ifs->ifs_ipf_ipv4 == NULL)
296 		goto hookup_failed;
297 
298 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
299 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
300 	if (!ifs->ifs_hook4_nic_events)
301 		goto hookup_failed;
302 
303 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
304 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
305 	if (!ifs->ifs_hook4_physical_in)
306 		goto hookup_failed;
307 
308 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
309 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
310 	if (!ifs->ifs_hook4_physical_out)
311 		goto hookup_failed;
312 
313 	if (ifs->ifs_ipf_loopback) {
314 		ifs->ifs_hook4_loopback_in = (net_hook_register(
315 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
316 		    ifs->ifs_ipfhook4_loop_in) == 0);
317 		if (!ifs->ifs_hook4_loopback_in)
318 			goto hookup_failed;
319 
320 		ifs->ifs_hook4_loopback_out = (net_hook_register(
321 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
322 		    ifs->ifs_ipfhook4_loop_out) == 0);
323 		if (!ifs->ifs_hook4_loopback_out)
324 			goto hookup_failed;
325 	}
326 	/*
327 	 * Add IPv6 hooks
328 	 */
329 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
330 	if (ifs->ifs_ipf_ipv6 == NULL)
331 		goto hookup_failed;
332 
333 	HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
334 		  "ipfilter_hook6_nicevents", ifs);
335 	HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
336 		  "ipfilter_hook6_in", ifs);
337 	HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
338 		  "ipfilter_hook6_out", ifs);
339 	HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
340 		  "ipfilter_hook6_loop_in", ifs);
341 	HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
342 		  "ipfilter_hook6_loop_out", ifs);
343 
344 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
345 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
346 	if (!ifs->ifs_hook6_nic_events)
347 		goto hookup_failed;
348 
349 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
350 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
351 	if (!ifs->ifs_hook6_physical_in)
352 		goto hookup_failed;
353 
354 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
355 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
356 	if (!ifs->ifs_hook6_physical_out)
357 		goto hookup_failed;
358 
359 	if (ifs->ifs_ipf_loopback) {
360 		ifs->ifs_hook6_loopback_in = (net_hook_register(
361 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
362 		    ifs->ifs_ipfhook6_loop_in) == 0);
363 		if (!ifs->ifs_hook6_loopback_in)
364 			goto hookup_failed;
365 
366 		ifs->ifs_hook6_loopback_out = (net_hook_register(
367 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
368 		    ifs->ifs_ipfhook6_loop_out) == 0);
369 		if (!ifs->ifs_hook6_loopback_out)
370 			goto hookup_failed;
371 	}
372 
373 	/*
374 	 * Reacquire ipf_global, now it is safe.
375 	 */
376 	WRITE_ENTER(&ifs->ifs_ipf_global);
377 
378 /* Do not use private interface ip_params_arr[] in Solaris 10 */
379 #if SOLARIS2 < 10
380 
381 #if SOLARIS2 >= 8
382 	ip_forwarding = &ip_g_forward;
383 #endif
384 	/*
385 	 * XXX - There is no terminator for this array, so it is not possible
386 	 * to tell if what we are looking for is missing and go off the end
387 	 * of the array.
388 	 */
389 
390 #if SOLARIS2 <= 8
391 	for (i = 0; ; i++) {
392 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
393 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
394 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
395 			    "ip_path_mtu_discovery")) {
396 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
397 		}
398 #if SOLARIS2 < 8
399 		else if (!strcmp(ip_param_arr[i].ip_param_name,
400 			    "ip_forwarding")) {
401 			ip_forwarding = &ip_param_arr[i].ip_param_value;
402 		}
403 #else
404 		else if (!strcmp(ip_param_arr[i].ip_param_name,
405 			    "ip6_forwarding")) {
406 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
407 		}
408 #endif
409 
410 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
411 #if SOLARIS2 >= 8
412 		    ip6_forwarding != NULL &&
413 #endif
414 		    ip_forwarding != NULL)
415 			break;
416 	}
417 #endif
418 
419 	if (ifs->ifs_fr_control_forwarding & 1) {
420 		if (ip_forwarding != NULL)
421 			*ip_forwarding = 1;
422 #if SOLARIS2 >= 8
423 		if (ip6_forwarding != NULL)
424 			*ip6_forwarding = 1;
425 #endif
426 	}
427 
428 #endif
429 
430 	return 0;
431 hookup_failed:
432 	WRITE_ENTER(&ifs->ifs_ipf_global);
433 	return -1;
434 }
435 
436 static	int	fr_setipfloopback(set, ifs)
437 int set;
438 ipf_stack_t *ifs;
439 {
440 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
441 		return EFAULT;
442 
443 	if (set && !ifs->ifs_ipf_loopback) {
444 		ifs->ifs_ipf_loopback = 1;
445 
446 		ifs->ifs_hook4_loopback_in = (net_hook_register(
447 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
448 		    ifs->ifs_ipfhook4_loop_in) == 0);
449 		if (!ifs->ifs_hook4_loopback_in)
450 			return EINVAL;
451 
452 		ifs->ifs_hook4_loopback_out = (net_hook_register(
453 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
454 		    ifs->ifs_ipfhook4_loop_out) == 0);
455 		if (!ifs->ifs_hook4_loopback_out)
456 			return EINVAL;
457 
458 		ifs->ifs_hook6_loopback_in = (net_hook_register(
459 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
460 		    ifs->ifs_ipfhook6_loop_in) == 0);
461 		if (!ifs->ifs_hook6_loopback_in)
462 			return EINVAL;
463 
464 		ifs->ifs_hook6_loopback_out = (net_hook_register(
465 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
466 		    ifs->ifs_ipfhook6_loop_out) == 0);
467 		if (!ifs->ifs_hook6_loopback_out)
468 			return EINVAL;
469 
470 	} else if (!set && ifs->ifs_ipf_loopback) {
471 		ifs->ifs_ipf_loopback = 0;
472 
473 		ifs->ifs_hook4_loopback_in =
474 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
475 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
476 		if (ifs->ifs_hook4_loopback_in)
477 			return EBUSY;
478 
479 		ifs->ifs_hook4_loopback_out =
480 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
481 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
482 		if (ifs->ifs_hook4_loopback_out)
483 			return EBUSY;
484 
485 		ifs->ifs_hook6_loopback_in =
486 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
487 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
488 		if (ifs->ifs_hook6_loopback_in)
489 			return EBUSY;
490 
491 		ifs->ifs_hook6_loopback_out =
492 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
493 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
494 		if (ifs->ifs_hook6_loopback_out)
495 			return EBUSY;
496 	}
497 	return 0;
498 }
499 
500 
501 /*
502  * Filter ioctl interface.
503  */
504 /*ARGSUSED*/
505 int iplioctl(dev, cmd, data, mode, cp, rp)
506 dev_t dev;
507 int cmd;
508 #if SOLARIS2 >= 7
509 intptr_t data;
510 #else
511 int *data;
512 #endif
513 int mode;
514 cred_t *cp;
515 int *rp;
516 {
517 	int error = 0, tmp;
518 	friostat_t fio;
519 	minor_t unit;
520 	u_int enable;
521 	ipf_stack_t *ifs;
522 
523 #ifdef	IPFDEBUG
524 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
525 		dev, cmd, data, mode, cp, rp);
526 #endif
527 	unit = getminor(dev);
528 	if (IPL_LOGMAX < unit)
529 		return ENXIO;
530 
531         /*
532 	 * As we're calling ipf_find_stack in user space, from a given zone
533 	 * to find the stack pointer for this zone, there is no need to have
534 	 * a hold/refence count here.
535 	 */
536 	ifs = ipf_find_stack(crgetzoneid(cp));
537 	ASSERT(ifs != NULL);
538 
539 	if (ifs->ifs_fr_running <= 0) {
540 		if (unit != IPL_LOGIPF) {
541 			return EIO;
542 		}
543 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
544 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
545 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
546 			return EIO;
547 		}
548 	}
549 
550 	READ_ENTER(&ifs->ifs_ipf_global);
551 	if (ifs->ifs_fr_enable_active != 0) {
552 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
553 		return EBUSY;
554 	}
555 
556 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
557 			       curproc, ifs);
558 	if (error != -1) {
559 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
560 		return error;
561 	}
562 	error = 0;
563 
564 	switch (cmd)
565 	{
566 	case SIOCFRENB :
567 		if (!(mode & FWRITE))
568 			error = EPERM;
569 		else {
570 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
571 				       sizeof(enable));
572 			if (error != 0) {
573 				error = EFAULT;
574 				break;
575 			}
576 
577 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
578 			WRITE_ENTER(&ifs->ifs_ipf_global);
579 
580 			/*
581 			 * We must recheck fr_enable_active here, since we've
582 			 * dropped ifs_ipf_global from R in order to get it
583 			 * exclusively.
584 			 */
585 			if (ifs->ifs_fr_enable_active == 0) {
586 				ifs->ifs_fr_enable_active = 1;
587 				error = fr_enableipf(ifs, enable);
588 				ifs->ifs_fr_enable_active = 0;
589 			}
590 		}
591 		break;
592 	case SIOCIPFSET :
593 		if (!(mode & FWRITE)) {
594 			error = EPERM;
595 			break;
596 		}
597 		/* FALLTHRU */
598 	case SIOCIPFGETNEXT :
599 	case SIOCIPFGET :
600 		error = fr_ipftune(cmd, (void *)data, ifs);
601 		break;
602 	case SIOCSETFF :
603 		if (!(mode & FWRITE))
604 			error = EPERM;
605 		else {
606 			error = COPYIN((caddr_t)data,
607 				       (caddr_t)&ifs->ifs_fr_flags,
608 				       sizeof(ifs->ifs_fr_flags));
609 			if (error != 0)
610 				error = EFAULT;
611 		}
612 		break;
613 	case SIOCIPFLP :
614 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
615 			       sizeof(tmp));
616 		if (error != 0)
617 			error = EFAULT;
618 		else
619 			error = fr_setipfloopback(tmp, ifs);
620 		break;
621 	case SIOCGETFF :
622 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
623 				sizeof(ifs->ifs_fr_flags));
624 		if (error != 0)
625 			error = EFAULT;
626 		break;
627 	case SIOCFUNCL :
628 		error = fr_resolvefunc((void *)data);
629 		break;
630 	case SIOCINAFR :
631 	case SIOCRMAFR :
632 	case SIOCADAFR :
633 	case SIOCZRLST :
634 		if (!(mode & FWRITE))
635 			error = EPERM;
636 		else
637 			error = frrequest(unit, cmd, (caddr_t)data,
638 					  ifs->ifs_fr_active, 1, ifs);
639 		break;
640 	case SIOCINIFR :
641 	case SIOCRMIFR :
642 	case SIOCADIFR :
643 		if (!(mode & FWRITE))
644 			error = EPERM;
645 		else
646 			error = frrequest(unit, cmd, (caddr_t)data,
647 					  1 - ifs->ifs_fr_active, 1, ifs);
648 		break;
649 	case SIOCSWAPA :
650 		if (!(mode & FWRITE))
651 			error = EPERM;
652 		else {
653 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
654 			bzero((char *)ifs->ifs_frcache,
655 			    sizeof (ifs->ifs_frcache));
656 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
657 					(caddr_t)data,
658 					sizeof(ifs->ifs_fr_active));
659 			if (error != 0)
660 				error = EFAULT;
661 			else
662 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
663 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
664 		}
665 		break;
666 	case SIOCGETFS :
667 		fr_getstat(&fio, ifs);
668 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
669 		break;
670 	case SIOCFRZST :
671 		if (!(mode & FWRITE))
672 			error = EPERM;
673 		else
674 			error = fr_zerostats((caddr_t)data, ifs);
675 		break;
676 	case	SIOCIPFFL :
677 		if (!(mode & FWRITE))
678 			error = EPERM;
679 		else {
680 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
681 				       sizeof(tmp));
682 			if (!error) {
683 				tmp = frflush(unit, 4, tmp, ifs);
684 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
685 						sizeof(tmp));
686 				if (error != 0)
687 					error = EFAULT;
688 			} else
689 				error = EFAULT;
690 		}
691 		break;
692 #ifdef USE_INET6
693 	case	SIOCIPFL6 :
694 		if (!(mode & FWRITE))
695 			error = EPERM;
696 		else {
697 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
698 				       sizeof(tmp));
699 			if (!error) {
700 				tmp = frflush(unit, 6, tmp, ifs);
701 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
702 						sizeof(tmp));
703 				if (error != 0)
704 					error = EFAULT;
705 			} else
706 				error = EFAULT;
707 		}
708 		break;
709 #endif
710 	case SIOCSTLCK :
711 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
712 		if (error == 0) {
713 			ifs->ifs_fr_state_lock = tmp;
714 			ifs->ifs_fr_nat_lock = tmp;
715 			ifs->ifs_fr_frag_lock = tmp;
716 			ifs->ifs_fr_auth_lock = tmp;
717 		} else
718 			error = EFAULT;
719 	break;
720 #ifdef	IPFILTER_LOG
721 	case	SIOCIPFFB :
722 		if (!(mode & FWRITE))
723 			error = EPERM;
724 		else {
725 			tmp = ipflog_clear(unit, ifs);
726 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
727 				       sizeof(tmp));
728 			if (error)
729 				error = EFAULT;
730 		}
731 		break;
732 #endif /* IPFILTER_LOG */
733 	case SIOCFRSYN :
734 		if (!(mode & FWRITE))
735 			error = EPERM;
736 		else {
737 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
738 			WRITE_ENTER(&ifs->ifs_ipf_global);
739 
740 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
741 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
742 			fr_nataddrsync(0, NULL, NULL, ifs);
743 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
744 			error = 0;
745 		}
746 		break;
747 	case SIOCGFRST :
748 		error = fr_outobj((void *)data, fr_fragstats(ifs),
749 				  IPFOBJ_FRAGSTAT);
750 		break;
751 	case FIONREAD :
752 #ifdef	IPFILTER_LOG
753 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
754 
755 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
756 		if (error != 0)
757 			error = EFAULT;
758 #endif
759 		break;
760 	case SIOCIPFITER :
761 		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
762 				       curproc, ifs);
763 		break;
764 
765 	case SIOCGENITER :
766 		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
767 					curproc, ifs);
768 		break;
769 
770 	case SIOCIPFDELTOK :
771 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
772 		if (error != 0) {
773 			error = EFAULT;
774 		} else {
775 			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
776 		}
777 		break;
778 
779 	default :
780 #ifdef	IPFDEBUG
781 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
782 			cmd, (void *)data);
783 #endif
784 		error = EINVAL;
785 		break;
786 	}
787 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
788 	return error;
789 }
790 
791 
792 static int fr_enableipf(ifs, enable)
793 ipf_stack_t *ifs;
794 int enable;
795 {
796 	int error;
797 
798 	if (!enable) {
799 		error = ipldetach(ifs);
800 		if (error == 0)
801 			ifs->ifs_fr_running = -1;
802 		return error;
803 	}
804 
805 	if (ifs->ifs_fr_running > 0)
806 		return 0;
807 
808 	error = iplattach(ifs);
809 	if (error == 0) {
810 		if (ifs->ifs_fr_timer_id == NULL) {
811 			int hz = drv_usectohz(500000);
812 
813 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
814 						       (void *)ifs,
815 						       hz);
816 		}
817 		ifs->ifs_fr_running = 1;
818 	} else {
819 		(void) ipldetach(ifs);
820 	}
821 	return error;
822 }
823 
824 
825 phy_if_t get_unit(name, v, ifs)
826 char *name;
827 int v;
828 ipf_stack_t *ifs;
829 {
830 	net_handle_t nif;
831 
832   	if (v == 4)
833  		nif = ifs->ifs_ipf_ipv4;
834   	else if (v == 6)
835  		nif = ifs->ifs_ipf_ipv6;
836   	else
837  		return 0;
838 
839  	return (net_phylookup(nif, name));
840 }
841 
842 /*
843  * routines below for saving IP headers to buffer
844  */
845 /*ARGSUSED*/
846 int iplopen(devp, flags, otype, cred)
847 dev_t *devp;
848 int flags, otype;
849 cred_t *cred;
850 {
851 	minor_t min = getminor(*devp);
852 
853 #ifdef	IPFDEBUG
854 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
855 #endif
856 	if (!(otype & OTYP_CHR))
857 		return ENXIO;
858 
859 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
860 	return min;
861 }
862 
863 
864 /*ARGSUSED*/
865 int iplclose(dev, flags, otype, cred)
866 dev_t dev;
867 int flags, otype;
868 cred_t *cred;
869 {
870 	minor_t	min = getminor(dev);
871 
872 #ifdef	IPFDEBUG
873 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
874 #endif
875 
876 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
877 	return min;
878 }
879 
#ifdef	IPFILTER_LOG
/*
 * iplread/ipllog
 * both of these must operate with at least splnet() lest they be
 * called during packet processing and cause an inconsistency to appear in
 * the filter lists.
 *
 * iplread - read entry point.  Fails with EIO unless the filter is
 * running for the caller's zone.  Reads on the IPL_LOGSYNC minor go to
 * ipfsync_read() when IPFILTER_SYNC is defined; everything else is handed
 * to ipflog_read() and its result returned.
 */
/*ARGSUSED*/
int iplread(dev, uio, cp)
dev_t dev;
register struct uio *uio;
cred_t *cp;
{
	ipf_stack_t *ifs;

	/*
	 * As we're calling ipf_find_stack in user space, from a given zone
	 * to find the stack pointer for this zone, there is no need to have
	 * a hold/reference count here.
	 */
	ifs = ipf_find_stack(crgetzoneid(cp));
	ASSERT(ifs != NULL);

# ifdef	IPFDEBUG
	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
# endif

	if (ifs->ifs_fr_running < 1)
		return EIO;

# ifdef	IPFILTER_SYNC
	if (getminor(dev) == IPL_LOGSYNC)
		return ipfsync_read(uio);
# endif

	return ipflog_read(getminor(dev), uio, ifs);
}
#endif /* IPFILTER_LOG */
922 
923 
924 /*
925  * iplread/ipllog
926  * both of these must operate with at least splnet() lest they be
927  * called during packet processing and cause an inconsistancy to appear in
928  * the filter lists.
929  */
930 int iplwrite(dev, uio, cp)
931 dev_t dev;
932 register struct uio *uio;
933 cred_t *cp;
934 {
935 	ipf_stack_t *ifs;
936 
937         /*
938 	 * As we're calling ipf_find_stack in user space, from a given zone
939 	 * to find the stack pointer for this zone, there is no need to have
940 	 * a hold/refence count here.
941 	 */
942 	ifs = ipf_find_stack(crgetzoneid(cp));
943 	ASSERT(ifs != NULL);
944 
945 #ifdef	IPFDEBUG
946 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
947 #endif
948 
949 	if (ifs->ifs_fr_running < 1) {
950 		return EIO;
951 	}
952 
953 #ifdef	IPFILTER_SYNC
954 	if (getminor(dev) == IPL_LOGSYNC)
955 		return ipfsync_write(uio);
956 #endif /* IPFILTER_SYNC */
957 	dev = dev;	/* LINT */
958 	uio = uio;	/* LINT */
959 	cp = cp;	/* LINT */
960 	return ENXIO;
961 }
962 
963 
964 /*
965  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
966  * requires a large amount of setting up and isn't any more efficient.
967  */
968 int fr_send_reset(fin)
969 fr_info_t *fin;
970 {
971 	tcphdr_t *tcp, *tcp2;
972 	int tlen, hlen;
973 	mblk_t *m;
974 #ifdef	USE_INET6
975 	ip6_t *ip6;
976 #endif
977 	ip_t *ip;
978 
979 	tcp = fin->fin_dp;
980 	if (tcp->th_flags & TH_RST)
981 		return -1;
982 
983 #ifndef	IPFILTER_CKSUM
984 	if (fr_checkl4sum(fin) == -1)
985 		return -1;
986 #endif
987 
988 	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
989 #ifdef	USE_INET6
990 	if (fin->fin_v == 6)
991 		hlen = sizeof(ip6_t);
992 	else
993 #endif
994 		hlen = sizeof(ip_t);
995 	hlen += sizeof(*tcp2);
996 	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
997 		return -1;
998 
999 	m->b_rptr += 64;
1000 	MTYPE(m) = M_DATA;
1001 	m->b_wptr = m->b_rptr + hlen;
1002 	ip = (ip_t *)m->b_rptr;
1003 	bzero((char *)ip, hlen);
1004 	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
1005 	tcp2->th_dport = tcp->th_sport;
1006 	tcp2->th_sport = tcp->th_dport;
1007 	if (tcp->th_flags & TH_ACK) {
1008 		tcp2->th_seq = tcp->th_ack;
1009 		tcp2->th_flags = TH_RST;
1010 	} else {
1011 		tcp2->th_ack = ntohl(tcp->th_seq);
1012 		tcp2->th_ack += tlen;
1013 		tcp2->th_ack = htonl(tcp2->th_ack);
1014 		tcp2->th_flags = TH_RST|TH_ACK;
1015 	}
1016 	tcp2->th_off = sizeof(struct tcphdr) >> 2;
1017 
1018 	ip->ip_v = fin->fin_v;
1019 #ifdef	USE_INET6
1020 	if (fin->fin_v == 6) {
1021 		ip6 = (ip6_t *)m->b_rptr;
1022 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1023 		ip6->ip6_src = fin->fin_dst6.in6;
1024 		ip6->ip6_dst = fin->fin_src6.in6;
1025 		ip6->ip6_plen = htons(sizeof(*tcp));
1026 		ip6->ip6_nxt = IPPROTO_TCP;
1027 		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
1028 	} else
1029 #endif
1030 	{
1031 		ip->ip_src.s_addr = fin->fin_daddr;
1032 		ip->ip_dst.s_addr = fin->fin_saddr;
1033 		ip->ip_id = fr_nextipid(fin);
1034 		ip->ip_hl = sizeof(*ip) >> 2;
1035 		ip->ip_p = IPPROTO_TCP;
1036 		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
1037 		ip->ip_tos = fin->fin_ip->ip_tos;
1038 		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
1039 	}
1040 	return fr_send_ip(fin, m, &m);
1041 }
1042 
1043 /*
1044  * Function:	fr_send_ip
1045  * Returns:	 0: success
1046  *		-1: failed
1047  * Parameters:
1048  *	fin: packet information
1049  *	m: the message block where ip head starts
1050  *
1051  * Send a new packet through the IP stack.
1052  *
1053  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
1054  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
1055  * function).
1056  *
1057  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
1058  * in by this function.
1059  *
1060  * All other portions of the packet must be in on-the-wire format.
1061  */
1062 /*ARGSUSED*/
1063 static int fr_send_ip(fin, m, mpp)
1064 fr_info_t *fin;
1065 mblk_t *m, **mpp;
1066 {
1067 	qpktinfo_t qpi, *qpip;
1068 	fr_info_t fnew;
1069 	ip_t *ip;
1070 	int i, hlen;
1071 	ipf_stack_t *ifs = fin->fin_ifs;
1072 
1073 	ip = (ip_t *)m->b_rptr;
1074 	bzero((char *)&fnew, sizeof(fnew));
1075 
1076 #ifdef	USE_INET6
1077 	if (fin->fin_v == 6) {
1078 		ip6_t *ip6;
1079 
1080 		ip6 = (ip6_t *)ip;
1081 		ip6->ip6_vfc = 0x60;
1082 		ip6->ip6_hlim = 127;
1083 		fnew.fin_v = 6;
1084 		hlen = sizeof(*ip6);
1085 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
1086 	} else
1087 #endif
1088 	{
1089 		fnew.fin_v = 4;
1090 #if SOLARIS2 >= 10
1091 		ip->ip_ttl = 255;
1092 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
1093 			ip->ip_off = htons(IP_DF);
1094 #else
1095 		if (ip_ttl_ptr != NULL)
1096 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
1097 		else
1098 			ip->ip_ttl = 63;
1099 		if (ip_mtudisc != NULL)
1100 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
1101 		else
1102 			ip->ip_off = htons(IP_DF);
1103 #endif
1104 		/*
1105 		 * The dance with byte order and ip_len/ip_off is because in
1106 		 * fr_fastroute, it expects them to be in host byte order but
1107 		 * ipf_cksum expects them to be in network byte order.
1108 		 */
1109 		ip->ip_len = htons(ip->ip_len);
1110 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
1111 		ip->ip_len = ntohs(ip->ip_len);
1112 		ip->ip_off = ntohs(ip->ip_off);
1113 		hlen = sizeof(*ip);
1114 		fnew.fin_plen = ip->ip_len;
1115 	}
1116 
1117 	qpip = fin->fin_qpi;
1118 	qpi.qpi_off = 0;
1119 	qpi.qpi_ill = qpip->qpi_ill;
1120 	qpi.qpi_m = m;
1121 	qpi.qpi_data = ip;
1122 	fnew.fin_qpi = &qpi;
1123 	fnew.fin_ifp = fin->fin_ifp;
1124 	fnew.fin_flx = FI_NOCKSUM;
1125 	fnew.fin_m = m;
1126 	fnew.fin_qfm = m;
1127 	fnew.fin_ip = ip;
1128 	fnew.fin_mp = mpp;
1129 	fnew.fin_hlen = hlen;
1130 	fnew.fin_dp = (char *)ip + hlen;
1131 	fnew.fin_ifs = fin->fin_ifs;
1132 	(void) fr_makefrip(hlen, ip, &fnew);
1133 
1134 	i = fr_fastroute(m, mpp, &fnew, NULL);
1135 	return i;
1136 }
1137 
1138 
1139 int fr_send_icmp_err(type, fin, dst)
1140 int type;
1141 fr_info_t *fin;
1142 int dst;
1143 {
1144 	struct in_addr dst4;
1145 	struct icmp *icmp;
1146 	qpktinfo_t *qpi;
1147 	int hlen, code;
1148 	phy_if_t phy;
1149 	u_short sz;
1150 #ifdef	USE_INET6
1151 	mblk_t *mb;
1152 #endif
1153 	mblk_t *m;
1154 #ifdef	USE_INET6
1155 	ip6_t *ip6;
1156 #endif
1157 	ip_t *ip;
1158 	ipf_stack_t *ifs = fin->fin_ifs;
1159 
1160 	if ((type < 0) || (type > ICMP_MAXTYPE))
1161 		return -1;
1162 
1163 	code = fin->fin_icode;
1164 #ifdef USE_INET6
1165 	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
1166 		return -1;
1167 #endif
1168 
1169 #ifndef	IPFILTER_CKSUM
1170 	if (fr_checkl4sum(fin) == -1)
1171 		return -1;
1172 #endif
1173 
1174 	qpi = fin->fin_qpi;
1175 
1176 #ifdef	USE_INET6
1177 	mb = fin->fin_qfm;
1178 
1179 	if (fin->fin_v == 6) {
1180 		sz = sizeof(ip6_t);
1181 		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
1182 		hlen = sizeof(ip6_t);
1183 		type = icmptoicmp6types[type];
1184 		if (type == ICMP6_DST_UNREACH)
1185 			code = icmptoicmp6unreach[code];
1186 	} else
1187 #endif
1188 	{
1189 		if ((fin->fin_p == IPPROTO_ICMP) &&
1190 		    !(fin->fin_flx & FI_SHORT))
1191 			switch (ntohs(fin->fin_data[0]) >> 8)
1192 			{
1193 			case ICMP_ECHO :
1194 			case ICMP_TSTAMP :
1195 			case ICMP_IREQ :
1196 			case ICMP_MASKREQ :
1197 				break;
1198 			default :
1199 				return 0;
1200 			}
1201 
1202 		sz = sizeof(ip_t) * 2;
1203 		sz += 8;		/* 64 bits of data */
1204 		hlen = sizeof(ip_t);
1205 	}
1206 
1207 	sz += offsetof(struct icmp, icmp_ip);
1208 	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
1209 		return -1;
1210 	MTYPE(m) = M_DATA;
1211 	m->b_rptr += 64;
1212 	m->b_wptr = m->b_rptr + sz;
1213 	bzero((char *)m->b_rptr, (size_t)sz);
1214 	ip = (ip_t *)m->b_rptr;
1215 	ip->ip_v = fin->fin_v;
1216 	icmp = (struct icmp *)(m->b_rptr + hlen);
1217 	icmp->icmp_type = type & 0xff;
1218 	icmp->icmp_code = code & 0xff;
1219 	phy = (phy_if_t)qpi->qpi_ill;
1220 	if (type == ICMP_UNREACH && (phy != 0) &&
1221 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
1222 		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
1223 
1224 #ifdef	USE_INET6
1225 	if (fin->fin_v == 6) {
1226 		struct in6_addr dst6;
1227 		int csz;
1228 
1229 		if (dst == 0) {
1230 			ipf_stack_t *ifs = fin->fin_ifs;
1231 
1232 			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
1233 				       (void *)&dst6, NULL, ifs) == -1) {
1234 				FREE_MB_T(m);
1235 				return -1;
1236 			}
1237 		} else
1238 			dst6 = fin->fin_dst6.in6;
1239 
1240 		csz = sz;
1241 		sz -= sizeof(ip6_t);
1242 		ip6 = (ip6_t *)m->b_rptr;
1243 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
1244 		ip6->ip6_plen = htons((u_short)sz);
1245 		ip6->ip6_nxt = IPPROTO_ICMPV6;
1246 		ip6->ip6_src = dst6;
1247 		ip6->ip6_dst = fin->fin_src6.in6;
1248 		sz -= offsetof(struct icmp, icmp_ip);
1249 		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
1250 		icmp->icmp_cksum = csz - sizeof(ip6_t);
1251 	} else
1252 #endif
1253 	{
1254 		ip->ip_hl = sizeof(*ip) >> 2;
1255 		ip->ip_p = IPPROTO_ICMP;
1256 		ip->ip_id = fin->fin_ip->ip_id;
1257 		ip->ip_tos = fin->fin_ip->ip_tos;
1258 		ip->ip_len = (u_short)sz;
1259 		if (dst == 0) {
1260 			ipf_stack_t *ifs = fin->fin_ifs;
1261 
1262 			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
1263 				       (void *)&dst4, NULL, ifs) == -1) {
1264 				FREE_MB_T(m);
1265 				return -1;
1266 			}
1267 		} else {
1268 			dst4 = fin->fin_dst;
1269 		}
1270 		ip->ip_src = dst4;
1271 		ip->ip_dst = fin->fin_src;
1272 		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
1273 		      sizeof(*fin->fin_ip));
1274 		bcopy((char *)fin->fin_ip + fin->fin_hlen,
1275 		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
1276 		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
1277 		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
1278 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
1279 					     sz - sizeof(ip_t));
1280 	}
1281 
1282 	/*
1283 	 * Need to exit out of these so we don't recursively call rw_enter
1284 	 * from fr_qout.
1285 	 */
1286 	return fr_send_ip(fin, m, &m);
1287 }
1288 
1289 #include <sys/time.h>
1290 #include <sys/varargs.h>
1291 
1292 #ifndef _KERNEL
1293 #include <stdio.h>
1294 #endif
1295 
1296 #define	NULLADDR_RATE_LIMIT 10	/* 10 seconds */
1297 
1298 
1299 /*
1300  * Print out warning message at rate-limited speed.
1301  */
1302 static void rate_limit_message(ipf_stack_t *ifs,
1303 			       int rate, const char *message, ...)
1304 {
1305 	static time_t last_time = 0;
1306 	time_t now;
1307 	va_list args;
1308 	char msg_buf[256];
1309 	int  need_printed = 0;
1310 
1311 	now = ddi_get_time();
1312 
1313 	/* make sure, no multiple entries */
1314 	ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk)));
1315 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1316 	if (now - last_time >= rate) {
1317 		need_printed = 1;
1318 		last_time = now;
1319 	}
1320 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1321 
1322 	if (need_printed) {
1323 		va_start(args, message);
1324 		(void)vsnprintf(msg_buf, 255, message, args);
1325 		va_end(args);
1326 #ifdef _KERNEL
1327 		cmn_err(CE_WARN, msg_buf);
1328 #else
1329 		fprintf(std_err, msg_buf);
1330 #endif
1331 	}
1332 }
1333 
1334 /*
1335  * Return the first IP Address associated with an interface
1336  * For IPv6, we walk through the list of logical interfaces and return
1337  * the address of the first one that isn't a link-local interface.
1338  * We can't assume that it is :1 because another link-local address
1339  * may have been assigned there.
1340  */
1341 /*ARGSUSED*/
1342 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
1343 int v, atype;
1344 void *ifptr;
1345 struct in_addr  *inp, *inpmask;
1346 ipf_stack_t *ifs;
1347 {
1348 	struct sockaddr_in6 v6addr[2];
1349 	struct sockaddr_in v4addr[2];
1350 	net_ifaddr_t type[2];
1351 	net_handle_t net_data;
1352 	phy_if_t phyif;
1353 	void *array;
1354 
1355 	switch (v)
1356 	{
1357 	case 4:
1358 		net_data = ifs->ifs_ipf_ipv4;
1359 		array = v4addr;
1360 		break;
1361 	case 6:
1362 		net_data = ifs->ifs_ipf_ipv6;
1363 		array = v6addr;
1364 		break;
1365 	default:
1366 		net_data = NULL;
1367 		break;
1368 	}
1369 
1370 	if (net_data == NULL)
1371 		return -1;
1372 
1373 	phyif = (phy_if_t)ifptr;
1374 
1375 	switch (atype)
1376 	{
1377 	case FRI_PEERADDR :
1378 		type[0] = NA_PEER;
1379 		break;
1380 
1381 	case FRI_BROADCAST :
1382 		type[0] = NA_BROADCAST;
1383 		break;
1384 
1385 	default :
1386 		type[0] = NA_ADDRESS;
1387 		break;
1388 	}
1389 
1390 	type[1] = NA_NETMASK;
1391 
1392 	if (v == 6) {
1393 		lif_if_t idx = 0;
1394 
1395 		do {
1396 			idx = net_lifgetnext(net_data, phyif, idx);
1397 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
1398 					   array) < 0)
1399 				return -1;
1400 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
1401 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
1402 				break;
1403 		} while (idx != 0);
1404 
1405 		if (idx == 0)
1406 			return -1;
1407 
1408 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
1409 					inp, inpmask);
1410 	}
1411 
1412 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
1413 		return -1;
1414 
1415 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
1416 }
1417 
1418 
1419 u_32_t fr_newisn(fin)
1420 fr_info_t *fin;
1421 {
1422 	static int iss_seq_off = 0;
1423 	u_char hash[16];
1424 	u_32_t newiss;
1425 	MD5_CTX ctx;
1426 	ipf_stack_t *ifs = fin->fin_ifs;
1427 
1428 	/*
1429 	 * Compute the base value of the ISS.  It is a hash
1430 	 * of (saddr, sport, daddr, dport, secret).
1431 	 */
1432 	MD5Init(&ctx);
1433 
1434 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
1435 		  sizeof(fin->fin_fi.fi_src));
1436 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
1437 		  sizeof(fin->fin_fi.fi_dst));
1438 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
1439 
1440 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
1441 
1442 	MD5Final(hash, &ctx);
1443 
1444 	bcopy(hash, &newiss, sizeof(newiss));
1445 
1446 	/*
1447 	 * Now increment our "timer", and add it in to
1448 	 * the computed value.
1449 	 *
1450 	 * XXX Use `addin'?
1451 	 * XXX TCP_ISSINCR too large to use?
1452 	 */
1453 	iss_seq_off += 0x00010000;
1454 	newiss += iss_seq_off;
1455 	return newiss;
1456 }
1457 
1458 
1459 /* ------------------------------------------------------------------------ */
1460 /* Function:    fr_nextipid                                                 */
1461 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
1462 /* Parameters:  fin(I) - pointer to packet information                      */
1463 /*                                                                          */
1464 /* Returns the next IPv4 ID to use for this packet.                         */
1465 /* ------------------------------------------------------------------------ */
1466 u_short fr_nextipid(fin)
1467 fr_info_t *fin;
1468 {
1469 	static u_short ipid = 0;
1470 	u_short id;
1471 	ipf_stack_t *ifs = fin->fin_ifs;
1472 
1473 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
1474 	if (fin->fin_pktnum != 0) {
1475 		id = fin->fin_pktnum & 0xffff;
1476 	} else {
1477 		id = ipid++;
1478 	}
1479 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
1480 
1481 	return id;
1482 }
1483 
1484 
1485 #ifndef IPFILTER_CKSUM
1486 /* ARGSUSED */
1487 #endif
1488 INLINE void fr_checkv4sum(fin)
1489 fr_info_t *fin;
1490 {
1491 #ifdef IPFILTER_CKSUM
1492 	if (fr_checkl4sum(fin) == -1)
1493 		fin->fin_flx |= FI_BAD;
1494 #endif
1495 }
1496 
1497 
#ifdef USE_INET6
/*
 * IPv6 counterpart of fr_checkv4sum: mark the packet as bad (FI_BAD in
 * fin_flx) when fr_checkl4sum() reports -1 for it.  The body is empty
 * unless IPFILTER_CKSUM is defined.
 */
# ifndef IPFILTER_CKSUM
/* ARGSUSED */
# endif
INLINE void fr_checkv6sum(fin)
fr_info_t *fin;
{
# ifdef IPFILTER_CKSUM
	int verdict = fr_checkl4sum(fin);

	if (verdict == -1) {
		fin->fin_flx |= FI_BAD;
	}
# endif
}
#endif /* USE_INET6 */
1511 
1512 
1513 #if (SOLARIS2 < 7)
1514 void fr_slowtimer()
1515 #else
1516 /*ARGSUSED*/
1517 void fr_slowtimer __P((void *arg))
1518 #endif
1519 {
1520 	ipf_stack_t *ifs = arg;
1521 
1522 	READ_ENTER(&ifs->ifs_ipf_global);
1523 	if (ifs->ifs_fr_running != 1) {
1524 		ifs->ifs_fr_timer_id = NULL;
1525 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
1526 		return;
1527 	}
1528 	ipf_expiretokens(ifs);
1529 	fr_fragexpire(ifs);
1530 	fr_timeoutstate(ifs);
1531 	fr_natexpire(ifs);
1532 	fr_authexpire(ifs);
1533 	ifs->ifs_fr_ticks++;
1534 	if (ifs->ifs_fr_running == 1)
1535 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
1536 		    drv_usectohz(500000));
1537 	else
1538 		ifs->ifs_fr_timer_id = NULL;
1539 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
1540 }
1541 
1542 
1543 /* ------------------------------------------------------------------------ */
1544 /* Function:    fr_pullup                                                   */
1545 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
1546 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
1547 /*              fin(I) - pointer to packet information                      */
1548 /*              len(I) - number of bytes to pullup                          */
1549 /*                                                                          */
1550 /* Attempt to move at least len bytes (from the start of the buffer) into a */
1551 /* single buffer for ease of access.  Operating system native functions are */
1552 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
1553 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
1554 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
1555 /* and ONLY if the pullup succeeds.                                         */
1556 /*                                                                          */
1557 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
1558 /* of buffers that starts at *fin->fin_mp.                                  */
1559 /* ------------------------------------------------------------------------ */
1560 void *fr_pullup(min, fin, len)
1561 mb_t *min;
1562 fr_info_t *fin;
1563 int len;
1564 {
1565 	qpktinfo_t *qpi = fin->fin_qpi;
1566 	int out = fin->fin_out, dpoff, ipoff;
1567 	mb_t *m = min, *m1, *m2;
1568 	char *ip;
1569 	uint32_t start, stuff, end, value, flags;
1570 	ipf_stack_t *ifs = fin->fin_ifs;
1571 
1572 	if (m == NULL)
1573 		return NULL;
1574 
1575 	ip = (char *)fin->fin_ip;
1576 	if ((fin->fin_flx & FI_COALESCE) != 0)
1577 		return ip;
1578 
1579 	ipoff = fin->fin_ipoff;
1580 	if (fin->fin_dp != NULL)
1581 		dpoff = (char *)fin->fin_dp - (char *)ip;
1582 	else
1583 		dpoff = 0;
1584 
1585 	if (M_LEN(m) < len + ipoff) {
1586 
1587 		/*
1588 		 * pfil_precheck ensures the IP header is on a 32bit
1589 		 * aligned address so simply fail if that isn't currently
1590 		 * the case (should never happen).
1591 		 */
1592 		int inc = 0;
1593 
1594 		if (ipoff > 0) {
1595 			if ((ipoff & 3) != 0) {
1596 				inc = 4 - (ipoff & 3);
1597 				if (m->b_rptr - inc >= m->b_datap->db_base)
1598 					m->b_rptr -= inc;
1599 				else
1600 					inc = 0;
1601 			}
1602 		}
1603 
1604 		/*
1605 		 * XXX This is here as a work around for a bug with DEBUG
1606 		 * XXX Solaris kernels.  The problem is b_prev is used by IP
1607 		 * XXX code as a way to stash the phyint_index for a packet,
1608 		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
1609 		 * XXX for both of these to be NULL.  See 6442390.
1610 		 */
1611 		m1 = m;
1612 		m2 = m->b_prev;
1613 
1614 		do {
1615 			m1->b_next = NULL;
1616 			m1->b_prev = NULL;
1617 			m1 = m1->b_cont;
1618 		} while (m1);
1619 
1620 		/*
1621 		 * Need to preserve checksum information by copying them
1622 		 * to newmp which heads the pulluped message.
1623 		 */
1624 		hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
1625 		    &value, &flags);
1626 
1627 		if (pullupmsg(m, len + ipoff + inc) == 0) {
1628 			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
1629 			FREE_MB_T(*fin->fin_mp);
1630 			*fin->fin_mp = NULL;
1631 			fin->fin_m = NULL;
1632 			fin->fin_ip = NULL;
1633 			fin->fin_dp = NULL;
1634 			qpi->qpi_data = NULL;
1635 			return NULL;
1636 		}
1637 
1638 		(void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
1639 		    value, flags, 0);
1640 
1641 		m->b_prev = m2;
1642 		m->b_rptr += inc;
1643 		fin->fin_m = m;
1644 		ip = MTOD(m, char *) + ipoff;
1645 		qpi->qpi_data = ip;
1646 	}
1647 
1648 	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
1649 	fin->fin_ip = (ip_t *)ip;
1650 	if (fin->fin_dp != NULL)
1651 		fin->fin_dp = (char *)fin->fin_ip + dpoff;
1652 
1653 	if (len == fin->fin_plen)
1654 		fin->fin_flx |= FI_COALESCE;
1655 	return ip;
1656 }
1657 
1658 
1659 /*
1660  * Function:	fr_verifysrc
1661  * Returns:	int (really boolean)
1662  * Parameters:	fin - packet information
1663  *
1664  * Check whether the packet has a valid source address for the interface on
1665  * which the packet arrived, implementing the "fr_chksrc" feature.
1666  * Returns true iff the packet's source address is valid.
1667  */
1668 int fr_verifysrc(fin)
1669 fr_info_t *fin;
1670 {
1671 	net_handle_t net_data_p;
1672 	phy_if_t phy_ifdata_routeto;
1673 	struct sockaddr	sin;
1674 	ipf_stack_t *ifs = fin->fin_ifs;
1675 
1676 	if (fin->fin_v == 4) {
1677 		net_data_p = ifs->ifs_ipf_ipv4;
1678 	} else if (fin->fin_v == 6) {
1679 		net_data_p = ifs->ifs_ipf_ipv6;
1680 	} else {
1681 		return (0);
1682 	}
1683 
1684 	/* Get the index corresponding to the if name */
1685 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1686 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
1687 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
1688 
1689 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
1690 }
1691 
1692 
1693 /*
1694  * Function:	fr_fastroute
1695  * Returns:	 0: success;
1696  *		-1: failed
1697  * Parameters:
1698  *	mb: the message block where ip head starts
1699  *	mpp: the pointer to the pointer of the orignal
1700  *		packet message
1701  *	fin: packet information
1702  *	fdp: destination interface information
1703  *	if it is NULL, no interface information provided.
1704  *
1705  * This function is for fastroute/to/dup-to rules. It calls
1706  * pfil_make_lay2_packet to search route, make lay-2 header
1707  * ,and identify output queue for the IP packet.
1708  * The destination address depends on the following conditions:
1709  * 1: for fastroute rule, fdp is passed in as NULL, so the
1710  *	destination address is the IP Packet's destination address
1711  * 2: for to/dup-to rule, if an ip address is specified after
1712  *	the interface name, this address is the as destination
1713  *	address. Otherwise IP Packet's destination address is used
1714  */
1715 int fr_fastroute(mb, mpp, fin, fdp)
1716 mblk_t *mb, **mpp;
1717 fr_info_t *fin;
1718 frdest_t *fdp;
1719 {
1720         net_handle_t net_data_p;
1721 	net_inject_t *inj;
1722 	mblk_t *mp = NULL;
1723 	frentry_t *fr = fin->fin_fr;
1724 	qpktinfo_t *qpi;
1725 	ip_t *ip;
1726 
1727 	struct sockaddr_in *sin;
1728 	struct sockaddr_in6 *sin6;
1729 	struct sockaddr *sinp;
1730 	ipf_stack_t *ifs = fin->fin_ifs;
1731 #ifndef	sparc
1732 	u_short __iplen, __ipoff;
1733 #endif
1734 
1735 	if (fin->fin_v == 4) {
1736 		net_data_p = ifs->ifs_ipf_ipv4;
1737 	} else if (fin->fin_v == 6) {
1738 		net_data_p = ifs->ifs_ipf_ipv6;
1739 	} else {
1740 		return (-1);
1741 	}
1742 
1743 	inj = net_inject_alloc(NETINFO_VERSION);
1744 	if (inj == NULL)
1745 		return -1;
1746 
1747 	ip = fin->fin_ip;
1748 	qpi = fin->fin_qpi;
1749 
1750 	/*
1751 	 * If this is a duplicate mblk then we want ip to point at that
1752 	 * data, not the original, if and only if it is already pointing at
1753 	 * the current mblk data.
1754 	 *
1755 	 * Otherwise, if it's not a duplicate, and we're not already pointing
1756 	 * at the current mblk data, then we want to ensure that the data
1757 	 * points at ip.
1758 	 */
1759 
1760 	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
1761 		ip = (ip_t *)mb->b_rptr;
1762 	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
1763 		qpi->qpi_m->b_rptr = (uchar_t *)ip;
1764 		qpi->qpi_off = 0;
1765 	}
1766 
1767 	/*
1768 	 * If there is another M_PROTO, we don't want it
1769 	 */
1770 	if (*mpp != mb) {
1771 		mp = unlinkb(*mpp);
1772 		freeb(*mpp);
1773 		*mpp = mp;
1774 	}
1775 
1776 	sinp = (struct sockaddr *)&inj->ni_addr;
1777 	sin = (struct sockaddr_in *)sinp;
1778 	sin6 = (struct sockaddr_in6 *)sinp;
1779 	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
1780 	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
1781 	inj->ni_packet = mb;
1782 
1783 	/*
1784 	 * In case we're here due to "to <if>" being used with
1785 	 * "keep state", check that we're going in the correct
1786 	 * direction.
1787 	 */
1788 	if (fdp != NULL) {
1789 		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
1790 			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
1791 			goto bad_fastroute;
1792 		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
1793 		if (fin->fin_v == 4) {
1794 			sin->sin_addr = fdp->fd_ip;
1795 		} else {
1796 			sin6->sin6_addr = fdp->fd_ip6.in6;
1797 		}
1798 	} else {
1799 		if (fin->fin_v == 4) {
1800 			sin->sin_addr = ip->ip_dst;
1801 		} else {
1802 			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
1803 		}
1804 		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
1805 	}
1806 
1807 	/*
1808 	 * Clear the hardware checksum flags from packets that we are doing
1809 	 * input processing on as leaving them set will cause the outgoing
1810 	 * NIC (if it supports hardware checksum) to calculate them anew,
1811 	 * using the old (correct) checksums as the pseudo value to start
1812 	 * from.
1813 	 */
1814 	if (fin->fin_out == 0) {
1815 		DB_CKSUMFLAGS(mb) = 0;
1816 	}
1817 
1818 	*mpp = mb;
1819 
1820 	if (fin->fin_out == 0) {
1821 		void *saveifp;
1822 		u_32_t pass;
1823 
1824 		saveifp = fin->fin_ifp;
1825 		fin->fin_ifp = (void *)inj->ni_physical;
1826 		fin->fin_flx &= ~FI_STATE;
1827 		fin->fin_out = 1;
1828 		(void) fr_acctpkt(fin, &pass);
1829 		fin->fin_fr = NULL;
1830 		if (!fr || !(fr->fr_flags & FR_RETMASK))
1831 			(void) fr_checkstate(fin, &pass);
1832 		if (fr_checknatout(fin, NULL) == -1)
1833 			goto bad_fastroute;
1834 		fin->fin_out = 0;
1835 		fin->fin_ifp = saveifp;
1836 	}
1837 #ifndef	sparc
1838 	if (fin->fin_v == 4) {
1839 		__iplen = (u_short)ip->ip_len,
1840 		__ipoff = (u_short)ip->ip_off;
1841 
1842 		ip->ip_len = htons(__iplen);
1843 		ip->ip_off = htons(__ipoff);
1844 	}
1845 #endif
1846 
1847 	if (net_data_p) {
1848 		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
1849 			net_inject_free(inj);
1850 			return (-1);
1851 		}
1852 	}
1853 
1854 	ifs->ifs_fr_frouteok[0]++;
1855 	net_inject_free(inj);
1856 	return 0;
1857 bad_fastroute:
1858 	net_inject_free(inj);
1859 	freemsg(mb);
1860 	ifs->ifs_fr_frouteok[1]++;
1861 	return -1;
1862 }
1863 
1864 
1865 /* ------------------------------------------------------------------------ */
1866 /* Function:    ipf_hook4_out                                               */
1867 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1868 /* Parameters:  event(I)     - pointer to event                             */
1869 /*              info(I)      - pointer to hook information for firewalling  */
1870 /*                                                                          */
1871 /* Calling ipf_hook.                                                        */
1872 /* ------------------------------------------------------------------------ */
1873 /*ARGSUSED*/
1874 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
1875 {
1876 	return ipf_hook(info, 1, 0, arg);
1877 }
1878 /*ARGSUSED*/
1879 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
1880 {
1881 	return ipf_hook6(info, 1, 0, arg);
1882 }
1883 
1884 /* ------------------------------------------------------------------------ */
1885 /* Function:    ipf_hook4_in                                                */
1886 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1887 /* Parameters:  event(I)     - pointer to event                             */
1888 /*              info(I)      - pointer to hook information for firewalling  */
1889 /*                                                                          */
1890 /* Calling ipf_hook.                                                        */
1891 /* ------------------------------------------------------------------------ */
1892 /*ARGSUSED*/
1893 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
1894 {
1895 	return ipf_hook(info, 0, 0, arg);
1896 }
1897 /*ARGSUSED*/
1898 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
1899 {
1900 	return ipf_hook6(info, 0, 0, arg);
1901 }
1902 
1903 
1904 /* ------------------------------------------------------------------------ */
1905 /* Function:    ipf_hook4_loop_out                                          */
1906 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1907 /* Parameters:  event(I)     - pointer to event                             */
1908 /*              info(I)      - pointer to hook information for firewalling  */
1909 /*                                                                          */
1910 /* Calling ipf_hook.                                                        */
1911 /* ------------------------------------------------------------------------ */
1912 /*ARGSUSED*/
1913 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1914 {
1915 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
1916 }
1917 /*ARGSUSED*/
1918 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
1919 {
1920 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
1921 }
1922 
1923 /* ------------------------------------------------------------------------ */
1924 /* Function:    ipf_hook4_loop_in                                           */
1925 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1926 /* Parameters:  event(I)     - pointer to event                             */
1927 /*              info(I)      - pointer to hook information for firewalling  */
1928 /*                                                                          */
1929 /* Calling ipf_hook.                                                        */
1930 /* ------------------------------------------------------------------------ */
1931 /*ARGSUSED*/
1932 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1933 {
1934 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
1935 }
1936 /*ARGSUSED*/
1937 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
1938 {
1939 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
1940 }
1941 
1942 /* ------------------------------------------------------------------------ */
1943 /* Function:    ipf_hook                                                    */
1944 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
1945 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
1946 /*              out(I)       - whether packet is going in or out            */
1947 /*              loopback(I)  - whether packet is a loopback packet or not   */
1948 /*                                                                          */
1949 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
1950 /* parameters out of the info structure and forms them up to be useful for  */
1951 /* calling ipfilter.                                                        */
1952 /* ------------------------------------------------------------------------ */
1953 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
1954 {
1955 	hook_pkt_event_t *fw;
1956 	ipf_stack_t *ifs;
1957 	qpktinfo_t qpi;
1958 	int rval, hlen;
1959 	u_short swap;
1960 	phy_if_t phy;
1961 	ip_t *ip;
1962 
1963 	ifs = arg;
1964 	fw = (hook_pkt_event_t *)info;
1965 
1966 	ASSERT(fw != NULL);
1967 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
1968 
1969 	ip = fw->hpe_hdr;
1970 	swap = ntohs(ip->ip_len);
1971 	ip->ip_len = swap;
1972 	swap = ntohs(ip->ip_off);
1973 	ip->ip_off = swap;
1974 	hlen = IPH_HDR_LENGTH(ip);
1975 
1976 	qpi.qpi_m = fw->hpe_mb;
1977 	qpi.qpi_data = fw->hpe_hdr;
1978 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
1979 	qpi.qpi_ill = (void *)phy;
1980 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
1981 	if (qpi.qpi_flags)
1982 		qpi.qpi_flags |= FI_MBCAST;
1983 	qpi.qpi_flags |= loopback;
1984 
1985 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
1986 	    &qpi, fw->hpe_mp, ifs);
1987 
1988 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
1989 	if (rval == 0 && *(fw->hpe_mp) == NULL)
1990 		rval = 1;
1991 
1992 	/* Notify IP the packet mblk_t and IP header pointers. */
1993 	fw->hpe_mb = qpi.qpi_m;
1994 	fw->hpe_hdr = qpi.qpi_data;
1995 	if (rval == 0) {
1996 		ip = qpi.qpi_data;
1997 		swap = ntohs(ip->ip_len);
1998 		ip->ip_len = swap;
1999 		swap = ntohs(ip->ip_off);
2000 		ip->ip_off = swap;
2001 	}
2002 	return rval;
2003 
2004 }
2005 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
2006 {
2007 	hook_pkt_event_t *fw;
2008 	int rval, hlen;
2009 	qpktinfo_t qpi;
2010 	phy_if_t phy;
2011 
2012 	fw = (hook_pkt_event_t *)info;
2013 
2014 	ASSERT(fw != NULL);
2015 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
2016 
2017 	hlen = sizeof (ip6_t);
2018 
2019 	qpi.qpi_m = fw->hpe_mb;
2020 	qpi.qpi_data = fw->hpe_hdr;
2021 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
2022 	qpi.qpi_ill = (void *)phy;
2023 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
2024 	if (qpi.qpi_flags)
2025 		qpi.qpi_flags |= FI_MBCAST;
2026 	qpi.qpi_flags |= loopback;
2027 
2028 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
2029 	    &qpi, fw->hpe_mp, arg);
2030 
2031 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
2032 	if (rval == 0 && *(fw->hpe_mp) == NULL)
2033 		rval = 1;
2034 
2035 	/* Notify IP the packet mblk_t and IP header pointers. */
2036 	fw->hpe_mb = qpi.qpi_m;
2037 	fw->hpe_hdr = qpi.qpi_data;
2038 	return rval;
2039 
2040 }
2041 
2042 
2043 /* ------------------------------------------------------------------------ */
2044 /* Function:    ipf_nic_event_v4                                            */
2045 /* Returns:     int - 0 == no problems encountered                          */
2046 /* Parameters:  event(I)     - pointer to event                             */
2047 /*              info(I)      - pointer to information about a NIC event     */
2048 /*                                                                          */
2049 /* Function to receive asynchronous NIC events from IP                      */
2050 /* ------------------------------------------------------------------------ */
2051 /*ARGSUSED*/
2052 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
2053 {
2054 	struct sockaddr_in *sin;
2055 	hook_nic_event_t *hn;
2056 	ipf_stack_t *ifs = arg;
2057 	void *new_ifp = NULL;
2058 
2059 	if (ifs->ifs_fr_running <= 0)
2060 		return (0);
2061 
2062 	hn = (hook_nic_event_t *)info;
2063 
2064 	switch (hn->hne_event)
2065 	{
2066 	case NE_PLUMB :
2067 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
2068 		       ifs);
2069 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2070 			      hn->hne_data, ifs);
2071 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
2072 			     hn->hne_data, ifs);
2073 		break;
2074 
2075 	case NE_UNPLUMB :
2076 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2077 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
2078 			      ifs);
2079 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
2080 		break;
2081 
2082 	case NE_ADDRESS_CHANGE :
2083 		/*
2084 		 * We only respond to events for logical interface 0 because
2085 		 * IPFilter only uses the first address given to a network
2086 		 * interface.  We check for hne_lif==1 because the netinfo
2087 		 * code maps adds 1 to the lif number so that it can return
2088 		 * 0 to indicate "no more lifs" when walking them.
2089 		 */
2090 		if (hn->hne_lif == 1) {
2091 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
2092 			    ifs);
2093 			sin = hn->hne_data;
2094 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
2095 			    ifs);
2096 		}
2097 		break;
2098 
2099 #if SOLARIS2 >= 10
2100 	case NE_IFINDEX_CHANGE :
2101 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2102 
2103 		if (hn->hne_data != NULL) {
2104 			/*
2105 			 * The netinfo passes interface index as int (hne_data should be
2106 			 * handled as a pointer to int), which is always 32bit. We need to
2107 			 * convert it to void pointer here, since interfaces are
2108 			 * represented as pointers to void in IPF. The pointers are 64 bits
2109 			 * long on 64bit platforms. Doing something like
2110 			 *	(void *)((int) x)
2111 			 * will throw warning:
2112 			 *   "cast to pointer from integer of different size"
2113 			 * during 64bit compilation.
2114 			 *
2115 			 * The line below uses (size_t) to typecast int to
2116 			 * size_t, which might be 64bit/32bit (depending
2117 			 * on architecture). Once we have proper 64bit/32bit
2118 			 * type (size_t), we can safely convert it to void pointer.
2119 			 */
2120 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2121 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2122 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2123 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2124 		}
2125 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2126 		break;
2127 #endif
2128 
2129 	default :
2130 		break;
2131 	}
2132 
2133 	return 0;
2134 }
2135 
2136 
2137 /* ------------------------------------------------------------------------ */
2138 /* Function:    ipf_nic_event_v6                                            */
2139 /* Returns:     int - 0 == no problems encountered                          */
2140 /* Parameters:  event(I)     - pointer to event                             */
2141 /*              info(I)      - pointer to information about a NIC event     */
2142 /*                                                                          */
2143 /* Function to receive asynchronous NIC events from IP                      */
2144 /* ------------------------------------------------------------------------ */
2145 /*ARGSUSED*/
2146 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
2147 {
2148 	struct sockaddr_in6 *sin6;
2149 	hook_nic_event_t *hn;
2150 	ipf_stack_t *ifs = arg;
2151 	void *new_ifp = NULL;
2152 
2153 	if (ifs->ifs_fr_running <= 0)
2154 		return (0);
2155 
2156 	hn = (hook_nic_event_t *)info;
2157 
2158 	switch (hn->hne_event)
2159 	{
2160 	case NE_PLUMB :
2161 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2162 		       hn->hne_data, ifs);
2163 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2164 			      hn->hne_data, ifs);
2165 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
2166 			     hn->hne_data, ifs);
2167 		break;
2168 
2169 	case NE_UNPLUMB :
2170 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2171 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
2172 			      ifs);
2173 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
2174 		break;
2175 
2176 	case NE_ADDRESS_CHANGE :
2177 		if (hn->hne_lif == 1) {
2178 			sin6 = hn->hne_data;
2179 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
2180 				       ifs);
2181 		}
2182 		break;
2183 
2184 #if SOLARIS2 >= 10
2185 	case NE_IFINDEX_CHANGE :
2186 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
2187 		if (hn->hne_data != NULL) {
2188 			/*
2189 			 * The netinfo passes interface index as int (hne_data should be
2190 			 * handled as a pointer to int), which is always 32bit. We need to
2191 			 * convert it to void pointer here, since interfaces are
2192 			 * represented as pointers to void in IPF. The pointers are 64 bits
2193 			 * long on 64bit platforms. Doing something like
2194 			 *	(void *)((int) x)
2195 			 * will throw warning:
2196 			 *   "cast to pointer from integer of different size"
2197 			 * during 64bit compilation.
2198 			 *
2199 			 * The line below uses (size_t) to typecast int to
2200 			 * size_t, which might be 64bit/32bit (depending
2201 			 * on architecture). Once we have proper 64bit/32bit
2202 			 * type (size_t), we can safely convert it to void pointer.
2203 			 */
2204 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
2205 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2206 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2207 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
2208 		}
2209 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
2210 		break;
2211 #endif
2212 
2213 	default :
2214 		break;
2215 	}
2216 
2217 	return 0;
2218 }
2219 
2220 /*
2221  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
2222  * are needed in Solaris kernel only. We don't need them in
2223  * ipftest to pretend the ICMP/RST packet was sent as a response.
2224  */
2225 #if defined(_KERNEL) && (SOLARIS2 >= 10)
2226 /* ------------------------------------------------------------------------ */
2227 /* Function:    fr_make_rst                                                 */
2228 /* Returns:     int - 0 on success, -1 on failure			    */
2229 /* Parameters:  fin(I) - pointer to packet information                      */
2230 /*                                                                          */
2231 /* We must alter the original mblks passed to IPF from IP stack via	    */
2232 /* FW_HOOKS. FW_HOOKS interface is powerfull, but it has some limitations.  */
2233 /* IPF can basicaly do only these things with mblk representing the packet: */
2234 /*	leave it as it is (pass the packet)				    */
2235 /*                                                                          */
2236 /*	discard it (block the packet)					    */
2237 /*                                                                          */
2238 /*	alter it (i.e. NAT)						    */
2239 /*                                                                          */
2240 /* As you can see IPF can not simply discard the mblk and supply a new one  */
2241 /* instead to IP stack via FW_HOOKS.					    */
2242 /*                                                                          */
2243 /* The return-rst action for packets coming via NIC is handled as follows:  */
2244 /*	mblk with packet is discarded					    */
2245 /*                                                                          */
2246 /*	new mblk with RST response is constructed and injected to network   */
2247 /*                                                                          */
2248 /* IPF can't inject packets to loopback interface, this is just another	    */
2249 /* limitation we have to deal with here. The only option to send RST	    */
2250 /* response to offending TCP packet coming via loopback is to alter it.	    */
2251 /*									    */
2252 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
2253 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
2254 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
2255 /* ------------------------------------------------------------------------ */
2256 int fr_make_rst(fin)
2257 fr_info_t *fin;
2258 {
2259 	uint16_t tmp_port;
2260 	int rv = -1;
2261 	uint32_t old_ack;
2262 	tcphdr_t *tcp = NULL;
2263 	struct in_addr tmp_src;
2264 #ifdef USE_INET6
2265 	struct in6_addr	tmp_src6;
2266 #endif
2267 
2268 	ASSERT(fin->fin_p == IPPROTO_TCP);
2269 
2270 	/*
2271 	 * We do not need to adjust chksum, since it is not being checked by
2272 	 * Solaris IP stack for loopback clients.
2273 	 */
2274 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
2275 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2276 
2277 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2278 			/* Swap IPv4 addresses. */
2279 			tmp_src = fin->fin_ip->ip_src;
2280 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2281 			fin->fin_ip->ip_dst = tmp_src;
2282 
2283 			rv = 0;
2284 		}
2285 		else
2286 			tcp = NULL;
2287 	}
2288 #ifdef USE_INET6
2289 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
2290 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
2291 		/*
2292 		 * We are relying on fact the next header is TCP, which is true
2293 		 * for regular TCP packets coming in over loopback.
2294 		 */
2295 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
2296 			/* Swap IPv6 addresses. */
2297 			tmp_src6 = fin->fin_ip6->ip6_src;
2298 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2299 			fin->fin_ip6->ip6_dst = tmp_src6;
2300 
2301 			rv = 0;
2302 		}
2303 		else
2304 			tcp = NULL;
2305 	}
2306 #endif
2307 
2308 	if (tcp != NULL) {
2309 		/*
2310 		 * Adjust TCP header:
2311 		 *	swap ports,
2312 		 *	set flags,
2313 		 *	set correct ACK number
2314 		 */
2315 		tmp_port = tcp->th_sport;
2316 		tcp->th_sport = tcp->th_dport;
2317 		tcp->th_dport = tmp_port;
2318 		old_ack = tcp->th_ack;
2319 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
2320 		tcp->th_seq = old_ack;
2321 		tcp->th_flags = TH_RST | TH_ACK;
2322 	}
2323 
2324 	return (rv);
2325 }
2326 
2327 /* ------------------------------------------------------------------------ */
2328 /* Function:    fr_make_icmp_v4                                             */
2329 /* Returns:     int - 0 on success, -1 on failure			    */
2330 /* Parameters:  fin(I) - pointer to packet information                      */
2331 /*                                                                          */
2332 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2333 /* what is going to happen here and why. Once you read the comment there,   */
2334 /* continue here with next paragraph.					    */
2335 /*									    */
2336 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
2337 /* happen here:								    */
2338 /*	(1) Original mblk is copied (duplicated).			    */
2339 /*                                                                          */
2340 /*	(2) ICMP header is created.					    */
2341 /*                                                                          */
2342 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
2343 /*	    data ready then.						    */
2344 /*                                                                          */
2345 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
2346 /*                                                                          */
2347 /*	(5) The mblk containing original packet is trimmed to contain IP    */
2348 /*	    header only and ICMP chksum is computed.			    */
2349 /*                                                                          */
2350 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
2351 /*	    which now contains new IP header. If original packet was spread */
2352 /*	    over several mblks, only the first mblk is kept.		    */
2353 /* ------------------------------------------------------------------------ */
2354 static int fr_make_icmp_v4(fin)
2355 fr_info_t *fin;
2356 {
2357 	struct in_addr tmp_src;
2358 	tcphdr_t *tcp;
2359 	struct icmp *icmp;
2360 	mblk_t *mblk_icmp;
2361 	mblk_t *mblk_ip;
2362 	size_t icmp_pld_len;	/* octets to append to ICMP header */
2363 	size_t orig_iphdr_len;	/* length of IP header only */
2364 	uint32_t sum;
2365 	uint16_t *buf;
2366 	int len;
2367 
2368 
2369 	if (fin->fin_v != 4)
2370 		return (-1);
2371 
2372 	/*
2373 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
2374 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
2375 	 */
2376 	tcp = (tcphdr_t *) fin->fin_dp;
2377 
2378 	if ((fin->fin_p == IPPROTO_TCP) &&
2379 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2380 		return (-1);
2381 
2382 	/*
2383 	 * Step (1)
2384 	 *
2385 	 * Make copy of original mblk.
2386 	 *
2387 	 * We want to copy as much data as necessary, not less, not more.  The
2388 	 * ICMPv4 payload length for unreachable messages is:
2389 	 *	original IP header + 8 bytes of L4 (if there are any).
2390 	 *
2391 	 * We determine if there are at least 8 bytes of L4 data following IP
2392 	 * header first.
2393 	 */
2394 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
2395 		ICMPERR_ICMPHLEN : fin->fin_dlen;
2396 	/*
2397 	 * Since we don't want to copy more data than necessary, we must trim
2398 	 * the original mblk here.  The right way (STREAMish) would be to use
2399 	 * adjmsg() to trim it.  However we would have to calculate the length
2400 	 * argument for adjmsg() from pointers we already have here.
2401 	 *
2402 	 * Since we have pointers and offsets, it's faster and easier for
2403 	 * us to just adjust pointers by hand instead of using adjmsg().
2404 	 */
2405 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
2406 	fin->fin_m->b_wptr += icmp_pld_len;
2407 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
2408 
2409 	/*
2410 	 * Also we don't want to copy any L2 stuff, which might precede IP
2411 	 * header, so we have have to set b_rptr to point to the start of IP
2412 	 * header.
2413 	 */
2414 	fin->fin_m->b_rptr += fin->fin_ipoff;
2415 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2416 		return (-1);
2417 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2418 
2419 	/*
2420 	 * Step (2)
2421 	 *
2422 	 * Create an ICMP header, which will be appened to original mblk later.
2423 	 * ICMP header is just another mblk.
2424 	 */
2425 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
2426 	if (mblk_icmp == NULL) {
2427 		FREE_MB_T(mblk_ip);
2428 		return (-1);
2429 	}
2430 
2431 	MTYPE(mblk_icmp) = M_DATA;
2432 	icmp = (struct icmp *) mblk_icmp->b_wptr;
2433 	icmp->icmp_type = ICMP_UNREACH;
2434 	icmp->icmp_code = fin->fin_icode & 0xFF;
2435 	icmp->icmp_void = 0;
2436 	icmp->icmp_cksum = 0;
2437 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
2438 
2439 	/*
2440 	 * Step (3)
2441 	 *
2442 	 * Complete ICMP packet - link ICMP header with L4 data from original
2443 	 * IP packet.
2444 	 */
2445 	linkb(mblk_icmp, mblk_ip);
2446 
2447 	/*
2448 	 * Step (4)
2449 	 *
2450 	 * Swap IP addresses and change IP header fields accordingly in
2451 	 * original IP packet.
2452 	 *
2453 	 * There is a rule option return-icmp as a dest for physical
2454 	 * interfaces. This option becomes useless for loopback, since IPF box
2455 	 * uses same address as a loopback destination. We ignore the option
2456 	 * here, the ICMP packet will always look like as it would have been
2457 	 * sent from the original destination host.
2458 	 */
2459 	tmp_src = fin->fin_ip->ip_src;
2460 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
2461 	fin->fin_ip->ip_dst = tmp_src;
2462 	fin->fin_ip->ip_p = IPPROTO_ICMP;
2463 	fin->fin_ip->ip_sum = 0;
2464 
2465 	/*
2466 	 * Step (5)
2467 	 *
2468 	 * We trim the orignal mblk to hold IP header only.
2469 	 */
2470 	fin->fin_m->b_wptr = fin->fin_dp;
2471 	orig_iphdr_len = fin->fin_m->b_wptr -
2472 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
2473 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
2474 			    orig_iphdr_len);
2475 
2476 	/*
2477 	 * ICMP chksum calculation. The data we are calculating chksum for are
2478 	 * spread over two mblks, therefore we have to use two for loops.
2479 	 *
2480 	 * First for loop computes chksum part for ICMP header.
2481 	 */
2482 	buf = (uint16_t *) icmp;
2483 	len = ICMPERR_ICMPHLEN;
2484 	for (sum = 0; len > 1; len -= 2)
2485 		sum += *buf++;
2486 
2487 	/*
2488 	 * Here we add chksum part for ICMP payload.
2489 	 */
2490 	len = icmp_pld_len;
2491 	buf = (uint16_t *) mblk_ip->b_rptr;
2492 	for (; len > 1; len -= 2)
2493 		sum += *buf++;
2494 
2495 	/*
2496 	 * Chksum is done.
2497 	 */
2498 	sum = (sum >> 16) + (sum & 0xffff);
2499 	sum += (sum >> 16);
2500 	icmp->icmp_cksum = ~sum;
2501 
2502 	/*
2503 	 * Step (6)
2504 	 *
2505 	 * Release all packet mblks, except the first one.
2506 	 */
2507 	if (fin->fin_m->b_cont != NULL) {
2508 		FREE_MB_T(fin->fin_m->b_cont);
2509 	}
2510 
2511 	/*
2512 	 * Append ICMP payload to first mblk, which already contains new IP
2513 	 * header.
2514 	 */
2515 	linkb(fin->fin_m, mblk_icmp);
2516 
2517 	return (0);
2518 }
2519 
2520 #ifdef USE_INET6
2521 /* ------------------------------------------------------------------------ */
2522 /* Function:    fr_make_icmp_v6                                             */
2523 /* Returns:     int - 0 on success, -1 on failure			    */
2524 /* Parameters:  fin(I) - pointer to packet information                      */
2525 /*									    */
2526 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
2527 /* what and why is going to happen here. Once you read the comment there,   */
2528 /* continue here with next paragraph.					    */
2529 /*									    */
2530 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
2531 /* The algorithm is fairly simple:					    */
2532 /*	1) We need to get copy of complete mblk.			    */
2533 /*									    */
2534 /*	2) New ICMPv6 header is created.				    */
2535 /*									    */
2536 /*	3) The copy of original mblk with packet is linked to ICMPv6	    */
2537 /*	   header.							    */
2538 /*									    */
2539 /*	4) The checksum must be adjusted.				    */
2540 /*									    */
2541 /*	5) IP addresses in original mblk are swapped and IP header data	    */
2542 /*	   are adjusted (protocol number).				    */
2543 /*									    */
2544 /*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
2545 /*	   linked with the ICMPv6 data we got from (3).			    */
2546 /* ------------------------------------------------------------------------ */
2547 static int fr_make_icmp_v6(fin)
2548 fr_info_t *fin;
2549 {
2550 	struct icmp6_hdr *icmp6;
2551 	tcphdr_t *tcp;
2552 	struct in6_addr	tmp_src6;
2553 	size_t icmp_pld_len;
2554 	mblk_t *mblk_ip, *mblk_icmp;
2555 
2556 	if (fin->fin_v != 6)
2557 		return (-1);
2558 
2559 	/*
2560 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
2561 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
2562 	 */
2563 	tcp = (tcphdr_t *) fin->fin_dp;
2564 
2565 	if ((fin->fin_p == IPPROTO_TCP) &&
2566 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
2567 		return (-1);
2568 
2569 	/*
2570 	 * Step (1)
2571 	 *
2572 	 * We need to copy complete packet in case of IPv6, no trimming is
2573 	 * needed (except the L2 headers).
2574 	 */
2575 	icmp_pld_len = M_LEN(fin->fin_m);
2576 	fin->fin_m->b_rptr += fin->fin_ipoff;
2577 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
2578 		return (-1);
2579 	fin->fin_m->b_rptr -= fin->fin_ipoff;
2580 
2581 	/*
2582 	 * Step (2)
2583 	 *
2584 	 * Allocate and create ICMP header.
2585 	 */
2586 	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
2587 			BPRI_HI);
2588 
2589 	if (mblk_icmp == NULL)
2590 		return (-1);
2591 
2592 	MTYPE(mblk_icmp) = M_DATA;
2593 	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
2594 	icmp6->icmp6_type = ICMP6_DST_UNREACH;
2595 	icmp6->icmp6_code = fin->fin_icode & 0xFF;
2596 	icmp6->icmp6_data32[0] = 0;
2597 	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
2598 
2599 	/*
2600 	 * Step (3)
2601 	 *
2602 	 * Link the copy of IP packet to ICMP header.
2603 	 */
2604 	linkb(mblk_icmp, mblk_ip);
2605 
2606 	/*
2607 	 * Step (4)
2608 	 *
2609 	 * Calculate chksum - this is much more easier task than in case of
2610 	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
2611 	 * We are making compensation just for change of packet length.
2612 	 */
2613 	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
2614 
2615 	/*
2616 	 * Step (5)
2617 	 *
2618 	 * Swap IP addresses.
2619 	 */
2620 	tmp_src6 = fin->fin_ip6->ip6_src;
2621 	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
2622 	fin->fin_ip6->ip6_dst = tmp_src6;
2623 
2624 	/*
2625 	 * and adjust IP header data.
2626 	 */
2627 	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
2628 	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
2629 
2630 	/*
2631 	 * Step (6)
2632 	 *
2633 	 * We must release all linked mblks from original packet and keep only
2634 	 * the first mblk with IP header to link ICMP data.
2635 	 */
2636 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
2637 
2638 	if (fin->fin_m->b_cont != NULL) {
2639 		FREE_MB_T(fin->fin_m->b_cont);
2640 	}
2641 
2642 	/*
2643 	 * Append ICMP payload to IP header.
2644 	 */
2645 	linkb(fin->fin_m, mblk_icmp);
2646 
2647 	return (0);
2648 }
2649 #endif	/* USE_INET6 */
2650 
2651 /* ------------------------------------------------------------------------ */
2652 /* Function:    fr_make_icmp                                                */
2653 /* Returns:     int - 0 on success, -1 on failure			    */
2654 /* Parameters:  fin(I) - pointer to packet information                      */
2655 /*                                                                          */
2656 /* We must alter the original mblks passed to IPF from IP stack via	    */
2657 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
2658 /* comment at fr_make_rst() function.					    */
2659 /*									    */
2660 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
2661 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
2662 /* protocol version. However there are some details, which are common to    */
2663 /* both IP versions. The details are going to be explained here.	    */
2664 /*                                                                          */
2665 /* The packet looks as follows:						    */
2666 /*    xxx | IP hdr | IP payload    ...	| 				    */
2667 /*    ^   ^        ^            	^				    */
2668 /*    |   |        |            	|				    */
2669 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
2670 /*    |   |        |							    */
2671 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
2672 /*    |   |								    */
2673 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
2674 /*    |      of loopback)						    */
2675 /*    |   								    */
2676 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
2677 /*                                                                          */
2678 /* All relevant IP headers are pulled up into the first mblk. It happened   */
2679 /* well in advance before the matching rule was found (the rule, which took */
2680 /* us here, to fr_make_icmp() function).				    */
2681 /*                                                                          */
2682 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
2683 /* packet. New packet will be represented as chain of mblks.		    */
2684 /* orig mblk |- b_cont ---.						    */
2685 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
2686 /*    |	                          ^	            `-> duped orig mblk	    */
2687 /*    |                           |				^	    */
2688 /*    `- The original mblk        |				|	    */
2689 /*       will be trimmed to       |				|	    */
2690 /*       to contain IP header     |				|	    */
2691 /*       only                     |				|	    */
2692 /*                                |				|	    */
2693 /*                                `- This is newly		|           */
2694 /*                                   allocated mblk to		|	    */
2695 /*                                   hold ICMPv6 data.		|	    */
2696 /*								|	    */
2697 /*								|	    */
2698 /*								|	    */
2699 /*	    This is the copy of original mblk, it will contain -'	    */
2700 /*	    orignal IP  packet in case of ICMPv6. In case of		    */
2701 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
2702 /*	    (TCP/UDP/L4) data from original packet.			    */
2703 /* ------------------------------------------------------------------------ */
2704 int fr_make_icmp(fin)
2705 fr_info_t *fin;
2706 {
2707 	int rv;
2708 
2709 	if (fin->fin_v == 4)
2710 		rv = fr_make_icmp_v4(fin);
2711 #ifdef USE_INET6
2712 	else if (fin->fin_v == 6)
2713 		rv = fr_make_icmp_v6(fin);
2714 #endif
2715 	else
2716 		rv = -1;
2717 
2718 	return (rv);
2719 }
2720 #endif	/* _KERNEL && SOLARIS2 >= 10 */
2721