xref: /illumos-gate/usr/src/uts/common/inet/ip/ip6_asp.c (revision b6805bf78d2bbbeeaea8909a05623587b42d58b3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/socket.h>
28 #include <sys/ksynch.h>
29 #include <sys/kmem.h>
30 #include <sys/errno.h>
31 #include <sys/systm.h>
32 #include <sys/sysmacros.h>
33 #include <sys/cmn_err.h>
34 #include <sys/strsun.h>
35 #include <sys/zone.h>
36 #include <netinet/in.h>
37 #include <inet/common.h>
38 #include <inet/ip.h>
39 #include <inet/ip6.h>
40 #include <inet/ip6_asp.h>
41 #include <inet/ip_ire.h>
42 #include <inet/ip_if.h>
43 #include <inet/ipclassifier.h>
44 
45 #define	IN6ADDR_MASK128_INIT \
46 	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0xffffffffU }
47 #define	IN6ADDR_MASK96_INIT	{ 0xffffffffU, 0xffffffffU, 0xffffffffU, 0 }
48 #ifdef _BIG_ENDIAN
49 #define	IN6ADDR_MASK16_INIT	{ 0xffff0000U, 0, 0, 0 }
50 #else
51 #define	IN6ADDR_MASK16_INIT	{ 0x0000ffffU, 0, 0, 0 }
52 #endif
53 
54 
55 /*
56  * This table is ordered such that longest prefix matches are hit first
57  * (longer prefix lengths first).  The last entry must be the "default"
58  * entry (::0/0).
59  */
60 static ip6_asp_t default_ip6_asp_table[] = {
61 	{ IN6ADDR_LOOPBACK_INIT,	IN6ADDR_MASK128_INIT,
62 	    "Loopback", 50 },
63 	{ IN6ADDR_ANY_INIT,		IN6ADDR_MASK96_INIT,
64 	    "IPv4_Compatible", 20 },
65 #ifdef _BIG_ENDIAN
66 	{ { 0, 0, 0x0000ffffU, 0 },	IN6ADDR_MASK96_INIT,
67 	    "IPv4", 10 },
68 	{ { 0x20020000U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
69 	    "6to4", 30 },
70 #else
71 	{ { 0, 0, 0xffff0000U, 0 },	IN6ADDR_MASK96_INIT,
72 	    "IPv4", 10 },
73 	{ { 0x00000220U, 0, 0, 0 },	IN6ADDR_MASK16_INIT,
74 	    "6to4", 30 },
75 #endif
76 	{ IN6ADDR_ANY_INIT,		IN6ADDR_ANY_INIT,
77 	    "Default", 40 }
78 };
79 
80 /*
81  * The IPv6 Default Address Selection policy table.
82  * Until someone up above reconfigures the policy table, use the global
83  * default.  The table needs no lock since the only way to alter it is
84  * through the SIOCSIP6ADDRPOLICY which is exclusive in ip.
85  */
/* Forward declarations for helpers that are private to this file. */
static void ip6_asp_copy(ip6_asp_t *, ip6_asp_t *, uint_t);
static void ip6_asp_check_for_updates(ip_stack_t *);
88 
89 void
90 ip6_asp_init(ip_stack_t *ipst)
91 {
92 	/* Initialize the table lock */
93 	mutex_init(&ipst->ips_ip6_asp_lock, NULL, MUTEX_DEFAULT, NULL);
94 
95 	ipst->ips_ip6_asp_table = default_ip6_asp_table;
96 
97 	ipst->ips_ip6_asp_table_count =
98 	    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
99 }
100 
101 void
102 ip6_asp_free(ip_stack_t *ipst)
103 {
104 	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
105 		kmem_free(ipst->ips_ip6_asp_table,
106 		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
107 		ipst->ips_ip6_asp_table = NULL;
108 	}
109 	mutex_destroy(&ipst->ips_ip6_asp_lock);
110 }
111 
112 /*
113  * Return false if the table is being updated. Else, increment the ref
114  * count and return true.
115  */
116 boolean_t
117 ip6_asp_can_lookup(ip_stack_t *ipst)
118 {
119 	mutex_enter(&ipst->ips_ip6_asp_lock);
120 	if (ipst->ips_ip6_asp_uip) {
121 		mutex_exit(&ipst->ips_ip6_asp_lock);
122 		return (B_FALSE);
123 	}
124 	IP6_ASP_TABLE_REFHOLD(ipst);
125 	mutex_exit(&ipst->ips_ip6_asp_lock);
126 	return (B_TRUE);
127 
128 }
129 
130 void
131 ip6_asp_pending_op(queue_t *q, mblk_t *mp, aspfunc_t func)
132 {
133 	conn_t	*connp = Q_TO_CONN(q);
134 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
135 
136 	ASSERT((mp->b_prev == NULL) && (mp->b_queue == NULL) &&
137 	    (mp->b_next == NULL));
138 	mp->b_queue = (void *)q;
139 	mp->b_prev = (void *)func;
140 	mp->b_next = NULL;
141 
142 	mutex_enter(&ipst->ips_ip6_asp_lock);
143 	if (ipst->ips_ip6_asp_pending_ops == NULL) {
144 		ASSERT(ipst->ips_ip6_asp_pending_ops_tail == NULL);
145 		ipst->ips_ip6_asp_pending_ops =
146 		    ipst->ips_ip6_asp_pending_ops_tail = mp;
147 	} else {
148 		ipst->ips_ip6_asp_pending_ops_tail->b_next = mp;
149 		ipst->ips_ip6_asp_pending_ops_tail = mp;
150 	}
151 	mutex_exit(&ipst->ips_ip6_asp_lock);
152 }
153 
154 static void
155 ip6_asp_complete_op(ip_stack_t *ipst)
156 {
157 	mblk_t		*mp;
158 	queue_t		*q;
159 	aspfunc_t	func;
160 
161 	mutex_enter(&ipst->ips_ip6_asp_lock);
162 	while (ipst->ips_ip6_asp_pending_ops != NULL) {
163 		mp = ipst->ips_ip6_asp_pending_ops;
164 		ipst->ips_ip6_asp_pending_ops = mp->b_next;
165 		mp->b_next = NULL;
166 		if (ipst->ips_ip6_asp_pending_ops == NULL)
167 			ipst->ips_ip6_asp_pending_ops_tail = NULL;
168 		mutex_exit(&ipst->ips_ip6_asp_lock);
169 
170 		q = (queue_t *)mp->b_queue;
171 		func = (aspfunc_t)mp->b_prev;
172 
173 		mp->b_prev = NULL;
174 		mp->b_queue = NULL;
175 
176 
177 		(*func)(NULL, q, mp, NULL);
178 		mutex_enter(&ipst->ips_ip6_asp_lock);
179 	}
180 	mutex_exit(&ipst->ips_ip6_asp_lock);
181 }
182 
183 /*
184  * Decrement reference count. When it gets to 0, we check for (pending)
185  * saved update to the table, if any.
186  */
187 void
188 ip6_asp_table_refrele(ip_stack_t *ipst)
189 {
190 	IP6_ASP_TABLE_REFRELE(ipst);
191 }
192 
193 /*
194  * This function is guaranteed never to return a NULL pointer.  It
195  * will always return information from one of the entries in the
196  * asp_table (which will never be empty).  If a pointer is passed
197  * in for the precedence, the precedence value will be set; a
198  * pointer to the label will be returned by the function.
199  *
200  * Since the table is only anticipated to have five or six entries
201  * total, the lookup algorithm hasn't been optimized to anything
202  * better than O(n).
203  */
204 char *
205 ip6_asp_lookup(const in6_addr_t *addr, uint32_t *precedence, ip_stack_t *ipst)
206 {
207 	ip6_asp_t *aspp;
208 	ip6_asp_t *match = NULL;
209 	ip6_asp_t *default_policy;
210 
211 	aspp = ipst->ips_ip6_asp_table;
212 	/* The default entry must always be the last one */
213 	default_policy = aspp + ipst->ips_ip6_asp_table_count - 1;
214 
215 	while (match == NULL) {
216 		if (aspp == default_policy) {
217 			match = aspp;
218 		} else {
219 			if (V6_MASK_EQ(*addr, aspp->ip6_asp_mask,
220 			    aspp->ip6_asp_prefix))
221 				match = aspp;
222 			else
223 				aspp++;
224 		}
225 	}
226 
227 	if (precedence != NULL)
228 		*precedence = match->ip6_asp_precedence;
229 	return (match->ip6_asp_label);
230 }
231 
232 /*
233  * If we had deferred updating the table because of outstanding references,
234  * do it now. Note, we don't do error checking on the queued IOCTL mblk, since
235  * ip_sioctl_ip6addrpolicy() has already done it for us.
236  */
237 void
238 ip6_asp_check_for_updates(ip_stack_t *ipst)
239 {
240 	ip6_asp_t *table;
241 	size_t	table_size;
242 	mblk_t	*data_mp, *mp;
243 	struct iocblk *iocp;
244 
245 	mutex_enter(&ipst->ips_ip6_asp_lock);
246 	if (ipst->ips_ip6_asp_pending_update == NULL ||
247 	    ipst->ips_ip6_asp_refcnt > 0) {
248 		mutex_exit(&ipst->ips_ip6_asp_lock);
249 		return;
250 	}
251 
252 	mp = ipst->ips_ip6_asp_pending_update;
253 	ipst->ips_ip6_asp_pending_update = NULL;
254 	ASSERT(mp->b_prev != NULL);
255 
256 	ipst->ips_ip6_asp_uip = B_TRUE;
257 
258 	iocp = (struct iocblk *)mp->b_rptr;
259 	data_mp = mp->b_cont;
260 	if (data_mp == NULL) {
261 		table = NULL;
262 		table_size = iocp->ioc_count;
263 	} else {
264 		table = (ip6_asp_t *)data_mp->b_rptr;
265 		table_size = iocp->ioc_count;
266 	}
267 
268 	ip6_asp_replace(mp, table, table_size, B_TRUE, ipst,
269 	    iocp->ioc_flag & IOC_MODELS);
270 }
271 
/*
 * ip6_asp_replace replaces the contents of the IPv6 address selection
 * policy table with those specified in new_table.  If new_table is NULL,
 * this indicates that the caller wishes ip to use the default policy
 * table.  The caller is responsible for making sure that new_table
 * really holds new_size bytes of policy entries.
 *
 * Arguments:
 *	mp		the ioctl message; mp->b_prev holds the queue on
 *			which to send the reply (the message is freed
 *			without a reply if it is NULL).
 *	new_table	entries to install, or NULL to revert to the default.
 *	new_size	size of new_table in bytes; must be a whole multiple
 *			of the per-entry size used for datamodel.
 *	locked		B_TRUE if the caller already holds ips_ip6_asp_lock
 *			(as ip6_asp_check_for_updates() does).  The lock is
 *			dropped here in either case.
 *	ipst		the IP stack instance whose table is replaced.
 *	datamodel	data model of the requesting process; only looked at
 *			on kernels that need the ILP32 layout fix-up below.
 *
 * The outcome is delivered as an M_IOCACK/M_IOCNAK reply rather than a
 * return value.  Errors reported: EINVAL (size not a multiple of the
 * entry size, empty table, or last entry after sorting is not ::0/0),
 * EAGAIN (another update is already waiting), ENOMEM (allocation failed).
 *
 * If the table is still referenced and the caller does not hold the lock,
 * the request is parked in ips_ip6_asp_pending_update and this function
 * returns without replying; ip6_asp_check_for_updates() resubmits it.
 */
/*ARGSUSED5*/
void
ip6_asp_replace(mblk_t *mp, ip6_asp_t *new_table, size_t new_size,
    boolean_t locked, ip_stack_t *ipst, model_t datamodel)
{
	int			ret_val = 0;
	ip6_asp_t		*tmp_table;
	uint_t			count;
	queue_t			*q;
	struct iocblk		*iocp;
	/* Size of one entry as laid out by the requesting process. */
#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
	size_t ip6_asp_size = SIZEOF_STRUCT(ip6_asp, datamodel);
#else
	const size_t ip6_asp_size = sizeof (ip6_asp_t);
#endif

	if (new_size % ip6_asp_size != 0) {
		ip1dbg(("ip6_asp_replace: invalid table size\n"));
		ret_val = EINVAL;
		/* Only go through unlock_end if the lock is actually held. */
		if (locked)
			goto unlock_end;
		goto replace_end;
	} else {
		count = new_size / ip6_asp_size;
	}


	if (!locked)
		mutex_enter(&ipst->ips_ip6_asp_lock);
	/*
	 * Check if we are in the process of creating any IRE using the
	 * current information. If so, wait till that is done.
	 */
	if (!locked && ipst->ips_ip6_asp_refcnt > 0) {
		/* Save this request for later processing */
		if (ipst->ips_ip6_asp_pending_update == NULL) {
			ipst->ips_ip6_asp_pending_update = mp;
		} else {
			/* Let's not queue multiple requests for now */
			ip1dbg(("ip6_asp_replace: discarding request\n"));
			mutex_exit(&ipst->ips_ip6_asp_lock);
			ret_val =  EAGAIN;
			goto replace_end;
		}
		/* Parked: no reply is sent until the update is retried. */
		mutex_exit(&ipst->ips_ip6_asp_lock);
		return;
	}

	/*
	 * Prevent lookups till the table have been updated.  When called
	 * with locked set, ip6_asp_check_for_updates() has already set
	 * the flag.
	 */
	if (!locked)
		ipst->ips_ip6_asp_uip = B_TRUE;

	ASSERT(ipst->ips_ip6_asp_refcnt == 0);

	if (new_table == NULL) {
		/*
		 * This is a special case.  The user wants to revert
		 * back to using the default table.
		 */
		if (ipst->ips_ip6_asp_table == default_ip6_asp_table)
			goto unlock_end;

		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
		ipst->ips_ip6_asp_table = default_ip6_asp_table;
		ipst->ips_ip6_asp_table_count =
		    sizeof (default_ip6_asp_table) / sizeof (ip6_asp_t);
		goto unlock_end;
	}

	if (count == 0) {
		ret_val = EINVAL;
		ip1dbg(("ip6_asp_replace: empty table\n"));
		goto unlock_end;
	}

	/* The table lock is held, so the allocation must not sleep. */
	if ((tmp_table = kmem_alloc(count * sizeof (ip6_asp_t), KM_NOSLEEP)) ==
	    NULL) {
		ret_val = ENOMEM;
		goto unlock_end;
	}

#if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4

	/*
	 * If 'new_table' -actually- originates from a 32-bit process
	 * then the nicely aligned ip6_asp_label array will be
	 * subtly misaligned on this kernel, because the structure
	 * is 8 byte aligned in the kernel, but only 4 byte aligned in
	 * userland.  Fix it up here.
	 *
	 * XX64	See the notes in ip_sioctl_ip6addrpolicy.  Perhaps we could
	 *	do the datamodel transformation (below) there instead of here?
	 */
	if (datamodel == IOC_ILP32) {
		ip6_asp_t *dst;
		ip6_asp32_t *src;
		int i;

		/* Scratch array in native layout; zeroed so the pad is 0. */
		if ((dst = kmem_zalloc(count * sizeof (*dst),
		    KM_NOSLEEP)) == NULL) {
			kmem_free(tmp_table, count * sizeof (ip6_asp_t));
			ret_val = ENOMEM;
			goto unlock_end;
		}

		/*
		 * Copy each element of the table from ip6_asp32_t
		 * format into ip6_asp_t format.  Fortunately, since
		 * we're just dealing with a trailing structure pad,
		 * we can do this straightforwardly with a flurry of
		 * bcopying.
		 */
		src = (void *)new_table;
		for (i = 0; i < count; i++)
			bcopy(src + i, dst + i, sizeof (*src));

		ip6_asp_copy(dst, tmp_table, count);
		kmem_free(dst, count * sizeof (*dst));
	} else
#endif
		/* Copy into tmp_table, sorted by descending prefix length. */
		ip6_asp_copy(new_table, tmp_table, count);

	/* Make sure the last entry is the default entry */
	if (!IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_prefix) ||
	    !IN6_IS_ADDR_UNSPECIFIED(&tmp_table[count - 1].ip6_asp_mask)) {
		ret_val = EINVAL;
		kmem_free(tmp_table, count * sizeof (ip6_asp_t));
		ip1dbg(("ip6_asp_replace: bad table: no default entry\n"));
		goto unlock_end;
	}
	/* Release the old table unless it is the static default. */
	if (ipst->ips_ip6_asp_table != default_ip6_asp_table) {
		kmem_free(ipst->ips_ip6_asp_table,
		    ipst->ips_ip6_asp_table_count * sizeof (ip6_asp_t));
	}
	ipst->ips_ip6_asp_table = tmp_table;
	ipst->ips_ip6_asp_table_count = count;

	/*
	 * Reached with the table lock held, on success and on failure
	 * alike: allow lookups again and drop the lock.
	 */
unlock_end:
	ipst->ips_ip6_asp_uip = B_FALSE;
	mutex_exit(&ipst->ips_ip6_asp_lock);

	/* Let conn_ixa caching know that source address selection changed */
	ip_update_source_selection(ipst);

	/* Reached without the table lock held. */
replace_end:
	/* Reply to the ioctl */
	q = (queue_t *)mp->b_prev;
	mp->b_prev = NULL;
	if (q == NULL) {
		/* No queue to answer on; just discard the request. */
		freemsg(mp);
		goto check_binds;
	}
	iocp = (struct iocblk *)mp->b_rptr;
	iocp->ioc_error = ret_val;
	iocp->ioc_count = 0;
	DB_TYPE(mp) = (iocp->ioc_error == 0) ? M_IOCACK : M_IOCNAK;
	qreply(q, mp);
check_binds:
	/* Run any operations that were parked by ip6_asp_pending_op(). */
	ip6_asp_complete_op(ipst);
}
440 
441 /*
442  * Copies the contents of src_table to dst_table, and sorts the
443  * entries in decending order of prefix lengths.  It assumes that both
444  * tables are appropriately sized to contain count entries.
445  */
446 static void
447 ip6_asp_copy(ip6_asp_t *src_table, ip6_asp_t *dst_table, uint_t count)
448 {
449 	ip6_asp_t *src_ptr, *src_limit, *dst_ptr, *dst_limit, *dp;
450 
451 	dst_table[0] = src_table[0];
452 	if (count == 1)
453 		return;
454 
455 	/*
456 	 * Sort the entries in descending order of prefix lengths.
457 	 *
458 	 * Note: this should be a small table.  In 99% of cases, we
459 	 * expect the table to have 5 entries.  In the remaining 1%
460 	 * of cases, we expect the table to have one or two more
461 	 * entries.  It would be very rare for the table to have
462 	 * double-digit entries.
463 	 */
464 	src_limit = src_table + count;
465 	dst_limit = dst_table + 1;
466 	for (src_ptr = src_table + 1; src_ptr != src_limit;
467 	    src_ptr++, dst_limit++) {
468 		for (dst_ptr = dst_table; dst_ptr < dst_limit; dst_ptr++) {
469 			if (ip_mask_to_plen_v6(&src_ptr->ip6_asp_mask) >
470 			    ip_mask_to_plen_v6(&dst_ptr->ip6_asp_mask)) {
471 				/*
472 				 * Make room to insert the source entry
473 				 * before dst_ptr by shifting entries to
474 				 * the right.
475 				 */
476 				for (dp = dst_limit - 1; dp >= dst_ptr; dp--)
477 					*(dp + 1) = *dp;
478 				break;
479 			}
480 		}
481 		*dst_ptr = *src_ptr;
482 	}
483 }
484 
485 /*
486  * This function copies as many entries from ip6_asp_table as will fit
487  * into dtable.  The dtable_size parameter is the size of dtable
488  * in bytes.  This function returns the number of entries in
489  * ip6_asp_table, even if it's not able to fit all of the entries into
490  * dtable.
491  */
492 int
493 ip6_asp_get(ip6_asp_t *dtable, size_t dtable_size, ip_stack_t *ipst)
494 {
495 	uint_t dtable_count;
496 
497 	if (dtable != NULL) {
498 		if (dtable_size < sizeof (ip6_asp_t))
499 			return (-1);
500 
501 		dtable_count = dtable_size / sizeof (ip6_asp_t);
502 		bcopy(ipst->ips_ip6_asp_table, dtable,
503 		    MIN(ipst->ips_ip6_asp_table_count, dtable_count) *
504 		    sizeof (ip6_asp_t));
505 	}
506 
507 	return (ipst->ips_ip6_asp_table_count);
508 }
509 
510 /*
511  * Compare two labels.  Return B_TRUE if they are equal, B_FALSE
512  * otherwise.
513  */
514 boolean_t
515 ip6_asp_labelcmp(const char *label1, const char *label2)
516 {
517 	int64_t *llptr1, *llptr2;
518 
519 	/*
520 	 * The common case, the two labels are actually the same string
521 	 * from the policy table.
522 	 */
523 	if (label1 == label2)
524 		return (B_TRUE);
525 
526 	/*
527 	 * Since we know the labels are at most 16 bytes long, compare
528 	 * the two strings as two 8-byte long integers.  The ip6_asp_t
529 	 * structure guarantees that the labels are 8 byte alligned.
530 	 */
531 	llptr1 = (int64_t *)label1;
532 	llptr2 = (int64_t *)label2;
533 	if (llptr1[0] == llptr2[0] && llptr1[1] == llptr2[1])
534 		return (B_TRUE);
535 	return (B_FALSE);
536 }
537