xref: /illumos-gate/usr/src/uts/common/inet/tcp/tcp_sig.c (revision 4e0cc57d7ff13862aa2a3b1eed78c72355eda972)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 /*
17  * RFC 2385 TCP MD5 Signature Option
18  *
19  * A security option commonly used to enhance security for BGP sessions. When a
20  * TCP socket has its TCP_MD5SIG option enabled, an additional TCP option is
21  * added to the header containing an MD5 digest calculated across the pseudo IP
22  * header, part of the TCP header, the data in the segment and a shared secret.
23  * The option is large (18 bytes plus 2 more for padding to a word boundary),
24  * and often /just/ fits in the TCP header -- particularly with SYN packets due
25  * to their additional options such as MSS.
26  *
27  * The socket option is boolean, and it is also necessary to have configured a
28  * security association (SA) to match the traffic that should be signed, and to
29  * provide the signing key. These SAs are configured from userland via
30  * tcpkey(8), use source and destination addresses and ports as criteria, and
31  * are maintained in a per-netstack linked list. The SAs pertaining to a
32  * particular TCP connection, one for each direction, are cached in the
33  * connection's TCP state after the first packet has been processed, and so
34  * using a single list is not a significant overhead, particularly as it is
35  * expected to be short.
36  *
37  * Enabling the socket option has a number of side effects:
38  *
39  *  - TCP fast path is disabled;
40  *  - TCP Fusion is disabled;
41  *  - Outbound packets for which a matching SA cannot be found are silently
42  *    discarded.
43  *  - Inbound packets that DO NOT contain an MD5 option in their TCP header are
44  *    silently discarded.
45  *  - Inbound packets that DO contain an MD5 option but for which the digest
46  *    does not match the locally calculated one are silently discarded.
47  *
48  * An SA is bound to a TCP stream once the first packet is sent or received
49  * following the TCP_MD5SIG socket option being enabled. Typically an
50  * application will enable the socket option immediately after creating the
51  * socket, and before moving on to calling connect() or bind() but it is
52  * necessary to wait for the first packet as that is the point at which the
53  * source and destination addresses and ports are all known, and we need these
54  * to find the SA. Note that if no matching SA is present in the database when
55  * the first packet is sent or received, it will be silently dropped. Due to
56  * the reference counting and tombstone logic, an SA that has been bound to one
57  * or more streams will persist until all of those streams have been torn down.
58  * It is not possible to change the SA for an active connection.
59  *
60  * -------------
61  * Lock Ordering
62  * -------------
63  *
64  * In order to ensure that we don't deadlock, if both are required, the RW lock
65  * across the SADB must be taken before acquiring an individual SA's lock. That
66  * is, locks must be taken in the following order (and released in the opposite
67  * order):
68  *
69  * 0) <tcpstack>->tcps_sigdb->td_lock
 * 1) <tcpstack>->tcps_sigdb->td_salist-><entry>->ts_lock
71  *
72  * The lock at <tcpstack>->tcps_sigdb_lock is independent and used to
73  * synchronize lazy initialization of the database.
74  */
75 
76 #include <sys/atomic.h>
77 #include <sys/cmn_err.h>
78 #include <sys/cpuvar.h>
79 #include <sys/debug.h>
80 #include <sys/errno.h>
81 #include <sys/kmem.h>
82 #include <sys/list.h>
83 #include <sys/md5.h>
84 #include <sys/stdbool.h>
85 #include <sys/stream.h>
86 #include <sys/stropts.h>
87 #include <sys/strsubr.h>
88 #include <sys/strsun.h>
89 #include <sys/sysmacros.h>
90 #include <sys/types.h>
91 #include <netinet/in.h>
92 #include <netinet/ip6.h>
93 #include <net/pfkeyv2.h>
94 #include <net/pfpolicy.h>
95 #include <inet/common.h>
96 #include <inet/mi.h>
97 #include <inet/ip.h>
98 #include <inet/ip6.h>
99 #include <inet/ip_if.h>
100 #include <inet/tcp_stats.h>
101 #include <inet/keysock.h>
102 #include <inet/sadb.h>
103 #include <inet/tcp_sig.h>
104 
105 static void tcpsig_sa_free(tcpsig_sa_t *);
106 
107 void
108 tcpsig_init(tcp_stack_t *tcps)
109 {
110 	mutex_init(&tcps->tcps_sigdb_lock, NULL, MUTEX_DEFAULT, NULL);
111 }
112 
113 void
114 tcpsig_fini(tcp_stack_t *tcps)
115 {
116 	tcpsig_db_t *db;
117 
118 	if ((db = tcps->tcps_sigdb) != NULL) {
119 		tcpsig_sa_t *sa;
120 
121 		rw_destroy(&db->td_lock);
122 		while ((sa = list_remove_head(&db->td_salist)) != NULL)
123 			tcpsig_sa_free(sa);
124 		list_destroy(&db->td_salist);
125 		kmem_free(tcps->tcps_sigdb, sizeof (tcpsig_db_t));
126 		tcps->tcps_sigdb = NULL;
127 	}
128 	mutex_destroy(&tcps->tcps_sigdb_lock);
129 }
130 
131 static tcpsig_db_t *
132 tcpsig_db(tcp_stack_t *tcps)
133 {
134 	mutex_enter(&tcps->tcps_sigdb_lock);
135 	if (tcps->tcps_sigdb == NULL) {
136 		tcpsig_db_t *db = kmem_alloc(sizeof (tcpsig_db_t), KM_SLEEP);
137 
138 		rw_init(&db->td_lock, NULL, RW_DEFAULT, 0);
139 		list_create(&db->td_salist, sizeof (tcpsig_sa_t),
140 		    offsetof(tcpsig_sa_t, ts_link));
141 
142 		tcps->tcps_sigdb = db;
143 	}
144 	mutex_exit(&tcps->tcps_sigdb_lock);
145 
146 	return ((tcpsig_db_t *)tcps->tcps_sigdb);
147 }
148 
149 static void
150 tcpsig_sa_free(tcpsig_sa_t *sa)
151 {
152 	ASSERT0(sa->ts_refcnt);
153 	mutex_destroy(&sa->ts_lock);
154 	kmem_free(sa->ts_key.sak_key, sa->ts_key.sak_keylen);
155 	kmem_free(sa, sizeof (*sa));
156 }
157 
158 void
159 tcpsig_sa_rele(tcpsig_sa_t *sa)
160 {
161 	mutex_enter(&sa->ts_lock);
162 	VERIFY3U(sa->ts_refcnt, >, 0);
163 	sa->ts_refcnt--;
164 	/*
165 	 * If we are tombstoned (have been marked as deleted) and the reference
166 	 * count has now dropped to zero, then we can go ahead and finally
167 	 * remove this SA from the database.
168 	 */
169 	if (sa->ts_tombstoned && sa->ts_refcnt == 0) {
170 		tcpsig_db_t *db = tcpsig_db(sa->ts_stack);
171 
172 		/*
173 		 * To maintain the required lock ordering, we need to drop the
174 		 * lock on the SA while acquiring the RW lock on the list. Take
175 		 * an additional hold before doing this dance and drop it once
176 		 * we have re-gained the lock.
177 		 */
178 		sa->ts_refcnt++;
179 		mutex_exit(&sa->ts_lock);
180 		rw_enter(&db->td_lock, RW_WRITER);
181 		mutex_enter(&sa->ts_lock);
182 		sa->ts_refcnt--;
183 		mutex_exit(&sa->ts_lock);
184 
185 		list_remove(&db->td_salist, sa);
186 
187 		rw_exit(&db->td_lock);
188 		tcpsig_sa_free(sa);
189 	} else {
190 		mutex_exit(&sa->ts_lock);
191 	}
192 }
193 
194 static bool
195 tcpsig_sa_match4(tcpsig_sa_t *sa, struct sockaddr_storage *src_s,
196     struct sockaddr_storage *dst_s)
197 {
198 	sin_t msrc, mdst, *src, *dst, *sasrc, *sadst;
199 
200 	if (src_s->ss_family != AF_INET)
201 		return (false);
202 
203 	src = (sin_t *)src_s;
204 	dst = (sin_t *)dst_s;
205 
206 	if (sa->ts_family == AF_INET6) {
207 		sin6_t *sasrc6 = (sin6_t *)&sa->ts_src;
208 		sin6_t *sadst6 = (sin6_t *)&sa->ts_dst;
209 
210 		if (!IN6_IS_ADDR_V4MAPPED(&sasrc6->sin6_addr) ||
211 		    !IN6_IS_ADDR_V4MAPPED(&sadst6->sin6_addr)) {
212 			return (false);
213 		}
214 
215 		msrc = sin_null;
216 		msrc.sin_family = AF_INET;
217 		msrc.sin_port = sasrc6->sin6_port;
218 		IN6_V4MAPPED_TO_INADDR(&sasrc6->sin6_addr, &msrc.sin_addr);
219 		sasrc = &msrc;
220 
221 		mdst = sin_null;
222 		mdst.sin_family = AF_INET;
223 		mdst.sin_port = sadst6->sin6_port;
224 		IN6_V4MAPPED_TO_INADDR(&sadst6->sin6_addr, &mdst.sin_addr);
225 		sadst = &mdst;
226 	} else {
227 		sasrc = (sin_t *)&sa->ts_src;
228 		sadst = (sin_t *)&sa->ts_dst;
229 	}
230 
231 	if (sasrc->sin_port != 0 && sasrc->sin_port != src->sin_port)
232 		return (false);
233 	if (sadst->sin_port != 0 && sadst->sin_port != dst->sin_port)
234 		return (false);
235 
236 	if (sasrc->sin_addr.s_addr != src->sin_addr.s_addr)
237 		return (false);
238 	if (sadst->sin_addr.s_addr != dst->sin_addr.s_addr)
239 		return (false);
240 
241 	return (true);
242 }
243 
244 static bool
245 tcpsig_sa_match6(tcpsig_sa_t *sa, struct sockaddr_storage *src_s,
246     struct sockaddr_storage *dst_s)
247 {
248 	sin6_t *src, *dst, *sasrc, *sadst;
249 
250 	if (src_s->ss_family != AF_INET6 || sa->ts_src.ss_family != AF_INET6)
251 		return (false);
252 
253 	src = (sin6_t *)src_s;
254 	dst = (sin6_t *)dst_s;
255 
256 	sasrc = (sin6_t *)&sa->ts_src;
257 	sadst = (sin6_t *)&sa->ts_dst;
258 
259 	if (sasrc->sin6_port != 0 && sasrc->sin6_port != src->sin6_port)
260 		return (false);
261 	if (sadst->sin6_port != 0 && sadst->sin6_port != dst->sin6_port)
262 		return (false);
263 
264 	if (!IN6_ARE_ADDR_EQUAL(&sasrc->sin6_addr, &src->sin6_addr))
265 		return (false);
266 	if (!IN6_ARE_ADDR_EQUAL(&sadst->sin6_addr, &dst->sin6_addr))
267 		return (false);
268 
269 	return (true);
270 }
271 
272 static tcpsig_sa_t *
273 tcpsig_sa_find_held(struct sockaddr_storage *src, struct sockaddr_storage *dst,
274     tcp_stack_t *tcps)
275 {
276 	tcpsig_db_t *db = tcpsig_db(tcps);
277 	tcpsig_sa_t *sa = NULL;
278 
279 	ASSERT(RW_LOCK_HELD(&db->td_lock));
280 
281 	if (src->ss_family != dst->ss_family)
282 		return (NULL);
283 
284 	for (sa = list_head(&db->td_salist); sa != NULL;
285 	    sa = list_next(&db->td_salist, sa)) {
286 		mutex_enter(&sa->ts_lock);
287 		/* We don't consider tombstoned entries as a possible match */
288 		if (sa->ts_tombstoned) {
289 			mutex_exit(&sa->ts_lock);
290 			continue;
291 		}
292 		if (tcpsig_sa_match4(sa, src, dst) ||
293 		    tcpsig_sa_match6(sa, src, dst)) {
294 			sa->ts_refcnt++;
295 			mutex_exit(&sa->ts_lock);
296 			break;
297 		}
298 		mutex_exit(&sa->ts_lock);
299 	}
300 
301 	return (sa);
302 }
303 
304 static tcpsig_sa_t *
305 tcpsig_sa_find(struct sockaddr_storage *src, struct sockaddr_storage *dst,
306     tcp_stack_t *tcps)
307 {
308 	tcpsig_db_t *db = tcpsig_db(tcps);
309 	tcpsig_sa_t *sa;
310 
311 	rw_enter(&db->td_lock, RW_READER);
312 	sa = tcpsig_sa_find_held(src, dst, tcps);
313 	rw_exit(&db->td_lock);
314 
315 	return (sa);
316 }
317 
318 static int
319 tcpsig_sa_flush(keysock_t *ks, tcp_stack_t *tcps, int *diagp)
320 {
321 	tcpsig_db_t *db = tcpsig_db(tcps);
322 	tcpsig_sa_t *nextsa;
323 
324 	rw_enter(&db->td_lock, RW_WRITER);
325 	nextsa = list_head(&db->td_salist);
326 	while (nextsa != NULL) {
327 		tcpsig_sa_t *sa = nextsa;
328 
329 		nextsa = list_next(&db->td_salist, sa);
330 
331 		mutex_enter(&sa->ts_lock);
332 		if (sa->ts_refcnt > 0) {
333 			sa->ts_tombstoned = true;
334 			mutex_exit(&sa->ts_lock);
335 			continue;
336 		}
337 
338 		list_remove(&db->td_salist, sa);
339 
340 		mutex_exit(&sa->ts_lock);
341 		tcpsig_sa_free(sa);
342 	}
343 
344 	rw_exit(&db->td_lock);
345 
346 	return (0);
347 }
348 
349 static int
350 tcpsig_sa_add(keysock_t *ks, tcp_stack_t *tcps, keysock_in_t *ksi,
351     sadb_ext_t **extv, int *diagp)
352 {
353 	tcpsig_db_t *db;
354 	sadb_address_t *srcext, *dstext;
355 	sadb_sa_t *assoc;
356 	struct sockaddr_storage *src, *dst;
357 	sadb_key_t *key;
358 	tcpsig_sa_t *sa, *dupsa;
359 	int ret = 0;
360 
361 	assoc = (sadb_sa_t *)extv[SADB_EXT_SA];
362 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
363 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
364 	key = (sadb_key_t *)extv[SADB_X_EXT_STR_AUTH];
365 
366 	if (assoc == NULL) {
367 		*diagp = SADB_X_DIAGNOSTIC_MISSING_SA;
368 		return (EINVAL);
369 	}
370 
371 	if (srcext == NULL) {
372 		*diagp = SADB_X_DIAGNOSTIC_MISSING_SRC;
373 		return (EINVAL);
374 	}
375 
376 	if (dstext == NULL) {
377 		*diagp = SADB_X_DIAGNOSTIC_MISSING_DST;
378 		return (EINVAL);
379 	}
380 
381 	if (key == NULL) {
382 		*diagp = SADB_X_DIAGNOSTIC_MISSING_ASTR;
383 		return (EINVAL);
384 	}
385 
386 	src = (struct sockaddr_storage *)(srcext + 1);
387 	dst = (struct sockaddr_storage *)(dstext + 1);
388 
389 	if (src->ss_family != dst->ss_family) {
390 		*diagp = SADB_X_DIAGNOSTIC_AF_MISMATCH;
391 		return (EINVAL);
392 	}
393 
394 	if (src->ss_family != AF_INET && src->ss_family != AF_INET6) {
395 		*diagp = SADB_X_DIAGNOSTIC_BAD_SRC_AF;
396 		return (EINVAL);
397 	}
398 
399 	/* We only support MD5 */
400 	if (assoc->sadb_sa_auth != SADB_AALG_MD5) {
401 		*diagp = SADB_X_DIAGNOSTIC_BAD_AALG;
402 		return (EINVAL);
403 	}
404 
405 	/* The authentication key length must be a multiple of whole bytes */
406 	if ((key->sadb_key_bits & 0x7) != 0) {
407 		*diagp = SADB_X_DIAGNOSTIC_MALFORMED_AKEY;
408 		return (EINVAL);
409 	}
410 
411 	db = tcpsig_db(tcps);
412 
413 	sa = kmem_zalloc(sizeof (*sa), KM_NOSLEEP_LAZY);
414 	if (sa == NULL)
415 		return (ENOMEM);
416 
417 	sa->ts_stack = tcps;
418 	sa->ts_family = src->ss_family;
419 	if (sa->ts_family == AF_INET6) {
420 		bcopy(src, (sin6_t *)&sa->ts_src, sizeof (sin6_t));
421 		bcopy(dst, (sin6_t *)&sa->ts_dst, sizeof (sin6_t));
422 	} else {
423 		bcopy(src, (sin_t *)&sa->ts_src, sizeof (sin_t));
424 		bcopy(dst, (sin_t *)&sa->ts_dst, sizeof (sin_t));
425 	}
426 
427 	sa->ts_key.sak_algid = assoc->sadb_sa_auth;
428 	sa->ts_key.sak_keylen = SADB_1TO8(key->sadb_key_bits);
429 	sa->ts_key.sak_keybits = key->sadb_key_bits;
430 
431 	sa->ts_key.sak_key = kmem_alloc(sa->ts_key.sak_keylen,
432 	    KM_NOSLEEP_LAZY);
433 	if (sa->ts_key.sak_key == NULL) {
434 		kmem_free(sa, sizeof (*sa));
435 		return (ENOMEM);
436 	}
437 	bcopy(key + 1, sa->ts_key.sak_key, sa->ts_key.sak_keylen);
438 	bzero(key + 1, sa->ts_key.sak_keylen);
439 
440 	mutex_init(&sa->ts_lock, NULL, MUTEX_DEFAULT, NULL);
441 	sa->ts_refcnt = 0;
442 	sa->ts_tombstoned = false;
443 
444 	rw_enter(&db->td_lock, RW_WRITER);
445 	if ((dupsa = tcpsig_sa_find_held(src, dst, tcps)) != NULL) {
446 		rw_exit(&db->td_lock);
447 		tcpsig_sa_rele(dupsa);
448 		tcpsig_sa_free(sa);
449 		*diagp = SADB_X_DIAGNOSTIC_DUPLICATE_SA;
450 		ret = EEXIST;
451 	} else {
452 		list_insert_tail(&db->td_salist, sa);
453 		rw_exit(&db->td_lock);
454 	}
455 
456 	return (ret);
457 }
458 
459 static uint8_t *
460 tcpsig_make_addr_ext(uint8_t *start, uint8_t *end, uint16_t exttype,
461     sa_family_t af, struct sockaddr_storage *addr)
462 {
463 	uint8_t *cur = start;
464 	unsigned int addrext_len;
465 	sadb_address_t *addrext	= (sadb_address_t *)cur;
466 
467 	if (cur == NULL)
468 		return (NULL);
469 
470 	cur += sizeof (*addrext);
471 	if (cur > end)
472 		return (NULL);
473 
474 	addrext->sadb_address_proto = IPPROTO_TCP;
475 	addrext->sadb_address_reserved = 0;
476 	addrext->sadb_address_prefixlen = 0;
477 	addrext->sadb_address_exttype = exttype;
478 
479 	ASSERT(af == AF_INET || af == AF_INET6);
480 	if (af == AF_INET) {
481 		sin_t *sin = (sin_t *)cur;
482 
483 		cur += sizeof (*sin);
484 		if (cur > end)
485 			return (NULL);
486 
487 		*sin = sin_null;
488 		bcopy(addr, sin, sizeof (*sin));
489 	} else {
490 		sin6_t *sin6 = (sin6_t *)cur;
491 
492 		cur += sizeof (*sin6);
493 		if (cur > end)
494 			return (NULL);
495 
496 		*sin6 = sin6_null;
497 		bcopy(addr, sin6, sizeof (*sin6));
498 	}
499 
500 	addrext_len = roundup(cur - start, sizeof (uint64_t));
501 	addrext->sadb_address_len = SADB_8TO64(addrext_len);
502 
503 	cur = start + addrext_len;
504 	if (cur > end)
505 		cur = NULL;
506 
507 	return (cur);
508 }
509 
510 static mblk_t *
511 tcpsig_dump_one(tcpsig_sa_t *sa, sadb_msg_t *samsg)
512 {
513 	size_t alloclen, addrsize, keysize;
514 	sadb_sa_t *assoc;
515 	sadb_msg_t *newsamsg;
516 	uint8_t *cur, *end;
517 	sadb_key_t *key;
518 	mblk_t *mp;
519 
520 	alloclen = sizeof (sadb_msg_t) + sizeof (sadb_sa_t);
521 
522 	switch (sa->ts_family) {
523 	case AF_INET:
524 		addrsize = roundup(sizeof (sin_t) +
525 		    sizeof (sadb_address_t), sizeof (uint64_t));
526 		break;
527 	case AF_INET6:
528 		addrsize = roundup(sizeof (sin6_t) +
529 		    sizeof (sadb_address_t), sizeof (uint64_t));
530 		break;
531 	}
532 	keysize = roundup(sizeof (sadb_key_t) + sa->ts_key.sak_keylen,
533 	    sizeof (uint64_t));
534 
535 	alloclen += addrsize * 2 + keysize;
536 
537 	mp = allocb(alloclen, BPRI_HI);
538 	if (mp == NULL)
539 		return (NULL);
540 
541 	bzero(mp->b_rptr, alloclen);
542 	mp->b_wptr += alloclen;
543 	end = mp->b_wptr;
544 
545 	newsamsg = (sadb_msg_t *)mp->b_rptr;
546 	*newsamsg = *samsg;
547 	newsamsg->sadb_msg_len = (uint16_t)SADB_8TO64(alloclen);
548 
549 	assoc = (sadb_sa_t *)(newsamsg + 1);
550 	assoc->sadb_sa_exttype = SADB_EXT_SA;
551 	assoc->sadb_sa_len = SADB_8TO64(sizeof (*assoc));
552 	assoc->sadb_sa_auth = sa->ts_key.sak_algid;
553 	assoc->sadb_sa_flags = SADB_X_SAFLAGS_TCPSIG;
554 	assoc->sadb_sa_state = IPSA_STATE_MATURE;
555 
556 	cur = (uint8_t *)(assoc + 1);
557 	cur = tcpsig_make_addr_ext(cur, end, SADB_EXT_ADDRESS_SRC,
558 	    sa->ts_family, &sa->ts_src);
559 	cur = tcpsig_make_addr_ext(cur, end, SADB_EXT_ADDRESS_DST,
560 	    sa->ts_family, &sa->ts_dst);
561 
562 	if (cur == NULL)
563 		return (NULL);
564 
565 	key = (sadb_key_t *)cur;
566 	key->sadb_key_exttype = SADB_X_EXT_STR_AUTH;
567 	key->sadb_key_len = SADB_8TO64(keysize);
568 	key->sadb_key_bits = sa->ts_key.sak_keybits;
569 	key->sadb_key_reserved = 0;
570 	bcopy(sa->ts_key.sak_key, (uint8_t *)(key + 1), sa->ts_key.sak_keylen);
571 
572 	return (mp);
573 }
574 
575 static int
576 tcpsig_sa_dump(keysock_t *ks, tcp_stack_t *tcps, sadb_msg_t *samsg, int *diag)
577 {
578 	tcpsig_db_t *db;
579 	tcpsig_sa_t *sa;
580 
581 	db = tcpsig_db(tcps);
582 	rw_enter(&db->td_lock, RW_READER);
583 
584 	for (sa = list_head(&db->td_salist); sa != NULL;
585 	    sa = list_next(&db->td_salist, sa)) {
586 		mblk_t *mp;
587 
588 		mutex_enter(&sa->ts_lock);
589 		if (sa->ts_tombstoned) {
590 			mutex_exit(&sa->ts_lock);
591 			continue;
592 		}
593 		mutex_exit(&sa->ts_lock);
594 
595 		mp = tcpsig_dump_one(sa, samsg);
596 		if (mp == NULL) {
597 			rw_exit(&db->td_lock);
598 			return (ENOMEM);
599 		}
600 		keysock_passup(mp, (sadb_msg_t *)mp->b_rptr,
601 		    ks->keysock_serial, NULL, B_TRUE, ks->keysock_keystack);
602 	}
603 
604 	rw_exit(&db->td_lock);
605 
606 	/* A sequence number of 0 indicates the end of the list */
607 	samsg->sadb_msg_seq = 0;
608 
609 	return (0);
610 }
611 
612 static int
613 tcpsig_sa_delget(keysock_t *ks, tcp_stack_t *tcps, sadb_msg_t *samsg,
614     sadb_ext_t **extv, int *diagp)
615 {
616 	sadb_address_t *srcext, *dstext;
617 	struct sockaddr_storage *src, *dst;
618 	tcpsig_sa_t *sa;
619 	mblk_t *mp;
620 
621 	srcext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_SRC];
622 	dstext = (sadb_address_t *)extv[SADB_EXT_ADDRESS_DST];
623 
624 	if (srcext == NULL) {
625 		*diagp = SADB_X_DIAGNOSTIC_MISSING_SRC;
626 		return (EINVAL);
627 	}
628 
629 	if (dstext == NULL) {
630 		*diagp = SADB_X_DIAGNOSTIC_MISSING_DST;
631 		return (EINVAL);
632 	}
633 
634 	src = (struct sockaddr_storage *)(srcext + 1);
635 	dst = (struct sockaddr_storage *)(dstext + 1);
636 
637 	sa = tcpsig_sa_find(src, dst, tcps);
638 
639 	if (sa == NULL) {
640 		*diagp = SADB_X_DIAGNOSTIC_PAIR_SA_NOTFOUND;
641 		return (ESRCH);
642 	}
643 
644 	if (samsg->sadb_msg_type == SADB_GET) {
645 		mp = tcpsig_dump_one(sa, samsg);
646 
647 		if (mp == NULL)
648 			return (ENOMEM);
649 		keysock_passup(mp, (sadb_msg_t *)mp->b_rptr,
650 		    ks->keysock_serial, NULL, B_TRUE, ks->keysock_keystack);
651 		tcpsig_sa_rele(sa);
652 
653 		return (0);
654 	}
655 
656 	/*
657 	 * Delete the entry.
658 	 * At this point we still have a hold on the entry from the find call
659 	 * above, so mark it as tombstoned and then release the hold. If
660 	 * that causes the reference count to become 0, the entry will be
661 	 * removed from the database.
662 	 */
663 
664 	mutex_enter(&sa->ts_lock);
665 	sa->ts_tombstoned = true;
666 	mutex_exit(&sa->ts_lock);
667 	tcpsig_sa_rele(sa);
668 
669 	return (0);
670 }
671 
672 void
673 tcpsig_sa_handler(keysock_t *ks, mblk_t *mp, sadb_msg_t *samsg,
674     sadb_ext_t **extv)
675 {
676 	keysock_stack_t *keystack = ks->keysock_keystack;
677 	netstack_t *nst = keystack->keystack_netstack;
678 	tcp_stack_t *tcps = nst->netstack_tcp;
679 	keysock_in_t *ksi = (keysock_in_t *)mp->b_rptr;
680 	int diag = SADB_X_DIAGNOSTIC_NONE;
681 	int error;
682 
683 	switch (samsg->sadb_msg_type) {
684 	case SADB_ADD:
685 		error = tcpsig_sa_add(ks, tcps, ksi, extv, &diag);
686 		keysock_error(ks, mp, error, diag);
687 		break;
688 	case SADB_GET:
689 	case SADB_DELETE:
690 		error = tcpsig_sa_delget(ks, tcps, samsg, extv, &diag);
691 		keysock_error(ks, mp, error, diag);
692 		break;
693 	case SADB_FLUSH:
694 		error = tcpsig_sa_flush(ks, tcps, &diag);
695 		keysock_error(ks, mp, error, diag);
696 		break;
697 	case SADB_DUMP:
698 		error = tcpsig_sa_dump(ks, tcps, samsg, &diag);
699 		keysock_error(ks, mp, error, diag);
700 		break;
701 	default:
702 		keysock_error(ks, mp, EOPNOTSUPP, diag);
703 		break;
704 	}
705 }
706 
707 static void
708 tcpsig_pseudo_compute4(tcp_t *tcp, int tcplen, MD5_CTX *ctx, bool inbound)
709 {
710 	struct ip_pseudo {
711 		struct in_addr	ipp_src;
712 		struct in_addr	ipp_dst;
713 		uint8_t		ipp_pad;
714 		uint8_t		ipp_proto;
715 		uint16_t	ipp_len;
716 	} ipp;
717 	conn_t *connp = tcp->tcp_connp;
718 
719 	if (inbound) {
720 		ipp.ipp_src.s_addr = connp->conn_faddr_v4;
721 		ipp.ipp_dst.s_addr = connp->conn_saddr_v4;
722 	} else {
723 		ipp.ipp_src.s_addr = connp->conn_saddr_v4;
724 		ipp.ipp_dst.s_addr = connp->conn_faddr_v4;
725 	}
726 	ipp.ipp_pad = 0;
727 	ipp.ipp_proto = IPPROTO_TCP;
728 	ipp.ipp_len = htons(tcplen);
729 
730 	DTRACE_PROBE1(ipp4, struct ip_pseudo *, &ipp);
731 
732 	MD5Update(ctx, (char *)&ipp, sizeof (ipp));
733 }
734 
735 static void
736 tcpsig_pseudo_compute6(tcp_t *tcp, int tcplen, MD5_CTX *ctx, bool inbound)
737 {
738 	struct ip6_pseudo {
739 		struct in6_addr	ipp_src;
740 		struct in6_addr ipp_dst;
741 		uint32_t	ipp_len;
742 		uint32_t	ipp_nxt;
743 	} ip6p;
744 	conn_t *connp = tcp->tcp_connp;
745 
746 	if (inbound) {
747 		ip6p.ipp_src = connp->conn_faddr_v6;
748 		ip6p.ipp_dst = connp->conn_saddr_v6;
749 	} else {
750 		ip6p.ipp_src = connp->conn_saddr_v6;
751 		ip6p.ipp_dst = connp->conn_faddr_v6;
752 	}
753 	ip6p.ipp_len = htonl(tcplen);
754 	ip6p.ipp_nxt = htonl(IPPROTO_TCP);
755 
756 	DTRACE_PROBE1(ipp6, struct ip6_pseudo *, &ip6p);
757 
758 	MD5Update(ctx, (char *)&ip6p, sizeof (ip6p));
759 }
760 
761 bool
762 tcpsig_signature(mblk_t *mp, tcp_t *tcp, tcpha_t *tcpha, int tcplen,
763     uint8_t *digest, bool inbound)
764 {
765 	tcp_stack_t *tcps = tcp->tcp_tcps;
766 	conn_t *connp = tcp->tcp_connp;
767 	tcpsig_sa_t *sa;
768 	MD5_CTX context;
769 
770 	/*
771 	 * The TCP_MD5SIG option is 20 bytes, including padding, which adds 5
772 	 * 32-bit words to the header's 4-bit field. Check that it can fit in
773 	 * the current packet.
774 	 */
775 	if (!inbound && (tcpha->tha_offset_and_reserved >> 4) > 10) {
776 		TCP_STAT(tcps, tcp_sig_no_space);
777 		return (false);
778 	}
779 
780 	sa = inbound ? tcp->tcp_sig_sa_in : tcp->tcp_sig_sa_out;
781 	if (sa == NULL) {
782 		struct sockaddr_storage src, dst;
783 
784 		bzero(&src, sizeof (src));
785 		bzero(&dst, sizeof (dst));
786 
787 		if (connp->conn_ipversion == IPV6_VERSION) {
788 			sin6_t *sin6;
789 
790 			sin6 = (sin6_t *)&src;
791 			sin6->sin6_family = AF_INET6;
792 			if (inbound) {
793 				sin6->sin6_addr = connp->conn_faddr_v6;
794 				sin6->sin6_port = connp->conn_fport;
795 			} else {
796 				sin6->sin6_addr = connp->conn_saddr_v6;
797 				sin6->sin6_port = connp->conn_lport;
798 			}
799 
800 			sin6 = (sin6_t *)&dst;
801 			sin6->sin6_family = AF_INET6;
802 			if (inbound) {
803 				sin6->sin6_addr = connp->conn_saddr_v6;
804 				sin6->sin6_port = connp->conn_lport;
805 			} else {
806 				sin6->sin6_addr = connp->conn_faddr_v6;
807 				sin6->sin6_port = connp->conn_fport;
808 			}
809 		} else {
810 			sin_t *sin;
811 
812 			sin = (sin_t *)&src;
813 			sin->sin_family = AF_INET;
814 			if (inbound) {
815 				sin->sin_addr.s_addr = connp->conn_faddr_v4;
816 				sin->sin_port = connp->conn_fport;
817 			} else {
818 				sin->sin_addr.s_addr = connp->conn_saddr_v4;
819 				sin->sin_port = connp->conn_lport;
820 			}
821 
822 			sin = (sin_t *)&dst;
823 			sin->sin_family = AF_INET;
824 			if (inbound) {
825 				sin->sin_addr.s_addr = connp->conn_saddr_v4;
826 				sin->sin_port = connp->conn_lport;
827 			} else {
828 				sin->sin_addr.s_addr = connp->conn_faddr_v4;
829 				sin->sin_port = connp->conn_fport;
830 			}
831 		}
832 
833 		sa = tcpsig_sa_find(&src, &dst, tcps);
834 
835 		if (sa == NULL) {
836 			TCP_STAT(tcps, tcp_sig_match_failed);
837 			return (false);
838 		}
839 
840 		/*
841 		 * tcpsig_sa_find() returns a held SA, so we don't need to take
842 		 * another one before adding it to tcp.
843 		 */
844 		if (inbound)
845 			tcp->tcp_sig_sa_in = sa;
846 		else
847 			tcp->tcp_sig_sa_out = sa;
848 	}
849 
850 	VERIFY3U(sa->ts_key.sak_algid, ==, SADB_AALG_MD5);
851 
852 	/* We have a key for this connection, generate the hash */
853 	MD5Init(&context);
854 
855 	/* TCP pseudo-header */
856 	if (connp->conn_ipversion == IPV6_VERSION)
857 		tcpsig_pseudo_compute6(tcp, tcplen, &context, inbound);
858 	else
859 		tcpsig_pseudo_compute4(tcp, tcplen, &context, inbound);
860 
861 	/* TCP header, excluding options and with a zero checksum */
862 	uint16_t offset = tcpha->tha_offset_and_reserved;
863 	uint16_t sum = tcpha->tha_sum;
864 
865 	if (!inbound) {
866 		/* Account for the MD5 option we are going to add */
867 		tcpha->tha_offset_and_reserved += (5 << 4);
868 	}
869 	tcpha->tha_sum = 0;
870 	MD5Update(&context, tcpha, sizeof (*tcpha));
871 	tcpha->tha_offset_and_reserved = offset;
872 	tcpha->tha_sum = sum;
873 
874 	/* TCP segment data */
875 	for (; mp != NULL; mp = mp->b_cont)
876 		MD5Update(&context, mp->b_rptr, mp->b_wptr - mp->b_rptr);
877 
878 	/* Connection-specific key */
879 	MD5Update(&context, sa->ts_key.sak_key, sa->ts_key.sak_keylen);
880 
881 	MD5Final(digest, &context);
882 
883 	return (true);
884 }
885 
886 bool
887 tcpsig_verify(mblk_t *mp, tcp_t *tcp, tcpha_t *tcpha, ip_recv_attr_t *ira,
888     uint8_t *digest)
889 {
890 	uint8_t calc_digest[MD5_DIGEST_LENGTH];
891 
892 	if (!tcpsig_signature(mp, tcp, tcpha,
893 	    ira->ira_pktlen - ira->ira_ip_hdr_length, calc_digest, true)) {
894 		/* The appropriate stat will already have been bumped */
895 		return (false);
896 	}
897 
898 	if (bcmp(digest, calc_digest, sizeof (calc_digest)) != 0) {
899 		TCP_STAT(tcp->tcp_tcps, tcp_sig_verify_failed);
900 		return (false);
901 	}
902 
903 	return (true);
904 }
905