xref: /illumos-gate/usr/src/uts/common/io/scsi/adapters/iscsi/iscsi_net.c (revision 257873cfc1dd3337766407f80397db60a56f2f5a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * iSCSI Software Initiator
26  */
27 
28 #include <sys/socket.h>		/* networking stuff */
29 #include <sys/strsubr.h>	/* networking stuff */
30 #include <netinet/tcp.h>	/* TCP_NODELAY */
31 #include <sys/socketvar.h>	/* _ALLOC_SLEEP */
32 #include <sys/pathname.h>	/* declares:	lookupname */
33 #include <sys/fs/snode.h>	/* defines:	VTOS */
34 #include <sys/fs/dv_node.h>	/* declares:	devfs_lookupname */
35 #include <netinet/in.h>
36 #include "iscsi.h"
37 
38 /*
39  * This is a high level description of the default
40  * iscsi_net transport interfaces.  These are used
41  * to create, send, recv, and close standard TCP/IP
42  * messages.  In addition there are extensions to send
43  * and recv iSCSI PDU data.
44  *
45  * NOTE: It would be very easy for an iSCSI HBA vendor
46  * to register their own functions over the top of
47  * the default interfaces.  This would allow an iSCSI
48  * HBA to use the same iscsiadm management interfaces
49  * and the Solaris iSCSI session / connection management.
50  * The current problem with this approach is we only
51  * allow one registered transport table.  This
52  * would be pretty easy to correct although will require
53  * additional CLI changes to manage multiple interfaces.
54  * If a vendor can present compelling performance data,
55  * then Sun will be willing to enhance this support for
56  * multiple interface tables and better CLI management.
57  *
58  * The following listing describes the iscsi_net
59  * entry points:
60  *
61  *   socket            - Creates TCP/IP socket connection.  In the
62  *                       default implementation creates a sonode
63  *                       via the sockfs kernel layer.
64  *   bind              - Performs standard TCP/IP BSD operation.  In
65  *                       the default implementation this only acts
66  *                       as a soft binding based on the IP and routing
67  *	                 tables.  It would be preferred if this was
68  *	                 a hard binding but that is currently not
69  *	                 possible with Solaris's networking stack.
70  *   connect           - Performs standard TCP/IP BSD operation.  This
71  *                       establishes the TCP SYN to the peer IP address.
72  *   listen            - Performs standard TCP/IP BSD operation.  This
73  *                       listens for incoming peer connections.
74  *   accept            - Performs standard TCP/IP BSD operation.  This
75  *                       accepts incoming peer connections.
76  *   shutdown          - This disconnects the TCP/IP connection while
77  *                       maintaining the resources.
78  *   close             - This disconnects the TCP/IP connection and
79  *                       releases the resources.
80  *
81  *   getsockopt        - Gets socket option for specified socket.
82  *   setsockopt        - Sets socket option for specified socket.
83  *
84  *      The current socket options that are used by the initiator
85  *      are listed below.
86  *
87  *        TCP_CONN_NOTIFY_THRESHOLD
88  *        TCP_CONN_ABORT_THRESHOLD
89  *        TCP_ABORT_THRESHOLD
90  *        TCP_NODELAY
91  *        SO_RCVBUF
92  *        SO_SNDBUF
93  *
94  *   iscsi_net_poll    - Poll socket interface for incoming data.  If
95  *                       no data is received within the timeout
96  *                       period, fail the request.
97  *   iscsi_net_sendmsg - Send message on socket connection
98  *   iscsi_net_recvmsg - Receive message on socket connection
99  *
100  *   iscsi_net_sendpdu - Send iSCSI PDU on socket connection
101  *   iscsi_net_recvhdr - Receive iSCSI header on socket connection
102  *   iscsi_net_recvdata - Receive iSCSI data on socket connection
103  *
104  *     The iSCSI interfaces have the below optional flags.
105  *
106  *       ISCSI_NET_HEADER_DIGEST - The interface should either
107  *				generate or validate the iSCSI
108  *				header digest CRC.
109  *       ISCSI_NET_DATA_DIGEST   - The interface should either
110  *                              generate or validate the iSCSI
111  *                              data digest CRC.
112  */
113 
114 
115 /* global: transport table, allocated and filled in by iscsi_net_init() */
116 iscsi_network_t *iscsi_net;
117 
/* consts */

/*
 * Lookup table for quick validation of incoming iSCSI PDU opcodes,
 * indexed by the opcode byte of a received header.  A non-zero entry
 * marks an opcode that is invalid for an iSCSI initiator to receive;
 * a zero entry marks an opcode that is accepted.
 */
#define	ISCSI_OPCODE_ROW_ALL_INVALID	\
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1

const int is_incoming_opcode_invalid[256] = {
	/* 0x0X */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0x1X */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/*		0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
	/* 0x2X */	0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x3X */	1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
	/* 0x4X */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0x5X */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/*		0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
	/* 0x6X */	0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x7X */	1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
	/* 0x8X */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0x9X */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0xAX */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0xBX */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0xCX */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0xDX */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0xEX */	ISCSI_OPCODE_ROW_ALL_INVALID,
	/* 0xFX */	ISCSI_OPCODE_ROW_ALL_INVALID,
};

#undef	ISCSI_OPCODE_ROW_ALL_INVALID
145 
146 /* prototypes */
147 static void * iscsi_net_socket(int domain, int type, int protocol);
148 static int iscsi_net_bind(void *socket, struct sockaddr *
149     name, int name_len, int backlog, int flags);
150 static int iscsi_net_connect(void *socket, struct sockaddr *
151     name, int name_len, int fflag, int flags);
152 static int iscsi_net_listen(void *socket, int backlog);
153 static void * iscsi_net_accept(void *socket, struct sockaddr *addr,
154     int *addr_len);
155 static int iscsi_net_getsockname(void *socket);
156 static int iscsi_net_getsockopt(void *socket, int level,
157     int option_name, void *option_val, int *option_len, int flags);
158 static int iscsi_net_setsockopt(void *socket, int level,
159     int option_name, void *option_val, int option_len);
160 static int iscsi_net_shutdown(void *socket, int how);
161 static void iscsi_net_close(void *socket);
162 
163 static size_t iscsi_net_poll(void *socket, clock_t timeout);
164 static size_t iscsi_net_sendmsg(void *socket, struct msghdr *msg);
165 static size_t iscsi_net_recvmsg(void *socket,
166     struct msghdr *msg, int timeout);
167 
168 static iscsi_status_t iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp,
169     char *data, int flags);
170 static iscsi_status_t iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp,
171     char *data, int max_data_length, int timeout, int flags);
172 static iscsi_status_t iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp,
173     int header_length, int timeout, int flags);
174 
175 static void iscsi_net_set_preconnect_options(void *socket);
176 static void iscsi_net_set_postconnect_options(void *socket);
177 
178 /*
179  * +--------------------------------------------------------------------+
180  * | network interface registration functions                           |
181  * +--------------------------------------------------------------------+
182  */
183 
184 /*
185  * iscsi_net_init - initialize network interface
186  */
187 void
188 iscsi_net_init()
189 {
190 	iscsi_net = kmem_zalloc(sizeof (*iscsi_net), KM_SLEEP);
191 
192 	iscsi_net->socket	= iscsi_net_socket;
193 
194 	iscsi_net->bind		= iscsi_net_bind;
195 	iscsi_net->connect	= iscsi_net_connect;
196 	iscsi_net->listen	= iscsi_net_listen;
197 	iscsi_net->accept	= iscsi_net_accept;
198 	iscsi_net->shutdown	= iscsi_net_shutdown;
199 	iscsi_net->close	= iscsi_net_close;
200 
201 	iscsi_net->getsockname	= iscsi_net_getsockname;
202 	iscsi_net->getsockopt	= iscsi_net_getsockopt;
203 	iscsi_net->setsockopt	= iscsi_net_setsockopt;
204 
205 	iscsi_net->poll		= iscsi_net_poll;
206 	iscsi_net->sendmsg	= iscsi_net_sendmsg;
207 	iscsi_net->recvmsg	= iscsi_net_recvmsg;
208 
209 	iscsi_net->sendpdu	= iscsi_net_sendpdu;
210 	iscsi_net->recvhdr	= iscsi_net_recvhdr;
211 	iscsi_net->recvdata	= iscsi_net_recvdata;
212 }
213 
214 /*
215  * iscsi_net_fini - release network interface
216  */
217 void
218 iscsi_net_fini()
219 {
220 	kmem_free(iscsi_net, sizeof (*iscsi_net));
221 	iscsi_net = NULL;
222 }
223 
224 
225 /*
226  * iscsi_net_set_preconnect_options -
227  */
228 static void
229 iscsi_net_set_preconnect_options(void *socket)
230 {
231 	int ret = 0;
232 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
233 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&iscsi_net->tweaks.
234 	    conn_notify_threshold, sizeof (int));
235 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
236 	    TCP_CONN_ABORT_THRESHOLD, (char *)&iscsi_net->tweaks.
237 	    conn_abort_threshold, sizeof (int));
238 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
239 	    (char *)&iscsi_net->tweaks.abort_threshold, sizeof (int));
240 	if (ret != 0) {
241 		cmn_err(CE_NOTE, "iscsi connection failed to set socket option"
242 		    "TCP_CONN_NOTIFY_THRESHOLD, TCP_CONN_ABORT_THRESHOLD or "
243 		    "TCP_ABORT_THRESHOLD");
244 	}
245 }
246 
247 /*
248  * iscsi_net_set_postconnect_options -
249  */
250 static void
251 iscsi_net_set_postconnect_options(void *socket)
252 {
253 	int ret = 0;
254 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
255 	    (char *)&iscsi_net->tweaks.nodelay, sizeof (int));
256 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
257 	    (char *)&iscsi_net->tweaks.rcvbuf, sizeof (int));
258 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
259 	    (char *)&iscsi_net->tweaks.sndbuf, sizeof (int));
260 	if (ret != 0) {
261 		cmn_err(CE_NOTE, "iscsi connection failed to set socket option"
262 		    "TCP_NODELAY, SO_RCVBUF or SO_SNDBUF");
263 	}
264 }
265 
266 
267 /*
268  * +--------------------------------------------------------------------+
269  * | register network interfaces                                        |
270  * +--------------------------------------------------------------------+
271  */
272 
273 /*
274  * iscsi_net_socket - create socket
275  */
276 static void *
277 iscsi_net_socket(int domain, int type, int protocol)
278 {
279 	vnode_t		*dvp		= NULL,
280 	    *vp		= NULL;
281 	struct snode	*csp		= NULL;
282 	int		err		= 0;
283 	major_t		maj;
284 
285 	/* ---- solookup: start ---- */
286 	if ((vp = solookup(domain, type, protocol, NULL, &err)) == NULL) {
287 
288 		/*
289 		 * solookup calls sogetvp if the vp is not found in
290 		 * the cache.  Since the call to sogetvp is hardwired
291 		 * to use USERSPACE and declared static we'll do the
292 		 * work here instead.
293 		 */
294 		err = lookupname(type == SOCK_STREAM ? "/dev/tcp" : "/dev/udp",
295 		    UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
296 		if (err)
297 			return (NULL);
298 
299 		/* ---- check that it is the correct vnode ---- */
300 		if (vp->v_type != VCHR) {
301 			VN_RELE(vp);
302 			return (NULL);
303 		}
304 
305 		csp = VTOS(VTOS(vp)->s_commonvp);
306 		if (!(csp->s_flag & SDIPSET)) {
307 			char    *pathname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
308 			err = ddi_dev_pathname(vp->v_rdev, S_IFCHR,
309 			    pathname);
310 			if (err == 0) {
311 				err = devfs_lookupname(pathname, NULLVPP,
312 				    &dvp);
313 			}
314 			VN_RELE(vp);
315 			kmem_free(pathname, MAXPATHLEN);
316 			if (err != 0) {
317 				return (NULL);
318 			}
319 			vp = dvp;
320 		}
321 
322 		maj = getmajor(vp->v_rdev);
323 		if (!STREAMSTAB(maj)) {
324 			VN_RELE(vp);
325 			return (NULL);
326 		}
327 	}
328 	/* ---- solookup: end ---- */
329 	return (socreate(vp, domain, type, protocol, SOV_DEFAULT, NULL, &err));
330 }
331 
/*
 * iscsi_net_bind - bind socket to a specific sockaddr
 *
 * Passes all arguments straight through to sobind() and returns
 * its result.
 */
static int
iscsi_net_bind(void *socket, struct sockaddr *name, int name_len,
	int backlog, int flags)
{
	struct sonode	*so = (struct sonode *)socket;
	int		error;

	error = sobind(so, name, name_len, backlog, flags);
	return (error);
}
342 
/*
 * iscsi_net_connect - connect socket to peer sockaddr
 *
 * Applies the pre-connect socket options, issues soconnect() and then
 * applies the post-connect socket options.  The post-connect options
 * are applied whatever soconnect() returned; the soconnect() result is
 * what the caller gets back.
 */
static int
iscsi_net_connect(void *socket, struct sockaddr *name, int name_len,
	int fflag, int flags)
{
	struct sonode	*so = (struct sonode *)socket;
	int		error;

	iscsi_net_set_preconnect_options(socket);

	error = soconnect(so, name, name_len, fflag, flags);

	iscsi_net_set_postconnect_options(socket);

	return (error);
}
359 
/*
 * iscsi_net_listen - listen to socket for peer connections
 *
 * Returns the result of solisten().
 */
static int
iscsi_net_listen(void *socket, int backlog)
{
	struct sonode	*so = (struct sonode *)socket;
	int		error;

	error = solisten(so, backlog);
	return (error);
}
368 
369 /*
370  * iscsi_net_accept - accept peer socket connections
371  */
372 static void *
373 iscsi_net_accept(void *socket, struct sockaddr *addr, int *addr_len)
374 {
375 	struct sonode *listening_socket;
376 
377 	(void) soaccept((struct sonode *)socket,
378 	    ((struct sonode *)socket)->so_flag,
379 	    &listening_socket);
380 	if (listening_socket != NULL) {
381 		bcopy(listening_socket->so_faddr_sa, addr,
382 		    (socklen_t)listening_socket->so_faddr_len);
383 		*addr_len = listening_socket->so_faddr_len;
384 	} else {
385 		*addr_len = 0;
386 	}
387 
388 	return ((void *)listening_socket);
389 }
390 
/*
 * iscsi_net_getsockname - refresh the socket's local name
 *
 * Returns the result of sogetsockname().
 */
static int
iscsi_net_getsockname(void *socket)
{
	struct sonode	*so = (struct sonode *)socket;
	int		error;

	error = sogetsockname(so);
	return (error);
}
399 
400 /*
401  * iscsi_net_getsockopt - get value of option on socket
402  */
403 static int
404 iscsi_net_getsockopt(void *socket, int level, int option_name,
405 	void *option_val, int *option_len, int flags)
406 {
407 	return (sogetsockopt((struct sonode *)socket, level,
408 	    option_name, option_val, (socklen_t *)option_len,
409 	    flags));
410 }
411 
/*
 * iscsi_net_setsockopt - set value for option on socket
 *
 * Passes the arguments through to sosetsockopt() and returns
 * its result.
 */
static int
iscsi_net_setsockopt(void *socket, int level, int option_name,
	void *option_val, int option_len)
{
	struct sonode	*so = (struct sonode *)socket;
	int		error;

	error = sosetsockopt(so, level, option_name, option_val,
	    option_len);
	return (error);
}
422 
/*
 * iscsi_net_shutdown - shutdown socket connection
 *
 * Returns the result of soshutdown() for the requested direction.
 */
static int
iscsi_net_shutdown(void *socket, int how)
{
	struct sonode	*so = (struct sonode *)socket;
	int		error;

	error = soshutdown(so, how);
	return (error);
}
431 
432 /*
433  * iscsi_net_close - shutdown socket connection and release resources
434  */
435 static void
436 iscsi_net_close(void *socket)
437 {
438 	vnode_t *vp = SOTOV((struct sonode *)socket);
439 	(void) soshutdown((struct sonode *)socket, 2);
440 	(void) VOP_CLOSE(vp, 0, 1, 0, kcred, NULL);
441 	VN_RELE(vp);
442 }
443 
444 /*
445  * iscsi_net_poll - poll socket for data
446  */
447 static size_t
448 iscsi_net_poll(void *socket, clock_t timeout)
449 {
450 	int pflag;
451 	uchar_t pri;
452 	rval_t rval;
453 
454 	pri = 0;
455 	pflag = MSG_ANY;
456 	return (kstrgetmsg(SOTOV((struct sonode *)socket), NULL, NULL,
457 	    &pri, &pflag, timeout, &rval));
458 }
459 
460 /*
461  * iscsi_net_sendmsg - send message on socket
462  */
463 /* ARGSUSED */
464 static size_t
465 iscsi_net_sendmsg(void *socket, struct msghdr *msg)
466 {
467 	int i = 0;
468 	int total_len = 0;
469 	struct uio uio;
470 
471 	/* Initialization of the uio structure. */
472 	bzero(&uio, sizeof (uio));
473 	uio.uio_iov = msg->msg_iov;
474 	uio.uio_iovcnt = msg->msg_iovlen;
475 	uio.uio_segflg  = UIO_SYSSPACE;
476 
477 	for (i = 0; i < msg->msg_iovlen; i++) {
478 		total_len += (msg->msg_iov)[i].iov_len;
479 	}
480 	uio.uio_resid = total_len;
481 
482 	(void) sosendmsg((struct sonode *)socket, msg, &uio);
483 	DTRACE_PROBE2(sosendmsg, size_t, total_len, size_t, uio.uio_resid);
484 	return (total_len - uio.uio_resid);
485 }
486 
487 /*
488  * iscsi_net_recvmsg - receive message on socket
489  */
490 /* ARGSUSED */
491 static size_t
492 iscsi_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
493 {
494 	int		idx;
495 	int		total_len   = 0;
496 	struct uio	uio;
497 	uchar_t		pri	    = 0;
498 	int		prflag	    = MSG_ANY;
499 	rval_t		rval;
500 	struct sonode	*sonode	    = (struct sonode *)socket;
501 
502 	/* Initialization of the uio structure. */
503 	bzero(&uio, sizeof (uio));
504 	uio.uio_iov	    = msg->msg_iov;
505 	uio.uio_iovcnt	    = msg->msg_iovlen;
506 	uio.uio_segflg	    = UIO_SYSSPACE;
507 
508 	for (idx = 0; idx < msg->msg_iovlen; idx++) {
509 		total_len += (msg->msg_iov)[idx].iov_len;
510 	}
511 	uio.uio_resid = total_len;
512 
513 	/* If timeout requested on receive */
514 	if (timeout > 0) {
515 		boolean_t   loopback = B_FALSE;
516 
517 		/* And this isn't a loopback connection */
518 		if (sonode->so_laddr.soa_sa->sa_family == AF_INET) {
519 			struct sockaddr_in *lin =
520 			    (struct sockaddr_in *)sonode->so_laddr.soa_sa;
521 			struct sockaddr_in *fin =
522 			    (struct sockaddr_in *)sonode->so_faddr.soa_sa;
523 
524 			if ((lin->sin_family == fin->sin_family) &&
525 			    (bcmp(&lin->sin_addr, &fin->sin_addr,
526 			    sizeof (struct in_addr)) == 0)) {
527 				loopback = B_TRUE;
528 			}
529 		} else {
530 			struct sockaddr_in6 *lin6 =
531 			    (struct sockaddr_in6 *)sonode->so_laddr.soa_sa;
532 			struct sockaddr_in6 *fin6 =
533 			    (struct sockaddr_in6 *)sonode->so_faddr.soa_sa;
534 
535 			if ((lin6->sin6_family == fin6->sin6_family) &&
536 			    (bcmp(&lin6->sin6_addr, &fin6->sin6_addr,
537 			    sizeof (struct in6_addr)) == 0)) {
538 				loopback = B_TRUE;
539 			}
540 		}
541 
542 		if (loopback == B_FALSE) {
543 			/*
544 			 * Then poll device for up to the timeout
545 			 * period or the requested data is received.
546 			 */
547 			if (kstrgetmsg(SOTOV(sonode),
548 			    NULL, NULL, &pri, &prflag, timeout * 1000,
549 			    &rval) == ETIME) {
550 				return (0);
551 			}
552 		}
553 	}
554 
555 	/*
556 	 * Receive the requested data.  Block until all
557 	 * data is received.
558 	 *
559 	 * resid occurs only when the connection is
560 	 * disconnected.  In that case it will return
561 	 * the amount of data that was not received.
562 	 * In general this is the total amount we
563 	 * requested.
564 	 */
565 	(void) sorecvmsg((struct sonode *)socket, msg, &uio);
566 	DTRACE_PROBE2(sorecvmsg, size_t, total_len, size_t, uio.uio_resid);
567 	return (total_len - uio.uio_resid);
568 }
569 
570 /*
571  * iscsi_net_sendpdu - send iscsi pdu on socket
572  */
573 static iscsi_status_t
574 iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, char *data, int flags)
575 {
576 	uint32_t	pad;
577 	uint32_t	crc_hdr;
578 	uint32_t	crc_data;
579 	uint32_t	pad_len;
580 	uint32_t	data_len;
581 	iovec_t		iovec[ISCSI_MAX_IOVEC];
582 	int		iovlen = 0;
583 	size_t		total_len = 0;
584 	size_t		send_len;
585 	struct msghdr	msg;
586 
587 	ASSERT(socket != NULL);
588 	ASSERT(ihp != NULL);
589 
590 	/*
591 	 * Let's send the header first.  'hlength' is in 32-bit
592 	 * quantities, so we need to multiply by four to get bytes
593 	 */
594 	ASSERT(iovlen < ISCSI_MAX_IOVEC);
595 	iovec[iovlen].iov_base = (void *)ihp;
596 	iovec[iovlen].iov_len  = sizeof (*ihp) + ihp->hlength * 4;
597 	total_len += sizeof (*ihp) + ihp->hlength * 4;
598 	iovlen++;
599 
600 	/* Let's transmit the header digest if we have to. */
601 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
602 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
603 		/*
604 		 * Converting the calculated CRC via htonl is not
605 		 * necessary because iscsi_crc32c calculates
606 		 * the value as it expects to be written
607 		 */
608 		crc_hdr = iscsi_crc32c((char *)ihp,
609 		    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
610 
611 		iovec[iovlen].iov_base = (void *)&crc_hdr;
612 		iovec[iovlen].iov_len  = sizeof (crc_hdr);
613 		total_len += sizeof (crc_hdr);
614 		iovlen++;
615 	}
616 
617 	/* Let's transmit the data if any. */
618 	data_len = ntoh24(ihp->dlength);
619 
620 	if (data_len) {
621 
622 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
623 		iovec[iovlen].iov_base = (void *)data;
624 		iovec[iovlen].iov_len  = data_len;
625 		total_len += data_len;
626 		iovlen++;
627 
628 		pad_len = ((ISCSI_PAD_WORD_LEN -
629 		    (data_len & (ISCSI_PAD_WORD_LEN - 1))) &
630 		    (ISCSI_PAD_WORD_LEN - 1));
631 
632 		/* Let's transmit the data pad if any. */
633 		if (pad_len) {
634 
635 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
636 			pad = 0;
637 			iovec[iovlen].iov_base = (void *)&pad;
638 			iovec[iovlen].iov_len  = pad_len;
639 			total_len += pad_len;
640 			iovlen++;
641 		}
642 
643 		/* Let's transmit the data digest if we have to. */
644 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
645 
646 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
647 			/*
648 			 * Converting the calculated CRC via htonl is not
649 			 * necessary because iscsi_crc32c calculates the
650 			 * value as it expects to be written
651 			 */
652 			crc_data = iscsi_crc32c(data, data_len);
653 			crc_data = iscsi_crc32c_continued(
654 			    (char *)&pad, pad_len, crc_data);
655 
656 			iovec[iovlen].iov_base = (void *)&crc_data;
657 			iovec[iovlen].iov_len  = sizeof (crc_data);
658 			total_len += sizeof (crc_data);
659 			iovlen++;
660 		}
661 	}
662 
663 	DTRACE_PROBE4(tx, void *, socket, iovec_t *, &iovec[0],
664 	    int, iovlen, int, total_len);
665 
666 	/* Initialization of the message header. */
667 	bzero(&msg, sizeof (msg));
668 	msg.msg_iov	= &iovec[0];
669 	msg.msg_flags	= MSG_WAITALL;
670 	msg.msg_iovlen	= iovlen;
671 
672 	send_len = iscsi_net->sendmsg((struct sonode *)socket, &msg);
673 	DTRACE_PROBE2(sendmsg, size_t, total_len, size_t, send_len);
674 	if (total_len != send_len) {
675 		return (ISCSI_STATUS_TCP_TX_ERROR);
676 	}
677 	return (ISCSI_STATUS_SUCCESS);
678 }
679 
680 /*
681  * iscsi_net_recvhdr - receive iscsi hdr on socket
682  */
683 static iscsi_status_t
684 iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp, int header_length,
685     int timeout, int flags)
686 {
687 	iovec_t		    iov[ISCSI_MAX_IOVEC];
688 	int		    iovlen		= 1;
689 	int		    total_len		= 0;
690 	uint32_t	    crc_actual		= 0;
691 	uint32_t	    crc_calculated	= 0;
692 	char		    *adhdr		= NULL;
693 	int		    adhdr_length	= 0;
694 	struct msghdr	    msg;
695 	size_t		    recv_len;
696 
697 	ASSERT(socket != NULL);
698 	ASSERT(ihp != NULL);
699 
700 	if (header_length < sizeof (iscsi_hdr_t)) {
701 		ASSERT(FALSE);
702 		return (ISCSI_STATUS_INTERNAL_ERROR);
703 	}
704 
705 	/*
706 	 * Receive primary header
707 	 */
708 	iov[0].iov_base = (char *)ihp;
709 	iov[0].iov_len = sizeof (iscsi_hdr_t);
710 
711 	bzero(&msg, sizeof (msg));
712 	msg.msg_iov	= iov;
713 	msg.msg_flags	= MSG_WAITALL;
714 	msg.msg_iovlen	= iovlen;
715 
716 	recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
717 	if (recv_len != sizeof (iscsi_hdr_t)) {
718 		return (ISCSI_STATUS_TCP_RX_ERROR);
719 	}
720 
721 	DTRACE_PROBE2(rx_hdr, void *, socket, iovec_t *iop, &iov[0]);
722 
723 	/* verify incoming opcode is a valid operation */
724 	if (is_incoming_opcode_invalid[ihp->opcode]) {
725 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
726 		    "received an unsupported opcode:0x%02x",
727 		    socket, ihp->opcode);
728 		return (ISCSI_STATUS_PROTOCOL_ERROR);
729 	}
730 
731 	/*
732 	 * Setup receipt of additional header
733 	 */
734 	if (ihp->hlength > 0) {
735 		adhdr = ((char *)ihp) + sizeof (iscsi_hdr_t);
736 		adhdr_length = header_length - sizeof (iscsi_hdr_t);
737 		/* make sure enough space is available for adhdr */
738 		if (ihp->hlength > adhdr_length) {
739 			ASSERT(FALSE);
740 			return (ISCSI_STATUS_INTERNAL_ERROR);
741 		}
742 
743 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
744 		iov[iovlen].iov_base = adhdr;
745 		iov[iovlen].iov_len = adhdr_length;
746 		total_len += adhdr_length;
747 		iovlen++;
748 	}
749 
750 	/*
751 	 * Setup receipt of header digest if enabled and connection
752 	 * is in full feature mode.
753 	 */
754 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
755 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
756 		iov[iovlen].iov_base = (char *)&crc_actual;
757 		iov[iovlen].iov_len = sizeof (uint32_t);
758 		total_len += sizeof (uint32_t);
759 		iovlen++;
760 	}
761 
762 	/*
763 	 * Read additional header and/or header digest if pieces
764 	 * are available
765 	 */
766 	if (iovlen > 1) {
767 
768 		bzero(&msg, sizeof (msg));
769 		msg.msg_iov	= iov;
770 		msg.msg_flags	= MSG_WAITALL;
771 		msg.msg_iovlen	= iovlen;
772 
773 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
774 		if (recv_len != total_len) {
775 			return (ISCSI_STATUS_TCP_RX_ERROR);
776 		}
777 
778 		DTRACE_PROBE4(rx_adhdr_digest, void *, socket,
779 		    iovec_t *iop, &iov[0], int, iovlen, int, total_len);
780 
781 		/*
782 		 * Verify header digest if enabled and connection
783 		 * is in full feature mode
784 		 */
785 		if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
786 			crc_calculated = iscsi_crc32c((uchar_t *)ihp,
787 			    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
788 
789 			/*
790 			 * Converting actual CRC read via ntohl is not
791 			 * necessary because iscsi_crc32c calculates the
792 			 * value as it expect to be read
793 			 */
794 			if (crc_calculated != crc_actual) {
795 				/* Invalid Header Digest */
796 				cmn_err(CE_WARN, "iscsi connection(%p) "
797 				    "protocol error - encountered a header "
798 				    "digest error expected:0x%08x "
799 				    "received:0x%08x", socket,
800 				    crc_calculated, crc_actual);
801 				return (ISCSI_STATUS_HEADER_DIGEST_ERROR);
802 			}
803 		}
804 	}
805 	return (ISCSI_STATUS_SUCCESS);
806 }
807 
808 
809 /*
810  * iscsi_net_recvdata - receive iscsi data payload from socket
811  */
812 static iscsi_status_t
813 iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp, char *data,
814     int max_data_length, int timeout, int flags)
815 {
816 	struct iovec	iov[3];
817 	int		iovlen			= 1;
818 	int		total_len		= 0;
819 	int		dlength			= 0;
820 	int		pad_len			= 0;
821 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
822 	uint32_t	crc_calculated		= 0;
823 	uint32_t	crc_actual		= 0;
824 	struct msghdr	msg;
825 	size_t		recv_len;
826 
827 	ASSERT(socket != NULL);
828 	ASSERT(ihp != NULL);
829 	ASSERT(data != NULL);
830 
831 	/* short hand dlength */
832 	dlength = ntoh24(ihp->dlength);
833 
834 	/* verify dlength is valid */
835 	if (dlength > max_data_length) {
836 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
837 		    "invalid data lengths itt:0x%x received:0x%x "
838 		    "max expected:0x%x", socket, ihp->itt,
839 		    dlength, max_data_length);
840 		return (ISCSI_STATUS_PROTOCOL_ERROR);
841 	}
842 
843 	if (dlength) {
844 
845 		/* calculate pad */
846 		pad_len = ((ISCSI_PAD_WORD_LEN -
847 		    (dlength & (ISCSI_PAD_WORD_LEN - 1))) &
848 		    (ISCSI_PAD_WORD_LEN - 1));
849 
850 		/* setup data iovec */
851 		iov[0].iov_base	= (char *)data;
852 		iov[0].iov_len	= dlength;
853 		total_len	= dlength;
854 
855 		/* if pad setup pad iovec */
856 		if (pad_len) {
857 			iov[iovlen].iov_base	= (char *)&pad;
858 			iov[iovlen].iov_len	= pad_len;
859 			total_len		+= pad_len;
860 			iovlen++;
861 		}
862 
863 		/* setup data digest */
864 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
865 			iov[iovlen].iov_base	= (char *)&crc_actual;
866 			iov[iovlen].iov_len	= sizeof (crc_actual);
867 			total_len		+= sizeof (crc_actual);
868 			iovlen++;
869 		}
870 
871 		bzero(&msg, sizeof (msg));
872 		msg.msg_iov	= iov;
873 		msg.msg_flags	= MSG_WAITALL;
874 		msg.msg_iovlen	= iovlen;
875 
876 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
877 		if (recv_len != total_len) {
878 			return (ISCSI_STATUS_TCP_RX_ERROR);
879 		}
880 
881 		DTRACE_PROBE4(rx_data, void *, socket, iovec_t *iop,
882 		    &iov[0], int, iovlen, int, total_len);
883 
884 		/* verify data digest is present */
885 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
886 
887 			crc_calculated = iscsi_crc32c(data, dlength);
888 			crc_calculated = iscsi_crc32c_continued(
889 			    (char *)&pad, pad_len, crc_calculated);
890 
891 			/*
892 			 * Converting actual CRC read via ntohl is not
893 			 * necessary because iscsi_crc32c calculates the
894 			 * value as it expects to be read
895 			 */
896 			if (crc_calculated != crc_actual) {
897 				cmn_err(CE_WARN, "iscsi connection(%p) "
898 				    "protocol error - encountered a data "
899 				    "digest error itt:0x%x expected:0x%08x "
900 				    "received:0x%08x", socket,
901 				    ihp->itt, crc_calculated, crc_actual);
902 				return (ISCSI_STATUS_DATA_DIGEST_ERROR);
903 			}
904 		}
905 	}
906 	return (ISCSI_STATUS_SUCCESS);
907 }
908