/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * RDMA channel interface for Solaris SCSI RDMA Protocol Target (SRP)
 * transport port provider module for the COMSTAR framework.
 */

#include <sys/cpuvar.h>
#include <sys/types.h>
#include <sys/conf.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/sysmacros.h>
#include <sys/sdt.h>
#include <sys/taskq.h>
#include <sys/scsi/scsi.h>
#include <sys/ib/ibtl/ibti.h>

#include <stmf.h>
#include <stmf_ioctl.h>
#include <portif.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

extern srpt_ctxt_t *srpt_ctxt;
extern uint16_t srpt_send_msg_depth;

/*
 * Prototypes.
 */
static void srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg);
static void srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu);

/*
 * srpt_ch_alloc()
 */
srpt_channel_t *
srpt_ch_alloc(srpt_target_port_t *tgt, uint8_t port)
{
	ibt_status_t			status;
	srpt_channel_t			*ch;
	ibt_cq_attr_t			cq_attr;
	ibt_rc_chan_alloc_args_t	ch_args;
	uint32_t			cq_real_size;
	srpt_ioc_t			*ioc;

	ASSERT(tgt != NULL);
	ioc = tgt->tp_ioc;
	ASSERT(ioc != NULL);

	ch = kmem_zalloc(sizeof (*ch), KM_SLEEP);
	rw_init(&ch->ch_rwlock, NULL, RW_DRIVER, NULL);
	mutex_init(&ch->ch_reflock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ch->ch_cv_complete, NULL, CV_DRIVER, NULL);
	ch->ch_refcnt	= 1;
	ch->ch_cv_waiters = 0;

	ch->ch_state  = SRPT_CHANNEL_CONNECTING;
	ch->ch_tgt    = tgt;
	ch->ch_req_lim_delta = 0;
	ch->ch_ti_iu_len = 0;

	cq_attr.cq_size	 = srpt_send_msg_depth * 2;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_scq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, send CQ alloc error (%d)",
		    status);
		goto scq_alloc_err;
	}

	cq_attr.cq_size	 = srpt_send_msg_depth + 1;
	cq_attr.cq_sched = 0;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;

	status = ibt_alloc_cq(ioc->ioc_ibt_hdl, &cq_attr, &ch->ch_rcq_hdl,
	    &cq_real_size);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, receive CQ alloc error (%d)",
		    status);
		goto rcq_alloc_err;
	}

	ibt_set_cq_handler(ch->ch_scq_hdl, srpt_ch_scq_hdlr, ch);
	ibt_set_cq_handler(ch->ch_rcq_hdl, srpt_ch_rcq_hdlr, ch);
	ibt_enable_cq_notify(ch->ch_scq_hdl, IBT_NEXT_COMPLETION);
	ibt_enable_cq_notify(ch->ch_rcq_hdl, IBT_NEXT_COMPLETION);

	ch_args.rc_flags   = IBT_WR_SIGNALED;

	/* Make certain the initiator cannot read/write our memory */
	ch_args.rc_control = 0;

	ch_args.rc_hca_port_num = port;

	/*
	 * Any SRP IU can result in a number of STMF data buffer transfers
	 * and those transfers themselves could span multiple initiator
	 * buffers.  Therefore, the number of send WQE's actually required
	 * can vary.  Here we assume that on average an I/O will require
	 * no more than SRPT_MAX_OUT_IO_PER_CMD send WQE's.  In practice
	 * this will prevent send work queue overrun, but we will also
	 * inform STMF to throttle I/O should the work queue become full.
	 *
	 * If the HCA tells us the max outstanding WRs for a channel is
	 * lower than our default, use the HCA value.
	 */
	ch_args.rc_sizes.cs_sq = min(ioc->ioc_attr.hca_max_chan_sz,
	    (srpt_send_msg_depth * SRPT_MAX_OUT_IO_PER_CMD));
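	/*
	 * Worked example (hypothetical numbers, for illustration only):
	 * with srpt_send_msg_depth == 64 and SRPT_MAX_OUT_IO_PER_CMD == 2
	 * we request cs_sq = min(hca_max_chan_sz, 128); an HCA that caps
	 * a channel at 100 WRs yields cs_sq == 100.  The depth actually
	 * granted is returned in ch->ch_sizes by ibt_alloc_rc_channel()
	 * below and sizes the SWQE pool.
	 */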
	ch_args.rc_sizes.cs_rq = 0;
	ch_args.rc_sizes.cs_sq_sgl = 2;
	ch_args.rc_sizes.cs_rq_sgl = 0;

	ch_args.rc_scq = ch->ch_scq_hdl;
	ch_args.rc_rcq = ch->ch_rcq_hdl;
	ch_args.rc_pd  = ioc->ioc_pd_hdl;
	ch_args.rc_clone_chan = NULL;
	ch_args.rc_srq = ioc->ioc_srq_hdl;

	status = ibt_alloc_rc_channel(ioc->ioc_ibt_hdl, IBT_ACHAN_USES_SRQ,
	    &ch_args, &ch->ch_chan_hdl, &ch->ch_sizes);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_alloc, IBT channel alloc error (%d)",
		    status);
		goto qp_alloc_err;
	}

	/*
	 * Create pool of send WQE entries to map send WQE work IDs
	 * to various types (specifically in error cases where OP
	 * is not known).
	 */
	ch->ch_num_swqe = ch->ch_sizes.cs_sq;
	SRPT_DPRINTF_L2("ch_alloc, number of SWQEs = %u", ch->ch_num_swqe);
	ch->ch_swqe = kmem_zalloc(sizeof (srpt_swqe_t) * ch->ch_num_swqe,
	    KM_SLEEP);
	if (ch->ch_swqe == NULL) {
		SRPT_DPRINTF_L2("ch_alloc, SWQE alloc error");
		ibt_free_channel(ch->ch_chan_hdl);
		goto qp_alloc_err;
	}
	mutex_init(&ch->ch_swqe_lock, NULL, MUTEX_DRIVER, NULL);
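	/*
	 * The SWQE pool is managed as a free list threaded through the
	 * sw_next indices: ch_head names the next free entry, ch_tail the
	 * last one, and index 0 is reserved as the "no work ID" value.
	 * A minimal sketch of the state built by the loop below, assuming
	 * ch_num_swqe == 4:
	 *
	 *	index:    1  2  3
	 *	sw_next:  2  3  0
	 *	ch_head == 1, ch_tail == 3
	 *
	 * srpt_ch_alloc_swqe_wrid() pops entries at ch_head and
	 * srpt_ch_free_swqe_wrid() appends them at ch_tail (see the end
	 * of this file).
	 */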
	ch->ch_head = 1;
	for (ch->ch_tail = 1; ch->ch_tail < ch->ch_num_swqe - 1;
	    ch->ch_tail++) {
		ch->ch_swqe[ch->ch_tail].sw_next = ch->ch_tail + 1;
	}
	ch->ch_swqe[ch->ch_tail].sw_next = 0;

	ibt_set_chan_private(ch->ch_chan_hdl, ch);
	return (ch);

qp_alloc_err:
	ibt_free_cq(ch->ch_rcq_hdl);

rcq_alloc_err:
	ibt_free_cq(ch->ch_scq_hdl);

scq_alloc_err:
	cv_destroy(&ch->ch_cv_complete);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (*ch));

	return (NULL);
}
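
/*
 * A rough lifecycle sketch for the functions in this file (illustrative
 * only; the CM and target-port callers are assumed, not shown here):
 *
 *	ch = srpt_ch_alloc(tgt, port);		ch_refcnt == 1
 *	srpt_ch_add_ref(ch);			e.g. target port list ref
 *	...
 *	srpt_ch_disconnect(ch);			async CM DREQ
 *	srpt_ch_cleanup(ch);			from the CM callback path
 *	srpt_ch_release_ref(ch, 1);		block until last ref drops
 *
 * The final release frees the QP, both CQs, the SWQE pool, and the
 * channel structure itself.
 */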

/*
 * srpt_ch_add_ref()
 */
void
srpt_ch_add_ref(srpt_channel_t *ch)
{
	mutex_enter(&ch->ch_reflock);
	ch->ch_refcnt++;
	SRPT_DPRINTF_L4("ch_add_ref, ch (%p), refcnt (%d)",
	    (void *)ch, ch->ch_refcnt);
	ASSERT(ch->ch_refcnt != 0);
	mutex_exit(&ch->ch_reflock);
}

/*
 * srpt_ch_release_ref()
 *
 * A non-zero value for wait causes the thread to block until all
 * references to the channel are released.
 */
void
srpt_ch_release_ref(srpt_channel_t *ch, uint_t wait)
{
	mutex_enter(&ch->ch_reflock);

	SRPT_DPRINTF_L4("ch_release_ref, ch (%p), refcnt (%d), wait (%d)",
	    (void *)ch, ch->ch_refcnt, wait);

	ASSERT(ch->ch_refcnt != 0);

	ch->ch_refcnt--;

	if (ch->ch_refcnt != 0) {
		if (wait) {
			ch->ch_cv_waiters++;
			while (ch->ch_refcnt != 0) {
				cv_wait(&ch->ch_cv_complete, &ch->ch_reflock);
			}
			ch->ch_cv_waiters--;
		} else {
			mutex_exit(&ch->ch_reflock);
			return;
		}
	}

	/*
	 * Last thread out frees the IB resources, locks/conditions and memory
	 */
	if (ch->ch_cv_waiters > 0) {
		/* we're not last, wake someone else up */
		cv_signal(&ch->ch_cv_complete);
		mutex_exit(&ch->ch_reflock);
		return;
	}

	SRPT_DPRINTF_L3("ch_release_ref - release resources");
	if (ch->ch_chan_hdl) {
		SRPT_DPRINTF_L3("ch_release_ref - free channel");
		ibt_free_channel(ch->ch_chan_hdl);
	}

	if (ch->ch_scq_hdl) {
		ibt_free_cq(ch->ch_scq_hdl);
	}

	if (ch->ch_rcq_hdl) {
		ibt_free_cq(ch->ch_rcq_hdl);
	}

	/*
	 * There should be no IU's associated with this
	 * channel on the SCSI session.
	 */
	if (ch->ch_session != NULL) {
		ASSERT(list_is_empty(&ch->ch_session->ss_task_list));

		/*
		 * Currently only have one channel per session, we will
		 * need to release a reference when support is added
		 * for multi-channel target login.
		 */
		srpt_stp_free_session(ch->ch_session);
		ch->ch_session = NULL;
	}

	kmem_free(ch->ch_swqe, sizeof (srpt_swqe_t) * ch->ch_num_swqe);
	mutex_destroy(&ch->ch_swqe_lock);
	mutex_exit(&ch->ch_reflock);
	mutex_destroy(&ch->ch_reflock);
	rw_destroy(&ch->ch_rwlock);
	kmem_free(ch, sizeof (srpt_channel_t));
}
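
/*
 * A note on the waiter hand-off above (a thought experiment, not extra
 * driver logic): if threads A and B both call srpt_ch_release_ref(ch, 1)
 * while a third reference is held, both block in cv_wait().  When the
 * last reference is dropped, the releaser sees ch_cv_waiters > 0,
 * signals once, and returns; A wakes, finds it is not the last waiter
 * either, signals B, and returns; B finally sees ch_cv_waiters == 0 and
 * tears the channel down.  Exactly one thread frees the resources.
 */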

/*
 * srpt_ch_disconnect()
 */
void
srpt_ch_disconnect(srpt_channel_t *ch)
{
	ibt_status_t		status;

	SRPT_DPRINTF_L3("ch_disconnect, invoked for ch (%p)",
	    (void *)ch);

	rw_enter(&ch->ch_rwlock, RW_WRITER);

	/*
	 * If we are already in the process of disconnecting then
	 * nothing need be done; CM will call back into us when done.
	 */
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_disconnect, called when"
		    " disconnect in progress");
		rw_exit(&ch->ch_rwlock);
		return;
	}
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Initiate the sending of the CM DREQ message; the private data
	 * should be the SRP Target logout IU.  We don't really care about
	 * the remote CM DREP message returned.  We issue this in an
	 * asynchronous manner and will clean up when called back by CM.
	 */
	status = ibt_close_rc_channel(ch->ch_chan_hdl, IBT_NONBLOCKING,
	    NULL, 0, NULL, NULL, 0);

	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_disconnect, close RC channel"
		    " err(%d)", status);
	}
}

/*
 * srpt_ch_cleanup()
 */
void
srpt_ch_cleanup(srpt_channel_t *ch)
{
	srpt_iu_t		*iu;
	srpt_iu_t		*next;
	ibt_wc_t		wc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*tgt_ch;
	scsi_task_t		*iutask;

	SRPT_DPRINTF_L3("ch_cleanup, invoked for ch(%p), state(%d)",
	    (void *)ch, ch->ch_state);

	/* add a ref for the channel until we're done */
	srpt_ch_add_ref(ch);

	tgt = ch->ch_tgt;
	ASSERT(tgt != NULL);

	/*
	 * Make certain the channel is in the target port's list of
	 * known channels and remove it (releasing the target
	 * port's reference to the channel).
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	tgt_ch = list_head(&tgt->tp_ch_list);
	while (tgt_ch != NULL) {
		if (tgt_ch == ch) {
			list_remove(&tgt->tp_ch_list, tgt_ch);
			srpt_ch_release_ref(tgt_ch, 0);
			break;
		}
		tgt_ch = list_next(&tgt->tp_ch_list, tgt_ch);
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	if (tgt_ch == NULL) {
		SRPT_DPRINTF_L2("ch_cleanup, target channel no"
		    " longer known to target");
		srpt_ch_release_ref(ch, 0);
		return;
	}

	rw_enter(&ch->ch_rwlock, RW_WRITER);
	ch->ch_state = SRPT_CHANNEL_DISCONNECTING;
	rw_exit(&ch->ch_rwlock);

	/*
	 * Generally the IB CQ's will have been drained prior to
	 * getting to this call; but we check here to make certain.
	 */
	if (ch->ch_scq_hdl) {
		SRPT_DPRINTF_L4("ch_cleanup, start drain (%d)",
		    ch->ch_swqe_posted);
		while ((int)ch->ch_swqe_posted > 0) {
			delay(drv_usectohz(1000));
		}
		ibt_set_cq_handler(ch->ch_scq_hdl, NULL, NULL);
	}

	if (ch->ch_rcq_hdl) {
		ibt_set_cq_handler(ch->ch_rcq_hdl, NULL, NULL);

		while (ibt_poll_cq(ch->ch_rcq_hdl, &wc, 1, NULL) ==
		    IBT_SUCCESS) {
			iu = (srpt_iu_t *)(uintptr_t)wc.wc_id;
			SRPT_DPRINTF_L4("ch_cleanup, recovering"
			    " outstanding RX iu(%p)", (void *)iu);
			mutex_enter(&iu->iu_lock);
			srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
			/*
			 * Channel reference has not yet been added for this
			 * IU, so do not decrement.
			 */
			mutex_exit(&iu->iu_lock);
		}
	}

	/*
	 * Go through the list of outstanding IUs for the channel's SCSI
	 * session and for each either abort or complete an abort.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_session != NULL) {
		rw_enter(&ch->ch_session->ss_rwlock, RW_READER);
		iu = list_head(&ch->ch_session->ss_task_list);
		while (iu != NULL) {
			next = list_next(&ch->ch_session->ss_task_list, iu);

			mutex_enter(&iu->iu_lock);
			if (ch == iu->iu_ch) {
				if (iu->iu_stmf_task == NULL) {
					cmn_err(CE_NOTE,
					    "ch_cleanup, NULL stmf task");
					ASSERT(0);
				}
				iutask = iu->iu_stmf_task;
			} else {
				iutask = NULL;
			}
			mutex_exit(&iu->iu_lock);

			if (iutask != NULL) {
				SRPT_DPRINTF_L4("ch_cleanup, aborting "
				    "task(%p)", (void *)iutask);
				stmf_abort(STMF_QUEUE_TASK_ABORT, iutask,
				    STMF_ABORTED, NULL);
			}
			iu = next;
		}
		rw_exit(&ch->ch_session->ss_rwlock);
	}
	rw_exit(&ch->ch_rwlock);

	srpt_ch_release_ref(ch, 0);
}
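
/*
 * Ordering note (an assumption drawn from the comments above, not from
 * code in this file): cleanup is expected to run from the CM callback
 * path after the disconnect has completed, so both CQs should already
 * be quiesced; the drain loops above are defensive rather than the
 * primary teardown mechanism.
 */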

/*
 * srpt_ch_rsp_comp()
 *
 * Process a completion for an IB SEND message.  A SEND completion
 * is for a SRP response packet sent back to the initiator.  It
 * will not have a STMF SCSI task associated with it if it was
 * sent for a rejected IU, or was a task management abort response.
 */
static void
srpt_ch_rsp_comp(srpt_channel_t *ch, srpt_iu_t *iu,
	ibt_wc_status_t wc_status)
{
	ASSERT(iu->iu_ch == ch);

	/*
	 * If the work completion indicates failure, decrement the
	 * send posted count.  If it is a flush error, we are
	 * done; for all other errors start a channel disconnect.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_rsp_comp, WC status err(%d)",
		    wc_status);
		atomic_dec_32(&iu->iu_sq_posted_cnt);

		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}

		mutex_enter(&iu->iu_lock);
		if (iu->iu_stmf_task == NULL) {
			srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
			mutex_exit(&iu->iu_lock);
			srpt_ch_release_ref(ch, 0);
		} else {
			/* cleanup handled in task_free */
			mutex_exit(&iu->iu_lock);
		}
		return;
	}

	/*
	 * If the IU response completion is not associated with
	 * a SCSI task, release the IU to return the resource
	 * and the reference to the channel it holds.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if (iu->iu_stmf_task == NULL) {
		srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
		mutex_exit(&iu->iu_lock);
		srpt_ch_release_ref(ch, 0);
		return;
	}

	/*
	 * If STMF has requested the IU task be aborted, then notify STMF
	 * the command is now aborted.
	 */
	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t	*abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get a SEND completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Successful status response completion for SCSI task.
	 * Let STMF know we are done.
	 */
	mutex_exit(&iu->iu_lock);

	stmf_send_status_done(iu->iu_stmf_task, STMF_SUCCESS,
	    STMF_IOF_LPORT_DONE);
}

/*
 * srpt_ch_data_comp()
 *
 * Process an IB completion for a RDMA operation.  This completion
 * should be associated with the last RDMA operation for any
 * data buffer transfer.
 */
static void
srpt_ch_data_comp(srpt_channel_t *ch, stmf_data_buf_t *stmf_dbuf,
	ibt_wc_status_t wc_status)
{
	srpt_ds_dbuf_t		*dbuf;
	srpt_iu_t		*iu;
	stmf_status_t		status;

	ASSERT(stmf_dbuf != NULL);

	dbuf = (srpt_ds_dbuf_t *)stmf_dbuf->db_port_private;

	ASSERT(dbuf != NULL);

	iu = dbuf->db_iu;

	ASSERT(iu != NULL);
	ASSERT(iu->iu_ch == ch);

	/*
	 * If the work completion indicates a non-flush failure, then
	 * start a channel disconnect (asynchronous) and release
	 * the reference to the IU.  The task will be cleaned
	 * up with STMF during channel shutdown processing.
	 */
	if (wc_status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_data_comp, WC status err(%d)",
		    wc_status);
		if (wc_status != IBT_WC_WR_FLUSHED_ERR) {
			srpt_ch_disconnect(ch);
		}
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		return;
	}

	/*
	 * If STMF has requested this task be aborted, and this is the
	 * last I/O operation outstanding, notify STMF the task has been
	 * aborted and ignore the completion.
	 */
	mutex_enter(&iu->iu_lock);
	atomic_dec_32(&iu->iu_sq_posted_cnt);

	if ((iu->iu_flags & SRPT_IU_STMF_ABORTING) != 0) {
		scsi_task_t	*abort_task = iu->iu_stmf_task;

		mutex_exit(&iu->iu_lock);
		stmf_abort(STMF_REQUEUE_TASK_ABORT_LPORT, abort_task,
		    STMF_ABORTED, NULL);
		return;
	}

	/*
	 * We should not get an RDMA completion where the task has already
	 * completed aborting and STMF has been informed.
	 */
	ASSERT((iu->iu_flags & SRPT_IU_ABORTED) == 0);

	/*
	 * Good completion for the last RDMA op associated with a data
	 * buffer I/O; if requested, initiate status, otherwise let STMF
	 * know we are done.
	 */
	stmf_dbuf->db_xfer_status = STMF_SUCCESS;
	mutex_exit(&iu->iu_lock);

	DTRACE_SRP_8(xfer__done, srpt_channel_t, ch,
	    ibt_wr_ds_t, &(dbuf->db_sge), srpt_iu_t, iu,
	    ibt_send_wr_t, 0, uint32_t, stmf_dbuf->db_data_size,
	    uint32_t, 0, uint32_t, 0,
	    uint32_t, (stmf_dbuf->db_flags & DB_DIRECTION_TO_RPORT) ? 1 : 0);

	if ((stmf_dbuf->db_flags & DB_SEND_STATUS_GOOD) != 0) {
		status = srpt_stp_send_status(dbuf->db_iu->iu_stmf_task, 0);
		if (status == STMF_SUCCESS) {
			return;
		}
		stmf_dbuf->db_xfer_status = STMF_FAILURE;
	}
	stmf_data_xfer_done(dbuf->db_iu->iu_stmf_task, stmf_dbuf, 0);
}

/*
 * srpt_ch_scq_hdlr()
 */
static void
srpt_ch_scq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_SEND_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		cq_rearmed = 0;
	uint32_t		entries;
	srpt_swqe_t		*swqe;

	ASSERT(ch != NULL);

	/* Reference channel for the duration of this call */
	srpt_ch_add_ref(ch);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_SEND_WC_POLL_SIZE,
		    &entries);
		if (status == IBT_CQ_EMPTY) {
			/*
			 * CQ drained; if we have not rearmed the CQ
			 * do so and poll to eliminate race, otherwise
			 * we are done.
			 */
			if (cq_rearmed == 0) {
				ibt_enable_cq_notify(ch->ch_scq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		} else if (status != IBT_SUCCESS) {
			/*
			 * This error should not happen; it indicates something
			 * abnormal has gone wrong and represents either a
			 * hardware or programming logic coding error.
			 */
			SRPT_DPRINTF_L2("ch_scq_hdlr, unexpected CQ err(%d)",
			    status);
			srpt_ch_disconnect(ch);
			break;
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 * A zero work ID indicates this CQE is associated
			 * with an intermediate post of a RDMA data transfer
			 * operation.  Since intermediate data requests are
			 * unsignaled, we should only get these if there was
			 * an error.  No action is required.
			 */
			if (wcp->wc_id == 0) {
				continue;
			}
			swqe = ch->ch_swqe + wcp->wc_id;

			switch (swqe->sw_type) {
			case SRPT_SWQE_TYPE_RESP:
				srpt_ch_rsp_comp(ch, (srpt_iu_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			case SRPT_SWQE_TYPE_DATA:
				srpt_ch_data_comp(ch, (stmf_data_buf_t *)
				    swqe->sw_addr, wcp->wc_status);
				break;

			default:
				SRPT_DPRINTF_L2("ch_scq_hdlr, bad type(%d)",
				    swqe->sw_type);
				ASSERT(0);
			}

			srpt_ch_free_swqe_wrid(ch, wcp->wc_id);
		}
	}

	srpt_ch_release_ref(ch, 0);
}
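
/*
 * The poll/rearm/poll pattern used by both CQ handlers, reduced to a
 * sketch (illustration only, using the same ibt calls as above):
 *
 *	for (;;) {
 *		if (ibt_poll_cq(...) == IBT_CQ_EMPTY) {
 *			if (!rearmed) {
 *				ibt_enable_cq_notify(...);
 *				rearmed = 1;
 *				continue;	poll once more
 *			}
 *			break;
 *		}
 *		consume entries;
 *	}
 *
 * A completion that lands between the final empty poll and the notify
 * enable raises no event, so the extra poll after rearming is what
 * keeps such a CQE from sitting unprocessed until the next interrupt.
 */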

/*
 * srpt_ch_rcq_hdlr()
 */
static void
srpt_ch_rcq_hdlr(ibt_cq_hdl_t cq_hdl, void *arg)
{
	ibt_status_t		status;
	srpt_channel_t		*ch = arg;
	ibt_wc_t		wc[SRPT_RECV_WC_POLL_SIZE];
	ibt_wc_t		*wcp;
	int			i;
	uint32_t		entries;
	srpt_iu_t		*iu;
	uint_t			cq_rearmed = 0;

	/*
	 * The channel object will exist while the CQ handler call-back
	 * is installed.
	 */
	ASSERT(ch != NULL);
	srpt_ch_add_ref(ch);

	/*
	 * If we know a channel disconnect has started do nothing
	 * and let channel cleanup code recover resources from the CQ.
	 * We are not concerned about races with the state transition
	 * since the code will do the correct thing either way. This
	 * is simply to circumvent rearming the CQ, and it will
	 * catch the state next time.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		SRPT_DPRINTF_L2("ch_rcq_hdlr, channel disconnecting");
		rw_exit(&ch->ch_rwlock);
		srpt_ch_release_ref(ch, 0);
		return;
	}
	rw_exit(&ch->ch_rwlock);

	for (;;) {
		status = ibt_poll_cq(cq_hdl, &wc[0], SRPT_RECV_WC_POLL_SIZE,
		    &entries);
		if (status == IBT_CQ_EMPTY) {
			/*
			 * OK, empty; if we have not rearmed the CQ
			 * do so, and poll to eliminate race, otherwise
			 * we are done.
			 */
			if (cq_rearmed == 0) {
				ibt_enable_cq_notify(ch->ch_rcq_hdl,
				    IBT_NEXT_COMPLETION);
				cq_rearmed = 1;
				continue;
			} else {
				break;
			}
		} else if (status != IBT_SUCCESS) {
			/*
			 * This error should not happen; it indicates something
			 * abnormal has gone wrong and represents either a
			 * hardware or programming logic coding error.
			 */
			SRPT_DPRINTF_L2("ch_rcq_hdlr, unexpected CQ err(%d)",
			    status);
			srpt_ch_disconnect(ch);
			break;
		}

		for (wcp = wc, i = 0; i < entries; i++, wcp++) {

			/*
			 * Check wc_status before proceeding.  If the
			 * status indicates a channel problem, stop processing.
			 */
			if (wcp->wc_status != IBT_WC_SUCCESS) {
				if (wcp->wc_status != IBT_WC_WR_FLUSHED_ERR) {
					SRPT_DPRINTF_L2(
					    "ch_rcq, unexpected"
					    " wc_status err(%d)",
					    wcp->wc_status);
					srpt_ch_disconnect(ch);
					/* XXX - verify not leaking IUs */
					goto done;
				} else {
					/* skip IUs with errors */
					SRPT_DPRINTF_L2(
					    "ch_rcq, ERROR comp(%d)",
					    wcp->wc_status);
					/* XXX - verify not leaking IUs */
					continue;
				}
			}

			iu = (srpt_iu_t *)(uintptr_t)wcp->wc_id;
			ASSERT(iu != NULL);

			/*
			 * Process the IU.
			 */
			ASSERT(wcp->wc_type == IBT_WRC_RECV);
			srpt_ch_process_iu(ch, iu);
		}
	}

done:
	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_srp_cmd()
 */
static int
srpt_ch_srp_cmd(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_cmd_req_t		*cmd = (srp_cmd_req_t *)iu->iu_buf;
	srp_indirect_desc_t	*i_desc;
	uint_t			i_di_cnt;
	uint_t			i_do_cnt;
	uint8_t			do_fmt;
	uint8_t			di_fmt;
	uint32_t		*cur_desc_off;
	int			i;
	ibt_status_t		status;
	uint8_t			addlen;


	DTRACE_SRP_2(task__command, srpt_channel_t, ch, srp_cmd_req_t, cmd);
	iu->iu_ch  = ch;
	iu->iu_tag = cmd->cr_tag;

	/*
	 * The SRP specification and SAM require support for bi-directional
	 * data transfer, so we create a single buffer descriptor list in
	 * the IU buffer that covers the data-in and data-out buffers.
	 * In practice we will just see unidirectional transfers with either
	 * data-in or data-out descriptors.  If we were to take that as fact,
	 * we could reduce overhead slightly.
	 */

	/*
	 * The additional length is a 6-bit number in 4-byte words, so
	 * multiply by 4 to get bytes.
	 */
	addlen = cmd->cr_add_cdb_len & 0x3f;	/* mask off 6 bits */

	cur_desc_off = (uint32_t *)(void *)&cmd->cr_add_data;
	cur_desc_off += addlen;			/* 32-bit arithmetic */
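
	/*
	 * Worked example (hypothetical value): cr_add_cdb_len == 4 means
	 * 4 32-bit words, i.e. 16 bytes of additional CDB.  Because
	 * cur_desc_off is a uint32_t pointer, adding addlen advances it
	 * by those same 16 bytes, leaving it at the first data buffer
	 * descriptor.
	 */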
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = (srp_direct_desc_t *)(void *)cur_desc_off;

	/*
	 * Examine buffer description for Data In (i.e. data flows
	 * to the initiator).
	 */
	i_do_cnt = i_di_cnt = 0;
	di_fmt = cmd->cr_buf_fmt >> 4;
	if (di_fmt == SRP_DATA_DESC_DIRECT) {
		iu->iu_num_rdescs = 1;
		cur_desc_off = (uint32_t *)(void *)&iu->iu_rdescs[1];
	} else if (di_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)iu->iu_rdescs;
		i_di_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if (i_di_cnt > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_di_cnt(%d), cr_dicnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_di_cnt, cmd->cr_dicnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], iu->iu_rdescs,
		    sizeof (srp_direct_desc_t) * i_di_cnt);
		iu->iu_num_rdescs += i_di_cnt;
		cur_desc_off = (uint32_t *)(void *)&i_desc->id_desc[i_di_cnt];
	}

	/*
	 * Examine buffer description for Data Out (i.e. data flows
	 * from the initiator).
	 */
	do_fmt = cmd->cr_buf_fmt & 0x0F;
	if (do_fmt == SRP_DATA_DESC_DIRECT) {
		if (di_fmt == SRP_DATA_DESC_DIRECT) {
			bcopy(cur_desc_off, &iu->iu_rdescs[iu->iu_num_rdescs],
			    sizeof (srp_direct_desc_t));
		}
		iu->iu_num_rdescs++;
	} else if (do_fmt == SRP_DATA_DESC_INDIRECT) {
		i_desc = (srp_indirect_desc_t *)cur_desc_off;
		i_do_cnt  = b2h32(i_desc->id_table.dd_len) /
		    sizeof (srp_direct_desc_t);

		/*
		 * Some initiators like OFED occasionally use the wrong counts,
		 * so check total to allow for this.  NOTE: we do not support
		 * reading of the descriptor table from the initiator, so if
		 * not all descriptors are in the IU we drop the task.
		 */
		if ((i_di_cnt + i_do_cnt) > (cmd->cr_dicnt + cmd->cr_docnt)) {
			SRPT_DPRINTF_L2("ch_srp_cmd, remote RDMA of"
			    " descriptors not supported");
			SRPT_DPRINTF_L2("ch_srp_cmd, sizeof entry (%d),"
			    " i_do_cnt(%d), cr_docnt(%d)",
			    (uint_t)sizeof (srp_direct_desc_t),
			    i_do_cnt, cmd->cr_docnt);
			iu->iu_rdescs = NULL;
			return (1);
		}
		bcopy(&i_desc->id_desc[0], &iu->iu_rdescs[iu->iu_num_rdescs],
		    sizeof (srp_direct_desc_t) * i_do_cnt);
		iu->iu_num_rdescs += i_do_cnt;
	}

	iu->iu_tot_xfer_len = 0;
	for (i = 0; i < iu->iu_num_rdescs; i++) {
		iu->iu_rdescs[i].dd_vaddr = b2h64(iu->iu_rdescs[i].dd_vaddr);
		iu->iu_rdescs[i].dd_hdl   = b2h32(iu->iu_rdescs[i].dd_hdl);
		iu->iu_rdescs[i].dd_len   = b2h32(iu->iu_rdescs[i].dd_len);
		iu->iu_tot_xfer_len += iu->iu_rdescs[i].dd_len;
	}

#ifdef DEBUG
	if (srpt_errlevel >= SRPT_LOG_L4) {
		SRPT_DPRINTF_L4("ch_srp_cmd, iu->iu_tot_xfer_len (%d)",
		    iu->iu_tot_xfer_len);
		for (i = 0; i < iu->iu_num_rdescs; i++) {
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_vaddr"
			    " (0x%08llx)",
			    i, (u_longlong_t)iu->iu_rdescs[i].dd_vaddr);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_hdl"
			    " (0x%08x)", i, iu->iu_rdescs[i].dd_hdl);
			SRPT_DPRINTF_L4("ch_srp_cmd, rdescs[%d].dd_len (%d)",
			    i, iu->iu_rdescs[i].dd_len);
		}
		SRPT_DPRINTF_L4("ch_srp_cmd, LUN (0x%08lx)",
		    (unsigned long int) *((uint64_t *)(void *) cmd->cr_lun));
	}
#endif
	rw_enter(&ch->ch_rwlock, RW_READER);

	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until STMF
	 * is notified the task is done (from a lport perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, cmd->cr_lun,
	    SRP_CDB_SIZE + (addlen * 4), 0);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate; return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return the IU
		 * resource.
		 */
		SRPT_DPRINTF_L2("ch_srp_cmd, SCSI task allocation failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_cmd, error(%d) posting error"
			    " response", status);
			return (1);
		} else {
			return (0);
		}
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;

	if (di_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_WRITE_DATA;
	}
	if (do_fmt != 0) {
		iu->iu_stmf_task->task_flags |= TF_READ_DATA;
	}
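
	/*
	 * Note the apparent inversion above (a reading of the flags, not
	 * a behavioral change): SRP data-in descriptors describe data
	 * flowing to the initiator, which from this port's perspective is
	 * a write, hence TF_WRITE_DATA; data-out is data we read from the
	 * initiator, hence TF_READ_DATA.
	 */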

	switch (cmd->cr_task_attr) {
	case SRP_TSK_ATTR_QTYPE_SIMPLE:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_SIMPLE_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_HEAD_OF_Q:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_HEAD_OF_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ORDERED:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;

	case SRP_TSK_ATTR_QTYPE_ACA_Q_TAG:
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ACA;
		break;

	default:
		SRPT_DPRINTF_L2("ch_srp_cmd, reserved task attr (%d)",
		    cmd->cr_task_attr);
		iu->iu_stmf_task->task_flags |=	TF_ATTR_ORDERED_QUEUE;
		break;
	}
	iu->iu_stmf_task->task_additional_flags = 0;
	iu->iu_stmf_task->task_priority		= 0;
	iu->iu_stmf_task->task_mgmt_function    = TM_NONE;
	iu->iu_stmf_task->task_max_nbufs	= STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = iu->iu_tot_xfer_len;
	iu->iu_stmf_task->task_csn_size		= 0;

	bcopy(cmd->cr_cdb, iu->iu_stmf_task->task_cdb,
	    SRP_CDB_SIZE);
	if (addlen != 0) {
		bcopy(&cmd->cr_add_data,
		    iu->iu_stmf_task->task_cdb + SRP_CDB_SIZE,
		    addlen * 4);
	}

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	DTRACE_SRP_3(scsi__command, srpt_channel_t, iu->iu_ch,
	    scsi_task_t, iu->iu_stmf_task, srp_cmd_req_t, cmd);
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_cmd, new task (%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}

/*
 * srpt_ch_task_mgmt_abort()
 *
 * Returns IBT_SUCCESS on success, indicating we've sent a management
 * response.  Any other status indicates failure; the IU should be
 * reposted.
 */
static ibt_status_t
srpt_ch_task_mgmt_abort(srpt_channel_t *ch, srpt_iu_t *iu,
	uint64_t tag_to_abort)
{
	srpt_session_t	*session = ch->ch_session;
	srpt_iu_t	*ss_iu;
	ibt_status_t	status;

	/*
	 * Locate the associated task (tag_to_abort) in the
	 * session's active task list.
	 */
	rw_enter(&session->ss_rwlock, RW_READER);
	ss_iu = list_head(&session->ss_task_list);
	while (ss_iu != NULL) {
		mutex_enter(&ss_iu->iu_lock);
		if (tag_to_abort == ss_iu->iu_tag) {
			mutex_exit(&ss_iu->iu_lock);
			break;
		}
		mutex_exit(&ss_iu->iu_lock);
		ss_iu = list_next(&session->ss_task_list, ss_iu);
	}
	rw_exit(&session->ss_rwlock);

	/*
	 * Take appropriate action based on state of task
	 * to be aborted:
	 * 1) No longer exists - do nothing.
	 * 2) Previously aborted or status queued - do nothing.
	 * 3) Otherwise - initiate abort.
	 */
	if (ss_iu == NULL) {
		goto send_mgmt_resp;
	}

	mutex_enter(&ss_iu->iu_lock);
	if ((ss_iu->iu_flags & (SRPT_IU_STMF_ABORTING |
	    SRPT_IU_ABORTED | SRPT_IU_RESP_SENT)) != 0) {
		mutex_exit(&ss_iu->iu_lock);
		goto send_mgmt_resp;
	}

	/*
	 * Set aborting flag and notify STMF of abort request.  No
	 * additional I/O will be queued for this IU.
	 */
	SRPT_DPRINTF_L3("ch_task_mgmt_abort, task found");
	ss_iu->iu_flags |= SRPT_IU_SRP_ABORTING;
	mutex_exit(&ss_iu->iu_lock);
	stmf_abort(STMF_QUEUE_TASK_ABORT,
	    ss_iu->iu_stmf_task, STMF_ABORTED, NULL);

send_mgmt_resp:
	mutex_enter(&iu->iu_lock);
	status = srpt_stp_send_mgmt_response(iu, SRP_TM_SUCCESS,
	    SRPT_FENCE_SEND);
	mutex_exit(&iu->iu_lock);

	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_task_mgmt_abort, err(%d)"
		    " posting abort response", status);
	}

	return (status);
}

/*
 * srpt_ch_srp_task_mgmt()
 */
static int
srpt_ch_srp_task_mgmt(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srp_tsk_mgmt_t		*tsk = (srp_tsk_mgmt_t *)iu->iu_buf;
	uint8_t			tm_fn;
	ibt_status_t		status;

	SRPT_DPRINTF_L3("ch_srp_task_mgmt, SRP TASK MGMT func(%d)",
	    tsk->tm_function);

	/*
	 * Both the tag and lun fields have the same corresponding offsets
	 * in the srp_tsk_mgmt_t and srp_cmd_req_t structures.  The
	 * casting will allow us to use the same dtrace translator.
	 */
	DTRACE_SRP_2(task__command, srpt_channel_t, ch,
	    srp_cmd_req_t, (srp_cmd_req_t *)tsk);

	iu->iu_ch  = ch;
	iu->iu_tag = tsk->tm_tag;

	/*
	 * Task management aborts are processed directly by the SRP driver;
	 * all other task management requests are handed off to STMF.
	 */
	switch (tsk->tm_function) {
	case SRP_TSK_MGMT_ABORT_TASK:
		/*
		 * Initiate SCSI transport protocol specific task abort
		 * logic.
		 */
		status = srpt_ch_task_mgmt_abort(ch, iu, tsk->tm_task_tag);
		if (status != IBT_SUCCESS) {
			/* repost this IU */
			return (1);
		} else {
			return (0);
		}

	case SRP_TSK_MGMT_ABORT_TASK_SET:
		tm_fn = TM_ABORT_TASK_SET;
		break;

	case SRP_TSK_MGMT_CLEAR_TASK_SET:
		tm_fn = TM_CLEAR_TASK_SET;
		break;

	case SRP_TSK_MGMT_LUN_RESET:
		tm_fn = TM_LUN_RESET;
		break;

	case SRP_TSK_MGMT_CLEAR_ACA:
		tm_fn = TM_CLEAR_ACA;
		break;

	default:
		/*
		 * SRP does not support the requested task management
		 * function; return a not supported status in the response.
		 */
		SRPT_DPRINTF_L2("ch_srp_task_mgmt, SRP task mgmt fn(%d)"
		    " not supported", tsk->tm_function);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_mgmt_response(iu,
		    SRP_TM_NOT_SUPPORTED, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting"
			    " response", status);
			return (1);
		}
		return (0);
	}

	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		/*
		 * The channel has begun disconnecting, so ignore the
		 * command, returning the IU resources.
		 */
		rw_exit(&ch->ch_rwlock);
		return (1);
	}

	/*
	 * Once a SCSI mgmt task is allocated and assigned to the IU, it
	 * owns those IU resources, which will be held until we inform
	 * STMF that we are done with the task (from an lport's perspective).
	 */
	iu->iu_stmf_task = stmf_task_alloc(ch->ch_tgt->tp_lport,
	    ch->ch_session->ss_ss, tsk->tm_lun, 0, STMF_TASK_EXT_NONE);
	if (iu->iu_stmf_task == NULL) {
		/*
		 * Could not allocate; return status to the initiator
		 * indicating that we are temporarily unable to process
		 * commands.  If unable to send, immediately return the IU
		 * resource.
		 */
		SRPT_DPRINTF_L2("ch_srp_task_mgmt, SCSI task allocation"
		    " failure");
		rw_exit(&ch->ch_rwlock);
		mutex_enter(&iu->iu_lock);
		status = srpt_stp_send_response(iu, STATUS_BUSY, 0, 0, 0,
		    NULL, SRPT_NO_FENCE_SEND);
		mutex_exit(&iu->iu_lock);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L2("ch_srp_task_mgmt, err(%d) posting"
			    " busy response", status);
			/* repost the IU */
			return (1);
		}
		return (0);
	}

	iu->iu_stmf_task->task_port_private = iu;
	iu->iu_stmf_task->task_flags = 0;
	iu->iu_stmf_task->task_additional_flags =
	    TASK_AF_NO_EXPECTED_XFER_LENGTH;
	iu->iu_stmf_task->task_priority = 0;
	iu->iu_stmf_task->task_mgmt_function = tm_fn;
	iu->iu_stmf_task->task_max_nbufs = STMF_BUFS_MAX;
	iu->iu_stmf_task->task_expected_xfer_length = 0;
	iu->iu_stmf_task->task_csn_size = 0;

	/*
	 * Add the IU/task to the session and post to STMF.  The task will
	 * remain in the session's list until STMF is informed by SRP that
	 * it is done with the task.
	 */
	srpt_stp_add_task(ch->ch_session, iu);

	SRPT_DPRINTF_L3("ch_srp_task_mgmt, new mgmt task(%p) posted",
	    (void *)iu->iu_stmf_task);
	stmf_post_task(iu->iu_stmf_task, NULL);
	rw_exit(&ch->ch_rwlock);

	return (0);
}

/*
 * srpt_ch_process_iu()
 */
static void
srpt_ch_process_iu(srpt_channel_t *ch, srpt_iu_t *iu)
{
	srpt_iu_data_t	*iud;
	int		status = 1;

	/*
	 * The IU adds a reference to the channel which will represent
	 * a reference by STMF.  If for whatever reason the IU
	 * is not handed off to STMF, then this reference will be
	 * released.  Otherwise, the reference will be released when
	 * SRP informs STMF that the associated SCSI task is done.
	 */
	srpt_ch_add_ref(ch);

	/*
	 * Validate login RC channel state. Normally active; if
	 * not active then we need to handle a possible race between the
	 * receipt of an implied RTU and CM calling back to notify of the
	 * state transition.
	 */
	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		rw_exit(&ch->ch_rwlock);
		goto repost_iu;
	}
	rw_exit(&ch->ch_rwlock);

	iud = iu->iu_buf;

	switch (iud->rx_iu.srp_op) {
	case SRP_IU_CMD:
		status = srpt_ch_srp_cmd(ch, iu);
		break;

	case SRP_IU_TASK_MGMT:
		status = srpt_ch_srp_task_mgmt(ch, iu);
		return;

	case SRP_IU_I_LOGOUT:
		SRPT_DPRINTF_L3("ch_process_iu, SRP INITIATOR LOGOUT");
		/*
		 * Initiators should logout by issuing a CM disconnect
		 * request (DREQ) with the logout IU in the private data;
		 * however some initiators have been known to send the
		 * IU in-band; if this happens just initiate the logout.
		 * Note that we do not return a response as per the
		 * specification.
		 */
		srpt_stp_logout(ch);
		break;

	case SRP_IU_AER_RSP:
	case SRP_IU_CRED_RSP:
	default:
		/*
		 * We don't send asynchronous events or ask for credit
		 * adjustments, so nothing need be done.  Log that we got
		 * an unexpected IU but then just repost the IU to the SRQ.
		 */
		SRPT_DPRINTF_L2("ch_process_iu, invalid IU from initiator,"
		    " IU opcode(%d)", iud->rx_iu.srp_op);
		break;
	}

	if (status == 0) {
		return;
	}

repost_iu:
	SRPT_DPRINTF_L4("process_iu:  reposting iu %p", (void *)iu);
	mutex_enter(&iu->iu_lock);
	srpt_ioc_repost_recv_iu(iu->iu_ioc, iu);
	mutex_exit(&iu->iu_lock);
	srpt_ch_release_ref(ch, 0);
}

/*
 * srpt_ch_post_send()
 */
ibt_status_t
srpt_ch_post_send(srpt_channel_t *ch, srpt_iu_t *iu, uint32_t len,
	uint_t fence)
{
	ibt_status_t		status;
	ibt_send_wr_t		wr;
	ibt_wr_ds_t		ds;
	uint_t			posted;

	ASSERT(ch != NULL);
	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	rw_enter(&ch->ch_rwlock, RW_READER);
	if (ch->ch_state == SRPT_CHANNEL_DISCONNECTING) {
		rw_exit(&ch->ch_rwlock);
		SRPT_DPRINTF_L2("ch_post_send, bad ch state (%d)",
		    ch->ch_state);
		return (IBT_FAILURE);
	}
	rw_exit(&ch->ch_rwlock);

	wr.wr_id = srpt_ch_alloc_swqe_wrid(ch, SRPT_SWQE_TYPE_RESP,
	    (void *)iu);
	if (wr.wr_id == 0) {
		SRPT_DPRINTF_L2("ch_post_send, queue full");
		return (IBT_FAILURE);
	}

	atomic_inc_32(&iu->iu_sq_posted_cnt);

	wr.wr_flags = IBT_WR_SEND_SIGNAL;
	if (fence == SRPT_FENCE_SEND) {
		wr.wr_flags |= IBT_WR_SEND_FENCE;
	}
	wr.wr_opcode = IBT_WRC_SEND;
	wr.wr_trans  = IBT_RC_SRV;
	wr.wr_nds = 1;
	wr.wr_sgl = &ds;

	ds.ds_va = iu->iu_sge.ds_va;
	ds.ds_key = iu->iu_sge.ds_key;
	ds.ds_len = len;

	SRPT_DPRINTF_L4("ch_post_send, posting SRP response to channel"
	    " ds.ds_va (0x%16llx), ds.ds_key (0x%08x),"
	    " ds.ds_len (%d)",
	    (u_longlong_t)ds.ds_va, ds.ds_key, ds.ds_len);

	status = ibt_post_send(ch->ch_chan_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ch_post_send, post_send failed (%d)",
		    status);
		atomic_dec_32(&iu->iu_sq_posted_cnt);
		srpt_ch_free_swqe_wrid(ch, wr.wr_id);
		return (status);
	}

	return (IBT_SUCCESS);
}
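
/*
 * Caller-side sketch for srpt_ch_post_send() (illustrative; the status
 * and task-management response paths in srpt_stp.c are assumed to be
 * the real callers):
 *
 *	mutex_enter(&iu->iu_lock);
 *	status = srpt_ch_post_send(ch, iu, len, SRPT_NO_FENCE_SEND);
 *	mutex_exit(&iu->iu_lock);
 *	if (status != IBT_SUCCESS)
 *		repost or release the IU;
 *
 * On failure the SWQE work ID and iu_sq_posted_cnt have already been
 * unwound here, so callers only need to deal with the IU itself.
 */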

/*
 * srpt_ch_alloc_swqe_wrid()
 */
ibt_wrid_t
srpt_ch_alloc_swqe_wrid(srpt_channel_t *ch,
	srpt_swqe_type_t wqe_type, void *addr)
{
	ibt_wrid_t	wrid;

	mutex_enter(&ch->ch_swqe_lock);
	if (ch->ch_head == ch->ch_tail) {
		mutex_exit(&ch->ch_swqe_lock);
		return ((ibt_wrid_t)0);
	}
	wrid = (ibt_wrid_t)ch->ch_head;
	ch->ch_swqe[ch->ch_head].sw_type = wqe_type;
	ch->ch_swqe[ch->ch_head].sw_addr = addr;
	ch->ch_head = ch->ch_swqe[ch->ch_head].sw_next;
	ch->ch_swqe_posted++;
	mutex_exit(&ch->ch_swqe_lock);
	return (wrid);
}

/*
 * srpt_ch_free_swqe_wrid()
 */
void
srpt_ch_free_swqe_wrid(srpt_channel_t *ch, ibt_wrid_t id)
{
	mutex_enter(&ch->ch_swqe_lock);
	ch->ch_swqe[ch->ch_tail].sw_next = id;
	ch->ch_tail = (uint32_t)id;
	ch->ch_swqe_posted--;
	mutex_exit(&ch->ch_swqe_lock);
}
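
/*
 * Allocation/free walk-through (a thought experiment using the 4-entry
 * layout sketched in srpt_ch_alloc(), not additional driver logic):
 * with ch_head == 1 and ch_tail == 3, allocating returns work ID 1 and
 * advances ch_head to swqe[1].sw_next == 2.  Once a single entry
 * remains, ch_head == ch_tail and allocation fails with 0; work ID 0 is
 * therefore never valid, which is why srpt_ch_scq_hdlr() can use it to
 * mark unsignaled intermediate RDMA posts.  Freeing a work ID links it
 * after the current tail and makes it the new tail.
 */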
1465