/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
 * Target (SRPT) port provider.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/sdt.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

/*
 * srpt_ioc_srq_size - Tunable parameter that specifies the number
 * of receive WQ entries that can be posted to the IOC shared
 * receive queue.
 */
uint32_t	srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
extern uint16_t srpt_send_msg_depth;
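
/*
 * Tuning note (illustrative, not from the original source): as a global
 * kernel variable, srpt_ioc_srq_size could be overridden at boot time
 * via /etc/system, e.g.
 *
 *	set srpt:srpt_ioc_srq_size = 8192
 *
 * Whatever value is configured, srpt_ioc_init() still caps the SRQ size
 * at the HCA's reported hca_max_srqs_sz.
 */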

/* IOC profile capabilities mask must be big-endian */
typedef struct srpt_ioc_opcap_bits_s {
#if	defined(_BIT_FIELDS_LTOH)
	uint8_t		af:1,
			at:1,
			wf:1,
			wt:1,
			rf:1,
			rt:1,
			sf:1,
			st:1;
#elif	defined(_BIT_FIELDS_HTOL)
	uint8_t		st:1,
			sf:1,
			rt:1,
			rf:1,
			wt:1,
			wf:1,
			at:1,
			af:1;
#else
#error	One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif
} srpt_ioc_opcap_bits_t;

typedef union {
	srpt_ioc_opcap_bits_t	bits;
	uint8_t			mask;
} srpt_ioc_opcap_mask_t;
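
/*
 * The union lets srpt_ioc_init_profile() set the individual capability
 * bits by name and then publish the whole byte through the "mask"
 * member; see the capmask usage in that routine.
 */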

/*
 * vmem arena variables - values derived from iSER
 */
#define	SRPT_MR_QUANTSIZE	0x400			/* 1K */
#define	SRPT_MIN_CHUNKSIZE	0x100000		/* 1MB */

/* use less memory on 32-bit kernels as it's much more constrained */
#ifdef _LP64
#define	SRPT_BUF_MR_CHUNKSIZE	0x1000000		/* 16MB */
#define	SRPT_BUF_POOL_MAX	0x40000000		/* 1GB */
#else
#define	SRPT_BUF_MR_CHUNKSIZE	0x400000		/* 4MB */
#define	SRPT_BUF_POOL_MAX	0x4000000		/* 64MB */
#endif
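
/*
 * The dbuf arena starts with a single SRPT_BUF_MR_CHUNKSIZE chunk and
 * grows on demand in srpt_vmem_alloc(), one registered memory chunk at
 * a time, until SRPT_BUF_POOL_MAX is reached.
 */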

static ibt_mr_flags_t	srpt_dbuf_mr_flags =
    IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
    IBT_MR_ENABLE_REMOTE_READ;

void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event);

static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
	IBTI_V_CURR,
	IBT_STORAGE_DEV,
	srpt_ioc_ib_async_hdlr,
	NULL,
	"srpt"
};

static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
static void srpt_ioc_fini(srpt_ioc_t *ioc);

static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
    ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool,
    ib_memlen_t chunksize);
static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
    ib_memlen_t len);
static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr);

/*
 * srpt_ioc_attach() - I/O Controller attach
 *
 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
 * should be held outside of this call.
 */
int
srpt_ioc_attach()
{
	int		status;
	int		hca_cnt;
	int		hca_ndx;
	ib_guid_t	*guid;
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	/*
	 * Attach to IBTF and initialize a list of IB devices.  Each
	 * HCA will be represented by an I/O Controller.
	 */
	status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
	    srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
	if (status != DDI_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
		    status);
		return (DDI_FAILURE);
	}

	hca_cnt = ibt_get_hca_list(&guid);
	if (hca_cnt < 1) {
		SRPT_DPRINTF_L2("ioc_attach, no HCA found");
		ibt_detach(srpt_ctxt->sc_ibt_hdl);
		srpt_ctxt->sc_ibt_hdl = NULL;
		return (DDI_FAILURE);
	}

	list_create(&srpt_ctxt->sc_ioc_list, sizeof (srpt_ioc_t),
	    offsetof(srpt_ioc_t, ioc_node));

	for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
		SRPT_DPRINTF_L2("ioc_attach, adding I/O"
		    " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]);

		ioc = srpt_ioc_init(guid[hca_ndx]);
		if (ioc == NULL) {
			SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)"
			    " failed", (u_longlong_t)guid[hca_ndx]);
			continue;
		}
		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ctxt->sc_num_iocs++;
	}

	ibt_free_hca_list(guid, hca_cnt);
	SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
	    srpt_ctxt->sc_num_iocs);
	return (DDI_SUCCESS);
}

/*
 * srpt_ioc_detach() - I/O Controller detach
 *
 * srpt_ctxt->sc_rwlock should be held outside of this call.
 */
void
srpt_ioc_detach()
{
	srpt_ioc_t	*ioc;

	ASSERT(srpt_ctxt != NULL);

	while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
		list_remove(&srpt_ctxt->sc_ioc_list, ioc);
		SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
		    " (%016llx), ibt_hdl(%p)",
		    (void *)ioc,
		    (u_longlong_t)ioc->ioc_guid,
		    (void *)ioc->ioc_ibt_hdl);
		srpt_ioc_fini(ioc);
	}

	list_destroy(&srpt_ctxt->sc_ioc_list);

	ibt_detach(srpt_ctxt->sc_ibt_hdl);
	srpt_ctxt->sc_ibt_hdl = NULL;
}

/*
 * srpt_ioc_init() - I/O Controller initialization
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static srpt_ioc_t *
srpt_ioc_init(ib_guid_t guid)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;
	ibt_hca_attr_t		hca_attr;
	uint_t			iu_ndx;
	uint_t			err_ndx;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	srpt_iu_t		*iu;
	ibt_srq_sizes_t		srq_attr;
	char			namebuf[32];
	size_t			iu_offset;

	status = ibt_query_hca_byguid(guid, &hca_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
		    status);
		return (NULL);
	}

	ioc = srpt_ioc_get_locked(guid);
	if (ioc != NULL) {
		SRPT_DPRINTF_L1("ioc_init, HCA already exists");
		return (NULL);
	}

	ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);

	rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);

	bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));

	SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
	    hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
	ioc->ioc_guid   = guid;

	status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
		goto hca_open_err;
	}

	status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
	    &ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
		goto pd_alloc_err;
	}

	/*
	 * We require hardware support for SRQs.  We use a common SRQ to
	 * reduce channel memory consumption.
	 */
	if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
		SRPT_DPRINTF_L0("ioc_init, no SRQ capability, not supported");
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L3("ioc_init, using shared receive queues, max SRQ work"
	    " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
	    srpt_ioc_srq_size);
	srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
	    ioc->ioc_attr.hca_max_srqs_sz);
	srq_attr.srq_sgl_sz = 1;

	status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
	    ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
	    &ioc->ioc_srq_attr);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
		goto srq_alloc_err;
	}

	SRPT_DPRINTF_L2("ioc_init, SRQ WR size(%d), SG size(%d)",
	    ioc->ioc_srq_attr.srq_wr_sz, ioc->ioc_srq_attr.srq_sgl_sz);

	ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);

	/*
	 * Allocate a pool of SRP IU message buffers and post them to
	 * the I/O Controller SRQ.  We let the SRQ manage the free IU
	 * messages.
	 */
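	/*
	 * One entry is deliberately held back from the configured SRQ
	 * size (the "- 1" below), presumably so the SRQ is never posted
	 * completely full; this reading is an inference, not stated in
	 * the original source.
	 */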
	ioc->ioc_num_iu_entries =
	    min(srq_attr.srq_wr_sz, srpt_ioc_srq_size) - 1;

	ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	ioc->ioc_iu_bufs = kmem_alloc(SRPT_DEFAULT_SEND_MSG_SIZE *
	    ioc->ioc_num_iu_entries, KM_SLEEP);

	if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
		SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
		goto srq_iu_alloc_err;
	}

	mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
	mr_attr.mr_len   = SRPT_DEFAULT_SEND_MSG_SIZE * ioc->ioc_num_iu_entries;
	mr_attr.mr_as    = NULL;
	mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
		    status);
		goto srq_iu_alloc_err;
	}

	for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
	    ioc->ioc_num_iu_entries; iu_ndx++, iu++) {

		iu_offset = (iu_ndx * SRPT_DEFAULT_SEND_MSG_SIZE);
		iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);

		mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);

		iu->iu_sge.ds_va  = mr_desc.md_vaddr + iu_offset;
		iu->iu_sge.ds_key = mr_desc.md_lkey;
		iu->iu_sge.ds_len = SRPT_DEFAULT_SEND_MSG_SIZE;
		iu->iu_ioc	  = ioc;
		iu->iu_pool_ndx   = iu_ndx;

		status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
			    status);
			goto srq_iu_post_err;
		}
	}

	/*
	 * Initialize the dbuf vmem arena
	 */
	(void) snprintf(namebuf, sizeof (namebuf),
	    "srpt_buf_pool_%16llX", (u_longlong_t)guid);
	ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
	    SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);

	if (ioc->ioc_dbuf_pool == NULL) {
		goto stmf_db_alloc_err;
	}

	/*
	 * Allocate the I/O Controller STMF data buffer allocator.  The
	 * data store will span all targets associated with this IOC.
	 */
	ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
	if (ioc->ioc_stmf_ds == NULL) {
		SRPT_DPRINTF_L1("ioc_init, STMF DBUF alloc failure for IOC");
		goto stmf_db_alloc_err;
	}
	ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
	ioc->ioc_stmf_ds->ds_free_data_buf  = &srpt_ioc_ds_free_dbuf;
	ioc->ioc_stmf_ds->ds_port_private   = ioc;

	rw_exit(&ioc->ioc_rwlock);
	return (ioc);

stmf_db_alloc_err:
	if (ioc->ioc_dbuf_pool != NULL) {
		srpt_vmem_destroy(ioc->ioc_dbuf_pool);
	}

srq_iu_post_err:
	if (ioc->ioc_iu_mr_hdl != NULL) {
		status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
		    ioc->ioc_iu_mr_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error deregistering"
			    " memory region (%d)", status);
		}
	}
	for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
	    err_ndx++, iu++) {
		mutex_destroy(&iu->iu_lock);
	}

srq_iu_alloc_err:
	if (ioc->ioc_iu_bufs != NULL) {
		kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
		    ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_iu_pool != NULL) {
		kmem_free(ioc->ioc_iu_pool,
		    sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
	}
	if (ioc->ioc_srq_hdl != NULL) {
		status = ibt_free_srq(ioc->ioc_srq_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
			    status);
		}
	}

srq_alloc_err:
	status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
	}

pd_alloc_err:
	status = ibt_close_hca(ioc->ioc_ibt_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_init, close HCA error (%d)", status);
	}

hca_open_err:
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (*ioc));
	return (NULL);
}

/*
 * srpt_ioc_fini() - I/O Controller Cleanup
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static void
srpt_ioc_fini(srpt_ioc_t *ioc)
{
	int		status;
	int		ndx;

	/*
	 * Note driver flows will have already taken all SRP
	 * services running on the I/O Controller off-line.
	 */
	rw_enter(&ioc->ioc_rwlock, RW_WRITER);
	if (ioc->ioc_ibt_hdl != NULL) {
		if (ioc->ioc_stmf_ds != NULL) {
			stmf_free(ioc->ioc_stmf_ds);
		}

		if (ioc->ioc_srq_hdl != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
			status = ibt_free_srq(ioc->ioc_srq_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free SRQ"
				    " error (%d)", status);
			}
		}

		if (ioc->ioc_iu_mr_hdl != NULL) {
			status = ibt_deregister_mr(
			    ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, error deregistering"
				    " memory region (%d)", status);
			}
		}

		if (ioc->ioc_iu_bufs != NULL) {
			kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
			    ioc->ioc_num_iu_entries);
		}

		if (ioc->ioc_iu_pool != NULL) {
			SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
			for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
				mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
			}

			SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
			kmem_free(ioc->ioc_iu_pool,
			    sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
			ioc->ioc_iu_pool = NULL;
			ioc->ioc_num_iu_entries = 0;
		}

		if (ioc->ioc_dbuf_pool != NULL) {
			srpt_vmem_destroy(ioc->ioc_dbuf_pool);
		}

		if (ioc->ioc_pd_hdl != NULL) {
			status = ibt_free_pd(ioc->ioc_ibt_hdl,
			    ioc->ioc_pd_hdl);
			if (status != IBT_SUCCESS) {
				SRPT_DPRINTF_L1("ioc_fini, free PD"
				    " error (%d)", status);
			}
		}

		status = ibt_close_hca(ioc->ioc_ibt_hdl);
		if (status != IBT_SUCCESS) {
			SRPT_DPRINTF_L1(
			    "ioc_fini, close HCA error (%d)", status);
		}
	}
	rw_exit(&ioc->ioc_rwlock);
	rw_destroy(&ioc->ioc_rwlock);
	kmem_free(ioc, sizeof (srpt_ioc_t));
}

/*
 * srpt_ioc_port_active() - I/O Controller port active
 */
static void
srpt_ioc_port_active(ibt_async_event_t *event)
{
	ibt_status_t		status;
	srpt_ioc_t		*ioc;

	ASSERT(event != NULL);

	SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");

	/*
	 * Find the HCA in question; if the HCA has completed
	 * initialization and the SRP Target service for the
	 * I/O Controller exists, then bind this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
		    " active");
		return;
	}

	if (ioc->ioc_tgt_port == NULL) {
		SRPT_DPRINTF_L2("ioc_port_active, I/O Controller target"
		    " port undefined");
		return;
	}

	/*
	 * We take the target lock here to serialize this operation
	 * with any STMF initiated target state transitions.  If
	 * SRP is off-line then the service handle is NULL.
	 */
	mutex_enter(&ioc->ioc_tgt_port->tp_lock);

	if (ioc->ioc_tgt_port->tp_ibt_svc_hdl != NULL) {
		status = srpt_ioc_svc_bind(ioc->ioc_tgt_port, event->ev_port);
		if (status != IBT_SUCCESS &&
		    status != IBT_HCA_PORT_NOT_ACTIVE) {
			SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
			    status);
		}
	}
	mutex_exit(&ioc->ioc_tgt_port->tp_lock);
}

/*
 * srpt_ioc_port_down()
 */
static void
srpt_ioc_port_down(ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_target_port_t	*tgt;
	srpt_channel_t		*ch;
	srpt_channel_t		*next_ch;

	SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");

	/*
	 * Find the HCA in question; if the HCA has completed
	 * initialization and the SRP Target service for the
	 * I/O Controller exists, then log out any initiators
	 * connected through this port.
	 */
	ioc = srpt_ioc_get(event->ev_hca_guid);

	if (ioc == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
		    " active");
		return;
	}

	/*
	 * We only have one target now, but we could go through all
	 * SCSI target ports if more are added.
	 */
	tgt = ioc->ioc_tgt_port;
	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_port_down, I/O Controller target"
		    " port undefined");
		return;
	}
	mutex_enter(&tgt->tp_lock);

	/*
	 * For all channels logged in through this port, initiate a
	 * disconnect.
	 */
	mutex_enter(&tgt->tp_ch_list_lock);
	ch = list_head(&tgt->tp_ch_list);
	while (ch != NULL) {
		next_ch = list_next(&tgt->tp_ch_list, ch);
		if (ch->ch_session && (ch->ch_session->ss_hw_port ==
		    event->ev_port)) {
			srpt_ch_disconnect(ch);
		}
		ch = next_ch;
	}
	mutex_exit(&tgt->tp_ch_list_lock);

	mutex_exit(&tgt->tp_lock);
}

/*
 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
 */
/* ARGSUSED */
void
srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
	ibt_async_code_t code, ibt_async_event_t *event)
{
	srpt_ioc_t		*ioc;
	srpt_channel_t		*ch;

	switch (code) {
	case IBT_EVENT_PORT_UP:
		srpt_ioc_port_active(event);
		break;

	case IBT_ERROR_PORT_DOWN:
		srpt_ioc_port_down(event);
		break;

	case IBT_HCA_ATTACH_EVENT:
		rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
		ioc = srpt_ioc_init(event->ev_hca_guid);

		if (ioc == NULL) {
			rw_exit(&srpt_ctxt->sc_rwlock);
			SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH"
			    " event failed to initialize HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			return;
		}
		SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller"
		    " ibt hdl (%p)",
		    (void *)ioc->ioc_ibt_hdl);

		rw_enter(&ioc->ioc_rwlock, RW_WRITER);
		ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
		if (ioc->ioc_tgt_port == NULL) {
			SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI "
			    "target port error for HCA (0x%016llx)",
			    (u_longlong_t)event->ev_hca_guid);
			rw_exit(&ioc->ioc_rwlock);
			srpt_ioc_fini(ioc);
			rw_exit(&srpt_ctxt->sc_rwlock);
			return;
		}

		/*
		 * New HCA added with default SCSI Target Port, SRP service
		 * will be started when SCSI Target Port is brought
		 * on-line by STMF.
		 */
		srpt_ctxt->sc_num_iocs++;
		list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);

		rw_exit(&ioc->ioc_rwlock);
		rw_exit(&srpt_ctxt->sc_rwlock);
		break;

	case IBT_HCA_DETACH_EVENT:
		SRPT_DPRINTF_L1(
		    "ioc_ib_async_hdlr, HCA_DETACH_EVENT received.");
		break;

	case IBT_EVENT_EMPTY_CHAN:
		/* Channel in ERROR state is now empty */
		ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
		SRPT_DPRINTF_L3(
		    "ioc_ib_async_hdlr, received empty channel error on %p",
		    (void *)ch);
		break;

	default:
		SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
		    "handled (%d)", code);
		break;
	}
}

/*
 * srpt_ioc_svc_bind()
 */
ibt_status_t
srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
{
	ibt_status_t		status;
	srpt_hw_port_t		*port;
	ibt_hca_portinfo_t	*portinfo;
	uint_t			qportinfo_sz;
	uint_t			qportnum;
	ib_gid_t		new_gid;
	srpt_ioc_t		*ioc;
	srpt_session_t		sess;

	ASSERT(tgt != NULL);
	ASSERT(tgt->tp_ioc != NULL);
	ioc = tgt->tp_ioc;

	if (tgt->tp_ibt_svc_hdl == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
		    " service");
		return (IBT_INVALID_PARAM);
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
		return (IBT_INVALID_PARAM);
	}
	status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
	    &portinfo, &qportnum, &qportinfo_sz);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, query port %d error (%d)",
		    portnum, status);
		return (IBT_INVALID_PARAM);
	}

	ASSERT(portinfo != NULL);

	/*
	 * If the port is not active do nothing; the caller should attempt
	 * to bind after the port goes active.
	 */
	if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
		SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
		    portnum);
		ibt_free_portinfo(portinfo, qportinfo_sz);
		return (IBT_HCA_PORT_NOT_ACTIVE);
	}

	port    = &tgt->tp_hw_port[portnum-1];
	new_gid = portinfo->p_sgid_tbl[0];
	ibt_free_portinfo(portinfo, qportinfo_sz);

	/*
	 * If previously bound and the port GID has changed,
	 * rebind to the new GID.
	 */
	if (port->hwp_bind_hdl != NULL) {
		if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
		    new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
			SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
			    " bind");
			ibt_unbind_service(tgt->tp_ibt_svc_hdl,
			    port->hwp_bind_hdl);
			port->hwp_bind_hdl = NULL;
		}
	}
	SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
	    (u_longlong_t)new_gid.gid_prefix,
	    (u_longlong_t)new_gid.gid_guid);

	/*
	 * Pass the SCSI Target Port as CM private data; the target will
	 * always exist while this service is bound.
	 */
	status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL, tgt,
	    &port->hwp_bind_hdl);
	if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
		SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)", status);
		return (status);
	}
	port->hwp_gid.gid_prefix = new_gid.gid_prefix;
	port->hwp_gid.gid_guid = new_gid.gid_guid;

	/* Set up a transient structure for the DTrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__up, srpt_session_t, &sess);

	return (IBT_SUCCESS);
}

/*
 * srpt_ioc_svc_unbind()
 */
void
srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
{
	srpt_hw_port_t		*port;
	srpt_session_t		sess;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
		return;
	}

	if (portnum == 0 || portnum > tgt->tp_nports) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
		return;
	}
	port = &tgt->tp_hw_port[portnum-1];

	/* Set up a transient structure for the DTrace probe. */
	bzero(&sess, sizeof (srpt_session_t));
	ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
	    port->hwp_gid.gid_guid);
	EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

	DTRACE_SRP_1(service__down, srpt_session_t, &sess);

	if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
		ibt_unbind_service(tgt->tp_ibt_svc_hdl, port->hwp_bind_hdl);
	}
	port->hwp_bind_hdl = NULL;
	port->hwp_gid.gid_prefix = 0;
	port->hwp_gid.gid_guid = 0;
}

/*
 * srpt_ioc_svc_unbind_all()
 */
void
srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
{
	uint_t		portnum;

	if (tgt == NULL) {
		SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
		    " specified");
		return;
	}
	for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
		srpt_ioc_svc_unbind(tgt, portnum);
	}
}

/*
 * srpt_ioc_get_locked()
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
srpt_ioc_t *
srpt_ioc_get_locked(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	ioc = list_head(&srpt_ctxt->sc_ioc_list);
	while (ioc != NULL) {
		if (ioc->ioc_guid == guid) {
			break;
		}
		ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
	}
	return (ioc);
}

/*
 * srpt_ioc_get()
 */
srpt_ioc_t *
srpt_ioc_get(ib_guid_t guid)
{
	srpt_ioc_t	*ioc;

	rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
	ioc = srpt_ioc_get_locked(guid);
	rw_exit(&srpt_ctxt->sc_rwlock);
	return (ioc);
}

/*
 * srpt_ioc_post_recv_iu()
 */
ibt_status_t
srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	ibt_status_t		status;
	ibt_recv_wr_t		wr;
	uint_t			posted;

	ASSERT(ioc != NULL);
	ASSERT(iu != NULL);

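	/*
	 * The IU pointer doubles as the work request ID so the receive
	 * completion handler can recover the IU directly from the
	 * completion entry.
	 */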
	wr.wr_id  = (ibt_wrid_t)(uintptr_t)iu;
	wr.wr_nds = 1;
	wr.wr_sgl = &iu->iu_sge;
	posted    = 0;

	status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
		    status);
	}
	return (status);
}

/*
 * srpt_ioc_repost_recv_iu()
 */
void
srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
	srpt_channel_t		*ch;
	ibt_status_t		status;

	ASSERT(iu != NULL);
	ASSERT(mutex_owned(&iu->iu_lock));

	/*
	 * Some additional sanity checks while in debug state; all STMF
	 * related task activities should be complete prior to returning
	 * this IU to the available pool.
	 */
	ASSERT(iu->iu_stmf_task == NULL);
	ASSERT(iu->iu_sq_posted_cnt == 0);

	ch = iu->iu_ch;
	iu->iu_ch = NULL;
	iu->iu_num_rdescs = 0;
	iu->iu_rdescs = NULL;
	iu->iu_tot_xfer_len = 0;
	iu->iu_tag = 0;
	iu->iu_flags = 0;
	iu->iu_sq_posted_cnt = 0;

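	/*
	 * Reposting the IU replenishes a receive slot.  If the IU was
	 * owned by a channel, credit that channel's request-limit delta
	 * so a later response can grant the slot back to the initiator
	 * (SRP request-limit flow control).
	 */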
	status = srpt_ioc_post_recv_iu(ioc, iu);

	if (status != IBT_SUCCESS) {
		/*
		 * Very bad, we should initiate a shutdown of the I/O
		 * Controller here, off-lining any targets associated
		 * with this I/O Controller (and therefore disconnecting
		 * any logins that remain).
		 *
		 * In practice this should never happen so we put
		 * the code near the bottom of the implementation list.
		 */
		SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
		    status);
		ASSERT(0);
	} else if (ch != NULL) {
		atomic_inc_32(&ch->ch_req_lim_delta);
	}
}

/*
 * srpt_ioc_init_profile()
 *
 * SRP I/O Controller serialization lock must be held when this
 * routine is invoked.
 */
void
srpt_ioc_init_profile(srpt_ioc_t *ioc)
{
	srpt_ioc_opcap_mask_t		capmask = {0};

	ASSERT(ioc != NULL);

	ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
	(void) memcpy(ioc->ioc_profile.ioc_id_string,
	    "Solaris SRP Target 0.9a", 23);

	/*
	 * Note vendor ID and subsystem ID are 24 bit values.  Low order
	 * 8 bits in vendor ID field is slot and is initialized to zero.
	 * Low order 8 bits of subsystem ID is a reserved field and
	 * initialized to zero.
	 */
	ioc->ioc_profile.ioc_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_deviceid =
	    h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
	ioc->ioc_profile.ioc_device_ver =
	    h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
	ioc->ioc_profile.ioc_subsys_vendorid =
	    h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
	ioc->ioc_profile.ioc_subsys_id = h2b32(0);
	ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
	ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
	ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
	ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
	ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
	ioc->ioc_profile.ioc_rdma_read_qdepth =
	    ioc->ioc_attr.hca_max_rdma_out_chan;
	ioc->ioc_profile.ioc_send_msg_sz = h2b32(SRPT_DEFAULT_SEND_MSG_SIZE);
	ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);

	capmask.bits.st = 1;	/* Messages can be sent to IOC */
	capmask.bits.sf = 1;	/* Messages can be sent from IOC */
	capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
	capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
	ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;

	/*
	 * We currently only have one target, but if we had a list we would
	 * go through that list and only count those that are ONLINE when
	 * setting the services count and entries.
	 */
	if (ioc->ioc_tgt_port->tp_srp_enabled) {
		ioc->ioc_profile.ioc_service_entries = 1;
		ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
		(void) snprintf((char *)ioc->ioc_svc.srv_name,
		    IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
		    (u_longlong_t)ioc->ioc_guid);
	} else {
		ioc->ioc_profile.ioc_service_entries = 0;
		ioc->ioc_svc.srv_id = 0;
	}
}

/*
 * srpt_ioc_ds_alloc_dbuf()
 */
/* ARGSUSED */
stmf_data_buf_t *
srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
	uint32_t *pminsize, uint32_t flags)
{
	srpt_iu_t		*iu;
	srpt_ioc_t		*ioc;
	srpt_ds_dbuf_t		*dbuf;
	stmf_data_buf_t		*stmf_dbuf;
	void			*buf;
	srpt_mr_t		mr;

	ASSERT(task != NULL);
	iu  = task->task_port_private;
	ioc = iu->iu_ioc;

	SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
	    " size(%d), flags(%x)",
	    (void *)ioc, size, flags);

	buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
	if (buf == NULL) {
		return (NULL);
	}

	if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
		goto stmf_alloc_err;
	}

	stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
	    0);
	if (stmf_dbuf == NULL) {
		SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
		goto stmf_alloc_err;
	}

	dbuf = stmf_dbuf->db_port_private;
	dbuf->db_stmf_buf = stmf_dbuf;
	dbuf->db_mr_hdl = mr.mr_hdl;
	dbuf->db_ioc = ioc;
	dbuf->db_sge.ds_va = mr.mr_va;
	dbuf->db_sge.ds_key = mr.mr_lkey;
	dbuf->db_sge.ds_len = size;

	stmf_dbuf->db_buf_size = size;
	stmf_dbuf->db_data_size = size;
	stmf_dbuf->db_relative_offset = 0;
	stmf_dbuf->db_flags = 0;
	stmf_dbuf->db_xfer_status = 0;
	stmf_dbuf->db_sglist_length = 1;
	stmf_dbuf->db_sglist[0].seg_addr = buf;
	stmf_dbuf->db_sglist[0].seg_length = size;

	return (stmf_dbuf);

buf_mr_err:
	stmf_free(stmf_dbuf);

stmf_alloc_err:
	srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);

	return (NULL);
}

void
srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
	stmf_data_buf_t *dbuf)
{
	srpt_ioc_t	*ioc;

	SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
	    (void *)dbuf);
	ioc = ds->ds_port_private;

	srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
	    dbuf->db_buf_size);
	stmf_free(dbuf);
}

/* Memory arena routines */

static srpt_vmem_pool_t *
srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
    uint64_t maxsize, ibt_mr_flags_t flags)
{
	srpt_mr_t		*chunk;
	srpt_vmem_pool_t	*result;

	ASSERT(chunksize <= maxsize);

	result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);

	result->svp_ioc = ioc;
	result->svp_chunksize = chunksize;
	result->svp_max_size = maxsize;
	result->svp_flags = flags;

	rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
	avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
	    sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));

	chunk = srpt_vmem_chunk_alloc(result, chunksize);
	if (chunk == NULL) {
		/* initial chunk allocation or registration failed */
		avl_destroy(&result->svp_mr_list);
		rw_destroy(&result->svp_lock);
		kmem_free(result, sizeof (srpt_vmem_pool_t));
		return (NULL);
	}

	avl_add(&result->svp_mr_list, chunk);

	/* the chunk may be smaller than requested; use its actual length */
	result->svp_total_size = chunk->mr_len;

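	/*
	 * No backing allocator is supplied to vmem_create(); the arena
	 * is grown explicitly via vmem_add() when srpt_vmem_alloc()
	 * registers additional chunks.
	 */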
	result->svp_vmem = vmem_create(name,
	    (void *)(uintptr_t)chunk->mr_va,
	    (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
	    NULL, NULL, NULL, 0, VM_SLEEP);

	return (result);
}

static void
srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
{
	srpt_mr_t		*chunk;
	srpt_mr_t		*next;

	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	vmem_destroy(vm_pool->svp_vmem);

	chunk = avl_first(&vm_pool->svp_mr_list);

	while (chunk != NULL) {
		next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
		avl_remove(&vm_pool->svp_mr_list, chunk);
		srpt_vmem_chunk_free(vm_pool, chunk);
		chunk = next;
	}

	avl_destroy(&vm_pool->svp_mr_list);

	rw_exit(&vm_pool->svp_lock);
	rw_destroy(&vm_pool->svp_lock);

	kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
}

static void *
srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
{
	void		*result;
	srpt_mr_t	*next;
	ib_memlen_t	chunklen;

	ASSERT(vm_pool != NULL);

	result = vmem_alloc(vm_pool->svp_vmem, size,
	    VM_NOSLEEP | VM_FIRSTFIT);

	if (result != NULL) {
		/* memory successfully allocated */
		return (result);
	}

	/* need more vmem */
	rw_enter(&vm_pool->svp_lock, RW_WRITER);
	chunklen = vm_pool->svp_chunksize;

	if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
		/* no more room to alloc */
		rw_exit(&vm_pool->svp_lock);
		return (NULL);
	}

	if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
		chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
	}

	next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
	if (next != NULL) {
		/*
		 * Note that the size of the chunk we got
		 * may not be the size we requested.  Use the
		 * length returned in the chunk itself.
		 */
		if (vmem_add(vm_pool->svp_vmem, (void *)(uintptr_t)next->mr_va,
		    next->mr_len, VM_NOSLEEP) == NULL) {
			srpt_vmem_chunk_free(vm_pool, next);
			SRPT_DPRINTF_L2("vmem_add failed");
		} else {
			vm_pool->svp_total_size += next->mr_len;
			avl_add(&vm_pool->svp_mr_list, next);
		}
	}

	rw_exit(&vm_pool->svp_lock);

	result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);

	return (result);
}

static void
srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
{
	vmem_free(vm_pool->svp_vmem, vaddr, size);
}

static int
srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr)
{
	avl_index_t		where;
	ib_vaddr_t		mrva = (ib_vaddr_t)(uintptr_t)vaddr;
	srpt_mr_t		chunk;
	srpt_mr_t		*nearest;
	ib_vaddr_t		chunk_end;
	int			status = DDI_FAILURE;

	rw_enter(&vm_pool->svp_lock, RW_READER);

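	/*
	 * Chunks are sorted by base virtual address.  An exact match on
	 * mr_va is unlikely, so when avl_find() misses we step back to
	 * the nearest chunk whose base precedes vaddr; only that chunk
	 * can contain the requested range.
	 */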
	chunk.mr_va = mrva;
	nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);

	if (nearest == NULL) {
		nearest = avl_nearest(&vm_pool->svp_mr_list, where,
		    AVL_BEFORE);
	}

	if (nearest != NULL) {
		/* Verify this chunk contains the specified address range */
		ASSERT(nearest->mr_va <= mrva);

		chunk_end = nearest->mr_va + nearest->mr_len;
		if (chunk_end >= mrva + size) {
			mr->mr_hdl = nearest->mr_hdl;
			mr->mr_va = mrva;
			mr->mr_len = size;
			mr->mr_lkey = nearest->mr_lkey;
			mr->mr_rkey = nearest->mr_rkey;
			status = DDI_SUCCESS;
		}
	}

	rw_exit(&vm_pool->svp_lock);
	return (status);
}

static srpt_mr_t *
srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
{
	void			*chunk = NULL;
	srpt_mr_t		*result = NULL;

	while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
		chunk = kmem_alloc(chunksize, KM_NOSLEEP);
		if (chunk == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "failed to alloc chunk of %d, trying %d",
			    (int)chunksize, (int)chunksize / 2);
			chunksize /= 2;
		}
	}

	if (chunk != NULL) {
		result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
		    chunksize);
		if (result == NULL) {
			SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
			    "chunk registration failed");
			kmem_free(chunk, chunksize);
		}
	}

	return (result);
}

static void
srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
{
	void			*chunk = (void *)(uintptr_t)mr->mr_va;
	ib_memlen_t		chunksize = mr->mr_len;

	srpt_dereg_mem(vm_pool->svp_ioc, mr);
	kmem_free(chunk, chunksize);
}

static srpt_mr_t *
srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
{
	srpt_mr_t		*result = NULL;
	ibt_mr_attr_t		mr_attr;
	ibt_mr_desc_t		mr_desc;
	ibt_status_t		status;
	srpt_ioc_t		*ioc = vm_pool->svp_ioc;

	result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
	if (result == NULL) {
		SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
		return (NULL);
	}

	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
	bzero(&mr_desc, sizeof (ibt_mr_desc_t));

	mr_attr.mr_vaddr = vaddr;
	mr_attr.mr_len = len;
	mr_attr.mr_as = NULL;
	mr_attr.mr_flags = vm_pool->svp_flags;

	status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
	    &mr_attr, &result->mr_hdl, &mr_desc);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
		    "failed %d", status);
		kmem_free(result, sizeof (srpt_mr_t));
		return (NULL);
	}

	result->mr_va = mr_attr.mr_vaddr;
	result->mr_len = mr_attr.mr_len;
	result->mr_lkey = mr_desc.md_lkey;
	result->mr_rkey = mr_desc.md_rkey;

	return (result);
}

static void
srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
{
	ibt_status_t		status;

	status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
	if (status != IBT_SUCCESS) {
		SRPT_DPRINTF_L1("srpt_dereg_mem, error deregistering MR (%d)",
		    status);
	}
	kmem_free(mr, sizeof (srpt_mr_t));
}

static int
srpt_vmem_mr_compare(const void *a, const void *b)
{
	srpt_mr_t		*mr1 = (srpt_mr_t *)a;
	srpt_mr_t		*mr2 = (srpt_mr_t *)b;

	/* sort and match by virtual address */
	if (mr1->mr_va < mr2->mr_va) {
		return (-1);
	} else if (mr1->mr_va > mr2->mr_va) {
		return (1);
	}

	return (0);
}