/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * I/O Controller functions for the Solaris COMSTAR SCSI RDMA Protocol
 * Target (SRPT) port provider.
 */

#include <sys/types.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/sysmacros.h>
#include <sys/ib/ibtl/ibti.h>
#include <sys/sdt.h>

#include "srp.h"
#include "srpt_impl.h"
#include "srpt_ioc.h"
#include "srpt_stp.h"
#include "srpt_ch.h"

/*
 * srpt_ioc_srq_size - Tunable parameter that specifies the number
 * of receive WQ entries that can be posted to the IOC shared
 * receive queue.
 */
uint32_t srpt_ioc_srq_size = SRPT_DEFAULT_IOC_SRQ_SIZE;
extern uint16_t srpt_send_msg_depth;
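
/*
 * Like other srpt globals, this default can normally be overridden at
 * boot time through /etc/system (example value only; the module name is
 * assumed to be the stock "srpt"):
 *
 *	set srpt:srpt_ioc_srq_size = 1024
 */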

/* IOC profile capabilities mask must be big-endian */
typedef struct srpt_ioc_opcap_bits_s {
#if defined(_BIT_FIELDS_LTOH)
    uint8_t af:1,
            at:1,
            wf:1,
            wt:1,
            rf:1,
            rt:1,
            sf:1,
            st:1;
#elif defined(_BIT_FIELDS_HTOL)
    uint8_t st:1,
            sf:1,
            rt:1,
            rf:1,
            wt:1,
            wf:1,
            at:1,
            af:1;
#else
#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
#endif
} srpt_ioc_opcap_bits_t;

typedef union {
    srpt_ioc_opcap_bits_t bits;
    uint8_t mask;
} srpt_ioc_opcap_mask_t;

/*
 * vmem arena variables - values derived from iSER
 */
#define SRPT_MR_QUANTSIZE	0x400		/* 1K */
#define SRPT_MIN_CHUNKSIZE	0x100000	/* 1MB */

/* use less memory on 32-bit kernels as it's much more constrained */
#ifdef _LP64
#define SRPT_BUF_MR_CHUNKSIZE	0x1000000	/* 16MB */
#define SRPT_BUF_POOL_MAX	0x40000000	/* 1GB */
#else
#define SRPT_BUF_MR_CHUNKSIZE	0x400000	/* 4MB */
#define SRPT_BUF_POOL_MAX	0x4000000	/* 64MB */
#endif

static ibt_mr_flags_t srpt_dbuf_mr_flags =
    IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_ENABLE_REMOTE_WRITE |
    IBT_MR_ENABLE_REMOTE_READ;

void srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event);

static struct ibt_clnt_modinfo_s srpt_ibt_modinfo = {
    IBTI_V_CURR,
    IBT_STORAGE_DEV,
    srpt_ioc_ib_async_hdlr,
    NULL,
    "srpt"
};

static srpt_ioc_t *srpt_ioc_init(ib_guid_t guid);
static void srpt_ioc_fini(srpt_ioc_t *ioc);

static srpt_vmem_pool_t *srpt_vmem_create(const char *name, srpt_ioc_t *ioc,
    ib_memlen_t chunksize, uint64_t maxsize, ibt_mr_flags_t flags);
static void *srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size);
static int srpt_vmem_mr_compare(const void *a, const void *b);
static srpt_mr_t *srpt_vmem_chunk_alloc(srpt_vmem_pool_t *ioc,
    ib_memlen_t chunksize);
static void srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool);
static void srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size);
static srpt_mr_t *srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr,
    ib_memlen_t len);
static void srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr);
static void srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr);
static int srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr);

/*
 * srpt_ioc_attach() - I/O Controller attach
 *
 * Attach to IBTF and initialize I/O controllers. The srpt_ctxt->sc_rwlock
 * should be held outside of this call.
 */
int
srpt_ioc_attach()
{
    int		status;
    int		hca_cnt;
    int		hca_ndx;
    ib_guid_t	*guid;
    srpt_ioc_t	*ioc;

    ASSERT(srpt_ctxt != NULL);

    /*
     * Attach to IBTF and initialize a list of IB devices.  Each
     * HCA will be represented by an I/O Controller.
     */
    status = ibt_attach(&srpt_ibt_modinfo, srpt_ctxt->sc_dip,
        srpt_ctxt, &srpt_ctxt->sc_ibt_hdl);
    if (status != DDI_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_attach, ibt_attach failed (0x%x)",
            status);
        return (DDI_FAILURE);
    }

    hca_cnt = ibt_get_hca_list(&guid);
    if (hca_cnt < 1) {
        SRPT_DPRINTF_L2("ioc_attach, no HCA found");
        ibt_detach(srpt_ctxt->sc_ibt_hdl);
        srpt_ctxt->sc_ibt_hdl = NULL;
        return (DDI_FAILURE);
    }

    list_create(&srpt_ctxt->sc_ioc_list, sizeof (srpt_ioc_t),
        offsetof(srpt_ioc_t, ioc_node));

    for (hca_ndx = 0; hca_ndx < hca_cnt; hca_ndx++) {
        SRPT_DPRINTF_L2("ioc_attach, adding I/O"
            " Controller (%016llx)", (u_longlong_t)guid[hca_ndx]);

        ioc = srpt_ioc_init(guid[hca_ndx]);
        if (ioc == NULL) {
            SRPT_DPRINTF_L1("ioc_attach, ioc_init GUID(%016llx)"
                " failed", (u_longlong_t)guid[hca_ndx]);
            continue;
        }
        list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);
        SRPT_DPRINTF_L2("ioc_attach, I/O Controller ibt HCA hdl (%p)",
            (void *)ioc->ioc_ibt_hdl);
        srpt_ctxt->sc_num_iocs++;
    }

    ibt_free_hca_list(guid, hca_cnt);
    SRPT_DPRINTF_L3("ioc_attach, added %d I/O Controller(s)",
        srpt_ctxt->sc_num_iocs);
    return (DDI_SUCCESS);
}

/*
 * srpt_ioc_detach() - I/O Controller detach
 *
 * srpt_ctxt->sc_rwlock should be held outside of this call.
 */
void
srpt_ioc_detach()
{
    srpt_ioc_t	*ioc;

    ASSERT(srpt_ctxt != NULL);

    while ((ioc = list_head(&srpt_ctxt->sc_ioc_list)) != NULL) {
        list_remove(&srpt_ctxt->sc_ioc_list, ioc);
        SRPT_DPRINTF_L2("ioc_detach, removing I/O Controller(%p)"
            " (%016llx), ibt_hdl(%p)",
            (void *)ioc,
            ioc ? (u_longlong_t)ioc->ioc_guid : 0x0ll,
            (void *)ioc->ioc_ibt_hdl);
        srpt_ioc_fini(ioc);
    }

    list_destroy(&srpt_ctxt->sc_ioc_list);

    ibt_detach(srpt_ctxt->sc_ibt_hdl);
    srpt_ctxt->sc_ibt_hdl = NULL;
}

/*
 * srpt_ioc_init() - I/O Controller initialization
 *
 * Requires srpt_ctxt->rw_lock be held outside of call.
 */
static srpt_ioc_t *
srpt_ioc_init(ib_guid_t guid)
{
    ibt_status_t	status;
    srpt_ioc_t		*ioc;
    ibt_hca_attr_t	hca_attr;
    uint_t		iu_ndx;
    uint_t		err_ndx;
    ibt_mr_attr_t	mr_attr;
    ibt_mr_desc_t	mr_desc;
    srpt_iu_t		*iu;
    ibt_srq_sizes_t	srq_attr;
    char		namebuf[32];
    size_t		iu_offset;

    status = ibt_query_hca_byguid(guid, &hca_attr);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_init, HCA query error (%d)",
            status);
        return (NULL);
    }

    ioc = srpt_ioc_get_locked(guid);
    if (ioc != NULL) {
        SRPT_DPRINTF_L1("ioc_init, HCA already exists");
        return (NULL);
    }

    ioc = kmem_zalloc(sizeof (srpt_ioc_t), KM_SLEEP);

    rw_init(&ioc->ioc_rwlock, NULL, RW_DRIVER, NULL);
    rw_enter(&ioc->ioc_rwlock, RW_WRITER);

    bcopy(&hca_attr, &ioc->ioc_attr, sizeof (ibt_hca_attr_t));

    SRPT_DPRINTF_L2("ioc_init, HCA max mr=%d, mrlen=%lld",
        hca_attr.hca_max_memr, (u_longlong_t)hca_attr.hca_max_memr_len);
    ioc->ioc_guid = guid;

    status = ibt_open_hca(srpt_ctxt->sc_ibt_hdl, guid, &ioc->ioc_ibt_hdl);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_init, IBT open failed (%d)", status);
        goto hca_open_err;
    }

    status = ibt_alloc_pd(ioc->ioc_ibt_hdl, IBT_PD_NO_FLAGS,
        &ioc->ioc_pd_hdl);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_init, IBT create PD failed (%d)", status);
        goto pd_alloc_err;
    }

    /*
     * We require hardware support for SRQs.  We use a common SRQ to
     * reduce channel memory consumption.
     */
    if ((ioc->ioc_attr.hca_flags & IBT_HCA_SRQ) == 0) {
        SRPT_DPRINTF_L0("ioc_init, no SRQ capability, not supported");
        goto srq_alloc_err;
    }

    SRPT_DPRINTF_L3("ioc_init, Using shared receive queues, max srq work"
        " queue size(%d), def size = %d", ioc->ioc_attr.hca_max_srqs_sz,
        srpt_ioc_srq_size);
    srq_attr.srq_wr_sz = min(srpt_ioc_srq_size,
        ioc->ioc_attr.hca_max_srqs_sz);
    srq_attr.srq_sgl_sz = 1;

    status = ibt_alloc_srq(ioc->ioc_ibt_hdl, IBT_SRQ_NO_FLAGS,
        ioc->ioc_pd_hdl, &srq_attr, &ioc->ioc_srq_hdl,
        &ioc->ioc_srq_attr);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_init, IBT create SRQ failed(%d)", status);
        goto srq_alloc_err;
    }

    SRPT_DPRINTF_L2("ioc_init, SRQ WR size(%d), SG size(%d)",
        ioc->ioc_srq_attr.srq_wr_sz, ioc->ioc_srq_attr.srq_sgl_sz);
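
    /*
     * Stash the IOC pointer in the SRQ handle's private data so it can
     * be recovered later with ibt_get_srq_private().
     */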
    ibt_set_srq_private(ioc->ioc_srq_hdl, ioc);

    /*
     * Allocate a pool of SRP IU message buffers and post them to
     * the I/O Controller SRQ.  We let the SRQ manage the free IU
     * messages.
     */
    ioc->ioc_num_iu_entries =
        min(srq_attr.srq_wr_sz, srpt_ioc_srq_size) - 1;

    ioc->ioc_iu_pool = kmem_zalloc(sizeof (srpt_iu_t) *
        ioc->ioc_num_iu_entries, KM_SLEEP);

    ioc->ioc_iu_bufs = kmem_alloc(SRPT_DEFAULT_SEND_MSG_SIZE *
        ioc->ioc_num_iu_entries, KM_SLEEP);

    if ((ioc->ioc_iu_pool == NULL) || (ioc->ioc_iu_bufs == NULL)) {
        SRPT_DPRINTF_L1("ioc_init, failed to allocate SRQ IUs");
        goto srq_iu_alloc_err;
    }

    mr_attr.mr_vaddr = (ib_vaddr_t)(uintptr_t)ioc->ioc_iu_bufs;
    mr_attr.mr_len = SRPT_DEFAULT_SEND_MSG_SIZE * ioc->ioc_num_iu_entries;
    mr_attr.mr_as = NULL;
    mr_attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE;

    status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
        &mr_attr, &ioc->ioc_iu_mr_hdl, &mr_desc);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_init, IU buffer pool MR err(%d)",
            status);
        goto srq_iu_alloc_err;
    }

    for (iu_ndx = 0, iu = ioc->ioc_iu_pool; iu_ndx <
        ioc->ioc_num_iu_entries; iu_ndx++, iu++) {

        iu_offset = (iu_ndx * SRPT_DEFAULT_SEND_MSG_SIZE);
        iu->iu_buf = (void *)((uintptr_t)ioc->ioc_iu_bufs + iu_offset);

        mutex_init(&iu->iu_lock, NULL, MUTEX_DRIVER, NULL);

        iu->iu_sge.ds_va = mr_desc.md_vaddr + iu_offset;
        iu->iu_sge.ds_key = mr_desc.md_lkey;
        iu->iu_sge.ds_len = SRPT_DEFAULT_SEND_MSG_SIZE;
        iu->iu_ioc = ioc;
        iu->iu_pool_ndx = iu_ndx;

        status = srpt_ioc_post_recv_iu(ioc, &ioc->ioc_iu_pool[iu_ndx]);
        if (status != IBT_SUCCESS) {
            SRPT_DPRINTF_L1("ioc_init, SRQ IU post err(%d)",
                status);
            goto srq_iu_post_err;
        }
    }

    /*
     * Initialize the dbuf vmem arena
     */
    (void) snprintf(namebuf, sizeof (namebuf),
        "srpt_buf_pool_%16llX", (u_longlong_t)guid);
    ioc->ioc_dbuf_pool = srpt_vmem_create(namebuf, ioc,
        SRPT_BUF_MR_CHUNKSIZE, SRPT_BUF_POOL_MAX, srpt_dbuf_mr_flags);

    if (ioc->ioc_dbuf_pool == NULL) {
        goto stmf_db_alloc_err;
    }

    /*
     * Allocate the I/O Controller STMF data buffer allocator.  The
     * data store will span all targets associated with this IOC.
     */
    ioc->ioc_stmf_ds = stmf_alloc(STMF_STRUCT_DBUF_STORE, 0, 0);
    if (ioc->ioc_stmf_ds == NULL) {
        SRPT_DPRINTF_L1("ioc_init, STMF DBUF alloc failure for IOC");
        goto stmf_db_alloc_err;
    }
    ioc->ioc_stmf_ds->ds_alloc_data_buf = &srpt_ioc_ds_alloc_dbuf;
    ioc->ioc_stmf_ds->ds_free_data_buf = &srpt_ioc_ds_free_dbuf;
    ioc->ioc_stmf_ds->ds_port_private = ioc;

    rw_exit(&ioc->ioc_rwlock);
    return (ioc);

stmf_db_alloc_err:
    if (ioc->ioc_dbuf_pool != NULL) {
        srpt_vmem_destroy(ioc->ioc_dbuf_pool);
    }

srq_iu_post_err:
    if (ioc->ioc_iu_mr_hdl != NULL) {
        status = ibt_deregister_mr(ioc->ioc_ibt_hdl,
            ioc->ioc_iu_mr_hdl);
        if (status != IBT_SUCCESS) {
            SRPT_DPRINTF_L1("ioc_init, error deregistering"
                " memory region (%d)", status);
        }
    }
    for (err_ndx = 0, iu = ioc->ioc_iu_pool; err_ndx < iu_ndx;
        err_ndx++, iu++) {
        mutex_destroy(&iu->iu_lock);
    }

srq_iu_alloc_err:
    if (ioc->ioc_iu_bufs != NULL) {
        kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
            ioc->ioc_num_iu_entries);
    }
    if (ioc->ioc_iu_pool != NULL) {
        kmem_free(ioc->ioc_iu_pool,
            sizeof (srpt_iu_t) * ioc->ioc_num_iu_entries);
    }
    if (ioc->ioc_srq_hdl != NULL) {
        status = ibt_free_srq(ioc->ioc_srq_hdl);
        if (status != IBT_SUCCESS) {
            SRPT_DPRINTF_L1("ioc_init, error freeing SRQ (%d)",
                status);
        }
    }

srq_alloc_err:
    status = ibt_free_pd(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_init, free PD error (%d)", status);
    }

pd_alloc_err:
    status = ibt_close_hca(ioc->ioc_ibt_hdl);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_init, close ioc error (%d)", status);
    }

hca_open_err:
    rw_exit(&ioc->ioc_rwlock);
    rw_destroy(&ioc->ioc_rwlock);
    kmem_free(ioc, sizeof (*ioc));
    return (NULL);
}

/*
 * srpt_ioc_fini() - I/O Controller Cleanup
 *
 * Requires srpt_ctxt->sc_rwlock be held outside of call.
 */
static void
srpt_ioc_fini(srpt_ioc_t *ioc)
{
    int		status;
    int		ndx;

    /*
     * Note driver flows will have already taken all SRP
     * services running on the I/O Controller off-line.
     */
    rw_enter(&ioc->ioc_rwlock, RW_WRITER);
    if (ioc->ioc_ibt_hdl != NULL) {
        if (ioc->ioc_stmf_ds != NULL) {
            stmf_free(ioc->ioc_stmf_ds);
        }

        if (ioc->ioc_srq_hdl != NULL) {
            SRPT_DPRINTF_L4("ioc_fini, freeing SRQ");
            status = ibt_free_srq(ioc->ioc_srq_hdl);
            if (status != IBT_SUCCESS) {
                SRPT_DPRINTF_L1("ioc_fini, free SRQ"
                    " error (%d)", status);
            }
        }

        if (ioc->ioc_iu_mr_hdl != NULL) {
            status = ibt_deregister_mr(
                ioc->ioc_ibt_hdl, ioc->ioc_iu_mr_hdl);
            if (status != IBT_SUCCESS) {
                SRPT_DPRINTF_L1("ioc_fini, error deregistering"
                    " memory region (%d)", status);
            }
        }

        if (ioc->ioc_iu_bufs != NULL) {
            kmem_free(ioc->ioc_iu_bufs, SRPT_DEFAULT_SEND_MSG_SIZE *
                ioc->ioc_num_iu_entries);
        }

        if (ioc->ioc_iu_pool != NULL) {
            SRPT_DPRINTF_L4("ioc_fini, freeing IU entries");
            for (ndx = 0; ndx < ioc->ioc_num_iu_entries; ndx++) {
                mutex_destroy(&ioc->ioc_iu_pool[ndx].iu_lock);
            }

            SRPT_DPRINTF_L4("ioc_fini, free IU pool struct");
            kmem_free(ioc->ioc_iu_pool,
                sizeof (srpt_iu_t) * (ioc->ioc_num_iu_entries));
            ioc->ioc_iu_pool = NULL;
            ioc->ioc_num_iu_entries = 0;
        }

        if (ioc->ioc_dbuf_pool != NULL) {
            srpt_vmem_destroy(ioc->ioc_dbuf_pool);
        }

        if (ioc->ioc_pd_hdl != NULL) {
            status = ibt_free_pd(ioc->ioc_ibt_hdl,
                ioc->ioc_pd_hdl);
            if (status != IBT_SUCCESS) {
                SRPT_DPRINTF_L1("ioc_fini, free PD"
                    " error (%d)", status);
            }
        }

        status = ibt_close_hca(ioc->ioc_ibt_hdl);
        if (status != IBT_SUCCESS) {
            SRPT_DPRINTF_L1(
                "ioc_fini, close ioc error (%d)", status);
        }
    }
    rw_exit(&ioc->ioc_rwlock);
    rw_destroy(&ioc->ioc_rwlock);
    kmem_free(ioc, sizeof (srpt_ioc_t));
}

/*
 * srpt_ioc_port_active() - I/O Controller port active
 */
static void
srpt_ioc_port_active(ibt_async_event_t *event)
{
    ibt_status_t	status;
    srpt_ioc_t		*ioc;

    ASSERT(event != NULL);

    SRPT_DPRINTF_L3("ioc_port_active event handler, invoked");

    /*
     * Find the HCA in question and if the HCA has completed
     * initialization, and the SRP Target service for the
     * I/O Controller exists, then bind this port.
     */
    ioc = srpt_ioc_get(event->ev_hca_guid);

    if (ioc == NULL) {
        SRPT_DPRINTF_L2("ioc_port_active, I/O Controller not"
            " active");
        return;
    }

    if (ioc->ioc_tgt_port == NULL) {
        SRPT_DPRINTF_L2("ioc_port_active, no I/O Controller target"
            " defined");
        return;
    }

    /*
     * We take the target lock here to serialize this operation
     * with any STMF initiated target state transitions.  If
     * SRP is off-line then the service handle is NULL.
     */
    mutex_enter(&ioc->ioc_tgt_port->tp_lock);

    if (ioc->ioc_tgt_port->tp_ibt_svc_hdl != NULL) {
        status = srpt_ioc_svc_bind(ioc->ioc_tgt_port, event->ev_port);
        if (status != IBT_SUCCESS &&
            status != IBT_HCA_PORT_NOT_ACTIVE) {
            SRPT_DPRINTF_L1("ioc_port_active, bind failed (%d)",
                status);
        }
    }
    mutex_exit(&ioc->ioc_tgt_port->tp_lock);
}

/*
 * srpt_ioc_port_down()
 */
static void
srpt_ioc_port_down(ibt_async_event_t *event)
{
    srpt_ioc_t		*ioc;
    srpt_target_port_t	*tgt;
    srpt_channel_t	*ch;
    srpt_channel_t	*next_ch;

    SRPT_DPRINTF_L3("ioc_port_down event handler, invoked");

    /*
     * Find the HCA in question and if the HCA has completed
     * initialization, and the SRP Target service for the
     * I/O Controller exists, then logout initiators
     * through this port.
     */
    ioc = srpt_ioc_get(event->ev_hca_guid);

    if (ioc == NULL) {
        SRPT_DPRINTF_L2("ioc_port_down, I/O Controller not"
            " active");
        return;
    }

    /*
     * We only have one target now, but we could go through all
     * SCSI target ports if more are added.
     */
    tgt = ioc->ioc_tgt_port;
    if (tgt == NULL) {
        SRPT_DPRINTF_L2("ioc_port_down, no I/O Controller target"
            " defined");
        return;
    }
    mutex_enter(&tgt->tp_lock);

    /*
     * For all channels logged in through this port, initiate a
     * disconnect.
     */
    mutex_enter(&tgt->tp_ch_list_lock);
    ch = list_head(&tgt->tp_ch_list);
    while (ch != NULL) {
        next_ch = list_next(&tgt->tp_ch_list, ch);
        if (ch->ch_session && (ch->ch_session->ss_hw_port ==
            event->ev_port)) {
            srpt_ch_disconnect(ch);
        }
        ch = next_ch;
    }
    mutex_exit(&tgt->tp_ch_list_lock);

    mutex_exit(&tgt->tp_lock);
}

/*
 * srpt_ioc_ib_async_hdlr - I/O Controller IB asynchronous events
 */
/* ARGSUSED */
void
srpt_ioc_ib_async_hdlr(void *clnt, ibt_hca_hdl_t hdl,
    ibt_async_code_t code, ibt_async_event_t *event)
{
    srpt_ioc_t		*ioc;
    srpt_channel_t	*ch;

    switch (code) {
    case IBT_EVENT_PORT_UP:
        srpt_ioc_port_active(event);
        break;

    case IBT_ERROR_PORT_DOWN:
        srpt_ioc_port_down(event);
        break;

    case IBT_HCA_ATTACH_EVENT:
        rw_enter(&srpt_ctxt->sc_rwlock, RW_WRITER);
        ioc = srpt_ioc_init(event->ev_hca_guid);

        if (ioc == NULL) {
            rw_exit(&srpt_ctxt->sc_rwlock);
            SRPT_DPRINTF_L1("ib_async_hdlr, HCA_ATTACH"
                " event failed to initialize HCA (0x%016llx)",
                (u_longlong_t)event->ev_hca_guid);
            return;
        }
        SRPT_DPRINTF_L2("HCA_ATTACH_EVENT: I/O Controller"
            " ibt hdl (%p)",
            (void *)ioc->ioc_ibt_hdl);

        rw_enter(&ioc->ioc_rwlock, RW_WRITER);
        ioc->ioc_tgt_port = srpt_stp_alloc_port(ioc, ioc->ioc_guid);
        if (ioc->ioc_tgt_port == NULL) {
            SRPT_DPRINTF_L1("ioc_ib_async_hdlr, alloc SCSI "
                "target port error for HCA (0x%016llx)",
                (u_longlong_t)event->ev_hca_guid);
            rw_exit(&ioc->ioc_rwlock);
            srpt_ioc_fini(ioc);
            rw_exit(&srpt_ctxt->sc_rwlock);
            return;
        }

        /*
         * New HCA added with default SCSI Target Port, SRP service
         * will be started when SCSI Target Port is brought
         * on-line by STMF.
         */
        srpt_ctxt->sc_num_iocs++;
        list_insert_tail(&srpt_ctxt->sc_ioc_list, ioc);

        rw_exit(&ioc->ioc_rwlock);
        rw_exit(&srpt_ctxt->sc_rwlock);
        break;

    case IBT_HCA_DETACH_EVENT:
        SRPT_DPRINTF_L1(
            "ioc_iob_async_hdlr, HCA_DETACH_EVENT received.");
        break;

    case IBT_EVENT_EMPTY_CHAN:
        /* Channel in ERROR state is now empty */
        ch = (srpt_channel_t *)ibt_get_chan_private(event->ev_chan_hdl);
        SRPT_DPRINTF_L3(
            "ioc_iob_async_hdlr, received empty channel error on %p",
            (void *)ch);
        break;

    default:
        SRPT_DPRINTF_L2("ioc_ib_async_hdlr, event not "
            "handled (%d)", code);
        break;
    }
}

/*
 * srpt_ioc_svc_bind()
 */
ibt_status_t
srpt_ioc_svc_bind(srpt_target_port_t *tgt, uint_t portnum)
{
    ibt_status_t	status;
    srpt_hw_port_t	*port;
    ibt_hca_portinfo_t	*portinfo;
    uint_t		qportinfo_sz;
    uint_t		qportnum;
    ib_gid_t		new_gid;
    srpt_ioc_t		*ioc;
    srpt_session_t	sess;

    ASSERT(tgt != NULL);
    ASSERT(tgt->tp_ioc != NULL);
    ioc = tgt->tp_ioc;

    if (tgt->tp_ibt_svc_hdl == NULL) {
        SRPT_DPRINTF_L2("ioc_svc_bind, NULL SCSI target port"
            " service");
        return (IBT_INVALID_PARAM);
    }

    if (portnum == 0 || portnum > tgt->tp_nports) {
        SRPT_DPRINTF_L2("ioc_svc_bind, bad port (%d)", portnum);
        return (IBT_INVALID_PARAM);
    }
    status = ibt_query_hca_ports(ioc->ioc_ibt_hdl, portnum,
        &portinfo, &qportnum, &qportinfo_sz);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("ioc_svc_bind, query port error (%d)",
            portnum);
        return (IBT_INVALID_PARAM);
    }

    ASSERT(portinfo != NULL);

    /*
     * If port is not active do nothing, caller should attempt to bind
     * after the port goes active.
     */
    if (portinfo->p_linkstate != IBT_PORT_ACTIVE) {
        SRPT_DPRINTF_L2("ioc_svc_bind, port %d not in active state",
            portnum);
        ibt_free_portinfo(portinfo, qportinfo_sz);
        return (IBT_HCA_PORT_NOT_ACTIVE);
    }

    port = &tgt->tp_hw_port[portnum-1];
    new_gid = portinfo->p_sgid_tbl[0];
    ibt_free_portinfo(portinfo, qportinfo_sz);

    /*
     * If previously bound and the port GID has changed,
     * rebind to the new GID.
     */
    if (port->hwp_bind_hdl != NULL) {
        if (new_gid.gid_guid != port->hwp_gid.gid_guid ||
            new_gid.gid_prefix != port->hwp_gid.gid_prefix) {
            SRPT_DPRINTF_L2("ioc_svc_bind, unregister current"
                " bind");
            ibt_unbind_service(tgt->tp_ibt_svc_hdl,
                port->hwp_bind_hdl);
            port->hwp_bind_hdl = NULL;
        }
    }
    SRPT_DPRINTF_L2("ioc_svc_bind, bind service, %016llx:%016llx",
        (u_longlong_t)new_gid.gid_prefix,
        (u_longlong_t)new_gid.gid_guid);

    /*
     * Pass SCSI Target Port as CM private data, the target will always
     * exist while this service is bound.
     */
    status = ibt_bind_service(tgt->tp_ibt_svc_hdl, new_gid, NULL, tgt,
        &port->hwp_bind_hdl);
    if (status != IBT_SUCCESS && status != IBT_CM_SERVICE_EXISTS) {
        SRPT_DPRINTF_L1("ioc_svc_bind, bind error (%d)", status);
        return (status);
    }
    port->hwp_gid.gid_prefix = new_gid.gid_prefix;
    port->hwp_gid.gid_guid = new_gid.gid_guid;

    /* setting up a transient structure for the dtrace probe. */
    bzero(&sess, sizeof (srpt_session_t));
    ALIAS_STR(sess.ss_t_gid, new_gid.gid_prefix, new_gid.gid_guid);
    EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

    DTRACE_SRP_1(service__up, srpt_session_t, &sess);

    return (IBT_SUCCESS);
}

/*
 * srpt_ioc_svc_unbind()
 */
void
srpt_ioc_svc_unbind(srpt_target_port_t *tgt, uint_t portnum)
{
    srpt_hw_port_t	*port;
    srpt_session_t	sess;

    if (tgt == NULL) {
        SRPT_DPRINTF_L2("ioc_svc_unbind, SCSI target does not exist");
        return;
    }

    if (portnum == 0 || portnum > tgt->tp_nports) {
        SRPT_DPRINTF_L2("ioc_svc_unbind, bad port (%d)", portnum);
        return;
    }
    port = &tgt->tp_hw_port[portnum-1];

    /* setting up a transient structure for the dtrace probe. */
    bzero(&sess, sizeof (srpt_session_t));
    ALIAS_STR(sess.ss_t_gid, port->hwp_gid.gid_prefix,
        port->hwp_gid.gid_guid);
    EUI_STR(sess.ss_t_name, tgt->tp_ibt_svc_id);

    DTRACE_SRP_1(service__down, srpt_session_t, &sess);

    if (tgt->tp_ibt_svc_hdl != NULL && port->hwp_bind_hdl != NULL) {
        SRPT_DPRINTF_L2("ioc_svc_unbind, unregister current bind");
        ibt_unbind_service(tgt->tp_ibt_svc_hdl, port->hwp_bind_hdl);
    }
    port->hwp_bind_hdl = NULL;
    port->hwp_gid.gid_prefix = 0;
    port->hwp_gid.gid_guid = 0;
}

/*
 * srpt_ioc_svc_unbind_all()
 */
void
srpt_ioc_svc_unbind_all(srpt_target_port_t *tgt)
{
    uint_t	portnum;

    if (tgt == NULL) {
        SRPT_DPRINTF_L2("ioc_svc_unbind_all, NULL SCSI target port"
            " specified");
        return;
    }
    for (portnum = 1; portnum <= tgt->tp_nports; portnum++) {
        srpt_ioc_svc_unbind(tgt, portnum);
    }
}

/*
 * srpt_ioc_get_locked()
 *
 * Requires srpt_ctxt->rw_lock be held outside of call.
 */
srpt_ioc_t *
srpt_ioc_get_locked(ib_guid_t guid)
{
    srpt_ioc_t	*ioc;

    ioc = list_head(&srpt_ctxt->sc_ioc_list);
    while (ioc != NULL) {
        if (ioc->ioc_guid == guid) {
            break;
        }
        ioc = list_next(&srpt_ctxt->sc_ioc_list, ioc);
    }
    return (ioc);
}

/*
 * srpt_ioc_get()
 */
srpt_ioc_t *
srpt_ioc_get(ib_guid_t guid)
{
    srpt_ioc_t	*ioc;

    rw_enter(&srpt_ctxt->sc_rwlock, RW_READER);
    ioc = srpt_ioc_get_locked(guid);
    rw_exit(&srpt_ctxt->sc_rwlock);
    return (ioc);
}

/*
 * srpt_ioc_post_recv_iu()
 */
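/*
 * The IU pointer is encoded as the receive work request ID so that the
 * receive completion handler can map a completion back to its IU without
 * a separate lookup.
 */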
ibt_status_t
srpt_ioc_post_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
    ibt_status_t	status;
    ibt_recv_wr_t	wr;
    uint_t		posted;

    ASSERT(ioc != NULL);
    ASSERT(iu != NULL);

    wr.wr_id = (ibt_wrid_t)(uintptr_t)iu;
    wr.wr_nds = 1;
    wr.wr_sgl = &iu->iu_sge;
    posted = 0;

    status = ibt_post_srq(ioc->ioc_srq_hdl, &wr, 1, &posted);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L2("ioc_post_recv_iu, post error (%d)",
            status);
    }
    return (status);
}

/*
 * srpt_ioc_repost_recv_iu()
 */
void
srpt_ioc_repost_recv_iu(srpt_ioc_t *ioc, srpt_iu_t *iu)
{
    srpt_channel_t	*ch;
    ibt_status_t	status;

    ASSERT(iu != NULL);
    ASSERT(mutex_owned(&iu->iu_lock));

    /*
     * Some additional sanity checks while in debug state, all STMF
     * related task activities should be complete prior to returning
     * this IU to the available pool.
     */
    ASSERT(iu->iu_stmf_task == NULL);
    ASSERT(iu->iu_sq_posted_cnt == 0);

    ch = iu->iu_ch;
    iu->iu_ch = NULL;
    iu->iu_num_rdescs = 0;
    iu->iu_rdescs = NULL;
    iu->iu_tot_xfer_len = 0;
    iu->iu_tag = 0;
    iu->iu_flags = 0;
    iu->iu_sq_posted_cnt = 0;

    status = srpt_ioc_post_recv_iu(ioc, iu);

    if (status != IBT_SUCCESS) {
        /*
         * Very bad, we should initiate a shutdown of the I/O
         * Controller here, off-lining any targets associated
         * with this I/O Controller (and therefore disconnecting
         * any logins that remain).
         *
         * In practice this should never happen so we put
         * the code near the bottom of the implementation list.
         */
        SRPT_DPRINTF_L0("ioc_repost_recv_iu, error RX IU (%d)",
            status);
        ASSERT(0);
    } else if (ch != NULL) {
        atomic_inc_32(&ch->ch_req_lim_delta);
    }
}

/*
 * srpt_ioc_init_profile()
 *
 * SRP I/O Controller serialization lock must be held when this
 * routine is invoked.
 */
void
srpt_ioc_init_profile(srpt_ioc_t *ioc)
{
    srpt_ioc_opcap_mask_t	capmask = {0};

    ASSERT(ioc != NULL);

    ioc->ioc_profile.ioc_guid = h2b64(ioc->ioc_guid);
    (void) memcpy(ioc->ioc_profile.ioc_id_string,
        "Solaris SRP Target 0.9a", 23);

    /*
     * Note vendor ID and subsystem ID are 24 bit values.  Low order
     * 8 bits in vendor ID field is slot and is initialized to zero.
     * Low order 8 bits of subsystem ID is a reserved field and
     * initialized to zero.
     */
    ioc->ioc_profile.ioc_vendorid =
        h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
    ioc->ioc_profile.ioc_deviceid =
        h2b32((uint32_t)ioc->ioc_attr.hca_device_id);
    ioc->ioc_profile.ioc_device_ver =
        h2b16((uint16_t)ioc->ioc_attr.hca_version_id);
    ioc->ioc_profile.ioc_subsys_vendorid =
        h2b32((uint32_t)(ioc->ioc_attr.hca_vendor_id << 8));
    ioc->ioc_profile.ioc_subsys_id = h2b32(0);
    ioc->ioc_profile.ioc_io_class = h2b16(SRP_REV_16A_IO_CLASS);
    ioc->ioc_profile.ioc_io_subclass = h2b16(SRP_IO_SUBCLASS);
    ioc->ioc_profile.ioc_protocol = h2b16(SRP_PROTOCOL);
    ioc->ioc_profile.ioc_protocol_ver = h2b16(SRP_PROTOCOL_VERSION);
    ioc->ioc_profile.ioc_send_msg_qdepth = h2b16(srpt_send_msg_depth);
    ioc->ioc_profile.ioc_rdma_read_qdepth =
        ioc->ioc_attr.hca_max_rdma_out_chan;
    ioc->ioc_profile.ioc_send_msg_sz = h2b32(SRPT_DEFAULT_SEND_MSG_SIZE);
    ioc->ioc_profile.ioc_rdma_xfer_sz = h2b32(SRPT_DEFAULT_MAX_RDMA_SIZE);

    capmask.bits.st = 1;	/* Messages can be sent to IOC */
    capmask.bits.sf = 1;	/* Messages can be sent from IOC */
    capmask.bits.rf = 1;	/* RDMA Reads can be sent from IOC */
    capmask.bits.wf = 1;	/* RDMA Writes can be sent from IOC */
    ioc->ioc_profile.ioc_ctrl_opcap_mask = capmask.mask;

    /*
     * We currently only have one target, but if we had a list we would
     * go through that list and only count those that are ONLINE when
     * setting the services count and entries.
     */
    if (ioc->ioc_tgt_port->tp_srp_enabled) {
        ioc->ioc_profile.ioc_service_entries = 1;
        ioc->ioc_svc.srv_id = h2b64(ioc->ioc_guid);
        (void) snprintf((char *)ioc->ioc_svc.srv_name,
            IB_DM_MAX_SVC_NAME_LEN, "SRP.T10:%016llx",
            (u_longlong_t)ioc->ioc_guid);
    } else {
        ioc->ioc_profile.ioc_service_entries = 0;
        ioc->ioc_svc.srv_id = 0;
    }
}

/*
 * srpt_ioc_ds_alloc_dbuf()
 */
/* ARGSUSED */
stmf_data_buf_t *
srpt_ioc_ds_alloc_dbuf(struct scsi_task *task, uint32_t size,
    uint32_t *pminsize, uint32_t flags)
{
    srpt_iu_t		*iu;
    srpt_ioc_t		*ioc;
    srpt_ds_dbuf_t	*dbuf;
    stmf_data_buf_t	*stmf_dbuf;
    void		*buf;
    srpt_mr_t		mr;

    ASSERT(task != NULL);
    iu = task->task_port_private;
    ioc = iu->iu_ioc;

    SRPT_DPRINTF_L4("ioc_ds_alloc_dbuf, invoked ioc(%p)"
        " size(%d), flags(%x)",
        (void *)ioc, size, flags);

    buf = srpt_vmem_alloc(ioc->ioc_dbuf_pool, size);
    if (buf == NULL) {
        return (NULL);
    }

    if (srpt_vmem_mr(ioc->ioc_dbuf_pool, buf, size, &mr) != 0) {
        goto stmf_alloc_err;
    }

    stmf_dbuf = stmf_alloc(STMF_STRUCT_DATA_BUF, sizeof (srpt_ds_dbuf_t),
        0);
    if (stmf_dbuf == NULL) {
        SRPT_DPRINTF_L2("ioc_ds_alloc_dbuf, stmf_alloc failed");
        goto stmf_alloc_err;
    }

    dbuf = stmf_dbuf->db_port_private;
    dbuf->db_stmf_buf = stmf_dbuf;
    dbuf->db_mr_hdl = mr.mr_hdl;
    dbuf->db_ioc = ioc;
    dbuf->db_sge.ds_va = mr.mr_va;
    dbuf->db_sge.ds_key = mr.mr_lkey;
    dbuf->db_sge.ds_len = size;

    stmf_dbuf->db_buf_size = size;
    stmf_dbuf->db_data_size = size;
    stmf_dbuf->db_relative_offset = 0;
    stmf_dbuf->db_flags = 0;
    stmf_dbuf->db_xfer_status = 0;
    stmf_dbuf->db_sglist_length = 1;
    stmf_dbuf->db_sglist[0].seg_addr = buf;
    stmf_dbuf->db_sglist[0].seg_length = size;

    return (stmf_dbuf);

buf_mr_err:
    stmf_free(stmf_dbuf);

stmf_alloc_err:
    srpt_vmem_free(ioc->ioc_dbuf_pool, buf, size);

    return (NULL);
}

void
srpt_ioc_ds_free_dbuf(struct stmf_dbuf_store *ds,
    stmf_data_buf_t *dbuf)
{
    srpt_ioc_t	*ioc;

    SRPT_DPRINTF_L4("ioc_ds_free_dbuf, invoked buf (%p)",
        (void *)dbuf);
    ioc = ds->ds_port_private;

    srpt_vmem_free(ioc->ioc_dbuf_pool, dbuf->db_sglist[0].seg_addr,
        dbuf->db_buf_size);
    stmf_free(dbuf);
}

/* Memory arena routines */

static srpt_vmem_pool_t *
srpt_vmem_create(const char *name, srpt_ioc_t *ioc, ib_memlen_t chunksize,
    uint64_t maxsize, ibt_mr_flags_t flags)
{
    srpt_mr_t		*chunk;
    srpt_vmem_pool_t	*result;

    ASSERT(chunksize <= maxsize);

    result = kmem_zalloc(sizeof (srpt_vmem_pool_t), KM_SLEEP);

    result->svp_ioc = ioc;
    result->svp_chunksize = chunksize;
    result->svp_max_size = maxsize;
    result->svp_flags = flags;

    rw_init(&result->svp_lock, NULL, RW_DRIVER, NULL);
    avl_create(&result->svp_mr_list, srpt_vmem_mr_compare,
        sizeof (srpt_mr_t), offsetof(srpt_mr_t, mr_avl));

    chunk = srpt_vmem_chunk_alloc(result, chunksize);

    avl_add(&result->svp_mr_list, chunk);
    result->svp_total_size = chunksize;

    result->svp_vmem = vmem_create(name,
        (void *)(uintptr_t)chunk->mr_va,
        (size_t)chunk->mr_len, SRPT_MR_QUANTSIZE,
        NULL, NULL, NULL, 0, VM_SLEEP);

    return (result);
}

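/*
 * srpt_vmem_destroy() - tear down a pool created by srpt_vmem_create()
 *
 * The vmem arena is destroyed first; each backing chunk is then removed
 * from the AVL list, deregistered and freed.
 */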
static void
srpt_vmem_destroy(srpt_vmem_pool_t *vm_pool)
{
    srpt_mr_t	*chunk;
    srpt_mr_t	*next;

    rw_enter(&vm_pool->svp_lock, RW_WRITER);
    vmem_destroy(vm_pool->svp_vmem);

    chunk = avl_first(&vm_pool->svp_mr_list);

    while (chunk != NULL) {
        next = AVL_NEXT(&vm_pool->svp_mr_list, chunk);
        avl_remove(&vm_pool->svp_mr_list, chunk);
        srpt_vmem_chunk_free(vm_pool, chunk);
        chunk = next;
    }

    avl_destroy(&vm_pool->svp_mr_list);

    rw_exit(&vm_pool->svp_lock);
    rw_destroy(&vm_pool->svp_lock);

    kmem_free(vm_pool, sizeof (srpt_vmem_pool_t));
}

static void *
srpt_vmem_alloc(srpt_vmem_pool_t *vm_pool, size_t size)
{
    void	*result;
    srpt_mr_t	*next;
    ib_memlen_t	chunklen;

    ASSERT(vm_pool != NULL);

    result = vmem_alloc(vm_pool->svp_vmem, size,
        VM_NOSLEEP | VM_FIRSTFIT);

    if (result != NULL) {
        /* memory successfully allocated */
        return (result);
    }

    /* need more vmem */
    rw_enter(&vm_pool->svp_lock, RW_WRITER);
    chunklen = vm_pool->svp_chunksize;

    if (vm_pool->svp_total_size >= vm_pool->svp_max_size) {
        /* no more room to alloc */
        rw_exit(&vm_pool->svp_lock);
        return (NULL);
    }

    if ((vm_pool->svp_total_size + chunklen) > vm_pool->svp_max_size) {
        chunklen = vm_pool->svp_max_size - vm_pool->svp_total_size;
    }

    next = srpt_vmem_chunk_alloc(vm_pool, chunklen);
    if (next != NULL) {
        /*
         * Note that the size of the chunk we got
         * may not be the size we requested.  Use the
         * length returned in the chunk itself.
         */
        if (vmem_add(vm_pool->svp_vmem, (void *)(uintptr_t)next->mr_va,
            next->mr_len, VM_NOSLEEP) == NULL) {
            srpt_vmem_chunk_free(vm_pool, next);
            SRPT_DPRINTF_L2("vmem_add failed");
        } else {
            vm_pool->svp_total_size += next->mr_len;
            avl_add(&vm_pool->svp_mr_list, next);
        }
    }

    rw_exit(&vm_pool->svp_lock);

    result = vmem_alloc(vm_pool->svp_vmem, size, VM_NOSLEEP | VM_FIRSTFIT);

    return (result);
}

static void
srpt_vmem_free(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size)
{
    vmem_free(vm_pool->svp_vmem, vaddr, size);
}

static int
srpt_vmem_mr(srpt_vmem_pool_t *vm_pool, void *vaddr, size_t size,
    srpt_mr_t *mr)
{
    avl_index_t	where;
    ib_vaddr_t	mrva = (ib_vaddr_t)(uintptr_t)vaddr;
    srpt_mr_t	chunk;
    srpt_mr_t	*nearest;
    ib_vaddr_t	chunk_end;
    int		status = DDI_FAILURE;

    rw_enter(&vm_pool->svp_lock, RW_READER);

    chunk.mr_va = mrva;
    nearest = avl_find(&vm_pool->svp_mr_list, &chunk, &where);

    if (nearest == NULL) {
        nearest = avl_nearest(&vm_pool->svp_mr_list, where,
            AVL_BEFORE);
    }

    if (nearest != NULL) {
        /* Verify this chunk contains the specified address range */
        ASSERT(nearest->mr_va <= mrva);

        chunk_end = nearest->mr_va + nearest->mr_len;
        if (chunk_end >= mrva + size) {
            mr->mr_hdl = nearest->mr_hdl;
            mr->mr_va = mrva;
            mr->mr_len = size;
            mr->mr_lkey = nearest->mr_lkey;
            mr->mr_rkey = nearest->mr_rkey;
            status = DDI_SUCCESS;
        }
    }

    rw_exit(&vm_pool->svp_lock);
    return (status);
}

static srpt_mr_t *
srpt_vmem_chunk_alloc(srpt_vmem_pool_t *vm_pool, ib_memlen_t chunksize)
{
    void	*chunk = NULL;
    srpt_mr_t	*result = NULL;

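    /*
     * If a chunk of the requested size cannot be allocated, halve the
     * request and retry until the allocation succeeds or the size
     * drops below SRPT_MIN_CHUNKSIZE.
     */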
    while ((chunk == NULL) && (chunksize >= SRPT_MIN_CHUNKSIZE)) {
        chunk = kmem_alloc(chunksize, KM_NOSLEEP);
        if (chunk == NULL) {
            SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
                "failed to alloc chunk of %d, trying %d",
                (int)chunksize, (int)chunksize/2);
            chunksize /= 2;
        }
    }

    if (chunk != NULL) {
        result = srpt_reg_mem(vm_pool, (ib_vaddr_t)(uintptr_t)chunk,
            chunksize);
        if (result == NULL) {
            SRPT_DPRINTF_L2("srpt_vmem_chunk_alloc: "
                "chunk registration failed");
            kmem_free(chunk, chunksize);
        }
    }

    return (result);
}

static void
srpt_vmem_chunk_free(srpt_vmem_pool_t *vm_pool, srpt_mr_t *mr)
{
    void	*chunk = (void *)(uintptr_t)mr->mr_va;
    ib_memlen_t	chunksize = mr->mr_len;

    srpt_dereg_mem(vm_pool->svp_ioc, mr);
    kmem_free(chunk, chunksize);
}

static srpt_mr_t *
srpt_reg_mem(srpt_vmem_pool_t *vm_pool, ib_vaddr_t vaddr, ib_memlen_t len)
{
    srpt_mr_t		*result = NULL;
    ibt_mr_attr_t	mr_attr;
    ibt_mr_desc_t	mr_desc;
    ibt_status_t	status;
    srpt_ioc_t		*ioc = vm_pool->svp_ioc;

    result = kmem_zalloc(sizeof (srpt_mr_t), KM_NOSLEEP);
    if (result == NULL) {
        SRPT_DPRINTF_L2("srpt_reg_mem: failed to allocate");
        return (NULL);
    }

    bzero(&mr_attr, sizeof (ibt_mr_attr_t));
    bzero(&mr_desc, sizeof (ibt_mr_desc_t));

    mr_attr.mr_vaddr = vaddr;
    mr_attr.mr_len = len;
    mr_attr.mr_as = NULL;
    mr_attr.mr_flags = vm_pool->svp_flags;

    status = ibt_register_mr(ioc->ioc_ibt_hdl, ioc->ioc_pd_hdl,
        &mr_attr, &result->mr_hdl, &mr_desc);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L2("srpt_reg_mem: ibt_register_mr "
            "failed %d", status);
        kmem_free(result, sizeof (srpt_mr_t));
        return (NULL);
    }

    result->mr_va = mr_attr.mr_vaddr;
    result->mr_len = mr_attr.mr_len;
    result->mr_lkey = mr_desc.md_lkey;
    result->mr_rkey = mr_desc.md_rkey;

    return (result);
}

static void
srpt_dereg_mem(srpt_ioc_t *ioc, srpt_mr_t *mr)
{
    ibt_status_t	status;

    status = ibt_deregister_mr(ioc->ioc_ibt_hdl, mr->mr_hdl);
    if (status != IBT_SUCCESS) {
        SRPT_DPRINTF_L1("srpt_dereg_mem, error deregistering MR (%d)",
            status);
    }
    kmem_free(mr, sizeof (srpt_mr_t));
}

static int
srpt_vmem_mr_compare(const void *a, const void *b)
{
    srpt_mr_t	*mr1 = (srpt_mr_t *)a;
    srpt_mr_t	*mr2 = (srpt_mr_t *)b;

    /* sort and match by virtual address */
    if (mr1->mr_va < mr2->mr_va) {
        return (-1);
    } else if (mr1->mr_va > mr2->mr_va) {
        return (1);
    }

    return (0);
}