1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/ddi.h> 28 #include <sys/types.h> 29 #include <sys/socket.h> 30 #include <netinet/in.h> 31 #include <sys/sunddi.h> 32 #include <sys/sysmacros.h> 33 #include <sys/iscsi_protocol.h> 34 35 #include <sys/ib/clients/iser/iser.h> 36 #include <sys/ib/clients/iser/iser_idm.h> 37 38 /* 39 * iser_ib.c 40 * Routines for InfiniBand transport for iSER 41 * 42 * This file contains the routines to interface with the IBT API to attach and 43 * allocate IB resources, handle async events, and post recv work requests. 
44 * 45 */ 46 47 static iser_hca_t *iser_ib_gid2hca(ib_gid_t gid); 48 static iser_hca_t *iser_ib_guid2hca(ib_guid_t guid); 49 50 static iser_hca_t *iser_ib_alloc_hca(ib_guid_t guid); 51 static int iser_ib_free_hca(iser_hca_t *hca); 52 static int iser_ib_update_hcaports(iser_hca_t *hca); 53 static int iser_ib_init_hcas(void); 54 static int iser_ib_fini_hcas(void); 55 56 static iser_sbind_t *iser_ib_get_bind( 57 iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid); 58 static int iser_ib_activate_port( 59 idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid); 60 static void iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid); 61 62 static void iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size); 63 static void iser_ib_fini_qp(iser_qp_t *qp); 64 65 static int iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, 66 ibt_cq_hdl_t *cq_hdl); 67 68 static void iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 69 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 70 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs); 71 72 static void iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, 73 ibt_async_event_t *event); 74 static void iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, 75 ibt_async_event_t *event); 76 static void iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, 77 ibt_async_event_t *event); 78 79 static void iser_ib_post_recv_task(void *arg); 80 81 static struct ibt_clnt_modinfo_s iser_ib_modinfo = { 82 IBTI_V_CURR, 83 IBT_STORAGE_DEV, 84 iser_ib_async_handler, 85 NULL, 86 "iSER" 87 }; 88 89 /* 90 * iser_ib_init 91 * 92 * This function registers the HCA drivers with IBTF and registers and binds 93 * iSER as a service with IBTF. 
94 */ 95 int 96 iser_ib_init(void) 97 { 98 int status; 99 100 /* Register with IBTF */ 101 status = ibt_attach(&iser_ib_modinfo, iser_state->is_dip, iser_state, 102 &iser_state->is_ibhdl); 103 if (status != DDI_SUCCESS) { 104 ISER_LOG(CE_NOTE, "iser_ib_init: ibt_attach failed (0x%x)", 105 status); 106 return (DDI_FAILURE); 107 } 108 109 /* Create the global work request kmem_cache */ 110 iser_state->iser_wr_cache = kmem_cache_create("iser_wr_cache", 111 sizeof (iser_wr_t), 0, NULL, NULL, NULL, 112 iser_state, NULL, KM_SLEEP); 113 114 /* Populate our list of HCAs */ 115 status = iser_ib_init_hcas(); 116 if (status != DDI_SUCCESS) { 117 /* HCAs failed to initialize, tear it down */ 118 kmem_cache_destroy(iser_state->iser_wr_cache); 119 (void) ibt_detach(iser_state->is_ibhdl); 120 iser_state->is_ibhdl = NULL; 121 ISER_LOG(CE_NOTE, "iser_ib_init: failed to initialize HCAs"); 122 return (DDI_FAILURE); 123 } 124 125 /* Target will register iSER as a service with IBTF when required */ 126 127 /* Target will bind this service when it comes online */ 128 129 return (DDI_SUCCESS); 130 } 131 132 /* 133 * iser_ib_fini 134 * 135 * This function unbinds and degisters the iSER service from IBTF 136 */ 137 int 138 iser_ib_fini(void) 139 { 140 /* IDM would have already disabled all the services */ 141 142 /* Teardown the HCA list and associated resources */ 143 if (iser_ib_fini_hcas() != DDI_SUCCESS) 144 return (DDI_FAILURE); 145 146 /* Teardown the global work request kmem_cache */ 147 kmem_cache_destroy(iser_state->iser_wr_cache); 148 149 /* Deregister with IBTF */ 150 if (iser_state->is_ibhdl != NULL) { 151 (void) ibt_detach(iser_state->is_ibhdl); 152 iser_state->is_ibhdl = NULL; 153 } 154 155 return (DDI_SUCCESS); 156 } 157 158 /* 159 * iser_ib_register_service 160 * 161 * This function registers the iSER service using the RDMA-Aware Service ID. 
 */
int
iser_ib_register_service(idm_svc_t *idm_svc)
{
	ibt_srv_desc_t	srvdesc;
	iser_svc_t	*iser_svc;
	int		status;

	bzero(&srvdesc, sizeof (ibt_srv_desc_t));

	/* Set up IBTI client callback handler from the CM */
	srvdesc.sd_handler = iser_ib_cm_handler;

	srvdesc.sd_flags = IBT_SRV_NO_FLAGS;

	iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;

	/* Register the service on the specified port */
	status = ibt_register_service(
	    iser_state->is_ibhdl, &srvdesc,
	    iser_svc->is_svcid, 1, &iser_svc->is_srvhdl, NULL);

	return (status);
}

/*
 * iser_ib_bind_service
 *
 * This function binds a given iSER service on all available HCA ports. The
 * current specification does not allow user to specify transport bindings
 * for each iscsi target. The ULP invokes this function to bind the target
 * to all available iser ports after checking for the presence of an IB HCA.
 * iSER is "configured" whenever an IB-capable IP address exists. The lack
 * of active IB ports is a less-fatal condition, and sockets would be used
 * as the transport even though an Infiniband HCA is configured but unusable.
 *
 * Returns ISER_STATUS_SUCCESS if at least one port was bound, or if all
 * ports are merely inactive (the async handler binds them when they come
 * up); ISER_STATUS_FAIL only if no bind succeeded for another reason.
 */
int
iser_ib_bind_service(idm_svc_t *idm_svc)
{
	iser_hca_t	*hca;
	ib_gid_t	gid;
	int		num_ports = 0;
	int		num_binds = 0;
	int		num_inactive_binds = 0; /* if HCA ports inactive */
	int		status;
	int		i;

	ASSERT(idm_svc != NULL);
	ASSERT(idm_svc->is_iser_svc != NULL);

	/* Register the iSER service on all available ports */
	mutex_enter(&iser_state->is_hcalist_lock);

	for (hca = list_head(&iser_state->is_hcalist);
	    hca != NULL;
	    hca = list_next(&iser_state->is_hcalist, hca)) {

		for (i = 0; i < hca->hca_num_ports; i++) {
			num_ports++;
			if (hca->hca_port_info[i].p_linkstate !=
			    IBT_PORT_ACTIVE) {
				/*
				 * Move on. We will attempt to bind service
				 * in our async handler if the port comes up
				 * at a later time.
				 */
				num_inactive_binds++;
				continue;
			}

			gid = hca->hca_port_info[i].p_sgid_tbl[0];

			/* If the port is already bound, skip */
			if (iser_ib_get_bind(
			    idm_svc->is_iser_svc, hca->hca_guid, gid) == NULL) {

				status = iser_ib_activate_port(
				    idm_svc, hca->hca_guid, gid);
				if (status != IBT_SUCCESS) {
					ISER_LOG(CE_NOTE,
					    "iser_ib_bind_service: "
					    "iser_ib_activate_port failure "
					    "(0x%x)", status);
					continue;
				}
			}
			num_binds++;
		}
	}
	mutex_exit(&iser_state->is_hcalist_lock);

	if (num_binds) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Service available on "
		    "(%d) of (%d) ports", num_binds, num_ports);
		return (ISER_STATUS_SUCCESS);
	} else if (num_inactive_binds) {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Could not bind "
		    "service, HCA ports are not active.");
		/*
		 * still considered success, the async handler will bind
		 * the service when the port comes up at a later time
		 */
		return (ISER_STATUS_SUCCESS);
	} else {
		ISER_LOG(CE_NOTE, "iser_ib_bind_service: Did not bind service");
		return (ISER_STATUS_FAIL);
	}
}

/*
 * iser_ib_unbind_service
 *
 * This function unbinds a given service on all HCA ports, freeing every
 * service-bind handle tracked on the iSER service's bind list.
 */
void
iser_ib_unbind_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*iser_svc;
	iser_sbind_t	*is_sbind, *next_sb;

	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {

		iser_svc = idm_svc->is_iser_svc;

		/* save the next pointer up front since we free as we go */
		for (is_sbind = list_head(&iser_svc->is_sbindlist);
		    is_sbind != NULL;
		    is_sbind = next_sb) {
			next_sb = list_next(&iser_svc->is_sbindlist, is_sbind);
			ibt_unbind_service(iser_svc->is_srvhdl,
			    is_sbind->is_sbindhdl);
			list_remove(&iser_svc->is_sbindlist, is_sbind);
			kmem_free(is_sbind, sizeof (iser_sbind_t));
		}
	}
}

/* ARGSUSED */
void
iser_ib_deregister_service(idm_svc_t *idm_svc)
{
	iser_svc_t	*iser_svc;

	/* Deregister the CM service and release its RDMA-aware service ID */
	if (idm_svc != NULL && idm_svc->is_iser_svc != NULL) {

		iser_svc = (iser_svc_t *)idm_svc->is_iser_svc;
		ibt_deregister_service(iser_state->is_ibhdl,
		    iser_svc->is_srvhdl);
		ibt_release_ip_sid(iser_svc->is_svcid);
	}
}

/*
 * iser_ib_get_paths
 * This function finds the IB path between the local and the remote address,
 * filling in 'path' and 'path_src_ip' on success.
 *
 * Returns ISER_STATUS_SUCCESS, or the IBT status on lookup failure.
 *
 * NOTE(review): local_ip is dereferenced unconditionally below
 * (ipattr.ipa_src_ip = *local_ip) yet checked for NULL afterward for
 * logging purposes — confirm whether a NULL local_ip is actually possible
 * on any call path.
 */
int
iser_ib_get_paths(ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip,
    ibt_path_info_t *path, ibt_path_ip_src_t *path_src_ip)
{
	ibt_ip_path_attr_t	ipattr;
	int			status;

	(void) bzero(&ipattr, sizeof (ibt_ip_path_attr_t));
	ipattr.ipa_dst_ip = remote_ip;
	ipattr.ipa_src_ip = *local_ip;
	ipattr.ipa_max_paths = 1;
	ipattr.ipa_ndst = 1;

	(void) bzero(path, sizeof (ibt_path_info_t));
	status = ibt_get_ip_paths(iser_state->is_ibhdl, IBT_PATH_NO_FLAGS,
	    &ipattr, path, NULL, path_src_ip);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "ibt_get_ip_paths: ibt_get_ip_paths "
		    "failure: status (%d)", status);
		return (status);
	}

	if (local_ip != NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: IP[%x to %x]",
		    local_ip->un.ip4addr, remote_ip->un.ip4addr);
	} else {
		ISER_LOG(CE_NOTE, "iser_ib_get_paths success: "
		    "IP[INADDR_ANY to %x]", remote_ip->un.ip4addr);
	}

	return (ISER_STATUS_SUCCESS);
}

/*
 * iser_ib_alloc_channel_nopathlookup
 *
 * This function allocates a reliable connected channel. This function does
 * not invoke ibt_get_ip_paths() to do the path lookup. The HCA GUID and
 * port are input to this function.
 *
 * Returns the new channel, or NULL if the HCA lookup or the channel
 * allocation fails.
 */
iser_chan_t *
iser_ib_alloc_channel_nopathlookup(ib_guid_t hca_guid, uint8_t hca_port)
{
	iser_hca_t	*hca;
	iser_chan_t	*chan;

	/* Lookup the hca using the HCA GUID supplied by the caller */
	hca = iser_ib_guid2hca(hca_guid);
	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
		    "to lookup HCA(%llx) handle", (longlong_t)hca_guid);
		return (NULL);
	}

	chan = iser_ib_alloc_rc_channel(hca, hca_port);
	if (chan == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_nopathlookup: failed "
		    "to alloc channel on HCA(%llx) %d",
		    (longlong_t)hca_guid, hca_port);
		return (NULL);
	}

	ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: "
	    "chanhdl (0x%p), HCA(%llx) %d",
	    (void *)chan->ic_chanhdl, (longlong_t)hca_guid, hca_port);

	return (chan);
}

/*
 * iser_ib_alloc_channel_pathlookup
 *
 * This function allocates a reliable connected channel but first invokes
 * ibt_get_ip_paths() with the given local and remote address to get the
 * HCA lgid and the port number.
394 */ 395 iser_chan_t * 396 iser_ib_alloc_channel_pathlookup( 397 ibt_ip_addr_t *local_ip, ibt_ip_addr_t *remote_ip) 398 { 399 ibt_path_info_t ibt_path; 400 ibt_path_ip_src_t path_src_ip; 401 ib_gid_t lgid; 402 uint8_t hca_port; /* from path */ 403 iser_hca_t *hca; 404 iser_chan_t *chan; 405 int status; 406 407 /* Lookup a path to the given destination */ 408 status = iser_ib_get_paths( 409 local_ip, remote_ip, &ibt_path, &path_src_ip); 410 411 if (status != ISER_STATUS_SUCCESS) { 412 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: faild " 413 "Path lookup IP:[%llx to %llx] failed: status (%d)", 414 (longlong_t)local_ip->un.ip4addr, 415 (longlong_t)remote_ip->un.ip4addr, 416 status); 417 return (NULL); 418 } 419 420 /* get the local gid from the path info */ 421 lgid = ibt_path.pi_prim_cep_path.cep_adds_vect.av_sgid; 422 423 /* get the hca port from the path info */ 424 hca_port = ibt_path.pi_prim_cep_path.cep_hca_port_num; 425 426 /* Lookup the hca using the gid in the path info */ 427 hca = iser_ib_gid2hca(lgid); 428 if (hca == NULL) { 429 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed " 430 "to lookup HCA (%llx) handle", 431 (longlong_t)hca->hca_guid); 432 return (NULL); 433 } 434 435 chan = iser_ib_alloc_rc_channel(hca, hca_port); 436 if (chan == NULL) { 437 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup: failed " 438 "to alloc channel from IP:[%llx to %llx] on HCA (%llx) %d", 439 (longlong_t)local_ip->un.ip4addr, 440 (longlong_t)remote_ip->un.ip4addr, 441 (longlong_t)hca->hca_guid, hca_port); 442 return (NULL); 443 } 444 445 ISER_LOG(CE_NOTE, "iser_ib_alloc_channel_pathlookup success: " 446 "chanhdl (0x%p), IP:[%llx to %llx], lgid (%llx:%llx), HCA(%llx) %d", 447 (void *)chan->ic_chanhdl, 448 (longlong_t)local_ip->un.ip4addr, 449 (longlong_t)remote_ip->un.ip4addr, 450 (longlong_t)lgid.gid_prefix, (longlong_t)lgid.gid_guid, 451 (longlong_t)hca->hca_guid, hca_port); 452 453 chan->ic_ibt_path = ibt_path; 454 chan->ic_localip = 
path_src_ip.ip_primary; 455 chan->ic_remoteip = *remote_ip; 456 457 return (chan); 458 } 459 460 /* 461 * iser_ib_alloc_rc_channel 462 * 463 * This function allocates a reliable communication channel using the specified 464 * channel attributes. 465 */ 466 iser_chan_t * 467 iser_ib_alloc_rc_channel(iser_hca_t *hca, uint8_t hca_port) 468 { 469 470 iser_chan_t *chan; 471 ibt_rc_chan_alloc_args_t chanargs; 472 uint_t sq_size, rq_size; 473 int status; 474 475 chan = kmem_zalloc(sizeof (iser_chan_t), KM_SLEEP); 476 477 mutex_init(&chan->ic_lock, NULL, MUTEX_DRIVER, NULL); 478 mutex_init(&chan->ic_sq_post_lock, NULL, MUTEX_DRIVER, NULL); 479 480 /* Set up the iSER channel handle with HCA */ 481 chan->ic_hca = hca; 482 483 /* 484 * Determine the queue sizes, based upon the HCA query data. 485 * For our Work Queues, we will use either our default value, 486 * or the HCA's maximum value, whichever is smaller. 487 */ 488 sq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_SENDQ_SIZE); 489 rq_size = min(hca->hca_attr.hca_max_chan_sz, ISER_IB_RECVQ_SIZE); 490 491 /* 492 * For our Completion Queues, we again check the device maximum. 493 * We want to end up with CQs that are the next size up from the 494 * WQs they are servicing so that they have some overhead. 
	 */
	if (hca->hca_attr.hca_max_cq_sz >= (sq_size + 1)) {
		chan->ic_sendcq_sz = sq_size + 1;
	} else {
		/* CQ maximum caps us; shrink the WQ to stay one below it */
		chan->ic_sendcq_sz = hca->hca_attr.hca_max_cq_sz;
		sq_size = chan->ic_sendcq_sz - 1;
	}

	if (hca->hca_attr.hca_max_cq_sz >= (rq_size + 1)) {
		chan->ic_recvcq_sz = rq_size + 1;
	} else {
		chan->ic_recvcq_sz = hca->hca_attr.hca_max_cq_sz;
		rq_size = chan->ic_recvcq_sz - 1;
	}

	/* Initialize the iSER channel's QP handle */
	iser_ib_init_qp(chan, sq_size, rq_size);

	/* Set up the Send Completion Queue */
	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_sendcq_sz,
	    &chan->ic_sendcq);
	if (status != ISER_STATUS_SUCCESS) {
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}
	ibt_set_cq_handler(chan->ic_sendcq, iser_ib_sendcq_handler, chan);
	ibt_enable_cq_notify(chan->ic_sendcq, IBT_NEXT_COMPLETION);

	/* Set up the Receive Completion Queue */
	status = iser_ib_setup_cq(hca->hca_hdl, chan->ic_recvcq_sz,
	    &chan->ic_recvcq);
	if (status != ISER_STATUS_SUCCESS) {
		(void) ibt_free_cq(chan->ic_sendcq);
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}
	ibt_set_cq_handler(chan->ic_recvcq, iser_ib_recvcq_handler, chan);
	ibt_enable_cq_notify(chan->ic_recvcq, IBT_NEXT_COMPLETION);

	/* Setup the channel arguments */
	iser_ib_setup_chanargs(hca_port, chan->ic_sendcq, chan->ic_recvcq,
	    sq_size, rq_size, hca->hca_pdhdl, &chanargs);

	status = ibt_alloc_rc_channel(hca->hca_hdl,
	    IBT_ACHAN_NO_FLAGS, &chanargs, &chan->ic_chanhdl, NULL);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_alloc_rc_channel: failed "
		    "ibt_alloc_rc_channel: status (%d)", status);
		(void) ibt_free_cq(chan->ic_sendcq);
		(void) ibt_free_cq(chan->ic_recvcq);
		iser_ib_fini_qp(&chan->ic_qp);
		mutex_destroy(&chan->ic_lock);
		mutex_destroy(&chan->ic_sq_post_lock);
		kmem_free(chan, sizeof (iser_chan_t));
		return (NULL);
	}

	/* Set the 'channel' as the client private data */
	(void) ibt_set_chan_private(chan->ic_chanhdl, chan);

	return (chan);
}

/*
 * iser_ib_open_rc_channel
 * This function opens a RC connection on the given allocated RC channel,
 * sending a CM REQ carrying the iSER hello parameters in the CM private
 * data. Returns IDM_STATUS_SUCCESS, or the IBT status on failure.
 */
int
iser_ib_open_rc_channel(iser_chan_t *chan)
{
	ibt_ip_cm_info_t	ipcm_info;
	iser_private_data_t	iser_priv_data;
	ibt_chan_open_args_t	ocargs;
	ibt_rc_returns_t	ocreturns;
	int			status;

	mutex_enter(&chan->ic_lock);

	/*
	 * For connection establishment, the initiator sends a CM REQ using the
	 * iSER RDMA-Aware Service ID. Included are the source and destination
	 * IP addresses, and the src port.
	 */
	bzero(&ipcm_info, sizeof (ibt_ip_cm_info_t));
	ipcm_info.src_addr = chan->ic_localip;
	ipcm_info.dst_addr = chan->ic_remoteip;
	ipcm_info.src_port = chan->ic_lport;

	/*
	 * The CM Private Data field defines the iSER connection parameters
	 * such as zero based virtual address exception (ZBVAE) and Send with
	 * invalidate Exception (SIE).
	 *
	 * Solaris IBT does not currently support ZBVAE or SIE, so both
	 * exception bits are set below.
	 */
	iser_priv_data.rsvd1 = 0;
	iser_priv_data.sie = 1;
	iser_priv_data.zbvae = 1;

	status = ibt_format_ip_private_data(&ipcm_info,
	    sizeof (iser_private_data_t), &iser_priv_data);
	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_lock);
		return (status);
	}

	/*
	 * Set the SID we are attempting to connect to, based upon the
	 * remote port number.
	 */
	chan->ic_ibt_path.pi_sid = ibt_get_ip_sid(IPPROTO_TCP, chan->ic_rport);

	/* Set up the args for the channel open */
	bzero(&ocargs, sizeof (ibt_chan_open_args_t));
	ocargs.oc_path = &chan->ic_ibt_path;
	ocargs.oc_cm_handler = iser_ib_cm_handler;
	ocargs.oc_cm_clnt_private = iser_state;
	ocargs.oc_rdma_ra_out = 4;
	ocargs.oc_rdma_ra_in = 4;
	ocargs.oc_path_retry_cnt = 2;
	ocargs.oc_path_rnr_retry_cnt = 2;
	ocargs.oc_priv_data_len = sizeof (iser_private_data_t);
	ocargs.oc_priv_data = &iser_priv_data;

	bzero(&ocreturns, sizeof (ibt_rc_returns_t));

	status = ibt_open_rc_channel(chan->ic_chanhdl,
	    IBT_OCHAN_NO_FLAGS, IBT_BLOCKING, &ocargs, &ocreturns);

	if (status != IBT_SUCCESS) {
		ISER_LOG(CE_NOTE, "iser_ib_open_rc_channel failed: %d", status);
		mutex_exit(&chan->ic_lock);
		return (status);
	}

	mutex_exit(&chan->ic_lock);
	return (IDM_STATUS_SUCCESS);
}

/*
 * iser_ib_close_rc_channel
 * This function closes the RC channel related to this iser_chan handle.
 * We invoke this in a non-blocking, no callbacks context.
 *
 * NOTE(review): the call below actually passes IBT_BLOCKING, which
 * contradicts the statement above — confirm which behavior is intended.
 */
void
iser_ib_close_rc_channel(iser_chan_t *chan)
{
	int status;

	mutex_enter(&chan->ic_lock);
	status = ibt_close_rc_channel(chan->ic_chanhdl, IBT_BLOCKING, NULL,
	    0, NULL, NULL, 0);
	if (status != IBT_SUCCESS) {
		/* log and continue; there is no caller to report this to */
		ISER_LOG(CE_NOTE, "iser_ib_close_rc_channel: "
		    "ibt_close_rc_channel failed: status (%d)", status);
	}
	mutex_exit(&chan->ic_lock);
}

/*
 * iser_ib_free_rc_channel
 *
 * This function tears down an RC channel's QP initialization and frees it.
 * Note that we do not need synchronization here; the channel has been
 * closed already, so we should only have completion polling occurring. Once
 * complete, we are free to free the IBTF channel, WQ and CQ resources, and
 * our own related resources.
 *
 * NOTE(review): the wait loops below drop and re-take
 * chan->ic_conn->ic_lock, which implies the caller must enter with that
 * lock held — confirm against the call sites.
 */
void
iser_ib_free_rc_channel(iser_chan_t *chan)
{
	iser_qp_t	*iser_qp;

	iser_qp = &chan->ic_qp;

	/* Ensure the SQ is empty, sleeping between polls of the counter */
	while (chan->ic_sq_post_count != 0) {
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
	}
	mutex_destroy(&chan->ic_sq_post_lock);

	/* Ensure the RQ is empty: flush, then wait for rq_level to drain */
	(void) ibt_flush_channel(chan->ic_chanhdl);
	mutex_enter(&iser_qp->qp_lock);
	while (iser_qp->rq_level != 0) {
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		mutex_enter(&chan->ic_conn->ic_lock);
		mutex_enter(&iser_qp->qp_lock);
	}

	/* Free our QP handle */
	mutex_exit(&iser_qp->qp_lock);
	(void) iser_ib_fini_qp(iser_qp);

	/* Free the IBT channel resources */
	(void) ibt_free_channel(chan->ic_chanhdl);
	chan->ic_chanhdl = NULL;

	/* Free the CQs */
	ibt_free_cq(chan->ic_sendcq);
	ibt_free_cq(chan->ic_recvcq);

	/* Free the chan handle */
	mutex_destroy(&chan->ic_lock);
	kmem_free(chan, sizeof (iser_chan_t));
}

/*
 * iser_ib_post_recv
 *
 * This function handles keeping the RQ full on a given channel.
 * This routine will mostly be run on a taskq, and will check the
 * current fill level of the RQ, and post as many WRs as necessary
 * to fill it again.
 */

/*
 * iser_ib_post_recv_async
 *
 * Dispatch iser_ib_post_recv() onto the iser taskq, taking a connection
 * hold that the task releases. Returns the ddi_taskq_dispatch() status;
 * on failure the hold is dropped here.
 */
int
iser_ib_post_recv_async(ibt_channel_hdl_t chanhdl)
{
	iser_chan_t	*chan;
	int		status;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	/*
	 * Caller must check that chan->ic_conn->ic_stage indicates
	 * the connection is active (not closing, not closed) and
	 * it must hold the mutex across the check and the call to this
	 * function.
	 */
	ASSERT(mutex_owned(&chan->ic_conn->ic_lock));
	ASSERT((chan->ic_conn->ic_stage >= ISER_CONN_STAGE_IC_CONNECTED) &&
	    (chan->ic_conn->ic_stage <= ISER_CONN_STAGE_LOGGED_IN));
	idm_conn_hold(chan->ic_conn->ic_idmc);
	status = ddi_taskq_dispatch(iser_taskq, iser_ib_post_recv_task,
	    (void *)chanhdl, DDI_NOSLEEP);
	if (status != DDI_SUCCESS) {
		/* dispatch failed; release the hold taken above */
		idm_conn_rele(chan->ic_conn->ic_idmc);
	}

	return (status);
}

/*
 * iser_ib_post_recv_task
 *
 * Taskq wrapper for iser_ib_post_recv(); releases the connection hold
 * taken by iser_ib_post_recv_async().
 */
static void
iser_ib_post_recv_task(void *arg)
{
	ibt_channel_hdl_t	chanhdl = arg;
	iser_chan_t		*chan;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	iser_ib_post_recv(chanhdl);
	idm_conn_rele(chan->ic_conn->ic_idmc);
}

void
iser_ib_post_recv(ibt_channel_hdl_t chanhdl)
{
	iser_chan_t	*chan;
	iser_hca_t	*hca;
	iser_msg_t	*msg;
	ibt_recv_wr_t	*wrlist, wr[ISER_IB_RQ_POST_MAX];
	int		rq_space, msg_ret;
	int		total_num, npost;
	uint_t		nposted;
	int		status, i;
	iser_qp_t	*iser_qp;

	/* Pull our iSER channel handle from the private data */
	chan = (iser_chan_t *)ibt_get_chan_private(chanhdl);

	ASSERT(chan != NULL);

	mutex_enter(&chan->ic_conn->ic_lock);

	/* Bail out if the connection is closed; no need for more recv WRs */
	if ((chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSING) ||
	    (chan->ic_conn->ic_stage == ISER_CONN_STAGE_CLOSED)) {
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* get the QP handle from the iser_chan */
	iser_qp = &chan->ic_qp;

	hca = chan->ic_hca;

	if (hca == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to retrieve "
		    "HCA handle");
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* check for space to post on the RQ */
	mutex_enter(&iser_qp->qp_lock);
	rq_space = iser_qp->rq_depth - iser_qp->rq_level;
	if (rq_space == 0) {
		/* The RQ is full, clear the pending flag and return */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	/* Keep track of the lowest value for rq_min_post_level (stats) */
	if (iser_qp->rq_level < iser_qp->rq_min_post_level)
		iser_qp->rq_min_post_level = iser_qp->rq_level;

	mutex_exit(&iser_qp->qp_lock);

	/* we've room to post, so pull from the msg cache */
	msg = iser_msg_get(hca, rq_space, &msg_ret);
	if (msg == NULL) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: no message handles "
		    "available in msg cache currently");
		/*
		 * There are no messages on the cache. Wait a half-
		 * second, then redispatch ourselves to try again.
		 */
		delay(drv_usectohz(ISER_DELAY_HALF_SECOND));
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "redispatch routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
		mutex_exit(&chan->ic_conn->ic_lock);
		return;
	}

	if (msg_ret != rq_space) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: requested number of "
		    "messages not allocated: requested (%d) allocated (%d)",
		    rq_space, msg_ret);
		/* We got some, but not all, of our requested depth */
		rq_space = msg_ret;
	}

	/*
	 * Now, walk through the allocated WRs and post them,
	 * ISER_IB_RQ_POST_MAX (or less) at a time.
	 */
	wrlist = &wr[0];
	total_num = rq_space;

	while (total_num) {
		/* determine the number to post on this iteration */
		npost = (total_num > ISER_IB_RQ_POST_MAX) ?
		    ISER_IB_RQ_POST_MAX : total_num;

		/* build a list of WRs from the msg list */
		for (i = 0; i < npost; i++) {
			/* WR id carries the msg pointer for completion */
			wrlist[i].wr_id = (ibt_wrid_t)(uintptr_t)msg;
			wrlist[i].wr_nds = ISER_IB_SGLIST_SIZE;
			wrlist[i].wr_sgl = &msg->msg_ds;
			msg = msg->nextp;
		}

		/* post the list to the RQ */
		nposted = 0;
		status = ibt_post_recv(chanhdl, wrlist, npost, &nposted);
		if ((status != IBT_SUCCESS) || (nposted != npost)) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: ibt_post_recv "
			    "failed: requested (%d) posted (%d) status (%d)",
			    npost, nposted, status);
			total_num -= nposted;
			break;
		}

		/* decrement total number to post by the number posted */
		total_num -= nposted;
	}

	/* account for however many WRs actually made it onto the RQ */
	mutex_enter(&iser_qp->qp_lock);
	if (total_num != 0) {
		ISER_LOG(CE_NOTE, "iser_ib_post_recv: unable to fill RQ, "
		    "failed to post (%d) WRs", total_num);
		iser_qp->rq_level += rq_space - total_num;
	} else {
		iser_qp->rq_level += rq_space;
	}

	/*
	 * Now that we've filled the RQ, check that all of the recv WRs
	 * haven't just been immediately consumed. If so, taskqpending is
	 * still B_TRUE, so we need to fire off a taskq thread to post
	 * more WRs.
	 */
	if (iser_qp->rq_level == 0) {
		mutex_exit(&iser_qp->qp_lock);
		status = iser_ib_post_recv_async(chanhdl);
		if (status != DDI_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_post_recv: failed to "
			    "dispatch followup routine");
			/* Failed to dispatch, clear pending flag */
			mutex_enter(&iser_qp->qp_lock);
			iser_qp->rq_taskqpending = B_FALSE;
			mutex_exit(&iser_qp->qp_lock);
		}
	} else {
		/*
		 * We're done, we've filled the RQ. Clear the taskq
		 * flag so that we can run again.
		 */
		iser_qp->rq_taskqpending = B_FALSE;
		mutex_exit(&iser_qp->qp_lock);
	}

	mutex_exit(&chan->ic_conn->ic_lock);
}

/*
 * iser_ib_handle_portup_event()
 * This handles the IBT_EVENT_PORT_UP unaffiliated asynchronous event.
 *
 * To facilitate a seamless bringover of the port and configure the CM service
 * for inbound iSER service requests on this newly active port, the existing
 * IDM services will be checked for iSER support.
 * If an iSER service was already created, then this service will simply be
 * bound to the gid of the newly active port. If on the other hand, the CM
 * service did not exist, i.e. only socket communication, then a new CM
 * service will be first registered with the saved service parameters and
 * then bound to the newly active port.
 */
/* ARGSUSED */
static void
iser_ib_handle_portup_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event)
{
	iser_hca_t	*hca;
	ib_gid_t	gid;
	idm_svc_t	*idm_svc;
	int		status;

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: HCA(0x%llx) port(%d)",
	    (longlong_t)event->ev_hca_guid, event->ev_port);

	/*
	 * Query all ports on the HCA and update the port information
	 * maintained in the iser_hca_t structure
	 */
	hca = iser_ib_guid2hca(event->ev_hca_guid);
	if (hca == NULL) {

		/* HCA is just made available, first port on that HCA */
		hca = iser_ib_alloc_hca(event->ev_hca_guid);

		mutex_enter(&iser_state->is_hcalist_lock);
		list_insert_tail(&iser_state->is_hcalist, hca);
		iser_state->is_num_hcas++;
		mutex_exit(&iser_state->is_hcalist_lock);

	} else {

		status = iser_ib_update_hcaports(hca);

		if (status != IBT_SUCCESS) {
			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): iser_ib_update_hcaports failed: "
			    "HCA(0x%llx) port(%d)", status,
			    (longlong_t)event->ev_hca_guid, event->ev_port);
			return;
		}
	}

	/* gid of the newly active port (ev_port is 1-based) */
	gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0];

	/*
	 * Iterate through the global list of IDM target services
	 * and check for existing iSER CM service.
	 */
	mutex_enter(&idm.idm_global_mutex);
	for (idm_svc = list_head(&idm.idm_tgt_svc_list);
	    idm_svc != NULL;
	    idm_svc = list_next(&idm.idm_tgt_svc_list, idm_svc)) {


		if (idm_svc->is_iser_svc == NULL) {

			/* Establish a new CM service for iSER requests */
			status = iser_tgt_svc_create(
			    &idm_svc->is_svc_req, idm_svc);

			if (status != IBT_SUCCESS) {
				ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
				    "status(0x%x): iser_tgt_svc_create failed: "
				    "HCA(0x%llx) port(%d)", status,
				    (longlong_t)event->ev_hca_guid,
				    event->ev_port);

				continue;
			}
		}

		status = iser_ib_activate_port(
		    idm_svc, event->ev_hca_guid, gid);
		if (status != IBT_SUCCESS) {

			ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event "
			    "status(0x%x): Bind service on port "
			    "(%llx:%llx) failed",
			    status, (longlong_t)gid.gid_prefix,
			    (longlong_t)gid.gid_guid);

			continue;
		}
		ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event: service bound "
		    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
		    event->ev_port);
	}
	mutex_exit(&idm.idm_global_mutex);

	ISER_LOG(CE_NOTE, "iser_ib_handle_portup_event success: "
	    "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid,
	    event->ev_port);
}

/*
 * iser_ib_handle_portdown_event()
 * This handles the IBT_EVENT_PORT_DOWN unaffiliated asynchronous error.
 *
 * Unconfigure the CM service on the deactivated port and teardown the
 * connections that are using the CM service.
1034 */ 1035 /* ARGSUSED */ 1036 static void 1037 iser_ib_handle_portdown_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 1038 { 1039 iser_hca_t *hca; 1040 ib_gid_t gid; 1041 int status; 1042 1043 /* 1044 * Query all ports on the HCA and update the port information 1045 * maintainted in the iser_hca_t structure 1046 */ 1047 hca = iser_ib_guid2hca(event->ev_hca_guid); 1048 ASSERT(hca != NULL); 1049 1050 status = iser_ib_update_hcaports(hca); 1051 if (status != IBT_SUCCESS) { 1052 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event status(0x%x): " 1053 "ibt_ib_update_hcaports failed: HCA(0x%llx) port(%d)", 1054 status, (longlong_t)event->ev_hca_guid, event->ev_port); 1055 return; 1056 } 1057 1058 /* get the gid of the new port */ 1059 gid = hca->hca_port_info[event->ev_port - 1].p_sgid_tbl[0]; 1060 iser_ib_deactivate_port(event->ev_hca_guid, gid); 1061 1062 ISER_LOG(CE_NOTE, "iser_ib_handle_portdown_event success: " 1063 "HCA(0x%llx) port(%d)", (longlong_t)event->ev_hca_guid, 1064 event->ev_port); 1065 } 1066 1067 /* 1068 * iser_ib_handle_hca_detach_event() 1069 * Quiesce all activity bound for the port, teardown the connection, unbind 1070 * iSER services on all ports and release the HCA handle. 1071 */ 1072 /* ARGSUSED */ 1073 static void 1074 iser_ib_handle_hca_detach_event(ibt_hca_hdl_t hdl, ibt_async_event_t *event) 1075 { 1076 iser_hca_t *nexthca, *hca; 1077 int i, status; 1078 1079 ISER_LOG(CE_NOTE, "iser_ib_handle_hca_detach_event: HCA(0x%llx)", 1080 (longlong_t)event->ev_hca_guid); 1081 1082 hca = iser_ib_guid2hca(event->ev_hca_guid); 1083 for (i = 0; i < hca->hca_num_ports; i++) { 1084 iser_ib_deactivate_port(hca->hca_guid, 1085 hca->hca_port_info[i].p_sgid_tbl[0]); 1086 } 1087 1088 /* 1089 * Update the HCA list maintained in the iser_state. Free the 1090 * resources allocated to the HCA, i.e. 
caches, protection domain 1091 */ 1092 mutex_enter(&iser_state->is_hcalist_lock); 1093 1094 for (hca = list_head(&iser_state->is_hcalist); 1095 hca != NULL; 1096 hca = nexthca) { 1097 1098 nexthca = list_next(&iser_state->is_hcalist, hca); 1099 1100 if (hca->hca_guid == event->ev_hca_guid) { 1101 1102 list_remove(&iser_state->is_hcalist, hca); 1103 iser_state->is_num_hcas--; 1104 1105 status = iser_ib_free_hca(hca); 1106 if (status != DDI_SUCCESS) { 1107 ISER_LOG(CE_WARN, "iser_ib_handle_hca_detach: " 1108 "Failed to free hca(%p)", (void *)hca); 1109 list_insert_tail(&iser_state->is_hcalist, hca); 1110 iser_state->is_num_hcas++; 1111 } 1112 /* No way to return status to IBT if this fails */ 1113 } 1114 } 1115 mutex_exit(&iser_state->is_hcalist_lock); 1116 1117 } 1118 1119 /* 1120 * iser_ib_async_handler 1121 * An IBT Asynchronous Event handler is registered it with the framework and 1122 * passed via the ibt_attach() routine. This function handles the following 1123 * asynchronous events. 1124 * IBT_EVENT_PORT_UP 1125 * IBT_ERROR_PORT_DOWN 1126 * IBT_HCA_ATTACH_EVENT 1127 * IBT_HCA_DETACH_EVENT 1128 */ 1129 /* ARGSUSED */ 1130 void 1131 iser_ib_async_handler(void *clntp, ibt_hca_hdl_t hdl, ibt_async_code_t code, 1132 ibt_async_event_t *event) 1133 { 1134 switch (code) { 1135 case IBT_EVENT_PORT_UP: 1136 iser_ib_handle_portup_event(hdl, event); 1137 break; 1138 1139 case IBT_ERROR_PORT_DOWN: 1140 iser_ib_handle_portdown_event(hdl, event); 1141 break; 1142 1143 case IBT_HCA_ATTACH_EVENT: 1144 /* 1145 * A new HCA device is available for use, ignore this 1146 * event because the corresponding IBT_EVENT_PORT_UP 1147 * events will get triggered and handled accordingly. 
1148 */ 1149 break; 1150 1151 case IBT_HCA_DETACH_EVENT: 1152 iser_ib_handle_hca_detach_event(hdl, event); 1153 break; 1154 1155 default: 1156 break; 1157 } 1158 } 1159 1160 /* 1161 * iser_ib_init_hcas 1162 * 1163 * This function opens all the HCA devices, gathers the HCA state information 1164 * and adds the HCA handle for each HCA found in the iser_soft_state. 1165 */ 1166 static int 1167 iser_ib_init_hcas(void) 1168 { 1169 ib_guid_t *guid; 1170 int num_hcas; 1171 int i; 1172 iser_hca_t *hca; 1173 1174 /* Retrieve the HCA list */ 1175 num_hcas = ibt_get_hca_list(&guid); 1176 if (num_hcas == 0) { 1177 /* 1178 * This shouldn't happen, but might if we have all HCAs 1179 * detach prior to initialization. 1180 */ 1181 return (DDI_FAILURE); 1182 } 1183 1184 /* Initialize the hcalist lock */ 1185 mutex_init(&iser_state->is_hcalist_lock, NULL, MUTEX_DRIVER, NULL); 1186 1187 /* Create the HCA list */ 1188 list_create(&iser_state->is_hcalist, sizeof (iser_hca_t), 1189 offsetof(iser_hca_t, hca_node)); 1190 1191 for (i = 0; i < num_hcas; i++) { 1192 1193 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: initializing HCA " 1194 "(0x%llx)", (longlong_t)guid[i]); 1195 1196 hca = iser_ib_alloc_hca(guid[i]); 1197 if (hca == NULL) { 1198 /* This shouldn't happen, teardown and fail */ 1199 (void) iser_ib_fini_hcas(); 1200 (void) ibt_free_hca_list(guid, num_hcas); 1201 return (DDI_FAILURE); 1202 } 1203 1204 mutex_enter(&iser_state->is_hcalist_lock); 1205 list_insert_tail(&iser_state->is_hcalist, hca); 1206 iser_state->is_num_hcas++; 1207 mutex_exit(&iser_state->is_hcalist_lock); 1208 1209 } 1210 1211 /* Free the IBT HCA list */ 1212 (void) ibt_free_hca_list(guid, num_hcas); 1213 1214 /* Check that we've initialized at least one HCA */ 1215 mutex_enter(&iser_state->is_hcalist_lock); 1216 if (list_is_empty(&iser_state->is_hcalist)) { 1217 ISER_LOG(CE_NOTE, "iser_ib_init_hcas: failed to initialize " 1218 "any HCAs"); 1219 1220 mutex_exit(&iser_state->is_hcalist_lock); 1221 (void) 
iser_ib_fini_hcas(); 1222 return (DDI_FAILURE); 1223 } 1224 mutex_exit(&iser_state->is_hcalist_lock); 1225 1226 return (DDI_SUCCESS); 1227 } 1228 1229 /* 1230 * iser_ib_fini_hcas 1231 * 1232 * Teardown the iSER HCA list initialized above. 1233 */ 1234 static int 1235 iser_ib_fini_hcas(void) 1236 { 1237 iser_hca_t *nexthca, *hca; 1238 int status; 1239 1240 mutex_enter(&iser_state->is_hcalist_lock); 1241 for (hca = list_head(&iser_state->is_hcalist); 1242 hca != NULL; 1243 hca = nexthca) { 1244 1245 nexthca = list_next(&iser_state->is_hcalist, hca); 1246 1247 list_remove(&iser_state->is_hcalist, hca); 1248 1249 status = iser_ib_free_hca(hca); 1250 if (status != IBT_SUCCESS) { 1251 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to free " 1252 "HCA during fini"); 1253 list_insert_tail(&iser_state->is_hcalist, hca); 1254 return (DDI_FAILURE); 1255 } 1256 1257 iser_state->is_num_hcas--; 1258 1259 } 1260 mutex_exit(&iser_state->is_hcalist_lock); 1261 list_destroy(&iser_state->is_hcalist); 1262 mutex_destroy(&iser_state->is_hcalist_lock); 1263 1264 return (DDI_SUCCESS); 1265 } 1266 1267 /* 1268 * iser_ib_alloc_hca 1269 * 1270 * This function opens the given HCA device, gathers the HCA state information 1271 * and adds the HCA handle 1272 */ 1273 static iser_hca_t * 1274 iser_ib_alloc_hca(ib_guid_t guid) 1275 { 1276 iser_hca_t *hca; 1277 int status; 1278 1279 /* Allocate an iser_hca_t HCA handle */ 1280 hca = (iser_hca_t *)kmem_zalloc(sizeof (iser_hca_t), KM_SLEEP); 1281 1282 /* Open this HCA */ 1283 status = ibt_open_hca(iser_state->is_ibhdl, guid, &hca->hca_hdl); 1284 if (status != IBT_SUCCESS) { 1285 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_open_hca failed:" 1286 " guid (0x%llx) status (0x%x)", (longlong_t)guid, status); 1287 kmem_free(hca, sizeof (iser_hca_t)); 1288 return (NULL); 1289 } 1290 1291 hca->hca_guid = guid; 1292 hca->hca_clnt_hdl = iser_state->is_ibhdl; 1293 1294 /* Query the HCA */ 1295 status = ibt_query_hca(hca->hca_hdl, &hca->hca_attr); 1296 if (status != 
IBT_SUCCESS) { 1297 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_query_hca " 1298 "failure: guid (0x%llx) status (0x%x)", 1299 (longlong_t)guid, status); 1300 (void) ibt_close_hca(hca->hca_hdl); 1301 kmem_free(hca, sizeof (iser_hca_t)); 1302 return (NULL); 1303 } 1304 1305 /* Query all ports on the HCA */ 1306 status = ibt_query_hca_ports(hca->hca_hdl, 0, 1307 &hca->hca_port_info, &hca->hca_num_ports, 1308 &hca->hca_port_info_sz); 1309 if (status != IBT_SUCCESS) { 1310 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: " 1311 "ibt_query_hca_ports failure: guid (0x%llx) " 1312 "status (0x%x)", (longlong_t)guid, status); 1313 (void) ibt_close_hca(hca->hca_hdl); 1314 kmem_free(hca, sizeof (iser_hca_t)); 1315 return (NULL); 1316 } 1317 1318 /* Allocate a single PD on this HCA */ 1319 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, 1320 &hca->hca_pdhdl); 1321 if (status != IBT_SUCCESS) { 1322 ISER_LOG(CE_NOTE, "iser_ib_alloc_hca: ibt_alloc_pd " 1323 "failure: guid (0x%llx) status (0x%x)", 1324 (longlong_t)guid, status); 1325 (void) ibt_close_hca(hca->hca_hdl); 1326 ibt_free_portinfo(hca->hca_port_info, hca->hca_port_info_sz); 1327 kmem_free(hca, sizeof (iser_hca_t)); 1328 return (NULL); 1329 } 1330 1331 /* Initialize the message and data MR caches for this HCA */ 1332 iser_init_hca_caches(hca); 1333 1334 return (hca); 1335 } 1336 1337 static int 1338 iser_ib_free_hca(iser_hca_t *hca) 1339 { 1340 int status; 1341 ibt_hca_portinfo_t *hca_port_info; 1342 uint_t hca_port_info_sz; 1343 1344 ASSERT(hca != NULL); 1345 if (hca->hca_failed) 1346 return (DDI_FAILURE); 1347 1348 hca_port_info = hca->hca_port_info; 1349 hca_port_info_sz = hca->hca_port_info_sz; 1350 1351 /* 1352 * Free the memory regions before freeing 1353 * the associated protection domain 1354 */ 1355 iser_fini_hca_caches(hca); 1356 1357 status = ibt_free_pd(hca->hca_hdl, hca->hca_pdhdl); 1358 if (status != IBT_SUCCESS) { 1359 ISER_LOG(CE_NOTE, "iser_ib_free_hca: failed to free PD " 1360 "status=0x%x", status); 1361 goto 
out_caches; 1362 } 1363 1364 status = ibt_close_hca(hca->hca_hdl); 1365 if (status != IBT_SUCCESS) { 1366 ISER_LOG(CE_NOTE, "iser_ib_fini_hcas: failed to close HCA " 1367 "status=0x%x", status); 1368 goto out_pd; 1369 } 1370 1371 ibt_free_portinfo(hca_port_info, hca_port_info_sz); 1372 1373 kmem_free(hca, sizeof (iser_hca_t)); 1374 return (DDI_SUCCESS); 1375 1376 /* 1377 * We only managed to partially tear down the HCA, try to put it back 1378 * like it was before returning. 1379 */ 1380 out_pd: 1381 status = ibt_alloc_pd(hca->hca_hdl, IBT_PD_NO_FLAGS, &hca->hca_pdhdl); 1382 if (status != IBT_SUCCESS) { 1383 hca->hca_failed = B_TRUE; 1384 /* Report error and exit */ 1385 ISER_LOG(CE_NOTE, "iser_ib_free_hca: could not re-alloc PD " 1386 "status=0x%x", status); 1387 return (DDI_FAILURE); 1388 } 1389 1390 out_caches: 1391 iser_init_hca_caches(hca); 1392 1393 return (DDI_FAILURE); 1394 } 1395 1396 static int 1397 iser_ib_update_hcaports(iser_hca_t *hca) 1398 { 1399 ibt_hca_portinfo_t *pinfop, *oldpinfop; 1400 uint_t size, oldsize, nport; 1401 int status; 1402 1403 ASSERT(hca != NULL); 1404 1405 status = ibt_query_hca_ports(hca->hca_hdl, 0, &pinfop, &nport, &size); 1406 if (status != IBT_SUCCESS) { 1407 ISER_LOG(CE_NOTE, "ibt_query_hca_ports failed: %d", status); 1408 return (status); 1409 } 1410 1411 oldpinfop = hca->hca_port_info; 1412 oldsize = hca->hca_port_info_sz; 1413 hca->hca_port_info = pinfop; 1414 hca->hca_port_info_sz = size; 1415 1416 (void) ibt_free_portinfo(oldpinfop, oldsize); 1417 1418 return (IBT_SUCCESS); 1419 } 1420 1421 /* 1422 * iser_ib_gid2hca 1423 * Given a gid, find the corresponding hca 1424 */ 1425 iser_hca_t * 1426 iser_ib_gid2hca(ib_gid_t gid) 1427 { 1428 1429 iser_hca_t *hca; 1430 int i; 1431 1432 mutex_enter(&iser_state->is_hcalist_lock); 1433 for (hca = list_head(&iser_state->is_hcalist); 1434 hca != NULL; 1435 hca = list_next(&iser_state->is_hcalist, hca)) { 1436 1437 for (i = 0; i < hca->hca_num_ports; i++) { 1438 if 
((hca->hca_port_info[i].p_sgid_tbl[0].gid_prefix == 1439 gid.gid_prefix) && 1440 (hca->hca_port_info[i].p_sgid_tbl[0].gid_guid == 1441 gid.gid_guid)) { 1442 1443 mutex_exit(&iser_state->is_hcalist_lock); 1444 1445 return (hca); 1446 } 1447 } 1448 } 1449 mutex_exit(&iser_state->is_hcalist_lock); 1450 return (NULL); 1451 } 1452 1453 /* 1454 * iser_ib_guid2hca 1455 * Given a HCA guid, find the corresponding HCA 1456 */ 1457 iser_hca_t * 1458 iser_ib_guid2hca(ib_guid_t guid) 1459 { 1460 1461 iser_hca_t *hca; 1462 1463 mutex_enter(&iser_state->is_hcalist_lock); 1464 for (hca = list_head(&iser_state->is_hcalist); 1465 hca != NULL; 1466 hca = list_next(&iser_state->is_hcalist, hca)) { 1467 1468 if (hca->hca_guid == guid) { 1469 mutex_exit(&iser_state->is_hcalist_lock); 1470 return (hca); 1471 } 1472 } 1473 mutex_exit(&iser_state->is_hcalist_lock); 1474 return (NULL); 1475 } 1476 1477 /* 1478 * iser_ib_conv_sockaddr2ibtaddr 1479 * This function converts a socket address into the IBT format 1480 */ 1481 void iser_ib_conv_sockaddr2ibtaddr( 1482 idm_sockaddr_t *saddr, ibt_ip_addr_t *ibt_addr) 1483 { 1484 if (saddr == NULL) { 1485 ibt_addr->family = AF_UNSPEC; 1486 ibt_addr->un.ip4addr = 0; 1487 } else { 1488 switch (saddr->sin.sa_family) { 1489 case AF_INET: 1490 1491 ibt_addr->family = saddr->sin4.sin_family; 1492 ibt_addr->un.ip4addr = saddr->sin4.sin_addr.s_addr; 1493 break; 1494 1495 case AF_INET6: 1496 1497 ibt_addr->family = saddr->sin6.sin6_family; 1498 ibt_addr->un.ip6addr = saddr->sin6.sin6_addr; 1499 break; 1500 1501 default: 1502 ibt_addr->family = AF_UNSPEC; 1503 } 1504 1505 } 1506 } 1507 1508 /* 1509 * iser_ib_conv_ibtaddr2sockaddr 1510 * This function converts an IBT ip address handle to a sockaddr 1511 */ 1512 void iser_ib_conv_ibtaddr2sockaddr(struct sockaddr_storage *ss, 1513 ibt_ip_addr_t *ibt_addr, in_port_t port) 1514 { 1515 struct sockaddr_in *sin; 1516 struct sockaddr_in6 *sin6; 1517 1518 switch (ibt_addr->family) { 1519 case AF_INET: 1520 case 
AF_UNSPEC: 1521 1522 sin = (struct sockaddr_in *)ibt_addr; 1523 sin->sin_port = ntohs(port); 1524 bcopy(sin, ss, sizeof (struct sockaddr_in)); 1525 break; 1526 1527 case AF_INET6: 1528 1529 sin6 = (struct sockaddr_in6 *)ibt_addr; 1530 sin6->sin6_port = ntohs(port); 1531 bcopy(sin6, ss, sizeof (struct sockaddr_in6)); 1532 break; 1533 1534 default: 1535 ISER_LOG(CE_NOTE, "iser_ib_conv_ibtaddr2sockaddr: " 1536 "unknown family type: 0x%x", ibt_addr->family); 1537 } 1538 } 1539 1540 /* 1541 * iser_ib_setup_cq 1542 * This function sets up the Completion Queue size and allocates the specified 1543 * Completion Queue 1544 */ 1545 static int 1546 iser_ib_setup_cq(ibt_hca_hdl_t hca_hdl, uint_t cq_size, ibt_cq_hdl_t *cq_hdl) 1547 { 1548 1549 ibt_cq_attr_t cq_attr; 1550 int status; 1551 1552 cq_attr.cq_size = cq_size; 1553 cq_attr.cq_sched = 0; 1554 cq_attr.cq_flags = IBT_CQ_NO_FLAGS; 1555 1556 /* Allocate a Completion Queue */ 1557 status = ibt_alloc_cq(hca_hdl, &cq_attr, cq_hdl, NULL); 1558 if (status != IBT_SUCCESS) { 1559 ISER_LOG(CE_NOTE, "iser_ib_setup_cq: ibt_alloc_cq failure (%d)", 1560 status); 1561 return (status); 1562 } 1563 1564 return (ISER_STATUS_SUCCESS); 1565 } 1566 1567 /* 1568 * iser_ib_setup_chanargs 1569 * 1570 */ 1571 static void 1572 iser_ib_setup_chanargs(uint8_t hca_port, ibt_cq_hdl_t scq_hdl, 1573 ibt_cq_hdl_t rcq_hdl, uint_t sq_size, uint_t rq_size, 1574 ibt_pd_hdl_t hca_pdhdl, ibt_rc_chan_alloc_args_t *cargs) 1575 { 1576 1577 bzero(cargs, sizeof (ibt_rc_chan_alloc_args_t)); 1578 1579 /* 1580 * Set up the size of the channels send queue, receive queue and the 1581 * maximum number of elements in a scatter gather list of work requests 1582 * posted to the send and receive queues. 
1583 */ 1584 cargs->rc_sizes.cs_sq = sq_size; 1585 cargs->rc_sizes.cs_rq = rq_size; 1586 cargs->rc_sizes.cs_sq_sgl = ISER_IB_SGLIST_SIZE; 1587 cargs->rc_sizes.cs_rq_sgl = ISER_IB_SGLIST_SIZE; 1588 1589 /* 1590 * All Work requests signaled on a WR basis will receive a send 1591 * request completion. 1592 */ 1593 cargs->rc_flags = IBT_ALL_SIGNALED; 1594 1595 /* Enable RDMA read and RDMA write on the channel end points */ 1596 cargs->rc_control = IBT_CEP_RDMA_RD | IBT_CEP_RDMA_WR; 1597 1598 /* Set the local hca port on which the channel is allocated */ 1599 cargs->rc_hca_port_num = hca_port; 1600 1601 /* Set the Send and Receive Completion Queue handles */ 1602 cargs->rc_scq = scq_hdl; 1603 cargs->rc_rcq = rcq_hdl; 1604 1605 /* Set the protection domain associated with the channel */ 1606 cargs->rc_pd = hca_pdhdl; 1607 1608 /* No SRQ usage */ 1609 cargs->rc_srq = NULL; 1610 } 1611 1612 /* 1613 * iser_ib_init_qp 1614 * Initialize the QP handle 1615 */ 1616 void 1617 iser_ib_init_qp(iser_chan_t *chan, uint_t sq_size, uint_t rq_size) 1618 { 1619 /* Initialize the handle lock */ 1620 mutex_init(&chan->ic_qp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1621 1622 /* Record queue sizes */ 1623 chan->ic_qp.sq_size = sq_size; 1624 chan->ic_qp.rq_size = rq_size; 1625 1626 /* Initialize the RQ monitoring data */ 1627 chan->ic_qp.rq_depth = rq_size; 1628 chan->ic_qp.rq_level = 0; 1629 chan->ic_qp.rq_lwm = (chan->ic_recvcq_sz * ISER_IB_RQ_LWM_PCT) / 100; 1630 1631 /* Initialize the taskq flag */ 1632 chan->ic_qp.rq_taskqpending = B_FALSE; 1633 } 1634 1635 /* 1636 * iser_ib_fini_qp 1637 * Teardown the QP handle 1638 */ 1639 void 1640 iser_ib_fini_qp(iser_qp_t *qp) 1641 { 1642 /* Destroy the handle lock */ 1643 mutex_destroy(&qp->qp_lock); 1644 } 1645 1646 static int 1647 iser_ib_activate_port(idm_svc_t *idm_svc, ib_guid_t guid, ib_gid_t gid) 1648 { 1649 iser_svc_t *iser_svc; 1650 iser_sbind_t *is_sbind; 1651 int status; 1652 1653 iser_svc = idm_svc->is_iser_svc; 1654 1655 /* 1656 * Save the 
address of the service bind handle in the 1657 * iser_svc_t to undo the service binding at a later time 1658 */ 1659 is_sbind = kmem_zalloc(sizeof (iser_sbind_t), KM_SLEEP); 1660 is_sbind->is_gid = gid; 1661 is_sbind->is_guid = guid; 1662 1663 status = ibt_bind_service(iser_svc->is_srvhdl, gid, NULL, 1664 idm_svc, &is_sbind->is_sbindhdl); 1665 1666 if (status != IBT_SUCCESS) { 1667 ISER_LOG(CE_NOTE, "iser_ib_activate_port: status(0x%x): " 1668 "Bind service(%llx) on port(%llx:%llx) failed", 1669 status, (longlong_t)iser_svc->is_svcid, 1670 (longlong_t)gid.gid_prefix, (longlong_t)gid.gid_guid); 1671 1672 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1673 1674 return (status); 1675 } 1676 1677 list_insert_tail(&iser_svc->is_sbindlist, is_sbind); 1678 1679 return (IBT_SUCCESS); 1680 } 1681 1682 static void 1683 iser_ib_deactivate_port(ib_guid_t hca_guid, ib_gid_t gid) 1684 { 1685 iser_svc_t *iser_svc; 1686 iser_conn_t *iser_conn; 1687 iser_sbind_t *is_sbind; 1688 idm_conn_t *idm_conn; 1689 1690 /* 1691 * Iterate through the global list of IDM target connections. 1692 * Issue a TRANSPORT_FAIL for any connections on this port, and 1693 * if there is a bound service running on the port, tear it down. 
1694 */ 1695 mutex_enter(&idm.idm_global_mutex); 1696 for (idm_conn = list_head(&idm.idm_tgt_conn_list); 1697 idm_conn != NULL; 1698 idm_conn = list_next(&idm.idm_tgt_conn_list, idm_conn)) { 1699 1700 if (idm_conn->ic_transport_type != IDM_TRANSPORT_TYPE_ISER) { 1701 /* this is not an iSER connection, skip it */ 1702 continue; 1703 } 1704 1705 iser_conn = idm_conn->ic_transport_private; 1706 if (iser_conn->ic_chan->ic_ibt_path.pi_hca_guid != hca_guid) { 1707 /* this iSER connection is on a different port */ 1708 continue; 1709 } 1710 1711 /* Fail the transport for this connection */ 1712 idm_conn_event(idm_conn, CE_TRANSPORT_FAIL, IDM_STATUS_FAIL); 1713 1714 if (idm_conn->ic_conn_type == CONN_TYPE_INI) { 1715 /* initiator connection, nothing else to do */ 1716 continue; 1717 } 1718 1719 /* Check for a service binding */ 1720 iser_svc = idm_conn->ic_svc_binding->is_iser_svc; 1721 is_sbind = iser_ib_get_bind(iser_svc, hca_guid, gid); 1722 if (is_sbind != NULL) { 1723 /* This service is still bound, tear it down */ 1724 ibt_unbind_service(iser_svc->is_srvhdl, 1725 is_sbind->is_sbindhdl); 1726 list_remove(&iser_svc->is_sbindlist, is_sbind); 1727 kmem_free(is_sbind, sizeof (iser_sbind_t)); 1728 } 1729 } 1730 mutex_exit(&idm.idm_global_mutex); 1731 } 1732 1733 static iser_sbind_t * 1734 iser_ib_get_bind(iser_svc_t *iser_svc, ib_guid_t hca_guid, ib_gid_t gid) 1735 { 1736 iser_sbind_t *is_sbind; 1737 1738 for (is_sbind = list_head(&iser_svc->is_sbindlist); 1739 is_sbind != NULL; 1740 is_sbind = list_next(&iser_svc->is_sbindlist, is_sbind)) { 1741 1742 if ((is_sbind->is_guid == hca_guid) && 1743 (is_sbind->is_gid.gid_prefix == gid.gid_prefix) && 1744 (is_sbind->is_gid.gid_guid == gid.gid_guid)) { 1745 return (is_sbind); 1746 } 1747 } 1748 return (NULL); 1749 } 1750