1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* 26 * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 /* 58 * Sun elects to include this software in Sun product 59 * under the OpenIB BSD license. 60 * 61 * 62 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 63 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 64 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 65 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 66 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 67 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 68 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 69 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 70 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 71 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 72 * POSSIBILITY OF SUCH DAMAGE. 73 */ 74 75 #include <sys/stream.h> 76 #include <sys/ib/clients/rds/rdsib_cm.h> 77 #include <sys/ib/clients/rds/rdsib_ib.h> 78 #include <sys/ib/clients/rds/rdsib_buf.h> 79 #include <sys/ib/clients/rds/rdsib_ep.h> 80 #include <sys/ib/clients/rds/rds_kstat.h> 81 #include <sys/zone.h> 82 83 #define RDS_POLL_CQ_IN_2TICKS 1 84 85 /* 86 * This File contains the endpoint related calls 87 */ 88 89 extern boolean_t rds_islocal(ipaddr_t addr); 90 extern uint_t rds_wc_signal; 91 92 #define RDS_LOOPBACK 0 93 #define RDS_LOCAL 1 94 #define RDS_REMOTE 2 95 96 #define IBT_IPADDR 1 97 98 static uint8_t 99 rds_is_port_marked(rds_session_t *sp, in_port_t port, uint_t qualifier) 100 { 101 uint8_t ret; 102 103 switch (qualifier) { 104 case RDS_LOOPBACK: /* loopback */ 105 rw_enter(&rds_loopback_portmap_lock, RW_READER); 106 ret = (rds_loopback_portmap[port/8] & (1 << (port % 8))); 107 rw_exit(&rds_loopback_portmap_lock); 108 break; 109 110 case RDS_LOCAL: /* Session local */ 111 ASSERT(sp != NULL); 112 rw_enter(&sp->session_local_portmap_lock, RW_READER); 113 ret = (sp->session_local_portmap[port/8] & (1 << (port % 8))); 114 rw_exit(&sp->session_local_portmap_lock); 115 break; 116 117 case RDS_REMOTE: /* Session remote */ 118 ASSERT(sp != NULL); 119 rw_enter(&sp->session_remote_portmap_lock, RW_READER); 120 ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8))); 121 rw_exit(&sp->session_remote_portmap_lock); 122 break; 123 } 124 125 return (ret); 126 } 127 128 static uint8_t 129 rds_check_n_mark_port(rds_session_t *sp, in_port_t port, uint_t qualifier) 130 { 131 uint8_t ret; 132 133 switch (qualifier) { 134 case RDS_LOOPBACK: /* loopback */ 135 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 136 ret = (rds_loopback_portmap[port/8] & (1 << (port % 8))); 137 if (!ret) { 138 /* port is not marked, mark it */ 139 rds_loopback_portmap[port/8] = 140 rds_loopback_portmap[port/8] | (1 << (port % 8)); 141 } 142 rw_exit(&rds_loopback_portmap_lock); 143 break; 144 145 case RDS_LOCAL: /* Session local */ 146 ASSERT(sp != NULL); 147 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 148 ret = (sp->session_local_portmap[port/8] & (1 << (port % 8))); 149 if (!ret) { 150 /* port is not marked, mark it */ 151 sp->session_local_portmap[port/8] = 152 sp->session_local_portmap[port/8] | 153 (1 << (port % 8)); 154 } 155 rw_exit(&sp->session_local_portmap_lock); 156 break; 157 158 case RDS_REMOTE: /* Session remote */ 159 ASSERT(sp != NULL); 160 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 161 ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8))); 162 if (!ret) { 163 /* port is not marked, mark it */ 164 sp->session_remote_portmap[port/8] = 165 sp->session_remote_portmap[port/8] | 166 (1 << (port % 8)); 167 } 168 rw_exit(&sp->session_remote_portmap_lock); 169 break; 170 } 171 172 return (ret); 173 } 174 175 static uint8_t 176 rds_check_n_unmark_port(rds_session_t *sp, in_port_t port, uint_t qualifier) 177 { 178 uint8_t ret; 179 180 switch (qualifier) { 181 case RDS_LOOPBACK: /* loopback */ 182 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 183 ret = (rds_loopback_portmap[port/8] & (1 << (port % 8))); 184 if (ret) { 185 /* port is marked, unmark it */ 186 rds_loopback_portmap[port/8] = 187 rds_loopback_portmap[port/8] & ~(1 << (port % 8)); 188 } 189 rw_exit(&rds_loopback_portmap_lock); 190 break; 191 192 case RDS_LOCAL: /* Session local */ 193 ASSERT(sp != NULL); 194 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 195 ret = (sp->session_local_portmap[port/8] & (1 << (port % 8))); 196 if (ret) { 197 /* port is marked, unmark it */ 198 sp->session_local_portmap[port/8] = 199 sp->session_local_portmap[port/8] & 200 ~(1 << (port % 8)); 201 } 202 rw_exit(&sp->session_local_portmap_lock); 203 break; 204 205 case RDS_REMOTE: /* Session remote */ 206 ASSERT(sp != NULL); 207 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 208 ret = (sp->session_remote_portmap[port/8] & (1 << (port % 8))); 209 if (ret) { 210 /* port is marked, unmark it */ 211 sp->session_remote_portmap[port/8] = 212 sp->session_remote_portmap[port/8] & 213 ~(1 << (port % 8)); 214 } 215 rw_exit(&sp->session_remote_portmap_lock); 216 break; 217 } 218 219 return (ret); 220 } 221 222 static void 223 rds_mark_all_ports(rds_session_t *sp, uint_t qualifier) 224 { 225 switch (qualifier) { 226 case RDS_LOOPBACK: /* loopback */ 227 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 228 (void) memset(rds_loopback_portmap, 0xFF, RDS_PORT_MAP_SIZE); 229 rw_exit(&rds_loopback_portmap_lock); 230 break; 231 232 case RDS_LOCAL: /* Session local */ 233 ASSERT(sp != NULL); 234 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 235 (void) memset(sp->session_local_portmap, 0xFF, 236 RDS_PORT_MAP_SIZE); 237 rw_exit(&sp->session_local_portmap_lock); 238 break; 239 240 case RDS_REMOTE: /* Session remote */ 241 ASSERT(sp != NULL); 242 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 243 (void) memset(sp->session_remote_portmap, 0xFF, 244 RDS_PORT_MAP_SIZE); 245 rw_exit(&sp->session_remote_portmap_lock); 246 break; 247 } 248 } 249 250 static void 251 rds_unmark_all_ports(rds_session_t *sp, uint_t qualifier) 252 { 253 switch (qualifier) { 254 case RDS_LOOPBACK: /* loopback */ 255 rw_enter(&rds_loopback_portmap_lock, RW_WRITER); 256 bzero(rds_loopback_portmap, RDS_PORT_MAP_SIZE); 257 rw_exit(&rds_loopback_portmap_lock); 258 break; 259 260 case RDS_LOCAL: /* Session local */ 261 ASSERT(sp != NULL); 262 rw_enter(&sp->session_local_portmap_lock, RW_WRITER); 263 bzero(sp->session_local_portmap, RDS_PORT_MAP_SIZE); 264 rw_exit(&sp->session_local_portmap_lock); 265 break; 266 267 case RDS_REMOTE: /* Session remote */ 268 ASSERT(sp != NULL); 269 rw_enter(&sp->session_remote_portmap_lock, RW_WRITER); 270 bzero(sp->session_remote_portmap, RDS_PORT_MAP_SIZE); 271 rw_exit(&sp->session_remote_portmap_lock); 272 break; 273 } 274 } 275 276 static boolean_t 277 rds_add_session(rds_session_t *sp, boolean_t locked) 278 { 279 boolean_t retval = B_TRUE; 280 281 RDS_DPRINTF2("rds_add_session", "Enter: SP(%p)", sp); 282 283 if (!locked) { 284 rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER); 285 } 286 287 /* Don't allow more sessions than configured in rdsib.conf */ 288 if (rdsib_statep->rds_nsessions >= (MaxNodes - 1)) { 289 RDS_DPRINTF1("rds_add_session", "Max session limit reached"); 290 retval = B_FALSE; 291 } else { 292 sp->session_nextp = rdsib_statep->rds_sessionlistp; 293 rdsib_statep->rds_sessionlistp = sp; 294 rdsib_statep->rds_nsessions++; 295 RDS_INCR_SESS(); 296 } 297 298 if (!locked) { 299 rw_exit(&rdsib_statep->rds_sessionlock); 300 } 301 302 RDS_DPRINTF2("rds_add_session", "Return: SP(%p)", sp); 303 304 return (retval); 305 } 306 307 /* Session lookup based on destination IP or destination node guid */ 308 rds_session_t * 309 rds_session_lkup(rds_state_t *statep, ipaddr_t remoteip, ib_guid_t node_guid) 310 { 311 rds_session_t *sp; 312 313 RDS_DPRINTF4("rds_session_lkup", "Enter: 0x%p 0x%x 0x%llx", statep, 314 remoteip, node_guid); 315 316 /* A read/write lock is expected, will panic if none of them are held */ 317 ASSERT(rw_lock_held(&statep->rds_sessionlock)); 318 sp = statep->rds_sessionlistp; 319 while (sp) { 320 if ((sp->session_remip == remoteip) || ((node_guid != 0) && 321 (sp->session_rgid.gid_guid == node_guid))) { 322 break; 323 } 324 325 sp = sp->session_nextp; 326 } 327 328 RDS_DPRINTF4("rds_session_lkup", "Return: SP(%p)", sp); 329 330 return (sp); 331 } 332 333 boolean_t 334 rds_session_lkup_by_sp(rds_session_t *sp) 335 { 336 rds_session_t *sessionp; 337 338 RDS_DPRINTF4("rds_session_lkup_by_sp", "Enter: 0x%p", sp); 339 340 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 341 sessionp = rdsib_statep->rds_sessionlistp; 342 while (sessionp) { 343 if (sessionp == sp) { 344 rw_exit(&rdsib_statep->rds_sessionlock); 345 return (B_TRUE); 346 } 347 348 sessionp = sessionp->session_nextp; 349 } 350 rw_exit(&rdsib_statep->rds_sessionlock); 351 352 return (B_FALSE); 353 } 354 355 static void 356 rds_ep_fini(rds_ep_t *ep) 357 { 358 RDS_DPRINTF3("rds_ep_fini", "Enter: EP(%p) type: %d", ep, ep->ep_type); 359 360 /* free send pool */ 361 rds_free_send_pool(ep); 362 363 /* free recv pool */ 364 rds_free_recv_pool(ep); 365 366 mutex_enter(&ep->ep_lock); 367 ep->ep_hca_guid = 0; 368 mutex_exit(&ep->ep_lock); 369 370 RDS_DPRINTF3("rds_ep_fini", "Return EP(%p)", ep); 371 } 372 373 /* Assumes SP write lock is held */ 374 int 375 rds_ep_init(rds_ep_t *ep, ib_guid_t hca_guid) 376 { 377 uint_t ret; 378 379 RDS_DPRINTF3("rds_ep_init", "Enter: EP(%p) Type: %d", ep, ep->ep_type); 380 381 /* send pool */ 382 ret = rds_init_send_pool(ep, hca_guid); 383 if (ret != 0) { 384 RDS_DPRINTF2(LABEL, "EP(%p): rds_init_send_pool failed: %d", 385 ep, ret); 386 return (-1); 387 } 388 389 /* recv pool */ 390 ret = rds_init_recv_pool(ep); 391 if (ret != 0) { 392 RDS_DPRINTF2(LABEL, "EP(%p): rds_init_recv_pool failed: %d", 393 ep, ret); 394 rds_free_send_pool(ep); 395 return (-1); 396 } 397 398 /* reset the ep state */ 399 mutex_enter(&ep->ep_lock); 400 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 401 ep->ep_hca_guid = hca_guid; 402 ep->ep_lbufid = NULL; 403 ep->ep_rbufid = NULL; 404 ep->ep_segfbp = NULL; 405 ep->ep_seglbp = NULL; 406 407 /* Initialize the WR to send acknowledgements */ 408 ep->ep_ackwr.wr_id = RDS_RDMAW_WRID; 409 ep->ep_ackwr.wr_flags = IBT_WR_SEND_SOLICIT; 410 ep->ep_ackwr.wr_trans = IBT_RC_SRV; 411 ep->ep_ackwr.wr_opcode = IBT_WRC_RDMAW; 412 ep->ep_ackwr.wr_nds = 1; 413 ep->ep_ackwr.wr_sgl = &ep->ep_ackds; 414 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_raddr = NULL; 415 ep->ep_ackwr.wr.rc.rcwr.rdma.rdma_rkey = 0; 416 mutex_exit(&ep->ep_lock); 417 418 RDS_DPRINTF3("rds_ep_init", "Return: EP(%p) type: %d", ep, ep->ep_type); 419 420 return (0); 421 } 422 423 static int 424 rds_ep_reinit(rds_ep_t *ep, ib_guid_t hca_guid) 425 { 426 int ret; 427 428 RDS_DPRINTF3("rds_ep_reinit", "Enter: EP(%p) Type: %d", 429 ep, ep->ep_type); 430 431 /* Re-initialize send pool */ 432 ret = rds_reinit_send_pool(ep, hca_guid); 433 if (ret != 0) { 434 RDS_DPRINTF2("rds_ep_reinit", 435 "EP(%p): rds_reinit_send_pool failed: %d", ep, ret); 436 return (-1); 437 } 438 439 /* free all the receive buffers in the pool */ 440 rds_free_recv_pool(ep); 441 442 RDS_DPRINTF3("rds_ep_reinit", "Return: EP(%p) Type: %d", 443 ep, ep->ep_type); 444 445 return (0); 446 } 447 448 void 449 rds_session_fini(rds_session_t *sp) 450 { 451 RDS_DPRINTF2("rds_session_fini", "Enter: SP(0x%p)", sp); 452 453 rds_ep_fini(&sp->session_dataep); 454 rds_ep_fini(&sp->session_ctrlep); 455 456 RDS_DPRINTF2("rds_session_fini", "Return: SP(0x%p)", sp); 457 } 458 459 /* 460 * Allocate and initialize the resources needed for the control and 461 * data channels 462 */ 463 int 464 rds_session_init(rds_session_t *sp) 465 { 466 int ret; 467 rds_hca_t *hcap; 468 ib_guid_t hca_guid; 469 470 RDS_DPRINTF2("rds_session_init", "Enter: SP(0x%p)", sp); 471 472 /* CALLED WITH SESSION WRITE LOCK */ 473 474 hcap = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 475 if (hcap == NULL) { 476 RDS_DPRINTF2("rds_session_init", "SGID is on an uninitialized " 477 "HCA: %llx", sp->session_lgid.gid_guid); 478 return (-1); 479 } 480 481 hca_guid = hcap->hca_guid; 482 sp->session_hca_guid = hca_guid; 483 484 /* allocate and initialize the ctrl channel */ 485 ret = rds_ep_init(&sp->session_ctrlep, hca_guid); 486 if (ret != 0) { 487 RDS_DPRINTF2(LABEL, "SP(%p): Ctrl EP(%p) initialization " 488 "failed", sp, &sp->session_ctrlep); 489 return (-1); 490 } 491 492 RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p)", sp, &sp->session_ctrlep); 493 494 /* allocate and initialize the data channel */ 495 ret = rds_ep_init(&sp->session_dataep, hca_guid); 496 if (ret != 0) { 497 RDS_DPRINTF2(LABEL, "SP(%p): Data EP(%p) initialization " 498 "failed", sp, &sp->session_dataep); 499 rds_ep_fini(&sp->session_ctrlep); 500 return (-1); 501 } 502 503 /* Clear the portmaps */ 504 rds_unmark_all_ports(sp, RDS_LOCAL); 505 rds_unmark_all_ports(sp, RDS_REMOTE); 506 507 RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p)", sp, &sp->session_dataep); 508 509 RDS_DPRINTF2("rds_session_init", "Return"); 510 511 return (0); 512 } 513 514 /* 515 * This should be called before moving a session from ERROR state to 516 * INIT state. This will update the HCA keys incase the session has moved from 517 * one HCA to another. 518 */ 519 int 520 rds_session_reinit(rds_session_t *sp, ib_gid_t lgid) 521 { 522 rds_hca_t *hcap, *hcap1; 523 int ret; 524 525 RDS_DPRINTF2("rds_session_reinit", "Enter: SP(0x%p)", sp); 526 527 /* CALLED WITH SESSION WRITE LOCK */ 528 529 /* Clear the portmaps */ 530 rds_unmark_all_ports(sp, RDS_LOCAL); 531 rds_unmark_all_ports(sp, RDS_REMOTE); 532 533 /* make the last buffer as the acknowledged */ 534 *(uintptr_t *)sp->session_dataep.ep_ack_addr = 535 (uintptr_t)sp->session_dataep.ep_sndpool.pool_tailp; 536 537 hcap = rds_gid_to_hcap(rdsib_statep, lgid); 538 if (hcap == NULL) { 539 RDS_DPRINTF2("rds_session_reinit", "SGID is on an " 540 "uninitialized HCA: %llx", lgid.gid_guid); 541 return (-1); 542 } 543 544 hcap1 = rds_gid_to_hcap(rdsib_statep, sp->session_lgid); 545 if (hcap1 == NULL) { 546 RDS_DPRINTF2("rds_session_reinit", "Seems like HCA %llx " 547 "is unplugged", sp->session_lgid.gid_guid); 548 } else if (hcap->hca_guid == hcap1->hca_guid) { 549 /* 550 * No action is needed as the session did not move across 551 * HCAs 552 */ 553 RDS_DPRINTF2("rds_session_reinit", "Failover on the same HCA"); 554 return (0); 555 } 556 557 RDS_DPRINTF2("rds_session_reinit", "Failover across HCAs"); 558 559 sp->session_hca_guid = hcap->hca_guid; 560 561 /* re-initialize the control channel */ 562 ret = rds_ep_reinit(&sp->session_ctrlep, hcap->hca_guid); 563 if (ret != 0) { 564 RDS_DPRINTF2("rds_session_reinit", 565 "SP(%p): Ctrl EP(%p) re-initialization failed", 566 sp, &sp->session_ctrlep); 567 return (-1); 568 } 569 570 RDS_DPRINTF2("rds_session_reinit", "SP(%p) Control EP(%p)", 571 sp, &sp->session_ctrlep); 572 573 /* re-initialize the data channel */ 574 ret = rds_ep_reinit(&sp->session_dataep, hcap->hca_guid); 575 if (ret != 0) { 576 RDS_DPRINTF2("rds_session_reinit", 577 "SP(%p): Data EP(%p) re-initialization failed", 578 sp, &sp->session_dataep); 579 return (-1); 580 } 581 582 RDS_DPRINTF2("rds_session_reinit", "SP(%p) Data EP(%p)", 583 sp, &sp->session_dataep); 584 585 sp->session_lgid = lgid; 586 587 RDS_DPRINTF2("rds_session_reinit", "Return: SP(0x%p)", sp); 588 589 return (0); 590 } 591 592 static int 593 rds_session_connect(rds_session_t *sp) 594 { 595 ibt_channel_hdl_t ctrlchan, datachan; 596 rds_ep_t *ep; 597 int ret; 598 599 RDS_DPRINTF2("rds_session_connect", "Enter SP(%p)", sp); 600 601 sp->session_pinfo.pi_sid = rdsib_statep->rds_service_id; 602 603 /* Override the packet life time based on the conf file */ 604 if (IBPktLifeTime != 0) { 605 sp->session_pinfo.pi_prim_cep_path.cep_cm_opaque1 = 606 IBPktLifeTime; 607 } 608 609 /* Session type may change if we run into peer-to-peer case. */ 610 rw_enter(&sp->session_lock, RW_READER); 611 if (sp->session_type == RDS_SESSION_PASSIVE) { 612 RDS_DPRINTF2("rds_session_connect", "SP(%p) is no longer the " 613 "active end", sp); 614 rw_exit(&sp->session_lock); 615 return (0); /* return success */ 616 } 617 rw_exit(&sp->session_lock); 618 619 /* connect the data ep first */ 620 ep = &sp->session_dataep; 621 mutex_enter(&ep->ep_lock); 622 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 623 ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; 624 mutex_exit(&ep->ep_lock); 625 ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING, 626 &datachan); 627 if (ret != IBT_SUCCESS) { 628 RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " 629 "failed: %d", ep, ret); 630 return (-1); 631 } 632 sp->session_dataep.ep_chanhdl = datachan; 633 } else { 634 RDS_DPRINTF2(LABEL, "SP(%p) Data EP(%p) is in " 635 "unexpected state: %d", sp, ep, ep->ep_state); 636 mutex_exit(&ep->ep_lock); 637 return (-1); 638 } 639 640 RDS_DPRINTF3(LABEL, "SP(%p) EP(%p): Data channel is connected", 641 sp, ep); 642 643 ep = &sp->session_ctrlep; 644 mutex_enter(&ep->ep_lock); 645 if (ep->ep_state == RDS_EP_STATE_UNCONNECTED) { 646 ep->ep_state = RDS_EP_STATE_ACTIVE_PENDING; 647 mutex_exit(&ep->ep_lock); 648 ret = rds_open_rc_channel(ep, &sp->session_pinfo, IBT_BLOCKING, 649 &ctrlchan); 650 if (ret != IBT_SUCCESS) { 651 RDS_DPRINTF2(LABEL, "EP(%p): rds_open_rc_channel " 652 "failed: %d", ep, ret); 653 return (-1); 654 } 655 sp->session_ctrlep.ep_chanhdl = ctrlchan; 656 } else { 657 RDS_DPRINTF2(LABEL, "SP(%p) Control EP(%p) is in " 658 "unexpected state: %d", sp, ep, ep->ep_state); 659 mutex_exit(&ep->ep_lock); 660 return (-1); 661 } 662 663 RDS_DPRINTF2(LABEL, "Session (%p) 0x%x <--> 0x%x is CONNECTED", 664 sp, sp->session_myip, sp->session_remip); 665 666 RDS_DPRINTF2("rds_session_connect", "Return SP(%p)", sp); 667 668 return (0); 669 } 670 671 /* 672 * Can be called with or without session_lock. 673 */ 674 void 675 rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode, uint_t wait) 676 { 677 rds_ep_t *ep; 678 679 RDS_DPRINTF2("rds_session_close", "SP(%p) State: %d", sp, 680 sp->session_state); 681 682 ep = &sp->session_dataep; 683 RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state); 684 685 /* wait until the SQ is empty before closing */ 686 if (wait != 0) { 687 (void) rds_is_sendq_empty(ep, wait); 688 } 689 690 mutex_enter(&ep->ep_lock); 691 while (ep->ep_state == RDS_EP_STATE_CLOSING) { 692 mutex_exit(&ep->ep_lock); 693 delay(drv_usectohz(300000)); 694 mutex_enter(&ep->ep_lock); 695 } 696 697 if (ep->ep_state == RDS_EP_STATE_CONNECTED) { 698 ep->ep_state = RDS_EP_STATE_CLOSING; 699 mutex_exit(&ep->ep_lock); 700 (void) rds_close_rc_channel(ep->ep_chanhdl, mode); 701 if (wait == 0) { 702 /* make sure all WCs are flushed before proceeding */ 703 (void) rds_is_sendq_empty(ep, 1); 704 } 705 mutex_enter(&ep->ep_lock); 706 } 707 rds_ep_free_rc_channel(ep); 708 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 709 ep->ep_segfbp = NULL; 710 ep->ep_seglbp = NULL; 711 mutex_exit(&ep->ep_lock); 712 713 ep = &sp->session_ctrlep; 714 RDS_DPRINTF3(LABEL, "EP(%p) State: %d", ep, ep->ep_state); 715 716 /* wait until the SQ is empty before closing */ 717 if (wait != 0) { 718 (void) rds_is_sendq_empty(ep, wait); 719 } 720 721 mutex_enter(&ep->ep_lock); 722 while (ep->ep_state == RDS_EP_STATE_CLOSING) { 723 mutex_exit(&ep->ep_lock); 724 delay(drv_usectohz(300000)); 725 mutex_enter(&ep->ep_lock); 726 } 727 728 if (ep->ep_state == RDS_EP_STATE_CONNECTED) { 729 ep->ep_state = RDS_EP_STATE_CLOSING; 730 mutex_exit(&ep->ep_lock); 731 (void) rds_close_rc_channel(ep->ep_chanhdl, mode); 732 if (wait == 0) { 733 /* make sure all WCs are flushed before proceeding */ 734 (void) rds_is_sendq_empty(ep, 1); 735 } 736 mutex_enter(&ep->ep_lock); 737 } 738 rds_ep_free_rc_channel(ep); 739 ep->ep_state = RDS_EP_STATE_UNCONNECTED; 740 ep->ep_segfbp = NULL; 741 ep->ep_seglbp = NULL; 742 mutex_exit(&ep->ep_lock); 743 744 RDS_DPRINTF2("rds_session_close", "Return (%p)", sp); 745 } 746 747 /* Free the session */ 748 static void 749 rds_destroy_session(rds_session_t *sp) 750 { 751 rds_ep_t *ep; 752 rds_bufpool_t *pool; 753 754 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 755 (sp->session_state == RDS_SESSION_STATE_FAILED) || 756 (sp->session_state == RDS_SESSION_STATE_FINI) || 757 (sp->session_state == RDS_SESSION_STATE_PASSIVE_CLOSING)); 758 759 rw_enter(&sp->session_lock, RW_READER); 760 RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d", sp, 761 sp->session_state); 762 while (!((sp->session_state == RDS_SESSION_STATE_CLOSED) || 763 (sp->session_state == RDS_SESSION_STATE_FAILED) || 764 (sp->session_state == RDS_SESSION_STATE_FINI))) { 765 rw_exit(&sp->session_lock); 766 delay(drv_usectohz(1000000)); 767 rw_enter(&sp->session_lock, RW_READER); 768 RDS_DPRINTF2("rds_destroy_session", "SP(%p) State: %d WAITING " 769 "ON SESSION", sp, sp->session_state); 770 } 771 rw_exit(&sp->session_lock); 772 773 /* data channel */ 774 ep = &sp->session_dataep; 775 776 /* send pool locks */ 777 pool = &ep->ep_sndpool; 778 cv_destroy(&pool->pool_cv); 779 mutex_destroy(&pool->pool_lock); 780 781 /* recv pool locks */ 782 pool = &ep->ep_rcvpool; 783 cv_destroy(&pool->pool_cv); 784 mutex_destroy(&pool->pool_lock); 785 mutex_destroy(&ep->ep_recvqp.qp_lock); 786 787 /* control channel */ 788 ep = &sp->session_ctrlep; 789 790 /* send pool locks */ 791 pool = &ep->ep_sndpool; 792 cv_destroy(&pool->pool_cv); 793 mutex_destroy(&pool->pool_lock); 794 795 /* recv pool locks */ 796 pool = &ep->ep_rcvpool; 797 cv_destroy(&pool->pool_cv); 798 mutex_destroy(&pool->pool_lock); 799 mutex_destroy(&ep->ep_recvqp.qp_lock); 800 801 /* session */ 802 rw_destroy(&sp->session_lock); 803 rw_destroy(&sp->session_local_portmap_lock); 804 rw_destroy(&sp->session_remote_portmap_lock); 805 806 /* free the session */ 807 kmem_free(sp, sizeof (rds_session_t)); 808 809 RDS_DPRINTF2("rds_destroy_session", "SP(%p) Return", sp); 810 } 811 812 /* This is called on the taskq thread */ 813 void 814 rds_failover_session(void *arg) 815 { 816 rds_session_t *sp = (rds_session_t *)arg; 817 ib_gid_t lgid, rgid; 818 ipaddr_t myip, remip; 819 int ret, cnt = 0; 820 uint8_t sp_state; 821 822 RDS_DPRINTF2("rds_failover_session", "Enter: (%p)", sp); 823 824 /* Make sure the session is still alive */ 825 if (rds_session_lkup_by_sp(sp) == B_FALSE) { 826 RDS_DPRINTF2("rds_failover_session", 827 "Return: SP(%p) not ALIVE", sp); 828 return; 829 } 830 831 RDS_INCR_FAILOVERS(); 832 833 rw_enter(&sp->session_lock, RW_WRITER); 834 if (sp->session_type != RDS_SESSION_ACTIVE) { 835 /* 836 * The remote side must have seen the error and initiated 837 * a re-connect. 838 */ 839 RDS_DPRINTF2("rds_failover_session", 840 "SP(%p) has become passive", sp); 841 rw_exit(&sp->session_lock); 842 return; 843 } 844 sp->session_failover = 1; 845 sp_state = sp->session_state; 846 rw_exit(&sp->session_lock); 847 848 /* 849 * The session is in ERROR state but close both channels 850 * for a clean start. 851 */ 852 if (sp_state == RDS_SESSION_STATE_ERROR) { 853 rds_session_close(sp, IBT_BLOCKING, 1); 854 } 855 856 /* wait 1 sec before re-connecting */ 857 delay(drv_usectohz(1000000)); 858 859 do { 860 ibt_ip_path_attr_t ipattr; 861 ibt_ip_addr_t dstip; 862 863 /* The ipaddr should be in the network order */ 864 myip = sp->session_myip; 865 remip = sp->session_remip; 866 ret = rds_sc_path_lookup(&myip, &remip); 867 if (ret == 0) { 868 RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)", 869 myip, remip); 870 } 871 /* check if we have (new) path from the source to destination */ 872 lgid.gid_prefix = 0; 873 lgid.gid_guid = 0; 874 rgid.gid_prefix = 0; 875 rgid.gid_guid = 0; 876 877 bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 878 dstip.family = AF_INET; 879 dstip.un.ip4addr = remip; 880 ipattr.ipa_dst_ip = &dstip; 881 ipattr.ipa_src_ip.family = AF_INET; 882 ipattr.ipa_src_ip.un.ip4addr = myip; 883 ipattr.ipa_ndst = 1; 884 ipattr.ipa_max_paths = 1; 885 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", 886 myip, remip); 887 ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, 888 IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, NULL, NULL); 889 if (ret == IBT_SUCCESS) { 890 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); 891 lgid = sp->session_pinfo. 892 pi_prim_cep_path.cep_adds_vect.av_sgid; 893 rgid = sp->session_pinfo. 894 pi_prim_cep_path.cep_adds_vect.av_dgid; 895 break; 896 } 897 898 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d ", ret); 899 900 /* wait 1 sec before re-trying */ 901 delay(drv_usectohz(1000000)); 902 cnt++; 903 } while (cnt < 5); 904 905 if (ret != IBT_SUCCESS) { 906 rw_enter(&sp->session_lock, RW_WRITER); 907 if (sp->session_type == RDS_SESSION_ACTIVE) { 908 rds_session_fini(sp); 909 sp->session_state = RDS_SESSION_STATE_FAILED; 910 sp->session_failover = 0; 911 RDS_DPRINTF3("rds_failover_session", 912 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 913 } else { 914 RDS_DPRINTF2("rds_failover_session", 915 "SP(%p) has become passive", sp); 916 } 917 rw_exit(&sp->session_lock); 918 return; 919 } 920 921 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 922 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 923 rgid.gid_guid); 924 925 rw_enter(&sp->session_lock, RW_WRITER); 926 if (sp->session_type != RDS_SESSION_ACTIVE) { 927 /* 928 * The remote side must have seen the error and initiated 929 * a re-connect. 930 */ 931 RDS_DPRINTF2("rds_failover_session", 932 "SP(%p) has become passive", sp); 933 rw_exit(&sp->session_lock); 934 return; 935 } 936 937 /* move the session to init state */ 938 ret = rds_session_reinit(sp, lgid); 939 sp->session_lgid = lgid; 940 sp->session_rgid = rgid; 941 if (ret != 0) { 942 rds_session_fini(sp); 943 sp->session_state = RDS_SESSION_STATE_FAILED; 944 sp->session_failover = 0; 945 RDS_DPRINTF3("rds_failover_session", 946 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 947 rw_exit(&sp->session_lock); 948 return; 949 } else { 950 sp->session_state = RDS_SESSION_STATE_INIT; 951 RDS_DPRINTF3("rds_failover_session", 952 "SP(%p) State RDS_SESSION_STATE_INIT", sp); 953 } 954 rw_exit(&sp->session_lock); 955 956 rds_session_open(sp); 957 958 RDS_DPRINTF2("rds_failover_session", "Return: (%p)", sp); 959 } 960 961 void 962 rds_handle_send_error(rds_ep_t *ep) 963 { 964 if (rds_is_sendq_empty(ep, 0)) { 965 /* Session should already be in ERROR, try to reconnect */ 966 RDS_DPRINTF2("rds_handle_send_error", 967 "Dispatching taskq to failover SP(%p)", ep->ep_sp); 968 (void) ddi_taskq_dispatch(rds_taskq, rds_failover_session, 969 (void *)ep->ep_sp, DDI_SLEEP); 970 } 971 } 972 973 /* 974 * Called in the CM handler on the passive side 975 * Called on a taskq thread. 976 */ 977 void 978 rds_cleanup_passive_session(void *arg) 979 { 980 rds_session_t *sp = arg; 981 982 RDS_DPRINTF2("rds_cleanup_passive_session", "SP(%p) State: %d", sp, 983 sp->session_state); 984 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 985 (sp->session_state == RDS_SESSION_STATE_ERROR)); 986 987 rds_session_close(sp, IBT_BLOCKING, 1); 988 989 rw_enter(&sp->session_lock, RW_WRITER); 990 if (sp->session_state == RDS_SESSION_STATE_CLOSED) { 991 rds_session_fini(sp); 992 sp->session_state = RDS_SESSION_STATE_FINI; 993 sp->session_failover = 0; 994 RDS_DPRINTF3("rds_cleanup_passive_session", 995 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 996 } else if (sp->session_state == RDS_SESSION_STATE_ERROR) { 997 rds_session_fini(sp); 998 sp->session_state = RDS_SESSION_STATE_FAILED; 999 sp->session_failover = 0; 1000 RDS_DPRINTF3("rds_cleanup_passive_session", 1001 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 1002 } 1003 rw_exit(&sp->session_lock); 1004 1005 RDS_DPRINTF2("rds_cleanup_passive_session", "Return: SP (%p)", sp); 1006 } 1007 1008 /* 1009 * Called by the CM handler on the passive side 1010 * Called with WRITE lock on the session 1011 */ 1012 void 1013 rds_passive_session_fini(rds_session_t *sp) 1014 { 1015 rds_ep_t *ep; 1016 1017 RDS_DPRINTF2("rds_passive_session_fini", "SP(%p) State: %d", sp, 1018 sp->session_state); 1019 ASSERT((sp->session_state == RDS_SESSION_STATE_CLOSED) || 1020 (sp->session_state == RDS_SESSION_STATE_ERROR)); 1021 1022 /* clean the data channel */ 1023 ep = &sp->session_dataep; 1024 (void) rds_is_sendq_empty(ep, 1); 1025 mutex_enter(&ep->ep_lock); 1026 RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep, 1027 ep->ep_state); 1028 rds_ep_free_rc_channel(ep); 1029 mutex_exit(&ep->ep_lock); 1030 1031 /* clean the control channel */ 1032 ep = &sp->session_ctrlep; 1033 (void) rds_is_sendq_empty(ep, 1); 1034 mutex_enter(&ep->ep_lock); 1035 RDS_DPRINTF2("rds_passive_session_fini", "EP(%p) State: %d", ep, 1036 ep->ep_state); 1037 rds_ep_free_rc_channel(ep); 1038 mutex_exit(&ep->ep_lock); 1039 1040 rds_session_fini(sp); 1041 sp->session_failover = 0; 1042 1043 RDS_DPRINTF2("rds_passive_session_fini", "Return: SP (%p)", sp); 1044 } 1045 1046 void 1047 rds_close_this_session(rds_session_t *sp, uint8_t wait) 1048 { 1049 switch (sp->session_state) { 1050 case RDS_SESSION_STATE_CONNECTED: 1051 sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; 1052 rw_exit(&sp->session_lock); 1053 1054 rds_session_close(sp, IBT_BLOCKING, wait); 1055 1056 rw_enter(&sp->session_lock, RW_WRITER); 1057 sp->session_state = RDS_SESSION_STATE_CLOSED; 1058 RDS_DPRINTF3("rds_close_sessions", 1059 "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); 1060 rds_session_fini(sp); 1061 sp->session_state = RDS_SESSION_STATE_FINI; 1062 sp->session_failover = 0; 1063 RDS_DPRINTF3("rds_close_sessions", 1064 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 1065 break; 1066 1067 case RDS_SESSION_STATE_ERROR: 1068 case RDS_SESSION_STATE_PASSIVE_CLOSING: 1069 case RDS_SESSION_STATE_INIT: 1070 sp->session_state = RDS_SESSION_STATE_ACTIVE_CLOSING; 1071 rw_exit(&sp->session_lock); 1072 1073 rds_session_close(sp, IBT_BLOCKING, wait); 1074 1075 rw_enter(&sp->session_lock, RW_WRITER); 1076 sp->session_state = RDS_SESSION_STATE_CLOSED; 1077 RDS_DPRINTF3("rds_close_sessions", 1078 "SP(%p) State RDS_SESSION_STATE_CLOSED", sp); 1079 /* FALLTHRU */ 1080 case RDS_SESSION_STATE_CLOSED: 1081 rds_session_fini(sp); 1082 sp->session_state = RDS_SESSION_STATE_FINI; 1083 sp->session_failover = 0; 1084 RDS_DPRINTF3("rds_close_sessions", 1085 "SP(%p) State RDS_SESSION_STATE_FINI", sp); 1086 break; 1087 } 1088 } 1089 1090 /* 1091 * Can be called: 1092 * 1. on driver detach 1093 * 2. on taskq thread 1094 * arg is always NULL 1095 */ 1096 /* ARGSUSED */ 1097 void 1098 rds_close_sessions(void *arg) 1099 { 1100 rds_session_t *sp, *spnextp; 1101 1102 RDS_DPRINTF2("rds_close_sessions", "Enter"); 1103 1104 /* wait until all the buffers are freed by the sockets */ 1105 while (RDS_GET_RXPKTS_PEND() != 0) { 1106 /* wait one second and try again */ 1107 RDS_DPRINTF2("rds_close_sessions", "waiting on " 1108 "pending packets", RDS_GET_RXPKTS_PEND()); 1109 delay(drv_usectohz(1000000)); 1110 } 1111 RDS_DPRINTF2("rds_close_sessions", "No more RX packets pending"); 1112 1113 /* close all the sessions */ 1114 rw_enter(&rdsib_statep->rds_sessionlock, RW_WRITER); 1115 sp = rdsib_statep->rds_sessionlistp; 1116 while (sp) { 1117 rw_enter(&sp->session_lock, RW_WRITER); 1118 RDS_DPRINTF2("rds_close_sessions", "SP(%p) State: %d", sp, 1119 sp->session_state); 1120 rds_close_this_session(sp, 2); 1121 rw_exit(&sp->session_lock); 1122 sp = sp->session_nextp; 1123 } 1124 1125 sp = rdsib_statep->rds_sessionlistp; 1126 rdsib_statep->rds_sessionlistp = NULL; 1127 rdsib_statep->rds_nsessions = 0; 1128 rw_exit(&rdsib_statep->rds_sessionlock); 1129 1130 while (sp) { 1131 spnextp = sp->session_nextp; 1132 rds_destroy_session(sp); 1133 RDS_DECR_SESS(); 1134 sp = spnextp; 1135 } 1136 1137 /* free the global pool */ 1138 rds_free_recv_caches(rdsib_statep); 1139 1140 RDS_DPRINTF2("rds_close_sessions", "Return"); 1141 } 1142 1143 void 1144 rds_session_open(rds_session_t *sp) 1145 { 1146 int ret; 1147 1148 RDS_DPRINTF2("rds_session_open", "Enter SP(%p)", sp); 1149 1150 ret = rds_session_connect(sp); 1151 if (ret == -1) { 1152 /* 1153 * may be the session has become passive due to 1154 * hitting peer-to-peer case 1155 */ 1156 rw_enter(&sp->session_lock, RW_READER); 1157 if (sp->session_type == RDS_SESSION_PASSIVE) { 1158 RDS_DPRINTF2("rds_session_open", "SP(%p) " 1159 "has become passive from active", sp); 1160 rw_exit(&sp->session_lock); 1161 return; 1162 } 1163 1164 /* get the lock for writing */ 1165 rw_exit(&sp->session_lock); 1166 rw_enter(&sp->session_lock, RW_WRITER); 1167 sp->session_state = RDS_SESSION_STATE_ERROR; 1168 RDS_DPRINTF3("rds_session_open", 1169 "SP(%p) State RDS_SESSION_STATE_ERROR", sp); 1170 rw_exit(&sp->session_lock); 1171 1172 /* Connect request failed */ 1173 rds_session_close(sp, IBT_BLOCKING, 1); 1174 1175 rw_enter(&sp->session_lock, RW_WRITER); 1176 rds_session_fini(sp); 1177 sp->session_state = RDS_SESSION_STATE_FAILED; 1178 sp->session_failover = 0; 1179 RDS_DPRINTF3("rds_session_open", 1180 "SP(%p) State RDS_SESSION_STATE_FAILED", sp); 1181 rw_exit(&sp->session_lock); 1182 1183 return; 1184 } 1185 1186 RDS_DPRINTF2("rds_session_open", "Return: SP(%p)", sp); 1187 } 1188 1189 /* 1190 * Creates a session and inserts it into the list of sessions. The session 1191 * state would be CREATED. 1192 * Return Values: 1193 * EWOULDBLOCK 1194 */ 1195 rds_session_t * 1196 rds_session_create(rds_state_t *statep, ipaddr_t localip, ipaddr_t remip, 1197 ibt_cm_req_rcv_t *reqp, uint8_t type) 1198 { 1199 ib_gid_t lgid, rgid; 1200 rds_session_t *newp, *oldp; 1201 rds_ep_t *dataep, *ctrlep; 1202 rds_bufpool_t *pool; 1203 int ret; 1204 1205 RDS_DPRINTF2("rds_session_create", "Enter: 0x%p 0x%x 0x%x, type: %d", 1206 statep, localip, remip, type); 1207 1208 /* Check if there is space for a new session */ 1209 rw_enter(&statep->rds_sessionlock, RW_READER); 1210 if (statep->rds_nsessions >= (MaxNodes - 1)) { 1211 rw_exit(&statep->rds_sessionlock); 1212 RDS_DPRINTF1("rds_session_create", "No More Sessions allowed"); 1213 return (NULL); 1214 } 1215 rw_exit(&statep->rds_sessionlock); 1216 1217 /* Allocate and initialize global buffer pool */ 1218 ret = rds_init_recv_caches(statep); 1219 if (ret != 0) { 1220 RDS_DPRINTF2(LABEL, "Buffer Cache Initialization failed"); 1221 return (NULL); 1222 } 1223 1224 /* enough memory for session (includes 2 endpoints) */ 1225 newp = kmem_zalloc(sizeof (rds_session_t), KM_SLEEP); 1226 1227 newp->session_remip = remip; 1228 newp->session_myip = localip; 1229 newp->session_type = type; 1230 newp->session_state = RDS_SESSION_STATE_CREATED; 1231 RDS_DPRINTF3("rds_session_create", 1232 "SP(%p) State RDS_SESSION_STATE_CREATED", newp); 1233 rw_init(&newp->session_lock, NULL, RW_DRIVER, NULL); 1234 rw_init(&newp->session_local_portmap_lock, NULL, RW_DRIVER, NULL); 1235 rw_init(&newp->session_remote_portmap_lock, NULL, RW_DRIVER, NULL); 1236 1237 /* Initialize data endpoint */ 1238 dataep = &newp->session_dataep; 1239 dataep->ep_remip = newp->session_remip; 1240 dataep->ep_myip = newp->session_myip; 1241 dataep->ep_state = RDS_EP_STATE_UNCONNECTED; 1242 dataep->ep_sp = newp; 1243 dataep->ep_type = RDS_EP_TYPE_DATA; 1244 mutex_init(&dataep->ep_lock, NULL, MUTEX_DRIVER, NULL); 1245 1246 /* Initialize send pool locks */ 1247 pool = &dataep->ep_sndpool; 1248 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1249 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1250 1251 /* Initialize recv pool locks */ 1252 pool = &dataep->ep_rcvpool; 1253 mutex_init(&dataep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1254 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1255 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1256 1257 /* Initialize control endpoint */ 1258 ctrlep = &newp->session_ctrlep; 1259 ctrlep->ep_remip = newp->session_remip; 1260 ctrlep->ep_myip = newp->session_myip; 1261 ctrlep->ep_state = RDS_EP_STATE_UNCONNECTED; 1262 ctrlep->ep_sp = newp; 1263 ctrlep->ep_type = RDS_EP_TYPE_CTRL; 1264 mutex_init(&ctrlep->ep_lock, NULL, MUTEX_DRIVER, NULL); 1265 1266 /* Initialize send pool locks */ 1267 pool = &ctrlep->ep_sndpool; 1268 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1269 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1270 1271 /* Initialize recv pool locks */ 1272 pool = &ctrlep->ep_rcvpool; 1273 mutex_init(&ctrlep->ep_recvqp.qp_lock, NULL, MUTEX_DRIVER, NULL); 1274 mutex_init(&pool->pool_lock, NULL, MUTEX_DRIVER, NULL); 1275 cv_init(&pool->pool_cv, NULL, CV_DRIVER, NULL); 1276 1277 /* lkup if there is already a session */ 1278 rw_enter(&statep->rds_sessionlock, RW_WRITER); 1279 oldp = rds_session_lkup(statep, remip, 0); 1280 if (oldp != NULL) { 1281 /* A session to this destination exists */ 1282 rw_exit(&statep->rds_sessionlock); 1283 rw_destroy(&newp->session_lock); 1284 rw_destroy(&newp->session_local_portmap_lock); 1285 rw_destroy(&newp->session_remote_portmap_lock); 1286 mutex_destroy(&dataep->ep_lock); 1287 mutex_destroy(&ctrlep->ep_lock); 1288 kmem_free(newp, sizeof (rds_session_t)); 1289 return (NULL); 1290 } 1291 1292 /* Insert this session into the list */ 1293 if (rds_add_session(newp, B_TRUE) != B_TRUE) { 1294 /* No room to add this session */ 1295 rw_exit(&statep->rds_sessionlock); 1296 rw_destroy(&newp->session_lock); 1297 rw_destroy(&newp->session_local_portmap_lock); 1298 rw_destroy(&newp->session_remote_portmap_lock); 1299 mutex_destroy(&dataep->ep_lock); 1300 mutex_destroy(&ctrlep->ep_lock); 1301 kmem_free(newp, sizeof (rds_session_t)); 1302 return (NULL); 1303 } 1304 1305 /* unlock the session list */ 1306 rw_exit(&statep->rds_sessionlock); 1307 1308 if (type == RDS_SESSION_ACTIVE) { 1309 ipaddr_t localip1, remip1; 1310 ibt_ip_path_attr_t ipattr; 1311 ibt_ip_addr_t dstip; 1312 1313 /* The ipaddr should be in the network order */ 1314 localip1 = localip; 1315 remip1 = remip; 1316 ret = rds_sc_path_lookup(&localip1, &remip1); 1317 if (ret == 0) { 1318 RDS_DPRINTF2(LABEL, "Path not found (0x%x 0x%x)", 1319 localip, remip); 1320 } 1321 1322 /* Get the gids for the source and destination ip addrs */ 1323 lgid.gid_prefix = 0; 1324 lgid.gid_guid = 0; 1325 rgid.gid_prefix = 0; 1326 rgid.gid_guid = 0; 1327 1328 bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 1329 dstip.family = AF_INET; 1330 dstip.un.ip4addr = remip1; 1331 ipattr.ipa_dst_ip = &dstip; 1332 ipattr.ipa_src_ip.family = AF_INET; 1333 ipattr.ipa_src_ip.un.ip4addr = localip1; 1334 ipattr.ipa_ndst = 1; 1335 ipattr.ipa_max_paths = 1; 1336 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", 1337 localip1, remip1); 1338 ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, 1339 IBT_PATH_NO_FLAGS, &ipattr, &newp->session_pinfo, 1340 NULL, NULL); 1341 if (ret != IBT_SUCCESS) { 1342 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths failed, ret: %d " 1343 "lgid: %llx:%llx rgid: %llx:%llx", lgid.gid_prefix, 1344 lgid.gid_guid, rgid.gid_prefix, rgid.gid_guid); 1345 1346 RDS_SESSION_TRANSITION(newp, RDS_SESSION_STATE_FAILED); 1347 return (NULL); 1348 } 1349 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); 1350 lgid = 1351 newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_sgid; 1352 rgid = 1353 newp->session_pinfo.pi_prim_cep_path.cep_adds_vect.av_dgid; 1354 1355 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 1356 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 1357 rgid.gid_guid); 1358 } 1359 1360 rw_enter(&newp->session_lock, RW_WRITER); 1361 /* check for peer-to-peer case */ 1362 if (type == newp->session_type) { 1363 /* no peer-to-peer case */ 1364 if (type == RDS_SESSION_ACTIVE) { 1365 newp->session_lgid = lgid; 1366 newp->session_rgid = rgid; 1367 } else { 1368 /* rgid is requester gid & lgid is receiver gid */ 1369 newp->session_rgid = reqp->req_prim_addr.av_dgid; 1370 newp->session_lgid = reqp->req_prim_addr.av_sgid; 1371 } 1372 } 1373 rw_exit(&newp->session_lock); 1374 1375 RDS_DPRINTF2("rds_session_create", "Return SP(%p)", newp); 1376 1377 return (newp); 1378 } 1379 1380 void 1381 rds_handle_close_session_request(void *arg) 1382 { 1383 rds_session_t *sp = (rds_session_t *)arg; 1384 1385 RDS_DPRINTF2("rds_handle_close_session_request", 1386 "Enter: Closing this Session (%p)", sp); 1387 1388 rw_enter(&sp->session_lock, RW_WRITER); 1389 RDS_DPRINTF2("rds_handle_close_session_request", 1390 "SP(%p) State: %d", sp, sp->session_state); 1391 rds_close_this_session(sp, 2); 1392 rw_exit(&sp->session_lock); 1393 1394 RDS_DPRINTF2("rds_handle_close_session_request", "Return SP(%p)", sp); 1395 } 1396 1397 void 1398 rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cpkt) 1399 { 1400 RDS_DPRINTF4("rds_handle_control_message", "Enter: SP(%p) code: %d " 1401 "port: %d", sp, cpkt->rcp_code, cpkt->rcp_port); 1402 1403 switch (cpkt->rcp_code) { 1404 case RDS_CTRL_CODE_STALL: 1405 RDS_INCR_STALLS_RCVD(); 1406 (void) rds_check_n_mark_port(sp, cpkt->rcp_port, RDS_REMOTE); 1407 break; 1408 case RDS_CTRL_CODE_UNSTALL: 1409 RDS_INCR_UNSTALLS_RCVD(); 1410 (void) rds_check_n_unmark_port(sp, cpkt->rcp_port, RDS_REMOTE); 1411 break; 1412 case RDS_CTRL_CODE_STALL_PORTS: 1413 rds_mark_all_ports(sp, RDS_REMOTE); 1414 break; 1415 case RDS_CTRL_CODE_UNSTALL_PORTS: 1416 rds_unmark_all_ports(sp, RDS_REMOTE); 1417 break; 1418 case RDS_CTRL_CODE_HEARTBEAT: 1419 break; 1420 case RDS_CTRL_CODE_CLOSE_SESSION: 1421 RDS_DPRINTF2("rds_handle_control_message", 1422 "SP(%p) Remote Requested to close this session", sp); 1423 (void) ddi_taskq_dispatch(rds_taskq, 1424 rds_handle_close_session_request, (void *)sp, DDI_SLEEP); 1425 break; 1426 default: 1427 RDS_DPRINTF2(LABEL, "ERROR: Invalid Control code: %d", 1428 cpkt->rcp_code); 1429 break; 1430 } 1431 1432 RDS_DPRINTF4("rds_handle_control_message", "Return"); 1433 } 1434 1435 int 1436 rds_post_control_message(rds_session_t *sp, uint8_t code, in_port_t port) 1437 { 1438 ibt_send_wr_t wr; 1439 rds_ep_t *ep; 1440 rds_buf_t *bp; 1441 rds_ctrl_pkt_t *cp; 1442 int ret; 1443 1444 RDS_DPRINTF4("rds_post_control_message", "Enter: SP(%p) Code: %d " 1445 "Port: %d", sp, code, port); 1446 1447 ep = &sp->session_ctrlep; 1448 1449 bp = rds_get_send_buf(ep, 1); 1450 if (bp == NULL) { 1451 RDS_DPRINTF2(LABEL, "No buffers available to send control " 1452 "message: SP(%p) Code: %d Port: %d", sp, code, 1453 port); 1454 return (-1); 1455 } 1456 1457 cp = (rds_ctrl_pkt_t *)(uintptr_t)bp->buf_ds.ds_va; 1458 cp->rcp_code = code; 1459 cp->rcp_port = port; 1460 bp->buf_ds.ds_len = RDS_CTRLPKT_SIZE; 1461 1462 wr.wr_id = (uintptr_t)bp; 1463 wr.wr_flags = IBT_WR_SEND_SOLICIT; 1464 wr.wr_trans = IBT_RC_SRV; 1465 wr.wr_opcode = IBT_WRC_SEND; 1466 wr.wr_nds = 1; 1467 wr.wr_sgl = &bp->buf_ds; 1468 RDS_DPRINTF5(LABEL, "ds_va %p ds_len %d ds_lkey 0x%llx", 1469 bp->buf_ds.ds_va, bp->buf_ds.ds_len, bp->buf_ds.ds_key); 1470 ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL); 1471 if (ret != IBT_SUCCESS) { 1472 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1473 "%d", ep, ret); 1474 bp->buf_state = RDS_SNDBUF_FREE; 1475 rds_free_send_buf(ep, bp, NULL, 1, B_FALSE); 1476 return (-1); 1477 } 1478 1479 RDS_DPRINTF4("rds_post_control_message", "Return SP(%p) Code: %d " 1480 "Port: %d", sp, code, port); 1481 1482 return (0); 1483 } 1484 1485 void 1486 rds_stall_port(rds_session_t *sp, in_port_t port, uint_t qualifier) 1487 { 1488 int ret; 1489 1490 RDS_DPRINTF4("rds_stall_port", "Enter: SP(%p) Port %d", sp, port); 1491 1492 RDS_INCR_STALLS_TRIGGERED(); 1493 1494 if (!rds_check_n_mark_port(sp, port, qualifier)) { 1495 1496 if (sp != NULL) { 1497 ret = rds_post_control_message(sp, 1498 RDS_CTRL_CODE_STALL, port); 1499 if (ret != 0) { 1500 (void) rds_check_n_unmark_port(sp, port, 1501 qualifier); 1502 return; 1503 } 1504 RDS_INCR_STALLS_SENT(); 1505 } 1506 } else { 1507 RDS_DPRINTF3(LABEL, 1508 "Port %d is already in stall state", port); 1509 } 1510 1511 RDS_DPRINTF4("rds_stall_port", "Return: SP(%p) Port %d", sp, port); 1512 } 1513 1514 void 1515 rds_resume_port(in_port_t port) 1516 { 1517 rds_session_t *sp; 1518 uint_t ix; 1519 int ret; 1520 1521 RDS_DPRINTF4("rds_resume_port", "Enter: Port %d", port); 1522 1523 RDS_INCR_UNSTALLS_TRIGGERED(); 1524 1525 /* resume loopback traffic */ 1526 (void) rds_check_n_unmark_port(NULL, port, RDS_LOOPBACK); 1527 1528 /* send unstall messages to resume the remote traffic */ 1529 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 1530 1531 sp = rdsib_statep->rds_sessionlistp; 1532 for (ix = 0; ix < rdsib_statep->rds_nsessions; ix++) { 1533 ASSERT(sp != NULL); 1534 if ((sp->session_state == RDS_SESSION_STATE_CONNECTED) && 1535 (rds_check_n_unmark_port(sp, port, RDS_LOCAL))) { 1536 ret = rds_post_control_message(sp, 1537 RDS_CTRL_CODE_UNSTALL, port); 1538 if (ret != 0) { 1539 (void) rds_check_n_mark_port(sp, port, 1540 RDS_LOCAL); 1541 } else { 1542 RDS_INCR_UNSTALLS_SENT(); 1543 } 1544 } 1545 1546 sp = sp->session_nextp; 1547 } 1548 1549 rw_exit(&rdsib_statep->rds_sessionlock); 1550 1551 RDS_DPRINTF4("rds_resume_port", "Return: Port %d", port); 1552 } 1553 1554 static int 1555 rds_build_n_post_msg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport, 1556 in_port_t recvport) 1557 { 1558 ibt_send_wr_t *wrp, wr; 1559 rds_buf_t *bp, *bp1; 1560 rds_data_hdr_t *pktp; 1561 uint32_t msgsize, npkts, residual, pktno, ix; 1562 int ret; 1563 1564 RDS_DPRINTF4("rds_build_n_post_msg", "Enter: EP(%p) UIOP(%p)", 1565 ep, uiop); 1566 1567 /* how many pkts are needed to carry this msg */ 1568 msgsize = uiop->uio_resid; 1569 npkts = ((msgsize - 1) / UserBufferSize) + 1; 1570 residual = ((msgsize - 1) % UserBufferSize) + 1; 1571 1572 RDS_DPRINTF5(LABEL, "EP(%p) UIOP(%p) msg size: %d npkts: %d", ep, uiop, 1573 msgsize, npkts); 1574 1575 /* Get the buffers needed to post this message */ 1576 bp = rds_get_send_buf(ep, npkts); 1577 if (bp == NULL) { 1578 RDS_INCR_ENOBUFS(); 1579 return (ENOBUFS); 1580 } 1581 1582 if (npkts > 1) { 1583 /* 1584 * multi-pkt messages are posted at the same time as a list 1585 * of WRs 1586 */ 1587 wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 1588 npkts, KM_SLEEP); 1589 } 1590 1591 1592 pktno = 0; 1593 bp1 = bp; 1594 do { 1595 /* prepare the header */ 1596 pktp = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va; 1597 pktp->dh_datalen = UserBufferSize; 1598 pktp->dh_npkts = npkts - pktno; 1599 pktp->dh_psn = pktno; 1600 pktp->dh_sendport = sendport; 1601 pktp->dh_recvport = recvport; 1602 bp1->buf_ds.ds_len = RdsPktSize; 1603 1604 /* copy the data */ 1605 ret = uiomove((uint8_t *)pktp + RDS_DATA_HDR_SZ, 1606 UserBufferSize, UIO_WRITE, uiop); 1607 if (ret != 0) { 1608 break; 1609 } 1610 1611 if (uiop->uio_resid == 0) { 1612 pktp->dh_datalen = residual; 1613 bp1->buf_ds.ds_len = residual + RDS_DATA_HDR_SZ; 1614 break; 1615 } 1616 pktno++; 1617 bp1 = bp1->buf_nextp; 1618 } while (uiop->uio_resid); 1619 1620 if (ret) { 1621 /* uiomove failed */ 1622 RDS_DPRINTF2("rds_build_n_post_msg", "UIO(%p) Move FAILED: %d", 1623 uiop, ret); 1624 if (npkts > 1) { 1625 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1626 } 1627 rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE); 1628 return (ret); 1629 } 1630 1631 if (npkts > 1) { 1632 /* multi-pkt message */ 1633 RDS_DPRINTF5(LABEL, "EP(%p) Sending Multiple Packets", ep); 1634 1635 bp1 = bp; 1636 for (ix = 0; ix < npkts; ix++) { 1637 wrp[ix].wr_id = (uintptr_t)bp1; 1638 wrp[ix].wr_flags = IBT_WR_NO_FLAGS; 1639 wrp[ix].wr_trans = IBT_RC_SRV; 1640 wrp[ix].wr_opcode = IBT_WRC_SEND; 1641 wrp[ix].wr_nds = 1; 1642 wrp[ix].wr_sgl = &bp1->buf_ds; 1643 bp1 = bp1->buf_nextp; 1644 } 1645 wrp[npkts - 1].wr_flags = IBT_WR_SEND_SOLICIT; 1646 1647 ret = ibt_post_send(ep->ep_chanhdl, wrp, npkts, &ix); 1648 if (ret != IBT_SUCCESS) { 1649 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1650 "%d for %d pkts", ep, ret, npkts); 1651 rds_free_send_buf(ep, bp, NULL, npkts, B_FALSE); 1652 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1653 return (ret); 1654 } 1655 1656 kmem_free(wrp, npkts * sizeof (ibt_send_wr_t)); 1657 } else { 1658 /* single pkt */ 1659 RDS_DPRINTF5(LABEL, "EP(%p) Sending Single Packet", ep); 1660 wr.wr_id = (uintptr_t)bp; 1661 wr.wr_flags = IBT_WR_SEND_SOLICIT; 1662 wr.wr_trans = IBT_RC_SRV; 1663 wr.wr_opcode = IBT_WRC_SEND; 1664 wr.wr_nds = 1; 1665 wr.wr_sgl = &bp->buf_ds; 1666 RDS_DPRINTF5(LABEL, "ds_va %p ds_key 0x%llx ds_len %d ", 1667 bp->buf_ds.ds_va, bp->buf_ds.ds_key, bp->buf_ds.ds_len); 1668 ret = ibt_post_send(ep->ep_chanhdl, &wr, 1, NULL); 1669 if (ret != IBT_SUCCESS) { 1670 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send failed: " 1671 "%d", ep, ret); 1672 rds_free_send_buf(ep, bp, NULL, 1, B_FALSE); 1673 return (ret); 1674 } 1675 } 1676 1677 RDS_INCR_TXPKTS(npkts); 1678 RDS_INCR_TXBYTES(msgsize); 1679 1680 RDS_DPRINTF4("rds_build_n_post_msg", "Return: EP(%p) UIOP(%p)", 1681 ep, uiop); 1682 1683 return (0); 1684 } 1685 1686 static int 1687 rds_deliver_loopback_msg(uio_t *uiop, ipaddr_t recvip, ipaddr_t sendip, 1688 in_port_t recvport, in_port_t sendport, zoneid_t zoneid) 1689 { 1690 mblk_t *mp; 1691 int ret; 1692 1693 RDS_DPRINTF4("rds_deliver_loopback_msg", "Enter"); 1694 1695 RDS_DPRINTF3(LABEL, "Loopback message: sendport: " 1696 "%d to recvport: %d", sendport, recvport); 1697 1698 mp = allocb(uiop->uio_resid, BPRI_MED); 1699 if (mp == NULL) { 1700 RDS_DPRINTF2(LABEL, "allocb failed, size: %d\n", 1701 uiop->uio_resid); 1702 return (ENOSPC); 1703 } 1704 mp->b_wptr = mp->b_rptr + uiop->uio_resid; 1705 1706 ret = uiomove(mp->b_rptr, uiop->uio_resid, UIO_WRITE, uiop); 1707 if (ret) { 1708 RDS_DPRINTF2(LABEL, "ERROR: uiomove returned: %d", ret); 1709 freeb(mp); 1710 return (ret); 1711 } 1712 1713 ret = rds_deliver_new_msg(mp, recvip, sendip, recvport, sendport, 1714 zoneid); 1715 if (ret != 0) { 1716 if (ret == ENOSPC) { 1717 /* 1718 * The message is delivered but cannot take more, 1719 * stop further loopback traffic to this port 1720 */ 1721 RDS_DPRINTF3("rds_deliver_loopback_msg", 1722 "Port %d NO SPACE", recvport); 1723 rds_stall_port(NULL, recvport, RDS_LOOPBACK); 1724 } else { 1725 RDS_DPRINTF2(LABEL, "Loopback message: port %d -> " 1726 "port %d failed: %d", sendport, recvport, ret); 1727 return (ret); 1728 } 1729 } 1730 1731 RDS_DPRINTF4("rds_deliver_loopback_msg", "Return"); 1732 return (0); 1733 } 1734 1735 static void 1736 rds_resend_messages(void *arg) 1737 { 1738 rds_session_t *sp = (rds_session_t *)arg; 1739 rds_ep_t *ep; 1740 rds_bufpool_t *spool; 1741 rds_buf_t *bp, *endp, *tmp; 1742 ibt_send_wr_t *wrp; 1743 uint_t nwr = 0, ix, jx; 1744 int ret; 1745 1746 RDS_DPRINTF2("rds_resend_messages", "Enter: SP(%p)", sp); 1747 1748 ep = &sp->session_dataep; 1749 1750 spool = &ep->ep_sndpool; 1751 mutex_enter(&spool->pool_lock); 1752 1753 ASSERT(spool->pool_nfree == spool->pool_nbuffers); 1754 1755 if (ep->ep_lbufid == NULL) { 1756 RDS_DPRINTF2("rds_resend_messages", 1757 "SP(%p) Remote session is cleaned up ", sp); 1758 /* 1759 * The remote end cleaned up its session. There may be loss 1760 * of messages. Mark all buffers as acknowledged. 1761 */ 1762 tmp = spool->pool_tailp; 1763 } else { 1764 tmp = (rds_buf_t *)ep->ep_lbufid; 1765 RDS_DPRINTF2("rds_resend_messages", 1766 "SP(%p) Last successful BP(%p) ", sp, tmp); 1767 } 1768 1769 endp = spool->pool_tailp; 1770 bp = spool->pool_headp; 1771 jx = 0; 1772 while ((bp != NULL) && (bp != tmp)) { 1773 bp->buf_state = RDS_SNDBUF_FREE; 1774 jx++; 1775 bp = bp->buf_nextp; 1776 } 1777 1778 if (bp == NULL) { 1779 mutex_exit(&spool->pool_lock); 1780 RDS_DPRINTF2("rds_resend_messages", "Alert: lbufid(%p) is not " 1781 "found in the list", tmp); 1782 1783 rw_enter(&sp->session_lock, RW_WRITER); 1784 if (sp->session_state == RDS_SESSION_STATE_INIT) { 1785 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1786 } else { 1787 RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d " 1788 "Expected State: %d", sp, sp->session_state, 1789 RDS_SESSION_STATE_CONNECTED); 1790 } 1791 sp->session_failover = 0; 1792 rw_exit(&sp->session_lock); 1793 return; 1794 } 1795 1796 /* Found the match */ 1797 bp->buf_state = RDS_SNDBUF_FREE; 1798 jx++; 1799 1800 spool->pool_tailp = bp; 1801 bp = bp->buf_nextp; 1802 spool->pool_tailp->buf_nextp = NULL; 1803 nwr = spool->pool_nfree - jx; 1804 spool->pool_nfree = jx; 1805 mutex_exit(&spool->pool_lock); 1806 1807 RDS_DPRINTF2("rds_resend_messages", "SP(%p): Number of " 1808 "bufs (BP %p) to re-send: %d", sp, bp, nwr); 1809 1810 if (bp) { 1811 wrp = (ibt_send_wr_t *)kmem_zalloc(sizeof (ibt_send_wr_t) * 100, 1812 KM_SLEEP); 1813 1814 while (nwr) { 1815 jx = (nwr > 100) ? 100 : nwr; 1816 1817 tmp = bp; 1818 for (ix = 0; ix < jx; ix++) { 1819 bp->buf_state = RDS_SNDBUF_PENDING; 1820 wrp[ix].wr_id = (uintptr_t)bp; 1821 wrp[ix].wr_flags = IBT_WR_SEND_SOLICIT; 1822 wrp[ix].wr_trans = IBT_RC_SRV; 1823 wrp[ix].wr_opcode = IBT_WRC_SEND; 1824 wrp[ix].wr_nds = 1; 1825 wrp[ix].wr_sgl = &bp->buf_ds; 1826 bp = bp->buf_nextp; 1827 } 1828 1829 ret = ibt_post_send(ep->ep_chanhdl, wrp, jx, &ix); 1830 if (ret != IBT_SUCCESS) { 1831 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send " 1832 "failed: %d for % pkts", ep, ret, jx); 1833 break; 1834 } 1835 1836 mutex_enter(&spool->pool_lock); 1837 spool->pool_nbusy += jx; 1838 mutex_exit(&spool->pool_lock); 1839 1840 nwr -= jx; 1841 } 1842 1843 kmem_free(wrp, sizeof (ibt_send_wr_t) * 100); 1844 1845 if (nwr != 0) { 1846 1847 /* 1848 * An error while failover is in progress. Some WRs are 1849 * posted while other remain. If any of the posted WRs 1850 * complete in error then they would dispatch a taskq to 1851 * do a failover. Getting the session lock will prevent 1852 * the taskq to wait until we are done here. 1853 */ 1854 rw_enter(&sp->session_lock, RW_READER); 1855 1856 /* 1857 * Wait until all the previous WRs are completed and 1858 * then queue the remaining, otherwise the order of 1859 * the messages may change. 1860 */ 1861 (void) rds_is_sendq_empty(ep, 1); 1862 1863 /* free the remaining buffers */ 1864 rds_free_send_buf(ep, tmp, endp, nwr, B_FALSE); 1865 1866 rw_exit(&sp->session_lock); 1867 return; 1868 } 1869 } 1870 1871 rw_enter(&sp->session_lock, RW_WRITER); 1872 if (sp->session_state == RDS_SESSION_STATE_INIT) { 1873 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1874 } else { 1875 RDS_DPRINTF2("rds_resend_messages", "SP(%p) State: %d " 1876 "Expected State: %d", sp, sp->session_state, 1877 RDS_SESSION_STATE_CONNECTED); 1878 } 1879 sp->session_failover = 0; 1880 rw_exit(&sp->session_lock); 1881 1882 RDS_DPRINTF2("rds_resend_messages", "Return: SP(%p)", sp); 1883 } 1884 1885 /* 1886 * This is called when a channel is connected. Transition the session to 1887 * CONNECTED state iff both channels are connected. 1888 */ 1889 void 1890 rds_session_active(rds_session_t *sp) 1891 { 1892 rds_ep_t *ep; 1893 uint_t failover; 1894 1895 RDS_DPRINTF2("rds_session_active", "Enter: 0x%p", sp); 1896 1897 rw_enter(&sp->session_lock, RW_READER); 1898 1899 failover = sp->session_failover; 1900 1901 /* 1902 * we establish the data channel first, so check the control channel 1903 * first but make sure it is initialized. 1904 */ 1905 ep = &sp->session_ctrlep; 1906 mutex_enter(&ep->ep_lock); 1907 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 1908 /* the session is not ready yet */ 1909 mutex_exit(&ep->ep_lock); 1910 rw_exit(&sp->session_lock); 1911 return; 1912 } 1913 mutex_exit(&ep->ep_lock); 1914 1915 /* control channel is connected, check the data channel */ 1916 ep = &sp->session_dataep; 1917 mutex_enter(&ep->ep_lock); 1918 if (ep->ep_state != RDS_EP_STATE_CONNECTED) { 1919 /* data channel is not yet connected */ 1920 mutex_exit(&ep->ep_lock); 1921 rw_exit(&sp->session_lock); 1922 return; 1923 } 1924 mutex_exit(&ep->ep_lock); 1925 1926 if (failover) { 1927 rw_exit(&sp->session_lock); 1928 1929 /* 1930 * The session has failed over. Previous msgs have to be 1931 * re-sent before the session is moved to the connected 1932 * state. 1933 */ 1934 RDS_DPRINTF2("rds_session_active", "SP(%p) Dispatching taskq " 1935 "to re-send messages", sp); 1936 (void) ddi_taskq_dispatch(rds_taskq, 1937 rds_resend_messages, (void *)sp, DDI_SLEEP); 1938 return; 1939 } 1940 1941 /* the session is ready */ 1942 sp->session_state = RDS_SESSION_STATE_CONNECTED; 1943 RDS_DPRINTF3("rds_session_active", 1944 "SP(%p) State RDS_SESSION_STATE_CONNECTED", sp); 1945 1946 rw_exit(&sp->session_lock); 1947 1948 RDS_DPRINTF2("rds_session_active", "Return: SP(%p) is CONNECTED", sp); 1949 } 1950 1951 static int 1952 rds_ep_sendmsg(rds_ep_t *ep, uio_t *uiop, in_port_t sendport, 1953 in_port_t recvport) 1954 { 1955 int ret; 1956 1957 RDS_DPRINTF4("rds_ep_sendmsg", "Enter: EP(%p) sendport: %d recvport: " 1958 "%d", ep, sendport, recvport); 1959 1960 /* make sure the remote port is not stalled */ 1961 if (rds_is_port_marked(ep->ep_sp, recvport, RDS_REMOTE)) { 1962 RDS_DPRINTF2(LABEL, "SP(%p) Port:%d is in stall state", 1963 ep->ep_sp, recvport); 1964 RDS_INCR_EWOULDBLOCK(); 1965 ret = ENOMEM; 1966 } else { 1967 ret = rds_build_n_post_msg(ep, uiop, sendport, recvport); 1968 } 1969 1970 RDS_DPRINTF4("rds_ep_sendmsg", "Return: EP(%p)", ep); 1971 1972 return (ret); 1973 } 1974 1975 /* Send a message to a destination socket */ 1976 int 1977 rds_sendmsg(uio_t *uiop, ipaddr_t sendip, ipaddr_t recvip, in_port_t sendport, 1978 in_port_t recvport, zoneid_t zoneid) 1979 { 1980 rds_session_t *sp; 1981 ib_gid_t lgid, rgid; 1982 int ret; 1983 1984 RDS_DPRINTF4("rds_sendmsg", "Enter: uiop: 0x%p, srcIP: 0x%x destIP: " 1985 "0x%x sndport: %d recvport: %d", uiop, sendip, recvip, 1986 sendport, recvport); 1987 1988 /* If msg length is 0, just return success */ 1989 if (uiop->uio_resid == 0) { 1990 RDS_DPRINTF2("rds_sendmsg", "Zero sized message"); 1991 return (0); 1992 } 1993 1994 /* Is there a session to the destination? */ 1995 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 1996 sp = rds_session_lkup(rdsib_statep, recvip, 0); 1997 rw_exit(&rdsib_statep->rds_sessionlock); 1998 1999 /* Is this a loopback message? */ 2000 if ((sp == NULL) && (rds_islocal(recvip))) { 2001 /* make sure the port is not stalled */ 2002 if (rds_is_port_marked(NULL, recvport, RDS_LOOPBACK)) { 2003 RDS_DPRINTF2(LABEL, "Local Port:%d is in stall state", 2004 recvport); 2005 RDS_INCR_EWOULDBLOCK(); 2006 return (ENOMEM); 2007 } 2008 ret = rds_deliver_loopback_msg(uiop, recvip, sendip, recvport, 2009 sendport, zoneid); 2010 return (ret); 2011 } 2012 2013 /* Not a loopback message */ 2014 if (sp == NULL) { 2015 /* There is no session to the destination, create one. */ 2016 RDS_DPRINTF3(LABEL, "There is no session to the destination " 2017 "IP: 0x%x", recvip); 2018 sp = rds_session_create(rdsib_statep, sendip, recvip, NULL, 2019 RDS_SESSION_ACTIVE); 2020 if (sp != NULL) { 2021 rw_enter(&sp->session_lock, RW_WRITER); 2022 if (sp->session_type == RDS_SESSION_ACTIVE) { 2023 ret = rds_session_init(sp); 2024 if (ret != 0) { 2025 RDS_DPRINTF2("rds_sendmsg", 2026 "SP(%p): rds_session_init failed", 2027 sp); 2028 sp->session_state = 2029 RDS_SESSION_STATE_FAILED; 2030 RDS_DPRINTF3("rds_sendmsg", 2031 "SP(%p) State " 2032 "RDS_SESSION_STATE_FAILED", sp); 2033 rw_exit(&sp->session_lock); 2034 return (EFAULT); 2035 } 2036 sp->session_state = RDS_SESSION_STATE_INIT; 2037 RDS_DPRINTF3("rds_sendmsg", 2038 "SP(%p) State " 2039 "RDS_SESSION_STATE_INIT", sp); 2040 rw_exit(&sp->session_lock); 2041 rds_session_open(sp); 2042 } else { 2043 rw_exit(&sp->session_lock); 2044 } 2045 } else { 2046 /* Is a session created for this destination */ 2047 rw_enter(&rdsib_statep->rds_sessionlock, RW_READER); 2048 sp = rds_session_lkup(rdsib_statep, recvip, 0); 2049 rw_exit(&rdsib_statep->rds_sessionlock); 2050 if (sp == NULL) { 2051 return (EFAULT); 2052 } 2053 } 2054 } 2055 2056 /* There is a session to the destination */ 2057 rw_enter(&sp->session_lock, RW_READER); 2058 if (sp->session_state == RDS_SESSION_STATE_CONNECTED) { 2059 rw_exit(&sp->session_lock); 2060 2061 ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport, 2062 recvport); 2063 return (ret); 2064 } else if ((sp->session_state == RDS_SESSION_STATE_FAILED) || 2065 (sp->session_state == RDS_SESSION_STATE_FINI)) { 2066 ipaddr_t sendip1, recvip1; 2067 2068 RDS_DPRINTF3("rds_sendmsg", "SP(%p) is not connected, State: " 2069 "%d", sp, sp->session_state); 2070 rw_exit(&sp->session_lock); 2071 rw_enter(&sp->session_lock, RW_WRITER); 2072 if ((sp->session_state == RDS_SESSION_STATE_FAILED) || 2073 (sp->session_state == RDS_SESSION_STATE_FINI)) { 2074 ibt_ip_path_attr_t ipattr; 2075 ibt_ip_addr_t dstip; 2076 2077 sp->session_state = RDS_SESSION_STATE_CREATED; 2078 sp->session_type = RDS_SESSION_ACTIVE; 2079 RDS_DPRINTF3("rds_sendmsg", "SP(%p) State " 2080 "RDS_SESSION_STATE_CREATED", sp); 2081 rw_exit(&sp->session_lock); 2082 2083 2084 /* The ipaddr should be in the network order */ 2085 sendip1 = sendip; 2086 recvip1 = recvip; 2087 ret = rds_sc_path_lookup(&sendip1, &recvip1); 2088 if (ret == 0) { 2089 RDS_DPRINTF2(LABEL, "Path not found " 2090 "(0x%x 0x%x)", sendip1, recvip1); 2091 } 2092 2093 /* Resolve the IP addresses */ 2094 lgid.gid_prefix = 0; 2095 lgid.gid_guid = 0; 2096 rgid.gid_prefix = 0; 2097 rgid.gid_guid = 0; 2098 2099 bzero(&ipattr, sizeof (ibt_ip_path_attr_t)); 2100 dstip.family = AF_INET; 2101 dstip.un.ip4addr = recvip1; 2102 ipattr.ipa_dst_ip = &dstip; 2103 ipattr.ipa_src_ip.family = AF_INET; 2104 ipattr.ipa_src_ip.un.ip4addr = sendip1; 2105 ipattr.ipa_ndst = 1; 2106 ipattr.ipa_max_paths = 1; 2107 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths: 0x%x <-> 0x%x ", 2108 sendip1, recvip1); 2109 ret = ibt_get_ip_paths(rdsib_statep->rds_ibhdl, 2110 IBT_PATH_NO_FLAGS, &ipattr, &sp->session_pinfo, 2111 NULL, NULL); 2112 if (ret != IBT_SUCCESS) { 2113 RDS_DPRINTF2("rds_sendmsg", 2114 "ibt_get_ip_paths failed, ret: %d ", ret); 2115 2116 rw_enter(&sp->session_lock, RW_WRITER); 2117 if (sp->session_type == RDS_SESSION_ACTIVE) { 2118 sp->session_state = 2119 RDS_SESSION_STATE_FAILED; 2120 RDS_DPRINTF3("rds_sendmsg", 2121 "SP(%p) State " 2122 "RDS_SESSION_STATE_FAILED", sp); 2123 rw_exit(&sp->session_lock); 2124 return (EFAULT); 2125 } else { 2126 rw_exit(&sp->session_lock); 2127 return (ENOMEM); 2128 } 2129 } 2130 RDS_DPRINTF2(LABEL, "ibt_get_ip_paths success"); 2131 lgid = sp->session_pinfo. 2132 pi_prim_cep_path.cep_adds_vect.av_sgid; 2133 rgid = sp->session_pinfo. 2134 pi_prim_cep_path.cep_adds_vect.av_dgid; 2135 2136 RDS_DPRINTF2(LABEL, "lgid: %llx:%llx rgid: %llx:%llx", 2137 lgid.gid_prefix, lgid.gid_guid, rgid.gid_prefix, 2138 rgid.gid_guid); 2139 2140 rw_enter(&sp->session_lock, RW_WRITER); 2141 if (sp->session_type == RDS_SESSION_ACTIVE) { 2142 sp->session_lgid = lgid; 2143 sp->session_rgid = rgid; 2144 ret = rds_session_init(sp); 2145 if (ret != 0) { 2146 RDS_DPRINTF2("rds_sendmsg", 2147 "SP(%p): rds_session_init failed", 2148 sp); 2149 sp->session_state = 2150 RDS_SESSION_STATE_FAILED; 2151 RDS_DPRINTF3("rds_sendmsg", 2152 "SP(%p) State " 2153 "RDS_SESSION_STATE_FAILED", sp); 2154 rw_exit(&sp->session_lock); 2155 return (EFAULT); 2156 } 2157 sp->session_state = RDS_SESSION_STATE_INIT; 2158 rw_exit(&sp->session_lock); 2159 2160 rds_session_open(sp); 2161 2162 } else { 2163 RDS_DPRINTF2("rds_sendmsg", 2164 "SP(%p): type changed to %d", 2165 sp, sp->session_type); 2166 rw_exit(&sp->session_lock); 2167 return (ENOMEM); 2168 } 2169 } else { 2170 RDS_DPRINTF2("rds_sendmsg", 2171 "SP(%p): Session state %d changed", 2172 sp, sp->session_state); 2173 rw_exit(&sp->session_lock); 2174 return (ENOMEM); 2175 } 2176 } else { 2177 RDS_DPRINTF4("rds_sendmsg", "SP(%p): Session is in %d state", 2178 sp, sp->session_state); 2179 rw_exit(&sp->session_lock); 2180 return (ENOMEM); 2181 } 2182 2183 rw_enter(&sp->session_lock, RW_READER); 2184 if (sp->session_state == RDS_SESSION_STATE_CONNECTED) { 2185 rw_exit(&sp->session_lock); 2186 2187 ret = rds_ep_sendmsg(&sp->session_dataep, uiop, sendport, 2188 recvport); 2189 } else { 2190 RDS_DPRINTF2("rds_sendmsg", "SP(%p): state(%d) not connected", 2191 sp, sp->session_state); 2192 rw_exit(&sp->session_lock); 2193 } 2194 2195 RDS_DPRINTF4("rds_sendmsg", "Return: SP(%p) ret: %d", sp, ret); 2196 2197 return (ret); 2198 } 2199 2200 /* Note: This is called on the CQ handler thread */ 2201 void 2202 rds_received_msg(rds_ep_t *ep, rds_buf_t *bp) 2203 { 2204 mblk_t *mp, *mp1; 2205 rds_data_hdr_t *pktp, *pktp1; 2206 uint8_t *datap; 2207 rds_buf_t *bp1; 2208 rds_bufpool_t *rpool; 2209 uint_t npkts, ix; 2210 int ret; 2211 2212 RDS_DPRINTF4("rds_received_msg", "Enter: EP(%p)", ep); 2213 2214 pktp = (rds_data_hdr_t *)(uintptr_t)bp->buf_ds.ds_va; 2215 datap = ((uint8_t *)(uintptr_t)bp->buf_ds.ds_va) + RDS_DATA_HDR_SZ; 2216 npkts = pktp->dh_npkts; 2217 2218 /* increment rx pending here */ 2219 rpool = &ep->ep_rcvpool; 2220 mutex_enter(&rpool->pool_lock); 2221 rpool->pool_nbusy += npkts; 2222 mutex_exit(&rpool->pool_lock); 2223 2224 /* this will get freed by sockfs */ 2225 mp = esballoc(datap, pktp->dh_datalen, BPRI_HI, &bp->buf_frtn); 2226 if (mp == NULL) { 2227 RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed", 2228 ep, bp); 2229 rds_free_recv_buf(bp, npkts); 2230 return; 2231 } 2232 mp->b_wptr = datap + pktp->dh_datalen; 2233 mp->b_datap->db_type = M_DATA; 2234 2235 mp1 = mp; 2236 bp1 = bp->buf_nextp; 2237 while (bp1 != NULL) { 2238 pktp1 = (rds_data_hdr_t *)(uintptr_t)bp1->buf_ds.ds_va; 2239 datap = ((uint8_t *)(uintptr_t)bp1->buf_ds.ds_va) + 2240 RDS_DATA_HDR_SZ; 2241 2242 mp1->b_cont = esballoc(datap, pktp1->dh_datalen, 2243 BPRI_HI, &bp1->buf_frtn); 2244 if (mp1->b_cont == NULL) { 2245 RDS_DPRINTF2(LABEL, "EP(%p) BP(%p): allocb failed", 2246 ep, bp1); 2247 freemsg(mp); 2248 rds_free_recv_buf(bp1, pktp1->dh_npkts); 2249 return; 2250 } 2251 mp1 = mp1->b_cont; 2252 mp1->b_wptr = datap + pktp1->dh_datalen; 2253 mp1->b_datap->db_type = M_DATA; 2254 2255 bp1 = bp1->buf_nextp; 2256 } 2257 2258 RDS_INCR_RXPKTS_PEND(npkts); 2259 RDS_INCR_RXPKTS(npkts); 2260 RDS_INCR_RXBYTES(msgdsize(mp)); 2261 2262 RDS_DPRINTF5(LABEL, "Deliver Message: sendIP: 0x%x recvIP: 0x%x " 2263 "sendport: %d recvport: %d npkts: %d pktno: %d", ep->ep_remip, 2264 ep->ep_myip, pktp->dh_sendport, pktp->dh_recvport, 2265 npkts, pktp->dh_psn); 2266 2267 /* store the last buffer id, no lock needed */ 2268 if (npkts > 1) { 2269 ep->ep_rbufid = pktp1->dh_bufid; 2270 } else { 2271 ep->ep_rbufid = pktp->dh_bufid; 2272 } 2273 2274 ret = rds_deliver_new_msg(mp, ep->ep_myip, ep->ep_remip, 2275 pktp->dh_recvport, pktp->dh_sendport, ALL_ZONES); 2276 if (ret != 0) { 2277 if (ret == ENOSPC) { 2278 /* 2279 * The message is delivered but cannot take more, 2280 * stop further remote messages coming to this port 2281 */ 2282 RDS_DPRINTF3("rds_received_msg", "Port %d NO SPACE", 2283 pktp->dh_recvport); 2284 rds_stall_port(ep->ep_sp, pktp->dh_recvport, RDS_LOCAL); 2285 } else { 2286 RDS_DPRINTF2(LABEL, "rds_deliver_new_msg returned: %d", 2287 ret); 2288 } 2289 } 2290 2291 mutex_enter(&ep->ep_lock); 2292 /* The first message can come in before the conn est event */ 2293 if ((ep->ep_rdmacnt == 0) && (ep->ep_state == RDS_EP_STATE_CONNECTED)) { 2294 ep->ep_rdmacnt++; 2295 *(uintptr_t *)(uintptr_t)ep->ep_ackds.ds_va = ep->ep_rbufid; 2296 mutex_exit(&ep->ep_lock); 2297 2298 /* send acknowledgement */ 2299 RDS_INCR_TXACKS(); 2300 ret = ibt_post_send(ep->ep_chanhdl, &ep->ep_ackwr, 1, &ix); 2301 if (ret != IBT_SUCCESS) { 2302 RDS_DPRINTF2(LABEL, "EP(%p): ibt_post_send for " 2303 "acknowledgement failed: %d, SQ depth: %d", 2304 ep, ret, ep->ep_sndpool.pool_nbusy); 2305 mutex_enter(&ep->ep_lock); 2306 ep->ep_rdmacnt--; 2307 mutex_exit(&ep->ep_lock); 2308 } 2309 } else { 2310 /* no room to send acknowledgement */ 2311 mutex_exit(&ep->ep_lock); 2312 } 2313 2314 RDS_DPRINTF4("rds_received_msg", "Return: EP(%p)", ep); 2315 } 2316