1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * tavor_umap.c 29 * Tavor Userland Mapping Routines 30 * 31 * Implements all the routines necessary for enabling direct userland 32 * access to the Tavor hardware. This includes all routines necessary for 33 * maintaining the "userland resources database" and all the support routines 34 * for the devmap calls. 35 */ 36 37 #include <sys/types.h> 38 #include <sys/conf.h> 39 #include <sys/ddi.h> 40 #include <sys/sunddi.h> 41 #include <sys/modctl.h> 42 #include <sys/file.h> 43 #include <sys/avl.h> 44 #include <sys/sysmacros.h> 45 46 #include <sys/ib/adapters/tavor/tavor.h> 47 48 /* Tavor HCA state pointer (extern) */ 49 extern void *tavor_statep; 50 51 /* Tavor HCA Userland Resource Database (extern) */ 52 extern tavor_umap_db_t tavor_userland_rsrc_db; 53 54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp, 55 tavor_rsrc_t *rsrcp, size_t *maplen, int *err); 56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp, 57 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err); 58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp, 59 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err); 60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp, 61 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err); 62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, 63 offset_t off, size_t len, void **pvtp); 64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, 65 devmap_cookie_t new_dhp, void **new_pvtp); 66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, 67 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1, 68 devmap_cookie_t new_dhp2, void **pvtp2); 69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, 70 offset_t off, size_t len, void **pvtp); 71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp, 72 devmap_cookie_t new_dhp, void **new_pvtp); 73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, 74 offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1, 75 devmap_cookie_t new_dhp2, void **pvtp2); 76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr, 77 ibt_mr_data_in_t *data, size_t data_sz); 78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq, 79 mlnx_umap_cq_data_out_t *data, size_t data_sz); 80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp, 81 mlnx_umap_qp_data_out_t *data, size_t data_sz); 82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq, 83 mlnx_umap_srq_data_out_t *data, size_t data_sz); 84 static int tavor_umap_db_compare(const void *query, const void *entry); 85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd, 86 mlnx_umap_pd_data_out_t *data, size_t data_sz); 87 88 89 /* 90 * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(), 91 * respectively. They are used to handle (among other things) partial 92 * unmappings and to provide a method for invalidating mappings inherited 93 * as a result of a fork(2) system call. 94 */ 95 static struct devmap_callback_ctl tavor_devmap_umem_cbops = { 96 DEVMAP_OPS_REV, 97 tavor_devmap_umem_map, 98 NULL, 99 tavor_devmap_umem_dup, 100 tavor_devmap_umem_unmap 101 }; 102 static struct devmap_callback_ctl tavor_devmap_devmem_cbops = { 103 DEVMAP_OPS_REV, 104 tavor_devmap_devmem_map, 105 NULL, 106 tavor_devmap_devmem_dup, 107 tavor_devmap_devmem_unmap 108 }; 109 110 /* 111 * tavor_devmap() 112 * Context: Can be called from user context. 113 */ 114 /* ARGSUSED */ 115 int 116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len, 117 size_t *maplen, uint_t model) 118 { 119 tavor_state_t *state; 120 tavor_rsrc_t *rsrcp; 121 minor_t instance; 122 uint64_t key, value; 123 uint_t type; 124 int err, status; 125 126 /* Get Tavor softstate structure from instance */ 127 instance = TAVOR_DEV_INSTANCE(dev); 128 state = ddi_get_soft_state(tavor_statep, instance); 129 if (state == NULL) { 130 return (ENXIO); 131 } 132 133 /* 134 * Access to Tavor devmap interface is not allowed in 135 * "maintenance mode". 136 */ 137 if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) { 138 return (EFAULT); 139 } 140 141 /* 142 * The bottom bits of "offset" are undefined (number depends on 143 * system PAGESIZE). Shifting these off leaves us with a "key". 144 * The "key" is actually a combination of both a real key value 145 * (for the purpose of database lookup) and a "type" value. We 146 * extract this information before doing the database lookup. 147 */ 148 key = off >> PAGESHIFT; 149 type = key & MLNX_UMAP_RSRC_TYPE_MASK; 150 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT; 151 status = tavor_umap_db_find(instance, key, type, &value, 0, NULL); 152 if (status == DDI_SUCCESS) { 153 rsrcp = (tavor_rsrc_t *)(uintptr_t)value; 154 155 switch (type) { 156 case MLNX_UMAP_UARPG_RSRC: 157 /* 158 * Double check that process who open()'d Tavor is 159 * same process attempting to mmap() UAR page. 160 */ 161 if (key != ddi_get_pid()) { 162 return (EINVAL); 163 } 164 165 /* Map the UAR page out for userland access */ 166 status = tavor_umap_uarpg(state, dhp, rsrcp, maplen, 167 &err); 168 if (status != DDI_SUCCESS) { 169 return (err); 170 } 171 break; 172 173 case MLNX_UMAP_CQMEM_RSRC: 174 /* Map the CQ memory out for userland access */ 175 status = tavor_umap_cqmem(state, dhp, rsrcp, off, 176 maplen, &err); 177 if (status != DDI_SUCCESS) { 178 return (err); 179 } 180 break; 181 182 case MLNX_UMAP_QPMEM_RSRC: 183 /* Map the QP memory out for userland access */ 184 status = tavor_umap_qpmem(state, dhp, rsrcp, off, 185 maplen, &err); 186 if (status != DDI_SUCCESS) { 187 return (err); 188 } 189 break; 190 191 case MLNX_UMAP_SRQMEM_RSRC: 192 /* Map the SRQ memory out for userland access */ 193 status = tavor_umap_srqmem(state, dhp, rsrcp, off, 194 maplen, &err); 195 if (status != DDI_SUCCESS) { 196 return (err); 197 } 198 break; 199 200 default: 201 TAVOR_WARNING(state, "unexpected rsrc type in devmap"); 202 return (EINVAL); 203 } 204 } else { 205 return (EINVAL); 206 } 207 208 return (0); 209 } 210 211 212 /* 213 * tavor_umap_uarpg() 214 * Context: Can be called from user context. 215 */ 216 static int 217 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp, 218 tavor_rsrc_t *rsrcp, size_t *maplen, int *err) 219 { 220 int status; 221 uint_t maxprot; 222 223 /* Map out the UAR page (doorbell page) */ 224 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 225 status = devmap_devmem_setup(dhp, state->ts_dip, 226 &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx << 227 PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP, 228 &state->ts_reg_accattr); 229 if (status < 0) { 230 *err = status; 231 return (DDI_FAILURE); 232 } 233 234 *maplen = PAGESIZE; 235 return (DDI_SUCCESS); 236 } 237 238 239 /* 240 * tavor_umap_cqmem() 241 * Context: Can be called from user context. 242 */ 243 /* ARGSUSED */ 244 static int 245 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp, 246 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err) 247 { 248 tavor_cqhdl_t cq; 249 size_t size; 250 uint_t maxprot; 251 int status; 252 253 /* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */ 254 cq = (tavor_cqhdl_t)rsrcp->tr_addr; 255 256 /* Round-up the CQ size to system page size */ 257 size = ptob(btopr(cq->cq_cqinfo.qa_size)); 258 259 /* Map out the CQ memory */ 260 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 261 status = devmap_umem_setup(dhp, state->ts_dip, 262 &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size, 263 maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL); 264 if (status < 0) { 265 *err = status; 266 return (DDI_FAILURE); 267 } 268 *maplen = size; 269 270 return (DDI_SUCCESS); 271 } 272 273 274 /* 275 * tavor_umap_qpmem() 276 * Context: Can be called from user context. 277 */ 278 /* ARGSUSED */ 279 static int 280 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp, 281 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err) 282 { 283 tavor_qphdl_t qp; 284 offset_t offset; 285 size_t size; 286 uint_t maxprot; 287 int status; 288 289 /* Extract the Tavor QP handle pointer from the tavor_rsrc_t */ 290 qp = (tavor_qphdl_t)rsrcp->tr_addr; 291 292 /* 293 * Calculate the offset of the first work queue (send or recv) into 294 * the memory (ddi_umem_alloc()) allocated previously for the QP. 295 */ 296 offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned - 297 (uintptr_t)qp->qp_wqinfo.qa_buf_real); 298 299 /* Round-up the QP work queue sizes to system page size */ 300 size = ptob(btopr(qp->qp_wqinfo.qa_size)); 301 302 /* Map out the QP memory */ 303 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 304 status = devmap_umem_setup(dhp, state->ts_dip, 305 &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset, 306 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL); 307 if (status < 0) { 308 *err = status; 309 return (DDI_FAILURE); 310 } 311 *maplen = size; 312 313 return (DDI_SUCCESS); 314 } 315 316 317 /* 318 * tavor_umap_srqmem() 319 * Context: Can be called from user context. 320 */ 321 /* ARGSUSED */ 322 static int 323 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp, 324 tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err) 325 { 326 tavor_srqhdl_t srq; 327 offset_t offset; 328 size_t size; 329 uint_t maxprot; 330 int status; 331 332 /* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */ 333 srq = (tavor_srqhdl_t)rsrcp->tr_addr; 334 335 /* 336 * Calculate the offset of the first shared recv queue into the memory 337 * (ddi_umem_alloc()) allocated previously for the SRQ. 338 */ 339 offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned - 340 (uintptr_t)srq->srq_wqinfo.qa_buf_real); 341 342 /* Round-up the SRQ work queue sizes to system page size */ 343 size = ptob(btopr(srq->srq_wqinfo.qa_size)); 344 345 /* Map out the QP memory */ 346 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 347 status = devmap_umem_setup(dhp, state->ts_dip, 348 &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset, 349 size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL); 350 if (status < 0) { 351 *err = status; 352 return (DDI_FAILURE); 353 } 354 *maplen = size; 355 356 return (DDI_SUCCESS); 357 } 358 359 360 /* 361 * tavor_devmap_umem_map() 362 * Context: Can be called from kernel context. 363 */ 364 /* ARGSUSED */ 365 static int 366 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, 367 offset_t off, size_t len, void **pvtp) 368 { 369 tavor_state_t *state; 370 tavor_devmap_track_t *dvm_track; 371 tavor_cqhdl_t cq; 372 tavor_qphdl_t qp; 373 tavor_srqhdl_t srq; 374 minor_t instance; 375 uint64_t key; 376 uint_t type; 377 378 /* Get Tavor softstate structure from instance */ 379 instance = TAVOR_DEV_INSTANCE(dev); 380 state = ddi_get_soft_state(tavor_statep, instance); 381 if (state == NULL) { 382 return (ENXIO); 383 } 384 385 /* 386 * The bottom bits of "offset" are undefined (number depends on 387 * system PAGESIZE). Shifting these off leaves us with a "key". 388 * The "key" is actually a combination of both a real key value 389 * (for the purpose of database lookup) and a "type" value. Although 390 * we are not going to do any database lookup per se, we do want 391 * to extract the "key" and the "type" (to enable faster lookup of 392 * the appropriate CQ or QP handle). 393 */ 394 key = off >> PAGESHIFT; 395 type = key & MLNX_UMAP_RSRC_TYPE_MASK; 396 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT; 397 398 /* 399 * Allocate an entry to track the mapping and unmapping (specifically, 400 * partial unmapping) of this resource. 401 */ 402 dvm_track = (tavor_devmap_track_t *)kmem_zalloc( 403 sizeof (tavor_devmap_track_t), KM_SLEEP); 404 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track)) 405 dvm_track->tdt_offset = off; 406 dvm_track->tdt_state = state; 407 dvm_track->tdt_refcnt = 1; 408 mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER, 409 DDI_INTR_PRI(state->ts_intrmsi_pri)); 410 411 /* 412 * Depending of the type of resource that has been mapped out, we 413 * need to update the QP or CQ handle to reflect that it has, in 414 * fact, been mapped. This allows the driver code which frees a QP 415 * or a CQ to know whether it is appropriate to do a 416 * devmap_devmem_remap() to invalidate the userland mapping for the 417 * corresponding queue's memory. 418 */ 419 if (type == MLNX_UMAP_CQMEM_RSRC) { 420 421 /* Use "key" (CQ number) to do fast lookup of CQ handle */ 422 cq = tavor_cqhdl_from_cqnum(state, key); 423 424 /* 425 * Update the handle to the userland mapping. Note: If 426 * the CQ already has a valid userland mapping, then stop 427 * and return failure. 428 */ 429 mutex_enter(&cq->cq_lock); 430 if (cq->cq_umap_dhp == NULL) { 431 cq->cq_umap_dhp = dhp; 432 dvm_track->tdt_size = cq->cq_cqinfo.qa_size; 433 mutex_exit(&cq->cq_lock); 434 } else { 435 mutex_exit(&cq->cq_lock); 436 goto umem_map_fail; 437 } 438 439 } else if (type == MLNX_UMAP_QPMEM_RSRC) { 440 441 /* Use "key" (QP number) to do fast lookup of QP handle */ 442 qp = tavor_qphdl_from_qpnum(state, key); 443 444 /* 445 * Update the handle to the userland mapping. Note: If 446 * the CQ already has a valid userland mapping, then stop 447 * and return failure. 448 */ 449 mutex_enter(&qp->qp_lock); 450 if (qp->qp_umap_dhp == NULL) { 451 qp->qp_umap_dhp = dhp; 452 dvm_track->tdt_size = qp->qp_wqinfo.qa_size; 453 mutex_exit(&qp->qp_lock); 454 } else { 455 mutex_exit(&qp->qp_lock); 456 goto umem_map_fail; 457 } 458 459 } else if (type == MLNX_UMAP_SRQMEM_RSRC) { 460 461 /* Use "key" (SRQ number) to do fast lookup on SRQ handle */ 462 srq = tavor_srqhdl_from_srqnum(state, key); 463 464 /* 465 * Update the handle to the userland mapping. Note: If the 466 * SRQ already has a valid userland mapping, then stop and 467 * return failure. 468 */ 469 mutex_enter(&srq->srq_lock); 470 if (srq->srq_umap_dhp == NULL) { 471 srq->srq_umap_dhp = dhp; 472 dvm_track->tdt_size = srq->srq_wqinfo.qa_size; 473 mutex_exit(&srq->srq_lock); 474 } else { 475 mutex_exit(&srq->srq_lock); 476 goto umem_map_fail; 477 } 478 } 479 480 /* 481 * Pass the private "Tavor devmap tracking structure" back. This 482 * pointer will be returned in subsequent "unmap" callbacks. 483 */ 484 *pvtp = dvm_track; 485 486 return (DDI_SUCCESS); 487 488 umem_map_fail: 489 mutex_destroy(&dvm_track->tdt_lock); 490 kmem_free(dvm_track, sizeof (tavor_devmap_track_t)); 491 return (DDI_FAILURE); 492 } 493 494 495 /* 496 * tavor_devmap_umem_dup() 497 * Context: Can be called from kernel context. 498 */ 499 /* ARGSUSED */ 500 static int 501 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp, 502 void **new_pvtp) 503 { 504 tavor_state_t *state; 505 tavor_devmap_track_t *dvm_track, *new_dvm_track; 506 uint_t maxprot; 507 int status; 508 509 /* 510 * Extract the Tavor softstate pointer from "Tavor devmap tracking 511 * structure" (in "pvtp"). 512 */ 513 dvm_track = (tavor_devmap_track_t *)pvtp; 514 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track)) 515 state = dvm_track->tdt_state; 516 517 /* 518 * Since this devmap_dup() entry point is generally called 519 * when a process does fork(2), it is incumbent upon the driver 520 * to insure that the child does not inherit a valid copy of 521 * the parent's QP or CQ resource. This is accomplished by using 522 * devmap_devmem_remap() to invalidate the child's mapping to the 523 * kernel memory. 524 */ 525 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 526 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0, 527 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL); 528 if (status != DDI_SUCCESS) { 529 TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()"); 530 return (status); 531 } 532 533 /* 534 * Allocate a new entry to track the subsequent unmapping 535 * (specifically, all partial unmappings) of the child's newly 536 * invalidated resource. Note: Setting the "tdt_size" field to 537 * zero here is an indication to the devmap_unmap() entry point 538 * that this mapping is invalid, and that its subsequent unmapping 539 * should not affect any of the parent's CQ or QP resources. 540 */ 541 new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc( 542 sizeof (tavor_devmap_track_t), KM_SLEEP); 543 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track)) 544 new_dvm_track->tdt_offset = 0; 545 new_dvm_track->tdt_state = state; 546 new_dvm_track->tdt_refcnt = 1; 547 new_dvm_track->tdt_size = 0; 548 mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER, 549 DDI_INTR_PRI(state->ts_intrmsi_pri)); 550 *new_pvtp = new_dvm_track; 551 552 return (DDI_SUCCESS); 553 } 554 555 556 /* 557 * tavor_devmap_umem_unmap() 558 * Context: Can be called from kernel context. 559 */ 560 /* ARGSUSED */ 561 static void 562 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, 563 size_t len, devmap_cookie_t new_dhp1, void **pvtp1, 564 devmap_cookie_t new_dhp2, void **pvtp2) 565 { 566 tavor_state_t *state; 567 tavor_rsrc_t *rsrcp; 568 tavor_devmap_track_t *dvm_track; 569 tavor_cqhdl_t cq; 570 tavor_qphdl_t qp; 571 tavor_srqhdl_t srq; 572 uint64_t key, value; 573 uint_t type; 574 uint_t size; 575 int status; 576 577 /* 578 * Extract the Tavor softstate pointer from "Tavor devmap tracking 579 * structure" (in "pvtp"). 580 */ 581 dvm_track = (tavor_devmap_track_t *)pvtp; 582 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track)) 583 state = dvm_track->tdt_state; 584 585 /* 586 * Extract the "offset" from the "Tavor devmap tracking structure". 587 * Note: The input argument "off" is ignored here because the 588 * Tavor mapping interfaces define a very specific meaning to 589 * each "logical offset". Also extract the "key" and "type" encoded 590 * in the logical offset. 591 */ 592 key = dvm_track->tdt_offset >> PAGESHIFT; 593 type = key & MLNX_UMAP_RSRC_TYPE_MASK; 594 key = key >> MLNX_UMAP_RSRC_TYPE_SHIFT; 595 596 /* 597 * Extract the "size" of the mapping. If this size is determined 598 * to be zero, then it is an indication of a previously invalidated 599 * mapping, and no CQ or QP resources should be affected. 600 */ 601 size = dvm_track->tdt_size; 602 603 /* 604 * If only the "middle portion of a given mapping is being unmapped, 605 * then we are effectively creating one new piece of mapped memory. 606 * (Original region is divided into three pieces of which the middle 607 * piece is being removed. This leaves two pieces. Since we started 608 * with one piece and now have two pieces, we need to increment the 609 * counter in the "Tavor devmap tracking structure". 610 * 611 * If, however, the whole mapped region is being unmapped, then we 612 * have started with one region which we are completely removing. 613 * In this case, we need to decrement the counter in the "Tavor 614 * devmap tracking structure". 615 * 616 * In each of the remaining cases, we will have started with one 617 * mapped region and ended with one (different) region. So no counter 618 * modification is necessary. 619 */ 620 mutex_enter(&dvm_track->tdt_lock); 621 if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) { 622 dvm_track->tdt_refcnt--; 623 } else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) { 624 dvm_track->tdt_refcnt++; 625 } 626 mutex_exit(&dvm_track->tdt_lock); 627 628 /* 629 * For each of the cases where the region is being divided, then we 630 * need to pass back the "Tavor devmap tracking structure". This way 631 * we get it back when each of the remaining pieces is subsequently 632 * unmapped. 633 */ 634 if (new_dhp1 != NULL) { 635 *pvtp1 = pvtp; 636 } 637 if (new_dhp2 != NULL) { 638 *pvtp2 = pvtp; 639 } 640 641 /* 642 * If the "Tavor devmap tracking structure" is no longer being 643 * referenced, then free it up. Otherwise, return. 644 */ 645 if (dvm_track->tdt_refcnt == 0) { 646 mutex_destroy(&dvm_track->tdt_lock); 647 kmem_free(dvm_track, sizeof (tavor_devmap_track_t)); 648 649 /* 650 * If the mapping was invalid (see explanation above), then 651 * no further processing is necessary. 652 */ 653 if (size == 0) { 654 return; 655 } 656 } else { 657 return; 658 } 659 660 /* 661 * Now that we can guarantee that the user memory is fully unmapped, 662 * we can use the "key" and "type" values to try to find the entry 663 * in the "userland resources database". If it's found, then it 664 * indicates that the queue memory (CQ or QP) has not yet been freed. 665 * In this case, we update the corresponding CQ or QP handle to 666 * indicate that the "devmap_devmem_remap()" call will be unnecessary. 667 * If it's _not_ found, then it indicates that the CQ or QP memory 668 * was, in fact, freed before it was unmapped (thus requiring a 669 * previous invalidation by remapping - which will already have 670 * been done in the free routine). 671 */ 672 status = tavor_umap_db_find(state->ts_instance, key, type, &value, 673 0, NULL); 674 if (status == DDI_SUCCESS) { 675 /* 676 * Depending on the type of the mapped resource (CQ or QP), 677 * update handle to indicate that no invalidation remapping 678 * will be necessary. 679 */ 680 if (type == MLNX_UMAP_CQMEM_RSRC) { 681 682 /* Use "value" to convert to CQ handle */ 683 rsrcp = (tavor_rsrc_t *)(uintptr_t)value; 684 cq = (tavor_cqhdl_t)rsrcp->tr_addr; 685 686 /* 687 * Invalidate the handle to the userland mapping. 688 * Note: We must ensure that the mapping being 689 * unmapped here is the current one for the CQ. It 690 * is possible that it might not be if this CQ has 691 * been resized and the previous CQ memory has not 692 * yet been unmapped. But in that case, because of 693 * the devmap_devmem_remap(), there is no longer any 694 * association between the mapping and the real CQ 695 * kernel memory. 696 */ 697 mutex_enter(&cq->cq_lock); 698 if (cq->cq_umap_dhp == dhp) { 699 cq->cq_umap_dhp = (devmap_cookie_t)NULL; 700 } 701 mutex_exit(&cq->cq_lock); 702 703 } else if (type == MLNX_UMAP_QPMEM_RSRC) { 704 705 /* Use "value" to convert to QP handle */ 706 rsrcp = (tavor_rsrc_t *)(uintptr_t)value; 707 qp = (tavor_qphdl_t)rsrcp->tr_addr; 708 709 /* 710 * Invalidate the handle to the userland mapping. 711 * Note: we ensure that the mapping being unmapped 712 * here is the current one for the QP. This is 713 * more of a sanity check here since, unlike CQs 714 * (above) we do not support resize of QPs. 715 */ 716 mutex_enter(&qp->qp_lock); 717 if (qp->qp_umap_dhp == dhp) { 718 qp->qp_umap_dhp = (devmap_cookie_t)NULL; 719 } 720 mutex_exit(&qp->qp_lock); 721 722 } else if (type == MLNX_UMAP_SRQMEM_RSRC) { 723 724 /* Use "value" to convert to SRQ handle */ 725 rsrcp = (tavor_rsrc_t *)(uintptr_t)value; 726 srq = (tavor_srqhdl_t)rsrcp->tr_addr; 727 728 /* 729 * Invalidate the handle to the userland mapping. 730 * Note: we ensure that the mapping being unmapped 731 * here is the current one for the QP. This is 732 * more of a sanity check here since, unlike CQs 733 * (above) we do not support resize of QPs. 734 */ 735 mutex_enter(&srq->srq_lock); 736 if (srq->srq_umap_dhp == dhp) { 737 srq->srq_umap_dhp = (devmap_cookie_t)NULL; 738 } 739 mutex_exit(&srq->srq_lock); 740 } 741 } 742 } 743 744 745 /* 746 * tavor_devmap_devmem_map() 747 * Context: Can be called from kernel context. 748 */ 749 /* ARGSUSED */ 750 static int 751 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, 752 offset_t off, size_t len, void **pvtp) 753 { 754 tavor_state_t *state; 755 tavor_devmap_track_t *dvm_track; 756 minor_t instance; 757 758 /* Get Tavor softstate structure from instance */ 759 instance = TAVOR_DEV_INSTANCE(dev); 760 state = ddi_get_soft_state(tavor_statep, instance); 761 if (state == NULL) { 762 return (ENXIO); 763 } 764 765 /* 766 * Allocate an entry to track the mapping and unmapping of this 767 * resource. Note: We don't need to initialize the "refcnt" or 768 * "offset" fields here, nor do we need to initialize the mutex 769 * used with the "refcnt". Since UAR pages are single pages, they 770 * are not subject to "partial" unmappings. This makes these other 771 * fields unnecessary. 772 */ 773 dvm_track = (tavor_devmap_track_t *)kmem_zalloc( 774 sizeof (tavor_devmap_track_t), KM_SLEEP); 775 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track)) 776 dvm_track->tdt_state = state; 777 dvm_track->tdt_size = PAGESIZE; 778 779 /* 780 * Pass the private "Tavor devmap tracking structure" back. This 781 * pointer will be returned in a subsequent "unmap" callback. 782 */ 783 *pvtp = dvm_track; 784 785 return (DDI_SUCCESS); 786 } 787 788 789 /* 790 * tavor_devmap_devmem_dup() 791 * Context: Can be called from kernel context. 792 */ 793 /* ARGSUSED */ 794 static int 795 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp, 796 devmap_cookie_t new_dhp, void **new_pvtp) 797 { 798 tavor_state_t *state; 799 tavor_devmap_track_t *dvm_track; 800 uint_t maxprot; 801 int status; 802 803 /* 804 * Extract the Tavor softstate pointer from "Tavor devmap tracking 805 * structure" (in "pvtp"). Note: If the tracking structure is NULL 806 * here, it means that the mapping corresponds to an invalid mapping. 807 * In this case, it can be safely ignored ("new_pvtp" set to NULL). 808 */ 809 dvm_track = (tavor_devmap_track_t *)pvtp; 810 if (dvm_track == NULL) { 811 *new_pvtp = NULL; 812 return (DDI_SUCCESS); 813 } 814 815 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track)) 816 state = dvm_track->tdt_state; 817 818 /* 819 * Since this devmap_dup() entry point is generally called 820 * when a process does fork(2), it is incumbent upon the driver 821 * to insure that the child does not inherit a valid copy of 822 * the parent's resource. This is accomplished by using 823 * devmap_devmem_remap() to invalidate the child's mapping to the 824 * kernel memory. 825 */ 826 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 827 status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0, 828 dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL); 829 if (status != DDI_SUCCESS) { 830 TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()"); 831 return (status); 832 } 833 834 /* 835 * Since the region is invalid, there is no need for us to 836 * allocate and continue to track an additional "Tavor devmap 837 * tracking structure". Instead we return NULL here, which is an 838 * indication to the devmap_unmap() entry point that this entry 839 * can be safely ignored. 840 */ 841 *new_pvtp = NULL; 842 843 return (DDI_SUCCESS); 844 } 845 846 847 /* 848 * tavor_devmap_devmem_unmap() 849 * Context: Can be called from kernel context. 850 */ 851 /* ARGSUSED */ 852 static void 853 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, 854 size_t len, devmap_cookie_t new_dhp1, void **pvtp1, 855 devmap_cookie_t new_dhp2, void **pvtp2) 856 { 857 tavor_devmap_track_t *dvm_track; 858 859 /* 860 * Free up the "Tavor devmap tracking structure" (in "pvtp"). 861 * There cannot be "partial" unmappings here because all UAR pages 862 * are single pages. Note: If the tracking structure is NULL here, 863 * it means that the mapping corresponds to an invalid mapping. In 864 * this case, it can be safely ignored. 865 */ 866 dvm_track = (tavor_devmap_track_t *)pvtp; 867 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track)) 868 if (dvm_track == NULL) { 869 return; 870 } 871 872 kmem_free(dvm_track, sizeof (tavor_devmap_track_t)); 873 } 874 875 876 /* 877 * tavor_umap_ci_data_in() 878 * Context: Can be called from user or kernel context. 879 */ 880 /* ARGSUSED */ 881 ibt_status_t 882 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags, 883 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz) 884 { 885 int status; 886 887 /* 888 * Depending on the type of object about which additional information 889 * is being provided (currently only MR is supported), we call the 890 * appropriate resource-specific function. 891 */ 892 switch (object) { 893 case IBT_HDL_MR: 894 status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl, 895 (ibt_mr_data_in_t *)data_p, data_sz); 896 if (status != DDI_SUCCESS) { 897 return (status); 898 } 899 break; 900 901 /* 902 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED, 903 * since the Tavor driver does not support these. 904 */ 905 case IBT_HDL_HCA: 906 case IBT_HDL_QP: 907 case IBT_HDL_CQ: 908 case IBT_HDL_PD: 909 case IBT_HDL_MW: 910 case IBT_HDL_AH: 911 case IBT_HDL_SCHED: 912 case IBT_HDL_EEC: 913 case IBT_HDL_RDD: 914 case IBT_HDL_SRQ: 915 return (IBT_NOT_SUPPORTED); 916 917 /* 918 * Any other types are invalid. 919 */ 920 default: 921 return (IBT_INVALID_PARAM); 922 } 923 924 return (DDI_SUCCESS); 925 } 926 927 928 /* 929 * tavor_umap_mr_data_in() 930 * Context: Can be called from user or kernel context. 931 */ 932 static ibt_status_t 933 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data, 934 size_t data_sz) 935 { 936 if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) { 937 return (IBT_NOT_SUPPORTED); 938 } 939 940 /* Check for valid MR handle pointer */ 941 if (mr == NULL) { 942 return (IBT_MR_HDL_INVALID); 943 } 944 945 /* Check for valid MR input structure size */ 946 if (data_sz < sizeof (ibt_mr_data_in_t)) { 947 return (IBT_INSUFF_RESOURCE); 948 } 949 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data)) 950 951 /* 952 * Ensure that the MR corresponds to userland memory and that it is 953 * a currently valid memory region as well. 954 */ 955 mutex_enter(&mr->mr_lock); 956 if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) { 957 mutex_exit(&mr->mr_lock); 958 return (IBT_MR_HDL_INVALID); 959 } 960 961 /* 962 * If it has passed all the above checks, then extract the callback 963 * function and argument from the input structure. Copy them into 964 * the MR handle. This function will be called only if the memory 965 * corresponding to the MR handle gets a umem_lockmemory() callback. 966 */ 967 mr->mr_umem_cbfunc = data->mr_func; 968 mr->mr_umem_cbarg1 = data->mr_arg1; 969 mr->mr_umem_cbarg2 = data->mr_arg2; 970 mutex_exit(&mr->mr_lock); 971 972 return (DDI_SUCCESS); 973 } 974 975 976 /* 977 * tavor_umap_ci_data_out() 978 * Context: Can be called from user or kernel context. 979 */ 980 /* ARGSUSED */ 981 ibt_status_t 982 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags, 983 ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz) 984 { 985 int status; 986 987 /* 988 * Depending on the type of object about which additional information 989 * is being requested (CQ or QP), we call the appropriate resource- 990 * specific mapping function. 991 */ 992 switch (object) { 993 case IBT_HDL_CQ: 994 status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl, 995 (mlnx_umap_cq_data_out_t *)data_p, data_sz); 996 if (status != DDI_SUCCESS) { 997 return (status); 998 } 999 break; 1000 1001 case IBT_HDL_QP: 1002 status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl, 1003 (mlnx_umap_qp_data_out_t *)data_p, data_sz); 1004 if (status != DDI_SUCCESS) { 1005 return (status); 1006 } 1007 break; 1008 1009 case IBT_HDL_SRQ: 1010 status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl, 1011 (mlnx_umap_srq_data_out_t *)data_p, data_sz); 1012 if (status != DDI_SUCCESS) { 1013 return (status); 1014 } 1015 break; 1016 1017 /* 1018 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED, 1019 * since the Tavor driver does not support these. 1020 */ 1021 case IBT_HDL_PD: 1022 status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl, 1023 (mlnx_umap_pd_data_out_t *)data_p, data_sz); 1024 if (status != DDI_SUCCESS) { 1025 return (status); 1026 } 1027 break; 1028 1029 case IBT_HDL_HCA: 1030 case IBT_HDL_MR: 1031 case IBT_HDL_MW: 1032 case IBT_HDL_AH: 1033 case IBT_HDL_SCHED: 1034 case IBT_HDL_EEC: 1035 case IBT_HDL_RDD: 1036 return (IBT_NOT_SUPPORTED); 1037 1038 /* 1039 * Any other types are invalid. 1040 */ 1041 default: 1042 return (IBT_INVALID_PARAM); 1043 } 1044 1045 return (DDI_SUCCESS); 1046 } 1047 1048 1049 /* 1050 * tavor_umap_cq_data_out() 1051 * Context: Can be called from user or kernel context. 1052 */ 1053 static ibt_status_t 1054 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data, 1055 size_t data_sz) 1056 { 1057 /* Check for valid CQ handle pointer */ 1058 if (cq == NULL) { 1059 return (IBT_CQ_HDL_INVALID); 1060 } 1061 1062 /* Check for valid CQ mapping structure size */ 1063 if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) { 1064 return (IBT_INSUFF_RESOURCE); 1065 } 1066 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data)) 1067 1068 /* 1069 * If it has passed all the above checks, then fill in all the useful 1070 * mapping information (including the mapping offset that will be 1071 * passed back to the devmap() interface during a subsequent mmap() 1072 * call. 1073 * 1074 * The "offset" for CQ mmap()'s looks like this: 1075 * +----------------------------------------+--------+--------------+ 1076 * | CQ Number | 0x33 | Reserved (0) | 1077 * +----------------------------------------+--------+--------------+ 1078 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits 1079 * 1080 * This returns information about the mapping offset, the length of 1081 * the CQ memory, the CQ number (for use in later CQ doorbells), the 1082 * number of CQEs the CQ memory can hold, and the size of each CQE. 1083 */ 1084 data->mcq_rev = MLNX_UMAP_IF_VERSION; 1085 data->mcq_mapoffset = ((((uint64_t)cq->cq_cqnum << 1086 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT); 1087 data->mcq_maplen = cq->cq_cqinfo.qa_size; 1088 data->mcq_cqnum = cq->cq_cqnum; 1089 data->mcq_numcqe = cq->cq_bufsz; 1090 data->mcq_cqesz = sizeof (tavor_hw_cqe_t); 1091 1092 return (DDI_SUCCESS); 1093 } 1094 1095 1096 /* 1097 * tavor_umap_qp_data_out() 1098 * Context: Can be called from user or kernel context. 1099 */ 1100 static ibt_status_t 1101 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data, 1102 size_t data_sz) 1103 { 1104 /* Check for valid QP handle pointer */ 1105 if (qp == NULL) { 1106 return (IBT_QP_HDL_INVALID); 1107 } 1108 1109 /* Check for valid QP mapping structure size */ 1110 if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) { 1111 return (IBT_INSUFF_RESOURCE); 1112 } 1113 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data)) 1114 1115 /* 1116 * If it has passed all the checks, then fill in all the useful 1117 * mapping information (including the mapping offset that will be 1118 * passed back to the devmap() interface during a subsequent mmap() 1119 * call. 1120 * 1121 * The "offset" for QP mmap()'s looks like this: 1122 * +----------------------------------------+--------+--------------+ 1123 * | QP Number | 0x44 | Reserved (0) | 1124 * +----------------------------------------+--------+--------------+ 1125 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits 1126 * 1127 * This returns information about the mapping offset, the length of 1128 * the QP memory, and the QP number (for use in later send and recv 1129 * doorbells). It also returns the following information for both 1130 * the receive work queue and the send work queue, respectively: the 1131 * offset (from the base mapped address) of the start of the given 1132 * work queue, the 64-bit IB virtual address that corresponds to 1133 * the base mapped address (needed for posting WQEs though the 1134 * QP doorbells), the number of WQEs the given work queue can hold, 1135 * and the size of each WQE for the given work queue. 1136 */ 1137 data->mqp_rev = MLNX_UMAP_IF_VERSION; 1138 data->mqp_mapoffset = ((((uint64_t)qp->qp_qpnum << 1139 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT); 1140 data->mqp_maplen = qp->qp_wqinfo.qa_size; 1141 data->mqp_qpnum = qp->qp_qpnum; 1142 1143 /* 1144 * If this QP is associated with a shared receive queue (SRQ), 1145 * then return invalid RecvQ parameters. Otherwise, return 1146 * the proper parameter values. 1147 */ 1148 if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) { 1149 data->mqp_rq_off = (uint32_t)qp->qp_wqinfo.qa_size; 1150 data->mqp_rq_desc_addr = (uint32_t)qp->qp_wqinfo.qa_size; 1151 data->mqp_rq_numwqe = 0; 1152 data->mqp_rq_wqesz = 0; 1153 } else { 1154 data->mqp_rq_off = (uintptr_t)qp->qp_rq_buf - 1155 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned; 1156 data->mqp_rq_desc_addr = (uint32_t)((uintptr_t)qp->qp_rq_buf - 1157 qp->qp_desc_off); 1158 data->mqp_rq_numwqe = qp->qp_rq_bufsz; 1159 data->mqp_rq_wqesz = (1 << qp->qp_rq_log_wqesz); 1160 } 1161 data->mqp_sq_off = (uintptr_t)qp->qp_sq_buf - 1162 (uintptr_t)qp->qp_wqinfo.qa_buf_aligned; 1163 data->mqp_sq_desc_addr = (uint32_t)((uintptr_t)qp->qp_sq_buf - 1164 qp->qp_desc_off); 1165 data->mqp_sq_numwqe = qp->qp_sq_bufsz; 1166 data->mqp_sq_wqesz = (1 << qp->qp_sq_log_wqesz); 1167 1168 return (DDI_SUCCESS); 1169 } 1170 1171 1172 /* 1173 * tavor_umap_srq_data_out() 1174 * Context: Can be called from user or kernel context. 1175 */ 1176 static ibt_status_t 1177 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data, 1178 size_t data_sz) 1179 { 1180 /* Check for valid SRQ handle pointer */ 1181 if (srq == NULL) { 1182 return (IBT_SRQ_HDL_INVALID); 1183 } 1184 1185 /* Check for valid SRQ mapping structure size */ 1186 if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) { 1187 return (IBT_INSUFF_RESOURCE); 1188 } 1189 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data)) 1190 1191 /* 1192 * If it has passed all the checks, then fill in all the useful 1193 * mapping information (including the mapping offset that will be 1194 * passed back to the devmap() interface during a subsequent mmap() 1195 * call. 1196 * 1197 * The "offset" for SRQ mmap()'s looks like this: 1198 * +----------------------------------------+--------+--------------+ 1199 * | SRQ Number | 0x66 | Reserved (0) | 1200 * +----------------------------------------+--------+--------------+ 1201 * (64 - 8 - PAGESHIFT) bits 8 bits PAGESHIFT bits 1202 * 1203 * This returns information about the mapping offset, the length of the 1204 * SRQ memory, and the SRQ number (for use in later send and recv 1205 * doorbells). It also returns the following information for the 1206 * shared receive queue: the offset (from the base mapped address) of 1207 * the start of the given work queue, the 64-bit IB virtual address 1208 * that corresponds to the base mapped address (needed for posting WQEs 1209 * though the QP doorbells), the number of WQEs the given work queue 1210 * can hold, and the size of each WQE for the given work queue. 1211 */ 1212 data->msrq_rev = MLNX_UMAP_IF_VERSION; 1213 data->msrq_mapoffset = ((((uint64_t)srq->srq_srqnum << 1214 MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT); 1215 data->msrq_maplen = srq->srq_wqinfo.qa_size; 1216 data->msrq_srqnum = srq->srq_srqnum; 1217 1218 data->msrq_desc_addr = (uint32_t)((uintptr_t)srq->srq_wq_buf - 1219 srq->srq_desc_off); 1220 data->msrq_numwqe = srq->srq_wq_bufsz; 1221 data->msrq_wqesz = (1 << srq->srq_wq_log_wqesz); 1222 1223 return (DDI_SUCCESS); 1224 } 1225 1226 /* 1227 * tavor_umap_pd_data_out() 1228 * Context: Can be called from user or kernel context. 1229 */ 1230 static ibt_status_t 1231 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data, 1232 size_t data_sz) 1233 { 1234 /* Check for valid PD handle pointer */ 1235 if (pd == NULL) { 1236 return (IBT_PD_HDL_INVALID); 1237 } 1238 1239 /* Check for valid PD mapping structure size */ 1240 if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) { 1241 return (IBT_INSUFF_RESOURCE); 1242 } 1243 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data)) 1244 1245 /* 1246 * If it has passed all the checks, then fill the PD table index 1247 * (the PD table allocated index for the PD pd_pdnum) 1248 */ 1249 data->mpd_rev = MLNX_UMAP_IF_VERSION; 1250 data->mpd_pdnum = pd->pd_pdnum; 1251 1252 return (DDI_SUCCESS); 1253 } 1254 1255 /* 1256 * tavor_umap_db_init() 1257 * Context: Only called from attach() path context 1258 */ 1259 void 1260 tavor_umap_db_init(void) 1261 { 1262 /* 1263 * Initialize the lock used by the Tavor "userland resources database" 1264 * This is used to ensure atomic access to add, remove, and find 1265 * entries in the database. 1266 */ 1267 mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL, 1268 MUTEX_DRIVER, NULL); 1269 1270 /* 1271 * Initialize the AVL tree used for the "userland resources 1272 * database". Using an AVL tree here provides the ability to 1273 * scale the database size to large numbers of resources. The 1274 * entries in the tree are "tavor_umap_db_entry_t". 1275 * The tree is searched with the help of the 1276 * tavor_umap_db_compare() routine. 1277 */ 1278 avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl, 1279 tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t), 1280 offsetof(tavor_umap_db_entry_t, tdbe_avlnode)); 1281 } 1282 1283 1284 /* 1285 * tavor_umap_db_fini() 1286 * Context: Only called from attach() and/or detach() path contexts 1287 */ 1288 void 1289 tavor_umap_db_fini(void) 1290 { 1291 /* Destroy the AVL tree for the "userland resources database" */ 1292 avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl); 1293 1294 /* Destroy the lock for the "userland resources database" */ 1295 mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1296 } 1297 1298 1299 /* 1300 * tavor_umap_db_alloc() 1301 * Context: Can be called from user or kernel context. 1302 */ 1303 tavor_umap_db_entry_t * 1304 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value) 1305 { 1306 tavor_umap_db_entry_t *umapdb; 1307 1308 /* Allocate an entry to add to the "userland resources database" */ 1309 umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP); 1310 if (umapdb == NULL) { 1311 return (NULL); 1312 } 1313 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb)) 1314 1315 /* Fill in the fields in the database entry */ 1316 umapdb->tdbe_common.tdb_instance = instance; 1317 umapdb->tdbe_common.tdb_type = type; 1318 umapdb->tdbe_common.tdb_key = key; 1319 umapdb->tdbe_common.tdb_value = value; 1320 1321 return (umapdb); 1322 } 1323 1324 1325 /* 1326 * tavor_umap_db_free() 1327 * Context: Can be called from user or kernel context. 1328 */ 1329 void 1330 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb) 1331 { 1332 /* Free the database entry */ 1333 kmem_free(umapdb, sizeof (tavor_umap_db_entry_t)); 1334 } 1335 1336 1337 /* 1338 * tavor_umap_db_add() 1339 * Context: Can be called from user or kernel context. 1340 */ 1341 void 1342 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb) 1343 { 1344 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1345 tavor_umap_db_add_nolock(umapdb); 1346 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1347 } 1348 1349 1350 /* 1351 * tavor_umap_db_add_nolock() 1352 * Context: Can be called from user or kernel context. 1353 */ 1354 void 1355 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb) 1356 { 1357 tavor_umap_db_query_t query; 1358 avl_index_t where; 1359 1360 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock)); 1361 1362 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb)) 1363 1364 /* 1365 * Copy the common portion of the "to-be-added" database entry 1366 * into the "tavor_umap_db_query_t" structure. We use this structure 1367 * (with no flags set) to find the appropriate location in the 1368 * "userland resources database" for the new entry to be added. 1369 * 1370 * Note: we expect that this entry should not be found in the 1371 * database (unless something bad has happened). 1372 */ 1373 query.tqdb_common = umapdb->tdbe_common; 1374 query.tqdb_flags = 0; 1375 (void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query, 1376 &where); 1377 1378 /* 1379 * Now, using the "where" field from the avl_find() operation 1380 * above, we will insert the new database entry ("umapdb"). 1381 */ 1382 avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb, 1383 where); 1384 } 1385 1386 1387 /* 1388 * tavor_umap_db_find() 1389 * Context: Can be called from user or kernel context. 1390 */ 1391 int 1392 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type, 1393 uint64_t *value, uint_t flag, tavor_umap_db_entry_t **umapdb) 1394 { 1395 int status; 1396 1397 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1398 status = tavor_umap_db_find_nolock(instance, key, type, value, flag, 1399 umapdb); 1400 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1401 1402 return (status); 1403 } 1404 1405 1406 /* 1407 * tavor_umap_db_find_nolock() 1408 * Context: Can be called from user or kernel context. 1409 */ 1410 int 1411 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type, 1412 uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb) 1413 { 1414 tavor_umap_db_query_t query; 1415 tavor_umap_db_entry_t *entry; 1416 avl_index_t where; 1417 1418 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock)); 1419 1420 /* 1421 * Fill in key, type, instance, and flags values of the 1422 * tavor_umap_db_query_t in preparation for the database 1423 * lookup. 1424 */ 1425 query.tqdb_flags = flags; 1426 query.tqdb_common.tdb_key = key; 1427 query.tqdb_common.tdb_type = type; 1428 query.tqdb_common.tdb_instance = instance; 1429 1430 /* 1431 * Perform the database query. If no entry is found, then 1432 * return failure, else continue. 1433 */ 1434 entry = (tavor_umap_db_entry_t *)avl_find( 1435 &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where); 1436 if (entry == NULL) { 1437 return (DDI_FAILURE); 1438 } 1439 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry)) 1440 1441 /* 1442 * If the flags argument specifies that the entry should 1443 * be removed if found, then call avl_remove() to remove 1444 * the entry from the database. 1445 */ 1446 if (flags & TAVOR_UMAP_DB_REMOVE) { 1447 1448 avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry); 1449 1450 /* 1451 * The database entry is returned with the expectation 1452 * that the caller will use tavor_umap_db_free() to 1453 * free the entry's memory. ASSERT that this is non-NULL. 1454 * NULL pointer should never be passed for the 1455 * TAVOR_UMAP_DB_REMOVE case. 1456 */ 1457 ASSERT(umapdb != NULL); 1458 } 1459 1460 /* 1461 * If the caller would like visibility to the database entry 1462 * (indicated through the use of a non-NULL "umapdb" argument), 1463 * then fill it in. 1464 */ 1465 if (umapdb != NULL) { 1466 *umapdb = entry; 1467 } 1468 1469 /* Extract value field from database entry and return success */ 1470 *value = entry->tdbe_common.tdb_value; 1471 1472 return (DDI_SUCCESS); 1473 } 1474 1475 1476 /* 1477 * tavor_umap_umemlock_cb() 1478 * Context: Can be called from callback context. 1479 */ 1480 void 1481 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie) 1482 { 1483 tavor_umap_db_entry_t *umapdb; 1484 tavor_state_t *state; 1485 tavor_rsrc_t *rsrcp; 1486 tavor_mrhdl_t mr; 1487 uint64_t value; 1488 uint_t instance; 1489 int status; 1490 void (*mr_callback)(void *, void *); 1491 void *mr_cbarg1, *mr_cbarg2; 1492 1493 /* 1494 * If this was userland memory, then we need to remove its entry 1495 * from the "userland resources database". Note: We use the 1496 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know 1497 * which instance was used when the entry was added (but we want 1498 * to know after the entry is found using the other search criteria). 1499 */ 1500 status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie, 1501 MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE | 1502 TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb); 1503 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb)) 1504 if (status == DDI_SUCCESS) { 1505 instance = umapdb->tdbe_common.tdb_instance; 1506 state = ddi_get_soft_state(tavor_statep, instance); 1507 if (state == NULL) { 1508 cmn_err(CE_WARN, "Unable to match Tavor instance\n"); 1509 return; 1510 } 1511 1512 /* Free the database entry */ 1513 tavor_umap_db_free(umapdb); 1514 1515 /* Use "value" to convert to an MR handle */ 1516 rsrcp = (tavor_rsrc_t *)(uintptr_t)value; 1517 mr = (tavor_mrhdl_t)rsrcp->tr_addr; 1518 1519 /* 1520 * If a callback has been provided, call it first. This 1521 * callback is expected to do any cleanup necessary to 1522 * guarantee that the subsequent MR deregister (below) 1523 * will succeed. Specifically, this means freeing up memory 1524 * windows which might have been associated with the MR. 1525 */ 1526 mutex_enter(&mr->mr_lock); 1527 mr_callback = mr->mr_umem_cbfunc; 1528 mr_cbarg1 = mr->mr_umem_cbarg1; 1529 mr_cbarg2 = mr->mr_umem_cbarg2; 1530 mutex_exit(&mr->mr_lock); 1531 if (mr_callback != NULL) { 1532 mr_callback(mr_cbarg1, mr_cbarg2); 1533 } 1534 1535 /* 1536 * Then call tavor_mr_deregister() to release the resources 1537 * associated with the MR handle. Note: Because this routine 1538 * will also check for whether the ddi_umem_cookie_t is in the 1539 * database, it will take responsibility for disabling the 1540 * memory region and calling ddi_umem_unlock(). 1541 */ 1542 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 1543 TAVOR_SLEEP); 1544 if (status != DDI_SUCCESS) { 1545 TAVOR_WARNING(state, "Unexpected failure in " 1546 "deregister from callback\n"); 1547 } 1548 } 1549 } 1550 1551 1552 /* 1553 * tavor_umap_db_compare() 1554 * Context: Can be called from user or kernel context. 1555 */ 1556 static int 1557 tavor_umap_db_compare(const void *q, const void *e) 1558 { 1559 tavor_umap_db_common_t *entry_common, *query_common; 1560 uint_t query_flags; 1561 1562 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q))) 1563 1564 entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common; 1565 query_common = &((tavor_umap_db_query_t *)q)->tqdb_common; 1566 query_flags = ((tavor_umap_db_query_t *)q)->tqdb_flags; 1567 1568 /* 1569 * The first comparison is done on the "key" value in "query" 1570 * and "entry". If they are not equal, then the appropriate 1571 * search direction is returned. Else, we continue by 1572 * comparing "type". 1573 */ 1574 if (query_common->tdb_key < entry_common->tdb_key) { 1575 return (-1); 1576 } else if (query_common->tdb_key > entry_common->tdb_key) { 1577 return (+1); 1578 } 1579 1580 /* 1581 * If the search reaches this point, then "query" and "entry" 1582 * have equal key values. So we continue be comparing their 1583 * "type" values. Again, if they are not equal, then the 1584 * appropriate search direction is returned. Else, we continue 1585 * by comparing "instance". 1586 */ 1587 if (query_common->tdb_type < entry_common->tdb_type) { 1588 return (-1); 1589 } else if (query_common->tdb_type > entry_common->tdb_type) { 1590 return (+1); 1591 } 1592 1593 /* 1594 * If the search reaches this point, then "query" and "entry" 1595 * have exactly the same key and type values. Now we consult 1596 * the "flags" field in the query to determine whether the 1597 * "instance" is relevant to the search. If the 1598 * TAVOR_UMAP_DB_IGNORE_INSTANCE flags is set, then return 1599 * success (0) here. Otherwise, continue the search by comparing 1600 * instance values and returning the appropriate search direction. 1601 */ 1602 if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) { 1603 return (0); 1604 } 1605 1606 /* 1607 * If the search has reached this point, then "query" and "entry" 1608 * can only be differentiated by their instance values. If these 1609 * are not equal, then return the appropriate search direction. 1610 * Else, we return success (0). 1611 */ 1612 if (query_common->tdb_instance < entry_common->tdb_instance) { 1613 return (-1); 1614 } else if (query_common->tdb_instance > entry_common->tdb_instance) { 1615 return (+1); 1616 } 1617 1618 /* Everything matches... so return success */ 1619 return (0); 1620 } 1621 1622 1623 /* 1624 * tavor_umap_db_set_onclose_cb() 1625 * Context: Can be called from user or kernel context. 1626 */ 1627 int 1628 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag, 1629 void (*callback)(void *), void *arg) 1630 { 1631 tavor_umap_db_priv_t *priv; 1632 tavor_umap_db_entry_t *umapdb; 1633 minor_t instance; 1634 uint64_t value; 1635 int status; 1636 1637 instance = TAVOR_DEV_INSTANCE(dev); 1638 if (instance == -1) { 1639 return (DDI_FAILURE); 1640 } 1641 1642 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) { 1643 return (DDI_FAILURE); 1644 } 1645 1646 /* 1647 * Grab the lock for the "userland resources database" and find 1648 * the entry corresponding to this minor number. Once it's found, 1649 * allocate (if necessary) and add an entry (in the "tdb_priv" 1650 * field) to indicate that further processing may be needed during 1651 * Tavor's close() handling. 1652 */ 1653 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1654 status = tavor_umap_db_find_nolock(instance, dev, 1655 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb); 1656 if (status != DDI_SUCCESS) { 1657 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1658 return (DDI_FAILURE); 1659 } 1660 1661 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv; 1662 if (priv == NULL) { 1663 priv = (tavor_umap_db_priv_t *)kmem_zalloc( 1664 sizeof (tavor_umap_db_priv_t), KM_NOSLEEP); 1665 if (priv == NULL) { 1666 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1667 return (DDI_FAILURE); 1668 } 1669 } 1670 1671 /* 1672 * Save away the callback and argument to be used during Tavor's 1673 * close() processing. 1674 */ 1675 priv->tdp_cb = callback; 1676 priv->tdp_arg = arg; 1677 1678 umapdb->tdbe_common.tdb_priv = (void *)priv; 1679 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1680 1681 return (DDI_SUCCESS); 1682 } 1683 1684 1685 /* 1686 * tavor_umap_db_clear_onclose_cb() 1687 * Context: Can be called from user or kernel context. 1688 */ 1689 int 1690 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag) 1691 { 1692 tavor_umap_db_priv_t *priv; 1693 tavor_umap_db_entry_t *umapdb; 1694 minor_t instance; 1695 uint64_t value; 1696 int status; 1697 1698 instance = TAVOR_DEV_INSTANCE(dev); 1699 if (instance == -1) { 1700 return (DDI_FAILURE); 1701 } 1702 1703 if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) { 1704 return (DDI_FAILURE); 1705 } 1706 1707 /* 1708 * Grab the lock for the "userland resources database" and find 1709 * the entry corresponding to this minor number. Once it's found, 1710 * remove the entry (in the "tdb_priv" field) that indicated the 1711 * need for further processing during Tavor's close(). Free the 1712 * entry, if appropriate. 1713 */ 1714 mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1715 status = tavor_umap_db_find_nolock(instance, dev, 1716 MLNX_UMAP_PID_RSRC, &value, 0, &umapdb); 1717 if (status != DDI_SUCCESS) { 1718 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1719 return (DDI_FAILURE); 1720 } 1721 1722 priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv; 1723 if (priv != NULL) { 1724 kmem_free(priv, sizeof (tavor_umap_db_priv_t)); 1725 priv = NULL; 1726 } 1727 1728 umapdb->tdbe_common.tdb_priv = (void *)priv; 1729 mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock); 1730 return (DDI_SUCCESS); 1731 } 1732 1733 1734 /* 1735 * tavor_umap_db_clear_onclose_cb() 1736 * Context: Can be called from user or kernel context. 1737 */ 1738 void 1739 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv) 1740 { 1741 void (*callback)(void *); 1742 1743 ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock)); 1744 1745 /* 1746 * Call the callback. 1747 * Note: Currently there is only one callback (in "tdp_cb"), but 1748 * in the future there may be more, depending on what other types 1749 * of interaction there are between userland processes and the 1750 * driver. 1751 */ 1752 callback = priv->tdp_cb; 1753 callback(priv->tdp_arg); 1754 } 1755