/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * tavor_cq.c
 *    Tavor Completion Queue Processing Routines
 *
 *    Implements all the routines necessary for allocating, freeing, resizing,
 *    and handling the completion type events that the Tavor hardware can
 *    generate.
34 */ 35 36 #include <sys/types.h> 37 #include <sys/conf.h> 38 #include <sys/ddi.h> 39 #include <sys/sunddi.h> 40 #include <sys/modctl.h> 41 #include <sys/bitmap.h> 42 #include <sys/sysmacros.h> 43 44 #include <sys/ib/adapters/tavor/tavor.h> 45 46 static void tavor_cq_doorbell(tavor_state_t *state, uint32_t cq_cmd, 47 uint32_t cqn, uint32_t cq_param); 48 #pragma inline(tavor_cq_doorbell) 49 static int tavor_cq_cqe_consume(tavor_state_t *state, tavor_cqhdl_t cq, 50 tavor_hw_cqe_t *cqe, ibt_wc_t *wc); 51 static int tavor_cq_errcqe_consume(tavor_state_t *state, tavor_cqhdl_t cq, 52 tavor_hw_cqe_t *cqe, ibt_wc_t *wc); 53 static void tavor_cqe_sync(tavor_cqhdl_t cq, tavor_hw_cqe_t *cqe, 54 uint_t flag); 55 static void tavor_cq_resize_helper(tavor_cqhdl_t cq, tavor_hw_cqe_t *new_cqbuf, 56 uint32_t old_cons_indx, uint32_t num_newcqe); 57 58 /* 59 * tavor_cq_alloc() 60 * Context: Can be called only from user or kernel context. 61 */ 62 int 63 tavor_cq_alloc(tavor_state_t *state, ibt_cq_hdl_t ibt_cqhdl, 64 ibt_cq_attr_t *cq_attr, uint_t *actual_size, tavor_cqhdl_t *cqhdl, 65 uint_t sleepflag) 66 { 67 tavor_rsrc_t *cqc, *rsrc; 68 tavor_umap_db_entry_t *umapdb; 69 tavor_hw_cqc_t cqc_entry; 70 tavor_cqhdl_t cq; 71 ibt_mr_attr_t mr_attr; 72 tavor_mr_options_t op; 73 tavor_pdhdl_t pd; 74 tavor_mrhdl_t mr; 75 tavor_hw_cqe_t *buf; 76 uint64_t addr, value; 77 uint32_t log_cq_size, lkey, uarpg; 78 uint_t dma_xfer_mode, cq_sync, cq_is_umap; 79 int status, i, flag; 80 81 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq_attr)) 82 83 /* 84 * Determine whether CQ is being allocated for userland access or 85 * whether it is being allocated for kernel access. If the CQ is 86 * being allocated for userland access, then lookup the UAR doorbell 87 * page number for the current process. Note: If this is not found 88 * (e.g. if the process has not previously open()'d the Tavor driver), 89 * then an error is returned. 90 */ 91 cq_is_umap = (cq_attr->cq_flags & IBT_CQ_USER_MAP) ? 
1 : 0; 92 if (cq_is_umap) { 93 status = tavor_umap_db_find(state->ts_instance, ddi_get_pid(), 94 MLNX_UMAP_UARPG_RSRC, &value, 0, NULL); 95 if (status != DDI_SUCCESS) { 96 goto cqalloc_fail; 97 } 98 uarpg = ((tavor_rsrc_t *)(uintptr_t)value)->tr_indx; 99 } 100 101 /* Use the internal protection domain (PD) for setting up CQs */ 102 pd = state->ts_pdhdl_internal; 103 104 /* Increment the reference count on the protection domain (PD) */ 105 tavor_pd_refcnt_inc(pd); 106 107 /* 108 * Allocate an CQ context entry. This will be filled in with all 109 * the necessary parameters to define the Completion Queue. And then 110 * ownership will be passed to the hardware in the final step 111 * below. If we fail here, we must undo the protection domain 112 * reference count. 113 */ 114 status = tavor_rsrc_alloc(state, TAVOR_CQC, 1, sleepflag, &cqc); 115 if (status != DDI_SUCCESS) { 116 goto cqalloc_fail1; 117 } 118 119 /* 120 * Allocate the software structure for tracking the completion queue 121 * (i.e. the Tavor Completion Queue handle). If we fail here, we must 122 * undo the protection domain reference count and the previous 123 * resource allocation. 124 */ 125 status = tavor_rsrc_alloc(state, TAVOR_CQHDL, 1, sleepflag, &rsrc); 126 if (status != DDI_SUCCESS) { 127 goto cqalloc_fail2; 128 } 129 cq = (tavor_cqhdl_t)rsrc->tr_addr; 130 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq)) 131 cq->cq_is_umap = cq_is_umap; 132 133 /* Use the index as CQ number */ 134 cq->cq_cqnum = cqc->tr_indx; 135 136 /* 137 * If this will be a user-mappable CQ, then allocate an entry for 138 * the "userland resources database". This will later be added to 139 * the database (after all further CQ operations are successful). 140 * If we fail here, we must undo the reference counts and the 141 * previous resource allocation. 
142 */ 143 if (cq->cq_is_umap) { 144 umapdb = tavor_umap_db_alloc(state->ts_instance, cq->cq_cqnum, 145 MLNX_UMAP_CQMEM_RSRC, (uint64_t)(uintptr_t)rsrc); 146 if (umapdb == NULL) { 147 goto cqalloc_fail3; 148 } 149 } 150 151 /* 152 * Calculate the appropriate size for the completion queue. 153 * Note: All Tavor CQs must be a power-of-2 minus 1 in size. Also 154 * they may not be any smaller than TAVOR_CQ_MIN_SIZE. This step is 155 * to round the requested size up to the next highest power-of-2 156 */ 157 cq_attr->cq_size = max(cq_attr->cq_size, TAVOR_CQ_MIN_SIZE); 158 log_cq_size = highbit(cq_attr->cq_size); 159 160 /* 161 * Next we verify that the rounded-up size is valid (i.e. consistent 162 * with the device limits and/or software-configured limits) 163 */ 164 if (log_cq_size > state->ts_cfg_profile->cp_log_max_cq_sz) { 165 goto cqalloc_fail4; 166 } 167 168 /* 169 * Allocate the memory for Completion Queue. 170 * 171 * Note: Although we use the common queue allocation routine, we 172 * always specify TAVOR_QUEUE_LOCATION_NORMAL (i.e. CQ located in 173 * kernel system memory) for kernel CQs because it would be 174 * inefficient to have CQs located in DDR memory. This is primarily 175 * because CQs are read from (by software) more than they are written 176 * to. (We always specify TAVOR_QUEUE_LOCATION_USERLAND for all 177 * user-mappable CQs for a similar reason.) 178 * It is also worth noting that, unlike Tavor QP work queues, 179 * completion queues do not have the same strict alignment 180 * requirements. It is sufficient for the CQ memory to be both 181 * aligned to and bound to addresses which are a multiple of CQE size. 
182 */ 183 cq->cq_cqinfo.qa_size = (1 << log_cq_size) * sizeof (tavor_hw_cqe_t); 184 cq->cq_cqinfo.qa_alloc_align = sizeof (tavor_hw_cqe_t); 185 cq->cq_cqinfo.qa_bind_align = sizeof (tavor_hw_cqe_t); 186 if (cq->cq_is_umap) { 187 cq->cq_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; 188 } else { 189 cq->cq_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_NORMAL; 190 } 191 status = tavor_queue_alloc(state, &cq->cq_cqinfo, sleepflag); 192 if (status != DDI_SUCCESS) { 193 goto cqalloc_fail4; 194 } 195 buf = (tavor_hw_cqe_t *)cq->cq_cqinfo.qa_buf_aligned; 196 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 197 198 /* 199 * Initialize each of the Completion Queue Entries (CQE) by setting 200 * their ownership to hardware ("owner" bit set to HW). This is in 201 * preparation for the final transfer of ownership (below) of the 202 * CQ context itself. 203 */ 204 for (i = 0; i < (1 << log_cq_size); i++) { 205 TAVOR_CQE_OWNER_SET_HW(cq, &buf[i]); 206 } 207 208 /* 209 * Register the memory for the CQ. The memory for the CQ must 210 * be registered in the Tavor TPT tables. This gives us the LKey 211 * to specify in the CQ context below. Note: If this is a user- 212 * mappable CQ, then we will force DDI_DMA_CONSISTENT mapping. 213 */ 214 flag = (sleepflag == TAVOR_SLEEP) ? 
IBT_MR_SLEEP : IBT_MR_NOSLEEP; 215 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; 216 mr_attr.mr_len = cq->cq_cqinfo.qa_size; 217 mr_attr.mr_as = NULL; 218 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; 219 if (cq->cq_is_umap) { 220 dma_xfer_mode = DDI_DMA_CONSISTENT; 221 } else { 222 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent; 223 } 224 if (dma_xfer_mode == DDI_DMA_STREAMING) { 225 mr_attr.mr_flags |= IBT_MR_NONCOHERENT; 226 } 227 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 228 op.mro_bind_dmahdl = cq->cq_cqinfo.qa_dmahdl; 229 op.mro_bind_override_addr = 0; 230 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op); 231 if (status != DDI_SUCCESS) { 232 goto cqalloc_fail5; 233 } 234 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 235 addr = mr->mr_bindinfo.bi_addr; 236 lkey = mr->mr_lkey; 237 238 /* Determine if later ddi_dma_sync will be necessary */ 239 cq_sync = TAVOR_CQ_IS_SYNC_REQ(state, cq->cq_cqinfo); 240 241 /* Sync entire CQ for use by the hardware (if necessary). */ 242 if (cq_sync) { 243 (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0, 244 cq->cq_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV); 245 } 246 247 /* 248 * Fill in the CQC entry. This is the final step before passing 249 * ownership of the CQC entry to the Tavor hardware. We use all of 250 * the information collected/calculated above to fill in the 251 * requisite portions of the CQC. 
Note: If this CQ is going to be 252 * used for userland access, then we need to set the UAR page number 253 * appropriately (otherwise it's a "don't care") 254 */ 255 bzero(&cqc_entry, sizeof (tavor_hw_cqc_t)); 256 cq->cq_eqnum = TAVOR_CQ_EQNUM_GET(cq->cq_cqnum); 257 cq->cq_erreqnum = TAVOR_CQ_ERREQNUM_GET(cq->cq_cqnum); 258 cqc_entry.xlat = TAVOR_VA2PA_XLAT_ENABLED; 259 cqc_entry.state = TAVOR_CQ_DISARMED; 260 cqc_entry.start_addr_h = (addr >> 32); 261 cqc_entry.start_addr_l = (addr & 0xFFFFFFFF); 262 cqc_entry.log_cq_sz = log_cq_size; 263 if (cq->cq_is_umap) { 264 cqc_entry.usr_page = uarpg; 265 } else { 266 cqc_entry.usr_page = 0; 267 } 268 cqc_entry.pd = pd->pd_pdnum; 269 cqc_entry.lkey = lkey; 270 cqc_entry.e_eqn = cq->cq_erreqnum; 271 cqc_entry.c_eqn = cq->cq_eqnum; 272 cqc_entry.cqn = cq->cq_cqnum; 273 274 /* 275 * Write the CQC entry to hardware. Lastly, we pass ownership of 276 * the entry to the hardware (using the Tavor SW2HW_CQ firmware 277 * command). Note: In general, this operation shouldn't fail. But 278 * if it does, we have to undo everything we've done above before 279 * returning error. 280 */ 281 status = tavor_cmn_ownership_cmd_post(state, SW2HW_CQ, &cqc_entry, 282 sizeof (tavor_hw_cqc_t), cq->cq_cqnum, sleepflag); 283 if (status != TAVOR_CMD_SUCCESS) { 284 cmn_err(CE_CONT, "Tavor: SW2HW_CQ command failed: %08x\n", 285 status); 286 goto cqalloc_fail6; 287 } 288 289 /* 290 * Fill in the rest of the Tavor Completion Queue handle. Having 291 * successfully transferred ownership of the CQC, we can update the 292 * following fields for use in further operations on the CQ. 
293 */ 294 cq->cq_cqcrsrcp = cqc; 295 cq->cq_rsrcp = rsrc; 296 cq->cq_consindx = 0; 297 cq->cq_buf = buf; 298 cq->cq_bufsz = (1 << log_cq_size); 299 cq->cq_mrhdl = mr; 300 cq->cq_sync = cq_sync; 301 cq->cq_refcnt = 0; 302 cq->cq_is_special = 0; 303 cq->cq_uarpg = uarpg; 304 cq->cq_umap_dhp = (devmap_cookie_t)NULL; 305 avl_create(&cq->cq_wrid_wqhdr_avl_tree, tavor_wrid_wqhdr_compare, 306 sizeof (struct tavor_workq_hdr_s), 307 offsetof(struct tavor_workq_hdr_s, wq_avl_link)); 308 309 cq->cq_wrid_reap_head = NULL; 310 cq->cq_wrid_reap_tail = NULL; 311 cq->cq_hdlrarg = (void *)ibt_cqhdl; 312 313 /* 314 * Put CQ handle in Tavor CQNum-to-CQHdl list. Then fill in the 315 * "actual_size" and "cqhdl" and return success 316 */ 317 ASSERT(state->ts_cqhdl[cqc->tr_indx] == NULL); 318 state->ts_cqhdl[cqc->tr_indx] = cq; 319 320 /* 321 * If this is a user-mappable CQ, then we need to insert the previously 322 * allocated entry into the "userland resources database". This will 323 * allow for later lookup during devmap() (i.e. mmap()) calls. 324 */ 325 if (cq->cq_is_umap) { 326 tavor_umap_db_add(umapdb); 327 } 328 329 /* 330 * Fill in the return arguments (if necessary). This includes the 331 * real completion queue size. 
332 */ 333 if (actual_size != NULL) { 334 *actual_size = (1 << log_cq_size) - 1; 335 } 336 *cqhdl = cq; 337 338 return (DDI_SUCCESS); 339 340 /* 341 * The following is cleanup for all possible failure cases in this routine 342 */ 343 cqalloc_fail6: 344 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 345 sleepflag) != DDI_SUCCESS) { 346 TAVOR_WARNING(state, "failed to deregister CQ memory"); 347 } 348 cqalloc_fail5: 349 tavor_queue_free(state, &cq->cq_cqinfo); 350 cqalloc_fail4: 351 if (cq_is_umap) { 352 tavor_umap_db_free(umapdb); 353 } 354 cqalloc_fail3: 355 tavor_rsrc_free(state, &rsrc); 356 cqalloc_fail2: 357 tavor_rsrc_free(state, &cqc); 358 cqalloc_fail1: 359 tavor_pd_refcnt_dec(pd); 360 cqalloc_fail: 361 return (status); 362 } 363 364 365 /* 366 * tavor_cq_free() 367 * Context: Can be called only from user or kernel context. 368 */ 369 /* ARGSUSED */ 370 int 371 tavor_cq_free(tavor_state_t *state, tavor_cqhdl_t *cqhdl, uint_t sleepflag) 372 { 373 tavor_rsrc_t *cqc, *rsrc; 374 tavor_umap_db_entry_t *umapdb; 375 tavor_hw_cqc_t cqc_entry; 376 tavor_pdhdl_t pd; 377 tavor_mrhdl_t mr; 378 tavor_cqhdl_t cq; 379 uint32_t cqnum; 380 uint64_t value; 381 uint_t maxprot; 382 int status; 383 384 /* 385 * Pull all the necessary information from the Tavor Completion Queue 386 * handle. This is necessary here because the resource for the 387 * CQ handle is going to be freed up as part of this operation. 388 */ 389 cq = *cqhdl; 390 mutex_enter(&cq->cq_lock); 391 cqc = cq->cq_cqcrsrcp; 392 rsrc = cq->cq_rsrcp; 393 pd = state->ts_pdhdl_internal; 394 mr = cq->cq_mrhdl; 395 cqnum = cq->cq_cqnum; 396 397 /* 398 * If there are work queues still associated with the CQ, then return 399 * an error. Otherwise, we will be holding the CQ lock. 400 */ 401 if (cq->cq_refcnt != 0) { 402 mutex_exit(&cq->cq_lock); 403 return (IBT_CQ_BUSY); 404 } 405 406 /* 407 * If this was a user-mappable CQ, then we need to remove its entry 408 * from the "userland resources database". 
If it is also currently 409 * mmap()'d out to a user process, then we need to call 410 * devmap_devmem_remap() to remap the CQ memory to an invalid mapping. 411 * We also need to invalidate the CQ tracking information for the 412 * user mapping. 413 */ 414 if (cq->cq_is_umap) { 415 status = tavor_umap_db_find(state->ts_instance, cqnum, 416 MLNX_UMAP_CQMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE, 417 &umapdb); 418 if (status != DDI_SUCCESS) { 419 mutex_exit(&cq->cq_lock); 420 TAVOR_WARNING(state, "failed to find in database"); 421 return (ibc_get_ci_failure(0)); 422 } 423 tavor_umap_db_free(umapdb); 424 if (cq->cq_umap_dhp != NULL) { 425 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 426 status = devmap_devmem_remap(cq->cq_umap_dhp, 427 state->ts_dip, 0, 0, cq->cq_cqinfo.qa_size, 428 maxprot, DEVMAP_MAPPING_INVALID, NULL); 429 if (status != DDI_SUCCESS) { 430 mutex_exit(&cq->cq_lock); 431 TAVOR_WARNING(state, "failed in CQ memory " 432 "devmap_devmem_remap()"); 433 return (ibc_get_ci_failure(0)); 434 } 435 cq->cq_umap_dhp = (devmap_cookie_t)NULL; 436 } 437 } 438 439 /* 440 * Put NULL into the Tavor CQNum-to-CQHdl list. This will allow any 441 * in-progress events to detect that the CQ corresponding to this 442 * number has been freed. 443 */ 444 state->ts_cqhdl[cqc->tr_indx] = NULL; 445 446 /* 447 * While we hold the CQ lock, do a "forced reap" of the workQ WRID 448 * list. This cleans up all the structures associated with the WRID 449 * processing for this CQ. Once we complete, drop the lock and finish 450 * the deallocation of the CQ. 451 */ 452 tavor_wrid_cq_force_reap(cq); 453 454 mutex_exit(&cq->cq_lock); 455 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*cq)) 456 457 /* 458 * Reclaim CQC entry from hardware (using the Tavor HW2SW_CQ 459 * firmware command). If the ownership transfer fails for any reason, 460 * then it is an indication that something (either in HW or SW) has 461 * gone seriously wrong. 
462 */ 463 status = tavor_cmn_ownership_cmd_post(state, HW2SW_CQ, &cqc_entry, 464 sizeof (tavor_hw_cqc_t), cqnum, sleepflag); 465 if (status != TAVOR_CMD_SUCCESS) { 466 TAVOR_WARNING(state, "failed to reclaim CQC ownership"); 467 cmn_err(CE_CONT, "Tavor: HW2SW_CQ command failed: %08x\n", 468 status); 469 return (ibc_get_ci_failure(0)); 470 } 471 472 /* 473 * Deregister the memory for the Completion Queue. If this fails 474 * for any reason, then it is an indication that something (either 475 * in HW or SW) has gone seriously wrong. So we print a warning 476 * message and return. 477 */ 478 status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 479 sleepflag); 480 if (status != DDI_SUCCESS) { 481 TAVOR_WARNING(state, "failed to deregister CQ memory"); 482 return (ibc_get_ci_failure(0)); 483 } 484 485 /* Free the memory for the CQ */ 486 tavor_queue_free(state, &cq->cq_cqinfo); 487 488 /* Free the Tavor Completion Queue handle */ 489 tavor_rsrc_free(state, &rsrc); 490 491 /* Free up the CQC entry resource */ 492 tavor_rsrc_free(state, &cqc); 493 494 /* Decrement the reference count on the protection domain (PD) */ 495 tavor_pd_refcnt_dec(pd); 496 497 /* Set the cqhdl pointer to NULL and return success */ 498 *cqhdl = NULL; 499 500 return (DDI_SUCCESS); 501 } 502 503 504 /* 505 * tavor_cq_resize() 506 * Context: Can be called only from user or kernel context. 
507 */ 508 int 509 tavor_cq_resize(tavor_state_t *state, tavor_cqhdl_t cq, uint_t req_size, 510 uint_t *actual_size, uint_t sleepflag) 511 { 512 tavor_hw_cqc_t cqc_entry; 513 tavor_qalloc_info_t new_cqinfo, old_cqinfo; 514 ibt_mr_attr_t mr_attr; 515 tavor_mr_options_t op; 516 tavor_pdhdl_t pd; 517 tavor_mrhdl_t mr, mr_old; 518 tavor_hw_cqe_t *buf; 519 uint32_t new_prod_indx, old_cons_indx; 520 uint_t dma_xfer_mode, cq_sync, log_cq_size, maxprot; 521 int status, i, flag; 522 523 /* Use the internal protection domain (PD) for CQs */ 524 pd = state->ts_pdhdl_internal; 525 526 /* 527 * Calculate the appropriate size for the new resized completion queue. 528 * Note: All Tavor CQs must be a power-of-2 minus 1 in size. Also 529 * they may not be any smaller than TAVOR_CQ_MIN_SIZE. This step is 530 * to round the requested size up to the next highest power-of-2 531 */ 532 req_size = max(req_size, TAVOR_CQ_MIN_SIZE); 533 log_cq_size = highbit(req_size); 534 535 /* 536 * Next we verify that the rounded-up size is valid (i.e. consistent 537 * with the device limits and/or software-configured limits) 538 */ 539 if (log_cq_size > state->ts_cfg_profile->cp_log_max_cq_sz) { 540 goto cqresize_fail; 541 } 542 543 /* 544 * Allocate the memory for newly resized Completion Queue. 545 * 546 * Note: Although we use the common queue allocation routine, we 547 * always specify TAVOR_QUEUE_LOCATION_NORMAL (i.e. CQ located in 548 * kernel system memory) for kernel CQs because it would be 549 * inefficient to have CQs located in DDR memory. This is the same 550 * as we do when we first allocate completion queues primarily 551 * because CQs are read from (by software) more than they are written 552 * to. (We always specify TAVOR_QUEUE_LOCATION_USERLAND for all 553 * user-mappable CQs for a similar reason.) 554 * It is also worth noting that, unlike Tavor QP work queues, 555 * completion queues do not have the same strict alignment 556 * requirements. 
It is sufficient for the CQ memory to be both 557 * aligned to and bound to addresses which are a multiple of CQE size. 558 */ 559 new_cqinfo.qa_size = (1 << log_cq_size) * sizeof (tavor_hw_cqe_t); 560 new_cqinfo.qa_alloc_align = sizeof (tavor_hw_cqe_t); 561 new_cqinfo.qa_bind_align = sizeof (tavor_hw_cqe_t); 562 if (cq->cq_is_umap) { 563 new_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_USERLAND; 564 } else { 565 new_cqinfo.qa_location = TAVOR_QUEUE_LOCATION_NORMAL; 566 } 567 status = tavor_queue_alloc(state, &new_cqinfo, sleepflag); 568 if (status != DDI_SUCCESS) { 569 goto cqresize_fail; 570 } 571 buf = (tavor_hw_cqe_t *)new_cqinfo.qa_buf_aligned; 572 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*buf)) 573 574 /* 575 * Initialize each of the Completion Queue Entries (CQE) by setting 576 * their ownership to hardware ("owner" bit set to HW). This is in 577 * preparation for the final resize operation (below). 578 */ 579 for (i = 0; i < (1 << log_cq_size); i++) { 580 TAVOR_CQE_OWNER_SET_HW(cq, &buf[i]); 581 } 582 583 /* 584 * Register the memory for the CQ. The memory for the CQ must 585 * be registered in the Tavor TPT tables. This gives us the LKey 586 * to specify in the CQ context below. 587 */ 588 flag = (sleepflag == TAVOR_SLEEP) ? 
IBT_MR_SLEEP : IBT_MR_NOSLEEP; 589 mr_attr.mr_vaddr = (uint64_t)(uintptr_t)buf; 590 mr_attr.mr_len = new_cqinfo.qa_size; 591 mr_attr.mr_as = NULL; 592 mr_attr.mr_flags = flag | IBT_MR_ENABLE_LOCAL_WRITE; 593 if (cq->cq_is_umap) { 594 dma_xfer_mode = DDI_DMA_CONSISTENT; 595 } else { 596 dma_xfer_mode = state->ts_cfg_profile->cp_streaming_consistent; 597 } 598 if (dma_xfer_mode == DDI_DMA_STREAMING) { 599 mr_attr.mr_flags |= IBT_MR_NONCOHERENT; 600 } 601 op.mro_bind_type = state->ts_cfg_profile->cp_iommu_bypass; 602 op.mro_bind_dmahdl = new_cqinfo.qa_dmahdl; 603 op.mro_bind_override_addr = 0; 604 status = tavor_mr_register(state, pd, &mr_attr, &mr, &op); 605 if (status != DDI_SUCCESS) { 606 tavor_queue_free(state, &new_cqinfo); 607 goto cqresize_fail; 608 } 609 _NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr)) 610 611 /* Determine if later ddi_dma_sync will be necessary */ 612 cq_sync = TAVOR_CQ_IS_SYNC_REQ(state, new_cqinfo); 613 614 /* Sync entire "new" CQ for use by hardware (if necessary) */ 615 if (cq_sync) { 616 (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0, 617 new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV); 618 } 619 620 /* 621 * Now we grab the CQ lock. Since we will be updating the actual 622 * CQ location and the producer/consumer indexes, we should hold 623 * the lock. 624 * 625 * We do a TAVOR_NOSLEEP here (and below), though, because we are 626 * holding the "cq_lock" and if we got raised to interrupt level 627 * by priority inversion, we would not want to block in this routine 628 * waiting for success. 629 */ 630 mutex_enter(&cq->cq_lock); 631 632 /* 633 * Determine the current CQ "consumer index". 634 * 635 * Note: This will depend on whether the CQ had previously been 636 * mapped for user access or whether it is a kernel CQ. If this 637 * is a kernel CQ, then all PollCQ() operations have come through 638 * the IBTF and, hence, the driver's CQ state structure will 639 * contain the current consumer index. 
If, however, the user has 640 * accessed this CQ by bypassing the driver (OS-bypass), then we 641 * need to query the firmware to determine the current CQ consumer 642 * index. This also assumes that the user process will not continue 643 * to consume entries while at the same time doing the ResizeCQ() 644 * operation. If the user process does not guarantee this, then it 645 * may see duplicate or missed completions. But under no 646 * circumstances should this panic the system. 647 */ 648 if (cq->cq_is_umap) { 649 status = tavor_cmn_query_cmd_post(state, QUERY_CQ, 650 cq->cq_cqnum, &cqc_entry, sizeof (tavor_hw_cqc_t), 651 TAVOR_NOSLEEP); 652 if (status != TAVOR_CMD_SUCCESS) { 653 /* Query CQ has failed, drop CQ lock and cleanup */ 654 mutex_exit(&cq->cq_lock); 655 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 656 sleepflag) != DDI_SUCCESS) { 657 TAVOR_WARNING(state, "failed to deregister " 658 "CQ memory"); 659 } 660 tavor_queue_free(state, &new_cqinfo); 661 TAVOR_WARNING(state, "failed to find in database"); 662 663 goto cqresize_fail; 664 } 665 old_cons_indx = cqc_entry.cons_indx; 666 } else { 667 old_cons_indx = cq->cq_consindx; 668 } 669 670 /* 671 * Fill in the CQC entry. For the resize operation this is the 672 * final step before attempting the resize operation on the CQC entry. 673 * We use all of the information collected/calculated above to fill 674 * in the requisite portions of the CQC. 675 */ 676 bzero(&cqc_entry, sizeof (tavor_hw_cqc_t)); 677 cqc_entry.start_addr_h = (mr->mr_bindinfo.bi_addr >> 32); 678 cqc_entry.start_addr_l = (mr->mr_bindinfo.bi_addr & 0xFFFFFFFF); 679 cqc_entry.log_cq_sz = log_cq_size; 680 cqc_entry.lkey = mr->mr_lkey; 681 682 /* 683 * Write the CQC entry to hardware. Lastly, we pass ownership of 684 * the entry to the hardware (using the Tavor RESIZE_CQ firmware 685 * command). Note: In general, this operation shouldn't fail. But 686 * if it does, we have to undo everything we've done above before 687 * returning error. 
Also note that the status returned may indicate 688 * the code to return to the IBTF. 689 */ 690 status = tavor_resize_cq_cmd_post(state, &cqc_entry, cq->cq_cqnum, 691 &new_prod_indx, TAVOR_CMD_NOSLEEP_SPIN); 692 if (status != TAVOR_CMD_SUCCESS) { 693 /* Resize attempt has failed, drop CQ lock and cleanup */ 694 mutex_exit(&cq->cq_lock); 695 if (tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL, 696 sleepflag) != DDI_SUCCESS) { 697 TAVOR_WARNING(state, "failed to deregister CQ memory"); 698 } 699 tavor_queue_free(state, &new_cqinfo); 700 if (status == TAVOR_CMD_BAD_SIZE) { 701 return (IBT_CQ_SZ_INSUFFICIENT); 702 } else { 703 cmn_err(CE_CONT, "Tavor: RESIZE_CQ command failed: " 704 "%08x\n", status); 705 return (ibc_get_ci_failure(0)); 706 } 707 } 708 709 /* 710 * The CQ resize attempt was successful. Before dropping the CQ lock, 711 * copy all of the CQEs from the "old" CQ into the "new" CQ. Note: 712 * the Tavor firmware guarantees us that sufficient space is set aside 713 * in the "new" CQ to handle any un-polled CQEs from the "old" CQ. 714 * The two parameters to this helper function ("old_cons_indx" and 715 * "new_prod_indx") essentially indicate the starting index and number 716 * of any CQEs that might remain in the "old" CQ memory. 717 */ 718 tavor_cq_resize_helper(cq, buf, old_cons_indx, new_prod_indx); 719 720 /* Sync entire "new" CQ for use by hardware (if necessary) */ 721 if (cq_sync) { 722 (void) ddi_dma_sync(mr->mr_bindinfo.bi_dmahdl, 0, 723 new_cqinfo.qa_size, DDI_DMA_SYNC_FORDEV); 724 } 725 726 /* 727 * Update the Tavor Completion Queue handle with all the new 728 * information. 
At the same time, save away all the necessary 729 * information for freeing up the old resources 730 */ 731 mr_old = cq->cq_mrhdl; 732 old_cqinfo = cq->cq_cqinfo; 733 cq->cq_cqinfo = new_cqinfo; 734 cq->cq_consindx = 0; 735 cq->cq_buf = buf; 736 cq->cq_bufsz = (1 << log_cq_size); 737 cq->cq_mrhdl = mr; 738 cq->cq_sync = cq_sync; 739 740 /* 741 * If "old" CQ was a user-mappable CQ that is currently mmap()'d out 742 * to a user process, then we need to call devmap_devmem_remap() to 743 * invalidate the mapping to the CQ memory. We also need to 744 * invalidate the CQ tracking information for the user mapping. 745 */ 746 if ((cq->cq_is_umap) && (cq->cq_umap_dhp != NULL)) { 747 maxprot = (PROT_READ | PROT_WRITE | PROT_USER); 748 status = devmap_devmem_remap(cq->cq_umap_dhp, 749 state->ts_dip, 0, 0, cq->cq_cqinfo.qa_size, maxprot, 750 DEVMAP_MAPPING_INVALID, NULL); 751 if (status != DDI_SUCCESS) { 752 mutex_exit(&cq->cq_lock); 753 TAVOR_WARNING(state, "failed in CQ memory " 754 "devmap_devmem_remap()"); 755 return (ibc_get_ci_failure(0)); 756 } 757 cq->cq_umap_dhp = (devmap_cookie_t)NULL; 758 } 759 760 /* 761 * Drop the CQ lock now. The only thing left to do is to free up 762 * the old resources. 763 */ 764 mutex_exit(&cq->cq_lock); 765 766 /* 767 * Deregister the memory for the old Completion Queue. Note: We 768 * really can't return error here because we have no good way to 769 * cleanup. Plus, the deregistration really shouldn't ever happen. 770 * So, if it does, it is an indication that something has gone 771 * seriously wrong. 
So we print a warning message and return error 772 * (knowing, of course, that the "old" CQ memory will be leaked) 773 */ 774 status = tavor_mr_deregister(state, &mr_old, TAVOR_MR_DEREG_ALL, 775 sleepflag); 776 if (status != DDI_SUCCESS) { 777 TAVOR_WARNING(state, "failed to deregister old CQ memory"); 778 goto cqresize_fail; 779 } 780 781 /* Free the memory for the old CQ */ 782 tavor_queue_free(state, &old_cqinfo); 783 784 /* 785 * Fill in the return arguments (if necessary). This includes the 786 * real new completion queue size. 787 */ 788 if (actual_size != NULL) { 789 *actual_size = (1 << log_cq_size) - 1; 790 } 791 792 return (DDI_SUCCESS); 793 794 cqresize_fail: 795 return (status); 796 } 797 798 799 /* 800 * tavor_cq_notify() 801 * Context: Can be called from interrupt or base context. 802 */ 803 int 804 tavor_cq_notify(tavor_state_t *state, tavor_cqhdl_t cq, 805 ibt_cq_notify_flags_t flags) 806 { 807 uint_t cqnum; 808 809 /* 810 * Determine if we are trying to get the next completion or the next 811 * "solicited" completion. Then hit the appropriate doorbell. 812 * 813 * NOTE: Please see the comment in tavor_event.c:tavor_eq_poll 814 * regarding why we do not have to do an extra PIO read here, and we 815 * will not lose an event after writing this doorbell. 816 */ 817 cqnum = cq->cq_cqnum; 818 if (flags == IBT_NEXT_COMPLETION) { 819 tavor_cq_doorbell(state, TAVOR_CQDB_NOTIFY_CQ, cqnum, 820 TAVOR_CQDB_DEFAULT_PARAM); 821 822 } else if (flags == IBT_NEXT_SOLICITED) { 823 tavor_cq_doorbell(state, TAVOR_CQDB_NOTIFY_CQ_SOLICIT, 824 cqnum, TAVOR_CQDB_DEFAULT_PARAM); 825 826 } else { 827 return (IBT_CQ_NOTIFY_TYPE_INVALID); 828 } 829 830 return (DDI_SUCCESS); 831 } 832 833 834 /* 835 * tavor_cq_poll() 836 * Context: Can be called from interrupt or base context. 
 */
int
tavor_cq_poll(tavor_state_t *state, tavor_cqhdl_t cq, ibt_wc_t *wc_p,
    uint_t num_wc, uint_t *num_polled)
{
	tavor_hw_cqe_t	*cqe;
	uint32_t	cons_indx, wrap_around_mask;
	uint32_t	polled_cnt, num_to_increment;
	int		status;

	/*
	 * Check for user-mappable CQ memory.  Note:  We do not allow kernel
	 * clients to poll CQ memory that is accessible directly by the user.
	 * If the CQ memory is user accessible, then return an error.
	 */
	if (cq->cq_is_umap) {
		return (IBT_CQ_HDL_INVALID);
	}

	mutex_enter(&cq->cq_lock);

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx];

	/* Sync the current CQE to read */
	tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

	/*
	 * Keep pulling entries from the CQ until we find an entry owned by
	 * the hardware.  As long as there the CQE's owned by SW, process
	 * each entry by calling tavor_cq_cqe_consume() and updating the CQ
	 * consumer index.  Note:  We only update the consumer index if
	 * tavor_cq_cqe_consume() returns TAVOR_CQ_SYNC_AND_DB.  Otherwise,
	 * it indicates that we are going to "recycle" the CQE (probably
	 * because it is a error CQE and corresponds to more than one
	 * completion).  In the recycle case the CQE stays where it is (still
	 * SW-owned) and will be consumed again on a subsequent iteration
	 * or poll, but the work completion slot is still filled in.
	 */
	polled_cnt = 0;
	while (TAVOR_CQE_OWNER_IS_SW(cq, cqe)) {
		status = tavor_cq_cqe_consume(state, cq, cqe,
		    &wc_p[polled_cnt++]);
		if (status == TAVOR_CQ_SYNC_AND_DB) {
			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cq, cqe);

			/* Sync the current CQE for device */
			tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORDEV);

			/* Increment the consumer index */
			cons_indx = (cons_indx + 1) & wrap_around_mask;

			/* Update the pointer to the next CQ entry */
			cqe = &cq->cq_buf[cons_indx];

			/* Sync the next CQE to read */
			tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);
		}

		/*
		 * If we have run out of space to store work completions,
		 * then stop and return the ones we have pulled of the CQ.
		 */
		if (polled_cnt >= num_wc) {
			break;
		}
	}

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we have, for example,
	 * pulled from a CQE that we are still in the process of "recycling"
	 * for error purposes, then we would not update the consumer index.
	 */
	if ((polled_cnt != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Post doorbell to update the consumer index.  Doorbell
		 * value indicates number of entries consumed (minus 1).
		 * The second arm of the computation handles the case where
		 * cons_indx has wrapped around past cq_consindx.
		 */
		if (cons_indx > cq->cq_consindx) {
			num_to_increment = (cons_indx - cq->cq_consindx) - 1;
		} else {
			num_to_increment = ((cons_indx + cq->cq_bufsz) -
			    cq->cq_consindx) - 1;
		}
		cq->cq_consindx = cons_indx;
		tavor_cq_doorbell(state, TAVOR_CQDB_INCR_CONSINDX,
		    cq->cq_cqnum, num_to_increment);

	} else if (polled_cnt == 0) {
		/*
		 * If the CQ is empty, we can try to free up some of the WRID
		 * list containers.  See tavor_wr.c for more details on this
		 * operation.
		 */
		tavor_wrid_cq_reap(cq);
	}

	mutex_exit(&cq->cq_lock);

	/* Set "num_polled" (if necessary) */
	if (num_polled != NULL) {
		*num_polled = polled_cnt;
	}

	/* Set CQ_EMPTY condition if needed, otherwise return success */
	if (polled_cnt == 0) {
		status = IBT_CQ_EMPTY;
	} else {
		status = DDI_SUCCESS;
	}

	/*
	 * Check if the system is currently panicking.  If it is, then call
	 * the Tavor interrupt service routine.  This step is necessary here
	 * because we might be in a polled I/O mode and without the call to
	 * tavor_isr() - and its subsequent calls to poll and rearm each
	 * event queue - we might overflow our EQs and render the system
	 * unable to sync/dump.
	 */
	if (ddi_in_panic() != 0) {
		(void) tavor_isr((caddr_t)state, (caddr_t)NULL);
	}

	return (status);
}


/*
 * tavor_cq_handler()
 *    Context: Only called from interrupt context
 */
int
tavor_cq_handler(tavor_state_t *state, tavor_eqhdl_t eq,
    tavor_hw_eqe_t *eqe)
{
	tavor_cqhdl_t	cq;
	uint_t		cqnum;
	uint_t		eqe_evttype;

	eqe_evttype = TAVOR_EQE_EVTTYPE_GET(eq, eqe);

	ASSERT(eqe_evttype == TAVOR_EVT_COMPLETION ||
	    eqe_evttype == TAVOR_EVT_EQ_OVERFLOW);

	if (eqe_evttype == TAVOR_EVT_EQ_OVERFLOW) {
		tavor_eq_overflow_handler(state, eq, eqe);

		return (DDI_FAILURE);
	}


	/* Get the CQ handle from CQ number in event descriptor */
	cqnum = TAVOR_EQE_CQNUM_GET(eq, eqe);
	cq = tavor_cqhdl_from_cqnum(state, cqnum);

	/*
	 * Post the EQ doorbell to move the CQ to the "disarmed" state.
	 * This operation is to enable subsequent CQ doorbells (e.g. those
	 * that can be rung by tavor_cq_notify() above) to rearm the CQ.
1005 */ 1006 tavor_eq_doorbell(state, TAVOR_EQDB_DISARM_CQ, eq->eq_eqnum, cqnum); 1007 1008 /* 1009 * If the CQ handle is NULL, this is probably an indication 1010 * that the CQ has been freed already. In which case, we 1011 * should not deliver this event. 1012 * 1013 * We also check that the CQ number in the handle is the 1014 * same as the CQ number in the event queue entry. This 1015 * extra check allows us to handle the case where a CQ was 1016 * freed and then allocated again in the time it took to 1017 * handle the event queue processing. By constantly incrementing 1018 * the non-constrained portion of the CQ number every time 1019 * a new CQ is allocated, we mitigate (somewhat) the chance 1020 * that a stale event could be passed to the client's CQ 1021 * handler. 1022 * 1023 * Lastly, we check if "ts_ibtfpriv" is NULL. If it is then it 1024 * means that we've have either received this event before we 1025 * finished attaching to the IBTF or we've received it while we 1026 * are in the process of detaching. 
1027 */ 1028 if ((cq != NULL) && (cq->cq_cqnum == cqnum) && 1029 (state->ts_ibtfpriv != NULL)) { 1030 TAVOR_DO_IBTF_CQ_CALLB(state, cq); 1031 } 1032 1033 return (DDI_SUCCESS); 1034 } 1035 1036 1037 /* 1038 * tavor_cq_err_handler() 1039 * Context: Only called from interrupt context 1040 */ 1041 int 1042 tavor_cq_err_handler(tavor_state_t *state, tavor_eqhdl_t eq, 1043 tavor_hw_eqe_t *eqe) 1044 { 1045 tavor_cqhdl_t cq; 1046 uint_t cqnum; 1047 ibc_async_event_t event; 1048 ibt_async_code_t type; 1049 uint_t eqe_evttype; 1050 1051 eqe_evttype = TAVOR_EQE_EVTTYPE_GET(eq, eqe); 1052 1053 ASSERT(eqe_evttype == TAVOR_EVT_CQ_ERRORS || 1054 eqe_evttype == TAVOR_EVT_EQ_OVERFLOW); 1055 1056 if (eqe_evttype == TAVOR_EVT_EQ_OVERFLOW) { 1057 tavor_eq_overflow_handler(state, eq, eqe); 1058 1059 return (DDI_FAILURE); 1060 } 1061 1062 /* cmn_err(CE_CONT, "CQ Error handler\n"); */ 1063 1064 /* Get the CQ handle from CQ number in event descriptor */ 1065 cqnum = TAVOR_EQE_CQNUM_GET(eq, eqe); 1066 cq = tavor_cqhdl_from_cqnum(state, cqnum); 1067 1068 /* 1069 * If the CQ handle is NULL, this is probably an indication 1070 * that the CQ has been freed already. In which case, we 1071 * should not deliver this event. 1072 * 1073 * We also check that the CQ number in the handle is the 1074 * same as the CQ number in the event queue entry. This 1075 * extra check allows us to handle the case where a CQ was 1076 * freed and then allocated again in the time it took to 1077 * handle the event queue processing. By constantly incrementing 1078 * the non-constrained portion of the CQ number every time 1079 * a new CQ is allocated, we mitigate (somewhat) the chance 1080 * that a stale event could be passed to the client's CQ 1081 * handler. 1082 * 1083 * And then we check if "ts_ibtfpriv" is NULL. If it is then it 1084 * means that we've have either received this event before we 1085 * finished attaching to the IBTF or we've received it while we 1086 * are in the process of detaching. 
1087 */ 1088 if ((cq != NULL) && (cq->cq_cqnum == cqnum) && 1089 (state->ts_ibtfpriv != NULL)) { 1090 event.ev_cq_hdl = (ibt_cq_hdl_t)cq->cq_hdlrarg; 1091 type = IBT_ERROR_CQ; 1092 1093 TAVOR_DO_IBTF_ASYNC_CALLB(state, type, &event); 1094 } 1095 1096 return (DDI_SUCCESS); 1097 } 1098 1099 1100 /* 1101 * tavor_cq_refcnt_inc() 1102 * Context: Can be called from interrupt or base context. 1103 */ 1104 int 1105 tavor_cq_refcnt_inc(tavor_cqhdl_t cq, uint_t is_special) 1106 { 1107 /* 1108 * Increment the completion queue's reference count. Note: In order 1109 * to ensure compliance with IBA C11-15, we must ensure that a given 1110 * CQ is not used for both special (SMI/GSI) QP and non-special QP. 1111 * This is accomplished here by keeping track of how the referenced 1112 * CQ is being used. 1113 */ 1114 mutex_enter(&cq->cq_lock); 1115 if (cq->cq_refcnt == 0) { 1116 cq->cq_is_special = is_special; 1117 } else { 1118 if (cq->cq_is_special != is_special) { 1119 mutex_exit(&cq->cq_lock); 1120 return (DDI_FAILURE); 1121 } 1122 } 1123 cq->cq_refcnt++; 1124 mutex_exit(&cq->cq_lock); 1125 return (DDI_SUCCESS); 1126 } 1127 1128 1129 /* 1130 * tavor_cq_refcnt_dec() 1131 * Context: Can be called from interrupt or base context. 1132 */ 1133 void 1134 tavor_cq_refcnt_dec(tavor_cqhdl_t cq) 1135 { 1136 /* Decrement the completion queue's reference count */ 1137 mutex_enter(&cq->cq_lock); 1138 cq->cq_refcnt--; 1139 mutex_exit(&cq->cq_lock); 1140 } 1141 1142 1143 /* 1144 * tavor_cq_doorbell() 1145 * Context: Can be called from interrupt or base context. 
1146 */ 1147 static void 1148 tavor_cq_doorbell(tavor_state_t *state, uint32_t cq_cmd, uint32_t cqn, 1149 uint32_t cq_param) 1150 { 1151 uint64_t doorbell = 0; 1152 1153 /* Build the doorbell from the parameters */ 1154 doorbell = ((uint64_t)cq_cmd << TAVOR_CQDB_CMD_SHIFT) | 1155 ((uint64_t)cqn << TAVOR_CQDB_CQN_SHIFT) | cq_param; 1156 1157 /* Write the doorbell to UAR */ 1158 TAVOR_UAR_DOORBELL(state, (uint64_t *)&state->ts_uar->cq, 1159 doorbell); 1160 } 1161 1162 1163 /* 1164 * tavor_cqhdl_from_cqnum() 1165 * Context: Can be called from interrupt or base context. 1166 * 1167 * This routine is important because changing the unconstrained 1168 * portion of the CQ number is critical to the detection of a 1169 * potential race condition in the CQ handler code (i.e. the case 1170 * where a CQ is freed and alloc'd again before an event for the 1171 * "old" CQ can be handled). 1172 * 1173 * While this is not a perfect solution (not sure that one exists) 1174 * it does help to mitigate the chance that this race condition will 1175 * cause us to deliver a "stale" event to the new CQ owner. Note: 1176 * this solution does not scale well because the number of constrained 1177 * bits increases (and, hence, the number of unconstrained bits 1178 * decreases) as the number of supported CQs grows. For small and 1179 * intermediate values, it should hopefully provide sufficient 1180 * protection. 1181 */ 1182 tavor_cqhdl_t 1183 tavor_cqhdl_from_cqnum(tavor_state_t *state, uint_t cqnum) 1184 { 1185 uint_t cqindx, cqmask; 1186 1187 /* Calculate the CQ table index from the cqnum */ 1188 cqmask = (1 << state->ts_cfg_profile->cp_log_num_cq) - 1; 1189 cqindx = cqnum & cqmask; 1190 return (state->ts_cqhdl[cqindx]); 1191 } 1192 1193 1194 /* 1195 * tavor_cq_cqe_consume() 1196 * Context: Can be called from interrupt or base context. 
 */
static int
tavor_cq_cqe_consume(tavor_state_t *state, tavor_cqhdl_t cq,
    tavor_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint_t		flags, type, opcode, qpnum, qp1_indx;
	int		status;

	/*
	 * Determine if this is an "error" CQE by examining "opcode".  If it
	 * is an error CQE, then call tavor_cq_errcqe_consume() and return
	 * whatever status it returns (TAVOR_CQ_SYNC_AND_DB or
	 * TAVOR_CQ_RECYCLE_ENTRY).  Otherwise, this is a successful
	 * completion.
	 */
	opcode = TAVOR_CQE_OPCODE_GET(cq, cqe);
	if ((opcode == TAVOR_CQE_SEND_ERR_OPCODE) ||
	    (opcode == TAVOR_CQE_RECV_ERR_OPCODE)) {
		status = tavor_cq_errcqe_consume(state, cq, cqe, wc);
		return (status);
	}

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = tavor_wrid_get_entry(cq, cqe, NULL);

	/*
	 * Parse the CQE opcode to determine completion type.  This will set
	 * not only the type of the completion, but also any flags that might
	 * be associated with it (e.g. whether immediate data is present).
	 */
	flags = IBT_WC_NO_FLAGS;
	if (TAVOR_CQE_SENDRECV_GET(cq, cqe) != TAVOR_COMPLETION_RECV) {

		/* Send CQE */
		switch (opcode) {
		case TAVOR_CQE_SND_RDMAWR_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			/* FALLTHROUGH */
		case TAVOR_CQE_SND_RDMAWR:
			type = IBT_WRC_RDMAW;
			break;

		case TAVOR_CQE_SND_SEND_IMM:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			/* FALLTHROUGH */
		case TAVOR_CQE_SND_SEND:
			type = IBT_WRC_SEND;
			break;

		case TAVOR_CQE_SND_RDMARD:
			type = IBT_WRC_RDMAR;
			break;

		case TAVOR_CQE_SND_ATOMIC_CS:
			type = IBT_WRC_CSWAP;
			break;

		case TAVOR_CQE_SND_ATOMIC_FA:
			type = IBT_WRC_FADD;
			break;

		case TAVOR_CQE_SND_BIND_MW:
			type = IBT_WRC_BIND;
			break;

		default:
			TAVOR_WARNING(state, "unknown send CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	} else {

		/* Receive CQE */
		switch (opcode & 0x1F) {
		case TAVOR_CQE_RCV_RECV_IMM:
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RECV_IMM2:
			/*
			 * Note:  According to the Tavor PRM, all QP1 recv
			 * completions look like the result of a Send with
			 * Immediate.  They are not, however, (MADs are Send
			 * Only) so we need to check the QP number and set
			 * the flag only if it is non-QP1.
			 */
			qpnum	 = TAVOR_CQE_QPNUM_GET(cq, cqe);
			qp1_indx = state->ts_spec_qp1->tr_indx;
			if ((qpnum < qp1_indx) || (qpnum > qp1_indx + 1)) {
				flags |= IBT_WC_IMMED_DATA_PRESENT;
			}
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RECV:
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RECV2:
			type = IBT_WRC_RECV;
			break;

		case TAVOR_CQE_RCV_RDMAWR_IMM:
			/* FALLTHROUGH */
		case TAVOR_CQE_RCV_RDMAWR_IMM2:
			flags |= IBT_WC_IMMED_DATA_PRESENT;
			type = IBT_WRC_RECV_RDMAWI;
			break;

		default:
			TAVOR_WARNING(state, "unknown recv CQE type");
			wc->wc_status = IBT_WC_LOCAL_QP_OP_ERR;
			return (TAVOR_CQ_SYNC_AND_DB);
		}
	}
	wc->wc_type = type;

	/*
	 * Check for GRH, update the flags, then fill in "wc_flags" field
	 * in the work completion
	 */
	if (TAVOR_CQE_GRH_GET(cq, cqe) != 0) {
		flags |= IBT_WC_GRH_PRESENT;
	}
	wc->wc_flags = flags;

	/* If we got here, completion status must be success */
	wc->wc_status = IBT_WC_SUCCESS;

	/*
	 * Parse the remaining contents of the CQE into the work completion.
	 * This means filling in SL, QP number, SLID, immediate data, etc.
	 * Note:  Not all of these fields are valid in a given completion.
	 * Many of them depend on the actual type of completion.  So we fill
	 * in all of the fields and leave it up to the IBTF and consumer to
	 * sort out which are valid based on their context.
	 */
	wc->wc_sl	  = TAVOR_CQE_SL_GET(cq, cqe);
	wc->wc_immed_data = TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
	wc->wc_qpn	  = TAVOR_CQE_DQPN_GET(cq, cqe);
	wc->wc_res_hash	  = 0;
	wc->wc_slid	  = TAVOR_CQE_DLID_GET(cq, cqe);
	wc->wc_ethertype  = (wc->wc_immed_data & 0xFFFF);
	wc->wc_pkey_ix	  = (wc->wc_immed_data >> 16);

	/*
	 * Depending on whether the completion was a receive or a send
	 * completion, fill in "bytes transferred" as appropriate.  Also,
	 * if necessary, fill in the "path bits" field.
	 */
	if (TAVOR_CQE_SENDRECV_GET(cq, cqe) == TAVOR_COMPLETION_RECV) {
		wc->wc_path_bits = TAVOR_CQE_PATHBITS_GET(cq, cqe);
		wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cq, cqe);

	} else if ((wc->wc_type == IBT_WRC_RDMAR) ||
	    (wc->wc_type == IBT_WRC_CSWAP) || (wc->wc_type == IBT_WRC_FADD)) {
		wc->wc_bytes_xfer = TAVOR_CQE_BYTECNT_GET(cq, cqe);
	}

	return (TAVOR_CQ_SYNC_AND_DB);
}


/*
 * tavor_cq_errcqe_consume()
 *    Context: Can be called from interrupt or base context.
 *
 *    Returns TAVOR_CQ_SYNC_AND_DB when the error CQE has been fully
 *    consumed, or TAVOR_CQ_RECYCLE_ENTRY when the same CQE represents
 *    further completions and must be re-examined by the caller.
 */
static int
tavor_cq_errcqe_consume(tavor_state_t *state, tavor_cqhdl_t cq,
    tavor_hw_cqe_t *cqe, ibt_wc_t *wc)
{
	uint64_t		next_wqeaddr;
	uint32_t		imm_eth_pkey_cred;
	uint_t			nextwqesize, dbd;
	uint_t			doorbell_cnt, status;
	tavor_wrid_entry_t	wre;

	/*
	 * Fetch the Work Request ID using the information in the CQE.
	 * See tavor_wr.c for more details.
	 */
	wc->wc_id = tavor_wrid_get_entry(cq, cqe, &wre);

	/*
	 * Parse the CQE opcode to determine completion type.  We know that
	 * the CQE is an error completion, so we extract only the completion
	 * status here.  The hardware status code is translated to the
	 * corresponding IBTF work completion status below.
	 */
	imm_eth_pkey_cred = TAVOR_CQE_IMM_ETH_PKEY_CRED_GET(cq, cqe);
	status = imm_eth_pkey_cred >> TAVOR_CQE_ERR_STATUS_SHIFT;
	switch (status) {
	case TAVOR_CQE_LOC_LEN_ERR:
		status = IBT_WC_LOCAL_LEN_ERR;
		break;

	case TAVOR_CQE_LOC_OP_ERR:
		status = IBT_WC_LOCAL_QP_OP_ERR;
		break;

	case TAVOR_CQE_LOC_PROT_ERR:
		status = IBT_WC_LOCAL_PROTECT_ERR;
		break;

	case TAVOR_CQE_WR_FLUSHED_ERR:
		status = IBT_WC_WR_FLUSHED_ERR;
		break;

	case TAVOR_CQE_MW_BIND_ERR:
		status = IBT_WC_MEM_WIN_BIND_ERR;
		break;

	case TAVOR_CQE_BAD_RESPONSE_ERR:
		status = IBT_WC_BAD_RESPONSE_ERR;
		break;

	case TAVOR_CQE_LOCAL_ACCESS_ERR:
		status = IBT_WC_LOCAL_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_INV_REQ_ERR:
		status = IBT_WC_REMOTE_INVALID_REQ_ERR;
		break;

	case TAVOR_CQE_REM_ACC_ERR:
		status = IBT_WC_REMOTE_ACCESS_ERR;
		break;

	case TAVOR_CQE_REM_OP_ERR:
		status = IBT_WC_REMOTE_OP_ERR;
		break;

	case TAVOR_CQE_TRANS_TO_ERR:
		status = IBT_WC_TRANS_TIMEOUT_ERR;
		break;

	case TAVOR_CQE_RNRNAK_TO_ERR:
		status = IBT_WC_RNR_NAK_TIMEOUT_ERR;
		break;

	/*
	 * The following error codes are not supported in the Tavor driver
	 * as they relate only to Reliable Datagram completion statuses:
	 *    case TAVOR_CQE_LOCAL_RDD_VIO_ERR:
	 *    case TAVOR_CQE_REM_INV_RD_REQ_ERR:
	 *    case TAVOR_CQE_EEC_REM_ABORTED_ERR:
	 *    case TAVOR_CQE_INV_EEC_NUM_ERR:
	 *    case TAVOR_CQE_INV_EEC_STATE_ERR:
	 *    case TAVOR_CQE_LOC_EEC_ERR:
	 */

	default:
		TAVOR_WARNING(state, "unknown error CQE status");
		status = IBT_WC_LOCAL_QP_OP_ERR;
		break;
	}
	wc->wc_status = status;

	/*
	 * Now we do all the checking that's necessary to handle completion
	 * queue entry "recycling"
	 *
	 * It is not necessary here to try to sync the WQE as we are only
	 * attempting to read from the Work Queue (and hardware does not
	 * write to it).
	 */

	/*
	 * We can get doorbell info, WQE address, size for the next WQE
	 * from the "wre" (which was filled in above in the call to the
	 * tavor_wrid_get_entry() routine)
	 */
	dbd = (wre.wr_signaled_dbd & TAVOR_WRID_ENTRY_DOORBELLED) ? 1 : 0;
	next_wqeaddr = wre.wr_wqeaddrsz;
	nextwqesize  = wre.wr_wqeaddrsz & TAVOR_WQE_NDS_MASK;

	/*
	 * Get the doorbell count from the CQE.  This indicates how many
	 * completions this one CQE represents.
	 */
	doorbell_cnt = imm_eth_pkey_cred & TAVOR_CQE_ERR_DBDCNT_MASK;

	/*
	 * Determine if we're ready to consume this CQE yet or not.  If the
	 * next WQE has size zero (i.e. no next WQE) or if the doorbell count
	 * is down to zero, then this is the last/only completion represented
	 * by the current CQE (return TAVOR_CQ_SYNC_AND_DB).  Otherwise, the
	 * current CQE needs to be recycled (see below).
	 */
	if ((nextwqesize == 0) || ((doorbell_cnt == 0) && (dbd == 1))) {
		/*
		 * Consume the CQE
		 *    Return status to indicate that doorbell and sync may be
		 *    necessary.
		 */
		return (TAVOR_CQ_SYNC_AND_DB);

	} else {
		/*
		 * Recycle the CQE for use in the next PollCQ() call
		 *    Decrement the doorbell count, modify the error status,
		 *    and update the WQE address and size (to point to the
		 *    next WQE on the chain).  Put these updated entries back
		 *    into the CQE.
		 *    Despite the fact that we have updated the CQE, it is not
		 *    necessary for us to attempt to sync this entry just yet
		 *    as we have not changed the "hardware's view" of the
		 *    entry (i.e. we have not modified the "owner" bit - which
		 *    is all that the Tavor hardware really cares about).
		 */
		doorbell_cnt = doorbell_cnt - dbd;
		TAVOR_CQE_IMM_ETH_PKEY_CRED_SET(cq, cqe,
		    ((TAVOR_CQE_WR_FLUSHED_ERR << TAVOR_CQE_ERR_STATUS_SHIFT) |
		    (doorbell_cnt & TAVOR_CQE_ERR_DBDCNT_MASK)));
		TAVOR_CQE_WQEADDRSZ_SET(cq, cqe,
		    TAVOR_QP_WQEADDRSZ(next_wqeaddr, nextwqesize));

		return (TAVOR_CQ_RECYCLE_ENTRY);
	}
}


/*
 * tavor_cqe_sync()
 *    Context: Can be called from interrupt or base context.
 *
 *    DMA-syncs a single CQE in the indicated direction ("flag" is
 *    DDI_DMA_SYNC_FORCPU or DDI_DMA_SYNC_FORDEV).  A no-op when the
 *    CQ does not require syncing (cq_sync == 0).
 */
static void
tavor_cqe_sync(tavor_cqhdl_t cq, tavor_hw_cqe_t *cqe, uint_t flag)
{
	ddi_dma_handle_t	dmahdl;
	off_t			offset;

	/* Determine if CQ needs to be synced or not */
	if (cq->cq_sync == 0)
		return;

	/* Get the DMA handle from CQ context */
	dmahdl = cq->cq_mrhdl->mr_bindinfo.bi_dmahdl;

	/* Calculate offset of next CQE */
	offset = (off_t)((uintptr_t)cqe - (uintptr_t)&cq->cq_buf[0]);
	(void) ddi_dma_sync(dmahdl, offset, sizeof (tavor_hw_cqe_t), flag);
}


/*
 * tavor_cq_resize_helper()
 *    Context: Can be called only from user or kernel context.
 *
 *    Copies "num_newcqe" outstanding CQEs from the old CQ buffer
 *    (starting at "old_cons_indx") to the start of "new_cqbuf".
 *    The caller must hold cq_lock and must have determined the CQE
 *    count beforehand.
 */
static void
tavor_cq_resize_helper(tavor_cqhdl_t cq, tavor_hw_cqe_t *new_cqbuf,
    uint32_t old_cons_indx, uint32_t num_newcqe)
{
	tavor_hw_cqe_t	*old_cqe, *new_cqe;
	uint32_t	new_cons_indx, wrap_around_mask;
	int		i;

	ASSERT(MUTEX_HELD(&cq->cq_lock));

	/* Get the consumer index */
	new_cons_indx = 0;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/*
	 * Calculate the pointers to the first CQ entry (in the "old" CQ)
	 * and the first CQ entry in the "new" CQ
	 */
	old_cqe = &cq->cq_buf[old_cons_indx];
	new_cqe = &new_cqbuf[new_cons_indx];

	/* Sync entire "old" CQ for use by software (if necessary). */
	if (cq->cq_sync) {
		(void) ddi_dma_sync(cq->cq_mrhdl->mr_bindinfo.bi_dmahdl,
		    0, cq->cq_cqinfo.qa_size, DDI_DMA_SYNC_FORCPU);
	}

	/*
	 * Keep pulling entries from the "old" CQ until we find an entry owned
	 * by the hardware.  Process each entry by copying it into the "new"
	 * CQ and updating respective indices and pointers in the "old" CQ.
	 * Note: only the old index wraps; the new buffer is filled linearly
	 * from index zero.
	 */
	for (i = 0; i < num_newcqe; i++) {

		/* Copy this old CQE into the "new_cqe" pointer */
		bcopy(old_cqe, new_cqe, sizeof (tavor_hw_cqe_t));

		/* Increment the consumer index (for both CQs) */
		old_cons_indx = (old_cons_indx + 1) & wrap_around_mask;
		new_cons_indx = (new_cons_indx + 1);

		/* Update the pointer to the next CQ entry */
		old_cqe = &cq->cq_buf[old_cons_indx];
		new_cqe = &new_cqbuf[new_cons_indx];
	}
}

/*
 * tavor_cq_srq_entries_flush()
 *    Context: Can be called from interrupt or base context.
 *
 *    Removes from this QP's receive CQ all outstanding receive
 *    completions that belong to the QP (returning their WQEs to the
 *    SRQ free list), compacting any unrelated CQEs and advancing the
 *    consumer index past the freed entries.  The caller must hold the
 *    CQ lock.
 */
void
tavor_cq_srq_entries_flush(tavor_state_t *state, tavor_qphdl_t qp)
{
	tavor_cqhdl_t		cq;
	tavor_workq_hdr_t	*wqhdr;
	tavor_hw_cqe_t		*cqe;
	tavor_hw_cqe_t		*next_cqe;
	uint32_t		cons_indx, tail_cons_indx, wrap_around_mask;
	uint32_t		new_indx, check_indx, indx;
	uint32_t		num_to_increment;
	int			cqe_qpnum, cqe_type;
	int			outstanding_cqes, removed_cqes;
	int			i;

	ASSERT(MUTEX_HELD(&qp->qp_rq_cqhdl->cq_lock));

	cq = qp->qp_rq_cqhdl;
	wqhdr = qp->qp_rq_wqhdr;

	ASSERT(wqhdr->wq_wrid_post != NULL);
	ASSERT(wqhdr->wq_wrid_post->wl_srq_en != 0);

	/*
	 * Check for user-mapped CQ memory.  Note:  We do not allow kernel
	 * clients to modify any userland mapping CQ.  If the CQ is
	 * user-mapped, then we simply return here, and this "flush" function
	 * becomes a NO-OP in this case.
	 */
	if (cq->cq_is_umap) {
		return;
	}

	/* Get the consumer index */
	cons_indx = cq->cq_consindx;

	/*
	 * Calculate the wrap around mask.  Note: This operation only works
	 * because all Tavor completion queues have power-of-2 sizes
	 */
	wrap_around_mask = (cq->cq_bufsz - 1);

	/* Calculate the pointer to the first CQ entry */
	cqe = &cq->cq_buf[cons_indx];

	/* Sync the current CQE to read */
	tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);

	/*
	 * Loop through the CQ looking for entries owned by software.  If an
	 * entry is owned by software then we increment an 'outstanding_cqes'
	 * count to know how many entries total we have on our CQ.  We use this
	 * value further down to know how many entries to loop through looking
	 * for our same QP number.
	 */
	outstanding_cqes = 0;
	tail_cons_indx = cons_indx;
	while (TAVOR_CQE_OWNER_IS_SW(cq, cqe)) {
		/* increment total cqes count */
		outstanding_cqes++;

		/* increment the consumer index */
		tail_cons_indx = (tail_cons_indx + 1) & wrap_around_mask;

		/* update the pointer to the next cq entry */
		cqe = &cq->cq_buf[tail_cons_indx];

		/* sync the next cqe to read */
		tavor_cqe_sync(cq, cqe, DDI_DMA_SYNC_FORCPU);
	}

	/*
	 * Using the 'tail_cons_indx' that was just set, we now know how many
	 * total CQEs possible there are.  Set the 'check_indx' and the
	 * 'new_indx' to the last entry identified by 'tail_cons_indx'.
	 * The scan below walks BACKWARDS from the tail toward the head,
	 * compacting unrelated CQEs toward the tail as matching ones are
	 * removed.
	 */
	check_indx = new_indx = (tail_cons_indx - 1) & wrap_around_mask;

	for (i = 0; i < outstanding_cqes; i++) {
		cqe = &cq->cq_buf[check_indx];

		/* Grab QP number from CQE */
		cqe_qpnum = TAVOR_CQE_QPNUM_GET(cq, cqe);
		cqe_type  = TAVOR_CQE_SENDRECV_GET(cq, cqe);

		/*
		 * If the QP number is the same in the CQE as the QP that we
		 * have on this SRQ, then we must free up the entry off the
		 * SRQ.  We also make sure that the completion type is of the
		 * 'TAVOR_COMPLETION_RECV' type.  So any send completions on
		 * this CQ will be left as-is.  The handling of returning
		 * entries back to HW ownership happens further down.
		 */
		if (cqe_qpnum == qp->qp_qpnum &&
		    cqe_type == TAVOR_COMPLETION_RECV) {

			/* Add back to SRQ free list */
			(void) tavor_wrid_find_match_srq(wqhdr->wq_wrid_post,
			    cq, cqe);
		} else {
			/* Do Copy */
			if (check_indx != new_indx) {
				next_cqe = &cq->cq_buf[new_indx];

				/*
				 * Copy the CQE into the "next_cqe"
				 * pointer.
				 */
				bcopy(cqe, next_cqe, sizeof (tavor_hw_cqe_t));
			}
			new_indx = (new_indx - 1) & wrap_around_mask;
		}
		/* Move index to next CQE to check */
		check_indx = (check_indx - 1) & wrap_around_mask;
	}

	/* Initialize removed cqes count */
	removed_cqes = 0;

	/* If an entry was removed */
	if (check_indx != new_indx) {

		/*
		 * Set current pointer back to the beginning consumer index.
		 * At this point, all unclaimed entries have been copied to the
		 * index specified by 'new_indx'.  This 'new_indx' will be used
		 * as the new consumer index after we mark all freed entries as
		 * having HW ownership.  We do that here.
		 *
		 * NOTE(review): the loop bound 'indx <= new_indx' compares
		 * post-wraparound ring indices directly; presumably the freed
		 * region never wraps past index zero here - confirm against
		 * the index arithmetic above before relying on this path for
		 * a wrapped region.
		 */

		/* Loop through all entries until we reach our new pointer */
		for (indx = cons_indx; indx <= new_indx;
		    indx = (indx + 1) & wrap_around_mask) {
			removed_cqes++;
			cqe = &cq->cq_buf[indx];

			/* Reset entry to hardware ownership */
			TAVOR_CQE_OWNER_SET_HW(cq, cqe);
		}
	}

	/*
	 * Update consumer index to be the 'new_indx'.  This moves it past all
	 * removed entries.  Because 'new_indx' is pointing to the last
	 * previously valid SW owned entry, we add 1 to point the cons_indx to
	 * the first HW owned entry.
	 */
	cons_indx = (new_indx + 1) & wrap_around_mask;

	/*
	 * Now we only ring the doorbell (to update the consumer index) if
	 * we've actually consumed a CQ entry.  If we found no QP number
	 * matches above, then we would not have removed anything.  So only if
	 * something was removed do we ring the doorbell.
	 */
	if ((removed_cqes != 0) && (cq->cq_consindx != cons_indx)) {
		/*
		 * Post doorbell to update the consumer index.  Doorbell
		 * value indicates number of entries consumed (minus 1)
		 */
		if (cons_indx > cq->cq_consindx) {
			num_to_increment = (cons_indx - cq->cq_consindx) - 1;
		} else {
			num_to_increment = ((cons_indx + cq->cq_bufsz) -
			    cq->cq_consindx) - 1;
		}
		cq->cq_consindx = cons_indx;

		tavor_cq_doorbell(state, TAVOR_CQDB_INCR_CONSINDX,
		    cq->cq_cqnum, num_to_increment);
	}
}