1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 31 #include <sys/param.h> 32 #include <sys/types.h> 33 #include <sys/mman.h> 34 #include <sys/pciio.h> 35 #include <sys/ioctl.h> 36 #include <sys/stat.h> 37 38 #include <sys/pci.h> 39 40 #include <dev/io/iodev.h> 41 #include <dev/pci/pcireg.h> 42 43 #include <machine/iodev.h> 44 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <err.h> 49 #include <errno.h> 50 #include <fcntl.h> 51 #include <sysexits.h> 52 #include <unistd.h> 53 54 #include <machine/vmm.h> 55 #include <vmmapi.h> 56 #include <sys/ppt_dev.h> 57 58 #include "config.h" 59 #include "debug.h" 60 #include "pci_passthru.h" 61 #include "mem.h" 62 63 #define LEGACY_SUPPORT 1 64 65 #define MSIX_TABLE_COUNT(ctrl) (((ctrl) & PCIM_MSIXCTRL_TABLE_SIZE) + 1) 66 #define MSIX_CAPLEN 12 67 68 struct passthru_softc { 69 struct pci_devinst *psc_pi; 70 /* ROM is handled like a BAR */ 71 struct pcibar psc_bar[PCI_BARMAX_WITH_ROM + 1]; 72 struct { 73 int capoff; 74 int msgctrl; 75 int emulated; 76 } psc_msi; 77 struct { 78 int capoff; 79 } psc_msix; 80 int pptfd; 81 int msi_limit; 82 int msix_limit; 83 84 cfgread_handler psc_pcir_rhandler[PCI_REGMAX + 1]; 85 cfgwrite_handler psc_pcir_whandler[PCI_REGMAX + 1]; 86 }; 87 88 static int 89 msi_caplen(int msgctrl) 90 { 91 int len; 92 93 len = 10; /* minimum length of msi capability */ 94 95 if (msgctrl & PCIM_MSICTRL_64BIT) 96 len += 4; 97 98 #if 0 99 /* 100 * Ignore the 'mask' and 'pending' bits in the MSI capability. 101 * We'll let the guest manipulate them directly. 102 */ 103 if (msgctrl & PCIM_MSICTRL_VECTOR) 104 len += 10; 105 #endif 106 107 return (len); 108 } 109 110 static uint32_t 111 passthru_read_config(const struct passthru_softc *sc, long reg, int width) 112 { 113 struct ppt_cfg_io pi; 114 115 pi.pci_off = reg; 116 pi.pci_width = width; 117 118 if (ioctl(sc->pptfd, PPT_CFG_READ, &pi) != 0) { 119 return (0); 120 } 121 return (pi.pci_data); 122 } 123 124 static void 125 passthru_write_config(const struct passthru_softc *sc, long reg, int width, 126 uint32_t data) 127 { 128 struct ppt_cfg_io pi; 129 130 pi.pci_off = reg; 131 pi.pci_width = width; 132 pi.pci_data = data; 133 134 (void) ioctl(sc->pptfd, PPT_CFG_WRITE, &pi); 135 } 136 137 static int 138 passthru_get_bar(struct passthru_softc *sc, int bar, enum pcibar_type *type, 139 uint64_t *base, uint64_t *size) 140 { 141 struct ppt_bar_query pb; 142 143 pb.pbq_baridx = bar; 144 145 if (ioctl(sc->pptfd, PPT_BAR_QUERY, &pb) != 0) { 146 return (-1); 147 } 148 149 switch (pb.pbq_type) { 150 case PCI_ADDR_IO: 151 *type = PCIBAR_IO; 152 break; 153 case PCI_ADDR_MEM32: 154 *type = PCIBAR_MEM32; 155 break; 156 case PCI_ADDR_MEM64: 157 *type = PCIBAR_MEM64; 158 break; 159 default: 160 err(1, "unrecognized BAR type: %u\n", pb.pbq_type); 161 break; 162 } 163 164 *base = pb.pbq_base; 165 *size = pb.pbq_size; 166 return (0); 167 } 168 169 static int 170 passthru_dev_open(const char *path, int *pptfdp) 171 { 172 int pptfd; 173 174 if ((pptfd = open(path, O_RDWR)) < 0) { 175 return (errno); 176 } 177 178 /* XXX: verify fd with ioctl? */ 179 *pptfdp = pptfd; 180 return (0); 181 } 182 183 #ifdef LEGACY_SUPPORT 184 static int 185 passthru_add_msicap(struct pci_devinst *pi, int msgnum, int nextptr) 186 { 187 int capoff; 188 struct msicap msicap; 189 u_char *capdata; 190 191 pci_populate_msicap(&msicap, msgnum, nextptr); 192 193 /* 194 * XXX 195 * Copy the msi capability structure in the last 16 bytes of the 196 * config space. This is wrong because it could shadow something 197 * useful to the device. 198 */ 199 capoff = 256 - roundup(sizeof(msicap), 4); 200 capdata = (u_char *)&msicap; 201 for (size_t i = 0; i < sizeof(msicap); i++) 202 pci_set_cfgdata8(pi, capoff + i, capdata[i]); 203 204 return (capoff); 205 } 206 #endif /* LEGACY_SUPPORT */ 207 208 static void 209 passthru_intr_limit(struct passthru_softc *sc, struct msixcap *msixcap) 210 { 211 struct pci_devinst *pi = sc->psc_pi; 212 int off; 213 214 /* Reduce the number of MSI vectors if higher than OS limit */ 215 if ((off = sc->psc_msi.capoff) != 0 && sc->msi_limit != -1) { 216 int msi_limit, mmc; 217 218 msi_limit = 219 sc->msi_limit > 16 ? PCIM_MSICTRL_MMC_32 : 220 sc->msi_limit > 8 ? PCIM_MSICTRL_MMC_16 : 221 sc->msi_limit > 4 ? PCIM_MSICTRL_MMC_8 : 222 sc->msi_limit > 2 ? PCIM_MSICTRL_MMC_4 : 223 sc->msi_limit > 1 ? PCIM_MSICTRL_MMC_2 : 224 PCIM_MSICTRL_MMC_1; 225 mmc = sc->psc_msi.msgctrl & PCIM_MSICTRL_MMC_MASK; 226 227 if (mmc > msi_limit) { 228 sc->psc_msi.msgctrl &= ~PCIM_MSICTRL_MMC_MASK; 229 sc->psc_msi.msgctrl |= msi_limit; 230 pci_set_cfgdata16(pi, off + 2, sc->psc_msi.msgctrl); 231 } 232 } 233 234 /* Reduce the number of MSI-X vectors if higher than OS limit */ 235 if ((off = sc->psc_msix.capoff) != 0 && sc->msix_limit != -1) { 236 if (MSIX_TABLE_COUNT(msixcap->msgctrl) > sc->msix_limit) { 237 msixcap->msgctrl &= ~PCIM_MSIXCTRL_TABLE_SIZE; 238 msixcap->msgctrl |= sc->msix_limit - 1; 239 pci_set_cfgdata16(pi, off + 2, msixcap->msgctrl); 240 } 241 } 242 } 243 244 static int 245 cfginitmsi(struct passthru_softc *sc) 246 { 247 int i, ptr, capptr, cap, sts, caplen, table_size; 248 uint32_t u32; 249 struct pci_devinst *pi = sc->psc_pi; 250 struct msixcap msixcap; 251 char *msixcap_ptr; 252 253 /* 254 * Parse the capabilities and cache the location of the MSI 255 * and MSI-X capabilities. 256 */ 257 sts = passthru_read_config(sc, PCIR_STATUS, 2); 258 if (sts & PCIM_STATUS_CAPPRESENT) { 259 ptr = passthru_read_config(sc, PCIR_CAP_PTR, 1); 260 while (ptr != 0 && ptr != 0xff) { 261 cap = passthru_read_config(sc, ptr + PCICAP_ID, 1); 262 if (cap == PCIY_MSI) { 263 /* 264 * Copy the MSI capability into the config 265 * space of the emulated pci device 266 */ 267 sc->psc_msi.capoff = ptr; 268 sc->psc_msi.msgctrl = passthru_read_config(sc, 269 ptr + 2, 2); 270 sc->psc_msi.emulated = 0; 271 caplen = msi_caplen(sc->psc_msi.msgctrl); 272 capptr = ptr; 273 while (caplen > 0) { 274 u32 = passthru_read_config(sc, 275 capptr, 4); 276 pci_set_cfgdata32(pi, capptr, u32); 277 caplen -= 4; 278 capptr += 4; 279 } 280 } else if (cap == PCIY_MSIX) { 281 /* 282 * Copy the MSI-X capability 283 */ 284 sc->psc_msix.capoff = ptr; 285 caplen = 12; 286 msixcap_ptr = (char *)&msixcap; 287 capptr = ptr; 288 while (caplen > 0) { 289 u32 = passthru_read_config(sc, 290 capptr, 4); 291 memcpy(msixcap_ptr, &u32, 4); 292 pci_set_cfgdata32(pi, capptr, u32); 293 caplen -= 4; 294 capptr += 4; 295 msixcap_ptr += 4; 296 } 297 } 298 ptr = passthru_read_config(sc, ptr + PCICAP_NEXTPTR, 1); 299 } 300 } 301 302 passthru_intr_limit(sc, &msixcap); 303 304 if (sc->psc_msix.capoff != 0) { 305 pi->pi_msix.pba_bar = 306 msixcap.pba_info & PCIM_MSIX_BIR_MASK; 307 pi->pi_msix.pba_offset = 308 msixcap.pba_info & ~PCIM_MSIX_BIR_MASK; 309 pi->pi_msix.table_bar = 310 msixcap.table_info & PCIM_MSIX_BIR_MASK; 311 pi->pi_msix.table_offset = 312 msixcap.table_info & ~PCIM_MSIX_BIR_MASK; 313 pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl); 314 pi->pi_msix.pba_size = PBA_SIZE(pi->pi_msix.table_count); 315 316 /* Allocate the emulated MSI-X table array */ 317 table_size = pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 318 pi->pi_msix.table = calloc(1, table_size); 319 320 /* Mask all table entries */ 321 for (i = 0; i < pi->pi_msix.table_count; i++) { 322 pi->pi_msix.table[i].vector_control |= 323 PCIM_MSIX_VCTRL_MASK; 324 } 325 } 326 327 #ifdef LEGACY_SUPPORT 328 /* 329 * If the passthrough device does not support MSI then craft a 330 * MSI capability for it. We link the new MSI capability at the 331 * head of the list of capabilities. 332 */ 333 if ((sts & PCIM_STATUS_CAPPRESENT) != 0 && sc->psc_msi.capoff == 0) { 334 int origptr, msiptr; 335 origptr = passthru_read_config(sc, PCIR_CAP_PTR, 1); 336 msiptr = passthru_add_msicap(pi, 1, origptr); 337 sc->psc_msi.capoff = msiptr; 338 sc->psc_msi.msgctrl = pci_get_cfgdata16(pi, msiptr + 2); 339 sc->psc_msi.emulated = 1; 340 pci_set_cfgdata8(pi, PCIR_CAP_PTR, msiptr); 341 } 342 #endif 343 344 /* Make sure one of the capabilities is present */ 345 if (sc->psc_msi.capoff == 0 && sc->psc_msix.capoff == 0) 346 return (-1); 347 else 348 return (0); 349 } 350 351 static uint64_t 352 msix_table_read(struct passthru_softc *sc, uint64_t offset, int size) 353 { 354 struct pci_devinst *pi; 355 struct msix_table_entry *entry; 356 uint8_t *src8; 357 uint16_t *src16; 358 uint32_t *src32; 359 uint64_t *src64; 360 uint64_t data; 361 size_t entry_offset; 362 uint32_t table_offset; 363 int index, table_count; 364 365 pi = sc->psc_pi; 366 367 table_offset = pi->pi_msix.table_offset; 368 table_count = pi->pi_msix.table_count; 369 if (offset < table_offset || 370 offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { 371 switch (size) { 372 case 1: 373 src8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); 374 data = *src8; 375 break; 376 case 2: 377 src16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); 378 data = *src16; 379 break; 380 case 4: 381 src32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); 382 data = *src32; 383 break; 384 case 8: 385 src64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); 386 data = *src64; 387 break; 388 default: 389 return (-1); 390 } 391 return (data); 392 } 393 394 offset -= table_offset; 395 index = offset / MSIX_TABLE_ENTRY_SIZE; 396 assert(index < table_count); 397 398 entry = &pi->pi_msix.table[index]; 399 entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 400 401 switch (size) { 402 case 1: 403 src8 = (uint8_t *)((uint8_t *)entry + entry_offset); 404 data = *src8; 405 break; 406 case 2: 407 src16 = (uint16_t *)((uint8_t *)entry + entry_offset); 408 data = *src16; 409 break; 410 case 4: 411 src32 = (uint32_t *)((uint8_t *)entry + entry_offset); 412 data = *src32; 413 break; 414 case 8: 415 src64 = (uint64_t *)((uint8_t *)entry + entry_offset); 416 data = *src64; 417 break; 418 default: 419 return (-1); 420 } 421 422 return (data); 423 } 424 425 static void 426 msix_table_write(struct vmctx *ctx, struct passthru_softc *sc, 427 uint64_t offset, int size, uint64_t data) 428 { 429 struct pci_devinst *pi; 430 struct msix_table_entry *entry; 431 uint8_t *dest8; 432 uint16_t *dest16; 433 uint32_t *dest32; 434 uint64_t *dest64; 435 size_t entry_offset; 436 uint32_t table_offset, vector_control; 437 int index, table_count; 438 439 pi = sc->psc_pi; 440 441 table_offset = pi->pi_msix.table_offset; 442 table_count = pi->pi_msix.table_count; 443 if (offset < table_offset || 444 offset >= table_offset + table_count * MSIX_TABLE_ENTRY_SIZE) { 445 switch (size) { 446 case 1: 447 dest8 = (uint8_t *)(pi->pi_msix.mapped_addr + offset); 448 *dest8 = data; 449 break; 450 case 2: 451 dest16 = (uint16_t *)(pi->pi_msix.mapped_addr + offset); 452 *dest16 = data; 453 break; 454 case 4: 455 dest32 = (uint32_t *)(pi->pi_msix.mapped_addr + offset); 456 *dest32 = data; 457 break; 458 case 8: 459 dest64 = (uint64_t *)(pi->pi_msix.mapped_addr + offset); 460 *dest64 = data; 461 break; 462 } 463 return; 464 } 465 466 offset -= table_offset; 467 index = offset / MSIX_TABLE_ENTRY_SIZE; 468 assert(index < table_count); 469 470 entry = &pi->pi_msix.table[index]; 471 entry_offset = offset % MSIX_TABLE_ENTRY_SIZE; 472 473 /* Only 4 byte naturally-aligned writes are supported */ 474 assert(size == 4); 475 assert(entry_offset % 4 == 0); 476 477 vector_control = entry->vector_control; 478 dest32 = (uint32_t *)((uint8_t *)entry + entry_offset); 479 *dest32 = data; 480 /* If MSI-X hasn't been enabled, do nothing */ 481 if (pi->pi_msix.enabled) { 482 /* If the entry is masked, don't set it up */ 483 if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 || 484 (vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 485 (void) vm_setup_pptdev_msix(ctx, sc->pptfd, 486 index, entry->addr, entry->msg_data, 487 entry->vector_control); 488 } 489 } 490 } 491 492 static int 493 init_msix_table(struct vmctx *ctx __unused, struct passthru_softc *sc) 494 { 495 struct pci_devinst *pi = sc->psc_pi; 496 uint32_t table_size, table_offset; 497 int i; 498 499 i = pci_msix_table_bar(pi); 500 assert(i >= 0); 501 502 /* 503 * Map the region of the BAR containing the MSI-X table. This is 504 * necessary for two reasons: 505 * 1. The PBA may reside in the first or last page containing the MSI-X 506 * table. 507 * 2. While PCI devices are not supposed to use the page(s) containing 508 * the MSI-X table for other purposes, some do in practice. 509 */ 510 511 /* 512 * Mapping pptfd provides access to the BAR containing the MSI-X 513 * table. See ppt_devmap() in usr/src/uts/intel/io/vmm/io/ppt.c 514 * 515 * This maps the whole BAR and then mprotect(PROT_NONE) is used below 516 * to prevent access to pages that don't contain the MSI-X table. 517 * When porting this, it was tempting to just map the MSI-X table pages 518 * but that would mean updating everywhere that assumes that 519 * pi->pi_msix.mapped_addr points to the start of the BAR. For now, 520 * keep closer to upstream. 521 */ 522 pi->pi_msix.mapped_size = sc->psc_bar[i].size; 523 pi->pi_msix.mapped_addr = (uint8_t *)mmap(NULL, pi->pi_msix.mapped_size, 524 PROT_READ | PROT_WRITE, MAP_SHARED, sc->pptfd, 0); 525 if (pi->pi_msix.mapped_addr == MAP_FAILED) { 526 warn("Failed to map MSI-X table BAR on %d", sc->pptfd); 527 return (-1); 528 } 529 530 table_offset = rounddown2(pi->pi_msix.table_offset, 4096); 531 532 table_size = pi->pi_msix.table_offset - table_offset; 533 table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 534 table_size = roundup2(table_size, 4096); 535 536 /* 537 * Unmap any pages not containing the table, we do not need to emulate 538 * accesses to them. Avoid releasing address space to help ensure that 539 * a buggy out-of-bounds access causes a crash. 540 */ 541 if (table_offset != 0) 542 if (mprotect((caddr_t)pi->pi_msix.mapped_addr, table_offset, 543 PROT_NONE) != 0) 544 warn("Failed to unmap MSI-X table BAR region"); 545 if (table_offset + table_size != pi->pi_msix.mapped_size) 546 if (mprotect((caddr_t) 547 pi->pi_msix.mapped_addr + table_offset + table_size, 548 pi->pi_msix.mapped_size - (table_offset + table_size), 549 PROT_NONE) != 0) 550 warn("Failed to unmap MSI-X table BAR region"); 551 552 return (0); 553 } 554 555 static int 556 cfginitbar(struct vmctx *ctx __unused, struct passthru_softc *sc) 557 { 558 struct pci_devinst *pi = sc->psc_pi; 559 uint_t i; 560 561 /* 562 * Initialize BAR registers 563 */ 564 for (i = 0; i <= PCI_BARMAX; i++) { 565 enum pcibar_type bartype; 566 uint64_t base, size; 567 int error; 568 569 if (passthru_get_bar(sc, i, &bartype, &base, &size) != 0) { 570 continue; 571 } 572 573 if (bartype != PCIBAR_IO) { 574 if (((base | size) & PAGE_MASK) != 0) { 575 warnx("passthru device %d BAR %d: " 576 "base %#lx or size %#lx not page aligned\n", 577 sc->pptfd, i, base, size); 578 return (-1); 579 } 580 } 581 582 /* Cache information about the "real" BAR */ 583 sc->psc_bar[i].type = bartype; 584 sc->psc_bar[i].size = size; 585 sc->psc_bar[i].addr = base; 586 sc->psc_bar[i].lobits = 0; 587 588 /* Allocate the BAR in the guest I/O or MMIO space */ 589 error = pci_emul_alloc_bar(pi, i, bartype, size); 590 if (error) 591 return (-1); 592 593 /* Use same lobits as physical bar */ 594 uint8_t lobits = passthru_read_config(sc, PCIR_BAR(i), 0x01); 595 if (bartype == PCIBAR_MEM32 || bartype == PCIBAR_MEM64) { 596 lobits &= ~PCIM_BAR_MEM_BASE; 597 } else { 598 lobits &= ~PCIM_BAR_IO_BASE; 599 } 600 sc->psc_bar[i].lobits = lobits; 601 pi->pi_bar[i].lobits = lobits; 602 603 /* 604 * 64-bit BAR takes up two slots so skip the next one. 605 */ 606 if (bartype == PCIBAR_MEM64) { 607 i++; 608 assert(i <= PCI_BARMAX); 609 sc->psc_bar[i].type = PCIBAR_MEMHI64; 610 } 611 } 612 return (0); 613 } 614 615 static int 616 cfginit(struct vmctx *ctx, struct passthru_softc *sc) 617 { 618 int error; 619 struct pci_devinst *pi = sc->psc_pi; 620 uint8_t intline, intpin; 621 622 /* 623 * Copy physical PCI header to virtual config space. INTLINE and INTPIN 624 * shouldn't be aligned with their physical value and they are already 625 * set by pci_emul_init(). 626 */ 627 intline = pci_get_cfgdata8(pi, PCIR_INTLINE); 628 intpin = pci_get_cfgdata8(pi, PCIR_INTPIN); 629 for (int i = 0; i <= PCIR_MAXLAT; i += 4) { 630 #ifdef __FreeBSD__ 631 pci_set_cfgdata32(pi, i, read_config(&sc->psc_sel, i, 4)); 632 #else 633 pci_set_cfgdata32(pi, i, passthru_read_config(sc, i, 4)); 634 #endif 635 } 636 637 pci_set_cfgdata8(pi, PCIR_INTLINE, intline); 638 pci_set_cfgdata8(pi, PCIR_INTPIN, intpin); 639 640 if (cfginitmsi(sc) != 0) { 641 warnx("failed to initialize MSI for PCI %d", sc->pptfd); 642 return (-1); 643 } 644 645 if (cfginitbar(ctx, sc) != 0) { 646 warnx("failed to initialize BARs for PCI %d", sc->pptfd); 647 return (-1); 648 } 649 650 passthru_write_config(sc, PCIR_COMMAND, 2, 651 pci_get_cfgdata16(pi, PCIR_COMMAND)); 652 653 /* 654 * We need to do this after PCIR_COMMAND got possibly updated, e.g., 655 * a BAR was enabled. 656 */ 657 if (pci_msix_table_bar(pi) >= 0) { 658 error = init_msix_table(ctx, sc); 659 if (error != 0) { 660 warnx("failed to initialize MSI-X table for PCI %d", 661 sc->pptfd); 662 goto done; 663 } 664 } 665 666 /* Emulate most PCI header register. */ 667 if ((error = set_pcir_handler(sc, 0, PCIR_MAXLAT + 1, 668 passthru_cfgread_emulate, passthru_cfgwrite_emulate)) != 0) 669 goto done; 670 671 /* Allow access to the physical command and status register. */ 672 if ((error = set_pcir_handler(sc, PCIR_COMMAND, 0x04, NULL, NULL)) != 0) 673 goto done; 674 675 error = 0; /* success */ 676 done: 677 return (error); 678 } 679 680 int 681 set_pcir_handler(struct passthru_softc *sc, int reg, int len, 682 cfgread_handler rhandler, cfgwrite_handler whandler) 683 { 684 if (reg > PCI_REGMAX || reg + len > PCI_REGMAX + 1) 685 return (-1); 686 687 for (int i = reg; i < reg + len; ++i) { 688 assert(sc->psc_pcir_rhandler[i] == NULL || rhandler == NULL); 689 assert(sc->psc_pcir_whandler[i] == NULL || whandler == NULL); 690 sc->psc_pcir_rhandler[i] = rhandler; 691 sc->psc_pcir_whandler[i] = whandler; 692 } 693 694 return (0); 695 } 696 697 static int 698 passthru_legacy_config(nvlist_t *nvl, const char *opt) 699 { 700 char *config, *name, *tofree, *value; 701 702 if (opt == NULL) 703 return (0); 704 705 config = tofree = strdup(opt); 706 while ((name = strsep(&config, ",")) != NULL) { 707 value = strchr(name, '='); 708 if (value != NULL) { 709 *value++ = '\0'; 710 set_config_value_node(nvl, name, value); 711 } else { 712 if (strncmp(name, "/dev/ppt", 8) != 0) { 713 EPRINTLN("passthru: invalid path \"%s\"", name); 714 free(tofree); 715 return (-1); 716 } 717 set_config_value_node(nvl, "path", name); 718 } 719 } 720 free(tofree); 721 return (0); 722 } 723 724 static int 725 passthru_init_rom(struct vmctx *const ctx __unused, 726 struct passthru_softc *const sc, const char *const romfile) 727 { 728 if (romfile == NULL) { 729 return (0); 730 } 731 732 const int fd = open(romfile, O_RDONLY); 733 if (fd < 0) { 734 warnx("%s: can't open romfile \"%s\"", __func__, romfile); 735 return (-1); 736 } 737 738 struct stat sbuf; 739 if (fstat(fd, &sbuf) < 0) { 740 warnx("%s: can't fstat romfile \"%s\"", __func__, romfile); 741 close(fd); 742 return (-1); 743 } 744 const uint64_t rom_size = sbuf.st_size; 745 746 void *const rom_data = mmap(NULL, rom_size, PROT_READ, MAP_SHARED, fd, 747 0); 748 if (rom_data == MAP_FAILED) { 749 warnx("%s: unable to mmap romfile \"%s\" (%d)", __func__, 750 romfile, errno); 751 close(fd); 752 return (-1); 753 } 754 755 void *rom_addr; 756 int error = pci_emul_alloc_rom(sc->psc_pi, rom_size, &rom_addr); 757 if (error) { 758 warnx("%s: failed to alloc rom segment", __func__); 759 munmap(rom_data, rom_size); 760 close(fd); 761 return (error); 762 } 763 memcpy(rom_addr, rom_data, rom_size); 764 765 sc->psc_bar[PCI_ROM_IDX].type = PCIBAR_ROM; 766 sc->psc_bar[PCI_ROM_IDX].addr = (uint64_t)rom_addr; 767 sc->psc_bar[PCI_ROM_IDX].size = rom_size; 768 769 munmap(rom_data, rom_size); 770 close(fd); 771 772 return (0); 773 } 774 775 static int 776 passthru_init(struct pci_devinst *pi, nvlist_t *nvl) 777 { 778 int error, memflags, pptfd; 779 struct passthru_softc *sc; 780 const char *path; 781 struct vmctx *ctx = pi->pi_vmctx; 782 783 pptfd = -1; 784 sc = NULL; 785 error = 1; 786 787 memflags = vm_get_memflags(ctx); 788 if (!(memflags & VM_MEM_F_WIRED)) { 789 warnx("passthru requires guest memory to be wired"); 790 goto done; 791 } 792 793 path = get_config_value_node(nvl, "path"); 794 if (path == NULL || passthru_dev_open(path, &pptfd) != 0) { 795 warnx("invalid passthru options"); 796 goto done; 797 } 798 799 if (vm_assign_pptdev(ctx, pptfd) != 0) { 800 warnx("PCI device at %d is not using the ppt driver", pptfd); 801 goto done; 802 } 803 804 sc = calloc(1, sizeof(struct passthru_softc)); 805 806 pi->pi_arg = sc; 807 sc->psc_pi = pi; 808 sc->pptfd = pptfd; 809 810 if ((error = vm_get_pptdev_limits(ctx, pptfd, &sc->msi_limit, 811 &sc->msix_limit)) != 0) 812 goto done; 813 814 #ifndef __FreeBSD__ 815 /* 816 * If this function uses legacy interrupt messages, then request one for 817 * the guest in case drivers expect to see it. Note that nothing in the 818 * hypervisor is currently wired up do deliver such an interrupt should 819 * the guest actually rely upon it. 820 */ 821 uint8_t intpin = passthru_read_config(sc, PCIR_INTPIN, 1); 822 if (intpin > 0 && intpin < 5) 823 pci_lintr_request(sc->psc_pi); 824 #endif 825 826 /* initialize config space */ 827 if ((error = cfginit(ctx, sc)) != 0) 828 goto done; 829 830 /* initialize ROM */ 831 if ((error = passthru_init_rom(ctx, sc, 832 get_config_value_node(nvl, "rom"))) != 0) { 833 goto done; 834 } 835 836 done: 837 if (error) { 838 free(sc); 839 if (pptfd != -1) 840 vm_unassign_pptdev(ctx, pptfd); 841 } 842 return (error); 843 } 844 845 static int 846 msicap_access(struct passthru_softc *sc, int coff) 847 { 848 int caplen; 849 850 if (sc->psc_msi.capoff == 0) 851 return (0); 852 853 caplen = msi_caplen(sc->psc_msi.msgctrl); 854 855 if (coff >= sc->psc_msi.capoff && coff < sc->psc_msi.capoff + caplen) 856 return (1); 857 else 858 return (0); 859 } 860 861 static int 862 msixcap_access(struct passthru_softc *sc, int coff) 863 { 864 if (sc->psc_msix.capoff == 0) 865 return (0); 866 867 return (coff >= sc->psc_msix.capoff && 868 coff < sc->psc_msix.capoff + MSIX_CAPLEN); 869 } 870 871 static int 872 passthru_cfgread_default(struct passthru_softc *sc, 873 struct pci_devinst *pi __unused, int coff, int bytes, uint32_t *rv) 874 { 875 /* 876 * MSI capability is emulated. 877 */ 878 if (msicap_access(sc, coff) || msixcap_access(sc, coff)) 879 return (-1); 880 881 /* 882 * MSI-X is also emulated since a limit on interrupts may be imposed by 883 * the OS, altering the perceived register state. 884 */ 885 if (msixcap_access(sc, coff)) 886 return (-1); 887 888 /* 889 * Emulate the command register. If a single read reads both the 890 * command and status registers, read the status register from the 891 * device's config space. 892 */ 893 if (coff == PCIR_COMMAND) { 894 if (bytes <= 2) 895 return (-1); 896 *rv = passthru_read_config(sc, PCIR_STATUS, 2) << 16 | 897 pci_get_cfgdata16(pi, PCIR_COMMAND); 898 return (0); 899 } 900 901 /* Everything else just read from the device's config space */ 902 *rv = passthru_read_config(sc, coff, bytes); 903 904 return (0); 905 } 906 907 int 908 passthru_cfgread_emulate(struct passthru_softc *sc __unused, 909 struct pci_devinst *pi __unused, int coff __unused, int bytes __unused, 910 uint32_t *rv __unused) 911 { 912 return (-1); 913 } 914 915 static int 916 passthru_cfgread(struct pci_devinst *pi, int coff, int bytes, uint32_t *rv) 917 { 918 struct passthru_softc *sc; 919 920 sc = pi->pi_arg; 921 922 if (sc->psc_pcir_rhandler[coff] != NULL) 923 return (sc->psc_pcir_rhandler[coff](sc, pi, coff, bytes, rv)); 924 925 return (passthru_cfgread_default(sc, pi, coff, bytes, rv)); 926 } 927 928 static int 929 passthru_cfgwrite_default(struct passthru_softc *sc, struct pci_devinst *pi, 930 int coff, int bytes, uint32_t val) 931 { 932 int error, msix_table_entries, i; 933 uint16_t cmd_old; 934 struct vmctx *ctx = pi->pi_vmctx; 935 936 /* 937 * MSI capability is emulated 938 */ 939 if (msicap_access(sc, coff)) { 940 pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msi.capoff, 941 PCIY_MSI); 942 error = vm_setup_pptdev_msi(ctx, sc->pptfd, 943 pi->pi_msi.addr, pi->pi_msi.msg_data, pi->pi_msi.maxmsgnum); 944 if (error != 0) 945 err(1, "vm_setup_pptdev_msi"); 946 return (0); 947 } 948 949 if (msixcap_access(sc, coff)) { 950 pci_emul_capwrite(pi, coff, bytes, val, sc->psc_msix.capoff, 951 PCIY_MSIX); 952 if (pi->pi_msix.enabled) { 953 msix_table_entries = pi->pi_msix.table_count; 954 for (i = 0; i < msix_table_entries; i++) { 955 error = vm_setup_pptdev_msix(ctx, 956 sc->pptfd, i, 957 pi->pi_msix.table[i].addr, 958 pi->pi_msix.table[i].msg_data, 959 pi->pi_msix.table[i].vector_control); 960 961 if (error) 962 err(1, "vm_setup_pptdev_msix"); 963 } 964 } else { 965 error = vm_disable_pptdev_msix(ctx, sc->pptfd); 966 if (error) 967 err(1, "vm_disable_pptdev_msix"); 968 } 969 return (0); 970 } 971 972 #ifdef LEGACY_SUPPORT 973 /* 974 * If this device does not support MSI natively then we cannot let 975 * the guest disable legacy interrupts from the device. It is the 976 * legacy interrupt that is triggering the virtual MSI to the guest. 977 */ 978 if (sc->psc_msi.emulated && pci_msi_enabled(pi)) { 979 if (coff == PCIR_COMMAND && bytes == 2) 980 val &= ~PCIM_CMD_INTxDIS; 981 } 982 #endif 983 984 passthru_write_config(sc, coff, bytes, val); 985 if (coff == PCIR_COMMAND) { 986 cmd_old = pci_get_cfgdata16(pi, PCIR_COMMAND); 987 if (bytes == 1) 988 pci_set_cfgdata8(pi, PCIR_COMMAND, val); 989 else if (bytes == 2) 990 pci_set_cfgdata16(pi, PCIR_COMMAND, val); 991 pci_emul_cmd_changed(pi, cmd_old); 992 } 993 994 return (0); 995 } 996 997 int 998 passthru_cfgwrite_emulate(struct passthru_softc *sc __unused, 999 struct pci_devinst *pi __unused, int coff __unused, int bytes __unused, 1000 uint32_t val __unused) 1001 { 1002 return (-1); 1003 } 1004 1005 static int 1006 passthru_cfgwrite(struct pci_devinst *pi, int coff, int bytes, uint32_t val) 1007 { 1008 struct passthru_softc *sc; 1009 1010 sc = pi->pi_arg; 1011 1012 if (sc->psc_pcir_whandler[coff] != NULL) 1013 return (sc->psc_pcir_whandler[coff](sc, pi, coff, bytes, val)); 1014 1015 return (passthru_cfgwrite_default(sc, pi, coff, bytes, val)); 1016 } 1017 1018 static void 1019 passthru_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size, 1020 uint64_t value) 1021 { 1022 struct passthru_softc *sc = pi->pi_arg; 1023 struct vmctx *ctx = pi->pi_vmctx; 1024 1025 if (baridx == pci_msix_table_bar(pi)) { 1026 msix_table_write(ctx, sc, offset, size, value); 1027 } else { 1028 struct ppt_bar_io pbi; 1029 1030 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 1031 1032 pbi.pbi_bar = baridx; 1033 pbi.pbi_width = size; 1034 pbi.pbi_off = offset; 1035 pbi.pbi_data = value; 1036 (void) ioctl(sc->pptfd, PPT_BAR_WRITE, &pbi); 1037 } 1038 } 1039 1040 static uint64_t 1041 passthru_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size) 1042 { 1043 struct passthru_softc *sc = pi->pi_arg; 1044 uint64_t val; 1045 1046 if (baridx == pci_msix_table_bar(pi)) { 1047 val = msix_table_read(sc, offset, size); 1048 } else { 1049 struct ppt_bar_io pbi; 1050 1051 assert(pi->pi_bar[baridx].type == PCIBAR_IO); 1052 1053 pbi.pbi_bar = baridx; 1054 pbi.pbi_width = size; 1055 pbi.pbi_off = offset; 1056 if (ioctl(sc->pptfd, PPT_BAR_READ, &pbi) == 0) { 1057 val = pbi.pbi_data; 1058 } else { 1059 val = 0; 1060 } 1061 } 1062 1063 return (val); 1064 } 1065 1066 static void 1067 passthru_msix_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, 1068 int enabled, uint64_t address) 1069 { 1070 struct passthru_softc *sc; 1071 size_t remaining; 1072 uint32_t table_size, table_offset; 1073 1074 sc = pi->pi_arg; 1075 table_offset = rounddown2(pi->pi_msix.table_offset, 4096); 1076 if (table_offset > 0) { 1077 if (!enabled) { 1078 if (vm_unmap_pptdev_mmio(ctx, sc->pptfd, address, 1079 table_offset) != 0) 1080 warnx("pci_passthru: unmap_pptdev_mmio failed"); 1081 } else { 1082 if (vm_map_pptdev_mmio(ctx, sc->pptfd, address, 1083 table_offset, sc->psc_bar[baridx].addr) != 0) 1084 warnx("pci_passthru: map_pptdev_mmio failed"); 1085 } 1086 } 1087 table_size = pi->pi_msix.table_offset - table_offset; 1088 table_size += pi->pi_msix.table_count * MSIX_TABLE_ENTRY_SIZE; 1089 table_size = roundup2(table_size, 4096); 1090 remaining = pi->pi_bar[baridx].size - table_offset - table_size; 1091 if (remaining > 0) { 1092 address += table_offset + table_size; 1093 if (!enabled) { 1094 if (vm_unmap_pptdev_mmio(ctx, sc->pptfd, address, 1095 remaining) != 0) 1096 warnx("pci_passthru: unmap_pptdev_mmio failed"); 1097 } else { 1098 if (vm_map_pptdev_mmio(ctx, sc->pptfd, address, 1099 remaining, sc->psc_bar[baridx].addr + 1100 table_offset + table_size) != 0) 1101 warnx("pci_passthru: map_pptdev_mmio failed"); 1102 } 1103 } 1104 } 1105 1106 static void 1107 passthru_mmio_addr(struct vmctx *ctx, struct pci_devinst *pi, int baridx, 1108 int enabled, uint64_t address) 1109 { 1110 struct passthru_softc *sc; 1111 1112 sc = pi->pi_arg; 1113 if (!enabled) { 1114 if (vm_unmap_pptdev_mmio(ctx, sc->pptfd, address, 1115 sc->psc_bar[baridx].size) != 0) 1116 warnx("pci_passthru: unmap_pptdev_mmio failed"); 1117 } else { 1118 if (vm_map_pptdev_mmio(ctx, sc->pptfd, address, 1119 sc->psc_bar[baridx].size, sc->psc_bar[baridx].addr) != 0) 1120 warnx("pci_passthru: map_pptdev_mmio failed"); 1121 } 1122 } 1123 1124 static void 1125 passthru_addr_rom(struct pci_devinst *const pi, const int idx, 1126 const int enabled) 1127 { 1128 const uint64_t addr = pi->pi_bar[idx].addr; 1129 const uint64_t size = pi->pi_bar[idx].size; 1130 1131 if (!enabled) { 1132 if (vm_munmap_memseg(pi->pi_vmctx, addr, size) != 0) { 1133 errx(4, "%s: munmap_memseg @ [%016lx - %016lx] failed", 1134 __func__, addr, addr + size); 1135 } 1136 1137 } else { 1138 if (vm_mmap_memseg(pi->pi_vmctx, addr, VM_PCIROM, 1139 pi->pi_romoffset, size, PROT_READ | PROT_EXEC) != 0) { 1140 errx(4, "%s: mmap_memseg @ [%016lx - %016lx] failed", 1141 __func__, addr, addr + size); 1142 } 1143 } 1144 } 1145 1146 static void 1147 passthru_addr(struct pci_devinst *pi, int baridx, 1148 int enabled, uint64_t address) 1149 { 1150 struct vmctx *ctx = pi->pi_vmctx; 1151 1152 switch (pi->pi_bar[baridx].type) { 1153 case PCIBAR_IO: 1154 /* IO BARs are emulated */ 1155 break; 1156 case PCIBAR_ROM: 1157 passthru_addr_rom(pi, baridx, enabled); 1158 break; 1159 case PCIBAR_MEM32: 1160 case PCIBAR_MEM64: 1161 if (baridx == pci_msix_table_bar(pi)) 1162 passthru_msix_addr(ctx, pi, baridx, enabled, address); 1163 else 1164 passthru_mmio_addr(ctx, pi, baridx, enabled, address); 1165 break; 1166 default: 1167 errx(4, "%s: invalid BAR type %d", __func__, 1168 pi->pi_bar[baridx].type); 1169 } 1170 } 1171 1172 static const struct pci_devemu passthru = { 1173 .pe_emu = "passthru", 1174 .pe_init = passthru_init, 1175 .pe_legacy_config = passthru_legacy_config, 1176 .pe_cfgwrite = passthru_cfgwrite, 1177 .pe_cfgread = passthru_cfgread, 1178 .pe_barwrite = passthru_write, 1179 .pe_barread = passthru_read, 1180 .pe_baraddr = passthru_addr, 1181 }; 1182 PCI_EMUL_SET(passthru); 1183 1184 /* 1185 * This isn't the right place for these functions which, on FreeBSD, can 1186 * read or write from arbitrary devices. They are not supported on illumos; 1187 * not least because bhyve is generally run in a non-global zone which doesn't 1188 * have access to the devinfo tree. 1189 */ 1190 uint32_t 1191 read_config(const struct pcisel *sel __unused, long reg __unused, 1192 int width __unused) 1193 { 1194 return (-1); 1195 } 1196 1197 void 1198 write_config(const struct pcisel *sel __unused, long reg __unused, 1199 int width __unused, uint32_t data __unused) 1200 { 1201 errx(4, "write_config() unimplemented on illumos"); 1202 } 1203