/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2018 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/kmem.h>

#include <dev/pci/pcireg.h>

#include <machine/vmparam.h>
#include <sys/vmm_vm.h>

#include <contrib/dev/acpica/include/acpi.h>

#include <sys/sunndi.h>

#include "io/iommu.h"
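
/*
 * This file implements the Intel VT-d flavor of the iommu_ops interface
 * declared in "io/iommu.h", which the bhyve vmm uses for PCI passthrough.
 */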

/*
 * Documented in the "Intel Virtualization Technology for Directed I/O",
 * Architecture Spec, September 2008.
 */

#define	VTD_DRHD_INCLUDE_PCI_ALL(Flags)	(((Flags) >> 0) & 0x1)

/* Section 10.4 "Register Descriptions" */
struct vtdmap {
	volatile uint32_t	version;
	volatile uint32_t	res0;
	volatile uint64_t	cap;
	volatile uint64_t	ext_cap;
	volatile uint32_t	gcr;
	volatile uint32_t	gsr;
	volatile uint64_t	rta;
	volatile uint64_t	ccr;
};
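
/*
 * The fields above mirror the start of the VT-d register set: VER_REG at
 * offset 0x00, CAP_REG at 0x08, ECAP_REG at 0x10, GCMD_REG at 0x18,
 * GSTS_REG at 0x1C, RTADDR_REG at 0x20 and CCMD_REG at 0x28, so the
 * structure can be overlaid directly onto a unit's mapped registers.
 */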

#define	VTD_CAP_SAGAW(cap)	(((cap) >> 8) & 0x1F)
#define	VTD_CAP_ND(cap)		((cap) & 0x7)
#define	VTD_CAP_CM(cap)		(((cap) >> 7) & 0x1)
#define	VTD_CAP_SPS(cap)	(((cap) >> 34) & 0xF)
#define	VTD_CAP_RWBF(cap)	(((cap) >> 4) & 0x1)

#define	VTD_ECAP_DI(ecap)	(((ecap) >> 2) & 0x1)
#define	VTD_ECAP_COHERENCY(ecap) ((ecap) & 0x1)
#define	VTD_ECAP_IRO(ecap)	(((ecap) >> 8) & 0x3FF)

#define	VTD_GCR_WBF		(1 << 27)
#define	VTD_GCR_SRTP		(1 << 30)
#define	VTD_GCR_TE		(1U << 31)

#define	VTD_GSR_WBFS		(1 << 27)
#define	VTD_GSR_RTPS		(1 << 30)
#define	VTD_GSR_TES		(1U << 31)

#define	VTD_CCR_ICC		(1UL << 63)	/* invalidate context cache */
#define	VTD_CCR_CIRG_GLOBAL	(1UL << 61)	/* global invalidation */

#define	VTD_IIR_IVT		(1UL << 63)	/* invalidate IOTLB */
#define	VTD_IIR_IIRG_GLOBAL	(1ULL << 60)	/* global IOTLB invalidation */
#define	VTD_IIR_IIRG_DOMAIN	(2ULL << 60)	/* domain IOTLB invalidation */
#define	VTD_IIR_IIRG_PAGE	(3ULL << 60)	/* page IOTLB invalidation */
#define	VTD_IIR_DRAIN_READS	(1ULL << 49)	/* drain pending DMA reads */
#define	VTD_IIR_DRAIN_WRITES	(1ULL << 48)	/* drain pending DMA writes */
#define	VTD_IIR_DOMAIN_P	32

#define	VTD_ROOT_PRESENT	0x1
#define	VTD_CTX_PRESENT		0x1
#define	VTD_CTX_TT_ALL		(1UL << 2)

#define	VTD_PTE_RD		(1UL << 0)
#define	VTD_PTE_WR		(1UL << 1)
#define	VTD_PTE_SUPERPAGE	(1UL << 7)
#define	VTD_PTE_ADDR_M		(0x000FFFFFFFFFF000UL)

#define	VTD_RID2IDX(rid)	(((rid) & 0xff) * 2)
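
/*
 * Context entries are 128 bits wide (two uint64_t slots) and the context
 * table for a bus holds one entry per (device, function) combination, so a
 * requester id maps to a table index of twice its low byte.
 */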

struct domain {
	uint64_t	*ptp;		/* first level page table page */
	int		pt_levels;	/* number of page table levels */
	int		addrwidth;	/* 'AW' field in context entry */
	int		spsmask;	/* supported super page sizes */
	uint_t		id;		/* domain id */
	vm_paddr_t	maxaddr;	/* highest address to be mapped */
	SLIST_ENTRY(domain) next;
};

static SLIST_HEAD(, domain) domhead;

#define	DRHD_MAX_UNITS	16
static ACPI_DMAR_HARDWARE_UNIT	*drhds[DRHD_MAX_UNITS];
static int			drhd_num;
static struct vtdmap		*vtdmaps[DRHD_MAX_UNITS];
static int			max_domains;
typedef int			(*drhd_ident_func_t)(void);
static dev_info_t		*vtddips[DRHD_MAX_UNITS];

static uint64_t root_table[PAGE_SIZE / sizeof (uint64_t)] __aligned(4096);
static uint64_t ctx_tables[256][PAGE_SIZE / sizeof (uint64_t)] __aligned(4096);

static int
vtd_max_domains(struct vtdmap *vtdmap)
{
	int nd;

	nd = VTD_CAP_ND(vtdmap->cap);

	switch (nd) {
	case 0:
		return (16);
	case 1:
		return (64);
	case 2:
		return (256);
	case 3:
		return (1024);
	case 4:
		return (4 * 1024);
	case 5:
		return (16 * 1024);
	case 6:
		return (64 * 1024);
	default:
		panic("vtd_max_domains: invalid value of nd (0x%0x)", nd);
	}
}

static uint_t
domain_id(void)
{
	uint_t id;
	struct domain *dom;

	/* Skip domain id 0 - it is reserved when Caching Mode field is set */
	for (id = 1; id < max_domains; id++) {
		SLIST_FOREACH(dom, &domhead, next) {
			if (dom->id == id)
				break;
		}
		if (dom == NULL)
			break;		/* found an unused id */
	}

	if (id >= max_domains)
		panic("domain ids exhausted");

	return (id);
}

static struct vtdmap *
vtd_device_scope(uint16_t rid)
{
	int i, remaining, pathrem;
	char *end, *pathend;
	struct vtdmap *vtdmap;
	ACPI_DMAR_HARDWARE_UNIT *drhd;
	ACPI_DMAR_DEVICE_SCOPE *device_scope;
	ACPI_DMAR_PCI_PATH *path;

	for (i = 0; i < drhd_num; i++) {
		drhd = drhds[i];

		if (VTD_DRHD_INCLUDE_PCI_ALL(drhd->Flags)) {
			/*
			 * From Intel VT-d arch spec, version 3.0:
			 * If a DRHD structure with INCLUDE_PCI_ALL flag Set is
			 * reported for a Segment, it must be enumerated by
			 * BIOS after all other DRHD structures for the same
			 * Segment.
			 */
			vtdmap = vtdmaps[i];
			return (vtdmap);
		}

		end = (char *)drhd + drhd->Header.Length;
		remaining = drhd->Header.Length -
		    sizeof (ACPI_DMAR_HARDWARE_UNIT);
		while (remaining > sizeof (ACPI_DMAR_DEVICE_SCOPE)) {
			device_scope =
			    (ACPI_DMAR_DEVICE_SCOPE *)(end - remaining);
			remaining -= device_scope->Length;

			switch (device_scope->EntryType) {
			/* 0x01 and 0x02 are PCI device entries */
			case 0x01:
			case 0x02:
				break;
			default:
				continue;
			}

			if (PCI_RID2BUS(rid) != device_scope->Bus)
				continue;

			pathend = (char *)device_scope + device_scope->Length;
			pathrem = device_scope->Length -
			    sizeof (ACPI_DMAR_DEVICE_SCOPE);
			while (pathrem >= sizeof (ACPI_DMAR_PCI_PATH)) {
				path = (ACPI_DMAR_PCI_PATH *)
				    (pathend - pathrem);
				pathrem -= sizeof (ACPI_DMAR_PCI_PATH);

				if (PCI_RID2SLOT(rid) != path->Device)
					continue;
				if (PCI_RID2FUNC(rid) != path->Function)
					continue;

				vtdmap = vtdmaps[i];
				return (vtdmap);
			}
		}
	}

	/* No matching scope */
	return (NULL);
}

static void
vtd_wbflush(struct vtdmap *vtdmap)
{

	if (VTD_ECAP_COHERENCY(vtdmap->ext_cap) == 0)
		invalidate_cache_all();

	if (VTD_CAP_RWBF(vtdmap->cap)) {
		vtdmap->gcr = VTD_GCR_WBF;
		while ((vtdmap->gsr & VTD_GSR_WBFS) != 0)
			;
	}
}

static void
vtd_ctx_global_invalidate(struct vtdmap *vtdmap)
{

	vtdmap->ccr = VTD_CCR_ICC | VTD_CCR_CIRG_GLOBAL;
	while ((vtdmap->ccr & VTD_CCR_ICC) != 0)
		;
}

static void
vtd_iotlb_global_invalidate(struct vtdmap *vtdmap)
{
	int offset;
	volatile uint64_t *iotlb_reg, val;

	vtd_wbflush(vtdmap);

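	/*
	 * ECAP.IRO locates the IOTLB register group as an offset from the
	 * register base in 128-bit (16-byte) units; the invalidate register
	 * is the second quadword of that group, hence the extra 8 bytes.
	 */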
	offset = VTD_ECAP_IRO(vtdmap->ext_cap) * 16;
	iotlb_reg = (volatile uint64_t *)((caddr_t)vtdmap + offset + 8);

	*iotlb_reg = VTD_IIR_IVT | VTD_IIR_IIRG_GLOBAL |
	    VTD_IIR_DRAIN_READS | VTD_IIR_DRAIN_WRITES;

	while (1) {
		val = *iotlb_reg;
		if ((val & VTD_IIR_IVT) == 0)
			break;
	}
}

static void
vtd_translation_enable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = VTD_GCR_TE;
	while ((vtdmap->gsr & VTD_GSR_TES) == 0)
		;
}

static void
vtd_translation_disable(struct vtdmap *vtdmap)
{

	vtdmap->gcr = 0;
	while ((vtdmap->gsr & VTD_GSR_TES) != 0)
		;
}

static void *
vtd_map(dev_info_t *dip)
{
	caddr_t regs;
	ddi_acc_handle_t hdl;
	int error;

	static ddi_device_acc_attr_t regs_attr = {
		DDI_DEVICE_ATTR_V0,
		DDI_NEVERSWAP_ACC,
		DDI_STRICTORDER_ACC,
	};

	error = ddi_regs_map_setup(dip, 0, &regs, 0, PAGE_SIZE, &regs_attr,
	    &hdl);

	if (error != DDI_SUCCESS)
		return (NULL);

	ddi_set_driver_private(dip, hdl);

	return (regs);
}

static void
vtd_unmap(dev_info_t *dip)
{
	ddi_acc_handle_t hdl = ddi_get_driver_private(dip);

	if (hdl != NULL)
		ddi_regs_map_free(&hdl);
}

static dev_info_t *
vtd_get_dip(ACPI_DMAR_HARDWARE_UNIT *drhd, int unit)
{
	dev_info_t *dip;
	struct ddi_parent_private_data *pdptr;
	struct regspec reg;

	/*
	 * Try to find an existing devinfo node for this vtd unit.
	 */
	ndi_devi_enter(ddi_root_node());
	dip = ddi_find_devinfo("vtd", unit, 0);
	ndi_devi_exit(ddi_root_node());

	if (dip != NULL)
		return (dip);

	/*
	 * None found, construct a devinfo node for this vtd unit.
	 */
	dip = ddi_add_child(ddi_root_node(), "vtd",
	    DEVI_SID_NODEID, unit);

	reg.regspec_bustype = 0;
	reg.regspec_addr = drhd->Address;
	reg.regspec_size = PAGE_SIZE;

	/*
	 * Update the reg properties; the "reg" property will be used for
	 * register set access.
	 *
	 * Refer to the bus_map of the root nexus driver for I/O or memory
	 * mapping:
	 *
	 *	<bustype=0, addr=x, len=x>: memory
	 *	<bustype=1, addr=x, len=x>: i/o
	 *	<bustype>1, addr=0, len=x>: x86-compatibility i/o
	 */
	(void) ndi_prop_update_int_array(DDI_DEV_T_NONE,
	    dip, "reg", (int *)&reg,
	    sizeof (struct regspec) / sizeof (int));

	/*
	 * This is an artificially constructed dev_info, and we
	 * need to set a few more things to be able to use it
	 * for ddi_dma_alloc_handle/free_handle.
	 */
	ddi_set_driver(dip, ddi_get_driver(ddi_root_node()));
	DEVI(dip)->devi_bus_dma_allochdl =
	    DEVI(ddi_get_driver((ddi_root_node())));

	pdptr = kmem_zalloc(sizeof (struct ddi_parent_private_data) +
	    sizeof (struct regspec), KM_SLEEP);
	pdptr->par_nreg = 1;
	pdptr->par_reg = (struct regspec *)(pdptr + 1);
	pdptr->par_reg->regspec_bustype = 0;
	pdptr->par_reg->regspec_addr = drhd->Address;
	pdptr->par_reg->regspec_size = PAGE_SIZE;
	ddi_set_parent_data(dip, pdptr);

	return (dip);
}

static int
vtd_init(void)
{
	int i, units, remaining, tmp;
	struct vtdmap *vtdmap;
	vm_paddr_t ctx_paddr;
	char *end;
#ifdef __FreeBSD__
	char envname[32];
	unsigned long mapaddr;
#endif
	ACPI_STATUS status;
	ACPI_TABLE_DMAR *dmar;
	ACPI_DMAR_HEADER *hdr;
	ACPI_DMAR_HARDWARE_UNIT *drhd;

#ifdef __FreeBSD__
	/*
	 * Allow the user to override the ACPI DMAR table by specifying the
	 * physical address of each remapping unit.
	 *
	 * The following example specifies two remapping units at
	 * physical addresses 0xfed90000 and 0xfeda0000 respectively.
	 * set vtd.regmap.0.addr=0xfed90000
	 * set vtd.regmap.1.addr=0xfeda0000
	 */
	for (units = 0; units < DRHD_MAX_UNITS; units++) {
		snprintf(envname, sizeof (envname), "vtd.regmap.%d.addr",
		    units);
		if (getenv_ulong(envname, &mapaddr) == 0)
			break;
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(mapaddr);
	}

	if (units > 0)
		goto skip_dmar;
#else
	units = 0;
#endif
	/* Search for DMAR table. */
	status = AcpiGetTable(ACPI_SIG_DMAR, 0, (ACPI_TABLE_HEADER **)&dmar);
	if (ACPI_FAILURE(status))
		return (ENXIO);

	end = (char *)dmar + dmar->Header.Length;
	remaining = dmar->Header.Length - sizeof (ACPI_TABLE_DMAR);
	while (remaining > sizeof (ACPI_DMAR_HEADER)) {
		hdr = (ACPI_DMAR_HEADER *)(end - remaining);
		if (hdr->Length > remaining)
			break;
		/*
		 * From Intel VT-d arch spec, version 1.3:
		 * BIOS implementations must report mapping structures
		 * in numerical order, i.e. all remapping structures of
		 * type 0 (DRHD) enumerated before remapping structures of
		 * type 1 (RMRR) and so forth.
		 */
		if (hdr->Type != ACPI_DMAR_TYPE_HARDWARE_UNIT)
			break;

		drhd = (ACPI_DMAR_HARDWARE_UNIT *)hdr;
		drhds[units] = drhd;
#ifdef __FreeBSD__
		vtdmaps[units] = (struct vtdmap *)PHYS_TO_DMAP(drhd->Address);
#else
		vtddips[units] = vtd_get_dip(drhd, units);
		vtdmaps[units] = (struct vtdmap *)vtd_map(vtddips[units]);
		if (vtdmaps[units] == NULL)
			goto fail;
#endif
		if (++units >= DRHD_MAX_UNITS)
			break;
		remaining -= hdr->Length;
	}

	if (units <= 0)
		return (ENXIO);

#ifdef __FreeBSD__
skip_dmar:
#endif
	drhd_num = units;

	max_domains = 64 * 1024;	/* maximum valid value */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];

		if (VTD_CAP_CM(vtdmap->cap) != 0)
			panic("vtd_init: invalid caching mode");

		/* take most compatible (minimum) value */
		if ((tmp = vtd_max_domains(vtdmap)) < max_domains)
			max_domains = tmp;
	}

	/*
	 * Set up the root-table to point to the context-entry tables.
	 */
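	/*
	 * Root entries, like context entries, are 128 bits wide, so each
	 * bus number occupies two uint64_t slots in the root table; only
	 * the low quadword, which holds the present bit and the context
	 * table address, needs to be initialized.
	 */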
	for (i = 0; i < 256; i++) {
		ctx_paddr = vtophys(ctx_tables[i]);
		if (ctx_paddr & PAGE_MASK)
			panic("ctx table (0x%0lx) not page aligned", ctx_paddr);

		root_table[i * 2] = ctx_paddr | VTD_ROOT_PRESENT;
	}

	return (0);

#ifndef __FreeBSD__
fail:
	for (i = 0; i <= units; i++)
		vtd_unmap(vtddips[i]);
	return (ENXIO);
#endif
}

static void
vtd_cleanup(void)
{
#ifndef __FreeBSD__
	int i;

	KASSERT(SLIST_EMPTY(&domhead), ("domain list not empty"));

	bzero(root_table, sizeof (root_table));

	for (i = 0; i < drhd_num; i++) {
		vtdmaps[i] = NULL;
		/*
		 * Unmap the vtd registers. Note that the devinfo nodes
		 * themselves aren't removed, they are considered system state
		 * and can be reused when the module is reloaded.
		 */
		if (vtddips[i] != NULL)
			vtd_unmap(vtddips[i]);
	}
#endif
}

static void
vtd_enable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_wbflush(vtdmap);

		/* Update the root table address */
		vtdmap->rta = vtophys(root_table);
		vtdmap->gcr = VTD_GCR_SRTP;
		while ((vtdmap->gsr & VTD_GSR_RTPS) == 0)
			;

		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);

		vtd_translation_enable(vtdmap);
	}
}

static void
vtd_disable(void)
{
	int i;
	struct vtdmap *vtdmap;

	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_translation_disable(vtdmap);
	}
}

static void
vtd_add_device(void *arg, uint16_t rid)
{
	int idx;
	uint64_t *ctxp;
	struct domain *dom = arg;
	vm_paddr_t pt_paddr;
	struct vtdmap *vtdmap;
	uint8_t bus;

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	pt_paddr = vtophys(dom->ptp);
	idx = VTD_RID2IDX(rid);

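	/*
	 * A context entry is two uint64_t words: the low word carries the
	 * present bit, the translation type and the page-table root, while
	 * the high word carries the address width in its low bits and the
	 * domain id starting at bit 8.
	 */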
	if (ctxp[idx] & VTD_CTX_PRESENT) {
		panic("vtd_add_device: device %x is already owned by "
		    "domain %d", rid, (uint16_t)(ctxp[idx + 1] >> 8));
	}

	if ((vtdmap = vtd_device_scope(rid)) == NULL)
		panic("vtd_add_device: device %x is not in scope for "
		    "any DMA remapping unit", rid);

	/*
	 * Order is important. The 'present' bit is set only after all fields
	 * of the context pointer are initialized.
	 */
	ctxp[idx + 1] = dom->addrwidth | (dom->id << 8);

	if (VTD_ECAP_DI(vtdmap->ext_cap))
		ctxp[idx] = VTD_CTX_TT_ALL;
	else
		ctxp[idx] = 0;

	ctxp[idx] |= pt_paddr | VTD_CTX_PRESENT;

	/*
	 * 'Not Present' entries are not cached in either the Context Cache
	 * or in the IOTLB, so there is no need to invalidate either of them.
	 */
}

static void
vtd_remove_device(void *arg, uint16_t rid)
{
	int i, idx;
	uint64_t *ctxp;
	struct vtdmap *vtdmap;
	uint8_t bus;

	bus = PCI_RID2BUS(rid);
	ctxp = ctx_tables[bus];
	idx = VTD_RID2IDX(rid);

	/*
	 * Order is important. The 'present' bit must be cleared first.
	 */
	ctxp[idx] = 0;
	ctxp[idx + 1] = 0;

	/*
	 * Invalidate the Context Cache and the IOTLB.
	 *
	 * XXX use device-selective invalidation for Context Cache
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_ctx_global_invalidate(vtdmap);
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

#define	CREATE_MAPPING	0
#define	REMOVE_MAPPING	1

static uint64_t
vtd_update_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len,
    int remove)
{
	struct domain *dom;
	int i, spshift, ptpshift, ptpindex, nlevels;
	uint64_t spsize, *ptp;

	dom = arg;
	ptpindex = 0;
	ptpshift = 0;

	KASSERT(gpa + len > gpa, ("%s: invalid gpa range %lx/%lx", __func__,
	    gpa, len));
	KASSERT(gpa + len <= dom->maxaddr, ("%s: gpa range %lx/%lx beyond "
	    "domain maxaddr %lx", __func__, gpa, len, dom->maxaddr));

	if (gpa & PAGE_MASK)
		panic("vtd_update_mapping: unaligned gpa 0x%0lx", gpa);

	if (hpa & PAGE_MASK)
		panic("vtd_update_mapping: unaligned hpa 0x%0lx", hpa);

	if (len & PAGE_MASK)
		panic("vtd_update_mapping: unaligned len 0x%0lx", len);

	/*
	 * Compute the size of the mapping that we can accommodate.
	 *
	 * This is based on three factors:
	 * - supported super page size
	 * - alignment of the region starting at 'gpa' and 'hpa'
	 * - length of the region 'len'
	 */
	spshift = 48;
	for (i = 3; i >= 0; i--) {
		spsize = 1UL << spshift;
		if ((dom->spsmask & (1 << i)) != 0 &&
		    (gpa & (spsize - 1)) == 0 &&
		    (hpa & (spsize - 1)) == 0 &&
		    (len >= spsize)) {
			break;
		}
		spshift -= 9;
	}
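	/*
	 * Each iteration drops the candidate size by a factor of 512:
	 * i = 3 corresponds to a 256 TiB mapping (spshift = 48), i = 2 to
	 * 512 GiB, i = 1 to 1 GiB, i = 0 to 2 MiB, and falling out of the
	 * loop leaves spshift at 12 for an ordinary 4 KiB page.
	 */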

	ptp = dom->ptp;
	nlevels = dom->pt_levels;
	while (--nlevels >= 0) {
		ptpshift = 12 + nlevels * 9;
		ptpindex = (gpa >> ptpshift) & 0x1FF;

		/* We have reached the leaf mapping */
		if (spshift >= ptpshift) {
			break;
		}

		/*
		 * We are working on a non-leaf page table page.
		 *
		 * Create a downstream page table page if necessary and point
		 * to it from the current page table.
		 */
		if (ptp[ptpindex] == 0) {
			void *nlp = vmm_ptp_alloc();
			ptp[ptpindex] = vtophys(nlp) | VTD_PTE_RD | VTD_PTE_WR;
		}

		ptp = (uint64_t *)PHYS_TO_DMAP(ptp[ptpindex] & VTD_PTE_ADDR_M);
	}

	if ((gpa & ((1UL << ptpshift) - 1)) != 0)
		panic("gpa 0x%lx and ptpshift %d mismatch", gpa, ptpshift);

	/*
	 * Update the 'gpa' -> 'hpa' mapping.
	 */
	if (remove) {
		ptp[ptpindex] = 0;
	} else {
		ptp[ptpindex] = hpa | VTD_PTE_RD | VTD_PTE_WR;

		if (nlevels > 0)
			ptp[ptpindex] |= VTD_PTE_SUPERPAGE;
	}

	return (1UL << ptpshift);
}

static uint64_t
vtd_create_mapping(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, hpa, len, CREATE_MAPPING));
}

static uint64_t
vtd_remove_mapping(void *arg, vm_paddr_t gpa, uint64_t len)
{

	return (vtd_update_mapping(arg, gpa, 0, len, REMOVE_MAPPING));
}

static void
vtd_invalidate_tlb(void *dom)
{
	int i;
	struct vtdmap *vtdmap;

	/*
	 * Invalidate the IOTLB.
	 * XXX use domain-selective invalidation for IOTLB
	 */
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		vtd_iotlb_global_invalidate(vtdmap);
	}
}

static void *
vtd_create_domain(vm_paddr_t maxaddr)
{
	struct domain *dom;
	vm_paddr_t addr;
	int tmp, i, gaw, agaw, sagaw, res, pt_levels, addrwidth;
	struct vtdmap *vtdmap;

	if (drhd_num <= 0)
		panic("vtd_create_domain: no dma remapping hardware available");

	/*
	 * Calculate AGAW.
	 * Section 3.4.2 "Adjusted Guest Address Width", Architecture Spec.
	 */
	addr = 0;
	for (gaw = 0; addr < maxaddr; gaw++)
		addr = 1ULL << gaw;

	res = (gaw - 12) % 9;
	if (res == 0)
		agaw = gaw;
	else
		agaw = gaw + 9 - res;

	if (agaw > 64)
		agaw = 64;
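
	/*
	 * For example, a 4 GiB guest (maxaddr = 2^32) yields gaw = 33,
	 * which rounds up to agaw = 39; assuming the hardware reports
	 * 39-bit AGAW support, the loop below then settles on a 3-level
	 * page table.
	 */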

	/*
	 * Select the smallest Supported AGAW and the corresponding number
	 * of page table levels.
	 */
	pt_levels = 2;
	sagaw = 30;
	addrwidth = 0;

	tmp = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		/* take most compatible value */
		tmp &= VTD_CAP_SAGAW(vtdmap->cap);
	}

	for (i = 0; i < 5; i++) {
		if ((tmp & (1 << i)) != 0 && sagaw >= agaw)
			break;
		pt_levels++;
		addrwidth++;
		sagaw += 9;
		if (sagaw > 64)
			sagaw = 64;
	}

	if (i >= 5) {
		panic("vtd_create_domain: SAGAW 0x%x does not support AGAW %d",
		    tmp, agaw);
	}

	dom = kmem_zalloc(sizeof (struct domain), KM_SLEEP);
	dom->pt_levels = pt_levels;
	dom->addrwidth = addrwidth;
	dom->id = domain_id();
	dom->maxaddr = maxaddr;
	dom->ptp = vmm_ptp_alloc();
	if ((uintptr_t)dom->ptp & PAGE_MASK)
		panic("vtd_create_domain: ptp (%p) not page aligned", dom->ptp);

#ifdef __FreeBSD__
#ifdef notyet
	/*
	 * XXX superpage mappings for the iommu do not work correctly.
	 *
	 * By default all physical memory is mapped into the host_domain.
	 * When a VM is allocated wired memory the pages belonging to it
	 * are removed from the host_domain and added to the vm's domain.
	 *
	 * If the page being removed was mapped using a superpage mapping
	 * in the host_domain then we need to demote the mapping before
	 * removing the page.
	 *
	 * There is not any code to deal with the demotion at the moment
	 * so we disable superpage mappings altogether.
	 */
	dom->spsmask = ~0;
	for (i = 0; i < drhd_num; i++) {
		vtdmap = vtdmaps[i];
		/* take most compatible value */
		dom->spsmask &= VTD_CAP_SPS(vtdmap->cap);
	}
#endif
#else
	/*
	 * On illumos we decidedly do not remove memory mapped to a VM's
	 * domain from the host_domain, so we don't have to deal with page
	 * demotion and can just use large pages.
	 *
	 * Since VM memory is currently allocated as 4k pages and mapped into
	 * the VM domain page by page, the use of large pages is essentially
	 * limited to the host_domain.
	 */
	dom->spsmask = VTD_CAP_SPS(vtdmap->cap);
#endif

	SLIST_INSERT_HEAD(&domhead, dom, next);

	return (dom);
}

static void
vtd_free_ptp(uint64_t *ptp, int level)
{
	int i;
	uint64_t *nlp;

	if (level > 1) {
		for (i = 0; i < 512; i++) {
			if ((ptp[i] & (VTD_PTE_RD | VTD_PTE_WR)) == 0)
				continue;
			if ((ptp[i] & VTD_PTE_SUPERPAGE) != 0)
				continue;
			nlp = (uint64_t *)PHYS_TO_DMAP(ptp[i] &
			    VTD_PTE_ADDR_M);
			vtd_free_ptp(nlp, level - 1);
		}
	}

	vmm_ptp_free(ptp);
}

static void
vtd_destroy_domain(void *arg)
{
	struct domain *dom;

	dom = arg;

	SLIST_REMOVE(&domhead, dom, domain, next);
	vtd_free_ptp(dom->ptp, dom->pt_levels);
	kmem_free(dom, sizeof (*dom));
}

const struct iommu_ops vmm_iommu_ops = {
	.init = vtd_init,
	.cleanup = vtd_cleanup,
	.enable = vtd_enable,
	.disable = vtd_disable,
	.create_domain = vtd_create_domain,
	.destroy_domain = vtd_destroy_domain,
	.create_mapping = vtd_create_mapping,
	.remove_mapping = vtd_remove_mapping,
	.add_device = vtd_add_device,
	.remove_device = vtd_remove_device,
	.invalidate_tlb = vtd_invalidate_tlb,
};

static struct modlmisc modlmisc = {
	&mod_miscops,
	"bhyve vmm vtd",
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlmisc,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}