1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #include <sys/modctl.h> 27 #include <sys/types.h> 28 #include <sys/archsystm.h> 29 #include <sys/machsystm.h> 30 #include <sys/sunndi.h> 31 #include <sys/sunddi.h> 32 #include <sys/ddi_subrdefs.h> 33 #include <sys/xpv_support.h> 34 #include <sys/xen_errno.h> 35 #include <sys/hypervisor.h> 36 #include <sys/gnttab.h> 37 #include <sys/xenbus_comms.h> 38 #include <sys/xenbus_impl.h> 39 #include <xen/sys/xendev.h> 40 #include <sys/sysmacros.h> 41 #include <sys/x86_archext.h> 42 #include <sys/mman.h> 43 #include <sys/stat.h> 44 #include <sys/conf.h> 45 #include <sys/devops.h> 46 #include <sys/pc_mmu.h> 47 #include <sys/cmn_err.h> 48 #include <sys/cpr.h> 49 #include <sys/ddi.h> 50 #include <vm/seg_kmem.h> 51 #include <vm/as.h> 52 #include <vm/hat_pte.h> 53 #include <vm/hat_i86.h> 54 55 #define XPV_MINOR 0 56 #define XPV_BUFSIZE 128 57 58 /* 59 * This structure is ordinarily constructed by Xen. In the HVM world, we 60 * manually fill in the few fields the PV drivers need. 
61 */ 62 start_info_t *xen_info = NULL; 63 64 /* Xen version number. */ 65 int xen_major, xen_minor; 66 67 /* Metadata page shared between domain and Xen */ 68 shared_info_t *HYPERVISOR_shared_info = NULL; 69 70 /* Page containing code to issue hypercalls. */ 71 extern caddr_t hypercall_page; 72 73 /* Is the hypervisor 64-bit? */ 74 int xen_is_64bit = -1; 75 76 /* virtual addr for the store_mfn page */ 77 caddr_t xb_addr; 78 79 dev_info_t *xpv_dip; 80 static dev_info_t *xpvd_dip; 81 82 /* saved pfn of the shared info page */ 83 static pfn_t shared_info_frame; 84 85 #ifdef DEBUG 86 int xen_suspend_debug; 87 88 #define SUSPEND_DEBUG if (xen_suspend_debug) xen_printf 89 #else 90 #define SUSPEND_DEBUG(...) 91 #endif 92 93 /* 94 * Forward declarations 95 */ 96 static int xpv_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 97 static int xpv_attach(dev_info_t *, ddi_attach_cmd_t); 98 static int xpv_detach(dev_info_t *, ddi_detach_cmd_t); 99 static int xpv_open(dev_t *, int, int, cred_t *); 100 static int xpv_ioctl(dev_t, int, intptr_t, int, cred_t *, int *); 101 102 static struct cb_ops xpv_cb_ops = { 103 xpv_open, 104 nulldev, /* close */ 105 nodev, /* strategy */ 106 nodev, /* print */ 107 nodev, /* dump */ 108 nodev, /* read */ 109 nodev, /* write */ 110 xpv_ioctl, /* ioctl */ 111 nodev, /* devmap */ 112 nodev, /* mmap */ 113 nodev, /* segmap */ 114 nochpoll, /* poll */ 115 ddi_prop_op, 116 NULL, 117 D_MP, 118 CB_REV, 119 NULL, 120 NULL 121 }; 122 123 static struct dev_ops xpv_dv_ops = { 124 DEVO_REV, 125 0, 126 xpv_getinfo, 127 nulldev, /* identify */ 128 nulldev, /* probe */ 129 xpv_attach, 130 xpv_detach, 131 nodev, /* reset */ 132 &xpv_cb_ops, 133 NULL, /* struct bus_ops */ 134 NULL, /* power */ 135 ddi_quiesce_not_supported, /* devo_quiesce */ 136 }; 137 138 static struct modldrv modldrv = { 139 &mod_driverops, 140 "xpv driver", 141 &xpv_dv_ops 142 }; 143 144 static struct modlinkage modl = { 145 MODREV_1, 146 { 147 (void *)&modldrv, 148 NULL /* null 
termination */ 149 } 150 }; 151 152 static ddi_dma_attr_t xpv_dma_attr = { 153 DMA_ATTR_V0, /* version of this structure */ 154 0, /* lowest usable address */ 155 0xffffffffffffffffULL, /* highest usable address */ 156 0x7fffffff, /* maximum DMAable byte count */ 157 MMU_PAGESIZE, /* alignment in bytes */ 158 0x7ff, /* bitmap of burst sizes */ 159 1, /* minimum transfer */ 160 0xffffffffU, /* maximum transfer */ 161 0x7fffffffULL, /* maximum segment length */ 162 1, /* maximum number of segments */ 163 1, /* granularity */ 164 0, /* flags (reserved) */ 165 }; 166 167 static ddi_device_acc_attr_t xpv_accattr = { 168 DDI_DEVICE_ATTR_V0, 169 DDI_NEVERSWAP_ACC, 170 DDI_STRICTORDER_ACC 171 }; 172 173 #define MAX_ALLOCATIONS 10 174 static ddi_dma_handle_t xpv_dma_handle[MAX_ALLOCATIONS]; 175 static ddi_acc_handle_t xpv_dma_acchandle[MAX_ALLOCATIONS]; 176 static int xen_alloc_cnt = 0; 177 178 void * 179 xen_alloc_pages(pgcnt_t cnt) 180 { 181 size_t len; 182 int a = xen_alloc_cnt++; 183 caddr_t addr; 184 185 ASSERT(xen_alloc_cnt < MAX_ALLOCATIONS); 186 if (ddi_dma_alloc_handle(xpv_dip, &xpv_dma_attr, DDI_DMA_SLEEP, 0, 187 &xpv_dma_handle[a]) != DDI_SUCCESS) 188 return (NULL); 189 190 if (ddi_dma_mem_alloc(xpv_dma_handle[a], MMU_PAGESIZE * cnt, 191 &xpv_accattr, DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, 0, 192 &addr, &len, &xpv_dma_acchandle[a]) != DDI_SUCCESS) { 193 ddi_dma_free_handle(&xpv_dma_handle[a]); 194 cmn_err(CE_WARN, "Couldn't allocate memory for xpv devices"); 195 return (NULL); 196 } 197 return (addr); 198 } 199 200 /* 201 * This function is invoked twice, first time with reprogram=0 to set up 202 * the xpvd portion of the device tree. The second time it is ignored. 
203 */ 204 static void 205 xpv_enumerate(int reprogram) 206 { 207 dev_info_t *dip; 208 209 if (reprogram != 0) 210 return; 211 212 ndi_devi_alloc_sleep(ddi_root_node(), "xpvd", 213 (pnode_t)DEVI_SID_NODEID, &dip); 214 215 (void) ndi_devi_bind_driver(dip, 0); 216 217 /* 218 * Too early to enumerate split device drivers in domU 219 * since we need to create taskq thread during enumeration. 220 * So, we only enumerate softdevs and console here. 221 */ 222 xendev_enum_all(dip, B_TRUE); 223 } 224 225 /* 226 * Translate a hypervisor errcode to a Solaris error code. 227 */ 228 int 229 xen_xlate_errcode(int error) 230 { 231 #define CASE(num) case X_##num: error = num; break 232 233 switch (-error) { 234 CASE(EPERM); CASE(ENOENT); CASE(ESRCH); 235 CASE(EINTR); CASE(EIO); CASE(ENXIO); 236 CASE(E2BIG); CASE(ENOMEM); CASE(EACCES); 237 CASE(EFAULT); CASE(EBUSY); CASE(EEXIST); 238 CASE(ENODEV); CASE(EISDIR); CASE(EINVAL); 239 CASE(ENOSPC); CASE(ESPIPE); CASE(EROFS); 240 CASE(ENOSYS); CASE(ENOTEMPTY); CASE(EISCONN); 241 CASE(ENODATA); 242 default: 243 panic("xen_xlate_errcode: unknown error %d", error); 244 } 245 return (error); 246 #undef CASE 247 } 248 249 /*PRINTFLIKE1*/ 250 void 251 xen_printf(const char *fmt, ...) 252 { 253 va_list adx; 254 255 va_start(adx, fmt); 256 printf(fmt, adx); 257 va_end(adx); 258 } 259 260 /* 261 * Stub functions to get the FE drivers to build, and to catch drivers that 262 * misbehave in HVM domains. 
 */
/*ARGSUSED*/
void
xen_release_pfn(pfn_t pfn, caddr_t va)
{
	panic("xen_release_pfn() is not supported in HVM domains");
}

/*ARGSUSED*/
void
reassign_pfn(pfn_t pfn, mfn_t mfn)
{
	panic("reassign_pfn() is not supported in HVM domains");
}

/*ARGSUSED*/
long
balloon_free_pages(uint_t page_cnt, mfn_t *mfns, caddr_t kva, pfn_t *pfns)
{
	panic("balloon_free_pages() is not supported in HVM domains");
	return (0);
}

/*ARGSUSED*/
void
balloon_drv_added(int64_t delta)
{
	panic("balloon_drv_added() is not supported in HVM domains");
}

/*
 * Add a mapping for the machine page at the given virtual address.
 */
void
kbm_map_ma(maddr_t ma, uintptr_t va, uint_t level)
{
	/* only page-level (level 0) mappings are supported here */
	ASSERT(level == 0);

	hat_devload(kas.a_hat, (caddr_t)va, MMU_PAGESIZE,
	    mmu_btop(ma), PROT_READ | PROT_WRITE, HAT_LOAD);
}

/*
 * Fetch one HVM parameter from the hypervisor.  Returns the parameter
 * value, or (uint64_t)-1 if the hypercall fails.
 */
static uint64_t
hvm_get_param(int param_id)
{
	struct xen_hvm_param xhp;

	xhp.domid = DOMID_SELF;
	xhp.index = param_id;
	if ((HYPERVISOR_hvm_op(HVMOP_get_param, &xhp) < 0))
		return (-1);
	return (xhp.value);
}

static struct xenbus_watch shutdown_watch;
taskq_t *xen_shutdown_tq;

/* shutdown request codes parsed from the control/shutdown xenstore node */
#define	SHUTDOWN_INVALID	-1
#define	SHUTDOWN_POWEROFF	0
#define	SHUTDOWN_REBOOT		1
#define	SHUTDOWN_SUSPEND	2
#define	SHUTDOWN_HALT		3
#define	SHUTDOWN_MAX		4

#define	SHUTDOWN_TIMEOUT_SECS	(60 * 5)

/*
 * Recursively suspend dip and all of its siblings and descendants
 * (children first) by issuing devi_detach(DDI_SUSPEND) to each attached
 * device.  Successfully suspended devices are marked DCF_CPR_SUSPENDED.
 * Returns 0 on success, ENXIO if any device fails to suspend.
 */
int
xen_suspend_devices(dev_info_t *dip)
{
	int error;
	char buf[XPV_BUFSIZE];

	SUSPEND_DEBUG("xen_suspend_devices\n");

	for (; dip != NULL; dip = ddi_get_next_sibling(dip)) {
		/* suspend children before their parent */
		if (xen_suspend_devices(ddi_get_child(dip)))
			return (ENXIO);
		if (ddi_get_driver(dip) == NULL)
			continue;
		SUSPEND_DEBUG("Suspending device %s\n", ddi_deviname(dip, buf));
		ASSERT((DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED) == 0);


		if (!i_ddi_devi_attached(dip)) {
			error = DDI_FAILURE;
		} else {
			error = devi_detach(dip, DDI_SUSPEND);
		}

		if (error == DDI_SUCCESS) {
			DEVI(dip)->devi_cpr_flags |= DCF_CPR_SUSPENDED;
		} else {
			SUSPEND_DEBUG("WARNING: Unable to suspend device %s\n",
			    ddi_deviname(dip, buf));
			cmn_err(CE_WARN, "Unable to suspend device %s.",
			    ddi_deviname(dip, buf));
			cmn_err(CE_WARN, "Device is busy or does not "
			    "support suspend/resume.");
			return (ENXIO);
		}
	}
	return (0);
}

/*
 * Resume (DDI_RESUME) every device under 'start' that was previously
 * suspended by xen_suspend_devices(), walking siblings in reverse order so
 * devices are resumed in the opposite order from suspension.  resume_failed
 * carries an error from an earlier subtree; once a resume fails, remaining
 * devices only have their suspend flag cleared.  Returns 0 or ENXIO.
 */
int
xen_resume_devices(dev_info_t *start, int resume_failed)
{
	dev_info_t *dip, *next, *last = NULL;
	int did_suspend;
	int error = resume_failed;
	char buf[XPV_BUFSIZE];

	SUSPEND_DEBUG("xen_resume_devices\n");

	/* walk the sibling list back-to-front, one node per outer pass */
	while (last != start) {
		dip = start;
		next = ddi_get_next_sibling(dip);
		while (next != last) {
			dip = next;
			next = ddi_get_next_sibling(dip);
		}

		/*
		 * cpr is the only one that uses this field and the device
		 * itself hasn't resumed yet, there is no need to use a
		 * lock, even though kernel threads are active by now.
		 */
		did_suspend = DEVI(dip)->devi_cpr_flags & DCF_CPR_SUSPENDED;
		if (did_suspend)
			DEVI(dip)->devi_cpr_flags &= ~DCF_CPR_SUSPENDED;

		/*
		 * There may be background attaches happening on devices
		 * that were not originally suspended by cpr, so resume
		 * only devices that were suspended by cpr. Also, stop
		 * resuming after the first resume failure, but traverse
		 * the entire tree to clear the suspend flag.
		 */
		if (did_suspend && !error) {
			SUSPEND_DEBUG("Resuming device %s\n",
			    ddi_deviname(dip, buf));
			/*
			 * If a device suspended by cpr gets detached during
			 * the resume process (for example, due to hotplugging)
			 * before cpr gets around to issuing it a DDI_RESUME,
			 * we'll have problems.
			 */
			if (!i_ddi_devi_attached(dip)) {
				cmn_err(CE_WARN, "Skipping %s, device "
				    "not ready for resume",
				    ddi_deviname(dip, buf));
			} else {
				if (devi_attach(dip, DDI_RESUME) !=
				    DDI_SUCCESS) {
					error = ENXIO;
				}
			}
		}

		if (error == ENXIO) {
			cmn_err(CE_WARN, "Unable to resume device %s",
			    ddi_deviname(dip, buf));
		}

		/* resume this node's children, then retire it from the walk */
		error = xen_resume_devices(ddi_get_child(dip), error);
		last = dip;
	}

	return (error);
}

/*
 * ddi_walk_devs() callback: locate the "xpvd" nexus node and record it in
 * xpvd_dip, terminating the walk once found.
 */
/*ARGSUSED*/
static int
check_xpvd(dev_info_t *dip, void *arg)
{
	char *name;

	name = ddi_node_name(dip);
	if (name == NULL || strcmp(name, "xpvd")) {
		return (DDI_WALK_CONTINUE);
	} else {
		xpvd_dip = dip;
		return (DDI_WALK_TERMINATE);
	}
}

/*
 * Top level routine to direct suspend/resume of a domain.
 */
void
xen_suspend_domain(void)
{
	/* externs for helpers with no visible header declaration here */
	extern void rtcsync(void);
	extern void ec_resume(void);
	extern kmutex_t ec_lock;
	struct xen_add_to_physmap xatp;
	ulong_t flags;
	int err;

	cmn_err(CE_NOTE, "Domain suspending for save/migrate");

	SUSPEND_DEBUG("xen_suspend_domain\n");

	/*
	 * We only want to suspend the PV devices, since the emulated devices
	 * are suspended by saving the emulated device state. The PV devices
	 * are all children of the xpvd nexus device. So we search the
	 * device tree for the xpvd node to use as the root of the tree to
	 * be suspended.
	 */
	if (xpvd_dip == NULL)
		ddi_walk_devs(ddi_root_node(), check_xpvd, NULL);

	/*
	 * suspend interrupts and devices
	 */
	if (xpvd_dip != NULL)
		(void) xen_suspend_devices(ddi_get_child(xpvd_dip));
	else
		cmn_err(CE_WARN, "No PV devices found to suspend");
	SUSPEND_DEBUG("xenbus_suspend\n");
	xenbus_suspend();

	mutex_enter(&cpu_lock);

	/*
	 * Suspend on vcpu 0
	 */
	thread_affinity_set(curthread, 0);
	kpreempt_disable();

	if (ncpus > 1)
		pause_cpus(NULL);
	/*
	 * We can grab the ec_lock as it's a spinlock with a high SPL. Hence
	 * any holder would have dropped it to get through pause_cpus().
	 */
	mutex_enter(&ec_lock);

	/*
	 * From here on in, we can't take locks.
	 */

	flags = intr_clear();

	SUSPEND_DEBUG("HYPERVISOR_suspend\n");
	/*
	 * At this point we suspend and sometime later resume.
	 * Note that this call may return with an indication of a cancelled
	 * suspend; for now, no matter what the return is, we do a full
	 * resume of all suspended drivers, etc.
	 */
	(void) HYPERVISOR_shutdown(SHUTDOWN_suspend);

	/*
	 * Point HYPERVISOR_shared_info to the proper place.
	 */
	xatp.domid = DOMID_SELF;
	xatp.idx = 0;
	xatp.space = XENMAPSPACE_shared_info;
	xatp.gpfn = shared_info_frame;
	if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0)
		panic("Could not set shared_info page. error: %d", err);

	SUSPEND_DEBUG("gnttab_resume\n");
	gnttab_resume();

	SUSPEND_DEBUG("ec_resume\n");
	ec_resume();

	intr_restore(flags);

	if (ncpus > 1)
		start_cpus();

	mutex_exit(&ec_lock);
	mutex_exit(&cpu_lock);

	/*
	 * Now we can take locks again.
	 */

	rtcsync();

	SUSPEND_DEBUG("xenbus_resume\n");
	xenbus_resume();
	SUSPEND_DEBUG("xen_resume_devices\n");
	if (xpvd_dip != NULL)
		(void) xen_resume_devices(ddi_get_child(xpvd_dip), 0);

	thread_affinity_clear(curthread);
	kpreempt_enable();

	SUSPEND_DEBUG("finished xen_suspend_domain\n");

	cmn_err(CE_NOTE, "domain restore/migrate completed");
}

/*
 * timeout(9F) callback armed by xen_shutdown(): if the graceful,
 * init(1)-driven shutdown has not completed within SHUTDOWN_TIMEOUT_SECS,
 * force the requested state transition via kadmin().
 */
static void
xen_dirty_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;

	cmn_err(CE_WARN, "Externally requested shutdown failed or "
	    "timed out.\nShutting down.\n");

	switch (cmd) {
	case SHUTDOWN_HALT:
	case SHUTDOWN_POWEROFF:
		(void) kadmin(A_SHUTDOWN, AD_POWEROFF, NULL, kcred);
		break;
	case SHUTDOWN_REBOOT:
		(void) kadmin(A_REBOOT, AD_BOOT, NULL, kcred);
		break;
	}
}

/*
 * Taskq callback that carries out a SHUTDOWN_* request dispatched by
 * xen_shutdown_handler().  Suspend is handled synchronously; the other
 * requests are forwarded to init(1) via SIGPWR with a timeout fallback.
 */
static void
xen_shutdown(void *arg)
{
	int cmd = (uintptr_t)arg;
	proc_t *initpp;

	ASSERT(cmd > SHUTDOWN_INVALID && cmd < SHUTDOWN_MAX);

	if (cmd == SHUTDOWN_SUSPEND) {
		xen_suspend_domain();
		return;
	}

	switch (cmd) {
	case SHUTDOWN_POWEROFF:
		force_shutdown_method = AD_POWEROFF;
		break;
	case SHUTDOWN_HALT:
		force_shutdown_method = AD_HALT;
		break;
	case SHUTDOWN_REBOOT:
		force_shutdown_method = AD_BOOT;
		break;
	}


	/*
	 * If we're still booting and init(1) isn't set up yet, simply halt.
	 */
	mutex_enter(&pidlock);
	initpp = prfind(P_INITPID);
	mutex_exit(&pidlock);
	if (initpp == NULL) {
		extern void halt(char *);
		halt("Power off the System");	/* just in case */
	}

	/*
	 * else, graceful shutdown with inittab and all getting involved
	 */
	psignal(initpp, SIGPWR);

	/* fall back to a forced shutdown if init doesn't finish in time */
	(void) timeout(xen_dirty_shutdown, arg,
	    SHUTDOWN_TIMEOUT_SECS * drv_usectohz(MICROSEC));
}

/*
 * xenbus watch callback fired when the control/shutdown node changes
 * (i.e. dom0 requests poweroff/reboot/suspend/halt).  Reads the request
 * string in a xenbus transaction, clears the node, and dispatches the
 * decoded request to xen_shutdown() on xen_shutdown_tq.
 */
/*ARGSUSED*/
static void
xen_shutdown_handler(struct xenbus_watch *watch, const char **vec,
    unsigned int len)
{
	char *str;
	xenbus_transaction_t xbt;
	int err, shutdown_code = SHUTDOWN_INVALID;
	unsigned int slen;

again:
	err = xenbus_transaction_start(&xbt);
	if (err)
		return;
	if (xenbus_read(xbt, "control", "shutdown", (void *)&str, &slen)) {
		(void) xenbus_transaction_end(xbt, 1);
		return;
	}

	SUSPEND_DEBUG("%d: xen_shutdown_handler: \"%s\"\n", CPU->cpu_id, str);

	/*
	 * If this is a watch fired from our write below, check out early to
	 * avoid an infinite loop.
	 */
	if (strcmp(str, "") == 0) {
		(void) xenbus_transaction_end(xbt, 0);
		kmem_free(str, slen);
		return;
	} else if (strcmp(str, "poweroff") == 0) {
		shutdown_code = SHUTDOWN_POWEROFF;
	} else if (strcmp(str, "reboot") == 0) {
		shutdown_code = SHUTDOWN_REBOOT;
	} else if (strcmp(str, "suspend") == 0) {
		shutdown_code = SHUTDOWN_SUSPEND;
	} else if (strcmp(str, "halt") == 0) {
		shutdown_code = SHUTDOWN_HALT;
	} else {
		printf("Ignoring shutdown request: %s\n", str);
	}

	/* acknowledge the request by clearing the node */
	(void) xenbus_write(xbt, "control", "shutdown", "");
	err = xenbus_transaction_end(xbt, 0);
	if (err == EAGAIN) {
		/* transaction conflict: re-read and retry from the top */
		SUSPEND_DEBUG("%d: trying again\n", CPU->cpu_id);
		kmem_free(str, slen);
		goto again;
	}

	kmem_free(str, slen);
	if (shutdown_code != SHUTDOWN_INVALID) {
		(void) taskq_dispatch(xen_shutdown_tq, xen_shutdown,
		    (void *)(intptr_t)shutdown_code, 0);
	}
}

/*
 * One-time initialization of the PV framework in this HVM domain:
 * verify we are running on a supported Xen, install the hypercall page,
 * fabricate xen_info, map the shared_info page, and bring up grant
 * tables, event channels, xenbus, and the shutdown watch.
 * Returns 0 on success, -1 on failure.
 */
static int
xen_pv_init(dev_info_t *xpv_dip)
{
	struct cpuid_regs cp;
	uint32_t xen_signature[4];
	char *xen_str;
	struct xen_add_to_physmap xatp;
	xen_capabilities_info_t caps;
	pfn_t pfn;
	uint64_t msrval;
	int err;

	/*
	 * Xen's pseudo-cpuid function 0x40000000 returns a string
	 * representing the Xen signature in %ebx, %ecx, and %edx.
	 * %eax contains the maximum supported cpuid function.
	 */
	cp.cp_eax = 0x40000000;
	(void) __cpuid_insn(&cp);
	xen_signature[0] = cp.cp_ebx;
	xen_signature[1] = cp.cp_ecx;
	xen_signature[2] = cp.cp_edx;
	xen_signature[3] = 0;
	xen_str = (char *)xen_signature;
	if (strcmp("XenVMMXenVMM", xen_str) != 0 ||
	    cp.cp_eax < 0x40000002) {
		cmn_err(CE_WARN,
		    "Attempting to load Xen drivers on non-Xen system");
		return (-1);
	}

	/*
	 * cpuid function 0x40000001 returns the Xen version in %eax. The
	 * top 16 bits are the major version, the bottom 16 are the minor
	 * version.
	 */
	cp.cp_eax = 0x40000001;
	(void) __cpuid_insn(&cp);
	xen_major = cp.cp_eax >> 16;
	xen_minor = cp.cp_eax & 0xffff;

	/*
	 * The xpv driver is incompatible with xen versions older than 3.1. This
	 * is due to the changes in the vcpu_info and shared_info structs used
	 * to communicate with the hypervisor (the event channels in particular)
	 * that were introduced with 3.1.
	 */
	if (xen_major < 3 || (xen_major == 3 && xen_minor < 1)) {
		cmn_err(CE_WARN, "Xen version %d.%d is not supported",
		    xen_major, xen_minor);
		return (-1);
	}

	/*
	 * cpuid function 0x40000002 returns information about the
	 * hypercall page. %eax nominally contains the number of pages
	 * with hypercall code, but according to the Xen guys, "I'll
	 * guarantee that remains one forever more, so you can just
	 * allocate a single page and get quite upset if you ever see CPUID
	 * return more than one page." %ebx contains an MSR we use to ask
	 * Xen to remap each page at a specific pfn.
	 */
	cp.cp_eax = 0x40000002;
	(void) __cpuid_insn(&cp);

	/*
	 * Let Xen know where we want the hypercall page mapped.  We
	 * already have a page allocated in the .text section to simplify
	 * the wrapper code.
	 */
	pfn = hat_getpfnum(kas.a_hat, (caddr_t)&hypercall_page);
	msrval = mmu_ptob(pfn);
	wrmsr(cp.cp_ebx, msrval);

	/* Fill in the xen_info data */
	xen_info = kmem_zalloc(sizeof (start_info_t), KM_SLEEP);
	/*
	 * "xen-%d.%d" with 16-bit version numbers is at most 15 chars;
	 * NOTE(review): presumably this fits start_info_t's magic field —
	 * verify against the xen_info structure definition.
	 */
	(void) sprintf(xen_info->magic, "xen-%d.%d", xen_major, xen_minor);
	xen_info->store_mfn = (mfn_t)hvm_get_param(HVM_PARAM_STORE_PFN);
	xen_info->store_evtchn = (int)hvm_get_param(HVM_PARAM_STORE_EVTCHN);

	/* Figure out whether the hypervisor is 32-bit or 64-bit.
*/ 763 if ((HYPERVISOR_xen_version(XENVER_capabilities, &caps) == 0)) { 764 ((char *)(caps))[sizeof (caps) - 1] = '\0'; 765 if (strstr(caps, "x86_64") != NULL) 766 xen_is_64bit = 1; 767 else if (strstr(caps, "x86_32") != NULL) 768 xen_is_64bit = 0; 769 } 770 if (xen_is_64bit < 0) { 771 cmn_err(CE_WARN, "Couldn't get capability info from Xen."); 772 return (-1); 773 } 774 #ifdef __amd64 775 ASSERT(xen_is_64bit == 1); 776 #endif 777 778 /* 779 * Allocate space for the shared_info page and tell Xen where it 780 * is. 781 */ 782 HYPERVISOR_shared_info = xen_alloc_pages(1); 783 shared_info_frame = hat_getpfnum(kas.a_hat, 784 (caddr_t)HYPERVISOR_shared_info); 785 xatp.domid = DOMID_SELF; 786 xatp.idx = 0; 787 xatp.space = XENMAPSPACE_shared_info; 788 xatp.gpfn = shared_info_frame; 789 if ((err = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) != 0) { 790 cmn_err(CE_WARN, "Could not get shared_info page from Xen." 791 " error: %d", err); 792 return (-1); 793 } 794 795 /* Set up the grant tables. 
*/ 796 gnttab_init(); 797 798 /* Set up event channel support */ 799 if (ec_init(xpv_dip) != 0) 800 return (-1); 801 802 /* Set up xenbus */ 803 xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP); 804 xs_early_init(); 805 xs_domu_init(); 806 807 /* Set up for suspend/resume/migrate */ 808 xen_shutdown_tq = taskq_create("shutdown_taskq", 1, 809 maxclsyspri - 1, 1, 1, TASKQ_PREPOPULATE); 810 shutdown_watch.node = "control/shutdown"; 811 shutdown_watch.callback = xen_shutdown_handler; 812 if (register_xenbus_watch(&shutdown_watch)) 813 cmn_err(CE_WARN, "Failed to set shutdown watcher"); 814 815 return (0); 816 } 817 818 static void 819 xen_pv_fini() 820 { 821 if (xen_info != NULL) 822 kmem_free(xen_info, sizeof (start_info_t)); 823 ec_fini(); 824 } 825 826 /*ARGSUSED*/ 827 static int 828 xpv_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result) 829 { 830 if (getminor((dev_t)arg) != XPV_MINOR) 831 return (DDI_FAILURE); 832 833 switch (cmd) { 834 case DDI_INFO_DEVT2DEVINFO: 835 *result = xpv_dip; 836 break; 837 case DDI_INFO_DEVT2INSTANCE: 838 *result = 0; 839 break; 840 default: 841 return (DDI_FAILURE); 842 } 843 844 return (DDI_SUCCESS); 845 } 846 847 static int 848 xpv_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 849 { 850 if (cmd != DDI_ATTACH) 851 return (DDI_FAILURE); 852 853 if (ddi_create_minor_node(dip, ddi_get_name(dip), S_IFCHR, 854 ddi_get_instance(dip), DDI_PSEUDO, 0) != DDI_SUCCESS) 855 return (DDI_FAILURE); 856 857 xpv_dip = dip; 858 859 if (xen_pv_init(dip) != 0) 860 return (DDI_FAILURE); 861 862 ddi_report_dev(dip); 863 864 /* 865 * If the memscrubber attempts to scrub the pages we hand to Xen, 866 * the domain will panic. 867 */ 868 memscrub_disable(); 869 870 /* 871 * Report our version to dom0. 
872 */ 873 if (xenbus_printf(XBT_NULL, "hvmpv/xpv", "version", "%d", 874 HVMPV_XPV_VERS)) 875 cmn_err(CE_WARN, "xpv: couldn't write version\n"); 876 877 return (DDI_SUCCESS); 878 } 879 880 /* 881 * Attempts to reload the PV driver plumbing hang on Intel platforms, so 882 * we don't want to unload the framework by accident. 883 */ 884 int xpv_allow_detach = 0; 885 886 static int 887 xpv_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 888 { 889 if (cmd != DDI_DETACH || xpv_allow_detach == 0) 890 return (DDI_FAILURE); 891 892 if (xpv_dip != NULL) { 893 xen_pv_fini(); 894 ddi_remove_minor_node(dip, NULL); 895 xpv_dip = NULL; 896 } 897 898 return (DDI_SUCCESS); 899 } 900 901 /*ARGSUSED1*/ 902 static int 903 xpv_open(dev_t *dev, int flag, int otyp, cred_t *cr) 904 { 905 return (getminor(*dev) == XPV_MINOR ? 0 : ENXIO); 906 } 907 908 /*ARGSUSED*/ 909 static int 910 xpv_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, 911 int *rval_p) 912 { 913 return (EINVAL); 914 } 915 916 int 917 _init(void) 918 { 919 int err; 920 921 if ((err = mod_install(&modl)) != 0) 922 return (err); 923 924 impl_bus_add_probe(xpv_enumerate); 925 return (0); 926 } 927 928 int 929 _fini(void) 930 { 931 int err; 932 933 if ((err = mod_remove(&modl)) != 0) 934 return (err); 935 936 impl_bus_delete_probe(xpv_enumerate); 937 return (0); 938 } 939 940 int 941 _info(struct modinfo *modinfop) 942 { 943 return (mod_info(&modl, modinfop)); 944 } 945