1 /* 2 * This program is free software; you can redistribute it and/or modify 3 * it under the terms of the GNU General Public License, version 2, as 4 * published by the Free Software Foundation. 5 * 6 * This program is distributed in the hope that it will be useful, 7 * but WITHOUT ANY WARRANTY; without even the implied warranty of 8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * GNU General Public License for more details. 10 * 11 * You should have received a copy of the GNU General Public License 12 * along with this program; if not, write to the Free Software 13 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 14 * 15 * Copyright 2010 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com> 16 */ 17 18 #include <linux/types.h> 19 #include <linux/string.h> 20 #include <linux/kvm.h> 21 #include <linux/kvm_host.h> 22 #include <linux/highmem.h> 23 #include <linux/gfp.h> 24 #include <linux/slab.h> 25 #include <linux/hugetlb.h> 26 #include <linux/vmalloc.h> 27 28 #include <asm/tlbflush.h> 29 #include <asm/kvm_ppc.h> 30 #include <asm/kvm_book3s.h> 31 #include <asm/mmu-hash64.h> 32 #include <asm/hvcall.h> 33 #include <asm/synch.h> 34 #include <asm/ppc-opcode.h> 35 #include <asm/cputable.h> 36 37 /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ 38 #define MAX_LPID_970 63 39 40 /* Power architecture requires HPT is at least 256kB */ 41 #define PPC_MIN_HPT_ORDER 18 42 43 long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) 44 { 45 unsigned long hpt; 46 struct revmap_entry *rev; 47 struct kvmppc_linear_info *li; 48 long order = kvm_hpt_order; 49 50 if (htab_orderp) { 51 order = *htab_orderp; 52 if (order < PPC_MIN_HPT_ORDER) 53 order = PPC_MIN_HPT_ORDER; 54 } 55 56 /* 57 * If the user wants a different size from default, 58 * try first to allocate it from the kernel page allocator. 59 */ 60 hpt = 0; 61 if (order != kvm_hpt_order) { 62 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| 63 __GFP_NOWARN, order - PAGE_SHIFT); 64 if (!hpt) 65 --order; 66 } 67 68 /* Next try to allocate from the preallocated pool */ 69 if (!hpt) { 70 li = kvm_alloc_hpt(); 71 if (li) { 72 hpt = (ulong)li->base_virt; 73 kvm->arch.hpt_li = li; 74 order = kvm_hpt_order; 75 } 76 } 77 78 /* Lastly try successively smaller sizes from the page allocator */ 79 while (!hpt && order > PPC_MIN_HPT_ORDER) { 80 hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| 81 __GFP_NOWARN, order - PAGE_SHIFT); 82 if (!hpt) 83 --order; 84 } 85 86 if (!hpt) 87 return -ENOMEM; 88 89 kvm->arch.hpt_virt = hpt; 90 kvm->arch.hpt_order = order; 91 /* HPTEs are 2**4 bytes long */ 92 kvm->arch.hpt_npte = 1ul << (order - 4); 93 /* 128 (2**7) bytes in each HPTEG */ 94 kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; 95 96 /* Allocate reverse map array */ 97 rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); 98 if (!rev) { 99 pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); 100 goto out_freehpt; 101 } 102 kvm->arch.revmap = rev; 103 kvm->arch.sdr1 = __pa(hpt) | (order - 18); 104 105 pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", 106 hpt, order, kvm->arch.lpid); 107 108 if (htab_orderp) 109 *htab_orderp = order; 110 return 0; 111 112 out_freehpt: 113 if (kvm->arch.hpt_li) 114 kvm_release_hpt(kvm->arch.hpt_li); 115 else 116 free_pages(hpt, order - PAGE_SHIFT); 117 return -ENOMEM; 118 } 119 120 long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) 121 { 122 long err = -EBUSY; 123 long order; 124 125 mutex_lock(&kvm->lock); 126 if (kvm->arch.rma_setup_done) { 127 kvm->arch.rma_setup_done = 0; 128 /* order rma_setup_done vs. vcpus_running */ 129 smp_mb(); 130 if (atomic_read(&kvm->arch.vcpus_running)) { 131 kvm->arch.rma_setup_done = 1; 132 goto out; 133 } 134 } 135 if (kvm->arch.hpt_virt) { 136 order = kvm->arch.hpt_order; 137 /* Set the entire HPT to 0, i.e. invalid HPTEs */ 138 memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); 139 /* 140 * Set the whole last_vcpu array to an invalid vcpu number. 141 * This ensures that each vcpu will flush its TLB on next entry. 142 */ 143 memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu)); 144 *htab_orderp = order; 145 err = 0; 146 } else { 147 err = kvmppc_alloc_hpt(kvm, htab_orderp); 148 order = *htab_orderp; 149 } 150 out: 151 mutex_unlock(&kvm->lock); 152 return err; 153 } 154 155 void kvmppc_free_hpt(struct kvm *kvm) 156 { 157 kvmppc_free_lpid(kvm->arch.lpid); 158 vfree(kvm->arch.revmap); 159 if (kvm->arch.hpt_li) 160 kvm_release_hpt(kvm->arch.hpt_li); 161 else 162 free_pages(kvm->arch.hpt_virt, 163 kvm->arch.hpt_order - PAGE_SHIFT); 164 } 165 166 /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ 167 static inline unsigned long hpte0_pgsize_encoding(unsigned long pgsize) 168 { 169 return (pgsize > 0x1000) ? HPTE_V_LARGE : 0; 170 } 171 172 /* Bits in second HPTE dword for pagesize 4k, 64k or 16M */ 173 static inline unsigned long hpte1_pgsize_encoding(unsigned long pgsize) 174 { 175 return (pgsize == 0x10000) ? 0x1000 : 0; 176 } 177 178 void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, 179 unsigned long porder) 180 { 181 unsigned long i; 182 unsigned long npages; 183 unsigned long hp_v, hp_r; 184 unsigned long addr, hash; 185 unsigned long psize; 186 unsigned long hp0, hp1; 187 long ret; 188 struct kvm *kvm = vcpu->kvm; 189 190 psize = 1ul << porder; 191 npages = memslot->npages >> (porder - PAGE_SHIFT); 192 193 /* VRMA can't be > 1TB */ 194 if (npages > 1ul << (40 - porder)) 195 npages = 1ul << (40 - porder); 196 /* Can't use more than 1 HPTE per HPTEG */ 197 if (npages > kvm->arch.hpt_mask + 1) 198 npages = kvm->arch.hpt_mask + 1; 199 200 hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | 201 HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); 202 hp1 = hpte1_pgsize_encoding(psize) | 203 HPTE_R_R | HPTE_R_C | HPTE_R_M | PP_RWXX; 204 205 for (i = 0; i < npages; ++i) { 206 addr = i << porder; 207 /* can't use hpt_hash since va > 64 bits */ 208 hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; 209 /* 210 * We assume that the hash table is empty and no 211 * vcpus are using it at this stage. Since we create 212 * at most one HPTE per HPTEG, we just assume entry 7 213 * is available and use it. 214 */ 215 hash = (hash << 3) + 7; 216 hp_v = hp0 | ((addr >> 16) & ~0x7fUL); 217 hp_r = hp1 | addr; 218 ret = kvmppc_virtmode_h_enter(vcpu, H_EXACT, hash, hp_v, hp_r); 219 if (ret != H_SUCCESS) { 220 pr_err("KVM: map_vrma at %lx failed, ret=%ld\n", 221 addr, ret); 222 break; 223 } 224 } 225 } 226 227 int kvmppc_mmu_hv_init(void) 228 { 229 unsigned long host_lpid, rsvd_lpid; 230 231 if (!cpu_has_feature(CPU_FTR_HVMODE)) 232 return -EINVAL; 233 234 /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */ 235 if (cpu_has_feature(CPU_FTR_ARCH_206)) { 236 host_lpid = mfspr(SPRN_LPID); /* POWER7 */ 237 rsvd_lpid = LPID_RSVD; 238 } else { 239 host_lpid = 0; /* PPC970 */ 240 rsvd_lpid = MAX_LPID_970; 241 } 242 243 kvmppc_init_lpid(rsvd_lpid + 1); 244 245 kvmppc_claim_lpid(host_lpid); 246 /* rsvd_lpid is reserved for use in partition switching */ 247 kvmppc_claim_lpid(rsvd_lpid); 248 249 return 0; 250 } 251 252 void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) 253 { 254 } 255 256 static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu) 257 { 258 kvmppc_set_msr(vcpu, MSR_SF | MSR_ME); 259 } 260 261 /* 262 * This is called to get a reference to a guest page if there isn't 263 * one already in the kvm->arch.slot_phys[][] arrays. 264 */ 265 static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn, 266 struct kvm_memory_slot *memslot, 267 unsigned long psize) 268 { 269 unsigned long start; 270 long np, err; 271 struct page *page, *hpage, *pages[1]; 272 unsigned long s, pgsize; 273 unsigned long *physp; 274 unsigned int is_io, got, pgorder; 275 struct vm_area_struct *vma; 276 unsigned long pfn, i, npages; 277 278 physp = kvm->arch.slot_phys[memslot->id]; 279 if (!physp) 280 return -EINVAL; 281 if (physp[gfn - memslot->base_gfn]) 282 return 0; 283 284 is_io = 0; 285 got = 0; 286 page = NULL; 287 pgsize = psize; 288 err = -EINVAL; 289 start = gfn_to_hva_memslot(memslot, gfn); 290 291 /* Instantiate and get the page we want access to */ 292 np = get_user_pages_fast(start, 1, 1, pages); 293 if (np != 1) { 294 /* Look up the vma for the page */ 295 down_read(¤t->mm->mmap_sem); 296 vma = find_vma(current->mm, start); 297 if (!vma || vma->vm_start > start || 298 start + psize > vma->vm_end || 299 !(vma->vm_flags & VM_PFNMAP)) 300 goto up_err; 301 is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); 302 pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); 303 /* check alignment of pfn vs. requested page size */ 304 if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1))) 305 goto up_err; 306 up_read(¤t->mm->mmap_sem); 307 308 } else { 309 page = pages[0]; 310 got = KVMPPC_GOT_PAGE; 311 312 /* See if this is a large page */ 313 s = PAGE_SIZE; 314 if (PageHuge(page)) { 315 hpage = compound_head(page); 316 s <<= compound_order(hpage); 317 /* Get the whole large page if slot alignment is ok */ 318 if (s > psize && slot_is_aligned(memslot, s) && 319 !(memslot->userspace_addr & (s - 1))) { 320 start &= ~(s - 1); 321 pgsize = s; 322 get_page(hpage); 323 put_page(page); 324 page = hpage; 325 } 326 } 327 if (s < psize) 328 goto out; 329 pfn = page_to_pfn(page); 330 } 331 332 npages = pgsize >> PAGE_SHIFT; 333 pgorder = __ilog2(npages); 334 physp += (gfn - memslot->base_gfn) & ~(npages - 1); 335 spin_lock(&kvm->arch.slot_phys_lock); 336 for (i = 0; i < npages; ++i) { 337 if (!physp[i]) { 338 physp[i] = ((pfn + i) << PAGE_SHIFT) + 339 got + is_io + pgorder; 340 got = 0; 341 } 342 } 343 spin_unlock(&kvm->arch.slot_phys_lock); 344 err = 0; 345 346 out: 347 if (got) 348 put_page(page); 349 return err; 350 351 up_err: 352 up_read(¤t->mm->mmap_sem); 353 return err; 354 } 355 356 /* 357 * We come here on a H_ENTER call from the guest when we are not 358 * using mmu notifiers and we don't have the requested page pinned 359 * already. 360 */ 361 long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, 362 long pte_index, unsigned long pteh, unsigned long ptel) 363 { 364 struct kvm *kvm = vcpu->kvm; 365 unsigned long psize, gpa, gfn; 366 struct kvm_memory_slot *memslot; 367 long ret; 368 369 if (kvm->arch.using_mmu_notifiers) 370 goto do_insert; 371 372 psize = hpte_page_size(pteh, ptel); 373 if (!psize) 374 return H_PARAMETER; 375 376 pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID); 377 378 /* Find the memslot (if any) for this address */ 379 gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); 380 gfn = gpa >> PAGE_SHIFT; 381 memslot = gfn_to_memslot(kvm, gfn); 382 if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) { 383 if (!slot_is_aligned(memslot, psize)) 384 return H_PARAMETER; 385 if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0) 386 return H_PARAMETER; 387 } 388 389 do_insert: 390 /* Protect linux PTE lookup from page table destruction */ 391 rcu_read_lock_sched(); /* this disables preemption too */ 392 vcpu->arch.pgdir = current->mm->pgd; 393 ret = kvmppc_h_enter(vcpu, flags, pte_index, pteh, ptel); 394 rcu_read_unlock_sched(); 395 if (ret == H_TOO_HARD) { 396 /* this can't happen */ 397 pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); 398 ret = H_RESOURCE; /* or something */ 399 } 400 return ret; 401 402 } 403 404 static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu, 405 gva_t eaddr) 406 { 407 u64 mask; 408 int i; 409 410 for (i = 0; i < vcpu->arch.slb_nr; i++) { 411 if (!(vcpu->arch.slb[i].orige & SLB_ESID_V)) 412 continue; 413 414 if (vcpu->arch.slb[i].origv & SLB_VSID_B_1T) 415 mask = ESID_MASK_1T; 416 else 417 mask = ESID_MASK; 418 419 if (((vcpu->arch.slb[i].orige ^ eaddr) & mask) == 0) 420 return &vcpu->arch.slb[i]; 421 } 422 return NULL; 423 } 424 425 static unsigned long kvmppc_mmu_get_real_addr(unsigned long v, unsigned long r, 426 unsigned long ea) 427 { 428 unsigned long ra_mask; 429 430 ra_mask = hpte_page_size(v, r) - 1; 431 return (r & HPTE_R_RPN & ~ra_mask) | (ea & ra_mask); 432 } 433 434 static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, 435 struct kvmppc_pte *gpte, bool data) 436 { 437 struct kvm *kvm = vcpu->kvm; 438 struct kvmppc_slb *slbe; 439 unsigned long slb_v; 440 unsigned long pp, key; 441 unsigned long v, gr; 442 unsigned long *hptep; 443 int index; 444 int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR); 445 446 /* Get SLB entry */ 447 if (virtmode) { 448 slbe = kvmppc_mmu_book3s_hv_find_slbe(vcpu, eaddr); 449 if (!slbe) 450 return -EINVAL; 451 slb_v = slbe->origv; 452 } else { 453 /* real mode access */ 454 slb_v = vcpu->kvm->arch.vrma_slb_v; 455 } 456 457 /* Find the HPTE in the hash table */ 458 index = kvmppc_hv_find_lock_hpte(kvm, eaddr, slb_v, 459 HPTE_V_VALID | HPTE_V_ABSENT); 460 if (index < 0) 461 return -ENOENT; 462 hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); 463 v = hptep[0] & ~HPTE_V_HVLOCK; 464 gr = kvm->arch.revmap[index].guest_rpte; 465 466 /* Unlock the HPTE */ 467 asm volatile("lwsync" : : : "memory"); 468 hptep[0] = v; 469 470 gpte->eaddr = eaddr; 471 gpte->vpage = ((v & HPTE_V_AVPN) << 4) | ((eaddr >> 12) & 0xfff); 472 473 /* Get PP bits and key for permission check */ 474 pp = gr & (HPTE_R_PP0 | HPTE_R_PP); 475 key = (vcpu->arch.shregs.msr & MSR_PR) ? SLB_VSID_KP : SLB_VSID_KS; 476 key &= slb_v; 477 478 /* Calculate permissions */ 479 gpte->may_read = hpte_read_permission(pp, key); 480 gpte->may_write = hpte_write_permission(pp, key); 481 gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G)); 482 483 /* Storage key permission check for POWER7 */ 484 if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) { 485 int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr); 486 if (amrfield & 1) 487 gpte->may_read = 0; 488 if (amrfield & 2) 489 gpte->may_write = 0; 490 } 491 492 /* Get the guest physical address */ 493 gpte->raddr = kvmppc_mmu_get_real_addr(v, gr, eaddr); 494 return 0; 495 } 496 497 /* 498 * Quick test for whether an instruction is a load or a store. 499 * If the instruction is a load or a store, then this will indicate 500 * which it is, at least on server processors. (Embedded processors 501 * have some external PID instructions that don't follow the rule 502 * embodied here.) If the instruction isn't a load or store, then 503 * this doesn't return anything useful. 504 */ 505 static int instruction_is_store(unsigned int instr) 506 { 507 unsigned int mask; 508 509 mask = 0x10000000; 510 if ((instr & 0xfc000000) == 0x7c000000) 511 mask = 0x100; /* major opcode 31 */ 512 return (instr & mask) != 0; 513 } 514 515 static int kvmppc_hv_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu, 516 unsigned long gpa, gva_t ea, int is_store) 517 { 518 int ret; 519 u32 last_inst; 520 unsigned long srr0 = kvmppc_get_pc(vcpu); 521 522 /* We try to load the last instruction. We don't let 523 * emulate_instruction do it as it doesn't check what 524 * kvmppc_ld returns. 525 * If we fail, we just return to the guest and try executing it again. 526 */ 527 if (vcpu->arch.last_inst == KVM_INST_FETCH_FAILED) { 528 ret = kvmppc_ld(vcpu, &srr0, sizeof(u32), &last_inst, false); 529 if (ret != EMULATE_DONE || last_inst == KVM_INST_FETCH_FAILED) 530 return RESUME_GUEST; 531 vcpu->arch.last_inst = last_inst; 532 } 533 534 /* 535 * WARNING: We do not know for sure whether the instruction we just 536 * read from memory is the same that caused the fault in the first 537 * place. If the instruction we read is neither an load or a store, 538 * then it can't access memory, so we don't need to worry about 539 * enforcing access permissions. So, assuming it is a load or 540 * store, we just check that its direction (load or store) is 541 * consistent with the original fault, since that's what we 542 * checked the access permissions against. If there is a mismatch 543 * we just return and retry the instruction. 544 */ 545 546 if (instruction_is_store(vcpu->arch.last_inst) != !!is_store) 547 return RESUME_GUEST; 548 549 /* 550 * Emulated accesses are emulated by looking at the hash for 551 * translation once, then performing the access later. The 552 * translation could be invalidated in the meantime in which 553 * point performing the subsequent memory access on the old 554 * physical address could possibly be a security hole for the 555 * guest (but not the host). 556 * 557 * This is less of an issue for MMIO stores since they aren't 558 * globally visible. It could be an issue for MMIO loads to 559 * a certain extent but we'll ignore it for now. 560 */ 561 562 vcpu->arch.paddr_accessed = gpa; 563 vcpu->arch.vaddr_accessed = ea; 564 return kvmppc_emulate_mmio(run, vcpu); 565 } 566 567 int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, 568 unsigned long ea, unsigned long dsisr) 569 { 570 struct kvm *kvm = vcpu->kvm; 571 unsigned long *hptep, hpte[3], r; 572 unsigned long mmu_seq, psize, pte_size; 573 unsigned long gfn, hva, pfn; 574 struct kvm_memory_slot *memslot; 575 unsigned long *rmap; 576 struct revmap_entry *rev; 577 struct page *page, *pages[1]; 578 long index, ret, npages; 579 unsigned long is_io; 580 unsigned int writing, write_ok; 581 struct vm_area_struct *vma; 582 unsigned long rcbits; 583 584 /* 585 * Real-mode code has already searched the HPT and found the 586 * entry we're interested in. Lock the entry and check that 587 * it hasn't changed. If it has, just return and re-execute the 588 * instruction. 589 */ 590 if (ea != vcpu->arch.pgfault_addr) 591 return RESUME_GUEST; 592 index = vcpu->arch.pgfault_index; 593 hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4)); 594 rev = &kvm->arch.revmap[index]; 595 preempt_disable(); 596 while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 597 cpu_relax(); 598 hpte[0] = hptep[0] & ~HPTE_V_HVLOCK; 599 hpte[1] = hptep[1]; 600 hpte[2] = r = rev->guest_rpte; 601 asm volatile("lwsync" : : : "memory"); 602 hptep[0] = hpte[0]; 603 preempt_enable(); 604 605 if (hpte[0] != vcpu->arch.pgfault_hpte[0] || 606 hpte[1] != vcpu->arch.pgfault_hpte[1]) 607 return RESUME_GUEST; 608 609 /* Translate the logical address and get the page */ 610 psize = hpte_page_size(hpte[0], r); 611 gfn = hpte_rpn(r, psize); 612 memslot = gfn_to_memslot(kvm, gfn); 613 614 /* No memslot means it's an emulated MMIO region */ 615 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) { 616 unsigned long gpa = (gfn << PAGE_SHIFT) | (ea & (psize - 1)); 617 return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, 618 dsisr & DSISR_ISSTORE); 619 } 620 621 if (!kvm->arch.using_mmu_notifiers) 622 return -EFAULT; /* should never get here */ 623 624 /* used to check for invalidations in progress */ 625 mmu_seq = kvm->mmu_notifier_seq; 626 smp_rmb(); 627 628 is_io = 0; 629 pfn = 0; 630 page = NULL; 631 pte_size = PAGE_SIZE; 632 writing = (dsisr & DSISR_ISSTORE) != 0; 633 /* If writing != 0, then the HPTE must allow writing, if we get here */ 634 write_ok = writing; 635 hva = gfn_to_hva_memslot(memslot, gfn); 636 npages = get_user_pages_fast(hva, 1, writing, pages); 637 if (npages < 1) { 638 /* Check if it's an I/O mapping */ 639 down_read(¤t->mm->mmap_sem); 640 vma = find_vma(current->mm, hva); 641 if (vma && vma->vm_start <= hva && hva + psize <= vma->vm_end && 642 (vma->vm_flags & VM_PFNMAP)) { 643 pfn = vma->vm_pgoff + 644 ((hva - vma->vm_start) >> PAGE_SHIFT); 645 pte_size = psize; 646 is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot)); 647 write_ok = vma->vm_flags & VM_WRITE; 648 } 649 up_read(¤t->mm->mmap_sem); 650 if (!pfn) 651 return -EFAULT; 652 } else { 653 page = pages[0]; 654 if (PageHuge(page)) { 655 page = compound_head(page); 656 pte_size <<= compound_order(page); 657 } 658 /* if the guest wants write access, see if that is OK */ 659 if (!writing && hpte_is_writable(r)) { 660 pte_t *ptep, pte; 661 662 /* 663 * We need to protect against page table destruction 664 * while looking up and updating the pte. 665 */ 666 rcu_read_lock_sched(); 667 ptep = find_linux_pte_or_hugepte(current->mm->pgd, 668 hva, NULL); 669 if (ptep && pte_present(*ptep)) { 670 pte = kvmppc_read_update_linux_pte(ptep, 1); 671 if (pte_write(pte)) 672 write_ok = 1; 673 } 674 rcu_read_unlock_sched(); 675 } 676 pfn = page_to_pfn(page); 677 } 678 679 ret = -EFAULT; 680 if (psize > pte_size) 681 goto out_put; 682 683 /* Check WIMG vs. the actual page we're accessing */ 684 if (!hpte_cache_flags_ok(r, is_io)) { 685 if (is_io) 686 return -EFAULT; 687 /* 688 * Allow guest to map emulated device memory as 689 * uncacheable, but actually make it cacheable. 690 */ 691 r = (r & ~(HPTE_R_W|HPTE_R_I|HPTE_R_G)) | HPTE_R_M; 692 } 693 694 /* Set the HPTE to point to pfn */ 695 r = (r & ~(HPTE_R_PP0 - pte_size)) | (pfn << PAGE_SHIFT); 696 if (hpte_is_writable(r) && !write_ok) 697 r = hpte_make_readonly(r); 698 ret = RESUME_GUEST; 699 preempt_disable(); 700 while (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) 701 cpu_relax(); 702 if ((hptep[0] & ~HPTE_V_HVLOCK) != hpte[0] || hptep[1] != hpte[1] || 703 rev->guest_rpte != hpte[2]) 704 /* HPTE has been changed under us; let the guest retry */ 705 goto out_unlock; 706 hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 707 708 rmap = &memslot->rmap[gfn - memslot->base_gfn]; 709 lock_rmap(rmap); 710 711 /* Check if we might have been invalidated; let the guest retry if so */ 712 ret = RESUME_GUEST; 713 if (mmu_notifier_retry(vcpu, mmu_seq)) { 714 unlock_rmap(rmap); 715 goto out_unlock; 716 } 717 718 /* Only set R/C in real HPTE if set in both *rmap and guest_rpte */ 719 rcbits = *rmap >> KVMPPC_RMAP_RC_SHIFT; 720 r &= rcbits | ~(HPTE_R_R | HPTE_R_C); 721 722 if (hptep[0] & HPTE_V_VALID) { 723 /* HPTE was previously valid, so we need to invalidate it */ 724 unlock_rmap(rmap); 725 hptep[0] |= HPTE_V_ABSENT; 726 kvmppc_invalidate_hpte(kvm, hptep, index); 727 /* don't lose previous R and C bits */ 728 r |= hptep[1] & (HPTE_R_R | HPTE_R_C); 729 } else { 730 kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0); 731 } 732 733 hptep[1] = r; 734 eieio(); 735 hptep[0] = hpte[0]; 736 asm volatile("ptesync" : : : "memory"); 737 preempt_enable(); 738 if (page && hpte_is_writable(r)) 739 SetPageDirty(page); 740 741 out_put: 742 if (page) { 743 /* 744 * We drop pages[0] here, not page because page might 745 * have been set to the head page of a compound, but 746 * we have to drop the reference on the correct tail 747 * page to match the get inside gup() 748 */ 749 put_page(pages[0]); 750 } 751 return ret; 752 753 out_unlock: 754 hptep[0] &= ~HPTE_V_HVLOCK; 755 preempt_enable(); 756 goto out_put; 757 } 758 759 static int kvm_handle_hva(struct kvm *kvm, unsigned long hva, 760 int (*handler)(struct kvm *kvm, unsigned long *rmapp, 761 unsigned long gfn)) 762 { 763 int ret; 764 int retval = 0; 765 struct kvm_memslots *slots; 766 struct kvm_memory_slot *memslot; 767 768 slots = kvm_memslots(kvm); 769 kvm_for_each_memslot(memslot, slots) { 770 unsigned long start = memslot->userspace_addr; 771 unsigned long end; 772 773 end = start + (memslot->npages << PAGE_SHIFT); 774 if (hva >= start && hva < end) { 775 gfn_t gfn_offset = (hva - start) >> PAGE_SHIFT; 776 777 ret = handler(kvm, &memslot->rmap[gfn_offset], 778 memslot->base_gfn + gfn_offset); 779 retval |= ret; 780 } 781 } 782 783 return retval; 784 } 785 786 static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, 787 unsigned long gfn) 788 { 789 struct revmap_entry *rev = kvm->arch.revmap; 790 unsigned long h, i, j; 791 unsigned long *hptep; 792 unsigned long ptel, psize, rcbits; 793 794 for (;;) { 795 lock_rmap(rmapp); 796 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 797 unlock_rmap(rmapp); 798 break; 799 } 800 801 /* 802 * To avoid an ABBA deadlock with the HPTE lock bit, 803 * we can't spin on the HPTE lock while holding the 804 * rmap chain lock. 805 */ 806 i = *rmapp & KVMPPC_RMAP_INDEX; 807 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 808 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 809 /* unlock rmap before spinning on the HPTE lock */ 810 unlock_rmap(rmapp); 811 while (hptep[0] & HPTE_V_HVLOCK) 812 cpu_relax(); 813 continue; 814 } 815 j = rev[i].forw; 816 if (j == i) { 817 /* chain is now empty */ 818 *rmapp &= ~(KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_INDEX); 819 } else { 820 /* remove i from chain */ 821 h = rev[i].back; 822 rev[h].forw = j; 823 rev[j].back = h; 824 rev[i].forw = rev[i].back = i; 825 *rmapp = (*rmapp & ~KVMPPC_RMAP_INDEX) | j; 826 } 827 828 /* Now check and modify the HPTE */ 829 ptel = rev[i].guest_rpte; 830 psize = hpte_page_size(hptep[0], ptel); 831 if ((hptep[0] & HPTE_V_VALID) && 832 hpte_rpn(ptel, psize) == gfn) { 833 hptep[0] |= HPTE_V_ABSENT; 834 kvmppc_invalidate_hpte(kvm, hptep, i); 835 /* Harvest R and C */ 836 rcbits = hptep[1] & (HPTE_R_R | HPTE_R_C); 837 *rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT; 838 rev[i].guest_rpte = ptel | rcbits; 839 } 840 unlock_rmap(rmapp); 841 hptep[0] &= ~HPTE_V_HVLOCK; 842 } 843 return 0; 844 } 845 846 int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) 847 { 848 if (kvm->arch.using_mmu_notifiers) 849 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 850 return 0; 851 } 852 853 static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 854 unsigned long gfn) 855 { 856 struct revmap_entry *rev = kvm->arch.revmap; 857 unsigned long head, i, j; 858 unsigned long *hptep; 859 int ret = 0; 860 861 retry: 862 lock_rmap(rmapp); 863 if (*rmapp & KVMPPC_RMAP_REFERENCED) { 864 *rmapp &= ~KVMPPC_RMAP_REFERENCED; 865 ret = 1; 866 } 867 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 868 unlock_rmap(rmapp); 869 return ret; 870 } 871 872 i = head = *rmapp & KVMPPC_RMAP_INDEX; 873 do { 874 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 875 j = rev[i].forw; 876 877 /* If this HPTE isn't referenced, ignore it */ 878 if (!(hptep[1] & HPTE_R_R)) 879 continue; 880 881 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 882 /* unlock rmap before spinning on the HPTE lock */ 883 unlock_rmap(rmapp); 884 while (hptep[0] & HPTE_V_HVLOCK) 885 cpu_relax(); 886 goto retry; 887 } 888 889 /* Now check and modify the HPTE */ 890 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_R)) { 891 kvmppc_clear_ref_hpte(kvm, hptep, i); 892 rev[i].guest_rpte |= HPTE_R_R; 893 ret = 1; 894 } 895 hptep[0] &= ~HPTE_V_HVLOCK; 896 } while ((i = j) != head); 897 898 unlock_rmap(rmapp); 899 return ret; 900 } 901 902 int kvm_age_hva(struct kvm *kvm, unsigned long hva) 903 { 904 if (!kvm->arch.using_mmu_notifiers) 905 return 0; 906 return kvm_handle_hva(kvm, hva, kvm_age_rmapp); 907 } 908 909 static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp, 910 unsigned long gfn) 911 { 912 struct revmap_entry *rev = kvm->arch.revmap; 913 unsigned long head, i, j; 914 unsigned long *hp; 915 int ret = 1; 916 917 if (*rmapp & KVMPPC_RMAP_REFERENCED) 918 return 1; 919 920 lock_rmap(rmapp); 921 if (*rmapp & KVMPPC_RMAP_REFERENCED) 922 goto out; 923 924 if (*rmapp & KVMPPC_RMAP_PRESENT) { 925 i = head = *rmapp & KVMPPC_RMAP_INDEX; 926 do { 927 hp = (unsigned long *)(kvm->arch.hpt_virt + (i << 4)); 928 j = rev[i].forw; 929 if (hp[1] & HPTE_R_R) 930 goto out; 931 } while ((i = j) != head); 932 } 933 ret = 0; 934 935 out: 936 unlock_rmap(rmapp); 937 return ret; 938 } 939 940 int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) 941 { 942 if (!kvm->arch.using_mmu_notifiers) 943 return 0; 944 return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp); 945 } 946 947 void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) 948 { 949 if (!kvm->arch.using_mmu_notifiers) 950 return; 951 kvm_handle_hva(kvm, hva, kvm_unmap_rmapp); 952 } 953 954 static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) 955 { 956 struct revmap_entry *rev = kvm->arch.revmap; 957 unsigned long head, i, j; 958 unsigned long *hptep; 959 int ret = 0; 960 961 retry: 962 lock_rmap(rmapp); 963 if (*rmapp & KVMPPC_RMAP_CHANGED) { 964 *rmapp &= ~KVMPPC_RMAP_CHANGED; 965 ret = 1; 966 } 967 if (!(*rmapp & KVMPPC_RMAP_PRESENT)) { 968 unlock_rmap(rmapp); 969 return ret; 970 } 971 972 i = head = *rmapp & KVMPPC_RMAP_INDEX; 973 do { 974 hptep = (unsigned long *) (kvm->arch.hpt_virt + (i << 4)); 975 j = rev[i].forw; 976 977 if (!(hptep[1] & HPTE_R_C)) 978 continue; 979 980 if (!try_lock_hpte(hptep, HPTE_V_HVLOCK)) { 981 /* unlock rmap before spinning on the HPTE lock */ 982 unlock_rmap(rmapp); 983 while (hptep[0] & HPTE_V_HVLOCK) 984 cpu_relax(); 985 goto retry; 986 } 987 988 /* Now check and modify the HPTE */ 989 if ((hptep[0] & HPTE_V_VALID) && (hptep[1] & HPTE_R_C)) { 990 /* need to make it temporarily absent to clear C */ 991 hptep[0] |= HPTE_V_ABSENT; 992 kvmppc_invalidate_hpte(kvm, hptep, i); 993 hptep[1] &= ~HPTE_R_C; 994 eieio(); 995 hptep[0] = (hptep[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID; 996 rev[i].guest_rpte |= HPTE_R_C; 997 ret = 1; 998 } 999 hptep[0] &= ~HPTE_V_HVLOCK; 1000 } while ((i = j) != head); 1001 1002 unlock_rmap(rmapp); 1003 return ret; 1004 } 1005 1006 long kvmppc_hv_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) 1007 { 1008 unsigned long i; 1009 unsigned long *rmapp, *map; 1010 1011 preempt_disable(); 1012 rmapp = memslot->rmap; 1013 map = memslot->dirty_bitmap; 1014 for (i = 0; i < memslot->npages; ++i) { 1015 if (kvm_test_clear_dirty(kvm, rmapp)) 1016 __set_bit_le(i, map); 1017 ++rmapp; 1018 } 1019 preempt_enable(); 1020 return 0; 1021 } 1022 1023 void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa, 1024 unsigned long *nb_ret) 1025 { 1026 struct kvm_memory_slot *memslot; 1027 unsigned long gfn = gpa >> PAGE_SHIFT; 1028 struct page *page, *pages[1]; 1029 int npages; 1030 unsigned long hva, psize, offset; 1031 unsigned long pa; 1032 unsigned long *physp; 1033 1034 memslot = gfn_to_memslot(kvm, gfn); 1035 if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) 1036 return NULL; 1037 if (!kvm->arch.using_mmu_notifiers) { 1038 physp = kvm->arch.slot_phys[memslot->id]; 1039 if (!physp) 1040 return NULL; 1041 physp += gfn - memslot->base_gfn; 1042 pa = *physp; 1043 if (!pa) { 1044 if (kvmppc_get_guest_page(kvm, gfn, memslot, 1045 PAGE_SIZE) < 0) 1046 return NULL; 1047 pa = *physp; 1048 } 1049 page = pfn_to_page(pa >> PAGE_SHIFT); 1050 get_page(page); 1051 } else { 1052 hva = gfn_to_hva_memslot(memslot, gfn); 1053 npages = get_user_pages_fast(hva, 1, 1, pages); 1054 if (npages < 1) 1055 return NULL; 1056 page = pages[0]; 1057 } 1058 psize = PAGE_SIZE; 1059 if (PageHuge(page)) { 1060 page = compound_head(page); 1061 psize <<= compound_order(page); 1062 } 1063 offset = gpa & (psize - 1); 1064 if (nb_ret) 1065 *nb_ret = psize - offset; 1066 return page_address(page) + offset; 1067 } 1068 1069 void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) 1070 { 1071 struct page *page = virt_to_page(va); 1072 1073 put_page(page); 1074 } 1075 1076 void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) 1077 { 1078 struct kvmppc_mmu *mmu = &vcpu->arch.mmu; 1079 1080 if (cpu_has_feature(CPU_FTR_ARCH_206)) 1081 vcpu->arch.slb_nr = 32; /* POWER7 */ 1082 else 1083 vcpu->arch.slb_nr = 64; 1084 1085 mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate; 1086 mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr; 1087 1088 vcpu->arch.hflags |= BOOK3S_HFLAG_SLB; 1089 } 1090