/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <vm/hat.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/pte.h>
#include <sys/systm.h>
#include <sys/mman.h>
#include <sys/sysmacros.h>
#include <sys/machparam.h>
#include <sys/vtrace.h>
#include <sys/kmem.h>
#include <sys/mmu.h>
#include <sys/cmn_err.h>
#include <sys/cpu.h>
#include <sys/cpuvar.h>
#include <sys/debug.h>
#include <sys/lgrp.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/vmsystm.h>
#include <sys/bitmap.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/rm.h>
#include <sys/t_lock.h>
#include <sys/vm_machparam.h>
#include <sys/promif.h>
#include <sys/prom_isa.h>
#include <sys/prom_plat.h>
#include <sys/prom_debug.h>
#include <sys/privregs.h>
#include <sys/bootconf.h>
#include <sys/memlist.h>
#include <sys/memlist_plat.h>
#include <sys/cpu_module.h>
#include <sys/reboot.h>
#include <sys/kdi.h>
#include <sys/hypervisor_api.h>

/*
 * External routines and data structures
 */
extern void     sfmmu_cache_flushcolor(int, pfn_t);
extern uint_t   mmu_page_sizes;

/*
 * Static routines
 */
static void     sfmmu_set_tlb(void);

/*
 * Global Data:
 */
caddr_t textva, datava;         /* 4M-aligned bases of kernel text/data */
tte_t   ktext_tte, kdata_tte;   /* ttes for kernel text and data */

int     enable_bigktsb = 1;
int     shtsb4m_first = 0;

tte_t   bigktsb_ttes[MAX_BIGKTSB_TTES];
int     bigktsb_nttes = 0;

/*
 * Controls the logic which enables the use of the
 * QUAD_LDD_PHYS ASI for TSB accesses.
 */
int     ktsb_phys = 1;

#ifdef SET_MMU_STATS
struct mmu_stat mmu_stat_area[NCPU];
#endif /* SET_MMU_STATS */

#ifdef DEBUG
/*
 * The following two variables control if the hypervisor/hardware will
 * be used to do the TSB table walk for kernel and user contexts.
 */
int     hv_use_0_tsb = 1;
int     hv_use_non0_tsb = 1;
#endif /* DEBUG */

/*
 * Register this CPU's slice of the MMU fault status area with the
 * scratchpad and hand the prom a trap table that uses it.
 */
static void
sfmmu_set_fault_status_area(void)
{
        caddr_t mmfsa_va;
        extern  caddr_t mmu_fault_status_area;

        mmfsa_va =
            mmu_fault_status_area + (MMFSA_SIZE * getprocessorid());
        set_mmfsa_scratchpad(mmfsa_va);
        prom_set_mmfsa_traptable(&trap_table, va_to_pa(mmfsa_va));
}

/*
 * Hand the kernel's TSB descriptor array to the hypervisor so it can
 * perform context 0 hardware table walks.
 */
void
sfmmu_set_tsbs()
{
        uint64_t rv;
        struct hv_tsb_block *hvbp = &ksfmmup->sfmmu_hvblock;

#ifdef DEBUG
        if (hv_use_0_tsb == 0)
                return;
#endif /* DEBUG */

        rv = hv_set_ctx0(hvbp->hv_tsb_info_cnt, hvbp->hv_tsb_info_pa);
        if (rv != H_EOK)
                prom_printf("cpu%d: hv_set_ctx0() returned %lx\n",
                    getprocessorid(), rv);

#ifdef SET_MMU_STATS
        ASSERT(getprocessorid() < NCPU);
        rv = hv_mmu_set_stat_area(va_to_pa(&mmu_stat_area[getprocessorid()]),
            sizeof (mmu_stat_area[0]));
        if (rv != H_EOK)
                prom_printf("cpu%d: hv_mmu_set_stat_area() returned %lx\n",
                    getprocessorid(), rv);
#endif /* SET_MMU_STATS */
}
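/*
 * Illustrative sketch (not part of the original source): the per-CPU
 * slicing done by sfmmu_set_fault_status_area() above.  Each CPU owns
 * an MMFSA_SIZE-byte slice of the contiguous mmu_fault_status_area
 * buffer, indexed by its processor id.  The numbers below are
 * hypothetical.
 */
#if 0   /* example only, never compiled */
        /* e.g. mmu_fault_status_area = 0x10000000, MMFSA_SIZE = 0x80: */
        caddr_t slice = mmu_fault_status_area + (0x80 * 3);     /* cpu 3 */
        /* slice == (caddr_t)0x10000180 */
#endif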
/*
 * This routine remaps the kernel using large ttes.
 * All entries except locked ones will be removed from the tlb.
 * It assumes that both the text and data segments reside in a separate
 * 4mb virtual and physically contiguous memory chunk.  This routine
 * is only executed by the first cpu.  The remaining cpus execute
 * sfmmu_mp_startup() instead.
 * XXX It assumes that the start of the text segment is KERNELBASE.  It should
 * actually be based on start.
 */
void
sfmmu_remap_kernel(void)
{
        pfn_t   pfn;
        uint_t  attr;
        int     flags;

        extern char end[];
        extern struct as kas;

        textva = (caddr_t)(KERNELBASE & MMU_PAGEMASK4M);
        pfn = va_to_pfn(textva);
        if (pfn == PFN_INVALID)
                prom_panic("can't find kernel text pfn");
        pfn &= TTE_PFNMASK(TTE4M);

        attr = PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC;
        flags = HAT_LOAD_LOCK | SFMMU_NO_TSBLOAD;

        sfmmu_memtte(&ktext_tte, pfn, attr, TTE4M);
        /*
         * We set the lock bit in the tte to lock the translation in
         * the tlb.
         */
        TTE_SET_LOCKED(&ktext_tte);
        sfmmu_tteload(kas.a_hat, &ktext_tte, textva, NULL, flags);

        datava = (caddr_t)((uintptr_t)end & MMU_PAGEMASK4M);
        pfn = va_to_pfn(datava);
        if (pfn == PFN_INVALID)
                prom_panic("can't find kernel data pfn");
        pfn &= TTE_PFNMASK(TTE4M);

        attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC;
        sfmmu_memtte(&kdata_tte, pfn, attr, TTE4M);
        /*
         * We set the lock bit in the tte to lock the translation in
         * the tlb.  We also set the mod bit to avoid taking dirty bit
         * traps on kernel data.
         */
        TTE_SET_LOCKED(&kdata_tte);
        TTE_SET_LOFLAGS(&kdata_tte, 0, TTE_HWWR_INT);
        sfmmu_tteload(kas.a_hat, &kdata_tte, datava,
            (struct page *)NULL, flags);

        /*
         * Create bigktsb ttes if necessary.
         */
        if (enable_bigktsb) {
                int i = 0;
                caddr_t va = ktsb_base;
                size_t tsbsz = ktsb_sz;
                tte_t tte;

                ASSERT(va >= datava + MMU_PAGESIZE4M);
                ASSERT(tsbsz >= MMU_PAGESIZE4M);
                ASSERT(IS_P2ALIGNED(tsbsz, tsbsz));
                ASSERT(IS_P2ALIGNED(va, tsbsz));
                attr = PROC_DATA | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC;
                while (tsbsz != 0) {
                        ASSERT(i < MAX_BIGKTSB_TTES);
                        pfn = va_to_pfn(va);
                        ASSERT(pfn != PFN_INVALID);
                        ASSERT((pfn & ~TTE_PFNMASK(TTE4M)) == 0);
                        sfmmu_memtte(&tte, pfn, attr, TTE4M);
                        ASSERT(TTE_IS_MOD(&tte));
                        /*
                         * No need to lock if we use physical addresses.
                         * Since we invalidate the kernel TSB using virtual
                         * addresses, it's an optimization to load them now
                         * so that we won't have to load them later.
                         */
                        if (!ktsb_phys) {
                                TTE_SET_LOCKED(&tte);
                        }
                        sfmmu_tteload(kas.a_hat, &tte, va, NULL, flags);
                        bigktsb_ttes[i] = tte;
                        va += MMU_PAGESIZE4M;
                        tsbsz -= MMU_PAGESIZE4M;
                        i++;
                }
                bigktsb_nttes = i;
        }

        sfmmu_set_tlb();
}

/*
 * Setup the kernel's locked tte's
 */
void
sfmmu_set_tlb(void)
{
        (void) hv_mmu_map_perm_addr(textva, KCONTEXT, *(uint64_t *)&ktext_tte,
            MAP_ITLB | MAP_DTLB);
        (void) hv_mmu_map_perm_addr(datava, KCONTEXT, *(uint64_t *)&kdata_tte,
            MAP_DTLB);

        if (!ktsb_phys && enable_bigktsb) {
                int i;
                caddr_t va = ktsb_base;
                uint64_t tte;

                ASSERT(bigktsb_nttes <= MAX_BIGKTSB_TTES);
                for (i = 0; i < bigktsb_nttes; i++) {
                        tte = *(uint64_t *)&bigktsb_ttes[i];
                        (void) hv_mmu_map_perm_addr(va, KCONTEXT, tte,
                            MAP_DTLB);
                        va += MMU_PAGESIZE4M;
                }
        }
}
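/*
 * Illustrative sketch (not part of the original source): the 4M
 * alignment arithmetic used by sfmmu_remap_kernel() above.
 * MMU_PAGEMASK4M clears the offset bits within a 4mb page, so any
 * address inside a chunk maps back to the chunk's base.  The address
 * below is hypothetical.
 */
#if 0   /* example only, never compiled */
        uintptr_t end_va = 0x01234567;          /* some kernel data address */
        uintptr_t base = end_va & ~(uintptr_t)(MMU_PAGESIZE4M - 1);
        /* base == 0x01000000, the 4mb-aligned start loaded into kdata_tte */
#endif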
/*
 * This routine is executed by all other cpus except the first one
 * at initialization time.  It is responsible for taking over the
 * mmu from the prom.  We follow these steps:
 *
 *      Lock the kernel's ttes in the TLB
 *      Initialize the tsb hardware registers
 *      Take over the trap table
 *      Flush the prom's locked entries from the TLB
 */
void
sfmmu_mp_startup(void)
{
        sfmmu_set_tlb();
        setwstate(WSTATE_KERN);
        /*
         * sfmmu_set_fault_status_area() takes over trap_table
         */
        sfmmu_set_fault_status_area();
        sfmmu_set_tsbs();
        install_va_to_tte();
}

/*
 * Establish a permanent hypervisor mapping for a debugger page: always
 * in the I-TLB, and in the D-TLB as well when do_dtlb is set.
 */
void
kdi_tlb_page_lock(caddr_t va, int do_dtlb)
{
        tte_t tte;
        pfn_t pfn = va_to_pfn(va);
        uint64_t ret;

        sfmmu_memtte(&tte, pfn, PROC_TEXT | HAT_NOSYNC | HAT_ATTR_NOSOFTEXEC,
            TTE8K);
        ret = hv_mmu_map_perm_addr(va, KCONTEXT, *(uint64_t *)&tte,
            MAP_ITLB | (do_dtlb ? MAP_DTLB : 0));

        if (ret != H_EOK) {
                cmn_err(CE_PANIC, "cpu%d: cannot set permanent mapping for "
                    "va=0x%p, hv error code 0x%lx", getprocessorid(),
                    (void *)va, ret);
        }
}

/*
 * Remove the debugger's permanent mapping for va.
 */
void
kdi_tlb_page_unlock(caddr_t va, int do_dtlb)
{
        (void) hv_mmu_unmap_perm_addr(va, KCONTEXT,
            MAP_ITLB | (do_dtlb ? MAP_DTLB : 0));
}

/*
 * Clear machine specific TSB information for a user process
 */
void
sfmmu_clear_utsbinfo()
{
        (void) hv_set_ctxnon0(0, NULL);
}

/*
 * The tsbord[] array is set up to translate from the order of tsbs in
 * the sfmmu list to the order of tsbs in the tsb descriptor array
 * passed to the hv, which is the search order used during Hardware
 * Table Walk.  So, the tsb with index i in the sfmmu list will have
 * search order tsbord[i].
 *
 * The order of tsbs in the sfmmu list will be as follows:
 *
 *      0       8K - 512K private TSB
 *      1       4M - 256M private TSB
 *      2       8K - 512K shared TSB
 *      3       4M - 256M shared TSB
 *
 * Shared TSBs are only used if a process is part of an SCD.
 *
 * So, e.g. tsbord[3] = 1;
 * corresponds to searching the shared 4M TSB second.
 *
 * The search order is selected so that the 8K-512K private TSB is
 * always first.  Currently shared context is not expected to map many
 * 8K-512K pages that cause TLB misses, so we order the shared TSB for
 * 4M-256M pages in front of the shared TSB for 8K-512K pages.  We also
 * expect more TLB misses against private context mappings than shared
 * context mappings and place private TSBs ahead of shared TSBs in
 * descriptor order.  The shtsb4m_first /etc/system tuneable can be used
 * to change the default ordering of private and shared TSBs for
 * 4M-256M pages.
 */
void
sfmmu_setup_tsbinfo(sfmmu_t *sfmmup)
{
        struct tsb_info         *tsbinfop;
        hv_tsb_info_t           *tdp;
        int                     i;
        int                     j;
        int                     scd = 0;
        int                     tsbord[NHV_TSB_INFO];

#ifdef DEBUG
        ASSERT(max_mmu_ctxdoms > 0);
        if (sfmmup != ksfmmup) {
                /* Process should have INVALID_CONTEXT on all MMUs. */
                for (i = 0; i < max_mmu_ctxdoms; i++) {
                        ASSERT(sfmmup->sfmmu_ctxs[i].cnum == INVALID_CONTEXT);
                }
        }
#endif

        tsbinfop = sfmmup->sfmmu_tsb;
        if (tsbinfop == NULL) {
                sfmmup->sfmmu_hvblock.hv_tsb_info_pa = (uint64_t)-1;
                sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = 0;
                return;
        }

        ASSERT(sfmmup != ksfmmup || sfmmup->sfmmu_scdp == NULL);
        ASSERT(sfmmup->sfmmu_scdp == NULL ||
            sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb != NULL);

        tsbord[0] = 0;
        if (sfmmup->sfmmu_scdp == NULL) {
                tsbord[1] = 1;
        } else {
                struct tsb_info *scd8ktsbp =
                    sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb;
                ulong_t shared_4mttecnt = 0;
                ulong_t priv_4mttecnt = 0;
                int scd4mtsb = (scd8ktsbp->tsb_next != NULL);

                for (i = TTE4M; i < MMU_PAGE_SIZES; i++) {
                        if (scd4mtsb) {
                                shared_4mttecnt +=
                                    sfmmup->sfmmu_scdismttecnt[i] +
                                    sfmmup->sfmmu_scdrttecnt[i];
                        }
                        if (tsbinfop->tsb_next != NULL) {
                                priv_4mttecnt += sfmmup->sfmmu_ttecnt[i] +
                                    sfmmup->sfmmu_ismttecnt[i];
                        }
                }

                if (tsbinfop->tsb_next == NULL) {
                        if (shared_4mttecnt) {
                                tsbord[1] = 2;
                                tsbord[2] = 1;
                        } else {
                                tsbord[1] = 1;
                                tsbord[2] = 2;
                        }
                } else if (priv_4mttecnt) {
                        if (shared_4mttecnt) {
                                tsbord[1] = shtsb4m_first ? 2 : 1;
                                tsbord[2] = 3;
                                tsbord[3] = shtsb4m_first ? 1 : 2;
                        } else {
                                tsbord[1] = 1;
                                tsbord[2] = 2;
                                tsbord[3] = 3;
                        }
                } else if (shared_4mttecnt) {
                        tsbord[1] = 3;
                        tsbord[2] = 2;
                        tsbord[3] = 1;
                } else {
                        tsbord[1] = 2;
                        tsbord[2] = 1;
                        tsbord[3] = 3;
                }
        }

        ASSERT(tsbinfop != NULL);
        for (i = 0; tsbinfop != NULL && i < NHV_TSB_INFO; i++) {
                if (i == 0) {
                        tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[i];
                        sfmmup->sfmmu_hvblock.hv_tsb_info_pa = va_to_pa(tdp);
                }

                j = tsbord[i];

                tdp = &sfmmup->sfmmu_hvblock.hv_tsb_info[j];

                ASSERT(tsbinfop->tsb_ttesz_mask != 0);
                tdp->hvtsb_idxpgsz = lowbit(tsbinfop->tsb_ttesz_mask) - 1;
                tdp->hvtsb_assoc = 1;
                tdp->hvtsb_ntte = TSB_ENTRIES(tsbinfop->tsb_szc);
                tdp->hvtsb_ctx_index = scd;
                tdp->hvtsb_pgszs = tsbinfop->tsb_ttesz_mask;
                tdp->hvtsb_rsvd = 0;
                tdp->hvtsb_pa = tsbinfop->tsb_pa;

                tsbinfop = tsbinfop->tsb_next;
                if (tsbinfop == NULL && !scd && sfmmup->sfmmu_scdp != NULL) {
                        tsbinfop = sfmmup->sfmmu_scdp->scd_sfmmup->sfmmu_tsb;
                        scd = 1;
                }
        }
        sfmmup->sfmmu_hvblock.hv_tsb_info_cnt = i;
        ASSERT(tsbinfop == NULL);
}
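/*
 * Worked example (annotation derived from the code above): with both
 * private and shared 4M TTEs present and shtsb4m_first == 0, the
 * assignments yield tsbord[0..3] = {0, 1, 3, 2}, i.e. this descriptor
 * (search) order:
 *
 *      slot 0  8K - 512K private TSB
 *      slot 1  4M - 256M private TSB
 *      slot 2  4M - 256M shared TSB
 *      slot 3  8K - 512K shared TSB
 *
 * Setting shtsb4m_first = 1 yields tsbord[0..3] = {0, 2, 3, 1}, which
 * swaps the two 4M TSBs so the shared one is searched second.
 */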
/*
 * Invalidate a TSB via processor specific TSB invalidation routine
 */
void
sfmmu_inv_tsb(caddr_t tsb_base, uint_t tsb_bytes)
{
        extern void cpu_inv_tsb(caddr_t, uint_t);

        cpu_inv_tsb(tsb_base, tsb_bytes);
}

/*
 * Completely flush the D-cache on all cpus.
 * Not applicable to sun4v.
 */
void
sfmmu_cache_flushall()
{
}

/*
 * Initialise the real address field in sfmmu_pgsz_order.
 */
void
sfmmu_init_pgsz_hv(sfmmu_t *sfmmup)
{
        int i;

        /*
         * Initialize mmu counts for pagesize register programming.
         */
        for (i = 0; i < max_mmu_page_sizes; i++) {
                sfmmup->sfmmu_mmuttecnt[i] = 0;
        }

        sfmmup->sfmmu_pgsz_order.hv_pgsz_order_pa =
            va_to_pa(&sfmmup->sfmmu_pgsz_order.hv_pgsz_order);
}
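/*
 * Illustrative sketch (not part of the original source): the
 * hvtsb_idxpgsz derivation in sfmmu_setup_tsbinfo() above, assuming the
 * usual sfmmu page size codes (TTE8K == 0 through TTE256M == 5).
 * tsb_ttesz_mask has one bit per page size the TSB maps, and lowbit()
 * returns the 1-based position of the lowest set bit, so the smallest
 * mapped page size becomes the TSB's index page size.  The mask below
 * is hypothetical.
 */
#if 0   /* example only, never compiled */
        uint_t mask = (1 << TTE4M) | (1 << TTE256M);    /* 4M/256M TSB */
        uint_t idxpgsz = lowbit(mask) - 1;              /* == TTE4M */
#endif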