xref: /linux/arch/x86/hyperv/mmu.c (revision 307797159ac25fe5a2048bf5c6a5718298edca57)
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
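
/*
 * On x86, PAGE_SIZE is 4 KiB and PAGE_SHIFT is 12, so HV_TLB_FLUSH_UNIT
 * works out to 16 MiB: the page named by a gva_list entry plus up to
 * 4095 additional pages encoded in the entry's low 12 bits.
 */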

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info);

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
				unsigned long start, unsigned long end)
{
	int gva_n = offset;
	unsigned long cur = start, diff;

	do {
		diff = end > cur ? end - cur : 0;

		gva_list[gva_n] = cur & PAGE_MASK;
		/*
		 * Lower 12 bits encode the number of additional
		 * pages to flush (in addition to the 'cur' page).
		 */
		if (diff >= HV_TLB_FLUSH_UNIT)
			gva_list[gva_n] |= ~PAGE_MASK;
		else if (diff)
			gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

		cur += HV_TLB_FLUSH_UNIT;
		gva_n++;

	} while (cur < end);

	return gva_n - offset;
}
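
/*
 * Example: flushing [0x100000, 0x103000) (three 4 KiB pages) yields a
 * single entry 0x100002: base page 0x100000 plus 2 additional pages.
 * Ranges longer than HV_TLB_FLUSH_UNIT get one entry per 16 MiB chunk,
 * each full chunk with all 12 low bits set (~PAGE_MASK).
 */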

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
				    const struct flush_tlb_info *info)
{
	int cpu, vcpu, gva_n, max_gvas;
	struct hv_tlb_flush **flush_pcpu;
	struct hv_tlb_flush *flush;
	u64 status = U64_MAX;
	unsigned long flags;

	trace_hyperv_mmu_flush_tlb_others(cpus, info);

	if (!hv_hypercall_pg)
		goto do_native;

	if (cpumask_empty(cpus))
		return;

	local_irq_save(flags);

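	/*
	 * hyperv_pcpu_input_arg is a pre-allocated per-CPU page used as the
	 * hypercall input; interrupts stay disabled until the hypercall is
	 * done so nothing can reuse the page underneath us.
	 */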
	flush_pcpu = (struct hv_tlb_flush **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (unlikely(!flush)) {
		local_irq_restore(flags);
		goto do_native;
	}

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->processor_mask = 0;
	if (cpumask_equal(cpus, cpu_present_mask)) {
		flush->flags |= HV_FLUSH_ALL_PROCESSORS;
	} else {
		/*
		 * From the supplied CPU set we need to figure out if we can
		 * get away with the cheaper HVCALL_FLUSH_VIRTUAL_ADDRESS_{LIST,SPACE}
		 * hypercalls. This is possible when the highest VP number in
		 * the set is < 64. As VP numbers usually ascend in step with
		 * Linux CPU ids, check the VP number of the highest CPU in
		 * the set first to quickly learn whether the *_EX hypercalls
		 * are unavoidable. Every VP number is checked again while
		 * walking the set below, so the result stays correct even
		 * when that ordering assumption does not hold.
		 */
		if (hv_cpu_number_to_vp_number(cpumask_last(cpus)) >= 64)
			goto do_ex_hypercall;

		for_each_cpu(cpu, cpus) {
			vcpu = hv_cpu_number_to_vp_number(cpu);
			if (vcpu == VP_INVAL) {
				local_irq_restore(flags);
				goto do_native;
			}

			if (vcpu >= 64)
				goto do_ex_hypercall;

			__set_bit(vcpu, (unsigned long *)
				  &flush->processor_mask);
		}
	}

	/*
	 * A single hypercall can flush at most max_gvas ranges. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
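	/*
	 * struct hv_tlb_flush carries three u64 header fields (address_space,
	 * flags, processor_mask) ahead of gva_list, so with 4 KiB pages this
	 * works out to (4096 - 24) / 8 = 509 entries.
	 */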

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
					 flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, 0,
				      info->start, info->end);
		status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
					     gva_n, 0, flush, NULL);
	}
	goto check_status;

do_ex_hypercall:
	status = hyperv_flush_tlb_others_ex(cpus, info);

check_status:
	local_irq_restore(flags);

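	/*
	 * Bits 15:0 of the status hold the hypercall result code
	 * (HV_STATUS_SUCCESS is zero); any failure, including the U64_MAX
	 * set on the paths above, falls through to the native flush.
	 */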
	if (!(status & HV_HYPERCALL_RESULT_MASK))
		return;
do_native:
	native_flush_tlb_others(cpus, info);
}

static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
				      const struct flush_tlb_info *info)
{
	int nr_bank = 0, max_gvas, gva_n;
	struct hv_tlb_flush_ex **flush_pcpu;
	struct hv_tlb_flush_ex *flush;
	u64 status;

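	/*
	 * Returning U64_MAX makes the caller's status check fail, so the
	 * flush degrades to native_flush_tlb_others().
	 */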
	if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
		return U64_MAX;

	flush_pcpu = (struct hv_tlb_flush_ex **)
		     this_cpu_ptr(hyperv_pcpu_input_arg);

	flush = *flush_pcpu;

	if (info->mm) {
		/*
		 * AddressSpace argument must match the CR3 with PCID bits
		 * stripped out.
		 */
		flush->address_space = virt_to_phys(info->mm->pgd);
		flush->address_space &= CR3_ADDR_MASK;
		flush->flags = 0;
	} else {
		flush->address_space = 0;
		flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
	}

	flush->hv_vp_set.valid_bank_mask = 0;

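	/*
	 * HV_GENERIC_SET_SPARSE_4K describes the VP set as a variable number
	 * of 64-bit banks; cpumask_to_vpset() fills bank_contents[] and
	 * valid_bank_mask and returns the bank count, or a negative value
	 * when a CPU has no valid VP number.
	 */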
	flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
	nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus);
	if (nr_bank < 0)
		return U64_MAX;

	/*
	 * A single hypercall can flush at most max_gvas ranges. Flush the
	 * whole address space if we were asked to do more.
	 */
	max_gvas =
		(PAGE_SIZE - sizeof(*flush) - nr_bank *
		 sizeof(flush->hv_vp_set.bank_contents[0])) /
		sizeof(flush->gva_list[0]);
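	/*
	 * Unlike the non-EX case the header is variable-sized: each of the
	 * nr_bank bank words consumes one potential gva_list slot.
	 */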

	if (info->end == TLB_FLUSH_ALL) {
		flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else if (info->end &&
		   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
			0, nr_bank, flush, NULL);
	} else {
		gva_n = fill_gva_list(flush->gva_list, nr_bank,
				      info->start, info->end);
		status = hv_do_rep_hypercall(
			HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
			gva_n, nr_bank, flush, NULL);
	}

	return status;
}

void hyperv_setup_mmu_ops(void)
{
	if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
		return;

	pr_info("Using hypercall for remote TLB flush\n");
	pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
}
235