xref: /linux/arch/s390/kvm/kvm-s390.c (revision e2be04c7f9958dde770eeb8b30e829ca969b37bb)
1 /*
2  * hosting zSeries kernel virtual machines
3  *
4  * Copyright IBM Corp. 2008, 2009
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License (version 2 only)
8  * as published by the Free Software Foundation.
9  *
10  *    Author(s): Carsten Otte <cotte@de.ibm.com>
11  *               Christian Borntraeger <borntraeger@de.ibm.com>
12  *               Heiko Carstens <heiko.carstens@de.ibm.com>
13  *               Christian Ehrhardt <ehrhardt@de.ibm.com>
14  *               Jason J. Herne <jjherne@us.ibm.com>
15  */
16 
17 #include <linux/compiler.h>
18 #include <linux/err.h>
19 #include <linux/fs.h>
20 #include <linux/hrtimer.h>
21 #include <linux/init.h>
22 #include <linux/kvm.h>
23 #include <linux/kvm_host.h>
24 #include <linux/mman.h>
25 #include <linux/module.h>
26 #include <linux/moduleparam.h>
27 #include <linux/random.h>
28 #include <linux/slab.h>
29 #include <linux/timer.h>
30 #include <linux/vmalloc.h>
31 #include <linux/bitmap.h>
32 #include <linux/sched/signal.h>
33 #include <linux/string.h>
34 
35 #include <asm/asm-offsets.h>
36 #include <asm/lowcore.h>
37 #include <asm/stp.h>
38 #include <asm/pgtable.h>
39 #include <asm/gmap.h>
40 #include <asm/nmi.h>
41 #include <asm/switch_to.h>
42 #include <asm/isc.h>
43 #include <asm/sclp.h>
44 #include <asm/cpacf.h>
45 #include <asm/timex.h>
46 #include "kvm-s390.h"
47 #include "gaccess.h"
48 
49 #define KMSG_COMPONENT "kvm-s390"
50 #undef pr_fmt
51 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
52 
53 #define CREATE_TRACE_POINTS
54 #include "trace.h"
55 #include "trace-s390.h"
56 
57 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
58 #define LOCAL_IRQS 32
59 #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
60 			   (KVM_MAX_VCPUS + LOCAL_IRQS))
61 
62 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
63 
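/*
 * Per-VCPU exit and instruction counters, exported through the common KVM
 * debugfs interface (one entry per field of struct kvm_vcpu's stat block).
 */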
64 struct kvm_stats_debugfs_item debugfs_entries[] = {
65 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
66 	{ "exit_null", VCPU_STAT(exit_null) },
67 	{ "exit_validity", VCPU_STAT(exit_validity) },
68 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
69 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
70 	{ "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) },
71 	{ "exit_instruction", VCPU_STAT(exit_instruction) },
72 	{ "exit_pei", VCPU_STAT(exit_pei) },
73 	{ "exit_program_interruption", VCPU_STAT(exit_program_interruption) },
74 	{ "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) },
75 	{ "exit_operation_exception", VCPU_STAT(exit_operation_exception) },
76 	{ "halt_successful_poll", VCPU_STAT(halt_successful_poll) },
77 	{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) },
78 	{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) },
79 	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
80 	{ "instruction_lctlg", VCPU_STAT(instruction_lctlg) },
81 	{ "instruction_lctl", VCPU_STAT(instruction_lctl) },
82 	{ "instruction_stctl", VCPU_STAT(instruction_stctl) },
83 	{ "instruction_stctg", VCPU_STAT(instruction_stctg) },
84 	{ "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) },
85 	{ "deliver_external_call", VCPU_STAT(deliver_external_call) },
86 	{ "deliver_service_signal", VCPU_STAT(deliver_service_signal) },
87 	{ "deliver_virtio_interrupt", VCPU_STAT(deliver_virtio_interrupt) },
88 	{ "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) },
89 	{ "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) },
90 	{ "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) },
91 	{ "deliver_program_interruption", VCPU_STAT(deliver_program_int) },
92 	{ "exit_wait_state", VCPU_STAT(exit_wait_state) },
93 	{ "instruction_pfmf", VCPU_STAT(instruction_pfmf) },
94 	{ "instruction_stidp", VCPU_STAT(instruction_stidp) },
95 	{ "instruction_spx", VCPU_STAT(instruction_spx) },
96 	{ "instruction_stpx", VCPU_STAT(instruction_stpx) },
97 	{ "instruction_stap", VCPU_STAT(instruction_stap) },
98 	{ "instruction_storage_key", VCPU_STAT(instruction_storage_key) },
99 	{ "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) },
100 	{ "instruction_stsch", VCPU_STAT(instruction_stsch) },
101 	{ "instruction_chsc", VCPU_STAT(instruction_chsc) },
102 	{ "instruction_essa", VCPU_STAT(instruction_essa) },
103 	{ "instruction_stsi", VCPU_STAT(instruction_stsi) },
104 	{ "instruction_stfl", VCPU_STAT(instruction_stfl) },
105 	{ "instruction_tprot", VCPU_STAT(instruction_tprot) },
106 	{ "instruction_sthyi", VCPU_STAT(instruction_sthyi) },
107 	{ "instruction_sie", VCPU_STAT(instruction_sie) },
108 	{ "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) },
109 	{ "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) },
110 	{ "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) },
111 	{ "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) },
112 	{ "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) },
113 	{ "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) },
114 	{ "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) },
115 	{ "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) },
116 	{ "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) },
117 	{ "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) },
118 	{ "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) },
119 	{ "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) },
120 	{ "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) },
121 	{ "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) },
122 	{ "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) },
123 	{ "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) },
124 	{ "diagnose_10", VCPU_STAT(diagnose_10) },
125 	{ "diagnose_44", VCPU_STAT(diagnose_44) },
126 	{ "diagnose_9c", VCPU_STAT(diagnose_9c) },
127 	{ "diagnose_258", VCPU_STAT(diagnose_258) },
128 	{ "diagnose_308", VCPU_STAT(diagnose_308) },
129 	{ "diagnose_500", VCPU_STAT(diagnose_500) },
130 	{ NULL }
131 };
132 
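/*
 * Layout of the extended (128-bit) TOD clock value as stored by
 * STORE CLOCK EXTENDED: one epoch-index byte, the 64-bit TOD value, and
 * seven trailing bytes that are not used here.
 */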
133 struct kvm_s390_tod_clock_ext {
134 	__u8 epoch_idx;
135 	__u64 tod;
136 	__u8 reserved[7];
137 } __packed;
138 
139 /* allow nested virtualization in KVM (if enabled by user space) */
140 static int nested;
141 module_param(nested, int, S_IRUGO);
142 MODULE_PARM_DESC(nested, "Nested virtualization support");
143 
144 /* upper facilities limit for kvm */
145 unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
146 
147 unsigned long kvm_s390_fac_list_mask_size(void)
148 {
149 	BUILD_BUG_ON(ARRAY_SIZE(kvm_s390_fac_list_mask) > S390_ARCH_FAC_MASK_SIZE_U64);
150 	return ARRAY_SIZE(kvm_s390_fac_list_mask);
151 }
152 
153 /* available cpu features supported by kvm */
154 static DECLARE_BITMAP(kvm_s390_available_cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
155 /* available subfunctions indicated via query / "test bit" */
156 static struct kvm_s390_vm_cpu_subfunc kvm_s390_available_subfunc;
157 
158 static struct gmap_notifier gmap_notifier;
159 static struct gmap_notifier vsie_gmap_notifier;
160 debug_info_t *kvm_s390_dbf;
161 
162 /* Section: not file related */
163 int kvm_arch_hardware_enable(void)
164 {
165 	/* every s390 is virtualization enabled ;-) */
166 	return 0;
167 }
168 
169 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
170 			      unsigned long end);
171 
172 /*
173  * This callback is executed during stop_machine(). All CPUs are therefore
174  * temporarily stopped. In order not to change guest behavior, we have to
175  * disable preemption whenever we touch the epoch of kvm and the VCPUs,
176  * so a CPU won't be stopped while calculating with the epoch.
177  */
178 static int kvm_clock_sync(struct notifier_block *notifier, unsigned long val,
179 			  void *v)
180 {
181 	struct kvm *kvm;
182 	struct kvm_vcpu *vcpu;
183 	int i;
184 	unsigned long long *delta = v;
185 
186 	list_for_each_entry(kvm, &vm_list, vm_list) {
187 		kvm->arch.epoch -= *delta;
188 		kvm_for_each_vcpu(i, vcpu, kvm) {
189 			vcpu->arch.sie_block->epoch -= *delta;
190 			if (vcpu->arch.cputm_enabled)
191 				vcpu->arch.cputm_start += *delta;
192 			if (vcpu->arch.vsie_block)
193 				vcpu->arch.vsie_block->epoch -= *delta;
194 		}
195 	}
196 	return NOTIFY_OK;
197 }
198 
199 static struct notifier_block kvm_clock_notifier = {
200 	.notifier_call = kvm_clock_sync,
201 };
202 
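/*
 * Register the gmap invalidation notifiers (for regular and vSIE guests)
 * and the epoch delta notifier that keeps guest TOD clocks consistent
 * when the host clock is synchronized.
 */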
203 int kvm_arch_hardware_setup(void)
204 {
205 	gmap_notifier.notifier_call = kvm_gmap_notifier;
206 	gmap_register_pte_notifier(&gmap_notifier);
207 	vsie_gmap_notifier.notifier_call = kvm_s390_vsie_gmap_notifier;
208 	gmap_register_pte_notifier(&vsie_gmap_notifier);
209 	atomic_notifier_chain_register(&s390_epoch_delta_notifier,
210 				       &kvm_clock_notifier);
211 	return 0;
212 }
213 
214 void kvm_arch_hardware_unsetup(void)
215 {
216 	gmap_unregister_pte_notifier(&gmap_notifier);
217 	gmap_unregister_pte_notifier(&vsie_gmap_notifier);
218 	atomic_notifier_chain_unregister(&s390_epoch_delta_notifier,
219 					 &kvm_clock_notifier);
220 }
221 
222 static void allow_cpu_feat(unsigned long nr)
223 {
224 	set_bit_inv(nr, kvm_s390_available_cpu_feat);
225 }
226 
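/*
 * Probe a single PERFORM LOCKED OPERATION function code.  Bit 0x100 in
 * general register 0 selects "test bit" mode, so the instruction only
 * reports via the condition code whether the function is installed.
 */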
227 static inline int plo_test_bit(unsigned char nr)
228 {
229 	register unsigned long r0 asm("0") = (unsigned long) nr | 0x100;
230 	int cc;
231 
232 	asm volatile(
233 		/* Parameter registers are ignored for "test bit" */
234 		"	plo	0,0,0,0(0)\n"
235 		"	ipm	%0\n"
236 		"	srl	%0,28\n"
237 		: "=d" (cc)
238 		: "d" (r0)
239 		: "cc");
240 	return cc == 0;
241 }
242 
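/*
 * Probe the host for the subfunctions (PLO, PTFF, CPACF queries) and the
 * SIE features that may be offered to the guest cpu model.  SIE related
 * features are only advertised if nested virtualization is enabled and
 * the minimum requirements (SIEF2, ESOP, 64-bit SCAO, IDTE) are met.
 */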
243 static void kvm_s390_cpu_feat_init(void)
244 {
245 	int i;
246 
247 	for (i = 0; i < 256; ++i) {
248 		if (plo_test_bit(i))
249 			kvm_s390_available_subfunc.plo[i >> 3] |= 0x80 >> (i & 7);
250 	}
251 
252 	if (test_facility(28)) /* TOD-clock steering */
253 		ptff(kvm_s390_available_subfunc.ptff,
254 		     sizeof(kvm_s390_available_subfunc.ptff),
255 		     PTFF_QAF);
256 
257 	if (test_facility(17)) { /* MSA */
258 		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
259 			      kvm_s390_available_subfunc.kmac);
260 		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
261 			      kvm_s390_available_subfunc.kmc);
262 		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
263 			      kvm_s390_available_subfunc.km);
264 		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
265 			      kvm_s390_available_subfunc.kimd);
266 		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
267 			      kvm_s390_available_subfunc.klmd);
268 	}
269 	if (test_facility(76)) /* MSA3 */
270 		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
271 			      kvm_s390_available_subfunc.pckmo);
272 	if (test_facility(77)) { /* MSA4 */
273 		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
274 			      kvm_s390_available_subfunc.kmctr);
275 		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
276 			      kvm_s390_available_subfunc.kmf);
277 		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
278 			      kvm_s390_available_subfunc.kmo);
279 		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
280 			      kvm_s390_available_subfunc.pcc);
281 	}
282 	if (test_facility(57)) /* MSA5 */
283 		__cpacf_query(CPACF_PRNO, (cpacf_mask_t *)
284 			      kvm_s390_available_subfunc.ppno);
285 
286 	if (test_facility(146)) /* MSA8 */
287 		__cpacf_query(CPACF_KMA, (cpacf_mask_t *)
288 			      kvm_s390_available_subfunc.kma);
289 
290 	if (MACHINE_HAS_ESOP)
291 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
292 	/*
293 	 * We need SIE support, ESOP (PROT_READ protection for gmap_shadow),
294 	 * 64bit SCAO (SCA passthrough) and IDTE (for gmap_shadow unshadowing).
295 	 */
296 	if (!sclp.has_sief2 || !MACHINE_HAS_ESOP || !sclp.has_64bscao ||
297 	    !test_facility(3) || !nested)
298 		return;
299 	allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIEF2);
300 	if (sclp.has_64bscao)
301 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_64BSCAO);
302 	if (sclp.has_siif)
303 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_SIIF);
304 	if (sclp.has_gpere)
305 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GPERE);
306 	if (sclp.has_gsls)
307 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_GSLS);
308 	if (sclp.has_ib)
309 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IB);
310 	if (sclp.has_cei)
311 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
312 	if (sclp.has_ibs)
313 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
314 	if (sclp.has_kss)
315 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
316 	/*
317 	 * KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
318 	 * all skey handling functions read/set the skey from the PGSTE
319 	 * instead of the real storage key.
320 	 *
321 	 * KVM_S390_VM_CPU_FEAT_CMMA: Wrong shadow of PTE.I bits will cause
322 	 * pages to be detected as preserved although they are resident.
323 	 *
324 	 * KVM_S390_VM_CPU_FEAT_PFMFI: Wrong shadow of PTE.I bits will
325 	 * have the same effect as for KVM_S390_VM_CPU_FEAT_SKEY.
326 	 *
327 	 * For KVM_S390_VM_CPU_FEAT_SKEY, KVM_S390_VM_CPU_FEAT_CMMA and
328 	 * KVM_S390_VM_CPU_FEAT_PFMFI, all PTE.I and PGSTE bits have to be
329 	 * correctly shadowed. We can do that for the PGSTE but not for PTE.I.
330 	 *
331 	 * KVM_S390_VM_CPU_FEAT_SIGPIF: Wrong SCB addresses in the SCA. We
332 	 * cannot easily shadow the SCA because of the ipte lock.
333 	 */
334 }
335 
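/*
 * Module initialization: create the global "kvm-trace" debug feature
 * area, probe cpu features and subfunctions, and register the floating
 * interrupt controller (FLIC) device ops.
 */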
336 int kvm_arch_init(void *opaque)
337 {
338 	kvm_s390_dbf = debug_register("kvm-trace", 32, 1, 7 * sizeof(long));
339 	if (!kvm_s390_dbf)
340 		return -ENOMEM;
341 
342 	if (debug_register_view(kvm_s390_dbf, &debug_sprintf_view)) {
343 		debug_unregister(kvm_s390_dbf);
344 		return -ENOMEM;
345 	}
346 
347 	kvm_s390_cpu_feat_init();
348 
349 	/* Register floating interrupt controller interface. */
350 	return kvm_register_device_ops(&kvm_flic_ops, KVM_DEV_TYPE_FLIC);
351 }
352 
353 void kvm_arch_exit(void)
354 {
355 	debug_unregister(kvm_s390_dbf);
356 }
357 
358 /* Section: device related */
359 long kvm_arch_dev_ioctl(struct file *filp,
360 			unsigned int ioctl, unsigned long arg)
361 {
362 	if (ioctl == KVM_S390_ENABLE_SIE)
363 		return s390_enable_sie();
364 	return -EINVAL;
365 }
366 
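/*
 * Report which optional capabilities this VM supports; queried by user
 * space through the KVM_CHECK_EXTENSION ioctl.
 */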
367 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
368 {
369 	int r;
370 
371 	switch (ext) {
372 	case KVM_CAP_S390_PSW:
373 	case KVM_CAP_S390_GMAP:
374 	case KVM_CAP_SYNC_MMU:
375 #ifdef CONFIG_KVM_S390_UCONTROL
376 	case KVM_CAP_S390_UCONTROL:
377 #endif
378 	case KVM_CAP_ASYNC_PF:
379 	case KVM_CAP_SYNC_REGS:
380 	case KVM_CAP_ONE_REG:
381 	case KVM_CAP_ENABLE_CAP:
382 	case KVM_CAP_S390_CSS_SUPPORT:
383 	case KVM_CAP_IOEVENTFD:
384 	case KVM_CAP_DEVICE_CTRL:
385 	case KVM_CAP_ENABLE_CAP_VM:
386 	case KVM_CAP_S390_IRQCHIP:
387 	case KVM_CAP_VM_ATTRIBUTES:
388 	case KVM_CAP_MP_STATE:
389 	case KVM_CAP_IMMEDIATE_EXIT:
390 	case KVM_CAP_S390_INJECT_IRQ:
391 	case KVM_CAP_S390_USER_SIGP:
392 	case KVM_CAP_S390_USER_STSI:
393 	case KVM_CAP_S390_SKEYS:
394 	case KVM_CAP_S390_IRQ_STATE:
395 	case KVM_CAP_S390_USER_INSTR0:
396 	case KVM_CAP_S390_CMMA_MIGRATION:
397 	case KVM_CAP_S390_AIS:
398 		r = 1;
399 		break;
400 	case KVM_CAP_S390_MEM_OP:
401 		r = MEM_OP_MAX_SIZE;
402 		break;
403 	case KVM_CAP_NR_VCPUS:
404 	case KVM_CAP_MAX_VCPUS:
405 		r = KVM_S390_BSCA_CPU_SLOTS;
406 		if (!kvm_s390_use_sca_entries())
407 			r = KVM_MAX_VCPUS;
408 		else if (sclp.has_esca && sclp.has_64bscao)
409 			r = KVM_S390_ESCA_CPU_SLOTS;
410 		break;
411 	case KVM_CAP_NR_MEMSLOTS:
412 		r = KVM_USER_MEM_SLOTS;
413 		break;
414 	case KVM_CAP_S390_COW:
415 		r = MACHINE_HAS_ESOP;
416 		break;
417 	case KVM_CAP_S390_VECTOR_REGISTERS:
418 		r = MACHINE_HAS_VX;
419 		break;
420 	case KVM_CAP_S390_RI:
421 		r = test_facility(64);
422 		break;
423 	case KVM_CAP_S390_GS:
424 		r = test_facility(133);
425 		break;
426 	default:
427 		r = 0;
428 	}
429 	return r;
430 }
431 
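/*
 * Transfer the dirty state tracked by the host (in the PGSTEs) for all
 * pages of a memslot into the KVM dirty bitmap.
 */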
432 static void kvm_s390_sync_dirty_log(struct kvm *kvm,
433 					struct kvm_memory_slot *memslot)
434 {
435 	gfn_t cur_gfn, last_gfn;
436 	unsigned long address;
437 	struct gmap *gmap = kvm->arch.gmap;
438 
439 	/* Loop over all guest pages */
440 	last_gfn = memslot->base_gfn + memslot->npages;
441 	for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) {
442 		address = gfn_to_hva_memslot(memslot, cur_gfn);
443 
444 		if (test_and_clear_guest_dirty(gmap->mm, address))
445 			mark_page_dirty(kvm, cur_gfn);
446 		if (fatal_signal_pending(current))
447 			return;
448 		cond_resched();
449 	}
450 }
451 
452 /* Section: vm related */
453 static void sca_del_vcpu(struct kvm_vcpu *vcpu);
454 
455 /*
456  * Get (and clear) the dirty memory log for a memory slot.
457  */
458 int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm,
459 			       struct kvm_dirty_log *log)
460 {
461 	int r;
462 	unsigned long n;
463 	struct kvm_memslots *slots;
464 	struct kvm_memory_slot *memslot;
465 	int is_dirty = 0;
466 
467 	if (kvm_is_ucontrol(kvm))
468 		return -EINVAL;
469 
470 	mutex_lock(&kvm->slots_lock);
471 
472 	r = -EINVAL;
473 	if (log->slot >= KVM_USER_MEM_SLOTS)
474 		goto out;
475 
476 	slots = kvm_memslots(kvm);
477 	memslot = id_to_memslot(slots, log->slot);
478 	r = -ENOENT;
479 	if (!memslot->dirty_bitmap)
480 		goto out;
481 
482 	kvm_s390_sync_dirty_log(kvm, memslot);
483 	r = kvm_get_dirty_log(kvm, log, &is_dirty);
484 	if (r)
485 		goto out;
486 
487 	/* Clear the dirty log */
488 	if (is_dirty) {
489 		n = kvm_dirty_bitmap_bytes(memslot);
490 		memset(memslot->dirty_bitmap, 0, n);
491 	}
492 	r = 0;
493 out:
494 	mutex_unlock(&kvm->slots_lock);
495 	return r;
496 }
497 
498 static void icpt_operexc_on_all_vcpus(struct kvm *kvm)
499 {
500 	unsigned int i;
501 	struct kvm_vcpu *vcpu;
502 
503 	kvm_for_each_vcpu(i, vcpu, kvm) {
504 		kvm_s390_sync_request(KVM_REQ_ICPT_OPEREXC, vcpu);
505 	}
506 }
507 
508 static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
509 {
510 	int r;
511 
512 	if (cap->flags)
513 		return -EINVAL;
514 
515 	switch (cap->cap) {
516 	case KVM_CAP_S390_IRQCHIP:
517 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_IRQCHIP");
518 		kvm->arch.use_irqchip = 1;
519 		r = 0;
520 		break;
521 	case KVM_CAP_S390_USER_SIGP:
522 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_SIGP");
523 		kvm->arch.user_sigp = 1;
524 		r = 0;
525 		break;
526 	case KVM_CAP_S390_VECTOR_REGISTERS:
527 		mutex_lock(&kvm->lock);
528 		if (kvm->created_vcpus) {
529 			r = -EBUSY;
530 		} else if (MACHINE_HAS_VX) {
531 			set_kvm_facility(kvm->arch.model.fac_mask, 129);
532 			set_kvm_facility(kvm->arch.model.fac_list, 129);
533 			if (test_facility(134)) {
534 				set_kvm_facility(kvm->arch.model.fac_mask, 134);
535 				set_kvm_facility(kvm->arch.model.fac_list, 134);
536 			}
537 			if (test_facility(135)) {
538 				set_kvm_facility(kvm->arch.model.fac_mask, 135);
539 				set_kvm_facility(kvm->arch.model.fac_list, 135);
540 			}
541 			r = 0;
542 		} else
543 			r = -EINVAL;
544 		mutex_unlock(&kvm->lock);
545 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_VECTOR_REGISTERS %s",
546 			 r ? "(not available)" : "(success)");
547 		break;
548 	case KVM_CAP_S390_RI:
549 		r = -EINVAL;
550 		mutex_lock(&kvm->lock);
551 		if (kvm->created_vcpus) {
552 			r = -EBUSY;
553 		} else if (test_facility(64)) {
554 			set_kvm_facility(kvm->arch.model.fac_mask, 64);
555 			set_kvm_facility(kvm->arch.model.fac_list, 64);
556 			r = 0;
557 		}
558 		mutex_unlock(&kvm->lock);
559 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
560 			 r ? "(not available)" : "(success)");
561 		break;
562 	case KVM_CAP_S390_AIS:
563 		mutex_lock(&kvm->lock);
564 		if (kvm->created_vcpus) {
565 			r = -EBUSY;
566 		} else {
567 			set_kvm_facility(kvm->arch.model.fac_mask, 72);
568 			set_kvm_facility(kvm->arch.model.fac_list, 72);
569 			r = 0;
570 		}
571 		mutex_unlock(&kvm->lock);
572 		VM_EVENT(kvm, 3, "ENABLE: AIS %s",
573 			 r ? "(not available)" : "(success)");
574 		break;
575 	case KVM_CAP_S390_GS:
576 		r = -EINVAL;
577 		mutex_lock(&kvm->lock);
578 		if (atomic_read(&kvm->online_vcpus)) {
579 			r = -EBUSY;
580 		} else if (test_facility(133)) {
581 			set_kvm_facility(kvm->arch.model.fac_mask, 133);
582 			set_kvm_facility(kvm->arch.model.fac_list, 133);
583 			r = 0;
584 		}
585 		mutex_unlock(&kvm->lock);
586 		VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
587 			 r ? "(not available)" : "(success)");
588 		break;
589 	case KVM_CAP_S390_USER_STSI:
590 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
591 		kvm->arch.user_stsi = 1;
592 		r = 0;
593 		break;
594 	case KVM_CAP_S390_USER_INSTR0:
595 		VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_INSTR0");
596 		kvm->arch.user_instr0 = 1;
597 		icpt_operexc_on_all_vcpus(kvm);
598 		r = 0;
599 		break;
600 	default:
601 		r = -EINVAL;
602 		break;
603 	}
604 	return r;
605 }
606 
607 static int kvm_s390_get_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
608 {
609 	int ret;
610 
611 	switch (attr->attr) {
612 	case KVM_S390_VM_MEM_LIMIT_SIZE:
613 		ret = 0;
614 		VM_EVENT(kvm, 3, "QUERY: max guest memory: %lu bytes",
615 			 kvm->arch.mem_limit);
616 		if (put_user(kvm->arch.mem_limit, (u64 __user *)attr->addr))
617 			ret = -EFAULT;
618 		break;
619 	default:
620 		ret = -ENXIO;
621 		break;
622 	}
623 	return ret;
624 }
625 
626 static int kvm_s390_set_mem_control(struct kvm *kvm, struct kvm_device_attr *attr)
627 {
628 	int ret;
629 	unsigned int idx;
630 	switch (attr->attr) {
631 	case KVM_S390_VM_MEM_ENABLE_CMMA:
632 		ret = -ENXIO;
633 		if (!sclp.has_cmma)
634 			break;
635 
636 		ret = -EBUSY;
637 		VM_EVENT(kvm, 3, "%s", "ENABLE: CMMA support");
638 		mutex_lock(&kvm->lock);
639 		if (!kvm->created_vcpus) {
640 			kvm->arch.use_cmma = 1;
641 			ret = 0;
642 		}
643 		mutex_unlock(&kvm->lock);
644 		break;
645 	case KVM_S390_VM_MEM_CLR_CMMA:
646 		ret = -ENXIO;
647 		if (!sclp.has_cmma)
648 			break;
649 		ret = -EINVAL;
650 		if (!kvm->arch.use_cmma)
651 			break;
652 
653 		VM_EVENT(kvm, 3, "%s", "RESET: CMMA states");
654 		mutex_lock(&kvm->lock);
655 		idx = srcu_read_lock(&kvm->srcu);
656 		s390_reset_cmma(kvm->arch.gmap->mm);
657 		srcu_read_unlock(&kvm->srcu, idx);
658 		mutex_unlock(&kvm->lock);
659 		ret = 0;
660 		break;
661 	case KVM_S390_VM_MEM_LIMIT_SIZE: {
662 		unsigned long new_limit;
663 
664 		if (kvm_is_ucontrol(kvm))
665 			return -EINVAL;
666 
667 		if (get_user(new_limit, (u64 __user *)attr->addr))
668 			return -EFAULT;
669 
670 		if (kvm->arch.mem_limit != KVM_S390_NO_MEM_LIMIT &&
671 		    new_limit > kvm->arch.mem_limit)
672 			return -E2BIG;
673 
674 		if (!new_limit)
675 			return -EINVAL;
676 
677 		/* gmap_create takes last usable address */
678 		if (new_limit != KVM_S390_NO_MEM_LIMIT)
679 			new_limit -= 1;
680 
681 		ret = -EBUSY;
682 		mutex_lock(&kvm->lock);
683 		if (!kvm->created_vcpus) {
684 			/* gmap_create will round the limit up */
685 			struct gmap *new = gmap_create(current->mm, new_limit);
686 
687 			if (!new) {
688 				ret = -ENOMEM;
689 			} else {
690 				gmap_remove(kvm->arch.gmap);
691 				new->private = kvm;
692 				kvm->arch.gmap = new;
693 				ret = 0;
694 			}
695 		}
696 		mutex_unlock(&kvm->lock);
697 		VM_EVENT(kvm, 3, "SET: max guest address: %lu", new_limit);
698 		VM_EVENT(kvm, 3, "New guest asce: 0x%pK",
699 			 (void *) kvm->arch.gmap->asce);
700 		break;
701 	}
702 	default:
703 		ret = -ENXIO;
704 		break;
705 	}
706 	return ret;
707 }
708 
709 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu);
710 
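/*
 * Enable or disable AES/DEA key wrapping.  Enabling generates fresh
 * wrapping key masks; in all cases every VCPU is kicked out of SIE so
 * that kvm_s390_vcpu_crypto_setup() can propagate the new state.
 */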
711 static int kvm_s390_vm_set_crypto(struct kvm *kvm, struct kvm_device_attr *attr)
712 {
713 	struct kvm_vcpu *vcpu;
714 	int i;
715 
716 	if (!test_kvm_facility(kvm, 76))
717 		return -EINVAL;
718 
719 	mutex_lock(&kvm->lock);
720 	switch (attr->attr) {
721 	case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
722 		get_random_bytes(
723 			kvm->arch.crypto.crycb->aes_wrapping_key_mask,
724 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
725 		kvm->arch.crypto.aes_kw = 1;
726 		VM_EVENT(kvm, 3, "%s", "ENABLE: AES keywrapping support");
727 		break;
728 	case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
729 		get_random_bytes(
730 			kvm->arch.crypto.crycb->dea_wrapping_key_mask,
731 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
732 		kvm->arch.crypto.dea_kw = 1;
733 		VM_EVENT(kvm, 3, "%s", "ENABLE: DEA keywrapping support");
734 		break;
735 	case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
736 		kvm->arch.crypto.aes_kw = 0;
737 		memset(kvm->arch.crypto.crycb->aes_wrapping_key_mask, 0,
738 			sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
739 		VM_EVENT(kvm, 3, "%s", "DISABLE: AES keywrapping support");
740 		break;
741 	case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
742 		kvm->arch.crypto.dea_kw = 0;
743 		memset(kvm->arch.crypto.crycb->dea_wrapping_key_mask, 0,
744 			sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
745 		VM_EVENT(kvm, 3, "%s", "DISABLE: DEA keywrapping support");
746 		break;
747 	default:
748 		mutex_unlock(&kvm->lock);
749 		return -ENXIO;
750 	}
751 
752 	kvm_for_each_vcpu(i, vcpu, kvm) {
753 		kvm_s390_vcpu_crypto_setup(vcpu);
754 		exit_sie(vcpu);
755 	}
756 	mutex_unlock(&kvm->lock);
757 	return 0;
758 }
759 
760 static void kvm_s390_sync_request_broadcast(struct kvm *kvm, int req)
761 {
762 	int cx;
763 	struct kvm_vcpu *vcpu;
764 
765 	kvm_for_each_vcpu(cx, vcpu, kvm)
766 		kvm_s390_sync_request(req, vcpu);
767 }
768 
769 /*
770  * Must be called with kvm->srcu held to avoid races on memslots, and with
771  * kvm->lock to avoid races with ourselves and kvm_s390_vm_stop_migration.
772  */
773 static int kvm_s390_vm_start_migration(struct kvm *kvm)
774 {
775 	struct kvm_s390_migration_state *mgs;
776 	struct kvm_memory_slot *ms;
777 	/* should be the only one */
778 	struct kvm_memslots *slots;
779 	unsigned long ram_pages;
780 	int slotnr;
781 
782 	/* migration mode already enabled */
783 	if (kvm->arch.migration_state)
784 		return 0;
785 
786 	slots = kvm_memslots(kvm);
787 	if (!slots || !slots->used_slots)
788 		return -EINVAL;
789 
790 	mgs = kzalloc(sizeof(*mgs), GFP_KERNEL);
791 	if (!mgs)
792 		return -ENOMEM;
793 	kvm->arch.migration_state = mgs;
794 
795 	if (kvm->arch.use_cmma) {
796 		/*
797 		 * Get the last slot. They should be sorted by base_gfn, so the
798 		 * last slot is also the one at the end of the address space.
799 		 * We have verified above that at least one slot is present.
800 		 */
801 		ms = slots->memslots + slots->used_slots - 1;
802 		/* round up so we only use full longs */
803 		ram_pages = roundup(ms->base_gfn + ms->npages, BITS_PER_LONG);
804 		/* allocate enough bytes to store all the bits */
805 		mgs->pgste_bitmap = vmalloc(ram_pages / 8);
806 		if (!mgs->pgste_bitmap) {
807 			kfree(mgs);
808 			kvm->arch.migration_state = NULL;
809 			return -ENOMEM;
810 		}
811 
812 		mgs->bitmap_size = ram_pages;
813 		atomic64_set(&mgs->dirty_pages, ram_pages);
814 		/* mark all the pages in active slots as dirty */
815 		for (slotnr = 0; slotnr < slots->used_slots; slotnr++) {
816 			ms = slots->memslots + slotnr;
817 			bitmap_set(mgs->pgste_bitmap, ms->base_gfn, ms->npages);
818 		}
819 
820 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_START_MIGRATION);
821 	}
822 	return 0;
823 }
824 
825 /*
826  * Must be called with kvm->lock to avoid races with ourselves and
827  * kvm_s390_vm_start_migration.
828  */
829 static int kvm_s390_vm_stop_migration(struct kvm *kvm)
830 {
831 	struct kvm_s390_migration_state *mgs;
832 
833 	/* migration mode already disabled */
834 	if (!kvm->arch.migration_state)
835 		return 0;
836 	mgs = kvm->arch.migration_state;
837 	kvm->arch.migration_state = NULL;
838 
839 	if (kvm->arch.use_cmma) {
840 		kvm_s390_sync_request_broadcast(kvm, KVM_REQ_STOP_MIGRATION);
841 		vfree(mgs->pgste_bitmap);
842 	}
843 	kfree(mgs);
844 	return 0;
845 }
846 
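/*
 * Dispatcher for the KVM_S390_VM_MIGRATION attribute group.  As a rough
 * sketch (not part of this file), user space would enable migration mode
 * with something like:
 *
 *	struct kvm_device_attr attr = {
 *		.group = KVM_S390_VM_MIGRATION,
 *		.attr  = KVM_S390_VM_MIGRATION_START,
 *	};
 *	ioctl(vm_fd, KVM_SET_DEVICE_ATTR, &attr);
 */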
847 static int kvm_s390_vm_set_migration(struct kvm *kvm,
848 				     struct kvm_device_attr *attr)
849 {
850 	int idx, res = -ENXIO;
851 
852 	mutex_lock(&kvm->lock);
853 	switch (attr->attr) {
854 	case KVM_S390_VM_MIGRATION_START:
855 		idx = srcu_read_lock(&kvm->srcu);
856 		res = kvm_s390_vm_start_migration(kvm);
857 		srcu_read_unlock(&kvm->srcu, idx);
858 		break;
859 	case KVM_S390_VM_MIGRATION_STOP:
860 		res = kvm_s390_vm_stop_migration(kvm);
861 		break;
862 	default:
863 		break;
864 	}
865 	mutex_unlock(&kvm->lock);
866 
867 	return res;
868 }
869 
870 static int kvm_s390_vm_get_migration(struct kvm *kvm,
871 				     struct kvm_device_attr *attr)
872 {
873 	u64 mig = (kvm->arch.migration_state != NULL);
874 
875 	if (attr->attr != KVM_S390_VM_MIGRATION_STATUS)
876 		return -ENXIO;
877 
878 	if (copy_to_user((void __user *)attr->addr, &mig, sizeof(mig)))
879 		return -EFAULT;
880 	return 0;
881 }
882 
883 static int kvm_s390_set_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
884 {
885 	struct kvm_s390_vm_tod_clock gtod;
886 
887 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
888 		return -EFAULT;
889 
890 	if (test_kvm_facility(kvm, 139))
891 		kvm_s390_set_tod_clock_ext(kvm, &gtod);
892 	else if (gtod.epoch_idx == 0)
893 		kvm_s390_set_tod_clock(kvm, gtod.tod);
894 	else
895 		return -EINVAL;
896 
897 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x, TOD base: 0x%llx",
898 		gtod.epoch_idx, gtod.tod);
899 
900 	return 0;
901 }
902 
903 static int kvm_s390_set_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
904 {
905 	u8 gtod_high;
906 
907 	if (copy_from_user(&gtod_high, (void __user *)attr->addr,
908 					   sizeof(gtod_high)))
909 		return -EFAULT;
910 
911 	if (gtod_high != 0)
912 		return -EINVAL;
913 	VM_EVENT(kvm, 3, "SET: TOD extension: 0x%x", gtod_high);
914 
915 	return 0;
916 }
917 
918 static int kvm_s390_set_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
919 {
920 	u64 gtod;
921 
922 	if (copy_from_user(&gtod, (void __user *)attr->addr, sizeof(gtod)))
923 		return -EFAULT;
924 
925 	kvm_s390_set_tod_clock(kvm, gtod);
926 	VM_EVENT(kvm, 3, "SET: TOD base: 0x%llx", gtod);
927 	return 0;
928 }
929 
930 static int kvm_s390_set_tod(struct kvm *kvm, struct kvm_device_attr *attr)
931 {
932 	int ret;
933 
934 	if (attr->flags)
935 		return -EINVAL;
936 
937 	switch (attr->attr) {
938 	case KVM_S390_VM_TOD_EXT:
939 		ret = kvm_s390_set_tod_ext(kvm, attr);
940 		break;
941 	case KVM_S390_VM_TOD_HIGH:
942 		ret = kvm_s390_set_tod_high(kvm, attr);
943 		break;
944 	case KVM_S390_VM_TOD_LOW:
945 		ret = kvm_s390_set_tod_low(kvm, attr);
946 		break;
947 	default:
948 		ret = -ENXIO;
949 		break;
950 	}
951 	return ret;
952 }
953 
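/*
 * Compute the guest view of the extended TOD clock: add the per-VM epoch
 * (and epoch index) to the current host clock, carrying one into the
 * epoch index if the 64-bit addition wrapped.
 */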
954 static void kvm_s390_get_tod_clock_ext(struct kvm *kvm,
955 					struct kvm_s390_vm_tod_clock *gtod)
956 {
957 	struct kvm_s390_tod_clock_ext htod;
958 
959 	preempt_disable();
960 
961 	get_tod_clock_ext((char *)&htod);
962 
963 	gtod->tod = htod.tod + kvm->arch.epoch;
964 	gtod->epoch_idx = htod.epoch_idx + kvm->arch.epdx;
965 
966 	if (gtod->tod < htod.tod)
967 		gtod->epoch_idx += 1;
968 
969 	preempt_enable();
970 }
971 
972 static int kvm_s390_get_tod_ext(struct kvm *kvm, struct kvm_device_attr *attr)
973 {
974 	struct kvm_s390_vm_tod_clock gtod;
975 
976 	memset(&gtod, 0, sizeof(gtod));
977 
978 	if (test_kvm_facility(kvm, 139))
979 		kvm_s390_get_tod_clock_ext(kvm, &gtod);
980 	else
981 		gtod.tod = kvm_s390_get_tod_clock_fast(kvm);
982 
983 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
984 		return -EFAULT;
985 
986 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x, TOD base: 0x%llx",
987 		gtod.epoch_idx, gtod.tod);
988 	return 0;
989 }
990 
991 static int kvm_s390_get_tod_high(struct kvm *kvm, struct kvm_device_attr *attr)
992 {
993 	u8 gtod_high = 0;
994 
995 	if (copy_to_user((void __user *)attr->addr, &gtod_high,
996 					 sizeof(gtod_high)))
997 		return -EFAULT;
998 	VM_EVENT(kvm, 3, "QUERY: TOD extension: 0x%x", gtod_high);
999 
1000 	return 0;
1001 }
1002 
1003 static int kvm_s390_get_tod_low(struct kvm *kvm, struct kvm_device_attr *attr)
1004 {
1005 	u64 gtod;
1006 
1007 	gtod = kvm_s390_get_tod_clock_fast(kvm);
1008 	if (copy_to_user((void __user *)attr->addr, &gtod, sizeof(gtod)))
1009 		return -EFAULT;
1010 	VM_EVENT(kvm, 3, "QUERY: TOD base: 0x%llx", gtod);
1011 
1012 	return 0;
1013 }
1014 
1015 static int kvm_s390_get_tod(struct kvm *kvm, struct kvm_device_attr *attr)
1016 {
1017 	int ret;
1018 
1019 	if (attr->flags)
1020 		return -EINVAL;
1021 
1022 	switch (attr->attr) {
1023 	case KVM_S390_VM_TOD_EXT:
1024 		ret = kvm_s390_get_tod_ext(kvm, attr);
1025 		break;
1026 	case KVM_S390_VM_TOD_HIGH:
1027 		ret = kvm_s390_get_tod_high(kvm, attr);
1028 		break;
1029 	case KVM_S390_VM_TOD_LOW:
1030 		ret = kvm_s390_get_tod_low(kvm, attr);
1031 		break;
1032 	default:
1033 		ret = -ENXIO;
1034 		break;
1035 	}
1036 	return ret;
1037 }
1038 
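/*
 * Set the guest cpu model (cpuid, IBC, facility list) from user space.
 * The requested IBC value is clamped to the range reported via SCLP
 * (lowest_ibc..unblocked_ibc).  Only allowed before the first VCPU has
 * been created.
 */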
1039 static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1040 {
1041 	struct kvm_s390_vm_cpu_processor *proc;
1042 	u16 lowest_ibc, unblocked_ibc;
1043 	int ret = 0;
1044 
1045 	mutex_lock(&kvm->lock);
1046 	if (kvm->created_vcpus) {
1047 		ret = -EBUSY;
1048 		goto out;
1049 	}
1050 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1051 	if (!proc) {
1052 		ret = -ENOMEM;
1053 		goto out;
1054 	}
1055 	if (!copy_from_user(proc, (void __user *)attr->addr,
1056 			    sizeof(*proc))) {
1057 		kvm->arch.model.cpuid = proc->cpuid;
1058 		lowest_ibc = sclp.ibc >> 16 & 0xfff;
1059 		unblocked_ibc = sclp.ibc & 0xfff;
1060 		if (lowest_ibc && proc->ibc) {
1061 			if (proc->ibc > unblocked_ibc)
1062 				kvm->arch.model.ibc = unblocked_ibc;
1063 			else if (proc->ibc < lowest_ibc)
1064 				kvm->arch.model.ibc = lowest_ibc;
1065 			else
1066 				kvm->arch.model.ibc = proc->ibc;
1067 		}
1068 		memcpy(kvm->arch.model.fac_list, proc->fac_list,
1069 		       S390_ARCH_FAC_LIST_SIZE_BYTE);
1070 		VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1071 			 kvm->arch.model.ibc,
1072 			 kvm->arch.model.cpuid);
1073 		VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1074 			 kvm->arch.model.fac_list[0],
1075 			 kvm->arch.model.fac_list[1],
1076 			 kvm->arch.model.fac_list[2]);
1077 	} else
1078 		ret = -EFAULT;
1079 	kfree(proc);
1080 out:
1081 	mutex_unlock(&kvm->lock);
1082 	return ret;
1083 }
1084 
1085 static int kvm_s390_set_processor_feat(struct kvm *kvm,
1086 				       struct kvm_device_attr *attr)
1087 {
1088 	struct kvm_s390_vm_cpu_feat data;
1089 	int ret = -EBUSY;
1090 
1091 	if (copy_from_user(&data, (void __user *)attr->addr, sizeof(data)))
1092 		return -EFAULT;
1093 	if (!bitmap_subset((unsigned long *) data.feat,
1094 			   kvm_s390_available_cpu_feat,
1095 			   KVM_S390_VM_CPU_FEAT_NR_BITS))
1096 		return -EINVAL;
1097 
1098 	mutex_lock(&kvm->lock);
1099 	if (!atomic_read(&kvm->online_vcpus)) {
1100 		bitmap_copy(kvm->arch.cpu_feat, (unsigned long *) data.feat,
1101 			    KVM_S390_VM_CPU_FEAT_NR_BITS);
1102 		ret = 0;
1103 	}
1104 	mutex_unlock(&kvm->lock);
1105 	return ret;
1106 }
1107 
1108 static int kvm_s390_set_processor_subfunc(struct kvm *kvm,
1109 					  struct kvm_device_attr *attr)
1110 {
1111 	/*
1112 	 * Once supported by kernel + hw, we have to store the subfunctions
1113 	 * in kvm->arch and remember that user space configured them.
1114 	 */
1115 	return -ENXIO;
1116 }
1117 
1118 static int kvm_s390_set_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1119 {
1120 	int ret = -ENXIO;
1121 
1122 	switch (attr->attr) {
1123 	case KVM_S390_VM_CPU_PROCESSOR:
1124 		ret = kvm_s390_set_processor(kvm, attr);
1125 		break;
1126 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1127 		ret = kvm_s390_set_processor_feat(kvm, attr);
1128 		break;
1129 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1130 		ret = kvm_s390_set_processor_subfunc(kvm, attr);
1131 		break;
1132 	}
1133 	return ret;
1134 }
1135 
1136 static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr)
1137 {
1138 	struct kvm_s390_vm_cpu_processor *proc;
1139 	int ret = 0;
1140 
1141 	proc = kzalloc(sizeof(*proc), GFP_KERNEL);
1142 	if (!proc) {
1143 		ret = -ENOMEM;
1144 		goto out;
1145 	}
1146 	proc->cpuid = kvm->arch.model.cpuid;
1147 	proc->ibc = kvm->arch.model.ibc;
1148 	memcpy(&proc->fac_list, kvm->arch.model.fac_list,
1149 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1150 	VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx",
1151 		 kvm->arch.model.ibc,
1152 		 kvm->arch.model.cpuid);
1153 	VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx",
1154 		 kvm->arch.model.fac_list[0],
1155 		 kvm->arch.model.fac_list[1],
1156 		 kvm->arch.model.fac_list[2]);
1157 	if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc)))
1158 		ret = -EFAULT;
1159 	kfree(proc);
1160 out:
1161 	return ret;
1162 }
1163 
1164 static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr)
1165 {
1166 	struct kvm_s390_vm_cpu_machine *mach;
1167 	int ret = 0;
1168 
1169 	mach = kzalloc(sizeof(*mach), GFP_KERNEL);
1170 	if (!mach) {
1171 		ret = -ENOMEM;
1172 		goto out;
1173 	}
1174 	get_cpu_id((struct cpuid *) &mach->cpuid);
1175 	mach->ibc = sclp.ibc;
1176 	memcpy(&mach->fac_mask, kvm->arch.model.fac_mask,
1177 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1178 	memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list,
1179 	       sizeof(S390_lowcore.stfle_fac_list));
1180 	VM_EVENT(kvm, 3, "GET: host ibc:  0x%4.4x, host cpuid:  0x%16.16llx",
1181 		 kvm->arch.model.ibc,
1182 		 kvm->arch.model.cpuid);
1183 	VM_EVENT(kvm, 3, "GET: host facmask:  0x%16.16llx.%16.16llx.%16.16llx",
1184 		 mach->fac_mask[0],
1185 		 mach->fac_mask[1],
1186 		 mach->fac_mask[2]);
1187 	VM_EVENT(kvm, 3, "GET: host faclist:  0x%16.16llx.%16.16llx.%16.16llx",
1188 		 mach->fac_list[0],
1189 		 mach->fac_list[1],
1190 		 mach->fac_list[2]);
1191 	if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach)))
1192 		ret = -EFAULT;
1193 	kfree(mach);
1194 out:
1195 	return ret;
1196 }
1197 
1198 static int kvm_s390_get_processor_feat(struct kvm *kvm,
1199 				       struct kvm_device_attr *attr)
1200 {
1201 	struct kvm_s390_vm_cpu_feat data;
1202 
1203 	bitmap_copy((unsigned long *) data.feat, kvm->arch.cpu_feat,
1204 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1205 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1206 		return -EFAULT;
1207 	return 0;
1208 }
1209 
1210 static int kvm_s390_get_machine_feat(struct kvm *kvm,
1211 				     struct kvm_device_attr *attr)
1212 {
1213 	struct kvm_s390_vm_cpu_feat data;
1214 
1215 	bitmap_copy((unsigned long *) data.feat,
1216 		    kvm_s390_available_cpu_feat,
1217 		    KVM_S390_VM_CPU_FEAT_NR_BITS);
1218 	if (copy_to_user((void __user *)attr->addr, &data, sizeof(data)))
1219 		return -EFAULT;
1220 	return 0;
1221 }
1222 
1223 static int kvm_s390_get_processor_subfunc(struct kvm *kvm,
1224 					  struct kvm_device_attr *attr)
1225 {
1226 	/*
1227 	 * Once we can actually configure subfunctions (kernel + hw support),
1228 	 * we have to check whether they were already set by user space and,
1229 	 * if so, copy them from kvm->arch.
1230 	 */
1231 	return -ENXIO;
1232 }
1233 
1234 static int kvm_s390_get_machine_subfunc(struct kvm *kvm,
1235 					struct kvm_device_attr *attr)
1236 {
1237 	if (copy_to_user((void __user *)attr->addr, &kvm_s390_available_subfunc,
1238 	    sizeof(struct kvm_s390_vm_cpu_subfunc)))
1239 		return -EFAULT;
1240 	return 0;
1241 }
1242 static int kvm_s390_get_cpu_model(struct kvm *kvm, struct kvm_device_attr *attr)
1243 {
1244 	int ret = -ENXIO;
1245 
1246 	switch (attr->attr) {
1247 	case KVM_S390_VM_CPU_PROCESSOR:
1248 		ret = kvm_s390_get_processor(kvm, attr);
1249 		break;
1250 	case KVM_S390_VM_CPU_MACHINE:
1251 		ret = kvm_s390_get_machine(kvm, attr);
1252 		break;
1253 	case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1254 		ret = kvm_s390_get_processor_feat(kvm, attr);
1255 		break;
1256 	case KVM_S390_VM_CPU_MACHINE_FEAT:
1257 		ret = kvm_s390_get_machine_feat(kvm, attr);
1258 		break;
1259 	case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1260 		ret = kvm_s390_get_processor_subfunc(kvm, attr);
1261 		break;
1262 	case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1263 		ret = kvm_s390_get_machine_subfunc(kvm, attr);
1264 		break;
1265 	}
1266 	return ret;
1267 }
1268 
1269 static int kvm_s390_vm_set_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1270 {
1271 	int ret;
1272 
1273 	switch (attr->group) {
1274 	case KVM_S390_VM_MEM_CTRL:
1275 		ret = kvm_s390_set_mem_control(kvm, attr);
1276 		break;
1277 	case KVM_S390_VM_TOD:
1278 		ret = kvm_s390_set_tod(kvm, attr);
1279 		break;
1280 	case KVM_S390_VM_CPU_MODEL:
1281 		ret = kvm_s390_set_cpu_model(kvm, attr);
1282 		break;
1283 	case KVM_S390_VM_CRYPTO:
1284 		ret = kvm_s390_vm_set_crypto(kvm, attr);
1285 		break;
1286 	case KVM_S390_VM_MIGRATION:
1287 		ret = kvm_s390_vm_set_migration(kvm, attr);
1288 		break;
1289 	default:
1290 		ret = -ENXIO;
1291 		break;
1292 	}
1293 
1294 	return ret;
1295 }
1296 
1297 static int kvm_s390_vm_get_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1298 {
1299 	int ret;
1300 
1301 	switch (attr->group) {
1302 	case KVM_S390_VM_MEM_CTRL:
1303 		ret = kvm_s390_get_mem_control(kvm, attr);
1304 		break;
1305 	case KVM_S390_VM_TOD:
1306 		ret = kvm_s390_get_tod(kvm, attr);
1307 		break;
1308 	case KVM_S390_VM_CPU_MODEL:
1309 		ret = kvm_s390_get_cpu_model(kvm, attr);
1310 		break;
1311 	case KVM_S390_VM_MIGRATION:
1312 		ret = kvm_s390_vm_get_migration(kvm, attr);
1313 		break;
1314 	default:
1315 		ret = -ENXIO;
1316 		break;
1317 	}
1318 
1319 	return ret;
1320 }
1321 
1322 static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
1323 {
1324 	int ret;
1325 
1326 	switch (attr->group) {
1327 	case KVM_S390_VM_MEM_CTRL:
1328 		switch (attr->attr) {
1329 		case KVM_S390_VM_MEM_ENABLE_CMMA:
1330 		case KVM_S390_VM_MEM_CLR_CMMA:
1331 			ret = sclp.has_cmma ? 0 : -ENXIO;
1332 			break;
1333 		case KVM_S390_VM_MEM_LIMIT_SIZE:
1334 			ret = 0;
1335 			break;
1336 		default:
1337 			ret = -ENXIO;
1338 			break;
1339 		}
1340 		break;
1341 	case KVM_S390_VM_TOD:
1342 		switch (attr->attr) {
1343 		case KVM_S390_VM_TOD_LOW:
1344 		case KVM_S390_VM_TOD_HIGH:
1345 			ret = 0;
1346 			break;
1347 		default:
1348 			ret = -ENXIO;
1349 			break;
1350 		}
1351 		break;
1352 	case KVM_S390_VM_CPU_MODEL:
1353 		switch (attr->attr) {
1354 		case KVM_S390_VM_CPU_PROCESSOR:
1355 		case KVM_S390_VM_CPU_MACHINE:
1356 		case KVM_S390_VM_CPU_PROCESSOR_FEAT:
1357 		case KVM_S390_VM_CPU_MACHINE_FEAT:
1358 		case KVM_S390_VM_CPU_MACHINE_SUBFUNC:
1359 			ret = 0;
1360 			break;
1361 		/* configuring subfunctions is not supported yet */
1362 		case KVM_S390_VM_CPU_PROCESSOR_SUBFUNC:
1363 		default:
1364 			ret = -ENXIO;
1365 			break;
1366 		}
1367 		break;
1368 	case KVM_S390_VM_CRYPTO:
1369 		switch (attr->attr) {
1370 		case KVM_S390_VM_CRYPTO_ENABLE_AES_KW:
1371 		case KVM_S390_VM_CRYPTO_ENABLE_DEA_KW:
1372 		case KVM_S390_VM_CRYPTO_DISABLE_AES_KW:
1373 		case KVM_S390_VM_CRYPTO_DISABLE_DEA_KW:
1374 			ret = 0;
1375 			break;
1376 		default:
1377 			ret = -ENXIO;
1378 			break;
1379 		}
1380 		break;
1381 	case KVM_S390_VM_MIGRATION:
1382 		ret = 0;
1383 		break;
1384 	default:
1385 		ret = -ENXIO;
1386 		break;
1387 	}
1388 
1389 	return ret;
1390 }
1391 
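/*
 * KVM_S390_GET_SKEYS: read the guest storage keys for a range of guest
 * frames into a user buffer.  Returns KVM_S390_GET_SKEYS_NONE if the
 * guest has never enabled storage keys.
 */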
1392 static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1393 {
1394 	uint8_t *keys;
1395 	uint64_t hva;
1396 	int srcu_idx, i, r = 0;
1397 
1398 	if (args->flags != 0)
1399 		return -EINVAL;
1400 
1401 	/* Is this guest using storage keys? */
1402 	if (!mm_use_skey(current->mm))
1403 		return KVM_S390_GET_SKEYS_NONE;
1404 
1405 	/* Enforce sane limit on memory allocation */
1406 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1407 		return -EINVAL;
1408 
1409 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1410 	if (!keys)
1411 		return -ENOMEM;
1412 
1413 	down_read(&current->mm->mmap_sem);
1414 	srcu_idx = srcu_read_lock(&kvm->srcu);
1415 	for (i = 0; i < args->count; i++) {
1416 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1417 		if (kvm_is_error_hva(hva)) {
1418 			r = -EFAULT;
1419 			break;
1420 		}
1421 
1422 		r = get_guest_storage_key(current->mm, hva, &keys[i]);
1423 		if (r)
1424 			break;
1425 	}
1426 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1427 	up_read(&current->mm->mmap_sem);
1428 
1429 	if (!r) {
1430 		r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
1431 				 sizeof(uint8_t) * args->count);
1432 		if (r)
1433 			r = -EFAULT;
1434 	}
1435 
1436 	kvfree(keys);
1437 	return r;
1438 }
1439 
1440 static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
1441 {
1442 	uint8_t *keys;
1443 	uint64_t hva;
1444 	int srcu_idx, i, r = 0;
1445 
1446 	if (args->flags != 0)
1447 		return -EINVAL;
1448 
1449 	/* Enforce sane limit on memory allocation */
1450 	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
1451 		return -EINVAL;
1452 
1453 	keys = kvmalloc_array(args->count, sizeof(uint8_t), GFP_KERNEL);
1454 	if (!keys)
1455 		return -ENOMEM;
1456 
1457 	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
1458 			   sizeof(uint8_t) * args->count);
1459 	if (r) {
1460 		r = -EFAULT;
1461 		goto out;
1462 	}
1463 
1464 	/* Enable storage key handling for the guest */
1465 	r = s390_enable_skey();
1466 	if (r)
1467 		goto out;
1468 
1469 	down_read(&current->mm->mmap_sem);
1470 	srcu_idx = srcu_read_lock(&kvm->srcu);
1471 	for (i = 0; i < args->count; i++) {
1472 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1473 		if (kvm_is_error_hva(hva)) {
1474 			r = -EFAULT;
1475 			break;
1476 		}
1477 
1478 		/* Lowest order bit is reserved */
1479 		if (keys[i] & 0x01) {
1480 			r = -EINVAL;
1481 			break;
1482 		}
1483 
1484 		r = set_guest_storage_key(current->mm, hva, keys[i], 0);
1485 		if (r)
1486 			break;
1487 	}
1488 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1489 	up_read(&current->mm->mmap_sem);
1490 out:
1491 	kvfree(keys);
1492 	return r;
1493 }
1494 
1495 /*
1496  * Base address and length must be sent at the start of each block, so it is
1497  * cheaper to send a short run of clean data inline, as long as that run is
1498  * smaller than the size of two longs.
1499  */
1500 #define KVM_S390_MAX_BIT_DISTANCE (2 * sizeof(void *))
1501 /* for consistency */
1502 #define KVM_S390_CMMA_SIZE_MAX ((u32)KVM_S390_SKEYS_MAX)
1503 
1504 /*
1505  * This function searches for the next page with dirty CMMA attributes, and
1506  * saves the attributes in the buffer up to either the end of the buffer or
1507  * until a block of at least KVM_S390_MAX_BIT_DISTANCE clean bits is found;
1508  * no trailing clean bytes are saved.
1509  * In case no dirty bits were found, or if CMMA was not enabled or used, the
1510  * output buffer will indicate 0 as length.
1511  */
1512 static int kvm_s390_get_cmma_bits(struct kvm *kvm,
1513 				  struct kvm_s390_cmma_log *args)
1514 {
1515 	struct kvm_s390_migration_state *s = kvm->arch.migration_state;
1516 	unsigned long bufsize, hva, pgstev, i, next, cur;
1517 	int srcu_idx, peek, r = 0, rr;
1518 	u8 *res;
1519 
1520 	cur = args->start_gfn;
1521 	i = next = pgstev = 0;
1522 
1523 	if (unlikely(!kvm->arch.use_cmma))
1524 		return -ENXIO;
1525 	/* Invalid/unsupported flags were specified */
1526 	if (args->flags & ~KVM_S390_CMMA_PEEK)
1527 		return -EINVAL;
1528 	/* Migration mode query, and we are not doing a migration */
1529 	peek = !!(args->flags & KVM_S390_CMMA_PEEK);
1530 	if (!peek && !s)
1531 		return -EINVAL;
1532 	/* CMMA is disabled or was not used, or the buffer has length zero */
1533 	bufsize = min(args->count, KVM_S390_CMMA_SIZE_MAX);
1534 	if (!bufsize || !kvm->mm->context.use_cmma) {
1535 		memset(args, 0, sizeof(*args));
1536 		return 0;
1537 	}
1538 
1539 	if (!peek) {
1540 		/* We are not peeking, and there are no dirty pages */
1541 		if (!atomic64_read(&s->dirty_pages)) {
1542 			memset(args, 0, sizeof(*args));
1543 			return 0;
1544 		}
1545 		cur = find_next_bit(s->pgste_bitmap, s->bitmap_size,
1546 				    args->start_gfn);
1547 		if (cur >= s->bitmap_size)	/* nothing found, loop back */
1548 			cur = find_next_bit(s->pgste_bitmap, s->bitmap_size, 0);
1549 		if (cur >= s->bitmap_size) {	/* again! (very unlikely) */
1550 			memset(args, 0, sizeof(*args));
1551 			return 0;
1552 		}
1553 		next = find_next_bit(s->pgste_bitmap, s->bitmap_size, cur + 1);
1554 	}
1555 
1556 	res = vmalloc(bufsize);
1557 	if (!res)
1558 		return -ENOMEM;
1559 
1560 	args->start_gfn = cur;
1561 
1562 	down_read(&kvm->mm->mmap_sem);
1563 	srcu_idx = srcu_read_lock(&kvm->srcu);
1564 	while (i < bufsize) {
1565 		hva = gfn_to_hva(kvm, cur);
1566 		if (kvm_is_error_hva(hva)) {
1567 			r = -EFAULT;
1568 			break;
1569 		}
1570 		/* decrement only if we actually flipped the bit to 0 */
1571 		if (!peek && test_and_clear_bit(cur, s->pgste_bitmap))
1572 			atomic64_dec(&s->dirty_pages);
1573 		r = get_pgste(kvm->mm, hva, &pgstev);
1574 		if (r < 0)
1575 			pgstev = 0;
1576 		/* save the value */
1577 		res[i++] = (pgstev >> 24) & 0x43;
1578 		/*
1579 		 * if the next bit is too far away, stop.
1580 		 * if we reached the previous "next", find the next one
1581 		 */
1582 		if (!peek) {
1583 			if (next > cur + KVM_S390_MAX_BIT_DISTANCE)
1584 				break;
1585 			if (cur == next)
1586 				next = find_next_bit(s->pgste_bitmap,
1587 						     s->bitmap_size, cur + 1);
1588 			/* reached the end of the bitmap or of the buffer, stop */
1589 			if ((next >= s->bitmap_size) ||
1590 			    (next >= args->start_gfn + bufsize))
1591 				break;
1592 		}
1593 		cur++;
1594 	}
1595 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1596 	up_read(&kvm->mm->mmap_sem);
1597 	args->count = i;
1598 	args->remaining = s ? atomic64_read(&s->dirty_pages) : 0;
1599 
1600 	rr = copy_to_user((void __user *)args->values, res, args->count);
1601 	if (rr)
1602 		r = -EFAULT;
1603 
1604 	vfree(res);
1605 	return r;
1606 }
1607 
1608 /*
1609  * This function sets the CMMA attributes for the given pages. If the input
1610  * buffer has zero length, no action is taken, otherwise the attributes are
1611  * set and the mm->context.use_cmma flag is set.
1612  */
1613 static int kvm_s390_set_cmma_bits(struct kvm *kvm,
1614 				  const struct kvm_s390_cmma_log *args)
1615 {
1616 	unsigned long hva, mask, pgstev, i;
1617 	uint8_t *bits;
1618 	int srcu_idx, r = 0;
1619 
1620 	mask = args->mask;
1621 
1622 	if (!kvm->arch.use_cmma)
1623 		return -ENXIO;
1624 	/* invalid/unsupported flags */
1625 	if (args->flags != 0)
1626 		return -EINVAL;
1627 	/* Enforce sane limit on memory allocation */
1628 	if (args->count > KVM_S390_CMMA_SIZE_MAX)
1629 		return -EINVAL;
1630 	/* Nothing to do */
1631 	if (args->count == 0)
1632 		return 0;
1633 
1634 	bits = vmalloc(sizeof(*bits) * args->count);
1635 	if (!bits)
1636 		return -ENOMEM;
1637 
1638 	r = copy_from_user(bits, (void __user *)args->values, args->count);
1639 	if (r) {
1640 		r = -EFAULT;
1641 		goto out;
1642 	}
1643 
1644 	down_read(&kvm->mm->mmap_sem);
1645 	srcu_idx = srcu_read_lock(&kvm->srcu);
1646 	for (i = 0; i < args->count; i++) {
1647 		hva = gfn_to_hva(kvm, args->start_gfn + i);
1648 		if (kvm_is_error_hva(hva)) {
1649 			r = -EFAULT;
1650 			break;
1651 		}
1652 
1653 		pgstev = bits[i];
1654 		pgstev = pgstev << 24;
1655 		mask &= _PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT;
1656 		set_pgste_bits(kvm->mm, hva, mask, pgstev);
1657 	}
1658 	srcu_read_unlock(&kvm->srcu, srcu_idx);
1659 	up_read(&kvm->mm->mmap_sem);
1660 
1661 	if (!kvm->mm->context.use_cmma) {
1662 		down_write(&kvm->mm->mmap_sem);
1663 		kvm->mm->context.use_cmma = 1;
1664 		up_write(&kvm->mm->mmap_sem);
1665 	}
1666 out:
1667 	vfree(bits);
1668 	return r;
1669 }
1670 
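/*
 * Dispatcher for the VM (as opposed to VCPU) specific ioctls.
 */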
1671 long kvm_arch_vm_ioctl(struct file *filp,
1672 		       unsigned int ioctl, unsigned long arg)
1673 {
1674 	struct kvm *kvm = filp->private_data;
1675 	void __user *argp = (void __user *)arg;
1676 	struct kvm_device_attr attr;
1677 	int r;
1678 
1679 	switch (ioctl) {
1680 	case KVM_S390_INTERRUPT: {
1681 		struct kvm_s390_interrupt s390int;
1682 
1683 		r = -EFAULT;
1684 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
1685 			break;
1686 		r = kvm_s390_inject_vm(kvm, &s390int);
1687 		break;
1688 	}
1689 	case KVM_ENABLE_CAP: {
1690 		struct kvm_enable_cap cap;
1691 		r = -EFAULT;
1692 		if (copy_from_user(&cap, argp, sizeof(cap)))
1693 			break;
1694 		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
1695 		break;
1696 	}
1697 	case KVM_CREATE_IRQCHIP: {
1698 		struct kvm_irq_routing_entry routing;
1699 
1700 		r = -EINVAL;
1701 		if (kvm->arch.use_irqchip) {
1702 			/* Set up dummy routing. */
1703 			memset(&routing, 0, sizeof(routing));
1704 			r = kvm_set_irq_routing(kvm, &routing, 0, 0);
1705 		}
1706 		break;
1707 	}
1708 	case KVM_SET_DEVICE_ATTR: {
1709 		r = -EFAULT;
1710 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1711 			break;
1712 		r = kvm_s390_vm_set_attr(kvm, &attr);
1713 		break;
1714 	}
1715 	case KVM_GET_DEVICE_ATTR: {
1716 		r = -EFAULT;
1717 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1718 			break;
1719 		r = kvm_s390_vm_get_attr(kvm, &attr);
1720 		break;
1721 	}
1722 	case KVM_HAS_DEVICE_ATTR: {
1723 		r = -EFAULT;
1724 		if (copy_from_user(&attr, (void __user *)arg, sizeof(attr)))
1725 			break;
1726 		r = kvm_s390_vm_has_attr(kvm, &attr);
1727 		break;
1728 	}
1729 	case KVM_S390_GET_SKEYS: {
1730 		struct kvm_s390_skeys args;
1731 
1732 		r = -EFAULT;
1733 		if (copy_from_user(&args, argp,
1734 				   sizeof(struct kvm_s390_skeys)))
1735 			break;
1736 		r = kvm_s390_get_skeys(kvm, &args);
1737 		break;
1738 	}
1739 	case KVM_S390_SET_SKEYS: {
1740 		struct kvm_s390_skeys args;
1741 
1742 		r = -EFAULT;
1743 		if (copy_from_user(&args, argp,
1744 				   sizeof(struct kvm_s390_skeys)))
1745 			break;
1746 		r = kvm_s390_set_skeys(kvm, &args);
1747 		break;
1748 	}
1749 	case KVM_S390_GET_CMMA_BITS: {
1750 		struct kvm_s390_cmma_log args;
1751 
1752 		r = -EFAULT;
1753 		if (copy_from_user(&args, argp, sizeof(args)))
1754 			break;
1755 		r = kvm_s390_get_cmma_bits(kvm, &args);
1756 		if (!r) {
1757 			r = copy_to_user(argp, &args, sizeof(args));
1758 			if (r)
1759 				r = -EFAULT;
1760 		}
1761 		break;
1762 	}
1763 	case KVM_S390_SET_CMMA_BITS: {
1764 		struct kvm_s390_cmma_log args;
1765 
1766 		r = -EFAULT;
1767 		if (copy_from_user(&args, argp, sizeof(args)))
1768 			break;
1769 		r = kvm_s390_set_cmma_bits(kvm, &args);
1770 		break;
1771 	}
1772 	default:
1773 		r = -ENOTTY;
1774 	}
1775 
1776 	return r;
1777 }
1778 
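/*
 * Query the adjunct processor (AP) configuration via PQAP(QCI) into a
 * 128 byte buffer.  Returns the condition code; non-zero means the query
 * failed.
 */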
1779 static int kvm_s390_query_ap_config(u8 *config)
1780 {
1781 	u32 fcn_code = 0x04000000UL;
1782 	u32 cc = 0;
1783 
1784 	memset(config, 0, 128);
1785 	asm volatile(
1786 		"lgr 0,%1\n"
1787 		"lgr 2,%2\n"
1788 		".long 0xb2af0000\n"		/* PQAP(QCI) */
1789 		"0: ipm %0\n"
1790 		"srl %0,28\n"
1791 		"1:\n"
1792 		EX_TABLE(0b, 1b)
1793 		: "+r" (cc)
1794 		: "r" (fcn_code), "r" (config)
1795 		: "cc", "0", "2", "memory"
1796 	);
1797 
1798 	return cc;
1799 }
1800 
1801 static int kvm_s390_apxa_installed(void)
1802 {
1803 	u8 config[128];
1804 	int cc;
1805 
1806 	if (test_facility(12)) {
1807 		cc = kvm_s390_query_ap_config(config);
1808 
1809 		if (cc)
1810 			pr_err("PQAP(QCI) failed with cc=%d", cc);
1811 		else
1812 			return config[0] & 0x40;
1813 	}
1814 
1815 	return 0;
1816 }
1817 
1818 static void kvm_s390_set_crycb_format(struct kvm *kvm)
1819 {
1820 	kvm->arch.crypto.crycbd = (__u32)(unsigned long) kvm->arch.crypto.crycb;
1821 
1822 	if (kvm_s390_apxa_installed())
1823 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT2;
1824 	else
1825 		kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
1826 }
1827 
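/*
 * Default guest cpuid: the host cpuid with the version byte forced to
 * 0xff instead of the real machine version code.
 */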
1828 static u64 kvm_s390_get_initial_cpuid(void)
1829 {
1830 	struct cpuid cpuid;
1831 
1832 	get_cpu_id(&cpuid);
1833 	cpuid.version = 0xff;
1834 	return *((u64 *) &cpuid);
1835 }
1836 
1837 static void kvm_s390_crypto_init(struct kvm *kvm)
1838 {
1839 	if (!test_kvm_facility(kvm, 76))
1840 		return;
1841 
1842 	kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
1843 	kvm_s390_set_crycb_format(kvm);
1844 
1845 	/* Enable AES/DEA protected key functions by default */
1846 	kvm->arch.crypto.aes_kw = 1;
1847 	kvm->arch.crypto.dea_kw = 1;
1848 	get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask,
1849 			 sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask));
1850 	get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask,
1851 			 sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask));
1852 }
1853 
1854 static void sca_dispose(struct kvm *kvm)
1855 {
1856 	if (kvm->arch.use_esca)
1857 		free_pages_exact(kvm->arch.sca, sizeof(struct esca_block));
1858 	else
1859 		free_page((unsigned long)(kvm->arch.sca));
1860 	kvm->arch.sca = NULL;
1861 }
1862 
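/*
 * Create a new VM: allocate the basic SCA at a rotating offset within its
 * page, set up the per-VM debug area, derive the guest facility mask and
 * list from the host, initialize crypto and floating interrupt state and,
 * unless this is a ucontrol VM, create the guest address space (gmap).
 */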
1863 int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
1864 {
1865 	gfp_t alloc_flags = GFP_KERNEL;
1866 	int i, rc;
1867 	char debug_name[16];
1868 	static unsigned long sca_offset;
1869 
1870 	rc = -EINVAL;
1871 #ifdef CONFIG_KVM_S390_UCONTROL
1872 	if (type & ~KVM_VM_S390_UCONTROL)
1873 		goto out_err;
1874 	if ((type & KVM_VM_S390_UCONTROL) && (!capable(CAP_SYS_ADMIN)))
1875 		goto out_err;
1876 #else
1877 	if (type)
1878 		goto out_err;
1879 #endif
1880 
1881 	rc = s390_enable_sie();
1882 	if (rc)
1883 		goto out_err;
1884 
1885 	rc = -ENOMEM;
1886 
1887 	ratelimit_state_init(&kvm->arch.sthyi_limit, 5 * HZ, 500);
1888 
1889 	kvm->arch.use_esca = 0; /* start with basic SCA */
1890 	if (!sclp.has_64bscao)
1891 		alloc_flags |= GFP_DMA;
1892 	rwlock_init(&kvm->arch.sca_lock);
1893 	kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
1894 	if (!kvm->arch.sca)
1895 		goto out_err;
1896 	spin_lock(&kvm_lock);
1897 	sca_offset += 16;
1898 	if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
1899 		sca_offset = 0;
1900 	kvm->arch.sca = (struct bsca_block *)
1901 			((char *) kvm->arch.sca + sca_offset);
1902 	spin_unlock(&kvm_lock);
1903 
1904 	sprintf(debug_name, "kvm-%u", current->pid);
1905 
1906 	kvm->arch.dbf = debug_register(debug_name, 32, 1, 7 * sizeof(long));
1907 	if (!kvm->arch.dbf)
1908 		goto out_err;
1909 
1910 	kvm->arch.sie_page2 =
1911 	     (struct sie_page2 *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
1912 	if (!kvm->arch.sie_page2)
1913 		goto out_err;
1914 
1915 	/* Populate the facility mask initially. */
1916 	memcpy(kvm->arch.model.fac_mask, S390_lowcore.stfle_fac_list,
1917 	       sizeof(S390_lowcore.stfle_fac_list));
1918 	for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) {
1919 		if (i < kvm_s390_fac_list_mask_size())
1920 			kvm->arch.model.fac_mask[i] &= kvm_s390_fac_list_mask[i];
1921 		else
1922 			kvm->arch.model.fac_mask[i] = 0UL;
1923 	}
1924 
1925 	/* Populate the facility list initially. */
1926 	kvm->arch.model.fac_list = kvm->arch.sie_page2->fac_list;
1927 	memcpy(kvm->arch.model.fac_list, kvm->arch.model.fac_mask,
1928 	       S390_ARCH_FAC_LIST_SIZE_BYTE);
1929 
1930 	/* we are always in czam mode - even on pre-z14 machines */
1931 	set_kvm_facility(kvm->arch.model.fac_mask, 138);
1932 	set_kvm_facility(kvm->arch.model.fac_list, 138);
1933 	/* we emulate STHYI in kvm */
1934 	set_kvm_facility(kvm->arch.model.fac_mask, 74);
1935 	set_kvm_facility(kvm->arch.model.fac_list, 74);
1936 	if (MACHINE_HAS_TLB_GUEST) {
1937 		set_kvm_facility(kvm->arch.model.fac_mask, 147);
1938 		set_kvm_facility(kvm->arch.model.fac_list, 147);
1939 	}
1940 
1941 	kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
1942 	kvm->arch.model.ibc = sclp.ibc & 0x0fff;
1943 
1944 	kvm_s390_crypto_init(kvm);
1945 
1946 	mutex_init(&kvm->arch.float_int.ais_lock);
1947 	kvm->arch.float_int.simm = 0;
1948 	kvm->arch.float_int.nimm = 0;
1949 	spin_lock_init(&kvm->arch.float_int.lock);
1950 	for (i = 0; i < FIRQ_LIST_COUNT; i++)
1951 		INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
1952 	init_waitqueue_head(&kvm->arch.ipte_wq);
1953 	mutex_init(&kvm->arch.ipte_mutex);
1954 
1955 	debug_register_view(kvm->arch.dbf, &debug_sprintf_view);
1956 	VM_EVENT(kvm, 3, "vm created with type %lu", type);
1957 
1958 	if (type & KVM_VM_S390_UCONTROL) {
1959 		kvm->arch.gmap = NULL;
1960 		kvm->arch.mem_limit = KVM_S390_NO_MEM_LIMIT;
1961 	} else {
1962 		if (sclp.hamax == U64_MAX)
1963 			kvm->arch.mem_limit = TASK_SIZE_MAX;
1964 		else
1965 			kvm->arch.mem_limit = min_t(unsigned long, TASK_SIZE_MAX,
1966 						    sclp.hamax + 1);
1967 		kvm->arch.gmap = gmap_create(current->mm, kvm->arch.mem_limit - 1);
1968 		if (!kvm->arch.gmap)
1969 			goto out_err;
1970 		kvm->arch.gmap->private = kvm;
1971 		kvm->arch.gmap->pfault_enabled = 0;
1972 	}
1973 
1974 	kvm->arch.css_support = 0;
1975 	kvm->arch.use_irqchip = 0;
1976 	kvm->arch.epoch = 0;
1977 
1978 	spin_lock_init(&kvm->arch.start_stop_lock);
1979 	kvm_s390_vsie_init(kvm);
1980 	KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid);
1981 
1982 	return 0;
1983 out_err:
1984 	free_page((unsigned long)kvm->arch.sie_page2);
1985 	debug_unregister(kvm->arch.dbf);
1986 	sca_dispose(kvm);
1987 	KVM_EVENT(3, "creation of vm failed: %d", rc);
1988 	return rc;
1989 }
1990 
1991 bool kvm_arch_has_vcpu_debugfs(void)
1992 {
1993 	return false;
1994 }
1995 
1996 int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu)
1997 {
1998 	return 0;
1999 }
2000 
2001 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
2002 {
2003 	VCPU_EVENT(vcpu, 3, "%s", "free cpu");
2004 	trace_kvm_s390_destroy_vcpu(vcpu->vcpu_id);
2005 	kvm_s390_clear_local_irqs(vcpu);
2006 	kvm_clear_async_pf_completion_queue(vcpu);
2007 	if (!kvm_is_ucontrol(vcpu->kvm))
2008 		sca_del_vcpu(vcpu);
2009 
2010 	if (kvm_is_ucontrol(vcpu->kvm))
2011 		gmap_remove(vcpu->arch.gmap);
2012 
2013 	if (vcpu->kvm->arch.use_cmma)
2014 		kvm_s390_vcpu_unsetup_cmma(vcpu);
2015 	free_page((unsigned long)(vcpu->arch.sie_block));
2016 
2017 	kvm_vcpu_uninit(vcpu);
2018 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2019 }
2020 
2021 static void kvm_free_vcpus(struct kvm *kvm)
2022 {
2023 	unsigned int i;
2024 	struct kvm_vcpu *vcpu;
2025 
2026 	kvm_for_each_vcpu(i, vcpu, kvm)
2027 		kvm_arch_vcpu_destroy(vcpu);
2028 
2029 	mutex_lock(&kvm->lock);
2030 	for (i = 0; i < atomic_read(&kvm->online_vcpus); i++)
2031 		kvm->vcpus[i] = NULL;
2032 
2033 	atomic_set(&kvm->online_vcpus, 0);
2034 	mutex_unlock(&kvm->lock);
2035 }
2036 
2037 void kvm_arch_destroy_vm(struct kvm *kvm)
2038 {
2039 	kvm_free_vcpus(kvm);
2040 	sca_dispose(kvm);
2041 	debug_unregister(kvm->arch.dbf);
2042 	free_page((unsigned long)kvm->arch.sie_page2);
2043 	if (!kvm_is_ucontrol(kvm))
2044 		gmap_remove(kvm->arch.gmap);
2045 	kvm_s390_destroy_adapters(kvm);
2046 	kvm_s390_clear_float_irqs(kvm);
2047 	kvm_s390_vsie_destroy(kvm);
2048 	if (kvm->arch.migration_state) {
2049 		vfree(kvm->arch.migration_state->pgste_bitmap);
2050 		kfree(kvm->arch.migration_state);
2051 	}
2052 	KVM_EVENT(3, "vm 0x%pK destroyed", kvm);
2053 }
2054 
2055 /* Section: vcpu related */
2056 static int __kvm_ucontrol_vcpu_init(struct kvm_vcpu *vcpu)
2057 {
2058 	vcpu->arch.gmap = gmap_create(current->mm, -1UL);
2059 	if (!vcpu->arch.gmap)
2060 		return -ENOMEM;
2061 	vcpu->arch.gmap->private = vcpu->kvm;
2062 
2063 	return 0;
2064 }
2065 
2066 static void sca_del_vcpu(struct kvm_vcpu *vcpu)
2067 {
2068 	if (!kvm_s390_use_sca_entries())
2069 		return;
2070 	read_lock(&vcpu->kvm->arch.sca_lock);
2071 	if (vcpu->kvm->arch.use_esca) {
2072 		struct esca_block *sca = vcpu->kvm->arch.sca;
2073 
2074 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2075 		sca->cpu[vcpu->vcpu_id].sda = 0;
2076 	} else {
2077 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2078 
2079 		clear_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2080 		sca->cpu[vcpu->vcpu_id].sda = 0;
2081 	}
2082 	read_unlock(&vcpu->kvm->arch.sca_lock);
2083 }
2084 
2085 static void sca_add_vcpu(struct kvm_vcpu *vcpu)
2086 {
2087 	if (!kvm_s390_use_sca_entries()) {
2088 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2089 
2090 		/* we still need the basic sca for the ipte control */
2091 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2092 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2093 	}
2094 	read_lock(&vcpu->kvm->arch.sca_lock);
2095 	if (vcpu->kvm->arch.use_esca) {
2096 		struct esca_block *sca = vcpu->kvm->arch.sca;
2097 
2098 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2099 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2100 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
2101 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2102 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
2103 	} else {
2104 		struct bsca_block *sca = vcpu->kvm->arch.sca;
2105 
2106 		sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
2107 		vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
2108 		vcpu->arch.sie_block->scaol = (__u32)(__u64)sca;
2109 		set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn);
2110 	}
2111 	read_unlock(&vcpu->kvm->arch.sca_lock);
2112 }
2113 
2114 /* Basic SCA to Extended SCA data copy routines */
2115 static inline void sca_copy_entry(struct esca_entry *d, struct bsca_entry *s)
2116 {
2117 	d->sda = s->sda;
2118 	d->sigp_ctrl.c = s->sigp_ctrl.c;
2119 	d->sigp_ctrl.scn = s->sigp_ctrl.scn;
2120 }
2121 
2122 static void sca_copy_b_to_e(struct esca_block *d, struct bsca_block *s)
2123 {
2124 	int i;
2125 
2126 	d->ipte_control = s->ipte_control;
2127 	d->mcn[0] = s->mcn;
2128 	for (i = 0; i < KVM_S390_BSCA_CPU_SLOTS; i++)
2129 		sca_copy_entry(&d->cpu[i], &s->cpu[i]);
2130 }
2131 
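/*
 * Replace the basic SCA by an extended SCA. All vcpus are blocked and the
 * SCA lock is held for writing while the new block is populated and the
 * SCA origin in every SIE control block is updated, so SIE never sees a
 * half-initialized SCA.
 */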
2132 static int sca_switch_to_extended(struct kvm *kvm)
2133 {
2134 	struct bsca_block *old_sca = kvm->arch.sca;
2135 	struct esca_block *new_sca;
2136 	struct kvm_vcpu *vcpu;
2137 	unsigned int vcpu_idx;
2138 	u32 scaol, scaoh;
2139 
2140 	new_sca = alloc_pages_exact(sizeof(*new_sca), GFP_KERNEL|__GFP_ZERO);
2141 	if (!new_sca)
2142 		return -ENOMEM;
2143 
2144 	scaoh = (u32)((u64)(new_sca) >> 32);
2145 	scaol = (u32)(u64)(new_sca) & ~0x3fU;
2146 
2147 	kvm_s390_vcpu_block_all(kvm);
2148 	write_lock(&kvm->arch.sca_lock);
2149 
2150 	sca_copy_b_to_e(new_sca, old_sca);
2151 
2152 	kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
2153 		vcpu->arch.sie_block->scaoh = scaoh;
2154 		vcpu->arch.sie_block->scaol = scaol;
2155 		vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
2156 	}
2157 	kvm->arch.sca = new_sca;
2158 	kvm->arch.use_esca = 1;
2159 
2160 	write_unlock(&kvm->arch.sca_lock);
2161 	kvm_s390_vcpu_unblock_all(kvm);
2162 
2163 	free_page((unsigned long)old_sca);
2164 
2165 	VM_EVENT(kvm, 2, "Switched to ESCA (0x%pK -> 0x%pK)",
2166 		 old_sca, kvm->arch.sca);
2167 	return 0;
2168 }
2169 
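/*
 * Check whether a vcpu with the given id fits into the SCA, switching
 * from the basic to the extended SCA on demand if the hardware supports it.
 */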
2170 static int sca_can_add_vcpu(struct kvm *kvm, unsigned int id)
2171 {
2172 	int rc;
2173 
2174 	if (!kvm_s390_use_sca_entries()) {
2175 		if (id < KVM_MAX_VCPUS)
2176 			return true;
2177 		return false;
2178 	}
2179 	if (id < KVM_S390_BSCA_CPU_SLOTS)
2180 		return true;
2181 	if (!sclp.has_esca || !sclp.has_64bscao)
2182 		return false;
2183 
2184 	mutex_lock(&kvm->lock);
2185 	rc = kvm->arch.use_esca ? 0 : sca_switch_to_extended(kvm);
2186 	mutex_unlock(&kvm->lock);
2187 
2188 	return rc == 0 && id < KVM_S390_ESCA_CPU_SLOTS;
2189 }
2190 
2191 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
2192 {
2193 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2194 	kvm_clear_async_pf_completion_queue(vcpu);
2195 	vcpu->run->kvm_valid_regs = KVM_SYNC_PREFIX |
2196 				    KVM_SYNC_GPRS |
2197 				    KVM_SYNC_ACRS |
2198 				    KVM_SYNC_CRS |
2199 				    KVM_SYNC_ARCH0 |
2200 				    KVM_SYNC_PFAULT;
2201 	kvm_s390_set_prefix(vcpu, 0);
2202 	if (test_kvm_facility(vcpu->kvm, 64))
2203 		vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
2204 	if (test_kvm_facility(vcpu->kvm, 133))
2205 		vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
2206 	/* fprs can be synchronized via vrs, even if the guest has no vx. With
2207 	 * MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
2208 	 */
2209 	if (MACHINE_HAS_VX)
2210 		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
2211 	else
2212 		vcpu->run->kvm_valid_regs |= KVM_SYNC_FPRS;
2213 
2214 	if (kvm_is_ucontrol(vcpu->kvm))
2215 		return __kvm_ucontrol_vcpu_init(vcpu);
2216 
2217 	return 0;
2218 }
2219 
2220 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2221 static void __start_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2222 {
2223 	WARN_ON_ONCE(vcpu->arch.cputm_start != 0);
2224 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2225 	vcpu->arch.cputm_start = get_tod_clock_fast();
2226 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2227 }
2228 
2229 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2230 static void __stop_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2231 {
2232 	WARN_ON_ONCE(vcpu->arch.cputm_start == 0);
2233 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2234 	vcpu->arch.sie_block->cputm -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2235 	vcpu->arch.cputm_start = 0;
2236 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2237 }
2238 
2239 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2240 static void __enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2241 {
2242 	WARN_ON_ONCE(vcpu->arch.cputm_enabled);
2243 	vcpu->arch.cputm_enabled = true;
2244 	__start_cpu_timer_accounting(vcpu);
2245 }
2246 
2247 /* needs disabled preemption to protect from TOD sync and vcpu_load/put */
2248 static void __disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2249 {
2250 	WARN_ON_ONCE(!vcpu->arch.cputm_enabled);
2251 	__stop_cpu_timer_accounting(vcpu);
2252 	vcpu->arch.cputm_enabled = false;
2253 }
2254 
2255 static void enable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2256 {
2257 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2258 	__enable_cpu_timer_accounting(vcpu);
2259 	preempt_enable();
2260 }
2261 
2262 static void disable_cpu_timer_accounting(struct kvm_vcpu *vcpu)
2263 {
2264 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2265 	__disable_cpu_timer_accounting(vcpu);
2266 	preempt_enable();
2267 }
2268 
2269 /* set the cpu timer - may only be called from the VCPU thread itself */
2270 void kvm_s390_set_cpu_timer(struct kvm_vcpu *vcpu, __u64 cputm)
2271 {
2272 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2273 	raw_write_seqcount_begin(&vcpu->arch.cputm_seqcount);
2274 	if (vcpu->arch.cputm_enabled)
2275 		vcpu->arch.cputm_start = get_tod_clock_fast();
2276 	vcpu->arch.sie_block->cputm = cputm;
2277 	raw_write_seqcount_end(&vcpu->arch.cputm_seqcount);
2278 	preempt_enable();
2279 }
2280 
2281 /* update and get the cpu timer - can also be called from other VCPU threads */
2282 __u64 kvm_s390_get_cpu_timer(struct kvm_vcpu *vcpu)
2283 {
2284 	unsigned int seq;
2285 	__u64 value;
2286 
2287 	if (unlikely(!vcpu->arch.cputm_enabled))
2288 		return vcpu->arch.sie_block->cputm;
2289 
2290 	preempt_disable(); /* protect from TOD sync and vcpu_load/put */
2291 	do {
2292 		seq = raw_read_seqcount(&vcpu->arch.cputm_seqcount);
2293 		/*
2294 		 * If the writer would ever execute a read in the critical
2295 		 * section, e.g. in irq context, we have a deadlock.
2296 		 */
2297 		WARN_ON_ONCE((seq & 1) && smp_processor_id() == vcpu->cpu);
2298 		value = vcpu->arch.sie_block->cputm;
2299 		/* if cputm_start is 0, accounting is being started/stopped */
2300 		if (likely(vcpu->arch.cputm_start))
2301 			value -= get_tod_clock_fast() - vcpu->arch.cputm_start;
2302 	} while (read_seqcount_retry(&vcpu->arch.cputm_seqcount, seq & ~1));
2303 	preempt_enable();
2304 	return value;
2305 }
2306 
2307 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
2308 {
2309 
2310 	gmap_enable(vcpu->arch.enabled_gmap);
2311 	atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2312 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2313 		__start_cpu_timer_accounting(vcpu);
2314 	vcpu->cpu = cpu;
2315 }
2316 
2317 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
2318 {
2319 	vcpu->cpu = -1;
2320 	if (vcpu->arch.cputm_enabled && !is_vcpu_idle(vcpu))
2321 		__stop_cpu_timer_accounting(vcpu);
2322 	atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
2323 	vcpu->arch.enabled_gmap = gmap_get_enabled();
2324 	gmap_disable(vcpu->arch.enabled_gmap);
2325 
2326 }
2327 
2328 static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
2329 {
2330 	/* this equals initial cpu reset in POP, but we don't switch to ESA */
2331 	vcpu->arch.sie_block->gpsw.mask = 0UL;
2332 	vcpu->arch.sie_block->gpsw.addr = 0UL;
2333 	kvm_s390_set_prefix(vcpu, 0);
2334 	kvm_s390_set_cpu_timer(vcpu, 0);
2335 	vcpu->arch.sie_block->ckc       = 0UL;
2336 	vcpu->arch.sie_block->todpr     = 0;
2337 	memset(vcpu->arch.sie_block->gcr, 0, 16 * sizeof(__u64));
2338 	vcpu->arch.sie_block->gcr[0]  = 0xE0UL;
2339 	vcpu->arch.sie_block->gcr[14] = 0xC2000000UL;
2340 	/* make sure the new fpc will be lazily loaded */
2341 	save_fpu_regs();
2342 	current->thread.fpu.fpc = 0;
2343 	vcpu->arch.sie_block->gbea = 1;
2344 	vcpu->arch.sie_block->pp = 0;
2345 	vcpu->arch.pfault_token = KVM_S390_PFAULT_TOKEN_INVALID;
2346 	kvm_clear_async_pf_completion_queue(vcpu);
2347 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm))
2348 		kvm_s390_vcpu_stop(vcpu);
2349 	kvm_s390_clear_local_irqs(vcpu);
2350 }
2351 
2352 void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
2353 {
2354 	mutex_lock(&vcpu->kvm->lock);
2355 	preempt_disable();
2356 	vcpu->arch.sie_block->epoch = vcpu->kvm->arch.epoch;
2357 	preempt_enable();
2358 	mutex_unlock(&vcpu->kvm->lock);
2359 	if (!kvm_is_ucontrol(vcpu->kvm)) {
2360 		vcpu->arch.gmap = vcpu->kvm->arch.gmap;
2361 		sca_add_vcpu(vcpu);
2362 	}
2363 	if (test_kvm_facility(vcpu->kvm, 74) || vcpu->kvm->arch.user_instr0)
2364 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2365 	/* make vcpu_load load the right gmap on the first trigger */
2366 	vcpu->arch.enabled_gmap = vcpu->arch.gmap;
2367 }
2368 
2369 static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu)
2370 {
2371 	if (!test_kvm_facility(vcpu->kvm, 76))
2372 		return;
2373 
2374 	vcpu->arch.sie_block->ecb3 &= ~(ECB3_AES | ECB3_DEA);
2375 
2376 	if (vcpu->kvm->arch.crypto.aes_kw)
2377 		vcpu->arch.sie_block->ecb3 |= ECB3_AES;
2378 	if (vcpu->kvm->arch.crypto.dea_kw)
2379 		vcpu->arch.sie_block->ecb3 |= ECB3_DEA;
2380 
2381 	vcpu->arch.sie_block->crycbd = vcpu->kvm->arch.crypto.crycbd;
2382 }
2383 
2384 void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu)
2385 {
2386 	free_page(vcpu->arch.sie_block->cbrlo);
2387 	vcpu->arch.sie_block->cbrlo = 0;
2388 }
2389 
2390 int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
2391 {
2392 	vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL);
2393 	if (!vcpu->arch.sie_block->cbrlo)
2394 		return -ENOMEM;
2395 
2396 	vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
2397 	return 0;
2398 }
2399 
2400 static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu)
2401 {
2402 	struct kvm_s390_cpu_model *model = &vcpu->kvm->arch.model;
2403 
2404 	vcpu->arch.sie_block->ibc = model->ibc;
2405 	if (test_kvm_facility(vcpu->kvm, 7))
2406 		vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list;
2407 }
2408 
2409 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
2410 {
2411 	int rc = 0;
2412 
2413 	atomic_set(&vcpu->arch.sie_block->cpuflags, CPUSTAT_ZARCH |
2414 						    CPUSTAT_SM |
2415 						    CPUSTAT_STOPPED);
2416 
2417 	if (test_kvm_facility(vcpu->kvm, 78))
2418 		atomic_or(CPUSTAT_GED2, &vcpu->arch.sie_block->cpuflags);
2419 	else if (test_kvm_facility(vcpu->kvm, 8))
2420 		atomic_or(CPUSTAT_GED, &vcpu->arch.sie_block->cpuflags);
2421 
2422 	kvm_s390_vcpu_setup_model(vcpu);
2423 
2424 	/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
2425 	if (MACHINE_HAS_ESOP)
2426 		vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
2427 	if (test_kvm_facility(vcpu->kvm, 9))
2428 		vcpu->arch.sie_block->ecb |= ECB_SRSI;
2429 	if (test_kvm_facility(vcpu->kvm, 73))
2430 		vcpu->arch.sie_block->ecb |= ECB_TE;
2431 
2432 	if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
2433 		vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
2434 	if (test_kvm_facility(vcpu->kvm, 130))
2435 		vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
2436 	vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
2437 	if (sclp.has_cei)
2438 		vcpu->arch.sie_block->eca |= ECA_CEI;
2439 	if (sclp.has_ib)
2440 		vcpu->arch.sie_block->eca |= ECA_IB;
2441 	if (sclp.has_siif)
2442 		vcpu->arch.sie_block->eca |= ECA_SII;
2443 	if (sclp.has_sigpif)
2444 		vcpu->arch.sie_block->eca |= ECA_SIGPI;
2445 	if (test_kvm_facility(vcpu->kvm, 129)) {
2446 		vcpu->arch.sie_block->eca |= ECA_VX;
2447 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
2448 	}
2449 	if (test_kvm_facility(vcpu->kvm, 139))
2450 		vcpu->arch.sie_block->ecd |= ECD_MEF;
2451 
2452 	vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
2453 					| SDNXC;
2454 	vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
2455 
2456 	if (sclp.has_kss)
2457 		atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
2458 	else
2459 		vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
2460 
2461 	if (vcpu->kvm->arch.use_cmma) {
2462 		rc = kvm_s390_vcpu_setup_cmma(vcpu);
2463 		if (rc)
2464 			return rc;
2465 	}
2466 	hrtimer_init(&vcpu->arch.ckc_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
2467 	vcpu->arch.ckc_timer.function = kvm_s390_idle_wakeup;
2468 
2469 	kvm_s390_vcpu_crypto_setup(vcpu);
2470 
2471 	return rc;
2472 }
2473 
2474 struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
2475 				      unsigned int id)
2476 {
2477 	struct kvm_vcpu *vcpu;
2478 	struct sie_page *sie_page;
2479 	int rc = -EINVAL;
2480 
2481 	if (!kvm_is_ucontrol(kvm) && !sca_can_add_vcpu(kvm, id))
2482 		goto out;
2483 
2484 	rc = -ENOMEM;
2485 
2486 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
2487 	if (!vcpu)
2488 		goto out;
2489 
2490 	BUILD_BUG_ON(sizeof(struct sie_page) != 4096);
2491 	sie_page = (struct sie_page *) get_zeroed_page(GFP_KERNEL);
2492 	if (!sie_page)
2493 		goto out_free_cpu;
2494 
2495 	vcpu->arch.sie_block = &sie_page->sie_block;
2496 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
2497 
2498 	/* the real guest size will always be smaller than msl */
2499 	vcpu->arch.sie_block->mso = 0;
2500 	vcpu->arch.sie_block->msl = sclp.hamax;
2501 
2502 	vcpu->arch.sie_block->icpua = id;
2503 	spin_lock_init(&vcpu->arch.local_int.lock);
2504 	vcpu->arch.local_int.float_int = &kvm->arch.float_int;
2505 	vcpu->arch.local_int.wq = &vcpu->wq;
2506 	vcpu->arch.local_int.cpuflags = &vcpu->arch.sie_block->cpuflags;
2507 	seqcount_init(&vcpu->arch.cputm_seqcount);
2508 
2509 	rc = kvm_vcpu_init(vcpu, kvm, id);
2510 	if (rc)
2511 		goto out_free_sie_block;
2512 	VM_EVENT(kvm, 3, "create cpu %d at 0x%pK, sie block at 0x%pK", id, vcpu,
2513 		 vcpu->arch.sie_block);
2514 	trace_kvm_s390_create_vcpu(id, vcpu, vcpu->arch.sie_block);
2515 
2516 	return vcpu;
2517 out_free_sie_block:
2518 	free_page((unsigned long)(vcpu->arch.sie_block));
2519 out_free_cpu:
2520 	kmem_cache_free(kvm_vcpu_cache, vcpu);
2521 out:
2522 	return ERR_PTR(rc);
2523 }
2524 
2525 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
2526 {
2527 	return kvm_s390_vcpu_has_irq(vcpu, 0);
2528 }
2529 
2530 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
2531 {
2532 	return !(vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE);
2533 }
2534 
2535 void kvm_s390_vcpu_block(struct kvm_vcpu *vcpu)
2536 {
2537 	atomic_or(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2538 	exit_sie(vcpu);
2539 }
2540 
2541 void kvm_s390_vcpu_unblock(struct kvm_vcpu *vcpu)
2542 {
2543 	atomic_andnot(PROG_BLOCK_SIE, &vcpu->arch.sie_block->prog20);
2544 }
2545 
2546 static void kvm_s390_vcpu_request(struct kvm_vcpu *vcpu)
2547 {
2548 	atomic_or(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2549 	exit_sie(vcpu);
2550 }
2551 
2552 static void kvm_s390_vcpu_request_handled(struct kvm_vcpu *vcpu)
2553 {
2554 	atomic_andnot(PROG_REQUEST, &vcpu->arch.sie_block->prog20);
2555 }
2556 
2557 /*
2558  * Kick a guest cpu out of SIE and wait until SIE is not running.
2559  * If the CPU is not running (e.g. waiting as idle) the function will
2560  * return immediately. */
2561 void exit_sie(struct kvm_vcpu *vcpu)
2562 {
2563 	atomic_or(CPUSTAT_STOP_INT, &vcpu->arch.sie_block->cpuflags);
2564 	while (vcpu->arch.sie_block->prog0c & PROG_IN_SIE)
2565 		cpu_relax();
2566 }
2567 
2568 /* Kick a guest cpu out of SIE to process a request synchronously */
2569 void kvm_s390_sync_request(int req, struct kvm_vcpu *vcpu)
2570 {
2571 	kvm_make_request(req, vcpu);
2572 	kvm_s390_vcpu_request(vcpu);
2573 }
2574 
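/*
 * gmap invalidation notifier: if the invalidated range overlaps a vcpu's
 * prefix pages, request an MMU reload so the prefix is remapped and the
 * ipte notifier is re-armed before that vcpu re-enters SIE.
 */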
2575 static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
2576 			      unsigned long end)
2577 {
2578 	struct kvm *kvm = gmap->private;
2579 	struct kvm_vcpu *vcpu;
2580 	unsigned long prefix;
2581 	int i;
2582 
2583 	if (gmap_is_shadow(gmap))
2584 		return;
2585 	if (start >= 1UL << 31)
2586 		/* We are only interested in prefix pages */
2587 		return;
2588 	kvm_for_each_vcpu(i, vcpu, kvm) {
2589 		/* match against both prefix pages */
2590 		prefix = kvm_s390_get_prefix(vcpu);
2591 		if (prefix <= end && start <= prefix + 2*PAGE_SIZE - 1) {
2592 			VCPU_EVENT(vcpu, 2, "gmap notifier for %lx-%lx",
2593 				   start, end);
2594 			kvm_s390_sync_request(KVM_REQ_MMU_RELOAD, vcpu);
2595 		}
2596 	}
2597 }
2598 
2599 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
2600 {
2601 	/* kvm common code refers to this, but never calls it */
2602 	BUG();
2603 	return 0;
2604 }
2605 
2606 static int kvm_arch_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu,
2607 					   struct kvm_one_reg *reg)
2608 {
2609 	int r = -EINVAL;
2610 
2611 	switch (reg->id) {
2612 	case KVM_REG_S390_TODPR:
2613 		r = put_user(vcpu->arch.sie_block->todpr,
2614 			     (u32 __user *)reg->addr);
2615 		break;
2616 	case KVM_REG_S390_EPOCHDIFF:
2617 		r = put_user(vcpu->arch.sie_block->epoch,
2618 			     (u64 __user *)reg->addr);
2619 		break;
2620 	case KVM_REG_S390_CPU_TIMER:
2621 		r = put_user(kvm_s390_get_cpu_timer(vcpu),
2622 			     (u64 __user *)reg->addr);
2623 		break;
2624 	case KVM_REG_S390_CLOCK_COMP:
2625 		r = put_user(vcpu->arch.sie_block->ckc,
2626 			     (u64 __user *)reg->addr);
2627 		break;
2628 	case KVM_REG_S390_PFTOKEN:
2629 		r = put_user(vcpu->arch.pfault_token,
2630 			     (u64 __user *)reg->addr);
2631 		break;
2632 	case KVM_REG_S390_PFCOMPARE:
2633 		r = put_user(vcpu->arch.pfault_compare,
2634 			     (u64 __user *)reg->addr);
2635 		break;
2636 	case KVM_REG_S390_PFSELECT:
2637 		r = put_user(vcpu->arch.pfault_select,
2638 			     (u64 __user *)reg->addr);
2639 		break;
2640 	case KVM_REG_S390_PP:
2641 		r = put_user(vcpu->arch.sie_block->pp,
2642 			     (u64 __user *)reg->addr);
2643 		break;
2644 	case KVM_REG_S390_GBEA:
2645 		r = put_user(vcpu->arch.sie_block->gbea,
2646 			     (u64 __user *)reg->addr);
2647 		break;
2648 	default:
2649 		break;
2650 	}
2651 
2652 	return r;
2653 }
2654 
2655 static int kvm_arch_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu,
2656 					   struct kvm_one_reg *reg)
2657 {
2658 	int r = -EINVAL;
2659 	__u64 val;
2660 
2661 	switch (reg->id) {
2662 	case KVM_REG_S390_TODPR:
2663 		r = get_user(vcpu->arch.sie_block->todpr,
2664 			     (u32 __user *)reg->addr);
2665 		break;
2666 	case KVM_REG_S390_EPOCHDIFF:
2667 		r = get_user(vcpu->arch.sie_block->epoch,
2668 			     (u64 __user *)reg->addr);
2669 		break;
2670 	case KVM_REG_S390_CPU_TIMER:
2671 		r = get_user(val, (u64 __user *)reg->addr);
2672 		if (!r)
2673 			kvm_s390_set_cpu_timer(vcpu, val);
2674 		break;
2675 	case KVM_REG_S390_CLOCK_COMP:
2676 		r = get_user(vcpu->arch.sie_block->ckc,
2677 			     (u64 __user *)reg->addr);
2678 		break;
2679 	case KVM_REG_S390_PFTOKEN:
2680 		r = get_user(vcpu->arch.pfault_token,
2681 			     (u64 __user *)reg->addr);
2682 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
2683 			kvm_clear_async_pf_completion_queue(vcpu);
2684 		break;
2685 	case KVM_REG_S390_PFCOMPARE:
2686 		r = get_user(vcpu->arch.pfault_compare,
2687 			     (u64 __user *)reg->addr);
2688 		break;
2689 	case KVM_REG_S390_PFSELECT:
2690 		r = get_user(vcpu->arch.pfault_select,
2691 			     (u64 __user *)reg->addr);
2692 		break;
2693 	case KVM_REG_S390_PP:
2694 		r = get_user(vcpu->arch.sie_block->pp,
2695 			     (u64 __user *)reg->addr);
2696 		break;
2697 	case KVM_REG_S390_GBEA:
2698 		r = get_user(vcpu->arch.sie_block->gbea,
2699 			     (u64 __user *)reg->addr);
2700 		break;
2701 	default:
2702 		break;
2703 	}
2704 
2705 	return r;
2706 }
2707 
2708 static int kvm_arch_vcpu_ioctl_initial_reset(struct kvm_vcpu *vcpu)
2709 {
2710 	kvm_s390_vcpu_initial_reset(vcpu);
2711 	return 0;
2712 }
2713 
2714 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2715 {
2716 	memcpy(&vcpu->run->s.regs.gprs, &regs->gprs, sizeof(regs->gprs));
2717 	return 0;
2718 }
2719 
2720 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
2721 {
2722 	memcpy(&regs->gprs, &vcpu->run->s.regs.gprs, sizeof(regs->gprs));
2723 	return 0;
2724 }
2725 
2726 int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
2727 				  struct kvm_sregs *sregs)
2728 {
2729 	memcpy(&vcpu->run->s.regs.acrs, &sregs->acrs, sizeof(sregs->acrs));
2730 	memcpy(&vcpu->arch.sie_block->gcr, &sregs->crs, sizeof(sregs->crs));
2731 	return 0;
2732 }
2733 
2734 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
2735 				  struct kvm_sregs *sregs)
2736 {
2737 	memcpy(&sregs->acrs, &vcpu->run->s.regs.acrs, sizeof(sregs->acrs));
2738 	memcpy(&sregs->crs, &vcpu->arch.sie_block->gcr, sizeof(sregs->crs));
2739 	return 0;
2740 }
2741 
2742 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2743 {
2744 	if (test_fp_ctl(fpu->fpc))
2745 		return -EINVAL;
2746 	vcpu->run->s.regs.fpc = fpu->fpc;
2747 	if (MACHINE_HAS_VX)
2748 		convert_fp_to_vx((__vector128 *) vcpu->run->s.regs.vrs,
2749 				 (freg_t *) fpu->fprs);
2750 	else
2751 		memcpy(vcpu->run->s.regs.fprs, &fpu->fprs, sizeof(fpu->fprs));
2752 	return 0;
2753 }
2754 
2755 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
2756 {
2757 	/* make sure we have the latest values */
2758 	save_fpu_regs();
2759 	if (MACHINE_HAS_VX)
2760 		convert_vx_to_fp((freg_t *) fpu->fprs,
2761 				 (__vector128 *) vcpu->run->s.regs.vrs);
2762 	else
2763 		memcpy(fpu->fprs, vcpu->run->s.regs.fprs, sizeof(fpu->fprs));
2764 	fpu->fpc = vcpu->run->s.regs.fpc;
2765 	return 0;
2766 }
2767 
2768 static int kvm_arch_vcpu_ioctl_set_initial_psw(struct kvm_vcpu *vcpu, psw_t psw)
2769 {
2770 	int rc = 0;
2771 
2772 	if (!is_vcpu_stopped(vcpu))
2773 		rc = -EBUSY;
2774 	else {
2775 		vcpu->run->psw_mask = psw.mask;
2776 		vcpu->run->psw_addr = psw.addr;
2777 	}
2778 	return rc;
2779 }
2780 
2781 int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
2782 				  struct kvm_translation *tr)
2783 {
2784 	return -EINVAL; /* not implemented yet */
2785 }
2786 
2787 #define VALID_GUESTDBG_FLAGS (KVM_GUESTDBG_SINGLESTEP | \
2788 			      KVM_GUESTDBG_USE_HW_BP | \
2789 			      KVM_GUESTDBG_ENABLE)
2790 
2791 int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
2792 					struct kvm_guest_debug *dbg)
2793 {
2794 	int rc = 0;
2795 
2796 	vcpu->guest_debug = 0;
2797 	kvm_s390_clear_bp_data(vcpu);
2798 
2799 	if (dbg->control & ~VALID_GUESTDBG_FLAGS)
2800 		return -EINVAL;
2801 	if (!sclp.has_gpere)
2802 		return -EINVAL;
2803 
2804 	if (dbg->control & KVM_GUESTDBG_ENABLE) {
2805 		vcpu->guest_debug = dbg->control;
2806 		/* enforce guest PER */
2807 		atomic_or(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2808 
2809 		if (dbg->control & KVM_GUESTDBG_USE_HW_BP)
2810 			rc = kvm_s390_import_bp_data(vcpu, dbg);
2811 	} else {
2812 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2813 		vcpu->arch.guestdbg.last_bp = 0;
2814 	}
2815 
2816 	if (rc) {
2817 		vcpu->guest_debug = 0;
2818 		kvm_s390_clear_bp_data(vcpu);
2819 		atomic_andnot(CPUSTAT_P, &vcpu->arch.sie_block->cpuflags);
2820 	}
2821 
2822 	return rc;
2823 }
2824 
2825 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
2826 				    struct kvm_mp_state *mp_state)
2827 {
2828 	/* CHECK_STOP and LOAD are not supported yet */
2829 	return is_vcpu_stopped(vcpu) ? KVM_MP_STATE_STOPPED :
2830 				       KVM_MP_STATE_OPERATING;
2831 }
2832 
2833 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
2834 				    struct kvm_mp_state *mp_state)
2835 {
2836 	int rc = 0;
2837 
2838 	/* user space knows about this interface - let it control the state */
2839 	vcpu->kvm->arch.user_cpu_state_ctrl = 1;
2840 
2841 	switch (mp_state->mp_state) {
2842 	case KVM_MP_STATE_STOPPED:
2843 		kvm_s390_vcpu_stop(vcpu);
2844 		break;
2845 	case KVM_MP_STATE_OPERATING:
2846 		kvm_s390_vcpu_start(vcpu);
2847 		break;
2848 	case KVM_MP_STATE_LOAD:
2849 	case KVM_MP_STATE_CHECK_STOP:
2850 		/* fall through - CHECK_STOP and LOAD are not supported yet */
2851 	default:
2852 		rc = -ENXIO;
2853 	}
2854 
2855 	return rc;
2856 }
2857 
2858 static bool ibs_enabled(struct kvm_vcpu *vcpu)
2859 {
2860 	return atomic_read(&vcpu->arch.sie_block->cpuflags) & CPUSTAT_IBS;
2861 }
2862 
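/*
 * Process all pending vcpu requests before (re)entering SIE. Every
 * successfully handled request restarts the loop so that requests raised
 * in the meantime are also processed before guest entry.
 */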
2863 static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu)
2864 {
2865 retry:
2866 	kvm_s390_vcpu_request_handled(vcpu);
2867 	if (!kvm_request_pending(vcpu))
2868 		return 0;
2869 	/*
2870 	 * We use MMU_RELOAD just to re-arm the ipte notifier for the
2871 	 * guest prefix page. gmap_mprotect_notify will wait on the ptl lock.
2872 	 * This ensures that the ipte instruction for this request has
2873 	 * already finished. We might race against a second unmapper that
2874 	 * wants to set the blocking bit. Let's just retry the request loop.
2875 	 */
2876 	if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) {
2877 		int rc;
2878 		rc = gmap_mprotect_notify(vcpu->arch.gmap,
2879 					  kvm_s390_get_prefix(vcpu),
2880 					  PAGE_SIZE * 2, PROT_WRITE);
2881 		if (rc) {
2882 			kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
2883 			return rc;
2884 		}
2885 		goto retry;
2886 	}
2887 
2888 	if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
2889 		vcpu->arch.sie_block->ihcpu = 0xffff;
2890 		goto retry;
2891 	}
2892 
2893 	if (kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu)) {
2894 		if (!ibs_enabled(vcpu)) {
2895 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 1);
2896 			atomic_or(CPUSTAT_IBS,
2897 					&vcpu->arch.sie_block->cpuflags);
2898 		}
2899 		goto retry;
2900 	}
2901 
2902 	if (kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu)) {
2903 		if (ibs_enabled(vcpu)) {
2904 			trace_kvm_s390_enable_disable_ibs(vcpu->vcpu_id, 0);
2905 			atomic_andnot(CPUSTAT_IBS,
2906 					  &vcpu->arch.sie_block->cpuflags);
2907 		}
2908 		goto retry;
2909 	}
2910 
2911 	if (kvm_check_request(KVM_REQ_ICPT_OPEREXC, vcpu)) {
2912 		vcpu->arch.sie_block->ictl |= ICTL_OPEREXC;
2913 		goto retry;
2914 	}
2915 
2916 	if (kvm_check_request(KVM_REQ_START_MIGRATION, vcpu)) {
2917 		/*
2918 		 * Disable CMMA virtualization; we will emulate the ESSA
2919 		 * instruction manually, in order to provide additional
2920 		 * functionality needed for live migration.
2921 		 */
2922 		vcpu->arch.sie_block->ecb2 &= ~ECB2_CMMA;
2923 		goto retry;
2924 	}
2925 
2926 	if (kvm_check_request(KVM_REQ_STOP_MIGRATION, vcpu)) {
2927 		/*
2928 		 * Re-enable CMMA virtualization if CMMA is available and
2929 		 * was used.
2930 		 */
2931 		if ((vcpu->kvm->arch.use_cmma) &&
2932 		    (vcpu->kvm->mm->context.use_cmma))
2933 			vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
2934 		goto retry;
2935 	}
2936 
2937 	/* nothing to do, just clear the request */
2938 	kvm_clear_request(KVM_REQ_UNHALT, vcpu);
2939 
2940 	return 0;
2941 }
2942 
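/*
 * Set the guest TOD clock including the epoch index: compute the epoch
 * difference against the host TOD and propagate it to all vcpus while
 * they are blocked.
 */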
2943 void kvm_s390_set_tod_clock_ext(struct kvm *kvm,
2944 				 const struct kvm_s390_vm_tod_clock *gtod)
2945 {
2946 	struct kvm_vcpu *vcpu;
2947 	struct kvm_s390_tod_clock_ext htod;
2948 	int i;
2949 
2950 	mutex_lock(&kvm->lock);
2951 	preempt_disable();
2952 
2953 	get_tod_clock_ext((char *)&htod);
2954 
2955 	kvm->arch.epoch = gtod->tod - htod.tod;
2956 	kvm->arch.epdx = gtod->epoch_idx - htod.epoch_idx;
2957 
2958 	if (kvm->arch.epoch > gtod->tod)
2959 		kvm->arch.epdx -= 1;
2960 
2961 	kvm_s390_vcpu_block_all(kvm);
2962 	kvm_for_each_vcpu(i, vcpu, kvm) {
2963 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2964 		vcpu->arch.sie_block->epdx  = kvm->arch.epdx;
2965 	}
2966 
2967 	kvm_s390_vcpu_unblock_all(kvm);
2968 	preempt_enable();
2969 	mutex_unlock(&kvm->lock);
2970 }
2971 
2972 void kvm_s390_set_tod_clock(struct kvm *kvm, u64 tod)
2973 {
2974 	struct kvm_vcpu *vcpu;
2975 	int i;
2976 
2977 	mutex_lock(&kvm->lock);
2978 	preempt_disable();
2979 	kvm->arch.epoch = tod - get_tod_clock();
2980 	kvm_s390_vcpu_block_all(kvm);
2981 	kvm_for_each_vcpu(i, vcpu, kvm)
2982 		vcpu->arch.sie_block->epoch = kvm->arch.epoch;
2983 	kvm_s390_vcpu_unblock_all(kvm);
2984 	preempt_enable();
2985 	mutex_unlock(&kvm->lock);
2986 }
2987 
2988 /**
2989  * kvm_arch_fault_in_page - fault-in guest page if necessary
2990  * @vcpu: The corresponding virtual cpu
2991  * @gpa: Guest physical address
2992  * @writable: Whether the page should be writable or not
2993  *
2994  * Make sure that a guest page has been faulted-in on the host.
2995  *
2996  * Return: Zero on success, negative error code otherwise.
2997  */
2998 long kvm_arch_fault_in_page(struct kvm_vcpu *vcpu, gpa_t gpa, int writable)
2999 {
3000 	return gmap_fault(vcpu->arch.gmap, gpa,
3001 			  writable ? FAULT_FLAG_WRITE : 0);
3002 }
3003 
3004 static void __kvm_inject_pfault_token(struct kvm_vcpu *vcpu, bool start_token,
3005 				      unsigned long token)
3006 {
3007 	struct kvm_s390_interrupt inti;
3008 	struct kvm_s390_irq irq;
3009 
3010 	if (start_token) {
3011 		irq.u.ext.ext_params2 = token;
3012 		irq.type = KVM_S390_INT_PFAULT_INIT;
3013 		WARN_ON_ONCE(kvm_s390_inject_vcpu(vcpu, &irq));
3014 	} else {
3015 		inti.type = KVM_S390_INT_PFAULT_DONE;
3016 		inti.parm64 = token;
3017 		WARN_ON_ONCE(kvm_s390_inject_vm(vcpu->kvm, &inti));
3018 	}
3019 }
3020 
3021 void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
3022 				     struct kvm_async_pf *work)
3023 {
3024 	trace_kvm_s390_pfault_init(vcpu, work->arch.pfault_token);
3025 	__kvm_inject_pfault_token(vcpu, true, work->arch.pfault_token);
3026 }
3027 
3028 void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
3029 				 struct kvm_async_pf *work)
3030 {
3031 	trace_kvm_s390_pfault_done(vcpu, work->arch.pfault_token);
3032 	__kvm_inject_pfault_token(vcpu, false, work->arch.pfault_token);
3033 }
3034 
3035 void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu,
3036 			       struct kvm_async_pf *work)
3037 {
3038 	/* s390 will always inject the page directly */
3039 }
3040 
3041 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)
3042 {
3043 	/*
3044 	 * s390 will always inject the page directly,
3045 	 * but we still want check_async_completion to clean up
3046 	 */
3047 	return true;
3048 }
3049 
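/*
 * Try to arm an async pfault for the current host fault. A nonzero return
 * means the fault will be handled asynchronously; 0 means the caller has
 * to fault the page in synchronously.
 */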
3050 static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu)
3051 {
3052 	hva_t hva;
3053 	struct kvm_arch_async_pf arch;
3054 	int rc;
3055 
3056 	if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3057 		return 0;
3058 	if ((vcpu->arch.sie_block->gpsw.mask & vcpu->arch.pfault_select) !=
3059 	    vcpu->arch.pfault_compare)
3060 		return 0;
3061 	if (psw_extint_disabled(vcpu))
3062 		return 0;
3063 	if (kvm_s390_vcpu_has_irq(vcpu, 0))
3064 		return 0;
3065 	if (!(vcpu->arch.sie_block->gcr[0] & 0x200ul))
3066 		return 0;
3067 	if (!vcpu->arch.gmap->pfault_enabled)
3068 		return 0;
3069 
3070 	hva = gfn_to_hva(vcpu->kvm, gpa_to_gfn(current->thread.gmap_addr));
3071 	hva += current->thread.gmap_addr & ~PAGE_MASK;
3072 	if (read_guest_real(vcpu, vcpu->arch.pfault_token, &arch.pfault_token, 8))
3073 		return 0;
3074 
3075 	rc = kvm_setup_async_pf(vcpu, current->thread.gmap_addr, hva, &arch);
3076 	return rc;
3077 }
3078 
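/*
 * Prepare for SIE entry: handle completed async pfaults, host machine
 * checks, pending interrupt delivery, vcpu requests and guest PER setup.
 * Returns 0 if the vcpu may enter SIE, nonzero otherwise.
 */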
3079 static int vcpu_pre_run(struct kvm_vcpu *vcpu)
3080 {
3081 	int rc, cpuflags;
3082 
3083 	/*
3084 	 * On s390 notifications for arriving pages will be delivered directly
3085 	 * to the guest but the housekeeping for completed pfaults is
3086 	 * handled outside the worker.
3087 	 */
3088 	kvm_check_async_pf_completion(vcpu);
3089 
3090 	vcpu->arch.sie_block->gg14 = vcpu->run->s.regs.gprs[14];
3091 	vcpu->arch.sie_block->gg15 = vcpu->run->s.regs.gprs[15];
3092 
3093 	if (need_resched())
3094 		schedule();
3095 
3096 	if (test_cpu_flag(CIF_MCCK_PENDING))
3097 		s390_handle_mcck();
3098 
3099 	if (!kvm_is_ucontrol(vcpu->kvm)) {
3100 		rc = kvm_s390_deliver_pending_interrupts(vcpu);
3101 		if (rc)
3102 			return rc;
3103 	}
3104 
3105 	rc = kvm_s390_handle_requests(vcpu);
3106 	if (rc)
3107 		return rc;
3108 
3109 	if (guestdbg_enabled(vcpu)) {
3110 		kvm_s390_backup_guest_per_regs(vcpu);
3111 		kvm_s390_patch_guest_per_regs(vcpu);
3112 	}
3113 
3114 	vcpu->arch.sie_block->icptcode = 0;
3115 	cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
3116 	VCPU_EVENT(vcpu, 6, "entering sie flags %x", cpuflags);
3117 	trace_kvm_s390_sie_enter(vcpu, cpuflags);
3118 
3119 	return 0;
3120 }
3121 
3122 static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu)
3123 {
3124 	struct kvm_s390_pgm_info pgm_info = {
3125 		.code = PGM_ADDRESSING,
3126 	};
3127 	u8 opcode, ilen;
3128 	int rc;
3129 
3130 	VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
3131 	trace_kvm_s390_sie_fault(vcpu);
3132 
3133 	/*
3134 	 * We want to inject an addressing exception, which is defined as a
3135 	 * suppressing or terminating exception. However, since we came here
3136 	 * by a DAT access exception, the PSW still points to the faulting
3137 	 * instruction since DAT exceptions are nullifying. So we've got
3138 	 * to look up the current opcode to get the length of the instruction
3139 	 * to be able to forward the PSW.
3140 	 */
3141 	rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1);
3142 	ilen = insn_length(opcode);
3143 	if (rc < 0) {
3144 		return rc;
3145 	} else if (rc) {
3146 		/* Instruction-Fetching Exceptions - we can't detect the ilen.
3147 		 * Forward by arbitrary ilc, injection will take care of
3148 		 * nullification if necessary.
3149 		 */
3150 		pgm_info = vcpu->arch.pgm;
3151 		ilen = 4;
3152 	}
3153 	pgm_info.flags = ilen | KVM_S390_PGM_FLAGS_ILC_VALID;
3154 	kvm_s390_forward_psw(vcpu, ilen);
3155 	return kvm_s390_inject_prog_irq(vcpu, &pgm_info);
3156 }
3157 
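/*
 * Post-process a SIE exit: reinject host machine checks (exit_reason
 * -EINTR), hand intercepts to the intercept handlers and turn host page
 * faults into async pfaults or a synchronous fault-in. Returns -EREMOTE
 * when the exit has to be completed by userspace.
 */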
3158 static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
3159 {
3160 	struct mcck_volatile_info *mcck_info;
3161 	struct sie_page *sie_page;
3162 
3163 	VCPU_EVENT(vcpu, 6, "exit sie icptcode %d",
3164 		   vcpu->arch.sie_block->icptcode);
3165 	trace_kvm_s390_sie_exit(vcpu, vcpu->arch.sie_block->icptcode);
3166 
3167 	if (guestdbg_enabled(vcpu))
3168 		kvm_s390_restore_guest_per_regs(vcpu);
3169 
3170 	vcpu->run->s.regs.gprs[14] = vcpu->arch.sie_block->gg14;
3171 	vcpu->run->s.regs.gprs[15] = vcpu->arch.sie_block->gg15;
3172 
3173 	if (exit_reason == -EINTR) {
3174 		VCPU_EVENT(vcpu, 3, "%s", "machine check");
3175 		sie_page = container_of(vcpu->arch.sie_block,
3176 					struct sie_page, sie_block);
3177 		mcck_info = &sie_page->mcck_info;
3178 		kvm_s390_reinject_machine_check(vcpu, mcck_info);
3179 		return 0;
3180 	}
3181 
3182 	if (vcpu->arch.sie_block->icptcode > 0) {
3183 		int rc = kvm_handle_sie_intercept(vcpu);
3184 
3185 		if (rc != -EOPNOTSUPP)
3186 			return rc;
3187 		vcpu->run->exit_reason = KVM_EXIT_S390_SIEIC;
3188 		vcpu->run->s390_sieic.icptcode = vcpu->arch.sie_block->icptcode;
3189 		vcpu->run->s390_sieic.ipa = vcpu->arch.sie_block->ipa;
3190 		vcpu->run->s390_sieic.ipb = vcpu->arch.sie_block->ipb;
3191 		return -EREMOTE;
3192 	} else if (exit_reason != -EFAULT) {
3193 		vcpu->stat.exit_null++;
3194 		return 0;
3195 	} else if (kvm_is_ucontrol(vcpu->kvm)) {
3196 		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
3197 		vcpu->run->s390_ucontrol.trans_exc_code =
3198 						current->thread.gmap_addr;
3199 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
3200 		return -EREMOTE;
3201 	} else if (current->thread.gmap_pfault) {
3202 		trace_kvm_s390_major_guest_pfault(vcpu);
3203 		current->thread.gmap_pfault = 0;
3204 		if (kvm_arch_setup_async_pf(vcpu))
3205 			return 0;
3206 		return kvm_arch_fault_in_page(vcpu, current->thread.gmap_addr, 1);
3207 	}
3208 	return vcpu_post_run_fault_in_sie(vcpu);
3209 }
3210 
3211 static int __vcpu_run(struct kvm_vcpu *vcpu)
3212 {
3213 	int rc, exit_reason;
3214 
3215 	/*
3216 	 * We try to hold kvm->srcu during most of vcpu_run (except when
3217 	 * running the guest), so that memslots (and other stuff) are protected
3218 	 */
3219 	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3220 
3221 	do {
3222 		rc = vcpu_pre_run(vcpu);
3223 		if (rc)
3224 			break;
3225 
3226 		srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3227 		/*
3228 		 * As PF_VCPU will be used in the fault handler, there must be no
3229 		 * uaccess between guest_enter and guest_exit.
3230 		 */
3231 		local_irq_disable();
3232 		guest_enter_irqoff();
3233 		__disable_cpu_timer_accounting(vcpu);
3234 		local_irq_enable();
3235 		exit_reason = sie64a(vcpu->arch.sie_block,
3236 				     vcpu->run->s.regs.gprs);
3237 		local_irq_disable();
3238 		__enable_cpu_timer_accounting(vcpu);
3239 		guest_exit_irqoff();
3240 		local_irq_enable();
3241 		vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3242 
3243 		rc = vcpu_post_run(vcpu, exit_reason);
3244 	} while (!signal_pending(current) && !guestdbg_exit_pending(vcpu) && !rc);
3245 
3246 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
3247 	return rc;
3248 }
3249 
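/*
 * Load the register state passed in by userspace via kvm_run into the
 * SIE control block and into the host registers that carry guest state
 * while running (fprs/vrs, access registers, guarded storage).
 */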
3250 static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3251 {
3252 	struct runtime_instr_cb *riccb;
3253 	struct gs_cb *gscb;
3254 
3255 	riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
3256 	gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
3257 	vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
3258 	vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
3259 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
3260 		kvm_s390_set_prefix(vcpu, kvm_run->s.regs.prefix);
3261 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_CRS) {
3262 		memcpy(&vcpu->arch.sie_block->gcr, &kvm_run->s.regs.crs, 128);
3263 		/* some control register changes require a tlb flush */
3264 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3265 	}
3266 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_ARCH0) {
3267 		kvm_s390_set_cpu_timer(vcpu, kvm_run->s.regs.cputm);
3268 		vcpu->arch.sie_block->ckc = kvm_run->s.regs.ckc;
3269 		vcpu->arch.sie_block->todpr = kvm_run->s.regs.todpr;
3270 		vcpu->arch.sie_block->pp = kvm_run->s.regs.pp;
3271 		vcpu->arch.sie_block->gbea = kvm_run->s.regs.gbea;
3272 	}
3273 	if (kvm_run->kvm_dirty_regs & KVM_SYNC_PFAULT) {
3274 		vcpu->arch.pfault_token = kvm_run->s.regs.pft;
3275 		vcpu->arch.pfault_select = kvm_run->s.regs.pfs;
3276 		vcpu->arch.pfault_compare = kvm_run->s.regs.pfc;
3277 		if (vcpu->arch.pfault_token == KVM_S390_PFAULT_TOKEN_INVALID)
3278 			kvm_clear_async_pf_completion_queue(vcpu);
3279 	}
3280 	/*
3281 	 * If userspace sets the riccb (e.g. after migration) to a valid state,
3282 	 * we should enable RI here instead of doing the lazy enablement.
3283 	 */
3284 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
3285 	    test_kvm_facility(vcpu->kvm, 64) &&
3286 	    riccb->valid &&
3287 	    !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
3288 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
3289 		vcpu->arch.sie_block->ecb3 |= ECB3_RI;
3290 	}
3291 	/*
3292 	 * If userspace sets the gscb (e.g. after migration) to non-zero,
3293 	 * we should enable GS here instead of doing the lazy enablement.
3294 	 */
3295 	if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
3296 	    test_kvm_facility(vcpu->kvm, 133) &&
3297 	    gscb->gssm &&
3298 	    !vcpu->arch.gs_enabled) {
3299 		VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
3300 		vcpu->arch.sie_block->ecb |= ECB_GS;
3301 		vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
3302 		vcpu->arch.gs_enabled = 1;
3303 	}
3304 	save_access_regs(vcpu->arch.host_acrs);
3305 	restore_access_regs(vcpu->run->s.regs.acrs);
3306 	/* save host (userspace) fprs/vrs */
3307 	save_fpu_regs();
3308 	vcpu->arch.host_fpregs.fpc = current->thread.fpu.fpc;
3309 	vcpu->arch.host_fpregs.regs = current->thread.fpu.regs;
3310 	if (MACHINE_HAS_VX)
3311 		current->thread.fpu.regs = vcpu->run->s.regs.vrs;
3312 	else
3313 		current->thread.fpu.regs = vcpu->run->s.regs.fprs;
3314 	current->thread.fpu.fpc = vcpu->run->s.regs.fpc;
3315 	if (test_fp_ctl(current->thread.fpu.fpc))
3316 		/* User space provided an invalid FPC, let's clear it */
3317 		current->thread.fpu.fpc = 0;
3318 	if (MACHINE_HAS_GS) {
3319 		preempt_disable();
3320 		__ctl_set_bit(2, 4);
3321 		if (current->thread.gs_cb) {
3322 			vcpu->arch.host_gscb = current->thread.gs_cb;
3323 			save_gs_cb(vcpu->arch.host_gscb);
3324 		}
3325 		if (vcpu->arch.gs_enabled) {
3326 			current->thread.gs_cb = (struct gs_cb *)
3327 						&vcpu->run->s.regs.gscb;
3328 			restore_gs_cb(current->thread.gs_cb);
3329 		}
3330 		preempt_enable();
3331 	}
3332 
3333 	kvm_run->kvm_dirty_regs = 0;
3334 }
3335 
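/*
 * Copy the guest register state back into kvm_run and restore the host
 * (userspace) register state.
 */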
3336 static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3337 {
3338 	kvm_run->psw_mask = vcpu->arch.sie_block->gpsw.mask;
3339 	kvm_run->psw_addr = vcpu->arch.sie_block->gpsw.addr;
3340 	kvm_run->s.regs.prefix = kvm_s390_get_prefix(vcpu);
3341 	memcpy(&kvm_run->s.regs.crs, &vcpu->arch.sie_block->gcr, 128);
3342 	kvm_run->s.regs.cputm = kvm_s390_get_cpu_timer(vcpu);
3343 	kvm_run->s.regs.ckc = vcpu->arch.sie_block->ckc;
3344 	kvm_run->s.regs.todpr = vcpu->arch.sie_block->todpr;
3345 	kvm_run->s.regs.pp = vcpu->arch.sie_block->pp;
3346 	kvm_run->s.regs.gbea = vcpu->arch.sie_block->gbea;
3347 	kvm_run->s.regs.pft = vcpu->arch.pfault_token;
3348 	kvm_run->s.regs.pfs = vcpu->arch.pfault_select;
3349 	kvm_run->s.regs.pfc = vcpu->arch.pfault_compare;
3350 	save_access_regs(vcpu->run->s.regs.acrs);
3351 	restore_access_regs(vcpu->arch.host_acrs);
3352 	/* Save guest register state */
3353 	save_fpu_regs();
3354 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3355 	/* Restore will be done lazily at return */
3356 	current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
3357 	current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
3358 	if (MACHINE_HAS_GS) {
3359 		__ctl_set_bit(2, 4);
3360 		if (vcpu->arch.gs_enabled)
3361 			save_gs_cb(current->thread.gs_cb);
3362 		preempt_disable();
3363 		current->thread.gs_cb = vcpu->arch.host_gscb;
3364 		restore_gs_cb(vcpu->arch.host_gscb);
3365 		preempt_enable();
3366 		if (!vcpu->arch.host_gscb)
3367 			__ctl_clear_bit(2, 4);
3368 		vcpu->arch.host_gscb = NULL;
3369 	}
3370 
3371 }
3372 
3373 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
3374 {
3375 	int rc;
3376 	sigset_t sigsaved;
3377 
3378 	if (kvm_run->immediate_exit)
3379 		return -EINTR;
3380 
3381 	if (guestdbg_exit_pending(vcpu)) {
3382 		kvm_s390_prepare_debug_exit(vcpu);
3383 		return 0;
3384 	}
3385 
3386 	if (vcpu->sigset_active)
3387 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
3388 
3389 	if (!kvm_s390_user_cpu_state_ctrl(vcpu->kvm)) {
3390 		kvm_s390_vcpu_start(vcpu);
3391 	} else if (is_vcpu_stopped(vcpu)) {
3392 		pr_err_ratelimited("can't run stopped vcpu %d\n",
3393 				   vcpu->vcpu_id);
3394 		return -EINVAL;
3395 	}
3396 
3397 	sync_regs(vcpu, kvm_run);
3398 	enable_cpu_timer_accounting(vcpu);
3399 
3400 	might_fault();
3401 	rc = __vcpu_run(vcpu);
3402 
3403 	if (signal_pending(current) && !rc) {
3404 		kvm_run->exit_reason = KVM_EXIT_INTR;
3405 		rc = -EINTR;
3406 	}
3407 
3408 	if (guestdbg_exit_pending(vcpu) && !rc)  {
3409 		kvm_s390_prepare_debug_exit(vcpu);
3410 		rc = 0;
3411 	}
3412 
3413 	if (rc == -EREMOTE) {
3414 		/* userspace support is needed, kvm_run has been prepared */
3415 		rc = 0;
3416 	}
3417 
3418 	disable_cpu_timer_accounting(vcpu);
3419 	store_regs(vcpu, kvm_run);
3420 
3421 	if (vcpu->sigset_active)
3422 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
3423 
3424 	vcpu->stat.exit_userspace++;
3425 	return rc;
3426 }
3427 
3428 /*
3429  * store status at address
3430  * we have two special cases:
3431  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
3432  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
3433  */
3434 int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long gpa)
3435 {
3436 	unsigned char archmode = 1;
3437 	freg_t fprs[NUM_FPRS];
3438 	unsigned int px;
3439 	u64 clkcomp, cputm;
3440 	int rc;
3441 
3442 	px = kvm_s390_get_prefix(vcpu);
3443 	if (gpa == KVM_S390_STORE_STATUS_NOADDR) {
3444 		if (write_guest_abs(vcpu, 163, &archmode, 1))
3445 			return -EFAULT;
3446 		gpa = 0;
3447 	} else if (gpa == KVM_S390_STORE_STATUS_PREFIXED) {
3448 		if (write_guest_real(vcpu, 163, &archmode, 1))
3449 			return -EFAULT;
3450 		gpa = px;
3451 	} else
3452 		gpa -= __LC_FPREGS_SAVE_AREA;
3453 
3454 	/* manually convert vector registers if necessary */
3455 	if (MACHINE_HAS_VX) {
3456 		convert_vx_to_fp(fprs, (__vector128 *) vcpu->run->s.regs.vrs);
3457 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3458 				     fprs, 128);
3459 	} else {
3460 		rc = write_guest_abs(vcpu, gpa + __LC_FPREGS_SAVE_AREA,
3461 				     vcpu->run->s.regs.fprs, 128);
3462 	}
3463 	rc |= write_guest_abs(vcpu, gpa + __LC_GPREGS_SAVE_AREA,
3464 			      vcpu->run->s.regs.gprs, 128);
3465 	rc |= write_guest_abs(vcpu, gpa + __LC_PSW_SAVE_AREA,
3466 			      &vcpu->arch.sie_block->gpsw, 16);
3467 	rc |= write_guest_abs(vcpu, gpa + __LC_PREFIX_SAVE_AREA,
3468 			      &px, 4);
3469 	rc |= write_guest_abs(vcpu, gpa + __LC_FP_CREG_SAVE_AREA,
3470 			      &vcpu->run->s.regs.fpc, 4);
3471 	rc |= write_guest_abs(vcpu, gpa + __LC_TOD_PROGREG_SAVE_AREA,
3472 			      &vcpu->arch.sie_block->todpr, 4);
3473 	cputm = kvm_s390_get_cpu_timer(vcpu);
3474 	rc |= write_guest_abs(vcpu, gpa + __LC_CPU_TIMER_SAVE_AREA,
3475 			      &cputm, 8);
3476 	clkcomp = vcpu->arch.sie_block->ckc >> 8;
3477 	rc |= write_guest_abs(vcpu, gpa + __LC_CLOCK_COMP_SAVE_AREA,
3478 			      &clkcomp, 8);
3479 	rc |= write_guest_abs(vcpu, gpa + __LC_AREGS_SAVE_AREA,
3480 			      &vcpu->run->s.regs.acrs, 64);
3481 	rc |= write_guest_abs(vcpu, gpa + __LC_CREGS_SAVE_AREA,
3482 			      &vcpu->arch.sie_block->gcr, 128);
3483 	return rc ? -EFAULT : 0;
3484 }
3485 
3486 int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
3487 {
3488 	/*
3489 	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
3490 	 * switch in the run ioctl. Let's update our copies before we save
3491 	 * them into the save area
3492 	 */
3493 	save_fpu_regs();
3494 	vcpu->run->s.regs.fpc = current->thread.fpu.fpc;
3495 	save_access_regs(vcpu->run->s.regs.acrs);
3496 
3497 	return kvm_s390_store_status_unloaded(vcpu, addr);
3498 }
3499 
3500 static void __disable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3501 {
3502 	kvm_check_request(KVM_REQ_ENABLE_IBS, vcpu);
3503 	kvm_s390_sync_request(KVM_REQ_DISABLE_IBS, vcpu);
3504 }
3505 
3506 static void __disable_ibs_on_all_vcpus(struct kvm *kvm)
3507 {
3508 	unsigned int i;
3509 	struct kvm_vcpu *vcpu;
3510 
3511 	kvm_for_each_vcpu(i, vcpu, kvm) {
3512 		__disable_ibs_on_vcpu(vcpu);
3513 	}
3514 }
3515 
3516 static void __enable_ibs_on_vcpu(struct kvm_vcpu *vcpu)
3517 {
3518 	if (!sclp.has_ibs)
3519 		return;
3520 	kvm_check_request(KVM_REQ_DISABLE_IBS, vcpu);
3521 	kvm_s390_sync_request(KVM_REQ_ENABLE_IBS, vcpu);
3522 }
3523 
3524 void kvm_s390_vcpu_start(struct kvm_vcpu *vcpu)
3525 {
3526 	int i, online_vcpus, started_vcpus = 0;
3527 
3528 	if (!is_vcpu_stopped(vcpu))
3529 		return;
3530 
3531 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 1);
3532 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3533 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3534 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3535 
3536 	for (i = 0; i < online_vcpus; i++) {
3537 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i]))
3538 			started_vcpus++;
3539 	}
3540 
3541 	if (started_vcpus == 0) {
3542 		/* we're the only active VCPU -> speed it up */
3543 		__enable_ibs_on_vcpu(vcpu);
3544 	} else if (started_vcpus == 1) {
3545 		/*
3546 		 * As we are starting a second VCPU, we have to disable
3547 		 * the IBS facility on all VCPUs to remove potentially
3548 		 * outstanding ENABLE requests.
3549 		 */
3550 		__disable_ibs_on_all_vcpus(vcpu->kvm);
3551 	}
3552 
3553 	atomic_andnot(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3554 	/*
3555 	 * Another VCPU might have used IBS while we were offline.
3556 	 * Let's play safe and flush the VCPU at startup.
3557 	 */
3558 	kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
3559 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3560 	return;
3561 }
3562 
3563 void kvm_s390_vcpu_stop(struct kvm_vcpu *vcpu)
3564 {
3565 	int i, online_vcpus, started_vcpus = 0;
3566 	struct kvm_vcpu *started_vcpu = NULL;
3567 
3568 	if (is_vcpu_stopped(vcpu))
3569 		return;
3570 
3571 	trace_kvm_s390_vcpu_start_stop(vcpu->vcpu_id, 0);
3572 	/* Only one cpu at a time may enter/leave the STOPPED state. */
3573 	spin_lock(&vcpu->kvm->arch.start_stop_lock);
3574 	online_vcpus = atomic_read(&vcpu->kvm->online_vcpus);
3575 
3576 	/* SIGP STOP and SIGP STOP AND STORE STATUS have been fully processed */
3577 	kvm_s390_clear_stop_irq(vcpu);
3578 
3579 	atomic_or(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
3580 	__disable_ibs_on_vcpu(vcpu);
3581 
3582 	for (i = 0; i < online_vcpus; i++) {
3583 		if (!is_vcpu_stopped(vcpu->kvm->vcpus[i])) {
3584 			started_vcpus++;
3585 			started_vcpu = vcpu->kvm->vcpus[i];
3586 		}
3587 	}
3588 
3589 	if (started_vcpus == 1) {
3590 		/*
3591 		 * As we only have one VCPU left, we want to enable the
3592 		 * IBS facility for that VCPU to speed it up.
3593 		 */
3594 		__enable_ibs_on_vcpu(started_vcpu);
3595 	}
3596 
3597 	spin_unlock(&vcpu->kvm->arch.start_stop_lock);
3598 	return;
3599 }
3600 
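/*
 * Handle KVM_ENABLE_CAP on the VCPU fd. Only KVM_CAP_S390_CSS_SUPPORT
 * is accepted here; it enables a VM-wide setting even though it is
 * toggled through a VCPU ioctl.
 */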
3601 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
3602 				     struct kvm_enable_cap *cap)
3603 {
3604 	int r;
3605 
3606 	if (cap->flags)
3607 		return -EINVAL;
3608 
3609 	switch (cap->cap) {
3610 	case KVM_CAP_S390_CSS_SUPPORT:
3611 		if (!vcpu->kvm->arch.css_support) {
3612 			vcpu->kvm->arch.css_support = 1;
3613 			VM_EVENT(vcpu->kvm, 3, "%s", "ENABLE: CSS support");
3614 			trace_kvm_s390_enable_css(vcpu->kvm);
3615 		}
3616 		r = 0;
3617 		break;
3618 	default:
3619 		r = -EINVAL;
3620 		break;
3621 	}
3622 	return r;
3623 }
3624 
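/*
 * Back end for the KVM_S390_MEM_OP ioctl: read or write guest logical
 * memory through a bounce buffer, or, with KVM_S390_MEMOP_F_CHECK_ONLY,
 * only verify that the guest address range is accessible. A positive
 * return value indicates an access exception; if
 * KVM_S390_MEMOP_F_INJECT_EXCEPTION is set, it is also injected into
 * the guest as a program interrupt.
 */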
3625 static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
3626 				  struct kvm_s390_mem_op *mop)
3627 {
3628 	void __user *uaddr = (void __user *)mop->buf;
3629 	void *tmpbuf = NULL;
3630 	int r, srcu_idx;
3631 	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
3632 				    | KVM_S390_MEMOP_F_CHECK_ONLY;
3633 
3634 	if (mop->flags & ~supported_flags)
3635 		return -EINVAL;
3636 
3637 	if (mop->size > MEM_OP_MAX_SIZE)
3638 		return -E2BIG;
3639 
3640 	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
3641 		tmpbuf = vmalloc(mop->size);
3642 		if (!tmpbuf)
3643 			return -ENOMEM;
3644 	}
3645 
3646 	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
3647 
3648 	switch (mop->op) {
3649 	case KVM_S390_MEMOP_LOGICAL_READ:
3650 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3651 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3652 					    mop->size, GACC_FETCH);
3653 			break;
3654 		}
3655 		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3656 		if (r == 0) {
3657 			if (copy_to_user(uaddr, tmpbuf, mop->size))
3658 				r = -EFAULT;
3659 		}
3660 		break;
3661 	case KVM_S390_MEMOP_LOGICAL_WRITE:
3662 		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
3663 			r = check_gva_range(vcpu, mop->gaddr, mop->ar,
3664 					    mop->size, GACC_STORE);
3665 			break;
3666 		}
3667 		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
3668 			r = -EFAULT;
3669 			break;
3670 		}
3671 		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
3672 		break;
3673 	default:
3674 		r = -EINVAL;
3675 	}
3676 
3677 	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
3678 
3679 	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
3680 		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
3681 
3682 	vfree(tmpbuf);
3683 	return r;
3684 }
3685 
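/*
 * Dispatcher for the s390 specific VCPU ioctls: interrupt injection,
 * store status, initial reset, ONE_REG access, ucontrol address space
 * mappings, guest memory operations and irq state save/restore.
 */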
3686 long kvm_arch_vcpu_ioctl(struct file *filp,
3687 			 unsigned int ioctl, unsigned long arg)
3688 {
3689 	struct kvm_vcpu *vcpu = filp->private_data;
3690 	void __user *argp = (void __user *)arg;
3691 	int idx;
3692 	long r;
3693 
3694 	switch (ioctl) {
3695 	case KVM_S390_IRQ: {
3696 		struct kvm_s390_irq s390irq;
3697 
3698 		r = -EFAULT;
3699 		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
3700 			break;
3701 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3702 		break;
3703 	}
3704 	case KVM_S390_INTERRUPT: {
3705 		struct kvm_s390_interrupt s390int;
3706 		struct kvm_s390_irq s390irq;
3707 
3708 		r = -EFAULT;
3709 		if (copy_from_user(&s390int, argp, sizeof(s390int)))
3710 			break;
3711 		if (s390int_to_s390irq(&s390int, &s390irq))
3712 			return -EINVAL;
3713 		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
3714 		break;
3715 	}
3716 	case KVM_S390_STORE_STATUS:
3717 		idx = srcu_read_lock(&vcpu->kvm->srcu);
3718 		r = kvm_s390_vcpu_store_status(vcpu, arg);
3719 		srcu_read_unlock(&vcpu->kvm->srcu, idx);
3720 		break;
3721 	case KVM_S390_SET_INITIAL_PSW: {
3722 		psw_t psw;
3723 
3724 		r = -EFAULT;
3725 		if (copy_from_user(&psw, argp, sizeof(psw)))
3726 			break;
3727 		r = kvm_arch_vcpu_ioctl_set_initial_psw(vcpu, psw);
3728 		break;
3729 	}
3730 	case KVM_S390_INITIAL_RESET:
3731 		r = kvm_arch_vcpu_ioctl_initial_reset(vcpu);
3732 		break;
3733 	case KVM_SET_ONE_REG:
3734 	case KVM_GET_ONE_REG: {
3735 		struct kvm_one_reg reg;
3736 		r = -EFAULT;
3737 		if (copy_from_user(&reg, argp, sizeof(reg)))
3738 			break;
3739 		if (ioctl == KVM_SET_ONE_REG)
3740 			r = kvm_arch_vcpu_ioctl_set_one_reg(vcpu, &reg);
3741 		else
3742 			r = kvm_arch_vcpu_ioctl_get_one_reg(vcpu, &reg);
3743 		break;
3744 	}
3745 #ifdef CONFIG_KVM_S390_UCONTROL
3746 	case KVM_S390_UCAS_MAP: {
3747 		struct kvm_s390_ucas_mapping ucasmap;
3748 
3749 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3750 			r = -EFAULT;
3751 			break;
3752 		}
3753 
3754 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3755 			r = -EINVAL;
3756 			break;
3757 		}
3758 
3759 		r = gmap_map_segment(vcpu->arch.gmap, ucasmap.user_addr,
3760 				     ucasmap.vcpu_addr, ucasmap.length);
3761 		break;
3762 	}
3763 	case KVM_S390_UCAS_UNMAP: {
3764 		struct kvm_s390_ucas_mapping ucasmap;
3765 
3766 		if (copy_from_user(&ucasmap, argp, sizeof(ucasmap))) {
3767 			r = -EFAULT;
3768 			break;
3769 		}
3770 
3771 		if (!kvm_is_ucontrol(vcpu->kvm)) {
3772 			r = -EINVAL;
3773 			break;
3774 		}
3775 
3776 		r = gmap_unmap_segment(vcpu->arch.gmap, ucasmap.vcpu_addr,
3777 			ucasmap.length);
3778 		break;
3779 	}
3780 #endif
3781 	case KVM_S390_VCPU_FAULT: {
3782 		r = gmap_fault(vcpu->arch.gmap, arg, 0);
3783 		break;
3784 	}
3785 	case KVM_ENABLE_CAP:
3786 	{
3787 		struct kvm_enable_cap cap;
3788 		r = -EFAULT;
3789 		if (copy_from_user(&cap, argp, sizeof(cap)))
3790 			break;
3791 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
3792 		break;
3793 	}
3794 	case KVM_S390_MEM_OP: {
3795 		struct kvm_s390_mem_op mem_op;
3796 
3797 		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
3798 			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
3799 		else
3800 			r = -EFAULT;
3801 		break;
3802 	}
3803 	case KVM_S390_SET_IRQ_STATE: {
3804 		struct kvm_s390_irq_state irq_state;
3805 
3806 		r = -EFAULT;
3807 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3808 			break;
3809 		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
3810 		    irq_state.len == 0 ||
3811 		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
3812 			r = -EINVAL;
3813 			break;
3814 		}
3815 		r = kvm_s390_set_irq_state(vcpu,
3816 					   (void __user *) irq_state.buf,
3817 					   irq_state.len);
3818 		break;
3819 	}
3820 	case KVM_S390_GET_IRQ_STATE: {
3821 		struct kvm_s390_irq_state irq_state;
3822 
3823 		r = -EFAULT;
3824 		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
3825 			break;
3826 		if (irq_state.len == 0) {
3827 			r = -EINVAL;
3828 			break;
3829 		}
3830 		r = kvm_s390_get_irq_state(vcpu,
3831 					   (__u8 __user *)  irq_state.buf,
3832 					   irq_state.len);
3833 		break;
3834 	}
3835 	default:
3836 		r = -ENOTTY;
3837 	}
3838 	return r;
3839 }
3840 
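/*
 * For user-controlled (ucontrol) VMs, a fault on KVM_S390_SIE_PAGE_OFFSET
 * of the VCPU mmap region exposes the SIE control block to user space;
 * any other fault is answered with SIGBUS.
 */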
3841 int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf)
3842 {
3843 #ifdef CONFIG_KVM_S390_UCONTROL
3844 	if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET)
3845 		 && (kvm_is_ucontrol(vcpu->kvm))) {
3846 		vmf->page = virt_to_page(vcpu->arch.sie_block);
3847 		get_page(vmf->page);
3848 		return 0;
3849 	}
3850 #endif
3851 	return VM_FAULT_SIGBUS;
3852 }
3853 
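/*
 * No architecture specific memslot state needs to be allocated here;
 * the guest mapping is established in kvm_arch_commit_memory_region()
 * below.
 */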
3854 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
3855 			    unsigned long npages)
3856 {
3857 	return 0;
3858 }
3859 
3860 /* Section: memory related */
3861 int kvm_arch_prepare_memory_region(struct kvm *kvm,
3862 				   struct kvm_memory_slot *memslot,
3863 				   const struct kvm_userspace_memory_region *mem,
3864 				   enum kvm_mr_change change)
3865 {
3866 	/* A few sanity checks. Memory slots have to start and end on a
3867 	   segment boundary (1 MB). The memory in userland may be fragmented
3868 	   into various different vmas, and it is fine to mmap() and munmap()
3869 	   parts of this slot at any time after doing this call. */
3870 
3871 	if (mem->userspace_addr & 0xffffful)
3872 		return -EINVAL;
3873 
3874 	if (mem->memory_size & 0xffffful)
3875 		return -EINVAL;
3876 
3877 	if (mem->guest_phys_addr + mem->memory_size > kvm->arch.mem_limit)
3878 		return -EINVAL;
3879 
3880 	return 0;
3881 }
3882 
3883 void kvm_arch_commit_memory_region(struct kvm *kvm,
3884 				const struct kvm_userspace_memory_region *mem,
3885 				const struct kvm_memory_slot *old,
3886 				const struct kvm_memory_slot *new,
3887 				enum kvm_mr_change change)
3888 {
3889 	int rc;
3890 
3891 	/* If the basics of the memslot do not change, we do not want
3892 	 * to update the gmap. Every update causes several unnecessary
3893 	 * segment translation exceptions. This is usually handled just
3894 	 * fine by the normal fault handler + gmap, but it will also
3895 	 * cause faults on the prefix page of running guest CPUs.
3896 	 */
3897 	if (old->userspace_addr == mem->userspace_addr &&
3898 	    old->base_gfn * PAGE_SIZE == mem->guest_phys_addr &&
3899 	    old->npages * PAGE_SIZE == mem->memory_size)
3900 		return;
3901 
3902 	rc = gmap_map_segment(kvm->arch.gmap, mem->userspace_addr,
3903 		mem->guest_phys_addr, mem->memory_size);
3904 	if (rc)
3905 		pr_warn("failed to commit memory region\n");
3906 	return;
3907 }
3908 
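/*
 * For STFLE doubleword i, build the mask of facility bits that may be
 * passed through to guests on top of the static KVM facility mask.
 * sclp.hmfai carries a two-bit value per doubleword; each increment of
 * that value removes another 16 bits from the returned mask.
 */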
3909 static inline unsigned long nonhyp_mask(int i)
3910 {
3911 	unsigned int nonhyp_fai = (sclp.hmfai << i * 2) >> 30;
3912 
3913 	return 0x0000ffffffffffffUL >> (nonhyp_fai << 4);
3914 }
3915 
3916 void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu)
3917 {
3918 	vcpu->valid_wakeup = false;
3919 }
3920 
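/*
 * Module init: refuse to load without the SIE interpretation facility
 * (sief2), widen the facility mask with the machine-provided bits
 * allowed by nonhyp_mask(), then register with the generic KVM core.
 */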
3921 static int __init kvm_s390_init(void)
3922 {
3923 	int i;
3924 
3925 	if (!sclp.has_sief2) {
3926 		pr_info("SIE not available\n");
3927 		return -ENODEV;
3928 	}
3929 
3930 	for (i = 0; i < 16; i++)
3931 		kvm_s390_fac_list_mask[i] |=
3932 			S390_lowcore.stfle_fac_list[i] & nonhyp_mask(i);
3933 
3934 	return kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE);
3935 }
3936 
3937 static void __exit kvm_s390_exit(void)
3938 {
3939 	kvm_exit();
3940 }
3941 
3942 module_init(kvm_s390_init);
3943 module_exit(kvm_s390_exit);
3944 
3945 /*
3946  * Enable autoloading of the kvm module.
3947  * Note that we add the module alias here instead of virt/kvm/kvm_main.c
3948  * since x86 takes a different approach.
3949  */
3950 #include <linux/miscdevice.h>
3951 MODULE_ALIAS_MISCDEV(KVM_MINOR);
3952 MODULE_ALIAS("devname:kvm");
3953