/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2022 Oxide Computer Company
 */

#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <strings.h>
#include <assert.h>
#include <errno.h>

#include <sys/types.h>
#include <sys/segments.h>
#include <sys/psw.h>
#include <sys/controlregs.h>
#include <sys/sysmacros.h>
#include <sys/varargs.h>
#include <sys/debug.h>
#include <sys/mman.h>

#include <sys/vmm.h>
#include <sys/vmm_dev.h>
#include <vmmapi.h>

#include "in_guest.h"


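/* Page table entry bits for the long-mode identity map built below */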
#define	PT_VALID	0x01
#define	PT_WRITABLE	0x02
#define	PT_WRITETHRU	0x08
#define	PT_NOCACHE	0x10
#define	PT_PAGESIZE	0x80

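/*
 * Segment descriptor access bits, in the layout consumed by vm_set_desc():
 * type in bits 0-4, DPL in bits 5-6, present in bit 7, with AVL/L/D/G and
 * the VMX "unusable" flag above.
 */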
#define	SEG_ACCESS_TYPE_MASK	0x1f
#define	SEG_ACCESS_DPL_MASK	0x60
#define	SEG_ACCESS_P		(1 << 7)
#define	SEG_ACCESS_AVL		(1 << 12)
#define	SEG_ACCESS_L		(1 << 13)
#define	SEG_ACCESS_D		(1 << 14)
#define	SEG_ACCESS_G		(1 << 15)
#define	SEG_ACCESS_UNUSABLE	(1 << 16)


/*
 * Keep the test name and VM context around so the consumer is not required to
 * pass either of them to us for subsequent test-related operations after the
 * initialization has been performed.
 *
 * The test code is not designed to be reentrant at this point.
 */
static struct vmctx *test_vmctx = NULL;
static const char *test_name = NULL;

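/*
 * A typical consumer follows roughly this pattern (a sketch only: real
 * tests differ in how they drive and classify exits, and MEM_LOC_STACK
 * is assumed to be among the guest layout constants in in_guest.h):
 *
 *	struct vmctx *ctx = test_initialize("example");
 *	struct vm_entry ventry = { 0 };
 *	struct vm_exit vexit = { 0 };
 *
 *	int err = test_setup_vcpu(ctx, 0, MEM_LOC_PAYLOAD, MEM_LOC_STACK);
 *	if (err != 0) {
 *		test_fail_errno(err, "Could not set up vcpu");
 *	}
 *	switch (test_run_vcpu(ctx, 0, &ventry, &vexit)) {
 *	case VEK_TEST_PASS:
 *		test_pass();
 *		break;
 *	default:
 *		test_fail_vmexit(&vexit);
 *		break;
 *	}
 */
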
/*
 * Create the bootrom memory segment and map it read/execute at MEM_LOC_ROM.
 */
static int
setup_rom(struct vmctx *ctx)
{
	const size_t seg_sz = 0x1000;
	const uintptr_t seg_addr = MEM_LOC_ROM;
	const int fd = vm_get_device_fd(ctx);
	int err;

	struct vm_memseg memseg = {
		.segid = VM_BOOTROM,
		.len = seg_sz,
	};
	(void) strlcpy(memseg.name, "testrom", sizeof (memseg.name));
	err = ioctl(fd, VM_ALLOC_MEMSEG, &memseg);
	if (err != 0) {
		return (err);
	}
	err = vm_mmap_memseg(ctx, seg_addr, VM_BOOTROM, 0, seg_sz,
	    PROT_READ | PROT_EXEC);
	return (err);
}

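/*
 * Build an identity map covering the low 4GiB of guest-physical address
 * space: four page directories of 2MiB mappings, one page-directory-pointer
 * table referencing them, and a single PML4 entry referencing that table.
 */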
static void
populate_identity_table(struct vmctx *ctx)
{
	uint64_t gpa, pte_loc;

	/* Set up 2MiB PTEs for everything up through 0xffffffff */
	for (gpa = 0, pte_loc = MEM_LOC_PAGE_TABLE_2M;
	    gpa < 0x100000000;
	    pte_loc += PAGE_SIZE) {
		uint64_t *ptep = vm_map_gpa(ctx, pte_loc, PAGE_SIZE);

		for (uint_t i = 0; i < 512; i++, ptep++, gpa += 0x200000) {
			*ptep = gpa | PT_VALID | PT_WRITABLE | PT_PAGESIZE;
			/* Make traditional MMIO space uncachable */
			if (gpa >= 0xc0000000) {
				*ptep |= PT_WRITETHRU | PT_NOCACHE;
			}
		}
	}
	assert(gpa == 0x100000000 && pte_loc == MEM_LOC_PAGE_TABLE_1G);

	/* PDP entries, one for each page directory built above */
	uint64_t *pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_1G, PAGE_SIZE);
	pdep[0] = MEM_LOC_PAGE_TABLE_2M | PT_VALID | PT_WRITABLE;
	pdep[1] = (MEM_LOC_PAGE_TABLE_2M + PAGE_SIZE) | PT_VALID | PT_WRITABLE;
	pdep[2] =
	    (MEM_LOC_PAGE_TABLE_2M + 2 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;
	pdep[3] =
	    (MEM_LOC_PAGE_TABLE_2M + 3 * PAGE_SIZE) | PT_VALID | PT_WRITABLE;

	/* Lone PML4 entry, referencing the PDP table */
	pdep = vm_map_gpa(ctx, MEM_LOC_PAGE_TABLE_512G, PAGE_SIZE);
	pdep[0] = MEM_LOC_PAGE_TABLE_1G | PT_VALID | PT_WRITABLE;
}

static void
populate_desc_tables(struct vmctx *ctx)
{
	/*
	 * Currently a no-op: the segment state the tests require is
	 * established directly through vm_set_desc() in test_setup_vcpu(),
	 * rather than by populating descriptor tables in guest memory.
	 */
}

void
test_cleanup(bool is_failure)
{
	if (test_vmctx != NULL) {
		bool keep_on_fail = false;

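		/* Any non-empty KEEP_ON_FAIL value other than "0" is true */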
		const char *keep_var;
		if ((keep_var = getenv("KEEP_ON_FAIL")) != NULL) {
			if (strlen(keep_var) != 0 &&
			    strcmp(keep_var, "0") != 0) {
				keep_on_fail = true;
			}
		}

		/*
		 * Destroy the instance unless the test failed and it was
		 * requested that we keep it around.
		 */
		if (!is_failure || !keep_on_fail) {
			vm_destroy(test_vmctx);
		}
		test_name = NULL;
		test_vmctx = NULL;
	}
}

static void
fail_finish(void)
{
	assert(test_name != NULL);
	(void) printf("FAIL %s\n", test_name);

	test_cleanup(true);
	exit(EXIT_FAILURE);
}

void
test_fail_errno(int err, const char *msg)
{
	const char *err_str = strerror(err);

	(void) fprintf(stderr, "%s: %s\n", msg, err_str);
	fail_finish();
}

void
test_fail_msg(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);

	fail_finish();
}

void
test_fail_vmexit(const struct vm_exit *vexit)
{
	const char *hdr_fmt = "Unexpected %s exit:\n\t%%rip: %lx\n";

	switch (vexit->exitcode) {
	case VM_EXITCODE_INOUT:
		(void) fprintf(stderr, hdr_fmt, "IN/OUT", vexit->rip);
		(void) fprintf(stderr,
		    "\teax: %08x\n"
		    "\tport: %04x\n"
		    "\tbytes: %u\n"
		    "\tflags: %x\n",
		    vexit->u.inout.eax,
		    vexit->u.inout.port,
		    vexit->u.inout.bytes,
		    vexit->u.inout.flags);
		break;
	case VM_EXITCODE_RDMSR:
		(void) fprintf(stderr, hdr_fmt, "RDMSR", vexit->rip);
		(void) fprintf(stderr, "\tcode: %08x\n", vexit->u.msr.code);
		break;
	case VM_EXITCODE_WRMSR:
		(void) fprintf(stderr, hdr_fmt, "WRMSR", vexit->rip);
		(void) fprintf(stderr,
		    "\tcode: %08x\n"
		    "\twval: %016lx\n",
		    vexit->u.msr.code, vexit->u.msr.wval);
		break;
	case VM_EXITCODE_MMIO:
		(void) fprintf(stderr, hdr_fmt, "MMIO", vexit->rip);
		(void) fprintf(stderr,
		    "\tbytes: %u\n"
		    "\ttype: %s\n"
		    "\tgpa: %lx\n"
		    "\tdata: %016lx\n",
		    vexit->u.mmio.bytes,
		    vexit->u.mmio.read == 0 ? "write" : "read",
		    vexit->u.mmio.gpa,
		    vexit->u.mmio.data);
		break;
	case VM_EXITCODE_VMX:
		(void) fprintf(stderr, hdr_fmt, "VMX", vexit->rip);
		(void) fprintf(stderr,
		    "\tstatus: %x\n"
		    "\treason: %x\n"
		    "\tqualification: %lx\n"
		    "\tinst_type: %x\n"
		    "\tinst_error: %x\n",
		    vexit->u.vmx.status,
		    vexit->u.vmx.exit_reason,
		    vexit->u.vmx.exit_qualification,
		    vexit->u.vmx.inst_type,
		    vexit->u.vmx.inst_error);
		break;
	case VM_EXITCODE_SVM:
		(void) fprintf(stderr, hdr_fmt, "SVM", vexit->rip);
		break;
	case VM_EXITCODE_INST_EMUL:
		(void) fprintf(stderr, hdr_fmt, "instruction emulation",
		    vexit->rip);
		const uint_t len = vexit->u.inst_emul.num_valid > 0 ?
		    vexit->u.inst_emul.num_valid : 15;
		(void) fprintf(stderr, "\tinstruction bytes: [");
		for (uint_t i = 0; i < len; i++) {
			(void) fprintf(stderr, "%s%02x",
			    i == 0 ? "" : ", ",
			    vexit->u.inst_emul.inst[i]);
		}
		(void) fprintf(stderr, "]\n");
		break;
	case VM_EXITCODE_SUSPENDED:
		(void) fprintf(stderr, hdr_fmt, "suspend", vexit->rip);
		switch (vexit->u.suspended.how) {
		case VM_SUSPEND_RESET:
			(void) fprintf(stderr, "\thow: reset\n");
			break;
		case VM_SUSPEND_POWEROFF:
			(void) fprintf(stderr, "\thow: poweroff\n");
			break;
		case VM_SUSPEND_HALT:
			(void) fprintf(stderr, "\thow: halt\n");
			break;
		case VM_SUSPEND_TRIPLEFAULT:
			(void) fprintf(stderr, "\thow: triple-fault\n");
			break;
		default:
			(void) fprintf(stderr, "\thow: unknown - %d\n",
			    vexit->u.suspended.how);
			break;
		}
		break;
	default:
		(void) fprintf(stderr, "Unexpected code %d exit:\n"
		    "\t%%rip: %lx\n", vexit->exitcode, vexit->rip);
		break;
	}
	fail_finish();
}

void
test_pass(void)
{
	assert(test_name != NULL);
	(void) printf("PASS %s\n", test_name);
	test_cleanup(false);
	exit(EXIT_SUCCESS);
}

static int
load_payload(struct vmctx *ctx)
{
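	/* Symbols provided by the payload linked into each test binary */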
	extern uint8_t payload_data;
	extern uint32_t payload_size;

	const uint32_t len = payload_size;
	const uint32_t cap = (MEM_TOTAL_SZ - MEM_LOC_PAYLOAD);

	if (len > cap) {
		test_fail_msg("Payload size %u > capacity %u\n", len, cap);
	}

	const size_t map_len = P2ROUNDUP(len, PAGE_SIZE);
	void *outp = vm_map_gpa(ctx, MEM_LOC_PAYLOAD, map_len);
	bcopy(&payload_data, outp, len);

	return (0);
}

struct vmctx *
test_initialize(const char *tname)
{
	return (test_initialize_flags(tname, 0));
}

struct vmctx *
test_initialize_flags(const char *tname, uint64_t create_flags)
{
	char vm_name[VM_MAX_NAMELEN];
	int err;
	struct vmctx *ctx;

	assert(test_vmctx == NULL);
	assert(test_name == NULL);

	test_name = strdup(tname);
	(void) snprintf(vm_name, sizeof (vm_name), "bhyve-test-%s-%d",
	    test_name, getpid());

	err = vm_create(vm_name, create_flags);
	if (err != 0) {
		test_fail_errno(err, "Could not create VM");
	}

	ctx = vm_open(vm_name);
	if (ctx == NULL) {
		test_fail_errno(errno, "Could not open VM");
	}
	test_vmctx = ctx;

	err = vm_setup_memory(ctx, MEM_TOTAL_SZ, VM_MMAP_ALL);
	if (err != 0) {
		test_fail_errno(err, "Could not set up VM memory");
	}

	err = setup_rom(ctx);
	if (err != 0) {
		test_fail_errno(err, "Could not set up VM ROM segment");
	}

	populate_identity_table(ctx);
	populate_desc_tables(ctx);

	err = load_payload(ctx);
	if (err != 0) {
		test_fail_errno(err, "Could not load payload");
	}

	return (ctx);
}

int
test_setup_vcpu(struct vmctx *ctx, int vcpu, uint64_t rip, uint64_t rsp)
{
	int err;

	err = vm_activate_cpu(ctx, vcpu);
	if (err != 0 && err != EBUSY) {
		return (err);
	}

	/*
	 * Granularity bit important here for VMX validity:
	 * "If any bit in the limit field in the range 31:20 is 1, G must be 1"
	 */
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_CS, 0, UINT32_MAX,
	    SDT_MEMERA | SEG_ACCESS_P | SEG_ACCESS_L | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_SS, 0, UINT32_MAX,
	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_L |
	    SEG_ACCESS_D | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_DS, 0, UINT32_MAX,
	    SDT_MEMRWA | SEG_ACCESS_P | SEG_ACCESS_D | SEG_ACCESS_G);
	if (err != 0) {
		return (err);
	}

	/*
	 * While SVM will happily run with an otherwise unusable TR, VMX
	 * includes it among its entry checks.
	 */
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_TR, MEM_LOC_TSS, 0xff,
	    SDT_SYSTSSBSY | SEG_ACCESS_P);
	if (err != 0) {
		return (err);
	}
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_GDTR, MEM_LOC_GDT, 0x1ff, 0);
	if (err != 0) {
		return (err);
	}
	err = vm_set_desc(ctx, vcpu, VM_REG_GUEST_IDTR, MEM_LOC_IDT, 0xfff, 0);
	if (err != 0) {
		return (err);
	}

	/* Mark unused segments as explicitly unusable (for VMX) */
	const int unusable_segs[] = {
		VM_REG_GUEST_ES,
		VM_REG_GUEST_FS,
		VM_REG_GUEST_GS,
		VM_REG_GUEST_LDTR,
	};
	for (uint_t i = 0; i < ARRAY_SIZE(unusable_segs); i++) {
		err = vm_set_desc(ctx, vcpu, unusable_segs[i], 0, 0,
		    SEG_ACCESS_UNUSABLE);
		if (err != 0) {
			return (err);
		}
	}

	/* Place CPU directly in long mode */
	const int regnums[] = {
		VM_REG_GUEST_CR0,
		VM_REG_GUEST_CR3,
		VM_REG_GUEST_CR4,
		VM_REG_GUEST_EFER,
		VM_REG_GUEST_RFLAGS,
		VM_REG_GUEST_RIP,
		VM_REG_GUEST_RSP,
		VM_REG_GUEST_CS,
		VM_REG_GUEST_SS,
		VM_REG_GUEST_DS,
		VM_REG_GUEST_TR,
	};
	uint64_t regvals[] = {
		CR0_PG | CR0_AM | CR0_WP | CR0_NE | CR0_ET | CR0_TS |
		    CR0_MP | CR0_PE,
		MEM_LOC_PAGE_TABLE_512G,
		CR4_DE | CR4_PSE | CR4_PAE | CR4_MCE | CR4_PGE | CR4_FSGSBASE,
		AMD_EFER_SCE | AMD_EFER_LME | AMD_EFER_LMA | AMD_EFER_NXE,
		/* start with interrupts disabled */
		PS_MB1,
		rip,
		rsp,
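		/* selectors: GDT index shifted by 3, TI clear, RPL 0 */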
		(GDT_KCODE << 3),
		(GDT_KDATA << 3),
		(GDT_KDATA << 3),
		(GDT_KTSS << 3),
	};
	assert(ARRAY_SIZE(regnums) == ARRAY_SIZE(regvals));

	err = vm_set_register_set(ctx, vcpu, ARRAY_SIZE(regnums), regnums,
	    regvals);
	if (err != 0) {
		return (err);
	}

	err = vm_set_run_state(ctx, vcpu, VRS_RUN, 0);
	if (err != 0) {
		return (err);
	}

	return (0);
}

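/*
 * Classify a VM exit, handling cases common to all tests: spurious exits
 * which merely require re-entry, and the result report from the payload.
 */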
static enum vm_exit_kind
which_exit_kind(struct vm_entry *ventry, const struct vm_exit *vexit)
{
	const struct vm_inout *inout = &vexit->u.inout;

	switch (vexit->exitcode) {
	case VM_EXITCODE_BOGUS:
	case VM_EXITCODE_REQIDLE:
		/* Spurious exits: clear the entry state and re-enter */
		bzero(ventry, sizeof (*ventry));
		return (VEK_REENTR);
	case VM_EXITCODE_INOUT:
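		/* Payload reports pass/fail via OUT to IOP_TEST_RESULT */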
		if (inout->port == IOP_TEST_RESULT &&
		    (inout->flags & INOUT_IN) == 0) {
			if (inout->eax == TEST_RESULT_PASS) {
				return (VEK_TEST_PASS);
			} else {
				return (VEK_TEST_FAIL);
			}
		}
		break;
	default:
		break;
	}
	return (VEK_UNHANDLED);
}

enum vm_exit_kind
test_run_vcpu(struct vmctx *ctx, int vcpu, struct vm_entry *ventry,
    struct vm_exit *vexit)
{
	int err;

	err = vm_run(ctx, vcpu, ventry, vexit);
	if (err != 0) {
		test_fail_errno(err, "Failure during vcpu entry");
	}

	return (which_exit_kind(ventry, vexit));
}

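/*
 * Direct the next VM entry to fulfill an in/out exit, supplying data as the
 * result if the access was a read (IN).
 */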
void
ventry_fulfill_inout(const struct vm_exit *vexit, struct vm_entry *ventry,
    uint32_t data)
{
	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_INOUT);

	ventry->cmd = VEC_FULFILL_INOUT;
	bcopy(&vexit->u.inout, &ventry->u.inout, sizeof (struct vm_inout));
	if ((ventry->u.inout.flags & INOUT_IN) != 0) {
		ventry->u.inout.eax = data;
	}
}

/*
 * Like ventry_fulfill_inout(), but for MMIO exits.
 */
void
ventry_fulfill_mmio(const struct vm_exit *vexit, struct vm_entry *ventry,
    uint64_t data)
{
	VERIFY3U(vexit->exitcode, ==, VM_EXITCODE_MMIO);

	ventry->cmd = VEC_FULFILL_MMIO;
	bcopy(&vexit->u.mmio, &ventry->u.mmio, sizeof (struct vm_mmio));
	if (ventry->u.mmio.read != 0) {
		ventry->u.mmio.data = data;
	}
}

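/*
 * Check whether an exit matches an expected in/out access.  For an OUT, the
 * value written is returned through valp (when non-NULL).
 */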
bool
vexit_match_inout(const struct vm_exit *vexit, bool is_read, uint16_t port,
    uint_t len, uint32_t *valp)
{
	if (vexit->exitcode != VM_EXITCODE_INOUT) {
		return (false);
	}

	const uint_t flag = is_read ? INOUT_IN : 0;
	if (vexit->u.inout.port != port ||
	    vexit->u.inout.bytes != len ||
	    (vexit->u.inout.flags & INOUT_IN) != flag) {
		return (false);
	}

	if (!is_read && valp != NULL) {
		*valp = vexit->u.inout.eax;
	}
	return (true);
}

/*
 * Like vexit_match_inout(), but for MMIO exits.
 */
bool
vexit_match_mmio(const struct vm_exit *vexit, bool is_read, uint64_t addr,
    uint_t len, uint64_t *valp)
{
	if (vexit->exitcode != VM_EXITCODE_MMIO) {
		return (false);
	}

	if (vexit->u.mmio.gpa != addr ||
	    vexit->u.mmio.bytes != len ||
	    (vexit->u.mmio.read != 0) != is_read) {
		return (false);
	}

	if (!is_read && valp != NULL) {
		*valp = vexit->u.mmio.data;
	}
	return (true);
}