xref: /illumos-gate/usr/src/uts/intel/sys/vmm.h (revision 55fea89dcaa64928bed4327112404dcb3e07b79f)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 /*
31  * This file and its contents are supplied under the terms of the
32  * Common Development and Distribution License ("CDDL"), version 1.0.
33  * You may only use this file in accordance with the terms of version
34  * 1.0 of the CDDL.
35  *
36  * A full copy of the text of the CDDL should have accompanied this
37  * source.  A copy of the CDDL is also available via the Internet at
38  * http://www.illumos.org/license/CDDL.
39  *
40  * Copyright 2015 Pluribus Networks Inc.
41  * Copyright 2019 Joyent, Inc.
42  * Copyright 2022 Oxide Computer Company
43  */
44 
45 #ifndef _VMM_H_
46 #define	_VMM_H_
47 
/*
 * Reasons recorded for a VM being suspended (see the `suspended` payload of
 * struct vm_exit).  VM_SUSPEND_LAST trails the real entries, presumably as an
 * end marker rather than a suspend reason of its own.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE		= 0,
	VM_SUSPEND_RESET	= 1,
	VM_SUSPEND_POWEROFF	= 2,
	VM_SUSPEND_HALT		= 3,
	VM_SUSPEND_TRIPLEFAULT	= 4,
	VM_SUSPEND_LAST		= 5
};
56 
/*
 * Identifiers for architecturally defined registers.
 *
 * The numeric values are spelled out so that the identifier <-> number
 * mapping is visible at a glance; VM_REG_LAST follows the final register.
 */
enum vm_reg_name {
	/* General purpose registers */
	VM_REG_GUEST_RAX		= 0,
	VM_REG_GUEST_RBX		= 1,
	VM_REG_GUEST_RCX		= 2,
	VM_REG_GUEST_RDX		= 3,
	VM_REG_GUEST_RSI		= 4,
	VM_REG_GUEST_RDI		= 5,
	VM_REG_GUEST_RBP		= 6,
	VM_REG_GUEST_R8			= 7,
	VM_REG_GUEST_R9			= 8,
	VM_REG_GUEST_R10		= 9,
	VM_REG_GUEST_R11		= 10,
	VM_REG_GUEST_R12		= 11,
	VM_REG_GUEST_R13		= 12,
	VM_REG_GUEST_R14		= 13,
	VM_REG_GUEST_R15		= 14,

	/* Control and debug registers, stack/instruction pointers, flags */
	VM_REG_GUEST_CR0		= 15,
	VM_REG_GUEST_CR3		= 16,
	VM_REG_GUEST_CR4		= 17,
	VM_REG_GUEST_DR7		= 18,
	VM_REG_GUEST_RSP		= 19,
	VM_REG_GUEST_RIP		= 20,
	VM_REG_GUEST_RFLAGS		= 21,

	/* Segment and descriptor-table registers */
	VM_REG_GUEST_ES			= 22,
	VM_REG_GUEST_CS			= 23,
	VM_REG_GUEST_SS			= 24,
	VM_REG_GUEST_DS			= 25,
	VM_REG_GUEST_FS			= 26,
	VM_REG_GUEST_GS			= 27,
	VM_REG_GUEST_LDTR		= 28,
	VM_REG_GUEST_TR			= 29,
	VM_REG_GUEST_IDTR		= 30,
	VM_REG_GUEST_GDTR		= 31,

	VM_REG_GUEST_EFER		= 32,
	VM_REG_GUEST_CR2		= 33,
	VM_REG_GUEST_PDPTE0		= 34,
	VM_REG_GUEST_PDPTE1		= 35,
	VM_REG_GUEST_PDPTE2		= 36,
	VM_REG_GUEST_PDPTE3		= 37,
	VM_REG_GUEST_INTR_SHADOW	= 38,

	/* Remaining debug registers */
	VM_REG_GUEST_DR0		= 39,
	VM_REG_GUEST_DR1		= 40,
	VM_REG_GUEST_DR2		= 41,
	VM_REG_GUEST_DR3		= 42,
	VM_REG_GUEST_DR6		= 43,

	VM_REG_GUEST_ENTRY_INST_LENGTH	= 44,
	VM_REG_GUEST_XCR0		= 45,
	VM_REG_LAST			= 46
};
109 
/*
 * x2APIC enablement states.  X2APIC_STATE_LAST follows the two real states.
 */
enum x2apic_state {
	X2APIC_DISABLED		= 0,
	X2APIC_ENABLED		= 1,
	X2APIC_STATE_LAST	= 2
};
115 
/*
 * Encoding of an interrupt-info value, as laid out by the masks below:
 *	bits  7:0	vector
 *	bits 10:8	event type (one of the VM_INTINFO_<type> values)
 *	bit  11		VM_INTINFO_DEL_ERRCODE
 *	bits 30:12	reserved
 *	bit  31		VM_INTINFO_VALID
 *	bits 32+	error code
 */

/* Field masks and shifts */
#define	VM_INTINFO_MASK_VECTOR	0xffUL
#define	VM_INTINFO_MASK_TYPE	0x700UL
#define	VM_INTINFO_MASK_RSVD	0x7ffff000UL
#define	VM_INTINFO_SHIFT_ERRCODE 32

/* Single-bit flags */
#define	VM_INTINFO_VALID	(1UL << 31)
#define	VM_INTINFO_DEL_ERRCODE	(1UL << 11)

/* Event types, positioned within VM_INTINFO_MASK_TYPE */
#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCP	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)
/* Reserved for CPU (read: Intel) specific types */
#define	VM_INTINFO_RESV1	(1 << 8)
#define	VM_INTINFO_RESV5	(5 << 8)
#define	VM_INTINFO_RESV6	(6 << 8)
#define	VM_INTINFO_RESV7	(7 << 8)

/* Field accessors */
#define	VM_INTINFO_VECTOR(val)	((val) & VM_INTINFO_MASK_VECTOR)
#define	VM_INTINFO_TYPE(val)	((val) & VM_INTINFO_MASK_TYPE)
#define	VM_INTINFO_ERRCODE(val)	((val) >> VM_INTINFO_SHIFT_ERRCODE)
#define	VM_INTINFO_PENDING(val)	(((val) & VM_INTINFO_VALID) != 0)
#define	VM_INTINFO_HAS_ERRCODE(val) (((val) & VM_INTINFO_DEL_ERRCODE) != 0)
139 
/*
 * Unlike FreeBSD, illumos has no SPECNAMELEN-based limitation on names.  An
 * arbitrary limit has been chosen to keep structure definitions simple, and
 * the same limit is applied to memory segment names.
 */
#define	VM_MAX_NAMELEN		128
#define	VM_MAX_SEG_NAMELEN	128

#if defined(_KERNEL)
/* maximum virtual cpus */
#define	VM_MAXCPU	32
#endif	/* _KERNEL */
152 
/*
 * Identifiers for optional vmm capabilities.  VM_CAP_MAX follows the last
 * defined capability.
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT	= 0,
	VM_CAP_MTRAP_EXIT	= 1,
	VM_CAP_PAUSE_EXIT	= 2,
	VM_CAP_ENABLE_INVPCID	= 3,
	VM_CAP_BPT_EXIT		= 4,
	VM_CAP_MAX		= 5
};
164 
/*
 * VMX capability bits.  Each non-zero entry occupies a distinct bit, so the
 * values may be OR-ed together.
 */
enum vmx_caps {
	VMX_CAP_NONE		= 0x0,
	VMX_CAP_TPR_SHADOW	= 0x1,	/* bit 0 */
	VMX_CAP_APICV		= 0x2,	/* bit 1 */
	VMX_CAP_APICV_X2APIC	= 0x4,	/* bit 2 */
	VMX_CAP_APICV_PIR	= 0x8,	/* bit 3 */
};
172 
/* Interrupt trigger modes */
enum vm_intr_trigger {
	EDGE_TRIGGER	= 0,
	LEVEL_TRIGGER	= 1
};
177 
/*
 * Segment descriptor state.
 *
 * The 'access' field follows the format given in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX Everything in 'access' is architecturally defined, with the exception
 * of bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/* Descriptor type: the low five bits of 'access' */
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)

/* Descriptor privilege level: a two-bit field starting at bit 5 */
#define	SEG_DESC_DPL_SHIFT		5
#define	SEG_DESC_DPL_MASK		0x3
#define	SEG_DESC_DPL(access)		\
	(((access) >> SEG_DESC_DPL_SHIFT) & SEG_DESC_DPL_MASK)

/* Single-bit tests, each evaluating to 1 when the bit is set and 0 if not */
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) != 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) != 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) != 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) != 0)
199 
/* CPU operating modes */
enum vm_cpu_mode {
	CPU_MODE_REAL		= 0,
	CPU_MODE_PROTECTED	= 1,
	CPU_MODE_COMPATIBILITY	= 2,	/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT		= 3,	/* IA-32E mode (CS.L = 1) */
};
206 
/* Paging modes */
enum vm_paging_mode {
	PAGING_MODE_FLAT	= 0,
	PAGING_MODE_32		= 1,
	PAGING_MODE_PAE		= 2,
	PAGING_MODE_64		= 3,
};
213 
214 struct vm_guest_paging {
215 	uint64_t	cr3;
216 	int		cpl;
217 	enum vm_cpu_mode cpu_mode;
218 	enum vm_paging_mode paging_mode;
219 };
220 
/*
 * Exit codes reported in struct vm_exit.  Values are written out explicitly;
 * VM_EXITCODE_MAX follows the last defined code.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT	= 0,
	VM_EXITCODE_VMX		= 1,
	VM_EXITCODE_BOGUS	= 2,
	VM_EXITCODE_RDMSR	= 3,
	VM_EXITCODE_WRMSR	= 4,
	VM_EXITCODE_HLT		= 5,
	VM_EXITCODE_MTRAP	= 6,
	VM_EXITCODE_PAUSE	= 7,
	VM_EXITCODE_PAGING	= 8,
	VM_EXITCODE_INST_EMUL	= 9,
	VM_EXITCODE_RUN_STATE	= 10,
	VM_EXITCODE_MMIO_EMUL	= 11,
	VM_EXITCODE_DEPRECATED	= 12,	/* formerly RUNBLOCK */
	VM_EXITCODE_IOAPIC_EOI	= 13,
	VM_EXITCODE_SUSPENDED	= 14,
	VM_EXITCODE_MMIO	= 15,
	VM_EXITCODE_TASK_SWITCH	= 16,
	VM_EXITCODE_MONITOR	= 17,
	VM_EXITCODE_MWAIT	= 18,
	VM_EXITCODE_SVM		= 19,
	VM_EXITCODE_REQIDLE	= 20,
	VM_EXITCODE_DEBUG	= 21,
	VM_EXITCODE_VMINSN	= 22,
	VM_EXITCODE_BPT		= 23,
	VM_EXITCODE_HT		= 24,
	VM_EXITCODE_MAX		= 25
};
249 
/* Flag bits stored in vm_inout`flags */
enum inout_flags {
	/* direction: 'in' when set, else 'out' */
	INOUT_IN	= 0x1,

	/*
	 * The remaining flags are used only by the in-kernel emulation logic
	 * and are not exposed to userspace.
	 */

	/* ins/outs operation */
	INOUT_STR	= 0x2,
	/* 'rep' prefix present on instruction */
	INOUT_REP	= 0x4,
};
260 
/* Description of a port I/O (in/out) access */
struct vm_inout {
	uint32_t	eax;
	uint16_t	port;
	/* access width: 1 or 2 or 4 */
	uint8_t		bytes;
	/* see: inout_flags */
	uint8_t		flags;

	/*
	 * Address size and segment matter only for INS/OUTS operations.  The
	 * in-kernel emulation deals with those aspects itself, so userspace
	 * need not be concerned with these two fields.
	 */
	uint8_t		addrsize;
	uint8_t		segment;
};
275 
/* Description of an MMIO access */
struct vm_mmio {
	/* access width: 1/2/4/8 bytes */
	uint8_t		bytes;
	/* read: 1, write: 0 */
	uint8_t		read;
	/* explicit padding so that 'gpa' begins at offset 8 */
	uint16_t	_pad[3];
	uint64_t	gpa;
	uint64_t	data;
};
283 
/* What initiated a task switch (see vm_task_switch`reason) */
enum task_switch_reason {
	TSR_CALL	= 0,
	TSR_IRET	= 1,
	TSR_JMP		= 2,
	TSR_IDT_GATE	= 3,	/* task gate in IDT */
};
290 
291 struct vm_task_switch {
292 	uint16_t	tsssel;		/* new TSS selector */
293 	int		ext;		/* task switch due to external event */
294 	uint32_t	errcode;
295 	int		errcode_valid;	/* push 'errcode' on the new stack */
296 	enum task_switch_reason reason;
297 	struct vm_guest_paging paging;
298 };
299 
/*
 * vCPU run-state bits.  VRS_HALT is the absence of any bit; the remaining
 * entries are individual flags: VRS_INIT and VRS_RUN in the low bits, and the
 * two "pending" flags (VRS_PEND_INIT, VRS_PEND_SIPI) in bits 14 and 15.
 */
enum vcpu_run_state {
	VRS_HALT		= 0,
	VRS_INIT		= (1 << 0),
	VRS_RUN			= (1 << 1),

	VRS_PEND_INIT		= (1 << 14),
	VRS_PEND_SIPI		= (1 << 15),
};

/*
 * Reduce `v` to only those bits which correspond to a flag defined in
 * vcpu_run_state.  Every defined flag must appear here: VRS_PEND_INIT was
 * previously missing (VRS_PEND_SIPI was listed twice in its place), which
 * made any state carrying a pending INIT fail VRS_IS_VALID().
 */
#define	VRS_MASK_VALID(v)	\
	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))

/*
 * Evaluates to non-zero when `v` has no bits set outside of the defined
 * vcpu_run_state flags.  Note that `v` is evaluated twice.
 */
#define	VRS_IS_VALID(v)		((v) == VRS_MASK_VALID(v))
311 
312 struct vm_exit {
313 	enum vm_exitcode	exitcode;
314 	int			inst_length;	/* 0 means unknown */
315 	uint64_t		rip;
316 	union {
317 		struct vm_inout	inout;
318 		struct vm_mmio	mmio;
319 		struct {
320 			uint64_t	gpa;
321 			int		fault_type;
322 		} paging;
323 		/*
324 		 * Kernel-internal MMIO decoding and emulation.
325 		 * Userspace should not expect to see this, but rather a
326 		 * VM_EXITCODE_MMIO with the above 'mmio' context.
327 		 */
328 		struct {
329 			uint64_t	gpa;
330 			uint64_t	gla;
331 			uint64_t	cs_base;
332 			int		cs_d;		/* CS.D */
333 		} mmio_emul;
334 		struct {
335 			uint8_t		inst[15];
336 			uint8_t		num_valid;
337 		} inst_emul;
338 		/*
339 		 * VMX specific payload. Used when there is no "better"
340 		 * exitcode to represent the VM-exit.
341 		 */
342 		struct {
343 			int		status;		/* vmx inst status */
344 			/*
345 			 * 'exit_reason' and 'exit_qualification' are valid
346 			 * only if 'status' is zero.
347 			 */
348 			uint32_t	exit_reason;
349 			uint64_t	exit_qualification;
350 			/*
351 			 * 'inst_error' and 'inst_type' are valid
352 			 * only if 'status' is non-zero.
353 			 */
354 			int		inst_type;
355 			int		inst_error;
356 		} vmx;
357 		/*
358 		 * SVM specific payload.
359 		 */
360 		struct {
361 			uint64_t	exitcode;
362 			uint64_t	exitinfo1;
363 			uint64_t	exitinfo2;
364 		} svm;
365 		struct {
366 			int		inst_length;
367 		} bpt;
368 		struct {
369 			uint32_t	code;		/* ecx value */
370 			uint64_t	wval;
371 		} msr;
372 		struct {
373 			uint64_t	rflags;
374 		} hlt;
375 		struct {
376 			int		vector;
377 		} ioapic_eoi;
378 		struct {
379 			enum vm_suspend_how how;
380 		} suspended;
381 		struct vm_task_switch task_switch;
382 	} u;
383 };
384 
/*
 * Commands (and combinable flags) for vm_entry`cmd.
 */
enum vm_entry_cmds {
	VEC_DEFAULT		= 0,
	/* discard inst emul state */
	VEC_DISCARD_INSTR	= 1,
	/* entry includes result for mmio emul */
	VEC_FULFILL_MMIO	= 2,
	/* entry includes result for inout emul */
	VEC_FULFILL_INOUT	= 3,

	/* Below are flags which can be combined with the above commands: */

	/*
	 * Exit to userspace once the vCPU is in a consistent state, meaning
	 * that any pending instruction emulation tasks have been completed
	 * and committed to the architecturally defined state.
	 *
	 * NOTE(review): `1 << 31` shifts into the sign bit of an int.  The
	 * expression is kept as-is so the constant's value is unchanged.
	 */
	VEC_FLAG_EXIT_CONSISTENT	= 1 << 31,
};
400 
401 struct vm_entry {
402 	int cpuid;
403 	uint_t cmd;		/* see: vm_entry_cmds */
404 	void *exit_data;
405 	union {
406 		struct vm_inout inout;
407 		struct vm_mmio mmio;
408 	} u;
409 };
410 
/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): going by its name, this arranges for the current instruction
 * of vCPU `vcpuid` in `vm` to be executed again -- confirm against the
 * implementation, along with the meaning of the return value.
 */
extern int vm_restart_instruction(void *vm, int vcpuid);
412 
/* Flag bits accepted when creating a VM */
enum vm_create_flags {
	/*
	 * Guest memory segments are allocated from existing reservoir
	 * capacity, instead of attempting to create transient allocations.
	 */
	VCF_RESERVOIR_MEM	= 0x1,

	/*
	 * Dirty page tracking is enabled for the guest.
	 */
	VCF_TRACK_DIRTY		= 0x2,
};
425 
/*
 * A single entry for `cpuid` emulation.
 * Besides being exposed through the ioctl(2) interface, this is also used
 * internally by bhyve (kernel).
 */
struct vcpu_cpuid_entry {
	/* Matching criteria and flags */
	uint32_t	vce_function;
	uint32_t	vce_index;
	uint32_t	vce_flags;	/* see: VCE_FLAG_* */

	/* Register values associated with the entry */
	uint32_t	vce_eax;
	uint32_t	vce_ebx;
	uint32_t	vce_ecx;
	uint32_t	vce_edx;

	uint32_t	_pad;
};
440 
/*
 * Flags defined for vcpu_cpuid_entry`vce_flags follow.
 */

/* When matching an entry, also compare the index (ecx) input value */
#define	VCE_FLAG_MATCH_INDEX		(1 << 0)

/* All valid flags for vcpu_cpuid_entry`vce_flags */
#define	VCE_FLAGS_VALID		VCE_FLAG_MATCH_INDEX
450 
/*
 * Flags defined for vcpu_cpuid configuration follow.
 * They are used internally by the kernel vmm as well as by the ioctl(2)
 * interface, via vm_vcpu_cpuid_config.
 */

/* Apply the legacy hard-coded cpuid masking tables to the host CPU */
#define	VCC_FLAG_LEGACY_HANDLING	(1 << 0)

/*
 * When the queried function/index do not match, emulate Intel-style fallback
 * behavior (emit the highest "standard" entry).  When this flag is not set,
 * AMD-style behavior is emulated instead, where such queries return all
 * zeroes.
 */
#define	VCC_FLAG_INTEL_FALLBACK		(1 << 1)

/* All valid flags for vm_vcpu_cpuid_config`vvcc_flags */
#define	VCC_FLAGS_VALID		\
	(VCC_FLAG_LEGACY_HANDLING | VCC_FLAG_INTEL_FALLBACK)

/* Maximum vcpu_cpuid_entry records per vCPU */
#define	VMM_MAX_CPUID_ENTRIES		256
472 
473 #endif	/* _VMM_H_ */
474