xref: /illumos-gate/usr/src/uts/i86pc/os/cpr_impl.c (revision 45818ee124adeaaf947698996b4f4c722afc6d1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 /*
26  * Platform specific implementation code
27  * Currently only suspend to RAM is supported (ACPI S3)
28  */
29 
30 #define	SUNDDI_IMPL
31 
32 #include <sys/types.h>
33 #include <sys/promif.h>
34 #include <sys/prom_isa.h>
35 #include <sys/prom_plat.h>
36 #include <sys/cpuvar.h>
37 #include <sys/pte.h>
38 #include <vm/hat.h>
39 #include <vm/page.h>
40 #include <vm/as.h>
41 #include <sys/cpr.h>
42 #include <sys/kmem.h>
43 #include <sys/clock.h>
44 #include <sys/kmem.h>
45 #include <sys/panic.h>
46 #include <vm/seg_kmem.h>
47 #include <sys/cpu_module.h>
48 #include <sys/callb.h>
49 #include <sys/machsystm.h>
50 #include <sys/vmsystm.h>
51 #include <sys/systm.h>
52 #include <sys/archsystm.h>
53 #include <sys/stack.h>
54 #include <sys/fs/ufs_fs.h>
55 #include <sys/memlist.h>
56 #include <sys/bootconf.h>
57 #include <sys/thread.h>
58 #include <sys/x_call.h>
59 #include <sys/smp_impldefs.h>
60 #include <vm/vm_dep.h>
61 #include <sys/psm.h>
62 #include <sys/epm.h>
63 #include <sys/cpr_wakecode.h>
64 #include <sys/x86_archext.h>
65 #include <sys/reboot.h>
66 #include <sys/acpi/acpi.h>
67 #include <sys/acpica.h>
68 #include <sys/fp.h>
69 #include <sys/sysmacros.h>
70 
71 #define	AFMT	"%lx"
72 
73 extern int	flushes_require_xcalls;
74 extern cpuset_t	cpu_ready_set;
75 
76 #if defined(__amd64)
77 extern void	*wc_long_mode_64(void);
78 #endif	/* __amd64 */
79 extern int	tsc_gethrtime_enable;
80 extern	void	i_cpr_start_cpu(void);
81 
82 ushort_t	cpr_mach_type = CPR_MACHTYPE_X86;
83 void		(*cpr_start_cpu_func)(void) = i_cpr_start_cpu;
84 
85 static wc_cpu_t	*wc_other_cpus = NULL;
86 static cpuset_t procset;
87 
88 static void
89 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt);
90 
91 static int i_cpr_platform_alloc(psm_state_request_t *req);
92 static void i_cpr_platform_free(psm_state_request_t *req);
93 static int i_cpr_save_apic(psm_state_request_t *req);
94 static int i_cpr_restore_apic(psm_state_request_t *req);
95 static int wait_for_set(cpuset_t *set, int who);
96 
97 static	void i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu);
98 void i_cpr_restore_stack(kthread_t *t, greg_t *save_stack);
99 
100 #ifdef STACK_GROWTH_DOWN
101 #define	CPR_GET_STACK_START(t) ((t)->t_stkbase)
102 #define	CPR_GET_STACK_END(t) ((t)->t_stk)
103 #else
104 #define	CPR_GET_STACK_START(t) ((t)->t_stk)
105 #define	CPR_GET_STACK_END(t) ((t)->t_stkbase)
106 #endif	/* STACK_GROWTH_DOWN */
107 
108 /*
109  * restart paused slave cpus
110  */
111 void
112 i_cpr_machdep_setup(void)
113 {
114 	if (ncpus > 1) {
115 		CPR_DEBUG(CPR_DEBUG1, ("MP restarted...\n"));
116 		mutex_enter(&cpu_lock);
117 		start_cpus();
118 		mutex_exit(&cpu_lock);
119 	}
120 }
121 
122 
/*
 * Quiesce interrupt activity by calling spl7(); the value it returns is
 * deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
131 
/*
 * Allow the machine to take interrupts again by calling spl0(); the value
 * it returns is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
140 
/*
 * Placeholder for saving the miscellaneous information that a state file
 * would need in order to re-initialize kernel/prom handshaking.
 *
 * This routine is not expected to be called on this platform: it simply
 * trips an assertion (on kernels where ASSERT is active).
 */
void
i_cpr_save_machdep_info(void)
{
	/* a variable rather than a literal is handed to ASSERT */
	int notcalled = 0;

	ASSERT(notcalled);
}
152 
153 
/*
 * Intentionally empty on this platform.
 */
void
i_cpr_set_tbr(void)
{
	/* nothing to do */
}
158 
159 
160 processorid_t
161 i_cpr_bootcpuid(void)
162 {
163 	return (0);
164 }
165 
166 /*
167  * cpu0 should contain bootcpu info
168  */
169 cpu_t *
170 i_cpr_bootcpu(void)
171 {
172 	ASSERT(MUTEX_HELD(&cpu_lock));
173 
174 	return (cpu_get(i_cpr_bootcpuid()));
175 }
176 
177 /*
178  *	Save context for the specified CPU
179  */
180 void *
181 i_cpr_save_context(void *arg)
182 {
183 	long	index = (long)arg;
184 	psm_state_request_t *papic_state;
185 	int resuming;
186 	int	ret;
187 	wc_cpu_t	*wc_cpu = wc_other_cpus + index;
188 
189 	PMD(PMD_SX, ("i_cpr_save_context() index = %ld\n", index))
190 
191 	ASSERT(index < NCPU);
192 
193 	papic_state = &(wc_cpu)->wc_apic_state;
194 
195 	ret = i_cpr_platform_alloc(papic_state);
196 	ASSERT(ret == 0);
197 
198 	ret = i_cpr_save_apic(papic_state);
199 	ASSERT(ret == 0);
200 
201 	i_cpr_save_stack(curthread, wc_cpu);
202 
203 	/*
204 	 * wc_save_context returns twice, once when susending and
205 	 * once when resuming,  wc_save_context() returns 0 when
206 	 * suspending and non-zero upon resume
207 	 */
208 	resuming = (wc_save_context(wc_cpu) == 0);
209 
210 	/*
211 	 * do NOT call any functions after this point, because doing so
212 	 * will modify the stack that we are running on
213 	 */
214 
215 	if (resuming) {
216 
217 		ret = i_cpr_restore_apic(papic_state);
218 		ASSERT(ret == 0);
219 
220 		i_cpr_platform_free(papic_state);
221 
222 		/*
223 		 * Enable interrupts on this cpu.
224 		 * Do not bind interrupts to this CPU's local APIC until
225 		 * the CPU is ready to receive interrupts.
226 		 */
227 		ASSERT(CPU->cpu_id != i_cpr_bootcpuid());
228 		mutex_enter(&cpu_lock);
229 		cpu_enable_intr(CPU);
230 		mutex_exit(&cpu_lock);
231 
232 		/*
233 		 * Setting the bit in cpu_ready_set must be the last operation
234 		 * in processor initialization; the boot CPU will continue to
235 		 * boot once it sees this bit set for all active CPUs.
236 		 */
237 		CPUSET_ATOMIC_ADD(cpu_ready_set, CPU->cpu_id);
238 
239 		PMD(PMD_SX,
240 		    ("i_cpr_save_context() resuming cpu %d in cpu_ready_set\n",
241 		    CPU->cpu_id))
242 	} else {
243 		/*
244 		 * Disable interrupts on this CPU so that PSM knows not to bind
245 		 * interrupts here on resume until the CPU has executed
246 		 * cpu_enable_intr() (above) in the resume path.
247 		 * We explicitly do not grab cpu_lock here because at this point
248 		 * in the suspend process, the boot cpu owns cpu_lock and all
249 		 * other cpus are also executing in the pause thread (only
250 		 * modifying their respective CPU structure).
251 		 */
252 		(void) cpu_disable_intr(CPU);
253 	}
254 
255 	PMD(PMD_SX, ("i_cpr_save_context: wc_save_context returns %d\n",
256 	    resuming))
257 
258 	return (NULL);
259 }
260 
261 static ushort_t *warm_reset_vector = NULL;
262 
263 static ushort_t *
264 map_warm_reset_vector()
265 {
266 	/*LINTED*/
267 	if (!(warm_reset_vector = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
268 	    sizeof (ushort_t *), PROT_READ|PROT_WRITE)))
269 		return (NULL);
270 
271 	/*
272 	 * setup secondary cpu bios boot up vector
273 	 */
274 	*warm_reset_vector = (ushort_t)((caddr_t)
275 	    /*LINTED*/
276 	    ((struct rm_platter *)rm_platter_va)->rm_code - rm_platter_va
277 	    + ((ulong_t)rm_platter_va & 0xf));
278 	warm_reset_vector++;
279 	*warm_reset_vector = (ushort_t)(rm_platter_pa >> 4);
280 
281 	--warm_reset_vector;
282 	return (warm_reset_vector);
283 }
284 
285 void
286 i_cpr_pre_resume_cpus()
287 {
288 	/*
289 	 * this is a cut down version of start_other_cpus()
290 	 * just do the initialization to wake the other cpus
291 	 */
292 	unsigned who;
293 	int boot_cpuid = i_cpr_bootcpuid();
294 	uint32_t		code_length = 0;
295 	caddr_t			wakevirt = rm_platter_va;
296 	/*LINTED*/
297 	wakecode_t		*wp = (wakecode_t *)wakevirt;
298 	char *str = "i_cpr_pre_resume_cpus";
299 	extern int get_tsc_ready();
300 	int err;
301 
302 	/*LINTED*/
303 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
304 
305 	/*
306 	 * If startup wasn't able to find a page under 1M, we cannot
307 	 * proceed.
308 	 */
309 	if (rm_platter_va == 0) {
310 		cmn_err(CE_WARN, "Cannot suspend the system because no "
311 		    "memory below 1M could be found for processor startup");
312 		return;
313 	}
314 
315 	/*
316 	 * Copy the real mode code at "real_mode_start" to the
317 	 * page at rm_platter_va.
318 	 */
319 	warm_reset_vector = map_warm_reset_vector();
320 	if (warm_reset_vector == NULL) {
321 		PMD(PMD_SX, ("i_cpr_pre_resume_cpus() returning #2\n"))
322 		return;
323 	}
324 
325 	flushes_require_xcalls = 1;
326 
327 	/*
328 	 * We lock our affinity to the master CPU to ensure that all slave CPUs
329 	 * do their TSC syncs with the same CPU.
330 	 */
331 
332 	affinity_set(CPU_CURRENT);
333 
334 	/*
335 	 * Mark the boot cpu as being ready and in the procset, since we are
336 	 * running on that cpu.
337 	 */
338 	CPUSET_ONLY(cpu_ready_set, boot_cpuid);
339 	CPUSET_ONLY(procset, boot_cpuid);
340 
341 	for (who = 0; who < max_ncpus; who++) {
342 
343 		wc_cpu_t	*cpup = wc_other_cpus + who;
344 		wc_desctbr_t	gdt;
345 
346 		if (who == boot_cpuid)
347 			continue;
348 
349 		if (!CPU_IN_SET(mp_cpus, who))
350 			continue;
351 
352 		PMD(PMD_SX, ("%s() waking up %d cpu\n", str, who))
353 
354 		bcopy(cpup, &(wp->wc_cpu), sizeof (wc_cpu_t));
355 
356 		gdt.base = cpup->wc_gdt_base;
357 		gdt.limit = cpup->wc_gdt_limit;
358 
359 #if defined(__amd64)
360 		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
361 		    (uintptr_t)wc_rm_start);
362 #else
363 		code_length = 0;
364 #endif
365 
366 		init_real_mode_platter(who, code_length, cpup->wc_cr4, gdt);
367 
368 		mutex_enter(&cpu_lock);
369 		err = mach_cpuid_start(who, rm_platter_va);
370 		mutex_exit(&cpu_lock);
371 		if (err != 0) {
372 			cmn_err(CE_WARN, "cpu%d: failed to start during "
373 			    "suspend/resume error %d", who, err);
374 			continue;
375 		}
376 
377 		PMD(PMD_SX, ("%s() #1 waiting for %d in procset\n", str, who))
378 
379 		if (!wait_for_set(&procset, who))
380 			continue;
381 
382 		PMD(PMD_SX, ("%s() %d cpu started\n", str, who))
383 
384 		PMD(PMD_SX, ("%s() tsc_ready = %d\n", str, get_tsc_ready()))
385 
386 		if (tsc_gethrtime_enable) {
387 			PMD(PMD_SX, ("%s() calling tsc_sync_master\n", str))
388 			tsc_sync_master(who);
389 		}
390 
391 		PMD(PMD_SX, ("%s() waiting for %d in cpu_ready_set\n", str,
392 		    who))
393 		/*
394 		 * Wait for cpu to declare that it is ready, we want the
395 		 * cpus to start serially instead of in parallel, so that
396 		 * they do not contend with each other in wc_rm_start()
397 		 */
398 		if (!wait_for_set(&cpu_ready_set, who))
399 			continue;
400 
401 		/*
402 		 * do not need to re-initialize dtrace using dtrace_cpu_init
403 		 * function
404 		 */
405 		PMD(PMD_SX, ("%s() cpu %d now ready\n", str, who))
406 	}
407 
408 	affinity_clear();
409 
410 	PMD(PMD_SX, ("%s() all cpus now ready\n", str))
411 
412 }
413 
414 static void
415 unmap_warm_reset_vector(ushort_t *warm_reset_vector)
416 {
417 	psm_unmap_phys((caddr_t)warm_reset_vector, sizeof (ushort_t *));
418 }
419 
420 /*
421  * We need to setup a 1:1 (virtual to physical) mapping for the
422  * page containing the wakeup code.
423  */
424 static struct as *save_as;	/* when switching to kas */
425 
426 static void
427 unmap_wakeaddr_1to1(uint64_t wakephys)
428 {
429 	uintptr_t	wp = (uintptr_t)wakephys;
430 	hat_setup(save_as->a_hat, 0);	/* switch back from kernel hat */
431 	hat_unload(kas.a_hat, (caddr_t)wp, PAGESIZE, HAT_UNLOAD);
432 }
433 
434 void
435 i_cpr_post_resume_cpus()
436 {
437 	uint64_t	wakephys = rm_platter_pa;
438 
439 	if (warm_reset_vector != NULL)
440 		unmap_warm_reset_vector(warm_reset_vector);
441 
442 	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
443 	    HAT_UNLOAD);
444 
445 	/*
446 	 * cmi_post_mpstartup() is only required upon boot not upon
447 	 * resume from RAM
448 	 */
449 
450 	PT(PT_UNDO1to1);
451 	/* Tear down 1:1 mapping for wakeup code */
452 	unmap_wakeaddr_1to1(wakephys);
453 }
454 
/*
 * Intentionally empty on this platform; flag is ignored.
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
	/* nothing to do */
}
460 
/*
 * Report whether reusable statefiles are supported.  They are not:
 * this always returns 0.
 */
int
i_cpr_reusable_supported(void)
{
	const int	supported = 0;

	return (supported);
}
466 static void
467 map_wakeaddr_1to1(uint64_t wakephys)
468 {
469 	uintptr_t	wp = (uintptr_t)wakephys;
470 	hat_devload(kas.a_hat, (caddr_t)wp, PAGESIZE, btop(wakephys),
471 	    (PROT_READ|PROT_WRITE|PROT_EXEC|HAT_STORECACHING_OK|HAT_NOSYNC),
472 	    HAT_LOAD);
473 	save_as = curthread->t_procp->p_as;
474 	hat_setup(kas.a_hat, 0);	/* switch to kernel-only hat */
475 }
476 
477 
478 void
479 prt_other_cpus()
480 {
481 	int	who;
482 
483 	if (ncpus == 1) {
484 		PMD(PMD_SX, ("prt_other_cpus() other cpu table empty for "
485 		    "uniprocessor machine\n"))
486 		return;
487 	}
488 
489 	for (who = 0; who < max_ncpus; who++) {
490 
491 		wc_cpu_t	*cpup = wc_other_cpus + who;
492 
493 		if (!CPU_IN_SET(mp_cpus, who))
494 			continue;
495 
496 		PMD(PMD_SX, ("prt_other_cpus() who = %d, gdt=%p:%x, "
497 		    "idt=%p:%x, ldt=%lx, tr=%lx, kgsbase="
498 		    AFMT ", sp=%lx\n", who,
499 		    (void *)cpup->wc_gdt_base, cpup->wc_gdt_limit,
500 		    (void *)cpup->wc_idt_base, cpup->wc_idt_limit,
501 		    (long)cpup->wc_ldt, (long)cpup->wc_tr,
502 		    (long)cpup->wc_kgsbase, (long)cpup->wc_rsp))
503 	}
504 }
505 
506 /*
507  * Power down the system.
508  */
509 int
510 i_cpr_power_down(int sleeptype)
511 {
512 	caddr_t		wakevirt = rm_platter_va;
513 	uint64_t	wakephys = rm_platter_pa;
514 	ulong_t		saved_intr;
515 	uint32_t	code_length = 0;
516 	wc_desctbr_t	gdt;
517 	/*LINTED*/
518 	wakecode_t	*wp = (wakecode_t *)wakevirt;
519 	/*LINTED*/
520 	rm_platter_t	*wcpp = (rm_platter_t *)wakevirt;
521 	wc_cpu_t	*cpup = &(wp->wc_cpu);
522 	dev_info_t	*ppm;
523 	int		ret = 0;
524 	power_req_t	power_req;
525 	char *str =	"i_cpr_power_down";
526 #if defined(__amd64)
527 	/*LINTED*/
528 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
529 #endif
530 	extern int	cpr_suspend_succeeded;
531 	extern void	kernel_wc_code();
532 
533 	ASSERT(sleeptype == CPR_TORAM);
534 	ASSERT(CPU->cpu_id == 0);
535 
536 	if ((ppm = PPM(ddi_root_node())) == NULL) {
537 		PMD(PMD_SX, ("%s: root node not claimed\n", str))
538 		return (ENOTTY);
539 	}
540 
541 	PMD(PMD_SX, ("Entering %s()\n", str))
542 
543 	PT(PT_IC);
544 	saved_intr = intr_clear();
545 
546 	PT(PT_1to1);
547 	/* Setup 1:1 mapping for wakeup code */
548 	map_wakeaddr_1to1(wakephys);
549 
550 	PMD(PMD_SX, ("ncpus=%d\n", ncpus))
551 
552 	PMD(PMD_SX, ("wc_rm_end - wc_rm_start=%lx WC_CODESIZE=%x\n",
553 	    ((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)),
554 	    WC_CODESIZE))
555 
556 	PMD(PMD_SX, ("wakevirt=%p, wakephys=%x\n",
557 	    (void *)wakevirt, (uint_t)wakephys))
558 
559 	ASSERT(((size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start)) <
560 	    WC_CODESIZE);
561 
562 	bzero(wakevirt, PAGESIZE);
563 
564 	/* Copy code to rm_platter */
565 	bcopy((caddr_t)wc_rm_start, wakevirt,
566 	    (size_t)((uintptr_t)wc_rm_end - (uintptr_t)wc_rm_start));
567 
568 	prt_other_cpus();
569 
570 #if defined(__amd64)
571 
572 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
573 	    (ulong_t)real_mode_platter->rm_cr4, (ulong_t)getcr4()))
574 
575 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
576 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
577 
578 	real_mode_platter->rm_cr4 = getcr4();
579 	real_mode_platter->rm_pdbr = getcr3();
580 
581 	rmp_gdt_init(real_mode_platter);
582 
583 	/*
584 	 * Since the CPU needs to jump to protected mode using an identity
585 	 * mapped address, we need to calculate it here.
586 	 */
587 	real_mode_platter->rm_longmode64_addr = rm_platter_pa +
588 	    (uint32_t)((uintptr_t)wc_long_mode_64 - (uintptr_t)wc_rm_start);
589 
590 	PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
591 	    (ulong_t)real_mode_platter->rm_cr4, getcr4()))
592 	PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
593 	    (ulong_t)real_mode_platter->rm_pdbr, getcr3()))
594 
595 	PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
596 	    (ulong_t)real_mode_platter->rm_longmode64_addr))
597 
598 #endif
599 
600 	PT(PT_SC);
601 	if (wc_save_context(cpup)) {
602 
603 		ret = i_cpr_platform_alloc(&(wc_other_cpus->wc_apic_state));
604 		if (ret != 0)
605 			return (ret);
606 
607 		ret = i_cpr_save_apic(&(wc_other_cpus->wc_apic_state));
608 		PMD(PMD_SX, ("%s: i_cpr_save_apic() returned %d\n", str, ret))
609 		if (ret != 0)
610 			return (ret);
611 
612 		PMD(PMD_SX, ("wakephys=%x, kernel_wc_code=%p\n",
613 		    (uint_t)wakephys, (void *)&kernel_wc_code))
614 		PMD(PMD_SX, ("virtaddr=%lx, retaddr=%lx\n",
615 		    (long)cpup->wc_virtaddr, (long)cpup->wc_retaddr))
616 		PMD(PMD_SX, ("ebx=%x, edi=%x, esi=%x, ebp=%x, esp=%x\n",
617 		    cpup->wc_ebx, cpup->wc_edi, cpup->wc_esi, cpup->wc_ebp,
618 		    cpup->wc_esp))
619 		PMD(PMD_SX, ("cr0=%lx, cr3=%lx, cr4=%lx\n",
620 		    (long)cpup->wc_cr0, (long)cpup->wc_cr3,
621 		    (long)cpup->wc_cr4))
622 		PMD(PMD_SX, ("cs=%x, ds=%x, es=%x, ss=%x, fs=%lx, gs=%lx, "
623 		    "flgs=%lx\n", cpup->wc_cs, cpup->wc_ds, cpup->wc_es,
624 		    cpup->wc_ss, (long)cpup->wc_fs, (long)cpup->wc_gs,
625 		    (long)cpup->wc_eflags))
626 
627 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
628 		    "kgbase=%lx\n", (void *)cpup->wc_gdt_base,
629 		    cpup->wc_gdt_limit, (void *)cpup->wc_idt_base,
630 		    cpup->wc_idt_limit, (long)cpup->wc_ldt,
631 		    (long)cpup->wc_tr, (long)cpup->wc_kgsbase))
632 
633 		gdt.base = cpup->wc_gdt_base;
634 		gdt.limit = cpup->wc_gdt_limit;
635 
636 #if defined(__amd64)
637 		code_length = (uint32_t)((uintptr_t)wc_long_mode_64 -
638 		    (uintptr_t)wc_rm_start);
639 #else
640 		code_length = 0;
641 #endif
642 
643 		init_real_mode_platter(0, code_length, cpup->wc_cr4, gdt);
644 
645 #if defined(__amd64)
646 		PMD(PMD_SX, ("real_mode_platter->rm_cr4=%lx, getcr4()=%lx\n",
647 		    (ulong_t)wcpp->rm_cr4, getcr4()))
648 
649 		PMD(PMD_SX, ("real_mode_platter->rm_pdbr=%lx, getcr3()=%lx\n",
650 		    (ulong_t)wcpp->rm_pdbr, getcr3()))
651 
652 		PMD(PMD_SX, ("real_mode_platter->rm_longmode64_addr=%lx\n",
653 		    (ulong_t)wcpp->rm_longmode64_addr))
654 
655 		PMD(PMD_SX,
656 		    ("real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64]=%lx\n",
657 		    (ulong_t)wcpp->rm_temp_gdt[TEMPGDT_KCODE64]))
658 #endif
659 
660 		PMD(PMD_SX, ("gdt=%p:%x, idt=%p:%x, ldt=%lx, tr=%lx, "
661 		    "kgsbase=%lx\n", (void *)wcpp->rm_gdt_base,
662 		    wcpp->rm_gdt_lim, (void *)wcpp->rm_idt_base,
663 		    wcpp->rm_idt_lim, (long)cpup->wc_ldt, (long)cpup->wc_tr,
664 		    (long)cpup->wc_kgsbase))
665 
666 		power_req.request_type = PMR_PPM_ENTER_SX;
667 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
668 		power_req.req.ppm_power_enter_sx_req.test_point =
669 		    cpr_test_point;
670 		power_req.req.ppm_power_enter_sx_req.wakephys = wakephys;
671 
672 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_ENTER_SX\n", str))
673 		PT(PT_PPMCTLOP);
674 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
675 		    &power_req, &ret);
676 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
677 
678 		/*
679 		 * If it works, we get control back to the else branch below
680 		 * If we get control back here, it didn't work.
681 		 * XXX return EINVAL here?
682 		 */
683 
684 		unmap_wakeaddr_1to1(wakephys);
685 		intr_restore(saved_intr);
686 
687 		return (ret);
688 	} else {
689 		cpr_suspend_succeeded = 1;
690 
691 		power_req.request_type = PMR_PPM_EXIT_SX;
692 		power_req.req.ppm_power_enter_sx_req.sx_state = S3;
693 
694 		PMD(PMD_SX, ("%s: pm_ctlops PMR_PPM_EXIT_SX\n", str))
695 		PT(PT_PPMCTLOP);
696 		(void) pm_ctlops(ppm, ddi_root_node(), DDI_CTLOPS_POWER,
697 		    &power_req, &ret);
698 		PMD(PMD_SX, ("%s: returns %d\n", str, ret))
699 
700 		ret = i_cpr_restore_apic(&(wc_other_cpus->wc_apic_state));
701 		/*
702 		 * the restore should never fail, if the saved suceeded
703 		 */
704 		ASSERT(ret == 0);
705 
706 		i_cpr_platform_free(&(wc_other_cpus->wc_apic_state));
707 
708 		/*
709 		 * Enable interrupts on boot cpu.
710 		 */
711 		ASSERT(CPU->cpu_id == i_cpr_bootcpuid());
712 		mutex_enter(&cpu_lock);
713 		cpu_enable_intr(CPU);
714 		mutex_exit(&cpu_lock);
715 
716 		PT(PT_INTRRESTORE);
717 		intr_restore(saved_intr);
718 		PT(PT_CPU);
719 
720 		return (ret);
721 	}
722 }
723 
724 /*
725  * Stop all other cpu's before halting or rebooting. We pause the cpu's
726  * instead of sending a cross call.
727  * Stolen from sun4/os/mp_states.c
728  */
729 
730 static int cpu_are_paused;	/* sic */
731 
732 void
733 i_cpr_stop_other_cpus(void)
734 {
735 	mutex_enter(&cpu_lock);
736 	if (cpu_are_paused) {
737 		mutex_exit(&cpu_lock);
738 		return;
739 	}
740 	pause_cpus(NULL, NULL);
741 	cpu_are_paused = 1;
742 
743 	mutex_exit(&cpu_lock);
744 }
745 
746 int
747 i_cpr_is_supported(int sleeptype)
748 {
749 	extern int cpr_supported_override;
750 	extern int cpr_platform_enable;
751 	extern int pm_S3_enabled;
752 
753 	if (sleeptype != CPR_TORAM)
754 		return (0);
755 
756 	/*
757 	 * The next statement tests if a specific platform has turned off
758 	 * cpr support.
759 	 */
760 	if (cpr_supported_override)
761 		return (0);
762 
763 	/*
764 	 * If a platform has specifically turned on cpr support ...
765 	 */
766 	if (cpr_platform_enable)
767 		return (1);
768 
769 	return (pm_S3_enabled);
770 }
771 
/*
 * Intentionally empty on this platform.
 */
void
i_cpr_bitmap_cleanup(void)
{
	/* nothing to do */
}
776 
/*
 * Intentionally empty on this platform.
 */
void
i_cpr_free_memory_resources(void)
{
	/* nothing to do */
}
781 
782 /*
783  * Needed only for S3 so far
784  */
785 static int
786 i_cpr_platform_alloc(psm_state_request_t *req)
787 {
788 #ifdef DEBUG
789 	char	*str = "i_cpr_platform_alloc";
790 #endif
791 
792 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
793 
794 	if (psm_state == NULL) {
795 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
796 		return (0);
797 	}
798 
799 	req->psr_cmd = PSM_STATE_ALLOC;
800 	return ((*psm_state)(req));
801 }
802 
803 /*
804  * Needed only for S3 so far
805  */
806 static void
807 i_cpr_platform_free(psm_state_request_t *req)
808 {
809 #ifdef DEBUG
810 	char	*str = "i_cpr_platform_free";
811 #endif
812 
813 	PMD(PMD_SX, ("cpu = %d, %s(%p) \n", CPU->cpu_id, str, (void *)req))
814 
815 	if (psm_state == NULL) {
816 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
817 		return;
818 	}
819 
820 	req->psr_cmd = PSM_STATE_FREE;
821 	(void) (*psm_state)(req);
822 }
823 
824 static int
825 i_cpr_save_apic(psm_state_request_t *req)
826 {
827 #ifdef DEBUG
828 	char	*str = "i_cpr_save_apic";
829 #endif
830 
831 	if (psm_state == NULL) {
832 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
833 		return (0);
834 	}
835 
836 	req->psr_cmd = PSM_STATE_SAVE;
837 	return ((*psm_state)(req));
838 }
839 
840 static int
841 i_cpr_restore_apic(psm_state_request_t *req)
842 {
843 #ifdef DEBUG
844 	char	*str = "i_cpr_restore_apic";
845 #endif
846 
847 	if (psm_state == NULL) {
848 		PMD(PMD_SX, ("%s() : psm_state == NULL\n", str))
849 		return (0);
850 	}
851 
852 	req->psr_cmd = PSM_STATE_RESTORE;
853 	return ((*psm_state)(req));
854 }
855 
856 
857 /* stop lint complaining about offset not being used in 32bit mode */
858 #if !defined(__amd64)
859 /*ARGSUSED*/
860 #endif
861 static void
862 init_real_mode_platter(int cpun, uint32_t offset, uint_t cr4, wc_desctbr_t gdt)
863 {
864 	/*LINTED*/
865 	rm_platter_t *real_mode_platter = (rm_platter_t *)rm_platter_va;
866 
867 	/*
868 	 * Fill up the real mode platter to make it easy for real mode code to
869 	 * kick it off. This area should really be one passed by boot to kernel
870 	 * and guaranteed to be below 1MB and aligned to 16 bytes. Should also
871 	 * have identical physical and virtual address in paged mode.
872 	 */
873 
874 	real_mode_platter->rm_pdbr = getcr3();
875 	real_mode_platter->rm_cpu = cpun;
876 	real_mode_platter->rm_cr4 = cr4;
877 
878 	real_mode_platter->rm_gdt_base = gdt.base;
879 	real_mode_platter->rm_gdt_lim = gdt.limit;
880 
881 #if defined(__amd64)
882 	if (getcr3() > 0xffffffffUL)
883 		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
884 		    "located above 4G in physical memory (@ 0x%llx).",
885 		    (unsigned long long)getcr3());
886 
887 	/*
888 	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
889 	 * by code in real_mode_start():
890 	 *
891 	 * GDT[0]:  NULL selector
892 	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
893 	 *
894 	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
895 	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
896 	 * a course of action as any other, though it may cause the entire
897 	 * platform to reset in some cases...
898 	 */
899 	real_mode_platter->rm_temp_gdt[0] = 0ULL;
900 	real_mode_platter->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;
901 
902 	real_mode_platter->rm_temp_gdt_lim = (ushort_t)
903 	    (sizeof (real_mode_platter->rm_temp_gdt) - 1);
904 	real_mode_platter->rm_temp_gdt_base = rm_platter_pa +
905 	    offsetof(rm_platter_t, rm_temp_gdt);
906 
907 	real_mode_platter->rm_temp_idt_lim = 0;
908 	real_mode_platter->rm_temp_idt_base = 0;
909 
910 	/*
911 	 * Since the CPU needs to jump to protected mode using an identity
912 	 * mapped address, we need to calculate it here.
913 	 */
914 	real_mode_platter->rm_longmode64_addr = rm_platter_pa + offset;
915 #endif	/* __amd64 */
916 
917 	/* return; */
918 }
919 
920 void
921 i_cpr_start_cpu(void)
922 {
923 
924 	struct cpu *cp = CPU;
925 
926 	char *str = "i_cpr_start_cpu";
927 	extern void init_cpu_syscall(struct cpu *cp);
928 
929 	PMD(PMD_SX, ("%s() called\n", str))
930 
931 	PMD(PMD_SX, ("%s() #0 cp->cpu_base_spl %d\n", str,
932 	    cp->cpu_base_spl))
933 
934 	mutex_enter(&cpu_lock);
935 	if (cp == i_cpr_bootcpu()) {
936 		mutex_exit(&cpu_lock);
937 		PMD(PMD_SX,
938 		    ("%s() called on bootcpu nothing to do!\n", str))
939 		return;
940 	}
941 	mutex_exit(&cpu_lock);
942 
943 	/*
944 	 * We need to Sync PAT with cpu0's PAT. We have to do
945 	 * this with interrupts disabled.
946 	 */
947 	pat_sync();
948 
949 	/*
950 	 * If we use XSAVE, we need to restore XFEATURE_ENABLE_MASK register.
951 	 */
952 	if (fp_save_mech == FP_XSAVE) {
953 		setup_xfem();
954 	}
955 
956 	/*
957 	 * Initialize this CPU's syscall handlers
958 	 */
959 	init_cpu_syscall(cp);
960 
961 	PMD(PMD_SX, ("%s() #1 cp->cpu_base_spl %d\n", str, cp->cpu_base_spl))
962 
963 	/*
964 	 * Do not need to call cpuid_pass2(), cpuid_pass3(), cpuid_pass4() or
965 	 * init_cpu_info(), since the work that they do is only needed to
966 	 * be done once at boot time
967 	 */
968 
969 
970 	mutex_enter(&cpu_lock);
971 	CPUSET_ADD(procset, cp->cpu_id);
972 	mutex_exit(&cpu_lock);
973 
974 	PMD(PMD_SX, ("%s() #2 cp->cpu_base_spl %d\n", str,
975 	    cp->cpu_base_spl))
976 
977 	if (tsc_gethrtime_enable) {
978 		PMD(PMD_SX, ("%s() calling tsc_sync_slave\n", str))
979 		tsc_sync_slave();
980 	}
981 
982 	PMD(PMD_SX, ("%s() cp->cpu_id %d, cp->cpu_intr_actv %d\n", str,
983 	    cp->cpu_id, cp->cpu_intr_actv))
984 	PMD(PMD_SX, ("%s() #3 cp->cpu_base_spl %d\n", str,
985 	    cp->cpu_base_spl))
986 
987 	(void) spl0();		/* enable interrupts */
988 
989 	PMD(PMD_SX, ("%s() #4 cp->cpu_base_spl %d\n", str,
990 	    cp->cpu_base_spl))
991 
992 	/*
993 	 * Set up the CPU module for this CPU.  This can't be done before
994 	 * this CPU is made CPU_READY, because we may (in heterogeneous systems)
995 	 * need to go load another CPU module.  The act of attempting to load
996 	 * a module may trigger a cross-call, which will ASSERT unless this
997 	 * cpu is CPU_READY.
998 	 */
999 
1000 	/*
1001 	 * cmi already been init'd (during boot), so do not need to do it again
1002 	 */
1003 #ifdef PM_REINITMCAONRESUME
1004 	if (is_x86_feature(x86_featureset, X86FSET_MCA))
1005 		cmi_mca_init();
1006 #endif
1007 
1008 	PMD(PMD_SX, ("%s() returning\n", str))
1009 
1010 	/* return; */
1011 }
1012 
1013 void
1014 i_cpr_alloc_cpus(void)
1015 {
1016 	char *str = "i_cpr_alloc_cpus";
1017 
1018 	PMD(PMD_SX, ("%s() CPU->cpu_id %d\n", str, CPU->cpu_id))
1019 	/*
1020 	 * we allocate this only when we actually need it to save on
1021 	 * kernel memory
1022 	 */
1023 
1024 	if (wc_other_cpus == NULL) {
1025 		wc_other_cpus = kmem_zalloc(max_ncpus * sizeof (wc_cpu_t),
1026 		    KM_SLEEP);
1027 	}
1028 
1029 }
1030 
1031 void
1032 i_cpr_free_cpus(void)
1033 {
1034 	int index;
1035 	wc_cpu_t *wc_cpu;
1036 
1037 	if (wc_other_cpus != NULL) {
1038 		for (index = 0; index < max_ncpus; index++) {
1039 			wc_cpu = wc_other_cpus + index;
1040 			if (wc_cpu->wc_saved_stack != NULL) {
1041 				kmem_free(wc_cpu->wc_saved_stack,
1042 				    wc_cpu->wc_saved_stack_size);
1043 			}
1044 		}
1045 
1046 		kmem_free((void *) wc_other_cpus,
1047 		    max_ncpus * sizeof (wc_cpu_t));
1048 		wc_other_cpus = NULL;
1049 	}
1050 }
1051 
1052 /*
1053  * wrapper for acpica_ddi_save_resources()
1054  */
1055 void
1056 i_cpr_save_configuration(dev_info_t *dip)
1057 {
1058 	acpica_ddi_save_resources(dip);
1059 }
1060 
1061 /*
1062  * wrapper for acpica_ddi_restore_resources()
1063  */
1064 void
1065 i_cpr_restore_configuration(dev_info_t *dip)
1066 {
1067 	acpica_ddi_restore_resources(dip);
1068 }
1069 
1070 static int
1071 wait_for_set(cpuset_t *set, int who)
1072 {
1073 	int delays;
1074 	char *str = "wait_for_set";
1075 
1076 	for (delays = 0; !CPU_IN_SET(*set, who); delays++) {
1077 		if (delays == 500) {
1078 			/*
1079 			 * After five seconds, things are probably
1080 			 * looking a bit bleak - explain the hang.
1081 			 */
1082 			cmn_err(CE_NOTE, "cpu%d: started, "
1083 			    "but not running in the kernel yet", who);
1084 			PMD(PMD_SX, ("%s() %d cpu started "
1085 			    "but not running in the kernel yet\n",
1086 			    str, who))
1087 		} else if (delays > 2000) {
1088 			/*
1089 			 * We waited at least 20 seconds, bail ..
1090 			 */
1091 			cmn_err(CE_WARN, "cpu%d: timed out", who);
1092 			PMD(PMD_SX, ("%s() %d cpu timed out\n",
1093 			    str, who))
1094 			return (0);
1095 		}
1096 
1097 		/*
1098 		 * wait at least 10ms, then check again..
1099 		 */
1100 		drv_usecwait(10000);
1101 	}
1102 
1103 	return (1);
1104 }
1105 
1106 static	void
1107 i_cpr_save_stack(kthread_t *t, wc_cpu_t *wc_cpu)
1108 {
1109 	size_t	stack_size;	/* size of stack */
1110 	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
1111 	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end  */
1112 
1113 	stack_size = (size_t)end - (size_t)start;
1114 
1115 	if (wc_cpu->wc_saved_stack_size < stack_size) {
1116 		if (wc_cpu->wc_saved_stack != NULL) {
1117 			kmem_free(wc_cpu->wc_saved_stack,
1118 			    wc_cpu->wc_saved_stack_size);
1119 		}
1120 		wc_cpu->wc_saved_stack = kmem_zalloc(stack_size, KM_SLEEP);
1121 		wc_cpu->wc_saved_stack_size = stack_size;
1122 	}
1123 
1124 	bcopy(start, wc_cpu->wc_saved_stack, stack_size);
1125 }
1126 
1127 void
1128 i_cpr_restore_stack(kthread_t *t, greg_t *save_stack)
1129 {
1130 	size_t	stack_size;	/* size of stack */
1131 	caddr_t	start = CPR_GET_STACK_START(t);	/* stack start */
1132 	caddr_t	end = CPR_GET_STACK_END(t);	/* stack end  */
1133 
1134 	stack_size = (size_t)end - (size_t)start;
1135 
1136 	bcopy(save_stack, start, stack_size);
1137 }
1138