xref: /illumos-gate/usr/src/uts/sun4u/os/cpr_impl.c (revision 0ed5c46e82c989cfa9726d9dae452e3d24ef83be)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Platform specific implementation code
28  */
29 
30 #define	SUNDDI_IMPL
31 
32 #include <sys/types.h>
33 #include <sys/promif.h>
34 #include <sys/prom_isa.h>
35 #include <sys/prom_plat.h>
36 #include <sys/mmu.h>
37 #include <vm/hat_sfmmu.h>
38 #include <sys/iommu.h>
39 #include <sys/scb.h>
40 #include <sys/cpuvar.h>
41 #include <sys/intreg.h>
42 #include <sys/pte.h>
43 #include <vm/hat.h>
44 #include <vm/page.h>
45 #include <vm/as.h>
46 #include <sys/cpr.h>
47 #include <sys/kmem.h>
48 #include <sys/clock.h>
49 #include <sys/kmem.h>
50 #include <sys/panic.h>
51 #include <vm/seg_kmem.h>
52 #include <sys/cpu_module.h>
53 #include <sys/callb.h>
54 #include <sys/machsystm.h>
55 #include <sys/vmsystm.h>
56 #include <sys/systm.h>
57 #include <sys/archsystm.h>
58 #include <sys/stack.h>
59 #include <sys/fs/ufs_fs.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/thread.h>
63 #include <vm/vm_dep.h>
64 
65 extern	void cpr_clear_bitmaps(void);
66 extern	int cpr_setbit(pfn_t ppn, int mapflag);
67 extern	int cpr_clrbit(pfn_t ppn, int mapflag);
68 extern	pgcnt_t cpr_scan_kvseg(int mapflag, bitfunc_t bitfunc, struct seg *seg);
69 extern	pgcnt_t cpr_count_seg_pages(int mapflag, bitfunc_t bitfunc);
70 extern	void dtlb_wr_entry(uint_t, tte_t *, uint64_t *);
71 extern	void itlb_wr_entry(uint_t, tte_t *, uint64_t *);
72 
73 static	int i_cpr_storage_desc_alloc(csd_t **, pgcnt_t *, csd_t **, int);
74 static	void i_cpr_storage_desc_init(csd_t *, pgcnt_t, csd_t *);
75 static	caddr_t i_cpr_storage_data_alloc(pgcnt_t, pgcnt_t *, int);
76 static	int cpr_dump_sensitive(vnode_t *, csd_t *);
77 static	void i_cpr_clear_entries(uint64_t, uint64_t);
78 static	void i_cpr_xcall(xcfunc_t);
79 
80 void	i_cpr_storage_free(void);
81 
82 extern void *i_cpr_data_page;
83 extern int cpr_test_mode;
84 extern int cpr_nbitmaps;
85 extern char cpr_default_path[];
86 extern caddr_t textva, datava;
87 
88 static struct cpr_map_info cpr_prom_retain[CPR_PROM_RETAIN_CNT];
89 caddr_t cpr_vaddr = NULL;
90 
91 static	uint_t sensitive_pages_saved;
92 static	uint_t sensitive_size_saved;
93 
94 caddr_t	i_cpr_storage_data_base;
95 caddr_t	i_cpr_storage_data_end;
96 csd_t *i_cpr_storage_desc_base;
97 csd_t *i_cpr_storage_desc_end;		/* one byte beyond last used descp */
98 csd_t *i_cpr_storage_desc_last_used;	/* last used descriptor */
99 caddr_t sensitive_write_ptr;		/* position for next storage write */
100 
101 size_t	i_cpr_sensitive_bytes_dumped;
102 pgcnt_t	i_cpr_sensitive_pgs_dumped;
103 pgcnt_t	i_cpr_storage_data_sz;		/* in pages */
104 pgcnt_t	i_cpr_storage_desc_pgcnt;	/* in pages */
105 
106 ushort_t cpr_mach_type = CPR_MACHTYPE_4U;
107 static	csu_md_t m_info;
108 
109 
110 #define	MAX_STORAGE_RETRY	3
111 #define	MAX_STORAGE_ALLOC_RETRY	3
112 #define	INITIAL_ALLOC_PCNT	40	/* starting allocation percentage */
113 #define	INTEGRAL		100	/* to get 1% precision */
114 
115 #define	EXTRA_RATE		2	/* add EXTRA_RATE% extra space */
116 #define	EXTRA_DESCS		10
117 
118 #define	CPR_NO_STORAGE_DESC	1
119 #define	CPR_NO_STORAGE_DATA	2
120 
121 #define	CIF_SPLICE		0
122 #define	CIF_UNLINK		1
123 
124 
/*
 * CPR miscellaneous support routines
 *
 * cpr_open() expands to a vn_open() call that passes UIO_SYSSPACE,
 * the caller's mode, 0600, the caller's vnode pointer, CRCREAT and 0.
 *
 * cpr_rdwr() expands to a vn_rdwr() call that passes offset 0LL,
 * UIO_SYSSPACE, MAXOFF_T as the rlim64_t argument, CRED(), and a
 * NULL ssize_t pointer as the final argument.
 */
#define	cpr_open(path, mode,  vpp)	(vn_open(path, UIO_SYSSPACE, \
		mode, 0600, vpp, CRCREAT, 0))
#define	cpr_rdwr(rw, vp, basep, cnt)	(vn_rdwr(rw, vp,  (caddr_t)(basep), \
		cnt, 0LL, UIO_SYSSPACE, 0, (rlim64_t)MAXOFF_T, CRED(), \
		(ssize_t *)NULL))
133 
134 /*
135  * definitions for saving/restoring prom pages
136  */
137 static void	*ppage_buf;
138 static pgcnt_t	ppage_count;
139 static pfn_t	*pphys_list;
140 static size_t	pphys_list_size;
141 
142 typedef void (*tlb_rw_t)(uint_t, tte_t *, uint64_t *);
143 typedef void (*tlb_filter_t)(int, tte_t *, uint64_t, void *);
144 
/*
 * private struct for tlb handling
 *
 * dst/tail describe the sutlb_t array being filled: each recorded
 * entry is written at dst, which is then advanced; callers ASSERT
 * dst < tail before writing.
 * reader/writer access a single tlb entry by index.
 * filter is invoked by i_cpr_scan_tlb() for each valid entry it reads.
 * index is where i_cpr_scan_tlb() starts scanning (it walks down to 0);
 * i_cpr_lnb() and i_cpr_make_tte() also use it as the next target tlb
 * index to hand out, decrementing it as entries are recorded.
 * skip is a bitmask, one bit per tlb index, of indexes that must not
 * be handed out as targets.
 */
struct cpr_trans_info {
	sutlb_t		*dst;
	sutlb_t		*tail;
	tlb_rw_t	reader;
	tlb_rw_t	writer;
	tlb_filter_t	filter;
	int		index;
	uint64_t	skip;		/* assumes TLB <= 64 locked entries */
};
typedef struct cpr_trans_info cti_t;
158 
159 
/*
 * special handling for tlb info
 */

/* true when va lies strictly between OFW_START_ADDR and OFW_END_ADDR */
#define	WITHIN_OFW(va) \
	(((va) > (uint64_t)OFW_START_ADDR) && ((va) < (uint64_t)OFW_END_ADDR))

/*
 * true when the MMU_PAGESIZE page starting at va fits entirely inside
 * the MMU_PAGESIZE4M region that starts at base
 */
#define	WITHIN_NUCLEUS(va, base) \
	(((va) >= (base)) && \
	(((va) + MMU_PAGESIZE) <= ((base) + MMU_PAGESIZE4M)))

/*
 * true when enable_bigktsb is set and va falls within
 * [ktsb_base, ktsb_base + ktsb_sz)
 */
#define	IS_BIGKTSB(va) \
	(enable_bigktsb && \
	((va) >= (uint64_t)ktsb_base) && \
	((va) < (uint64_t)(ktsb_base + ktsb_sz)))
174 
175 
176 /*
177  * WARNING:
178  * the text from this file is linked to follow cpr_resume_setup.o;
179  * only add text between here and i_cpr_end_jumpback when it needs
180  * to be called during resume before we switch back to the kernel
181  * trap table.  all the text in this range must fit within a page.
182  */
183 
184 
185 /*
186  * each time a machine is reset, the prom uses an inconsistent set of phys
187  * pages and the cif cookie may differ as well.  so prior to restoring the
188  * original prom, we have to use to use the new/tmp prom's translations
189  * when requesting prom services.
190  *
191  * cif_handler starts out as the original prom cookie, and that gets used
192  * by client_handler() to jump into the prom.  here we splice-in a wrapper
193  * routine by writing cif_handler; client_handler() will now jump to the
194  * wrapper which switches the %tba to the new/tmp prom's trap table then
195  * jumps to the new cookie.
196  */
197 void
i_cpr_cif_setup(int action)198 i_cpr_cif_setup(int action)
199 {
200 	extern void *i_cpr_orig_cif, *cif_handler;
201 	extern int i_cpr_cif_wrapper(void *);
202 
203 	/*
204 	 * save the original cookie and change the current cookie to the
205 	 * wrapper routine.  later we just restore the original cookie.
206 	 */
207 	if (action == CIF_SPLICE) {
208 		i_cpr_orig_cif = cif_handler;
209 		cif_handler = (void *)i_cpr_cif_wrapper;
210 	} else if (action == CIF_UNLINK)
211 		cif_handler = i_cpr_orig_cif;
212 }
213 
214 
/*
 * launch slave cpus into kernel text, pause them,
 * and restore the original prom pages
 *
 * the sequence below is order-dependent: the cif wrapper is spliced in
 * before the first prom call (prom_set_traptable), and it is unlinked
 * again before the prom pages are restored.  kcontextreg is forced to
 * 0 for the duration and put back on the way out.
 */
void
i_cpr_mp_setup(void)
{
	extern void restart_other_cpu(int);
	cpu_t *cp;

	/* saved so it can be reinstated as the last step of this routine */
	uint64_t kctx = kcontextreg;

	/*
	 * Do not allow setting page size codes in MMU primary context
	 * register while using cif wrapper. This is needed to work
	 * around OBP incorrect handling of this MMU register.
	 */
	kcontextreg = 0;

	/*
	 * reset cpu_ready_set so x_calls work properly;
	 * afterwards it holds only the current processor id
	 */
	CPUSET_ZERO(cpu_ready_set);
	CPUSET_ADD(cpu_ready_set, getprocessorid());

	/*
	 * setup cif to use the cookie from the new/tmp prom
	 * and setup tmp handling for calling prom services.
	 */
	i_cpr_cif_setup(CIF_SPLICE);

	/*
	 * at this point, only the nucleus and a few cpr pages are
	 * mapped in.  once we switch to the kernel trap table,
	 * we can access the rest of kernel space.
	 */
	prom_set_traptable(&trap_table);

	if (ncpus > 1) {
		sfmmu_init_tsbs();

		mutex_enter(&cpu_lock);
		/*
		 * All of the slave cpus are not ready at this time,
		 * yet the cpu structures have various cpu_flags set;
		 * reset cpu_flags to CPU_FROZEN and clear mutex_ready.
		 * Since we are coming up from a CPU suspend, the slave cpus
		 * are frozen.
		 */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next) {
			cp->cpu_flags = CPU_FROZEN;
			cp->cpu_m.mutex_ready = 0;
		}

		/* second pass over the same ring: restart every other cpu */
		for (cp = CPU->cpu_next; cp != CPU; cp = cp->cpu_next)
			restart_other_cpu(cp->cpu_id);

		pause_cpus(NULL, NULL);
		mutex_exit(&cpu_lock);

		/* run i_cpr_clear_entries via cross call on cpu_ready_set */
		i_cpr_xcall(i_cpr_clear_entries);
	} else
		i_cpr_clear_entries(0, 0);

	/*
	 * now unlink the cif wrapper;  WARNING: do not call any
	 * prom_xxx() routines until after prom pages are restored.
	 */
	i_cpr_cif_setup(CIF_UNLINK);

	/* the return value of the restore is deliberately not checked */
	(void) i_cpr_prom_pages(CPR_PROM_RESTORE);

	/* allow setting page size codes in MMU primary context register */
	kcontextreg = kctx;
}
290 
291 
/*
 * end marker for jumpback page;
 * this symbol is used to check the size of i_cpr_resume_setup()
 * and the above text.  For simplicity, the Makefile needs to
 * link i_cpr_resume_setup.o and cpr_impl.o consecutively.
 *
 * the function body is intentionally empty: only its address matters.
 * i_cpr_save_machdep_info() compares the page of this address with
 * the page of i_cpr_resume_setup and panics when they differ.
 */
void
i_cpr_end_jumpback(void)
{
}
302 
303 
304 /*
305  * scan tlb entries with reader; when valid entries are found,
306  * the filter routine will selectively save/clear them
307  */
308 static void
i_cpr_scan_tlb(cti_t * ctip)309 i_cpr_scan_tlb(cti_t *ctip)
310 {
311 	uint64_t va_tag;
312 	int tlb_index;
313 	tte_t tte;
314 
315 	for (tlb_index = ctip->index; tlb_index >= 0; tlb_index--) {
316 		(*ctip->reader)((uint_t)tlb_index, &tte, &va_tag);
317 		if (va_tag && TTE_IS_VALID(&tte))
318 			(*ctip->filter)(tlb_index, &tte, va_tag, ctip);
319 	}
320 }
321 
322 
323 /*
324  * filter for locked tlb entries that reference the text/data nucleus
325  * and any bigktsb's; these will be reinstalled by cprboot on all cpus
326  */
327 /* ARGSUSED */
328 static void
i_cpr_lnb(int index,tte_t * ttep,uint64_t va_tag,void * ctrans)329 i_cpr_lnb(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
330 {
331 	cti_t *ctip;
332 
333 	/*
334 	 * record tlb data at ctip->dst; the target tlb index starts
335 	 * at the highest tlb offset and moves towards 0.  the prom
336 	 * reserves both dtlb and itlb index 0.  any selected entry
337 	 * also gets marked to prevent being flushed during resume
338 	 */
339 	if (TTE_IS_LOCKED(ttep) && (va_tag == (uint64_t)textva ||
340 	    va_tag == (uint64_t)datava || IS_BIGKTSB(va_tag))) {
341 		ctip = ctrans;
342 		while ((1 << ctip->index) & ctip->skip)
343 			ctip->index--;
344 		ASSERT(ctip->index > 0);
345 		ASSERT(ctip->dst < ctip->tail);
346 		ctip->dst->tte.ll = ttep->ll;
347 		ctip->dst->va_tag = va_tag;
348 		ctip->dst->index = ctip->index--;
349 		ctip->dst->tmp = 0;
350 		ctip->dst++;
351 	}
352 }
353 
354 
355 /*
356  * some tlb entries are stale, filter for unlocked entries
357  * within the prom virt range and clear them
358  */
359 static void
i_cpr_ufw(int index,tte_t * ttep,uint64_t va_tag,void * ctrans)360 i_cpr_ufw(int index, tte_t *ttep, uint64_t va_tag, void *ctrans)
361 {
362 	sutlb_t clr;
363 	cti_t *ctip;
364 
365 	if (!TTE_IS_LOCKED(ttep) && WITHIN_OFW(va_tag)) {
366 		ctip = ctrans;
367 		bzero(&clr, sizeof (clr));
368 		(*ctip->writer)((uint_t)index, &clr.tte, &clr.va_tag);
369 	}
370 }
371 
372 
373 /*
374  * some of the entries installed by cprboot are needed only on a
375  * short-term basis and need to be flushed to avoid clogging the tlbs.
376  * scan the dtte/itte arrays for items marked as temporary and clear
377  * dtlb/itlb entries using wrfunc.
378  */
379 static void
i_cpr_clear_tmp(sutlb_t * listp,int max,tlb_rw_t wrfunc)380 i_cpr_clear_tmp(sutlb_t *listp, int max, tlb_rw_t wrfunc)
381 {
382 	sutlb_t clr, *tail;
383 
384 	bzero(&clr, sizeof (clr));
385 	for (tail = listp + max; listp < tail && listp->va_tag; listp++) {
386 		if (listp->tmp)
387 			(*wrfunc)((uint_t)listp->index, &clr.tte, &clr.va_tag);
388 	}
389 }
390 
391 
392 /* ARGSUSED */
393 static void
i_cpr_clear_entries(uint64_t arg1,uint64_t arg2)394 i_cpr_clear_entries(uint64_t arg1, uint64_t arg2)
395 {
396 	extern void demap_all(void);
397 	cti_t cti;
398 
399 	i_cpr_clear_tmp(m_info.dtte, CPR_MAX_TLB, dtlb_wr_entry);
400 	i_cpr_clear_tmp(m_info.itte, CPR_MAX_TLB, itlb_wr_entry);
401 
402 	/*
403 	 * for newer cpus that implement DEMAP_ALL_TYPE, demap_all is
404 	 * a second label for vtag_flushall.  the call is made using
405 	 * vtag_flushall() instead of demap_all() due to runtime and
406 	 * krtld results with both older and newer cpu modules.
407 	 */
408 	if (&demap_all != 0) {
409 		vtag_flushall();
410 		return;
411 	}
412 
413 	/*
414 	 * for older V9 cpus, scan tlbs and clear stale entries
415 	 */
416 	bzero(&cti, sizeof (cti));
417 	cti.filter = i_cpr_ufw;
418 
419 	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
420 	cti.reader = dtlb_rd_entry;
421 	cti.writer = dtlb_wr_entry;
422 	i_cpr_scan_tlb(&cti);
423 
424 	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
425 	cti.reader = itlb_rd_entry;
426 	cti.writer = itlb_wr_entry;
427 	i_cpr_scan_tlb(&cti);
428 }
429 
430 
431 /*
432  * craft tlb info for tmp use during resume; this data gets used by
433  * cprboot to install tlb entries.  we also mark each struct as tmp
434  * so those tlb entries will get flushed after switching to the kernel
435  * trap table.  no data needs to be recorded for vaddr when it falls
436  * within the nucleus since we've already recorded nucleus ttes and
437  * a 8K tte would conflict with a 4MB tte.  eg: the cpr module
438  * text/data may have been loaded into the text/data nucleus.
439  */
440 static void
i_cpr_make_tte(cti_t * ctip,void * vaddr,caddr_t nbase)441 i_cpr_make_tte(cti_t *ctip, void *vaddr, caddr_t nbase)
442 {
443 	pfn_t ppn;
444 	uint_t rw;
445 
446 	if (WITHIN_NUCLEUS((caddr_t)vaddr, nbase))
447 		return;
448 
449 	while ((1 << ctip->index) & ctip->skip)
450 		ctip->index--;
451 	ASSERT(ctip->index > 0);
452 	ASSERT(ctip->dst < ctip->tail);
453 
454 	/*
455 	 * without any global service available to lookup
456 	 * a tte by vaddr, we craft our own here:
457 	 */
458 	ppn = va_to_pfn(vaddr);
459 	rw = (nbase == datava) ? TTE_HWWR_INT : 0;
460 	ctip->dst->tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
461 	ctip->dst->tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
462 	    TTE_CP_INT | TTE_PRIV_INT | rw;
463 	ctip->dst->va_tag = ((uintptr_t)vaddr & MMU_PAGEMASK);
464 	ctip->dst->index = ctip->index--;
465 	ctip->dst->tmp = 1;
466 	ctip->dst++;
467 }
468 
469 
470 static void
i_cpr_xcall(xcfunc_t func)471 i_cpr_xcall(xcfunc_t func)
472 {
473 	uint_t pil, reset_pil;
474 
475 	pil = getpil();
476 	if (pil < XCALL_PIL)
477 		reset_pil = 0;
478 	else {
479 		reset_pil = 1;
480 		setpil(XCALL_PIL - 1);
481 	}
482 	xc_some(cpu_ready_set, func, 0, 0);
483 	if (reset_pil)
484 		setpil(pil);
485 }
486 
487 
488 /*
489  * restart paused slave cpus
490  */
491 void
i_cpr_machdep_setup(void)492 i_cpr_machdep_setup(void)
493 {
494 	if (ncpus > 1) {
495 		CPR_DEBUG(CPR_DEBUG1, "MP restarted...\n");
496 		mutex_enter(&cpu_lock);
497 		start_cpus();
498 		mutex_exit(&cpu_lock);
499 	}
500 }
501 
502 
/*
 * Stop all interrupt activities in the system
 *
 * implemented as a single spl7() call; the value
 * spl7() returns is deliberately discarded.
 */
void
i_cpr_stop_intr(void)
{
	(void) spl7();
}
511 
/*
 * Set machine up to take interrupts
 *
 * implemented as a single spl0() call; the value
 * spl0() returns is deliberately discarded.
 */
void
i_cpr_enable_intr(void)
{
	(void) spl0();
}
520 
521 
522 /*
523  * record cpu nodes and ids
524  */
525 static void
i_cpr_save_cpu_info(void)526 i_cpr_save_cpu_info(void)
527 {
528 	struct sun4u_cpu_info *scip;
529 	cpu_t *cp;
530 
531 	scip = m_info.sci;
532 	cp = CPU;
533 	do {
534 		ASSERT(scip < &m_info.sci[NCPU]);
535 		scip->cpu_id = cp->cpu_id;
536 		scip->node = cpunodes[cp->cpu_id].nodeid;
537 		scip++;
538 	} while ((cp = cp->cpu_next) != CPU);
539 }
540 
541 
542 /*
543  * Write necessary machine dependent information to cpr state file,
544  * eg. sun4u mmu ctx secondary for the current running process (cpr) ...
545  */
546 int
i_cpr_write_machdep(vnode_t * vp)547 i_cpr_write_machdep(vnode_t *vp)
548 {
549 	extern uint_t getpstate(), getwstate();
550 	extern uint_t i_cpr_tstack_size;
551 	const char ustr[] = ": unix-tte 2drop false ;";
552 	uintptr_t tinfo;
553 	label_t *ltp;
554 	cmd_t cmach;
555 	char *fmt;
556 	int rc;
557 
558 	/*
559 	 * ustr[] is used as temporary forth words during
560 	 * slave startup sequence, see sfmmu_mp_startup()
561 	 */
562 
563 	cmach.md_magic = (uint_t)CPR_MACHDEP_MAGIC;
564 	cmach.md_size = sizeof (m_info) + sizeof (ustr);
565 
566 	if (rc = cpr_write(vp, (caddr_t)&cmach, sizeof (cmach))) {
567 		cpr_err(CE_WARN, "Failed to write descriptor.");
568 		return (rc);
569 	}
570 
571 	/*
572 	 * m_info is now cleared in i_cpr_dump_setup()
573 	 */
574 	m_info.ksb = (uint32_t)STACK_BIAS;
575 	m_info.kpstate = (uint16_t)getpstate();
576 	m_info.kwstate = (uint16_t)getwstate();
577 	CPR_DEBUG(CPR_DEBUG1, "stack bias 0x%x, pstate 0x%x, wstate 0x%x\n",
578 	    m_info.ksb, m_info.kpstate, m_info.kwstate);
579 
580 	ltp = &ttolwp(curthread)->lwp_qsav;
581 	m_info.qsav_pc = (cpr_ext)ltp->val[0];
582 	m_info.qsav_sp = (cpr_ext)ltp->val[1];
583 
584 	/*
585 	 * Set secondary context to INVALID_CONTEXT to force the HAT
586 	 * to re-setup the MMU registers and locked TTEs it needs for
587 	 * TLB miss handling.
588 	 */
589 	m_info.mmu_ctx_sec = INVALID_CONTEXT;
590 	m_info.mmu_ctx_pri = KCONTEXT;
591 
592 	tinfo = (uintptr_t)curthread;
593 	m_info.thrp = (cpr_ptr)tinfo;
594 
595 	tinfo = (uintptr_t)i_cpr_resume_setup;
596 	m_info.func = (cpr_ptr)tinfo;
597 
598 	/*
599 	 * i_cpr_data_page is comprised of a 4K stack area and a few
600 	 * trailing data symbols; the page is shared by the prom and
601 	 * kernel during resume.  the stack size is recorded here
602 	 * and used by cprboot to set %sp
603 	 */
604 	tinfo = (uintptr_t)&i_cpr_data_page;
605 	m_info.tmp_stack = (cpr_ptr)tinfo;
606 	m_info.tmp_stacksize = i_cpr_tstack_size;
607 
608 	m_info.test_mode = cpr_test_mode;
609 
610 	i_cpr_save_cpu_info();
611 
612 	if (rc = cpr_write(vp, (caddr_t)&m_info, sizeof (m_info))) {
613 		cpr_err(CE_WARN, "Failed to write machdep info.");
614 		return (rc);
615 	}
616 
617 	fmt = "error writing %s forth info";
618 	if (rc = cpr_write(vp, (caddr_t)ustr, sizeof (ustr)))
619 		cpr_err(CE_WARN, fmt, "unix-tte");
620 
621 	return (rc);
622 }
623 
624 
625 /*
626  * Save miscellaneous information which needs to be written to the
627  * state file.  This information is required to re-initialize
628  * kernel/prom handshaking.
629  */
630 void
i_cpr_save_machdep_info(void)631 i_cpr_save_machdep_info(void)
632 {
633 	CPR_DEBUG(CPR_DEBUG5, "jumpback size = 0x%lx\n",
634 	    (uintptr_t)&i_cpr_end_jumpback -
635 	    (uintptr_t)i_cpr_resume_setup);
636 
637 	/*
638 	 * Verify the jumpback code all falls in one page.
639 	 */
640 	if (((uintptr_t)&i_cpr_end_jumpback & MMU_PAGEMASK) !=
641 	    ((uintptr_t)i_cpr_resume_setup & MMU_PAGEMASK))
642 		cpr_err(CE_PANIC, "jumpback code exceeds one page.");
643 }
644 
645 
/*
 * cpu0 should contain bootcpu info
 *
 * returns the address of the global cpu0 structure.
 */
cpu_t *
i_cpr_bootcpu(void)
{
	return (&cpu0);
}
654 
/*
 * returns the processor id of the boot cpu, hard-coded to 0.
 * NOTE(review): presumably this matches cpu0.cpu_id as returned via
 * i_cpr_bootcpu() -- confirm.
 */
processorid_t
i_cpr_bootcpuid(void)
{
	return (0);
}
660 
661 /*
662  * Return the virtual address of the mapping area
663  */
664 caddr_t
i_cpr_map_setup(void)665 i_cpr_map_setup(void)
666 {
667 	/*
668 	 * Allocate a virtual memory range spanned by an hmeblk.
669 	 * This would be 8 hments or 64k bytes.  Starting VA
670 	 * must be 64k (8-page) aligned.
671 	 */
672 	cpr_vaddr = vmem_xalloc(heap_arena,
673 	    mmu_ptob(NHMENTS), mmu_ptob(NHMENTS),
674 	    0, 0, NULL, NULL, VM_NOSLEEP);
675 	return (cpr_vaddr);
676 }
677 
678 /*
679  * create tmp locked tlb entries for a group of phys pages;
680  *
681  * i_cpr_mapin/i_cpr_mapout should always be called in pairs,
682  * otherwise would fill up a tlb with locked entries
683  */
684 void
i_cpr_mapin(caddr_t vaddr,uint_t pages,pfn_t ppn)685 i_cpr_mapin(caddr_t vaddr, uint_t pages, pfn_t ppn)
686 {
687 	tte_t tte;
688 	extern pfn_t curthreadpfn;
689 	extern int curthreadremapped;
690 
691 	curthreadremapped = (ppn <= curthreadpfn && curthreadpfn < ppn + pages);
692 
693 	for (; pages--; ppn++, vaddr += MMU_PAGESIZE) {
694 		tte.tte_inthi = TTE_VALID_INT | TTE_PFN_INTHI(ppn);
695 		tte.tte_intlo = TTE_PFN_INTLO(ppn) | TTE_LCK_INT |
696 		    TTE_CP_INT | TTE_PRIV_INT | TTE_HWWR_INT;
697 		sfmmu_dtlb_ld_kva(vaddr, &tte);
698 	}
699 }
700 
701 void
i_cpr_mapout(caddr_t vaddr,uint_t pages)702 i_cpr_mapout(caddr_t vaddr, uint_t pages)
703 {
704 	extern int curthreadremapped;
705 
706 	if (curthreadremapped && vaddr <= (caddr_t)curthread &&
707 	    (caddr_t)curthread < vaddr + pages * MMU_PAGESIZE)
708 		curthreadremapped = 0;
709 
710 	for (; pages--; vaddr += MMU_PAGESIZE)
711 		vtag_flushpage(vaddr, (uint64_t)ksfmmup);
712 }
713 
714 /*
715  * We're done using the mapping area; release virtual space
716  */
717 void
i_cpr_map_destroy(void)718 i_cpr_map_destroy(void)
719 {
720 	vmem_free(heap_arena, cpr_vaddr, mmu_ptob(NHMENTS));
721 	cpr_vaddr = NULL;
722 }
723 
/*
 * no-op in this file: the body is empty and flag is ignored
 */
/* ARGSUSED */
void
i_cpr_handle_xc(int flag)
{
}
729 
730 
731 /*
732  * This function takes care of pages which are not in kas or need to be
733  * taken care of in a special way.  For example, panicbuf pages are not
734  * in kas and their pages are allocated via prom_retain().
735  */
736 pgcnt_t
i_cpr_count_special_kpages(int mapflag,bitfunc_t bitfunc)737 i_cpr_count_special_kpages(int mapflag, bitfunc_t bitfunc)
738 {
739 	struct cpr_map_info *pri, *tail;
740 	pgcnt_t pages, total = 0;
741 	pfn_t pfn;
742 
743 	/*
744 	 * Save information about prom retained panicbuf pages
745 	 */
746 	if (bitfunc == cpr_setbit) {
747 		pri = &cpr_prom_retain[CPR_PANICBUF];
748 		pri->virt = (cpr_ptr)panicbuf;
749 		pri->phys = va_to_pa(panicbuf);
750 		pri->size = sizeof (panicbuf);
751 	}
752 
753 	/*
754 	 * Go through the prom_retain array to tag those pages.
755 	 */
756 	tail = &cpr_prom_retain[CPR_PROM_RETAIN_CNT];
757 	for (pri = cpr_prom_retain; pri < tail; pri++) {
758 		pages = mmu_btopr(pri->size);
759 		for (pfn = ADDR_TO_PN(pri->phys); pages--; pfn++) {
760 			if (pf_is_memory(pfn)) {
761 				if (bitfunc == cpr_setbit) {
762 					if ((*bitfunc)(pfn, mapflag) == 0)
763 						total++;
764 				} else
765 					total++;
766 			}
767 		}
768 	}
769 
770 	return (total);
771 }
772 
773 
/*
 * Free up memory-related resources here.  We start by freeing buffers
 * allocated during suspend initialization.  Also, free up the mapping
 * resources allocated in cpr_init().
 *
 * In order: the saved prom pages (CPR_PROM_FREE, return value
 * ignored), the cpr mapping area, then the sensitive-page storage.
 */
void
i_cpr_free_memory_resources(void)
{
	(void) i_cpr_prom_pages(CPR_PROM_FREE);
	i_cpr_map_destroy();
	i_cpr_storage_free();
}
786 
787 
788 /*
789  * Derived from cpr_write_statefile().
790  * Save the sensitive pages to the storage area and do bookkeeping
791  * using the sensitive descriptors. Each descriptor will contain no more
792  * than CPR_MAXCONTIG amount of contiguous pages to match the max amount
793  * of pages that statefile gets written to disk at each write.
794  * XXX The CPR_MAXCONTIG can be changed to the size of the compression
795  * scratch area.
796  */
797 static int
i_cpr_save_to_storage(void)798 i_cpr_save_to_storage(void)
799 {
800 	sensitive_size_saved = 0;
801 	sensitive_pages_saved = 0;
802 	sensitive_write_ptr = i_cpr_storage_data_base;
803 	return (cpr_contig_pages(NULL, SAVE_TO_STORAGE));
804 }
805 
806 
807 /*
808  * This routine allocates space to save the sensitive kernel pages,
809  * i.e. kernel data nucleus, kvalloc and kvseg segments.
810  * It's assumed that those segments are the only areas that can be
811  * contaminated by memory allocations during statefile dumping.
812  * The space allocated here contains:
813  * 	A list of descriptors describing the saved sensitive pages.
814  * 	The storage area for saving the compressed sensitive kernel pages.
815  * Since storage pages are allocated from segkmem, they need to be
816  * excluded when saving.
817  */
818 int
i_cpr_save_sensitive_kpages(void)819 i_cpr_save_sensitive_kpages(void)
820 {
821 	static const char pages_fmt[] = "\n%s %s allocs\n"
822 	    "	spages %ld, vpages %ld, diff %ld\n";
823 	int retry_cnt;
824 	int error = 0;
825 	pgcnt_t pages, spages, vpages;
826 	caddr_t	addr;
827 	char *str;
828 
829 	/*
830 	 * Tag sensitive kpages. Allocate space for storage descriptors
831 	 * and storage data area based on the resulting bitmaps.
832 	 * Note: The storage space will be part of the sensitive
833 	 * segment, so we need to tag kpages here before the storage
834 	 * is actually allocated just so their space won't be accounted
835 	 * for. They will not be part of the statefile although those
836 	 * pages will be claimed by cprboot.
837 	 */
838 	cpr_clear_bitmaps();
839 
840 	spages = i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
841 	vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
842 	pages = spages - vpages;
843 
844 	str = "i_cpr_save_sensitive_kpages:";
845 	CPR_DEBUG(CPR_DEBUG7, pages_fmt, "before", str, spages, vpages, pages);
846 
847 	/*
848 	 * Allocate space to save the clean sensitive kpages
849 	 */
850 	for (retry_cnt = 0; retry_cnt < MAX_STORAGE_ALLOC_RETRY; retry_cnt++) {
851 		/*
852 		 * Alloc on first pass or realloc if we are retrying because
853 		 * of insufficient storage for sensitive pages
854 		 */
855 		if (retry_cnt == 0 || error == ENOMEM) {
856 			if (i_cpr_storage_data_base) {
857 				kmem_free(i_cpr_storage_data_base,
858 				    mmu_ptob(i_cpr_storage_data_sz));
859 				i_cpr_storage_data_base = NULL;
860 				i_cpr_storage_data_sz = 0;
861 			}
862 			addr = i_cpr_storage_data_alloc(pages,
863 			    &i_cpr_storage_data_sz, retry_cnt);
864 			if (addr == NULL) {
865 				CPR_DEBUG(CPR_DEBUG7,
866 				    "\n%s can't allocate data storage space!\n",
867 				    str);
868 				return (ENOMEM);
869 			}
870 			i_cpr_storage_data_base = addr;
871 			i_cpr_storage_data_end =
872 			    addr + mmu_ptob(i_cpr_storage_data_sz);
873 		}
874 
875 		/*
876 		 * Allocate on first pass, only realloc if retry is because of
877 		 * insufficient descriptors, but reset contents on each pass
878 		 * (desc_alloc resets contents as well)
879 		 */
880 		if (retry_cnt == 0 || error == -1) {
881 			error = i_cpr_storage_desc_alloc(
882 			    &i_cpr_storage_desc_base, &i_cpr_storage_desc_pgcnt,
883 			    &i_cpr_storage_desc_end, retry_cnt);
884 			if (error != 0)
885 				return (error);
886 		} else {
887 			i_cpr_storage_desc_init(i_cpr_storage_desc_base,
888 			    i_cpr_storage_desc_pgcnt, i_cpr_storage_desc_end);
889 		}
890 
891 		/*
892 		 * We are ready to save the sensitive kpages to storage.
893 		 * We cannot trust what's tagged in the bitmaps anymore
894 		 * after storage allocations.  Clear up the bitmaps and
895 		 * retag the sensitive kpages again.  The storage pages
896 		 * should be untagged.
897 		 */
898 		cpr_clear_bitmaps();
899 
900 		spages =
901 		    i_cpr_count_sensitive_kpages(REGULAR_BITMAP, cpr_setbit);
902 		vpages = cpr_count_volatile_pages(REGULAR_BITMAP, cpr_clrbit);
903 
904 		CPR_DEBUG(CPR_DEBUG7, pages_fmt, "after ", str,
905 		    spages, vpages, spages - vpages);
906 
907 		/*
908 		 * Returns 0 on success, -1 if too few descriptors, and
909 		 * ENOMEM if not enough space to save sensitive pages
910 		 */
911 		CPR_DEBUG(CPR_DEBUG1, "compressing pages to storage...\n");
912 		error = i_cpr_save_to_storage();
913 		if (error == 0) {
914 			/* Saving to storage succeeded */
915 			CPR_DEBUG(CPR_DEBUG1, "compressed %d pages\n",
916 			    sensitive_pages_saved);
917 			break;
918 		} else if (error == -1)
919 			CPR_DEBUG(CPR_DEBUG1, "%s too few descriptors\n", str);
920 	}
921 	if (error == -1)
922 		error = ENOMEM;
923 	return (error);
924 }
925 
926 
927 /*
928  * Estimate how much memory we will need to save
929  * the sensitive pages with compression.
930  */
931 static caddr_t
i_cpr_storage_data_alloc(pgcnt_t pages,pgcnt_t * alloc_pages,int retry_cnt)932 i_cpr_storage_data_alloc(pgcnt_t pages, pgcnt_t *alloc_pages, int retry_cnt)
933 {
934 	pgcnt_t alloc_pcnt, last_pcnt;
935 	caddr_t addr;
936 	char *str;
937 
938 	str = "i_cpr_storage_data_alloc:";
939 	if (retry_cnt == 0) {
940 		/*
941 		 * common compression ratio is about 3:1
942 		 * initial storage allocation is estimated at 40%
943 		 * to cover the majority of cases
944 		 */
945 		alloc_pcnt = INITIAL_ALLOC_PCNT;
946 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
947 		CPR_DEBUG(CPR_DEBUG7, "%s sensitive pages: %ld\n", str, pages);
948 		CPR_DEBUG(CPR_DEBUG7,
949 		    "%s initial est pages: %ld, alloc %ld%%\n",
950 		    str, *alloc_pages, alloc_pcnt);
951 	} else {
952 		/*
953 		 * calculate the prior compression percentage (x100)
954 		 * from the last attempt to save sensitive pages
955 		 */
956 		ASSERT(sensitive_pages_saved != 0);
957 		last_pcnt = (mmu_btopr(sensitive_size_saved) * INTEGRAL) /
958 		    sensitive_pages_saved;
959 		CPR_DEBUG(CPR_DEBUG7, "%s last ratio %ld%%\n", str, last_pcnt);
960 
961 		/*
962 		 * new estimated storage size is based on
963 		 * the larger ratio + 5% for each retry:
964 		 * pages * (last + [5%, 10%])
965 		 */
966 		alloc_pcnt = MAX(last_pcnt, INITIAL_ALLOC_PCNT) +
967 		    (retry_cnt * 5);
968 		*alloc_pages = (pages * alloc_pcnt) / INTEGRAL;
969 		CPR_DEBUG(CPR_DEBUG7, "%s Retry est pages: %ld, alloc %ld%%\n",
970 		    str, *alloc_pages, alloc_pcnt);
971 	}
972 
973 	addr = kmem_alloc(mmu_ptob(*alloc_pages), KM_NOSLEEP);
974 	CPR_DEBUG(CPR_DEBUG7, "%s alloc %ld pages\n", str, *alloc_pages);
975 	return (addr);
976 }
977 
978 
979 void
i_cpr_storage_free(void)980 i_cpr_storage_free(void)
981 {
982 	/* Free descriptors */
983 	if (i_cpr_storage_desc_base) {
984 		kmem_free(i_cpr_storage_desc_base,
985 		    mmu_ptob(i_cpr_storage_desc_pgcnt));
986 		i_cpr_storage_desc_base = NULL;
987 		i_cpr_storage_desc_pgcnt = 0;
988 	}
989 
990 
991 	/* Data storage */
992 	if (i_cpr_storage_data_base) {
993 		kmem_free(i_cpr_storage_data_base,
994 		    mmu_ptob(i_cpr_storage_data_sz));
995 		i_cpr_storage_data_base = NULL;
996 		i_cpr_storage_data_sz = 0;
997 	}
998 }
999 
1000 
1001 /*
1002  * This routine is derived from cpr_compress_and_write().
1003  * 1. Do bookkeeping in the descriptor for the contiguous sensitive chunk.
1004  * 2. Compress and save the clean sensitive pages into the storage area.
1005  */
1006 int
i_cpr_compress_and_save(int chunks,pfn_t spfn,pgcnt_t pages)1007 i_cpr_compress_and_save(int chunks, pfn_t spfn, pgcnt_t pages)
1008 {
1009 	extern char *cpr_compress_pages(cpd_t *, pgcnt_t, int);
1010 	extern caddr_t i_cpr_storage_data_end;
1011 	uint_t remaining, datalen;
1012 	uint32_t test_usum;
1013 	char *datap;
1014 	csd_t *descp;
1015 	cpd_t cpd;
1016 	int error;
1017 
1018 	/*
1019 	 * Fill next empty storage descriptor
1020 	 */
1021 	descp = i_cpr_storage_desc_base + chunks - 1;
1022 	if (descp >= i_cpr_storage_desc_end) {
1023 		CPR_DEBUG(CPR_DEBUG1, "ran out of descriptors, base 0x%p, "
1024 		    "chunks %d, end 0x%p, descp 0x%p\n",
1025 		    (void *)i_cpr_storage_desc_base, chunks,
1026 		    (void *)i_cpr_storage_desc_end, (void *)descp);
1027 		return (-1);
1028 	}
1029 	ASSERT(descp->csd_dirty_spfn == (uint_t)-1);
1030 	i_cpr_storage_desc_last_used = descp;
1031 
1032 	descp->csd_dirty_spfn = spfn;
1033 	descp->csd_dirty_npages = pages;
1034 
1035 	i_cpr_mapin(CPR->c_mapping_area, pages, spfn);
1036 
1037 	/*
1038 	 * try compressing pages and copy cpd fields
1039 	 * pfn is copied for debug use
1040 	 */
1041 	cpd.cpd_pfn = spfn;
1042 	datap = cpr_compress_pages(&cpd, pages, C_COMPRESSING);
1043 	datalen = cpd.cpd_length;
1044 	descp->csd_clean_compressed = (cpd.cpd_flag & CPD_COMPRESS);
1045 #ifdef DEBUG
1046 	descp->csd_usum = cpd.cpd_usum;
1047 	descp->csd_csum = cpd.cpd_csum;
1048 #endif
1049 
1050 	error = 0;
1051 
1052 	/*
1053 	 * Save the raw or compressed data to the storage area pointed to by
1054 	 * sensitive_write_ptr. Make sure the storage space is big enough to
1055 	 * hold the result. Otherwise roll back to increase the storage space.
1056 	 */
1057 	descp->csd_clean_sva = (cpr_ptr)sensitive_write_ptr;
1058 	descp->csd_clean_sz = datalen;
1059 	if ((sensitive_write_ptr + datalen) < i_cpr_storage_data_end) {
1060 		extern	void cprbcopy(void *, void *, size_t);
1061 
1062 		cprbcopy(datap, sensitive_write_ptr, datalen);
1063 		sensitive_size_saved += datalen;
1064 		sensitive_pages_saved += descp->csd_dirty_npages;
1065 		sensitive_write_ptr += datalen;
1066 	} else {
1067 		remaining = (i_cpr_storage_data_end - sensitive_write_ptr);
1068 		CPR_DEBUG(CPR_DEBUG1, "i_cpr_compress_and_save: The storage "
1069 		    "space is too small!\ngot %d, want %d\n\n",
1070 		    remaining, (remaining + datalen));
1071 #ifdef	DEBUG
1072 		/*
1073 		 * Check to see if the content of the sensitive pages that we
1074 		 * just copied have changed during this small time window.
1075 		 */
1076 		test_usum = checksum32(CPR->c_mapping_area, mmu_ptob(pages));
1077 		descp->csd_usum = cpd.cpd_usum;
1078 		if (test_usum != descp->csd_usum) {
1079 			CPR_DEBUG(CPR_DEBUG1, "\nWARNING: "
1080 			    "i_cpr_compress_and_save: "
1081 			    "Data in the range of pfn 0x%lx to pfn "
1082 			    "0x%lx has changed after they are saved "
1083 			    "into storage.", spfn, (spfn + pages - 1));
1084 		}
1085 #endif
1086 		error = ENOMEM;
1087 	}
1088 
1089 	i_cpr_mapout(CPR->c_mapping_area, pages);
1090 	return (error);
1091 }
1092 
1093 
1094 /*
1095  * This routine is derived from cpr_count_kpages().
1096  * It goes through kernel data nucleus and segkmem segments to select
1097  * pages in use and mark them in the corresponding bitmap.
1098  */
1099 pgcnt_t
i_cpr_count_sensitive_kpages(int mapflag,bitfunc_t bitfunc)1100 i_cpr_count_sensitive_kpages(int mapflag, bitfunc_t bitfunc)
1101 {
1102 	pgcnt_t kdata_cnt = 0, segkmem_cnt = 0;
1103 	extern caddr_t e_moddata;
1104 	extern struct seg kvalloc;
1105 	extern struct seg kmem64;
1106 	size_t size;
1107 
1108 	/*
1109 	 * Kernel data nucleus pages
1110 	 */
1111 	size = e_moddata - s_data;
1112 	kdata_cnt += cpr_count_pages(s_data, size,
1113 	    mapflag, bitfunc, DBG_SHOWRANGE);
1114 
1115 	/*
1116 	 * kvseg and kvalloc pages
1117 	 */
1118 	segkmem_cnt += cpr_scan_kvseg(mapflag, bitfunc, &kvseg);
1119 	segkmem_cnt += cpr_count_pages(kvalloc.s_base, kvalloc.s_size,
1120 	    mapflag, bitfunc, DBG_SHOWRANGE);
1121 
1122 	/* segment to support kernel memory usage above 32-bit space (4GB) */
1123 	if (kmem64.s_base)
1124 		segkmem_cnt += cpr_count_pages(kmem64.s_base, kmem64.s_size,
1125 		    mapflag, bitfunc, DBG_SHOWRANGE);
1126 
1127 	CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_count_sensitive_kpages:\n"
1128 	    "\tkdata_cnt %ld + segkmem_cnt %ld = %ld pages\n",
1129 	    kdata_cnt, segkmem_cnt, kdata_cnt + segkmem_cnt);
1130 
1131 	return (kdata_cnt + segkmem_cnt);
1132 }
1133 
1134 
1135 pgcnt_t
i_cpr_count_storage_pages(int mapflag,bitfunc_t bitfunc)1136 i_cpr_count_storage_pages(int mapflag, bitfunc_t bitfunc)
1137 {
1138 	pgcnt_t count = 0;
1139 
1140 	if (i_cpr_storage_desc_base) {
1141 		count += cpr_count_pages((caddr_t)i_cpr_storage_desc_base,
1142 		    (size_t)mmu_ptob(i_cpr_storage_desc_pgcnt),
1143 		    mapflag, bitfunc, DBG_SHOWRANGE);
1144 	}
1145 	if (i_cpr_storage_data_base) {
1146 		count += cpr_count_pages(i_cpr_storage_data_base,
1147 		    (size_t)mmu_ptob(i_cpr_storage_data_sz),
1148 		    mapflag, bitfunc, DBG_SHOWRANGE);
1149 	}
1150 	return (count);
1151 }
1152 
1153 
1154 /*
1155  * Derived from cpr_write_statefile().
1156  * Allocate (or reallocate after exhausting the supply) descriptors for each
1157  * chunk of contiguous sensitive kpages.
1158  */
1159 static int
i_cpr_storage_desc_alloc(csd_t ** basepp,pgcnt_t * pgsp,csd_t ** endpp,int retry)1160 i_cpr_storage_desc_alloc(csd_t **basepp, pgcnt_t *pgsp, csd_t **endpp,
1161     int retry)
1162 {
1163 	pgcnt_t npages;
1164 	int chunks;
1165 	csd_t	*descp, *end;
1166 	size_t	len;
1167 	char *str = "i_cpr_storage_desc_alloc:";
1168 
1169 	/*
1170 	 * On initial allocation, add some extra to cover overhead caused
1171 	 * by the allocation for the storage area later.
1172 	 */
1173 	if (retry == 0) {
1174 		chunks = cpr_contig_pages(NULL, STORAGE_DESC_ALLOC) +
1175 		    EXTRA_DESCS;
1176 		npages = mmu_btopr(sizeof (**basepp) * (pgcnt_t)chunks);
1177 		CPR_DEBUG(CPR_DEBUG7, "%s chunks %d, ", str, chunks);
1178 	} else {
1179 		CPR_DEBUG(CPR_DEBUG7, "%s retry %d: ", str, retry);
1180 		npages = *pgsp + 1;
1181 	}
1182 	/* Free old descriptors, if any */
1183 	if (*basepp)
1184 		kmem_free((caddr_t)*basepp, mmu_ptob(*pgsp));
1185 
1186 	descp = *basepp = kmem_alloc(mmu_ptob(npages), KM_NOSLEEP);
1187 	if (descp == NULL) {
1188 		CPR_DEBUG(CPR_DEBUG7, "%s no space for descriptors!\n", str);
1189 		return (ENOMEM);
1190 	}
1191 
1192 	*pgsp = npages;
1193 	len = mmu_ptob(npages);
1194 	end = *endpp = descp + (len / (sizeof (**basepp)));
1195 	CPR_DEBUG(CPR_DEBUG7, "npages 0x%lx, len 0x%lx, items 0x%lx\n\t*basepp "
1196 	    "%p, *endpp %p\n", npages, len, (len / (sizeof (**basepp))),
1197 	    (void *)*basepp, (void *)*endpp);
1198 	i_cpr_storage_desc_init(descp, npages, end);
1199 	return (0);
1200 }
1201 
1202 static void
i_cpr_storage_desc_init(csd_t * descp,pgcnt_t npages,csd_t * end)1203 i_cpr_storage_desc_init(csd_t *descp, pgcnt_t npages, csd_t *end)
1204 {
1205 	size_t	len = mmu_ptob(npages);
1206 
1207 	/* Initialize the descriptors to something impossible. */
1208 	bzero(descp, len);
1209 #ifdef	DEBUG
1210 	/*
1211 	 * This condition is tested by an ASSERT
1212 	 */
1213 	for (; descp < end; descp++)
1214 		descp->csd_dirty_spfn = (uint_t)-1;
1215 #endif
1216 }
1217 
1218 int
i_cpr_dump_sensitive_kpages(vnode_t * vp)1219 i_cpr_dump_sensitive_kpages(vnode_t *vp)
1220 {
1221 	int	error = 0;
1222 	uint_t	spin_cnt = 0;
1223 	csd_t	*descp;
1224 
1225 	/*
1226 	 * These following two variables need to be reinitialized
1227 	 * for each cpr cycle.
1228 	 */
1229 	i_cpr_sensitive_bytes_dumped = 0;
1230 	i_cpr_sensitive_pgs_dumped = 0;
1231 
1232 	if (i_cpr_storage_desc_base) {
1233 		for (descp = i_cpr_storage_desc_base;
1234 		    descp <= i_cpr_storage_desc_last_used; descp++) {
1235 			if (error = cpr_dump_sensitive(vp, descp))
1236 				return (error);
1237 			spin_cnt++;
1238 			if ((spin_cnt & 0x5F) == 1)
1239 				cpr_spinning_bar();
1240 		}
1241 		prom_printf(" \b");
1242 	}
1243 
1244 	CPR_DEBUG(CPR_DEBUG7, "\ni_cpr_dump_sensitive_kpages: dumped %ld\n",
1245 	    i_cpr_sensitive_pgs_dumped);
1246 	return (0);
1247 }
1248 
1249 
1250 /*
1251  * 1. Fill the cpr page descriptor with the info of the dirty pages
1252  *    and
1253  *    write the descriptor out. It will be used at resume.
1254  * 2. Write the clean data in stead of the dirty data out.
1255  *    Note: to save space, the clean data is already compressed.
1256  */
1257 static int
cpr_dump_sensitive(vnode_t * vp,csd_t * descp)1258 cpr_dump_sensitive(vnode_t *vp, csd_t *descp)
1259 {
1260 	int error = 0;
1261 	caddr_t datap;
1262 	cpd_t cpd;	/* cpr page descriptor */
1263 	pfn_t	dirty_spfn;
1264 	pgcnt_t dirty_npages;
1265 	size_t clean_sz;
1266 	caddr_t	clean_sva;
1267 	int	clean_compressed;
1268 	extern uchar_t cpr_pagecopy[];
1269 
1270 	dirty_spfn = descp->csd_dirty_spfn;
1271 	dirty_npages = descp->csd_dirty_npages;
1272 	clean_sva = (caddr_t)descp->csd_clean_sva;
1273 	clean_sz = descp->csd_clean_sz;
1274 	clean_compressed = descp->csd_clean_compressed;
1275 
1276 	/* Fill cpr page descriptor. */
1277 	cpd.cpd_magic = (uint_t)CPR_PAGE_MAGIC;
1278 	cpd.cpd_pfn = dirty_spfn;
1279 	cpd.cpd_flag = 0;  /* must init to zero */
1280 	cpd.cpd_pages = dirty_npages;
1281 
1282 #ifdef	DEBUG
1283 	if ((cpd.cpd_usum = descp->csd_usum) != 0)
1284 		cpd.cpd_flag |= CPD_USUM;
1285 	if ((cpd.cpd_csum = descp->csd_csum) != 0)
1286 		cpd.cpd_flag |= CPD_CSUM;
1287 #endif
1288 
1289 	STAT->cs_dumped_statefsz += mmu_ptob(dirty_npages);
1290 
1291 	/*
1292 	 * The sensitive kpages are usually saved with compression
1293 	 * unless compression could not reduce the size of the data.
1294 	 * If user choose not to have the statefile compressed,
1295 	 * we need to decompress the data back before dumping it to disk.
1296 	 */
1297 	if (CPR->c_flags & C_COMPRESSING) {
1298 		cpd.cpd_length = clean_sz;
1299 		datap = clean_sva;
1300 		if (clean_compressed)
1301 			cpd.cpd_flag |= CPD_COMPRESS;
1302 	} else {
1303 		if (clean_compressed) {
1304 			cpd.cpd_length = decompress(clean_sva, cpr_pagecopy,
1305 			    clean_sz, mmu_ptob(dirty_npages));
1306 			datap = (caddr_t)cpr_pagecopy;
1307 			ASSERT(cpd.cpd_length == mmu_ptob(dirty_npages));
1308 		} else {
1309 			cpd.cpd_length = clean_sz;
1310 			datap = clean_sva;
1311 		}
1312 		cpd.cpd_csum = 0;
1313 	}
1314 
1315 	/* Write cpr page descriptor */
1316 	error = cpr_write(vp, (caddr_t)&cpd, sizeof (cpd));
1317 	if (error) {
1318 		CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1319 #ifdef DEBUG
1320 		debug_enter("cpr_dump_sensitive: cpr_write() page "
1321 		    "descriptor failed!\n");
1322 #endif
1323 		return (error);
1324 	}
1325 
1326 	i_cpr_sensitive_bytes_dumped += sizeof (cpd_t);
1327 
1328 	/* Write page data */
1329 	error = cpr_write(vp, (caddr_t)datap, cpd.cpd_length);
1330 	if (error) {
1331 		CPR_DEBUG(CPR_DEBUG7, "error: %x\n", error);
1332 		CPR_DEBUG(CPR_DEBUG7, "descp: %p\n", (void *)descp);
1333 		CPR_DEBUG(CPR_DEBUG7, "cpr_write(%p, %p , %lx)\n",
1334 		    (void *)vp, (void *)datap, cpd.cpd_length);
1335 #ifdef DEBUG
1336 		debug_enter("cpr_dump_sensitive: cpr_write() data failed!\n");
1337 #endif
1338 		return (error);
1339 	}
1340 
1341 	i_cpr_sensitive_bytes_dumped += cpd.cpd_length;
1342 	i_cpr_sensitive_pgs_dumped += dirty_npages;
1343 
1344 	return (error);
1345 }
1346 
1347 
1348 /*
1349  * Sanity check to make sure that we have dumped right amount
1350  * of pages from different sources to statefile.
1351  */
1352 int
i_cpr_check_pgs_dumped(uint_t pgs_expected,uint_t regular_pgs_dumped)1353 i_cpr_check_pgs_dumped(uint_t pgs_expected, uint_t regular_pgs_dumped)
1354 {
1355 	uint_t total_pgs_dumped;
1356 
1357 	total_pgs_dumped = regular_pgs_dumped + i_cpr_sensitive_pgs_dumped;
1358 
1359 	CPR_DEBUG(CPR_DEBUG7, "\ncheck_pgs: reg %d + sens %ld = %d, "
1360 	    "expect %d\n\n", regular_pgs_dumped, i_cpr_sensitive_pgs_dumped,
1361 	    total_pgs_dumped, pgs_expected);
1362 
1363 	if (pgs_expected == total_pgs_dumped)
1364 		return (0);
1365 
1366 	return (EINVAL);
1367 }
1368 
1369 
1370 int
i_cpr_reusefini(void)1371 i_cpr_reusefini(void)
1372 {
1373 	struct vnode *vp;
1374 	cdef_t *cdef;
1375 	size_t size;
1376 	char *bufp;
1377 	int rc;
1378 
1379 	if (cpr_reusable_mode)
1380 		cpr_reusable_mode = 0;
1381 
1382 	if (rc = cpr_open_deffile(FREAD|FWRITE, &vp)) {
1383 		if (rc == EROFS) {
1384 			cpr_err(CE_CONT, "uadmin A_FREEZE AD_REUSEFINI "
1385 			    "(uadmin %d %d)\nmust be done with / mounted "
1386 			    "writeable.\n", A_FREEZE, AD_REUSEFINI);
1387 		}
1388 		return (rc);
1389 	}
1390 
1391 	cdef = kmem_alloc(sizeof (*cdef), KM_SLEEP);
1392 	rc = cpr_rdwr(UIO_READ, vp, cdef, sizeof (*cdef));
1393 
1394 	if (rc) {
1395 		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1396 		    cpr_default_path, rc);
1397 	} else if (cdef->mini.magic != CPR_DEFAULT_MAGIC) {
1398 		cpr_err(CE_WARN, "bad magic number in %s, cannot restore "
1399 		    "prom values for %s", cpr_default_path,
1400 		    cpr_enumerate_promprops(&bufp, &size));
1401 		kmem_free(bufp, size);
1402 		rc = EINVAL;
1403 	} else {
1404 		/*
1405 		 * clean up prom properties
1406 		 */
1407 		rc = cpr_update_nvram(cdef->props);
1408 		if (rc == 0) {
1409 			/*
1410 			 * invalidate the disk copy and turn off reusable
1411 			 */
1412 			cdef->mini.magic = 0;
1413 			cdef->mini.reusable = 0;
1414 			if (rc = cpr_rdwr(UIO_WRITE, vp,
1415 			    &cdef->mini, sizeof (cdef->mini))) {
1416 				cpr_err(CE_WARN, "Failed writing %s, errno %d",
1417 				    cpr_default_path, rc);
1418 			}
1419 		}
1420 	}
1421 
1422 	(void) VOP_CLOSE(vp, FREAD|FWRITE, 1, (offset_t)0, CRED(), NULL);
1423 	VN_RELE(vp);
1424 	kmem_free(cdef, sizeof (*cdef));
1425 
1426 	return (rc);
1427 }
1428 
1429 
1430 int
i_cpr_reuseinit(void)1431 i_cpr_reuseinit(void)
1432 {
1433 	int rc = 0;
1434 
1435 	if (rc = cpr_default_setup(1))
1436 		return (rc);
1437 
1438 	/*
1439 	 * We need to validate default file
1440 	 */
1441 	rc = cpr_validate_definfo(1);
1442 	if (rc == 0)
1443 		cpr_reusable_mode = 1;
1444 	else if (rc == EROFS) {
1445 		cpr_err(CE_NOTE, "reuseinit must be performed "
1446 		    "while / is mounted writeable");
1447 	}
1448 
1449 	(void) cpr_default_setup(0);
1450 
1451 	return (rc);
1452 }
1453 
1454 
1455 int
i_cpr_check_cprinfo(void)1456 i_cpr_check_cprinfo(void)
1457 {
1458 	struct vnode *vp;
1459 	cmini_t mini;
1460 	int rc = 0;
1461 
1462 	if (rc = cpr_open_deffile(FREAD, &vp)) {
1463 		if (rc == ENOENT)
1464 			cpr_err(CE_NOTE, "cprinfo file does not "
1465 			    "exist.  You must run 'uadmin %d %d' "
1466 			    "command while / is mounted writeable,\n"
1467 			    "then reboot and run 'uadmin %d %d' "
1468 			    "to create a reusable statefile",
1469 			    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1470 		return (rc);
1471 	}
1472 
1473 	rc = cpr_rdwr(UIO_READ, vp, &mini, sizeof (mini));
1474 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
1475 	VN_RELE(vp);
1476 
1477 	if (rc) {
1478 		cpr_err(CE_WARN, "Failed reading %s, errno = %d",
1479 		    cpr_default_path, rc);
1480 	} else if (mini.magic != CPR_DEFAULT_MAGIC) {
1481 		cpr_err(CE_CONT, "bad magic number in cprinfo file.\n"
1482 		    "You must run 'uadmin %d %d' while / is mounted "
1483 		    "writeable, then reboot and run 'uadmin %d %d' "
1484 		    "to create a reusable statefile\n",
1485 		    A_FREEZE, AD_REUSEINIT, A_FREEZE, AD_REUSABLE);
1486 		rc = EINVAL;
1487 	}
1488 
1489 	return (rc);
1490 }
1491 
1492 
/*
 * Report whether reusable statefiles are supported; this implementation
 * always answers yes (1).
 */
int
i_cpr_reusable_supported(void)
{
	const int supported = 1;

	return (supported);
}
1498 
1499 
1500 /*
1501  * find prom phys pages and alloc space for a tmp copy
1502  */
1503 static int
i_cpr_find_ppages(void)1504 i_cpr_find_ppages(void)
1505 {
1506 	struct page *pp;
1507 	struct memlist *pmem;
1508 	pgcnt_t npages, pcnt, scnt, vcnt;
1509 	pfn_t ppn, plast, *dst;
1510 	int mapflag;
1511 
1512 	cpr_clear_bitmaps();
1513 	mapflag = REGULAR_BITMAP;
1514 
1515 	/*
1516 	 * there should be a page_t for each phys page used by the kernel;
1517 	 * set a bit for each phys page not tracked by a page_t
1518 	 */
1519 	pcnt = 0;
1520 	memlist_read_lock();
1521 	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1522 		npages = mmu_btop(pmem->ml_size);
1523 		ppn = mmu_btop(pmem->ml_address);
1524 		for (plast = ppn + npages; ppn < plast; ppn++) {
1525 			if (page_numtopp_nolock(ppn))
1526 				continue;
1527 			(void) cpr_setbit(ppn, mapflag);
1528 			pcnt++;
1529 		}
1530 	}
1531 	memlist_read_unlock();
1532 
1533 	/*
1534 	 * clear bits for phys pages in each segment
1535 	 */
1536 	scnt = cpr_count_seg_pages(mapflag, cpr_clrbit);
1537 
1538 	/*
1539 	 * set bits for phys pages referenced by the promvp vnode;
1540 	 * these pages are mostly comprised of forthdebug words
1541 	 */
1542 	vcnt = 0;
1543 	for (pp = promvp.v_pages; pp; ) {
1544 		if (cpr_setbit(pp->p_offset, mapflag) == 0)
1545 			vcnt++;
1546 		pp = pp->p_vpnext;
1547 		if (pp == promvp.v_pages)
1548 			break;
1549 	}
1550 
1551 	/*
1552 	 * total number of prom pages are:
1553 	 * (non-page_t pages - seg pages + vnode pages)
1554 	 */
1555 	ppage_count = pcnt - scnt + vcnt;
1556 	CPR_DEBUG(CPR_DEBUG1,
1557 	    "find_ppages: pcnt %ld - scnt %ld + vcnt %ld = %ld\n",
1558 	    pcnt, scnt, vcnt, ppage_count);
1559 
1560 	/*
1561 	 * alloc array of pfn_t to store phys page list
1562 	 */
1563 	pphys_list_size = ppage_count * sizeof (pfn_t);
1564 	pphys_list = kmem_alloc(pphys_list_size, KM_NOSLEEP);
1565 	if (pphys_list == NULL) {
1566 		cpr_err(CE_WARN, "cannot alloc pphys_list");
1567 		return (ENOMEM);
1568 	}
1569 
1570 	/*
1571 	 * phys pages referenced in the bitmap should be
1572 	 * those used by the prom; scan bitmap and save
1573 	 * a list of prom phys page numbers
1574 	 */
1575 	dst = pphys_list;
1576 	memlist_read_lock();
1577 	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1578 		npages = mmu_btop(pmem->ml_size);
1579 		ppn = mmu_btop(pmem->ml_address);
1580 		for (plast = ppn + npages; ppn < plast; ppn++) {
1581 			if (cpr_isset(ppn, mapflag)) {
1582 				ASSERT(dst < (pphys_list + ppage_count));
1583 				*dst++ = ppn;
1584 			}
1585 		}
1586 	}
1587 	memlist_read_unlock();
1588 
1589 	/*
1590 	 * allocate space to store prom pages
1591 	 */
1592 	ppage_buf = kmem_alloc(mmu_ptob(ppage_count), KM_NOSLEEP);
1593 	if (ppage_buf == NULL) {
1594 		kmem_free(pphys_list, pphys_list_size);
1595 		pphys_list = NULL;
1596 		cpr_err(CE_WARN, "cannot alloc ppage_buf");
1597 		return (ENOMEM);
1598 	}
1599 
1600 	return (0);
1601 }
1602 
1603 
1604 /*
1605  * save prom pages to kmem pages
1606  */
1607 static void
i_cpr_save_ppages(void)1608 i_cpr_save_ppages(void)
1609 {
1610 	pfn_t *pphys, *plast;
1611 	caddr_t dst;
1612 
1613 	/*
1614 	 * map in each prom page and copy to a kmem page
1615 	 */
1616 	dst = ppage_buf;
1617 	plast = pphys_list + ppage_count;
1618 	for (pphys = pphys_list; pphys < plast; pphys++) {
1619 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1620 		bcopy(cpr_vaddr, dst, MMU_PAGESIZE);
1621 		i_cpr_mapout(cpr_vaddr, 1);
1622 		dst += MMU_PAGESIZE;
1623 	}
1624 
1625 	CPR_DEBUG(CPR_DEBUG1, "saved %ld prom pages\n", ppage_count);
1626 }
1627 
1628 
1629 /*
1630  * restore prom pages from kmem pages
1631  */
1632 static void
i_cpr_restore_ppages(void)1633 i_cpr_restore_ppages(void)
1634 {
1635 	pfn_t *pphys, *plast;
1636 	caddr_t src;
1637 
1638 	dcache_flushall();
1639 
1640 	/*
1641 	 * map in each prom page and copy from a kmem page
1642 	 */
1643 	src = ppage_buf;
1644 	plast = pphys_list + ppage_count;
1645 	for (pphys = pphys_list; pphys < plast; pphys++) {
1646 		i_cpr_mapin(cpr_vaddr, 1, *pphys);
1647 		bcopy(src, cpr_vaddr, MMU_PAGESIZE);
1648 		i_cpr_mapout(cpr_vaddr, 1);
1649 		src += MMU_PAGESIZE;
1650 	}
1651 
1652 	dcache_flushall();
1653 
1654 	CPR_DEBUG(CPR_DEBUG1, "restored %ld prom pages\n", ppage_count);
1655 }
1656 
1657 
1658 /*
1659  * save/restore prom pages or free related allocs
1660  */
1661 int
i_cpr_prom_pages(int action)1662 i_cpr_prom_pages(int action)
1663 {
1664 	int error;
1665 
1666 	if (action == CPR_PROM_SAVE) {
1667 		if (ppage_buf == NULL) {
1668 			ASSERT(pphys_list == NULL);
1669 			if (error = i_cpr_find_ppages())
1670 				return (error);
1671 			i_cpr_save_ppages();
1672 		}
1673 	} else if (action == CPR_PROM_RESTORE) {
1674 		i_cpr_restore_ppages();
1675 	} else if (action == CPR_PROM_FREE) {
1676 		if (pphys_list) {
1677 			ASSERT(pphys_list_size);
1678 			kmem_free(pphys_list, pphys_list_size);
1679 			pphys_list = NULL;
1680 			pphys_list_size = 0;
1681 		}
1682 		if (ppage_buf) {
1683 			ASSERT(ppage_count);
1684 			kmem_free(ppage_buf, mmu_ptob(ppage_count));
1685 			CPR_DEBUG(CPR_DEBUG1, "freed %ld prom pages\n",
1686 			    ppage_count);
1687 			ppage_buf = NULL;
1688 			ppage_count = 0;
1689 		}
1690 	}
1691 	return (0);
1692 }
1693 
1694 
1695 /*
1696  * record tlb data for the nucleus, bigktsb's, and the cpr module;
1697  * this data is later used by cprboot to install dtlb/itlb entries.
1698  * when we jump into the cpr module during the resume phase, those
1699  * mappings are needed until switching to the kernel trap table.
1700  * to make the dtte/itte info available during resume, we need
1701  * the info recorded prior to saving sensitive pages, otherwise
1702  * all the data would appear as NULLs.
1703  */
1704 static void
i_cpr_save_tlbinfo(void)1705 i_cpr_save_tlbinfo(void)
1706 {
1707 	cti_t cti = {0};
1708 
1709 	/*
1710 	 * during resume - shortly after jumping into the cpr module,
1711 	 * sfmmu_load_mmustate() will overwrite any dtlb entry at any
1712 	 * index used for TSBs; skip is set so that any saved tte will
1713 	 * target other tlb offsets and prevent being lost during
1714 	 * resume.  now scan the dtlb and save locked entries,
1715 	 * then add entries for the tmp stack / data page and the
1716 	 * cpr thread structure.
1717 	 */
1718 	cti.dst = m_info.dtte;
1719 	cti.tail = cti.dst + CPR_MAX_TLB;
1720 	cti.reader = dtlb_rd_entry;
1721 	cti.writer = NULL;
1722 	cti.filter = i_cpr_lnb;
1723 	cti.index = cpunodes[CPU->cpu_id].dtlb_size - 1;
1724 
1725 	if (utsb_dtlb_ttenum != -1)
1726 		cti.skip = (1 << utsb_dtlb_ttenum);
1727 
1728 	if (utsb4m_dtlb_ttenum != -1)
1729 		cti.skip |= (1 << utsb4m_dtlb_ttenum);
1730 
1731 	i_cpr_scan_tlb(&cti);
1732 	i_cpr_make_tte(&cti, &i_cpr_data_page, datava);
1733 	i_cpr_make_tte(&cti, curthread, datava);
1734 
1735 	/*
1736 	 * scan itlb and save locked entries; add an entry for
1737 	 * the first text page of the cpr module; cprboot will
1738 	 * jump to that page after restoring kernel pages.
1739 	 */
1740 	cti.dst = m_info.itte;
1741 	cti.tail = cti.dst + CPR_MAX_TLB;
1742 	cti.reader = itlb_rd_entry;
1743 	cti.index = cpunodes[CPU->cpu_id].itlb_size - 1;
1744 	cti.skip = 0;
1745 	i_cpr_scan_tlb(&cti);
1746 	i_cpr_make_tte(&cti, (void *)i_cpr_resume_setup, textva);
1747 }
1748 
1749 
1750 /* ARGSUSED */
1751 int
i_cpr_dump_setup(vnode_t * vp)1752 i_cpr_dump_setup(vnode_t *vp)
1753 {
1754 	/*
1755 	 * zero out m_info and add info to dtte/itte arrays
1756 	 */
1757 	bzero(&m_info, sizeof (m_info));
1758 	i_cpr_save_tlbinfo();
1759 	return (0);
1760 }
1761 
1762 
1763 int
i_cpr_is_supported(int sleeptype)1764 i_cpr_is_supported(int sleeptype)
1765 {
1766 	char es_prop[] = "energystar-v2";
1767 	pnode_t node;
1768 	int last;
1769 	extern int cpr_supported_override;
1770 	extern int cpr_platform_enable;
1771 
1772 	if (sleeptype != CPR_TODISK)
1773 		return (0);
1774 
1775 	/*
1776 	 * The next statement tests if a specific platform has turned off
1777 	 * cpr support.
1778 	 */
1779 	if (cpr_supported_override)
1780 		return (0);
1781 
1782 	/*
1783 	 * Do not inspect energystar-v* property if a platform has
1784 	 * specifically turned on cpr support
1785 	 */
1786 	if (cpr_platform_enable)
1787 		return (1);
1788 
1789 	node = prom_rootnode();
1790 	if (prom_getproplen(node, es_prop) != -1)
1791 		return (1);
1792 	last = strlen(es_prop) - 1;
1793 	es_prop[last] = '3';
1794 	return (prom_getproplen(node, es_prop) != -1);
1795 }
1796 
1797 
1798 /*
1799  * the actual size of the statefile data isn't known until after all the
1800  * compressed pages are written; even the inode size doesn't reflect the
1801  * data size since there are usually many extra fs blocks.  for recording
1802  * the actual data size, the first sector of the statefile is copied to
1803  * a tmp buf, and the copy is later updated and flushed to disk.
1804  */
1805 int
i_cpr_blockzero(char * base,char ** bufpp,int * blkno,vnode_t * vp)1806 i_cpr_blockzero(char *base, char **bufpp, int *blkno, vnode_t *vp)
1807 {
1808 	extern int cpr_flush_write(vnode_t *);
1809 	static char cpr_sector[DEV_BSIZE];
1810 	cpr_ext bytes, *dst;
1811 
1812 	/*
1813 	 * this routine is called after cdd_t and csu_md_t are copied
1814 	 * to cpr_buf; mini-hack alert: the save/update method creates
1815 	 * a dependency on the combined struct size being >= one sector
1816 	 * or DEV_BSIZE; since introduction in Sol2.7, csu_md_t size is
1817 	 * over 1K bytes and will probably grow with any changes.
1818 	 *
1819 	 * copy when vp is NULL, flush when non-NULL
1820 	 */
1821 	if (vp == NULL) {
1822 		ASSERT((*bufpp - base) >= DEV_BSIZE);
1823 		bcopy(base, cpr_sector, sizeof (cpr_sector));
1824 		return (0);
1825 	} else {
1826 		bytes = dbtob(*blkno);
1827 		dst = &((cdd_t *)cpr_sector)->cdd_filesize;
1828 		bcopy(&bytes, dst, sizeof (bytes));
1829 		bcopy(cpr_sector, base, sizeof (cpr_sector));
1830 		*bufpp = base + sizeof (cpr_sector);
1831 		*blkno = cpr_statefile_offset();
1832 		CPR_DEBUG(CPR_DEBUG1, "statefile data size: %ld\n\n", bytes);
1833 		return (cpr_flush_write(vp));
1834 	}
1835 }
1836 
1837 
1838 /*
1839  * Allocate bitmaps according to the phys_install list.
1840  */
1841 static int
i_cpr_bitmap_setup(void)1842 i_cpr_bitmap_setup(void)
1843 {
1844 	struct memlist *pmem;
1845 	cbd_t *dp, *tail;
1846 	void *space;
1847 	size_t size;
1848 
1849 	/*
1850 	 * The number of bitmap descriptors will be the count of
1851 	 * phys_install ranges plus 1 for a trailing NULL struct.
1852 	 */
1853 	cpr_nbitmaps = 1;
1854 	for (pmem = phys_install; pmem; pmem = pmem->ml_next)
1855 		cpr_nbitmaps++;
1856 
1857 	if (cpr_nbitmaps > (CPR_MAX_BMDESC - 1)) {
1858 		cpr_err(CE_WARN, "too many physical memory ranges %d, max %d",
1859 		    cpr_nbitmaps, CPR_MAX_BMDESC - 1);
1860 		return (EFBIG);
1861 	}
1862 
1863 	/* Alloc an array of bitmap descriptors. */
1864 	dp = kmem_zalloc(cpr_nbitmaps * sizeof (*dp), KM_NOSLEEP);
1865 	if (dp == NULL) {
1866 		cpr_nbitmaps = 0;
1867 		return (ENOMEM);
1868 	}
1869 	tail = dp + cpr_nbitmaps;
1870 
1871 	CPR->c_bmda = dp;
1872 	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
1873 		size = BITMAP_BYTES(pmem->ml_size);
1874 		space = kmem_zalloc(size * 2, KM_NOSLEEP);
1875 		if (space == NULL)
1876 			return (ENOMEM);
1877 		ASSERT(dp < tail);
1878 		dp->cbd_magic = CPR_BITMAP_MAGIC;
1879 		dp->cbd_spfn = mmu_btop(pmem->ml_address);
1880 		dp->cbd_epfn = mmu_btop(pmem->ml_address + pmem->ml_size) - 1;
1881 		dp->cbd_size = size;
1882 		dp->cbd_reg_bitmap = (cpr_ptr)space;
1883 		dp->cbd_vlt_bitmap = (cpr_ptr)((caddr_t)space + size);
1884 		dp++;
1885 	}
1886 
1887 	/* set magic for the last descriptor */
1888 	ASSERT(dp == (tail - 1));
1889 	dp->cbd_magic = CPR_BITMAP_MAGIC;
1890 
1891 	return (0);
1892 }
1893 
1894 
1895 void
i_cpr_bitmap_cleanup(void)1896 i_cpr_bitmap_cleanup(void)
1897 {
1898 	cbd_t *dp;
1899 
1900 	if (CPR->c_bmda == NULL)
1901 		return;
1902 	for (dp = CPR->c_bmda; dp->cbd_size; dp++)
1903 		kmem_free((void *)dp->cbd_reg_bitmap, dp->cbd_size * 2);
1904 	kmem_free(CPR->c_bmda, cpr_nbitmaps * sizeof (*CPR->c_bmda));
1905 	CPR->c_bmda = NULL;
1906 	cpr_nbitmaps = 0;
1907 }
1908 
1909 
/*
 * A "regular" and a "volatile" bitmap are created for each range of
 * physical memory.  The volatile maps count and track pages susceptible
 * to heap corruption - caused by drivers that allocate mem during
 * VOP_DUMP(); the regular maps cover all the other, non-susceptible
 * pages.  Before the bitmaps are written to the statefile, each pair
 * gets merged to simplify handling within cprboot.
 *
 * Setup runs under the memlist read lock; on failure whatever was
 * allocated is released again.  Returns 0 or the setup error.
 */
int
i_cpr_alloc_bitmaps(void)
{
	int rc;

	memlist_read_lock();
	rc = i_cpr_bitmap_setup();
	memlist_read_unlock();

	if (rc != 0)
		i_cpr_bitmap_cleanup();

	return (rc);
}
1930 
1931 
1932 
1933 /*
1934  * Power down the system.
1935  */
1936 int
i_cpr_power_down(int sleeptype)1937 i_cpr_power_down(int sleeptype)
1938 {
1939 	int is_defined = 0;
1940 	char *wordexists = "p\" power-off\" find nip swap l! ";
1941 	char *req = "power-off";
1942 
1943 	ASSERT(sleeptype == CPR_TODISK);
1944 
1945 	/*
1946 	 * is_defined has value -1 when defined
1947 	 */
1948 	prom_interpret(wordexists, (uintptr_t)&is_defined, 0, 0, 0, 0);
1949 	if (is_defined) {
1950 		CPR_DEBUG(CPR_DEBUG1, "\ncpr: %s...\n", req);
1951 		prom_interpret(req, 0, 0, 0, 0, 0);
1952 	}
1953 	/*
1954 	 * Only returns if failed
1955 	 */
1956 	return (EIO);
1957 }
1958 
/*
 * cpr entry point for stopping the other cpus; simply hands off to
 * stop_other_cpus().
 */
void
i_cpr_stop_other_cpus(void)
{
	stop_other_cpus();
}
1964 
/*
 * Save context for the specified CPU.
 *
 * Not implemented yet: trips an ASSERT on DEBUG kernels and returns NULL.
 */
/* ARGSUSED */
void *
i_cpr_save_context(void *arg)
{
	/* Not yet */
	ASSERT(0);

	return (NULL);
}
1978 
/*
 * Hook run before the cpus are resumed.
 *
 * Not implemented yet: the only thing it does is trip an ASSERT.
 */
void
i_cpr_pre_resume_cpus(void)
{
	/* Not yet */
	ASSERT(0);
}
1987 
/*
 * Hook run after the cpus are resumed.
 *
 * Not implemented yet: the only thing it does is trip an ASSERT.
 */
void
i_cpr_post_resume_cpus(void)
{
	/* Not yet */
	ASSERT(0);
}
1996 
/*
 * cpu allocation hook; there is nothing to do here, so the body is
 * intentionally empty.
 */
void
i_cpr_alloc_cpus(void)
{
	/* nothing to do */
}
2004 
/*
 * cpu release hook; there is nothing to do here, so the body is
 * intentionally empty.
 */
void
i_cpr_free_cpus(void)
{
	/* nothing to do */
}
2012 
2013 /* ARGSUSED */
2014 void
i_cpr_save_configuration(dev_info_t * dip)2015 i_cpr_save_configuration(dev_info_t *dip)
2016 {
2017 	/*
2018 	 * this is a no-op on sparc
2019 	 */
2020 }
2021 
2022 /* ARGSUSED */
2023 void
i_cpr_restore_configuration(dev_info_t * dip)2024 i_cpr_restore_configuration(dev_info_t *dip)
2025 {
2026 	/*
2027 	 * this is a no-op on sparc
2028 	 */
2029 }
2030