xref: /illumos-gate/usr/src/uts/sun4u/cpu/us3_common.c (revision 581cede61ac9c14d8d4ea452562a567189eead78)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cheetahregs.h>
45 #include <sys/us3_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/prom_debug.h>
51 #include <sys/prom_plat.h>
52 #include <sys/cpu_module.h>
53 #include <sys/sysmacros.h>
54 #include <sys/intreg.h>
55 #include <sys/clock.h>
56 #include <sys/platform_module.h>
57 #include <sys/machtrap.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/ivintr.h>
63 #include <sys/atomic.h>
64 #include <sys/taskq.h>
65 #include <sys/note.h>
66 #include <sys/ndifm.h>
67 #include <sys/ddifm.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/cpu/UltraSPARC-III.h>
71 #include <sys/fpras_impl.h>
72 #include <sys/dtrace.h>
73 #include <sys/watchpoint.h>
74 #include <sys/plat_ecc_unum.h>
75 #include <sys/cyclic.h>
76 #include <sys/errorq.h>
77 #include <sys/errclassify.h>
78 #include <sys/pghw.h>
79 
80 #ifdef	CHEETAHPLUS_ERRATUM_25
81 #include <sys/xc_impl.h>
82 #endif	/* CHEETAHPLUS_ERRATUM_25 */
83 
84 ch_cpu_logout_t	clop_before_flush;
85 ch_cpu_logout_t	clop_after_flush;
86 uint_t	flush_retries_done = 0;
87 /*
88  * Note that 'Cheetah PRM' refers to:
89  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
90  */
91 
92 /*
93  * Per CPU pointers to physical address of TL>0 logout data areas.
94  * These pointers have to be in the kernel nucleus to avoid MMU
95  * misses.
96  */
97 uint64_t ch_err_tl1_paddrs[NCPU];
98 
99 /*
100  * One statically allocated structure to use during startup/DR
101  * to prevent unnecessary panics.
102  */
103 ch_err_tl1_data_t ch_err_tl1_data;
104 
105 /*
106  * Per CPU pending error at TL>0, used by level15 softint handler
107  */
108 uchar_t ch_err_tl1_pending[NCPU];
109 
110 /*
111  * For deferred CE re-enable after trap.
112  */
113 taskq_t		*ch_check_ce_tq;
114 
115 /*
116  * Internal functions.
117  */
118 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
119 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
120 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
121     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
122 static int cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt,
123     uint64_t t_afsr_bit);
124 static int clear_ecc(struct async_flt *ecc);
125 #if defined(CPU_IMP_ECACHE_ASSOC)
126 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
127 #endif
128 int cpu_ecache_set_size(struct cpu *cp);
129 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
130 int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
131 uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
132 int cpu_ectag_pa_to_subblk_state(int cachesize,
133 				uint64_t subaddr, uint64_t tag);
134 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
135 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
136 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
137 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
138 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
139 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
140 static int cpu_get_mem_unum_synd(int synd_code, struct async_flt *, char *buf);
141 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
142 static void cpu_scrubphys(struct async_flt *aflt);
143 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
144     int *, int *);
145 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
146 static void cpu_ereport_init(struct async_flt *aflt);
147 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
148 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
149 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
150     uint64_t nceen, ch_cpu_logout_t *clop);
151 static int cpu_ce_delayed_ec_logout(uint64_t);
152 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
153 static int cpu_error_is_ecache_data(int, uint64_t);
154 static void cpu_fmri_cpu_set(nvlist_t *, int);
155 static int cpu_error_to_resource_type(struct async_flt *aflt);
156 
157 #ifdef	CHEETAHPLUS_ERRATUM_25
158 static int mondo_recover_proc(uint16_t, int);
159 static void cheetah_nudge_init(void);
160 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
161     cyc_time_t *when);
162 static void cheetah_nudge_buddy(void);
163 #endif	/* CHEETAHPLUS_ERRATUM_25 */
164 
165 #if defined(CPU_IMP_L1_CACHE_PARITY)
166 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
167 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
168 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
169     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
170 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
171 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
172 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
173 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
174 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
175 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
176 #endif	/* CPU_IMP_L1_CACHE_PARITY */
177 
178 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
179     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
180     int *segsp, int *banksp, int *mcidp);
181 
182 /*
183  * This table is used to determine which bit(s) is(are) bad when an ECC
184  * error occurs.  The array is indexed by an 9-bit syndrome.  The entries
185  * of this array have the following semantics:
186  *
187  *      00-127  The number of the bad bit, when only one bit is bad.
188  *      128     ECC bit C0 is bad.
189  *      129     ECC bit C1 is bad.
190  *      130     ECC bit C2 is bad.
191  *      131     ECC bit C3 is bad.
192  *      132     ECC bit C4 is bad.
193  *      133     ECC bit C5 is bad.
194  *      134     ECC bit C6 is bad.
195  *      135     ECC bit C7 is bad.
196  *      136     ECC bit C8 is bad.
197  *	137-143 reserved for Mtag Data and ECC.
198  *      144(M2) Two bits are bad within a nibble.
199  *      145(M3) Three bits are bad within a nibble.
200  *      146(M3) Four bits are bad within a nibble.
201  *      147(M)  Multiple bits (5 or more) are bad.
202  *      148     NO bits are bad.
203  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
204  */
205 
206 #define	C0	128
207 #define	C1	129
208 #define	C2	130
209 #define	C3	131
210 #define	C4	132
211 #define	C5	133
212 #define	C6	134
213 #define	C7	135
214 #define	C8	136
215 #define	MT0	137	/* Mtag Data bit 0 */
216 #define	MT1	138
217 #define	MT2	139
218 #define	MTC0	140	/* Mtag Check bit 0 */
219 #define	MTC1	141
220 #define	MTC2	142
221 #define	MTC3	143
222 #define	M2	144
223 #define	M3	145
224 #define	M4	146
225 #define	M	147
226 #define	NA	148
227 #if defined(JALAPENO) || defined(SERRANO)
228 #define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
229 #define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
230 #define	SLAST	S003MEM	/* last special syndrome */
231 #else /* JALAPENO || SERRANO */
232 #define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
233 #define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
234 #define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
235 #define	SLAST	S11C	/* last special syndrome */
236 #endif /* JALAPENO || SERRANO */
237 #if defined(JALAPENO) || defined(SERRANO)
238 #define	BPAR0	152	/* syndrom 152 through 167 for bus parity */
239 #define	BPAR15	167
240 #endif	/* JALAPENO || SERRANO */
241 
242 static uint8_t ecc_syndrome_tab[] =
243 {
244 NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
245 C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
246 C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
247 M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
248 C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
249 M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
250 M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
251 #if defined(JALAPENO) || defined(SERRANO)
252 116, M2,  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
253 #else	/* JALAPENO || SERRANO */
254 116, S071, M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
255 #endif	/* JALAPENO || SERRANO */
256 C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
257 M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
258 M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
259 103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
260 M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
261 102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
262 98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
263 M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
264 C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
265 #if defined(JALAPENO) || defined(SERRANO)
266 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,  M2,   M,  M3,   M,
267 #else	/* JALAPENO || SERRANO */
268 M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57, S11C,  M,  M3,   M,
269 #endif	/* JALAPENO || SERRANO */
270 M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
271 94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
272 M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
273 89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
274 86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
275 M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
276 M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
277 77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
278 74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
279 M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
280 80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
281 M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
282 M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
283 111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
284 };
285 
286 #define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
287 
288 #if !(defined(JALAPENO) || defined(SERRANO))
289 /*
290  * This table is used to determine which bit(s) is(are) bad when a Mtag
291  * error occurs.  The array is indexed by an 4-bit ECC syndrome. The entries
292  * of this array have the following semantics:
293  *
294  *      -1	Invalid mtag syndrome.
295  *      137     Mtag Data 0 is bad.
296  *      138     Mtag Data 1 is bad.
297  *      139     Mtag Data 2 is bad.
298  *      140     Mtag ECC 0 is bad.
299  *      141     Mtag ECC 1 is bad.
300  *      142     Mtag ECC 2 is bad.
301  *      143     Mtag ECC 3 is bad.
302  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
303  */
304 short mtag_syndrome_tab[] =
305 {
306 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
307 };
308 
309 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
310 
311 #else /* !(JALAPENO || SERRANO) */
312 
313 #define	BSYND_TBL_SIZE	16
314 
315 #endif /* !(JALAPENO || SERRANO) */
316 
317 /*
318  * Types returned from cpu_error_to_resource_type()
319  */
320 #define	ERRTYPE_UNKNOWN		0
321 #define	ERRTYPE_CPU		1
322 #define	ERRTYPE_MEMORY		2
323 #define	ERRTYPE_ECACHE_DATA	3
324 
325 /*
326  * CE initial classification and subsequent action lookup table
327  */
328 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
329 static int ce_disp_inited;
330 
331 /*
332  * Set to disable leaky and partner check for memory correctables
333  */
334 int ce_xdiag_off;
335 
336 /*
337  * The following are not incremented atomically so are indicative only
338  */
339 static int ce_xdiag_drops;
340 static int ce_xdiag_lkydrops;
341 static int ce_xdiag_ptnrdrops;
342 static int ce_xdiag_bad;
343 
344 /*
345  * CE leaky check callback structure
346  */
347 typedef struct {
348 	struct async_flt *lkycb_aflt;
349 	errorq_t *lkycb_eqp;
350 	errorq_elem_t *lkycb_eqep;
351 } ce_lkychk_cb_t;
352 
353 /*
354  * defines for various ecache_flush_flag's
355  */
356 #define	ECACHE_FLUSH_LINE	1
357 #define	ECACHE_FLUSH_ALL	2
358 
359 /*
360  * STICK sync
361  */
362 #define	STICK_ITERATION 10
363 #define	MAX_TSKEW	1
364 #define	EV_A_START	0
365 #define	EV_A_END	1
366 #define	EV_B_START	2
367 #define	EV_B_END	3
368 #define	EVENTS		4
369 
370 static int64_t stick_iter = STICK_ITERATION;
371 static int64_t stick_tsk = MAX_TSKEW;
372 
373 typedef enum {
374 	EVENT_NULL = 0,
375 	SLAVE_START,
376 	SLAVE_CONT,
377 	MASTER_START
378 } event_cmd_t;
379 
380 static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
381 static int64_t timestamp[EVENTS];
382 static volatile int slave_done;
383 
384 #ifdef DEBUG
385 #define	DSYNC_ATTEMPTS 64
386 typedef struct {
387 	int64_t	skew_val[DSYNC_ATTEMPTS];
388 } ss_t;
389 
390 ss_t stick_sync_stats[NCPU];
391 #endif /* DEBUG */
392 
393 uint_t cpu_impl_dual_pgsz = 0;
394 #if defined(CPU_IMP_DUAL_PAGESIZE)
395 uint_t disable_dual_pgsz = 0;
396 #endif	/* CPU_IMP_DUAL_PAGESIZE */
397 
398 /*
399  * Save the cache bootup state for use when internal
400  * caches are to be re-enabled after an error occurs.
401  */
402 uint64_t cache_boot_state;
403 
404 /*
405  * PA[22:0] represent Displacement in Safari configuration space.
406  */
407 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
408 
409 bus_config_eclk_t bus_config_eclk[] = {
410 #if defined(JALAPENO) || defined(SERRANO)
411 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
412 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
413 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
414 #else /* JALAPENO || SERRANO */
415 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
416 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
417 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
418 #endif /* JALAPENO || SERRANO */
419 	{0, 0}
420 };
421 
422 /*
423  * Interval for deferred CEEN reenable
424  */
425 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
426 
427 /*
428  * set in /etc/system to control logging of user BERR/TO's
429  */
430 int cpu_berr_to_verbose = 0;
431 
432 /*
433  * set to 0 in /etc/system to defer CEEN reenable for all CEs
434  */
435 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
436 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
437 
438 /*
439  * Set of all offline cpus
440  */
441 cpuset_t cpu_offline_set;
442 
443 static void cpu_delayed_check_ce_errors(void *);
444 static void cpu_check_ce_errors(void *);
445 void cpu_error_ecache_flush(ch_async_flt_t *);
446 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
447 static void cpu_log_and_clear_ce(ch_async_flt_t *);
448 void cpu_ce_detected(ch_cpu_errors_t *, int);
449 
450 /*
451  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
452  * memory refresh interval of current DIMMs (64ms).  After initial fix that
453  * gives at least one full refresh cycle in which the cell can leak
454  * (whereafter further refreshes simply reinforce any incorrect bit value).
455  */
456 clock_t cpu_ce_lkychk_timeout_usec = 128000;
457 
458 /*
459  * CE partner check partner caching period in seconds
460  */
461 int cpu_ce_ptnr_cachetime_sec = 60;
462 
463 /*
464  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
465  */
466 #define	CH_SET_TRAP(ttentry, ttlabel)			\
467 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
468 		flush_instr_mem((caddr_t)&ttentry, 32);
469 
470 static int min_ecache_size;
471 static uint_t priv_hcl_1;
472 static uint_t priv_hcl_2;
473 static uint_t priv_hcl_4;
474 static uint_t priv_hcl_8;
475 
476 void
477 cpu_setup(void)
478 {
479 	extern int at_flags;
480 	extern int cpc_has_overflow_intr;
481 
482 	/*
483 	 * Setup chip-specific trap handlers.
484 	 */
485 	cpu_init_trap();
486 
487 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
488 
489 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
490 
491 	/*
492 	 * save the cache bootup state.
493 	 */
494 	cache_boot_state = get_dcu() & DCU_CACHE;
495 
496 	/*
497 	 * Due to the number of entries in the fully-associative tlb
498 	 * this may have to be tuned lower than in spitfire.
499 	 */
500 	pp_slots = MIN(8, MAXPP_SLOTS);
501 
502 	/*
503 	 * Block stores do not invalidate all pages of the d$, pagecopy
504 	 * et. al. need virtual translations with virtual coloring taken
505 	 * into consideration.  prefetch/ldd will pollute the d$ on the
506 	 * load side.
507 	 */
508 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
509 
510 	if (use_page_coloring) {
511 		do_pg_coloring = 1;
512 	}
513 
514 	isa_list =
515 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
516 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
517 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
518 
519 	/*
520 	 * On Panther-based machines, this should
521 	 * also include AV_SPARC_POPC too
522 	 */
523 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
524 
525 	/*
526 	 * On cheetah, there's no hole in the virtual address space
527 	 */
528 	hole_start = hole_end = 0;
529 
530 	/*
531 	 * The kpm mapping window.
532 	 * kpm_size:
533 	 *	The size of a single kpm range.
534 	 *	The overall size will be: kpm_size * vac_colors.
535 	 * kpm_vbase:
536 	 *	The virtual start address of the kpm range within the kernel
537 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
538 	 */
539 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
540 	kpm_size_shift = 43;
541 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
542 	kpm_smallpages = 1;
543 
544 	/*
545 	 * The traptrace code uses either %tick or %stick for
546 	 * timestamping.  We have %stick so we can use it.
547 	 */
548 	traptrace_use_stick = 1;
549 
550 	/*
551 	 * Cheetah has a performance counter overflow interrupt
552 	 */
553 	cpc_has_overflow_intr = 1;
554 
555 #if defined(CPU_IMP_DUAL_PAGESIZE)
556 	/*
557 	 * Use Cheetah+ and later dual page size support.
558 	 */
559 	if (!disable_dual_pgsz) {
560 		cpu_impl_dual_pgsz = 1;
561 	}
562 #endif	/* CPU_IMP_DUAL_PAGESIZE */
563 
564 	/*
565 	 * Declare that this architecture/cpu combination does fpRAS.
566 	 */
567 	fpras_implemented = 1;
568 
569 	/*
570 	 * Setup CE lookup table
571 	 */
572 	CE_INITDISPTBL_POPULATE(ce_disp_table);
573 	ce_disp_inited = 1;
574 }
575 
576 /*
577  * Called by setcpudelay
578  */
579 void
580 cpu_init_tick_freq(void)
581 {
582 	/*
583 	 * For UltraSPARC III and beyond we want to use the
584 	 * system clock rate as the basis for low level timing,
585 	 * due to support of mixed speed CPUs and power managment.
586 	 */
587 	if (system_clock_freq == 0)
588 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
589 
590 	sys_tick_freq = system_clock_freq;
591 }
592 
593 #ifdef CHEETAHPLUS_ERRATUM_25
594 /*
595  * Tunables
596  */
597 int cheetah_bpe_off = 0;
598 int cheetah_sendmondo_recover = 1;
599 int cheetah_sendmondo_fullscan = 0;
600 int cheetah_sendmondo_recover_delay = 5;
601 
602 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
603 
604 /*
605  * Recovery Statistics
606  */
607 typedef struct cheetah_livelock_entry	{
608 	int cpuid;		/* fallen cpu */
609 	int buddy;		/* cpu that ran recovery */
610 	clock_t lbolt;		/* when recovery started */
611 	hrtime_t recovery_time;	/* time spent in recovery */
612 } cheetah_livelock_entry_t;
613 
614 #define	CHEETAH_LIVELOCK_NENTRY	32
615 
616 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
617 int cheetah_livelock_entry_nxt;
618 
619 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
620 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
621 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
622 		cheetah_livelock_entry_nxt = 0;				\
623 	}								\
624 }
625 
626 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
627 
628 struct {
629 	hrtime_t hrt;		/* maximum recovery time */
630 	int recovery;		/* recovered */
631 	int full_claimed;	/* maximum pages claimed in full recovery */
632 	int proc_entry;		/* attempted to claim TSB */
633 	int proc_tsb_scan;	/* tsb scanned */
634 	int proc_tsb_partscan;	/* tsb partially scanned */
635 	int proc_tsb_fullscan;	/* whole tsb scanned */
636 	int proc_claimed;	/* maximum pages claimed in tsb scan */
637 	int proc_user;		/* user thread */
638 	int proc_kernel;	/* kernel thread */
639 	int proc_onflt;		/* bad stack */
640 	int proc_cpu;		/* null cpu */
641 	int proc_thread;	/* null thread */
642 	int proc_proc;		/* null proc */
643 	int proc_as;		/* null as */
644 	int proc_hat;		/* null hat */
645 	int proc_hat_inval;	/* hat contents don't make sense */
646 	int proc_hat_busy;	/* hat is changing TSBs */
647 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
648 	int proc_cnum_bad;	/* cnum out of range */
649 	int proc_cnum;		/* last cnum processed */
650 	tte_t proc_tte;		/* last tte processed */
651 } cheetah_livelock_stat;
652 
653 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
654 
655 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
656 	cheetah_livelock_stat.item = value
657 
658 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
659 	if (value > cheetah_livelock_stat.item)		\
660 		cheetah_livelock_stat.item = value;	\
661 }
662 
663 /*
664  * Attempt to recover a cpu by claiming every cache line as saved
665  * in the TSB that the non-responsive cpu is using. Since we can't
666  * grab any adaptive lock, this is at best an attempt to do so. Because
667  * we don't grab any locks, we must operate under the protection of
668  * on_fault().
669  *
670  * Return 1 if cpuid could be recovered, 0 if failed.
671  */
672 int
673 mondo_recover_proc(uint16_t cpuid, int bn)
674 {
675 	label_t ljb;
676 	cpu_t *cp;
677 	kthread_t *t;
678 	proc_t *p;
679 	struct as *as;
680 	struct hat *hat;
681 	uint_t  cnum;
682 	struct tsb_info *tsbinfop;
683 	struct tsbe *tsbep;
684 	caddr_t tsbp;
685 	caddr_t end_tsbp;
686 	uint64_t paddr;
687 	uint64_t idsr;
688 	u_longlong_t pahi, palo;
689 	int pages_claimed = 0;
690 	tte_t tsbe_tte;
691 	int tried_kernel_tsb = 0;
692 	mmu_ctx_t *mmu_ctxp;
693 
694 	CHEETAH_LIVELOCK_STAT(proc_entry);
695 
696 	if (on_fault(&ljb)) {
697 		CHEETAH_LIVELOCK_STAT(proc_onflt);
698 		goto badstruct;
699 	}
700 
701 	if ((cp = cpu[cpuid]) == NULL) {
702 		CHEETAH_LIVELOCK_STAT(proc_cpu);
703 		goto badstruct;
704 	}
705 
706 	if ((t = cp->cpu_thread) == NULL) {
707 		CHEETAH_LIVELOCK_STAT(proc_thread);
708 		goto badstruct;
709 	}
710 
711 	if ((p = ttoproc(t)) == NULL) {
712 		CHEETAH_LIVELOCK_STAT(proc_proc);
713 		goto badstruct;
714 	}
715 
716 	if ((as = p->p_as) == NULL) {
717 		CHEETAH_LIVELOCK_STAT(proc_as);
718 		goto badstruct;
719 	}
720 
721 	if ((hat = as->a_hat) == NULL) {
722 		CHEETAH_LIVELOCK_STAT(proc_hat);
723 		goto badstruct;
724 	}
725 
726 	if (hat != ksfmmup) {
727 		CHEETAH_LIVELOCK_STAT(proc_user);
728 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
729 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
730 			goto badstruct;
731 		}
732 		tsbinfop = hat->sfmmu_tsb;
733 		if (tsbinfop == NULL) {
734 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
735 			goto badstruct;
736 		}
737 		tsbp = tsbinfop->tsb_va;
738 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
739 	} else {
740 		CHEETAH_LIVELOCK_STAT(proc_kernel);
741 		tsbinfop = NULL;
742 		tsbp = ktsb_base;
743 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
744 	}
745 
746 	/* Verify as */
747 	if (hat->sfmmu_as != as) {
748 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
749 		goto badstruct;
750 	}
751 
752 	mmu_ctxp = CPU_MMU_CTXP(cp);
753 	ASSERT(mmu_ctxp);
754 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
755 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
756 
757 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
758 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
759 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
760 		goto badstruct;
761 	}
762 
763 	do {
764 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
765 
766 		/*
767 		 * Skip TSBs being relocated.  This is important because
768 		 * we want to avoid the following deadlock scenario:
769 		 *
770 		 * 1) when we came in we set ourselves to "in recover" state.
771 		 * 2) when we try to touch TSB being relocated the mapping
772 		 *    will be in the suspended state so we'll spin waiting
773 		 *    for it to be unlocked.
774 		 * 3) when the CPU that holds the TSB mapping locked tries to
775 		 *    unlock it it will send a xtrap which will fail to xcall
776 		 *    us or the CPU we're trying to recover, and will in turn
777 		 *    enter the mondo code.
778 		 * 4) since we are still spinning on the locked mapping
779 		 *    no further progress will be made and the system will
780 		 *    inevitably hard hang.
781 		 *
782 		 * A TSB not being relocated can't begin being relocated
783 		 * while we're accessing it because we check
784 		 * sendmondo_in_recover before relocating TSBs.
785 		 */
786 		if (hat != ksfmmup &&
787 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
788 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
789 			goto next_tsbinfo;
790 		}
791 
792 		for (tsbep = (struct tsbe *)tsbp;
793 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
794 			tsbe_tte = tsbep->tte_data;
795 
796 			if (tsbe_tte.tte_val == 0) {
797 				/*
798 				 * Invalid tte
799 				 */
800 				continue;
801 			}
802 			if (tsbe_tte.tte_se) {
803 				/*
804 				 * Don't want device registers
805 				 */
806 				continue;
807 			}
808 			if (tsbe_tte.tte_cp == 0) {
809 				/*
810 				 * Must be cached in E$
811 				 */
812 				continue;
813 			}
814 			if (tsbep->tte_tag.tag_invalid != 0) {
815 				/*
816 				 * Invalid tag, ingnore this entry.
817 				 */
818 				continue;
819 			}
820 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
821 			idsr = getidsr();
822 			if ((idsr & (IDSR_NACK_BIT(bn) |
823 			    IDSR_BUSY_BIT(bn))) == 0) {
824 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
825 				goto done;
826 			}
827 			pahi = tsbe_tte.tte_pahi;
828 			palo = tsbe_tte.tte_palo;
829 			paddr = (uint64_t)((pahi << 32) |
830 			    (palo << MMU_PAGESHIFT));
831 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
832 			    CH_ECACHE_SUBBLK_SIZE);
833 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
834 				shipit(cpuid, bn);
835 			}
836 			pages_claimed++;
837 		}
838 next_tsbinfo:
839 		if (tsbinfop != NULL)
840 			tsbinfop = tsbinfop->tsb_next;
841 		if (tsbinfop != NULL) {
842 			tsbp = tsbinfop->tsb_va;
843 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
844 		} else if (tsbp == ktsb_base) {
845 			tried_kernel_tsb = 1;
846 		} else if (!tried_kernel_tsb) {
847 			tsbp = ktsb_base;
848 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
849 			hat = ksfmmup;
850 			tsbinfop = NULL;
851 		}
852 	} while (tsbinfop != NULL ||
853 	    ((tsbp == ktsb_base) && !tried_kernel_tsb));
854 
855 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
856 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
857 	no_fault();
858 	idsr = getidsr();
859 	if ((idsr & (IDSR_NACK_BIT(bn) |
860 	    IDSR_BUSY_BIT(bn))) == 0) {
861 		return (1);
862 	} else {
863 		return (0);
864 	}
865 
866 done:
867 	no_fault();
868 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
869 	return (1);
870 
871 badstruct:
872 	no_fault();
873 	return (0);
874 }
875 
876 /*
877  * Attempt to claim ownership, temporarily, of every cache line that a
878  * non-responsive cpu might be using.  This might kick that cpu out of
879  * this state.
880  *
881  * The return value indicates to the caller if we have exhausted all recovery
882  * techniques. If 1 is returned, it is useless to call this function again
883  * even for a different target CPU.
884  */
885 int
886 mondo_recover(uint16_t cpuid, int bn)
887 {
888 	struct memseg *seg;
889 	uint64_t begin_pa, end_pa, cur_pa;
890 	hrtime_t begin_hrt, end_hrt;
891 	int retval = 0;
892 	int pages_claimed = 0;
893 	cheetah_livelock_entry_t *histp;
894 	uint64_t idsr;
895 
896 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
897 		/*
898 		 * Wait while recovery takes place
899 		 */
900 		while (sendmondo_in_recover) {
901 			drv_usecwait(1);
902 		}
903 		/*
904 		 * Assume we didn't claim the whole memory. If
905 		 * the target of this caller is not recovered,
906 		 * it will come back.
907 		 */
908 		return (retval);
909 	}
910 
911 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
912 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
913 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
914 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
915 
916 	begin_hrt = gethrtime_waitfree();
917 	/*
918 	 * First try to claim the lines in the TSB the target
919 	 * may have been using.
920 	 */
921 	if (mondo_recover_proc(cpuid, bn) == 1) {
922 		/*
923 		 * Didn't claim the whole memory
924 		 */
925 		goto done;
926 	}
927 
928 	/*
929 	 * We tried using the TSB. The target is still
930 	 * not recovered. Check if complete memory scan is
931 	 * enabled.
932 	 */
933 	if (cheetah_sendmondo_fullscan == 0) {
934 		/*
935 		 * Full memory scan is disabled.
936 		 */
937 		retval = 1;
938 		goto done;
939 	}
940 
941 	/*
942 	 * Try claiming the whole memory.
943 	 */
944 	for (seg = memsegs; seg; seg = seg->next) {
945 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
946 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
947 		for (cur_pa = begin_pa; cur_pa < end_pa;
948 		    cur_pa += MMU_PAGESIZE) {
949 			idsr = getidsr();
950 			if ((idsr & (IDSR_NACK_BIT(bn) |
951 			    IDSR_BUSY_BIT(bn))) == 0) {
952 				/*
953 				 * Didn't claim all memory
954 				 */
955 				goto done;
956 			}
957 			claimlines(cur_pa, MMU_PAGESIZE,
958 			    CH_ECACHE_SUBBLK_SIZE);
959 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
960 				shipit(cpuid, bn);
961 			}
962 			pages_claimed++;
963 		}
964 	}
965 
966 	/*
967 	 * We did all we could.
968 	 */
969 	retval = 1;
970 
971 done:
972 	/*
973 	 * Update statistics
974 	 */
975 	end_hrt = gethrtime_waitfree();
976 	CHEETAH_LIVELOCK_STAT(recovery);
977 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
978 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
979 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
980 	    (end_hrt -  begin_hrt));
981 
982 	while (cas32(&sendmondo_in_recover, 1, 0) != 1)
983 		;
984 
985 	return (retval);
986 }
987 
988 /*
989  * This is called by the cyclic framework when this CPU becomes online
990  */
991 /*ARGSUSED*/
992 static void
993 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
994 {
995 
996 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
997 	hdlr->cyh_level = CY_LOW_LEVEL;
998 	hdlr->cyh_arg = NULL;
999 
1000 	/*
1001 	 * Stagger the start time
1002 	 */
1003 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1004 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1005 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1006 	}
1007 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1008 }
1009 
1010 /*
1011  * Create a low level cyclic to send a xtrap to the next cpu online.
1012  * However, there's no need to have this running on a uniprocessor system.
1013  */
1014 static void
1015 cheetah_nudge_init(void)
1016 {
1017 	cyc_omni_handler_t hdlr;
1018 
1019 	if (max_ncpus == 1) {
1020 		return;
1021 	}
1022 
1023 	hdlr.cyo_online = cheetah_nudge_onln;
1024 	hdlr.cyo_offline = NULL;
1025 	hdlr.cyo_arg = NULL;
1026 
1027 	mutex_enter(&cpu_lock);
1028 	(void) cyclic_add_omni(&hdlr);
1029 	mutex_exit(&cpu_lock);
1030 }
1031 
1032 /*
1033  * Cyclic handler to wake up buddy
1034  */
1035 void
1036 cheetah_nudge_buddy(void)
1037 {
1038 	/*
1039 	 * Disable kernel preemption to protect the cpu list
1040 	 */
1041 	kpreempt_disable();
1042 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1043 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1044 		    0, 0);
1045 	}
1046 	kpreempt_enable();
1047 }
1048 
1049 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1050 
1051 #ifdef SEND_MONDO_STATS
1052 uint32_t x_one_stimes[64];
1053 uint32_t x_one_ltimes[16];
1054 uint32_t x_set_stimes[64];
1055 uint32_t x_set_ltimes[16];
1056 uint32_t x_set_cpus[NCPU];
1057 uint32_t x_nack_stimes[64];
1058 #endif
1059 
1060 /*
1061  * Note: A version of this function is used by the debugger via the KDI,
1062  * and must be kept in sync with this version.  Any changes made to this
1063  * function to support new chips or to accomodate errata must also be included
1064  * in the KDI-specific version.  See us3_kdi.c.
1065  */
1066 void
1067 send_one_mondo(int cpuid)
1068 {
1069 	int busy, nack;
1070 	uint64_t idsr, starttick, endtick, tick, lasttick;
1071 	uint64_t busymask;
1072 #ifdef	CHEETAHPLUS_ERRATUM_25
1073 	int recovered = 0;
1074 #endif
1075 
1076 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1077 	starttick = lasttick = gettick();
1078 	shipit(cpuid, 0);
1079 	endtick = starttick + xc_tick_limit;
1080 	busy = nack = 0;
1081 #if defined(JALAPENO) || defined(SERRANO)
1082 	/*
1083 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1084 	 * will be used for dispatching interrupt. For now, assume
1085 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1086 	 * issues with respect to BUSY/NACK pair usage.
1087 	 */
1088 	busymask  = IDSR_BUSY_BIT(cpuid);
1089 #else /* JALAPENO || SERRANO */
1090 	busymask = IDSR_BUSY;
1091 #endif /* JALAPENO || SERRANO */
1092 	for (;;) {
1093 		idsr = getidsr();
1094 		if (idsr == 0)
1095 			break;
1096 
1097 		tick = gettick();
1098 		/*
1099 		 * If there is a big jump between the current tick
1100 		 * count and lasttick, we have probably hit a break
1101 		 * point.  Adjust endtick accordingly to avoid panic.
1102 		 */
1103 		if (tick > (lasttick + xc_tick_jump_limit))
1104 			endtick += (tick - lasttick);
1105 		lasttick = tick;
1106 		if (tick > endtick) {
1107 			if (panic_quiesce)
1108 				return;
1109 #ifdef	CHEETAHPLUS_ERRATUM_25
1110 			if (cheetah_sendmondo_recover && recovered == 0) {
1111 				if (mondo_recover(cpuid, 0)) {
1112 					/*
1113 					 * We claimed the whole memory or
1114 					 * full scan is disabled.
1115 					 */
1116 					recovered++;
1117 				}
1118 				tick = gettick();
1119 				endtick = tick + xc_tick_limit;
1120 				lasttick = tick;
1121 				/*
1122 				 * Recheck idsr
1123 				 */
1124 				continue;
1125 			} else
1126 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1127 			{
1128 				cmn_err(CE_PANIC, "send mondo timeout "
1129 				    "(target 0x%x) [%d NACK %d BUSY]",
1130 				    cpuid, nack, busy);
1131 			}
1132 		}
1133 
1134 		if (idsr & busymask) {
1135 			busy++;
1136 			continue;
1137 		}
1138 		drv_usecwait(1);
1139 		shipit(cpuid, 0);
1140 		nack++;
1141 		busy = 0;
1142 	}
1143 #ifdef SEND_MONDO_STATS
1144 	{
1145 		int n = gettick() - starttick;
1146 		if (n < 8192)
1147 			x_one_stimes[n >> 7]++;
1148 		else
1149 			x_one_ltimes[(n >> 13) & 0xf]++;
1150 	}
1151 #endif
1152 }
1153 
1154 void
1155 syncfpu(void)
1156 {
1157 }
1158 
1159 /*
1160  * Return processor specific async error structure
1161  * size used.
1162  */
1163 int
1164 cpu_aflt_size(void)
1165 {
1166 	return (sizeof (ch_async_flt_t));
1167 }
1168 
1169 /*
1170  * Tunable to disable the checking of other cpu logout areas during panic for
1171  * potential syndrome 71 generating errors.
1172  */
1173 int enable_check_other_cpus_logout = 1;
1174 
1175 /*
1176  * Check other cpus logout area for potential synd 71 generating
1177  * errors.
1178  */
1179 static void
1180 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1181     ch_cpu_logout_t *clop)
1182 {
1183 	struct async_flt *aflt;
1184 	ch_async_flt_t ch_flt;
1185 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1186 
1187 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1188 		return;
1189 	}
1190 
1191 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1192 
1193 	t_afar = clop->clo_data.chd_afar;
1194 	t_afsr = clop->clo_data.chd_afsr;
1195 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1196 #if defined(SERRANO)
1197 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1198 #endif	/* SERRANO */
1199 
1200 	/*
1201 	 * In order to simplify code, we maintain this afsr_errs
1202 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1203 	 * sticky bits.
1204 	 */
1205 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1206 	    (t_afsr & C_AFSR_ALL_ERRS);
1207 
1208 	/* Setup the async fault structure */
1209 	aflt = (struct async_flt *)&ch_flt;
1210 	aflt->flt_id = gethrtime_waitfree();
1211 	ch_flt.afsr_ext = t_afsr_ext;
1212 	ch_flt.afsr_errs = t_afsr_errs;
1213 	aflt->flt_stat = t_afsr;
1214 	aflt->flt_addr = t_afar;
1215 	aflt->flt_bus_id = cpuid;
1216 	aflt->flt_inst = cpuid;
1217 	aflt->flt_pc = tpc;
1218 	aflt->flt_prot = AFLT_PROT_NONE;
1219 	aflt->flt_class = CPU_FAULT;
1220 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1221 	aflt->flt_tl = tl;
1222 	aflt->flt_status = ecc_type;
1223 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1224 
1225 	/*
1226 	 * Queue events on the async event queue, one event per error bit.
1227 	 * If no events are queued, queue an event to complain.
1228 	 */
1229 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1230 		ch_flt.flt_type = CPU_INV_AFSR;
1231 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1232 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1233 		    aflt->flt_panic);
1234 	}
1235 
1236 	/*
1237 	 * Zero out + invalidate CPU logout.
1238 	 */
1239 	bzero(clop, sizeof (ch_cpu_logout_t));
1240 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1241 }
1242 
1243 /*
1244  * Check the logout areas of all other cpus for unlogged errors.
1245  */
1246 static void
1247 cpu_check_other_cpus_logout(void)
1248 {
1249 	int i, j;
1250 	processorid_t myid;
1251 	struct cpu *cp;
1252 	ch_err_tl1_data_t *cl1p;
1253 
1254 	myid = CPU->cpu_id;
1255 	for (i = 0; i < NCPU; i++) {
1256 		cp = cpu[i];
1257 
1258 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1259 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1260 			continue;
1261 		}
1262 
1263 		/*
1264 		 * Check each of the tl>0 logout areas
1265 		 */
1266 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1267 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1268 			if (cl1p->ch_err_tl1_flags == 0)
1269 				continue;
1270 
1271 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1272 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1273 		}
1274 
1275 		/*
1276 		 * Check each of the remaining logout areas
1277 		 */
1278 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1279 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1280 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1281 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1282 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1283 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1284 	}
1285 }
1286 
1287 /*
1288  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1289  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1290  * flush the error that caused the UCU/UCC, then again here at the end to
1291  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1292  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1293  * another Fast ECC trap.
1294  *
1295  * Cheetah+ also handles: TSCE: No additional processing required.
1296  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1297  *
1298  * Note that the p_clo_flags input is only valid in cases where the
1299  * cpu_private struct is not yet initialized (since that is the only
1300  * time that information cannot be obtained from the logout struct.)
1301  */
1302 /*ARGSUSED*/
1303 void
1304 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1305 {
1306 	ch_cpu_logout_t *clop;
1307 	uint64_t ceen, nceen;
1308 
1309 	/*
1310 	 * Get the CPU log out info. If we can't find our CPU private
1311 	 * pointer, then we will have to make due without any detailed
1312 	 * logout information.
1313 	 */
1314 	if (CPU_PRIVATE(CPU) == NULL) {
1315 		clop = NULL;
1316 		ceen = p_clo_flags & EN_REG_CEEN;
1317 		nceen = p_clo_flags & EN_REG_NCEEN;
1318 	} else {
1319 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1320 		ceen = clop->clo_flags & EN_REG_CEEN;
1321 		nceen = clop->clo_flags & EN_REG_NCEEN;
1322 	}
1323 
1324 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1325 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1326 }
1327 
1328 /*
1329  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1330  * ECC at TL>0.  Need to supply either a error register pointer or a
1331  * cpu logout structure pointer.
1332  */
1333 static void
1334 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1335     uint64_t nceen, ch_cpu_logout_t *clop)
1336 {
1337 	struct async_flt *aflt;
1338 	ch_async_flt_t ch_flt;
1339 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1340 	char pr_reason[MAX_REASON_STRING];
1341 	ch_cpu_errors_t cpu_error_regs;
1342 
1343 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1344 	/*
1345 	 * If no cpu logout data, then we will have to make due without
1346 	 * any detailed logout information.
1347 	 */
1348 	if (clop == NULL) {
1349 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1350 		get_cpu_error_state(&cpu_error_regs);
1351 		set_cpu_error_state(&cpu_error_regs);
1352 		t_afar = cpu_error_regs.afar;
1353 		t_afsr = cpu_error_regs.afsr;
1354 		t_afsr_ext = cpu_error_regs.afsr_ext;
1355 #if defined(SERRANO)
1356 		ch_flt.afar2 = cpu_error_regs.afar2;
1357 #endif	/* SERRANO */
1358 	} else {
1359 		t_afar = clop->clo_data.chd_afar;
1360 		t_afsr = clop->clo_data.chd_afsr;
1361 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1362 #if defined(SERRANO)
1363 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1364 #endif	/* SERRANO */
1365 	}
1366 
1367 	/*
1368 	 * In order to simplify code, we maintain this afsr_errs
1369 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1370 	 * sticky bits.
1371 	 */
1372 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1373 	    (t_afsr & C_AFSR_ALL_ERRS);
1374 	pr_reason[0] = '\0';
1375 
1376 	/* Setup the async fault structure */
1377 	aflt = (struct async_flt *)&ch_flt;
1378 	aflt->flt_id = gethrtime_waitfree();
1379 	ch_flt.afsr_ext = t_afsr_ext;
1380 	ch_flt.afsr_errs = t_afsr_errs;
1381 	aflt->flt_stat = t_afsr;
1382 	aflt->flt_addr = t_afar;
1383 	aflt->flt_bus_id = getprocessorid();
1384 	aflt->flt_inst = CPU->cpu_id;
1385 	aflt->flt_pc = tpc;
1386 	aflt->flt_prot = AFLT_PROT_NONE;
1387 	aflt->flt_class = CPU_FAULT;
1388 	aflt->flt_priv = priv;
1389 	aflt->flt_tl = tl;
1390 	aflt->flt_status = ECC_F_TRAP;
1391 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1392 
1393 	/*
1394 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1395 	 * cmn_err messages out to the console.  The situation is a UCU (in
1396 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1397 	 * The messages for the UCU and WDU are enqueued and then pulled off
1398 	 * the async queue via softint and syslogd starts to process them
1399 	 * but doesn't get them to the console.  The UE causes a panic, but
1400 	 * since the UCU/WDU messages are already in transit, those aren't
1401 	 * on the async queue.  The hack is to check if we have a matching
1402 	 * WDU event for the UCU, and if it matches, we're more than likely
1403 	 * going to panic with a UE, unless we're under protection.  So, we
1404 	 * check to see if we got a matching WDU event and if we're under
1405 	 * protection.
1406 	 *
1407 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1408 	 * looks like this:
1409 	 *    UCU->WDU->UE
1410 	 * For Panther, it could look like either of these:
1411 	 *    UCU---->WDU->L3_WDU->UE
1412 	 *    L3_UCU->WDU->L3_WDU->UE
1413 	 */
1414 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1415 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1416 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1417 		get_cpu_error_state(&cpu_error_regs);
1418 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
1419 			aflt->flt_panic |=
1420 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1421 			    (cpu_error_regs.afsr_ext & C_AFSR_L3_WDU) &&
1422 			    (cpu_error_regs.afar == t_afar));
1423 			aflt->flt_panic |= ((clop == NULL) &&
1424 			    (t_afsr_errs & C_AFSR_WDU) &&
1425 			    (t_afsr_errs & C_AFSR_L3_WDU));
1426 		} else {
1427 			aflt->flt_panic |=
1428 			    ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1429 			    (cpu_error_regs.afar == t_afar));
1430 			aflt->flt_panic |= ((clop == NULL) &&
1431 			    (t_afsr_errs & C_AFSR_WDU));
1432 		}
1433 	}
1434 
1435 	/*
1436 	 * Queue events on the async event queue, one event per error bit.
1437 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1438 	 * queue an event to complain.
1439 	 */
1440 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1441 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1442 		ch_flt.flt_type = CPU_INV_AFSR;
1443 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1444 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1445 		    aflt->flt_panic);
1446 	}
1447 
1448 	/*
1449 	 * Zero out + invalidate CPU logout.
1450 	 */
1451 	if (clop) {
1452 		bzero(clop, sizeof (ch_cpu_logout_t));
1453 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1454 	}
1455 
1456 	/*
1457 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1458 	 * or disrupting errors have happened.  We do this because if a
1459 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1460 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1461 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1462 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1463 	 * deferred or disrupting error happening between checking the AFSR and
1464 	 * enabling NCEEN/CEEN.
1465 	 *
1466 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1467 	 * taken.
1468 	 */
1469 	set_error_enable(get_error_enable() | (nceen | ceen));
1470 	if (clear_errors(&ch_flt)) {
1471 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1472 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1473 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1474 		    NULL);
1475 	}
1476 
1477 	/*
1478 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1479 	 * be logged as part of the panic flow.
1480 	 */
1481 	if (aflt->flt_panic)
1482 		fm_panic("%sError(s)", pr_reason);
1483 
1484 	/*
1485 	 * Flushing the Ecache here gets the part of the trap handler that
1486 	 * is run at TL=1 out of the Ecache.
1487 	 */
1488 	cpu_flush_ecache();
1489 }
1490 
1491 /*
1492  * This is called via sys_trap from pil15_interrupt code if the
1493  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1494  * various ch_err_tl1_data structures for valid entries based on the bit
1495  * settings in the ch_err_tl1_flags entry of the structure.
1496  */
1497 /*ARGSUSED*/
1498 void
1499 cpu_tl1_error(struct regs *rp, int panic)
1500 {
1501 	ch_err_tl1_data_t *cl1p, cl1;
1502 	int i, ncl1ps;
1503 	uint64_t me_flags;
1504 	uint64_t ceen, nceen;
1505 
1506 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1507 		cl1p = &ch_err_tl1_data;
1508 		ncl1ps = 1;
1509 	} else if (CPU_PRIVATE(CPU) != NULL) {
1510 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1511 		ncl1ps = CH_ERR_TL1_TLMAX;
1512 	} else {
1513 		ncl1ps = 0;
1514 	}
1515 
1516 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1517 		if (cl1p->ch_err_tl1_flags == 0)
1518 			continue;
1519 
1520 		/*
1521 		 * Grab a copy of the logout data and invalidate
1522 		 * the logout area.
1523 		 */
1524 		cl1 = *cl1p;
1525 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1526 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1527 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1528 
1529 		/*
1530 		 * Log "first error" in ch_err_tl1_data.
1531 		 */
1532 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1533 			ceen = get_error_enable() & EN_REG_CEEN;
1534 			nceen = get_error_enable() & EN_REG_NCEEN;
1535 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1536 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1537 		}
1538 #if defined(CPU_IMP_L1_CACHE_PARITY)
1539 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1540 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1541 			    (caddr_t)cl1.ch_err_tl1_tpc);
1542 		}
1543 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1544 
1545 		/*
1546 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1547 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1548 		 * if the structure is busy, we just do the cache flushing
1549 		 * we have to do and then do the retry.  So the AFSR/AFAR
1550 		 * at this point *should* have some relevant info.  If there
1551 		 * are no valid errors in the AFSR, we'll assume they've
1552 		 * already been picked up and logged.  For I$/D$ parity,
1553 		 * we just log an event with an "Unknown" (NULL) TPC.
1554 		 */
1555 		if (me_flags & CH_ERR_FECC) {
1556 			ch_cpu_errors_t cpu_error_regs;
1557 			uint64_t t_afsr_errs;
1558 
1559 			/*
1560 			 * Get the error registers and see if there's
1561 			 * a pending error.  If not, don't bother
1562 			 * generating an "Invalid AFSR" error event.
1563 			 */
1564 			get_cpu_error_state(&cpu_error_regs);
1565 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1566 			    C_AFSR_EXT_ALL_ERRS) |
1567 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1568 			if (t_afsr_errs != 0) {
1569 				ceen = get_error_enable() & EN_REG_CEEN;
1570 				nceen = get_error_enable() & EN_REG_NCEEN;
1571 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1572 				    1, ceen, nceen, NULL);
1573 			}
1574 		}
1575 #if defined(CPU_IMP_L1_CACHE_PARITY)
1576 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1577 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1578 		}
1579 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1580 	}
1581 }
1582 
1583 /*
1584  * Called from Fast ECC TL>0 handler in case of fatal error.
1585  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1586  * but if we don't, we'll panic with something reasonable.
1587  */
1588 /*ARGSUSED*/
1589 void
1590 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1591 {
1592 	cpu_tl1_error(rp, 1);
1593 	/*
1594 	 * Should never return, but just in case.
1595 	 */
1596 	fm_panic("Unsurvivable ECC Error at TL>0");
1597 }
1598 
1599 /*
1600  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1601  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1602  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1603  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1604  *
1605  * Cheetah+ also handles (No additional processing required):
1606  *    DUE, DTO, DBERR	(NCEEN controlled)
1607  *    THCE		(CEEN and ET_ECC_en controlled)
1608  *    TUE		(ET_ECC_en controlled)
1609  *
1610  * Panther further adds:
1611  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1612  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1613  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1614  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1615  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1616  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1617  *
1618  * Note that the p_clo_flags input is only valid in cases where the
1619  * cpu_private struct is not yet initialized (since that is the only
1620  * time that information cannot be obtained from the logout struct.)
1621  */
1622 /*ARGSUSED*/
1623 void
1624 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1625 {
1626 	struct async_flt *aflt;
1627 	ch_async_flt_t ch_flt;
1628 	char pr_reason[MAX_REASON_STRING];
1629 	ch_cpu_logout_t *clop;
1630 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1631 	ch_cpu_errors_t cpu_error_regs;
1632 
1633 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1634 	/*
1635 	 * Get the CPU log out info. If we can't find our CPU private
1636 	 * pointer, then we will have to make due without any detailed
1637 	 * logout information.
1638 	 */
1639 	if (CPU_PRIVATE(CPU) == NULL) {
1640 		clop = NULL;
1641 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1642 		get_cpu_error_state(&cpu_error_regs);
1643 		set_cpu_error_state(&cpu_error_regs);
1644 		t_afar = cpu_error_regs.afar;
1645 		t_afsr = cpu_error_regs.afsr;
1646 		t_afsr_ext = cpu_error_regs.afsr_ext;
1647 #if defined(SERRANO)
1648 		ch_flt.afar2 = cpu_error_regs.afar2;
1649 #endif	/* SERRANO */
1650 	} else {
1651 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1652 		t_afar = clop->clo_data.chd_afar;
1653 		t_afsr = clop->clo_data.chd_afsr;
1654 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1655 #if defined(SERRANO)
1656 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1657 #endif	/* SERRANO */
1658 	}
1659 
1660 	/*
1661 	 * In order to simplify code, we maintain this afsr_errs
1662 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1663 	 * sticky bits.
1664 	 */
1665 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1666 	    (t_afsr & C_AFSR_ALL_ERRS);
1667 
1668 	pr_reason[0] = '\0';
1669 	/* Setup the async fault structure */
1670 	aflt = (struct async_flt *)&ch_flt;
1671 	ch_flt.afsr_ext = t_afsr_ext;
1672 	ch_flt.afsr_errs = t_afsr_errs;
1673 	aflt->flt_stat = t_afsr;
1674 	aflt->flt_addr = t_afar;
1675 	aflt->flt_pc = (caddr_t)rp->r_pc;
1676 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1677 	aflt->flt_tl = 0;
1678 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1679 
1680 	/*
1681 	 * If this trap is a result of one of the errors not masked
1682 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1683 	 * indicate that a timeout is to be set later.
1684 	 */
1685 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1686 	    !aflt->flt_panic)
1687 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1688 	else
1689 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1690 
1691 	/*
1692 	 * log the CE and clean up
1693 	 */
1694 	cpu_log_and_clear_ce(&ch_flt);
1695 
1696 	/*
1697 	 * We re-enable CEEN (if required) and check if any disrupting errors
1698 	 * have happened.  We do this because if a disrupting error had occurred
1699 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1700 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1701 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1702 	 * of a error happening between checking the AFSR and enabling CEEN.
1703 	 */
1704 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1705 		set_error_enable(get_error_enable() | EN_REG_CEEN);
1706 	if (clear_errors(&ch_flt)) {
1707 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1708 		    NULL);
1709 	}
1710 
1711 	/*
1712 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1713 	 * be logged as part of the panic flow.
1714 	 */
1715 	if (aflt->flt_panic)
1716 		fm_panic("%sError(s)", pr_reason);
1717 }
1718 
1719 /*
1720  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1721  * L3_EDU:BLD, TO, and BERR events.
1722  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1723  *
1724  * Cheetah+: No additional errors handled.
1725  *
1726  * Note that the p_clo_flags input is only valid in cases where the
1727  * cpu_private struct is not yet initialized (since that is the only
1728  * time that information cannot be obtained from the logout struct.)
1729  */
1730 /*ARGSUSED*/
1731 void
1732 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1733 {
1734 	ushort_t ttype, tl;
1735 	ch_async_flt_t ch_flt;
1736 	struct async_flt *aflt;
1737 	int trampolined = 0;
1738 	char pr_reason[MAX_REASON_STRING];
1739 	ch_cpu_logout_t *clop;
1740 	uint64_t ceen, clo_flags;
1741 	uint64_t log_afsr;
1742 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1743 	ch_cpu_errors_t cpu_error_regs;
1744 	int expected = DDI_FM_ERR_UNEXPECTED;
1745 	ddi_acc_hdl_t *hp;
1746 
1747 	/*
1748 	 * We need to look at p_flag to determine if the thread detected an
1749 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1750 	 * because we just need a consistent snapshot and we know that everyone
1751 	 * else will store a consistent set of bits while holding p_lock.  We
1752 	 * don't have to worry about a race because SDOCORE is set once prior
1753 	 * to doing i/o from the process's address space and is never cleared.
1754 	 */
1755 	uint_t pflag = ttoproc(curthread)->p_flag;
1756 
1757 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1758 	/*
1759 	 * Get the CPU log out info. If we can't find our CPU private
1760 	 * pointer then we will have to make due without any detailed
1761 	 * logout information.
1762 	 */
1763 	if (CPU_PRIVATE(CPU) == NULL) {
1764 		clop = NULL;
1765 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1766 		get_cpu_error_state(&cpu_error_regs);
1767 		set_cpu_error_state(&cpu_error_regs);
1768 		t_afar = cpu_error_regs.afar;
1769 		t_afsr = cpu_error_regs.afsr;
1770 		t_afsr_ext = cpu_error_regs.afsr_ext;
1771 #if defined(SERRANO)
1772 		ch_flt.afar2 = cpu_error_regs.afar2;
1773 #endif	/* SERRANO */
1774 		clo_flags = p_clo_flags;
1775 	} else {
1776 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1777 		t_afar = clop->clo_data.chd_afar;
1778 		t_afsr = clop->clo_data.chd_afsr;
1779 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1780 #if defined(SERRANO)
1781 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1782 #endif	/* SERRANO */
1783 		clo_flags = clop->clo_flags;
1784 	}
1785 
1786 	/*
1787 	 * In order to simplify code, we maintain this afsr_errs
1788 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1789 	 * sticky bits.
1790 	 */
1791 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1792 	    (t_afsr & C_AFSR_ALL_ERRS);
1793 	pr_reason[0] = '\0';
1794 
1795 	/*
1796 	 * Grab information encoded into our clo_flags field.
1797 	 */
1798 	ceen = clo_flags & EN_REG_CEEN;
1799 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1800 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1801 
1802 	/*
1803 	 * handle the specific error
1804 	 */
1805 	aflt = (struct async_flt *)&ch_flt;
1806 	aflt->flt_id = gethrtime_waitfree();
1807 	aflt->flt_bus_id = getprocessorid();
1808 	aflt->flt_inst = CPU->cpu_id;
1809 	ch_flt.afsr_ext = t_afsr_ext;
1810 	ch_flt.afsr_errs = t_afsr_errs;
1811 	aflt->flt_stat = t_afsr;
1812 	aflt->flt_addr = t_afar;
1813 	aflt->flt_pc = (caddr_t)rp->r_pc;
1814 	aflt->flt_prot = AFLT_PROT_NONE;
1815 	aflt->flt_class = CPU_FAULT;
1816 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1817 	aflt->flt_tl = (uchar_t)tl;
1818 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1819 	    C_AFSR_PANIC(t_afsr_errs));
1820 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1821 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1822 
1823 	/*
1824 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1825 	 * see if we were executing in the kernel under on_trap() or t_lofault
1826 	 * protection.  If so, modify the saved registers so that we return
1827 	 * from the trap to the appropriate trampoline routine.
1828 	 */
1829 	if (aflt->flt_priv && tl == 0) {
1830 		if (curthread->t_ontrap != NULL) {
1831 			on_trap_data_t *otp = curthread->t_ontrap;
1832 
1833 			if (otp->ot_prot & OT_DATA_EC) {
1834 				aflt->flt_prot = AFLT_PROT_EC;
1835 				otp->ot_trap |= OT_DATA_EC;
1836 				rp->r_pc = otp->ot_trampoline;
1837 				rp->r_npc = rp->r_pc + 4;
1838 				trampolined = 1;
1839 			}
1840 
1841 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1842 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1843 				aflt->flt_prot = AFLT_PROT_ACCESS;
1844 				otp->ot_trap |= OT_DATA_ACCESS;
1845 				rp->r_pc = otp->ot_trampoline;
1846 				rp->r_npc = rp->r_pc + 4;
1847 				trampolined = 1;
1848 				/*
1849 				 * for peeks and caut_gets errors are expected
1850 				 */
1851 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1852 				if (!hp)
1853 					expected = DDI_FM_ERR_PEEK;
1854 				else if (hp->ah_acc.devacc_attr_access ==
1855 				    DDI_CAUTIOUS_ACC)
1856 					expected = DDI_FM_ERR_EXPECTED;
1857 			}
1858 
1859 		} else if (curthread->t_lofault) {
1860 			aflt->flt_prot = AFLT_PROT_COPY;
1861 			rp->r_g1 = EFAULT;
1862 			rp->r_pc = curthread->t_lofault;
1863 			rp->r_npc = rp->r_pc + 4;
1864 			trampolined = 1;
1865 		}
1866 	}
1867 
1868 	/*
1869 	 * If we're in user mode or we're doing a protected copy, we either
1870 	 * want the ASTON code below to send a signal to the user process
1871 	 * or we want to panic if aft_panic is set.
1872 	 *
1873 	 * If we're in privileged mode and we're not doing a copy, then we
1874 	 * need to check if we've trampolined.  If we haven't trampolined,
1875 	 * we should panic.
1876 	 */
1877 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1878 		if (t_afsr_errs &
1879 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1880 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1881 			aflt->flt_panic |= aft_panic;
1882 	} else if (!trampolined) {
1883 			aflt->flt_panic = 1;
1884 	}
1885 
1886 	/*
1887 	 * If we've trampolined due to a privileged TO or BERR, or if an
1888 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1889 	 * event for that TO or BERR.  Queue all other events (if any) besides
1890 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1891 	 * ignore the number of events queued.  If we haven't trampolined due
1892 	 * to a TO or BERR, just enqueue events normally.
1893 	 */
1894 	log_afsr = t_afsr_errs;
1895 	if (trampolined) {
1896 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1897 	} else if (!aflt->flt_priv) {
1898 		/*
1899 		 * User mode, suppress messages if
1900 		 * cpu_berr_to_verbose is not set.
1901 		 */
1902 		if (!cpu_berr_to_verbose)
1903 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1904 	}
1905 
1906 	/*
1907 	 * Log any errors that occurred
1908 	 */
1909 	if (((log_afsr &
1910 	    ((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1911 	    cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1912 	    (t_afsr_errs & (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1913 		ch_flt.flt_type = CPU_INV_AFSR;
1914 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1915 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1916 		    aflt->flt_panic);
1917 	}
1918 
1919 	/*
1920 	 * Zero out + invalidate CPU logout.
1921 	 */
1922 	if (clop) {
1923 		bzero(clop, sizeof (ch_cpu_logout_t));
1924 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1925 	}
1926 
1927 #if defined(JALAPENO) || defined(SERRANO)
1928 	/*
1929 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1930 	 * IO errors that may have resulted in this trap.
1931 	 */
1932 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1933 		cpu_run_bus_error_handlers(aflt, expected);
1934 	}
1935 
1936 	/*
1937 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1938 	 * line from the Ecache.  We also need to query the bus nexus for
1939 	 * fatal errors.  Attempts to do diagnostic read on caches may
1940 	 * introduce more errors (especially when the module is bad).
1941 	 */
1942 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1943 		/*
1944 		 * Ask our bus nexus friends if they have any fatal errors.  If
1945 		 * so, they will log appropriate error messages.
1946 		 */
1947 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1948 			aflt->flt_panic = 1;
1949 
1950 		/*
1951 		 * We got a UE or RUE and are panicking, save the fault PA in
1952 		 * a known location so that the platform specific panic code
1953 		 * can check for copyback errors.
1954 		 */
1955 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1956 			panic_aflt = *aflt;
1957 		}
1958 	}
1959 
1960 	/*
1961 	 * Flush Ecache line or entire Ecache
1962 	 */
1963 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1964 		cpu_error_ecache_flush(&ch_flt);
1965 #else /* JALAPENO || SERRANO */
1966 	/*
1967 	 * UE/BERR/TO: Call our bus nexus friends to check for
1968 	 * IO errors that may have resulted in this trap.
1969 	 */
1970 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1971 		cpu_run_bus_error_handlers(aflt, expected);
1972 	}
1973 
1974 	/*
1975 	 * UE: If the UE is in memory, we need to flush the bad
1976 	 * line from the Ecache.  We also need to query the bus nexus for
1977 	 * fatal errors.  Attempts to do diagnostic read on caches may
1978 	 * introduce more errors (especially when the module is bad).
1979 	 */
1980 	if (t_afsr & C_AFSR_UE) {
1981 		/*
1982 		 * Ask our legacy bus nexus friends if they have any fatal
1983 		 * errors.  If so, they will log appropriate error messages.
1984 		 */
1985 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1986 			aflt->flt_panic = 1;
1987 
1988 		/*
1989 		 * We got a UE and are panicking, save the fault PA in a known
1990 		 * location so that the platform specific panic code can check
1991 		 * for copyback errors.
1992 		 */
1993 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1994 			panic_aflt = *aflt;
1995 		}
1996 	}
1997 
1998 	/*
1999 	 * Flush Ecache line or entire Ecache
2000 	 */
2001 	if (t_afsr_errs &
2002 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
2003 		cpu_error_ecache_flush(&ch_flt);
2004 #endif /* JALAPENO || SERRANO */
2005 
2006 	/*
2007 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
2008 	 * or disrupting errors have happened.  We do this because if a
2009 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
2010 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
2011 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
2012 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
2013 	 * deferred or disrupting error happening between checking the AFSR and
2014 	 * enabling NCEEN/CEEN.
2015 	 *
2016 	 * Note: CEEN reenabled only if it was on when trap taken.
2017 	 */
2018 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2019 	if (clear_errors(&ch_flt)) {
2020 		/*
2021 		 * Check for secondary errors, and avoid panicking if we
2022 		 * have them
2023 		 */
2024 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2025 		    t_afar) == 0) {
2026 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2027 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2028 		}
2029 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2030 		    NULL);
2031 	}
2032 
2033 	/*
2034 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2035 	 * be logged as part of the panic flow.
2036 	 */
2037 	if (aflt->flt_panic)
2038 		fm_panic("%sError(s)", pr_reason);
2039 
2040 	/*
2041 	 * If we queued an error and we are going to return from the trap and
2042 	 * the error was in user mode or inside of a copy routine, set AST flag
2043 	 * so the queue will be drained before returning to user mode.  The
2044 	 * AST processing will also act on our failure policy.
2045 	 */
2046 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2047 		int pcb_flag = 0;
2048 
2049 		if (t_afsr_errs &
2050 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2051 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2052 			pcb_flag |= ASYNC_HWERR;
2053 
2054 		if (t_afsr & C_AFSR_BERR)
2055 			pcb_flag |= ASYNC_BERR;
2056 
2057 		if (t_afsr & C_AFSR_TO)
2058 			pcb_flag |= ASYNC_BTO;
2059 
2060 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2061 		aston(curthread);
2062 	}
2063 }
2064 
2065 #if defined(CPU_IMP_L1_CACHE_PARITY)
2066 /*
2067  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2068  *
2069  * For Panther, P$ data parity errors during floating point load hits
2070  * are also detected (reported as TT 0x71) and handled by this trap
2071  * handler.
2072  *
2073  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2074  * is available.
2075  */
2076 /*ARGSUSED*/
2077 void
2078 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2079 {
2080 	ch_async_flt_t ch_flt;
2081 	struct async_flt *aflt;
2082 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2083 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2084 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2085 	char *error_class;
2086 
2087 	/*
2088 	 * Log the error.
2089 	 * For icache parity errors the fault address is the trap PC.
2090 	 * For dcache/pcache parity errors the instruction would have to
2091 	 * be decoded to determine the address and that isn't possible
2092 	 * at high PIL.
2093 	 */
2094 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2095 	aflt = (struct async_flt *)&ch_flt;
2096 	aflt->flt_id = gethrtime_waitfree();
2097 	aflt->flt_bus_id = getprocessorid();
2098 	aflt->flt_inst = CPU->cpu_id;
2099 	aflt->flt_pc = tpc;
2100 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2101 	aflt->flt_prot = AFLT_PROT_NONE;
2102 	aflt->flt_class = CPU_FAULT;
2103 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2104 	aflt->flt_tl = tl;
2105 	aflt->flt_panic = panic;
2106 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2107 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2108 
2109 	if (iparity) {
2110 		cpu_icache_parity_info(&ch_flt);
2111 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2112 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2113 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2114 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2115 		else
2116 			error_class = FM_EREPORT_CPU_USIII_IPE;
2117 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2118 	} else {
2119 		cpu_dcache_parity_info(&ch_flt);
2120 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2121 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2122 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2123 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2124 		else
2125 			error_class = FM_EREPORT_CPU_USIII_DPE;
2126 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2127 		/*
2128 		 * For panther we also need to check the P$ for parity errors.
2129 		 */
2130 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2131 			cpu_pcache_parity_info(&ch_flt);
2132 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2133 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2134 				aflt->flt_payload =
2135 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2136 			}
2137 		}
2138 	}
2139 
2140 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2141 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2142 
2143 	if (iparity) {
2144 		/*
2145 		 * Invalidate entire I$.
2146 		 * This is required due to the use of diagnostic ASI
2147 		 * accesses that may result in a loss of I$ coherency.
2148 		 */
2149 		if (cache_boot_state & DCU_IC) {
2150 			flush_icache();
2151 		}
2152 		/*
2153 		 * According to section P.3.1 of the Panther PRM, we
2154 		 * need to do a little more for recovery on those
2155 		 * CPUs after encountering an I$ parity error.
2156 		 */
2157 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2158 			flush_ipb();
2159 			correct_dcache_parity(dcache_size,
2160 			    dcache_linesize);
2161 			flush_pcache();
2162 		}
2163 	} else {
2164 		/*
2165 		 * Since the valid bit is ignored when checking parity the
2166 		 * D$ data and tag must also be corrected.  Set D$ data bits
2167 		 * to zero and set utag to 0, 1, 2, 3.
2168 		 */
2169 		correct_dcache_parity(dcache_size, dcache_linesize);
2170 
2171 		/*
2172 		 * According to section P.3.3 of the Panther PRM, we
2173 		 * need to do a little more for recovery on those
2174 		 * CPUs after encountering a D$ or P$ parity error.
2175 		 *
2176 		 * As far as clearing P$ parity errors, it is enough to
2177 		 * simply invalidate all entries in the P$ since P$ parity
2178 		 * error traps are only generated for floating point load
2179 		 * hits.
2180 		 */
2181 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2182 			flush_icache();
2183 			flush_ipb();
2184 			flush_pcache();
2185 		}
2186 	}
2187 
2188 	/*
2189 	 * Invalidate entire D$ if it was enabled.
2190 	 * This is done to avoid stale data in the D$ which might
2191 	 * occur with the D$ disabled and the trap handler doing
2192 	 * stores affecting lines already in the D$.
2193 	 */
2194 	if (cache_boot_state & DCU_DC) {
2195 		flush_dcache();
2196 	}
2197 
2198 	/*
2199 	 * Restore caches to their bootup state.
2200 	 */
2201 	set_dcu(get_dcu() | cache_boot_state);
2202 
2203 	/*
2204 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2205 	 * be logged as part of the panic flow.
2206 	 */
2207 	if (aflt->flt_panic)
2208 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2209 
2210 	/*
2211 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2212 	 * the chance of getting an unrecoverable Fast ECC error.  This
2213 	 * flush will evict the part of the parity trap handler that is run
2214 	 * at TL>1.
2215 	 */
2216 	if (tl) {
2217 		cpu_flush_ecache();
2218 	}
2219 }
2220 
2221 /*
2222  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2223  * to indicate which portions of the captured data should be in the ereport.
2224  */
2225 void
2226 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2227 {
2228 	int way = ch_flt->parity_data.ipe.cpl_way;
2229 	int offset = ch_flt->parity_data.ipe.cpl_off;
2230 	int tag_index;
2231 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2232 
2233 
2234 	if ((offset != -1) || (way != -1)) {
2235 		/*
2236 		 * Parity error in I$ tag or data
2237 		 */
2238 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2239 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2240 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2241 			    PN_ICIDX_TO_WAY(tag_index);
2242 		else
2243 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2244 			    CH_ICIDX_TO_WAY(tag_index);
2245 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2246 		    IC_LOGFLAG_MAGIC;
2247 	} else {
2248 		/*
2249 		 * Parity error was not identified.
2250 		 * Log tags and data for all ways.
2251 		 */
2252 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2253 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2254 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2255 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2256 				    PN_ICIDX_TO_WAY(tag_index);
2257 			else
2258 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2259 				    CH_ICIDX_TO_WAY(tag_index);
2260 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2261 			    IC_LOGFLAG_MAGIC;
2262 		}
2263 	}
2264 }
2265 
2266 /*
2267  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2268  * to indicate which portions of the captured data should be in the ereport.
2269  */
2270 void
2271 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2272 {
2273 	int way = ch_flt->parity_data.dpe.cpl_way;
2274 	int offset = ch_flt->parity_data.dpe.cpl_off;
2275 	int tag_index;
2276 
2277 	if (offset != -1) {
2278 		/*
2279 		 * Parity error in D$ or P$ data array.
2280 		 *
2281 		 * First check to see whether the parity error is in D$ or P$
2282 		 * since P$ data parity errors are reported in Panther using
2283 		 * the same trap.
2284 		 */
2285 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2286 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2287 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2288 			    CH_PCIDX_TO_WAY(tag_index);
2289 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2290 			    PC_LOGFLAG_MAGIC;
2291 		} else {
2292 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2293 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2294 			    CH_DCIDX_TO_WAY(tag_index);
2295 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2296 			    DC_LOGFLAG_MAGIC;
2297 		}
2298 	} else if (way != -1) {
2299 		/*
2300 		 * Parity error in D$ tag.
2301 		 */
2302 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2303 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2304 		    CH_DCIDX_TO_WAY(tag_index);
2305 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2306 		    DC_LOGFLAG_MAGIC;
2307 	}
2308 }
2309 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2310 
2311 /*
2312  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2313  * post-process CPU events that are dequeued.  As such, it can be invoked
2314  * from softint context, from AST processing in the trap() flow, or from the
2315  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2316  * Historically this entry point was used to log the actual cmn_err(9F) text;
2317  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2318  * With FMA this function now also returns a flag which indicates to the
2319  * caller whether the ereport should be posted (1) or suppressed (0).
2320  */
2321 static int
2322 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2323 {
2324 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2325 	struct async_flt *aflt = (struct async_flt *)flt;
2326 	uint64_t errors;
2327 	extern void memscrub_induced_error(void);
2328 
2329 	switch (ch_flt->flt_type) {
2330 	case CPU_INV_AFSR:
2331 		/*
2332 		 * If it is a disrupting trap and the AFSR is zero, then
2333 		 * the event has probably already been noted. Do not post
2334 		 * an ereport.
2335 		 */
2336 		if ((aflt->flt_status & ECC_C_TRAP) &&
2337 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2338 			return (0);
2339 		else
2340 			return (1);
2341 	case CPU_TO:
2342 	case CPU_BERR:
2343 	case CPU_FATAL:
2344 	case CPU_FPUERR:
2345 		return (1);
2346 
2347 	case CPU_UE_ECACHE_RETIRE:
2348 		cpu_log_err(aflt);
2349 		cpu_page_retire(ch_flt);
2350 		return (1);
2351 
2352 	/*
2353 	 * Cases where we may want to suppress logging or perform
2354 	 * extended diagnostics.
2355 	 */
2356 	case CPU_CE:
2357 	case CPU_EMC:
2358 		/*
2359 		 * We want to skip logging and further classification
2360 		 * only if ALL the following conditions are true:
2361 		 *
2362 		 *	1. There is only one error
2363 		 *	2. That error is a correctable memory error
2364 		 *	3. The error is caused by the memory scrubber (in
2365 		 *	   which case the error will have occurred under
2366 		 *	   on_trap protection)
2367 		 *	4. The error is on a retired page
2368 		 *
2369 		 * Note: AFLT_PROT_EC is used places other than the memory
2370 		 * scrubber.  However, none of those errors should occur
2371 		 * on a retired page.
2372 		 */
2373 		if ((ch_flt->afsr_errs &
2374 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2375 		    aflt->flt_prot == AFLT_PROT_EC) {
2376 
2377 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2378 				if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2379 
2380 				/*
2381 				 * Since we're skipping logging, we'll need
2382 				 * to schedule the re-enabling of CEEN
2383 				 */
2384 				(void) timeout(cpu_delayed_check_ce_errors,
2385 				    (void *)(uintptr_t)aflt->flt_inst,
2386 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2387 				    * MICROSEC));
2388 				}
2389 
2390 				/*
2391 				 * Inform memscrubber - scrubbing induced
2392 				 * CE on a retired page.
2393 				 */
2394 				memscrub_induced_error();
2395 				return (0);
2396 			}
2397 		}
2398 
2399 		/*
2400 		 * Perform/schedule further classification actions, but
2401 		 * only if the page is healthy (we don't want bad
2402 		 * pages inducing too much diagnostic activity).  If we could
2403 		 * not find a page pointer then we also skip this.  If
2404 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2405 		 * to copy and recirculate the event (for further diagnostics)
2406 		 * and we should not proceed to log it here.
2407 		 *
2408 		 * This must be the last step here before the cpu_log_err()
2409 		 * below - if an event recirculates cpu_ce_log_err() will
2410 		 * not call the current function but just proceed directly
2411 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2412 		 *
2413 		 * Note: Check cpu_impl_async_log_err if changing this
2414 		 */
2415 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2416 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2417 			    CE_XDIAG_SKIP_NOPP);
2418 		} else {
2419 			if (errors != PR_OK) {
2420 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2421 				    CE_XDIAG_SKIP_PAGEDET);
2422 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2423 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2424 				return (0);
2425 			}
2426 		}
2427 		/*FALLTHRU*/
2428 
2429 	/*
2430 	 * Cases where we just want to report the error and continue.
2431 	 */
2432 	case CPU_CE_ECACHE:
2433 	case CPU_UE_ECACHE:
2434 	case CPU_IV:
2435 	case CPU_ORPH:
2436 		cpu_log_err(aflt);
2437 		return (1);
2438 
2439 	/*
2440 	 * Cases where we want to fall through to handle panicking.
2441 	 */
2442 	case CPU_UE:
2443 		/*
2444 		 * We want to skip logging in the same conditions as the
2445 		 * CE case.  In addition, we want to make sure we're not
2446 		 * panicking.
2447 		 */
2448 		if (!panicstr && (ch_flt->afsr_errs &
2449 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2450 		    aflt->flt_prot == AFLT_PROT_EC) {
2451 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2452 				/* Zero the address to clear the error */
2453 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2454 				/*
2455 				 * Inform memscrubber - scrubbing induced
2456 				 * UE on a retired page.
2457 				 */
2458 				memscrub_induced_error();
2459 				return (0);
2460 			}
2461 		}
2462 		cpu_log_err(aflt);
2463 		break;
2464 
2465 	default:
2466 		/*
2467 		 * If the us3_common.c code doesn't know the flt_type, it may
2468 		 * be an implementation-specific code.  Call into the impldep
2469 		 * backend to find out what to do: if it tells us to continue,
2470 		 * break and handle as if falling through from a UE; if not,
2471 		 * the impldep backend has handled the error and we're done.
2472 		 */
2473 		switch (cpu_impl_async_log_err(flt, eqep)) {
2474 		case CH_ASYNC_LOG_DONE:
2475 			return (1);
2476 		case CH_ASYNC_LOG_RECIRC:
2477 			return (0);
2478 		case CH_ASYNC_LOG_CONTINUE:
2479 			break; /* continue on to handle UE-like error */
2480 		default:
2481 			cmn_err(CE_WARN, "discarding error 0x%p with "
2482 			    "invalid fault type (0x%x)",
2483 			    (void *)aflt, ch_flt->flt_type);
2484 			return (0);
2485 		}
2486 	}
2487 
2488 	/* ... fall through from the UE case */
2489 
2490 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2491 		if (!panicstr) {
2492 			cpu_page_retire(ch_flt);
2493 		} else {
2494 			/*
2495 			 * Clear UEs on panic so that we don't
2496 			 * get haunted by them during panic or
2497 			 * after reboot
2498 			 */
2499 			cpu_clearphys(aflt);
2500 			(void) clear_errors(NULL);
2501 		}
2502 	}
2503 
2504 	return (1);
2505 }
2506 
2507 /*
2508  * Retire the bad page that may contain the flushed error.
2509  */
2510 void
2511 cpu_page_retire(ch_async_flt_t *ch_flt)
2512 {
2513 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2514 	(void) page_retire(aflt->flt_addr, PR_UE);
2515 }
2516 
2517 /*
2518  * Return true if the error specified in the AFSR indicates
2519  * an E$ data error (L2$ for Cheetah/Cheetah+/Jaguar, L3$
2520  * for Panther, none for Jalapeno/Serrano).
2521  */
2522 /* ARGSUSED */
2523 static int
2524 cpu_error_is_ecache_data(int cpuid, uint64_t t_afsr)
2525 {
2526 #if defined(JALAPENO) || defined(SERRANO)
2527 	return (0);
2528 #elif defined(CHEETAH_PLUS)
2529 	if (IS_PANTHER(cpunodes[cpuid].implementation))
2530 		return ((t_afsr & C_AFSR_EXT_L3_DATA_ERRS) != 0);
2531 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2532 #else	/* CHEETAH_PLUS */
2533 	return ((t_afsr & C_AFSR_EC_DATA_ERRS) != 0);
2534 #endif
2535 }
2536 
2537 /*
2538  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2539  * generic event post-processing for correctable and uncorrectable memory,
2540  * E$, and MTag errors.  Historically this entry point was used to log bits of
2541  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2542  * converted into an ereport.  In addition, it transmits the error to any
2543  * platform-specific service-processor FRU logging routines, if available.
2544  */
2545 void
2546 cpu_log_err(struct async_flt *aflt)
2547 {
2548 	char unum[UNUM_NAMLEN];
2549 	int synd_status, synd_code, afar_status;
2550 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2551 
2552 	if (cpu_error_is_ecache_data(aflt->flt_inst, ch_flt->flt_bit))
2553 		aflt->flt_status |= ECC_ECACHE;
2554 	else
2555 		aflt->flt_status &= ~ECC_ECACHE;
2556 	/*
2557 	 * Determine syndrome status.
2558 	 */
2559 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2560 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2561 
2562 	/*
2563 	 * Determine afar status.
2564 	 */
2565 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2566 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2567 		    ch_flt->flt_bit);
2568 	else
2569 		afar_status = AFLT_STAT_INVALID;
2570 
2571 	synd_code = synd_to_synd_code(synd_status,
2572 	    aflt->flt_synd, ch_flt->flt_bit);
2573 
2574 	/*
2575 	 * If afar status is not invalid do a unum lookup.
2576 	 */
2577 	if (afar_status != AFLT_STAT_INVALID) {
2578 		(void) cpu_get_mem_unum_synd(synd_code, aflt, unum);
2579 	} else {
2580 		unum[0] = '\0';
2581 	}
2582 
2583 	/*
2584 	 * Do not send the fruid message (plat_ecc_error_data_t)
2585 	 * to the SC if it can handle the enhanced error information
2586 	 * (plat_ecc_error2_data_t) or when the tunable
2587 	 * ecc_log_fruid_enable is set to 0.
2588 	 */
2589 
2590 	if (&plat_ecc_capability_sc_get &&
2591 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2592 		if (&plat_log_fruid_error)
2593 			plat_log_fruid_error(synd_code, aflt, unum,
2594 			    ch_flt->flt_bit);
2595 	}
2596 
2597 	if (aflt->flt_func != NULL)
2598 		aflt->flt_func(aflt, unum);
2599 
2600 	if (afar_status != AFLT_STAT_INVALID)
2601 		cpu_log_diag_info(ch_flt);
2602 
2603 	/*
2604 	 * If we have a CEEN error , we do not reenable CEEN until after
2605 	 * we exit the trap handler. Otherwise, another error may
2606 	 * occur causing the handler to be entered recursively.
2607 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2608 	 * to try and ensure that the CPU makes progress in the face
2609 	 * of a CE storm.
2610 	 */
2611 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2612 		(void) timeout(cpu_delayed_check_ce_errors,
2613 		    (void *)(uintptr_t)aflt->flt_inst,
2614 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2615 	}
2616 }
2617 
2618 /*
2619  * Invoked by error_init() early in startup and therefore before
2620  * startup_errorq() is called to drain any error Q -
2621  *
2622  * startup()
2623  *   startup_end()
2624  *     error_init()
2625  *       cpu_error_init()
2626  * errorq_init()
2627  *   errorq_drain()
2628  * start_other_cpus()
2629  *
2630  * The purpose of this routine is to create error-related taskqs.  Taskqs
2631  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2632  * context.
2633  */
2634 void
2635 cpu_error_init(int items)
2636 {
2637 	/*
2638 	 * Create taskq(s) to reenable CE
2639 	 */
2640 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2641 	    items, items, TASKQ_PREPOPULATE);
2642 }
2643 
2644 void
2645 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2646 {
2647 	char unum[UNUM_NAMLEN];
2648 	int len;
2649 
2650 	switch (aflt->flt_class) {
2651 	case CPU_FAULT:
2652 		cpu_ereport_init(aflt);
2653 		if (cpu_async_log_err(aflt, eqep))
2654 			cpu_ereport_post(aflt);
2655 		break;
2656 
2657 	case BUS_FAULT:
2658 		if (aflt->flt_func != NULL) {
2659 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2660 			    unum, UNUM_NAMLEN, &len);
2661 			aflt->flt_func(aflt, unum);
2662 		}
2663 		break;
2664 
2665 	case RECIRC_CPU_FAULT:
2666 		aflt->flt_class = CPU_FAULT;
2667 		cpu_log_err(aflt);
2668 		cpu_ereport_post(aflt);
2669 		break;
2670 
2671 	case RECIRC_BUS_FAULT:
2672 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2673 		/*FALLTHRU*/
2674 	default:
2675 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2676 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2677 		return;
2678 	}
2679 }
2680 
2681 /*
2682  * Scrub and classify a CE.  This function must not modify the
2683  * fault structure passed to it but instead should return the classification
2684  * information.
2685  */
2686 
2687 static uchar_t
2688 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2689 {
2690 	uchar_t disp = CE_XDIAG_EXTALG;
2691 	on_trap_data_t otd;
2692 	uint64_t orig_err;
2693 	ch_cpu_logout_t *clop;
2694 
2695 	/*
2696 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2697 	 * this, but our other callers have not.  Disable preemption to
2698 	 * avoid CPU migration so that we restore CEEN on the correct
2699 	 * cpu later.
2700 	 *
2701 	 * CEEN is cleared so that further CEs that our instruction and
2702 	 * data footprint induce do not cause use to either creep down
2703 	 * kernel stack to the point of overflow, or do so much CE
2704 	 * notification as to make little real forward progress.
2705 	 *
2706 	 * NCEEN must not be cleared.  However it is possible that
2707 	 * our accesses to the flt_addr may provoke a bus error or timeout
2708 	 * if the offending address has just been unconfigured as part of
2709 	 * a DR action.  So we must operate under on_trap protection.
2710 	 */
2711 	kpreempt_disable();
2712 	orig_err = get_error_enable();
2713 	if (orig_err & EN_REG_CEEN)
2714 		set_error_enable(orig_err & ~EN_REG_CEEN);
2715 
2716 	/*
2717 	 * Our classification algorithm includes the line state before
2718 	 * the scrub; we'd like this captured after the detection and
2719 	 * before the algorithm below - the earlier the better.
2720 	 *
2721 	 * If we've come from a cpu CE trap then this info already exists
2722 	 * in the cpu logout area.
2723 	 *
2724 	 * For a CE detected by memscrub for which there was no trap
2725 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2726 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2727 	 * marked the fault structure as incomplete as a flag to later
2728 	 * logging code.
2729 	 *
2730 	 * If called directly from an IO detected CE there has been
2731 	 * no line data capture.  In this case we logout to the cpu logout
2732 	 * area - that's appropriate since it's the cpu cache data we need
2733 	 * for classification.  We thus borrow the cpu logout area for a
2734 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2735 	 * this time (we will invalidate it again below).
2736 	 *
2737 	 * If called from the partner check xcall handler then this cpu
2738 	 * (the partner) has not necessarily experienced a CE at this
2739 	 * address.  But we want to capture line state before its scrub
2740 	 * attempt since we use that in our classification.
2741 	 */
2742 	if (logout_tried == B_FALSE) {
2743 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2744 			disp |= CE_XDIAG_NOLOGOUT;
2745 	}
2746 
2747 	/*
2748 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2749 	 * no longer be valid (if DR'd since the initial event) so we
2750 	 * perform this scrub under on_trap protection.  If this access is
2751 	 * ok then further accesses below will also be ok - DR cannot
2752 	 * proceed while this thread is active (preemption is disabled);
2753 	 * to be safe we'll nonetheless use on_trap again below.
2754 	 */
2755 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2756 		cpu_scrubphys(ecc);
2757 	} else {
2758 		no_trap();
2759 		if (orig_err & EN_REG_CEEN)
2760 			set_error_enable(orig_err);
2761 		kpreempt_enable();
2762 		return (disp);
2763 	}
2764 	no_trap();
2765 
2766 	/*
2767 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2768 	 * Note that it's quite possible that the read sourced the data from
2769 	 * another cpu.
2770 	 */
2771 	if (clear_ecc(ecc))
2772 		disp |= CE_XDIAG_CE1;
2773 
2774 	/*
2775 	 * Read the data again.  This time the read is very likely to
2776 	 * come from memory since the scrub induced a writeback to memory.
2777 	 */
2778 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2779 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2780 	} else {
2781 		no_trap();
2782 		if (orig_err & EN_REG_CEEN)
2783 			set_error_enable(orig_err);
2784 		kpreempt_enable();
2785 		return (disp);
2786 	}
2787 	no_trap();
2788 
2789 	/* Did that read induce a CE that matches the AFAR? */
2790 	if (clear_ecc(ecc))
2791 		disp |= CE_XDIAG_CE2;
2792 
2793 	/*
2794 	 * Look at the logout information and record whether we found the
2795 	 * line in l2/l3 cache.  For Panther we are interested in whether
2796 	 * we found it in either cache (it won't reside in both but
2797 	 * it is possible to read it that way given the moving target).
2798 	 */
2799 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2800 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2801 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2802 		int hit, level;
2803 		int state;
2804 		int totalsize;
2805 		ch_ec_data_t *ecp;
2806 
2807 		/*
2808 		 * If hit is nonzero then a match was found and hit will
2809 		 * be one greater than the index which hit.  For Panther we
2810 		 * also need to pay attention to level to see which of l2$ or
2811 		 * l3$ it hit in.
2812 		 */
2813 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2814 		    0, &level);
2815 
2816 		if (hit) {
2817 			--hit;
2818 			disp |= CE_XDIAG_AFARMATCH;
2819 
2820 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2821 				if (level == 2)
2822 					ecp = &clop->clo_data.chd_l2_data[hit];
2823 				else
2824 					ecp = &clop->clo_data.chd_ec_data[hit];
2825 			} else {
2826 				ASSERT(level == 2);
2827 				ecp = &clop->clo_data.chd_ec_data[hit];
2828 			}
2829 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2830 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2831 			    ecc->flt_addr, ecp->ec_tag);
2832 
2833 			/*
2834 			 * Cheetah variants use different state encodings -
2835 			 * the CH_ECSTATE_* defines vary depending on the
2836 			 * module we're compiled for.  Translate into our
2837 			 * one true version.  Conflate Owner-Shared state
2838 			 * of SSM mode with Owner as victimisation of such
2839 			 * lines may cause a writeback.
2840 			 */
2841 			switch (state) {
2842 			case CH_ECSTATE_MOD:
2843 				disp |= EC_STATE_M;
2844 				break;
2845 
2846 			case CH_ECSTATE_OWN:
2847 			case CH_ECSTATE_OWS:
2848 				disp |= EC_STATE_O;
2849 				break;
2850 
2851 			case CH_ECSTATE_EXL:
2852 				disp |= EC_STATE_E;
2853 				break;
2854 
2855 			case CH_ECSTATE_SHR:
2856 				disp |= EC_STATE_S;
2857 				break;
2858 
2859 			default:
2860 				disp |= EC_STATE_I;
2861 				break;
2862 			}
2863 		}
2864 
2865 		/*
2866 		 * If we initiated the delayed logout then we are responsible
2867 		 * for invalidating the logout area.
2868 		 */
2869 		if (logout_tried == B_FALSE) {
2870 			bzero(clop, sizeof (ch_cpu_logout_t));
2871 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2872 		}
2873 	}
2874 
2875 	/*
2876 	 * Re-enable CEEN if we turned it off.
2877 	 */
2878 	if (orig_err & EN_REG_CEEN)
2879 		set_error_enable(orig_err);
2880 	kpreempt_enable();
2881 
2882 	return (disp);
2883 }
2884 
2885 /*
2886  * Scrub a correctable memory error and collect data for classification
2887  * of CE type.  This function is called in the detection path, ie tl0 handling
2888  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2889  */
2890 void
2891 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2892 {
2893 	/*
2894 	 * Cheetah CE classification does not set any bits in flt_status.
2895 	 * Instead we will record classification datapoints in flt_disp.
2896 	 */
2897 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2898 
2899 	/*
2900 	 * To check if the error detected by IO is persistent, sticky or
2901 	 * intermittent.  This is noticed by clear_ecc().
2902 	 */
2903 	if (ecc->flt_status & ECC_IOBUS)
2904 		ecc->flt_stat = C_AFSR_MEMORY;
2905 
2906 	/*
2907 	 * Record information from this first part of the algorithm in
2908 	 * flt_disp.
2909 	 */
2910 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2911 }
2912 
2913 /*
2914  * Select a partner to perform a further CE classification check from.
2915  * Must be called with kernel preemption disabled (to stop the cpu list
2916  * from changing).  The detecting cpu we are partnering has cpuid
2917  * aflt->flt_inst; we might not be running on the detecting cpu.
2918  *
2919  * Restrict choice to active cpus in the same cpu partition as ourselves in
2920  * an effort to stop bad cpus in one partition causing other partitions to
2921  * perform excessive diagnostic activity.  Actually since the errorq drain
2922  * is run from a softint most of the time and that is a global mechanism
2923  * this isolation is only partial.  Return NULL if we fail to find a
2924  * suitable partner.
2925  *
2926  * We prefer a partner that is in a different latency group to ourselves as
2927  * we will share fewer datapaths.  If such a partner is unavailable then
2928  * choose one in the same lgroup but prefer a different chip and only allow
2929  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2930  * flags includes PTNR_SELFOK then permit selection of the original detector.
2931  *
2932  * We keep a cache of the last partner selected for a cpu, and we'll try to
2933  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2934  * have passed since that selection was made.  This provides the benefit
2935  * of the point-of-view of different partners over time but without
2936  * requiring frequent cpu list traversals.
2937  */
2938 
2939 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2940 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2941 
2942 static cpu_t *
2943 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2944 {
2945 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2946 	hrtime_t lasttime, thistime;
2947 
2948 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2949 
2950 	dtcr = cpu[aflt->flt_inst];
2951 
2952 	/*
2953 	 * Short-circuit for the following cases:
2954 	 *	. the dtcr is not flagged active
2955 	 *	. there is just one cpu present
2956 	 *	. the detector has disappeared
2957 	 *	. we were given a bad flt_inst cpuid; this should not happen
2958 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2959 	 *	  reason to panic.
2960 	 *	. there is just one cpu left online in the cpu partition
2961 	 *
2962 	 * If we return NULL after this point then we do not update the
2963 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2964 	 * again next time; this is the case where the only other cpu online
2965 	 * in the detector's partition is on the same chip as the detector
2966 	 * and since CEEN re-enable is throttled even that case should not
2967 	 * hurt performance.
2968 	 */
2969 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2970 		return (NULL);
2971 	}
2972 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2973 		if (flags & PTNR_SELFOK) {
2974 			*typep = CE_XDIAG_PTNR_SELF;
2975 			return (dtcr);
2976 		} else {
2977 			return (NULL);
2978 		}
2979 	}
2980 
2981 	thistime = gethrtime();
2982 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2983 
2984 	/*
2985 	 * Select a starting point.
2986 	 */
2987 	if (!lasttime) {
2988 		/*
2989 		 * We've never selected a partner for this detector before.
2990 		 * Start the scan at the next online cpu in the same cpu
2991 		 * partition.
2992 		 */
2993 		sp = dtcr->cpu_next_part;
2994 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2995 		/*
2996 		 * Our last selection has not aged yet.  If this partner:
2997 		 *	. is still a valid cpu,
2998 		 *	. is still in the same partition as the detector
2999 		 *	. is still marked active
3000 		 *	. satisfies the 'flags' argument criteria
3001 		 * then select it again without updating the timestamp.
3002 		 */
3003 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3004 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
3005 		    !cpu_flagged_active(sp->cpu_flags) ||
3006 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
3007 		    (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP) &&
3008 		    !(flags & PTNR_SIBLINGOK))) {
3009 			sp = dtcr->cpu_next_part;
3010 		} else {
3011 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3012 				*typep = CE_XDIAG_PTNR_REMOTE;
3013 			} else if (sp == dtcr) {
3014 				*typep = CE_XDIAG_PTNR_SELF;
3015 			} else if (pg_plat_cpus_share(sp, dtcr, PGHW_CHIP)) {
3016 				*typep = CE_XDIAG_PTNR_SIBLING;
3017 			} else {
3018 				*typep = CE_XDIAG_PTNR_LOCAL;
3019 			}
3020 			return (sp);
3021 		}
3022 	} else {
3023 		/*
3024 		 * Our last selection has aged.  If it is nonetheless still a
3025 		 * valid cpu then start the scan at the next cpu in the
3026 		 * partition after our last partner.  If the last selection
3027 		 * is no longer a valid cpu then go with our default.  In
3028 		 * this way we slowly cycle through possible partners to
3029 		 * obtain multiple viewpoints over time.
3030 		 */
3031 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
3032 		if (sp == NULL) {
3033 			sp = dtcr->cpu_next_part;
3034 		} else {
3035 			sp = sp->cpu_next_part;		/* may be dtcr */
3036 			if (sp->cpu_part != dtcr->cpu_part)
3037 				sp = dtcr;
3038 		}
3039 	}
3040 
3041 	/*
3042 	 * We have a proposed starting point for our search, but if this
3043 	 * cpu is offline then its cpu_next_part will point to itself
3044 	 * so we can't use that to iterate over cpus in this partition in
3045 	 * the loop below.  We still want to avoid iterating over cpus not
3046 	 * in our partition, so in the case that our starting point is offline
3047 	 * we will repoint it to be the detector itself;  and if the detector
3048 	 * happens to be offline we'll return NULL from the following loop.
3049 	 */
3050 	if (!cpu_flagged_active(sp->cpu_flags)) {
3051 		sp = dtcr;
3052 	}
3053 
3054 	ptnr = sp;
3055 	locptnr = NULL;
3056 	sibptnr = NULL;
3057 	do {
3058 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3059 			continue;
3060 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3061 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3062 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3063 			*typep = CE_XDIAG_PTNR_REMOTE;
3064 			return (ptnr);
3065 		}
3066 		if (pg_plat_cpus_share(ptnr, dtcr, PGHW_CHIP)) {
3067 			if (sibptnr == NULL)
3068 				sibptnr = ptnr;
3069 			continue;
3070 		}
3071 		if (locptnr == NULL)
3072 			locptnr = ptnr;
3073 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3074 
3075 	/*
3076 	 * A foreign partner has already been returned if one was available.
3077 	 *
3078 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3079 	 * detector, is active, and is not a sibling of the detector.
3080 	 *
3081 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3082 	 * active.
3083 	 *
3084 	 * If we have to resort to using the detector itself we have already
3085 	 * checked that it is active.
3086 	 */
3087 	if (locptnr) {
3088 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3089 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3090 		*typep = CE_XDIAG_PTNR_LOCAL;
3091 		return (locptnr);
3092 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3093 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3094 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3095 		*typep = CE_XDIAG_PTNR_SIBLING;
3096 		return (sibptnr);
3097 	} else if (flags & PTNR_SELFOK) {
3098 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3099 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3100 		*typep = CE_XDIAG_PTNR_SELF;
3101 		return (dtcr);
3102 	}
3103 
3104 	return (NULL);
3105 }
3106 
3107 /*
3108  * Cross call handler that is requested to run on the designated partner of
3109  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3110  */
3111 static void
3112 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3113 {
3114 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3115 }
3116 
3117 /*
3118  * The associated errorqs are never destroyed so we do not need to deal with
3119  * them disappearing before this timeout fires.  If the affected memory
3120  * has been DR'd out since the original event the scrub algrithm will catch
3121  * any errors and return null disposition info.  If the original detecting
3122  * cpu has been DR'd out then ereport detector info will not be able to
3123  * lookup CPU type;  with a small timeout this is unlikely.
3124  */
3125 static void
3126 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3127 {
3128 	struct async_flt *aflt = cbarg->lkycb_aflt;
3129 	uchar_t disp;
3130 	cpu_t *cp;
3131 	int ptnrtype;
3132 
3133 	kpreempt_disable();
3134 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3135 	    &ptnrtype)) {
3136 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3137 		    (uint64_t)&disp);
3138 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3139 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3140 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3141 	} else {
3142 		ce_xdiag_lkydrops++;
3143 		if (ncpus > 1)
3144 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3145 			    CE_XDIAG_SKIP_NOPTNR);
3146 	}
3147 	kpreempt_enable();
3148 
3149 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3150 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3151 }
3152 
3153 /*
3154  * Called from errorq drain code when processing a CE error, both from
3155  * CPU and PCI drain functions.  Decide what further classification actions,
3156  * if any, we will perform.  Perform immediate actions now, and schedule
3157  * delayed actions as required.  Note that we are no longer necessarily running
3158  * on the detecting cpu, and that the async_flt structure will not persist on
3159  * return from this function.
3160  *
3161  * Calls to this function should aim to be self-throtlling in some way.  With
3162  * the delayed re-enable of CEEN the absolute rate of calls should not
3163  * be excessive.  Callers should also avoid performing in-depth classification
3164  * for events in pages that are already known to be suspect.
3165  *
3166  * We return nonzero to indicate that the event has been copied and
3167  * recirculated for further testing.  The caller should not log the event
3168  * in this case - it will be logged when further test results are available.
3169  *
3170  * Our possible contexts are that of errorq_drain: below lock level or from
3171  * panic context.  We can assume that the cpu we are running on is online.
3172  */
3173 
3174 
3175 #ifdef DEBUG
3176 static int ce_xdiag_forceaction;
3177 #endif
3178 
3179 int
3180 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3181     errorq_elem_t *eqep, size_t afltoffset)
3182 {
3183 	ce_dispact_t dispact, action;
3184 	cpu_t *cp;
3185 	uchar_t dtcrinfo, disp;
3186 	int ptnrtype;
3187 
3188 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3189 		ce_xdiag_drops++;
3190 		return (0);
3191 	} else if (!aflt->flt_in_memory) {
3192 		ce_xdiag_drops++;
3193 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3194 		return (0);
3195 	}
3196 
3197 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3198 
3199 	/*
3200 	 * Some correctable events are not scrubbed/classified, such as those
3201 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3202 	 * initial detector classification go no further.
3203 	 */
3204 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3205 		ce_xdiag_drops++;
3206 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3207 		return (0);
3208 	}
3209 
3210 	dispact = CE_DISPACT(ce_disp_table,
3211 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3212 	    CE_XDIAG_STATE(dtcrinfo),
3213 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3214 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3215 
3216 
3217 	action = CE_ACT(dispact);	/* bad lookup caught below */
3218 #ifdef DEBUG
3219 	if (ce_xdiag_forceaction != 0)
3220 		action = ce_xdiag_forceaction;
3221 #endif
3222 
3223 	switch (action) {
3224 	case CE_ACT_LKYCHK: {
3225 		caddr_t ndata;
3226 		errorq_elem_t *neqep;
3227 		struct async_flt *ecc;
3228 		ce_lkychk_cb_t *cbargp;
3229 
3230 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3231 			ce_xdiag_lkydrops++;
3232 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3233 			    CE_XDIAG_SKIP_DUPFAIL);
3234 			break;
3235 		}
3236 		ecc = (struct async_flt *)(ndata + afltoffset);
3237 
3238 		ASSERT(ecc->flt_class == CPU_FAULT ||
3239 		    ecc->flt_class == BUS_FAULT);
3240 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3241 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3242 
3243 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3244 		cbargp->lkycb_aflt = ecc;
3245 		cbargp->lkycb_eqp = eqp;
3246 		cbargp->lkycb_eqep = neqep;
3247 
3248 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3249 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3250 		return (1);
3251 	}
3252 
3253 	case CE_ACT_PTNRCHK:
3254 		kpreempt_disable();	/* stop cpu list changing */
3255 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3256 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3257 			    (uint64_t)aflt, (uint64_t)&disp);
3258 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3259 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3260 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3261 		} else if (ncpus > 1) {
3262 			ce_xdiag_ptnrdrops++;
3263 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3264 			    CE_XDIAG_SKIP_NOPTNR);
3265 		} else {
3266 			ce_xdiag_ptnrdrops++;
3267 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3268 			    CE_XDIAG_SKIP_UNIPROC);
3269 		}
3270 		kpreempt_enable();
3271 		break;
3272 
3273 	case CE_ACT_DONE:
3274 		break;
3275 
3276 	case CE_ACT(CE_DISP_BAD):
3277 	default:
3278 #ifdef DEBUG
3279 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3280 #endif
3281 		ce_xdiag_bad++;
3282 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3283 		break;
3284 	}
3285 
3286 	return (0);
3287 }
3288 
3289 /*
3290  * We route all errors through a single switch statement.
3291  */
3292 void
3293 cpu_ue_log_err(struct async_flt *aflt)
3294 {
3295 	switch (aflt->flt_class) {
3296 	case CPU_FAULT:
3297 		cpu_ereport_init(aflt);
3298 		if (cpu_async_log_err(aflt, NULL))
3299 			cpu_ereport_post(aflt);
3300 		break;
3301 
3302 	case BUS_FAULT:
3303 		bus_async_log_err(aflt);
3304 		break;
3305 
3306 	default:
3307 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3308 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3309 		return;
3310 	}
3311 }
3312 
3313 /*
3314  * Routine for panic hook callback from panic_idle().
3315  */
3316 void
3317 cpu_async_panic_callb(void)
3318 {
3319 	ch_async_flt_t ch_flt;
3320 	struct async_flt *aflt;
3321 	ch_cpu_errors_t cpu_error_regs;
3322 	uint64_t afsr_errs;
3323 
3324 	get_cpu_error_state(&cpu_error_regs);
3325 
3326 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3327 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3328 
3329 	if (afsr_errs) {
3330 
3331 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3332 		aflt = (struct async_flt *)&ch_flt;
3333 		aflt->flt_id = gethrtime_waitfree();
3334 		aflt->flt_bus_id = getprocessorid();
3335 		aflt->flt_inst = CPU->cpu_id;
3336 		aflt->flt_stat = cpu_error_regs.afsr;
3337 		aflt->flt_addr = cpu_error_regs.afar;
3338 		aflt->flt_prot = AFLT_PROT_NONE;
3339 		aflt->flt_class = CPU_FAULT;
3340 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3341 		aflt->flt_panic = 1;
3342 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3343 		ch_flt.afsr_errs = afsr_errs;
3344 #if defined(SERRANO)
3345 		ch_flt.afar2 = cpu_error_regs.afar2;
3346 #endif	/* SERRANO */
3347 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3348 	}
3349 }
3350 
3351 /*
3352  * Routine to convert a syndrome into a syndrome code.
3353  */
3354 static int
3355 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3356 {
3357 	if (synd_status == AFLT_STAT_INVALID)
3358 		return (-1);
3359 
3360 	/*
3361 	 * Use the syndrome to index the appropriate syndrome table,
3362 	 * to get the code indicating which bit(s) is(are) bad.
3363 	 */
3364 	if (afsr_bit &
3365 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3366 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3367 #if defined(JALAPENO) || defined(SERRANO)
3368 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3369 				return (-1);
3370 			else
3371 				return (BPAR0 + synd);
3372 #else /* JALAPENO || SERRANO */
3373 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3374 				return (-1);
3375 			else
3376 				return (mtag_syndrome_tab[synd]);
3377 #endif /* JALAPENO || SERRANO */
3378 		} else {
3379 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3380 				return (-1);
3381 			else
3382 				return (ecc_syndrome_tab[synd]);
3383 		}
3384 	} else {
3385 		return (-1);
3386 	}
3387 }
3388 
3389 int
3390 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3391 {
3392 	if (&plat_get_mem_sid)
3393 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3394 	else
3395 		return (ENOTSUP);
3396 }
3397 
3398 int
3399 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3400 {
3401 	if (&plat_get_mem_offset)
3402 		return (plat_get_mem_offset(flt_addr, offp));
3403 	else
3404 		return (ENOTSUP);
3405 }
3406 
3407 int
3408 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3409 {
3410 	if (&plat_get_mem_addr)
3411 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3412 	else
3413 		return (ENOTSUP);
3414 }
3415 
3416 /*
3417  * Routine to return a string identifying the physical name
3418  * associated with a memory/cache error.
3419  */
3420 int
3421 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3422     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3423     ushort_t flt_status, char *buf, int buflen, int *lenp)
3424 {
3425 	int synd_code;
3426 	int ret;
3427 
3428 	/*
3429 	 * An AFSR of -1 defaults to a memory syndrome.
3430 	 */
3431 	if (flt_stat == (uint64_t)-1)
3432 		flt_stat = C_AFSR_CE;
3433 
3434 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3435 
3436 	/*
3437 	 * Syndrome code must be either a single-bit error code
3438 	 * (0...143) or -1 for unum lookup.
3439 	 */
3440 	if (synd_code < 0 || synd_code >= M2)
3441 		synd_code = -1;
3442 	if (&plat_get_mem_unum) {
3443 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3444 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3445 			buf[0] = '\0';
3446 			*lenp = 0;
3447 		}
3448 
3449 		return (ret);
3450 	}
3451 
3452 	return (ENOTSUP);
3453 }
3454 
3455 /*
3456  * Wrapper for cpu_get_mem_unum() routine that takes an
3457  * async_flt struct rather than explicit arguments.
3458  */
3459 int
3460 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3461     char *buf, int buflen, int *lenp)
3462 {
3463 	/*
3464 	 * If we come thru here for an IO bus error aflt->flt_stat will
3465 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3466 	 * so it will interpret this as a memory error.
3467 	 */
3468 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3469 	    (aflt->flt_class == BUS_FAULT) ?
3470 	    (uint64_t)-1 : ((ch_async_flt_t *)aflt)->flt_bit,
3471 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3472 	    aflt->flt_status, buf, buflen, lenp));
3473 }
3474 
3475 /*
3476  * Return unum string given synd_code and async_flt into
3477  * the buf with size UNUM_NAMLEN
3478  */
3479 static int
3480 cpu_get_mem_unum_synd(int synd_code, struct async_flt *aflt, char *buf)
3481 {
3482 	int ret, len;
3483 
3484 	/*
3485 	 * Syndrome code must be either a single-bit error code
3486 	 * (0...143) or -1 for unum lookup.
3487 	 */
3488 	if (synd_code < 0 || synd_code >= M2)
3489 		synd_code = -1;
3490 	if (&plat_get_mem_unum) {
3491 		if ((ret = plat_get_mem_unum(synd_code, aflt->flt_addr,
3492 		    aflt->flt_bus_id, aflt->flt_in_memory,
3493 		    aflt->flt_status, buf, UNUM_NAMLEN, &len)) != 0) {
3494 			buf[0] = '\0';
3495 		}
3496 		return (ret);
3497 	}
3498 
3499 	buf[0] = '\0';
3500 	return (ENOTSUP);
3501 }
3502 
3503 /*
3504  * This routine is a more generic interface to cpu_get_mem_unum()
3505  * that may be used by other modules (e.g. the 'mm' driver, through
3506  * the 'MEM_NAME' ioctl, which is used by fmd to resolve unum's
3507  * for Jalapeno/Serrano FRC/RCE or FRU/RUE paired events).
3508  */
3509 int
3510 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3511     char *buf, int buflen, int *lenp)
3512 {
3513 	int synd_status, flt_in_memory, ret;
3514 	ushort_t flt_status = 0;
3515 	char unum[UNUM_NAMLEN];
3516 	uint64_t t_afsr_errs;
3517 
3518 	/*
3519 	 * Check for an invalid address.
3520 	 */
3521 	if (afar == (uint64_t)-1)
3522 		return (ENXIO);
3523 
3524 	if (synd == (uint64_t)-1)
3525 		synd_status = AFLT_STAT_INVALID;
3526 	else
3527 		synd_status = AFLT_STAT_VALID;
3528 
3529 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3530 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3531 
3532 	/*
3533 	 * Get aggregate AFSR for call to cpu_error_is_ecache_data.
3534 	 */
3535 	if (*afsr == (uint64_t)-1)
3536 		t_afsr_errs = C_AFSR_CE;
3537 	else {
3538 		t_afsr_errs = (*afsr & C_AFSR_ALL_ERRS);
3539 #if defined(CHEETAH_PLUS)
3540 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
3541 			t_afsr_errs |= (*(afsr + 1) & C_AFSR_EXT_ALL_ERRS);
3542 #endif	/* CHEETAH_PLUS */
3543 	}
3544 
3545 	/*
3546 	 * Turn on ECC_ECACHE if error type is E$ Data.
3547 	 */
3548 	if (cpu_error_is_ecache_data(CPU->cpu_id, t_afsr_errs))
3549 		flt_status |= ECC_ECACHE;
3550 
3551 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, t_afsr_errs, afar,
3552 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3553 	if (ret != 0)
3554 		return (ret);
3555 
3556 	if (*lenp >= buflen)
3557 		return (ENAMETOOLONG);
3558 
3559 	(void) strncpy(buf, unum, buflen);
3560 
3561 	return (0);
3562 }
3563 
3564 /*
3565  * Routine to return memory information associated
3566  * with a physical address and syndrome.
3567  */
3568 int
3569 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3570     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3571     int *segsp, int *banksp, int *mcidp)
3572 {
3573 	int synd_status, synd_code;
3574 
3575 	if (afar == (uint64_t)-1)
3576 		return (ENXIO);
3577 
3578 	if (synd == (uint64_t)-1)
3579 		synd_status = AFLT_STAT_INVALID;
3580 	else
3581 		synd_status = AFLT_STAT_VALID;
3582 
3583 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3584 
3585 	if (p2get_mem_info != NULL)
3586 		return ((p2get_mem_info)(synd_code, afar,
3587 		    mem_sizep, seg_sizep, bank_sizep,
3588 		    segsp, banksp, mcidp));
3589 	else
3590 		return (ENOTSUP);
3591 }
3592 
3593 /*
3594  * Routine to return a string identifying the physical
3595  * name associated with a cpuid.
3596  */
3597 int
3598 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3599 {
3600 	int ret;
3601 	char unum[UNUM_NAMLEN];
3602 
3603 	if (&plat_get_cpu_unum) {
3604 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3605 		    != 0)
3606 			return (ret);
3607 	} else {
3608 		return (ENOTSUP);
3609 	}
3610 
3611 	if (*lenp >= buflen)
3612 		return (ENAMETOOLONG);
3613 
3614 	(void) strncpy(buf, unum, buflen);
3615 
3616 	return (0);
3617 }
3618 
3619 /*
3620  * This routine exports the name buffer size.
3621  */
3622 size_t
3623 cpu_get_name_bufsize()
3624 {
3625 	return (UNUM_NAMLEN);
3626 }
3627 
3628 /*
3629  * Historical function, apparantly not used.
3630  */
3631 /* ARGSUSED */
3632 void
3633 cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
3634 {}
3635 
3636 /*
3637  * Historical function only called for SBus errors in debugging.
3638  */
3639 /*ARGSUSED*/
3640 void
3641 read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
3642 {}
3643 
3644 /*
3645  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3646  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3647  * an async fault structure argument is passed in, the captured error state
3648  * (AFSR, AFAR) info will be returned in the structure.
3649  */
3650 int
3651 clear_errors(ch_async_flt_t *ch_flt)
3652 {
3653 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3654 	ch_cpu_errors_t	cpu_error_regs;
3655 
3656 	get_cpu_error_state(&cpu_error_regs);
3657 
3658 	if (ch_flt != NULL) {
3659 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3660 		aflt->flt_addr = cpu_error_regs.afar;
3661 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3662 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3663 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3664 #if defined(SERRANO)
3665 		ch_flt->afar2 = cpu_error_regs.afar2;
3666 #endif	/* SERRANO */
3667 	}
3668 
3669 	set_cpu_error_state(&cpu_error_regs);
3670 
3671 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3672 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3673 }
3674 
3675 /*
3676  * Clear any AFSR error bits, and check for persistence.
3677  *
3678  * It would be desirable to also insist that syndrome match.  PCI handling
3679  * has already filled flt_synd.  For errors trapped by CPU we only fill
3680  * flt_synd when we queue the event, so we do not have a valid flt_synd
3681  * during initial classification (it is valid if we're called as part of
3682  * subsequent low-pil additional classification attempts).  We could try
3683  * to determine which syndrome to use: we know we're only called for
3684  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3685  * would be esynd/none and esynd/msynd, respectively.  If that is
3686  * implemented then what do we do in the case that we do experience an
3687  * error on the same afar but with different syndrome?  At the very least
3688  * we should count such occurences.  Anyway, for now, we'll leave it as
3689  * it has been for ages.
3690  */
3691 static int
3692 clear_ecc(struct async_flt *aflt)
3693 {
3694 	ch_cpu_errors_t	cpu_error_regs;
3695 
3696 	/*
3697 	 * Snapshot the AFSR and AFAR and clear any errors
3698 	 */
3699 	get_cpu_error_state(&cpu_error_regs);
3700 	set_cpu_error_state(&cpu_error_regs);
3701 
3702 	/*
3703 	 * If any of the same memory access error bits are still on and
3704 	 * the AFAR matches, return that the error is persistent.
3705 	 */
3706 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3707 	    cpu_error_regs.afar == aflt->flt_addr);
3708 }
3709 
3710 /*
3711  * Turn off all cpu error detection, normally only used for panics.
3712  */
3713 void
3714 cpu_disable_errors(void)
3715 {
3716 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3717 
3718 	/*
3719 	 * With error detection now turned off, check the other cpus
3720 	 * logout areas for any unlogged errors.
3721 	 */
3722 	if (enable_check_other_cpus_logout) {
3723 		cpu_check_other_cpus_logout();
3724 		/*
3725 		 * Make a second pass over the logout areas, in case
3726 		 * there is a failing CPU in an error-trap loop which
3727 		 * will write to the logout area once it is emptied.
3728 		 */
3729 		cpu_check_other_cpus_logout();
3730 	}
3731 }
3732 
3733 /*
3734  * Enable errors.
3735  */
3736 void
3737 cpu_enable_errors(void)
3738 {
3739 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3740 }
3741 
3742 /*
3743  * Flush the entire ecache using displacement flush by reading through a
3744  * physical address range twice as large as the Ecache.
3745  */
3746 void
3747 cpu_flush_ecache(void)
3748 {
3749 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3750 	    cpunodes[CPU->cpu_id].ecache_linesize);
3751 }
3752 
3753 /*
3754  * Return CPU E$ set size - E$ size divided by the associativity.
3755  * We use this function in places where the CPU_PRIVATE ptr may not be
3756  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3757  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3758  * up before the kernel switches from OBP's to the kernel's trap table, so
3759  * we don't have to worry about cpunodes being unitialized.
3760  */
3761 int
3762 cpu_ecache_set_size(struct cpu *cp)
3763 {
3764 	if (CPU_PRIVATE(cp))
3765 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3766 
3767 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3768 }
3769 
3770 /*
3771  * Flush Ecache line.
3772  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3773  * Uses normal displacement flush for Cheetah.
3774  */
3775 static void
3776 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3777 {
3778 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3779 	int ec_set_size = cpu_ecache_set_size(CPU);
3780 
3781 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3782 }
3783 
3784 /*
3785  * Scrub physical address.
3786  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3787  * Ecache or direct-mapped Ecache.
3788  */
3789 static void
3790 cpu_scrubphys(struct async_flt *aflt)
3791 {
3792 	int ec_set_size = cpu_ecache_set_size(CPU);
3793 
3794 	scrubphys(aflt->flt_addr, ec_set_size);
3795 }
3796 
3797 /*
3798  * Clear physical address.
3799  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3800  * Ecache or direct-mapped Ecache.
3801  */
3802 void
3803 cpu_clearphys(struct async_flt *aflt)
3804 {
3805 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3806 	int ec_set_size = cpu_ecache_set_size(CPU);
3807 
3808 
3809 	clearphys(aflt->flt_addr, ec_set_size, lsize);
3810 }
3811 
3812 #if defined(CPU_IMP_ECACHE_ASSOC)
3813 /*
3814  * Check for a matching valid line in all the sets.
3815  * If found, return set# + 1. Otherwise return 0.
3816  */
3817 static int
3818 cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
3819 {
3820 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3821 	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
3822 	int ec_set_size = cpu_ecache_set_size(CPU);
3823 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
3824 	int nway = cpu_ecache_nway();
3825 	int i;
3826 
3827 	for (i = 0; i < nway; i++, ecp++) {
3828 		if (!cpu_ectag_line_invalid(totalsize, ecp->ec_tag) &&
3829 		    (aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
3830 		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
3831 			return (i+1);
3832 	}
3833 	return (0);
3834 }
3835 #endif /* CPU_IMP_ECACHE_ASSOC */
3836 
3837 /*
3838  * Check whether a line in the given logout info matches the specified
3839  * fault address.  If reqval is set then the line must not be Invalid.
3840  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3841  * set to 2 for l2$ or 3 for l3$.
3842  */
3843 static int
3844 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3845 {
3846 	ch_diag_data_t *cdp = data;
3847 	ch_ec_data_t *ecp;
3848 	int totalsize, ec_set_size;
3849 	int i, ways;
3850 	int match = 0;
3851 	int tagvalid;
3852 	uint64_t addr, tagpa;
3853 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3854 
3855 	/*
3856 	 * Check the l2$ logout data
3857 	 */
3858 	if (ispanther) {
3859 		ecp = &cdp->chd_l2_data[0];
3860 		ec_set_size = PN_L2_SET_SIZE;
3861 		ways = PN_L2_NWAYS;
3862 	} else {
3863 		ecp = &cdp->chd_ec_data[0];
3864 		ec_set_size = cpu_ecache_set_size(CPU);
3865 		ways = cpu_ecache_nway();
3866 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3867 	}
3868 	/* remove low order PA bits from fault address not used in PA tag */
3869 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3870 	for (i = 0; i < ways; i++, ecp++) {
3871 		if (ispanther) {
3872 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3873 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3874 		} else {
3875 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3876 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3877 			    ecp->ec_tag);
3878 		}
3879 		if (tagpa == addr && (!reqval || tagvalid)) {
3880 			match = i + 1;
3881 			*level = 2;
3882 			break;
3883 		}
3884 	}
3885 
3886 	if (match || !ispanther)
3887 		return (match);
3888 
3889 	/* For Panther we also check the l3$ */
3890 	ecp = &cdp->chd_ec_data[0];
3891 	ec_set_size = PN_L3_SET_SIZE;
3892 	ways = PN_L3_NWAYS;
3893 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3894 
3895 	for (i = 0; i < ways; i++, ecp++) {
3896 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3897 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3898 			match = i + 1;
3899 			*level = 3;
3900 			break;
3901 		}
3902 	}
3903 
3904 	return (match);
3905 }
3906 
3907 #if defined(CPU_IMP_L1_CACHE_PARITY)
3908 /*
3909  * Record information related to the source of an Dcache Parity Error.
3910  */
3911 static void
3912 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3913 {
3914 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3915 	int index;
3916 
3917 	/*
3918 	 * Since instruction decode cannot be done at high PIL
3919 	 * just examine the entire Dcache to locate the error.
3920 	 */
3921 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3922 		ch_flt->parity_data.dpe.cpl_way = -1;
3923 		ch_flt->parity_data.dpe.cpl_off = -1;
3924 	}
3925 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3926 		cpu_dcache_parity_check(ch_flt, index);
3927 }
3928 
3929 /*
3930  * Check all ways of the Dcache at a specified index for good parity.
3931  */
3932 static void
3933 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3934 {
3935 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3936 	uint64_t parity_bits, pbits, data_word;
3937 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3938 	int way, word, data_byte;
3939 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3940 	ch_dc_data_t tmp_dcp;
3941 
3942 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3943 		/*
3944 		 * Perform diagnostic read.
3945 		 */
3946 		get_dcache_dtag(index + way * dc_set_size,
3947 		    (uint64_t *)&tmp_dcp);
3948 
3949 		/*
3950 		 * Check tag for even parity.
3951 		 * Sum of 1 bits (including parity bit) should be even.
3952 		 */
3953 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3954 			/*
3955 			 * If this is the first error log detailed information
3956 			 * about it and check the snoop tag. Otherwise just
3957 			 * record the fact that we found another error.
3958 			 */
3959 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3960 				ch_flt->parity_data.dpe.cpl_way = way;
3961 				ch_flt->parity_data.dpe.cpl_cache =
3962 				    CPU_DC_PARITY;
3963 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3964 
3965 				if (popc64(tmp_dcp.dc_sntag &
3966 				    CHP_DCSNTAG_PARMASK) & 1) {
3967 					ch_flt->parity_data.dpe.cpl_tag |=
3968 					    CHP_DC_SNTAG;
3969 					ch_flt->parity_data.dpe.cpl_lcnt++;
3970 				}
3971 
3972 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3973 			}
3974 
3975 			ch_flt->parity_data.dpe.cpl_lcnt++;
3976 		}
3977 
3978 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3979 			/*
3980 			 * Panther has more parity bits than the other
3981 			 * processors for covering dcache data and so each
3982 			 * byte of data in each word has its own parity bit.
3983 			 */
3984 			parity_bits = tmp_dcp.dc_pn_data_parity;
3985 			for (word = 0; word < 4; word++) {
3986 				data_word = tmp_dcp.dc_data[word];
3987 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3988 				for (data_byte = 0; data_byte < 8;
3989 				    data_byte++) {
3990 					if (((popc64(data_word &
3991 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3992 					    (pbits & 1)) {
3993 						cpu_record_dc_data_parity(
3994 						    ch_flt, dcp, &tmp_dcp, way,
3995 						    word);
3996 					}
3997 					pbits >>= 1;
3998 					data_word >>= 8;
3999 				}
4000 				parity_bits >>= 8;
4001 			}
4002 		} else {
4003 			/*
4004 			 * Check data array for even parity.
4005 			 * The 8 parity bits are grouped into 4 pairs each
4006 			 * of which covers a 64-bit word.  The endianness is
4007 			 * reversed -- the low-order parity bits cover the
4008 			 * high-order data words.
4009 			 */
4010 			parity_bits = tmp_dcp.dc_utag >> 8;
4011 			for (word = 0; word < 4; word++) {
4012 				pbits = (parity_bits >> (6 - word * 2)) & 3;
4013 				if ((popc64(tmp_dcp.dc_data[word]) +
4014 				    parity_bits_popc[pbits]) & 1) {
4015 					cpu_record_dc_data_parity(ch_flt, dcp,
4016 					    &tmp_dcp, way, word);
4017 				}
4018 			}
4019 		}
4020 	}
4021 }
4022 
4023 static void
4024 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
4025     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
4026 {
4027 	/*
4028 	 * If this is the first error log detailed information about it.
4029 	 * Otherwise just record the fact that we found another error.
4030 	 */
4031 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4032 		ch_flt->parity_data.dpe.cpl_way = way;
4033 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
4034 		ch_flt->parity_data.dpe.cpl_off = word * 8;
4035 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
4036 	}
4037 	ch_flt->parity_data.dpe.cpl_lcnt++;
4038 }
4039 
4040 /*
4041  * Record information related to the source of an Icache Parity Error.
4042  *
4043  * Called with the Icache disabled so any diagnostic accesses are safe.
4044  */
4045 static void
4046 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
4047 {
4048 	int	ic_set_size;
4049 	int	ic_linesize;
4050 	int	index;
4051 
4052 	if (CPU_PRIVATE(CPU)) {
4053 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4054 		    CH_ICACHE_NWAY;
4055 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4056 	} else {
4057 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4058 		ic_linesize = icache_linesize;
4059 	}
4060 
4061 	ch_flt->parity_data.ipe.cpl_way = -1;
4062 	ch_flt->parity_data.ipe.cpl_off = -1;
4063 
4064 	for (index = 0; index < ic_set_size; index += ic_linesize)
4065 		cpu_icache_parity_check(ch_flt, index);
4066 }
4067 
4068 /*
4069  * Check all ways of the Icache at a specified index for good parity.
4070  */
4071 static void
4072 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4073 {
4074 	uint64_t parmask, pn_inst_parity;
4075 	int ic_set_size;
4076 	int ic_linesize;
4077 	int flt_index, way, instr, num_instr;
4078 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4079 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4080 	ch_ic_data_t tmp_icp;
4081 
4082 	if (CPU_PRIVATE(CPU)) {
4083 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4084 		    CH_ICACHE_NWAY;
4085 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4086 	} else {
4087 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4088 		ic_linesize = icache_linesize;
4089 	}
4090 
4091 	/*
4092 	 * Panther has twice as many instructions per icache line and the
4093 	 * instruction parity bit is in a different location.
4094 	 */
4095 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4096 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4097 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4098 	} else {
4099 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4100 		pn_inst_parity = 0;
4101 	}
4102 
4103 	/*
4104 	 * Index at which we expect to find the parity error.
4105 	 */
4106 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4107 
4108 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4109 		/*
4110 		 * Diagnostic reads expect address argument in ASI format.
4111 		 */
4112 		get_icache_dtag(2 * (index + way * ic_set_size),
4113 		    (uint64_t *)&tmp_icp);
4114 
4115 		/*
4116 		 * If this is the index in which we expect to find the
4117 		 * error log detailed information about each of the ways.
4118 		 * This information will be displayed later if we can't
4119 		 * determine the exact way in which the error is located.
4120 		 */
4121 		if (flt_index == index)
4122 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4123 
4124 		/*
4125 		 * Check tag for even parity.
4126 		 * Sum of 1 bits (including parity bit) should be even.
4127 		 */
4128 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4129 			/*
4130 			 * If this way is the one in which we expected
4131 			 * to find the error record the way and check the
4132 			 * snoop tag. Otherwise just record the fact we
4133 			 * found another error.
4134 			 */
4135 			if (flt_index == index) {
4136 				ch_flt->parity_data.ipe.cpl_way = way;
4137 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4138 
4139 				if (popc64(tmp_icp.ic_sntag &
4140 				    CHP_ICSNTAG_PARMASK) & 1) {
4141 					ch_flt->parity_data.ipe.cpl_tag |=
4142 					    CHP_IC_SNTAG;
4143 					ch_flt->parity_data.ipe.cpl_lcnt++;
4144 				}
4145 
4146 			}
4147 			ch_flt->parity_data.ipe.cpl_lcnt++;
4148 			continue;
4149 		}
4150 
4151 		/*
4152 		 * Check instruction data for even parity.
4153 		 * Bits participating in parity differ for PC-relative
4154 		 * versus non-PC-relative instructions.
4155 		 */
4156 		for (instr = 0; instr < num_instr; instr++) {
4157 			parmask = (tmp_icp.ic_data[instr] &
4158 			    CH_ICDATA_PRED_ISPCREL) ?
4159 			    (CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4160 			    (CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4161 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4162 				/*
4163 				 * If this way is the one in which we expected
4164 				 * to find the error record the way and offset.
4165 				 * Otherwise just log the fact we found another
4166 				 * error.
4167 				 */
4168 				if (flt_index == index) {
4169 					ch_flt->parity_data.ipe.cpl_way = way;
4170 					ch_flt->parity_data.ipe.cpl_off =
4171 					    instr * 4;
4172 				}
4173 				ch_flt->parity_data.ipe.cpl_lcnt++;
4174 				continue;
4175 			}
4176 		}
4177 	}
4178 }
4179 
4180 /*
4181  * Record information related to the source of an Pcache Parity Error.
4182  */
4183 static void
4184 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4185 {
4186 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4187 	int index;
4188 
4189 	/*
4190 	 * Since instruction decode cannot be done at high PIL just
4191 	 * examine the entire Pcache to check for any parity errors.
4192 	 */
4193 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4194 		ch_flt->parity_data.dpe.cpl_way = -1;
4195 		ch_flt->parity_data.dpe.cpl_off = -1;
4196 	}
4197 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4198 		cpu_pcache_parity_check(ch_flt, index);
4199 }
4200 
4201 /*
4202  * Check all ways of the Pcache at a specified index for good parity.
4203  */
4204 static void
4205 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4206 {
4207 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4208 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4209 	int way, word, pbit, parity_bits;
4210 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4211 	ch_pc_data_t tmp_pcp;
4212 
4213 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4214 		/*
4215 		 * Perform diagnostic read.
4216 		 */
4217 		get_pcache_dtag(index + way * pc_set_size,
4218 		    (uint64_t *)&tmp_pcp);
4219 		/*
4220 		 * Check data array for odd parity. There are 8 parity
4221 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4222 		 * of those bits covers exactly 8 bytes of the data
4223 		 * array:
4224 		 *
4225 		 *	parity bit	P$ data bytes covered
4226 		 *	----------	---------------------
4227 		 *	50		63:56
4228 		 *	51		55:48
4229 		 *	52		47:40
4230 		 *	53		39:32
4231 		 *	54		31:24
4232 		 *	55		23:16
4233 		 *	56		15:8
4234 		 *	57		7:0
4235 		 */
4236 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4237 		for (word = 0; word < pc_data_words; word++) {
4238 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4239 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4240 				/*
4241 				 * If this is the first error log detailed
4242 				 * information about it. Otherwise just record
4243 				 * the fact that we found another error.
4244 				 */
4245 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4246 					ch_flt->parity_data.dpe.cpl_way = way;
4247 					ch_flt->parity_data.dpe.cpl_cache =
4248 					    CPU_PC_PARITY;
4249 					ch_flt->parity_data.dpe.cpl_off =
4250 					    word * sizeof (uint64_t);
4251 					bcopy(&tmp_pcp, pcp,
4252 					    sizeof (ch_pc_data_t));
4253 				}
4254 				ch_flt->parity_data.dpe.cpl_lcnt++;
4255 			}
4256 		}
4257 	}
4258 }
4259 
4260 
4261 /*
4262  * Add L1 Data cache data to the ereport payload.
4263  */
4264 static void
4265 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4266 {
4267 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4268 	ch_dc_data_t *dcp;
4269 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4270 	uint_t nelem;
4271 	int i, ways_to_check, ways_logged = 0;
4272 
4273 	/*
4274 	 * If this is an D$ fault then there may be multiple
4275 	 * ways captured in the ch_parity_log_t structure.
4276 	 * Otherwise, there will be at most one way captured
4277 	 * in the ch_diag_data_t struct.
4278 	 * Check each way to see if it should be encoded.
4279 	 */
4280 	if (ch_flt->flt_type == CPU_DC_PARITY)
4281 		ways_to_check = CH_DCACHE_NWAY;
4282 	else
4283 		ways_to_check = 1;
4284 	for (i = 0; i < ways_to_check; i++) {
4285 		if (ch_flt->flt_type == CPU_DC_PARITY)
4286 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4287 		else
4288 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4289 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4290 			bcopy(dcp, &dcdata[ways_logged],
4291 			    sizeof (ch_dc_data_t));
4292 			ways_logged++;
4293 		}
4294 	}
4295 
4296 	/*
4297 	 * Add the dcache data to the payload.
4298 	 */
4299 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4300 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4301 	if (ways_logged != 0) {
4302 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4303 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4304 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4305 	}
4306 }
4307 
4308 /*
4309  * Add L1 Instruction cache data to the ereport payload.
4310  */
4311 static void
4312 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4313 {
4314 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4315 	ch_ic_data_t *icp;
4316 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4317 	uint_t nelem;
4318 	int i, ways_to_check, ways_logged = 0;
4319 
4320 	/*
4321 	 * If this is an I$ fault then there may be multiple
4322 	 * ways captured in the ch_parity_log_t structure.
4323 	 * Otherwise, there will be at most one way captured
4324 	 * in the ch_diag_data_t struct.
4325 	 * Check each way to see if it should be encoded.
4326 	 */
4327 	if (ch_flt->flt_type == CPU_IC_PARITY)
4328 		ways_to_check = CH_ICACHE_NWAY;
4329 	else
4330 		ways_to_check = 1;
4331 	for (i = 0; i < ways_to_check; i++) {
4332 		if (ch_flt->flt_type == CPU_IC_PARITY)
4333 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4334 		else
4335 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4336 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4337 			bcopy(icp, &icdata[ways_logged],
4338 			    sizeof (ch_ic_data_t));
4339 			ways_logged++;
4340 		}
4341 	}
4342 
4343 	/*
4344 	 * Add the icache data to the payload.
4345 	 */
4346 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4347 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4348 	if (ways_logged != 0) {
4349 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4350 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4351 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4352 	}
4353 }
4354 
4355 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4356 
4357 /*
4358  * Add ecache data to payload.
4359  */
4360 static void
4361 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4362 {
4363 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4364 	ch_ec_data_t *ecp;
4365 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4366 	uint_t nelem;
4367 	int i, ways_logged = 0;
4368 
4369 	/*
4370 	 * Check each way to see if it should be encoded
4371 	 * and concatinate it into a temporary buffer.
4372 	 */
4373 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4374 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4375 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4376 			bcopy(ecp, &ecdata[ways_logged],
4377 			    sizeof (ch_ec_data_t));
4378 			ways_logged++;
4379 		}
4380 	}
4381 
4382 	/*
4383 	 * Panther CPUs have an additional level of cache and so
4384 	 * what we just collected was the L3 (ecache) and not the
4385 	 * L2 cache.
4386 	 */
4387 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4388 		/*
4389 		 * Add the L3 (ecache) data to the payload.
4390 		 */
4391 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4392 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4393 		if (ways_logged != 0) {
4394 			nelem = sizeof (ch_ec_data_t) /
4395 			    sizeof (uint64_t) * ways_logged;
4396 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4397 			    DATA_TYPE_UINT64_ARRAY, nelem,
4398 			    (uint64_t *)ecdata, NULL);
4399 		}
4400 
4401 		/*
4402 		 * Now collect the L2 cache.
4403 		 */
4404 		ways_logged = 0;
4405 		for (i = 0; i < PN_L2_NWAYS; i++) {
4406 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4407 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4408 				bcopy(ecp, &ecdata[ways_logged],
4409 				    sizeof (ch_ec_data_t));
4410 				ways_logged++;
4411 			}
4412 		}
4413 	}
4414 
4415 	/*
4416 	 * Add the L2 cache data to the payload.
4417 	 */
4418 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4419 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4420 	if (ways_logged != 0) {
4421 		nelem = sizeof (ch_ec_data_t) /
4422 		    sizeof (uint64_t) * ways_logged;
4423 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4424 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4425 	}
4426 }
4427 
4428 /*
4429  * Initialize cpu scheme for specified cpu.
4430  */
4431 static void
4432 cpu_fmri_cpu_set(nvlist_t *cpu_fmri, int cpuid)
4433 {
4434 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
4435 	uint8_t mask;
4436 
4437 	mask = cpunodes[cpuid].version;
4438 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
4439 	    (u_longlong_t)cpunodes[cpuid].device_id);
4440 	(void) fm_fmri_cpu_set(cpu_fmri, FM_CPU_SCHEME_VERSION, NULL,
4441 	    cpuid, &mask, (const char *)sbuf);
4442 }
4443 
4444 /*
4445  * Returns ereport resource type.
4446  */
4447 static int
4448 cpu_error_to_resource_type(struct async_flt *aflt)
4449 {
4450 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4451 
4452 	switch (ch_flt->flt_type) {
4453 
4454 	case CPU_CE_ECACHE:
4455 	case CPU_UE_ECACHE:
4456 	case CPU_UE_ECACHE_RETIRE:
4457 	case CPU_ORPH:
4458 		/*
4459 		 * If AFSR error bit indicates L2$ Data for Cheetah,
4460 		 * Cheetah+ or Jaguar, or L3$ Data for Panther, return
4461 		 * E$ Data type, otherwise, return CPU type.
4462 		 */
4463 		if (cpu_error_is_ecache_data(aflt->flt_inst,
4464 		    ch_flt->flt_bit))
4465 			return (ERRTYPE_ECACHE_DATA);
4466 		return (ERRTYPE_CPU);
4467 
4468 	case CPU_CE:
4469 	case CPU_UE:
4470 	case CPU_EMC:
4471 	case CPU_DUE:
4472 	case CPU_RCE:
4473 	case CPU_RUE:
4474 	case CPU_FRC:
4475 	case CPU_FRU:
4476 		return (ERRTYPE_MEMORY);
4477 
4478 	case CPU_IC_PARITY:
4479 	case CPU_DC_PARITY:
4480 	case CPU_FPUERR:
4481 	case CPU_PC_PARITY:
4482 	case CPU_ITLB_PARITY:
4483 	case CPU_DTLB_PARITY:
4484 		return (ERRTYPE_CPU);
4485 	}
4486 	return (ERRTYPE_UNKNOWN);
4487 }
4488 
4489 /*
4490  * Encode the data saved in the ch_async_flt_t struct into
4491  * the FM ereport payload.
4492  */
4493 static void
4494 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4495 	nvlist_t *resource, int *afar_status, int *synd_status)
4496 {
4497 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4498 	*synd_status = AFLT_STAT_INVALID;
4499 	*afar_status = AFLT_STAT_INVALID;
4500 
4501 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4502 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4503 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4504 	}
4505 
4506 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4507 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4508 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4509 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4510 	}
4511 
4512 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4513 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4514 		    ch_flt->flt_bit);
4515 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4516 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4517 	}
4518 
4519 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4520 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4521 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4522 	}
4523 
4524 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4525 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4526 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4527 	}
4528 
4529 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4530 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4531 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4532 	}
4533 
4534 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4535 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4536 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4537 	}
4538 
4539 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4540 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4541 		    DATA_TYPE_BOOLEAN_VALUE,
4542 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4543 	}
4544 
4545 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4546 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4547 		    DATA_TYPE_BOOLEAN_VALUE,
4548 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4549 	}
4550 
4551 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4552 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4553 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4554 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4555 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4556 	}
4557 
4558 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4559 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4560 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4561 	}
4562 
4563 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4564 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4565 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4566 	}
4567 
4568 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4569 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4570 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4571 	}
4572 
4573 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4574 		cpu_payload_add_ecache(aflt, payload);
4575 
4576 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4577 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4578 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4579 	}
4580 
4581 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4582 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4583 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4584 	}
4585 
4586 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4587 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4588 		    DATA_TYPE_UINT32_ARRAY, 16,
4589 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4590 	}
4591 
4592 #if defined(CPU_IMP_L1_CACHE_PARITY)
4593 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4594 		cpu_payload_add_dcache(aflt, payload);
4595 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4596 		cpu_payload_add_icache(aflt, payload);
4597 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4598 
4599 #if defined(CHEETAH_PLUS)
4600 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4601 		cpu_payload_add_pcache(aflt, payload);
4602 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4603 		cpu_payload_add_tlb(aflt, payload);
4604 #endif	/* CHEETAH_PLUS */
4605 	/*
4606 	 * Create the FMRI that goes into the payload
4607 	 * and contains the unum info if necessary.
4608 	 */
4609 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) {
4610 		char unum[UNUM_NAMLEN] = "";
4611 		char sid[DIMM_SERIAL_ID_LEN] = "";
4612 		int len, ret, rtype, synd_code;
4613 		uint64_t offset = (uint64_t)-1;
4614 
4615 		rtype = cpu_error_to_resource_type(aflt);
4616 		switch (rtype) {
4617 
4618 		case ERRTYPE_MEMORY:
4619 		case ERRTYPE_ECACHE_DATA:
4620 
4621 			/*
4622 			 * Memory errors, do unum lookup
4623 			 */
4624 			if (*afar_status == AFLT_STAT_INVALID)
4625 				break;
4626 
4627 			if (rtype == ERRTYPE_ECACHE_DATA)
4628 				aflt->flt_status |= ECC_ECACHE;
4629 			else
4630 				aflt->flt_status &= ~ECC_ECACHE;
4631 
4632 			synd_code = synd_to_synd_code(*synd_status,
4633 			    aflt->flt_synd, ch_flt->flt_bit);
4634 
4635 			if (cpu_get_mem_unum_synd(synd_code, aflt, unum) != 0)
4636 				break;
4637 
4638 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4639 			    &len);
4640 
4641 			if (ret == 0) {
4642 				(void) cpu_get_mem_offset(aflt->flt_addr,
4643 				    &offset);
4644 			}
4645 
4646 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4647 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4648 			fm_payload_set(payload,
4649 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4650 			    DATA_TYPE_NVLIST, resource, NULL);
4651 			break;
4652 
4653 		case ERRTYPE_CPU:
4654 			/*
4655 			 * On-board processor array error, add cpu resource.
4656 			 */
4657 			cpu_fmri_cpu_set(resource, aflt->flt_inst);
4658 			fm_payload_set(payload,
4659 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4660 			    DATA_TYPE_NVLIST, resource, NULL);
4661 			break;
4662 		}
4663 	}
4664 }
4665 
4666 /*
4667  * Initialize the way info if necessary.
4668  */
4669 void
4670 cpu_ereport_init(struct async_flt *aflt)
4671 {
4672 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4673 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4674 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4675 	int i;
4676 
4677 	/*
4678 	 * Initialize the info in the CPU logout structure.
4679 	 * The I$/D$ way information is not initialized here
4680 	 * since it is captured in the logout assembly code.
4681 	 */
4682 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4683 		(ecp + i)->ec_way = i;
4684 
4685 	for (i = 0; i < PN_L2_NWAYS; i++)
4686 		(l2p + i)->ec_way = i;
4687 }
4688 
4689 /*
4690  * Returns whether fault address is valid for this error bit and
4691  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4692  */
4693 int
4694 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4695 {
4696 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4697 
4698 	return ((t_afsr_bit & C_AFSR_MEMORY) &&
4699 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4700 	    AFLT_STAT_VALID &&
4701 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4702 }
4703 
4704 /*
4705  * Returns whether fault address is valid based on the error bit for the
4706  * one event being queued and whether the address is "in memory".
4707  */
4708 static int
4709 cpu_flt_in_memory_one_event(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4710 {
4711 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4712 	int afar_status;
4713 	uint64_t afsr_errs, afsr_ow, *ow_bits;
4714 
4715 	if (!(t_afsr_bit & C_AFSR_MEMORY) ||
4716 	    !pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
4717 		return (0);
4718 
4719 	afsr_errs = ch_flt->afsr_errs;
4720 	afar_status = afsr_to_afar_status(afsr_errs, t_afsr_bit);
4721 
4722 	switch (afar_status) {
4723 	case AFLT_STAT_VALID:
4724 		return (1);
4725 
4726 	case AFLT_STAT_AMBIGUOUS:
4727 		/*
4728 		 * Status is ambiguous since another error bit (or bits)
4729 		 * of equal priority to the specified bit on in the afsr,
4730 		 * so check those bits. Return 1 only if the bits on in the
4731 		 * same class as the t_afsr_bit are also C_AFSR_MEMORY bits.
4732 		 * Otherwise not all the equal priority bits are for memory
4733 		 * errors, so return 0.
4734 		 */
4735 		ow_bits = afar_overwrite;
4736 		while ((afsr_ow = *ow_bits++) != 0) {
4737 			/*
4738 			 * Get other bits that are on in t_afsr_bit's priority
4739 			 * class to check for Memory Error bits only.
4740 			 */
4741 			if (afsr_ow & t_afsr_bit) {
4742 				if ((afsr_errs & afsr_ow) & ~C_AFSR_MEMORY)
4743 					return (0);
4744 				else
4745 					return (1);
4746 			}
4747 		}
4748 		/*FALLTHRU*/
4749 
4750 	default:
4751 		return (0);
4752 	}
4753 }
4754 
4755 static void
4756 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4757 {
4758 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4759 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4760 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4761 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4762 #if defined(CPU_IMP_ECACHE_ASSOC)
4763 	int i, nway;
4764 #endif /* CPU_IMP_ECACHE_ASSOC */
4765 
4766 	/*
4767 	 * Check if the CPU log out captured was valid.
4768 	 */
4769 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4770 	    ch_flt->flt_data_incomplete)
4771 		return;
4772 
4773 #if defined(CPU_IMP_ECACHE_ASSOC)
4774 	nway = cpu_ecache_nway();
4775 	i =  cpu_ecache_line_valid(ch_flt);
4776 	if (i == 0 || i > nway) {
4777 		for (i = 0; i < nway; i++)
4778 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4779 	} else
4780 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4781 #else /* CPU_IMP_ECACHE_ASSOC */
4782 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4783 #endif /* CPU_IMP_ECACHE_ASSOC */
4784 
4785 #if defined(CHEETAH_PLUS)
4786 	pn_cpu_log_diag_l2_info(ch_flt);
4787 #endif /* CHEETAH_PLUS */
4788 
4789 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4790 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4791 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4792 	}
4793 
4794 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4795 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4796 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4797 		else
4798 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4799 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4800 	}
4801 }
4802 
4803 /*
4804  * Cheetah ECC calculation.
4805  *
4806  * We only need to do the calculation on the data bits and can ignore check
4807  * bit and Mtag bit terms in the calculation.
4808  */
4809 static uint64_t ch_ecc_table[9][2] = {
4810 	/*
4811 	 * low order 64-bits   high-order 64-bits
4812 	 */
4813 	{ 0x46bffffeccd1177f, 0x488800022100014c },
4814 	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
4815 	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
4816 	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
4817 	{ 0x7f5511421b113809, 0x901c88d84288aafe },
4818 	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
4819 	{ 0xf552181014448344, 0x7ff8f4443e411911 },
4820 	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
4821 	{ 0x3280008440001112, 0xfee88b337ffffd62 },
4822 };
4823 
4824 /*
4825  * 64-bit population count, use well-known popcnt trick.
4826  * We could use the UltraSPARC V9 POPC instruction, but some
4827  * CPUs including Cheetahplus and Jaguar do not support that
4828  * instruction.
4829  */
4830 int
4831 popc64(uint64_t val)
4832 {
4833 	int cnt;
4834 
4835 	for (cnt = 0; val != 0; val &= val - 1)
4836 		cnt++;
4837 	return (cnt);
4838 }
4839 
4840 /*
4841  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4842  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4843  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4844  * instead of doing all the xor's.
4845  */
4846 uint32_t
4847 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4848 {
4849 	int bitno, s;
4850 	int synd = 0;
4851 
4852 	for (bitno = 0; bitno < 9; bitno++) {
4853 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4854 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4855 		synd |= (s << bitno);
4856 	}
4857 	return (synd);
4858 
4859 }
4860 
4861 /*
4862  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4863  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4864  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4865  */
4866 static void
4867 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4868     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4869 {
4870 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4871 
4872 	if (reason &&
4873 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4874 		(void) strcat(reason, eccp->ec_reason);
4875 	}
4876 
4877 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4878 	ch_flt->flt_type = eccp->ec_flt_type;
4879 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4880 		ch_flt->flt_diag_data = *cdp;
4881 	else
4882 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4883 	aflt->flt_in_memory =
4884 	    cpu_flt_in_memory_one_event(ch_flt, ch_flt->flt_bit);
4885 
4886 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4887 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4888 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4889 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4890 	else
4891 		aflt->flt_synd = 0;
4892 
4893 	aflt->flt_payload = eccp->ec_err_payload;
4894 
4895 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4896 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4897 		cpu_errorq_dispatch(eccp->ec_err_class,
4898 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4899 		    aflt->flt_panic);
4900 	else
4901 		cpu_errorq_dispatch(eccp->ec_err_class,
4902 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4903 		    aflt->flt_panic);
4904 }
4905 
4906 /*
4907  * Queue events on async event queue one event per error bit.  First we
4908  * queue the events that we "expect" for the given trap, then we queue events
4909  * that we may not expect.  Return number of events queued.
4910  */
4911 int
4912 cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
4913     ch_cpu_logout_t *clop)
4914 {
4915 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4916 	ecc_type_to_info_t *eccp;
4917 	int nevents = 0;
4918 	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
4919 #if defined(CHEETAH_PLUS)
4920 	uint64_t orig_t_afsr_errs;
4921 #endif
4922 	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
4923 	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
4924 	ch_diag_data_t *cdp = NULL;
4925 
4926 	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);
4927 
4928 #if defined(CHEETAH_PLUS)
4929 	orig_t_afsr_errs = t_afsr_errs;
4930 
4931 	/*
4932 	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
4933 	 */
4934 	if (clop != NULL) {
4935 		/*
4936 		 * Set the AFSR and AFAR fields to the shadow registers.  The
4937 		 * flt_addr and flt_stat fields will be reset to the primaries
4938 		 * below, but the sdw_addr and sdw_stat will stay as the
4939 		 * secondaries.
4940 		 */
4941 		cdp = &clop->clo_sdw_data;
4942 		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
4943 		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
4944 		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
4945 		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
4946 		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);
4947 
4948 		/*
4949 		 * If the primary and shadow AFSR differ, tag the shadow as
4950 		 * the first fault.
4951 		 */
4952 		if ((primary_afar != cdp->chd_afar) ||
4953 		    (primary_afsr_errs != ch_flt->afsr_errs)) {
4954 			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
4955 		}
4956 
4957 		/*
4958 		 * Check AFSR bits as well as AFSR_EXT bits in order of
4959 		 * the AFAR overwrite priority. Our stored AFSR_EXT value
4960 		 * is expected to be zero for those CPUs which do not have
4961 		 * an AFSR_EXT register.
4962 		 */
4963 		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
4964 			if ((eccp->ec_afsr_bit &
4965 			    (ch_flt->afsr_errs & t_afsr_errs)) &&
4966 			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
4967 				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
4968 				cdp = NULL;
4969 				t_afsr_errs &= ~eccp->ec_afsr_bit;
4970 				nevents++;
4971 			}
4972 		}
4973 
4974 		/*
4975 		 * If the ME bit is on in the primary AFSR turn all the
4976 		 * error bits on again that may set the ME bit to make
4977 		 * sure we see the ME AFSR error logs.
4978 		 */
4979 		if ((primary_afsr & C_AFSR_ME) != 0)
4980 			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
4981 	}
4982 #endif	/* CHEETAH_PLUS */
4983 
4984 	if (clop != NULL)
4985 		cdp = &clop->clo_data;
4986 
4987 	/*
4988 	 * Queue expected errors, error bit and fault type must match
4989 	 * in the ecc_type_to_info table.
4990 	 */
4991 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
4992 	    eccp++) {
4993 		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
4994 		    (eccp->ec_flags & aflt->flt_status) != 0) {
4995 #if defined(SERRANO)
4996 			/*
4997 			 * For FRC/FRU errors on Serrano the afar2 captures
4998 			 * the address and the associated data is
4999 			 * in the shadow logout area.
5000 			 */
5001 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5002 				if (clop != NULL)
5003 					cdp = &clop->clo_sdw_data;
5004 				aflt->flt_addr = ch_flt->afar2;
5005 			} else {
5006 				if (clop != NULL)
5007 					cdp = &clop->clo_data;
5008 				aflt->flt_addr = primary_afar;
5009 			}
5010 #else	/* SERRANO */
5011 			aflt->flt_addr = primary_afar;
5012 #endif	/* SERRANO */
5013 			aflt->flt_stat = primary_afsr;
5014 			ch_flt->afsr_ext = primary_afsr_ext;
5015 			ch_flt->afsr_errs = primary_afsr_errs;
5016 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5017 			cdp = NULL;
5018 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5019 			nevents++;
5020 		}
5021 	}
5022 
5023 	/*
5024 	 * Queue unexpected errors, error bit only match.
5025 	 */
5026 	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
5027 	    eccp++) {
5028 		if (eccp->ec_afsr_bit & t_afsr_errs) {
5029 #if defined(SERRANO)
5030 			/*
5031 			 * For FRC/FRU errors on Serrano the afar2 captures
5032 			 * the address and the associated data is
5033 			 * in the shadow logout area.
5034 			 */
5035 			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
5036 				if (clop != NULL)
5037 					cdp = &clop->clo_sdw_data;
5038 				aflt->flt_addr = ch_flt->afar2;
5039 			} else {
5040 				if (clop != NULL)
5041 					cdp = &clop->clo_data;
5042 				aflt->flt_addr = primary_afar;
5043 			}
5044 #else	/* SERRANO */
5045 			aflt->flt_addr = primary_afar;
5046 #endif	/* SERRANO */
5047 			aflt->flt_stat = primary_afsr;
5048 			ch_flt->afsr_ext = primary_afsr_ext;
5049 			ch_flt->afsr_errs = primary_afsr_errs;
5050 			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
5051 			cdp = NULL;
5052 			t_afsr_errs &= ~eccp->ec_afsr_bit;
5053 			nevents++;
5054 		}
5055 	}
5056 	return (nevents);
5057 }
5058 
5059 /*
5060  * Return trap type number.
5061  */
5062 uint8_t
5063 flt_to_trap_type(struct async_flt *aflt)
5064 {
5065 	if (aflt->flt_status & ECC_I_TRAP)
5066 		return (TRAP_TYPE_ECC_I);
5067 	if (aflt->flt_status & ECC_D_TRAP)
5068 		return (TRAP_TYPE_ECC_D);
5069 	if (aflt->flt_status & ECC_F_TRAP)
5070 		return (TRAP_TYPE_ECC_F);
5071 	if (aflt->flt_status & ECC_C_TRAP)
5072 		return (TRAP_TYPE_ECC_C);
5073 	if (aflt->flt_status & ECC_DP_TRAP)
5074 		return (TRAP_TYPE_ECC_DP);
5075 	if (aflt->flt_status & ECC_IP_TRAP)
5076 		return (TRAP_TYPE_ECC_IP);
5077 	if (aflt->flt_status & ECC_ITLB_TRAP)
5078 		return (TRAP_TYPE_ECC_ITLB);
5079 	if (aflt->flt_status & ECC_DTLB_TRAP)
5080 		return (TRAP_TYPE_ECC_DTLB);
5081 	return (TRAP_TYPE_UNKNOWN);
5082 }
5083 
5084 /*
5085  * Decide an error type based on detector and leaky/partner tests.
5086  * The following array is used for quick translation - it must
5087  * stay in sync with ce_dispact_t.
5088  */
5089 
5090 static char *cetypes[] = {
5091 	CE_DISP_DESC_U,
5092 	CE_DISP_DESC_I,
5093 	CE_DISP_DESC_PP,
5094 	CE_DISP_DESC_P,
5095 	CE_DISP_DESC_L,
5096 	CE_DISP_DESC_PS,
5097 	CE_DISP_DESC_S
5098 };
5099 
5100 char *
5101 flt_to_error_type(struct async_flt *aflt)
5102 {
5103 	ce_dispact_t dispact, disp;
5104 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
5105 
5106 	/*
5107 	 * The memory payload bundle is shared by some events that do
5108 	 * not perform any classification.  For those flt_disp will be
5109 	 * 0 and we will return "unknown".
5110 	 */
5111 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
5112 		return (cetypes[CE_DISP_UNKNOWN]);
5113 
5114 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
5115 
5116 	/*
5117 	 * It is also possible that no scrub/classification was performed
5118 	 * by the detector, for instance where a disrupting error logged
5119 	 * in the AFSR while CEEN was off in cpu_deferred_error.
5120 	 */
5121 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
5122 		return (cetypes[CE_DISP_UNKNOWN]);
5123 
5124 	/*
5125 	 * Lookup type in initial classification/action table
5126 	 */
5127 	dispact = CE_DISPACT(ce_disp_table,
5128 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
5129 	    CE_XDIAG_STATE(dtcrinfo),
5130 	    CE_XDIAG_CE1SEEN(dtcrinfo),
5131 	    CE_XDIAG_CE2SEEN(dtcrinfo));
5132 
5133 	/*
5134 	 * A bad lookup is not something to panic production systems for.
5135 	 */
5136 	ASSERT(dispact != CE_DISP_BAD);
5137 	if (dispact == CE_DISP_BAD)
5138 		return (cetypes[CE_DISP_UNKNOWN]);
5139 
5140 	disp = CE_DISP(dispact);
5141 
5142 	switch (disp) {
5143 	case CE_DISP_UNKNOWN:
5144 	case CE_DISP_INTERMITTENT:
5145 		break;
5146 
5147 	case CE_DISP_POSS_PERS:
5148 		/*
5149 		 * "Possible persistent" errors to which we have applied a valid
5150 		 * leaky test can be separated into "persistent" or "leaky".
5151 		 */
5152 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
5153 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
5154 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
5155 			    CE_XDIAG_CE2SEEN(lkyinfo))
5156 				disp = CE_DISP_LEAKY;
5157 			else
5158 				disp = CE_DISP_PERS;
5159 		}
5160 		break;
5161 
5162 	case CE_DISP_POSS_STICKY:
5163 		/*
5164 		 * Promote "possible sticky" results that have been
5165 		 * confirmed by a partner test to "sticky".  Unconfirmed
5166 		 * "possible sticky" events are left at that status - we do not
5167 		 * guess at any bad reader/writer etc status here.
5168 		 */
5169 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
5170 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
5171 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
5172 			disp = CE_DISP_STICKY;
5173 
5174 		/*
5175 		 * Promote "possible sticky" results on a uniprocessor
5176 		 * to "sticky"
5177 		 */
5178 		if (disp == CE_DISP_POSS_STICKY &&
5179 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
5180 			disp = CE_DISP_STICKY;
5181 		break;
5182 
5183 	default:
5184 		disp = CE_DISP_UNKNOWN;
5185 		break;
5186 	}
5187 
5188 	return (cetypes[disp]);
5189 }
5190 
5191 /*
5192  * Given the entire afsr, the specific bit to check and a prioritized list of
5193  * error bits, determine the validity of the various overwrite priority
5194  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
5195  * different overwrite priorities.
5196  *
5197  * Given a specific afsr error bit and the entire afsr, there are three cases:
5198  *   INVALID:	The specified bit is lower overwrite priority than some other
5199  *		error bit which is on in the afsr (or IVU/IVC).
5200  *   VALID:	The specified bit is higher priority than all other error bits
5201  *		which are on in the afsr.
5202  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
5203  *		bit is on in the afsr.
5204  */
5205 int
5206 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
5207 {
5208 	uint64_t afsr_ow;
5209 
5210 	while ((afsr_ow = *ow_bits++) != 0) {
5211 		/*
5212 		 * If bit is in the priority class, check to see if another
5213 		 * bit in the same class is on => ambiguous.  Otherwise,
5214 		 * the value is valid.  If the bit is not on at this priority
5215 		 * class, but a higher priority bit is on, then the value is
5216 		 * invalid.
5217 		 */
5218 		if (afsr_ow & afsr_bit) {
5219 			/*
5220 			 * If equal pri bit is on, ambiguous.
5221 			 */
5222 			if (afsr & (afsr_ow & ~afsr_bit))
5223 				return (AFLT_STAT_AMBIGUOUS);
5224 			return (AFLT_STAT_VALID);
5225 		} else if (afsr & afsr_ow)
5226 			break;
5227 	}
5228 
5229 	/*
5230 	 * We didn't find a match or a higher priority bit was on.  Not
5231 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5232 	 */
5233 	return (AFLT_STAT_INVALID);
5234 }
5235 
5236 static int
5237 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5238 {
5239 #if defined(SERRANO)
5240 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5241 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5242 	else
5243 #endif	/* SERRANO */
5244 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5245 }
5246 
5247 static int
5248 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5249 {
5250 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5251 }
5252 
5253 static int
5254 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5255 {
5256 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5257 }
5258 
5259 static int
5260 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5261 {
5262 #ifdef lint
5263 	cpuid = cpuid;
5264 #endif
5265 #if defined(CHEETAH_PLUS)
5266 	/*
5267 	 * The M_SYND overwrite policy is combined with the E_SYND overwrite
5268 	 * policy for Cheetah+ and separate for Panther CPUs.
5269 	 */
5270 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5271 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5272 			return (afsr_to_msynd_status(afsr, afsr_bit));
5273 		else
5274 			return (afsr_to_esynd_status(afsr, afsr_bit));
5275 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5276 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5277 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5278 		else
5279 			return (afsr_to_esynd_status(afsr, afsr_bit));
5280 #else /* CHEETAH_PLUS */
5281 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5282 		return (afsr_to_msynd_status(afsr, afsr_bit));
5283 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5284 		return (afsr_to_esynd_status(afsr, afsr_bit));
5285 #endif /* CHEETAH_PLUS */
5286 	} else {
5287 		return (AFLT_STAT_INVALID);
5288 	}
5289 }
5290 
5291 /*
5292  * Slave CPU stick synchronization.
5293  */
5294 void
5295 sticksync_slave(void)
5296 {
5297 	int 		i;
5298 	int		tries = 0;
5299 	int64_t		tskew;
5300 	int64_t		av_tskew;
5301 
5302 	kpreempt_disable();
5303 	/* wait for the master side */
5304 	while (stick_sync_cmd != SLAVE_START)
5305 		;
5306 	/*
5307 	 * Synchronization should only take a few tries at most. But in the
5308 	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
5309 	 * without it's stick synchronized wouldn't be a good citizen.
5310 	 */
5311 	while (slave_done == 0) {
5312 		/*
5313 		 * Time skew calculation.
5314 		 */
5315 		av_tskew = tskew = 0;
5316 
5317 		for (i = 0; i < stick_iter; i++) {
5318 			/* make location hot */
5319 			timestamp[EV_A_START] = 0;
5320 			stick_timestamp(&timestamp[EV_A_START]);
5321 
5322 			/* tell the master we're ready */
5323 			stick_sync_cmd = MASTER_START;
5324 
5325 			/* and wait */
5326 			while (stick_sync_cmd != SLAVE_CONT)
5327 				;
5328 			/* Event B end */
5329 			stick_timestamp(&timestamp[EV_B_END]);
5330 
5331 			/* calculate time skew */
5332 			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
5333 			    - (timestamp[EV_A_END] - timestamp[EV_A_START]))
5334 			    / 2;
5335 
5336 			/* keep running count */
5337 			av_tskew += tskew;
5338 		} /* for */
5339 
5340 		/*
5341 		 * Adjust stick for time skew if not within the max allowed;
5342 		 * otherwise we're all done.
5343 		 */
5344 		if (stick_iter != 0)
5345 			av_tskew = av_tskew/stick_iter;
5346 		if (ABS(av_tskew) > stick_tsk) {
5347 			/*
5348 			 * If the skew is 1 (the slave's STICK register
5349 			 * is 1 STICK ahead of the master's), stick_adj
5350 			 * could fail to adjust the slave's STICK register
5351 			 * if the STICK read on the slave happens to
5352 			 * align with the increment of the STICK.
5353 			 * Therefore, we increment the skew to 2.
5354 			 */
5355 			if (av_tskew == 1)
5356 				av_tskew++;
5357 			stick_adj(-av_tskew);
5358 		} else
5359 			slave_done = 1;
5360 #ifdef DEBUG
5361 		if (tries < DSYNC_ATTEMPTS)
5362 			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
5363 			    av_tskew;
5364 		++tries;
5365 #endif /* DEBUG */
5366 #ifdef lint
5367 		tries = tries;
5368 #endif
5369 
5370 	} /* while */
5371 
5372 	/* allow the master to finish */
5373 	stick_sync_cmd = EVENT_NULL;
5374 	kpreempt_enable();
5375 }
5376 
5377 /*
5378  * Master CPU side of stick synchronization.
5379  *  - timestamp end of Event A
5380  *  - timestamp beginning of Event B
5381  */
5382 void
5383 sticksync_master(void)
5384 {
5385 	int		i;
5386 
5387 	kpreempt_disable();
5388 	/* tell the slave we've started */
5389 	slave_done = 0;
5390 	stick_sync_cmd = SLAVE_START;
5391 
5392 	while (slave_done == 0) {
5393 		for (i = 0; i < stick_iter; i++) {
5394 			/* wait for the slave */
5395 			while (stick_sync_cmd != MASTER_START)
5396 				;
5397 			/* Event A end */
5398 			stick_timestamp(&timestamp[EV_A_END]);
5399 
5400 			/* make location hot */
5401 			timestamp[EV_B_START] = 0;
5402 			stick_timestamp(&timestamp[EV_B_START]);
5403 
5404 			/* tell the slave to continue */
5405 			stick_sync_cmd = SLAVE_CONT;
5406 		} /* for */
5407 
5408 		/* wait while slave calculates time skew */
5409 		while (stick_sync_cmd == SLAVE_CONT)
5410 			;
5411 	} /* while */
5412 	kpreempt_enable();
5413 }
5414 
5415 /*
5416  * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
5417  * do Spitfire hack of xcall'ing all the cpus to ask to check for them.  Also,
5418  * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
5419  * panic idle.
5420  */
5421 /*ARGSUSED*/
5422 void
5423 cpu_check_allcpus(struct async_flt *aflt)
5424 {}
5425 
5426 struct kmem_cache *ch_private_cache;
5427 
5428 /*
5429  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5430  * deallocate the scrubber data structures and cpu_private data structure.
5431  */
5432 void
5433 cpu_uninit_private(struct cpu *cp)
5434 {
5435 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5436 
5437 	ASSERT(chprp);
5438 	cpu_uninit_ecache_scrub_dr(cp);
5439 	CPU_PRIVATE(cp) = NULL;
5440 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5441 	kmem_cache_free(ch_private_cache, chprp);
5442 	cmp_delete_cpu(cp->cpu_id);
5443 
5444 }
5445 
5446 /*
5447  * Cheetah Cache Scrubbing
5448  *
5449  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5450  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5451  * protected by either parity or ECC.
5452  *
5453  * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
5454  * cache per second). Due to the the specifics of how the I$ control
5455  * logic works with respect to the ASI used to scrub I$ lines, the entire
5456  * I$ is scanned at once.
5457  */
5458 
5459 /*
5460  * Tuneables to enable and disable the scrubbing of the caches, and to tune
5461  * scrubbing behavior.  These may be changed via /etc/system or using mdb
5462  * on a running system.
5463  */
5464 int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */
5465 
5466 /*
5467  * The following are the PIL levels that the softints/cross traps will fire at.
5468  */
5469 uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
5470 uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
5471 uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */
5472 
5473 #if defined(JALAPENO)
5474 
5475 /*
5476  * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
5477  * on Jalapeno.
5478  */
5479 int ecache_scrub_enable = 0;
5480 
5481 #else	/* JALAPENO */
5482 
5483 /*
5484  * With all other cpu types, E$ scrubbing is on by default
5485  */
5486 int ecache_scrub_enable = 1;
5487 
5488 #endif	/* JALAPENO */
5489 
5490 
5491 #if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)
5492 
5493 /*
5494  * The I$ scrubber tends to cause latency problems for real-time SW, so it
5495  * is disabled by default on non-Cheetah systems
5496  */
5497 int icache_scrub_enable = 0;
5498 
5499 /*
5500  * Tuneables specifying the scrub calls per second and the scan rate
5501  * for each cache
5502  *
5503  * The cyclic times are set during boot based on the following values.
5504  * Changing these values in mdb after this time will have no effect.  If
5505  * a different value is desired, it must be set in /etc/system before a
5506  * reboot.
5507  */
5508 int ecache_calls_a_sec = 1;
5509 int dcache_calls_a_sec = 2;
5510 int icache_calls_a_sec = 2;
5511 
5512 int ecache_scan_rate_idle = 1;
5513 int ecache_scan_rate_busy = 1;
5514 int dcache_scan_rate_idle = 1;
5515 int dcache_scan_rate_busy = 1;
5516 int icache_scan_rate_idle = 1;
5517 int icache_scan_rate_busy = 1;
5518 
5519 #else	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5520 
5521 int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */
5522 
5523 int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
5524 int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
5525 int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */
5526 
5527 int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
5528 int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
5529 int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
5530 int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
5531 int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
5532 int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */
5533 
5534 #endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5535 
5536 /*
5537  * In order to scrub on offline cpus, a cross trap is sent.  The handler will
5538  * increment the outstanding request counter and schedule a softint to run
5539  * the scrubber.
5540  */
5541 extern xcfunc_t cache_scrubreq_tl1;
5542 
5543 /*
5544  * These are the softint functions for each cache scrubber
5545  */
5546 static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
5547 static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
5548 static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5549 
5550 /*
5551  * The cache scrub info table contains cache specific information
5552  * and allows for some of the scrub code to be table driven, reducing
5553  * duplication of cache similar code.
5554  *
5555  * This table keeps a copy of the value in the calls per second variable
5556  * (?cache_calls_a_sec).  This makes it much more difficult for someone
5557  * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
5558  * mdb in a misguided attempt to disable the scrubber).
5559  */
5560 struct scrub_info {
5561 	int		*csi_enable;	/* scrubber enable flag */
5562 	int		csi_freq;	/* scrubber calls per second */
5563 	int		csi_index;	/* index to chsm_outstanding[] */
5564 	uint64_t	csi_inum;	/* scrubber interrupt number */
5565 	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
5566 	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
5567 	char		csi_name[3];	/* cache name for this scrub entry */
5568 } cache_scrub_info[] = {
5569 { &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
5570 { &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
5571 { &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
5572 };
5573 
5574 /*
5575  * If scrubbing is enabled, increment the outstanding request counter.  If it
5576  * is 1 (meaning there were no previous requests outstanding), call
5577  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5578  * a self trap.
5579  */
5580 static void
5581 do_scrub(struct scrub_info *csi)
5582 {
5583 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5584 	int index = csi->csi_index;
5585 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5586 
5587 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5588 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5589 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5590 			    csi->csi_inum, 0);
5591 		}
5592 	}
5593 }
5594 
5595 /*
5596  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5597  * cross-trap the offline cpus.
5598  */
5599 static void
5600 do_scrub_offline(struct scrub_info *csi)
5601 {
5602 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5603 
5604 	if (CPUSET_ISNULL(cpu_offline_set)) {
5605 		/*
5606 		 * No offline cpus - nothing to do
5607 		 */
5608 		return;
5609 	}
5610 
5611 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5612 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5613 		    csi->csi_index);
5614 	}
5615 }
5616 
5617 /*
5618  * This is the initial setup for the scrubber cyclics - it sets the
5619  * interrupt level, frequency, and function to call.
5620  */
5621 /*ARGSUSED*/
5622 static void
5623 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5624     cyc_time_t *when)
5625 {
5626 	struct scrub_info *csi = (struct scrub_info *)arg;
5627 
5628 	ASSERT(csi != NULL);
5629 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5630 	hdlr->cyh_level = CY_LOW_LEVEL;
5631 	hdlr->cyh_arg = arg;
5632 
5633 	when->cyt_when = 0;	/* Start immediately */
5634 	when->cyt_interval = NANOSEC / csi->csi_freq;
5635 }
5636 
5637 /*
5638  * Initialization for cache scrubbing.
5639  * This routine is called AFTER all cpus have had cpu_init_private called
5640  * to initialize their private data areas.
5641  */
5642 void
5643 cpu_init_cache_scrub(void)
5644 {
5645 	int i;
5646 	struct scrub_info *csi;
5647 	cyc_omni_handler_t omni_hdlr;
5648 	cyc_handler_t offline_hdlr;
5649 	cyc_time_t when;
5650 
5651 	/*
5652 	 * save away the maximum number of lines for the D$
5653 	 */
5654 	dcache_nlines = dcache_size / dcache_linesize;
5655 
5656 	/*
5657 	 * register the softints for the cache scrubbing
5658 	 */
5659 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5660 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5661 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E], SOFTINT_MT);
5662 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5663 
5664 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5665 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5666 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D], SOFTINT_MT);
5667 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5668 
5669 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5670 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5671 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I], SOFTINT_MT);
5672 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5673 
5674 	/*
5675 	 * start the scrubbing for all the caches
5676 	 */
5677 	mutex_enter(&cpu_lock);
5678 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5679 
5680 		csi = &cache_scrub_info[i];
5681 
5682 		if (!(*csi->csi_enable))
5683 			continue;
5684 
5685 		/*
5686 		 * force the following to be true:
5687 		 *	1 <= calls_a_sec <= hz
5688 		 */
5689 		if (csi->csi_freq > hz) {
5690 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5691 			    "(%d); resetting to hz (%d)", csi->csi_name,
5692 			    csi->csi_freq, hz);
5693 			csi->csi_freq = hz;
5694 		} else if (csi->csi_freq < 1) {
5695 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5696 			    "(%d); resetting to 1", csi->csi_name,
5697 			    csi->csi_freq);
5698 			csi->csi_freq = 1;
5699 		}
5700 
5701 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5702 		omni_hdlr.cyo_offline = NULL;
5703 		omni_hdlr.cyo_arg = (void *)csi;
5704 
5705 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5706 		offline_hdlr.cyh_arg = (void *)csi;
5707 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5708 
5709 		when.cyt_when = 0;	/* Start immediately */
5710 		when.cyt_interval = NANOSEC / csi->csi_freq;
5711 
5712 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5713 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5714 	}
5715 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5716 	mutex_exit(&cpu_lock);
5717 }
5718 
5719 /*
5720  * Indicate that the specified cpu is idle.
5721  */
5722 void
5723 cpu_idle_ecache_scrub(struct cpu *cp)
5724 {
5725 	if (CPU_PRIVATE(cp) != NULL) {
5726 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5727 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5728 	}
5729 }
5730 
5731 /*
5732  * Indicate that the specified cpu is busy.
5733  */
5734 void
5735 cpu_busy_ecache_scrub(struct cpu *cp)
5736 {
5737 	if (CPU_PRIVATE(cp) != NULL) {
5738 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5739 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5740 	}
5741 }
5742 
5743 /*
5744  * Initialization for cache scrubbing for the specified cpu.
5745  */
5746 void
5747 cpu_init_ecache_scrub_dr(struct cpu *cp)
5748 {
5749 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5750 	int cpuid = cp->cpu_id;
5751 
5752 	/* initialize the number of lines in the caches */
5753 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5754 	    cpunodes[cpuid].ecache_linesize;
5755 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5756 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5757 
5758 	/*
5759 	 * do_scrub() and do_scrub_offline() check both the global
5760 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5761 	 * check this value before scrubbing.  Currently, we use it to
5762 	 * disable the E$ scrubber on multi-core cpus or while running at
5763 	 * slowed speed.  For now, just turn everything on and allow
5764 	 * cpu_init_private() to change it if necessary.
5765 	 */
5766 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5767 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5768 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5769 
5770 	cpu_busy_ecache_scrub(cp);
5771 }
5772 
5773 /*
5774  * Un-initialization for cache scrubbing for the specified cpu.
5775  */
5776 static void
5777 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5778 {
5779 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5780 
5781 	/*
5782 	 * un-initialize bookkeeping for cache scrubbing
5783 	 */
5784 	bzero(csmp, sizeof (ch_scrub_misc_t));
5785 
5786 	cpu_idle_ecache_scrub(cp);
5787 }
5788 
5789 /*
5790  * Called periodically on each CPU to scrub the D$.
5791  */
5792 static void
5793 scrub_dcache(int how_many)
5794 {
5795 	int i;
5796 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5797 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5798 
5799 	/*
5800 	 * scrub the desired number of lines
5801 	 */
5802 	for (i = 0; i < how_many; i++) {
5803 		/*
5804 		 * scrub a D$ line
5805 		 */
5806 		dcache_inval_line(index);
5807 
5808 		/*
5809 		 * calculate the next D$ line to scrub, assumes
5810 		 * that dcache_nlines is a power of 2
5811 		 */
5812 		index = (index + 1) & (dcache_nlines - 1);
5813 	}
5814 
5815 	/*
5816 	 * set the scrub index for the next visit
5817 	 */
5818 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5819 }
5820 
5821 /*
5822  * Handler for D$ scrub inum softint. Call scrub_dcache until
5823  * we decrement the outstanding request count to zero.
5824  */
5825 /*ARGSUSED*/
5826 static uint_t
5827 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5828 {
5829 	int i;
5830 	int how_many;
5831 	int outstanding;
5832 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5833 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5834 	struct scrub_info *csi = (struct scrub_info *)arg1;
5835 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5836 	    dcache_scan_rate_idle : dcache_scan_rate_busy;
5837 
5838 	/*
5839 	 * The scan rates are expressed in units of tenths of a
5840 	 * percent.  A scan rate of 1000 (100%) means the whole
5841 	 * cache is scanned every second.
5842 	 */
5843 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5844 
5845 	do {
5846 		outstanding = *countp;
5847 		for (i = 0; i < outstanding; i++) {
5848 			scrub_dcache(how_many);
5849 		}
5850 	} while (atomic_add_32_nv(countp, -outstanding));
5851 
5852 	return (DDI_INTR_CLAIMED);
5853 }
5854 
5855 /*
5856  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5857  * by invalidating lines. Due to the characteristics of the ASI which
5858  * is used to invalidate an I$ line, the entire I$ must be invalidated
5859  * vs. an individual I$ line.
5860  */
5861 static void
5862 scrub_icache(int how_many)
5863 {
5864 	int i;
5865 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5866 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5867 	int icache_nlines = csmp->chsm_icache_nlines;
5868 
5869 	/*
5870 	 * scrub the desired number of lines
5871 	 */
5872 	for (i = 0; i < how_many; i++) {
5873 		/*
5874 		 * since the entire I$ must be scrubbed at once,
5875 		 * wait until the index wraps to zero to invalidate
5876 		 * the entire I$
5877 		 */
5878 		if (index == 0) {
5879 			icache_inval_all();
5880 		}
5881 
5882 		/*
5883 		 * calculate the next I$ line to scrub, assumes
5884 		 * that chsm_icache_nlines is a power of 2
5885 		 */
5886 		index = (index + 1) & (icache_nlines - 1);
5887 	}
5888 
5889 	/*
5890 	 * set the scrub index for the next visit
5891 	 */
5892 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5893 }
5894 
5895 /*
5896  * Handler for I$ scrub inum softint. Call scrub_icache until
5897  * we decrement the outstanding request count to zero.
5898  */
5899 /*ARGSUSED*/
5900 static uint_t
5901 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5902 {
5903 	int i;
5904 	int how_many;
5905 	int outstanding;
5906 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5907 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5908 	struct scrub_info *csi = (struct scrub_info *)arg1;
5909 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5910 	    icache_scan_rate_idle : icache_scan_rate_busy;
5911 	int icache_nlines = csmp->chsm_icache_nlines;
5912 
5913 	/*
5914 	 * The scan rates are expressed in units of tenths of a
5915 	 * percent.  A scan rate of 1000 (100%) means the whole
5916 	 * cache is scanned every second.
5917 	 */
5918 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5919 
5920 	do {
5921 		outstanding = *countp;
5922 		for (i = 0; i < outstanding; i++) {
5923 			scrub_icache(how_many);
5924 		}
5925 	} while (atomic_add_32_nv(countp, -outstanding));
5926 
5927 	return (DDI_INTR_CLAIMED);
5928 }
5929 
5930 /*
5931  * Called periodically on each CPU to scrub the E$.
5932  */
5933 static void
5934 scrub_ecache(int how_many)
5935 {
5936 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5937 	int i;
5938 	int cpuid = CPU->cpu_id;
5939 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5940 	int nlines = csmp->chsm_ecache_nlines;
5941 	int linesize = cpunodes[cpuid].ecache_linesize;
5942 	int ec_set_size = cpu_ecache_set_size(CPU);
5943 
5944 	/*
5945 	 * scrub the desired number of lines
5946 	 */
5947 	for (i = 0; i < how_many; i++) {
5948 		/*
5949 		 * scrub the E$ line
5950 		 */
5951 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5952 		    ec_set_size);
5953 
5954 		/*
5955 		 * calculate the next E$ line to scrub based on twice
5956 		 * the number of E$ lines (to displace lines containing
5957 		 * flush area data), assumes that the number of lines
5958 		 * is a power of 2
5959 		 */
5960 		index = (index + 1) & ((nlines << 1) - 1);
5961 	}
5962 
5963 	/*
5964 	 * set the ecache scrub index for the next visit
5965 	 */
5966 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5967 }
5968 
5969 /*
5970  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5971  * we decrement the outstanding request count to zero.
5972  *
5973  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5974  * become negative after the atomic_add_32_nv().  This is not a problem, as
5975  * the next trip around the loop won't scrub anything, and the next add will
5976  * reset the count back to zero.
5977  */
5978 /*ARGSUSED*/
5979 static uint_t
5980 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5981 {
5982 	int i;
5983 	int how_many;
5984 	int outstanding;
5985 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5986 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5987 	struct scrub_info *csi = (struct scrub_info *)arg1;
5988 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5989 	    ecache_scan_rate_idle : ecache_scan_rate_busy;
5990 	int ecache_nlines = csmp->chsm_ecache_nlines;
5991 
5992 	/*
5993 	 * The scan rates are expressed in units of tenths of a
5994 	 * percent.  A scan rate of 1000 (100%) means the whole
5995 	 * cache is scanned every second.
5996 	 */
5997 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5998 
5999 	do {
6000 		outstanding = *countp;
6001 		for (i = 0; i < outstanding; i++) {
6002 			scrub_ecache(how_many);
6003 		}
6004 	} while (atomic_add_32_nv(countp, -outstanding));
6005 
6006 	return (DDI_INTR_CLAIMED);
6007 }
6008 
6009 /*
6010  * Timeout function to reenable CE
6011  */
6012 static void
6013 cpu_delayed_check_ce_errors(void *arg)
6014 {
6015 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
6016 	    TQ_NOSLEEP)) {
6017 		(void) timeout(cpu_delayed_check_ce_errors, arg,
6018 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6019 	}
6020 }
6021 
6022 /*
6023  * CE Deferred Re-enable after trap.
6024  *
6025  * When the CPU gets a disrupting trap for any of the errors
6026  * controlled by the CEEN bit, CEEN is disabled in the trap handler
6027  * immediately. To eliminate the possibility of multiple CEs causing
6028  * recursive stack overflow in the trap handler, we cannot
6029  * reenable CEEN while still running in the trap handler. Instead,
6030  * after a CE is logged on a CPU, we schedule a timeout function,
6031  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
6032  * seconds. This function will check whether any further CEs
6033  * have occurred on that CPU, and if none have, will reenable CEEN.
6034  *
6035  * If further CEs have occurred while CEEN is disabled, another
6036  * timeout will be scheduled. This is to ensure that the CPU can
6037  * make progress in the face of CE 'storms', and that it does not
6038  * spend all its time logging CE errors.
6039  */
6040 static void
6041 cpu_check_ce_errors(void *arg)
6042 {
6043 	int	cpuid = (int)(uintptr_t)arg;
6044 	cpu_t	*cp;
6045 
6046 	/*
6047 	 * We acquire cpu_lock.
6048 	 */
6049 	ASSERT(curthread->t_pil == 0);
6050 
6051 	/*
6052 	 * verify that the cpu is still around, DR
6053 	 * could have got there first ...
6054 	 */
6055 	mutex_enter(&cpu_lock);
6056 	cp = cpu_get(cpuid);
6057 	if (cp == NULL) {
6058 		mutex_exit(&cpu_lock);
6059 		return;
6060 	}
6061 	/*
6062 	 * make sure we don't migrate across CPUs
6063 	 * while checking our CE status.
6064 	 */
6065 	kpreempt_disable();
6066 
6067 	/*
6068 	 * If we are running on the CPU that got the
6069 	 * CE, we can do the checks directly.
6070 	 */
6071 	if (cp->cpu_id == CPU->cpu_id) {
6072 		mutex_exit(&cpu_lock);
6073 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
6074 		kpreempt_enable();
6075 		return;
6076 	}
6077 	kpreempt_enable();
6078 
6079 	/*
6080 	 * send an x-call to get the CPU that originally
6081 	 * got the CE to do the necessary checks. If we can't
6082 	 * send the x-call, reschedule the timeout, otherwise we
6083 	 * lose CEEN forever on that CPU.
6084 	 */
6085 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
6086 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
6087 		    TIMEOUT_CEEN_CHECK, 0);
6088 		mutex_exit(&cpu_lock);
6089 	} else {
6090 		/*
6091 		 * When the CPU is not accepting xcalls, or
6092 		 * the processor is offlined, we don't want to
6093 		 * incur the extra overhead of trying to schedule the
6094 		 * CE timeout indefinitely. However, we don't want to lose
6095 		 * CE checking forever.
6096 		 *
6097 		 * Keep rescheduling the timeout, accepting the additional
6098 		 * overhead as the cost of correctness in the case where we get
6099 		 * a CE, disable CEEN, offline the CPU during the
6100 		 * the timeout interval, and then online it at some
6101 		 * point in the future. This is unlikely given the short
6102 		 * cpu_ceen_delay_secs.
6103 		 */
6104 		mutex_exit(&cpu_lock);
6105 		(void) timeout(cpu_delayed_check_ce_errors,
6106 		    (void *)(uintptr_t)cp->cpu_id,
6107 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
6108 	}
6109 }
6110 
6111 /*
6112  * This routine will check whether CEs have occurred while
6113  * CEEN is disabled. Any CEs detected will be logged and, if
6114  * possible, scrubbed.
6115  *
6116  * The memscrubber will also use this routine to clear any errors
6117  * caused by its scrubbing with CEEN disabled.
6118  *
6119  * flag == SCRUBBER_CEEN_CHECK
6120  *		called from memscrubber, just check/scrub, no reset
6121  *		paddr 	physical addr. for start of scrub pages
6122  *		vaddr 	virtual addr. for scrub area
6123  *		psz	page size of area to be scrubbed
6124  *
6125  * flag == TIMEOUT_CEEN_CHECK
6126  *		timeout function has triggered, reset timeout or CEEN
6127  *
6128  * Note: We must not migrate cpus during this function.  This can be
6129  * achieved by one of:
6130  *    - invoking as target of an x-call in which case we're at XCALL_PIL
6131  *	The flag value must be first xcall argument.
6132  *    - disabling kernel preemption.  This should be done for very short
6133  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
6134  *	scrub an extended area with cpu_check_block.  The call for
6135  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
6136  *	brief for this case.
6137  *    - binding to a cpu, eg with thread_affinity_set().  This is used
6138  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
6139  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
6140  */
6141 void
6142 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
6143 {
6144 	ch_cpu_errors_t	cpu_error_regs;
6145 	uint64_t	ec_err_enable;
6146 	uint64_t	page_offset;
6147 
6148 	/* Read AFSR */
6149 	get_cpu_error_state(&cpu_error_regs);
6150 
6151 	/*
6152 	 * If no CEEN errors have occurred during the timeout
6153 	 * interval, it is safe to re-enable CEEN and exit.
6154 	 */
6155 	if (((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) |
6156 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_CECC_ERRS)) == 0) {
6157 		if (flag == TIMEOUT_CEEN_CHECK &&
6158 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
6159 			set_error_enable(ec_err_enable | EN_REG_CEEN);
6160 		return;
6161 	}
6162 
6163 	/*
6164 	 * Ensure that CEEN was not reenabled (maybe by DR) before
6165 	 * we log/clear the error.
6166 	 */
6167 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
6168 		set_error_enable(ec_err_enable & ~EN_REG_CEEN);
6169 
6170 	/*
6171 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
6172 	 * timeout will be rescheduled when the error is logged.
6173 	 */
6174 	if (!((cpu_error_regs.afsr & cpu_ce_not_deferred) |
6175 	    (cpu_error_regs.afsr_ext & cpu_ce_not_deferred_ext)))
6176 		cpu_ce_detected(&cpu_error_regs,
6177 		    CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
6178 	else
6179 		cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
6180 
6181 	/*
6182 	 * If the memory scrubber runs while CEEN is
6183 	 * disabled, (or if CEEN is disabled during the
6184 	 * scrub as a result of a CE being triggered by
6185 	 * it), the range being scrubbed will not be
6186 	 * completely cleaned. If there are multiple CEs
6187 	 * in the range at most two of these will be dealt
6188 	 * with, (one by the trap handler and one by the
6189 	 * timeout). It is also possible that none are dealt
6190 	 * with, (CEEN disabled and another CE occurs before
6191 	 * the timeout triggers). So to ensure that the
6192 	 * memory is actually scrubbed, we have to access each
6193 	 * memory location in the range and then check whether
6194 	 * that access causes a CE.
6195 	 */
6196 	if (flag == SCRUBBER_CEEN_CHECK && va) {
6197 		if ((cpu_error_regs.afar >= pa) &&
6198 		    (cpu_error_regs.afar < (pa + psz))) {
6199 			/*
6200 			 * Force a load from physical memory for each
6201 			 * 64-byte block, then check AFSR to determine
6202 			 * whether this access caused an error.
6203 			 *
6204 			 * This is a slow way to do a scrub, but as it will
6205 			 * only be invoked when the memory scrubber actually
6206 			 * triggered a CE, it should not happen too
6207 			 * frequently.
6208 			 *
6209 			 * cut down what we need to check as the scrubber
6210 			 * has verified up to AFAR, so get it's offset
6211 			 * into the page and start there.
6212 			 */
6213 			page_offset = (uint64_t)(cpu_error_regs.afar &
6214 			    (psz - 1));
6215 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
6216 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
6217 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
6218 			    psz);
6219 		}
6220 	}
6221 
6222 	/*
6223 	 * Reset error enable if this CE is not masked.
6224 	 */
6225 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6226 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6227 		set_error_enable(ec_err_enable | EN_REG_CEEN);
6228 
6229 }
6230 
6231 /*
6232  * Attempt a cpu logout for an error that we did not trap for, such
6233  * as a CE noticed with CEEN off.  It is assumed that we are still running
6234  * on the cpu that took the error and that we cannot migrate.  Returns
6235  * 0 on success, otherwise nonzero.
6236  */
6237 static int
6238 cpu_ce_delayed_ec_logout(uint64_t afar)
6239 {
6240 	ch_cpu_logout_t *clop;
6241 
6242 	if (CPU_PRIVATE(CPU) == NULL)
6243 		return (0);
6244 
6245 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6246 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6247 	    LOGOUT_INVALID)
6248 		return (0);
6249 
6250 	cpu_delayed_logout(afar, clop);
6251 	return (1);
6252 }
6253 
6254 /*
6255  * We got an error while CEEN was disabled. We
6256  * need to clean up after it and log whatever
6257  * information we have on the CE.
6258  */
6259 void
6260 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6261 {
6262 	ch_async_flt_t 	ch_flt;
6263 	struct async_flt *aflt;
6264 	char 		pr_reason[MAX_REASON_STRING];
6265 
6266 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6267 	ch_flt.flt_trapped_ce = flag;
6268 	aflt = (struct async_flt *)&ch_flt;
6269 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6270 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6271 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6272 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6273 	aflt->flt_addr = cpu_error_regs->afar;
6274 #if defined(SERRANO)
6275 	ch_flt.afar2 = cpu_error_regs->afar2;
6276 #endif	/* SERRANO */
6277 	aflt->flt_pc = NULL;
6278 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6279 	aflt->flt_tl = 0;
6280 	aflt->flt_panic = 0;
6281 	cpu_log_and_clear_ce(&ch_flt);
6282 
6283 	/*
6284 	 * check if we caused any errors during cleanup
6285 	 */
6286 	if (clear_errors(&ch_flt)) {
6287 		pr_reason[0] = '\0';
6288 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6289 		    NULL);
6290 	}
6291 }
6292 
6293 /*
6294  * Log/clear CEEN-controlled disrupting errors
6295  */
6296 static void
6297 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6298 {
6299 	struct async_flt *aflt;
6300 	uint64_t afsr, afsr_errs;
6301 	ch_cpu_logout_t *clop;
6302 	char 		pr_reason[MAX_REASON_STRING];
6303 	on_trap_data_t	*otp = curthread->t_ontrap;
6304 
6305 	aflt = (struct async_flt *)ch_flt;
6306 	afsr = aflt->flt_stat;
6307 	afsr_errs = ch_flt->afsr_errs;
6308 	aflt->flt_id = gethrtime_waitfree();
6309 	aflt->flt_bus_id = getprocessorid();
6310 	aflt->flt_inst = CPU->cpu_id;
6311 	aflt->flt_prot = AFLT_PROT_NONE;
6312 	aflt->flt_class = CPU_FAULT;
6313 	aflt->flt_status = ECC_C_TRAP;
6314 
6315 	pr_reason[0] = '\0';
6316 	/*
6317 	 * Get the CPU log out info for Disrupting Trap.
6318 	 */
6319 	if (CPU_PRIVATE(CPU) == NULL) {
6320 		clop = NULL;
6321 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6322 	} else {
6323 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6324 	}
6325 
6326 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6327 		ch_cpu_errors_t cpu_error_regs;
6328 
6329 		get_cpu_error_state(&cpu_error_regs);
6330 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6331 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6332 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6333 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6334 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6335 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6336 		clop->clo_sdw_data.chd_afsr_ext =
6337 		    cpu_error_regs.shadow_afsr_ext;
6338 #if defined(SERRANO)
6339 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6340 #endif	/* SERRANO */
6341 		ch_flt->flt_data_incomplete = 1;
6342 
6343 		/*
6344 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6345 		 * The trap handler does it for CEEN enabled errors
6346 		 * so we need to do it here.
6347 		 */
6348 		set_cpu_error_state(&cpu_error_regs);
6349 	}
6350 
6351 #if defined(JALAPENO) || defined(SERRANO)
6352 	/*
6353 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6354 	 * For Serrano, even thou we do have the AFAR, we still do the
6355 	 * scrub on the RCE side since that's where the error type can
6356 	 * be properly classified as intermittent, persistent, etc.
6357 	 *
6358 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6359 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6360 	 * the flt_status bits.
6361 	 */
6362 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6363 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6364 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6365 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6366 	}
6367 #else /* JALAPENO || SERRANO */
6368 	/*
6369 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6370 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6371 	 * the flt_status bits.
6372 	 */
6373 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6374 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6375 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6376 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6377 		}
6378 	}
6379 
6380 #endif /* JALAPENO || SERRANO */
6381 
6382 	/*
6383 	 * Update flt_prot if this error occurred under on_trap protection.
6384 	 */
6385 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6386 		aflt->flt_prot = AFLT_PROT_EC;
6387 
6388 	/*
6389 	 * Queue events on the async event queue, one event per error bit.
6390 	 */
6391 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6392 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6393 		ch_flt->flt_type = CPU_INV_AFSR;
6394 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6395 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6396 		    aflt->flt_panic);
6397 	}
6398 
6399 	/*
6400 	 * Zero out + invalidate CPU logout.
6401 	 */
6402 	if (clop) {
6403 		bzero(clop, sizeof (ch_cpu_logout_t));
6404 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6405 	}
6406 
6407 	/*
6408 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6409 	 * was disabled, we need to flush either the entire
6410 	 * E$ or an E$ line.
6411 	 */
6412 #if defined(JALAPENO) || defined(SERRANO)
6413 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6414 #else	/* JALAPENO || SERRANO */
6415 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6416 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6417 #endif	/* JALAPENO || SERRANO */
6418 		cpu_error_ecache_flush(ch_flt);
6419 
6420 }
6421 
6422 /*
6423  * depending on the error type, we determine whether we
6424  * need to flush the entire ecache or just a line.
6425  */
6426 static int
6427 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6428 {
6429 	struct async_flt *aflt;
6430 	uint64_t	afsr;
6431 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6432 
6433 	aflt = (struct async_flt *)ch_flt;
6434 	afsr = aflt->flt_stat;
6435 
6436 	/*
6437 	 * If we got multiple errors, no point in trying
6438 	 * the individual cases, just flush the whole cache
6439 	 */
6440 	if (afsr & C_AFSR_ME) {
6441 		return (ECACHE_FLUSH_ALL);
6442 	}
6443 
6444 	/*
6445 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6446 	 * was disabled, we need to flush entire E$. We can't just
6447 	 * flush the cache line affected as the ME bit
6448 	 * is not set when multiple correctable errors of the same
6449 	 * type occur, so we might have multiple CPC or EDC errors,
6450 	 * with only the first recorded.
6451 	 */
6452 #if defined(JALAPENO) || defined(SERRANO)
6453 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6454 #else	/* JALAPENO || SERRANO */
6455 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6456 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6457 #endif	/* JALAPENO || SERRANO */
6458 		return (ECACHE_FLUSH_ALL);
6459 	}
6460 
6461 #if defined(JALAPENO) || defined(SERRANO)
6462 	/*
6463 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6464 	 * flush the entire Ecache.
6465 	 */
6466 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6467 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6468 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6469 			return (ECACHE_FLUSH_LINE);
6470 		} else {
6471 			return (ECACHE_FLUSH_ALL);
6472 		}
6473 	}
6474 #else /* JALAPENO || SERRANO */
6475 	/*
6476 	 * If UE only is set, flush the Ecache line, otherwise
6477 	 * flush the entire Ecache.
6478 	 */
6479 	if (afsr_errs & C_AFSR_UE) {
6480 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6481 		    C_AFSR_UE) {
6482 			return (ECACHE_FLUSH_LINE);
6483 		} else {
6484 			return (ECACHE_FLUSH_ALL);
6485 		}
6486 	}
6487 #endif /* JALAPENO || SERRANO */
6488 
6489 	/*
6490 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6491 	 * flush the entire Ecache.
6492 	 */
6493 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6494 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6495 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6496 			return (ECACHE_FLUSH_LINE);
6497 		} else {
6498 			return (ECACHE_FLUSH_ALL);
6499 		}
6500 	}
6501 
6502 	/*
6503 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6504 	 * flush the entire Ecache.
6505 	 */
6506 	if (afsr_errs & C_AFSR_BERR) {
6507 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6508 			return (ECACHE_FLUSH_LINE);
6509 		} else {
6510 			return (ECACHE_FLUSH_ALL);
6511 		}
6512 	}
6513 
6514 	return (0);
6515 }
6516 
6517 void
6518 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6519 {
6520 	int	ecache_flush_flag =
6521 	    cpu_error_ecache_flush_required(ch_flt);
6522 
6523 	/*
6524 	 * Flush Ecache line or entire Ecache based on above checks.
6525 	 */
6526 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6527 		cpu_flush_ecache();
6528 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6529 		cpu_flush_ecache_line(ch_flt);
6530 	}
6531 
6532 }
6533 
6534 /*
6535  * Extract the PA portion from the E$ tag.
6536  */
6537 uint64_t
6538 cpu_ectag_to_pa(int setsize, uint64_t tag)
6539 {
6540 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6541 		return (JG_ECTAG_TO_PA(setsize, tag));
6542 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6543 		return (PN_L3TAG_TO_PA(tag));
6544 	else
6545 		return (CH_ECTAG_TO_PA(setsize, tag));
6546 }
6547 
6548 /*
6549  * Convert the E$ tag PA into an E$ subblock index.
6550  */
6551 int
6552 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6553 {
6554 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6555 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6556 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6557 		/* Panther has only one subblock per line */
6558 		return (0);
6559 	else
6560 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6561 }
6562 
6563 /*
6564  * All subblocks in an E$ line must be invalid for
6565  * the line to be invalid.
6566  */
6567 int
6568 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6569 {
6570 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6571 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6572 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6573 		return (PN_L3_LINE_INVALID(tag));
6574 	else
6575 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6576 }
6577 
6578 /*
6579  * Extract state bits for a subblock given the tag.  Note that for Panther
6580  * this works on both l2 and l3 tags.
6581  */
6582 int
6583 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6584 {
6585 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6586 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6587 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6588 		return (tag & CH_ECSTATE_MASK);
6589 	else
6590 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6591 }
6592 
6593 /*
6594  * Cpu specific initialization.
6595  */
6596 void
6597 cpu_mp_init(void)
6598 {
6599 #ifdef	CHEETAHPLUS_ERRATUM_25
6600 	if (cheetah_sendmondo_recover) {
6601 		cheetah_nudge_init();
6602 	}
6603 #endif
6604 }
6605 
6606 void
6607 cpu_ereport_post(struct async_flt *aflt)
6608 {
6609 	char *cpu_type, buf[FM_MAX_CLASS];
6610 	nv_alloc_t *nva = NULL;
6611 	nvlist_t *ereport, *detector, *resource;
6612 	errorq_elem_t *eqep;
6613 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6614 	char unum[UNUM_NAMLEN];
6615 	int synd_code;
6616 	uint8_t msg_type;
6617 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6618 
6619 	if (aflt->flt_panic || panicstr) {
6620 		eqep = errorq_reserve(ereport_errorq);
6621 		if (eqep == NULL)
6622 			return;
6623 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6624 		nva = errorq_elem_nva(ereport_errorq, eqep);
6625 	} else {
6626 		ereport = fm_nvlist_create(nva);
6627 	}
6628 
6629 	/*
6630 	 * Create the scheme "cpu" FMRI.
6631 	 */
6632 	detector = fm_nvlist_create(nva);
6633 	resource = fm_nvlist_create(nva);
6634 	switch (cpunodes[aflt->flt_inst].implementation) {
6635 	case CHEETAH_IMPL:
6636 		cpu_type = FM_EREPORT_CPU_USIII;
6637 		break;
6638 	case CHEETAH_PLUS_IMPL:
6639 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6640 		break;
6641 	case JALAPENO_IMPL:
6642 		cpu_type = FM_EREPORT_CPU_USIIIi;
6643 		break;
6644 	case SERRANO_IMPL:
6645 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6646 		break;
6647 	case JAGUAR_IMPL:
6648 		cpu_type = FM_EREPORT_CPU_USIV;
6649 		break;
6650 	case PANTHER_IMPL:
6651 		cpu_type = FM_EREPORT_CPU_USIVplus;
6652 		break;
6653 	default:
6654 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6655 		break;
6656 	}
6657 
6658 	cpu_fmri_cpu_set(detector, aflt->flt_inst);
6659 
6660 	/*
6661 	 * Encode all the common data into the ereport.
6662 	 */
6663 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6664 	    FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6665 
6666 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6667 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6668 	    detector, NULL);
6669 
6670 	/*
6671 	 * Encode the error specific data that was saved in
6672 	 * the async_flt structure into the ereport.
6673 	 */
6674 	cpu_payload_add_aflt(aflt, ereport, resource,
6675 	    &plat_ecc_ch_flt.ecaf_afar_status,
6676 	    &plat_ecc_ch_flt.ecaf_synd_status);
6677 
6678 	if (aflt->flt_panic || panicstr) {
6679 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6680 	} else {
6681 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6682 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6683 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6684 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6685 	}
6686 	/*
6687 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6688 	 * to the SC olny if it can process it.
6689 	 */
6690 
6691 	if (&plat_ecc_capability_sc_get &&
6692 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6693 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6694 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6695 			/*
6696 			 * If afar status is not invalid do a unum lookup.
6697 			 */
6698 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6699 			    AFLT_STAT_INVALID) {
6700 				synd_code = synd_to_synd_code(
6701 				    plat_ecc_ch_flt.ecaf_synd_status,
6702 				    aflt->flt_synd, ch_flt->flt_bit);
6703 				(void) cpu_get_mem_unum_synd(synd_code,
6704 				    aflt, unum);
6705 			} else {
6706 				unum[0] = '\0';
6707 			}
6708 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6709 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6710 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6711 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6712 			    ch_flt->flt_sdw_afsr_ext;
6713 
6714 			if (&plat_log_fruid_error2)
6715 				plat_log_fruid_error2(msg_type, unum, aflt,
6716 				    &plat_ecc_ch_flt);
6717 		}
6718 	}
6719 }
6720 
6721 void
6722 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6723 {
6724 	int status;
6725 	ddi_fm_error_t de;
6726 
6727 	bzero(&de, sizeof (ddi_fm_error_t));
6728 
6729 	de.fme_version = DDI_FME_VERSION;
6730 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6731 	    FM_ENA_FMT1);
6732 	de.fme_flag = expected;
6733 	de.fme_bus_specific = (void *)aflt->flt_addr;
6734 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6735 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6736 		aflt->flt_panic = 1;
6737 }
6738 
6739 void
6740 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6741     errorq_t *eqp, uint_t flag)
6742 {
6743 	struct async_flt *aflt = (struct async_flt *)payload;
6744 
6745 	aflt->flt_erpt_class = error_class;
6746 	errorq_dispatch(eqp, payload, payload_sz, flag);
6747 }
6748 
6749 /*
6750  * This routine may be called by the IO module, but does not do
6751  * anything in this cpu module. The SERD algorithm is handled by
6752  * cpumem-diagnosis engine instead.
6753  */
6754 /*ARGSUSED*/
6755 void
6756 cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
6757 {}
6758 
6759 void
6760 adjust_hw_copy_limits(int ecache_size)
6761 {
6762 	/*
6763 	 * Set hw copy limits.
6764 	 *
6765 	 * /etc/system will be parsed later and can override one or more
6766 	 * of these settings.
6767 	 *
6768 	 * At this time, ecache size seems only mildly relevant.
6769 	 * We seem to run into issues with the d-cache and stalls
6770 	 * we see on misses.
6771 	 *
6772 	 * Cycle measurement indicates that 2 byte aligned copies fare
6773 	 * little better than doing things with VIS at around 512 bytes.
6774 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6775 	 * aligned is faster whenever the source and destination data
6776 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6777 	 * limit seems to be driven by the 2K write cache.
6778 	 * When more than 2K of copies are done in non-VIS mode, stores
6779 	 * backup in the write cache.  In VIS mode, the write cache is
6780 	 * bypassed, allowing faster cache-line writes aligned on cache
6781 	 * boundaries.
6782 	 *
6783 	 * In addition, in non-VIS mode, there is no prefetching, so
6784 	 * for larger copies, the advantage of prefetching to avoid even
6785 	 * occasional cache misses is enough to justify using the VIS code.
6786 	 *
6787 	 * During testing, it was discovered that netbench ran 3% slower
6788 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6789 	 * applications, data is only used once (copied to the output
6790 	 * buffer, then copied by the network device off the system).  Using
6791 	 * the VIS copy saves more L2 cache state.  Network copies are
6792 	 * around 1.3K to 1.5K in size for historical reasons.
6793 	 *
6794 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6795 	 * aligned copy even for large caches and 8 MB ecache.  The
6796 	 * infrastructure to allow different limits for different sized
6797 	 * caches is kept to allow further tuning in later releases.
6798 	 */
6799 
6800 	if (min_ecache_size == 0 && use_hw_bcopy) {
6801 		/*
6802 		 * First time through - should be before /etc/system
6803 		 * is read.
6804 		 * Could skip the checks for zero but this lets us
6805 		 * preserve any debugger rewrites.
6806 		 */
6807 		if (hw_copy_limit_1 == 0) {
6808 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6809 			priv_hcl_1 = hw_copy_limit_1;
6810 		}
6811 		if (hw_copy_limit_2 == 0) {
6812 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6813 			priv_hcl_2 = hw_copy_limit_2;
6814 		}
6815 		if (hw_copy_limit_4 == 0) {
6816 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6817 			priv_hcl_4 = hw_copy_limit_4;
6818 		}
6819 		if (hw_copy_limit_8 == 0) {
6820 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6821 			priv_hcl_8 = hw_copy_limit_8;
6822 		}
6823 		min_ecache_size = ecache_size;
6824 	} else {
6825 		/*
6826 		 * MP initialization. Called *after* /etc/system has
6827 		 * been parsed. One CPU has already been initialized.
6828 		 * Need to cater for /etc/system having scragged one
6829 		 * of our values.
6830 		 */
6831 		if (ecache_size == min_ecache_size) {
6832 			/*
6833 			 * Same size ecache. We do nothing unless we
6834 			 * have a pessimistic ecache setting. In that
6835 			 * case we become more optimistic (if the cache is
6836 			 * large enough).
6837 			 */
6838 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6839 				/*
6840 				 * Need to adjust hw_copy_limit* from our
6841 				 * pessimistic uniprocessor value to a more
6842 				 * optimistic UP value *iff* it hasn't been
6843 				 * reset.
6844 				 */
6845 				if ((ecache_size > 1048576) &&
6846 				    (priv_hcl_8 == hw_copy_limit_8)) {
6847 					if (ecache_size <= 2097152)
6848 						hw_copy_limit_8 = 4 *
6849 						    VIS_COPY_THRESHOLD;
6850 					else if (ecache_size <= 4194304)
6851 						hw_copy_limit_8 = 4 *
6852 						    VIS_COPY_THRESHOLD;
6853 					else
6854 						hw_copy_limit_8 = 4 *
6855 						    VIS_COPY_THRESHOLD;
6856 					priv_hcl_8 = hw_copy_limit_8;
6857 				}
6858 			}
6859 		} else if (ecache_size < min_ecache_size) {
6860 			/*
6861 			 * A different ecache size. Can this even happen?
6862 			 */
6863 			if (priv_hcl_8 == hw_copy_limit_8) {
6864 				/*
6865 				 * The previous value that we set
6866 				 * is unchanged (i.e., it hasn't been
6867 				 * scragged by /etc/system). Rewrite it.
6868 				 */
6869 				if (ecache_size <= 1048576)
6870 					hw_copy_limit_8 = 8 *
6871 					    VIS_COPY_THRESHOLD;
6872 				else if (ecache_size <= 2097152)
6873 					hw_copy_limit_8 = 8 *
6874 					    VIS_COPY_THRESHOLD;
6875 				else if (ecache_size <= 4194304)
6876 					hw_copy_limit_8 = 8 *
6877 					    VIS_COPY_THRESHOLD;
6878 				else
6879 					hw_copy_limit_8 = 10 *
6880 					    VIS_COPY_THRESHOLD;
6881 				priv_hcl_8 = hw_copy_limit_8;
6882 				min_ecache_size = ecache_size;
6883 			}
6884 		}
6885 	}
6886 }
6887 
6888 /*
6889  * Called from illegal instruction trap handler to see if we can attribute
6890  * the trap to a fpras check.
6891  */
6892 int
6893 fpras_chktrap(struct regs *rp)
6894 {
6895 	int op;
6896 	struct fpras_chkfngrp *cgp;
6897 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6898 
6899 	if (fpras_chkfngrps == NULL)
6900 		return (0);
6901 
6902 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6903 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6904 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6905 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6906 			break;
6907 	}
6908 	if (op == FPRAS_NCOPYOPS)
6909 		return (0);
6910 
6911 	/*
6912 	 * This is an fpRAS failure caught through an illegal
6913 	 * instruction - trampoline.
6914 	 */
6915 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6916 	rp->r_npc = rp->r_pc + 4;
6917 	return (1);
6918 }
6919 
6920 /*
6921  * fpras_failure is called when a fpras check detects a bad calculation
6922  * result or an illegal instruction trap is attributed to an fpras
6923  * check.  In all cases we are still bound to CPU.
6924  */
6925 int
6926 fpras_failure(int op, int how)
6927 {
6928 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6929 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6930 	ch_async_flt_t ch_flt;
6931 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6932 	struct fpras_chkfn *sfp, *cfp;
6933 	uint32_t *sip, *cip;
6934 	int i;
6935 
6936 	/*
6937 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6938 	 * the time in which we dispatch an ereport and (if applicable) panic.
6939 	 */
6940 	use_hw_bcopy_orig = use_hw_bcopy;
6941 	use_hw_bzero_orig = use_hw_bzero;
6942 	hcl1_orig = hw_copy_limit_1;
6943 	hcl2_orig = hw_copy_limit_2;
6944 	hcl4_orig = hw_copy_limit_4;
6945 	hcl8_orig = hw_copy_limit_8;
6946 	use_hw_bcopy = use_hw_bzero = 0;
6947 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6948 	    hw_copy_limit_8 = 0;
6949 
6950 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6951 	aflt->flt_id = gethrtime_waitfree();
6952 	aflt->flt_class = CPU_FAULT;
6953 	aflt->flt_inst = CPU->cpu_id;
6954 	aflt->flt_status = (how << 8) | op;
6955 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6956 	ch_flt.flt_type = CPU_FPUERR;
6957 
6958 	/*
6959 	 * We must panic if the copy operation had no lofault protection -
6960 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6961 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6962 	 */
6963 	aflt->flt_panic = (curthread->t_lofault == NULL);
6964 
6965 	/*
6966 	 * XOR the source instruction block with the copied instruction
6967 	 * block - this will show us which bit(s) are corrupted.
6968 	 */
6969 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6970 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6971 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6972 		sip = &sfp->fpras_blk0[0];
6973 		cip = &cfp->fpras_blk0[0];
6974 	} else {
6975 		sip = &sfp->fpras_blk1[0];
6976 		cip = &cfp->fpras_blk1[0];
6977 	}
6978 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6979 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6980 
6981 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6982 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6983 
6984 	if (aflt->flt_panic)
6985 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6986 
6987 	/*
6988 	 * We get here for copyin/copyout and kcopy or bcopy where the
6989 	 * caller has used on_fault.  We will flag the error so that
6990 	 * the process may be killed  The trap_async_hwerr mechanism will
6991 	 * take appropriate further action (such as a reboot, contract
6992 	 * notification etc).  Since we may be continuing we will
6993 	 * restore the global hardware copy acceleration switches.
6994 	 *
6995 	 * When we return from this function to the copy function we want to
6996 	 * avoid potentially bad data being used, ie we want the affected
6997 	 * copy function to return an error.  The caller should therefore
6998 	 * invoke its lofault handler (which always exists for these functions)
6999 	 * which will return the appropriate error.
7000 	 */
7001 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
7002 	aston(curthread);
7003 
7004 	use_hw_bcopy = use_hw_bcopy_orig;
7005 	use_hw_bzero = use_hw_bzero_orig;
7006 	hw_copy_limit_1 = hcl1_orig;
7007 	hw_copy_limit_2 = hcl2_orig;
7008 	hw_copy_limit_4 = hcl4_orig;
7009 	hw_copy_limit_8 = hcl8_orig;
7010 
7011 	return (1);
7012 }
7013 
7014 #define	VIS_BLOCKSIZE		64
7015 
7016 int
7017 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
7018 {
7019 	int ret, watched;
7020 
7021 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7022 	ret = dtrace_blksuword32(addr, data, 0);
7023 	if (watched)
7024 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
7025 
7026 	return (ret);
7027 }
7028 
7029 /*
7030  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
7031  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
7032  * CEEN from the EER to disable traps for further disrupting error types
7033  * on that cpu.  We could cross-call instead, but that has a larger
7034  * instruction and data footprint than cross-trapping, and the cpu is known
7035  * to be faulted.
7036  */
7037 
7038 void
7039 cpu_faulted_enter(struct cpu *cp)
7040 {
7041 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
7042 }
7043 
7044 /*
7045  * Called when a cpu leaves the CPU_FAULTED state to return to one of
7046  * offline, spare, or online (by the cpu requesting this state change).
7047  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
7048  * disrupting error bits that have accumulated without trapping, then
7049  * we cross-trap to re-enable CEEN controlled traps.
7050  */
7051 void
7052 cpu_faulted_exit(struct cpu *cp)
7053 {
7054 	ch_cpu_errors_t cpu_error_regs;
7055 
7056 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
7057 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
7058 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
7059 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
7060 	    (uint64_t)&cpu_error_regs, 0);
7061 
7062 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
7063 }
7064 
7065 /*
7066  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
7067  * the errors in the original AFSR, 0 otherwise.
7068  *
7069  * For all procs if the initial error was a BERR or TO, then it is possible
7070  * that we may have caused a secondary BERR or TO in the process of logging the
7071  * inital error via cpu_run_bus_error_handlers().  If this is the case then
7072  * if the request was protected then a panic is still not necessary, if not
7073  * protected then aft_panic is already set - so either way there's no need
7074  * to set aft_panic for the secondary error.
7075  *
7076  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
7077  * a store merge, then the error handling code will call cpu_deferred_error().
7078  * When clear_errors() is called, it will determine that secondary errors have
7079  * occurred - in particular, the store merge also caused a EDU and WDU that
7080  * weren't discovered until this point.
7081  *
7082  * We do three checks to verify that we are in this case.  If we pass all three
7083  * checks, we return 1 to indicate that we should not panic.  If any unexpected
7084  * errors occur, we return 0.
7085  *
7086  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
7087  * handled in cpu_disrupting_errors().  Since this function is not even called
7088  * in the case we are interested in, we just return 0 for these processors.
7089  */
7090 /*ARGSUSED*/
7091 static int
7092 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
7093     uint64_t t_afar)
7094 {
7095 #if defined(CHEETAH_PLUS)
7096 #else	/* CHEETAH_PLUS */
7097 	struct async_flt *aflt = (struct async_flt *)ch_flt;
7098 #endif	/* CHEETAH_PLUS */
7099 
7100 	/*
7101 	 * Was the original error a BERR or TO and only a BERR or TO
7102 	 * (multiple errors are also OK)
7103 	 */
7104 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
7105 		/*
7106 		 * Is the new error a BERR or TO and only a BERR or TO
7107 		 * (multiple errors are also OK)
7108 		 */
7109 		if ((ch_flt->afsr_errs &
7110 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
7111 			return (1);
7112 	}
7113 
7114 #if defined(CHEETAH_PLUS)
7115 	return (0);
7116 #else	/* CHEETAH_PLUS */
7117 	/*
7118 	 * Now look for secondary effects of a UE on cheetah/jalapeno
7119 	 *
7120 	 * Check the original error was a UE, and only a UE.  Note that
7121 	 * the ME bit will cause us to fail this check.
7122 	 */
7123 	if (t_afsr_errs != C_AFSR_UE)
7124 		return (0);
7125 
7126 	/*
7127 	 * Check the secondary errors were exclusively an EDU and/or WDU.
7128 	 */
7129 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
7130 		return (0);
7131 
7132 	/*
7133 	 * Check the AFAR of the original error and secondary errors
7134 	 * match to the 64-byte boundary
7135 	 */
7136 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
7137 		return (0);
7138 
7139 	/*
7140 	 * We've passed all the checks, so it's a secondary error!
7141 	 */
7142 	return (1);
7143 #endif	/* CHEETAH_PLUS */
7144 }
7145 
7146 /*
7147  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
7148  * is checked for any valid errors.  If found, the error type is
7149  * returned. If not found, the flt_type is checked for L1$ parity errors.
7150  */
7151 /*ARGSUSED*/
7152 static uint8_t
7153 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
7154 {
7155 #if defined(JALAPENO)
7156 	/*
7157 	 * Currently, logging errors to the SC is not supported on Jalapeno
7158 	 */
7159 	return (PLAT_ECC_ERROR2_NONE);
7160 #else
7161 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
7162 
7163 	switch (ch_flt->flt_bit) {
7164 	case C_AFSR_CE:
7165 		return (PLAT_ECC_ERROR2_CE);
7166 	case C_AFSR_UCC:
7167 	case C_AFSR_EDC:
7168 	case C_AFSR_WDC:
7169 	case C_AFSR_CPC:
7170 		return (PLAT_ECC_ERROR2_L2_CE);
7171 	case C_AFSR_EMC:
7172 		return (PLAT_ECC_ERROR2_EMC);
7173 	case C_AFSR_IVC:
7174 		return (PLAT_ECC_ERROR2_IVC);
7175 	case C_AFSR_UE:
7176 		return (PLAT_ECC_ERROR2_UE);
7177 	case C_AFSR_UCU:
7178 	case C_AFSR_EDU:
7179 	case C_AFSR_WDU:
7180 	case C_AFSR_CPU:
7181 		return (PLAT_ECC_ERROR2_L2_UE);
7182 	case C_AFSR_IVU:
7183 		return (PLAT_ECC_ERROR2_IVU);
7184 	case C_AFSR_TO:
7185 		return (PLAT_ECC_ERROR2_TO);
7186 	case C_AFSR_BERR:
7187 		return (PLAT_ECC_ERROR2_BERR);
7188 #if defined(CHEETAH_PLUS)
7189 	case C_AFSR_L3_EDC:
7190 	case C_AFSR_L3_UCC:
7191 	case C_AFSR_L3_CPC:
7192 	case C_AFSR_L3_WDC:
7193 		return (PLAT_ECC_ERROR2_L3_CE);
7194 	case C_AFSR_IMC:
7195 		return (PLAT_ECC_ERROR2_IMC);
7196 	case C_AFSR_TSCE:
7197 		return (PLAT_ECC_ERROR2_L2_TSCE);
7198 	case C_AFSR_THCE:
7199 		return (PLAT_ECC_ERROR2_L2_THCE);
7200 	case C_AFSR_L3_MECC:
7201 		return (PLAT_ECC_ERROR2_L3_MECC);
7202 	case C_AFSR_L3_THCE:
7203 		return (PLAT_ECC_ERROR2_L3_THCE);
7204 	case C_AFSR_L3_CPU:
7205 	case C_AFSR_L3_EDU:
7206 	case C_AFSR_L3_UCU:
7207 	case C_AFSR_L3_WDU:
7208 		return (PLAT_ECC_ERROR2_L3_UE);
7209 	case C_AFSR_DUE:
7210 		return (PLAT_ECC_ERROR2_DUE);
7211 	case C_AFSR_DTO:
7212 		return (PLAT_ECC_ERROR2_DTO);
7213 	case C_AFSR_DBERR:
7214 		return (PLAT_ECC_ERROR2_DBERR);
7215 #endif	/* CHEETAH_PLUS */
7216 	default:
7217 		switch (ch_flt->flt_type) {
7218 #if defined(CPU_IMP_L1_CACHE_PARITY)
7219 		case CPU_IC_PARITY:
7220 			return (PLAT_ECC_ERROR2_IPE);
7221 		case CPU_DC_PARITY:
7222 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7223 				if (ch_flt->parity_data.dpe.cpl_cache ==
7224 				    CPU_PC_PARITY) {
7225 					return (PLAT_ECC_ERROR2_PCACHE);
7226 				}
7227 			}
7228 			return (PLAT_ECC_ERROR2_DPE);
7229 #endif /* CPU_IMP_L1_CACHE_PARITY */
7230 		case CPU_ITLB_PARITY:
7231 			return (PLAT_ECC_ERROR2_ITLB);
7232 		case CPU_DTLB_PARITY:
7233 			return (PLAT_ECC_ERROR2_DTLB);
7234 		default:
7235 			return (PLAT_ECC_ERROR2_NONE);
7236 		}
7237 	}
7238 #endif	/* JALAPENO */
7239 }
7240