xref: /illumos-gate/usr/src/uts/sparc/v9/fpu/fpu.c (revision 7232236b5511ebe9b3343a5e1ab57a898e709218)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/signal.h>
32 #include <sys/trap.h>
33 #include <sys/machtrap.h>
34 #include <sys/fault.h>
35 #include <sys/systm.h>
36 #include <sys/user.h>
37 #include <sys/file.h>
38 #include <sys/proc.h>
39 #include <sys/core.h>
40 #include <sys/pcb.h>
41 #include <sys/cpuvar.h>
42 #include <sys/thread.h>
43 #include <sys/disp.h>
44 #include <sys/stack.h>
45 #include <sys/cmn_err.h>
46 #include <sys/privregs.h>
47 #include <sys/debug.h>
48 
49 #include <sys/fpu/fpu_simulator.h>
50 #include <sys/fpu/globals.h>
51 #include <sys/fpu/fpusystm.h>
52 
53 int fpdispr = 0;
54 
55 /*
56  * For use by procfs to save the floating point context of the thread.
57  * Note the if (ttolwp(lwp) == curthread) in prstop, which calls
58  * this function, ensures that it is safe to read the fprs here.
59  */
60 void
61 fp_prsave(kfpu_t *fp)
62 {
63 	if ((fp->fpu_en) || (fp->fpu_fprs & FPRS_FEF))  {
64 		kpreempt_disable();
65 		if (fpu_exists) {
66 			fp->fpu_fprs = _fp_read_fprs();
67 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
68 				uint32_t fprs = (FPRS_FEF|FPRS_DU|FPRS_DL);
69 
70 				_fp_write_fprs(fprs);
71 				fp->fpu_fprs = fprs;
72 #ifdef DEBUG
73 				if (fpdispr)
74 					cmn_err(CE_NOTE,
75 					    "fp_prsave with fp disabled!");
76 #endif
77 			}
78 			fp_fksave(fp);
79 		}
80 		kpreempt_enable();
81 	}
82 }
83 
84 /*
85  * Copy the floating point context of the forked thread.
86  */
87 void
88 fp_fork(klwp_t *lwp, klwp_t *clwp)
89 {
90 	kfpu_t *cfp, *pfp;
91 	int i;
92 
93 	cfp = lwptofpu(clwp);
94 	pfp = lwptofpu(lwp);
95 
96 	/*
97 	 * copy the parents fpq
98 	 */
99 	cfp->fpu_qcnt = pfp->fpu_qcnt;
100 	for (i = 0; i < pfp->fpu_qcnt; i++)
101 		cfp->fpu_q[i] = pfp->fpu_q[i];
102 
103 	/*
104 	 * save the context of the parent into the childs fpu structure
105 	 */
106 	cfp->fpu_fprs = pfp->fpu_fprs;
107 	if (ttolwp(curthread) == lwp && fpu_exists) {
108 		fp_fksave(cfp);
109 	} else {
110 		for (i = 0; i < 32; i++)
111 			cfp->fpu_fr.fpu_regs[i] = pfp->fpu_fr.fpu_regs[i];
112 		for (i = 16; i < 32; i++)
113 			cfp->fpu_fr.fpu_dregs[i] = pfp->fpu_fr.fpu_dregs[i];
114 	}
115 	cfp->fpu_en = 1;
116 }
117 
118 /*
119  * Free any state associated with floating point context.
120  * Fp_free can be called in two cases:
121  * 1) from reaper -> thread_free -> lwp_freeregs -> fp_free
122  *	fp context belongs to a thread on deathrow
123  *	nothing to do,  thread will never be resumed
124  *	thread calling ctxfree is reaper
125  *
126  * 2) from exec -> lwp_freeregs -> fp_free
127  *	fp context belongs to the current thread
128  *	must disable fpu, thread calling ctxfree is curthread
129  */
130 /*ARGSUSED1*/
131 void
132 fp_free(kfpu_t *fp, int isexec)
133 {
134 	int s;
135 	uint32_t fprs = 0;
136 
137 	if (curthread->t_lwp != NULL && lwptofpu(curthread->t_lwp) == fp) {
138 		fp->fpu_en = 0;
139 		fp->fpu_fprs = fprs;
140 		s = splhigh();
141 		_fp_write_fprs(fprs);
142 		splx(s);
143 	}
144 }
145 
146 
147 #ifdef SF_ERRATA_30 /* call causes fp-disabled */
148 extern int spitfire_call_bug;
149 int ill_fpcalls;
150 #endif
151 
152 void
153 fp_enable(void)
154 {
155 	klwp_id_t lwp;
156 	kfpu_t *fp;
157 
158 	lwp = ttolwp(curthread);
159 	ASSERT(lwp != NULL);
160 	fp = lwptofpu(lwp);
161 
162 	if (fpu_exists) {
163 		if (fp->fpu_en) {
164 #ifdef DEBUG
165 			if (fpdispr)
166 				cmn_err(CE_NOTE,
167 				    "fpu disabled, but already enabled\n");
168 #endif
169 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
170 				fp->fpu_fprs = FPRS_FEF;
171 #ifdef DEBUG
172 				if (fpdispr)
173 					cmn_err(CE_NOTE,
174 					"fpu disabled, saved fprs disabled\n");
175 #endif
176 			}
177 			_fp_write_fprs(FPRS_FEF);
178 			fp_restore(fp);
179 		} else {
180 			fp->fpu_en = 1;
181 			fp->fpu_fsr = 0;
182 			fp->fpu_fprs = FPRS_FEF;
183 			_fp_write_fprs(FPRS_FEF);
184 			fp_clearregs(fp);
185 		}
186 	} else {
187 		int i;
188 
189 		if (!fp->fpu_en) {
190 			fp->fpu_en = 1;
191 			fp->fpu_fsr = 0;
192 			for (i = 0; i < 32; i++)
193 				fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
194 			for (i = 16; i < 32; i++)		/* NaN */
195 				fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
196 		}
197 	}
198 }
199 
200 /*
201  * fp_disabled normally occurs when the first floating point in a non-threaded
202  * program causes an fp_disabled trap. For threaded programs, the ILP32 threads
203  * library calls the .setpsr fasttrap, which has been modified to also set the
204  * appropriate bits in fpu_en and fpu_fprs, as well as to enable the %fprs,
205  * as before. The LP64 threads library will write to the %fprs directly,
206  * so fpu_en will never get updated for LP64 threaded programs,
207  * although fpu_fprs will, via resume.
208  */
209 void
210 fp_disabled(struct regs *rp)
211 {
212 	klwp_id_t lwp;
213 	kfpu_t *fp;
214 	int ftt;
215 
216 #ifdef SF_ERRATA_30 /* call causes fp-disabled */
217 	/*
218 	 * This code is here because sometimes the call instruction
219 	 * generates an fp_disabled trap when the call offset is large.
220 	 */
221 	if (spitfire_call_bug) {
222 		uint_t instr = 0;
223 		extern void trap(struct regs *rp, caddr_t addr, uint32_t type,
224 		    uint32_t mmu_fsr);
225 
226 		if (USERMODE(rp->r_tstate)) {
227 			(void) fuword32((void *)rp->r_pc, &instr);
228 		} else {
229 			instr = *(uint_t *)(rp->r_pc);
230 		}
231 		if ((instr & 0xc0000000) == 0x40000000) {
232 			ill_fpcalls++;
233 			trap(rp, NULL, T_UNIMP_INSTR, 0);
234 			return;
235 		}
236 	}
237 #endif /* SF_ERRATA_30 - call causes fp-disabled */
238 
239 #ifdef CHEETAH_ERRATUM_109 /* interrupts not taken during fpops */
240 	/*
241 	 * UltraSPARC III will report spurious fp-disabled exceptions when
242 	 * the pipe is full of fpops and an interrupt is triggered.  By the
243 	 * time we get here the interrupt has been taken and we just need
244 	 * to return to where we came from and try again.
245 	 */
246 	if (fpu_exists && _fp_read_fprs() & FPRS_FEF)
247 		return;
248 #endif /* CHEETAH_ERRATUM_109 */
249 
250 	lwp = ttolwp(curthread);
251 	ASSERT(lwp != NULL);
252 	fp = lwptofpu(lwp);
253 	if (fpu_exists) {
254 		kpreempt_disable();
255 		if (fp->fpu_en) {
256 #ifdef DEBUG
257 			if (fpdispr)
258 				cmn_err(CE_NOTE,
259 				    "fpu disabled, but already enabled\n");
260 #endif
261 			if ((fp->fpu_fprs & FPRS_FEF) != FPRS_FEF) {
262 				fp->fpu_fprs = FPRS_FEF;
263 #ifdef DEBUG
264 				if (fpdispr)
265 					cmn_err(CE_NOTE,
266 					"fpu disabled, saved fprs disabled\n");
267 #endif
268 			}
269 			_fp_write_fprs(FPRS_FEF);
270 			fp_restore(fp);
271 		} else {
272 			fp->fpu_en = 1;
273 			fp->fpu_fsr = 0;
274 			fp->fpu_fprs = FPRS_FEF;
275 			_fp_write_fprs(FPRS_FEF);
276 			fp_clearregs(fp);
277 		}
278 		kpreempt_enable();
279 	} else {
280 		fp_simd_type fpsd;
281 		int i;
282 
283 		(void) flush_user_windows_to_stack(NULL);
284 		if (!fp->fpu_en) {
285 			fp->fpu_en = 1;
286 			fp->fpu_fsr = 0;
287 			for (i = 0; i < 32; i++)
288 				fp->fpu_fr.fpu_regs[i] = (uint_t)-1; /* NaN */
289 			for (i = 16; i < 32; i++)		/* NaN */
290 				fp->fpu_fr.fpu_dregs[i] = (uint64_t)-1;
291 		}
292 		if (ftt = fp_emulator(&fpsd, (fp_inst_type *)rp->r_pc,
293 		    rp, (ulong_t *)rp->r_sp, fp)) {
294 			fp->fpu_q_entrysize = sizeof (struct fpq);
295 			fp_traps(&fpsd, ftt, rp);
296 		}
297 	}
298 }
299 
300 /*
301  * Process the floating point queue in lwp->lwp_pcb.
302  *
303  * Each entry in the floating point queue is processed in turn.
304  * If processing an entry results in an exception fp_traps() is called to
305  * handle the exception - this usually results in the generation of a signal
306  * to be delivered to the user. There are 2 possible outcomes to this (note
307  * that hardware generated signals cannot be held!):
308  *
309  *   1. If the signal is being ignored we continue to process the rest
310  *	of the entries in the queue.
311  *
312  *   2. If arrangements have been made for return to a user signal handler,
313  *	sendsig() will have copied the floating point queue onto the user's
314  *	signal stack and zero'ed the queue count in the u_pcb. Note that
315  *	this has the side effect of terminating fp_runq's processing loop.
316  *	We will re-run the floating point queue on return from the user
317  *	signal handler if necessary as part of normal setcontext processing.
318  */
319 void
320 fp_runq(struct regs *rp)
321 {
322 	kfpu_t *fp = lwptofpu(curthread->t_lwp);
323 	struct fq *fqp = fp->fpu_q;
324 	fp_simd_type fpsd;
325 	uint64_t gsr = get_gsr(fp);
326 
327 	/*
328 	 * don't preempt while manipulating the queue
329 	 */
330 	kpreempt_disable();
331 
332 	while (fp->fpu_qcnt) {
333 		int fptrap;
334 
335 		fptrap = fpu_simulator((fp_simd_type *)&fpsd,
336 		    (fp_inst_type *)fqp->FQu.fpq.fpq_addr,
337 		    (fsr_type *)&fp->fpu_fsr, gsr,
338 		    fqp->FQu.fpq.fpq_instr);
339 		if (fptrap) {
340 			/*
341 			 * Instruction could not be simulated so we will
342 			 * attempt to deliver a signal.
343 			 * We may be called again upon signal exit (setcontext)
344 			 * and can continue to process the queue then.
345 			 */
346 			if (fqp != fp->fpu_q) {
347 				int i;
348 				struct fq *fqdp;
349 
350 				/*
351 				 * We need to normalize the floating queue so
352 				 * the excepting instruction is at the head,
353 				 * so that the queue may be copied onto the
354 				 * user signal stack by sendsig().
355 				 */
356 				fqdp = fp->fpu_q;
357 				for (i = fp->fpu_qcnt; i; i--) {
358 					*fqdp++ = *fqp++;
359 				}
360 				fqp = fp->fpu_q;
361 			}
362 			fp->fpu_q_entrysize = sizeof (struct fpq);
363 
364 			/*
365 			 * fpu_simulator uses the fp registers directly but it
366 			 * uses the software copy of the fsr. We need to write
367 			 * that back to fpu so that fpu's state is current for
368 			 * ucontext.
369 			 */
370 			if (fpu_exists)
371 				_fp_write_pfsr(&fp->fpu_fsr);
372 
373 			/* post signal */
374 			fp_traps(&fpsd, fptrap, rp);
375 
376 			/*
377 			 * Break from loop to allow signal to be sent.
378 			 * If there are other instructions in the fp queue
379 			 * they will be processed when/if the user retuns
380 			 * from the signal handler with a non-empty queue.
381 			 */
382 			break;
383 		}
384 		fp->fpu_qcnt--;
385 		fqp++;
386 	}
387 
388 	/*
389 	 * fpu_simulator uses the fp registers directly, so we have
390 	 * to update the pcb copies to keep current, but it uses the
391 	 * software copy of the fsr, so we write that back to fpu
392 	 */
393 	if (fpu_exists) {
394 		int i;
395 
396 		for (i = 0; i < 32; i++)
397 			_fp_read_pfreg(&fp->fpu_fr.fpu_regs[i], i);
398 		for (i = 16; i < 32; i++)
399 			_fp_read_pdreg(&fp->fpu_fr.fpu_dregs[i], i);
400 		_fp_write_pfsr(&fp->fpu_fsr);
401 	}
402 
403 	kpreempt_enable();
404 }
405 
406 /*
407  * Get the precise trapped V9 floating point instruction.
408  * Fake up a queue to process. If getting the instruction results
409  * in an exception fp_traps() is called to handle the exception - this
410  * usually results in the generation of a signal to be delivered to the user.
411  */
412 
413 void
414 fp_precise(struct regs *rp)
415 {
416 	fp_simd_type	fpsd;
417 	int		inst_ftt;
418 
419 	union {
420 		uint_t		i;
421 		fp_inst_type	inst;
422 	} kluge;
423 
424 	klwp_t *lwp = ttolwp(curthread);
425 	kfpu_t *fp = lwptofpu(lwp);
426 	uint64_t gsr;
427 	int mstate;
428 	if (fpu_exists)
429 		save_gsr(fp);
430 	gsr = get_gsr(fp);
431 
432 	/*
433 	 * Get the instruction to be emulated from the pc saved by the trap.
434 	 * Note that the kernel is NOT prepared to handle a kernel fp
435 	 * exception if it can't pass successfully through the fp simulator.
436 	 *
437 	 * If the trap occurred in user mode, set lwp_state to LWP_SYS for the
438 	 * purposes of clock accounting and switch to the LMS_TRAP microstate.
439 	 */
440 	if (USERMODE(rp->r_tstate)) {
441 		inst_ftt = _fp_read_inst((uint32_t *)rp->r_pc, &kluge.i, &fpsd);
442 		mstate = new_mstate(curthread, LMS_TRAP);
443 		lwp->lwp_state = LWP_SYS;
444 	} else {
445 		kluge.i = *(uint_t *)rp->r_pc;
446 		inst_ftt = ftt_none;
447 	}
448 
449 	if (inst_ftt != ftt_none) {
450 		/*
451 		 * Save the bad address and post the signal.
452 		 * It can only be an ftt_alignment or ftt_fault trap.
453 		 * XXX - How can this work w/mainsail and do_unaligned?
454 		 */
455 		fpsd.fp_trapaddr = (caddr_t)rp->r_pc;
456 		fp_traps(&fpsd, inst_ftt, rp);
457 	} else {
458 		/*
459 		 * Conjure up a floating point queue and advance the pc/npc
460 		 * to fake a deferred fp trap. We now run the fp simulator
461 		 * in fp_precise, while allowing setfpregs to call fp_runq,
462 		 * because this allows us to do the ugly machinations to
463 		 * inc/dec the pc depending on the trap type, as per
464 		 * bugid 1210159. fp_runq is still going to have the
465 		 * generic "how do I connect the "fp queue to the pc/npc"
466 		 * problem alluded to in bugid 1192883, which is only a
467 		 * problem for a restorecontext of a v8 fp queue on a
468 		 * v9 system, which seems like the .000000001% case (on v9)!
469 		 */
470 		struct fpq *pfpq = &fp->fpu_q->FQu.fpq;
471 		fp_simd_type	fpsd;
472 		int fptrap;
473 
474 		pfpq->fpq_addr = (uint_t *)rp->r_pc;
475 		pfpq->fpq_instr = kluge.i;
476 		fp->fpu_qcnt = 1;
477 		fp->fpu_q_entrysize = sizeof (struct fpq);
478 
479 		kpreempt_disable();
480 		(void) flush_user_windows_to_stack(NULL);
481 		fptrap = fpu_vis_sim((fp_simd_type *)&fpsd,
482 		    (fp_inst_type *)pfpq->fpq_addr, rp,
483 		    (fsr_type *)&fp->fpu_fsr, gsr, kluge.i);
484 
485 		/* update the hardware fp fsr state for sake of ucontext */
486 		if (fpu_exists)
487 			_fp_write_pfsr(&fp->fpu_fsr);
488 
489 		if (fptrap) {
490 			/* back up the pc if the signal needs to be precise */
491 			if (fptrap != ftt_ieee) {
492 				fp->fpu_qcnt = 0;
493 			}
494 			/* post signal */
495 			fp_traps(&fpsd, fptrap, rp);
496 
497 			/* decrement queue count for ieee exceptions */
498 			if (fptrap == ftt_ieee) {
499 				fp->fpu_qcnt = 0;
500 			}
501 		} else {
502 			fp->fpu_qcnt = 0;
503 		}
504 		/* update the software pcb copies of hardware fp registers */
505 		if (fpu_exists) {
506 			fp_save(fp);
507 		}
508 		kpreempt_enable();
509 	}
510 
511 	/*
512 	 * Reset lwp_state to LWP_USER for the purposes of clock accounting,
513 	 * and restore the previously saved microstate.
514 	 */
515 	if (USERMODE(rp->r_tstate)) {
516 		(void) new_mstate(curthread, mstate);
517 		lwp->lwp_state = LWP_USER;
518 	}
519 }
520 
521 /*
522  * Handle floating point traps generated by simulation/emulation.
523  */
524 void
525 fp_traps(
526 	fp_simd_type *pfpsd,	/* Pointer to simulator data */
527 	enum ftt_type ftt,	/* trap type */
528 	struct regs *rp)	/* ptr to regs fro trap */
529 {
530 	/*
531 	 * If we take a user's exception in kernel mode, we want to trap
532 	 * with the user's registers.
533 	 */
534 	switch (ftt) {
535 	case ftt_ieee:
536 		fpu_trap(rp, pfpsd->fp_trapaddr, T_FP_EXCEPTION_IEEE,
537 		    pfpsd->fp_trapcode);
538 		break;
539 	case ftt_fault:
540 		fpu_trap(rp, pfpsd->fp_trapaddr, T_DATA_EXCEPTION, 0);
541 		break;
542 	case ftt_alignment:
543 		fpu_trap(rp, pfpsd->fp_trapaddr, T_ALIGNMENT, 0);
544 		break;
545 	case ftt_unimplemented:
546 		fpu_trap(rp, pfpsd->fp_trapaddr, T_UNIMP_INSTR, 0);
547 		break;
548 	default:
549 		/*
550 		 * We don't expect any of the other types here.
551 		 */
552 		cmn_err(CE_PANIC, "fp_traps: bad ftt");
553 	}
554 }
555