/* Origin: linux/arch/parisc/kernel/pacache.S (revision 6ed7ffddcf61f668114edb676417e5fb33773b59) */
/*
 *  PARISC TLB and cache flushing support
 *  Copyright (C) 2000-2001 Hewlett-Packard (John Marvin)
 *  Copyright (C) 2001 Matthew Wilcox (willy at parisc-linux.org)
 *  Copyright (C) 2002 Richard Hirst (rhirst with parisc-linux.org)
 *
 *    This program is free software; you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation; either version 2, or (at your option)
 *    any later version.
 *
 *    This program is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with this program; if not, write to the Free Software
 *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * NOTE: fdc, fic, and pdc instructions that use base register modification
 *       should only use index and base registers that are not shadowed,
 *       so that the fast path emulation in the non access miss handler
 *       can be used.
 */

/* Assembler architecture level: 2.0w when CONFIG_64BIT is set, 2.0 otherwise. */
#ifdef CONFIG_64BIT
	.level	2.0w
#else
	.level	2.0
#endif

#include <asm/psw.h>
#include <asm/assembly.h>
#include <asm/pgtable.h>
#include <asm/cache.h>
#include <linux/linkage.h>

	.text
	.align	128

/*
 * flush_tlb_all_local
 *
 * Purge the whole instruction TLB and the whole data TLB with
 * pitlbe/pdtlbe, walking the space-id / offset / loop parameters read
 * from cache_info.  The purge loops run between two rfi-based PSW
 * switches: REAL_MODE_PSW is loaded on the way in and KERNEL_PSW on
 * the way out.  The I-bit is cleared on entry and the value it had is
 * ORed back into the PSW that is restored on exit.
 *
 * Takes no arguments; returns through %r2.
 * Clobbers: %r1, %r19-%r22, %arg0-%arg3, %r28, %r29, %r31, %sr1.
 */
ENTRY(flush_tlb_all_local)
	.proc
	.callinfo NO_CALLS
	.entry

	/*
	 * The pitlbe and pdtlbe instructions should only be used to
	 * flush the entire tlb. Also, there needs to be no intervening
	 * tlb operations, e.g. tlb misses, so the operation needs
	 * to happen in real mode with all interruptions disabled.
	 */

	/* pcxt_ssm_bug	- relied upon translation! PA 2.0 Arch. F-4 and F-5 */
	rsm		PSW_SM_I, %r19		/* save I-bit state */
	load32		PA(1f), %r1		/* physical address to resume at */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		REAL_MODE_PSW, %r1
	mtctl           %r1, %ipsw
	rfi					/* continue at 1: with REAL_MODE_PSW */
	nop

1:      load32		PA(cache_info), %r1

	/* Flush Instruction Tlb */

	LDREG		ITLB_SID_BASE(%r1), %r20
	LDREG		ITLB_SID_STRIDE(%r1), %r21
	LDREG		ITLB_SID_COUNT(%r1), %r22
	LDREG		ITLB_OFF_BASE(%r1), %arg0
	LDREG		ITLB_OFF_STRIDE(%r1), %arg1
	LDREG		ITLB_OFF_COUNT(%r1), %arg2
	LDREG		ITLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fitoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fitdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fitmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fitmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fitmanymiddle	/* Adjusted inner loop decr */
	pitlbe		0(%sr1, %r28)
	pitlbe,m	%arg1(%sr1, %r28)	/* Last pitlbe and addr adjust */
	addib,COND(>)		-1, %r29, fitmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fitmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22, fitdone	/* Outer loop count decr */

fitoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fitonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fitonemiddle	/* Middle loop count decr */
	pitlbe,m	%arg1(%sr1, %r28)	/* pitlbe for one loop */

	addib,COND(>)		-1, %r22, fitoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20		/* increment space */

fitdone:

	/* Flush Data Tlb */

	LDREG		DTLB_SID_BASE(%r1), %r20
	LDREG		DTLB_SID_STRIDE(%r1), %r21
	LDREG		DTLB_SID_COUNT(%r1), %r22
	LDREG		DTLB_OFF_BASE(%r1), %arg0
	LDREG		DTLB_OFF_STRIDE(%r1), %arg1
	LDREG		DTLB_OFF_COUNT(%r1), %arg2
	LDREG		DTLB_LOOP(%r1), %arg3

	addib,COND(=)		-1, %arg3, fdtoneloop	/* Preadjust and test */
	movb,<,n	%arg3, %r31, fdtdone	/* If loop < 0, skip */
	copy		%arg0, %r28		/* Init base addr */

fdtmanyloop:					/* Loop if LOOP >= 2 */
	mtsp		%r20, %sr1
	add		%r21, %r20, %r20	/* increment space */
	copy		%arg2, %r29		/* Init middle loop count */

fdtmanymiddle:					/* Loop if LOOP >= 2 */
	addib,COND(>)		-1, %r31, fdtmanymiddle	/* Adjusted inner loop decr */
	pdtlbe		0(%sr1, %r28)
	pdtlbe,m	%arg1(%sr1, %r28)	/* Last pdtlbe and addr adjust */
	addib,COND(>)		-1, %r29, fdtmanymiddle	/* Middle loop decr */
	copy		%arg3, %r31		/* Re-init inner loop count */

	movb,tr		%arg0, %r28, fdtmanyloop /* Re-init base addr */
	addib,COND(<=),n	-1, %r22,fdtdone	/* Outer loop count decr */

fdtoneloop:					/* Loop if LOOP = 1 */
	mtsp		%r20, %sr1
	copy		%arg0, %r28		/* init base addr */
	copy		%arg2, %r29		/* init middle loop count */

fdtonemiddle:					/* Loop if LOOP = 1 */
	addib,COND(>)		-1, %r29, fdtonemiddle	/* Middle loop count decr */
	pdtlbe,m	%arg1(%sr1, %r28)	/* pdtlbe for one loop */

	addib,COND(>)		-1, %r22, fdtoneloop	/* Outer loop count decr */
	add		%r21, %r20, %r20	/* increment space */


fdtdone:
	/*
	 * Switch back to virtual mode
	 */
	/* pcxt_ssm_bug */
	rsm		PSW_SM_I, %r0
	load32		2f, %r1			/* virtual address to resume at */
	nop
	nop
	nop
	nop
	nop

	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
	mtctl		%r0, %cr17		/* Clear IIASQ tail */
	mtctl		%r0, %cr17		/* Clear IIASQ head */
	mtctl		%r1, %cr18		/* IIAOQ head */
	ldo		4(%r1), %r1
	mtctl		%r1, %cr18		/* IIAOQ tail */
	load32		KERNEL_PSW, %r1
	or		%r1, %r19, %r1	/* I-bit to state on entry */
	mtctl		%r1, %ipsw	/* restore I-bit (entire PSW) */
	rfi
	nop

2:      bv		%r0(%r2)
	nop

	.exit
	.procend
ENDPROC(flush_tlb_all_local)

195	.import cache_info,data
196
197ENTRY(flush_instruction_cache_local)
198	.proc
199	.callinfo NO_CALLS
200	.entry
201
202	load32		cache_info, %r1
203
204	/* Flush Instruction Cache */
205
206	LDREG		ICACHE_BASE(%r1), %arg0
207	LDREG		ICACHE_STRIDE(%r1), %arg1
208	LDREG		ICACHE_COUNT(%r1), %arg2
209	LDREG		ICACHE_LOOP(%r1), %arg3
210	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
211	mtsp		%r0, %sr1
212	addib,COND(=)		-1, %arg3, fioneloop	/* Preadjust and test */
213	movb,<,n	%arg3, %r31, fisync	/* If loop < 0, do sync */
214
215fimanyloop:					/* Loop if LOOP >= 2 */
216	addib,COND(>)		-1, %r31, fimanyloop	/* Adjusted inner loop decr */
217	fice            %r0(%sr1, %arg0)
218	fice,m		%arg1(%sr1, %arg0)	/* Last fice and addr adjust */
219	movb,tr		%arg3, %r31, fimanyloop	/* Re-init inner loop count */
220	addib,COND(<=),n	-1, %arg2, fisync	/* Outer loop decr */
221
222fioneloop:					/* Loop if LOOP = 1 */
223	/* Some implementations may flush with a single fice instruction */
224	cmpib,COND(>>=),n	15, %arg2, fioneloop2
225
226fioneloop1:
227	fice,m		%arg1(%sr1, %arg0)
228	fice,m		%arg1(%sr1, %arg0)
229	fice,m		%arg1(%sr1, %arg0)
230	fice,m		%arg1(%sr1, %arg0)
231	fice,m		%arg1(%sr1, %arg0)
232	fice,m		%arg1(%sr1, %arg0)
233	fice,m		%arg1(%sr1, %arg0)
234	fice,m		%arg1(%sr1, %arg0)
235	fice,m		%arg1(%sr1, %arg0)
236	fice,m		%arg1(%sr1, %arg0)
237	fice,m		%arg1(%sr1, %arg0)
238	fice,m		%arg1(%sr1, %arg0)
239	fice,m		%arg1(%sr1, %arg0)
240	fice,m		%arg1(%sr1, %arg0)
241	fice,m		%arg1(%sr1, %arg0)
242	addib,COND(>)	-16, %arg2, fioneloop1
243	fice,m		%arg1(%sr1, %arg0)
244
245	/* Check if done */
246	cmpb,COND(=),n	%arg2, %r0, fisync	/* Predict branch taken */
247
248fioneloop2:
249	addib,COND(>)	-1, %arg2, fioneloop2	/* Outer loop count decr */
250	fice,m		%arg1(%sr1, %arg0)	/* Fice for one loop */
251
252fisync:
253	sync
254	mtsm		%r22			/* restore I-bit */
255	bv		%r0(%r2)
256	nop
257	.exit
258
259	.procend
260ENDPROC(flush_instruction_cache_local)
261
262
263	.import cache_info, data
264ENTRY(flush_data_cache_local)
265	.proc
266	.callinfo NO_CALLS
267	.entry
268
269	load32		cache_info, %r1
270
271	/* Flush Data Cache */
272
273	LDREG		DCACHE_BASE(%r1), %arg0
274	LDREG		DCACHE_STRIDE(%r1), %arg1
275	LDREG		DCACHE_COUNT(%r1), %arg2
276	LDREG		DCACHE_LOOP(%r1), %arg3
277	rsm		PSW_SM_I, %r22		/* No mmgt ops during loop*/
278	mtsp		%r0, %sr1
279	addib,COND(=)		-1, %arg3, fdoneloop	/* Preadjust and test */
280	movb,<,n	%arg3, %r31, fdsync	/* If loop < 0, do sync */
281
282fdmanyloop:					/* Loop if LOOP >= 2 */
283	addib,COND(>)		-1, %r31, fdmanyloop	/* Adjusted inner loop decr */
284	fdce		%r0(%sr1, %arg0)
285	fdce,m		%arg1(%sr1, %arg0)	/* Last fdce and addr adjust */
286	movb,tr		%arg3, %r31, fdmanyloop	/* Re-init inner loop count */
287	addib,COND(<=),n	-1, %arg2, fdsync	/* Outer loop decr */
288
289fdoneloop:					/* Loop if LOOP = 1 */
290	/* Some implementations may flush with a single fdce instruction */
291	cmpib,COND(>>=),n	15, %arg2, fdoneloop2
292
293fdoneloop1:
294	fdce,m		%arg1(%sr1, %arg0)
295	fdce,m		%arg1(%sr1, %arg0)
296	fdce,m		%arg1(%sr1, %arg0)
297	fdce,m		%arg1(%sr1, %arg0)
298	fdce,m		%arg1(%sr1, %arg0)
299	fdce,m		%arg1(%sr1, %arg0)
300	fdce,m		%arg1(%sr1, %arg0)
301	fdce,m		%arg1(%sr1, %arg0)
302	fdce,m		%arg1(%sr1, %arg0)
303	fdce,m		%arg1(%sr1, %arg0)
304	fdce,m		%arg1(%sr1, %arg0)
305	fdce,m		%arg1(%sr1, %arg0)
306	fdce,m		%arg1(%sr1, %arg0)
307	fdce,m		%arg1(%sr1, %arg0)
308	fdce,m		%arg1(%sr1, %arg0)
309	addib,COND(>)	-16, %arg2, fdoneloop1
310	fdce,m		%arg1(%sr1, %arg0)
311
312	/* Check if done */
313	cmpb,COND(=),n	%arg2, %r0, fdsync	/* Predict branch taken */
314
315fdoneloop2:
316	addib,COND(>)	-1, %arg2, fdoneloop2	/* Outer loop count decr */
317	fdce,m		%arg1(%sr1, %arg0)	/* Fdce for one loop */
318
319fdsync:
320	syncdma
321	sync
322	mtsm		%r22			/* restore I-bit */
323	bv		%r0(%r2)
324	nop
325	.exit
326
327	.procend
328ENDPROC(flush_data_cache_local)
329
330	.align	16
331
332/* Macros to serialize TLB purge operations on SMP.  */
333
334	.macro	tlb_lock	la,flags,tmp
335#ifdef CONFIG_SMP
336	ldil		L%pa_tlb_lock,%r1
337	ldo		R%pa_tlb_lock(%r1),\la
338	rsm		PSW_SM_I,\flags
3391:	LDCW		0(\la),\tmp
340	cmpib,<>,n	0,\tmp,3f
3412:	ldw		0(\la),\tmp
342	cmpb,<>		%r0,\tmp,1b
343	nop
344	b,n		2b
3453:
346#endif
347	.endm
348
349	.macro	tlb_unlock	la,flags,tmp
350#ifdef CONFIG_SMP
351	ldi		1,\tmp
352	stw		\tmp,0(\la)
353	mtsm		\flags
354#endif
355	.endm
356
/*
 * Clear page using kernel mapping.
 *
 * In:  %r26 = address of the page to zero
 * Writes PAGE_SIZE bytes of zero starting at %r26, 128 bytes per loop
 * pass on 64-bit kernels (std) and 64 bytes per pass on 32-bit
 * kernels (stw).
 * Clobbers: %r1, %r26.
 */

ENTRY(clear_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

#ifdef CONFIG_64BIT

	/* Unroll the loop.  */
	ldi		(PAGE_SIZE / 128), %r1

1:
	std		%r0, 0(%r26)
	std		%r0, 8(%r26)
	std		%r0, 16(%r26)
	std		%r0, 24(%r26)
	std		%r0, 32(%r26)
	std		%r0, 40(%r26)
	std		%r0, 48(%r26)
	std		%r0, 56(%r26)
	std		%r0, 64(%r26)
	std		%r0, 72(%r26)
	std		%r0, 80(%r26)
	std		%r0, 88(%r26)
	std		%r0, 96(%r26)
	std		%r0, 104(%r26)
	std		%r0, 112(%r26)
	std		%r0, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * Note that until (if) we start saving the full 64-bit register
	 * values on interrupt, we can't use std on a 32 bit kernel.
	 */
	ldi		(PAGE_SIZE / 64), %r1

1:
	stw		%r0, 0(%r26)
	stw		%r0, 4(%r26)
	stw		%r0, 8(%r26)
	stw		%r0, 12(%r26)
	stw		%r0, 16(%r26)
	stw		%r0, 20(%r26)
	stw		%r0, 24(%r26)
	stw		%r0, 28(%r26)
	stw		%r0, 32(%r26)
	stw		%r0, 36(%r26)
	stw		%r0, 40(%r26)
	stw		%r0, 44(%r26)
	stw		%r0, 48(%r26)
	stw		%r0, 52(%r26)
	stw		%r0, 56(%r26)
	stw		%r0, 60(%r26)

	addib,COND(>),n	-1, %r1, 1b
	ldo		64(%r26), %r26
#endif
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(clear_page_asm)

/*
 * Copy page using kernel mapping.
 *
 * In:  %r26 = destination page address
 *      %r25 = source page address
 * Copies PAGE_SIZE bytes, 128 bytes per loop pass on 64-bit kernels
 * (ldd/std) and 64 bytes per pass on 32-bit kernels (ldw/stw).
 * Clobbers: %r1, %r19-%r22, %r25, %r26.
 */

ENTRY(copy_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * Prefetch doesn't improve performance on rp3440.
	 * GCC probably can do this just as well...
	 */

	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		0(%r25), %r19
	ldd		8(%r25), %r20

	ldd		16(%r25), %r21
	ldd		24(%r25), %r22
	std		%r19, 0(%r26)
	std		%r20, 8(%r26)

	ldd		32(%r25), %r19
	ldd		40(%r25), %r20
	std		%r21, 16(%r26)
	std		%r22, 24(%r26)

	ldd		48(%r25), %r21
	ldd		56(%r25), %r22
	std		%r19, 32(%r26)
	std		%r20, 40(%r26)

	ldd		64(%r25), %r19
	ldd		72(%r25), %r20
	std		%r21, 48(%r26)
	std		%r22, 56(%r26)

	ldd		80(%r25), %r21
	ldd		88(%r25), %r22
	std		%r19, 64(%r26)
	std		%r20, 72(%r26)

	ldd		 96(%r25), %r19
	ldd		104(%r25), %r20
	std		%r21, 80(%r26)
	std		%r22, 88(%r26)

	ldd		112(%r25), %r21
	ldd		120(%r25), %r22
	ldo		128(%r25), %r25
	std		%r19, 96(%r26)
	std		%r20, 104(%r26)

	std		%r21, 112(%r26)
	std		%r22, 120(%r26)

	/* Note reverse branch hint for addib is taken.  */
	addib,COND(>),n	-1, %r1, 1b
	ldo		128(%r26), %r26

#else

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling).
	 * Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */
	ldw		0(%r25), %r19		/* first word is loaded ahead of the loop */
	ldi		(PAGE_SIZE / 64), %r1

1:
	ldw		4(%r25), %r20
	ldw		8(%r25), %r21
	ldw		12(%r25), %r22
	stw		%r19, 0(%r26)
	stw		%r20, 4(%r26)
	stw		%r21, 8(%r26)
	stw		%r22, 12(%r26)
	ldw		16(%r25), %r19
	ldw		20(%r25), %r20
	ldw		24(%r25), %r21
	ldw		28(%r25), %r22
	stw		%r19, 16(%r26)
	stw		%r20, 20(%r26)
	stw		%r21, 24(%r26)
	stw		%r22, 28(%r26)
	ldw		32(%r25), %r19
	ldw		36(%r25), %r20
	ldw		40(%r25), %r21
	ldw		44(%r25), %r22
	stw		%r19, 32(%r26)
	stw		%r20, 36(%r26)
	stw		%r21, 40(%r26)
	stw		%r22, 44(%r26)
	ldw		48(%r25), %r19
	ldw		52(%r25), %r20
	ldw		56(%r25), %r21
	ldw		60(%r25), %r22
	stw		%r19, 48(%r26)
	stw		%r20, 52(%r26)
	ldo		64(%r25), %r25
	stw		%r21, 56(%r26)
	stw		%r22, 60(%r26)
	ldo		64(%r26), %r26
	addib,COND(>),n	-1, %r1, 1b
	ldw		0(%r25), %r19		/* first word of the next pass (taken branch only) */
#endif
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(copy_page_asm)

/*
 * NOTE: Code in clear_user_page has a hard coded dependency on the
 *       maximum alias boundary being 4 Mb. We've been assured by the
 *       parisc chip designers that there will not ever be a parisc
 *       chip with a larger alias boundary (Never say never :-) ).
 *
 *       Subtle: the dtlb miss handlers support the temp alias region by
 *       "knowing" that if a dtlb miss happens within the temp alias
 *       region it must have occurred while in clear_user_page. Since
 *       this routine makes use of processor local translations, we
 *       don't want to insert them into the kernel page table. Instead,
 *       we load up some general registers (they need to be registers
 *       which aren't shadowed) with the physical page numbers (preshifted
 *       for tlb insertion) needed to insert the translations. When we
 *       miss on the translation, the dtlb miss handler inserts the
 *       translation into the tlb using these values:
 *
 *          %r26 physical page (shifted for tlb insert) of "to" translation
 *          %r23 physical page (shifted for tlb insert) of "from" translation
 */

	/*
	 * We can't do this since copy_user_page is used to bring in
	 * file data that might have instructions. Since the data would
	 * then need to be flushed out so the i-fetch can see it, it
	 * makes more sense to just copy through the kernel translation
	 * and flush it.
	 *
	 * I'm still keeping this around because it may be possible to
	 * use it if more information is passed into copy_user_page().
	 * Have to do some measurements to see if it is worthwhile to
	 * lobby for such a change.
	 *
	 */

/*
 * copy_user_page_asm
 *
 * In:  %r26 = `to' address, %r25 = `from' address; __PAGE_OFFSET is
 *             subtracted from both to obtain physical addresses
 *      %r24 = value whose low 22 bits select the alias address inside
 *             the TMPALIAS region (presumably the user virtual
 *             address -- confirm against the caller)
 * Copies PAGE_SIZE bytes from the `from' alias (%r29) to the `to'
 * alias (%r28).
 * Clobbers: %r1, %r19-%r23, %r26, %r28, %r29.
 */
ENTRY(copy_user_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	/* Convert virtual `to' and `from' addresses to physical addresses.
	   Move `from' physical address to non shadowed register.  */
	ldil		L%(__PAGE_OFFSET), %r1
	sub		%r26, %r1, %r26
	sub		%r25, %r1, %r23

	ldil		L%(TMPALIAS_MAP_START), %r28
	/* FIXME for different page sizes != 4k */
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
#endif
	extrd,u		%r26,56,32, %r26	/* convert phys addr to tlb insert format */
	extrd,u		%r23,56,32, %r23	/* convert phys addr to tlb insert format */
	depd		%r24,63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,12, %r28		/* Clear any offset bits */
	copy		%r28, %r29
	depdi		1, 41,1, %r29		/* Form aliased virtual address 'from' */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	extrw,u		%r23, 24,25, %r23	/* convert phys addr to tlb insert format */
	depw		%r24, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,12, %r28		/* Clear any offset bits */
	copy		%r28, %r29
	depwi		1, 9,1, %r29		/* Form aliased virtual address 'from' */
#endif

	/* Purge any old translations */

#ifdef CONFIG_PA20
	pdtlb,l		0(%r28)
	pdtlb,l		0(%r29)
#else
	tlb_lock	%r20,%r21,%r22
	pdtlb		0(%r28)
	pdtlb		0(%r29)
	tlb_unlock	%r20,%r21,%r22
#endif

#ifdef CONFIG_64BIT
	/* PA8x00 CPUs can consume 2 loads or 1 store per cycle.
	 * Unroll the loop by hand and arrange insn appropriately.
	 * GCC probably can do this just as well.
	 */

	ldd		0(%r29), %r19		/* first doubleword is loaded ahead of the loop */
	ldi		(PAGE_SIZE / 128), %r1

1:	ldd		8(%r29), %r20

	ldd		16(%r29), %r21
	ldd		24(%r29), %r22
	std		%r19, 0(%r28)
	std		%r20, 8(%r28)

	ldd		32(%r29), %r19
	ldd		40(%r29), %r20
	std		%r21, 16(%r28)
	std		%r22, 24(%r28)

	ldd		48(%r29), %r21
	ldd		56(%r29), %r22
	std		%r19, 32(%r28)
	std		%r20, 40(%r28)

	ldd		64(%r29), %r19
	ldd		72(%r29), %r20
	std		%r21, 48(%r28)
	std		%r22, 56(%r28)

	ldd		80(%r29), %r21
	ldd		88(%r29), %r22
	std		%r19, 64(%r28)
	std		%r20, 72(%r28)

	ldd		 96(%r29), %r19
	ldd		104(%r29), %r20
	std		%r21, 80(%r28)
	std		%r22, 88(%r28)

	ldd		112(%r29), %r21
	ldd		120(%r29), %r22
	std		%r19, 96(%r28)
	std		%r20, 104(%r28)

	ldo		128(%r29), %r29
	std		%r21, 112(%r28)
	std		%r22, 120(%r28)
	ldo		128(%r28), %r28

	/* conditional branches nullify on forward taken branch, and on
	 * non-taken backward branch. Note that .+4 is a backwards branch.
	 * The ldd should only get executed if the branch is taken.
	 */
	addib,COND(>),n	-1, %r1, 1b		/* bundle 10 */
	ldd		0(%r29), %r19		/* start next loads */

#else
	ldi		(PAGE_SIZE / 64), %r1

	/*
	 * This loop is optimized for PCXL/PCXL2 ldw/ldw and stw/stw
	 * bundles (very restricted rules for bundling). It probably
	 * does OK on PCXU and better, but we could do better with
	 * ldd/std instructions. Note that until (if) we start saving
	 * the full 64 bit register values on interrupt, we can't
	 * use ldd/std on a 32 bit kernel.
	 */

1:	ldw		0(%r29), %r19
	ldw		4(%r29), %r20
	ldw		8(%r29), %r21
	ldw		12(%r29), %r22
	stw		%r19, 0(%r28)
	stw		%r20, 4(%r28)
	stw		%r21, 8(%r28)
	stw		%r22, 12(%r28)
	ldw		16(%r29), %r19
	ldw		20(%r29), %r20
	ldw		24(%r29), %r21
	ldw		28(%r29), %r22
	stw		%r19, 16(%r28)
	stw		%r20, 20(%r28)
	stw		%r21, 24(%r28)
	stw		%r22, 28(%r28)
	ldw		32(%r29), %r19
	ldw		36(%r29), %r20
	ldw		40(%r29), %r21
	ldw		44(%r29), %r22
	stw		%r19, 32(%r28)
	stw		%r20, 36(%r28)
	stw		%r21, 40(%r28)
	stw		%r22, 44(%r28)
	ldw		48(%r29), %r19
	ldw		52(%r29), %r20
	ldw		56(%r29), %r21
	ldw		60(%r29), %r22
	stw		%r19, 48(%r28)
	stw		%r20, 52(%r28)
	stw		%r21, 56(%r28)
	stw		%r22, 60(%r28)
	ldo		64(%r28), %r28

	addib,COND(>)		-1, %r1,1b
	ldo		64(%r29), %r29
#endif

	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(copy_user_page_asm)

/*
 * clear_user_page_asm
 *
 * In:  %r26 = page address; converted with tophys_r1 and then to tlb
 *             insert format
 *      %r25 = value whose low 22 bits select the alias address inside
 *             the TMPALIAS region (presumably the user virtual
 *             address -- confirm against the caller)
 * Zeroes PAGE_SIZE bytes through the alias address built in %r28.
 * Clobbers: %r1, %r20-%r22 (when tlb_lock expands), %r26, %r28.
 */
ENTRY(clear_user_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	tophys_r1	%r26

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
	/* FIXME: page size dependent */
#endif
	extrd,u		%r26, 56,32, %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,12, %r28		/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,12, %r28		/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		0(%r28)
#else
	tlb_lock	%r20,%r21,%r22
	pdtlb		0(%r28)
	tlb_unlock	%r20,%r21,%r22
#endif

#ifdef CONFIG_64BIT
	ldi		(PAGE_SIZE / 128), %r1

	/* PREFETCH (Write) has not (yet) been proven to help here */
	/* #define	PREFETCHW_OP	ldd		256(%0), %r0 */

1:	std		%r0, 0(%r28)
	std		%r0, 8(%r28)
	std		%r0, 16(%r28)
	std		%r0, 24(%r28)
	std		%r0, 32(%r28)
	std		%r0, 40(%r28)
	std		%r0, 48(%r28)
	std		%r0, 56(%r28)
	std		%r0, 64(%r28)
	std		%r0, 72(%r28)
	std		%r0, 80(%r28)
	std		%r0, 88(%r28)
	std		%r0, 96(%r28)
	std		%r0, 104(%r28)
	std		%r0, 112(%r28)
	std		%r0, 120(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		128(%r28), %r28

#else	/* ! CONFIG_64BIT */
	ldi		(PAGE_SIZE / 64), %r1

1:	stw		%r0, 0(%r28)
	stw		%r0, 4(%r28)
	stw		%r0, 8(%r28)
	stw		%r0, 12(%r28)
	stw		%r0, 16(%r28)
	stw		%r0, 20(%r28)
	stw		%r0, 24(%r28)
	stw		%r0, 28(%r28)
	stw		%r0, 32(%r28)
	stw		%r0, 36(%r28)
	stw		%r0, 40(%r28)
	stw		%r0, 44(%r28)
	stw		%r0, 48(%r28)
	stw		%r0, 52(%r28)
	stw		%r0, 56(%r28)
	stw		%r0, 60(%r28)
	addib,COND(>)		-1, %r1, 1b
	ldo		64(%r28), %r28
#endif	/* CONFIG_64BIT */

	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(clear_user_page_asm)

/*
 * flush_dcache_page_asm
 *
 * In:  %r26 = physical address of the page; converted to tlb insert
 *             format
 *      %r25 = value whose low 22 bits select the alias address inside
 *             the TMPALIAS region (presumably the user virtual
 *             address -- confirm against the caller)
 * Flushes one page of data cache lines with fdc through the alias
 * address built in %r28, stepping by dcache_stride, then purges the
 * data TLB entry for the alias.
 *
 * The stride is kept in %r31 rather than %r1: this file's NOTE on
 * fdc/fic/pdc with base modification requires index and base registers
 * that are not shadowed, and %r1 is a shadowed register.
 *
 * Clobbers: %r1, %r20-%r22 (when tlb_lock expands), %r25, %r26, %r28,
 *           %r31.
 */
ENTRY(flush_dcache_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
	/* FIXME: page size dependent */
#endif
	extrd,u		%r26, 56,32, %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,12, %r28		/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,12, %r28		/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pdtlb,l		0(%r28)
#else
	tlb_lock	%r20,%r21,%r22
	pdtlb		0(%r28)
	tlb_unlock	%r20,%r21,%r22
#endif

	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r31	/* stride in a non-shadowed reg */

	/* %r25 = alias + PAGE_SIZE - stride, i.e. address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25


1:      fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	fdc,m		%r31(%r28)
	cmpb,COND(<<)		%r28, %r25,1b
	fdc,m		%r31(%r28)

	sync

	/* Purge the data TLB entry for the alias page (%r25 lies within it) */
#ifdef CONFIG_PA20
	pdtlb,l		0(%r25)
#else
	tlb_lock	%r20,%r21,%r22
	pdtlb		0(%r25)
	tlb_unlock	%r20,%r21,%r22
#endif

	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(flush_dcache_page_asm)

/*
 * flush_icache_page_asm
 *
 * In:  %r26 = physical address of the page; converted to tlb insert
 *             format
 *      %r25 = value whose low 22 bits select the alias address inside
 *             the TMPALIAS region (presumably the user virtual
 *             address -- confirm against the caller)
 * Flushes one page of instruction cache lines with fic through the
 * alias address built in %r28, stepping by icache_stride, then purges
 * the instruction TLB entry for the alias.
 *
 * The stride is kept in %r31 rather than %r1: this file's NOTE on
 * fdc/fic/pdc with base modification requires index and base registers
 * that are not shadowed, and %r1 is a shadowed register.
 *
 * Clobbers: %r1, %r20-%r22 (when tlb_lock expands), %r25, %r26, %r28,
 *           %r31.
 */
ENTRY(flush_icache_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%(TMPALIAS_MAP_START), %r28
#ifdef CONFIG_64BIT
#if (TMPALIAS_MAP_START >= 0x80000000)
	depdi		0, 31,32, %r28		/* clear any sign extension */
	/* FIXME: page size dependent */
#endif
	extrd,u		%r26, 56,32, %r26	/* convert phys addr to tlb insert format */
	depd		%r25, 63,22, %r28	/* Form aliased virtual address 'to' */
	depdi		0, 63,12, %r28		/* Clear any offset bits */
#else
	extrw,u		%r26, 24,25, %r26	/* convert phys addr to tlb insert format */
	depw		%r25, 31,22, %r28	/* Form aliased virtual address 'to' */
	depwi		0, 31,12, %r28		/* Clear any offset bits */
#endif

	/* Purge any old translation */

#ifdef CONFIG_PA20
	pitlb,l         %r0(%sr4,%r28)
#else
	tlb_lock        %r20,%r21,%r22
	pitlb           (%sr4,%r28)
	tlb_unlock      %r20,%r21,%r22
#endif

	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r31	/* stride in a non-shadowed reg */

	/* %r25 = alias + PAGE_SIZE - stride, i.e. address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r28, %r25, %r25
	sub		%r25, %r31, %r25


	/* fic only has the type 26 form on PA1.1, requiring an
	 * explicit space specification, so use %sr4 */
1:      fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	fic,m		%r31(%sr4,%r28)
	cmpb,COND(<<)		%r28, %r25,1b
	fic,m		%r31(%sr4,%r28)

	sync

	/* Purge the instruction TLB entry for the alias page (%r25 lies within it) */
#ifdef CONFIG_PA20
	pitlb,l         %r0(%sr4,%r25)
#else
	tlb_lock        %r20,%r21,%r22
	pitlb           (%sr4,%r25)
	tlb_unlock      %r20,%r21,%r22
#endif

	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(flush_icache_page_asm)

/*
 * flush_kernel_dcache_page_asm
 *
 * In:  %r26 = address of the page to flush
 * Issues fdc over one page starting at %r26, stepping by
 * dcache_stride (16 lines per loop pass), followed by sync.
 * Clobbers: %r1, %r23, %r25, %r26.
 */
ENTRY(flush_kernel_dcache_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

	/* %r25 = %r26 + PAGE_SIZE - stride, i.e. address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25


1:      fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	fdc,m		%r23(%r26)
	cmpb,COND(<<)		%r26, %r25,1b
	fdc,m		%r23(%r26)

	sync
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(flush_kernel_dcache_page_asm)

/*
 * purge_kernel_dcache_page_asm
 *
 * In:  %r26 = address of the page to purge
 * Issues pdc over one page starting at %r26, stepping by
 * dcache_stride (16 lines per loop pass), followed by sync.
 * Clobbers: %r1, %r23, %r25, %r26.
 */
ENTRY(purge_kernel_dcache_page_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23

	/* %r25 = %r26 + PAGE_SIZE - stride, i.e. address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25

1:      pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	pdc,m		%r23(%r26)
	cmpb,COND(<<)		%r26, %r25, 1b
	pdc,m		%r23(%r26)

	sync
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(purge_kernel_dcache_page_asm)

/*
 * flush_user_dcache_range_asm
 *
 * In:  %r26 = start address, %r25 = end address (loop runs while
 *             %r26 is unsigned-less-than %r25)
 * Rounds the start down to a dcache_stride boundary and issues fdc
 * through %sr3 one line at a time, followed by sync.
 * Clobbers: %r1, %r21, %r23, %r26.
 */
ENTRY(flush_user_dcache_range_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* stride - 1 */
	ANDCM		%r26, %r21, %r26	/* clear the low bits selected by stride - 1 */

1:      cmpb,COND(<<),n	%r26, %r25, 1b
	fdc,m		%r23(%sr3, %r26)

	sync
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(flush_user_dcache_range_asm)

/*
 * flush_kernel_dcache_range_asm
 *
 * In:  %r26 = start address, %r25 = end address (loop runs while
 *             %r26 is unsigned-less-than %r25)
 * Rounds the start down to a dcache_stride boundary and issues fdc
 * one line at a time, followed by sync and syncdma.
 * Clobbers: %r1, %r21, %r23, %r26.
 */
ENTRY(flush_kernel_dcache_range_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%dcache_stride, %r1
	ldw		R%dcache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* stride - 1 */
	ANDCM		%r26, %r21, %r26	/* clear the low bits selected by stride - 1 */

1:      cmpb,COND(<<),n	%r26, %r25,1b
	fdc,m		%r23(%r26)

	sync
	syncdma
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(flush_kernel_dcache_range_asm)

/*
 * flush_user_icache_range_asm
 *
 * In:  %r26 = start address, %r25 = end address (loop runs while
 *             %r26 is unsigned-less-than %r25)
 * Rounds the start down to an icache_stride boundary and issues fic
 * through %sr3 one line at a time, followed by sync.
 * Clobbers: %r1, %r21, %r23, %r26.
 */
ENTRY(flush_user_icache_range_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* stride - 1 */
	ANDCM		%r26, %r21, %r26	/* clear the low bits selected by stride - 1 */

1:      cmpb,COND(<<),n	%r26, %r25,1b
	fic,m		%r23(%sr3, %r26)

	sync
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(flush_user_icache_range_asm)

/*
 * flush_kernel_icache_page
 *
 * In:  %r26 = address of the page to flush
 * Issues fic through %sr4 over one page starting at %r26, stepping by
 * icache_stride (16 lines per loop pass), followed by sync.
 * Clobbers: %r1, %r23, %r25, %r26.
 */
ENTRY(flush_kernel_icache_page)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23

	/* %r25 = %r26 + PAGE_SIZE - stride, i.e. address of the last line */
#ifdef CONFIG_64BIT
	depdi,z		1, 63-PAGE_SHIFT,1, %r25
#else
	depwi,z		1, 31-PAGE_SHIFT,1, %r25
#endif
	add		%r26, %r25, %r25
	sub		%r25, %r23, %r25


1:      fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	fic,m		%r23(%sr4, %r26)
	cmpb,COND(<<)		%r26, %r25, 1b
	fic,m		%r23(%sr4, %r26)

	sync
	bv		%r0(%r2)
	nop
	.exit

	.procend
ENDPROC(flush_kernel_icache_page)

/*
 * flush_kernel_icache_range_asm
 *
 * In:  %r26 = start address, %r25 = end address (loop runs while
 *             %r26 is unsigned-less-than %r25)
 * Rounds the start down to an icache_stride boundary and issues fic
 * through %sr4 one line at a time, followed by sync.
 * Clobbers: %r1, %r21, %r23, %r26.
 */
ENTRY(flush_kernel_icache_range_asm)
	.proc
	.callinfo NO_CALLS
	.entry

	ldil		L%icache_stride, %r1
	ldw		R%icache_stride(%r1), %r23
	ldo		-1(%r23), %r21		/* stride - 1 */
	ANDCM		%r26, %r21, %r26	/* clear the low bits selected by stride - 1 */

1:      cmpb,COND(<<),n	%r26, %r25, 1b
	fic,m		%r23(%sr4, %r26)

	sync
	bv		%r0(%r2)
	nop
	.exit
	.procend
ENDPROC(flush_kernel_icache_range_asm)

1194	/* align should cover use of rfi in disable_sr_hashing_asm and
1195	 * srdis_done.
1196	 */
1197	.align	256
1198ENTRY(disable_sr_hashing_asm)
1199	.proc
1200	.callinfo NO_CALLS
1201	.entry
1202
1203	/*
1204	 * Switch to real mode
1205	 */
1206	/* pcxt_ssm_bug */
1207	rsm		PSW_SM_I, %r0
1208	load32		PA(1f), %r1
1209	nop
1210	nop
1211	nop
1212	nop
1213	nop
1214
1215	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1216	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1217	mtctl		%r0, %cr17		/* Clear IIASQ head */
1218	mtctl		%r1, %cr18		/* IIAOQ head */
1219	ldo		4(%r1), %r1
1220	mtctl		%r1, %cr18		/* IIAOQ tail */
1221	load32		REAL_MODE_PSW, %r1
1222	mtctl		%r1, %ipsw
1223	rfi
1224	nop
1225
12261:      cmpib,=,n	SRHASH_PCXST, %r26,srdis_pcxs
1227	cmpib,=,n	SRHASH_PCXL, %r26,srdis_pcxl
1228	cmpib,=,n	SRHASH_PA20, %r26,srdis_pa20
1229	b,n		srdis_done
1230
1231srdis_pcxs:
1232
1233	/* Disable Space Register Hashing for PCXS,PCXT,PCXT' */
1234
1235	.word		0x141c1a00		/* mfdiag %dr0, %r28 */
1236	.word		0x141c1a00		/* must issue twice */
1237	depwi		0,18,1, %r28		/* Clear DHE (dcache hash enable) */
1238	depwi		0,20,1, %r28		/* Clear IHE (icache hash enable) */
1239	.word		0x141c1600		/* mtdiag %r28, %dr0 */
1240	.word		0x141c1600		/* must issue twice */
1241	b,n		srdis_done
1242
1243srdis_pcxl:
1244
1245	/* Disable Space Register Hashing for PCXL */
1246
1247	.word		0x141c0600		/* mfdiag %dr0, %r28 */
1248	depwi           0,28,2, %r28		/* Clear DHASH_EN & IHASH_EN */
1249	.word		0x141c0240		/* mtdiag %r28, %dr0 */
1250	b,n		srdis_done
1251
1252srdis_pa20:
1253
1254	/* Disable Space Register Hashing for PCXU,PCXU+,PCXW,PCXW+,PCXW2 */
1255
1256	.word		0x144008bc		/* mfdiag %dr2, %r28 */
1257	depdi		0, 54,1, %r28		/* clear DIAG_SPHASH_ENAB (bit 54) */
1258	.word		0x145c1840		/* mtdiag %r28, %dr2 */
1259
1260
1261srdis_done:
1262	/* Switch back to virtual mode */
1263	rsm		PSW_SM_I, %r0		/* prep to load iia queue */
1264	load32 	   	2f, %r1
1265	nop
1266	nop
1267	nop
1268	nop
1269	nop
1270
1271	rsm		PSW_SM_Q, %r0		/* prep to load iia queue */
1272	mtctl		%r0, %cr17		/* Clear IIASQ tail */
1273	mtctl		%r0, %cr17		/* Clear IIASQ head */
1274	mtctl		%r1, %cr18		/* IIAOQ head */
1275	ldo		4(%r1), %r1
1276	mtctl		%r1, %cr18		/* IIAOQ tail */
1277	load32		KERNEL_PSW, %r1
1278	mtctl		%r1, %ipsw
1279	rfi
1280	nop
1281
12822:      bv		%r0(%r2)
1283	nop
1284	.exit
1285
1286	.procend
1287ENDPROC(disable_sr_hashing_asm)
1288
1289	.end
1290