xref: /linux/arch/arm/mm/proc-xsc3.S (revision 33619f0d3ff715a2a5499520967d526ad931d70d)
1/*
2 * linux/arch/arm/mm/proc-xsc3.S
3 *
4 * Original Author: Matthew Gilbert
5 * Current Maintainer: Lennert Buytenhek <buytenh@wantstofly.org>
6 *
7 * Copyright 2004 (C) Intel Corp.
8 * Copyright 2005 (C) MontaVista Software, Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * MMU functions for the Intel XScale3 Core (XSC3).  The XSC3 core is
15 * an extension to Intel's original XScale core that adds the following
16 * features:
17 *
18 * - ARMv6 Supersections
19 * - Low Locality Reference pages (replaces mini-cache)
20 * - 36-bit addressing
21 * - L2 cache
22 * - Cache coherency if chipset supports it
23 *
24 * Based on original XScale code by Nicolas Pitre.
25 */
26
27#include <linux/linkage.h>
28#include <linux/init.h>
29#include <asm/assembler.h>
30#include <asm/hwcap.h>
31#include <mach/hardware.h>
32#include <asm/pgtable.h>
33#include <asm/pgtable-hwdef.h>
34#include <asm/page.h>
35#include <asm/ptrace.h>
36#include "proc-macros.S"
37
38/*
39 * This is the maximum size of an area which will be flushed.  If the
40 * area is larger than this, then we flush the whole cache.
41 */
/* Compared against (end - start) in xsc3_flush_user_cache_range. */
42#define MAX_AREA_SIZE	32768
43
44/*
45 * The cache line size of the L1 I, L1 D and unified L2 cache.
46 */
/* Used as the address stride of every by-line cache loop in this file. */
47#define CACHELINESIZE	32
48
49/*
50 * The size of the L1 D cache.
51 */
/* NOTE(review): not referenced anywhere in the visible part of this file. */
52#define CACHESIZE	32768
53
54/*
55 * This macro is used to wait for a CP15 write and is needed when we
56 * have to ensure that the last operation to the coprocessor was
57 * completed before continuing with operation.
58 */
/*
 * cpwait_ret lr, rd
 *   lr - register holding the address to return to
 *   rd - scratch register, overwritten by the CP15 read
 *
 * The return instruction takes the value just read from CP15 as an
 * operand, so it cannot complete before that read has.  The operand is
 * shifted right by 32, which always yields zero, so pc is loaded with
 * exactly the value of the lr argument.
 */
59	.macro	cpwait_ret, lr, rd
60	mrc	p15, 0, \rd, c2, c0, 0		@ arbitrary read of cp15
61	sub	pc, \lr, \rd, LSR #32		@ wait for completion and
62						@ flush instruction pipeline
63	.endm
64
65/*
66 * This macro cleans and invalidates the entire L1 D cache.
67 */
68
/*
 * clean_d_cache rd, rs
 *   rd - scratch register used as the operand of the cache operation;
 *        clobbered (it holds a negative value when the macro finishes)
 *   rs - accepted for the caller's convenience but never referenced in
 *        the macro body
 *
 * The operand starts at 0x1fe0.  The inner loop adds 0x40000000 until
 * the addition carries out, i.e. four operations per pass with bits
 * [31:30] running 0..3.  The outer loop then subtracts 0x20 and repeats
 * until the result goes negative, i.e. 256 passes.  256 * 4 operations
 * on 32-byte lines covers 32768 bytes, which equals CACHESIZE.
 * The condition flags are clobbered.
 *
 * NOTE(review): presumably bits [31:30] select the way and bits [12:5]
 * the set - confirm against the core manual.
 */
69 	.macro  clean_d_cache rd, rs
70	mov	\rd, #0x1f00
71	orr	\rd, \rd, #0x00e0
721:	mcr	p15, 0, \rd, c7, c14, 2		@ clean/invalidate L1 D line
73	adds	\rd, \rd, #0x40000000
74	bcc	1b
75	subs	\rd, \rd, #0x20
76	bpl	1b
77	.endm
78
79	.text
80
81/*
82 * cpu_xsc3_proc_init()
83 *
84 * Nothing too exciting at the moment
85 */
/* No work is done here: control returns straight to the caller. */
86ENTRY(cpu_xsc3_proc_init)
87	mov	pc, lr
88
89/*
90 * cpu_xsc3_proc_fin()
91 */
/*
 * Read-modify-write of the CP15 control register: the bits in 0x1800
 * and 0x0006 (the I, Z, C and A positions marked in the bit maps below)
 * are cleared and the result is written back.  r0 is clobbered.
 */
92ENTRY(cpu_xsc3_proc_fin)
93	mrc	p15, 0, r0, c1, c0, 0		@ ctrl register
94	bic	r0, r0, #0x1800			@ ...IZ...........
95	bic	r0, r0, #0x0006			@ .............CA.
96	mcr	p15, 0, r0, c1, c0, 0		@ disable caches
97	mov	pc, lr
98
99/*
100 * cpu_xsc3_reset(loc)
101 *
102 * Perform a soft reset of the system.  Put the CPU into the
103 * same state as it would be if it had been reset, and branch
104 * to what would be the reset vector.
105 *
106 * loc: location to jump to for soft reset
107 */
/*
 * Register use, as read from the code below:
 *   r0 - 'loc'; the final instruction jumps to it, so this routine
 *        never returns to its caller
 *   r1 - scratch: first the new CPSR control byte, then the control
 *        register image
 *   ip - passed as the operand of the two invalidate operations without
 *        being loaded in this routine
 *        NOTE(review): presumably the operand value is ignored by these
 *        operations - confirm against the core manual
 *
 * The control register is written twice: first with the bits in 0x3900
 * and 0x0086 cleared, then, after the cache invalidate, with bit 0 (M)
 * cleared as well.
 */
108	.align	5
109ENTRY(cpu_xsc3_reset)
110	mov	r1, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
111	msr	cpsr_c, r1			@ reset CPSR
112	mrc	p15, 0, r1, c1, c0, 0		@ ctrl register
113	bic	r1, r1, #0x3900			@ ..VIZ..S........
114	bic	r1, r1, #0x0086			@ ........B....CA.
115	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
116	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
117	bic	r1, r1, #0x0001			@ ...............M
118	mcr	p15, 0, r1, c1, c0, 0		@ ctrl register
119	@ CAUTION: MMU turned off from this point.  We count on the pipeline
120	@ already containing those two last instructions to survive.
121	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
122	mov	pc, r0
123
124/*
125 * cpu_xsc3_do_idle()
126 *
127 * Cause the processor to idle
128 *
129 * For now we do nothing but go to idle mode for every case
130 *
131 * XScale supports clock switching, but using idle mode support
132 * allows external hardware to react to system state changes.
133 */
/*
 * Writes the value 1 to coprocessor 14, register c7, and returns once
 * execution continues past that write.  r0 is clobbered.
 */
134	.align	5
135
136ENTRY(cpu_xsc3_do_idle)
137	mov	r0, #1
138	mcr	p14, 0, r0, c7, c0, 0		@ go to idle
139	mov	pc, lr
140
141/* ================================= CACHE ================================ */
142
143/*
144 *	flush_icache_all()
145 *
146 *	Unconditionally invalidate the entire icache.
147 */
/*
 * Takes no arguments.  Issues a single whole-I-cache invalidate with a
 * zero operand and returns; r0 is clobbered (zero on return).
 */
148ENTRY(xsc3_flush_icache_all)
149	mov	r0, #0
150	mcr	p15, 0, r0, c7, c5, 0		@ invalidate I cache
151	mov	pc, lr
152ENDPROC(xsc3_flush_icache_all)
153
154/*
155 *	flush_user_cache_all()
156 *
157 *	Invalidate all cache entries in a particular address
158 *	space.
159 */
/*
 * Two entry points share one body: xsc3_flush_user_cache_all has no
 * instructions of its own and runs straight into
 * xsc3_flush_kern_cache_all.
 *
 * Both entries force r2 = VM_EXEC and ip = 0, so the I-cache / barrier
 * tail always runs when entered here.  __flush_whole_cache is also the
 * branch target of xsc3_flush_user_cache_range, which arrives with its
 * own r2 and with ip already zero; in that case the tail runs only if
 * VM_EXEC is set in r2.
 *
 * Clobbers r0 (scratch of clean_d_cache), r2 and ip on the two public
 * entries, and the condition flags.
 */
160ENTRY(xsc3_flush_user_cache_all)
161	/* FALLTHROUGH */
162
163/*
164 *	flush_kern_cache_all()
165 *
166 *	Clean and invalidate the entire cache.
167 */
168ENTRY(xsc3_flush_kern_cache_all)
169	mov	r2, #VM_EXEC
170	mov	ip, #0
171__flush_whole_cache:
172	clean_d_cache r0, r1
173	tst	r2, #VM_EXEC
174	mcrne	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
175	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
176	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
177	mov	pc, lr
178
179/*
180 *	flush_user_cache_range(start, end, vm_flags)
181 *
182 *	Invalidate a range of cache entries in the specified
183 *	address space.
184 *
185 *	- start - start address (may not be aligned)
186 *	- end	- end address (exclusive, may not be aligned)
187 *	- flags	- vm_flags of the mapping (only VM_EXEC is tested)
188 */
/*
 * In:   r0 = start, r1 = end (exclusive), r2 = vm_flags
 * Uses: r3 (range size), ip (zero operand), condition flags
 *
 * Ranges of MAX_AREA_SIZE bytes or more are handed to
 * __flush_whole_cache.  Smaller ranges are walked one cache line at a
 * time: the D line is always cleaned and invalidated, the I line is
 * invalidated only when VM_EXEC is set in r2, and the BTB invalidate
 * and barriers at the end are likewise conditional on VM_EXEC.
 *
 * The start address is rounded down to a cache line boundary before
 * the walk.  Without that, an unaligned start makes the loop counter
 * run ahead of the line boundaries and the loop can exit before the
 * line holding the last bytes of the range has been touched (for
 * example start = 0x1f, end = 0x21 would never reach line 0x20).  The
 * size test above still uses the caller's unaligned start.
 */
189	.align	5
190ENTRY(xsc3_flush_user_cache_range)
191	mov	ip, #0
192	sub	r3, r1, r0			@ calculate total size
193	cmp	r3, #MAX_AREA_SIZE
194	bhs	__flush_whole_cache
	bic	r0, r0, #CACHELINESIZE - 1	@ walk from a line boundary
195
1961:	tst	r2, #VM_EXEC
197	mcrne	p15, 0, r0, c7, c5, 1		@ invalidate L1 I line
198	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
199	add	r0, r0, #CACHELINESIZE
200	cmp	r0, r1
201	blo	1b
202	tst	r2, #VM_EXEC
203	mcrne	p15, 0, ip, c7, c5, 6		@ invalidate BTB
204	mcrne	p15, 0, ip, c7, c10, 4		@ data write barrier
205	mcrne	p15, 0, ip, c7, c5, 4		@ prefetch flush
206	mov	pc, lr
207
208/*
209 *	coherent_kern_range(start, end)
210 *
211 *	Ensure coherency between the I cache and the D cache in the
212 *	region described by start, end.  If you have non-snooping
213 *	Harvard caches, you need to implement this function.
214 *
215 *	- start  - virtual start address
216 *	- end	 - virtual end address
217 *
218 *	Note: single I-cache line invalidation isn't used here since
219 *	it also trashes the mini I-cache used by JTAG debuggers.
220 */
/*
 * Two entry points, one body: xsc3_coherent_kern_range has no
 * instructions of its own and runs into xsc3_coherent_user_range.
 *
 * In:   r0 = start, r1 = end
 * The start is rounded down to a cache line boundary, then every D
 * line up to (but not including) the first line address >= end is
 * cleaned.  Afterwards the whole I cache is invalidated, followed by
 * the two barrier operations.  r0 is clobbered and holds zero on
 * return; the condition flags are clobbered.
 */
221ENTRY(xsc3_coherent_kern_range)
222/* FALLTHROUGH */
223ENTRY(xsc3_coherent_user_range)
224	bic	r0, r0, #CACHELINESIZE - 1
2251:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
226	add	r0, r0, #CACHELINESIZE
227	cmp	r0, r1
228	blo	1b
229	mov	r0, #0
230	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
231	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
232	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
233	mov	pc, lr
234
235/*
236 *	flush_kern_dcache_area(void *addr, size_t size)
237 *
238 *	Ensure no D cache aliasing occurs, either with itself or
239 *	the I cache.
240 *
241 *	- addr	- kernel address
242 *	- size	- region size
243 */
/*
 * In:   r0 = addr, r1 = size
 * Uses: r0, r1, condition flags (r0 is zero on return)
 *
 * r1 is first turned into the exclusive end address using the caller's
 * addr.  The walk then starts from addr rounded down to a cache line
 * boundary, as the other range routines in this file do, so that an
 * unaligned addr cannot make the loop exit before the line holding the
 * last bytes of the area has been cleaned and invalidated.  After the
 * walk the whole I cache is invalidated and the barriers are issued.
 */
244ENTRY(xsc3_flush_kern_dcache_area)
245	add	r1, r0, r1
	bic	r0, r0, #CACHELINESIZE - 1	@ walk from a line boundary
2461:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
247	add	r0, r0, #CACHELINESIZE
248	cmp	r0, r1
249	blo	1b
250	mov	r0, #0
251	mcr	p15, 0, r0, c7, c5, 0		@ invalidate L1 I cache and BTB
252	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
253	mcr	p15, 0, r0, c7, c5, 4		@ prefetch flush
254	mov	pc, lr
255
256/*
257 *	dma_inv_range(start, end)
258 *
259 *	Invalidate (discard) the specified virtual address range.
260 *	May not write back any entries.  If 'start' or 'end'
261 *	are not cache line aligned, those lines must be written
262 *	back.
263 *
264 *	- start  - virtual start address
265 *	- end	 - virtual end address
266 */
/*
 * In:   r0 = start, r1 = end
 * Local label (no ENTRY): reached from xsc3_dma_map_area.
 *
 * A start or end that is not line aligned shares its cache line with
 * data outside the range, so that line is cleaned (written back)
 * before the invalidate loop discards it.  The bic between the first
 * tst and its mcrne does not update the flags, so the mcrne still acts
 * on the alignment test of the original start address.
 *
 * Clobbers r0 and the condition flags.
 */
267xsc3_dma_inv_range:
268	tst	r0, #CACHELINESIZE - 1
269	bic	r0, r0, #CACHELINESIZE - 1
270	mcrne	p15, 0, r0, c7, c10, 1		@ clean L1 D line
271	tst	r1, #CACHELINESIZE - 1
272	mcrne	p15, 0, r1, c7, c10, 1		@ clean L1 D line
2731:	mcr	p15, 0, r0, c7, c6, 1		@ invalidate L1 D line
274	add	r0, r0, #CACHELINESIZE
275	cmp	r0, r1
276	blo	1b
277	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
278	mov	pc, lr
279
280/*
281 *	dma_clean_range(start, end)
282 *
283 *	Clean the specified virtual address range.
284 *
285 *	- start  - virtual start address
286 *	- end	 - virtual end address
287 */
/*
 * In:   r0 = start, r1 = end
 * Local label (no ENTRY): reached from xsc3_dma_map_area.
 *
 * Rounds start down to a line boundary, cleans each D line up to end,
 * then issues the data write barrier.  Clobbers r0 and the flags.
 */
288xsc3_dma_clean_range:
289	bic	r0, r0, #CACHELINESIZE - 1
2901:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
291	add	r0, r0, #CACHELINESIZE
292	cmp	r0, r1
293	blo	1b
294	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
295	mov	pc, lr
296
297/*
298 *	dma_flush_range(start, end)
299 *
300 *	Clean and invalidate the specified virtual address range.
301 *
302 *	- start  - virtual start address
303 *	- end	 - virtual end address
304 */
/*
 * In:   r0 = start, r1 = end
 *
 * Rounds start down to a line boundary, cleans and invalidates each D
 * line up to end, then issues the data write barrier.  Clobbers r0 and
 * the flags.  Also the fall-back target of xsc3_dma_map_area.
 */
305ENTRY(xsc3_dma_flush_range)
306	bic	r0, r0, #CACHELINESIZE - 1
3071:	mcr	p15, 0, r0, c7, c14, 1		@ clean/invalidate L1 D line
308	add	r0, r0, #CACHELINESIZE
309	cmp	r0, r1
310	blo	1b
311	mcr	p15, 0, r0, c7, c10, 4		@ data write barrier
312	mov	pc, lr
313
314/*
315 *	dma_map_area(start, size, dir)
316 *	- start	- kernel virtual start address
317 *	- size	- size of region
318 *	- dir	- DMA direction
319 */
/*
 * In:   r0 = start, r1 = size, r2 = dir
 *
 * Converts size to an end address in r1, then tail-branches (lr is left
 * untouched, so the chosen routine returns to our caller):
 *   dir == DMA_TO_DEVICE            -> xsc3_dma_clean_range
 *   dir >  DMA_TO_DEVICE (unsigned) -> xsc3_dma_inv_range
 *   dir <  DMA_TO_DEVICE (unsigned) -> xsc3_dma_flush_range
 * NOTE(review): presumably DMA_FROM_DEVICE compares above and
 * DMA_BIDIRECTIONAL below DMA_TO_DEVICE - confirm against the enum.
 */
320ENTRY(xsc3_dma_map_area)
321	add	r1, r1, r0
322	cmp	r2, #DMA_TO_DEVICE
323	beq	xsc3_dma_clean_range
324	bcs	xsc3_dma_inv_range
325	b	xsc3_dma_flush_range
326ENDPROC(xsc3_dma_map_area)
327
328/*
329 *	dma_unmap_area(start, size, dir)
330 *	- start	- kernel virtual start address
331 *	- size	- size of region
332 *	- dir	- DMA direction
333 */
/* Performs no cache maintenance: returns immediately, arguments unused. */
334ENTRY(xsc3_dma_unmap_area)
335	mov	pc, lr
336ENDPROC(xsc3_dma_unmap_area)
337
/*
 * Table of ten pointers to the cache routines defined above; it is
 * referenced from the proc_info records at the end of this file.
 * NOTE(review): the entry order presumably has to match the member
 * order of struct cpu_cache_fns - confirm against asm/cacheflush.h
 * before reordering or inserting entries.
 */
338ENTRY(xsc3_cache_fns)
339	.long	xsc3_flush_icache_all
340	.long	xsc3_flush_kern_cache_all
341	.long	xsc3_flush_user_cache_all
342	.long	xsc3_flush_user_cache_range
343	.long	xsc3_coherent_kern_range
344	.long	xsc3_coherent_user_range
345	.long	xsc3_flush_kern_dcache_area
346	.long	xsc3_dma_map_area
347	.long	xsc3_dma_unmap_area
348	.long	xsc3_dma_flush_range
349
/*
 * In:   r0 = address, r1 = byte count
 *
 * Cleans one D line per iteration, advancing the address and reducing
 * the count by CACHELINESIZE, and loops while the remaining count is
 * still above zero (unsigned).  The test sits at the bottom of the
 * loop, so one line is cleaned even when the count is zero on entry.
 * No barrier is issued here.  Clobbers r0, r1 and the flags.
 */
350ENTRY(cpu_xsc3_dcache_clean_area)
3511:	mcr	p15, 0, r0, c7, c10, 1		@ clean L1 D line
352	add	r0, r0, #CACHELINESIZE
353	subs	r1, r1, #CACHELINESIZE
354	bhi	1b
355	mov	pc, lr
356
357/* =============================== PageTable ============================== */
358
359/*
360 * cpu_xsc3_switch_mm(pgd)
361 *
362 * Set the translation base pointer to be as described by pgd.
363 *
364 * pgd: new page tables
365 */
/*
 * In:   r0 = pgd (new translation table base)
 *
 * Sequence: clean and invalidate the whole L1 D cache (r1 is the
 * scratch register of clean_d_cache), invalidate the I cache and BTB,
 * issue the barriers, OR 0x18 into the table pointer and load it, then
 * invalidate both TLBs.  The routine returns through cpwait_ret, which
 * overwrites ip with a CP15 read and then jumps to lr.
 *
 * ip is used as the operand of the invalidate and barrier operations
 * without being loaded in this routine.
 * NOTE(review): presumably the operand value is ignored by these
 * operations - confirm against the core manual.
 *
 * Clobbers r0, r1, ip and the condition flags.
 */
366	.align	5
367ENTRY(cpu_xsc3_switch_mm)
368	clean_d_cache r1, r2
369	mcr	p15, 0, ip, c7, c5, 0		@ invalidate L1 I cache and BTB
370	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
371	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
372	orr	r0, r0, #0x18			@ cache the page table in L2
373	mcr	p15, 0, r0, c2, c0, 0		@ load page table pointer
374	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
375	cpwait_ret lr, ip
376
377/*
378 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
379 *
380 * Set a PTE and flush it out
381 */
/*
 * Sixteen-word lookup table read by cpu_xsc3_set_pte_ext, which uses
 * (pte & L_PTE_MT_MASK) directly as a byte offset into it.  The word
 * that is fetched is OR-ed into r2 after bits 0x0c of r2 have been
 * cleared.  Each entry is labelled with the memory type it serves.
 * NOTE(review): using the masked value as a byte offset presumably
 * relies on the memory-type field starting at bit 2, so that its values
 * are multiples of four - confirm in asm/pgtable.h.
 */
382cpu_xsc3_mt_table:
383	.long	0x00						@ L_PTE_MT_UNCACHED
384	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_BUFFERABLE
385	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE			@ L_PTE_MT_WRITETHROUGH
386	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_WRITEBACK
387	.long	PTE_EXT_TEX(1) | PTE_BUFFERABLE			@ L_PTE_MT_DEV_SHARED
388	.long	0x00						@ unused
389	.long	0x00						@ L_PTE_MT_MINICACHE (not present)
390	.long	PTE_EXT_TEX(5) | PTE_CACHEABLE | PTE_BUFFERABLE	@ L_PTE_MT_WRITEALLOC (not present?)
391	.long	0x00						@ unused
392	.long	PTE_EXT_TEX(1)					@ L_PTE_MT_DEV_WC
393	.long	0x00						@ unused
394	.long	PTE_CACHEABLE | PTE_BUFFERABLE			@ L_PTE_MT_DEV_CACHED
395	.long	PTE_EXT_TEX(2)					@ L_PTE_MT_DEV_NONSHARED
396	.long	0x00						@ unused
397	.long	0x00						@ unused
398	.long	0x00						@ unused
399
/*
 * cpu_xsc3_set_pte_ext(ptep, pte, ext)
 *
 * r1 carries the Linux pte: it is tested for L_PTE_SHARED and then
 * reduced to its memory-type field, which indexes cpu_xsc3_mt_table.
 * xscale_set_pte_ext_prologue and xscale_set_pte_ext_epilogue are
 * macros that are not defined in this file; what they leave in r2 and
 * what they do with the result is not visible here.
 * NOTE(review): presumably they come from proc-macros.S - confirm.
 *
 * The flags set by the tst are consumed four instructions later by the
 * orrne; the and, adr and ldr in between do not update the flags.
 * ip is clobbered.
 */
400	.align	5
401ENTRY(cpu_xsc3_set_pte_ext)
402	xscale_set_pte_ext_prologue
403
404	tst	r1, #L_PTE_SHARED		@ shared?
405	and	r1, r1, #L_PTE_MT_MASK
406	adr	ip, cpu_xsc3_mt_table
407	ldr	ip, [ip, r1]
408	orrne	r2, r2, #PTE_EXT_COHERENT	@ interlock: mask in coherent bit
409	bic	r2, r2, #0x0c			@ clear old C,B bits
410	orr	r2, r2, ip
411
412	xscale_set_pte_ext_epilogue
413	mov	pc, lr
414
415	.ltorg
416
/*
 * CPU setup routine, reached through the "b __xsc3_setup" slot of the
 * proc_info records at the end of this file.
 *
 * In:   r4 - page table pointer (0x18 is OR-ed in before it is loaded)
 *       lr - return address
 * Out:  r0 - control register image: the current value with the
 *            xsc3_crval clear mask (r5) removed and the set bits (r6)
 *            added.  The image is returned only; this routine does not
 *            write it back to the control register.
 * Uses: r5, r6 (the two words loaded from xsc3_crval), ip (used as a
 *       CP15 operand without being loaded in this routine), flags when
 *       CONFIG_CACHE_XSC3L2 is set.
 *
 * The auxiliary control register update keeps only bit 1 of the old
 * value and sets bit 10; every other bit is written as zero.
 */
417	.align
418
419	__CPUINIT
420
421	.type	__xsc3_setup, #function
422__xsc3_setup:
423	mov	r0, #PSR_F_BIT|PSR_I_BIT|SVC_MODE
424	msr	cpsr_c, r0
425	mcr	p15, 0, ip, c7, c7, 0		@ invalidate L1 caches and BTB
426	mcr	p15, 0, ip, c7, c10, 4		@ data write barrier
427	mcr	p15, 0, ip, c7, c5, 4		@ prefetch flush
428	mcr	p15, 0, ip, c8, c7, 0		@ invalidate I and D TLBs
429	orr	r4, r4, #0x18			@ cache the page table in L2
430	mcr	p15, 0, r4, c2, c0, 0		@ load page table pointer
431
432	mov	r0, #1 << 6			@ cp6 access for early sched_clock
433	mcr	p15, 0, r0, c15, c1, 0		@ write CP access register
434
435	mrc	p15, 0, r0, c1, c0, 1		@ get auxiliary control reg
436	and	r0, r0, #2			@ preserve bit P bit setting
437	orr	r0, r0, #(1 << 10)		@ enable L2 for LLR cache
438	mcr	p15, 0, r0, c1, c0, 1		@ set auxiliary control reg
439
440	adr	r5, xsc3_crval
441	ldmia	r5, {r5, r6}
442
443#ifdef CONFIG_CACHE_XSC3L2
444	mrc	p15, 1, r0, c0, c0, 1		@ get L2 present information
445	ands	r0, r0, #0xf8
446	orrne	r6, r6, #(1 << 26)		@ enable L2 if present
447#endif
448
449	mrc	p15, 0, r0, c1, c0, 0		@ get control register
450	bic	r0, r0, r5			@ ..V. ..R. .... ..A.
451	orr	r0, r0, r6			@ ..VI Z..S .... .C.M (mmu)
452						@ ...I Z..S .... .... (uc)
453	mov	pc, lr
454
455	.size	__xsc3_setup, . - __xsc3_setup
456
/*
 * Control register masks read by __xsc3_setup as two consecutive words:
 * the first is cleared from, the second OR-ed into, the control
 * register image.
 * NOTE(review): the crval macro is not defined in this file; presumably
 * it emits the clear word followed by either mmuset or ucset depending
 * on the kernel configuration - confirm in proc-macros.S.
 */
457	.type	xsc3_crval, #object
458xsc3_crval:
459	crval	clear=0x04002202, mmuset=0x00003905, ucset=0x00001900
460
/*
 * Table of nine pointers: two abort handlers that are defined outside
 * this file, followed by the seven cpu_xsc3_* routines defined above.
 * It is referenced from the proc_info records below.
 * NOTE(review): the entry order presumably has to match the member
 * order of the structure that consumes it - confirm before editing.
 */
461	__INITDATA
462
463/*
464 * Purpose : Function pointers used to access above functions - all calls
465 *	     come through these
466 */
467
468	.type	xsc3_processor_functions, #object
469ENTRY(xsc3_processor_functions)
470	.word	v5t_early_abort
471	.word	legacy_pabort
472	.word	cpu_xsc3_proc_init
473	.word	cpu_xsc3_proc_fin
474	.word	cpu_xsc3_reset
475	.word	cpu_xsc3_do_idle
476	.word	cpu_xsc3_dcache_clean_area
477	.word	cpu_xsc3_switch_mm
478	.word	cpu_xsc3_set_pte_ext
479	.size	xsc3_processor_functions, . - xsc3_processor_functions
480
/*
 * NUL-terminated name strings placed in .rodata.  Both proc_info
 * records below point at all three of them.
 */
481	.section ".rodata"
482
483	.type	cpu_arch_name, #object
484cpu_arch_name:
485	.asciz	"armv5te"
486	.size	cpu_arch_name, . - cpu_arch_name
487
488	.type	cpu_elf_name, #object
489cpu_elf_name:
490	.asciz	"v5"
491	.size	cpu_elf_name, . - cpu_elf_name
492
493	.type	cpu_xsc3_name, #object
494cpu_xsc3_name:
495	.asciz	"XScale-V3 based processor"
496	.size	cpu_xsc3_name, . - cpu_xsc3_name
497
498	.align
499
/*
 * Processor description record placed in .proc.info.init.
 * Contents, in order: CPU id value, CPU id mask, two section-descriptor
 * flag words (the second lacks the bufferable and cacheable bits), a
 * branch instruction to __xsc3_setup, then the architecture name, ELF
 * name, hwcap flags, CPU name and the four function tables.
 * NOTE(review): the layout presumably mirrors struct proc_info_list,
 * and a CPU presumably matches when (cpuid & mask) == value - confirm.
 */
500	.section ".proc.info.init", #alloc, #execinstr
501
502	.type	__xsc3_proc_info,#object
503__xsc3_proc_info:
504	.long	0x69056000
505	.long	0xffffe000
506	.long	PMD_TYPE_SECT | \
507		PMD_SECT_BUFFERABLE | \
508		PMD_SECT_CACHEABLE | \
509		PMD_SECT_AP_WRITE | \
510		PMD_SECT_AP_READ
511	.long	PMD_TYPE_SECT | \
512		PMD_SECT_AP_WRITE | \
513		PMD_SECT_AP_READ
514	b	__xsc3_setup
515	.long	cpu_arch_name
516	.long	cpu_elf_name
517	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
518	.long	cpu_xsc3_name
519	.long	xsc3_processor_functions
520	.long	v4wbi_tlb_fns
521	.long	xsc3_mc_user_fns
522	.long	xsc3_cache_fns
523	.size	__xsc3_proc_info, . - __xsc3_proc_info
524
525/* Note: PXA935 changed its implementor ID from Intel to Marvell */
526
/*
 * Second processor description record.  Every field is the same as in
 * __xsc3_proc_info except the CPU id value, whose top byte is 0x56
 * instead of 0x69.
 */
527	.type	__xsc3_pxa935_proc_info,#object
528__xsc3_pxa935_proc_info:
529	.long	0x56056000
530	.long	0xffffe000
531	.long	PMD_TYPE_SECT | \
532		PMD_SECT_BUFFERABLE | \
533		PMD_SECT_CACHEABLE | \
534		PMD_SECT_AP_WRITE | \
535		PMD_SECT_AP_READ
536	.long	PMD_TYPE_SECT | \
537		PMD_SECT_AP_WRITE | \
538		PMD_SECT_AP_READ
539	b	__xsc3_setup
540	.long	cpu_arch_name
541	.long	cpu_elf_name
542	.long	HWCAP_SWP|HWCAP_HALF|HWCAP_THUMB|HWCAP_FAST_MULT|HWCAP_EDSP
543	.long	cpu_xsc3_name
544	.long	xsc3_processor_functions
545	.long	v4wbi_tlb_fns
546	.long	xsc3_mc_user_fns
547	.long	xsc3_cache_fns
548	.size	__xsc3_pxa935_proc_info, . - __xsc3_pxa935_proc_info
549