xref: /linux/tools/testing/selftests/powerpc/primitives/word-at-a-time.h (revision a13d7201d7deedcbb6ac6efa94a1a7d34d3d79ec)
1 #ifndef _ASM_WORD_AT_A_TIME_H
2 #define _ASM_WORD_AT_A_TIME_H
3 
4 /*
5  * Word-at-a-time interfaces for PowerPC.
6  */
7 
8 #include <linux/kernel.h>
9 #include <asm/asm-compat.h>
10 
11 #ifdef __BIG_ENDIAN__
12 
/*
 * Per-word constants used by the big-endian has_zero() and
 * prep_zero_mask(). The member order must stay in step with the
 * positional initializer in WORD_AT_A_TIME_CONSTANTS.
 */
struct word_at_a_time {
	const unsigned long high_bits;
	const unsigned long low_bits;
};

/* Initializer: high_bits = REPEAT_BYTE(0xfe) + 1, low_bits = REPEAT_BYTE(0x7f) */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0xfe) + 1, REPEAT_BYTE(0x7f) }
18 
19 /* Bit set in the bytes that have a zero */
20 static inline long prep_zero_mask(unsigned long val, unsigned long rhs, const struct word_at_a_time *c)
21 {
22 	unsigned long mask = (val & c->low_bits) + c->low_bits;
23 	return ~(mask | rhs);
24 }
25 
/* Big-endian needs no further processing: the mask is used as-is. */
#define create_zero_mask(bits) (bits)
27 
28 static inline long find_zero(unsigned long mask)
29 {
30 	long leading_zero_bits;
31 
32 	asm (PPC_CNTLZL "%0,%1" : "=r" (leading_zero_bits) : "r" (mask));
33 	return leading_zero_bits >> 3;
34 }
35 
36 static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c)
37 {
38 	unsigned long rhs = val | c->low_bits;
39 	*data = rhs;
40 	return (val + c->high_bits) & ~rhs;
41 }
42 
43 #else
44 
45 #ifdef CONFIG_64BIT
46 
/*
 * The 64-bit little-endian helpers never read any constants, so the
 * structure is empty. It exists so has_zero() and prep_zero_mask() can
 * keep the same parameter list as the other variants.
 */
struct word_at_a_time { };

#define WORD_AT_A_TIME_CONSTANTS { }
52 
53 /* This will give us 0xff for a NULL char and 0x00 elsewhere */
54 static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
55 {
56 	unsigned long ret;
57 	unsigned long zero = 0;
58 
59 	asm("cmpb %0,%1,%2" : "=r" (ret) : "r" (a), "r" (zero));
60 	*bits = ret;
61 
62 	return ret;
63 }
64 
/*
 * No preparation is required on 64-bit little-endian: the value that
 * has_zero() stored in @bits is handed back unchanged. @a and @c are
 * accepted only for interface compatibility.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	(void)a;
	(void)c;

	return bits;
}
69 
70 /* Alan Modra's little-endian strlen tail for 64-bit */
71 static inline unsigned long create_zero_mask(unsigned long bits)
72 {
73 	unsigned long leading_zero_bits;
74 	long trailing_zero_bit_mask;
75 
76 	asm("addi	%1,%2,-1\n\t"
77 	    "andc	%1,%1,%2\n\t"
78 	    "popcntd	%0,%1"
79 		: "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
80 		: "r" (bits));
81 
82 	return leading_zero_bits;
83 }
84 
/*
 * Turn the bit count produced by create_zero_mask() into a byte index
 * by dividing it by the eight bits in a byte.
 */
static inline unsigned long find_zero(unsigned long mask)
{
	const unsigned long byte_index = mask / 8;

	return byte_index;
}
89 
/*
 * Return a word whose low @mask bits are set and whose remaining bits
 * are clear.
 *
 * @mask is a bit count and must be smaller than the width of unsigned
 * long: callers are assumed never to ask for an all-ones result, because
 * shifting by the full width is undefined.
 */
static inline unsigned long zero_bytemask(unsigned long mask)
{
	const unsigned long above = ~0UL << mask;

	return ~above;
}
95 
96 #else	/* 32-bit case */
97 
/*
 * Per-word constants used by the 32-bit little-endian has_zero(). The
 * member order must stay in step with the positional initializer in
 * WORD_AT_A_TIME_CONSTANTS.
 */
struct word_at_a_time {
	const unsigned long one_bits;
	const unsigned long high_bits;
};

/* Initializer: one_bits = REPEAT_BYTE(0x01), high_bits = REPEAT_BYTE(0x80) */
#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
103 
/*
 * The code here is mostly generic for little-endian machines. The best
 * way to count the bytes in a byte mask, however, is likely to be
 * architecture-specific: where a reliably fast bit-count instruction is
 * available, it may well beat the add-and-shift used below.
 */

/*
 * Carl Chatfield / Jan Achrenius G+ version for 32-bit.
 *
 * Maps a byte mask to the number of 0xff bytes it contains:
 *
 *	mask:            000000  0000ff  00ffff  ffffff
 *	after add+shift:      1       1       2       3
 *	after "& mask":       0       1       2       3
 *
 * The final AND corrects the spurious 1 produced for an empty mask.
 */
static inline long count_masked_bytes(long mask)
{
	return ((mask + 0x0ff0001) >> 23) & mask;
}
120 
/*
 * Convert the has_zero() result into a byte mask.
 *
 * (bits - 1) & ~bits keeps exactly the bits below the lowest set bit of
 * @bits. has_zero() marks a byte with its top bit (0x80), so shifting
 * right by seven leaves 0xff in every byte that precedes the first
 * marked byte and zero everywhere else.
 */
static inline unsigned long create_zero_mask(unsigned long bits)
{
	const unsigned long below_lowest = ~bits & (bits - 1);

	return below_lowest >> 7;
}
126 
/*
 * Return the number of 0xff bytes in the byte mask built by
 * create_zero_mask(); the counting itself is done by
 * count_masked_bytes().
 */
static inline unsigned long find_zero(unsigned long mask)
{
	const long nr_bytes = count_masked_bytes(mask);

	return nr_bytes;
}
131 
132 /* Return nonzero if it has a zero */
133 static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
134 {
135 	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
136 	*bits = mask;
137 	return mask;
138 }
139 
/*
 * No preparation is required on 32-bit little-endian: the value that
 * has_zero() stored in @bits is handed back unchanged. @a and @c are
 * accepted only for interface compatibility.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	(void)a;
	(void)c;

	return bits;
}
144 
/* The mask built by create_zero_mask() already is a byte mask; use it directly. */
#define zero_bytemask(bytemask) (bytemask)
147 
148 #endif /* CONFIG_64BIT */
149 
150 #endif /* __BIG_ENDIAN__ */
151 
/*
 * load_unaligned_zeropad() is also used by a selftest, which is built as
 * a userspace program. Some linker scripts seem to discard the .fixup
 * section, so the test code may define FIXUP_SECTION beforehand to select
 * a different section name; ".fixup" is the default.
 */
#if !defined(FIXUP_SECTION)
#define FIXUP_SECTION ".fixup"
#endif
160 
161 static inline unsigned long load_unaligned_zeropad(const void *addr)
162 {
163 	unsigned long ret, offset, tmp;
164 
165 	asm(
166 	"1:	" PPC_LL "%[ret], 0(%[addr])\n"
167 	"2:\n"
168 	".section " FIXUP_SECTION ",\"ax\"\n"
169 	"3:	"
170 #ifdef __powerpc64__
171 	"clrrdi		%[tmp], %[addr], 3\n\t"
172 	"clrlsldi	%[offset], %[addr], 61, 3\n\t"
173 	"ld		%[ret], 0(%[tmp])\n\t"
174 #ifdef __BIG_ENDIAN__
175 	"sld		%[ret], %[ret], %[offset]\n\t"
176 #else
177 	"srd		%[ret], %[ret], %[offset]\n\t"
178 #endif
179 #else
180 	"clrrwi		%[tmp], %[addr], 2\n\t"
181 	"clrlslwi	%[offset], %[addr], 30, 3\n\t"
182 	"lwz		%[ret], 0(%[tmp])\n\t"
183 #ifdef __BIG_ENDIAN__
184 	"slw		%[ret], %[ret], %[offset]\n\t"
185 #else
186 	"srw		%[ret], %[ret], %[offset]\n\t"
187 #endif
188 #endif
189 	"b	2b\n"
190 	".previous\n"
191 	".section __ex_table,\"a\"\n\t"
192 		PPC_LONG_ALIGN "\n\t"
193 		PPC_LONG "1b,3b\n"
194 	".previous"
195 	: [tmp] "=&b" (tmp), [offset] "=&r" (offset), [ret] "=&r" (ret)
196 	: [addr] "b" (addr), "m" (*(unsigned long *)addr));
197 
198 	return ret;
199 }
200 
201 #undef FIXUP_SECTION
202 
203 #endif /* _ASM_WORD_AT_A_TIME_H */
204