xref: /linux/lib/sbitmap.c (revision ab520be8cd5d56867fc95cfbc34b90880faf1f9d)
1 /*
2  * Copyright (C) 2016 Facebook
3  * Copyright (C) 2013-2014 Jens Axboe
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public
7  * License v2 as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program.  If not, see <https://www.gnu.org/licenses/>.
16  */
17 
18 #include <linux/random.h>
19 #include <linux/sbitmap.h>
20 
21 int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift,
22 		      gfp_t flags, int node)
23 {
24 	unsigned int bits_per_word;
25 	unsigned int i;
26 
27 	if (shift < 0) {
28 		shift = ilog2(BITS_PER_LONG);
29 		/*
30 		 * If the bitmap is small, shrink the number of bits per word so
31 		 * we spread over a few cachelines, at least. If less than 4
32 		 * bits, just forget about it, it's not going to work optimally
33 		 * anyway.
34 		 */
35 		if (depth >= 4) {
36 			while ((4U << shift) > depth)
37 				shift--;
38 		}
39 	}
40 	bits_per_word = 1U << shift;
41 	if (bits_per_word > BITS_PER_LONG)
42 		return -EINVAL;
43 
44 	sb->shift = shift;
45 	sb->depth = depth;
46 	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
47 
48 	if (depth == 0) {
49 		sb->map = NULL;
50 		return 0;
51 	}
52 
53 	sb->map = kzalloc_node(sb->map_nr * sizeof(*sb->map), flags, node);
54 	if (!sb->map)
55 		return -ENOMEM;
56 
57 	for (i = 0; i < sb->map_nr; i++) {
58 		sb->map[i].depth = min(depth, bits_per_word);
59 		depth -= sb->map[i].depth;
60 	}
61 	return 0;
62 }
63 EXPORT_SYMBOL_GPL(sbitmap_init_node);
64 
65 void sbitmap_resize(struct sbitmap *sb, unsigned int depth)
66 {
67 	unsigned int bits_per_word = 1U << sb->shift;
68 	unsigned int i;
69 
70 	sb->depth = depth;
71 	sb->map_nr = DIV_ROUND_UP(sb->depth, bits_per_word);
72 
73 	for (i = 0; i < sb->map_nr; i++) {
74 		sb->map[i].depth = min(depth, bits_per_word);
75 		depth -= sb->map[i].depth;
76 	}
77 }
78 EXPORT_SYMBOL_GPL(sbitmap_resize);
79 
80 static int __sbitmap_get_word(struct sbitmap_word *word, unsigned int hint,
81 			      bool wrap)
82 {
83 	unsigned int orig_hint = hint;
84 	int nr;
85 
86 	while (1) {
87 		nr = find_next_zero_bit(&word->word, word->depth, hint);
88 		if (unlikely(nr >= word->depth)) {
89 			/*
90 			 * We started with an offset, and we didn't reset the
91 			 * offset to 0 in a failure case, so start from 0 to
92 			 * exhaust the map.
93 			 */
94 			if (orig_hint && hint && wrap) {
95 				hint = orig_hint = 0;
96 				continue;
97 			}
98 			return -1;
99 		}
100 
101 		if (!test_and_set_bit(nr, &word->word))
102 			break;
103 
104 		hint = nr + 1;
105 		if (hint >= word->depth - 1)
106 			hint = 0;
107 	}
108 
109 	return nr;
110 }
111 
112 int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin)
113 {
114 	unsigned int i, index;
115 	int nr = -1;
116 
117 	index = SB_NR_TO_INDEX(sb, alloc_hint);
118 
119 	for (i = 0; i < sb->map_nr; i++) {
120 		nr = __sbitmap_get_word(&sb->map[index],
121 					SB_NR_TO_BIT(sb, alloc_hint),
122 					!round_robin);
123 		if (nr != -1) {
124 			nr += index << sb->shift;
125 			break;
126 		}
127 
128 		/* Jump to next index. */
129 		index++;
130 		alloc_hint = index << sb->shift;
131 
132 		if (index >= sb->map_nr) {
133 			index = 0;
134 			alloc_hint = 0;
135 		}
136 	}
137 
138 	return nr;
139 }
140 EXPORT_SYMBOL_GPL(sbitmap_get);
141 
142 bool sbitmap_any_bit_set(const struct sbitmap *sb)
143 {
144 	unsigned int i;
145 
146 	for (i = 0; i < sb->map_nr; i++) {
147 		if (sb->map[i].word)
148 			return true;
149 	}
150 	return false;
151 }
152 EXPORT_SYMBOL_GPL(sbitmap_any_bit_set);
153 
154 bool sbitmap_any_bit_clear(const struct sbitmap *sb)
155 {
156 	unsigned int i;
157 
158 	for (i = 0; i < sb->map_nr; i++) {
159 		const struct sbitmap_word *word = &sb->map[i];
160 		unsigned long ret;
161 
162 		ret = find_first_zero_bit(&word->word, word->depth);
163 		if (ret < word->depth)
164 			return true;
165 	}
166 	return false;
167 }
168 EXPORT_SYMBOL_GPL(sbitmap_any_bit_clear);
169 
170 unsigned int sbitmap_weight(const struct sbitmap *sb)
171 {
172 	unsigned int i, weight = 0;
173 
174 	for (i = 0; i < sb->map_nr; i++) {
175 		const struct sbitmap_word *word = &sb->map[i];
176 
177 		weight += bitmap_weight(&word->word, word->depth);
178 	}
179 	return weight;
180 }
181 EXPORT_SYMBOL_GPL(sbitmap_weight);
182 
183 static unsigned int sbq_calc_wake_batch(unsigned int depth)
184 {
185 	unsigned int wake_batch;
186 
187 	/*
188 	 * For each batch, we wake up one queue. We need to make sure that our
189 	 * batch size is small enough that the full depth of the bitmap is
190 	 * enough to wake up all of the queues.
191 	 */
192 	wake_batch = SBQ_WAKE_BATCH;
193 	if (wake_batch > depth / SBQ_WAIT_QUEUES)
194 		wake_batch = max(1U, depth / SBQ_WAIT_QUEUES);
195 
196 	return wake_batch;
197 }
198 
199 int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth,
200 			    int shift, bool round_robin, gfp_t flags, int node)
201 {
202 	int ret;
203 	int i;
204 
205 	ret = sbitmap_init_node(&sbq->sb, depth, shift, flags, node);
206 	if (ret)
207 		return ret;
208 
209 	sbq->alloc_hint = alloc_percpu_gfp(unsigned int, flags);
210 	if (!sbq->alloc_hint) {
211 		sbitmap_free(&sbq->sb);
212 		return -ENOMEM;
213 	}
214 
215 	if (depth && !round_robin) {
216 		for_each_possible_cpu(i)
217 			*per_cpu_ptr(sbq->alloc_hint, i) = prandom_u32() % depth;
218 	}
219 
220 	sbq->wake_batch = sbq_calc_wake_batch(depth);
221 	atomic_set(&sbq->wake_index, 0);
222 
223 	sbq->ws = kzalloc_node(SBQ_WAIT_QUEUES * sizeof(*sbq->ws), flags, node);
224 	if (!sbq->ws) {
225 		free_percpu(sbq->alloc_hint);
226 		sbitmap_free(&sbq->sb);
227 		return -ENOMEM;
228 	}
229 
230 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
231 		init_waitqueue_head(&sbq->ws[i].wait);
232 		atomic_set(&sbq->ws[i].wait_cnt, sbq->wake_batch);
233 	}
234 
235 	sbq->round_robin = round_robin;
236 	return 0;
237 }
238 EXPORT_SYMBOL_GPL(sbitmap_queue_init_node);
239 
240 void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth)
241 {
242 	sbq->wake_batch = sbq_calc_wake_batch(depth);
243 	sbitmap_resize(&sbq->sb, depth);
244 }
245 EXPORT_SYMBOL_GPL(sbitmap_queue_resize);
246 
247 int __sbitmap_queue_get(struct sbitmap_queue *sbq)
248 {
249 	unsigned int hint, depth;
250 	int nr;
251 
252 	hint = this_cpu_read(*sbq->alloc_hint);
253 	depth = READ_ONCE(sbq->sb.depth);
254 	if (unlikely(hint >= depth)) {
255 		hint = depth ? prandom_u32() % depth : 0;
256 		this_cpu_write(*sbq->alloc_hint, hint);
257 	}
258 	nr = sbitmap_get(&sbq->sb, hint, sbq->round_robin);
259 
260 	if (nr == -1) {
261 		/* If the map is full, a hint won't do us much good. */
262 		this_cpu_write(*sbq->alloc_hint, 0);
263 	} else if (nr == hint || unlikely(sbq->round_robin)) {
264 		/* Only update the hint if we used it. */
265 		hint = nr + 1;
266 		if (hint >= depth - 1)
267 			hint = 0;
268 		this_cpu_write(*sbq->alloc_hint, hint);
269 	}
270 
271 	return nr;
272 }
273 EXPORT_SYMBOL_GPL(__sbitmap_queue_get);
274 
275 static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq)
276 {
277 	int i, wake_index;
278 
279 	wake_index = atomic_read(&sbq->wake_index);
280 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
281 		struct sbq_wait_state *ws = &sbq->ws[wake_index];
282 
283 		if (waitqueue_active(&ws->wait)) {
284 			int o = atomic_read(&sbq->wake_index);
285 
286 			if (wake_index != o)
287 				atomic_cmpxchg(&sbq->wake_index, o, wake_index);
288 			return ws;
289 		}
290 
291 		wake_index = sbq_index_inc(wake_index);
292 	}
293 
294 	return NULL;
295 }
296 
297 static void sbq_wake_up(struct sbitmap_queue *sbq)
298 {
299 	struct sbq_wait_state *ws;
300 	int wait_cnt;
301 
302 	/* Ensure that the wait list checks occur after clear_bit(). */
303 	smp_mb();
304 
305 	ws = sbq_wake_ptr(sbq);
306 	if (!ws)
307 		return;
308 
309 	wait_cnt = atomic_dec_return(&ws->wait_cnt);
310 	if (unlikely(wait_cnt < 0))
311 		wait_cnt = atomic_inc_return(&ws->wait_cnt);
312 	if (wait_cnt == 0) {
313 		atomic_add(sbq->wake_batch, &ws->wait_cnt);
314 		sbq_index_atomic_inc(&sbq->wake_index);
315 		wake_up(&ws->wait);
316 	}
317 }
318 
319 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
320 			 unsigned int cpu)
321 {
322 	sbitmap_clear_bit(&sbq->sb, nr);
323 	sbq_wake_up(sbq);
324 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
325 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
326 }
327 EXPORT_SYMBOL_GPL(sbitmap_queue_clear);
328 
329 void sbitmap_queue_wake_all(struct sbitmap_queue *sbq)
330 {
331 	int i, wake_index;
332 
333 	/*
334 	 * Make sure all changes prior to this are visible from other CPUs.
335 	 */
336 	smp_mb();
337 	wake_index = atomic_read(&sbq->wake_index);
338 	for (i = 0; i < SBQ_WAIT_QUEUES; i++) {
339 		struct sbq_wait_state *ws = &sbq->ws[wake_index];
340 
341 		if (waitqueue_active(&ws->wait))
342 			wake_up(&ws->wait);
343 
344 		wake_index = sbq_index_inc(wake_index);
345 	}
346 }
347 EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all);
348