xref: /linux/kernel/rcu/rcu_segcblist.c (revision e5a52fd2b8cdb700b3c07b030e050a49ef3156b9)
1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3  * RCU segmented callback lists, function definitions
4  *
5  * Copyright IBM Corporation, 2017
6  *
7  * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
8  */
9 
10 #include <linux/types.h>
11 #include <linux/kernel.h>
12 #include <linux/interrupt.h>
13 #include <linux/rcupdate.h>
14 
15 #include "rcu_segcblist.h"
16 
17 /* Initialize simple callback list. */
18 void rcu_cblist_init(struct rcu_cblist *rclp)
19 {
20 	rclp->head = NULL;
21 	rclp->tail = &rclp->head;
22 	rclp->len = 0;
23 }
24 
25 /*
26  * Enqueue an rcu_head structure onto the specified callback list.
27  */
28 void rcu_cblist_enqueue(struct rcu_cblist *rclp, struct rcu_head *rhp)
29 {
30 	*rclp->tail = rhp;
31 	rclp->tail = &rhp->next;
32 	WRITE_ONCE(rclp->len, rclp->len + 1);
33 }
34 
35 /*
36  * Flush the second rcu_cblist structure onto the first one, obliterating
37  * any contents of the first.  If rhp is non-NULL, enqueue it as the sole
38  * element of the second rcu_cblist structure, but ensuring that the second
39  * rcu_cblist structure, if initially non-empty, always appears non-empty
40  * throughout the process.  If rdp is NULL, the second rcu_cblist structure
41  * is instead initialized to empty.
42  */
43 void rcu_cblist_flush_enqueue(struct rcu_cblist *drclp,
44 			      struct rcu_cblist *srclp,
45 			      struct rcu_head *rhp)
46 {
47 	drclp->head = srclp->head;
48 	if (drclp->head)
49 		drclp->tail = srclp->tail;
50 	else
51 		drclp->tail = &drclp->head;
52 	drclp->len = srclp->len;
53 	if (!rhp) {
54 		rcu_cblist_init(srclp);
55 	} else {
56 		rhp->next = NULL;
57 		srclp->head = rhp;
58 		srclp->tail = &rhp->next;
59 		WRITE_ONCE(srclp->len, 1);
60 	}
61 }
62 
63 /*
64  * Dequeue the oldest rcu_head structure from the specified callback
65  * list.
66  */
67 struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp)
68 {
69 	struct rcu_head *rhp;
70 
71 	rhp = rclp->head;
72 	if (!rhp)
73 		return NULL;
74 	rclp->len--;
75 	rclp->head = rhp->next;
76 	if (!rclp->head)
77 		rclp->tail = &rclp->head;
78 	return rhp;
79 }
80 
81 /* Set the length of an rcu_segcblist structure. */
82 static void rcu_segcblist_set_len(struct rcu_segcblist *rsclp, long v)
83 {
84 #ifdef CONFIG_RCU_NOCB_CPU
85 	atomic_long_set(&rsclp->len, v);
86 #else
87 	WRITE_ONCE(rsclp->len, v);
88 #endif
89 }
90 
91 /*
92  * Increase the numeric length of an rcu_segcblist structure by the
93  * specified amount, which can be negative.  This can cause the ->len
94  * field to disagree with the actual number of callbacks on the structure.
95  * This increase is fully ordered with respect to the callers accesses
96  * both before and after.
97  */
98 static void rcu_segcblist_add_len(struct rcu_segcblist *rsclp, long v)
99 {
100 #ifdef CONFIG_RCU_NOCB_CPU
101 	smp_mb__before_atomic(); /* Up to the caller! */
102 	atomic_long_add(v, &rsclp->len);
103 	smp_mb__after_atomic(); /* Up to the caller! */
104 #else
105 	smp_mb(); /* Up to the caller! */
106 	WRITE_ONCE(rsclp->len, rsclp->len + v);
107 	smp_mb(); /* Up to the caller! */
108 #endif
109 }
110 
111 /*
112  * Increase the numeric length of an rcu_segcblist structure by one.
113  * This can cause the ->len field to disagree with the actual number of
114  * callbacks on the structure.  This increase is fully ordered with respect
115  * to the callers accesses both before and after.
116  */
117 void rcu_segcblist_inc_len(struct rcu_segcblist *rsclp)
118 {
119 	rcu_segcblist_add_len(rsclp, 1);
120 }
121 
122 /*
123  * Exchange the numeric length of the specified rcu_segcblist structure
124  * with the specified value.  This can cause the ->len field to disagree
125  * with the actual number of callbacks on the structure.  This exchange is
126  * fully ordered with respect to the callers accesses both before and after.
127  */
128 static long rcu_segcblist_xchg_len(struct rcu_segcblist *rsclp, long v)
129 {
130 #ifdef CONFIG_RCU_NOCB_CPU
131 	return atomic_long_xchg(&rsclp->len, v);
132 #else
133 	long ret = rsclp->len;
134 
135 	smp_mb(); /* Up to the caller! */
136 	WRITE_ONCE(rsclp->len, v);
137 	smp_mb(); /* Up to the caller! */
138 	return ret;
139 #endif
140 }
141 
142 /*
143  * Initialize an rcu_segcblist structure.
144  */
145 void rcu_segcblist_init(struct rcu_segcblist *rsclp)
146 {
147 	int i;
148 
149 	BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq));
150 	BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq));
151 	rsclp->head = NULL;
152 	for (i = 0; i < RCU_CBLIST_NSEGS; i++)
153 		rsclp->tails[i] = &rsclp->head;
154 	rcu_segcblist_set_len(rsclp, 0);
155 	rsclp->enabled = 1;
156 }
157 
158 /*
159  * Disable the specified rcu_segcblist structure, so that callbacks can
160  * no longer be posted to it.  This structure must be empty.
161  */
162 void rcu_segcblist_disable(struct rcu_segcblist *rsclp)
163 {
164 	WARN_ON_ONCE(!rcu_segcblist_empty(rsclp));
165 	WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp));
166 	rsclp->enabled = 0;
167 }
168 
169 /*
170  * Mark the specified rcu_segcblist structure as offloaded.  This
171  * structure must be empty.
172  */
173 void rcu_segcblist_offload(struct rcu_segcblist *rsclp)
174 {
175 	rsclp->offloaded = 1;
176 }
177 
178 /*
179  * Does the specified rcu_segcblist structure contain callbacks that
180  * are ready to be invoked?
181  */
182 bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp)
183 {
184 	return rcu_segcblist_is_enabled(rsclp) &&
185 	       &rsclp->head != READ_ONCE(rsclp->tails[RCU_DONE_TAIL]);
186 }
187 
188 /*
189  * Does the specified rcu_segcblist structure contain callbacks that
190  * are still pending, that is, not yet ready to be invoked?
191  */
192 bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp)
193 {
194 	return rcu_segcblist_is_enabled(rsclp) &&
195 	       !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL);
196 }
197 
198 /*
199  * Return a pointer to the first callback in the specified rcu_segcblist
200  * structure.  This is useful for diagnostics.
201  */
202 struct rcu_head *rcu_segcblist_first_cb(struct rcu_segcblist *rsclp)
203 {
204 	if (rcu_segcblist_is_enabled(rsclp))
205 		return rsclp->head;
206 	return NULL;
207 }
208 
209 /*
210  * Return a pointer to the first pending callback in the specified
211  * rcu_segcblist structure.  This is useful just after posting a given
212  * callback -- if that callback is the first pending callback, then
213  * you cannot rely on someone else having already started up the required
214  * grace period.
215  */
216 struct rcu_head *rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp)
217 {
218 	if (rcu_segcblist_is_enabled(rsclp))
219 		return *rsclp->tails[RCU_DONE_TAIL];
220 	return NULL;
221 }
222 
223 /*
224  * Return false if there are no CBs awaiting grace periods, otherwise,
225  * return true and store the nearest waited-upon grace period into *lp.
226  */
227 bool rcu_segcblist_nextgp(struct rcu_segcblist *rsclp, unsigned long *lp)
228 {
229 	if (!rcu_segcblist_pend_cbs(rsclp))
230 		return false;
231 	*lp = rsclp->gp_seq[RCU_WAIT_TAIL];
232 	return true;
233 }
234 
235 /*
236  * Enqueue the specified callback onto the specified rcu_segcblist
237  * structure, updating accounting as needed.  Note that the ->len
238  * field may be accessed locklessly, hence the WRITE_ONCE().
239  * The ->len field is used by rcu_barrier() and friends to determine
240  * if it must post a callback on this structure, and it is OK
241  * for rcu_barrier() to sometimes post callbacks needlessly, but
242  * absolutely not OK for it to ever miss posting a callback.
243  */
244 void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp,
245 			   struct rcu_head *rhp)
246 {
247 	rcu_segcblist_inc_len(rsclp);
248 	smp_mb(); /* Ensure counts are updated before callback is enqueued. */
249 	rhp->next = NULL;
250 	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rhp);
251 	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], &rhp->next);
252 }
253 
254 /*
255  * Entrain the specified callback onto the specified rcu_segcblist at
256  * the end of the last non-empty segment.  If the entire rcu_segcblist
257  * is empty, make no change, but return false.
258  *
259  * This is intended for use by rcu_barrier()-like primitives, -not-
260  * for normal grace-period use.  IMPORTANT:  The callback you enqueue
261  * will wait for all prior callbacks, NOT necessarily for a grace
262  * period.  You have been warned.
263  */
264 bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp,
265 			   struct rcu_head *rhp)
266 {
267 	int i;
268 
269 	if (rcu_segcblist_n_cbs(rsclp) == 0)
270 		return false;
271 	rcu_segcblist_inc_len(rsclp);
272 	smp_mb(); /* Ensure counts are updated before callback is entrained. */
273 	rhp->next = NULL;
274 	for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--)
275 		if (rsclp->tails[i] != rsclp->tails[i - 1])
276 			break;
277 	WRITE_ONCE(*rsclp->tails[i], rhp);
278 	for (; i <= RCU_NEXT_TAIL; i++)
279 		WRITE_ONCE(rsclp->tails[i], &rhp->next);
280 	return true;
281 }
282 
283 /*
284  * Extract only the counts from the specified rcu_segcblist structure,
285  * and place them in the specified rcu_cblist structure.  This function
286  * supports both callback orphaning and invocation, hence the separation
287  * of counts and callbacks.  (Callbacks ready for invocation must be
288  * orphaned and adopted separately from pending callbacks, but counts
289  * apply to all callbacks.  Locking must be used to make sure that
290  * both orphaned-callbacks lists are consistent.)
291  */
292 void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp,
293 					       struct rcu_cblist *rclp)
294 {
295 	rclp->len = rcu_segcblist_xchg_len(rsclp, 0);
296 }
297 
298 /*
299  * Extract only those callbacks ready to be invoked from the specified
300  * rcu_segcblist structure and place them in the specified rcu_cblist
301  * structure.
302  */
303 void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp,
304 				    struct rcu_cblist *rclp)
305 {
306 	int i;
307 
308 	if (!rcu_segcblist_ready_cbs(rsclp))
309 		return; /* Nothing to do. */
310 	*rclp->tail = rsclp->head;
311 	WRITE_ONCE(rsclp->head, *rsclp->tails[RCU_DONE_TAIL]);
312 	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
313 	rclp->tail = rsclp->tails[RCU_DONE_TAIL];
314 	for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--)
315 		if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL])
316 			WRITE_ONCE(rsclp->tails[i], &rsclp->head);
317 }
318 
319 /*
320  * Extract only those callbacks still pending (not yet ready to be
321  * invoked) from the specified rcu_segcblist structure and place them in
322  * the specified rcu_cblist structure.  Note that this loses information
323  * about any callbacks that might have been partway done waiting for
324  * their grace period.  Too bad!  They will have to start over.
325  */
326 void rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp,
327 				    struct rcu_cblist *rclp)
328 {
329 	int i;
330 
331 	if (!rcu_segcblist_pend_cbs(rsclp))
332 		return; /* Nothing to do. */
333 	*rclp->tail = *rsclp->tails[RCU_DONE_TAIL];
334 	rclp->tail = rsclp->tails[RCU_NEXT_TAIL];
335 	WRITE_ONCE(*rsclp->tails[RCU_DONE_TAIL], NULL);
336 	for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++)
337 		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_DONE_TAIL]);
338 }
339 
340 /*
341  * Insert counts from the specified rcu_cblist structure in the
342  * specified rcu_segcblist structure.
343  */
344 void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp,
345 				struct rcu_cblist *rclp)
346 {
347 	rcu_segcblist_add_len(rsclp, rclp->len);
348 	rclp->len = 0;
349 }
350 
351 /*
352  * Move callbacks from the specified rcu_cblist to the beginning of the
353  * done-callbacks segment of the specified rcu_segcblist.
354  */
355 void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp,
356 				   struct rcu_cblist *rclp)
357 {
358 	int i;
359 
360 	if (!rclp->head)
361 		return; /* No callbacks to move. */
362 	*rclp->tail = rsclp->head;
363 	WRITE_ONCE(rsclp->head, rclp->head);
364 	for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++)
365 		if (&rsclp->head == rsclp->tails[i])
366 			WRITE_ONCE(rsclp->tails[i], rclp->tail);
367 		else
368 			break;
369 	rclp->head = NULL;
370 	rclp->tail = &rclp->head;
371 }
372 
373 /*
374  * Move callbacks from the specified rcu_cblist to the end of the
375  * new-callbacks segment of the specified rcu_segcblist.
376  */
377 void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
378 				   struct rcu_cblist *rclp)
379 {
380 	if (!rclp->head)
381 		return; /* Nothing to do. */
382 	WRITE_ONCE(*rsclp->tails[RCU_NEXT_TAIL], rclp->head);
383 	WRITE_ONCE(rsclp->tails[RCU_NEXT_TAIL], rclp->tail);
384 }
385 
386 /*
387  * Advance the callbacks in the specified rcu_segcblist structure based
388  * on the current value passed in for the grace-period counter.
389  */
390 void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq)
391 {
392 	int i, j;
393 
394 	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
395 	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
396 		return;
397 
398 	/*
399 	 * Find all callbacks whose ->gp_seq numbers indicate that they
400 	 * are ready to invoke, and put them into the RCU_DONE_TAIL segment.
401 	 */
402 	for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) {
403 		if (ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
404 			break;
405 		WRITE_ONCE(rsclp->tails[RCU_DONE_TAIL], rsclp->tails[i]);
406 	}
407 
408 	/* If no callbacks moved, nothing more need be done. */
409 	if (i == RCU_WAIT_TAIL)
410 		return;
411 
412 	/* Clean up tail pointers that might have been misordered above. */
413 	for (j = RCU_WAIT_TAIL; j < i; j++)
414 		WRITE_ONCE(rsclp->tails[j], rsclp->tails[RCU_DONE_TAIL]);
415 
416 	/*
417 	 * Callbacks moved, so clean up the misordered ->tails[] pointers
418 	 * that now point into the middle of the list of ready-to-invoke
419 	 * callbacks.  The overall effect is to copy down the later pointers
420 	 * into the gap that was created by the now-ready segments.
421 	 */
422 	for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) {
423 		if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL])
424 			break;  /* No more callbacks. */
425 		WRITE_ONCE(rsclp->tails[j], rsclp->tails[i]);
426 		rsclp->gp_seq[j] = rsclp->gp_seq[i];
427 	}
428 }
429 
430 /*
431  * "Accelerate" callbacks based on more-accurate grace-period information.
432  * The reason for this is that RCU does not synchronize the beginnings and
433  * ends of grace periods, and that callbacks are posted locally.  This in
434  * turn means that the callbacks must be labelled conservatively early
435  * on, as getting exact information would degrade both performance and
436  * scalability.  When more accurate grace-period information becomes
437  * available, previously posted callbacks can be "accelerated", marking
438  * them to complete at the end of the earlier grace period.
439  *
440  * This function operates on an rcu_segcblist structure, and also the
441  * grace-period sequence number seq at which new callbacks would become
442  * ready to invoke.  Returns true if there are callbacks that won't be
443  * ready to invoke until seq, false otherwise.
444  */
445 bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
446 {
447 	int i;
448 
449 	WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp));
450 	if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL))
451 		return false;
452 
453 	/*
454 	 * Find the segment preceding the oldest segment of callbacks
455 	 * whose ->gp_seq[] completion is at or after that passed in via
456 	 * "seq", skipping any empty segments.  This oldest segment, along
457 	 * with any later segments, can be merged in with any newly arrived
458 	 * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq"
459 	 * as their ->gp_seq[] grace-period completion sequence number.
460 	 */
461 	for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--)
462 		if (rsclp->tails[i] != rsclp->tails[i - 1] &&
463 		    ULONG_CMP_LT(rsclp->gp_seq[i], seq))
464 			break;
465 
466 	/*
467 	 * If all the segments contain callbacks that correspond to
468 	 * earlier grace-period sequence numbers than "seq", leave.
469 	 * Assuming that the rcu_segcblist structure has enough
470 	 * segments in its arrays, this can only happen if some of
471 	 * the non-done segments contain callbacks that really are
472 	 * ready to invoke.  This situation will get straightened
473 	 * out by the next call to rcu_segcblist_advance().
474 	 *
475 	 * Also advance to the oldest segment of callbacks whose
476 	 * ->gp_seq[] completion is at or after that passed in via "seq",
477 	 * skipping any empty segments.
478 	 */
479 	if (++i >= RCU_NEXT_TAIL)
480 		return false;
481 
482 	/*
483 	 * Merge all later callbacks, including newly arrived callbacks,
484 	 * into the segment located by the for-loop above.  Assign "seq"
485 	 * as the ->gp_seq[] value in order to correctly handle the case
486 	 * where there were no pending callbacks in the rcu_segcblist
487 	 * structure other than in the RCU_NEXT_TAIL segment.
488 	 */
489 	for (; i < RCU_NEXT_TAIL; i++) {
490 		WRITE_ONCE(rsclp->tails[i], rsclp->tails[RCU_NEXT_TAIL]);
491 		rsclp->gp_seq[i] = seq;
492 	}
493 	return true;
494 }
495 
496 /*
497  * Merge the source rcu_segcblist structure into the destination
498  * rcu_segcblist structure, then initialize the source.  Any pending
499  * callbacks from the source get to start over.  It is best to
500  * advance and accelerate both the destination and the source
501  * before merging.
502  */
503 void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
504 			 struct rcu_segcblist *src_rsclp)
505 {
506 	struct rcu_cblist donecbs;
507 	struct rcu_cblist pendcbs;
508 
509 	rcu_cblist_init(&donecbs);
510 	rcu_cblist_init(&pendcbs);
511 	rcu_segcblist_extract_count(src_rsclp, &donecbs);
512 	rcu_segcblist_extract_done_cbs(src_rsclp, &donecbs);
513 	rcu_segcblist_extract_pend_cbs(src_rsclp, &pendcbs);
514 	rcu_segcblist_insert_count(dst_rsclp, &donecbs);
515 	rcu_segcblist_insert_done_cbs(dst_rsclp, &donecbs);
516 	rcu_segcblist_insert_pend_cbs(dst_rsclp, &pendcbs);
517 	rcu_segcblist_init(src_rsclp);
518 }
519