xref: /linux/mm/slab_common.c (revision a4cdb556cae05cd3e7b602b3a44c01420c4e2258)
1 /*
2  * Slab allocator functions that are independent of the allocator strategy
3  *
4  * (C) 2012 Christoph Lameter <cl@linux.com>
5  */
6 #include <linux/slab.h>
7 
8 #include <linux/mm.h>
9 #include <linux/poison.h>
10 #include <linux/interrupt.h>
11 #include <linux/memory.h>
12 #include <linux/compiler.h>
13 #include <linux/module.h>
14 #include <linux/cpu.h>
15 #include <linux/uaccess.h>
16 #include <linux/seq_file.h>
17 #include <linux/proc_fs.h>
18 #include <asm/cacheflush.h>
19 #include <asm/tlbflush.h>
20 #include <asm/page.h>
21 #include <linux/memcontrol.h>
22 
23 #define CREATE_TRACE_POINTS
24 #include <trace/events/kmem.h>
25 
26 #include "slab.h"
27 
28 enum slab_state slab_state;
29 LIST_HEAD(slab_caches);
30 DEFINE_MUTEX(slab_mutex);
31 struct kmem_cache *kmem_cache;
32 
33 /*
34  * Set of flags that will prevent slab merging
35  */
36 #define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
37 		SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE | \
38 		SLAB_FAILSLAB)
39 
40 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | SLAB_NOTRACK)
41 
42 /*
43  * Merge control. If this is set then no merging of slab caches will occur.
44  * (Could be removed. This was introduced to pacify the merge skeptics.)
45  */
46 static int slab_nomerge;
47 
48 static int __init setup_slab_nomerge(char *str)
49 {
50 	slab_nomerge = 1;
51 	return 1;
52 }
53 
54 #ifdef CONFIG_SLUB
55 __setup_param("slub_nomerge", slub_nomerge, setup_slab_nomerge, 0);
56 #endif
57 
58 __setup("slab_nomerge", setup_slab_nomerge);
59 
60 /*
61  * Determine the size of a slab object
62  */
63 unsigned int kmem_cache_size(struct kmem_cache *s)
64 {
65 	return s->object_size;
66 }
67 EXPORT_SYMBOL(kmem_cache_size);
68 
69 #ifdef CONFIG_DEBUG_VM
70 static int kmem_cache_sanity_check(const char *name, size_t size)
71 {
72 	struct kmem_cache *s = NULL;
73 
74 	if (!name || in_interrupt() || size < sizeof(void *) ||
75 		size > KMALLOC_MAX_SIZE) {
76 		pr_err("kmem_cache_create(%s) integrity check failed\n", name);
77 		return -EINVAL;
78 	}
79 
80 	list_for_each_entry(s, &slab_caches, list) {
81 		char tmp;
82 		int res;
83 
84 		/*
85 		 * This happens when the module gets unloaded and doesn't
86 		 * destroy its slab cache and no-one else reuses the vmalloc
87 		 * area of the module.  Print a warning.
88 		 */
89 		res = probe_kernel_address(s->name, tmp);
90 		if (res) {
91 			pr_err("Slab cache with size %d has lost its name\n",
92 			       s->object_size);
93 			continue;
94 		}
95 	}
96 
97 	WARN_ON(strchr(name, ' '));	/* It confuses parsers */
98 	return 0;
99 }
100 #else
101 static inline int kmem_cache_sanity_check(const char *name, size_t size)
102 {
103 	return 0;
104 }
105 #endif
106 
107 void __kmem_cache_free_bulk(struct kmem_cache *s, size_t nr, void **p)
108 {
109 	size_t i;
110 
111 	for (i = 0; i < nr; i++)
112 		kmem_cache_free(s, p[i]);
113 }
114 
115 int __kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t nr,
116 								void **p)
117 {
118 	size_t i;
119 
120 	for (i = 0; i < nr; i++) {
121 		void *x = p[i] = kmem_cache_alloc(s, flags);
122 		if (!x) {
123 			__kmem_cache_free_bulk(s, i, p);
124 			return 0;
125 		}
126 	}
127 	return i;
128 }
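
/*
 * Illustrative sketch (not part of this file): callers normally use the
 * public wrappers kmem_cache_alloc_bulk()/kmem_cache_free_bulk() from
 * <linux/slab.h>; the functions above are the generic fallbacks an
 * allocator can use to back them. The cache and array below are
 * hypothetical, and the allocation is treated as all-or-nothing, as in
 * the fallback above:
 *
 *	void *objs[16];
 *
 *	if (!kmem_cache_alloc_bulk(foo_cachep, GFP_KERNEL, ARRAY_SIZE(objs),
 *				   objs))
 *		return -ENOMEM;
 *	...
 *	kmem_cache_free_bulk(foo_cachep, ARRAY_SIZE(objs), objs);
 */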
129 
130 #ifdef CONFIG_MEMCG_KMEM
131 void slab_init_memcg_params(struct kmem_cache *s)
132 {
133 	s->memcg_params.is_root_cache = true;
134 	INIT_LIST_HEAD(&s->memcg_params.list);
135 	RCU_INIT_POINTER(s->memcg_params.memcg_caches, NULL);
136 }
137 
138 static int init_memcg_params(struct kmem_cache *s,
139 		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
140 {
141 	struct memcg_cache_array *arr;
142 
143 	if (memcg) {
144 		s->memcg_params.is_root_cache = false;
145 		s->memcg_params.memcg = memcg;
146 		s->memcg_params.root_cache = root_cache;
147 		return 0;
148 	}
149 
150 	slab_init_memcg_params(s);
151 
152 	if (!memcg_nr_cache_ids)
153 		return 0;
154 
155 	arr = kzalloc(sizeof(struct memcg_cache_array) +
156 		      memcg_nr_cache_ids * sizeof(void *),
157 		      GFP_KERNEL);
158 	if (!arr)
159 		return -ENOMEM;
160 
161 	RCU_INIT_POINTER(s->memcg_params.memcg_caches, arr);
162 	return 0;
163 }
164 
165 static void destroy_memcg_params(struct kmem_cache *s)
166 {
167 	if (is_root_cache(s))
168 		kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
169 }
170 
171 static int update_memcg_params(struct kmem_cache *s, int new_array_size)
172 {
173 	struct memcg_cache_array *old, *new;
174 
175 	if (!is_root_cache(s))
176 		return 0;
177 
178 	new = kzalloc(sizeof(struct memcg_cache_array) +
179 		      new_array_size * sizeof(void *), GFP_KERNEL);
180 	if (!new)
181 		return -ENOMEM;
182 
183 	old = rcu_dereference_protected(s->memcg_params.memcg_caches,
184 					lockdep_is_held(&slab_mutex));
185 	if (old)
186 		memcpy(new->entries, old->entries,
187 		       memcg_nr_cache_ids * sizeof(void *));
188 
189 	rcu_assign_pointer(s->memcg_params.memcg_caches, new);
190 	if (old)
191 		kfree_rcu(old, rcu);
192 	return 0;
193 }
194 
195 int memcg_update_all_caches(int num_memcgs)
196 {
197 	struct kmem_cache *s;
198 	int ret = 0;
199 
200 	mutex_lock(&slab_mutex);
201 	list_for_each_entry(s, &slab_caches, list) {
202 		ret = update_memcg_params(s, num_memcgs);
203 		/*
204 		 * If updating a cache fails, stop here; the caches updated
205 		 * so far simply keep their new, larger arrays (no rollback).
206 		 */
207 		if (ret)
208 			break;
209 	}
210 	mutex_unlock(&slab_mutex);
211 	return ret;
212 }
213 #else
214 static inline int init_memcg_params(struct kmem_cache *s,
215 		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
216 {
217 	return 0;
218 }
219 
220 static inline void destroy_memcg_params(struct kmem_cache *s)
221 {
222 }
223 #endif /* CONFIG_MEMCG_KMEM */
224 
225 /*
226  * Find a mergeable slab cache
227  */
228 int slab_unmergeable(struct kmem_cache *s)
229 {
230 	if (slab_nomerge || (s->flags & SLAB_NEVER_MERGE))
231 		return 1;
232 
233 	if (!is_root_cache(s))
234 		return 1;
235 
236 	if (s->ctor)
237 		return 1;
238 
239 	/*
240 	 * We may have set a slab to be unmergeable during bootstrap.
241 	 */
242 	if (s->refcount < 0)
243 		return 1;
244 
245 	return 0;
246 }
247 
248 struct kmem_cache *find_mergeable(size_t size, size_t align,
249 		unsigned long flags, const char *name, void (*ctor)(void *))
250 {
251 	struct kmem_cache *s;
252 
253 	if (slab_nomerge || (flags & SLAB_NEVER_MERGE))
254 		return NULL;
255 
256 	if (ctor)
257 		return NULL;
258 
259 	size = ALIGN(size, sizeof(void *));
260 	align = calculate_alignment(flags, align, size);
261 	size = ALIGN(size, align);
262 	flags = kmem_cache_flags(size, flags, name, NULL);
263 
264 	list_for_each_entry_reverse(s, &slab_caches, list) {
265 		if (slab_unmergeable(s))
266 			continue;
267 
268 		if (size > s->size)
269 			continue;
270 
271 		if ((flags & SLAB_MERGE_SAME) != (s->flags & SLAB_MERGE_SAME))
272 			continue;
273 		/*
274 		 * Check if alignment is compatible.
275 		 * Courtesy of Adrian Drzewiecki
276 		 */
277 		if ((s->size & ~(align - 1)) != s->size)
278 			continue;
279 
280 		if (s->size - size >= sizeof(void *))
281 			continue;
282 
283 		if (IS_ENABLED(CONFIG_SLAB) && align &&
284 			(align > s->align || s->align % align))
285 			continue;
286 
287 		return s;
288 	}
289 	return NULL;
290 }
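
/*
 * Worked example of the checks above (assuming 8-byte pointers and an
 * 8-byte ARCH_SLAB_MINALIGN): a request for a 44-byte cache with default
 * flags and no constructor is rounded up to 48 bytes with 8-byte alignment.
 * A pre-existing root cache qualifies if its object size lies in the range
 * 48..55 bytes and is a multiple of that alignment, i.e. exactly 48 bytes
 * here, in which case the new name simply becomes an alias of the existing
 * kmem_cache.
 */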
291 
292 /*
293  * Figure out what the alignment of the objects will be given a set of
294  * flags, a user specified alignment and the size of the objects.
295  */
296 unsigned long calculate_alignment(unsigned long flags,
297 		unsigned long align, unsigned long size)
298 {
299 	/*
300 	 * If the user wants hardware cache aligned objects then follow that
301 	 * suggestion if the object is sufficiently large.
302 	 *
303 	 * The hardware cache alignment cannot override the specified
304 	 * alignment though. If that is greater then use it.
305 	 */
306 	if (flags & SLAB_HWCACHE_ALIGN) {
307 		unsigned long ralign = cache_line_size();
308 		while (size <= ralign / 2)
309 			ralign /= 2;
310 		align = max(align, ralign);
311 	}
312 
313 	if (align < ARCH_SLAB_MINALIGN)
314 		align = ARCH_SLAB_MINALIGN;
315 
316 	return ALIGN(align, sizeof(void *));
317 }
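
/*
 * Worked example (assuming a 64-byte cache line and 8-byte pointers): a
 * 20-byte object created with SLAB_HWCACHE_ALIGN and no explicit alignment
 * starts with ralign = 64, which is halved once (20 <= 32), so the object
 * ends up 32-byte aligned and two objects share a cache line. A 100-byte
 * object keeps the full 64-byte alignment.
 */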
318 
319 static struct kmem_cache *create_cache(const char *name,
320 		size_t object_size, size_t size, size_t align,
321 		unsigned long flags, void (*ctor)(void *),
322 		struct mem_cgroup *memcg, struct kmem_cache *root_cache)
323 {
324 	struct kmem_cache *s;
325 	int err;
326 
327 	err = -ENOMEM;
328 	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
329 	if (!s)
330 		goto out;
331 
332 	s->name = name;
333 	s->object_size = object_size;
334 	s->size = size;
335 	s->align = align;
336 	s->ctor = ctor;
337 
338 	err = init_memcg_params(s, memcg, root_cache);
339 	if (err)
340 		goto out_free_cache;
341 
342 	err = __kmem_cache_create(s, flags);
343 	if (err)
344 		goto out_free_cache;
345 
346 	s->refcount = 1;
347 	list_add(&s->list, &slab_caches);
348 out:
349 	if (err)
350 		return ERR_PTR(err);
351 	return s;
352 
353 out_free_cache:
354 	destroy_memcg_params(s);
355 	kmem_cache_free(kmem_cache, s);
356 	goto out;
357 }
358 
359 /**
360  * kmem_cache_create - Create a cache.
361  * @name: A string which is used in /proc/slabinfo to identify this cache.
362  * @size: The size of objects to be created in this cache.
363  * @align: The required alignment for the objects.
364  * @flags: SLAB flags
365  * @ctor: A constructor for the objects.
366  *
367  * Returns a ptr to the cache on success, NULL on failure.
368  * Cannot be called within an interrupt, but can be interrupted.
369  * The @ctor is run when new pages are allocated by the cache.
370  *
371  * The flags are
372  *
373  * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
374  * to catch references to uninitialised memory.
375  *
376  * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
377  * for buffer overruns.
378  *
379  * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
380  * cacheline.  This can be beneficial if you're counting cycles as closely
381  * as davem.
382  */
383 struct kmem_cache *
384 kmem_cache_create(const char *name, size_t size, size_t align,
385 		  unsigned long flags, void (*ctor)(void *))
386 {
387 	struct kmem_cache *s = NULL;
388 	const char *cache_name;
389 	int err;
390 
391 	get_online_cpus();
392 	get_online_mems();
393 	memcg_get_cache_ids();
394 
395 	mutex_lock(&slab_mutex);
396 
397 	err = kmem_cache_sanity_check(name, size);
398 	if (err) {
399 		goto out_unlock;
400 	}
401 
402 	/*
403 	 * Some allocators will constrain the set of valid flags to a subset
404 	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
405 	 * case, and we'll just provide them with a sanitized version of the
406 	 * passed flags.
407 	 */
408 	flags &= CACHE_CREATE_MASK;
409 
410 	s = __kmem_cache_alias(name, size, align, flags, ctor);
411 	if (s)
412 		goto out_unlock;
413 
414 	cache_name = kstrdup_const(name, GFP_KERNEL);
415 	if (!cache_name) {
416 		err = -ENOMEM;
417 		goto out_unlock;
418 	}
419 
420 	s = create_cache(cache_name, size, size,
421 			 calculate_alignment(flags, align, size),
422 			 flags, ctor, NULL, NULL);
423 	if (IS_ERR(s)) {
424 		err = PTR_ERR(s);
425 		kfree_const(cache_name);
426 	}
427 
428 out_unlock:
429 	mutex_unlock(&slab_mutex);
430 
431 	memcg_put_cache_ids();
432 	put_online_mems();
433 	put_online_cpus();
434 
435 	if (err) {
436 		if (flags & SLAB_PANIC)
437 			panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
438 				name, err);
439 		else {
440 			printk(KERN_WARNING "kmem_cache_create(%s) failed with error %d\n",
441 				name, err);
442 			dump_stack();
443 		}
444 		return NULL;
445 	}
446 	return s;
447 }
448 EXPORT_SYMBOL(kmem_cache_create);
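
/*
 * Illustrative usage sketch (not part of this file; the structure and
 * cache below are made up):
 *
 *	struct foo {
 *		int bar;
 *		struct list_head list;
 *	};
 *	static struct kmem_cache *foo_cachep;
 *
 *	foo_cachep = kmem_cache_create("foo", sizeof(struct foo), 0,
 *				       SLAB_HWCACHE_ALIGN, NULL);
 *	if (!foo_cachep)
 *		return -ENOMEM;
 *
 *	obj = kmem_cache_alloc(foo_cachep, GFP_KERNEL);
 *	if (obj) {
 *		...
 *		kmem_cache_free(foo_cachep, obj);
 *	}
 *	kmem_cache_destroy(foo_cachep);
 */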
449 
450 static int shutdown_cache(struct kmem_cache *s,
451 		struct list_head *release, bool *need_rcu_barrier)
452 {
453 	if (__kmem_cache_shutdown(s) != 0)
454 		return -EBUSY;
455 
456 	if (s->flags & SLAB_DESTROY_BY_RCU)
457 		*need_rcu_barrier = true;
458 
459 	list_move(&s->list, release);
460 	return 0;
461 }
462 
463 static void release_caches(struct list_head *release, bool need_rcu_barrier)
464 {
465 	struct kmem_cache *s, *s2;
466 
467 	if (need_rcu_barrier)
468 		rcu_barrier();
469 
470 	list_for_each_entry_safe(s, s2, release, list) {
471 #ifdef SLAB_SUPPORTS_SYSFS
472 		sysfs_slab_remove(s);
473 #else
474 		slab_kmem_cache_release(s);
475 #endif
476 	}
477 }
478 
479 #ifdef CONFIG_MEMCG_KMEM
480 /**
481  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
482  * @memcg: The memory cgroup the new cache is for.
483  * @root_cache: The parent of the new cache.
484  *
485  * This function attempts to create a kmem cache that will serve allocation
486  * requests going from @memcg to @root_cache. The new cache inherits properties
487  * from its parent.
488  */
489 void memcg_create_kmem_cache(struct mem_cgroup *memcg,
490 			     struct kmem_cache *root_cache)
491 {
492 	static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
493 	struct cgroup_subsys_state *css = &memcg->css;
494 	struct memcg_cache_array *arr;
495 	struct kmem_cache *s = NULL;
496 	char *cache_name;
497 	int idx;
498 
499 	get_online_cpus();
500 	get_online_mems();
501 
502 	mutex_lock(&slab_mutex);
503 
504 	/*
505 	 * The memory cgroup could have been deactivated while the cache
506 	 * creation work was pending.
507 	 */
508 	if (!memcg_kmem_is_active(memcg))
509 		goto out_unlock;
510 
511 	idx = memcg_cache_id(memcg);
512 	arr = rcu_dereference_protected(root_cache->memcg_params.memcg_caches,
513 					lockdep_is_held(&slab_mutex));
514 
515 	/*
516 	 * Since per-memcg caches are created asynchronously on first
517 	 * allocation (see memcg_kmem_get_cache()), several threads can try to
518 	 * create the same cache, but only one of them may succeed.
519 	 */
520 	if (arr->entries[idx])
521 		goto out_unlock;
522 
523 	cgroup_name(css->cgroup, memcg_name_buf, sizeof(memcg_name_buf));
524 	cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
525 			       css->id, memcg_name_buf);
526 	if (!cache_name)
527 		goto out_unlock;
528 
529 	s = create_cache(cache_name, root_cache->object_size,
530 			 root_cache->size, root_cache->align,
531 			 root_cache->flags, root_cache->ctor,
532 			 memcg, root_cache);
533 	/*
534 	 * If we could not create a memcg cache, do not complain, because
535 	 * that's not critical at all as we can always proceed with the root
536 	 * cache.
537 	 */
538 	if (IS_ERR(s)) {
539 		kfree(cache_name);
540 		goto out_unlock;
541 	}
542 
543 	list_add(&s->memcg_params.list, &root_cache->memcg_params.list);
544 
545 	/*
546 	 * Since readers won't lock (see cache_from_memcg_idx()), we need a
547 	 * barrier here to ensure nobody will see the kmem_cache partially
548 	 * initialized.
549 	 */
550 	smp_wmb();
551 	arr->entries[idx] = s;
552 
553 out_unlock:
554 	mutex_unlock(&slab_mutex);
555 
556 	put_online_mems();
557 	put_online_cpus();
558 }
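
/*
 * The name chosen above has the form "<root cache>(<css id>:<cgroup name>)".
 * For a hypothetical root cache "dentry" first used from css id 5 of a
 * cgroup named "workers", the per-memcg copy would be called
 * "dentry(5:workers)".
 */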
559 
560 void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
561 {
562 	int idx;
563 	struct memcg_cache_array *arr;
564 	struct kmem_cache *s, *c;
565 
566 	idx = memcg_cache_id(memcg);
567 
568 	get_online_cpus();
569 	get_online_mems();
570 
571 	mutex_lock(&slab_mutex);
572 	list_for_each_entry(s, &slab_caches, list) {
573 		if (!is_root_cache(s))
574 			continue;
575 
576 		arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
577 						lockdep_is_held(&slab_mutex));
578 		c = arr->entries[idx];
579 		if (!c)
580 			continue;
581 
582 		__kmem_cache_shrink(c, true);
583 		arr->entries[idx] = NULL;
584 	}
585 	mutex_unlock(&slab_mutex);
586 
587 	put_online_mems();
588 	put_online_cpus();
589 }
590 
591 static int __shutdown_memcg_cache(struct kmem_cache *s,
592 		struct list_head *release, bool *need_rcu_barrier)
593 {
594 	BUG_ON(is_root_cache(s));
595 
596 	if (shutdown_cache(s, release, need_rcu_barrier))
597 		return -EBUSY;
598 
599 	list_del(&s->memcg_params.list);
600 	return 0;
601 }
602 
603 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
604 {
605 	LIST_HEAD(release);
606 	bool need_rcu_barrier = false;
607 	struct kmem_cache *s, *s2;
608 
609 	get_online_cpus();
610 	get_online_mems();
611 
612 	mutex_lock(&slab_mutex);
613 	list_for_each_entry_safe(s, s2, &slab_caches, list) {
614 		if (is_root_cache(s) || s->memcg_params.memcg != memcg)
615 			continue;
616 		/*
617 		 * The cgroup is about to be freed and therefore has no charges
618 		 * left. Hence, all its caches must be empty by now.
619 		 */
620 		BUG_ON(__shutdown_memcg_cache(s, &release, &need_rcu_barrier));
621 	}
622 	mutex_unlock(&slab_mutex);
623 
624 	put_online_mems();
625 	put_online_cpus();
626 
627 	release_caches(&release, need_rcu_barrier);
628 }
629 
630 static int shutdown_memcg_caches(struct kmem_cache *s,
631 		struct list_head *release, bool *need_rcu_barrier)
632 {
633 	struct memcg_cache_array *arr;
634 	struct kmem_cache *c, *c2;
635 	LIST_HEAD(busy);
636 	int i;
637 
638 	BUG_ON(!is_root_cache(s));
639 
640 	/*
641 	 * First, shutdown active caches, i.e. caches that belong to online
642 	 * memory cgroups.
643 	 */
644 	arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
645 					lockdep_is_held(&slab_mutex));
646 	for_each_memcg_cache_index(i) {
647 		c = arr->entries[i];
648 		if (!c)
649 			continue;
650 		if (__shutdown_memcg_cache(c, release, need_rcu_barrier))
651 			/*
652 			 * The cache still has objects. Move it to a temporary
653 			 * list so as not to try to destroy it for a second
654 			 * time while iterating over inactive caches below.
655 			 */
656 			list_move(&c->memcg_params.list, &busy);
657 		else
658 			/*
659 			 * The cache is empty and will be destroyed soon. Clear
660 			 * the pointer to it in the memcg_caches array so that
661 			 * it will never be accessed even if the root cache
662 			 * stays alive.
663 			 */
664 			arr->entries[i] = NULL;
665 	}
666 
667 	/*
668 	 * Second, shutdown all caches left from memory cgroups that are now
669 	 * offline.
670 	 */
671 	list_for_each_entry_safe(c, c2, &s->memcg_params.list,
672 				 memcg_params.list)
673 		__shutdown_memcg_cache(c, release, need_rcu_barrier);
674 
675 	list_splice(&busy, &s->memcg_params.list);
676 
677 	/*
678 	 * A cache being destroyed must be empty. In particular, this means
679 	 * that all per memcg caches attached to it must be empty too.
680 	 */
681 	if (!list_empty(&s->memcg_params.list))
682 		return -EBUSY;
683 	return 0;
684 }
685 #else
686 static inline int shutdown_memcg_caches(struct kmem_cache *s,
687 		struct list_head *release, bool *need_rcu_barrier)
688 {
689 	return 0;
690 }
691 #endif /* CONFIG_MEMCG_KMEM */
692 
693 void slab_kmem_cache_release(struct kmem_cache *s)
694 {
695 	destroy_memcg_params(s);
696 	kfree_const(s->name);
697 	kmem_cache_free(kmem_cache, s);
698 }
699 
700 void kmem_cache_destroy(struct kmem_cache *s)
701 {
702 	LIST_HEAD(release);
703 	bool need_rcu_barrier = false;
704 	int err;
705 
706 	if (unlikely(!s))
707 		return;
708 
709 	get_online_cpus();
710 	get_online_mems();
711 
712 	mutex_lock(&slab_mutex);
713 
714 	s->refcount--;
715 	if (s->refcount)
716 		goto out_unlock;
717 
718 	err = shutdown_memcg_caches(s, &release, &need_rcu_barrier);
719 	if (!err)
720 		err = shutdown_cache(s, &release, &need_rcu_barrier);
721 
722 	if (err) {
723 		pr_err("kmem_cache_destroy %s: "
724 		       "Slab cache still has objects\n", s->name);
725 		dump_stack();
726 	}
727 out_unlock:
728 	mutex_unlock(&slab_mutex);
729 
730 	put_online_mems();
731 	put_online_cpus();
732 
733 	release_caches(&release, need_rcu_barrier);
734 }
735 EXPORT_SYMBOL(kmem_cache_destroy);
736 
737 /**
738  * kmem_cache_shrink - Shrink a cache.
739  * @cachep: The cache to shrink.
740  *
741  * Releases as many slabs as possible for a cache.
742  * To help debugging, a zero exit status indicates all slabs were released.
743  */
744 int kmem_cache_shrink(struct kmem_cache *cachep)
745 {
746 	int ret;
747 
748 	get_online_cpus();
749 	get_online_mems();
750 	ret = __kmem_cache_shrink(cachep, false);
751 	put_online_mems();
752 	put_online_cpus();
753 	return ret;
754 }
755 EXPORT_SYMBOL(kmem_cache_shrink);
756 
757 bool slab_is_available(void)
758 {
759 	return slab_state >= UP;
760 }
761 
762 #ifndef CONFIG_SLOB
763 /* Create a cache during boot when no slab services are available yet */
764 void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
765 		unsigned long flags)
766 {
767 	int err;
768 
769 	s->name = name;
770 	s->size = s->object_size = size;
771 	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
772 
773 	slab_init_memcg_params(s);
774 
775 	err = __kmem_cache_create(s, flags);
776 
777 	if (err)
778 		panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
779 					name, size, err);
780 
781 	s->refcount = -1;	/* Exempt from merging for now */
782 }
783 
784 struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
785 				unsigned long flags)
786 {
787 	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
788 
789 	if (!s)
790 		panic("Out of memory when creating slab %s\n", name);
791 
792 	create_boot_cache(s, name, size, flags);
793 	list_add(&s->list, &slab_caches);
794 	s->refcount = 1;
795 	return s;
796 }
797 
798 struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
799 EXPORT_SYMBOL(kmalloc_caches);
800 
801 #ifdef CONFIG_ZONE_DMA
802 struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
803 EXPORT_SYMBOL(kmalloc_dma_caches);
804 #endif
805 
806 /*
807  * Conversion table for small slab sizes / 8 to the index in the
808  * kmalloc array. This is necessary for slabs < 192 since we have non power
809  * of two cache sizes there. The size of larger slabs can be determined using
810  * fls.
811  */
812 static s8 size_index[24] = {
813 	3,	/* 8 */
814 	4,	/* 16 */
815 	5,	/* 24 */
816 	5,	/* 32 */
817 	6,	/* 40 */
818 	6,	/* 48 */
819 	6,	/* 56 */
820 	6,	/* 64 */
821 	1,	/* 72 */
822 	1,	/* 80 */
823 	1,	/* 88 */
824 	1,	/* 96 */
825 	7,	/* 104 */
826 	7,	/* 112 */
827 	7,	/* 120 */
828 	7,	/* 128 */
829 	2,	/* 136 */
830 	2,	/* 144 */
831 	2,	/* 152 */
832 	2,	/* 160 */
833 	2,	/* 168 */
834 	2,	/* 176 */
835 	2,	/* 184 */
836 	2	/* 192 */
837 };
838 
839 static inline int size_index_elem(size_t bytes)
840 {
841 	return (bytes - 1) / 8;
842 }
843 
844 /*
845  * Find the kmem_cache structure that serves a given size of
846  * allocation
847  */
848 struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
849 {
850 	int index;
851 
852 	if (unlikely(size > KMALLOC_MAX_SIZE)) {
853 		WARN_ON_ONCE(!(flags & __GFP_NOWARN));
854 		return NULL;
855 	}
856 
857 	if (size <= 192) {
858 		if (!size)
859 			return ZERO_SIZE_PTR;
860 
861 		index = size_index[size_index_elem(size)];
862 	} else
863 		index = fls(size - 1);
864 
865 #ifdef CONFIG_ZONE_DMA
866 	if (unlikely((flags & GFP_DMA)))
867 		return kmalloc_dma_caches[index];
868 
869 #endif
870 	return kmalloc_caches[index];
871 }
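
/*
 * Worked examples for the lookup above: a 100-byte request falls in the
 * small-size table, size_index_elem(100) == 12 and size_index[12] == 7,
 * i.e. kmalloc-128; a 500-byte request uses fls(499) == 9, i.e. kmalloc-512
 * (or kmalloc_dma_caches[] instead if GFP_DMA was passed).
 */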
872 
873 /*
874  * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
875  * kmalloc_index() supports up to 2^26=64MB, so the final entry of the table is
876  * kmalloc-67108864.
877  */
878 static struct {
879 	const char *name;
880 	unsigned long size;
881 } const kmalloc_info[] __initconst = {
882 	{NULL,                      0},		{"kmalloc-96",             96},
883 	{"kmalloc-192",           192},		{"kmalloc-8",               8},
884 	{"kmalloc-16",             16},		{"kmalloc-32",             32},
885 	{"kmalloc-64",             64},		{"kmalloc-128",           128},
886 	{"kmalloc-256",           256},		{"kmalloc-512",           512},
887 	{"kmalloc-1024",         1024},		{"kmalloc-2048",         2048},
888 	{"kmalloc-4096",         4096},		{"kmalloc-8192",         8192},
889 	{"kmalloc-16384",       16384},		{"kmalloc-32768",       32768},
890 	{"kmalloc-65536",       65536},		{"kmalloc-131072",     131072},
891 	{"kmalloc-262144",     262144},		{"kmalloc-524288",     524288},
892 	{"kmalloc-1048576",   1048576},		{"kmalloc-2097152",   2097152},
893 	{"kmalloc-4194304",   4194304},		{"kmalloc-8388608",   8388608},
894 	{"kmalloc-16777216", 16777216},		{"kmalloc-33554432", 33554432},
895 	{"kmalloc-67108864", 67108864}
896 };
897 
898 /*
899  * Patch up the size_index table if we have strange large alignment
900  * requirements for the kmalloc array. This is only the case for
901  * MIPS it seems. The standard arches will not generate any code here.
902  *
903  * Largest permitted alignment is 256 bytes due to the way we
904  * handle the index determination for the smaller caches.
905  *
906  * Make sure that nothing crazy happens if someone starts tinkering
907  * around with ARCH_KMALLOC_MINALIGN
908  */
909 void __init setup_kmalloc_cache_index_table(void)
910 {
911 	int i;
912 
913 	BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
914 		(KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
915 
916 	for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
917 		int elem = size_index_elem(i);
918 
919 		if (elem >= ARRAY_SIZE(size_index))
920 			break;
921 		size_index[elem] = KMALLOC_SHIFT_LOW;
922 	}
923 
924 	if (KMALLOC_MIN_SIZE >= 64) {
925 		/*
926 		 * The 96 byte size cache is not used if the alignment
927 		 * is 64 bytes.
928 		 */
929 		for (i = 64 + 8; i <= 96; i += 8)
930 			size_index[size_index_elem(i)] = 7;
931 
932 	}
933 
934 	if (KMALLOC_MIN_SIZE >= 128) {
935 		/*
936 		 * The 192 byte sized cache is not used if the alignment
937 		 * is 128 bytes. Redirect kmalloc to use the 256 byte cache
938 		 * instead.
939 		 */
940 		for (i = 128 + 8; i <= 192; i += 8)
941 			size_index[size_index_elem(i)] = 8;
942 	}
943 }
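
/*
 * Example: with KMALLOC_MIN_SIZE == 64 (e.g. an architecture whose DMA
 * alignment forces 64-byte minimum objects, so KMALLOC_SHIFT_LOW == 6),
 * the first loop points the entries for 8..56 bytes at kmalloc-64 and the
 * second loop redirects 72..96 bytes to kmalloc-128, since kmalloc-96
 * cannot be used.
 */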
944 
945 static void __init new_kmalloc_cache(int idx, unsigned long flags)
946 {
947 	kmalloc_caches[idx] = create_kmalloc_cache(kmalloc_info[idx].name,
948 					kmalloc_info[idx].size, flags);
949 }
950 
951 /*
952  * Create the kmalloc array. Some of the regular kmalloc arrays
953  * may already have been created because they were needed to
954  * enable allocations for slab creation.
955  */
956 void __init create_kmalloc_caches(unsigned long flags)
957 {
958 	int i;
959 
960 	for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
961 		if (!kmalloc_caches[i])
962 			new_kmalloc_cache(i, flags);
963 
964 		/*
965 		 * Caches that are not a power of two in size. These
966 		 * have to be created immediately after the earlier
967 		 * power-of-two caches.
968 		 */
969 		if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
970 			new_kmalloc_cache(1, flags);
971 		if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
972 			new_kmalloc_cache(2, flags);
973 	}
974 
975 	/* Kmalloc array is now usable */
976 	slab_state = UP;
977 
978 #ifdef CONFIG_ZONE_DMA
979 	for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
980 		struct kmem_cache *s = kmalloc_caches[i];
981 
982 		if (s) {
983 			int size = kmalloc_size(i);
984 			char *n = kasprintf(GFP_NOWAIT,
985 				 "dma-kmalloc-%d", size);
986 
987 			BUG_ON(!n);
988 			kmalloc_dma_caches[i] = create_kmalloc_cache(n,
989 				size, SLAB_CACHE_DMA | flags);
990 		}
991 	}
992 #endif
993 }
994 #endif /* !CONFIG_SLOB */
995 
996 /*
997  * To avoid unnecessary overhead, we pass through large allocation requests
998  * directly to the page allocator. We use __GFP_COMP, because we will need to
999  * know the allocation order to free the pages properly in kfree.
1000  */
1001 void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
1002 {
1003 	void *ret;
1004 	struct page *page;
1005 
1006 	flags |= __GFP_COMP;
1007 	page = alloc_kmem_pages(flags, order);
1008 	ret = page ? page_address(page) : NULL;
1009 	kmemleak_alloc(ret, size, 1, flags);
1010 	kasan_kmalloc_large(ret, size);
1011 	return ret;
1012 }
1013 EXPORT_SYMBOL(kmalloc_order);
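
/*
 * Example (assuming 4K pages and SLUB's two-page KMALLOC_MAX_CACHE_SIZE):
 * kmalloc(64 * 1024, GFP_KERNEL) is routed here by kmalloc_large() with
 * order = get_order(64 * 1024) = 4. The __GFP_COMP above makes the result
 * a compound page, so kfree() can later recover the order from the head
 * page and free all 16 pages at once.
 */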
1014 
1015 #ifdef CONFIG_TRACING
1016 void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
1017 {
1018 	void *ret = kmalloc_order(size, flags, order);
1019 	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
1020 	return ret;
1021 }
1022 EXPORT_SYMBOL(kmalloc_order_trace);
1023 #endif
1024 
1025 #ifdef CONFIG_SLABINFO
1026 
1027 #ifdef CONFIG_SLAB
1028 #define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR)
1029 #else
1030 #define SLABINFO_RIGHTS S_IRUSR
1031 #endif
1032 
1033 static void print_slabinfo_header(struct seq_file *m)
1034 {
1035 	/*
1036 	 * Output format version, so at least we can change it
1037 	 * without _too_ many complaints.
1038 	 */
1039 #ifdef CONFIG_DEBUG_SLAB
1040 	seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
1041 #else
1042 	seq_puts(m, "slabinfo - version: 2.1\n");
1043 #endif
1044 	seq_puts(m, "# name            <active_objs> <num_objs> <objsize> "
1045 		 "<objperslab> <pagesperslab>");
1046 	seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
1047 	seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
1048 #ifdef CONFIG_DEBUG_SLAB
1049 	seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
1050 		 "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
1051 	seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
1052 #endif
1053 	seq_putc(m, '\n');
1054 }
1055 
1056 void *slab_start(struct seq_file *m, loff_t *pos)
1057 {
1058 	mutex_lock(&slab_mutex);
1059 	return seq_list_start(&slab_caches, *pos);
1060 }
1061 
1062 void *slab_next(struct seq_file *m, void *p, loff_t *pos)
1063 {
1064 	return seq_list_next(p, &slab_caches, pos);
1065 }
1066 
1067 void slab_stop(struct seq_file *m, void *p)
1068 {
1069 	mutex_unlock(&slab_mutex);
1070 }
1071 
1072 static void
1073 memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info)
1074 {
1075 	struct kmem_cache *c;
1076 	struct slabinfo sinfo;
1077 
1078 	if (!is_root_cache(s))
1079 		return;
1080 
1081 	for_each_memcg_cache(c, s) {
1082 		memset(&sinfo, 0, sizeof(sinfo));
1083 		get_slabinfo(c, &sinfo);
1084 
1085 		info->active_slabs += sinfo.active_slabs;
1086 		info->num_slabs += sinfo.num_slabs;
1087 		info->shared_avail += sinfo.shared_avail;
1088 		info->active_objs += sinfo.active_objs;
1089 		info->num_objs += sinfo.num_objs;
1090 	}
1091 }
1092 
1093 static void cache_show(struct kmem_cache *s, struct seq_file *m)
1094 {
1095 	struct slabinfo sinfo;
1096 
1097 	memset(&sinfo, 0, sizeof(sinfo));
1098 	get_slabinfo(s, &sinfo);
1099 
1100 	memcg_accumulate_slabinfo(s, &sinfo);
1101 
1102 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
1103 		   cache_name(s), sinfo.active_objs, sinfo.num_objs, s->size,
1104 		   sinfo.objects_per_slab, (1 << sinfo.cache_order));
1105 
1106 	seq_printf(m, " : tunables %4u %4u %4u",
1107 		   sinfo.limit, sinfo.batchcount, sinfo.shared);
1108 	seq_printf(m, " : slabdata %6lu %6lu %6lu",
1109 		   sinfo.active_slabs, sinfo.num_slabs, sinfo.shared_avail);
1110 	slabinfo_show_stats(m, s);
1111 	seq_putc(m, '\n');
1112 }
1113 
1114 static int slab_show(struct seq_file *m, void *p)
1115 {
1116 	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
1117 
1118 	if (p == slab_caches.next)
1119 		print_slabinfo_header(m);
1120 	if (is_root_cache(s))
1121 		cache_show(s, m);
1122 	return 0;
1123 }
1124 
1125 #ifdef CONFIG_MEMCG_KMEM
1126 int memcg_slab_show(struct seq_file *m, void *p)
1127 {
1128 	struct kmem_cache *s = list_entry(p, struct kmem_cache, list);
1129 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
1130 
1131 	if (p == slab_caches.next)
1132 		print_slabinfo_header(m);
1133 	if (!is_root_cache(s) && s->memcg_params.memcg == memcg)
1134 		cache_show(s, m);
1135 	return 0;
1136 }
1137 #endif
1138 
1139 /*
1140  * slabinfo_op - iterator that generates /proc/slabinfo
1141  *
1142  * Output layout:
1143  * cache-name
1144  * num-active-objs
1145  * total-objs
1146  * object size
1147  * num-active-slabs
1148  * total-slabs
1149  * num-pages-per-slab
1150  * + further values on SMP and with statistics enabled
1151  */
1152 static const struct seq_operations slabinfo_op = {
1153 	.start = slab_start,
1154 	.next = slab_next,
1155 	.stop = slab_stop,
1156 	.show = slab_show,
1157 };
1158 
1159 static int slabinfo_open(struct inode *inode, struct file *file)
1160 {
1161 	return seq_open(file, &slabinfo_op);
1162 }
1163 
1164 static const struct file_operations proc_slabinfo_operations = {
1165 	.open		= slabinfo_open,
1166 	.read		= seq_read,
1167 	.write          = slabinfo_write,
1168 	.llseek		= seq_lseek,
1169 	.release	= seq_release,
1170 };
1171 
1172 static int __init slab_proc_init(void)
1173 {
1174 	proc_create("slabinfo", SLABINFO_RIGHTS, NULL,
1175 						&proc_slabinfo_operations);
1176 	return 0;
1177 }
1178 module_init(slab_proc_init);
1179 #endif /* CONFIG_SLABINFO */
1180 
1181 static __always_inline void *__do_krealloc(const void *p, size_t new_size,
1182 					   gfp_t flags)
1183 {
1184 	void *ret;
1185 	size_t ks = 0;
1186 
1187 	if (p)
1188 		ks = ksize(p);
1189 
1190 	if (ks >= new_size) {
1191 		kasan_krealloc((void *)p, new_size);
1192 		return (void *)p;
1193 	}
1194 
1195 	ret = kmalloc_track_caller(new_size, flags);
1196 	if (ret && p)
1197 		memcpy(ret, p, ks);
1198 
1199 	return ret;
1200 }
1201 
1202 /**
1203  * __krealloc - like krealloc() but don't free @p.
1204  * @p: object to reallocate memory for.
1205  * @new_size: how many bytes of memory are required.
1206  * @flags: the type of memory to allocate.
1207  *
1208  * This function is like krealloc() except it never frees the originally
1209  * allocated buffer. Use this if you don't want to free the buffer immediately
1210  * like, for example, with RCU.
1211  */
1212 void *__krealloc(const void *p, size_t new_size, gfp_t flags)
1213 {
1214 	if (unlikely(!new_size))
1215 		return ZERO_SIZE_PTR;
1216 
1217 	return __do_krealloc(p, new_size, flags);
1218 
1219 }
1220 EXPORT_SYMBOL(__krealloc);
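
/*
 * Hedged sketch of the RCU case mentioned above (gp is a hypothetical
 * RCU-protected pointer, old points to the currently published buffer,
 * error handling trimmed):
 *
 *	struct foo *new;
 *
 *	new = __krealloc(old, new_size, GFP_KERNEL);
 *	if (!new)
 *		return -ENOMEM;
 *	if (new != old) {
 *		rcu_assign_pointer(gp, new);
 *		synchronize_rcu();
 *		kfree(old);
 *	}
 */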
1221 
1222 /**
1223  * krealloc - reallocate memory. The contents will remain unchanged.
1224  * @p: object to reallocate memory for.
1225  * @new_size: how many bytes of memory are required.
1226  * @flags: the type of memory to allocate.
1227  *
1228  * The contents of the object pointed to are preserved up to the
1229  * lesser of the new and old sizes.  If @p is %NULL, krealloc()
1230  * behaves exactly like kmalloc().  If @new_size is 0 and @p is not a
1231  * %NULL pointer, the object pointed to is freed.
1232  */
1233 void *krealloc(const void *p, size_t new_size, gfp_t flags)
1234 {
1235 	void *ret;
1236 
1237 	if (unlikely(!new_size)) {
1238 		kfree(p);
1239 		return ZERO_SIZE_PTR;
1240 	}
1241 
1242 	ret = __do_krealloc(p, new_size, flags);
1243 	if (ret && p != ret)
1244 		kfree(p);
1245 
1246 	return ret;
1247 }
1248 EXPORT_SYMBOL(krealloc);
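
/*
 * Typical use (names hypothetical). Note that the old buffer is not freed
 * on failure, so keep the original pointer until krealloc() succeeds:
 *
 *	new_buf = krealloc(buf, new_len, GFP_KERNEL);
 *	if (!new_buf)
 *		return -ENOMEM;		// buf is still valid
 *	buf = new_buf;
 */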
1249 
1250 /**
1251  * kzfree - like kfree but zero memory
1252  * @p: object to free memory of
1253  *
1254  * The memory of the object @p points to is zeroed before it is freed.
1255  * If @p is %NULL, kzfree() does nothing.
1256  *
1257  * Note: this function zeroes the whole allocated buffer which can be a good
1258  * deal bigger than the requested buffer size passed to kmalloc(). So be
1259  * careful when using this function in performance sensitive code.
1260  */
1261 void kzfree(const void *p)
1262 {
1263 	size_t ks;
1264 	void *mem = (void *)p;
1265 
1266 	if (unlikely(ZERO_OR_NULL_PTR(mem)))
1267 		return;
1268 	ks = ksize(mem);
1269 	memset(mem, 0, ks);
1270 	kfree(mem);
1271 }
1272 EXPORT_SYMBOL(kzfree);
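
/*
 * Typical use (names hypothetical): scrub sensitive data such as key
 * material before handing the buffer back to the allocator:
 *
 *	key = kmalloc(key_len, GFP_KERNEL);
 *	...
 *	kzfree(key);
 */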
1273 
1274 /* Tracepoints definitions. */
1275 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
1276 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
1277 EXPORT_TRACEPOINT_SYMBOL(kmalloc_node);
1278 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc_node);
1279 EXPORT_TRACEPOINT_SYMBOL(kfree);
1280 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_free);
1281