[Linux] Kmalloc Code Reading Records

Posted by Fog Juice on Mon, 07 Oct 2019 21:42:28 +0200



Based on Linux 5.0-rc

Allocators covered: SLOB, SLAB, SLUB

static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	if (__builtin_constant_p(size)) {
#ifndef CONFIG_SLOB
		unsigned int index;
#endif
		if (size > KMALLOC_MAX_CACHE_SIZE)
			return kmalloc_large(size, flags);
#ifndef CONFIG_SLOB
		index = kmalloc_index(size);

		if (!index)
			return ZERO_SIZE_PTR;

		return kmem_cache_alloc_trace(
				kmalloc_caches[kmalloc_type(flags)][index],
				flags, size);
#endif
	}
	return __kmalloc(size, flags);
}

kmalloc has two paths

Static size

If the size is a compile-time constant, __builtin_constant_p(size) is true and the compiler can resolve the whole path (including the cache index) at compile time; a caller sketch follows below.

size > KMALLOC_MAX_CACHE_SIZE ----> kmalloc_large

size <= KMALLOC_MAX_CACHE_SIZE ----> kmalloc_index
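
A hypothetical caller sketch (not from the kernel source) of how the two branches are reached: with a constant size the compiler folds kmalloc_index() and emits a direct cache allocation, while a runtime size compiles to a __kmalloc() call.

#include <linux/slab.h>

void kmalloc_paths_example(size_t runtime_len)
{
	/* 64 is a compile-time constant: __builtin_constant_p(64) is true,
	 * kmalloc_index(64) folds to 6, and this call becomes
	 * kmem_cache_alloc_trace(kmalloc_caches[kmalloc_type(GFP_KERNEL)][6], ...). */
	char *a = kmalloc(64, GFP_KERNEL);

	/* runtime_len is not a constant, so this compiles to __kmalloc(). */
	char *b = kmalloc(runtime_len, GFP_KERNEL);

	kfree(a);
	kfree(b);
}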

KMALLOC_MAX_CACHE_SIZE for SLAB (without CONFIG_FORCE_MAX_ZONEORDER, so MAX_ORDER = 11): KMALLOC_SHIFT_HIGH = min(MAX_ORDER + PAGE_SHIFT - 1, 25) = 11 + 12 (or 13) - 1 = 22 (or 23), capped at 25. The limit is 1 << KMALLOC_SHIFT_HIGH: a shift of 25 means 32M, 23 means 8M, 22 means 4M.

KMALLOC_MAX_CACHE_SIZE for SLUB: KMALLOC_SHIFT_HIGH = PAGE_SHIFT + 1, because SLUB itself only serves requests that fit in an order-1 page (PAGE_SIZE * 2); larger requests are passed to the page allocator. With PAGE_SHIFT = 12 that is 8k, with PAGE_SHIFT = 13 it is 16k.
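
A quick userspace check of that arithmetic (my own sketch, assuming the default MAX_ORDER = 11 and no CONFIG_FORCE_MAX_ZONEORDER):

#include <stdio.h>

int main(void)
{
	unsigned int max_order = 11;	/* default MAX_ORDER */
	unsigned int page_shift = 12;	/* 4K pages; use 13 for 8K pages */

	/* SLAB: KMALLOC_SHIFT_HIGH = min(MAX_ORDER + PAGE_SHIFT - 1, 25) */
	unsigned int slab_shift = max_order + page_shift - 1;
	if (slab_shift > 25)
		slab_shift = 25;

	/* SLUB: KMALLOC_SHIFT_HIGH = PAGE_SHIFT + 1 (order-1 page, i.e. two pages) */
	unsigned int slub_shift = page_shift + 1;

	/* page_shift 12: SLAB 4194304 (4M), SLUB 8192 (8k)
	 * page_shift 13: SLAB 8388608 (8M), SLUB 16384 (16k) */
	printf("SLAB max cache size: %lu\n", 1UL << slab_shift);
	printf("SLUB max cache size: %lu\n", 1UL << slub_shift);
	return 0;
}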

SLUB process

size <= KMALLOC_MAX_CACHE_SIZE

kmalloc_index: maps the size to its logarithmic (power-of-two) cache index
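
Roughly what that logarithmic mapping does, as a simplified userspace sketch (the real kmalloc_index() additionally has dedicated 96-byte and 192-byte caches at indices 1 and 2, and a minimum index that depends on the allocator):

#include <stddef.h>

/* Simplified sketch: power-of-two kmalloc cache index for a given size. */
static unsigned int kmalloc_index_sketch(size_t size)
{
	unsigned int idx;

	if (!size)
		return 0;		/* kmalloc() turns index 0 into ZERO_SIZE_PTR */
	if (size <= 8)
		return 3;		/* smallest power-of-two cache: 8 bytes */
	for (idx = 4; ((size_t)1 << idx) < size; idx++)
		;
	return idx;			/* 16 -> 4, 32 -> 5, 64 -> 6, ..., 4096 -> 12 */
}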

kmem_cache_alloc_trace -> slab_alloc -> slab_alloc_node
-> __slab_alloc (slow path) or object = c->freelist (fast path)
The general flow: if the per-cpu slab has a free object, take one directly from the cpu freelist; otherwise fall back to __slab_alloc.

	object = c->freelist;
	page = c->page;
	if (unlikely(!object || !node_match(page, node))) {
		object = __slab_alloc(s, gfpflags, node, addr, c);
		stat(s, ALLOC_SLOWPATH);
	} else {
		void *next_object = get_freepointer_safe(s, object);

		/*
		 * The cmpxchg will only match if there was no additional
		 * operation and if we are on the right processor.
		 *
		 * The cmpxchg does the following atomically (without lock
		 * semantics!)
		 * 1. Relocate first pointer to the current per cpu area.
		 * 2. Verify that tid and freelist have not been changed
		 * 3. If they were not changed replace tid and freelist
		 *
		 * Since this is without lock semantics the protection is only
		 * against code executing on this cpu *not* from access by
		 * other cpus.
		 */
		if (unlikely(!this_cpu_cmpxchg_double(
				s->cpu_slab->freelist, s->cpu_slab->tid,
				object, tid,
				next_object, next_tid(tid)))) {

			note_cmpxchg_failure("slab_alloc", s, tid);
			goto redo;
		}
		prefetch_freepointer(s, next_object);
		stat(s, ALLOC_FASTPATH);
	}

	if (unlikely(gfpflags & __GFP_ZERO) && object)
		memset(object, 0, s->object_size);

	slab_post_alloc_hook(s, gfpflags, 1, &object);

	return object;
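
Side note: the __GFP_ZERO branch near the end is also what kzalloc() relies on; in include/linux/slab.h, kzalloc() is essentially just kmalloc() with that flag OR-ed in:

static inline void *kzalloc(size_t size, gfp_t flags)
{
	return kmalloc(size, flags | __GFP_ZERO);
}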

kmalloc_large:

kmalloc_large -> kmalloc_order_trace

static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
	unsigned int order = get_order(size);
	/*
	 * get_order() takes the ceiling log2 of size in pages, i.e. the
	 * minimum page order that can hold the request:
	 *   0 -> 2^0 * PAGE_SIZE and below
	 *   1 -> 2^1 * PAGE_SIZE down to 2^0 * PAGE_SIZE + 1
	 *   2 -> 2^2 * PAGE_SIZE down to 2^1 * PAGE_SIZE + 1
	 *   3 -> 2^3 * PAGE_SIZE down to 2^2 * PAGE_SIZE + 1
	 *   4 -> 2^4 * PAGE_SIZE down to 2^3 * PAGE_SIZE + 1
	 */
	return kmalloc_order_trace(size, flags, order);
}
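
The same mapping as a standalone userspace sketch (my own approximation of get_order() for 4K pages, valid for size >= 1):

#include <stdio.h>

#define PAGE_SHIFT 12

/* Smallest order such that (PAGE_SIZE << order) >= size. */
static unsigned int get_order_sketch(unsigned long size)
{
	unsigned int order = 0;

	size = (size - 1) >> PAGE_SHIFT;	/* full pages needed, minus one */
	while (size) {
		order++;
		size >>= 1;
	}
	return order;
}

int main(void)
{
	/* 4096 -> 0, 4097 -> 1, 8192 -> 1, 8193 -> 2, 16384 -> 2 */
	printf("%u %u %u %u %u\n",
	       get_order_sketch(4096), get_order_sketch(4097),
	       get_order_sketch(8192), get_order_sketch(8193),
	       get_order_sketch(16384));
	return 0;
}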

void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
{
	void *ret = kmalloc_order(size, flags, order);
	trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
	return ret;
}

void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
{
	void *ret;
	struct page *page;

	flags |= __GFP_COMP;
	page = alloc_pages(flags, order);
	/*To avoid unnecessary overhead, we pass large allocation requests directly to the page allocator*/
	ret = page ? page_address(page) : NULL;
	kmemleak_alloc(ret, size, 1, flags);
	ret = kasan_kmalloc_large(ret, size, flags);
	return ret;
}

Dynamic size

If the size is not a compile-time constant, kmalloc falls through to __kmalloc, which SLAB, SLUB and SLOB each implement differently.

SLAB __kmalloc Implementation

__kmalloc -> __do_kmalloc

static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
					  unsigned long caller)
{
	struct kmem_cache *cachep;
	void *ret;

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return NULL;
	cachep = kmalloc_slab(size, flags);
	if (unlikely(ZERO_OR_NULL_PTR(cachep)))
		return cachep;
	ret = slab_alloc(cachep, flags, caller);

	ret = kasan_kmalloc(cachep, ret, size, flags);
	trace_kmalloc(caller, ret,
		      size, cachep->size, flags);

	return ret;
}

KMALLOC_MAX_CACHE_SIZE is defined in the slab header. With the SLAB allocator the largest kmalloc size is at most 32M, and __do_kmalloc returns NULL once the requested size exceeds KMALLOC_MAX_CACHE_SIZE.

#ifdef CONFIG_SLAB
/*
 * The largest kmalloc size supported by the SLAB allocators is
 * 32 megabyte (2^25) or the maximum allocatable page order if that is
 * less than 32 MB.
 */
#define KMALLOC_SHIFT_HIGH	((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
				(MAX_ORDER + PAGE_SHIFT - 1) : 25)
#define KMALLOC_SHIFT_MAX	KMALLOC_SHIFT_HIGH
#endif

SLUB __kmalloc Implementation

In the SLUB __kmalloc implementation, sizes above KMALLOC_MAX_CACHE_SIZE go straight to kmalloc_large.

Otherwise it looks up the kmalloc cache with kmalloc_slab and calls slab_alloc (-> slab_alloc_node), the same path as the static-size case above.

void *__kmalloc(size_t size, gfp_t flags)
{
	struct kmem_cache *s;
	void *ret;

	if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
		return kmalloc_large(size, flags);

	s = kmalloc_slab(size, flags);

	if (unlikely(ZERO_OR_NULL_PTR(s)))
		return s;

	ret = slab_alloc(s, flags, _RET_IP_);

	trace_kmalloc(_RET_IP_, ret, size, s->size, flags);

	ret = kasan_kmalloc(s, ret, size, flags);

	return ret;
}
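
The cache lookup itself happens in kmalloc_slab() (not quoted here): roughly, sizes up to 192 bytes go through a small size_index[] table, larger sizes use fls(size - 1) as the cache index. A hypothetical module snippet showing the effect of that rounding, using ksize() to report the usable size of the chosen cache object:

#include <linux/slab.h>
#include <linux/printk.h>

/* Hypothetical helper: a runtime-sized kmalloc lands in the next matching
 * kmalloc cache, and ksize() reports that cache's object size. */
void show_kmalloc_rounding(size_t n)
{
	void *p = kmalloc(n, GFP_KERNEL);

	if (!p)
		return;
	/* For n = 100 this would typically print 128: kmalloc_slab()
	 * selects the kmalloc-128 cache (100 is above the 96-byte cache). */
	pr_info("asked for %zu, usable %zu\n", n, ksize(p));
	kfree(p);
}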
