SLUB Allocator Internals
struct kmem_cache, freelist mechanics, per-CPU caches, and slab debugging
Why a slab allocator?
kmalloc could just call the page allocator for every allocation, but that's wasteful:
- Page allocator minimum granularity: 4096 bytes
- Typical kernel allocation: 32–512 bytes
- Objects of the same type are created and freed repeatedly
The slab allocator solves this by: 1. Allocating objects from pre-divided pages ("slabs") 2. Caching freed objects for immediate reuse (no page allocator round-trip) 3. Exploiting constructor patterns: objects are often the same type
Linux has used SLUB (the "unqueued" slab allocator) as its default since 2.6.23 (it was merged in 2.6.22); the older SLAB allocator was removed entirely in 6.8.
Architecture overview
kmalloc(128, GFP_KERNEL)
│
▼
kmalloc-128 cache (struct kmem_cache)
│
┌────┴─────────────────────────────────┐
│ Per-CPU slab (struct kmem_cache_cpu) │ ← current CPU
│ freelist: →obj1 →obj2 →obj3 │ ← hot, no lock
└────┬─────────────────────────────────┘
│ (per-CPU depleted)
┌────▼─────────────────────────────────┐
│ Per-node partial list (struct kmem_cache_node) │
│ partial slabs with some free objects │
└────┬─────────────────────────────────┘
│ (no partial slabs)
▼
Page allocator (get a new slab page)
struct kmem_cache
/* include/linux/slub_def.h */
struct kmem_cache {
struct kmem_cache_cpu __percpu *cpu_slab; /* per-CPU fast path */
/* Allocation flags and configuration: */
slab_flags_t flags;
unsigned long min_partial; /* min partial slabs in node */
unsigned int size; /* object size (with metadata) */
unsigned int object_size; /* real object size */
struct reciprocal_value reciprocal_size; /* precomputed reciprocal of size, for divide-free index math */
unsigned int offset; /* offset to freelist pointer (the embedded next-free link) within each object */
struct kmem_cache_order_objects oo; /* order and objects per slab (preferred) */
struct kmem_cache_order_objects max; /* highest order/objects ever used for this cache */
struct kmem_cache_order_objects min; /* fallback: smallest order that still fits one object */
gfp_t allocflags; /* gfp flags for slab allocation */
int refcount; /* reference count — NOTE(review): presumably for cache merging/aliasing; confirm against mm/slub.c */
void (*ctor)(void *); /* constructor */
unsigned int inuse; /* offset to metadata */
unsigned int align; /* final computed alignment of objects */
unsigned int red_left_pad; /* for redzone debugging */
const char *name; /* cache name, as shown in /proc/slabinfo and /sys/kernel/slab */
struct list_head list; /* in slab_caches list */
struct kmem_cache_node *node[MAX_NUMNODES]; /* per-NUMA-node state (partial lists) */
};
struct kmem_cache_cpu (per-CPU fast path)
/* One instance per CPU per cache: the lockless allocation fast path. */
struct kmem_cache_cpu {
void **freelist; /* pointer to first free object */
unsigned long tid; /* transaction ID (ABA prevention): bumped on every freelist update so a stale freelist read is detected */
struct slab *slab; /* current slab being used */
/* ... */
};
The freelist is an embedded linked list: each free object's first bytes contain a pointer to the next free object (or NULL at the end of the list).
Slab page layout (objects = 128 bytes, SLUB):
┌────────┬────────┬────────┬────────┬──────────────────┐
│ obj[0] │ obj[1] │ obj[2] │ obj[3] │ ... until end │
│ free │ used │ free │ free │ │
│ →obj[2]│ [data] │ →obj[3]│ NULL │ │
└────────┴────────┴────────┴────────┴──────────────────┘
↑
cpu_slab->freelist = &obj[0] → obj[2] → obj[3] → NULL
kmalloc path (alloc_cache_obj)
/* mm/slub.c (simplified) */
/*
 * __kmalloc — route a size/flags request to the matching kmalloc-N cache.
 *
 * kmalloc_index() rounds @size up to the next power-of-two-ish bucket;
 * kmalloc_type() picks the cache variant for @flags (normal, DMA,
 * reclaimable, ...). The selected cache then serves the allocation.
 */
static __always_inline void *__kmalloc(size_t size, gfp_t flags)
{
	struct kmem_cache *cache;

	/* Round up to the next kmalloc-N size and pick the flag variant: */
	cache = kmalloc_caches[kmalloc_type(flags, _RET_IP_)][kmalloc_index(size)];

	return slab_alloc(cache, size, flags, _RET_IP_);
}
/*
 * slab_alloc — lockless per-CPU fast path for object allocation.
 *
 * Simplified excerpt: the read order below (tid, then barrier(), then
 * freelist) is deliberate — the tid snapshot lets the final CAS detect
 * that this CPU's freelist changed underneath us (IRQ, preemption,
 * migration) between the reads and the update.
 */
static __always_inline void *slab_alloc(struct kmem_cache *s,
size_t orig_size, gfp_t gfpflags,
unsigned long addr)
{
void *object;
struct kmem_cache_cpu *c;
unsigned long tid;
/* Fast path: per-CPU freelist */
c = this_cpu_ptr(s->cpu_slab);
tid = c->tid;
/* Compiler barrier: freelist must not be read before tid is snapshotted. */
barrier();
object = c->freelist;
if (unlikely(!object || !c->slab)) {
/* Per-CPU slab exhausted → slow path */
/* (refills from the node partial list or the page allocator) */
object = __slab_alloc(s, gfpflags, NUMA_NO_NODE, addr, c, orig_size);
} else {
/* Advance the freelist pointer (the embedded next ptr): */
void *next = get_freepointer_safe(s, object);
/* CAS to prevent races with IRQs stealing from same freelist: */
/* atomically swings (freelist, tid) → (next, tid+1); fails if tid moved */
if (unlikely(!__update_cpu_freelist_fast(s, object, next, tid))) {
/* Lost the race — fall back to the slow path rather than retry here
 * (the real kernel re-runs the fast path; simplified for exposition). */
object = __slab_alloc(s, gfpflags, NUMA_NO_NODE, addr, c, orig_size);
}
}
return object;
}
kfree path
/*
 * kfree — free memory obtained from kmalloc().
 *
 * Looks up the backing slab from the pointer's page; if the pointer is
 * not slab-backed it must be a large kmalloc served directly by the
 * page allocator as a compound page, handled by free_large_kmalloc().
 *
 * Fixes vs. the previous excerpt: the large-allocation branch referenced
 * an undeclared variable `page` (the real kernel passes the folio), the
 * const qualifier was implicitly dropped when handing @object to
 * slab_free(), and the documented kfree(NULL) no-op guard was missing.
 */
void kfree(const void *object)
{
	struct slab *slab;
	struct kmem_cache *s;
	void *x;

	/* kfree(NULL) is a documented no-op. */
	if (unlikely(!object))
		return;

	/* Get the slab (and thus the cache) from the page: */
	slab = virt_to_slab(object);
	if (unlikely(!slab)) {
		/* Large allocation (compound page, no slab metadata): */
		free_large_kmalloc(virt_to_folio(object), (void *)object);
		return;
	}

	s = slab->slab_cache;
	x = (void *)object; /* cast away const for the mutating free path */
	slab_free(s, slab, x, NULL, &x, 1, _RET_IP_);
}
/*
 * slab_free — free one or more objects back to their slab.
 *
 * @head/@tail/@cnt describe a linked chain of objects (bulk free);
 * this simplified fast path only handles the single-object case
 * (tail == NULL, cnt == 1). NOTE(review): the real kernel fast path
 * uses a tid-validated cmpxchg (like the alloc path) rather than the
 * plain stores shown here — simplified for exposition; confirm against
 * do_slab_free() in mm/slub.c.
 */
static void slab_free(struct kmem_cache *s, struct slab *slab,
void *head, void *tail, void **p, int cnt,
unsigned long addr)
{
struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
/* If freeing to the current slab, just prepend to freelist: */
if (slab == c->slab) {
/* Fast path: set object's next ptr to current freelist head */
set_freepointer(s, head, c->freelist);
c->freelist = head;
/* Bump tid so concurrent fast-path readers see the change (ABA guard). */
c->tid = next_tid(c->tid);
return;
}
/* Slow path: different slab → lock and handle */
/* (may move the slab onto/off the node partial list, or return it
 *  to the page allocator if it becomes empty) */
__slab_free(s, slab, head, tail, cnt, addr);
}
kmem_cache_create (named caches)
/* For frequently allocated objects with a specific type: */
struct kmem_cache *my_cache;
/* At module init: */
/* Returns NULL on failure — except with SLAB_PANIC, which panics instead. */
my_cache = kmem_cache_create(
"my_struct", /* name (shown in /proc/slabinfo) */
sizeof(struct my_struct), /* object size */
__alignof__(struct my_struct), /* alignment */
SLAB_HWCACHE_ALIGN | /* align to cache lines */
SLAB_PANIC, /* panic() if creation fails */
my_struct_ctor /* optional constructor — runs once per object when
                * its slab page is populated, NOT on every alloc */
);
/* Allocate and free: */
struct my_struct *obj = kmem_cache_alloc(my_cache, GFP_KERNEL);
kmem_cache_free(my_cache, obj);
/* At module exit: */
/* All objects must have been freed before destroying the cache. */
kmem_cache_destroy(my_cache);
SLUB debugging
# Enable SLUB debugging at boot (heavy overhead!):
# SLUB_DEBUG in config, then:
# slub_debug=FZP (F: sanity/consistency checks, Z: red zoning, P: poisoning)
# or per-cache: slub_debug=FZP,my_struct
# /proc/slabinfo: cache statistics
cat /proc/slabinfo
# name <active_objs> <num_objs> <objsize> <objperslab> ...
# kmalloc-128 12453 13000 128 32 ...
# my_struct 123 256 256 16 ...
# More detailed: slabtop (like top for slab)
slabtop
# Active / Total Objects (% used) : 1234567 / 1500000 (82.3%)
# Active / Total Slabs (% used) : 12345 / 15000 (82.3%)
# ...
# Find which cache holds a pointer:
# (in crash or gdb with vmlinux):
# crash> kmem -s <pointer>
# SLUB debug info for a cache:
ls /sys/kernel/slab/
cat /sys/kernel/slab/kmalloc-128/alloc_fastpath # per-CPU alloc count
cat /sys/kernel/slab/kmalloc-128/free_fastpath
cat /sys/kernel/slab/kmalloc-128/slabs # total slab pages
cat /sys/kernel/slab/kmalloc-128/objects # total objects
cat /sys/kernel/slab/kmalloc-128/object_size # 128
# KASAN: detects use-after-free and out-of-bounds in slab:
# CONFIG_KASAN=y causes SLUB to insert red zones and poison memory
Memory poisoning
When poisoning is active (kernel built with CONFIG_SLUB_DEBUG and booted with slub_debug=P), freed objects are poisoned:
Poison values:
0x6b ('k') — object content after free (POISON_FREE)
0xbb — red zone bytes while object is free (SLUB_RED_INACTIVE)
0xcc — red zone bytes while object is allocated (SLUB_RED_ACTIVE)
On the next allocation, SLUB checks that POISON_FREE pattern is intact.
If not: use-after-free detected, kernel dumps the offending allocation.
Further reading
- kmalloc and slab — user-facing API
- Page Allocator — SLUB gets pages from here
- KASAN — slab memory error detection
- KFENCE — lightweight slab sampling-based detection
- mm/slub.c — SLUB implementation (~10,000 lines)
- Documentation/mm/slub.rst — SLUB documentation