Use the this_cpu_cmpxchg_double functionality to implement a lockless allocation algorithm on arches that support fast this_cpu_ops.

Each of the per cpu pointers is paired with a transaction id that ensures that updates of the per cpu information can only occur in sequence on a certain cpu.

A transaction id is a "long" integer that is composed of an event number and the cpu number. The event number is incremented for every change to the per cpu state. This means that the cmpxchg instruction can verify for an update that nothing interfered, that we are updating the percpu structure for the processor where we picked up the information, and that we are also currently on that processor when we update the information.

This results in a significant decrease of the overhead in the fastpaths. It also makes it easy to adopt the fast path for realtime kernels since this is lockless and does not require the use of the current per cpu area over the critical section. It is only important that the per cpu area is current at the beginning of the critical section and at the end. So there is not even a need to disable preemption.

Test results show that the fastpath cycle count is reduced by up to ~40% (alloc/free test goes from ~140 cycles down to ~80). The slowpath for kfree adds a few cycles.

Sadly this does nothing for the slowpath, which is where the main issues with performance in slub are, but the best case performance rises significantly.
(For that see the more complex slub patches that require cmpxchg_double)

Kmalloc: alloc/free test

Before:

10000 times kmalloc(8)/kfree -> 134 cycles
10000 times kmalloc(16)/kfree -> 152 cycles
10000 times kmalloc(32)/kfree -> 144 cycles
10000 times kmalloc(64)/kfree -> 142 cycles
10000 times kmalloc(128)/kfree -> 142 cycles
10000 times kmalloc(256)/kfree -> 132 cycles
10000 times kmalloc(512)/kfree -> 132 cycles
10000 times kmalloc(1024)/kfree -> 135 cycles
10000 times kmalloc(2048)/kfree -> 135 cycles
10000 times kmalloc(4096)/kfree -> 135 cycles
10000 times kmalloc(8192)/kfree -> 144 cycles
10000 times kmalloc(16384)/kfree -> 754 cycles

After:

10000 times kmalloc(8)/kfree -> 78 cycles
10000 times kmalloc(16)/kfree -> 78 cycles
10000 times kmalloc(32)/kfree -> 82 cycles
10000 times kmalloc(64)/kfree -> 88 cycles
10000 times kmalloc(128)/kfree -> 79 cycles
10000 times kmalloc(256)/kfree -> 79 cycles
10000 times kmalloc(512)/kfree -> 85 cycles
10000 times kmalloc(1024)/kfree -> 82 cycles
10000 times kmalloc(2048)/kfree -> 82 cycles
10000 times kmalloc(4096)/kfree -> 85 cycles
10000 times kmalloc(8192)/kfree -> 82 cycles
10000 times kmalloc(16384)/kfree -> 706 cycles

Kmalloc: Repeatedly allocate then free test

Before:

10000 times kmalloc(8) -> 211 cycles kfree -> 113 cycles
10000 times kmalloc(16) -> 174 cycles kfree -> 115 cycles
10000 times kmalloc(32) -> 235 cycles kfree -> 129 cycles
10000 times kmalloc(64) -> 222 cycles kfree -> 120 cycles
10000 times kmalloc(128) -> 343 cycles kfree -> 139 cycles
10000 times kmalloc(256) -> 827 cycles kfree -> 147 cycles
10000 times kmalloc(512) -> 1048 cycles kfree -> 272 cycles
10000 times kmalloc(1024) -> 2043 cycles kfree -> 528 cycles
10000 times kmalloc(2048) -> 4002 cycles kfree -> 571 cycles
10000 times kmalloc(4096) -> 7740 cycles kfree -> 628 cycles
10000 times kmalloc(8192) -> 8062 cycles kfree -> 850 cycles
10000 times kmalloc(16384) -> 8895 cycles kfree -> 1249 cycles

After:

10000 times kmalloc(8) -> 190 cycles kfree -> 129 cycles
10000 times kmalloc(16) -> 76 cycles kfree -> 123 cycles
10000 times kmalloc(32) -> 126 cycles kfree -> 124 cycles
10000 times kmalloc(64) -> 181 cycles kfree -> 128 cycles
10000 times kmalloc(128) -> 310 cycles kfree -> 140 cycles
10000 times kmalloc(256) -> 809 cycles kfree -> 165 cycles
10000 times kmalloc(512) -> 1005 cycles kfree -> 269 cycles
10000 times kmalloc(1024) -> 1999 cycles kfree -> 527 cycles
10000 times kmalloc(2048) -> 3967 cycles kfree -> 570 cycles
10000 times kmalloc(4096) -> 7658 cycles kfree -> 637 cycles
10000 times kmalloc(8192) -> 8111 cycles kfree -> 859 cycles
10000 times kmalloc(16384) -> 8791 cycles kfree -> 1173 cycles

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
304 lines
8.3 KiB
C
304 lines
8.3 KiB
C
#ifndef _LINUX_SLUB_DEF_H
|
|
#define _LINUX_SLUB_DEF_H
|
|
|
|
/*
|
|
* SLUB : A Slab allocator without object queues.
|
|
*
|
|
* (C) 2007 SGI, Christoph Lameter
|
|
*/
|
|
#include <linux/types.h>
|
|
#include <linux/gfp.h>
|
|
#include <linux/workqueue.h>
|
|
#include <linux/kobject.h>
|
|
|
|
#include <linux/kmemleak.h>
|
|
|
|
/*
 * Events that SLUB can count. NR_SLUB_STAT_ITEMS is not an event: it is
 * the number of entries above and sizes the stat[] array in
 * struct kmem_cache_cpu, so it has to remain the final enumerator.
 */
enum stat_item {
	ALLOC_FASTPATH,			/* Object taken from the cpu slab */
	ALLOC_SLOWPATH,			/* Allocation that needed a new cpu slab */
	FREE_FASTPATH,			/* Object freed to the cpu slab */
	FREE_SLOWPATH,			/* Object freed, but not to the cpu slab */
	FREE_FROZEN,			/* Object freed to a frozen slab */
	FREE_ADD_PARTIAL,		/* Free moved the slab to the partial list */
	FREE_REMOVE_PARTIAL,		/* Free removed the last object */
	ALLOC_FROM_PARTIAL,		/* Cpu slab taken from the partial list */
	ALLOC_SLAB,			/* Cpu slab taken from the page allocator */
	ALLOC_REFILL,			/* Cpu slab refilled from the slab freelist */
	FREE_SLAB,			/* Slab given back to the page allocator */
	CPUSLAB_FLUSH,			/* The cpu slab was abandoned */
	DEACTIVATE_FULL,		/* Cpu slab was full when deactivated */
	DEACTIVATE_EMPTY,		/* Cpu slab was empty when deactivated */
	DEACTIVATE_TO_HEAD,		/* Cpu slab moved to the head of partials */
	DEACTIVATE_TO_TAIL,		/* Cpu slab moved to the tail of partials */
	DEACTIVATE_REMOTE_FREES,	/* Slab held remotely freed objects */
	ORDER_FALLBACK,			/* Times a fallback was necessary */
	NR_SLUB_STAT_ITEMS
};
|
|
|
|
/*
 * Allocation state that a slab cache keeps for each cpu (struct kmem_cache
 * refers to it through its __percpu cpu_slab pointer).
 */
struct kmem_cache_cpu {
	/* Next object that is available for allocation */
	void **freelist;
#ifdef CONFIG_CMPXCHG_LOCAL
	/* Globally unique transaction id */
	unsigned long tid;
#endif
	/* Slab that allocations are currently served from */
	struct page *page;
	/* Node that page belongs to (or -1 for debug) */
	int node;
#ifdef CONFIG_SLUB_STATS
	/* One counter for each enum stat_item event */
	unsigned int stat[NR_SLUB_STAT_ITEMS];
#endif
};
|
|
|
|
struct kmem_cache_node {
|
|
spinlock_t list_lock; /* Protect partial list and nr_partial */
|
|
unsigned long nr_partial;
|
|
struct list_head partial;
|
|
#ifdef CONFIG_SLUB_DEBUG
|
|
atomic_long_t nr_slabs;
|
|
atomic_long_t total_objects;
|
|
struct list_head full;
|
|
#endif
|
|
};
|
|
|
|
/*
 * A single word that can be read or updated atomically. It holds both the
 * order of a slab and the number of objects that a slab of that order
 * would contain.
 */
struct kmem_cache_order_objects {
	unsigned long x;	/* order and object count in one word */
};
|
|
|
|
/*
|
|
* Slab cache management.
|
|
*/
|
|
struct kmem_cache {
|
|
struct kmem_cache_cpu __percpu *cpu_slab;
|
|
/* Used for retriving partial slabs etc */
|
|
unsigned long flags;
|
|
unsigned long min_partial;
|
|
int size; /* The size of an object including meta data */
|
|
int objsize; /* The size of an object without meta data */
|
|
int offset; /* Free pointer offset. */
|
|
struct kmem_cache_order_objects oo;
|
|
|
|
/* Allocation and freeing of slabs */
|
|
struct kmem_cache_order_objects max;
|
|
struct kmem_cache_order_objects min;
|
|
gfp_t allocflags; /* gfp flags to use on each alloc */
|
|
int refcount; /* Refcount for slab cache destroy */
|
|
void (*ctor)(void *);
|
|
int inuse; /* Offset to metadata */
|
|
int align; /* Alignment */
|
|
const char *name; /* Name (only for display!) */
|
|
struct list_head list; /* List of slab caches */
|
|
#ifdef CONFIG_SYSFS
|
|
struct kobject kobj; /* For sysfs */
|
|
#endif
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/*
|
|
* Defragmentation by allocating from a remote node.
|
|
*/
|
|
int remote_node_defrag_ratio;
|
|
#endif
|
|
struct kmem_cache_node *node[MAX_NUMNODES];
|
|
};
|
|
|
|
/*
 * Kmalloc subsystem.
 *
 * The smallest kmalloc size is 8 bytes, unless the architecture defines
 * ARCH_DMA_MINALIGN with a value above 8, in which case that value is
 * the minimum.
 */
#if !defined(ARCH_DMA_MINALIGN) || ARCH_DMA_MINALIGN <= 8
#define KMALLOC_MIN_SIZE 8
#else
#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
#endif

/* ilog2 of the smallest kmalloc size */
#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
|
|
|
|
/*
 * Minimum kmalloc alignment: ARCH_DMA_MINALIGN when the architecture
 * defines it, otherwise the alignment of unsigned long long.
 */
#if defined(ARCH_DMA_MINALIGN)
#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
#else
#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
#endif
|
|
|
|
/*
 * Minimum slab alignment. Defaults to the alignment of unsigned long long
 * unless ARCH_SLAB_MINALIGN has already been defined.
 */
#if !defined(ARCH_SLAB_MINALIGN)
#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
#endif
|
|
|
|
/*
 * Largest kmalloc object size that SLUB handles itself. Allocations of
 * larger objects are passed through to the page allocator. Because the
 * page allocator "fastpath" is relatively slow, this value has to be high
 * enough that performance critical objects still go through the SLUB
 * fastpath.
 *
 * Once the page allocator "fastpath" is competitive with the slab
 * allocator fastpaths this should be dropped to PAGE_SIZE / 2.
 */
#define SLUB_MAX_SIZE (2 * PAGE_SIZE)

/* Number of entries in the kmalloc_caches array */
#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
|
|
|
|
/*
 * gfp bit that kmalloc() and kmalloc_node() test before using their
 * inline cache lookup. It is zero when there is no DMA zone.
 */
#ifndef CONFIG_ZONE_DMA
/* Disable DMA functionality */
#define SLUB_DMA (__force gfp_t)0
#else
#define SLUB_DMA __GFP_DMA
#endif
|
|
|
|
/*
|
|
* We keep the general caches in an array of slab caches that are used for
|
|
* 2^x bytes of allocations.
|
|
*/
|
|
extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
|
|
|
|
/*
|
|
* Sorry that the following has to be that ugly but some versions of GCC
|
|
* have trouble with constant propagation and loops.
|
|
*/
|
|
static __always_inline int kmalloc_index(size_t size)
|
|
{
|
|
if (!size)
|
|
return 0;
|
|
|
|
if (size <= KMALLOC_MIN_SIZE)
|
|
return KMALLOC_SHIFT_LOW;
|
|
|
|
if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
|
|
return 1;
|
|
if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
|
|
return 2;
|
|
if (size <= 8) return 3;
|
|
if (size <= 16) return 4;
|
|
if (size <= 32) return 5;
|
|
if (size <= 64) return 6;
|
|
if (size <= 128) return 7;
|
|
if (size <= 256) return 8;
|
|
if (size <= 512) return 9;
|
|
if (size <= 1024) return 10;
|
|
if (size <= 2 * 1024) return 11;
|
|
if (size <= 4 * 1024) return 12;
|
|
/*
|
|
* The following is only needed to support architectures with a larger page
|
|
* size than 4k.
|
|
*/
|
|
if (size <= 8 * 1024) return 13;
|
|
if (size <= 16 * 1024) return 14;
|
|
if (size <= 32 * 1024) return 15;
|
|
if (size <= 64 * 1024) return 16;
|
|
if (size <= 128 * 1024) return 17;
|
|
if (size <= 256 * 1024) return 18;
|
|
if (size <= 512 * 1024) return 19;
|
|
if (size <= 1024 * 1024) return 20;
|
|
if (size <= 2 * 1024 * 1024) return 21;
|
|
return -1;
|
|
|
|
/*
|
|
* What we really wanted to do and cannot do because of compiler issues is:
|
|
* int i;
|
|
* for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
|
|
* if (size <= (1 << i))
|
|
* return i;
|
|
*/
|
|
}
|
|
|
|
/*
|
|
* Find the slab cache for a given combination of allocation flags and size.
|
|
*
|
|
* This ought to end up with a global pointer to the right cache
|
|
* in kmalloc_caches.
|
|
*/
|
|
static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
|
|
{
|
|
int index = kmalloc_index(size);
|
|
|
|
if (index == 0)
|
|
return NULL;
|
|
|
|
return kmalloc_caches[index];
|
|
}
|
|
|
|
/* Allocators that are only declared here; this header holds no body for them */
void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags);
void *__kmalloc(size_t size, gfp_t flags);
|
|
|
|
static __always_inline void *
|
|
kmalloc_order(size_t size, gfp_t flags, unsigned int order)
|
|
{
|
|
void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
|
|
kmemleak_alloc(ret, size, 1, flags);
|
|
return ret;
|
|
}
|
|
|
|
#ifdef CONFIG_TRACING
|
|
extern void *
|
|
kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
|
|
extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
|
|
#else
|
|
static __always_inline void *
|
|
kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
|
|
{
|
|
return kmem_cache_alloc(s, gfpflags);
|
|
}
|
|
|
|
static __always_inline void *
|
|
kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
|
|
{
|
|
return kmalloc_order(size, flags, order);
|
|
}
|
|
#endif
|
|
|
|
/*
 * Serve an allocation with whole pages: the page order comes from
 * get_order(size) and the work is done by kmalloc_order_trace().
 */
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
	return kmalloc_order_trace(size, flags, get_order(size));
}
|
|
|
|
/*
 * Allocate size bytes.
 *
 * If size is not a compile time constant, or SLUB_DMA is set in flags,
 * the request is handed to __kmalloc(). Constant sizes above
 * SLUB_MAX_SIZE go to kmalloc_large(). For all other constant sizes the
 * cache is looked up inline with kmalloc_slab(); if there is no cache
 * (a zero size) ZERO_SIZE_PTR is returned.
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
	struct kmem_cache *cache;

	if (!__builtin_constant_p(size))
		return __kmalloc(size, flags);

	if (size > SLUB_MAX_SIZE)
		return kmalloc_large(size, flags);

	/* DMA requests do not use the inline cache lookup */
	if (flags & SLUB_DMA)
		return __kmalloc(size, flags);

	cache = kmalloc_slab(size);
	if (!cache)
		return ZERO_SIZE_PTR;

	return kmem_cache_alloc_trace(cache, flags, size);
}
|
|
|
|
#ifdef CONFIG_NUMA
|
|
/* Node aware allocators that are only declared here (CONFIG_NUMA only) */
void *__kmalloc_node(size_t size, gfp_t flags, int node);
void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node);
|
|
|
|
#ifdef CONFIG_TRACING
|
|
extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
|
gfp_t gfpflags,
|
|
int node, size_t size);
|
|
#else
|
|
static __always_inline void *
|
|
kmem_cache_alloc_node_trace(struct kmem_cache *s,
|
|
gfp_t gfpflags,
|
|
int node, size_t size)
|
|
{
|
|
return kmem_cache_alloc_node(s, gfpflags, node);
|
|
}
|
|
#endif
|
|
|
|
/*
 * Allocate size bytes on the given node.
 *
 * The cache is looked up inline only if size is a compile time constant
 * no larger than SLUB_MAX_SIZE and SLUB_DMA is not set in flags; every
 * other request is handed to __kmalloc_node(). If the inline lookup finds
 * no cache (a zero size) ZERO_SIZE_PTR is returned.
 */
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
	struct kmem_cache *cache;

	if (!__builtin_constant_p(size) || size > SLUB_MAX_SIZE ||
	    (flags & SLUB_DMA))
		return __kmalloc_node(size, flags, node);

	cache = kmalloc_slab(size);
	if (!cache)
		return ZERO_SIZE_PTR;

	return kmem_cache_alloc_node_trace(cache, flags, node, size);
}
|
|
#endif
|
|
|
|
#endif /* _LINUX_SLUB_DEF_H */
|