From e51bfd0ad10600a9fe4c8ede5ac2272e80075008 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz
Date: Sun, 10 Feb 2008 11:21:54 +0100
Subject: slab: avoid double initialization & do initialization in 1 place

- alloc_slabmgmt: initialize all slab fields in 1 place
- slab->nodeid was initialized twice: in alloc_slabmgmt and immediately
  after it in cache_grow

Signed-off-by: Marcin Slusarz
CC: Christoph Lameter
Reviewed-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 mm/slab.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 40c00dacbe4..473e6c2eaef 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2630,6 +2630,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
 	slabp->nodeid = nodeid;
+	slabp->free = 0;
 	return slabp;
 }
 
@@ -2683,7 +2684,6 @@ static void cache_init_objs(struct kmem_cache *cachep,
 		slab_bufctl(slabp)[i] = i + 1;
 	}
 	slab_bufctl(slabp)[i - 1] = BUFCTL_END;
-	slabp->free = 0;
 }
 
 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
@@ -2816,7 +2816,6 @@ static int cache_grow(struct kmem_cache *cachep,
 	if (!slabp)
 		goto opps1;
 
-	slabp->nodeid = nodeid;
 	slab_map_pages(cachep, slabp, objp);
 
 	cache_init_objs(cachep, slabp);
--
cgit v1.2.3
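The point of the patch above is easiest to see outside the kernel: when the descriptor allocator initializes every field itself, callers cannot forget one and no field is set twice. Below is a minimal userspace C sketch of that pattern; the toy_slab type and all toy_* names are illustrative stand-ins, not the kernel's actual slab code.

/* Minimal userspace sketch of the pattern the patch enforces: the
 * descriptor allocator sets every field in one place, so no caller
 * has to finish the job (slab->free used to be set in a different
 * function, slab->nodeid in two places). Toy types, not kernel code. */
#include <stdio.h>
#include <stdlib.h>

struct toy_slab {
        unsigned long colouroff;        /* colour offset into the page */
        void *s_mem;                    /* address of first object */
        unsigned int inuse;             /* number of allocated objects */
        unsigned int free;              /* index of first free object */
        int nodeid;                     /* NUMA node the slab lives on */
};

/* All fields initialized here, as alloc_slabmgmt() does after the patch. */
static struct toy_slab *toy_alloc_slabmgmt(void *objp, unsigned long colour_off,
                                           int nodeid)
{
        struct toy_slab *slabp = malloc(sizeof(*slabp));

        if (!slabp)
                return NULL;
        slabp->inuse = 0;
        slabp->colouroff = colour_off;
        slabp->s_mem = (char *)objp + colour_off;
        slabp->nodeid = nodeid;
        slabp->free = 0;                /* moved here from cache_init_objs() */
        return slabp;
}

int main(void)
{
        char page[4096];
        struct toy_slab *slabp = toy_alloc_slabmgmt(page, 32, 0);

        if (slabp) {
                printf("slab on node %d, first free index %u\n",
                       slabp->nodeid, slabp->free);
                free(slabp);
        }
        return 0;
}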
From eada35efcb2773cf49aa26277e056122e1a3405c Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Mon, 11 Feb 2008 22:47:46 +0200
Subject: slub: kmalloc page allocator pass-through cleanup

This adds a proper function for kmalloc page allocator pass-through.
While it simplifies slab tracing code a lot, I think it's a worthwhile
cleanup in itself.

Signed-off-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 include/linux/slub_def.h |  8 ++++++--
 mm/slub.c                | 14 ++++++--------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5e6d3d634d5..a849c472b84 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -188,12 +188,16 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
+static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
+{
+	return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+}
+
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
 		if (size > PAGE_SIZE / 2)
-			return (void *)__get_free_pages(flags | __GFP_COMP,
-				get_order(size));
+			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
diff --git a/mm/slub.c b/mm/slub.c
index e2989ae243b..7870ef9d863 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2671,8 +2671,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
@@ -2689,8 +2688,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
@@ -3219,8 +3217,8 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -3235,8 +3233,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
--
cgit v1.2.3
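To illustrate the pass-through idea in isolation: one helper owns the "hand large requests to the page-level allocator" policy, and every entry point collapses to a single call behind a shared size cutoff. The sketch below is a userspace analogue under that assumption; big_alloc() and toy_kmalloc() are hypothetical stand-ins for kmalloc_large() and __kmalloc(), with mmap() and malloc() standing in for the page allocator and the slab fastpath.

/* Userspace sketch of the pass-through pattern: the cutoff policy and
 * the large-allocation mechanism live in one helper, so the four call
 * sites in the patch each shrink to "return kmalloc_large(...)". */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define TOY_PAGE_SIZE 4096UL

/* The one place that knows how large requests are satisfied. */
static inline void *big_alloc(size_t size)
{
        void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        return p == MAP_FAILED ? NULL : p;
}

/* Every allocation entry point shares the same cutoff logic. */
static void *toy_kmalloc(size_t size)
{
        if (size > TOY_PAGE_SIZE / 2)
                return big_alloc(size);
        return malloc(size);    /* stand-in for the slab fastpath */
}

int main(void)
{
        void *small = toy_kmalloc(128);
        void *large = toy_kmalloc(3 * TOY_PAGE_SIZE);

        printf("small=%p large=%p\n", small, large);
        free(small);
        if (large)
                munmap(large, 3 * TOY_PAGE_SIZE);
        return 0;
}

The design benefit mirrors the patch: if the pass-through policy ever changes (as it does two patches later, when the cutoff moves from PAGE_SIZE / 2 to PAGE_SIZE), only the call sites' comparison changes, not duplicated __get_free_pages() boilerplate.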
From dada123d99c241d1a45798a7c77bcf99c4968704 Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Wed, 13 Feb 2008 23:30:32 +0200
Subject: make slub.c:slab_address() static

slab_address() can become static.

Signed-off-by: Adrian Bunk
Signed-off-by: Christoph Lameter
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 7870ef9d863..1af7f2f1942 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -308,7 +308,7 @@ static inline int is_end(void *addr)
 	return (unsigned long)addr & PAGE_MAPPING_ANON;
 }
 
-void *slab_address(struct page *page)
+static void *slab_address(struct page *page)
 {
 	return page->end - PAGE_MAPPING_ANON;
 }
--
cgit v1.2.3

From b7a49f0d4c34166ae84089d9f145cfaae1b0eec5 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 14 Feb 2008 14:21:32 -0800
Subject: slub: Determine gfpflags once and not every time a slab is allocated

Currently we determine the gfp flags to pass to the page allocator each
time a slab is being allocated. Determine the bits to be set at the time
the slab cache is created. Store them in a new allocflags field and add
the flags in allocate_slab().

Acked-by: Mel Gorman
Reviewed-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 include/linux/slub_def.h |  1 +
 mm/slub.c                | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a849c472b84..98be113cf93 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -71,6 +71,7 @@ struct kmem_cache {
 
 	/* Allocation and freeing of slabs */
 	int objects;		/* Number of objects in slab */
+	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
 	void (*ctor)(struct kmem_cache *, void *);
 	int inuse;		/* Offset to metadata */
diff --git a/mm/slub.c b/mm/slub.c
index 1af7f2f1942..ccfd41141b6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1078,14 +1078,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	struct page *page;
 	int pages = 1 << s->order;
 
-	if (s->order)
-		flags |= __GFP_COMP;
-
-	if (s->flags & SLAB_CACHE_DMA)
-		flags |= SLUB_DMA;
-
-	if (s->flags & SLAB_RECLAIM_ACCOUNT)
-		flags |= __GFP_RECLAIMABLE;
+	flags |= s->allocflags;
 
 	if (node == -1)
 		page = alloc_pages(flags, s->order);
@@ -2333,6 +2326,16 @@ static int calculate_sizes(struct kmem_cache *s)
 	if (s->order < 0)
 		return 0;
 
+	s->allocflags = 0;
+	if (s->order)
+		s->allocflags |= __GFP_COMP;
+
+	if (s->flags & SLAB_CACHE_DMA)
+		s->allocflags |= SLUB_DMA;
+
+	if (s->flags & SLAB_RECLAIM_ACCOUNT)
+		s->allocflags |= __GFP_RECLAIMABLE;
+
 	/*
	 * Determine the number of objects per slab
	 */
--
cgit v1.2.3
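The optimization above trades a few branches on every slab allocation for one computation at cache-creation time. A minimal userspace sketch of that split follows; the TOY_* flag values and toy_* functions are made-up stand-ins, not the kernel's gfp bits or SLUB's real structures.

/* Sketch of the optimization: derive the per-allocation flag bits once,
 * when the cache is created, instead of re-deriving them on every slab
 * allocation. Flag values below are arbitrary stand-ins for the
 * kernel's __GFP_COMP / SLUB_DMA / __GFP_RECLAIMABLE bits. */
#include <stdio.h>

#define TOY_CACHE_DMA           0x1     /* cache property flags */
#define TOY_RECLAIM_ACCOUNT     0x2

#define TOY_GFP_COMP            0x10    /* per-allocation gfp bits */
#define TOY_GFP_DMA             0x20
#define TOY_GFP_RECLAIMABLE     0x40

struct toy_cache {
        unsigned int flags;             /* cache properties */
        unsigned int order;             /* page order per slab */
        unsigned int allocflags;        /* precomputed gfp bits */
};

/* Done once, at cache-creation time (calculate_sizes() in the patch). */
static void toy_compute_allocflags(struct toy_cache *s)
{
        s->allocflags = 0;
        if (s->order)
                s->allocflags |= TOY_GFP_COMP;
        if (s->flags & TOY_CACHE_DMA)
                s->allocflags |= TOY_GFP_DMA;
        if (s->flags & TOY_RECLAIM_ACCOUNT)
                s->allocflags |= TOY_GFP_RECLAIMABLE;
}

/* The hot path just ORs in the precomputed bits, with no branches. */
static unsigned int toy_allocate_slab(struct toy_cache *s, unsigned int gfp)
{
        return gfp | s->allocflags;
}

int main(void)
{
        struct toy_cache s = { .flags = TOY_CACHE_DMA, .order = 1 };

        toy_compute_allocflags(&s);
        printf("effective gfp: %#x\n", toy_allocate_slab(&s, 0x100));
        return 0;
}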
From 71c7a06ff0a2ba0434ace4d7aa679537c4211d9d Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 14 Feb 2008 14:28:01 -0800
Subject: slub: Fallback to kmalloc_large for failing higher order allocs

Slub already has two ways of allocating an object. One is via its own
logic and the other is via the call to kmalloc_large to hand off object
allocation to the page allocator. kmalloc_large is typically used for
objects >= PAGE_SIZE.

We can use that handoff to avoid failing if a higher order kmalloc slab
allocation cannot be satisfied by the page allocator. If we reach the
out of memory path then simply try a kmalloc_large(). kfree() can
already handle the case of an object that was allocated via the page
allocator and so this will work just fine (apart from object
accounting...).

For any kmalloc slab that already requires higher order allocs (which
makes it impossible to use the page allocator fastpath!) we just use
PAGE_ALLOC_COSTLY_ORDER to get the largest number of objects in one go
from the page allocator slowpath.

On a 4k platform this patch will lead to the following use of higher
order pages for the following kmalloc slabs:

8 ... 1024	order 0
2048 .. 4096	order 3 (4k slab only after the next patch)

We may waste some space if fallback occurs on a 2k slab but we are
always able to fall back to an order 0 alloc.

Reviewed-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 mm/slub.c | 43 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index ccfd41141b6..644fd0aaeaf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -211,6 +211,8 @@ static inline void ClearSlabDebug(struct page *page)
 /* Internal SLUB flags */
 #define __OBJECT_POISON		0x80000000 /* Poison object */
 #define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */
+#define __KMALLOC_CACHE		0x20000000 /* objects freed using kfree */
+#define __PAGE_ALLOC_FALLBACK	0x10000000 /* Allow fallback to page alloc */
 
 /* Not all arches define cache_line_size */
 #ifndef cache_line_size
@@ -1539,7 +1541,6 @@ load_freelist:
 unlock_out:
 	slab_unlock(c->page);
 	stat(c, ALLOC_SLOWPATH);
-out:
 #ifdef SLUB_FASTPATH
 	local_irq_restore(flags);
 #endif
@@ -1574,8 +1575,24 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
-	object = NULL;
-	goto out;
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
+	/*
+	 * No memory available.
+	 *
+	 * If the slab uses higher order allocs but the object is
+	 * smaller than a page size then we can fallback in emergencies
+	 * to the page allocator via kmalloc_large. The page allocator may
+	 * have failed to obtain a higher order page and we can try to
+	 * allocate a single page if the object fits into a single page.
+	 * That is only possible if certain conditions are met that are being
+	 * checked when a slab is created.
+	 */
+	if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK))
+		return kmalloc_large(s->objsize, gfpflags);
+
+	return NULL;
 debug:
 	object = c->page->freelist;
 	if (!alloc_debug_processing(s, c->page, object, addr))
@@ -2322,7 +2339,20 @@ static int calculate_sizes(struct kmem_cache *s)
 	size = ALIGN(size, align);
 	s->size = size;
 
-	s->order = calculate_order(size);
+	if ((flags & __KMALLOC_CACHE) &&
+			PAGE_SIZE / size < slub_min_objects) {
+		/*
+		 * Kmalloc cache that would not have enough objects in
+		 * an order 0 page. Kmalloc slabs can fallback to
+		 * page allocator order 0 allocs so take a reasonably large
+		 * order that will allow us a good number of objects.
+		 */
+		s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
+		s->flags |= __PAGE_ALLOC_FALLBACK;
+		s->allocflags |= __GFP_NOWARN;
+	} else
+		s->order = calculate_order(size);
+
 	if (s->order < 0)
 		return 0;
 
@@ -2539,7 +2569,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 
 	down_write(&slub_lock);
 	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
-			flags, NULL))
+			flags | __KMALLOC_CACHE, NULL))
 		goto panic;
 
 	list_add(&s->list, &slab_caches);
@@ -3058,6 +3088,9 @@ static int slab_unmergeable(struct kmem_cache *s)
 	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
 		return 1;
 
+	if ((s->flags & __PAGE_ALLOC_FALLBACK))
+		return 1;
+
 	if (s->ctor)
 		return 1;
--
cgit v1.2.3
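The fallback logic above can be summarized as: prefer one higher-order block that holds many objects, and if the page allocator cannot provide it, serve the request from a single page as long as one object fits. Here is a self-contained userspace sketch of that policy; try_pages() deliberately fails for order > 0 to simulate fragmentation, and all names are hypothetical, not SLUB's real code path.

/* Sketch of the fallback idea: try the preferred higher-order
 * allocation first, then fall back to a single order-0 page in
 * emergencies, mirroring __slab_alloc() handing the request to
 * kmalloc_large() when the object fits into one page. */
#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE 4096UL

/* Stand-in for alloc_pages(); pretend higher orders are exhausted. */
static void *try_pages(unsigned int order)
{
        if (order > 0)
                return NULL;            /* simulate fragmentation */
        return malloc(TOY_PAGE_SIZE << order);
}

static void *toy_alloc(size_t objsize, unsigned int preferred_order)
{
        void *p = try_pages(preferred_order);

        if (p)
                return p;
        /* Emergency path: only legal if one object fits in one page,
         * the condition checked at cache-creation time in the patch. */
        if (objsize <= TOY_PAGE_SIZE)
                return try_pages(0);
        return NULL;
}

int main(void)
{
        void *p = toy_alloc(2048, 3);   /* 2k object, order-3 preferred */

        printf("got %s\n", p ? "order-0 fallback" : "nothing");
        free(p);
        return 0;
}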
From 331dc558fa020451ff773973cee855fd721aa88e Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 14 Feb 2008 14:28:09 -0800
Subject: slub: Support 4k kmallocs again to compensate for page allocator slowness

Currently we hand off PAGE_SIZEd kmallocs to the page allocator in the
mistaken belief that the page allocator can handle these allocations
effectively. However, measurements indicate a minimum slowdown by a
factor of 8 (and that is only SMP, NUMA is much worse) vs the slub
fastpath, which causes regressions in tbench.

Increase the number of kmalloc caches by one so that we again handle 4k
kmallocs directly from slub. 4k page buffering for the page allocator
will be performed by slub just as slab does it.

At some point the page allocator fastpath should be fixed. A lot of the
kernel would benefit from a faster ability to allocate a single page.
If that is done then the 4k allocs may again be forwarded to the page
allocator and this patch could be reverted.

Reviewed-by: Pekka Enberg
Acked-by: Mel Gorman
Signed-off-by: Christoph Lameter
---
 include/linux/slub_def.h |  6 +++---
 mm/slub.c                | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 98be113cf93..57deecc79d5 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -111,7 +111,7 @@ struct kmem_cache {
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT];
+extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -197,7 +197,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
-		if (size > PAGE_SIZE / 2)
+		if (size > PAGE_SIZE)
 			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
@@ -219,7 +219,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) &&
-		size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) {
+		size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
 
 		if (!s)
diff --git a/mm/slub.c b/mm/slub.c
index 644fd0aaeaf..4b3895cb90e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2517,11 +2517,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 *		Kmalloc subsystem
 *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2703,7 +2703,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -2720,7 +2720,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -3032,7 +3032,7 @@ void __init kmem_cache_init(void)
 		caches++;
 	}
 
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
 			"kmalloc", 1 << i, GFP_KERNEL);
 		caches++;
@@ -3059,7 +3059,7 @@ void __init kmem_cache_init(void)
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
 			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3252,7 +3252,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
@@ -3268,7 +3268,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
--
cgit v1.2.3
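Why the arrays grow from PAGE_SHIFT to PAGE_SHIFT + 1 entries: once sizes up to and including PAGE_SIZE are served from kmalloc caches, the highest power-of-two index is log2(PAGE_SIZE), which equals PAGE_SHIFT itself. The sketch below walks that index math; it mirrors the intent of the kmalloc size-to-cache mapping, not SLUB's exact implementation, and all TOY_* names are stand-ins.

/* Sketch of the cache-array sizing: slot i serves sizes up to 1 << i,
 * so handling size == PAGE_SIZE in a cache requires slot PAGE_SHIFT,
 * i.e. PAGE_SHIFT + 1 array entries in total. */
#include <stdio.h>

#define TOY_PAGE_SHIFT	12
#define TOY_PAGE_SIZE	(1UL << TOY_PAGE_SHIFT)

/* One cache per power of two, as kmalloc_caches[] in the patch. */
static const char *toy_caches[TOY_PAGE_SHIFT + 1];

static int toy_kmalloc_index(size_t size)
{
        int i = 3;                      /* smallest cache: 8 bytes */

        if (size > TOY_PAGE_SIZE)
                return -1;              /* kmalloc_large() territory */
        while ((1UL << i) < size)
                i++;
        return i;
}

int main(void)
{
        (void)toy_caches;
        printf("4096 -> slot %d (array needs %d slots)\n",
               toy_kmalloc_index(4096), TOY_PAGE_SHIFT + 1);
        printf("4097 -> slot %d (goes to the page allocator)\n",
               toy_kmalloc_index(4097));
        return 0;
}

A 4096-byte request lands in slot 12 on a 4k platform, which is exactly the slot the old PAGE_SHIFT-sized array lacked; anything larger still takes the kmalloc_large() pass-through.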