From e51bfd0ad10600a9fe4c8ede5ac2272e80075008 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz
Date: Sun, 10 Feb 2008 11:21:54 +0100
Subject: slab: avoid double initialization & do initialization in 1 place

- alloc_slabmgmt: initialize all slab fields in 1 place
- slab->nodeid was initialized twice: in alloc_slabmgmt and immediately
  after it in cache_grow

Signed-off-by: Marcin Slusarz
CC: Christoph Lameter
Reviewed-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 mm/slab.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/slab.c b/mm/slab.c
index 40c00dacbe4..473e6c2eaef 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2630,6 +2630,7 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
 	slabp->nodeid = nodeid;
+	slabp->free = 0;
 	return slabp;
 }
 
@@ -2683,7 +2684,6 @@ static void cache_init_objs(struct kmem_cache *cachep,
 		slab_bufctl(slabp)[i] = i + 1;
 	}
 	slab_bufctl(slabp)[i - 1] = BUFCTL_END;
-	slabp->free = 0;
 }
 
 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
@@ -2816,7 +2816,6 @@ static int cache_grow(struct kmem_cache *cachep,
 	if (!slabp)
 		goto opps1;
 
-	slabp->nodeid = nodeid;
 	slab_map_pages(cachep, slabp, objp);
 
 	cache_init_objs(cachep, slabp);
--
cgit v1.2.3
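The point of the patch above is easiest to see outside the kernel: when the descriptor allocator initializes every field itself, callers cannot forget one and no field is set twice. Below is a minimal userspace C sketch of that pattern; the toy_slab type and all toy_* names are illustrative stand-ins, not the kernel's actual slab code.

/* Minimal userspace sketch of the pattern the patch enforces: the
 * descriptor allocator sets every field in one place, so no caller
 * has to finish the job (slab->free used to be set in a different
 * function, slab->nodeid in two places). Toy types, not kernel code. */
#include <stdio.h>
#include <stdlib.h>

struct toy_slab {
        unsigned long colouroff;        /* colour offset into the page */
        void *s_mem;                    /* address of first object */
        unsigned int inuse;             /* number of allocated objects */
        unsigned int free;              /* index of first free object */
        int nodeid;                     /* NUMA node the slab lives on */
};

/* All fields initialized here, as alloc_slabmgmt() does after the patch. */
static struct toy_slab *toy_alloc_slabmgmt(void *objp, unsigned long colour_off,
                                           int nodeid)
{
        struct toy_slab *slabp = malloc(sizeof(*slabp));

        if (!slabp)
                return NULL;
        slabp->inuse = 0;
        slabp->colouroff = colour_off;
        slabp->s_mem = (char *)objp + colour_off;
        slabp->nodeid = nodeid;
        slabp->free = 0;                /* moved here from cache_init_objs() */
        return slabp;
}

int main(void)
{
        char page[4096];
        struct toy_slab *slabp = toy_alloc_slabmgmt(page, 32, 0);

        if (slabp) {
                printf("slab on node %d, first free index %u\n",
                       slabp->nodeid, slabp->free);
                free(slabp);
        }
        return 0;
}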
From eada35efcb2773cf49aa26277e056122e1a3405c Mon Sep 17 00:00:00 2001
From: Pekka Enberg
Date: Mon, 11 Feb 2008 22:47:46 +0200
Subject: slub: kmalloc page allocator pass-through cleanup

This adds a proper function for kmalloc page allocator pass-through.
While it simplifies slab tracing code a lot, I think it's a worthwhile
cleanup in itself.

Signed-off-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 include/linux/slub_def.h |  8 ++++++--
 mm/slub.c                | 14 ++++++--------
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 5e6d3d634d5..a849c472b84 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -188,12 +188,16 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
 void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
 void *__kmalloc(size_t size, gfp_t flags);
 
+static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
+{
+	return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+}
+
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
 		if (size > PAGE_SIZE / 2)
-			return (void *)__get_free_pages(flags | __GFP_COMP,
-				get_order(size));
+			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
diff --git a/mm/slub.c b/mm/slub.c
index e2989ae243b..7870ef9d863 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2671,8 +2671,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
@@ -2689,8 +2688,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(flags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
 
@@ -3219,8 +3217,8 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
@@ -3235,8 +3233,8 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 	struct kmem_cache *s;
 
 	if (unlikely(size > PAGE_SIZE / 2))
-		return (void *)__get_free_pages(gfpflags | __GFP_COMP,
-						get_order(size));
+		return kmalloc_large(size, gfpflags);
+
 	s = get_slab(size, gfpflags);
 
 	if (unlikely(ZERO_OR_NULL_PTR(s)))
--
cgit v1.2.3
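To illustrate the pass-through idea in isolation: one helper owns the "hand large requests to the page-level allocator" policy, and every entry point collapses to a single call behind a shared size cutoff. The sketch below is a userspace analogue under that assumption; big_alloc() and toy_kmalloc() are hypothetical stand-ins for kmalloc_large() and __kmalloc(), with mmap() and malloc() standing in for the page allocator and the slab fastpath.

/* Userspace sketch of the pass-through pattern: the cutoff policy and
 * the large-allocation mechanism live in one helper, so the four call
 * sites in the patch each shrink to "return kmalloc_large(...)". */
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

#define TOY_PAGE_SIZE 4096UL

/* The one place that knows how large requests are satisfied. */
static inline void *big_alloc(size_t size)
{
        void *p = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        return p == MAP_FAILED ? NULL : p;
}

/* Every allocation entry point shares the same cutoff logic. */
static void *toy_kmalloc(size_t size)
{
        if (size > TOY_PAGE_SIZE / 2)
                return big_alloc(size);
        return malloc(size);    /* stand-in for the slab fastpath */
}

int main(void)
{
        void *small = toy_kmalloc(128);
        void *large = toy_kmalloc(3 * TOY_PAGE_SIZE);

        printf("small=%p large=%p\n", small, large);
        free(small);
        if (large)
                munmap(large, 3 * TOY_PAGE_SIZE);
        return 0;
}

The design benefit mirrors the patch: if the pass-through policy ever changes (as it does two patches later, when the cutoff moves from PAGE_SIZE / 2 to PAGE_SIZE), only the call sites' comparison changes, not duplicated __get_free_pages() boilerplate.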
From dada123d99c241d1a45798a7c77bcf99c4968704 Mon Sep 17 00:00:00 2001
From: Adrian Bunk
Date: Wed, 13 Feb 2008 23:30:32 +0200
Subject: make slub.c:slab_address() static

slab_address() can become static.

Signed-off-by: Adrian Bunk
Signed-off-by: Christoph Lameter
---
 mm/slub.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/slub.c b/mm/slub.c
index 7870ef9d863..1af7f2f1942 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -308,7 +308,7 @@ static inline int is_end(void *addr)
 	return (unsigned long)addr & PAGE_MAPPING_ANON;
 }
 
-void *slab_address(struct page *page)
+static void *slab_address(struct page *page)
 {
 	return page->end - PAGE_MAPPING_ANON;
 }
--
cgit v1.2.3

From b7a49f0d4c34166ae84089d9f145cfaae1b0eec5 Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 14 Feb 2008 14:21:32 -0800
Subject: slub: Determine gfpflags once and not every time a slab is allocated

Currently we determine the gfp flags to pass to the page allocator each
time a slab is being allocated. Determine the bits to be set at the time
the slab cache is created. Store them in a new allocflags field and add
the flags in allocate_slab().

Acked-by: Mel Gorman
Reviewed-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 include/linux/slub_def.h |  1 +
 mm/slub.c                | 19 +++++++++++--------
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index a849c472b84..98be113cf93 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -71,6 +71,7 @@ struct kmem_cache {
 
 	/* Allocation and freeing of slabs */
 	int objects;		/* Number of objects in slab */
+	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
 	void (*ctor)(struct kmem_cache *, void *);
 	int inuse;		/* Offset to metadata */
diff --git a/mm/slub.c b/mm/slub.c
index 1af7f2f1942..ccfd41141b6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1078,14 +1078,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 	struct page *page;
 	int pages = 1 << s->order;
 
-	if (s->order)
-		flags |= __GFP_COMP;
-
-	if (s->flags & SLAB_CACHE_DMA)
-		flags |= SLUB_DMA;
-
-	if (s->flags & SLAB_RECLAIM_ACCOUNT)
-		flags |= __GFP_RECLAIMABLE;
+	flags |= s->allocflags;
 
 	if (node == -1)
 		page = alloc_pages(flags, s->order);
@@ -2333,6 +2326,16 @@ static int calculate_sizes(struct kmem_cache *s)
 	if (s->order < 0)
 		return 0;
 
+	s->allocflags = 0;
+	if (s->order)
+		s->allocflags |= __GFP_COMP;
+
+	if (s->flags & SLAB_CACHE_DMA)
+		s->allocflags |= SLUB_DMA;
+
+	if (s->flags & SLAB_RECLAIM_ACCOUNT)
+		s->allocflags |= __GFP_RECLAIMABLE;
+
 	/*
	 * Determine the number of objects per slab
	 */
--
cgit v1.2.3
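The optimization above trades a few branches on every slab allocation for one computation at cache-creation time. A minimal userspace sketch of that split follows; the TOY_* flag values and toy_* functions are made-up stand-ins, not the kernel's gfp bits or SLUB's real structures.

/* Sketch of the optimization: derive the per-allocation flag bits once,
 * when the cache is created, instead of re-deriving them on every slab
 * allocation. Flag values below are arbitrary stand-ins for the
 * kernel's __GFP_COMP / SLUB_DMA / __GFP_RECLAIMABLE bits. */
#include <stdio.h>

#define TOY_CACHE_DMA           0x1     /* cache property flags */
#define TOY_RECLAIM_ACCOUNT     0x2

#define TOY_GFP_COMP            0x10    /* per-allocation gfp bits */
#define TOY_GFP_DMA             0x20
#define TOY_GFP_RECLAIMABLE     0x40

struct toy_cache {
        unsigned int flags;             /* cache properties */
        unsigned int order;             /* page order per slab */
        unsigned int allocflags;        /* precomputed gfp bits */
};

/* Done once, at cache-creation time (calculate_sizes() in the patch). */
static void toy_compute_allocflags(struct toy_cache *s)
{
        s->allocflags = 0;
        if (s->order)
                s->allocflags |= TOY_GFP_COMP;
        if (s->flags & TOY_CACHE_DMA)
                s->allocflags |= TOY_GFP_DMA;
        if (s->flags & TOY_RECLAIM_ACCOUNT)
                s->allocflags |= TOY_GFP_RECLAIMABLE;
}

/* The hot path just ORs in the precomputed bits, with no branches. */
static unsigned int toy_allocate_slab(struct toy_cache *s, unsigned int gfp)
{
        return gfp | s->allocflags;
}

int main(void)
{
        struct toy_cache s = { .flags = TOY_CACHE_DMA, .order = 1 };

        toy_compute_allocflags(&s);
        printf("effective gfp: %#x\n", toy_allocate_slab(&s, 0x100));
        return 0;
}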
From 71c7a06ff0a2ba0434ace4d7aa679537c4211d9d Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 14 Feb 2008 14:28:01 -0800
Subject: slub: Fallback to kmalloc_large for failing higher order allocs

Slub already has two ways of allocating an object. One is via its own
logic and the other is via the call to kmalloc_large to hand off object
allocation to the page allocator. kmalloc_large is typically used for
objects >= PAGE_SIZE.

We can use that handoff to avoid failing if a higher order kmalloc slab
allocation cannot be satisfied by the page allocator. If we reach the
out of memory path then simply try a kmalloc_large(). kfree() can
already handle the case of an object that was allocated via the page
allocator and so this will work just fine (apart from object
accounting...).

For any kmalloc slab that already requires higher order allocs (which
makes it impossible to use the page allocator fastpath!) we just use
PAGE_ALLOC_COSTLY_ORDER to get the largest number of objects in one go
from the page allocator slowpath.

On a 4k platform this patch will lead to the following use of higher
order pages for the following kmalloc slabs:

8 ... 1024	order 0
2048 .. 4096	order 3 (4k slab only after the next patch)

We may waste some space if fallback occurs on a 2k slab but we are
always able to fall back to an order 0 alloc.

Reviewed-by: Pekka Enberg
Signed-off-by: Christoph Lameter
---
 mm/slub.c | 43 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/mm/slub.c b/mm/slub.c
index ccfd41141b6..644fd0aaeaf 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -211,6 +211,8 @@ static inline void ClearSlabDebug(struct page *page)
 /* Internal SLUB flags */
 #define __OBJECT_POISON		0x80000000 /* Poison object */
 #define __SYSFS_ADD_DEFERRED	0x40000000 /* Not yet visible via sysfs */
+#define __KMALLOC_CACHE		0x20000000 /* objects freed using kfree */
+#define __PAGE_ALLOC_FALLBACK	0x10000000 /* Allow fallback to page alloc */
 
 /* Not all arches define cache_line_size */
 #ifndef cache_line_size
@@ -1539,7 +1541,6 @@ load_freelist:
 unlock_out:
 	slab_unlock(c->page);
 	stat(c, ALLOC_SLOWPATH);
-out:
 #ifdef SLUB_FASTPATH
 	local_irq_restore(flags);
 #endif
@@ -1574,8 +1575,24 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
-	object = NULL;
-	goto out;
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
+	/*
+	 * No memory available.
+	 *
+	 * If the slab uses higher order allocs but the object is
+	 * smaller than a page size then we can fallback in emergencies
+	 * to the page allocator via kmalloc_large. The page allocator may
+	 * have failed to obtain a higher order page and we can try to
+	 * allocate a single page if the object fits into a single page.
+	 * That is only possible if certain conditions are met that are being
+	 * checked when a slab is created.
+	 */
+	if (!(gfpflags & __GFP_NORETRY) && (s->flags & __PAGE_ALLOC_FALLBACK))
+		return kmalloc_large(s->objsize, gfpflags);
+
+	return NULL;
 debug:
 	object = c->page->freelist;
 	if (!alloc_debug_processing(s, c->page, object, addr))
@@ -2322,7 +2339,20 @@ static int calculate_sizes(struct kmem_cache *s)
 	size = ALIGN(size, align);
 	s->size = size;
 
-	s->order = calculate_order(size);
+	if ((flags & __KMALLOC_CACHE) &&
+			PAGE_SIZE / size < slub_min_objects) {
+		/*
+		 * Kmalloc cache that would not have enough objects in
+		 * an order 0 page. Kmalloc slabs can fallback to
+		 * page allocator order 0 allocs so take a reasonably large
+		 * order that will allow us a good number of objects.
+		 */
+		s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
+		s->flags |= __PAGE_ALLOC_FALLBACK;
+		s->allocflags |= __GFP_NOWARN;
+	} else
+		s->order = calculate_order(size);
+
 	if (s->order < 0)
 		return 0;
 
@@ -2539,7 +2569,7 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
 
 	down_write(&slub_lock);
 	if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
-			flags, NULL))
+			flags | __KMALLOC_CACHE, NULL))
 		goto panic;
 
 	list_add(&s->list, &slab_caches);
@@ -3058,6 +3088,9 @@ static int slab_unmergeable(struct kmem_cache *s)
 	if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE))
 		return 1;
 
+	if ((s->flags & __PAGE_ALLOC_FALLBACK))
+		return 1;
+
 	if (s->ctor)
 		return 1;
--
cgit v1.2.3
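The fallback logic above can be summarized as: prefer one higher-order block that holds many objects, and if the page allocator cannot provide it, serve the request from a single page as long as one object fits. Here is a self-contained userspace sketch of that policy; try_pages() deliberately fails for order > 0 to simulate fragmentation, and all names are hypothetical, not SLUB's real code path.

/* Sketch of the fallback idea: try the preferred higher-order
 * allocation first, then fall back to a single order-0 page in
 * emergencies, mirroring __slab_alloc() handing the request to
 * kmalloc_large() when the object fits into one page. */
#include <stdio.h>
#include <stdlib.h>

#define TOY_PAGE_SIZE 4096UL

/* Stand-in for alloc_pages(); pretend higher orders are exhausted. */
static void *try_pages(unsigned int order)
{
        if (order > 0)
                return NULL;            /* simulate fragmentation */
        return malloc(TOY_PAGE_SIZE << order);
}

static void *toy_alloc(size_t objsize, unsigned int preferred_order)
{
        void *p = try_pages(preferred_order);

        if (p)
                return p;
        /* Emergency path: only legal if one object fits in one page,
         * the condition checked at cache-creation time in the patch. */
        if (objsize <= TOY_PAGE_SIZE)
                return try_pages(0);
        return NULL;
}

int main(void)
{
        void *p = toy_alloc(2048, 3);   /* 2k object, order-3 preferred */

        printf("got %s\n", p ? "order-0 fallback" : "nothing");
        free(p);
        return 0;
}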
From 331dc558fa020451ff773973cee855fd721aa88e Mon Sep 17 00:00:00 2001
From: Christoph Lameter
Date: Thu, 14 Feb 2008 14:28:09 -0800
Subject: slub: Support 4k kmallocs again to compensate for page allocator slowness

Currently we hand off PAGE_SIZEd kmallocs to the page allocator in the
mistaken belief that the page allocator can handle these allocations
effectively. However, measurements indicate a minimum slowdown by a
factor of 8 (and that is only SMP, NUMA is much worse) vs the slub
fastpath, which causes regressions in tbench.

Increase the number of kmalloc caches by one so that we again handle 4k
kmallocs directly from slub. 4k page buffering for the page allocator
will be performed by slub just as slab does it.

At some point the page allocator fastpath should be fixed. A lot of the
kernel would benefit from a faster ability to allocate a single page.
If that is done then the 4k allocs may again be forwarded to the page
allocator and this patch could be reverted.

Reviewed-by: Pekka Enberg
Acked-by: Mel Gorman
Signed-off-by: Christoph Lameter
---
 include/linux/slub_def.h |  6 +++---
 mm/slub.c                | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 98be113cf93..57deecc79d5 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -111,7 +111,7 @@ struct kmem_cache {
  * We keep the general caches in an array of slab caches that are used for
  * 2^x bytes of allocations.
  */
-extern struct kmem_cache kmalloc_caches[PAGE_SHIFT];
+extern struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1];
 
 /*
  * Sorry that the following has to be that ugly but some versions of GCC
@@ -197,7 +197,7 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
 static __always_inline void *kmalloc(size_t size, gfp_t flags)
 {
 	if (__builtin_constant_p(size)) {
-		if (size > PAGE_SIZE / 2)
+		if (size > PAGE_SIZE)
 			return kmalloc_large(size, flags);
 
 		if (!(flags & SLUB_DMA)) {
@@ -219,7 +219,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	if (__builtin_constant_p(size) &&
-		size <= PAGE_SIZE / 2 && !(flags & SLUB_DMA)) {
+		size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
 			struct kmem_cache *s = kmalloc_slab(size);
 
 		if (!s)
diff --git a/mm/slub.c b/mm/slub.c
index 644fd0aaeaf..4b3895cb90e 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2517,11 +2517,11 @@ EXPORT_SYMBOL(kmem_cache_destroy);
 *		Kmalloc subsystem
 *******************************************************************/
 
-struct kmem_cache kmalloc_caches[PAGE_SHIFT] __cacheline_aligned;
+struct kmem_cache kmalloc_caches[PAGE_SHIFT + 1] __cacheline_aligned;
 EXPORT_SYMBOL(kmalloc_caches);
 
 #ifdef CONFIG_ZONE_DMA
-static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT];
+static struct kmem_cache *kmalloc_caches_dma[PAGE_SHIFT + 1];
 #endif
 
 static int __init setup_slub_min_order(char *str)
@@ -2703,7 +2703,7 @@ void *__kmalloc(size_t size, gfp_t flags)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -2720,7 +2720,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, flags);
 
 	s = get_slab(size, flags);
@@ -3032,7 +3032,7 @@ void __init kmem_cache_init(void)
 		caches++;
 	}
 
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++) {
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++) {
 		create_kmalloc_cache(&kmalloc_caches[i],
 			"kmalloc", 1 << i, GFP_KERNEL);
 		caches++;
@@ -3059,7 +3059,7 @@ void __init kmem_cache_init(void)
 	slab_state = UP;
 
 	/* Provide the correct kmalloc names now that the caches are up */
-	for (i = KMALLOC_SHIFT_LOW; i < PAGE_SHIFT; i++)
+	for (i = KMALLOC_SHIFT_LOW; i <= PAGE_SHIFT; i++)
 		kmalloc_caches[i]. name =
 			kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
 
@@ -3252,7 +3252,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, void *caller)
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
@@ -3268,7 +3268,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 {
 	struct kmem_cache *s;
 
-	if (unlikely(size > PAGE_SIZE / 2))
+	if (unlikely(size > PAGE_SIZE))
 		return kmalloc_large(size, gfpflags);
 
 	s = get_slab(size, gfpflags);
--
cgit v1.2.3
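Why the arrays grow from PAGE_SHIFT to PAGE_SHIFT + 1 entries: once sizes up to and including PAGE_SIZE are served from kmalloc caches, the highest power-of-two index is log2(PAGE_SIZE), which equals PAGE_SHIFT itself. The sketch below walks that index math; it mirrors the intent of the kmalloc size-to-cache mapping, not SLUB's exact implementation, and all TOY_* names are stand-ins.

/* Sketch of the cache-array sizing: slot i serves sizes up to 1 << i,
 * so handling size == PAGE_SIZE in a cache requires slot PAGE_SHIFT,
 * i.e. PAGE_SHIFT + 1 array entries in total. */
#include <stdio.h>

#define TOY_PAGE_SHIFT	12
#define TOY_PAGE_SIZE	(1UL << TOY_PAGE_SHIFT)

/* One cache per power of two, as kmalloc_caches[] in the patch. */
static const char *toy_caches[TOY_PAGE_SHIFT + 1];

static int toy_kmalloc_index(size_t size)
{
        int i = 3;                      /* smallest cache: 8 bytes */

        if (size > TOY_PAGE_SIZE)
                return -1;              /* kmalloc_large() territory */
        while ((1UL << i) < size)
                i++;
        return i;
}

int main(void)
{
        (void)toy_caches;
        printf("4096 -> slot %d (array needs %d slots)\n",
               toy_kmalloc_index(4096), TOY_PAGE_SHIFT + 1);
        printf("4097 -> slot %d (goes to the page allocator)\n",
               toy_kmalloc_index(4097));
        return 0;
}

A 4096-byte request lands in slot 12 on a 4k platform, which is exactly the slot the old PAGE_SHIFT-sized array lacked; anything larger still takes the kmalloc_large() pass-through.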