Re: Mainline kernel OLTP performance update

!MAILaRCHIVE_VOTE_RePLACE
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
To: Zhang, Yanmin <yanmin_zhang@...>
Cc: Christoph Lameter <cl@...>, Andi Kleen <andi@...>, Matthew Wilcox <matthew@...>, Nick Piggin <nickpiggin@...>, Andrew Morton <akpm@...>, <netdev@...>, <sfr@...>, <matthew.r.wilcox@...>, <chinang.ma@...>, <linux-kernel@...>, <sharad.c.tripathi@...>, <arjan@...>, <suresh.b.siddha@...>, <harita.chilukuri@...>, <douglas.w.styner@...>, <peter.xihong.wang@...>, <hubert.nueckel@...>, <chris.mason@...>, <srostedt@...>, <linux-scsi@...>, <andrew.vasquez@...>, <anirban.chakraborty@...>, <mingo@...>
Date: Friday, January 23, 2009 - 5:46 am

On Fri, 2009-01-23 at 16:30 +0800, Zhang, Yanmin wrote:

That's a bit surprising, actually. FWIW, I've included a patch for empty
slab lists. But it's probably not going to help here.


Looking at __slab_free(), unless page->inuse is constantly zero and we
discard the slab, it really is just cache effects (10% sounds like a
lot, though!). AFAICT, the only way to optimize that is with Christoph's
unfinished pointer freelists patches or with a remote free list like in
SLQB.

		Pekka

diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 3bd3662..41a4c1a 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -48,6 +48,9 @@ struct kmem_cache_node {
 	unsigned long nr_partial;
 	unsigned long min_partial;
 	struct list_head partial;
+	unsigned long nr_empty;
+	unsigned long max_empty;
+	struct list_head empty;
 #ifdef CONFIG_SLUB_DEBUG
 	atomic_long_t nr_slabs;
 	atomic_long_t total_objects;
diff --git a/mm/slub.c b/mm/slub.c
index 8fad23f..5a12597 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -134,6 +134,11 @@
  */
 #define MAX_PARTIAL 10
 
+/*
+ * Maximum number of empty slabs kept cached on each node (n->max_empty).
+ */
+#define MAX_EMPTY 1
+
 #define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \
 				SLAB_POISON | SLAB_STORE_USER)
 
@@ -1205,6 +1210,24 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
 	free_slab(s, page);
 }
 
+/*
+ * Free an empty slab, or park it on the node's empty list while fewer than
+ * max_empty are cached. NOTE(review): list_lock is not taken here - confirm.
+ */
+static void discard_or_cache_slab(struct kmem_cache *s, struct page *page)
+{
+	int node = page_to_nid(page);
+	struct kmem_cache_node *n = get_node(s, node);
+
+	dec_slabs_node(s, node, page->objects);
+	if (likely(n->nr_empty >= n->max_empty)) {
+		free_slab(s, page);
+	} else {
+		n->nr_empty++;
+		list_add(&page->lru, &n->empty); /* nr_empty counts n->empty */
+	}
+}
+
 /*
  * Per slab locking using the pagelock
  */
@@ -1252,7 +1275,7 @@ static void remove_partial(struct kmem_cache *s, struct page *page)
 }
 
 /*
- * Lock slab and remove from the partial list.
+ * Lock slab and remove from the partial or empty list.
  *
  * Must hold list_lock.
  */
@@ -1261,7 +1284,6 @@ static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
 {
 	if (slab_trylock(page)) {
 		list_del(&page->lru);
-		n->nr_partial--;
 		__SetPageSlubFrozen(page);
 		return 1;
 	}
@@ -1271,7 +1293,7 @@ static inline int lock_and_freeze_slab(struct kmem_cache_node *n,
 /*
  * Try to allocate a partial slab from a specific node.
  */
-static struct page *get_partial_node(struct kmem_cache_node *n)
+static struct page *get_partial_or_empty_node(struct kmem_cache_node *n)
 {
 	struct page *page;
 
@@ -1281,13 +1303,22 @@ static struct page *get_partial_node(struct kmem_cache_node *n)
 	 * partial slab and there is none available then get_partials()
 	 * will return NULL.
 	 */
-	if (!n || !n->nr_partial)
+	if (!n || (!n->nr_partial && !n->nr_empty))
 		return NULL;
 
 	spin_lock(&n->list_lock);
+
 	list_for_each_entry(page, &n->partial, lru)
-		if (lock_and_freeze_slab(n, page))
+		if (lock_and_freeze_slab(n, page)) {
+			n->nr_partial--;
+			goto out;
+		}
+
+	list_for_each_entry(page, &n->empty, lru)
+		if (lock_and_freeze_slab(n, page)) {
+			n->nr_empty--;
 			goto out;
+		}
 	page = NULL;
 out:
 	spin_unlock(&n->list_lock);
@@ -1297,7 +1328,7 @@ out:
 /*
  * Get a page from somewhere. Search in increasing NUMA distances.
  */
-static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
+static struct page *get_any_partial_or_empty(struct kmem_cache *s, gfp_t flags)
 {
 #ifdef CONFIG_NUMA
 	struct zonelist *zonelist;
@@ -1336,7 +1367,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 
 		if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
 				n->nr_partial > n->min_partial) {
-			page = get_partial_node(n);
+			page = get_partial_or_empty_node(n);
 			if (page)
 				return page;
 		}
@@ -1346,18 +1377,19 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 }
 
 /*
- * Get a partial page, lock it and return it.
+ * Get a partial or empty page, lock it and return it.
  */
-static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
+static struct page *
+get_partial_or_empty(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct page *page;
 	int searchnode = (node == -1) ? numa_node_id() : node;
 
-	page = get_partial_node(get_node(s, searchnode));
+	page = get_partial_or_empty_node(get_node(s, searchnode));
 	if (page || (flags & __GFP_THISNODE))
 		return page;
 
-	return get_any_partial(s, flags);
+	return get_any_partial_or_empty(s, flags);
 }
 
 /*
@@ -1403,7 +1435,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 		} else {
 			slab_unlock(page);
 			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
-			discard_slab(s, page);
+			discard_or_cache_slab(s, page);
 		}
 	}
 }
@@ -1542,7 +1574,7 @@ another_slab:
 	deactivate_slab(s, c);
 
 new_slab:
-	new = get_partial(s, gfpflags, node);
+	new = get_partial_or_empty(s, gfpflags, node);
 	if (new) {
 		c->page = new;
 		stat(c, ALLOC_FROM_PARTIAL);
@@ -1693,7 +1725,7 @@ slab_empty:
 	}
 	slab_unlock(page);
 	stat(c, FREE_SLAB);
-	discard_slab(s, page);
+	discard_or_cache_slab(s, page);
 	return;
 
 debug:
@@ -1927,6 +1959,8 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
 static void
 init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 {
+	spin_lock_init(&n->list_lock);
+
 	n->nr_partial = 0;
 
 	/*
@@ -1939,8 +1973,18 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 	else if (n->min_partial > MAX_PARTIAL)
 		n->min_partial = MAX_PARTIAL;
 
-	spin_lock_init(&n->list_lock);
 	INIT_LIST_HEAD(&n->partial);
+
+	n->nr_empty = 0;
+	/*
+	 * XXX: This needs to take object size into account. We don't need
+	 * empty slabs for caches which will have plenty of partial slabs
+	 * available. Only caches that have either full or empty slabs need
+	 * this kind of optimization.
+	 */
+	n->max_empty = MAX_EMPTY;
+	INIT_LIST_HEAD(&n->empty);
+
 #ifdef CONFIG_SLUB_DEBUG
 	atomic_long_set(&n->nr_slabs, 0);
 	atomic_long_set(&n->total_objects, 0);
@@ -2427,6 +2471,32 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
 	spin_unlock_irqrestore(&n->list_lock, flags);
 }
 
+static void free_empty_slabs(struct kmem_cache *s)
+{
+	int node;
+	/* Free every slab parked on the empty list of each node of cache s. */
+	for_each_node_state(node, N_NORMAL_MEMORY) {
+		struct kmem_cache_node *n;
+		struct page *page, *t;
+		unsigned long flags;
+
+		n = get_node(s, node);
+		/* Unlocked peek at the counter: skip nodes with nothing cached. */
+		if (!n->nr_empty)
+			continue;
+
+		spin_lock_irqsave(&n->list_lock, flags);
+		/* _safe iteration: each page is unlinked inside the loop body. */
+		list_for_each_entry_safe(page, t, &n->empty, lru) {
+			list_del(&page->lru);
+			n->nr_empty--;
+
+			free_slab(s, page);
+		}
+		spin_unlock_irqrestore(&n->list_lock, flags);
+	}
+}
+
 /*
  * Release all resources used by a slab cache.
  */
@@ -2436,6 +2506,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 
 	flush_all(s);
 
+	free_empty_slabs(s);
+
 	/* Attempt to free all objects */
 	free_kmem_cache_cpus(s);
 	for_each_node_state(node, N_NORMAL_MEMORY) {
@@ -2765,6 +2837,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
 		return -ENOMEM;
 
 	flush_all(s);
+	free_empty_slabs(s);
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		n = get_node(s, node);
 


--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
Re: Mainline kernel OLTP performance update, Nick Piggin, (Fri Jan 16, 2:46 am)
Re: Mainline kernel OLTP performance update, Rick Jones, (Fri Jan 16, 2:11 pm)
Re: Mainline kernel OLTP performance update, Nick Piggin, (Mon Jan 19, 3:43 am)
Re: Mainline kernel OLTP performance update, Rick Jones, (Mon Jan 19, 6:19 pm)
Re: Mainline kernel OLTP performance update, Andrew Morton, (Fri Jan 16, 3:00 am)
Re: Mainline kernel OLTP performance update, Nick Piggin, (Fri Jan 16, 4:59 am)
Re: Mainline kernel OLTP performance update, Nick Piggin, (Fri Jan 16, 3:25 am)
Re: Mainline kernel OLTP performance update, Matthew Wilcox, (Fri Jan 16, 2:55 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Fri Jan 16, 3:53 am)
Re: Mainline kernel OLTP performance update, Andi Kleen, (Fri Jan 16, 6:20 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Tue Jan 20, 1:16 am)
Re: Mainline kernel OLTP performance update, Christoph Lameter, (Wed Jan 21, 7:58 pm)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Thu Jan 22, 4:36 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Thu Jan 22, 5:15 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Thu Jan 22, 5:28 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Thu Jan 22, 5:47 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Thu Jan 22, 11:02 pm)
Re: Mainline kernel OLTP performance update, Nick Piggin, (Fri Jan 23, 4:33 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Fri Jan 23, 5:02 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Fri Jan 23, 2:52 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Fri Jan 23, 4:06 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Fri Jan 23, 4:30 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Fri Jan 23, 5:46 am)
Re: Mainline kernel OLTP performance update, Christoph Lameter, (Fri Jan 23, 11:22 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Fri Jan 23, 10:55 pm)
Re: Mainline kernel OLTP performance update, Christoph Lameter, (Mon Jan 26, 1:36 pm)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Sat Jan 31, 10:52 pm)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Sat Jan 24, 3:36 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Thu Feb 12, 1:22 am)
Re: Mainline kernel OLTP performance update, Zhang, Yanmin, (Thu Feb 12, 1:47 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Thu Feb 12, 12:03 pm)
Re: Mainline kernel OLTP performance update, Christoph Lameter, (Thu Feb 12, 11:25 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Thu Feb 12, 12:07 pm)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Fri Jan 23, 11:31 am)
Re: Mainline kernel OLTP performance update, Christoph Lameter, (Fri Jan 23, 11:55 am)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Fri Jan 23, 12:01 pm)
Re: Mainline kernel OLTP performance update, Pekka Enberg, (Fri Jan 23, 4:40 am)
Re: Mainline kernel OLTP performance update, Nick Piggin, (Fri Jan 16, 3:06 am)