On Wed, 2007-05-16 at 11:43 -0700, Christoph Lameter wrote: If this 4k cpu system ever gets to touch the new lock it is in way deeper problems than a bouncing cache-line. Please look at it more carefully. We differentiate pages allocated at the level where GFP_ATOMIC starts to fail. By not updating the percpu slabs those are retried every time, except for ALLOC_NO_WATERMARKS allocations; those are served from the ->reserve_slab. Once a regular slab allocation succeeds again, the ->reserve_slab is cleaned up and never looked at again until we're in distress again. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> --- include/linux/slub_def.h | 2 + mm/slub.c | 85 ++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 78 insertions(+), 9 deletions(-) Index: linux-2.6-git/include/linux/slub_def.h =================================================================== --- linux-2.6-git.orig/include/linux/slub_def.h +++ linux-2.6-git/include/linux/slub_def.h @@ -46,6 +46,8 @@ struct kmem_cache { struct list_head list; /* List of slab caches */ struct kobject kobj; /* For sysfs */ + struct page *reserve_slab; + #ifdef CONFIG_NUMA int defrag_ratio; struct kmem_cache_node *node[MAX_NUMNODES]; Index: linux-2.6-git/mm/slub.c =================================================================== --- linux-2.6-git.orig/mm/slub.c +++ linux-2.6-git/mm/slub.c @@ -20,11 +20,13 @@ #include <linux/mempolicy.h> #include <linux/ctype.h> #include <linux/kallsyms.h> +#include "internal.h" /* * Lock order: - * 1. slab_lock(page) - * 2. slab->list_lock + * 1. reserve_lock + * 2. slab_lock(page) + * 3. node->list_lock * * The slab_lock protects operations on the object of a particular * slab and its metadata in the page struct. 
If the slab lock @@ -259,6 +261,8 @@ static int sysfs_slab_alias(struct kmem_ static void sysfs_slab_remove(struct kmem_cache *s) {} #endif +static DEFINE_SPINLOCK(reserve_lock); + /******************************************************************** * Core slab cache functions *******************************************************************/ @@ -1007,7 +1011,7 @@ static void setup_object(struct kmem_cac s->ctor(object, s, 0); } -static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) +static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node, int *rank) { struct page *page; struct kmem_cache_node *n; @@ -1025,6 +1029,7 @@ static struct page *new_slab(struct kmem if (!page) goto out; + *rank = page->rank; n = get_node(s, page_to_nid(page)); if (n) atomic_long_inc(&n->nr_slabs); @@ -1311,7 +1316,7 @@ static void unfreeze_slab(struct kmem_ca /* * Remove the cpu slab */ -static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) +static void __deactivate_slab(struct kmem_cache *s, struct page *page) { /* * Merge cpu freelist into freelist. 
Typically we get here @@ -1330,10 +1335,15 @@ static void deactivate_slab(struct kmem_ page->freelist = object; page->inuse--; } - s->cpu_slab[cpu] = NULL; unfreeze_slab(s, page); } +static void deactivate_slab(struct kmem_cache *s, struct page *page, int cpu) +{ + __deactivate_slab(s, page); + s->cpu_slab[cpu] = NULL; +} + static void flush_slab(struct kmem_cache *s, struct page *page, int cpu) { slab_lock(page); @@ -1395,6 +1405,7 @@ static void *__slab_alloc(struct kmem_ca { void **object; int cpu = smp_processor_id(); + int rank = 0; if (!page) goto new_slab; @@ -1424,10 +1435,26 @@ new_slab: if (page) { s->cpu_slab[cpu] = page; goto load_freelist; - } + } else if (unlikely(gfp_to_alloc_flags(gfpflags) & ALLOC_NO_WATERMARKS)) + goto try_reserve; - page = new_slab(s, gfpflags, node); - if (page) { +alloc_slab: + page = new_slab(s, gfpflags, node, &rank); + if (page && rank) { + if (unlikely(s->reserve_slab)) { + struct page *reserve; + + spin_lock(&reserve_lock); + reserve = s->reserve_slab; + s->reserve_slab = NULL; + spin_unlock(&reserve_lock); + + if (reserve) { + slab_lock(reserve); + __deactivate_slab(s, reserve); + putback_slab(s, reserve); + } + } cpu = smp_processor_id(); if (s->cpu_slab[cpu]) { /* @@ -1455,6 +1482,18 @@ new_slab: SetSlabFrozen(page); s->cpu_slab[cpu] = page; goto load_freelist; + } else if (page) { + spin_lock(&reserve_lock); + if (s->reserve_slab) { + discard_slab(s, page); + page = s->reserve_slab; + } + slab_lock(page); + SetPageActive(page); + s->reserve_slab = page; + spin_unlock(&reserve_lock); + + goto got_reserve; } return NULL; debug: @@ -1470,6 +1509,31 @@ debug: page->freelist = object[page->offset]; slab_unlock(page); return object; + +try_reserve: + spin_lock(&reserve_lock); + page = s->reserve_slab; + if (!page) { + spin_unlock(&reserve_lock); + goto alloc_slab; + } + + slab_lock(page); + if (!page->freelist) { + s->reserve_slab = NULL; + spin_unlock(&reserve_lock); + __deactivate_slab(s, page); + putback_slab(s, page); + 
goto alloc_slab; + } + spin_unlock(&reserve_lock); + +got_reserve: + object = page->freelist; + page->inuse++; + page->freelist = object[page->offset]; + slab_unlock(page); + return object; } /* @@ -1807,10 +1871,11 @@ static struct kmem_cache_node * __init e { struct page *page; struct kmem_cache_node *n; + int rank; BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); - page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node); + page = new_slab(kmalloc_caches, gfpflags | GFP_THISNODE, node, &rank); /* new_slab() disables interupts */ local_irq_enable(); @@ -2018,6 +2083,8 @@ static int kmem_cache_open(struct kmem_c #ifdef CONFIG_NUMA s->defrag_ratio = 100; #endif + s->reserve_slab = NULL; + if (init_kmem_cache_nodes(s, gfpflags & ~SLUB_DMA)) return 1; error: -
| Ingo Molnar | Re: [patch] paravirt: VDSO page is essential |
| Johannes Weiner | Re: Versioning file system |
| Matt Mackall | [PATCH 1/13] maps: Uninline some functions in the page walker |
| Greg KH | [patch 00/49] 2.6.25-stable review |
git: | |
| Johannes Schindelin | Re: [PATCH 1/4] Move redo merge code in a function |
| Dmitry Potapov | Re: [RFC] Git User's Survey 2008 |
| Johannes Schindelin | Re: [PATCH] Teach 'git apply' to look at $GIT_DIR/config |
| Shawn O. Pearce | Re: [kernel.org users] [RFD] On deprecating "git-foo" for builtins |
| Brian A. Seklecki | sshd_config(5) PermitRootLogin yes |
| Richard Stallman | Real men don't attack straw men |
| ropers | Re: low-MHz server |
| Diego Fernando Nieto Moreno | Intel DG33 Support |
| Holger Schurig | Re: Linux Wireless Mini-Summit -- Ottawa -- July 22, 2008 |
| Tilman Schmidt | Re: 2.6.25-rc8: FTP transfer errors |
| Eric Dumazet | Re: [rfc][patch 3/3] use SLAB_ALIGN_SMP |
| Lennert Buytenhek | [PATCH 21/39] mv643xx_eth: move port_receive() into its only caller |
| high memory | 13 hours ago | Linux kernel |
| semaphore access speed | 16 hours ago | Applications and Utilities |
| the kernel how to power off the machine | 17 hours ago | Linux kernel |
| Easter Eggs in windows XP | 20 hours ago | Windows |
| Shared swap partition | 21 hours ago | Linux general |
| Root password | 21 hours ago | Linux general |
| Where/when DNOTIFY is used? | 23 hours ago | Linux kernel |
| How to convert Linux Kernel built-in module into a loadable module | 1 day ago | Linux kernel |
| Linux 2.6.24 and I/O schedulers | 1 day ago | Linux kernel |
| USB Driver -- Interrupt Polling -- A Little Help Please | 1 day ago | Linux general |
