Hi, this is per-zone/reclaim support patch set for memory controller (cgroup).
Major changes from the previous one are
-- tested with 2.6.24-rc3-mm1 + ia64/NUMA
-- applied comments.I did small test on real NUMA machine.
My machine was ia64/8CPU/2Node NUMA. I tried to compile the kernel under an 800M
bytes limit with 32 parallel make. (make -j 32)
- 2.6.24-rc3-mm1 (+ scsi fix) .... shows soft lock-up.
before soft lock-up, %sys was almost 100% several times.
- 2.6.24-rc3-mm1 (+ scsi fix) + this set .... completed successfully
It seems %iowait dominates the total performance.
(current memory controller has no background reclaim)
Seems this set gives us some progress.
(*) I'd like to merge YAMAMOTO-san's background page reclaim for memory
controller before discussing performance numbers.
Andrew, could you pick these up to -mm ?
Patch series brief description:
[1/10] ... add scan_global_lru() macro (clean up)
[2/10] ... nid/zid helper function for cgroup
[3/10] ... introduce per-zone object for memory controller and add
active/inactive counter.
[4/10] ... calculate mapped_ratio per cgroup (for memory reclaim)
[5/10] ... calculate active/inactive imbalance per cgroup (based on [3])
[6/10] ... remember reclaim priority in memory controller
[7/10] ... calculate the number of pages to be reclaimed per cgroup
[8/10] ... modifies vmscan.c to isolate global-lru-reclaim and
memory-cgroup-reclaim in obvious manner.
(this patch uses functions defined in [4 - 7])
[9/10] ... implement per-zone-lru for cgroup (based on [3])
[10/10] ... implement per-zone lru lock for cgroup (based on [3][9])
Any comments are welcome.
Thanks,
-Kame-
Now, lru is per-zone.
Then, lru_lock can be (should be) per-zone, too.
This patch implements per-zone lru lock.
lru_lock is placed into mem_cgroup_per_zone struct.
lock can be accessed by
mz = mem_cgroup_zoneinfo(mem_cgroup, node, zone);
&mz->lru_lock
or
mz = page_cgroup_zoneinfo(page_cgroup);
&mz->lru_lock
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
mm/memcontrol.c | 71 ++++++++++++++++++++++++++++++++++----------------------
1 file changed, 44 insertions(+), 27 deletions(-)Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-27 11:24:16.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-27 11:24:22.000000000 +0900
@@ -89,6 +89,10 @@
};struct mem_cgroup_per_zone {
+ /*
+ * spin_lock to protect the per cgroup LRU
+ */
+ spinlock_t lru_lock;
struct list_head active_list;
struct list_head inactive_list;
unsigned long count[NR_MEM_CGROUP_ZSTAT];
@@ -126,10 +130,7 @@
* per zone LRU lists.
*/
struct mem_cgroup_lru_info info;
- /*
- * spin_lock to protect the per cgroup LRU
- */
- spinlock_t lru_lock;
+
unsigned long control_type; /* control RSS or RSS+Pagecache */
int prev_priority; /* for recording reclaim priority */
/*
@@ -410,15 +411,16 @@
*/
void mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
- struct mem_cgroup *mem;
+ struct mem_cgroup_per_zone *mz;
+ unsigned long flags;
+
if (!pc)
return;- mem = pc->mem_cgroup;
-
- spin_lock(&mem->lru_lock);
+ mz = page_cgroup_zoneinfo(pc);
+ spin_lock_irqsave(&mz->lru_lock, flags);
__mem_cgroup_move_lists(pc, active);
- spin_unlock(&mem->lru_lock);
+ spin_unlock_irqrestore(&mz->lru_lock, flags);
}/*
@@ -528,7 +530,7 @@
src = &mz->inactive_list;- spin_lock(&mem_cont->lru_lock);
+ spin_lock(&...
This patch implements per-zone lru for memory cgroup.
This patch makes use of mem_cgroup_per_zone struct for per zone lru.
LRU can be accessed by
mz = mem_cgroup_zoneinfo(mem_cgroup, node, zone);
&mz->active_list
&mz->inactive_list
or
mz = page_cgroup_zoneinfo(page_cgroup);
&mz->active_list
&mz->inactive_list
Changelog v1->v2
- merged to mem_cgroup_per_zone struct.
- handle page migration.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
mm/memcontrol.c | 63 ++++++++++++++++++++++++++++++++++----------------------
1 file changed, 39 insertions(+), 24 deletions(-)Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-27 11:24:04.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-27 11:24:16.000000000 +0900
@@ -89,6 +89,8 @@
};struct mem_cgroup_per_zone {
+ struct list_head active_list;
+ struct list_head inactive_list;
unsigned long count[NR_MEM_CGROUP_ZSTAT];
};
/* Macro for accessing counter */
@@ -122,10 +124,7 @@
/*
* Per cgroup active and inactive list, similar to the
* per zone LRU lists.
- * TODO: Consider making these lists per zone
*/
- struct list_head active_list;
- struct list_head inactive_list;
struct mem_cgroup_lru_info info;
/*
* spin_lock to protect the per cgroup LRU
@@ -367,10 +366,10 @@if (!to) {
MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_INACTIVE) += 1;
- list_add(&pc->lru, &pc->mem_cgroup->inactive_list);
+ list_add(&pc->lru, &mz->inactive_list);
} else {
MEM_CGROUP_ZSTAT(mz, MEM_CGROUP_ZSTAT_ACTIVE) += 1;
- list_add(&pc->lru, &pc->mem_cgroup->active_list);
+ list_add(&pc->lru, &mz->active_list);
}
mem_cgroup_charge_statistics(pc->mem_cgroup, pc->flags, true);
}
@@ -388,11 +387,11 @@
if (active) {
MEM_...
When using memory controller, there are 2 levels of memory reclaim.
1. zone memory reclaim because of system/zone memory shortage.
2. memory cgroup memory reclaim because of hitting limit.
These two can be distinguished by sc->mem_cgroup parameter.
(scan_global_lru() macro)
This patch tries to make memory cgroup reclaim routine avoid affecting
system/zone memory reclaim. This patch inserts if (scan_global_lru()) and
hook to memory_cgroup reclaim support functions.
This patch can be a help for isolating system lru activity and group lru
activity and shows what additional functions are necessary.
* mem_cgroup_calc_mapped_ratio() ... calculate mapped ratio for cgroup.
* mem_cgroup_reclaim_imbalance() ... calculate active/inactive balance in
cgroup.
* mem_cgroup_calc_reclaim_active() ... calculate the number of active pages to
be scanned in this priority in mem_cgroup.
* mem_cgroup_calc_reclaim_inactive() ... calculate the number of inactive pages
to be scanned in this priority in mem_cgroup.
* mem_cgroup_all_unreclaimable() .. checks whether the cgroup's pages are all
unreclaimable or not.
* mem_cgroup_get_reclaim_priority() ...
* mem_cgroup_note_reclaim_priority() ... record reclaim priority (temporary)
* mem_cgroup_record_reclaim_priority()
.... record reclaim priority as
zone->prev_priority.
This value is used for calc reclaim_mapped.
Changelog V1->V2:
- merged calc_reclaim_mapped patch in previous version.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
mm/vmscan.c | 326 ++++++++++++++++++++++++++++++++++++------------------------
1 file changed, 197 insertions(+), 129 deletions(-)Index: linux-2.6.24-rc3-mm1/mm/vmscan.c
===================================================================
--- linu...
Define function for calculating the number of scan target on each Zone/LRU.
Changelog V1->V2.
- fixed types of variables.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
include/linux/memcontrol.h | 15 +++++++++++++++
mm/memcontrol.c | 33 +++++++++++++++++++++++++++++++++
2 files changed, 48 insertions(+)Index: linux-2.6.24-rc3-mm1/include/linux/memcontrol.h
===================================================================
--- linux-2.6.24-rc3-mm1.orig/include/linux/memcontrol.h 2007-11-27 11:22:14.000000000 +0900
+++ linux-2.6.24-rc3-mm1/include/linux/memcontrol.h 2007-11-27 11:22:51.000000000 +0900
@@ -73,6 +73,10 @@
extern void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem,
int priority);+extern long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
+ struct zone *zone, int priority);
+extern long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
+ struct zone *zone, int priority);#else /* CONFIG_CGROUP_MEM_CONT */
static inline void mm_init_cgroup(struct mm_struct *mm,
@@ -173,6 +177,17 @@
return 0;
}+static inline long mem_cgroup_calc_reclaim_active(struct mem_cgroup *mem,
+ struct zone *zone, int priority)
+{
+ return 0;
+}
+
+static inline long mem_cgroup_calc_reclaim_inactive(struct mem_cgroup *mem,
+ struct zone *zone, int priority)
+{
+ return 0;
+}
#endif /* CONFIG_CGROUP_MEM_CONT */#endif /* _LINUX_MEMCONTROL_H */
Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-27 11:22:14.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-27 11:24:04.000000000 +0900
@@ -472,6 +472,39 @@
mem->prev_priority = priority;
}+/*
+ * Calculate # of pages to be scanned in this priority/zone.
+ * See also vmscan.c
+ *
+ * priority starts from "DEF_PRIORITY" and decremented in each loop.
+ * (see include/linux/mmzone....
Functions to remember reclaim priority per cgroup (as zone->prev_priority)
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
include/linux/memcontrol.h | 23 +++++++++++++++++++++++
mm/memcontrol.c | 20 ++++++++++++++++++++
2 files changed, 43 insertions(+)Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-27 11:19:51.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-27 11:22:14.000000000 +0900
@@ -132,6 +132,7 @@
*/
spinlock_t lru_lock;
unsigned long control_type; /* control RSS or RSS+Pagecache */
+ int prev_priority; /* for recording reclaim priority */
/*
* statistics.
*/
@@ -452,6 +453,25 @@
return (long) (active / (inactive + 1));
}+/*
+ * prev_priority control...this will be used in memory reclaim path.
+ */
+int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem)
+{
+ return mem->prev_priority;
+}
+
+void mem_cgroup_note_reclaim_priority(struct mem_cgroup *mem, int priority)
+{
+ if (priority < mem->prev_priority)
+ mem->prev_priority = priority;
+}
+
+void mem_cgroup_record_reclaim_priority(struct mem_cgroup *mem, int priority)
+{
+ mem->prev_priority = priority;
+}
+
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
Index: linux-2.6.24-rc3-mm1/include/linux/memcontrol.h
===================================================================
--- linux-2.6.24-rc3-mm1.orig/include/linux/memcontrol.h 2007-11-27 11:19:00.000000000 +0900
+++ linux-2.6.24-rc3-mm1/include/linux/memcontrol.h 2007-11-27 11:22:14.000000000 +0900
@@ -67,6 +67,11 @@
extern int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem);
extern long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem);+extern int mem_cgroup_get_reclaim_priority(struct mem_cgroup *mem);
+extern void mem_cg...
calculate active/inactive imbalance per memory cgroup.
Changelog V1 -> V2:
- removed "total" (just count inactive and active)
- fixed comment
- fixed return type to be "long".Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
include/linux/memcontrol.h | 8 ++++++++
mm/memcontrol.c | 14 ++++++++++++++
2 files changed, 22 insertions(+)Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-27 10:44:19.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-27 11:19:51.000000000 +0900
@@ -437,6 +437,20 @@
rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
return (int)((rss * 100L) / total);
}
+/*
+ * This function is called from vmscan.c. In page reclaiming loop. balance
+ * between active and inactive list is calculated. For memory controller
+ * page reclaiming, we should use mem_cgroup's imbalance rather than
+ * zone's global lru imbalance.
+ */
+long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem)
+{
+ unsigned long active, inactive;
+ /* active and inactive are the number of pages. 'long' is ok.*/
+ active = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_ACTIVE);
+ inactive = mem_cgroup_get_all_zonestat(mem, MEM_CGROUP_ZSTAT_INACTIVE);
+ return (long) (active / (inactive + 1));
+}unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
Index: linux-2.6.24-rc3-mm1/include/linux/memcontrol.h
===================================================================
--- linux-2.6.24-rc3-mm1.orig/include/linux/memcontrol.h 2007-11-27 10:44:19.000000000 +0900
+++ linux-2.6.24-rc3-mm1/include/linux/memcontrol.h 2007-11-27 11:19:00.000000000 +0900
@@ -65,6 +65,8 @@
* For memory reclaim.
*/
extern int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem);
+extern long mem_cgroup_reclaim_imbalance(struct mem_cgroup *mem);
+
...
Define function for calculating mapped_ratio in memory cgroup.
Changelog V1->V2
- Fixed possible divide-by-zero bug.
- Use "long" to avoid 64bit division on 32 bit system.
and does necessary type casts.
- Added comments.Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
include/linux/memcontrol.h | 11 ++++++++++-
mm/memcontrol.c | 17 +++++++++++++++++
2 files changed, 27 insertions(+), 1 deletion(-)Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-26 16:39:02.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-26 16:41:34.000000000 +0900
@@ -421,6 +421,23 @@
spin_unlock(&mem->lru_lock);
}+/*
+ * Calculate mapped_ratio under memory controller. This will be used in
+ * vmscan.c for determining whether we have to reclaim mapped pages.
+ */
+int mem_cgroup_calc_mapped_ratio(struct mem_cgroup *mem)
+{
+ long total, rss;
+
+ /*
+ * usage is recorded in bytes. But, here, we assume the number of
+ * physical pages can be represented by "long" on any arch.
+ */
+ total = (long) (mem->res.usage >> PAGE_SHIFT) + 1L;
+ rss = (long)mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS);
+ return (int)((rss * 100L) / total);
+}
+
unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan,
struct list_head *dst,
unsigned long *scanned, int order,
Index: linux-2.6.24-rc3-mm1/include/linux/memcontrol.h
===================================================================
--- linux-2.6.24-rc3-mm1.orig/include/linux/memcontrol.h 2007-11-26 15:31:19.000000000 +0900
+++ linux-2.6.24-rc3-mm1/include/linux/memcontrol.h 2007-11-26 16:39:05.000000000 +0900
@@ -61,6 +61,12 @@
extern void mem_cgroup_end_migration(struct page *page);
extern void mem_cgroup_page_migration(struct page *page, struct page *newpage);+/*
+ * For memory reclaim.
+ */
+extern int mem_cgrou...
Counting active/inactive per-zone in memory controller.
This patch adds per-zone status in memory cgroup.
These values are often read (as per-zone value) by page reclaiming.
In current design, per-zone stat is just an unsigned long value and
not an atomic value because they are modified only under lru_lock.
(So, atomic_ops is not necessary.)
This patch adds ACTIVE and INACTIVE per-zone status values.
For handling per-zone status, this patch adds
struct mem_cgroup_per_zone {
...
}
and some helper functions. This will be useful to add per-zone objects
in mem_cgroup.
This patch turns memory controller's early_init to be 0 for calling
kmalloc() in initialization.
Changelog V2 -> V3
- fixed comments.
Changelog V1 -> V2
- added mem_cgroup_per_zone struct.
This will help following patches to implement per-zone objects and
pack them into a struct.
- added __mem_cgroup_add_list() and __mem_cgroup_remove_list()
- fixed page migration handling.
- renamed zstat to info (per-zone-info)
This will be place for per-zone information(lru, lock, ..)
- use page_cgroup_nid()/zid() funcs.Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>mm/memcontrol.c | 164 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 157 insertions(+), 7 deletions(-)Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-26 16:39:00.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-26 16:39:02.000000000 +0900
@@ -78,6 +78,31 @@
}/*
+ * per-zone information in memory controller.
+ */
+
+enum mem_cgroup_zstat_index {
+ MEM_CGROUP_ZSTAT_ACTIVE,
+ MEM_CGROUP_ZSTAT_INACTIVE,
+
+ NR_MEM_CGROUP_ZSTAT,
+};
+
+struct mem_cgroup_per_zone {
+ unsigned long count[NR_MEM_CGROUP_ZSTAT];
+};
+/* Macro for accessing counter */
+#de...
can this be true?
-
On Thu, 29 Nov 2007 12:33:28 +0900 (JST)
When I set early_init=1, I added that check.
Would BUG_ON() be better ?
Thanks,
-Kame-
'active' seems unused.
YAMAMOTO Takashi
-
On Thu, 29 Nov 2007 12:19:37 +0900 (JST)
ok, I will post a clean-up against -mm2.
Thanks,
-Kame-
Just a "heads up": This patch is the apparent cause of a boot time
panic--null pointer deref--on my numa platform. See below.
As soon as this loop hits the first non-existent node on my platform, I
get a NULL pointer deref down in __alloc_pages. Stack trace below.
Perhaps N_POSSIBLE should be N_HIGH_MEMORY? That would require handling
of memory/node hotplug for each memory control group, right? But, I'm
going to try N_HIGH_MEMORY as a work around.
Initializing cgroup subsys memory
Unable to handle kernel NULL pointer dereference (address 0000000000003c80)
swapper[0]: Oops 11012296146944 [1]
Modules linked in:Pid: 0, CPU 0, comm: swapper
psr : 00001210084a6010 ifs : 8000000000000b1a ip : [<a000000100132e11>] Not tainted
ip is at __alloc_pages+0x31/0x6e0
unat: 0000000000000000 pfs : 000000000000060f rsc : 0000000000000003
rnat: a0000001009db3b8 bsps: a0000001009e0490 pr : 656960155aa65659
ldrs: 0000000000000000 ccv : 0000000000000000 fpsr: 0009804c8a70433f
csd : 0000000000000000 ssd : 0000000000000000
b0 : a000000100187370 b6 : a000000100194440 b7 : a00000010086d560
f6 : 1003e0000000000000000 f7 : 1003e0000000000000055
f8 : 1003e00000000000000c0 f9 : 1003e0000000000003fc0
f10 : 1003e00000000000000c0 f11 : 1003e0000000000000055
r1 : a000000100bc0f10 r2 : ffffffffffe00006 r3 : 0000000000020000
r8 : 0000000000071ef0 r9 : 0000000000000005 r10 : e00007002034d588
r11 : e00007002034d580 r12 : a0000001008e3df0 r13 : a0000001008dc000
r14 : 0000000000000001 r15 : e00007002034d5b0 r16 : 0000000000001e78
r17 : ffffffffffff04e0 r18 : 0000000000100002 r19 : 0000000000000000
r20 : 0000000000100002 r21 : 00000000000003cf r22 : 000000000000000f
r23 : 00000000000003c0 r24 : 0000000000000010 r25 : 0000000000000001
r26 : a0000001008e3e20 r27 : 0000000000000000 r28 : e0000701813dc088
r29 : e0000701813dc080 r30 : 0000000000000000 r31 : a000000100918ea8Call Trace:
[<a000000100014de0>] show_stack+0x80/0xa0
sp=a0...
On Wed, 28 Nov 2007 16:19:59 -0500
Maybe the zonelists of NODE_DATA() are not initialized. You are right.
I think N_HIGH_MEMORY will be suitable here...(I'll consider node-hotplug case later.)
Thank you for testing!
Regards,
-Kame-
On Thu, 29 Nov 2007 10:37:02 +0900
Could you try this ?
Thanks,
-Kame
==
Don't call kmalloc() against possible but offline node.
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
mm/memcontrol.c | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)Index: test-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- test-2.6.24-rc3-mm1.orig/mm/memcontrol.c
+++ test-2.6.24-rc3-mm1/mm/memcontrol.c
@@ -1117,8 +1117,14 @@ static int alloc_mem_cgroup_per_zone_inf
struct mem_cgroup_per_node *pn;
struct mem_cgroup_per_zone *mz;
int zone;
-
- pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, node);
+ /*
+ * This routine is called against possible nodes.
+ * But it's BUG to call kmalloc() against offline node.
+ */
+ if (node_state(N_ONLINE, node))
+ pn = kmalloc_node(sizeof(*pn), GFP_KERNEL, node);
+ else
+ pn = kmalloc(sizeof(*pn), GFP_KERNEL);
if (!pn)
return 1;-
On Thu, 29 Nov 2007 11:24:06 +0900
Sorry.. this can be a workaround, but I noticed I missed something.. ok, just use N_HIGH_MEMORY here and add a comment that hotplug support is not there yet.
Christoph-san, Lee-san, could you confirm following ?
- when SLAB is used, kmalloc_node() against offline node will succeed.
- when SLUB is used, kmalloc_node() against offline node will panic.
Then, the caller should take care that node is online before kmalloc().
Regards,
-Kame-
Hmmmm... An offline node implies that the per node structure does not
exist. SLAB should fail too. If there is something wrong with the allocs
then it's likely a difference in the way hotplug was put into SLAB and
SLUB.
-
Add macro to get node_id and zone_id of page_cgroup.
Will be used in per-zone-xxx patches and others.
Changelog:
- returns zone_type instead of int.Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
mm/memcontrol.c | 10 ++++++++++
1 file changed, 10 insertions(+)Index: linux-2.6.24-rc3-mm1/mm/memcontrol.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/memcontrol.c 2007-11-26 15:31:19.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/memcontrol.c 2007-11-26 16:39:00.000000000 +0900
@@ -135,6 +135,16 @@
#define PAGE_CGROUP_FLAG_CACHE (0x1) /* charged as cache */
#define PAGE_CGROUP_FLAG_ACTIVE (0x2) /* page is active in this cgroup */+static inline int page_cgroup_nid(struct page_cgroup *pc)
+{
+ return page_to_nid(pc->page);
+}
+
+static inline enum zone_type page_cgroup_zid(struct page_cgroup *pc)
+{
+ return page_zonenum(pc->page);
+}
+
enum {
MEM_CGROUP_TYPE_UNSPEC = 0,
MEM_CGROUP_TYPE_MAPPED,-
add macro scan_global_lru().
This is used to detect which scan_control scans global lru or
mem_cgroup lru. And compiled to be static value (1) when
memory controller is not configured. This may make the meaning obvious.
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
mm/vmscan.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)Index: linux-2.6.24-rc3-mm1/mm/vmscan.c
===================================================================
--- linux-2.6.24-rc3-mm1.orig/mm/vmscan.c 2007-11-26 15:31:19.000000000 +0900
+++ linux-2.6.24-rc3-mm1/mm/vmscan.c 2007-11-26 16:38:46.000000000 +0900
@@ -127,6 +127,12 @@
static LIST_HEAD(shrinker_list);
static DECLARE_RWSEM(shrinker_rwsem);+#ifdef CONFIG_CGROUP_MEM_CONT
+#define scan_global_lru(sc) (!(sc)->mem_cgroup)
+#else
+#define scan_global_lru(sc) (1)
+#endif
+
/*
* Add a shrinker callback to be called from the vm
*/
@@ -1290,11 +1296,12 @@
* Don't shrink slabs when reclaiming memory from
* over limit cgroups
*/
- if (sc->mem_cgroup == NULL)
+ if (scan_global_lru(sc)) {
shrink_slab(sc->nr_scanned, gfp_mask, lru_pages);
- if (reclaim_state) {
- nr_reclaimed += reclaim_state->reclaimed_slab;
- reclaim_state->reclaimed_slab = 0;
+ if (reclaim_state) {
+ nr_reclaimed += reclaim_state->reclaimed_slab;
+ reclaim_state->reclaimed_slab = 0;
+ }
}
total_scanned += sc->nr_scanned;
if (nr_reclaimed >= sc->swap_cluster_max) {
@@ -1321,7 +1328,7 @@
congestion_wait(WRITE, HZ/10);
}
/* top priority shrink_caches still had more to do? don't OOM, then */
- if (!sc->all_unreclaimable && sc->mem_cgroup == NULL)
+ if (!sc->all_unreclaimable && scan_global_lru(sc))
ret = 1;
out:
/*-
| Amit K. Arora | [RFC] Heads up on sys_fallocate() |
| Greg KH | [GIT PATCH] driver core patches against 2.6.24 |
| Linus Torvalds | Linux 2.6.25-rc4 |
| Greg KH | Linux 2.6.25.10 |
git: | |
| Gerrit Renker | [PATCH 15/37] dccp: Set per-connection CCIDs via socket options |
| David Miller | Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
| David Miller | [GIT]: Networking |
| Ilpo Järvinen | Re: Strange Application bug, race in MSG_PEEK complaints (was: Bug#513695: fetchma... |
