On Fri, 2010-11-26 at 00:15 +0800, Mel Gorman wrote:
Recording the order does not seem sufficient. In balance_pgdat(), the for loop
exits only when:
priority <0 or sc.nr_reclaimed >= SWAP_CLUSTER_MAX.
But afterwards we do:
	if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
		order = sc.order = 0;
This means that by the time we set order to 0, we have already reclaimed a
lot of pages, so I think we need to set order to 0 earlier, before there are
enough free pages. Below is a debug patch.
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d31d7ce..ee5d2ed 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2117,6 +2117,26 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
}
#endif
+static int all_zone_enough_free_pages(pg_data_t *pgdat)
+{
+ int i;
+
+ for (i = 0; i < pgdat->nr_zones; i++) {
+ struct zone *zone = pgdat->node_zones + i;
+
+ if (!populated_zone(zone))
+ continue;
+
+ if (zone->all_unreclaimable)
+ continue;
+
+ if (!zone_watermark_ok(zone, 0, high_wmark_pages(zone) * 8,
+ 0, 0))
+ return 0;
+ }
+ return 1;
+}
+
/* is kswapd sleeping prematurely? */
static int sleeping_prematurely(pg_data_t *pgdat, int order, long remaining)
{
@@ -2355,7 +2375,8 @@ out:
* back to sleep. High-order users can still perform direct
* reclaim if they wish.
*/
- if (sc.nr_reclaimed < SWAP_CLUSTER_MAX)
+ if (sc.nr_reclaimed < SWAP_CLUSTER_MAX ||
+ (order > 0 && all_zone_enough_free_pages(pgdat)))
order = sc.order = 0;
goto loop_again;
--