Hi, Here is a "HWPOISON for hugepage" patchset which reflects Mel's comments on hugepage rmapping code. Only patch 1/8 and 2/8 are changed since the previous post. Mel, could you please restart reviewing and testing? include/linux/hugetlb.h | 14 +--- include/linux/hugetlb_inline.h | 22 +++++++ include/linux/pagemap.h | 9 +++- include/linux/poison.h | 9 --- include/linux/rmap.h | 5 ++ mm/hugetlb.c | 100 ++++++++++++++++++++++++++++++++- mm/hwpoison-inject.c | 15 +++-- mm/memory-failure.c | 120 ++++++++++++++++++++++++++++++---------- mm/rmap.c | 59 ++++++++++++++++++++ 9 files changed, 295 insertions(+), 58 deletions(-) ChangeLog from v5: - rebased to 2.6.34 - fix logic error (in case that private mapping and shared mapping coexist) - move is_vm_hugetlb_page() into include/linux/mm.h to use this function from linear_page_index() - define and use linear_hugepage_index() instead of compound_order() - use page_move_anon_rmap() in hugetlb_cow() - copy exclusive switch of __set_page_anon_rmap() into hugepage counterpart. - revert commit 24be7468 completely - create hugetlb_inline.h and move is_vm_hugetlb_index() in it. - move functions setting up anon_vma for hugepage into mm/rmap.c. ChangeLog from v4: - rebased to 2.6.34-rc7 - add isolation code for free/reserved hugepage in me_huge_page() - set/clear PG_hwpoison bits of all pages in hugepage. - mce_bad_pages counts all pages in hugepage. - rename __hugepage_set_anon_rmap() to hugepage_add_anon_rmap() - add huge_pte_offset() dummy function in header file on !CONFIG_HUGETLBFS ChangeLog from v3: - rebased to 2.6.34-rc5 - support for privately mapped hugepage ChangeLog from v2: - rebase to 2.6.34-rc3 - consider mapcount of hugepage - rename pointer "head" into "hpage" ChangeLog from v1: - rebase to 2.6.34-rc1 - add comment from Wu Fengguang Thanks, Naoya Horiguchi --
is_vm_hugetlb_page() is a widely used inline function to insert hooks
into hugetlb code.
But we can't use it in pagemap.h because of a circular dependency between
the header files. This patch removes this limitation.
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
---
include/linux/hugetlb.h | 11 +----------
include/linux/hugetlb_inline.h | 22 ++++++++++++++++++++++
include/linux/pagemap.h | 1 +
3 files changed, 24 insertions(+), 10 deletions(-)
create mode 100644 include/linux/hugetlb_inline.h
diff --git v2.6.34/include/linux/hugetlb.h v2.6.34/include/linux/hugetlb.h
index 78b4bc6..d47a7c4 100644
--- v2.6.34/include/linux/hugetlb.h
+++ v2.6.34/include/linux/hugetlb.h
@@ -2,6 +2,7 @@
#define _LINUX_HUGETLB_H
#include <linux/fs.h>
+#include <linux/hugetlb_inline.h>
struct ctl_table;
struct user_struct;
@@ -14,11 +15,6 @@ struct user_struct;
int PageHuge(struct page *page);
-static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
-{
- return vma->vm_flags & VM_HUGETLB;
-}
-
void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
@@ -77,11 +73,6 @@ static inline int PageHuge(struct page *page)
return 0;
}
-static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
-{
- return 0;
-}
-
static inline void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
{
}
diff --git v2.6.34/include/linux/hugetlb_inline.h v2.6.34/include/linux/hugetlb_inline.h
new file mode 100644
index 0000000..cf00b6d
--- /dev/null
+++ v2.6.34/include/linux/hugetlb_inline.h
@@ -0,0 +1,22 @@
+#ifndef _LINUX_HUGETLB_INLINE_H
+#define _LINUX_HUGETLB_INLINE_H 1
+
+#ifdef CONFIG_HUGETLBFS
+
+#include <linux/mm.h>
+
+static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & ...Should be CONFIG_HUGETLB_PAGE With those corrections; -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab --
Both fixed in Andi's tree, so Acked-by: Wu Fengguang <fengguang.wu@intel.com> --
If the error hugepage is not in use, we can fully recover from the error
by dequeuing it from the freelist, so return RECOVERY.
Otherwise, whether or not we can recover depends on the user processes,
so return DELAYED.
Dependency:
"HWPOISON, hugetlb: enable error handling path for hugepage"
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
---
include/linux/hugetlb.h | 2 ++
mm/hugetlb.c | 16 ++++++++++++++++
mm/memory-failure.c | 28 ++++++++++++++++++++--------
3 files changed, 38 insertions(+), 8 deletions(-)
diff --git v2.6.34/include/linux/hugetlb.h v2.6.34/include/linux/hugetlb.h
index e688fd8..f479700 100644
--- v2.6.34/include/linux/hugetlb.h
+++ v2.6.34/include/linux/hugetlb.h
@@ -43,6 +43,7 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
int acctflags);
void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
+void __isolate_hwpoisoned_huge_page(struct page *page);
extern unsigned long hugepages_treat_as_movable;
extern const unsigned long hugetlb_zero, hugetlb_infinity;
@@ -100,6 +101,7 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)
#define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
#define hugetlb_fault(mm, vma, addr, flags) ({ BUG(); 0; })
#define huge_pte_offset(mm, address) 0
+#define __isolate_hwpoisoned_huge_page(page) 0
#define hugetlb_change_protection(vma, address, end, newprot)
diff --git v2.6.34/mm/hugetlb.c v2.6.34/mm/hugetlb.c
index b1aa0d8..aaba3cc 100644
--- v2.6.34/mm/hugetlb.c
+++ v2.6.34/mm/hugetlb.c
@@ -2821,3 +2821,19 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
hugetlb_put_quota(inode->i_mapping, (chg - freed));
hugetlb_acct_memory(h, -(chg - freed));
}
+
+/*
+ * This function is called from memory failure ...To avoid a race condition between concurrent memory errors on the same
hugepage, we atomically test and set the PG_hwpoison bit on the head page.
All pages in the error hugepage are considered hwpoisoned
for now, so set and clear all PG_hwpoison bits in the hugepage
with the page lock of the head page held.
Dependency:
"HWPOISON, hugetlb: enable error handling path for hugepage"
Signed-off-by: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Wu Fengguang <fengguang.wu@intel.com>
---
mm/memory-failure.c | 38 ++++++++++++++++++++++++++++++++++++++
1 files changed, 38 insertions(+), 0 deletions(-)
diff --git v2.6.34/mm/memory-failure.c v2.6.34/mm/memory-failure.c
index 1ec68c8..fee648b 100644
--- v2.6.34/mm/memory-failure.c
+++ v2.6.34/mm/memory-failure.c
@@ -920,6 +920,22 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
return ret;
}
+static void set_page_hwpoison_huge_page(struct page *hpage)
+{
+ int i;
+ int nr_pages = 1 << compound_order(hpage);
+ for (i = 0; i < nr_pages; i++)
+ SetPageHWPoison(hpage + i);
+}
+
+static void clear_page_hwpoison_huge_page(struct page *hpage)
+{
+ int i;
+ int nr_pages = 1 << compound_order(hpage);
+ for (i = 0; i < nr_pages; i++)
+ ClearPageHWPoison(hpage + i);
+}
+
int __memory_failure(unsigned long pfn, int trapno, int flags)
{
struct page_state *ps;
@@ -1014,6 +1030,26 @@ int __memory_failure(unsigned long pfn, int trapno, int flags)
return 0;
}
+ /*
+ * For error on the tail page, we should set PG_hwpoison
+ * on the head page to show that the hugepage is hwpoisoned
+ */
+ if (PageTail(p) && TestSetPageHWPoison(hpage)) {
+ action_result(pfn, "hugepage already hardware poisoned",
+ IGNORED);
+ unlock_page(hpage);
+ put_page(hpage);
+ return 0;
+ }
+ /*
+ * Set PG_hwpoison on all pages in an error hugepage,
+ * because containment is done in hugepage unit for now.
+ * Since ...Thanks everyone, I merged this patch series in the hwpoison tree now, aimed for 2.6.36. It should appear in linux-next shortly. Question is how to proceed now: the next steps would be early kill support and soft offline/migration support for hugetlb too. Horiguchi-san, is this something you're interested in working on? -Andi --
Early kill for hugetlb already works with this patchset, doesn't it? Yes, I am. I'll do it with pleasure :) Thanks, Naoya Horiguchi --
