[PATCH] x86: ignore spurious faults

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: Jeremy Fitzhardinge
Date: Wednesday, January 23, 2008 - 5:05 pm

When changing a kernel page from RO->RW, it's OK to leave stale TLB
entries around, since doing a global flush is expensive and they pose
no security problem.  They can, however, generate a spurious fault,
which we should catch and simply return from (which will have the
side-effect of reloading the TLB to the current PTE).

This can occur when running under Xen, because it frequently changes
kernel pages from RW->RO->RW to implement Xen's pagetable semantics.
It could also occur when using CONFIG_DEBUG_PAGEALLOC, since it avoids
doing a global TLB flush after changing page permissions.

[ Changes to fault_32.c and fault_64.c are identical, and should be
  easy to unify when the time comes. ]

Signed-off-by: Jeremy Fitzhardinge <jeremy@xensource.com>
Cc: Harvey Harrison <harvey.harrison@gmail.com>
---
 arch/x86/mm/fault_32.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/mm/fault_64.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)

===================================================================
--- a/arch/x86/mm/fault_32.c
+++ b/arch/x86/mm/fault_32.c
@@ -290,6 +290,53 @@ static int is_errata93(struct pt_regs *r
 
 
 /*
+ * Handle a spurious fault caused by a stale TLB entry.  This allows
+ * us to lazily refresh the TLB when increasing the permissions of a
+ * kernel page (RO -> RW or NX -> X).  Doing it eagerly is very
+ * expensive since that implies doing a full cross-processor TLB
+ * flush, even if no stale TLB entries exist on other processors.
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ */
+static int spurious_fault(unsigned long address,
+			  unsigned long error_code)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Reserved-bit violation or user access to kernel space? */
+	if (error_code & (PF_USER | PF_RSVD))
+		return 0;
+	/* Walk init_mm's page tables; a non-present level means not spurious. */
+	pgd = init_mm.pgd + pgd_index(address);
+	if (!pgd_present(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+
+	pte = pte_offset_kernel(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+
+#if _PAGE_NX
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+#endif
+	/* The current PTE is present and permits the faulting access. */
+	return 1;
+}
+
+/*
  * Handle a fault on the vmalloc or module mapping area
  *
  * This assumes no large pages in there.
@@ -412,6 +459,11 @@ void __kprobes do_page_fault(struct pt_r
 		if (!(error_code & (PF_RSVD|PF_USER|PF_PROT)) &&
 		    vmalloc_fault(address) >= 0)
 			return;
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.
===================================================================
--- a/arch/x86/mm/fault_64.c
+++ b/arch/x86/mm/fault_64.c
@@ -275,6 +275,53 @@ static noinline void pgtable_bad(unsigne
 }
 
 /*
+ * Handle a spurious fault caused by a stale TLB entry.  This allows
+ * us to lazily refresh the TLB when increasing the permissions of a
+ * kernel page (RO -> RW or NX -> X).  Doing it eagerly is very
+ * expensive since that implies doing a full cross-processor TLB
+ * flush, even if no stale TLB entries exist on other processors.
+ * There are no security implications to leaving a stale TLB when
+ * increasing the permissions on a page.
+ */
+static int spurious_fault(unsigned long address,
+			  unsigned long error_code)
+{
+	pgd_t *pgd;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	/* Reserved-bit violation or user access to kernel space? */
+	if (error_code & (PF_USER | PF_RSVD))
+		return 0;
+	/* Walk init_mm's page tables; a non-present level means not spurious. */
+	pgd = init_mm.pgd + pgd_index(address);
+	if (!pgd_present(*pgd))
+		return 0;
+
+	pud = pud_offset(pgd, address);
+	if (!pud_present(*pud))
+		return 0;
+
+	pmd = pmd_offset(pud, address);
+	if (!pmd_present(*pmd))
+		return 0;
+
+	pte = pte_offset_kernel(pmd, address);
+	if (!pte_present(*pte))
+		return 0;
+	if ((error_code & PF_WRITE) && !pte_write(*pte))
+		return 0;
+
+#if _PAGE_NX
+	if ((error_code & PF_INSTR) && !pte_exec(*pte))
+		return 0;
+#endif
+	/* The current PTE is present and permits the faulting access. */
+	return 1;
+}
+
+/*
  * Handle a fault on the vmalloc area
  *
  * This assumes no large pages in there.
@@ -406,6 +453,11 @@ asmlinkage void __kprobes do_page_fault(
 			if (vmalloc_fault(address) >= 0)
 				return;
 		}
+
+		/* Can handle a stale RO->RW TLB */
+		if (spurious_fault(address, error_code))
+			return;
+
 		/*
 		 * Don't take the mm semaphore here. If we fixup a prefetch
 		 * fault we could otherwise deadlock.

--
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[PATCH] x86: ignore spurious faults, Jeremy Fitzhardinge, (Wed Jan 23, 5:05 pm)
Re: [PATCH] x86: ignore spurious faults, Harvey Harrison, (Wed Jan 23, 5:18 pm)
Re: [PATCH] x86: ignore spurious faults, Jeremy Fitzhardinge, (Wed Jan 23, 5:26 pm)
[PATCH UPDATE] x86: ignore spurious faults, Jeremy Fitzhardinge, (Wed Jan 23, 5:28 pm)
Re: [PATCH] x86: ignore spurious faults, Andi Kleen, (Wed Jan 23, 11:49 pm)
Re: [PATCH] x86: ignore spurious faults, Jeremy Fitzhardinge, (Thu Jan 24, 12:02 am)
Re: [PATCH] x86: ignore spurious faults, Andi Kleen, (Thu Jan 24, 12:11 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Matt Mackall, (Thu Jan 24, 12:14 pm)
Re: [PATCH UPDATE] x86: ignore spurious faults, Jeremy Fitzhardinge, (Thu Jan 24, 12:21 pm)
Re: [PATCH UPDATE] x86: ignore spurious faults, Nick Piggin, (Thu Jan 24, 4:41 pm)
Re: [PATCH UPDATE] x86: ignore spurious faults, Jeremy Fitzhardinge, (Thu Jan 24, 5:26 pm)
Re: [PATCH UPDATE] x86: ignore spurious faults, Keir Fraser, (Fri Jan 25, 12:36 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Jan Beulich, (Fri Jan 25, 1:15 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Nick Piggin, (Fri Jan 25, 1:38 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Andi Kleen, (Fri Jan 25, 2:11 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Jan Beulich, (Fri Jan 25, 2:18 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Keir Fraser, (Fri Jan 25, 2:18 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Andi Kleen, (Fri Jan 25, 2:51 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Andi Kleen, (Fri Jan 25, 3:19 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Keir Fraser, (Fri Jan 25, 6:17 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Ingo Molnar, (Fri Jan 25, 8:30 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Jeremy Fitzhardinge, (Fri Jan 25, 8:54 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Ingo Molnar, (Fri Jan 25, 11:08 am)
Re: [PATCH UPDATE] x86: ignore spurious faults, Jeremy Fitzhardinge, (Fri Jan 25, 11:39 am)