When the self-snoop CPUID bit is set, change_page_attr() only needs to flush
the TLBs, not the caches.
The description of self-snoop in the Intel manuals is a bit vague,
but I got confirmation that this is what SS really means.
This should improve change_page_attr() performance significantly on newer
Intel CPUs.
Note: the line longer than 80 characters will be modified again in a follow-up patch.
Signed-off-by: Andi Kleen <ak@suse.de>
---
arch/x86/mm/pageattr_32.c | 5 +++--
arch/x86/mm/pageattr_64.c | 4 ++--
include/asm-x86/cpufeature.h | 1 +
3 files changed, 6 insertions(+), 4 deletions(-)
Index: linux/arch/x86/mm/pageattr_32.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr_32.c
+++ linux/arch/x86/mm/pageattr_32.c
@@ -97,10 +97,11 @@ static void flush_kernel_map(void *arg)
struct flush_arg *a = (struct flush_arg *)arg;
struct flush *f;
- if ((!cpu_has_clflush || a->full_flush) && boot_cpu_data.x86_model >= 4)
+ if ((!cpu_has_clflush || a->full_flush) && boot_cpu_data.x86_model >= 4 &&
+ !cpu_has_ss)
wbinvd();
list_for_each_entry(f, &a->l, l) {
- if (!a->full_flush)
+ if (!a->full_flush && !cpu_has_ss)
clflush_cache_range((void *)f->addr, PAGE_SIZE);
if (!a->full_flush)
__flush_tlb_one(f->addr);
Index: linux/include/asm-x86/cpufeature.h
===================================================================
--- linux.orig/include/asm-x86/cpufeature.h
+++ linux/include/asm-x86/cpufeature.h
@@ -167,6 +167,7 @@
#define cpu_has_pebs boot_cpu_has(X86_FEATURE_PEBS)
#define cpu_has_clflush boot_cpu_has(X86_FEATURE_CLFLSH)
#define cpu_has_bts boot_cpu_has(X86_FEATURE_BTS)
+#define cpu_has_ss boot_cpu_has(X86_FEATURE_SELFSNOOP)
#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
# define cpu_has_invlpg 1
Index: linux/arch/x86/mm/pageattr_64.c
===================================================================
--- linux.orig/arch/x86/mm/pageattr_64.c
+++ linux/arch/x86/mm/pageattr_64.c
@@ -94,10 +94,10 @@ static void flush_kernel_map(void *arg)
/* When clflush is available always use it because it is
much cheaper than WBINVD. */
- if (a->full_flush || !cpu_has_clflush)
+ if ((a->full_flush || !cpu_has_clflush) && !cpu_has_ss)
asm volatile("wbinvd" ::: "memory");
list_for_each_entry(f, &a->l, l) {
- if (!a->full_flush)
+ if (!a->full_flush && !cpu_has_ss)
clflush_cache_range((void *)f->addr, PAGE_SIZE);
if (!a->full_flush)
__flush_tlb_one(f->addr);
--
| Linus Torvalds | Linux 2.6.27-rc5 |
| Greg KH | Re: Dual-Licensing Linux Kernel with GPL V2 and GPL V3 |
| Greg Kroah-Hartman | [PATCH 004/196] Chinese: add translation of SubmittingPatches |
| Trent Piepho | Re: [PATCH] [POWERPC] Improve (in|out)_beXX() asm code |
git: | |
| Christoph Hellwig | Re: [PATCH 06/32] IGET: Mark iget() and read_inode() as being obsolete [try #2] |
| Gerrit Renker | [PATCH 0/37] dccp: Feature negotiation - last call for comments |
| David Miller | Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock(). |
| David Miller | [GIT]: Networking |
