login
Header Space

 
 

Re: [PATCH] x86: mtrr cleanup for converting continuous to discrete - auto detect

Previous thread: [PATCH 2/2] autofs4 - fix incorect return from root.c:try_to_fill_dentry() by Ian Kent on Monday, April 28, 2008 - 2:30 am. (1 message)

Next thread: Fwd: [PATCH] [MMC] fix clock problem in PXA255/270 by Tadeusz Gozdek on Monday, April 28, 2008 - 3:32 am. (7 messages)
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Monday, April 28, 2008 - 2:37 am

some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr-&gt;mask_lo, vr-&gt;mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include &lt;linux/smp.h&gt;
 #include &lt;linux/cpu.h&gt;
 #include &lt;linux/mutex.h&gt;
+#include &lt;linux/sort.h&gt;
 
 #include &lt;asm/e820.h&gt;
 #include &lt;asm/mtrr.h&gt;
@@ -609,6 +610,348 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int disable_mtrr_cleanup;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	disable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	size_t start;
+	size_t end;
+};
+
+static void __init subtract_range(struct r...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Monday, April 28, 2008 - 5:06 am

some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC 

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr-&gt;mask_lo, vr-&gt;mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include &lt;linux/smp.h&gt;
 #include &lt;linux/cpu.h&gt;
 #include &lt;linux/mutex.h&gt;
+#include &lt;linux/sort.h&gt;
 
 #include &lt;asm/e820.h&gt;
 #include &lt;asm/mtrr.h&gt;
@@ -609,6 +610,348 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int disable_mtrr_cleanup;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+	disable_mtrr_cleanup = 1;
+	return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+	size_t start;
+	size_t end;
+};
+
+static void __init s...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Date: Monday, April 28, 2008 - 3:44 pm

some BIOS like to use continus MTRR layout, and some X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr-&gt;mask_lo, vr-&gt;mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include &lt;linux/smp.h&gt;
 #include &lt;linux/cpu.h&gt;
 #include &lt;linux/mutex.h&gt;
+#include &lt;linux/sort.h&gt;
 
 #include &lt;asm/e820.h&gt;
 #include &lt;asm/mtrr.h&gt;
@@ -609,6 +610,345 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int __initdata enable_mtrr_cleanup;
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	enable_mtrr_cleanup = 1;
+	return 0;
...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Date: Monday, April 28, 2008 - 4:16 pm

some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr-&gt;mask_lo, vr-&gt;mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include &lt;linux/smp.h&gt;
 #include &lt;linux/cpu.h&gt;
 #include &lt;linux/mutex.h&gt;
+#include &lt;linux/sort.h&gt;
 
 #include &lt;asm/e820.h&gt;
 #include &lt;asm/mtrr.h&gt;
@@ -609,6 +610,345 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+static int __initdata enable_mtrr_cleanup;
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+	enable_mtrr_...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Monday, April 28, 2008 - 6:05 pm

some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr-&gt;mask_lo, vr-&gt;mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_fixed_ranges(mtrr_type * frs)
 {
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
 #include &lt;linux/smp.h&gt;
 #include &lt;linux/cpu.h&gt;
 #include &lt;linux/mutex.h&gt;
+#include &lt;linux/sort.h&gt;
 
 #include &lt;asm/e820.h&gt;
 #include &lt;asm/mtrr.h&gt;
@@ -609,6 +610,366 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
+#ifdef CONFIG_MTRR_SANITIZER
+
+#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
+static int enab...
To: <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 3:00 pm

Skimming through the code it looks fairly sane.

I do think it would be good to split this patch into two pieces.
1) The mtrr rewriter/sanitizer/normalize.
   All it does it should do is rewrite the MTRRs with a
   semantically equivalent value.  This code should always be
   safe and work on any system with MTRRs.

   This works around otherwise sane bios's that simply prefer
   to have contiguous MTRRs.

   I don't see a reason why this code should be configurable.

   This approach avoids earlier concerns because it starts
   with the existing MTRR layout and not with the e820 map.

2) The mtrr_chunk_size code that rounds things off and allows
   us to use discrete MTRRs by reducing some RAM to uncacheable.
   Because it makes things uncacheable it has potentially bad
   side effects on performance and thus potentially bad side
   effects on functionality.  For areas like the SMM and ACPI
   especially as they usually occur at the end of RAM just
   below 4G.

   The chunk size code should be configurable and default to off
   because it has potential side effects.  A KConfig option may
   also be appropriate.  It asks an interesting trade off question
   do you want your BIOS to be fast or X.


Eric
--
To: Eric W. Biederman <ebiederm@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 4:04 pm

On Tue, Apr 29, 2008 at 12:00 PM, Eric W. Biederman

(less memory + fast X) or  (more 8M RAM + slow...)

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 4:29 pm

Yes. That is the basic question.  Not all X drivers need it and
potentially the current kernel drm modules can use the
PAT infrastructure that has been merged.

Further a SMM monitor running 100 times or more slower may cause
problems if SMM mode is entered frequently, slowing down the entire
system not just X.

So if you don't have X or you have a crazy SMM monitor this can
be an issue.

Eric
--
To: Eric W. Biederman <ebiederm@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 5:57 pm

On Tue, Apr 29, 2008 at 1:29 PM, Eric W. Biederman

agreed. so that feature is compiled in but disable by default.

BTW: is any chance for OS to disable SMI etc? to verify is the
unstatbility is caused by SMI?

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 6:14 pm

Not in general no.  Frequently you can get at the registers that
will enable/disable an SMI but that is chipset specific.

Think of SMM mode is a lightweight hypervisor that we can't get rid
of, if you want to understand the worst case.

In theory SMM mode is completely unnecessary as soon as we enable
ACPI.  In practice ACPI appears to frequently trap into SMM mode.

Eric

--
To: Eric W. Biederman <ebiederm@...>
Cc: Yinghai Lu <yhlu.kernel@...>, Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 6:54 pm

SMM does more than that. It emulates legacy hardware and fixes
chip(set) bugs as well. Disabling it just makes your box stop
working. There are certain types of systems where essential safety
nets rely on SMIs (you can deep-fry P4s by disabling SMIs).

Thanks,
	tglx
--
To: Thomas Gleixner <tglx@...>
Cc: Yinghai Lu <yhlu.kernel@...>, Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 9:16 pm

There is truth in that but it is over dramatic.  P4s don't deep fry
they almost always turn off before they overheat (you make take
physical damage to your motherboard though).


The best definition I have heard of SMM mode is: smack the stupid OS
that isn't doing what it should be doing at runtime mode.

It is the way board designers and BIOS writers can work around what
they perceive as broken OS code, that keeps them from doing what
they need to do.  Getting them to give up SMM mode even though
technically possible is requesting they give up a degree of control
and thus a major social engineering challenge for anyone who wishes
to achieve it.


So any time we tread on territory that could mess up SMM mode
we need to be careful, especially as we can not turn it off to
diagnose problems.  The interactions can be hard to root cause.


Replacing overlapping MTRRs with a non overlapping set to allow
X to set a WB region as YH is doing appears safe and reasonable, and
worth doing. 

Going one step farther and reducing some of the WB memory to UC
so we can free up an MTRR for video and to accelerate X is a
bit chancy and something I don't feel comfortable with enabling
by default.  Especially as we have a better long term fix on
the way.

This problem is hitting enough people and the odds of something
really bad happening when you take a 100x or 1000x slowdown in 
SMM are pretty low so I do think it is useful to have a kernel
option that rounds down the amount of memory you have converts WB
memory to UC to accelerate X.

Hopefully by this point we are all now reminded how this can
interact with SMM mode (although no one has ever seen a bad
interaction) and how interacting with SMM mode can be a problem.

Eric
--
To: Eric W. Biederman <ebiederm@...>
Cc: Thomas Gleixner <tglx@...>, Yinghai Lu <yhlu.kernel@...>, Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Wednesday, April 30, 2008 - 5:57 am

Its also used for all sorts of ugly horrible hacks - like some boards
with broken latches on timers where the firmware uses SMM to emulate the
hardware and spins until the low byte flips before loading the time into
the RTC.

Thankfully on the newest cpus SMM mode is a bit more elegantly designed,
the original is a weird not quite real mode with extra bugs while nowdays
its more of a virtual machine
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Eric W. Biederman <ebiederm@...>, Andrew Morton <akpm@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 6:09 pm

i dont think there's any documented way for that. SMI might be the 
mechanism that ensures blue-smoke type of system reliability (CPU fan, 
temperature, etc.) so it would be extremely dangerous to mess with it.

	Ingo
--
To: Ingo Molnar <mingo@...>
Cc: Eric W. Biederman <ebiederm@...>, Andrew Morton <akpm@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>, Mika Fischer <mika.fischer@...>
Date: Tuesday, April 29, 2008 - 6:18 pm

then that is bad and sick HW design.

for example. cpu fan is supposed to full speed, if SW send insane
instruction and lose connection.
also, CPU should shutdown by thermal strip is FAN is stopped.

when we were working on LinuxBIOS, found one MB cpu fan need to be
started by BIOS, and another one is auto full speed if BIOS don't
touch it. We always like the second design.

YH
--
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 5:00 am

some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 aligment with extra hole
v7: gran_sizek to prevent running out of MTRRs.

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_ra
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
 		}
 	}
 
-	if (tom2) {
-		if (start &gt;= (1ULL&lt;&lt;32) &amp;&amp; (end &lt; tom2))
+	if (mtrr_tom2) {
+		if (start &gt;= (1ULL&lt;&lt;32) &amp;&amp; (end &lt; mtrr_tom2))
 			return MTRR_TYPE_WRBACK;
 	}
 
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr-&gt;mask_lo, vr-&gt;mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+	struct mtrr_var_range *vr;
+
+	vr = mtrr_state.var_ranges;
+
+	vr[index].base_lo = base_lo;
+	vr[index].base_hi = base_hi;
+	vr[index].mask_lo = mask_lo;
+	vr[index].mask_hi = mask_hi;
+}
+
 static void
 get_...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 6:52 am

some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.

the patch will change MTRR to discrete.

mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.

mtrr_gran_size= could be used to send smallest mtrr block to avoid run out of MTRRs

v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
    skip the var state change warning.
    remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 aligment with extra hole
v7: gran_sizek to prevent running out of MTRRs.
v8: fix hole_basek caculation caused when removing next_basek
    gran_sizek using when basek is 0.

need to apply
	[PATCH] x86: fix trimming e820 with MTRR holes.
right after this one.

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_ra
 static unsigned long smp_changes_mask;
 static struct mtrr_state mtrr_state = {};
 static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
 		}
 	}
 
-	if (tom2) {
-		if (start &gt;= (1ULL&lt;&lt;32) &amp;&amp; (end &lt; tom2))
+	if (mtrr_tom2) {
+		if (start &gt;= (1ULL&lt;&lt;32) &amp;&amp; (end &lt; mtrr_tom2))
 			return MTRR_TYPE_WRBACK;
 	}
 
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
 	rdmsr(MTRRphysMask_MSR(index), vr-&gt;mask_lo, vr-&gt;mask_hi);
 }
 
+/*  fill the MSR pair relating to a var range  */
+void fill_mtrr_var_range(unsigned int index,
+		u32...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 11:25 pm

v9: address format change requests by Ingo
    more case handling in range_to_var_with_hole

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1092,13 +1092,12 @@ config MTRR_SANITIZER
 	  If unsure, say Y.
 
 config MTRR_SANITIZER_ENABLE_DEFAULT
-	def_bool y
-	prompt "Enable MTRR cleanup by default"
+	int "MTRR cleanup enable value (0-1)"
+	range 0 1
+	default "0"
 	depends on MTRR_SANITIZER
 	help
-	  Enable mtrr cleanup by default
-
-	  If unsure, say Y.
+	  Enable mtrr cleanup default value
 
 config X86_PAT
 	bool
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -611,17 +611,9 @@ static struct sysdev_driver mtrr_sysdev_
 };
 
 #ifdef CONFIG_MTRR_SANITIZER
-
-#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
-static int enable_mtrr_cleanup __initdata = 1;
-#else
-static int enable_mtrr_cleanup __initdata;
-#endif
-
+static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
 #else
-
 static int enable_mtrr_cleanup __initdata = -1;
-
 #endif
 
 static int __init disable_mtrr_cleanup_setup(char *str)
@@ -640,6 +632,7 @@ static int __init enable_mtrr_cleanup_se
 }
 early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
 
+/* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
 struct res_range {
@@ -647,13 +640,27 @@ struct res_range {
 	unsigned long end;
 };
 
-static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
-			      unsigned long end, int merge)
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+			      unsigned long end)
 {
-	int i;
+	/* out of slots */
+	if (nr_range &gt;= ...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 4:00 am

loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size, 

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp; range[j].start &lt; end + 1) {
+		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp;
+		    range[j].start &lt; end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start &gt; range[j].start &amp;&amp; end &gt;= range[j].end &amp;&amp; range[j].end &gt; start - 1) {
+		if (start &gt; range[j].start &amp;&amp; end &gt;= range[j].end &amp;&amp;
+		    range[j].end &gt; start - 1) {
 			range[j].end = start - 1;
 			continue;
 		}
@@ -743,1...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 2:57 pm

loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

v2: find the one with more spare entries
      if the specify mtrr_chunk_size and mtrr_gran_size if not good, will try to find one 

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp; range[j].start &lt; end + 1) {
+		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp;
+		    range[j].start &lt; end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start &gt; range[j].start &amp;&amp; end &gt;= range[j].end &amp;&amp; range[j].end &gt; start - 1) {
+		if (start &gt; range[j].start &amp;&amp; e...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 8:52 pm

loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

v2: find the one with more spare entries
v3: fix hole_basek offset

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp; range[j].start &lt; end + 1) {
+		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp;
+		    range[j].start &lt; end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start &gt; range[j].start &amp;&amp; end &gt;= range[j].end &amp;&amp; range[j].end &gt; start - 1) {
+		if (start &gt; range[j].start &amp;&amp; end &gt;= range[j].end &amp;&amp;
+		    range[j].end &gt; start ...
To: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>
Cc: linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Friday, May 2, 2008 - 5:40 am

loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.

so user don't need to add mtrr_chunk_size and mtrr_gran_size

if optimal value is not found, print out all list to help select less optimal
value.

add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.

v2: find the one with more spare entries
v3: fix hole_basek offset
v4: tight the compare between range and range_new
    loop stop with 4g

Signed-off-by: Yinghai Lu &lt;yhlu.kernel@gmail.com&gt;

Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
 	.resume		= mtrr_restore,
 };
 
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 0;
-	return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
-	if (enable_mtrr_cleanup != -1)
-		enable_mtrr_cleanup = 1;
-	return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
 /* should be related to MTRR_VAR_RANGES nums */
 #define RANGE_NUM 256
 
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range, 
 			continue;
 		}
 
-		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp; range[j].start &lt; end + 1) {
+		if (start &lt;= range[j].start &amp;&amp; end &lt; range[j].end &amp;&amp;
+		    range[j].start &lt; end + 1) {
 			range[j].start = end + 1;
 			continue;
 		}
 
 
-		if (start &gt; range[j].start &amp;&amp; end &gt;= range[j].end &amp;&amp; range[j].end &gt; start - 1) {
+		if (start &gt; range[j].start &am...
To: <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 3:42 pm

Why stopping at 2 GB?

--
To: H. Peter Anvin <hpa@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:02 pm

if you select 4g for chunk size, we don't need to convert that from
continuous to discrete to make X server driver happen.

actually the code could support any chunk_size...

for example: 16 g system
orginal:
0-4g WB
3.5g-4g UC
4g-8g WB
8g-16g WB
16g-16.5g WB

if you set chunk size to 16g, and gran size &lt;= 512M
you will get
0-16g WB
3.5g-4g UC
16g-16.5g WB

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:10 pm

Yes, 16 GB systems are already mainstream; 32 GB is common, so I don't 
see any reason to stop at 2 GB.  Instead, it should loop up to the 
physical address size.

	-hpa
--
To: H. Peter Anvin <hpa@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:20 pm

but our objective is that has 0xd0000000-0xe0000000 (WC) not to be
overlapping with other MTRR entries (UC)..

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:26 pm

So, pray tell, how comes this algorithm can come up with a non-solution 
to the problem presented to it?

Overall, I'm feeling there is something really completely wrong if this 
needs manual tunables of any sort.

	-hpa
--
To: H. Peter Anvin <hpa@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:31 pm

the problem is BIOS set MTRR like BIG WB - SOME SAMLL UC to spare mtrr regs.

but later X server driver want to set some range to WC. that already
is fallen in UC...

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:33 pm

That's not the point.  I understand you want to flatten the layout.  The 
point is: why do you need manual tunables for the algorithm to do the 
right thing?

	-hpa
--
To: H. Peter Anvin <hpa@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:44 pm

optimal result is not losing covering for ranges that is originally
covered, and still keep as many of spare mtrr entries for X server
driver.
we only have 8 mtrrs, could lose some covering because of run out of mtrr regs.
So we need to search it according to chunk/gran with ram ranges that
is defined by old mtrr layout.

and if we can not find the optimal setting, user could select one
setting (chunk/gran size) to boot next time, but he will lose some
covering.
for some regions. later trim_mtrr will remove those range from e820 map

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 5:49 pm

Yes.  You have a search space of less than 1000 possible combinations 
(64..20 bits), so it hardly is any reason to not search the entire 
universe of possibilities, even if by exhaustive search.

Now, if even that searching can't come up with the optimal solution (if 

Right, now we're talking policy, which obviously has to be entered by 
the user.

	-hpa

--
To: H. Peter Anvin <hpa@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 6:52 pm

only search 78
2g, 1g, ...1m, and half matrix 13 * 6..

and don't need to search than 78.

also if we don't need to get the more spare regs than

because we only have 8 mtrrs, and user may specify mtrr_spare_reg_nr =
2 or 3 to get more entries for the graphics cards...
then can not the optimal setting without losing any covering.

so if the optimal is there (only need to search to 2g), it will catch it.

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 6:57 pm

Again, it's not clear to me why there is an inherent limit at 2 GB.

	-hpa
--
To: H. Peter Anvin <hpa@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 7:10 pm

above 2g, we will use 4g (chunk size, because if use 4g gran_size will
trim too much RAM). it will get the continous layout like
0-4g WB
3.5g-4G UC

instead of discrete that we want. (chunk_size=2g, gran_size=512M)
0-2g WB
2g-4g WB
3.5g-4g UC

YH
--
To: <yhlu.kernel@...>
Cc: Yinghai Lu <yhlu.kernel.send@...>, Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 11:09 am

Large value could prevent small alignment from




---
~Randy
--
To: Randy Dunlap <randy.dunlap@...>, Ingo Molnar <mingo@...>
Cc: Yinghai Lu <yhlu.kernel.send@...>, Andrew Morton <akpm@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 12:38 pm

Ingo, can you change that directly in the patch?
or need me send another updated patch?

YH
--
To: <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 8:09 am

On my system x86-latest + this patch and using no boot options gives me
this /proc/mtrr:
reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
reg01: base=0x80000000 (2048MB), size= 512MB: write-back, count=1
reg02: base=0xa0000000 (2560MB), size= 256MB: write-back, count=1
reg03: base=0xb0000000 (2816MB), size= 256MB: write-back, count=1
reg04: base=0xbf700000 (3063MB), size=   1MB: uncachable, count=1
reg05: base=0xbf800000 (3064MB), size=   8MB: uncachable, count=1
reg06: base=0x100000000 (4096MB), size=1024MB: write-back, count=1

Which is OK. It could probably collapse reg01-reg03 into one but that's
a minor issue (for me at least, there are probably cases where
collapsing them might save the user from having to specify the
mtrr_spare_reg_nr boot option).

In any case it works fine here. dmesg is attached.

Let me know if there's anything else I should test!

And thanks a lot for all your work, Yinghai! :)

Regards,
 Mika
To: Mika Fischer <mika.fischer@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 12:35 pm

yes. please try mtrr_spare_reg_nr=3 or etc.

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 12:59 pm

Sure this works. But that was my point exactly. It should be possible to
figure out the better configuration automatically so that I *don't* have
to specify mtrr_spare_reg_nr=3.

Or in other words: If there are multiple equivalent configurations that
don't lose any RAM(!), the one with the most free MTRR regs should be
preferred.

AFAICT you loop over the chunk size and stop when you have found a
configuration that leaves the number of free MTRR registers requested
(default 1).

This will almost always result in a configuration where you have
*exactly* the number of requested free regs available, even if a more
efficient configuration was possible.

What I'm suggesting is, that - in the case where no RAM is lost at this
point - the loop should continue to try and free up more registers, as
long as no RAM is lost.

I.e. even if in my case chunk_size=256M gives adequate results and
leaves me with 1 free reg, since I don't lose any RAM at this point the
loop should continue as long as I do not lose any RAM. That way it would
find the ideal chunk_size (1g) automatically.

But again, this is non-critical. But I think it might help a few users
who need more than 1 free reg, because they probably will have no idea
about the kernel option...

Regards,
 Mika
--
To: Mika Fischer <mika.fischer@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 1:40 pm

OK, will send another version out.

YH
--
To: <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 7:45 am

WOW :)

With this patch all is working fine , no RAM is lost , X is fast , 
so far everything else seems to work fine. \o/

I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine.

There is my dmesg , meminfo , mtrr output with this patch on top x86-latest :

http://frugalware.org/~crazy/mtrr_x86-latest/


--
To: Gabriel C <nix.or.die@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 8:06 pm

while look at that you boot log, it seems there is one bug about hole
position. but I look that code, it should already be handled.

Can you send out boot msg and /proc/mtrr when using
disable_mtrr_cleanup command line?

Thanks

Yinghai Lu
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 8:29 pm

Sure , there it is :

http://frugalware.org/~crazy/mtrr_x86-latest/dmesg2
http://frugalware.org/~crazy/mtrr_x86-latest/proc_mtrr2

I'm still using this version of your patch , didn't got any time to update to v2.

Gabriel
--
To: Gabriel C <nix.or.die@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 8:35 pm

original

reg00: base=0xd0000000 (3328MB), size= 256MB: uncachable, count=1
reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1
reg02: base=0x00000000 (   0MB), size=4096MB: write-back, count=1
reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1
reg05: base=0x128000000 (4736MB), size=  64MB: write-back, count=1
reg06: base=0xcf600000 (3318MB), size=   2MB: uncachable, count=1

after clean up

reg00: base=0x00000000 (   0MB), size=2048MB: write-back, count=1
reg01: base=0x80000000 (2048MB), size=1024MB: write-back, count=1
reg02: base=0xc0000000 (3072MB), size= 256MB: write-back, count=1
reg03: base=0xcfe00000 (3326MB), size=   2MB: uncachable, count=1
reg04: base=0x100000000 (4096MB), size= 512MB: write-back, count=1
reg05: base=0x120000000 (4608MB), size= 256MB: write-back, count=1
reg06: base=0x12c000000 (4800MB), size=  64MB: uncachable, count=1

so the hole base is not right, it should be at 3318MB instead of 3326MB.
please hold to test v3 ...

Thanks

Yinghai Lu
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 9:18 pm

All is still fine here after an quick test ( BTW that version is really chatty :P )



Gabriel
--
To: Gabriel C <nix.or.die@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Thursday, May 1, 2008 - 9:55 pm

thanks. yeah, but can you remove debug in command line to see if it
still talks too much.

YH
--
To: <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Wednesday, April 30, 2008 - 8:09 am

thanks, applied.

	Ingo
--
To: <yhlu.kernel@...>
Cc: Yinghai Lu <yhlu.kernel.send@...>, Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 4:46 pm

s/granity/granularity/
I think that's what you mean/want.


---
~Randy
--
To: Randy Dunlap <randy.dunlap@...>
Cc: Yinghai Lu <yhlu.kernel.send@...>, Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 5:54 pm

considering to auto test to find optimal value for mtrr_chunk_size and
mtrr_gran_size...

YH
--
To: <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 9:07 am

this should be a single:

 #ifdef CONFIG_MTRR_SANITIZER
 static int mtrr_cleanup_enabled = CONFIG_MTRR_SANITIZER_DEFAULT;
 #endif



looks cleaner this way:

  static int __init
  add_range(struct res_range *range, int nr_range, unsigned long start,


should be:

 static void __init
 subtract_range(struct res_range *range, unsigned long start,

can be:




s/unsigned address_bits/unsigned int address_bits/

also move range_sizek on a separate line.



should be:

 static void __init
 set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,

s/unsigned/unsigned int

hm, will this work on 64-bit? Above-4G is controlled via separate 




the ++ is a hard to notice side-effect of the loop. It's cleaner to 


superfluous newline.

all in one, this is a very useful and nice feature.

	Ingo
--
To: Ingo Molnar <mingo@...>
Cc: Andrew Morton <akpm@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Gabriel C <nix.or.die@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 1:25 pm

thanks. will submit a new one with fix.

YH
--
To: <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 5:47 am

With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applyed on latest linus git tree
the box OOPS'es early.

Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find
some time later.

In any way OOPS'es on __free_one_page+0x191/0x21e


Gabriel
--
To: Gabriel C <nix.or.die@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 6:30 am

thanks. found one problem with hole_basek ...

will send you v8, and hope it will be last version.

YH
--
To: Gabriel C <nix.or.die@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 6:56 am

please try v8, it should get rid of the 8m entry. it need patch
http://lkml.org/lkml/2008/4/29/97 too.

Thanks

Yinghai Lu
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 7:51 am

Box does boot with v8 but now I get that warning you fixed in v2 again =):

....
[    0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008
[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 00...
To: Gabriel C <nix.or.die@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 1:11 pm

please try
mtrr_chunk_size=512m mtrr_gran_size=128m
or
mtrr_chunk_size=256m mtrr_gran_size=128m

YH
--
To: Yinghai Lu <yhlu.kernel@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 4:25 pm

...

[    0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3
[    0.000000] BIOS-provided physical RAM map:
[    0.000000]  BIOS-e820: 0000000000000000 - 000000000009cc00 (usable)
[    0.000000]  BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved)
[    0.000000]  BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved)
[    0.000000]  BIOS-e820: 0000000000100000 - 00000000cf550000 (usable)
[    0.000000]  BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data)
[    0.000000]  BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS)
[    0.000000]  BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved)
[    0.000000]  BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved)
[    0.000000]  BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved)
[    0.000000]  BIOS-e820: 0000000100000000 - 000000012c000000 (usable)
[    0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used
[    0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used
[    0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used
[    0.000000] max_pfn_mapped = 1228800
[    0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106
[    0.000000] After WB checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000
[    0.000000] After UC checking
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] After sorting
[    0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600
[    0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000
[    0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000
[    0.000000] range0: 0000000000000000 - 00000000c0000000
[    0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB
[    0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB
[ ...
To: Gabriel C <nix.or.die@...>
Cc: Andrew Morton <akpm@...>, Ingo Molnar <mingo@...>, H. Peter Anvin <hpa@...>, Thomas Gleixner <tglx@...>, Mika Fischer <mika.fischer@...>, linux-kernel@vger.kernel.org <linux-kernel@...>
Date: Tuesday, April 29, 2008 - 5:49 pm

please try attached trim_holes_fix.patch..., it will fix the trim hole problem.

then check if X server works well.

then try mtrr_cleanup_fix.patch for... ==&gt; address ingo request about style etc.

Thanks

Yinghai Lu