Some BIOSes like to use a continuous MTRR layout, and the X driver cannot add
WB entries for graphics cards when 4G or more RAM is installed.
This patch changes the MTRR layout to discrete.
mtrr_chunk_size= can be used to have a smaller continuous block to hold holes.
The default is 256M; it can be set according to the size of the graphics card memory.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+ struct mtrr_var_range *vr;
+
+ vr = mtrr_state.var_ranges;
+
+ vr[index].base_lo = base_lo;
+ vr[index].base_hi = base_hi;
+ vr[index].mask_lo = mask_lo;
+ vr[index].mask_hi = mask_hi;
+}
+
static void
get_fixed_ranges(mtrr_type * frs)
{
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
@@ -609,6 +610,348 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
+static int disable_mtrr_cleanup;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+ disable_mtrr_cleanup = 1;
+ return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+ size_t start;
+ size_t end;
+};
+
+static void __init subtract_range(struct r...some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.
the patch will change MTRR to discrete.
mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.
v2: fix -1 for UC
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+ struct mtrr_var_range *vr;
+
+ vr = mtrr_state.var_ranges;
+
+ vr[index].base_lo = base_lo;
+ vr[index].base_hi = base_hi;
+ vr[index].mask_lo = mask_lo;
+ vr[index].mask_hi = mask_hi;
+}
+
static void
get_fixed_ranges(mtrr_type * frs)
{
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
@@ -609,6 +610,348 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
+static int disable_mtrr_cleanup;
+
+static int __init disable_mtrr_cleanup_setup(char *str)
+{
+ disable_mtrr_cleanup = 1;
+ return 0;
+}
+early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
+
+#define RANGE_NUM 256
+
+struct res_range {
+ size_t start;
+ size_t end;
+};
+
+static void __init s...some BIOS like to use continus MTRR layout, and some X driver can not add
WB entries for graphical cards when 4g or more RAM installed.
the patch will change MTRR to discrete.
mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.
v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
skip the var state change warning.
remove next_basek in range_to_mtrr()
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+ struct mtrr_var_range *vr;
+
+ vr = mtrr_state.var_ranges;
+
+ vr[index].base_lo = base_lo;
+ vr[index].base_hi = base_hi;
+ vr[index].mask_lo = mask_lo;
+ vr[index].mask_hi = mask_hi;
+}
+
static void
get_fixed_ranges(mtrr_type * frs)
{
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
@@ -609,6 +610,345 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
+static int __initdata enable_mtrr_cleanup;
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+ enable_mtrr_cleanup = 1;
+ return 0;
...some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.
the patch will change MTRR to discrete.
mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.
v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
skip the var state change warning.
remove next_basek in range_to_mtrr()
v4: correct warning mask.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+ struct mtrr_var_range *vr;
+
+ vr = mtrr_state.var_ranges;
+
+ vr[index].base_lo = base_lo;
+ vr[index].base_hi = base_hi;
+ vr[index].mask_lo = mask_lo;
+ vr[index].mask_hi = mask_hi;
+}
+
static void
get_fixed_ranges(mtrr_type * frs)
{
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
@@ -609,6 +610,345 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
+static int __initdata enable_mtrr_cleanup;
+
+static int __init enable_mtrr_cleanup_setup(char *str)
+{
+ enable_mtrr_...some BIOS like to use continus MTRR layout, and may X driver can not add
WB entries for graphical cards when 4g or more RAM installed.
the patch will change MTRR to discrete.
mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.
v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
skip the var state change warning.
remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+ struct mtrr_var_range *vr;
+
+ vr = mtrr_state.var_ranges;
+
+ vr[index].base_lo = base_lo;
+ vr[index].base_hi = base_hi;
+ vr[index].mask_lo = mask_lo;
+ vr[index].mask_hi = mask_hi;
+}
+
static void
get_fixed_ranges(mtrr_type * frs)
{
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -37,6 +37,7 @@
#include <linux/smp.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
+#include <linux/sort.h>
#include <asm/e820.h>
#include <asm/mtrr.h>
@@ -609,6 +610,366 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
+#ifdef CONFIG_MTRR_SANITIZER
+
+#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
+static int enab...Skimming through the code it looks fairly sane. I do think it would be good to split this patch into two pieces. 1) The mtrr rewriter/sanitizer/normalizer. All it should do is rewrite the MTRRs with a semantically equivalent value. This code should always be safe and work on any system with MTRRs. This works around otherwise sane BIOSes that simply prefer to have contiguous MTRRs. I don't see a reason why this code should be configurable. This approach avoids earlier concerns because it starts with the existing MTRR layout and not with the e820 map. 2) The mtrr_chunk_size code that rounds things off and allows us to use discrete MTRRs by reducing some RAM to uncacheable. Because it makes things uncacheable it has potentially bad side effects on performance and thus potentially bad side effects on functionality, especially for areas like SMM and ACPI, as they usually occur at the end of RAM just below 4G. The chunk size code should be configurable and default to off because it has potential side effects. A Kconfig option may also be appropriate. It raises an interesting trade-off question: do you want your BIOS to be fast, or X? Eric --
On Tue, Apr 29, 2008 at 12:00 PM, Eric W. Biederman (less memory + fast X) or (more 8M RAM + slow...) YH --
Yes. That is the basic question. Not all X drivers need it and potentially the current kernel drm modules can use the PAT infrastructure that has been merged. Further a SMM monitor running 100 times or more slower may cause problems if SMM mode is entered frequently, slowing down the entire system not just X. So if you don't have X or you have a crazy SMM monitor this can be an issue. Eric --
On Tue, Apr 29, 2008 at 1:29 PM, Eric W. Biederman wrote: Agreed, so that feature is compiled in but disabled by default. BTW: is there any chance for the OS to disable SMI etc., to verify whether the instability is caused by SMI? YH --
Not in general, no. Frequently you can get at the registers that will enable/disable an SMI, but that is chipset specific. Think of SMM mode as a lightweight hypervisor that we can't get rid of, if you want to understand the worst case. In theory SMM mode is completely unnecessary as soon as we enable ACPI. In practice ACPI appears to frequently trap into SMM mode. Eric --
SMM does more than that. It emulates legacy hardware and fixes chip(set) bugs as well. Disabling it just makes your box stop working. There are certain types of systems where essential safety nets rely on SMIs (you can deep-fry P4s by disabling SMIs). Thanks, tglx --
There is truth in that but it is overdramatic. P4s don't deep fry; they almost always turn off before they overheat (you may take physical damage to your motherboard though). The best definition I have heard of SMM mode is: smack the stupid OS that isn't doing what it should be doing at runtime mode. It is the way board designers and BIOS writers can work around what they perceive as broken OS code that keeps them from doing what they need to do. Getting them to give up SMM mode, even though technically possible, is requesting they give up a degree of control and thus a major social engineering challenge for anyone who wishes to achieve it. So any time we tread on territory that could mess up SMM mode we need to be careful, especially as we cannot turn it off to diagnose problems. The interactions can be hard to root cause. Replacing overlapping MTRRs with a non-overlapping set to allow X to set a WB region, as YH is doing, appears safe and reasonable, and worth doing. Going one step farther and reducing some of the WB memory to UC so we can free up an MTRR for video and to accelerate X is a bit chancy and something I don't feel comfortable with enabling by default, especially as we have a better long-term fix on the way. This problem is hitting enough people, and the odds of something really bad happening when you take a 100x or 1000x slowdown in SMM are pretty low, so I do think it is useful to have a kernel option that rounds down the amount of memory you have and converts WB memory to UC to accelerate X. Hopefully by this point we are all now reminded how this can interact with SMM mode (although no one has ever seen a bad interaction) and how interacting with SMM mode can be a problem. Eric --
It's also used for all sorts of ugly horrible hacks - like some boards with broken latches on timers where the firmware uses SMM to emulate the hardware and spins until the low byte flips before loading the time into the RTC. Thankfully on the newest CPUs SMM mode is a bit more elegantly designed; the original is a weird not-quite-real mode with extra bugs, while nowadays it's more of a virtual machine --
I don't think there's any documented way for that. SMI might be the mechanism that ensures blue-smoke type of system reliability (CPU fan, temperature, etc.) so it would be extremely dangerous to mess with it. Ingo --
Then that is bad and sick HW design. For example, the CPU fan is supposed to run at full speed if SW sends an insane instruction and loses the connection. Also, the CPU should be shut down by the thermal trip if the fan is stopped. When we were working on LinuxBIOS, we found one MB whose CPU fan needed to be started by the BIOS, and another one whose fan automatically ran at full speed if the BIOS didn't touch it. We always liked the second design. YH --
Some BIOSes like to use a continuous MTRR layout, and the X driver may not be able to add
WB entries for graphics cards when 4G or more RAM is installed.
This patch changes the MTRR layout to discrete.
mtrr_chunk_size= can be used to have a smaller continuous block to hold holes.
The default is 256M; it can be set according to the size of the graphics card memory.
v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
skip the var state change warning.
remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 alignment with extra hole
v7: gran_sizek to prevent running out of MTRRs.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_ra
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
}
}
- if (tom2) {
- if (start >= (1ULL<<32) && (end < tom2))
+ if (mtrr_tom2) {
+ if (start >= (1ULL<<32) && (end < mtrr_tom2))
return MTRR_TYPE_WRBACK;
}
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
+{
+ struct mtrr_var_range *vr;
+
+ vr = mtrr_state.var_ranges;
+
+ vr[index].base_lo = base_lo;
+ vr[index].base_hi = base_hi;
+ vr[index].mask_lo = mask_lo;
+ vr[index].mask_hi = mask_hi;
+}
+
static void
get_...some BIOS like to use continus MTRR layout, and X driver can not add
WB entries for graphical cards when 4g or more RAM installed.
the patch will change MTRR to discrete.
mtrr_chunk_size= could be used to have smaller continuous block to hold holes.
default is 256m, could be set according to size of graphics card memory.
mtrr_gran_size= can be used to set the smallest MTRR block size, to avoid running out of MTRRs.
v2: fix -1 for UC checking
v3: default to disable, and need use enable_mtrr_cleanup to enable this feature
skip the var state change warning.
remove next_basek in range_to_mtrr()
v4: correct warning mask.
v5: CONFIG_MTRR_SANITIZER
v6: fix 1g, 2g, 512 alignment with extra hole
v7: gran_sizek to prevent running out of MTRRs.
v8: fix hole_basek calculation error caused by removing next_basek;
use gran_sizek when basek is 0.
need to apply
[PATCH] x86: fix trimming e820 with MTRR holes.
right after this one.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -37,7 +37,7 @@ static struct fixed_range_block fixed_ra
static unsigned long smp_changes_mask;
static struct mtrr_state mtrr_state = {};
static int mtrr_state_set;
-static u64 tom2;
+u64 mtrr_tom2;
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "mtrr."
@@ -139,8 +139,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
}
}
- if (tom2) {
- if (start >= (1ULL<<32) && (end < tom2))
+ if (mtrr_tom2) {
+ if (start >= (1ULL<<32) && (end < mtrr_tom2))
return MTRR_TYPE_WRBACK;
}
@@ -158,6 +158,20 @@ get_mtrr_var_range(unsigned int index, s
rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
}
+/* fill the MSR pair relating to a var range */
+void fill_mtrr_var_range(unsigned int index,
+ u32...v9: address format change requests by Ingo
more case handling in range_to_var_with_hole
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/Kconfig
===================================================================
--- linux-2.6.orig/arch/x86/Kconfig
+++ linux-2.6/arch/x86/Kconfig
@@ -1092,13 +1092,12 @@ config MTRR_SANITIZER
If unsure, say Y.
config MTRR_SANITIZER_ENABLE_DEFAULT
- def_bool y
- prompt "Enable MTRR cleanup by default"
+ int "MTRR cleanup enable value (0-1)"
+ range 0 1
+ default "0"
depends on MTRR_SANITIZER
help
- Enable mtrr cleanup by default
-
- If unsure, say Y.
+ Enable mtrr cleanup default value
config X86_PAT
bool
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -611,17 +611,9 @@ static struct sysdev_driver mtrr_sysdev_
};
#ifdef CONFIG_MTRR_SANITIZER
-
-#ifdef CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT
-static int enable_mtrr_cleanup __initdata = 1;
-#else
-static int enable_mtrr_cleanup __initdata;
-#endif
-
+static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
#else
-
static int enable_mtrr_cleanup __initdata = -1;
-
#endif
static int __init disable_mtrr_cleanup_setup(char *str)
@@ -640,6 +632,7 @@ static int __init enable_mtrr_cleanup_se
}
early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
+/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
struct res_range {
@@ -647,13 +640,27 @@ struct res_range {
unsigned long end;
};
-static int __init add_range(struct res_range *range, int nr_range, unsigned long start,
- unsigned long end, int merge)
+static int __init
+add_range(struct res_range *range, int nr_range, unsigned long start,
+ unsigned long end)
{
- int i;
+ /* out of slots */
+ if (nr_range >= ...loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
so the user doesn't need to add mtrr_chunk_size and mtrr_gran_size.
If the optimal value is not found, print out the whole list to help select a less optimal
value.
Add mtrr_spare_reg_nr= so the user can set 2 instead of 1, if the card needs more entries.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 0;
- return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 1;
- return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range,
continue;
}
- if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+ if (start <= range[j].start && end < range[j].end &&
+ range[j].start < end + 1) {
range[j].start = end + 1;
continue;
}
- if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+ if (start > range[j].start && end >= range[j].end &&
+ range[j].end > start - 1) {
range[j].end = start - 1;
continue;
}
@@ -743,1...loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
so user don't need to add mtrr_chunk_size and mtrr_gran_size
if optimal value is not found, print out all list to help select less optimal
value.
add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
v2: find the one with more spare entries
if the specified mtrr_chunk_size and mtrr_gran_size are not good, will try to find one
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 0;
- return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 1;
- return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range,
continue;
}
- if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+ if (start <= range[j].start && end < range[j].end &&
+ range[j].start < end + 1) {
range[j].start = end + 1;
continue;
}
- if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+ if (start > range[j].start && e...loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
so user don't need to add mtrr_chunk_size and mtrr_gran_size
if optimal value is not found, print out all list to help select less optimal
value.
add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
v2: find the one with more spare entries
v3: fix hole_basek offset
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 0;
- return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 1;
- return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range,
continue;
}
- if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+ if (start <= range[j].start && end < range[j].end &&
+ range[j].start < end + 1) {
range[j].start = end + 1;
continue;
}
- if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+ if (start > range[j].start && end >= range[j].end &&
+ range[j].end > start ...loop mtrr chunk_size and gran_size from 1M to 2G to find out optimal value.
so user don't need to add mtrr_chunk_size and mtrr_gran_size
if optimal value is not found, print out all list to help select less optimal
value.
add mtrr_spare_reg_nr= so user could set 2 instead of 1, if the card need more entries.
v2: find the one with more spare entries
v3: fix hole_basek offset
v4: tighten the comparison between range and range_new
loop stop with 4g
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -610,28 +610,6 @@ static struct sysdev_driver mtrr_sysdev_
.resume = mtrr_restore,
};
-#ifdef CONFIG_MTRR_SANITIZER
-static int enable_mtrr_cleanup __initdata = CONFIG_MTRR_SANITIZER_ENABLE_DEFAULT;
-#else
-static int enable_mtrr_cleanup __initdata = -1;
-#endif
-
-static int __init disable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 0;
- return 0;
-}
-early_param("disable_mtrr_cleanup", disable_mtrr_cleanup_setup);
-
-static int __init enable_mtrr_cleanup_setup(char *str)
-{
- if (enable_mtrr_cleanup != -1)
- enable_mtrr_cleanup = 1;
- return 0;
-}
-early_param("enble_mtrr_cleanup", enable_mtrr_cleanup_setup);
-
/* should be related to MTRR_VAR_RANGES nums */
#define RANGE_NUM 256
@@ -702,13 +680,15 @@ subtract_range(struct res_range *range,
continue;
}
- if (start <= range[j].start && end < range[j].end && range[j].start < end + 1) {
+ if (start <= range[j].start && end < range[j].end &&
+ range[j].start < end + 1) {
range[j].start = end + 1;
continue;
}
- if (start > range[j].start && end >= range[j].end && range[j].end > start - 1) {
+ if (start > range[j].start &am...Why stopping at 2 GB? --
if you select 4g for chunk size, we don't need to convert that from continuous to discrete to make the X server driver happy. actually the code could support any chunk_size... for example: 16 g system original: 0-4g WB 3.5g-4g UC 4g-8g WB 8g-16g WB 16g-16.5g WB if you set chunk size to 16g, and gran size <= 512M you will get 0-16g WB 3.5g-4g UC 16g-16.5g WB YH --
Yes, 16 GB systems are already mainstream; 32 GB is common, so I don't see any reason to stop at 2 GB. Instead, it should loop up to the physical address size. -hpa --
but our objective is that has 0xd0000000-0xe0000000 (WC) not to be overlapping with other MTRR entries (UC).. YH --
So, pray tell, how comes this algorithm can come up with a non-solution to the problem presented to it? Overall, I'm feeling there is something really completely wrong if this needs manual tunables of any sort. -hpa --
the problem is that the BIOS sets the MTRRs like BIG WB - SOME SMALL UC to spare mtrr regs. but later the X server driver wants to set some range to WC, and that range has already fallen in UC... YH --
That's not the point. I understand you want to flatten the layout. The point is: why do you need manual tunables for the algorithm to do the right thing? -hpa --
optimal result is not losing covering for ranges that is originally covered, and still keep as many of spare mtrr entries for X server driver. we only have 8 mtrrs, could lose some covering because of run out of mtrr regs. So we need to search it according to chunk/gran with ram ranges that is defined by old mtrr layout. and if we can not find the optimal setting, user could select one setting (chunk/gran size) to boot next time, but he will lose some covering. for some regions. later trim_mtrr will remove those range from e820 map YH --
Yes. You have a search space of less than 1000 possible combinations (64..20 bits), so it hardly is any reason to not search the entire universe of possibilities, even if by exhaustive search. Now, if even that searching can't come up with the optimal solution (if Right, now we're talking policy, which obviously has to be entered by the user. -hpa --
only search 78 2g, 1g, ...1m, and half matrix 13 * 6.. and don't need to search than 78. also if we don't need to get the more spare regs than because we only have 8 mtrrs, and user may specify mtrr_spare_reg_nr = 2 or 3 to get more entries for the graphics cards... then can not the optimal setting without losing any covering. so if the optimal is there (only need to search to 2g), it will catch it. YH --
Again, it's not clear to me why there is an inherent limit at 2 GB. -hpa --
above 2g, we will use 4g (chunk size, because if we use 4g gran_size it will trim too much RAM). it will get the continuous layout like 0-4g WB 3.5g-4G UC instead of the discrete one that we want. (chunk_size=2g, gran_size=512M) 0-2g WB 2g-4g WB 3.5g-4g UC YH --
Large value could prevent small alignment from --- ~Randy --
Ingo, can you change that directly in the patch? or need me send another updated patch? YH --
On my system x86-latest + this patch and using no boot options gives me this /proc/mtrr: reg00: base=0x00000000 ( 0MB), size=2048MB: write-back, count=1 reg01: base=0x80000000 (2048MB), size= 512MB: write-back, count=1 reg02: base=0xa0000000 (2560MB), size= 256MB: write-back, count=1 reg03: base=0xb0000000 (2816MB), size= 256MB: write-back, count=1 reg04: base=0xbf700000 (3063MB), size= 1MB: uncachable, count=1 reg05: base=0xbf800000 (3064MB), size= 8MB: uncachable, count=1 reg06: base=0x100000000 (4096MB), size=1024MB: write-back, count=1 Which is OK. It could probably collapse reg01-reg03 into one but that's a minor issue (for me at least, there are probably cases where collapsing them might save the user from having to specify the mtrr_spare_reg_nr boot option). In any case it works fine here. dmesg is attached. Let me know if there's anything else I should test! And thanks a lot for all your work, Yinghai! :) Regards, Mika
yes. please try mtrr_spare_reg_nr=3 or etc. YH --
Sure this works. But that was my point exactly. It should be possible to figure out the better configuration automatically so that I *don't* have to specify mtrr_spare_reg_nr=3. Or in other words: If there are multiple equivalent configurations that don't lose any RAM(!), the one with the most free MTRR regs should be preferred. AFAICT you loop over the chunk size and stop when you have found a configuration that leaves the number of free MTRR registers requested (default 1). This will almost always result in a configuration where you have *exactly* the number of requested free regs available, even if a more efficient configuration was possible. What I'm suggesting is, that - in the case where no RAM is lost at this point - the loop should continue to try and free up more registers, as long as no RAM is lost. I.e. even if in my case chunk_size=256M gives adequate results and leaves me with 1 free reg, since I don't lose any RAM at this point the loop should continue as long as I do not lose any RAM. That way it would find the ideal chunk_size (1g) automatically. But again, this is non-critical. But I think it might help a few users who need more than 1 free reg, because they probably will have no idea about the kernel option... Regards, Mika --
OK, will send another version out. YH --
WOW :) With this patch all is working fine , no RAM is lost , X is fast , so far everything else seems to work fine. \o/ I will test on 32bit tomorrow and stress the box later on today to be sure everything works fine. There is my dmesg , meminfo , mtrr output with this patch on top x86-latest : http://frugalware.org/~crazy/mtrr_x86-latest/ --
while looking at your boot log, it seems there is one bug about the hole position. but looking at that code, it should already be handled. Can you send out the boot msg and /proc/mtrr when using the disable_mtrr_cleanup command line? Thanks Yinghai Lu --
Sure , there it is : http://frugalware.org/~crazy/mtrr_x86-latest/dmesg2 http://frugalware.org/~crazy/mtrr_x86-latest/proc_mtrr2 I'm still using this version of your patch , didn't get any time to update to v2. Gabriel --
original reg00: base=0xd0000000 (3328MB), size= 256MB: uncachable, count=1 reg01: base=0xe0000000 (3584MB), size= 512MB: uncachable, count=1 reg02: base=0x00000000 ( 0MB), size=4096MB: write-back, count=1 reg03: base=0x100000000 (4096MB), size= 512MB: write-back, count=1 reg04: base=0x120000000 (4608MB), size= 128MB: write-back, count=1 reg05: base=0x128000000 (4736MB), size= 64MB: write-back, count=1 reg06: base=0xcf600000 (3318MB), size= 2MB: uncachable, count=1 after clean up reg00: base=0x00000000 ( 0MB), size=2048MB: write-back, count=1 reg01: base=0x80000000 (2048MB), size=1024MB: write-back, count=1 reg02: base=0xc0000000 (3072MB), size= 256MB: write-back, count=1 reg03: base=0xcfe00000 (3326MB), size= 2MB: uncachable, count=1 reg04: base=0x100000000 (4096MB), size= 512MB: write-back, count=1 reg05: base=0x120000000 (4608MB), size= 256MB: write-back, count=1 reg06: base=0x12c000000 (4800MB), size= 64MB: uncachable, count=1 so the hole base is not right, it should be at 3318MB instead of 3326MB. please hold to test v3 ... Thanks Yinghai Lu --
All is still fine here after an quick test ( BTW that version is really chatty :P ) Gabriel --
thanks. yeah, but can you remove debug in command line to see if it still talks too much. YH --
thanks, applied. Ingo --
s/granity/granularity/ I think that's what you mean/want. --- ~Randy --
considering to auto test to find optimal value for mtrr_chunk_size and mtrr_gran_size... YH --
this should be a single: #ifdef CONFIG_MTRR_SANITIZER static int mtrr_cleanup_enabled = CONFIG_MTRR_SANITIZER_DEFAULT; #endif looks cleaner this way: static int __init add_range(struct res_range *range, int nr_range, unsigned long start, should be: static void __init subtract_range(struct res_range *range, unsigned long start, can be: s/unsigned address_bits/unsigned int address_bits/ also move range_sizek on a separate line. should be: static void __init set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, s/unsigned/unsigned int hm, will this work on 64-bit? Above-4G is controlled via separate the ++ is a hard to notice side-effect of the loop. It's cleaner to superfluous newline. all in one, this is a very useful and nice feature. Ingo --
thanks. will submit a new one with fix. YH --
With this version ( and patch http://lkml.org/lkml/2008/4/29/97 ) applied on latest linus git tree the box OOPS'es early. Sorry I don't have time right now to write down the part of the OOPS I can see on monitor , I can try to find some time later. In any way OOPS'es on __free_one_page+0x191/0x21e Gabriel --
thanks. found one problem with hole_basek ... will send you v8, and hope it will be last version. YH --
please try v8, it should get rid of the 8m entry. it need patch http://lkml.org/lkml/2008/4/29/97 too. Thanks Yinghai Lu --
Box does boot with v8 but now I get that warning you fixed in v2 again =): .... [ 0.000000] Linux version 2.6.25-06058-ga01e035-dirty (crazy@thor) (gcc version 4.3.0 (Frugalware Linux) ) #805 SMP PREEMPT Tue Apr 29 13:04:49 CEST 2008 [ 0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 [ 0.000000] BIOS-provided physical RAM map: [ 0.000000] BIOS-e820: 0000000000000000 - 000000000009cc00 (usable) [ 0.000000] BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved) [ 0.000000] BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved) [ 0.000000] BIOS-e820: 0000000000100000 - 00000000cf550000 (usable) [ 0.000000] BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data) [ 0.000000] BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS) [ 0.000000] BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved) [ 0.000000] BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) [ 0.000000] BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved) [ 0.000000] BIOS-e820: 0000000100000000 - 000000012c000000 (usable) [ 0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used [ 0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used [ 0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used [ 0.000000] max_pfn_mapped = 1228800 [ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106 [ 0.000000] After WB checking [ 0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000 [ 0.000000] After UC checking [ 0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600 [ 0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000 [ 0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000 [ 0.000000] After sorting [ 0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600 [ 0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000 [ 0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000 [ 0.000000] range0: 00...
please try mtrr_chunk_size=512m mtrr_gran_size=128m or mtrr_chunk_size=256m mtrr_gran_size=128m YH --
... [ 0.000000] Command line: root=/dev/sdb1 ro debug vga=0x317 mtrr_chunk_size=512m mtrr_gran_size=128m 3 [ 0.000000] BIOS-provided physical RAM map: [ 0.000000] BIOS-e820: 0000000000000000 - 000000000009cc00 (usable) [ 0.000000] BIOS-e820: 000000000009cc00 - 00000000000a0000 (reserved) [ 0.000000] BIOS-e820: 00000000000e4000 - 0000000000100000 (reserved) [ 0.000000] BIOS-e820: 0000000000100000 - 00000000cf550000 (usable) [ 0.000000] BIOS-e820: 00000000cf550000 - 00000000cf55e000 (ACPI data) [ 0.000000] BIOS-e820: 00000000cf55e000 - 00000000cf5e0000 (ACPI NVS) [ 0.000000] BIOS-e820: 00000000cf5e0000 - 00000000cf600000 (reserved) [ 0.000000] BIOS-e820: 00000000fee00000 - 00000000fee01000 (reserved) [ 0.000000] BIOS-e820: 00000000ffc00000 - 0000000100000000 (reserved) [ 0.000000] BIOS-e820: 0000000100000000 - 000000012c000000 (usable) [ 0.000000] Entering add_active_range(0, 0, 156) 0 entries of 256 used [ 0.000000] Entering add_active_range(0, 256, 849232) 1 entries of 256 used [ 0.000000] Entering add_active_range(0, 1048576, 1228800) 2 entries of 256 used [ 0.000000] max_pfn_mapped = 1228800 [ 0.000000] x86 PAT enabled: cpu 0, old 0x7040600070406, new 0x7010600070106 [ 0.000000] After WB checking [ 0.000000] MTRR MAP PFN: 0000000000000000 - 000000000012c000 [ 0.000000] After UC checking [ 0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600 [ 0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000 [ 0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000 [ 0.000000] After sorting [ 0.000000] MTRR MAP PFN: 0000000000000000 - 00000000000cf600 [ 0.000000] MTRR MAP PFN: 00000000000cf800 - 00000000000d0000 [ 0.000000] MTRR MAP PFN: 0000000000100000 - 000000000012c000 [ 0.000000] range0: 0000000000000000 - 00000000c0000000 [ 0.000000] Setting variable MTRR 0, base: 0MB, range: 2048MB, type WB [ 0.000000] Setting variable MTRR 1, base: 2048MB, range: 1024MB, type WB [ ...
please try attached trim_holes_fix.patch..., it will fix the trim hole problem. then check if X server works well. then try mtrr_cleanup_fix.patch for... ==> address ingo request about style etc. Thanks Yinghai Lu
