Eliminate the 6 bank restriction in 64 bit mce reporting code. This restriction
is artificial (due to static creation of sysfs files) and 32 bit code
does not have any such restriction.
This change helps in reporting the details of machine checks on a machine check
exception with errors in bank 6 and above on CPUs that support those banks.
Without the patch, machine check errors in those banks are not reported.
We still have 128 (MCE_EXTENDED_BANK) bank restriction instead of max 256
supported in hardware. That is not changed in the patch below as it will have
some user level mcelog utility dependency, with bank 128 being used for
thermal reporting currently.
The patch below does not create sysfs control (bankNctl) for banks higher
than 6 as well. That needs some pre-cleanup in /sysfs mce layout, removal of
per cpu /sysfs entries for bankctl as they are really global system level
control today. That change will follow. This basic change is critical to
report the detailed errors on banks higher than 6.
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
---
arch/x86/kernel/cpu/mcheck/mce_64.c | 20 ++++++++++++--------
1 file changed, 12 insertions(+), 8 deletions(-)
Index: linux-2.6/arch/x86/kernel/cpu/mcheck/mce_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mcheck/mce_64.c 2008-05-08 13:27:53.000000000 -0700
+++ linux-2.6/arch/x86/kernel/cpu/mcheck/mce_64.c 2008-05-08 14:07:10.000000000 -0700
@@ -31,7 +31,7 @@
#include <asm/idle.h>
#define MISC_MCELOG_MINOR 227
-#define NR_BANKS 6
+#define NR_SYSFS_BANKS 6
atomic_t mce_entry;
@@ -46,7 +46,7 @@ static int mce_dont_init;
*/
static int tolerant = 1;
static int banks;
-static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
+static unsigned long bank[NR_SYSFS_BANKS] = { [0 ... NR_SYSFS_BANKS-1] = ~0UL };
static unsigned long notify_user;
static int rip_msr;
static int mce_bootlog = -1;
@@ -209,7 +209,7 @@ void do_machine_check(struct pt_regs * r
barrier();
for (i = 0; i < banks; i++) {
- if (!bank[i])
+ if (i < NR_SYSFS_BANKS && !bank[i])
continue;
m.misc = 0;
@@ -444,9 +444,10 @@ static void mce_init(void *dummy)
rdmsrl(MSR_IA32_MCG_CAP, cap);
banks = cap & 0xff;
- if (banks > NR_BANKS) {
- printk(KERN_INFO "MCE: warning: using only %d banks\n", banks);
- banks = NR_BANKS;
+ if (banks > MCE_EXTENDED_BANK) {
+ printk(KERN_INFO "MCE: warning: using only %d banks\n",
+ MCE_EXTENDED_BANK);
+ banks = MCE_EXTENDED_BANK;
}
/* Use accurate RIP reporting if available. */
if ((cap & (1<<9)) && ((cap >> 16) & 0xff) >= 9)
@@ -462,7 +463,7 @@ static void mce_init(void *dummy)
wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
for (i = 0; i < banks; i++) {
- wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+ wrmsrl(MSR_IA32_MC0_CTL+4*i, ~0UL);
wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
}
}
@@ -766,7 +767,10 @@ DEFINE_PER_CPU(struct sys_device, device
} \
static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
-/* TBD should generate these dynamically based on number of available banks */
+/*
+ * TBD should generate these dynamically based on number of available banks.
+ * Have only 6 contol banks in /sysfs until then.
+ */
ACCESSOR(bank0ctl,bank[0],mce_restart())
ACCESSOR(bank1ctl,bank[1],mce_restart())
ACCESSOR(bank2ctl,bank[2],mce_restart())
--
| Alan Cox | [PATCH 00/76] Queued TTY Patches |
| Nick Piggin | [patch 1/6] mm: debug check for the fault vs invalidate race |
| Andi Kleen | [PATCH] [1/22] x86_64: dma_ops as const |
| Linus Torvalds | Linux 2.6.27-rc8 |
git: | |
| Jeff King | Re: What's cooking in git/spearce.git (topics) |
| Jeff King | Re: [RFC] origin link for cherry-pick and revert |
| Matt Seitz (matseitz) | Symbolic link documentation |
| Jon Smirl | Huge win, compressing a window of delta runs as a unit |
| Richard Stallman | Real men don't attack straw men |
| Leon Dippenaar | New tcp stack attack |
| Nuno Magalhães | Can't scp, ssh is slow to authenticate. |
| Brandon Lee | DELL PERC 5iR slow performance |
| KOSAKI Motohiro | [bug?] tg3: Failed to load firmware "tigon/tg3_tso.bin" |
| Denys Vlasenko | Re: bnx2 dirver's firmware images |
| Pavel Emelyanov | [PATCH 0/8] Cleanup/fix the sk_alloc() call |
| Kok, Auke | Re: [PATCH] drivers/net: remove network drivers' last few uses of IRQF_SAMPLE_RANDOM |
