Make the mptable consistent with ACPI routing, so we can:
1. kexec a kernel with acpi=off
2. work around a BIOS whose ACPI routing works but whose mptable is wrong.
This lets us use the kernel/kexec to start another OS that doesn't have good ACPI support.
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/drivers/acpi/pci_irq.c
===================================================================
--- linux-2.6.orig/drivers/acpi/pci_irq.c
+++ linux-2.6/drivers/acpi/pci_irq.c
@@ -474,6 +474,8 @@ acpi_pci_irq_derive(struct pci_dev *dev,
return irq;
}
+int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, u32 gsi, int triggering, int polarity);
+
/*
* acpi_pci_irq_enable
* success: return 0
@@ -570,6 +572,8 @@ int acpi_pci_irq_enable(struct pci_dev *
(triggering == ACPI_LEVEL_SENSITIVE) ? "level" : "edge",
(polarity == ACPI_ACTIVE_LOW) ? "low" : "high", dev->irq);
+ mp_config_acpi_gsi(dev->bus->number, dev->devfn, dev->pin, irq, triggering, polarity);
+
return 0;
}
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -180,14 +180,26 @@ static void __init MP_ioapic_info(struct
nr_ioapics++;
}
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void __init print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- mp_irqs[mp_irq_entries] = *m;
printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
+}
+
+static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+{
+ int i;
+
+ print_MP_intsrc_info(m);
+
+ for (i = 0; i < mp_irq_entries; i++)
+ if (!memcmp(m, &mp_irqs[i], sizeof(*m)))
+ return;
+
+ mp_irqs[mp_irq_entries] = *m;
if ...[PATCH] x86: fixed mtrr change WP to WB
so that we can modify the mptable located just below 1M
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/generic.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/generic.c
@@ -192,6 +192,26 @@ void mtrr_save_fixed_ranges(void *info)
get_fixed_ranges(mtrr_state.fixed_ranges);
}
+int update_mtrr_fixed_ranges(unsigned char old_type, unsigned char new_type)
+{
+ unsigned char *type;
+ int i;
+ int changed = 0;
+
+ if (!mtrr_state.have_fixed)
+ return 0;
+
+ type = mtrr_state.fixed_ranges;
+ for (i = 0; i < NUM_FIXED_RANGES; i++) {
+ if (type[i] == old_type) {
+ type[i] = new_type;
+ changed = 1;
+ }
+ }
+
+ return changed;
+}
+
static void print_fixed(unsigned base, unsigned step, const mtrr_type*types)
{
unsigned i;
Index: linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/mtrr/main.c
+++ linux-2.6/arch/x86/kernel/cpu/mtrr/main.c
@@ -794,6 +794,24 @@ x86_get_mtrr_mem_range(struct res_range
return nr_range;
}
+extern int __initdata enable_update_mptable;
+
+static int __init fixed_mtrr_cleanup(void)
+{
+ unsigned char new_type;
+
+ if (!enable_update_mptable)
+ return 0;
+
+ /* AMD 0x1e, intel 0x06 */
+ new_type = 0x06;
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
+ (boot_cpu_data.x86 >= 0x0f && boot_cpu_data.x86 <= 0x11))
+ new_type = 0x1e;
+
+ return update_mtrr_fixed_ranges(0x05, new_type);
+}
+
static struct res_range __initdata range[RANGE_NUM];
#ifdef CONFIG_MTRR_SANITIZER
@@ -1162,6 +1180,7 @@ static int __init mtrr_cleanup(unsigned
unsigned long range_sums, range_sums_new;
int index_good;
int num_reg_good;
+ int changed;
/* extra one for all 0 */
int num[MTRR_NUM_TYPES + 1];
@@ -1173,6 +1192,8 @@ static int ...Again: This does not work on even the majority of all systems. If the mptable should be modified, it should be copied to kernel memory and modified there. Modifying it in-place is both unsafe and, in general, simply will not work. Sorry. NAK on this whole set. -hpa --
but
* 1) Scan the bottom 1K for a signature
* 2) Scan the top 1K of base RAM
* 3) Scan the 64K of bios
*/
if (smp_scan_config(0x0, 0x400, reserve) ||
smp_scan_config(639 * 0x400, 0x400, reserve) ||
smp_scan_config(0xF0000, 0x10000, reserve))
return;
and all of these areas are already reserved.
I wonder if we could just modify mpf->mpf_physptr to point to another
mpc and reuse the mpf?
YH
--
Obviously, if you have already modified a table, you should save a pointer to it, and not rely on the kernel scanning later. -hpa --
that modified mptable is for second kernel that is kexeced. YH --
Then you would have to add a method to pass it into the kernel if you want it to be generally useful. -hpa --
but how about old stock kernel in RHEL3U9? YH --
There is nothing you can do about those that will work on any platform. If you have knowledge of the firmware you can of course break those rules. -hpa --
Thanks. I will try to allocate a 4k page for the mpc, copy the old mpc to it, and reuse the old mpf. YH --
Make the mptable consistent with ACPI routing, so we can:
1. kexec a kernel with acpi=off
2. work around a BIOS whose ACPI routing works but whose mptable is wrong.
This lets us use the kernel/kexec to start another OS that doesn't have good ACPI support.
command line: update_mptable
v2: add alloc_mptable for an mptable that cannot be changed in place.
new command line: alloc_mptable
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -301,6 +301,35 @@ unsigned long __init find_e820_area_size
return -1UL;
}
+
+/*
+ * pre allocated 4k and reserved it in e820
+ */
+unsigned long __init early_reserve_e820(unsigned long sizet,
+ unsigned long align)
+{
+ unsigned long start = 0, size = 0;
+ unsigned long addr;
+
+#ifdef CONFIG_X86_TRAMPOLINE
+ start = TRAMPOLINE_BASE;
+#endif
+ while (size < sizet)
+ start = find_e820_area_size(start, &size, align);
+
+ if (size < sizet)
+ return 0;
+
+ addr = start + size - sizet;
+
+ update_memory_range(addr, sizet, E820_RAM, E820_RESERVED);
+
+ printk(KERN_INFO "update e820 for early_reserve_e820\n");
+ update_e820();
+
+ return addr;
+}
+
/*
* Find the highest page frame number we have available
*/
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -25,6 +25,7 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h>
+#include <asm/e820.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
@@ -180,14 +181,26 @@ static void __init MP_ioapic_info(struct
nr_ioapics++;
}
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void __init print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- mp_irqs[mp_irq_entries] = *m;
...make mptable to be consistent to acpi routing, so we could
1. kexec kernel with acpi=off
2. workaround BIOS that acpi routing is working, but mptable is not right.
so can use kernel/kexec to start other os that doesn't have good acpi support
command line: update_mptable
v2: add alloc_mptable for mptable that can not be changed.
new command line: alloc_mptable
v3: adjust for patch that move some mp_xxx to acpi..., and mpc_ to mp_
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -243,6 +243,35 @@ unsigned long __init find_e820_area_size
return -1UL;
}
+
+/*
+ * pre allocated 4k and reserved it in e820
+ */
+unsigned long __init early_reserve_e820(unsigned long sizet,
+ unsigned long align)
+{
+ unsigned long start = 0, size = 0;
+ unsigned long addr;
+
+#ifdef CONFIG_X86_TRAMPOLINE
+ start = TRAMPOLINE_BASE;
+#endif
+ while (size < sizet)
+ start = find_e820_area_size(start, &size, align);
+
+ if (size < sizet)
+ return 0;
+
+ addr = start + size - sizet;
+
+ update_memory_range(addr, sizet, E820_RAM, E820_RESERVED);
+
+ printk(KERN_INFO "update e820 for early_reserve_e820\n");
+ update_e820();
+
+ return addr;
+}
+
/*
* Find the highest page frame number we have available
*/
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -25,6 +25,7 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h>
+#include <asm/e820.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
@@ -184,20 +185,82 @@ static void __init MP_ioapic_info(struct
nr_ioapics++;
}
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void __init ...-tip testing found a build failure with this patch: arch/x86/kernel/mpparse.c: In function 'get_MP_intsrc_index': arch/x86/kernel/mpparse.c:891: error: 'mp_irq_entries' undeclared (first use inthis function) arch/x86/kernel/mpparse.c:891: error: (Each undeclared identifier is reported only once [...] config at: http://redhat.com/~mingo/misc/config-Mon_May_19_17_38_13_CEST_2008.bad Ingo --
make mptable to be consistent to acpi routing, so we could
1. kexec kernel with acpi=off
2. workaround BIOS that acpi routing is working, but mptable is not right.
so can use kernel/kexec to start other os that doesn't have good acpi support
command line: update_mptable
v2: add alloc_mptable for mptable that can not be changed.
new command line: alloc_mptable
v3: adjust for patch that move some mp_xxx to acpi..., and mpc_ to mp_
v4: fix i386 without IO_APIC support compiling
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/e820_64.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/e820_64.c
+++ linux-2.6/arch/x86/kernel/e820_64.c
@@ -243,6 +243,35 @@ unsigned long __init find_e820_area_size
return -1UL;
}
+
+/*
+ * pre allocated 4k and reserved it in e820
+ */
+unsigned long __init early_reserve_e820(unsigned long sizet,
+ unsigned long align)
+{
+ unsigned long start = 0, size = 0;
+ unsigned long addr;
+
+#ifdef CONFIG_X86_TRAMPOLINE
+ start = TRAMPOLINE_BASE;
+#endif
+ while (size < sizet)
+ start = find_e820_area_size(start, &size, align);
+
+ if (size < sizet)
+ return 0;
+
+ addr = start + size - sizet;
+
+ update_memory_range(addr, sizet, E820_RAM, E820_RESERVED);
+
+ printk(KERN_INFO "update e820 for early_reserve_e820\n");
+ update_e820();
+
+ return addr;
+}
+
/*
* Find the highest page frame number we have available
*/
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -25,6 +25,7 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h>
+#include <asm/e820.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
@@ -184,20 +185,82 @@ static void __init MP_ioapic_info(struct
nr_ioapics++;
}
-static void __init MP_intsrc_info(struct mpc_config_intsrc ...make mptable to be consistent to acpi routing, so we could
1. kexec kernel with acpi=off
2. workaround BIOS that acpi routing is working, but mptable is not right.
so can use kernel/kexec to start other os that doesn't have good acpi support
command line: update_mptable
v2: add alloc_mptable for mptable that can not be changed.
new command line: alloc_mptable
v3: adjust for patch that move some mp_xxx to acpi..., and mpc_ to mp_
v4: fix i386 without IO_APIC support compiling
v5: make it could be applied after
x86: extend e820 ealy_res support 32bit
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -25,6 +25,7 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h>
+#include <asm/e820.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
@@ -184,20 +185,82 @@ static void __init MP_ioapic_info(struct
nr_ioapics++;
}
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void __init print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x,"
+ printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic;
- mp_irqs[mp_irq_entries].mp_type = m->mpc_type;
- mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype;
- mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag;
- mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus;
- mp_irqs[mp_irq_entries].mp_srcbusirq = m->mpc_srcbusirq;
- mp_irqs[mp_irq_entries].mp_dstirq = m->mpc_dstirq;
+}
+
+static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq)
+{
+ printk(KERN_CONT "Int: ...make mptable to be consistent to acpi routing, so we could
1. kexec kernel with acpi=off
2. workaround BIOS that acpi routing is working, but mptable is not right.
so can use kernel/kexec to start other os that doesn't have good acpi support
command line: update_mptable
v2: add alloc_mptable for mptable that can not be changed.
new command line: alloc_mptable
v3: adjust for patch that move some mp_xxx to acpi..., and mpc_ to mp_
v4: fix i386 without IO_APIC support compiling
v5: make it could be applied after
x86: extend e820 ealy_res support 32bit
v6: 32bit support alloc_mptable too
fix ia64 compilation
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -25,6 +25,8 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h>
+#include <asm/e820.h>
+#include <asm/trampoline.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
@@ -161,20 +163,82 @@ static void __init MP_ioapic_info(struct
nr_ioapics++;
}
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void __init print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x,"
+ printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic;
- mp_irqs[mp_irq_entries].mp_type = m->mpc_type;
- mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype;
- mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag;
- mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus;
- mp_irqs[mp_irq_entries].mp_srcbusirq = m->mpc_srcbusirq;
- mp_irqs[mp_irq_entries].mp_dstirq = m->mpc_dstirq;
+}
+
+static ...make mptable to be consistent to acpi routing, so we could
1. kexec kernel with acpi=off
2. workaround BIOS that acpi routing is working, but mptable is not right.
so can use kernel/kexec to start other os that doesn't have good acpi support
command line: update_mptable
v2: add alloc_mptable for mptable that can not be changed.
new command line: alloc_mptable
v3: adjust for patch that move some mp_xxx to acpi..., and mpc_ to mp_
v4: fix i386 without IO_APIC support compiling
v5: make it could be applied after
x86: extend e820 ealy_res support 32bit
v6: 32bit support alloc_mptable too
fix ia64 compilation
v7: fix driver in module to call mp_config_acpi_gsi
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/mpparse.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/mpparse.c
+++ linux-2.6/arch/x86/kernel/mpparse.c
@@ -25,6 +25,8 @@
#include <asm/proto.h>
#include <asm/acpi.h>
#include <asm/bios_ebda.h>
+#include <asm/e820.h>
+#include <asm/trampoline.h>
#include <mach_apic.h>
#ifdef CONFIG_X86_32
@@ -161,20 +163,81 @@ static void __init MP_ioapic_info(struct
nr_ioapics++;
}
-static void __init MP_intsrc_info(struct mpc_config_intsrc *m)
+static void print_MP_intsrc_info(struct mpc_config_intsrc *m)
{
- printk(KERN_INFO "Int: type %d, pol %d, trig %d, bus %02x,"
+ printk(KERN_CONT "Int: type %d, pol %d, trig %d, bus %02x,"
" IRQ %02x, APIC ID %x, APIC INT %02x\n",
m->mpc_irqtype, m->mpc_irqflag & 3,
(m->mpc_irqflag >> 2) & 3, m->mpc_srcbus,
m->mpc_srcbusirq, m->mpc_dstapic, m->mpc_dstirq);
- mp_irqs[mp_irq_entries].mp_dstapic = m->mpc_dstapic;
- mp_irqs[mp_irq_entries].mp_type = m->mpc_type;
- mp_irqs[mp_irq_entries].mp_irqtype = m->mpc_irqtype;
- mp_irqs[mp_irq_entries].mp_irqflag = m->mpc_irqflag;
- mp_irqs[mp_irq_entries].mp_srcbus = m->mpc_srcbus;
- mp_irqs[mp_irq_entries].mp_srcbusirq = ...[PATCH] x86: update mptable v7 - fix
need to call early_reserve_e820 to preallocate the mptable for 32-bit
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
Index: linux-2.6/arch/x86/kernel/setup_32.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/setup_32.c
+++ linux-2.6/arch/x86/kernel/setup_32.c
@@ -761,6 +761,8 @@ void __init setup_arch(char **cmdline_p)
*/
max_pfn = e820_end_of_ram();
+ /* pre-allocate 4k for mptable mpc */
+ early_reserve_e820_mpc_new();
/* update e820 for memory not covered by WB MTRRs */
mtrr_bp_init();
if (mtrr_trim_uncached_memory(max_pfn)) {
--
applied, thanks. It needed the small build fix below as well, for UP
32-bit.
Ingo
--------------->
commit 3c803fb3e1972566f7a1613f53f7e5a70fbde83f
Author: Ingo Molnar <mingo@elte.hu>
Date: Sun May 25 21:16:06 2008 +0200
x86, mpparse: build fix
fix:
LD .tmp_vmlinux1
arch/x86/kernel/built-in.o: In function `setup_arch':
: undefined reference to `early_reserve_e820_mpc_new'
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 6e9c958..b8ba374 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -46,7 +46,11 @@ extern unsigned long mp_lapic_addr;
extern void find_smp_config(void);
extern void get_smp_config(void);
+#ifdef CONFIG_X86_MPPARSE
extern void early_reserve_e820_mpc_new(void);
+#else
+static inline void early_reserve_e820_mpc_new(void) { }
+#endif
void __cpuinit generic_processor_info(int apicid, int version);
#ifdef CONFIG_ACPI
--
thanks. So X86_MPPARSE is not needed when SMP is not enabled? YH --
X86_MPPARSE should depend on X86_LOCAL_APIC and you can still have it enabled while !SMP. I think current dependencies express it quite clearly. Maciej --
here's the condition:
config X86_MPPARSE
def_bool y
depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64
64-bit has it unconditionally, 32-bit has it on SMP, UP-IOAPIC or
UP-local-APIC mode. [ (and VISWS is an ugly complication we should try
to eradicate similarly to NUMAQ, if possible :-) ]
Ingo
--
VISWS or VOYAGER depends on !PCI, that is some odd system. YH --
Both are long-since obsolete systems. -hpa --
Given X86_64 selects X86_LOCAL_APIC I am not sure the redundancy seen above does not actually obscure the logic behind... I think: depends on X86_LOCAL_APIC && !X86_VISWS would be clearer and get the same. Maciej --
if the system doesn't have ioapic, we don't need to store entries for mptable update Reported-by: Daniel Exner <dex@dragonslave.de> Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com> diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 04dbb35..e639d2f 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1173,6 +1173,9 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin, if (!enable_update_mptable) return 0; + if (!acpi_ioapic) + return 0; + /* print the entry should happen on mptable identically */ intsrc.mpc_type = MP_INTSRC; intsrc.mpc_irqtype = mp_INT; --
If the system doesn't have an IO-APIC, we don't need to store entries for the mptable
update.
Also make mp_config_acpi_gsi not call functions in mpparse,
so that mpparse can later be decoupled from ACPI more easily.
Reported-by: Daniel Exner <dex@dragonslave.de>
Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com>
---
arch/x86/kernel/acpi/boot.c | 87 ++++++++++++++++++++++++++++----------------
arch/x86/kernel/mpparse.c | 19 ++++-----
include/asm-x86/mpspec.h | 2 -
3 files changed, 65 insertions(+), 43 deletions(-)
Index: linux-2.6/arch/x86/kernel/acpi/boot.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/acpi/boot.c
+++ linux-2.6/arch/x86/kernel/acpi/boot.c
@@ -958,10 +958,37 @@ void __init mp_register_ioapic(int id, u
nr_ioapics++;
}
+static void assign_to_mp_irq(struct mp_config_intsrc *m,
+ struct mp_config_intsrc *mp_irq)
+{
+ memcpy(mp_irq, m, sizeof(struct mp_config_intsrc));
+}
+
+static int mp_irq_cmp(struct mp_config_intsrc *mp_irq,
+ struct mp_config_intsrc *m)
+{
+ return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc));
+}
+
+static void save_mp_irq(struct mp_config_intsrc *m)
+{
+ int i;
+
+ for (i = 0; i < mp_irq_entries; i++) {
+ if (!mp_irq_cmp(&mp_irqs[i], m))
+ return;
+ }
+
+ assign_to_mp_irq(m, &mp_irqs[mp_irq_entries]);
+ if (++mp_irq_entries == MAX_IRQ_SOURCES)
+ panic("Max # of irq sources exceeded!!\n");
+}
+
void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
{
int ioapic;
int pin;
+ struct mp_config_intsrc mp_irq;
/*
* Convert 'gsi' to 'ioapic.pin'.
@@ -979,18 +1006,15 @@ void __init mp_override_legacy_irq(u8 bu
if ((bus_irq == 0) && (trigger == 3))
trigger = 1;
- mp_irqs[mp_irq_entries].mp_type = MP_INTSRC;
- mp_irqs[mp_irq_entries].mp_irqtype = mp_INT;
- mp_irqs[mp_irq_entries].mp_irqflag = (trigger << 2) | polarity;
- mp_irqs[mp_irq_entries].mp_srcbus = ...Signed-off-by: Yinghai Lu <yhlu.kernel@gmail.com> --- arch/x86/Kconfig | 15 ++++++++++++++- arch/x86/Kconfig.debug | 9 --------- arch/x86/mm/k8topology_64.c | 4 ++++ 3 files changed, 18 insertions(+), 10 deletions(-) Index: linux-2.6/arch/x86/Kconfig =================================================================== --- linux-2.6.orig/arch/x86/Kconfig +++ linux-2.6/arch/x86/Kconfig @@ -233,6 +233,19 @@ config SMP If you don't know what to do here, say N. +config X86_FIND_SMP_CONFIG + def_bool y + depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS + depends on X86_32 + +config X86_MPPARSE + def_bool y + bool "Enable MPS table" + depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64 + help + For old smp systems that do not have proper acpi support. Newer systems + (esp with 64bit cpus) with acpi support, MADT and DSDT will override it + choice prompt "Subarchitecture Type" default X86_PC @@ -289,7 +302,7 @@ if X86_GENERICARCH config X86_NUMAQ bool "NUMAQ (IBM/Sequent)" - depends on SMP && X86_32 && PCI + depends on SMP && X86_32 && PCI && X86_MPPARSE select NUMA help This option is used for getting Linux to run on a NUMAQ (IBM/Sequent) Index: linux-2.6/arch/x86/Kconfig.debug =================================================================== --- linux-2.6.orig/arch/x86/Kconfig.debug +++ linux-2.6/arch/x86/Kconfig.debug @@ -130,15 +130,6 @@ config 4KSTACKS on the VM subsystem for higher order allocations. This option will also use IRQ stacks to compensate for the reduced stackspace. -config X86_FIND_SMP_CONFIG - def_bool y - depends on X86_MPPARSE || X86_VOYAGER || X86_VISWS - depends on X86_32 - -config X86_MPPARSE - def_bool y - depends on (X86_32 && (X86_LOCAL_APIC && !X86_VISWS)) || X86_64 - config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EMBEDDED Index: ...
applied to tip/x86/mpparse, thanks. Ingo --
-tip testing found an early 64-bit bootup crash: SMP: Allowing 0 CPUs, 0 hotplug CPUs PANIC: early exception 0e rip 10:ffffffff804aeb32 error 2 cr2 ffff810040000000 Pid: 0, comm: swapper Not tainted 2.6.26-rc6 #8188 Call Trace: [<ffffffff80233f59>] ? printk+0x77/0x98 [<ffffffff80d78196>] early_idt_handler+0x56/0x6a [<ffffffff804aeb32>] ? memset+0x32/0xc0 [<ffffffff80d88026>] ? __alloc_bootmem_core+0x1ce/0x372 [<ffffffff80d88343>] __alloc_bootmem_nopanic+0x35/0x60 [<ffffffff80d8837f>] __alloc_bootmem+0x11/0x3a [<ffffffff80d7d329>] setup_per_cpu_areas+0x8d/0x249 [<ffffffff80d78d61>] start_kernel+0x175/0x3a4 [<ffffffff80d78365>] x86_64_start_kernel+0x165/0x16e RIP 0x10 with this config (1GB RAM system): http://redhat.com/~mingo/misc/config-Thu_Jun_19_14_22_37_CEST_2008.bad so i reverted this patch for now. Ingo --
First of all you want to make sure at least one of ACPI and X86_MPPARSE is enabled if X86_LOCAL_APIC or you risk a known-broken kernel configuration, e.g. SMP which has no slightest chance to work. Personally I'd be happy to see CONFIG_ACPI_BOOT we used to have at one point back just so that you can use ACPI tables to run an SMP system without the need to pull all the power management stuff. Useful if the MP table is broken beyond recovery. I am assuming it has been removed for a reason though. Maciej --
thanks. will try to add CONFIG_ACPI_BOOT... YH --
NAK. CONFIG_ACPI_BOOT was removed because it was fundamentally ill-conceived and created a situation which was not only more difficult to maintain but also didn't work on most machines. ACPI interrupt configuration depends on the ACPI interpreter, so to boot properly and configure interrupts with ACPI, you need 90% of the kernel's ACPI code present anyway. The other 10% are the Linux policy drivers, fan, processor, etc, and those can already be de-configured one by one if desired. If you want to use ACPI just for enumerating processors, i.e. to see the HT that MPS usually doesn't include, you can boot with "acpi=ht", which will not enter ACPI mode or use ACPI for anything else. We used to have a compile option for this, but most people who used it did so by mistake and then complained that all sorts of things (starting with the power button) didn't work, so it was removed. I don't think we should be going out of our way to enhance MPS support. There probably isn't a single system shipped in this century that has MPS that doesn't have ACPI, while there are millions of systems that have ACPI and no MPS. MPS is going away, and making it a config option prepares us for the day when we completely don't care about it any more. However, I think that adding CONFIG_MPS before removing ACPI's dependency on mpparse.c has all risk and no value. thanks, -Len --
Hmm, that's quite obscure an option name! I would imagine most modern systems used as servers would not want to do any power management, but would still prefer to use ACPI for enumeration of processors (including real ones!) and interrupts, because I gather it has become common if an MP table is included in a system at all, it is not exactly correct, because the responsible BIOS engineer simply had no clue to either fix it or It looks like I have a fortunate exception, manufactured Dec 2007, which Well, this is why I think it is important to be able to drop unwanted parts of the framework, such as the P from the ACPI acronym. If you say "acpi=ht" will do, that's great; otherwise relying on the MP table used to be the alternative. I don't think we'll be able to drop MP table support entirely in the foreseeable future though, like we haven't dropped support for the original 80386 yet. Old SMP systems with MP table support only are going to be around for a while -- I have a couple myself and I am sure they are Agreed. Maciej --
that should be sorted out mostly in -tip/x86, Alexey Starikovskiy went
through a marathon session decoupling them, about two months ago:
$ git-shortlog --stat --author=astarikovskiy linus/master..tip/master
Alexey Starikovskiy (12):
x86: move es7000_plat out of mpparse.c
x86: complete move ACPI from mpparse.c
x86: make mp_ioapic_routing definition local
x86: make struct config_ioapic not MPspec specific
x86: make config_irqsrc not MPspec specific
x86: fix mpparse/acpi interaction
x86: Set pic_mode only if local apic code is present
x86: move pic_mode to apic_32.c
x86: move smp_found_config
x86: mp_bus_id_to_pci_bus is not needed
x86: move mp_bus_not_pci from mpparse.c
x86: allow MPPARSE to be deselected in SMP configs
22 files changed, 524 insertions(+), 501 deletions(-)
it's all in linux-next as well.
Ingo
--
applied to tip/x86/mpparse - thanks Yinghai. Ingo --
Is this an effort to boot an ACPI-mode kernel, and then kexec a non-ACPI kernel? Doing so could confuse the heck out of the platform firmware, which will think that an ACPI-mode kernel is still running. Note that it is a historic artifact, now considered a bug, that ACPI uses the MPS code. We should be divorcing these two bodies of code rather than mixing them further. -Len --
how about adding config option to not compile mptable related info? YH --
Why is this feature needed? There are a number of ways that the resulting kernel may fail, That's the idea. CONFIG_MPS=n CONFIG_ACPI=y should build and run on every PC built in this century. This was prototyped a long while back, but the tree has churned so much since then the old prototype is worthless. -Len --
other os still doesn't have update acpi irq routing support. but has will look at it tonight. when EFI is popular, there is no reason to compile mps related stuff in kernel again. esp for 64 bit. BTW, it is funny that Suse still has fallsafe boot entry with acpi=off. YH --
Which is at least in part a reason to go back to the BIOS manufacturer and get them to fix their table. I can see a warning coming from the kernel if these two tables are inconsistent ? EFI has nothing to do with this. ACPI is popular today and EFI preserves Silly question. Given that even writing to this table is platform specific. Any chance we can do this any a userspace utility writing through /dev/mem for the systems that need it? We can even bundle the utility in the kexec-tools package to make it easier to distribute. Eric --
On Wed, Jun 18, 2008 at 9:32 PM, Eric W. Biederman interesting, never thought that. need to call pci_acpi_enable_irq==>mp_config_acpi_gsi in kernel to save mp_irqs... also we could use old kernel kexec second kernel (with this patch and acpi, and uptable) and then use second kernel to kexec last kernel... YH --
I would prefer this to an in-kernel solution. I really feel this is too ugly to live. -hpa --
Again, there may not *be* an MPS table, and if there is but the interrupt links are programmable, the MPS table may have very little in common with the state of the machine in ACPI mode. I'm sorry, kexec continues to sound like science fiction to me. I don't understand why scribbling on upstream Linux in the name of science fiction makes any sense. I just don't get it. -Len --
In the common case if not in general the MP table and the ACPI version of the same table, provide the same data in slight different In the normal kexec case not the kexec on panic (aka kdump) we should have shutdown ACPI on the way down. So the machine won't be running in ACPI mode. I assume ACPI supports that. What YH is doing does sound potentially dangerous. If you can indeed compare the two tables and in fact see they are inconsistent. That is a good case for printing a warning message. YH clearly started this because in his testing the MP table was broken and he had an older Enterprise kernel to run that had unusable ACPI support. That however is a BIOS bug. Pushing back on BIOS bugs and making them easy to find is always a good deal. Silently fixing them (not just working around them) seems unprecedented. Eric --
On Wed, Jun 18, 2008 at 11:37 PM, Eric W. Biederman the patch did print out the old and updated mptable. and that feature is enabled via "update_mptable" command line. also it is useful for kexec from acpi kernel to no acpi kernel on MB with nvidia chipset with AMI BIOS, because irq routing is different between mptable and acpi. amd8111/amd8131 chipset with AMI bios has same irq routing from mptable and acpi. Phoenix BIOS has one setup option APIC/PIC, and if select APIC, then irq routing from mptable and acpi is the same with nvidia chipset. YH --
So you know this can NEVER be reliable, yet you continue to push for upstream changes that carry risk for all systems that DO work. Please clue me into the use model that justifies risking a single line of Linux code for this effort. -Len --
Yinghai: Len has a point, could you please double-check that no potentially risky code runs by default anywhere, without a specific boot option? update_mptable is supposed to achieve that, but lets make sure it does its job. I.e. the option should be like acpi=off - risky to do as it could break some systems, and it should not run by default. I have 3 systems myself where acpi=off results in a non-working system (hard hangs, non-detected devices, etc.). Len: while i probably wont use the update_mptable option myself (i wont ever want to kexec into an OS with broken ACPI support), Yinghai's changes nevertheless have some use as they solve an external problem, and they also clean up a crufty area of code. Due to them we already found pre-existing bugs. Ingo --
Will double-check that, and will consider adding CONFIG_UPDATE_MPTABLE so that it is not built in by default. YH --
I don't see how this can work. What if the platform doesn't support MPS -- the 2nd OS will try to run in PIC mode and will see only the legacy interrupts? What if it does support MPS but ACPI has configured its PCI Interrupt Link Devices to direct interrupts There is state in the chip-set that the ACPI OS has set that makes no sense in a non-ACPI context and can lead to bizarre behaviour. The Links above are just one example. The ACPI SCI interrupt is configured in ACPI mode only. What happens when the non-ACPI OS boots and GPE's fire and result in SCI's? They'll not get serviced and will kill that IRQ and anything else on it. Can somebody clue me into why this concept is something other than totally insane? -Len --
if the system doesn't support mps, this patch will bail out, because it will use irq from mptable that is updated according to acpi when acpi irq set routing, and it will set some bits in pci config in southbridge, and the irq routing is consistent between HW and irq returned by acpi. when first kernel is shutdown, it doesn't restore the bits about irq routing in pci config. So if keep updated mptable has correct pin, then device on second the patch doesn't change entries in mptable that pin < 16 and irq is not for pci device... and normal sci/acpi is using 9..., and even mptable even doesn't has that entry. and there is no pci device share that irq with sci/acpi..., so it will be put into updated mptable. for system that have 6 pcie slots and more, with full populated cards. when boot with mptable, BIOS will use irq < 16 for all pci devices, and several devices will share same irq. but when acpi is enabled, irqs are spanned all over. also when pci card with several pci bridges is plugged in, the mptable is totally messed up, some slot will work, and some doesn't work. YH --
And the reason to worry about supporting non-ACPI on such hardware is what? -hpa --
1. Some other OSes don't support ACPI well and still use the mptable. 2. To show BIOS engineers how to fix the mptable. YH --
