> Recently a kdump bug was discovered in which a system would hang inside
> calibrate_delay during the booting of the kdump kernel. This was caused by the
> fact that the jiffies counter was not being incremented during timer
> calibration. The root cause of this problem was found to be a bios
> misconfiguration of the hypertransport bus. On system affected by this hang,
> the bios had assigned APIC ids which used extended apic bits (more than the
> nominal 4 bit ids's), but failed to configure bit 17 of the hypertransport
> transaction config register, which indicated that the mask for the destination
> field of interrupt packets accross the ht bus (see section 3.3.9 of
>
http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/26094.PDF).
> If a crash occurs on a cpu with an APIC id that extends beyond 4 bits, it will
> not recieve interrupts during the kdump kernel boot, and this hang will be the
> result. The fix is to add this patch, whcih add an early pci quirk check, to
> forcibly enable this bit in the httcfg register. This enables all cpus on a
> system to receive interrupts, and allows kdump kernel bootup to procede
> normally.
>
> Regards
> Neil
>
>
> Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
>
>
> early-quirks.c | 90 +++++++++++++++++++++++++++++++++++++++++++--------------
> 1 file changed, 69 insertions(+), 21 deletions(-)
>
>
> diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
> index 88bb83e..c0d0c69 100644
> --- a/arch/x86/kernel/early-quirks.c
> +++ b/arch/x86/kernel/early-quirks.c
> @@ -21,8 +21,36 @@
> #include <asm/gart.h>
> #endif
>
> -static void __init via_bugs(void)
> +static void __init fix_hypertransport_config(int num, int slot, int func)
> {
> + u32 htcfg;
> + /*
> + *we found a hypertransport bus
> + *make sure that are broadcasting
> + *interrupts to all cpus on the ht bus
> + *if we're using extended apic ids
> + */
> + htcfg = read_pci_config(num, slot, func, 0x68);
> + if (htcfg & (1 << 18)) {
> + printk(KERN_INFO "Detected use of extended apic ids on hypertransport bus\n");
> + if ((htcfg & (1 << 17)) == 0) {
> + printk(KERN_INFO "Enabling hypertransport extended apic interrupt broadcast\n");
> + printk(KERN_INFO "Note this is a bios bug, please contact your hw vendor\n");
> + htcfg |= (1 << 17);
> + write_pci_config(num, slot, func, 0x68, htcfg);
> + }
> + }
> +
> +
> +}
> +
> +static void __init via_bugs(int num, int slot, int func)
> +{
> + static int fix_applied = 0;
> +
> + if (fix_applied++)
> + return;
> +
> #ifdef CONFIG_GART_IOMMU
> if ((end_pfn > MAX_DMA32_PFN || force_iommu) &&
> !gart_iommu_aperture_allowed) {
> @@ -44,8 +72,13 @@ static int __init nvidia_hpet_check(struct acpi_table_header *header)
> #endif /* CONFIG_X86_IO_APIC */
> #endif /* CONFIG_ACPI */
>
> -static void __init nvidia_bugs(void)
> +static void __init nvidia_bugs(int num, int slot, int func)
> {
> + static int fix_applied = 0;
> +
> + if (fix_applied++)
> + return;
> +
> #ifdef CONFIG_ACPI
> #ifdef CONFIG_X86_IO_APIC
> /*
> @@ -72,8 +105,13 @@ static void __init nvidia_bugs(void)
>
> }
>
> -static void __init ati_bugs(void)
> +static void __init ati_bugs(int num, int slot, int func)
> {
> + static int fix_applied = 0;
> +
> + if (fix_applied++)
> + return;
> +
> #ifdef CONFIG_X86_IO_APIC
> if (timer_over_8254 == 1) {
> timer_over_8254 = 0;
> @@ -84,14 +122,18 @@ static void __init ati_bugs(void)
> }
>
> struct chipset {
> - u16 vendor;
> - void (*f)(void);
> + u32 vendor;
> + u32 device;
> + u32 class;
> + u32 class_mask;
> + void (*f)(int num, int slot, int func);
> };
>
> static struct chipset early_qrk[] __initdata = {
> - { PCI_VENDOR_ID_NVIDIA, nvidia_bugs },
> - { PCI_VENDOR_ID_VIA, via_bugs },
> - { PCI_VENDOR_ID_ATI, ati_bugs },
> + { PCI_VENDOR_ID_NVIDIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, nvidia_bugs },
> + { PCI_VENDOR_ID_VIA, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, via_bugs },
> + { PCI_VENDOR_ID_ATI, PCI_ANY_ID, PCI_CLASS_BRIDGE_PCI, PCI_ANY_ID, ati_bugs },
> + { PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, PCI_CLASS_BRIDGE_HOST, PCI_ANY_ID, fix_hypertransport_config },
> {}
> };
>
> @@ -106,27 +148,33 @@ void __init early_quirks(void)
> for (num = 0; num < 32; num++) {
> for (slot = 0; slot < 32; slot++) {
> for (func = 0; func < 8; func++) {
> - u32 class;
> - u32 vendor;
> + u16 class;
> + u16 vendor;
> + u16 device;
> u8 type;
> int i;
> - class = read_pci_config(num,slot,func,
> +
> + class = read_pci_config_16(num,slot,func,
> PCI_CLASS_REVISION);
> - if (class == 0xffffffff)
> + if (class == 0xffff)
> break;
>
> - if ((class >> 16) != PCI_CLASS_BRIDGE_PCI)
> - continue;
> -
> - vendor = read_pci_config(num, slot, func,
> + vendor = read_pci_config_16(num, slot, func,
> PCI_VENDOR_ID);
> - vendor &= 0xffff;
>
> - for (i = 0; early_qrk[i].f; i++)
> - if (early_qrk[i].vendor == vendor) {
> - early_qrk[i].f();
> - return;
> + device = read_pci_config_16(num, slot, func,
> + PCI_DEVICE_ID);
> +
> + for(i=0;early_qrk[i].f != NULL;i++) {
> + if (((early_qrk[i].vendor == PCI_ANY_ID) ||
> + (early_qrk[i].vendor == vendor)) &&
> + ((early_qrk[i].device == PCI_ANY_ID) ||
> + (early_qrk[i].device == device)) &&
> + (!((early_qrk[i].class ^ class) &
> + early_qrk[i].class_mask))) {
> + early_qrk[i].f(num, slot, func);
> }
> + }
>
> type = read_pci_config_byte(num, slot, func,
> PCI_HEADER_TYPE);