Re: [BISECT] Boot failure on ia64.

!MAILaRCHIVE_VOTE_RePLACE
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
To: Robin Holt <holt@...>
Cc: <tony.luck@...>, <linux-kernel@...>
Date: Tuesday, June 24, 2008 - 11:08 am

Hi Robin,

* Robin Holt <holt@sgi.com>:

Here's the disassembly of sn2_send_IPI:

(gdb) disass sn2_send_IPI
Dump of assembler code for function sn2_send_IPI:
0xa000000100633f80 <sn2_send_IPI+0>:    [MMI]       alloc r39=ar.pfs,17,9,0
0xa000000100633f81 <sn2_send_IPI+1>:                adds r12=-160,r12
0xa000000100633f82 <sn2_send_IPI+2>:                mov r38=b0
0xa000000100633f90 <sn2_send_IPI+16>:   [MMI]       addl r19=-1557544,r1
0xa000000100633f91 <sn2_send_IPI+17>:               mov r40=r1
0xa000000100633f92 <sn2_send_IPI+18>:               sxt4 r20=r32
0xa000000100633fa0 <sn2_send_IPI+32>:   [MMI]       mov r21=-64480
0xa000000100633fa1 <sn2_send_IPI+33>:               nop.m 0x0
0xa000000100633fa2 <sn2_send_IPI+34>:               mov r10=-33312
0xa000000100633fb0 <sn2_send_IPI+48>:   [MMI]       nop.m 0x0;;
0xa000000100633fb1 <sn2_send_IPI+49>:               ld8 r16=[r21]
0xa000000100633fb2 <sn2_send_IPI+50>:               shladd r8=r20,2,r0
0xa000000100633fc0 <sn2_send_IPI+64>:   [MII]       mov r18=r19
0xa000000100633fc1 <sn2_send_IPI+65>:               adds r41=48,r12;;
0xa000000100633fc2 <sn2_send_IPI+66>:               add r17=r8,r18;;
0xa000000100633fd0 <sn2_send_IPI+80>:   [MII]       ld4.acq r11=[r17]
0xa000000100633fd1 <sn2_send_IPI+81>:               nop.i 0x0;;
0xa000000100633fd2 <sn2_send_IPI+82>:               sxt4 r37=r11;;
0xa000000100633fe0 <sn2_send_IPI+96>:   [MMI]       add r15=r10,r16;;
0xa000000100633fe1 <sn2_send_IPI+97>:               ld8 r9=[r15]
0xa000000100633fe2 <sn2_send_IPI+98>:               nop.i 0x0;;
0xa000000100633ff0 <sn2_send_IPI+112>:  [MII]       nop.m 0x0
0xa000000100633ff1 <sn2_send_IPI+113>:              add r3=r8,r9;;
0xa000000100633ff2 <sn2_send_IPI+114>:              addl r2=17592,r3;;
0xa000000100634000 <sn2_send_IPI+128>:  [MMI]       ld2 r3=[r2];;

Looks like we're dying on this access above ^^

0xa000000100634001 <sn2_send_IPI+129>:              nop.m 0x0
0xa000000100634002 <sn2_send_IPI+130>:              sxt2 r14=r3;;
0xa000000100634010 <sn2_send_IPI+144>:  [MIB]       mov r32=r14
0xa000000100634011 <sn2_send_IPI+145>:              cmp4.eq p7,p6=-1,r14

My guess is that something bad is happening when we try this:

	nasid = cpuid_to_nasid(cpuid);

And include/asm-ia64/sn/sn_cpuid.h says:

#define cpuid_to_nasid(cpuid)           (sn_nodepda->phys_cpuid[cpuid].nasid)

Are we calling sn2_send_IPI too early? Do we have to do some sort
of special initialization before sn_nodepda is valid? It all
*looks* like we should be fine because we do

	cpu_init()
	platform_cpu_init()
	sn_cpu_init()

before calling check_sal_cache_flush()... Very curious.

Can you try the debug patch included below?

Thanks.

/ac

diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index bb1d249..a6a0be5 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -627,13 +627,18 @@ void __cpuinit sn_cpu_init(void)
 			nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
 			nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
 			nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
+			printk(KERN_INFO "nodepdaindr[%d]->phys_cpuid[%d] - nasid %d slice %d subnode %d\n", i, cpuid, nasid, slice, subnode);
 		}
 	}
 
 	cnode = nasid_to_cnodeid(nasid);
 
+	printk(KERN_INFO "cnode %d\n", cnode);
+
 	sn_nodepda = nodepdaindr[cnode];
 
+	printk(KERN_INFO "sn_nodepda 0x%p\n", sn_nodepda);
+
 	pda->led_address =
 	    (typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
 	pda->led_state = LED_ALWAYS_SET;
--
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[BISECT] Boot failure on ia64., Robin Holt, (Tue Jun 24, 8:30 am)
Re: [BISECT] Boot failure on ia64., Alex Chiang, (Tue Jun 24, 11:08 am)
Re: [BISECT] Boot failure on ia64., Robin Holt, (Tue Jun 24, 11:26 am)
Re: [BISECT] Boot failure on ia64., Alex Chiang, (Tue Jun 24, 11:34 am)
Re: [BISECT] Boot failure on ia64., Robin Holt, (Tue Jun 24, 11:43 am)
Re: [BISECT] Boot failure on ia64., Robin Holt, (Tue Jun 24, 11:17 am)
Re: [BISECT] Boot failure on ia64., Alex Chiang, (Tue Jun 24, 11:21 am)
Re: [BISECT] Boot failure on ia64., Luming Yu, (Tue Jun 24, 9:41 am)