Hi Robin,
* Robin Holt <holt@sgi.com>:
Here's the disassembly of sn2_send_IPI:
(gdb) disass sn2_send_IPI
Dump of assembler code for function sn2_send_IPI:
0xa000000100633f80 <sn2_send_IPI+0>: [MMI] alloc r39=ar.pfs,17,9,0
0xa000000100633f81 <sn2_send_IPI+1>: adds r12=-160,r12
0xa000000100633f82 <sn2_send_IPI+2>: mov r38=b0
0xa000000100633f90 <sn2_send_IPI+16>: [MMI] addl r19=-1557544,r1
0xa000000100633f91 <sn2_send_IPI+17>: mov r40=r1
0xa000000100633f92 <sn2_send_IPI+18>: sxt4 r20=r32
0xa000000100633fa0 <sn2_send_IPI+32>: [MMI] mov r21=-64480
0xa000000100633fa1 <sn2_send_IPI+33>: nop.m 0x0
0xa000000100633fa2 <sn2_send_IPI+34>: mov r10=-33312
0xa000000100633fb0 <sn2_send_IPI+48>: [MMI] nop.m 0x0;;
0xa000000100633fb1 <sn2_send_IPI+49>: ld8 r16=[r21]
0xa000000100633fb2 <sn2_send_IPI+50>: shladd r8=r20,2,r0
0xa000000100633fc0 <sn2_send_IPI+64>: [MII] mov r18=r19
0xa000000100633fc1 <sn2_send_IPI+65>: adds r41=48,r12;;
0xa000000100633fc2 <sn2_send_IPI+66>: add r17=r8,r18;;
0xa000000100633fd0 <sn2_send_IPI+80>: [MII] ld4.acq r11=[r17]
0xa000000100633fd1 <sn2_send_IPI+81>: nop.i 0x0;;
0xa000000100633fd2 <sn2_send_IPI+82>: sxt4 r37=r11;;
0xa000000100633fe0 <sn2_send_IPI+96>: [MMI] add r15=r10,r16;;
0xa000000100633fe1 <sn2_send_IPI+97>: ld8 r9=[r15]
0xa000000100633fe2 <sn2_send_IPI+98>: nop.i 0x0;;
0xa000000100633ff0 <sn2_send_IPI+112>: [MII] nop.m 0x0
0xa000000100633ff1 <sn2_send_IPI+113>: add r3=r8,r9;;
0xa000000100633ff2 <sn2_send_IPI+114>: addl r2=17592,r3;;
0xa000000100634000 <sn2_send_IPI+128>: [MMI] ld2 r3=[r2];;
Looks like we're dying on this access above ^^
0xa000000100634001 <sn2_send_IPI+129>: nop.m 0x0
0xa000000100634002 <sn2_send_IPI+130>: sxt2 r14=r3;;
0xa000000100634010 <sn2_send_IPI+144>: [MIB] mov r32=r14
0xa000000100634011 <sn2_send_IPI+145>: cmp4.eq p7,p6=-1,r14
My guess is that something bad is happening when we try this:
nasid = cpuid_to_nasid(cpuid);
And include/asm-ia64/sn/sn_cpuid.h says:
#define cpuid_to_nasid(cpuid) (sn_nodepda->phys_cpuid[cpuid].nasid)
Are we calling sn2_send_IPI too early? Do we have to do some sort
of special initialization before sn_nodepda is valid? It all
*looks* like we should be fine because we do
cpu_init()
platform_cpu_init()
sn_cpu_init()
before calling check_sal_cache_flush()... Very curious.
Can you try the debug patch included below?
Thanks.
/ac
diff --git a/arch/ia64/sn/kernel/setup.c b/arch/ia64/sn/kernel/setup.c
index bb1d249..a6a0be5 100644
--- a/arch/ia64/sn/kernel/setup.c
+++ b/arch/ia64/sn/kernel/setup.c
@@ -627,13 +627,18 @@ void __cpuinit sn_cpu_init(void)
nodepdaindr[i]->phys_cpuid[cpuid].nasid = nasid;
nodepdaindr[i]->phys_cpuid[cpuid].slice = slice;
nodepdaindr[i]->phys_cpuid[cpuid].subnode = subnode;
+ printk(KERN_INFO "nodepdaindr[%d]->phys_cpuid[%d] - nasid %d slice %d subnode %d\n", i, cpuid, nasid, slice, subnode);
}
}
cnode = nasid_to_cnodeid(nasid);
+ printk(KERN_INFO "cnode %d\n", cnode);
+
sn_nodepda = nodepdaindr[cnode];
+ printk(KERN_INFO "sn_nodepda 0x%p\n", sn_nodepda);
+
pda->led_address =
(typeof(pda->led_address)) (LED0 + (slice << LED_CPU_SHIFT));
pda->led_state = LED_ALWAYS_SET;
--