Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AST2600 SMP stuck when crashing #190

Open
shenki opened this issue Apr 14, 2020 · 0 comments
Open

AST2600 SMP stuck when crashing #190

shenki opened this issue Apr 14, 2020 · 0 comments

Comments

@shenki
Copy link
Member

shenki commented Apr 14, 2020

When Linux is running in SMP mode and we crash in a specific way the kernel gets stuck. One cpu sits in the idle loop, while the crashing CPU is stuck on a WFE.

This can be reproduced by the following in fsi-master-aspeed.c:

@@ -397,7 +397,7 @@ static int aspeed_master_init(struct fsi_master_aspeed *aspeed)
                        | fsi_mmode_crs0(fsi_divisor)
                        | fsi_mmode_crs1(fsi_divisor)
                        | FSI_MMODE_P8_TO_LSB);
+       printk("%s: mmode set to %08x (divisor %d)\n",
                        be32_to_cpu(reg), fsi_divisor);

o

(gdb) info threads 
  Id   Target Id                    Frame 
* 1    Thread 1.1 (CPU#0 [halted ]) cpu_v7_do_idle () at ../arch/arm/mm/proc-v7.S:78
  2    Thread 1.2 (CPU#1 [running]) arch_spin_lock (lock=<optimized out>) at ../arch/arm/include/asm/spinlock.h:74
(gdb) thread 2
[Switching to thread 2 (Thread 1.2)]
#0  arch_spin_lock (lock=<optimized out>) at ../arch/arm/include/asm/spinlock.h:74
74			wfe();
(gdb) bt
#0  arch_spin_lock (lock=<optimized out>) at ../arch/arm/include/asm/spinlock.h:74
#1  do_raw_spin_lock (lock=<optimized out>) at ../include/linux/spinlock.h:181
#2  __raw_spin_lock (lock=<optimized out>) at ../include/linux/spinlock_api_smp.h:143
#3  _raw_spin_lock (lock=0x80c7c04c <logbuf_lock>) at ../kernel/locking/spinlock.c:151
#4  0x801758bc in console_unlock () at ../kernel/printk/printk.c:2414
#5  0x80175edc in console_unblank () at ../kernel/printk/printk.c:2544
#6  0x8044e020 in bust_spinlocks (yes=-2134392756) at ../lib/bust_spinlocks.c:28
#7  0x8010dcec in oops_end (signr=<optimized out>, regs=<optimized out>, flags=<optimized out>) at ../arch/arm/kernel/traps.c:326
#8  die (str=0xb <error: Cannot access memory at address 0xb>, regs=0xbd10bb28, err=5) at ../arch/arm/kernel/traps.c:361
#9  0x80116a34 in __do_kernel_fault (mm=0x0, addr=1879310608, fsr=5, regs=0xbd10bb28) at ../arch/arm/mm/fault.c:125
#10 0x80116da0 in __do_kernel_fault (regs=<optimized out>, fsr=<optimized out>, addr=<optimized out>, mm=<optimized out>) at ../arch/arm/mm/fault.c:352
#11 do_page_fault (addr=3171990312, fsr=5, regs=0xbd10bb28) at ../arch/arm/mm/fault.c:376
#12 0x80116f04 in do_translation_fault (addr=639, fsr=2148626180, regs=0xbd10b9c0) at ../arch/arm/mm/fault.c:415
#13 0x80116f58 in do_DataAbort (addr=1879310608, fsr=5, regs=0xbd10bb28) at ../arch/arm/mm/fault.c:533
#14 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#15 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#16 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#17 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#18 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#19 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#20 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#21 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#22 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#23 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#24 0x80101a00 in __dabt_svc () at ../arch/arm/kernel/entry-armv.S:203
#11 do_page_fault (addr=3171990312, fsr=5, regs=0xbd10bb28) at ../arch/arm/mm/fault.c:376
        tsk = 0xbd109840
        mm = 0x0
        sig = <optimized out>
        code = 639
        fault = <optimized out>
        flags = <optimized out>
#12 0x80116f04 in do_translation_fault (addr=639, fsr=2148626180, regs=0xbd10b9c0) at ../arch/arm/mm/fault.c:415
        index = <optimized out>
        pgd = 0x80c7c04c <logbuf_lock>
        pgd_k = 0x5
        pud = <optimized out>
        pud_k = <optimized out>
        pmd = <optimized out>
        pmd_k = <optimized out>
#13 0x80116f58 in do_DataAbort (addr=1879310608, fsr=5, regs=0xbd10bb28) at ../arch/arm/mm/fault.c:533
        inf = 0x80c0d51c <fsr_info+80>

stepping through the code, we get far enough through kernel/printk/printk_safe.c:352 to call arch_local_irq_save () at ../arch/arm/include/asm/irqflags.h:29``` which disables IRQs. However we then get to the poin where we arch_local_irq_restore` (it appears?)

arch_local_irq_restore (flags=<optimized out>) at ../arch/arm/include/asm/irqflags.h:171
171		asm volatile(
(gdb) 
__printk_safe_enter () at ../kernel/printk/printk_safe.c:352
352		this_cpu_inc(printk_context);
(gdb) 
vprintk_emit (facility=0, level=-1, dict=0x0, dictlen=0, fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1976
1976		logbuf_lock_irqsave(flags);
(gdb) 
0x80176558	1976		logbuf_lock_irqsave(flags);
(gdb) 
_raw_spin_lock (lock=0x80c7c04c <logbuf_lock>) at ../kernel/locking/spinlock.c:150
(gdb) 
__printk_safe_enter () at ../kernel/printk/printk_safe.c:352
352		this_cpu_inc(printk_context);
(gdb) 
vprintk_emit (facility=0, level=-1, dict=0x0, dictlen=0, fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1976
1976		logbuf_lock_irqsave(flags);
(gdb) 
0x80176558	1976		logbuf_lock_irqsave(flags);
(gdb) 
_raw_spin_lock (lock=0x80c7c04c <logbuf_lock>) at ../kernel/locking/spinlock.c:150
150	{
(gdb) 
0x8086720c	150	{
(gdb) x/10i $pc
=> 0x8086720c <_raw_spin_lock+4>:	push	{r11, r12, lr, pc}
   0x80867210 <_raw_spin_lock+8>:	sub	r11, r12, #4
   0x80867214 <_raw_spin_lock+12>:	push	{lr}		; (str lr, [sp, #-4]!)
   0x80867218 <_raw_spin_lock+16>:	ldmfd	sp!, {lr}
   0x8086721c <_raw_spin_lock+20>:	pld	[r0]
   0x80867220 <_raw_spin_lock+24>:	ldrex	r2, [r0]
   0x80867224 <_raw_spin_lock+28>:	add	r3, r2, #65536	; 0x10000
   0x80867228 <_raw_spin_lock+32>:	strex	r1, r3, [r0]
   0x8086722c <_raw_spin_lock+36>:	teq	r1, #0
   0x80867230 <_raw_spin_lock+40>:	bne	0x80867220 <_raw_spin_lock+24>
(gdb) si
0x80867210	150	{
(gdb) 
0x80867214	150	{
(gdb) 
0x80867218	150	{
(gdb) 
arch_spin_lock (lock=<optimized out>) at ../arch/arm/include/asm/spinlock.h:62
62		prefetchw(&lock->slock);
(gdb) 
63		__asm__ __volatile__(
(gdb) 
0x80867224	63		__asm__ __volatile__(
(gdb) 
0x80867228	63		__asm__ __volatile__(
(gdb) 
0x8086722c	63		__asm__ __volatile__(
(gdb) 
0x80867230	63		__asm__ __volatile__(
(gdb) 
0x80867234	63		__asm__ __volatile__(
(gdb) 
0x80867238	63		__asm__ __volatile__(
(gdb) 
0x8086723c	63		__asm__ __volatile__(
(gdb) 
73		while (lockval.tickets.next != lockval.tickets.owner) {
(gdb) 
78		smp_mb();
(gdb) 
0x80867258	78		smp_mb();
(gdb) 
_raw_spin_lock (lock=0x80c7c04c <logbuf_lock>) at ../include/linux/spinlock.h:182
182		mmiowb_spin_lock();
(gdb) 
vprintk_emit (facility=0, level=-1, dict=0x0, dictlen=0, fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1977
1977		curr_log_seq = log_next_seq;
(gdb) 
0x80176560	1977		curr_log_seq = log_next_seq;
(gdb) 
0x80176564	1977		curr_log_seq = log_next_seq;
(gdb) 
0x80176568	1977		curr_log_seq = log_next_seq;
(gdb) 
1978		printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
(gdb) 
0x80176570	1978		printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
(gdb) 
0x80176574	1978		printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
(gdb) 
0x80176578	1978		printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
(gdb) 
0x8017657c	1978		printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
(gdb) 
0x80176580	1978		printed_len = vprintk_store(facility, level, dict, dictlen, fmt, args);
(gdb) 
vprintk_store (facility=1, level=-1, dict=0x60000013 <error: Cannot access memory at address 0x60000013>, dictlen=0, 
    fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1907
#0  vprintk_store (facility=1, level=-1, dict=0x60000013 <error: Cannot access memory at address 0x60000013>, dictlen=0, 
    fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1907
#1  0x80176584 in vprintk_emit (facility=0, level=-1, dict=0x8d <error: Cannot access memory at address 0x8d>, dictlen=0, 
    fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1978
#2  0x8017675c in vprintk_default (fmt=0x0, args=<incomplete type>) at ../kernel/printk/printk.c:2023
#3  0x80177a48 in vprintk_func (fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk_safe.c:386
#4  0x80177228 in printk (fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n") at ../kernel/printk/printk.c:2056
#5  0x806c9f98 in aspeed_master_init (aspeed=<optimized out>) at ../drivers/fsi/fsi-master-aspeed.c:400
#6  fsi_master_aspeed_probe (pdev=0xccccb60) at ../drivers/fsi/fsi-master-aspeed.c:561
#7  0x80554038 in platform_drv_probe (_dev=0xbd3b0e10) at ../drivers/base/platform.c:686
#8  0x80551ab0 in really_probe (dev=0xbd3b0e10, drv=0x80c4fa38 <fsi_master_aspeed_driver+20>) at ../drivers/base/dd.c:555
#9  0x80552298 in driver_probe_device (drv=0x80c4fa38 <fsi_master_aspeed_driver+20>, dev=0xbd3b0e10) at ../drivers/base/dd.c:724
#10 0x80552584 in device_driver_attach (drv=0x80c4fa38 <fsi_master_aspeed_driver+20>, dev=0xbd3b0e10) at ../drivers/base/dd.c:998
#11 0x8055260c in __driver_attach (dev=0xbd3b0e10, data=0x80c4fa38 <fsi_master_aspeed_driver+20>) at ../drivers/base/dd.c:1075
#12 0x8054fb08 in bus_for_each_dev (bus=0x80a87ec0, start=0x70040110, data=0x80c4fa38 <fsi_master_aspeed_driver+20>, fn=0x8055258c <__driver_attach>)
    at ../drivers/base/bus.c:304
#13 0x80552704 in driver_attach (drv=0x1) at ../drivers/base/dd.c:1091
#14 0x805504ac in bus_add_driver (drv=0x80c4fa38 <fsi_master_aspeed_driver+20>) at ../drivers/base/bus.c:621
#15 0x80553318 in driver_register (drv=0x80c4fa38 <fsi_master_aspeed_driver+20>) at ../drivers/base/driver.c:170
#16 0x80554e78 in __platform_driver_register (drv=0x80a87ec0, owner=0x70040110) at ../drivers/base/platform.c:741
#17 0x80b39a2c in fsi_master_aspeed_driver_init () at ../drivers/fsi/fsi-master-aspeed.c:606
#18 0x80b01344 in do_one_initcall (fn=0x80b39a0c <fsi_master_aspeed_driver_init>) at ../init/main.c:939
#19 0x80b015b4 in do_initcall_level (level=<optimized out>) at ../include/linux/init.h:131
#20 do_initcalls () at ../init/main.c:1015
#21 do_basic_setup () at ../init/main.c:1032
#22 kernel_init_freeable () at ../init/main.c:1192
#23 0x808605a4 in kernel_init (unused=0x80a87ec0) at ../init/main.c:1110
#24 0x801010e8 in ret_from_fork () at ../arch/arm/kernel/entry-common.S:155
#0  vprintk_store (facility=1, level=-1, dict=0x60000013 <error: Cannot access memory at address 0x60000013>, dictlen=0, 
    fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1907
        textbuf = "fsi-master-aspeed 1e79b000.fsi: hub version e0050101 (1 links)\n\000ver at slave address 0x60\n\000\060\060\060\060 ns\n\000\071\065\062\065\066\065\067\071 ns\n\000reserved, 16384K cma-reserved, 0K highmem)\n", '\000' <repeats 835 times>
        text = <optimized out>
        text_len = 2160574504
        lflags = (unknown: 0)
#1  0x80176584 in vprintk_emit (facility=0, level=-1, dict=0x8d <error: Cannot access memory at address 0x8d>, dictlen=0, 
    fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk.c:1978
        printed_len = -1
        in_sched = <optimized out>
        pending_output = false
        flags = <optimized out>
        curr_log_seq = 141
        __dummy = <optimized out>
        __dummy2 = <optimized out>
        __dummy = <optimized out>
        __dummy2 = <optimized out>
#2  0x8017675c in vprintk_default (fmt=0x0, args=<incomplete type>) at ../kernel/printk/printk.c:2023
        r = <optimized out>
#3  0x80177a48 in vprintk_func (fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n", args=<incomplete type>) at ../kernel/printk/printk_safe.c:386
No locals.
#4  0x80177228 in printk (fmt=0x80a87ec0 "%s: mmode set to %08x (divisor %d)\n") at ../kernel/printk/printk.c:2056
        args = <incomplete type>
        r = <optimized out>
0x80858b60 in check_pointer_msg (ptr=<optimized out>) at ../lib/vsprintf.c:652
652		err_msg = check_pointer_msg(ptr);
(gdb) 
641		if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr))
(gdb) 
0x80858b68	641		if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr))
(gdb) 
0x80858b6c in check_pointer (buf=0xbd10bba4, end=0x80c7c8f0 <dmesg_restrict> "", ptr=0x7003f110, spec=...) at ../lib/vsprintf.c:641
641		if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr))
(gdb) 
0x80858b70 in check_pointer_msg (ptr=<optimized out>) at ../lib/vsprintf.c:641
641		if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr))
(gdb) 
0x80858b74 in check_pointer (buf=0xbd10bba4, end=0x80c7c8f0 <dmesg_restrict> "", ptr=0x7003f110, spec=...) at ../lib/vsprintf.c:641
641		if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr))
(gdb) 
0x80858b78	641		if ((unsigned long)ptr < PAGE_SIZE || IS_ERR_VALUE(ptr))
(gdb) 
0x80858d1c in string (buf=0x80c7c510 <textbuf> "fsi-master-aspeed 1e79b000.fsi: hub version e0050101 (1 links)\n", end=0x80c7c8f0 <dmesg_restrict> "", 
    s=0x70040110 <error: Cannot access memory at address 0x70040110>, spec=...) at ../lib/vsprintf.c:665
665		if (check_pointer(&buf, end, s, spec))
(gdb) 
0x80858d20	665		if (check_pointer(&buf, end, s, spec))
(gdb) 
668		return string_nocheck(buf, end, s, spec);
(gdb) 
0x80858d48	668		return string_nocheck(buf, end, s, spec);
(gdb) 
string_nocheck (buf=0x80c7c510 <textbuf> "fsi-master-aspeed 1e79b000.fsi: hub version e0050101 (1 links)\n", 
    end=0x70040110 <error: Cannot access memory at address 0x70040110>, s=0x80c7c8f0 <dmesg_restrict> "", spec=...) at ../lib/vsprintf.c:600
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant