Golang为什么不会panic?

Golang为什么不会panic? 有人能好心帮帮我吗?最近,我被一个cgo问题困扰了。

这是使用gdb输出的backtrace。我发现c代码因为“内存访问错误”而引发了SIGSEGV信号,并且go已经捕获了这个信号。 问题是,go代码陷入了无限循环并且不会panic。

Thread 91 (Thread 0x7f64777fe700 (LWP 365)):
#0  runtime.usleep () at /usr/local/go/src/runtime/sys_linux_amd64.s:144
#1  0x000000000044435e in runtime.raisebadsignal (c=0x7f64777f5198, sig=11) at /usr/local/go/src/runtime/signal_unix.go:496
#2  0x0000000000444773 in runtime.badsignal (c=0x7f64777f5198, sig=11) at /usr/local/go/src/runtime/signal_unix.go:600
#3  0x0000000000443f58 in runtime.sigtrampgo (ctx=0x7f64777f5240, info=0x7f64777f5370, sig=11) at /usr/local/go/src/runtime/signal_unix.go:297
#4  0x000000000045efc3 in runtime.sigtramp () at /usr/local/go/src/runtime/sys_linux_amd64.s:352
#5  <signal handler called>
#6  0x00007f722fbba738 in __memcpy_ssse3_back () from /lib64/libc.so.6
#7  0x00007f705036f728 in AqcPropCalcModRun () from /ise-cn/bin/libaqc20.so
#8  0x00007f7050388092 in ModulesProcessBase () from /ise-cn/bin/libaqc20.so
#9  0x00007f705036baa7 in AqcInstAppend () from /ise-cn/bin/libaqc20.so
#10 0x00007f705036d4f4 in AQCAudioWrite () from /ise-cn/bin/libaqc20.so

以下是使用strace -e signal -ff -p命令的输出。系统调用在信号处理中陷入了无限循环!

[pid  7810] --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_ACCERR, si_addr=0x7feb3bffe000} ---
[pid  7810] rt_sigprocmask(SIG_SETMASK, NULL, ~[KILL STOP], 8) = 0
[pid  7810] rt_sigprocmask(SIG_SETMASK, ~[], NULL, 8) = 0
[pid  7810] sigaltstack(NULL, {ss_sp=NULL, ss_flags=SS_DISABLE, ss_size=0}) = 0
[pid  7810] sigaltstack({ss_sp=0xc4535d0000, ss_flags=0, ss_size=32768}, NULL) = 0
[pid  7810] rt_sigprocmask(SIG_SETMASK, ~[HUP INT QUIT ILL TRAP ABRT BUS FPE KILL SEGV TERM STKFLT CHLD STOP PROF SYS RTMIN RT_1], NULL, 8) = 0
[pid  7810] rt_sigprocmask(SIG_UNBLOCK, [SEGV], NULL, 8) = 0
[pid  7810] rt_sigaction(SIGSEGV, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x7ff8a5af3630}, NULL, 8) = 0
[pid  7810] tkill(178, SIGSEGV)         = 0
[pid  7810] --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_TKILL, si_pid=1, si_uid=0} ---
[pid  7810] rt_sigaction(SIGSEGV, {sa_handler=0x45eff0, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x7ff8a5af3630}, NULL, 8) = 0
[pid  7810] rt_sigprocmask(SIG_SETMASK, ~[], NULL, 8) = 0
[pid  7810] sigaltstack({ss_sp=NULL, ss_flags=SS_DISABLE, ss_size=0}, NULL) = 0
[pid  7810] rt_sigprocmask(SIG_SETMASK, ~[KILL STOP], NULL, 8) = 0
[pid  7810] rt_sigreturn({mask=[]})     = 140625886429224
[pid  7810] --- SIGSEGV {si_signo=SIGSEGV, si_code=SEGV_ACCERR, si_addr=0x7feb3bffe000} ---
[pid  7810] rt_sigprocmask(SIG_SETMASK, NULL, ~[KILL STOP], 8) = 0
[pid  7810] rt_sigprocmask(SIG_SETMASK, ~[], NULL, 8) = 0
[pid  7810] sigaltstack(NULL, {ss_sp=NULL, ss_flags=SS_DISABLE, ss_size=0}) = 0
[pid  7810] sigaltstack({ss_sp=0xc4535d0000, ss_flags=0, ss_size=32768}, NULL) = 0
[pid  7810] rt_sigprocmask(SIG_SETMASK, ~[HUP INT QUIT ILL TRAP ABRT BUS FPE KILL SEGV TERM STKFLT CHLD STOP PROF SYS RTMIN RT_1], NULL, 8) = 0
[pid  7810] rt_sigprocmask(SIG_UNBLOCK, [SEGV], NULL, 8) = 0
[pid  7810] rt_sigaction(SIGSEGV, {sa_handler=SIG_DFL, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x7ff8a5af3630}, NULL, 8) = 0
[pid  7810] tkill(178, SIGSEGV)         = 0
[pid  7810] --- SIGSEGV {si_signo=SIGSEGV, si_code=SI_TKILL, si_pid=1, si_uid=0} ---
[pid  7810] rt_sigaction(SIGSEGV, {sa_handler=0x45eff0, sa_mask=~[], sa_flags=SA_RESTORER|SA_ONSTACK|SA_RESTART|SA_SIGINFO, sa_restorer=0x7ff8a5af3630}, NULL, 8) = 0
[pid  7810] rt_sigprocmask(SIG_SETMASK, ~[], NULL, 8) = 0
....

go版本和环境:

go version go1.10.3 linux/amd64
GOARCH="amd64"
GOBIN=""
GOCACHE="/root/.cache/go-build"
GOEXE=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GOOS="linux"
GOPATH="/root/go"
GORACE=""
GOROOT="/usr/local/go"
GOTMPDIR=""
GOTOOLDIR="/usr/local/go/pkg/tool/linux_amd64"
GCCGO="gccgo"
CC="gcc"
CXX="g++"
CGO_ENABLED="1"
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -fmessage-length=0 -fdebug-prefix-map=/tmp/go-build797487429=/tmp/go-build -gno-record-gcc-switches"

内核:Linux xxxxx 4.14.49 #1 SMP Fri Jun 15 16:15:07 CST 2018 x86_64 x86_64 x86_64 GNU/Linux


更多关于Golang为什么不会panic?的实战教程也可以访问 https://www.itying.com/category-94-b0.html

1 回复

更多关于Golang为什么不会panic?的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html


从你的堆栈跟踪和strace输出可以看出,这是一个典型的CGO信号处理问题。Go运行时在捕获到SIGSEGV信号后,会尝试恢复执行,但由于C库中的内存访问错误持续存在,导致信号被反复触发,形成了无限循环。

Go的信号处理机制会为同步信号(如SIGSEGV、SIGBUS等)安装信号处理器。当这些信号发生时,Go会尝试将信号转换为panic。但在CGO场景下,如果信号发生在C代码执行期间,并且C代码没有正确处理错误状态,就会导致信号被反复触发。

以下是问题重现的示例代码:

package main

/*
#include <string.h>
#include <stdlib.h>

// C function that simulates an invalid memory access.
void bad_memory_access() {
    char *ptr = NULL;
    // Deliberately cause a segmentation fault: the destination is NULL.
    memcpy(ptr, "test", 5);
}
*/
import "C"
import (
    "fmt"
    "time"
)

// main starts ten goroutines that each call the faulting C function in an
// endless loop, then blocks forever so the process stays alive.
func main() {
    // The file imports fmt; without a use the program fails to compile with
    // "imported and not used", so announce what is about to happen.
    fmt.Println("starting 10 goroutines that call C.bad_memory_access")

    // Start several goroutines that call the C function.
    for i := 0; i < 10; i++ {
        go func() {
            for {
                // Call the faulting C function.
                C.bad_memory_access()
                time.Sleep(100 * time.Millisecond)
            }
        }()
    }

    // Park the main goroutine; an empty select never becomes ready.
    select {}
}

要解决这个问题,需要在C代码层面修复内存访问错误。如果无法修改C代码,可以考虑以下技术方案:

  1. 使用信号掩码隔离C代码执行
package main

/*
#include <signal.h>
#include <string.h>
#include <stdlib.h>

// Copies 5 bytes ("test" plus its terminating NUL) to a NULL destination.
void bad_memory_access() {
    char *ptr = NULL;
    memcpy(ptr, "test", 5);
}
*/
import "C"
import (
    "runtime"
    "syscall"
    "time"
    "unsafe"
)

func callCSafely() {
    // 保存原始信号掩码
    var oldSet syscall.SigSet
    syscall.Sigprocmask(syscall.SIG_SETMASK, nil, &oldSet)
    
    // 阻塞SIGSEGV信号
    var newSet syscall.SigSet
    newSet.Add(syscall.SIGSEGV)
    syscall.Sigprocmask(syscall.SIG_BLOCK, &newSet, nil)
    
    // 执行C代码
    C.bad_memory_access()
    
    // 恢复原始信号掩码
    syscall.Sigprocmask(syscall.SIG_SETMASK, &oldSet, nil)
}

// main runs callCSafely every 100ms on a dedicated goroutine and then blocks
// forever.
func main() {
    go func() {
        // Pin the worker rather than main: LockOSThread only wires the
        // calling goroutine to its thread, and the worker is the goroutine
        // that calls callCSafely. The goroutine never returns, so no
        // matching unlock is needed.
        runtime.LockOSThread()

        for {
            callCSafely()
            time.Sleep(100 * time.Millisecond)
        }
    }()

    // Park the main goroutine; an empty select never becomes ready.
    select {}
}
  2. 使用独立的线程执行C代码
package main

/*
#include <signal.h>
#include <string.h>
#include <stdlib.h>
#include <pthread.h>

// Thread entry point: resets SIGSEGV to its default action, then writes
// through a NULL pointer. Returns NULL if the write does not terminate it.
static void* thread_func(void* arg) {
    // Reset the SIGSEGV disposition to SIG_DFL.
    // NOTE(review): the original comment described this as a thread-specific
    // handler, but sigaction() dispositions are shared by every thread in
    // the process, so this also replaces any handler installed elsewhere
    // (e.g. by the Go runtime) - confirm this is intended.
    struct sigaction sa;
    sa.sa_handler = SIG_DFL;
    sigemptyset(&sa.sa_mask);
    sa.sa_flags = 0;
    sigaction(SIGSEGV, &sa, NULL);
    
    char *ptr = NULL;
    memcpy(ptr, "test", 5);
    return NULL;
}

// Runs thread_func on a newly created thread and waits for it to finish.
// Returns without waiting if the thread could not be created.
void run_in_separate_thread() {
    pthread_t thread;
    // pthread_create returns non-zero on failure and leaves `thread`
    // unset; joining it in that case would be undefined behavior.
    if (pthread_create(&thread, NULL, thread_func, NULL) != 0) {
        return;
    }
    pthread_join(thread, NULL);
}
*/
import "C"
import "time"

// main spawns one goroutine that calls C.run_in_separate_thread in a loop,
// pausing 100ms between calls, and then blocks forever.
func main() {
    const pause = 100 * time.Millisecond

    loop := func() {
        for {
            C.run_in_separate_thread()
            time.Sleep(pause)
        }
    }
    go loop()

    // An empty select never becomes ready, so main never returns.
    select {}
}
  3. 使用cgo的recover机制(Go 1.14+;注意:recover只能捕获Go的panic,C代码中触发的SIGSEGV通常会被Go运行时当作致命错误处理,无法通过recover恢复):
package main

/*
#include <string.h>
#include <stdlib.h>

// Copies 5 bytes ("test" plus its terminating NUL) to a NULL destination.
void bad_memory_access() {
    char *ptr = NULL;
    memcpy(ptr, "test", 5);
}
*/
import "C"
import (
    "runtime/debug"
    "time"
)

// safeCGoCall calls C.bad_memory_access with a deferred recover in place;
// if a Go panic is recovered, the current stack is printed.
//
// NOTE(review): recover only intercepts Go panics. A SIGSEGV raised while
// executing C code is presumably reported by the runtime as a fatal error
// rather than a panic, in which case this handler never fires - confirm.
func safeCGoCall() {
    defer func() {
        // recover must be called directly by the deferred function.
        if recover() == nil {
            return
        }
        debug.PrintStack()
    }()

    C.bad_memory_access()
}

// main starts a goroutine that invokes safeCGoCall, sleeping 100ms after
// every call, and then blocks forever.
func main() {
    go func() {
        // The post statement runs after each call, giving the same
        // call-then-sleep sequence as an explicit loop body.
        for ; ; time.Sleep(100 * time.Millisecond) {
            safeCGoCall()
        }
    }()

    // An empty select never becomes ready, so main never returns.
    select {}
}

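根本原因在于C库libaqc20.so中的AqcPropCalcModRun函数存在内存访问错误(在memcpy中触发了SIGSEGV)。Go运行时捕获到SIGSEGV信号后,会把信号处理器重置为默认行为(SIG_DFL)并通过tkill重新发送信号,正常情况下这会终止进程。但strace中重新发送的信号带有si_pid=1,说明该进程很可能是容器(PID命名空间)中的1号进程;内核不会对1号进程执行未安装处理器的信号的默认动作,所以进程没有被终止。随后Go运行时恢复自己的信号处理器并从处理函数返回,出错的指令被重新执行,SIGSEGV再次触发,于是形成了无限循环。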

检查C库的内存访问逻辑,确保指针有效性验证和错误处理机制完善。如果C库是第三方库且无法修改,考虑使用信号隔离或线程隔离技术来避免影响Go运行时。

回到顶部