Golang中net/http.(*Server).Serve的panic问题分析

Golang中net/http.(*Server).Serve的panic问题分析 我的程序会时不时地出现 panic 问题。 我无法复现这个问题。 到目前为止,问题发生在 10/23、10/25、10/30、11/14 和 11/22。 在最近一次尝试中,我通过 Wireshark 捕获了数据包,似乎在 panic 发生时没有数据包进入。 我也尝试了 go build -race。在崩溃转储中没有发现有用的线索。 欢迎任何建议。

go version go1.12.7 linux/amd64

go env
GOARCH="amd64"
GOBIN=""
GOCACHE="/home/ubuntu/.cache/go-build"
GOEXE=""
GOFLAGS=""
GOHOSTARCH="amd64"
GOHOSTOS="linux"
GOOS="linux"
GOPATH="/home/ubuntu/go"
GOPROXY=""
GORACE=""
GOROOT="/usr/local/go"
GOTMPDIR=""
GOTOOLDIR="/usr/local/go/pkg/tool/linux_amd64"
GCCGO="gccgo"
CC="gcc"
CXX="g++"
CGO_ENABLED="1"
GOMOD=""
CGO_CFLAGS="-g -O2"
CGO_CPPFLAGS=""
CGO_CXXFLAGS="-g -O2"
CGO_FFLAGS="-g -O2"
CGO_LDFLAGS="-g -O2"
PKG_CONFIG="pkg-config"
GOGCCFLAGS="-fPIC -m64 -pthread -fmessage-length=0 -fdebug-prefix-map=/tmp/go-build954623632=/tmp/go-build -gno-record-gcc-switches"

2019年11月22日 17:01:21 的异常转储(程序崩溃后约10秒由crontab自动重新启动,因此实际崩溃时间在 17:01:11~17:01:21 之间):

[GIN] 2019/11/22 - 08:33:29 | 404 |         486ns |   38.21.245.102 | GET      /
[GIN] 2019/11/22 - 08:33:29 | 404 |         864ns |   38.21.245.102 | GET      /HNAP1/
[GIN] 2019/11/22 - 09:20:16 | 404 |       1.066µs |   94.242.26.158 | GET      /
[GIN] 2019/11/22 - 10:07:25 | 404 |         482ns |  107.189.11.148 | GET      /script
[GIN] 2019/11/22 - 10:19:53 | 404 |         918ns |    80.82.70.187 | GET      /cache/global/img/gs.gif
[GIN] 2019/11/22 - 11:20:51 | 404 |         774ns | 218.214.116.175 | GET      /
[GIN] 2019/11/22 - 11:56:36 | 404 |         485ns |   43.255.216.26 | GET      /
[GIN] 2019/11/22 - 12:09:17 | 404 |         450ns | 170.245.173.234 | GET      /
[GIN] 2019/11/22 - 12:41:12 | 404 |         728ns |    109.1.110.70 | GET      /
[GIN] 2019/11/22 - 13:16:05 | 404 |         698ns |  216.218.206.68 | GET      /
[GIN] 2019/11/22 - 13:40:07 | 404 |         753ns |  187.102.57.223 | GET      /
[GIN] 2019/11/22 - 15:04:29 | 404 |         554ns |    119.61.6.133 | GET      /
[GIN] 2019/11/22 - 15:24:29 | 404 |         822ns |  103.249.181.40 | GET      /
runtime: unexpected return pc for net/http.(*Server).Serve called from 0x0
stack: frame={sp:0xc000137d78, fp:0xc000137e40} stack=[0xc000137000,0xc000138000)
000000c000137c78:  000000000000000c  000000c000137cc0
000000c000137c88:  0000000000535948 <net.(*TCPListener).AcceptTCP+72>  000000c0002cc088
000000c000137c98:  000000c00002ca00  000000000040e169 <runtime.mallocgc+745>
000000c000137ca8:  000000c000098780  00000000000000a0
000000c000137cb8:  000000c000137d00  000000c000137d30
000000c000137cc8:  0000000000b24686 <go-template/vendor/github.com/fvbock/endless.(*endlessListener).Accept+70>  000000c0002cc088
000000c000137cd8:  00000000ea73cddc  62f827d6a4dee03e
000000c000137ce8:  000000000044e458 <time.now+56>  000d4614f8da2885
000000c000137cf8:  000000c016ed78e0  16ed78e000137d38
000000c000137d08:  000000005dd78357  000000c000137d38
000000c000137d18:  00000000004b7d56 <time.Now+38>  000000005dd78357
000000c000137d28:  000000000043908f <runtime.newproc+111>  000000c000137d68
000000c000137d38:  0000000000763d3c <net/http.(*onceCloseListener).Accept+60>  000000c000296140
000000c000137d48:  000000c000137d88  0000000000000018
000000c000137d58:  000000c000498780  000000000073fd54 <net/http.(*Server).Serve+756>
000000c000137d68:  000000c000137e30  000000000073fc8d <net/http.(*Server).Serve+557>
000000c000137d78: <000000c000402480  0000000000e13940
000000c000137d88:  000000c000098780  0000000000f46000
000000c000137d98:  000000c000402540  000000c0003f9200
000000c000137da8:  0000000000f46000  000000c000402540
000000c000137db8:  0000000000000000  0000000000f53a20
000000c000137dc8:  000000000042f231 <runtime.deferreturn+145>  0000000000f46000
000000c000137dd8:  000000c000296b00  00000000004735fe <sync.(*RWMutex).Unlock+126>
000000c000137de8:  0000000d00402540  000000c000138b38
000000c000137df8:  00007f7950798330  0000000000000001
000000c000137e08:  0000000000000000  000000c000582000
000000c000137e18:  000000c000000000  0000000000008000
000000c000137e28:  0000000000000000  0000000000000000
000000c000137e38: !0000000000000000 >0000000000000212
000000c000137e48:  0000000000000070  0000000000000002
000000c000137e58:  0000000000000001  0000000000000049
000000c000137e68:  0000000000000025  000000c0000f4070
000000c000137e78:  000000c000138b38  000000c00002ca00
000000c000137e88:  0000000000000067  ffffffffffffffe0
000000c000137e98:  00000000004aba60 <syscall.Syscall+48>  000000c000138ae8
000000c000137ea8:  00000000004aba60 <syscall.Syscall+48>  0000000000000212
000000c000137eb8:  0000000000000033  0000000000000000
000000c000137ec8:  0000000000000000  0000000000000000
000000c000137ed8:  0000000000000000  000000c000137fc0
000000c000137ee8:  0000000000000000  0000000000000000
000000c000137ef8:  0000000000f42fc0  000000000000000c
000000c000137f08:  000000c000304120  000000c0002cc088
000000c000137f18:  000000c000285000  0000000000f1b660
000000c000137f28:  0000000000000000  0000000000f1b630
000000c000137f38:  000000c000137f70
fatal error: unknown caller pc
runtime stack:
runtime.throw(0xde01c3, 0x11)
/usr/local/go/src/runtime/panic.go:617 +0x72
runtime.gentraceback(0xffffffffffffffff, 0xffffffffffffffff, 0x0, 0xc000498780, 0x0, 0x0, 0x7fffffff, 0x7f794b46cb08, 0x0, 0x0, ...)
/usr/local/go/src/runtime/traceback.go:275 +0x1cd1
runtime.scanstack(0xc000498780, 0xc00002dc70)
/usr/local/go/src/runtime/mgcmark.go:711 +0x15f
runtime.scang(0xc000498780, 0xc00002dc70)
/usr/local/go/src/runtime/proc.go:888 +0x1e4
runtime.markroot.func1()
/usr/local/go/src/runtime/mgcmark.go:221 +0x6e
runtime.markroot(0xc00002dc70, 0x7f790000001b)
/usr/local/go/src/runtime/mgcmark.go:202 +0x2e9
runtime.gcDrain(0xc00002dc70, 0x3)
/usr/local/go/src/runtime/mgcmark.go:899 +0x112
runtime.gcBgMarkWorker.func2()
/usr/local/go/src/runtime/mgc.go:1903 +0x80
runtime.systemstack(0x0)
/usr/local/go/src/runtime/asm_amd64.s:351 +0x66
runtime.mstart()
/usr/local/go/src/runtime/proc.go:1153

goroutine 50 [GC worker (idle), 2 minutes]:
runtime.systemstack_switch()
/usr/local/go/src/runtime/asm_amd64.s:311 fp=0xc0002d0760 sp=0xc0002d0758 pc=0x45aa40
runtime.gcBgMarkWorker(0xc00002ca00)
/usr/local/go/src/runtime/mgc.go:1890 +0x1be fp=0xc0002d07d8 sp=0xc0002d0760 pc=0x41e4be
runtime.goexit()
/usr/local/go/src/runtime/asm_amd64.s:1337 +0x1 fp=0xc0002d07e0 sp=0xc0002d07d8 pc=0x45cb11
created by runtime.gcBgMarkStartWorkers
/usr/local/go/src/runtime/mgc.go:1784 +0x77

goroutine 1 [IO wait, 4 minutes]:
internal/poll.runtime_pollWait(0x7f7949bf7ec8, 0x72, 0x0)
/usr/local/go/src/runtime/netpoll.go:182 +0x56
internal/poll.(*pollDesc).wait(0xc000331d18, 0x72, 0x0, 0x0, 0xdd741e)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:87 +0x9b
internal/poll.(*pollDesc).waitRead(...)
/usr/local/go/src/internal/poll/fd_poll_runtime.go:92
internal/poll.(*FD).Accept(0xc000331d00, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0)
/usr/local/go/src/internal/poll/fd_unix.go:384 +0x1ba
net.(*netFD).accept(0xc000331d00, 0x7f7950a013b8, 0x203000, 0x203000)
/usr/local/go/src/net/fd_unix.go:238 +0x42
net.(*TCPListener).accept(0xc00055cfb8, 0xc00002a500, 0x40e169, 0xc000098780)
/usr/local/go/src/net/tcpsock_posix.go:139 +0x32
net.(*TCPListener).AcceptTCP(0xc00055cfb8, 0x83e35326, 0xa0df53898b7b9c8, 0x44e458)
/usr/local/go/src/net/tcpsock.go:247 +0x48
go-template/vendor/github.com/fvbock/endless.(*endlessListener).Accept(0xc0003fb100, 0xc00052bab0, 0x18, 0xc000000180, 0x73fd54)
/home/jessica/0909/go-template/.workspace/src/go-template/vendor/github.com/fvbock/endless/endless.go:490 +0x46
net/http.(*Server).Serve(0xc000383b00, 0xf41bc0, 0xc0003fb100, 0x0, 0x0)
/usr/local/go/src/net/http/server.go:2859 +0x22d
go-template/vendor/github.com/fvbock/endless.(*endlessServer).Serve(0xc000383b00, 0x0, 0x0)
/home/jessica/0909/go-template/.workspace/src/go-template/vendor/github.com/fvbock/endless/endless.go:195 +0xec
go-template/vendor/github.com/fvbock/endless.(*endlessServer).ListenAndServe(0xc000383b00, 0xc, 0xf30cc0)
/home/jessica/0909/go-template/.workspace/src/go-template/vendor/github.com/fvbock/endless/endless.go:229 +0x1bf
go-template/vendor/github.com/fvbock/endless.ListenAndServe(0xc000284f90, 0xc, 0xf30cc0, 0xc0003839e0, 0xf56ce0, 0xc0003839e0)
/home/jessica/0909/go-template/.workspace/src/go-template/vendor/github.com/fvbock/endless/endless.go:153 +0x57
go-template/fcs-go/http.Start()
/home/jessica/0909/go-template/.workspace/src/go-template/fcs-go/http/http.go:40 +0x1f2
go-template/fcs-go/cmd.startServices()
/home/jessica/0909/go-template/.workspace/src/go-template/fcs-go/cmd/root.go:70 +0xe0
go-template/fcs-go/cmd.glob...func1(0x16fb2e0, 0xc000296ea0, 0x0, 0x2)
/home/jessica/0909/go-template/.workspace/src/go-template/fcs-go/cmd/root.go:85 +0x33
go-template/vendor/github.com/spf13/cobra.(*Command).execute(0x16fb2e0, 0xc000020190, 0x2, 0x2, 0x16fb2e0, 0xc000020190)
/home/jessica/0909/go-template/.workspace/src/go-template/vendor/github.com/spf13/cobra/command.go:766 +0x2ae
go-template/vendor/github.com/spf13/cobra.(*Command).ExecuteC(0x16fb2e0, 0xc0001adf68, 0xbfc7ee, 0x16fb2e0)
/home/jessica/0909/go-template/.workspace/src/go-template/vendor/github.com/spf13/cobra/command.go:852 +0x2ec
go-template/vendor/github.com/spf13/cobra.(*Command).Execute(...)
/home/jessica/0909/go-template/.workspace/src/go-template/vendor/github.com/spf13/cobra/command.go:800
go-template/fcs-go/cmd.Execute()
/home/jessica/0909/go-template/.workspace/src/go-template/fcs-go/cmd/root.go:92 +0x32
main.main()
/home/jessica/0909/go-template/.workspace/src/go-template/fcs-go/main.go:8 +0x20

goroutine 18 [syscall, 22848 minutes]:
os/signal.signal_recv(0x0)
/usr/local/go/src/runtime/sigqueue.go:139 +0x9c
os/signal.loop()
/

更多关于Golang中net/http.(*Server).Serve的panic问题分析的实战教程也可以访问 https://www.itying.com/category-94-b0.html

1 回复

更多关于Golang中net/http.(*Server).Serve的panic问题分析的实战系列教程也可以访问 https://www.itying.com/category-94-b0.html


从panic堆栈信息来看,这是一个典型的Go运行时栈损坏问题。关键线索是 runtime: unexpected return pc for net/http.(*Server).Serve called from 0x0 以及 fatal error: unknown caller pc,这表明goroutine栈上保存的返回地址(return PC)已被破坏。

问题分析

这个panic发生在垃圾回收期间,GC尝试扫描goroutine栈时发现无效的返回地址。通常由以下原因导致:

  1. 内存损坏 - 野指针或缓冲区溢出
  2. CGO问题 - C代码破坏了Go的运行时栈
  3. 并发数据竞争 - 尽管race检测未发现,但可能存在数据竞争

可能的解决方案

1. 检查CGO使用

go env 中的 CGO_ENABLED="1" 只说明CGO处于启用状态(这是默认值),并不能证明程序实际使用了CGO。请先确认项目或其依赖中是否存在 import "C";如果存在,再检查是否有C代码破坏了内存:

// 示例:检查CGO调用
/*
#include <stdlib.h>
*/
import "C"
import "unsafe"

func exampleCGO() {
    // 确保C分配的内存正确释放
    cstr := C.CString("test")
    defer C.free(unsafe.Pointer(cstr)) // 必须释放
    
    // 避免在C和Go之间传递指针时出现悬垂指针
}

2. 检查并发访问

尽管race检测未发现问题,但可以添加更详细的同步:

import (
    "net"
    "net/http"
    "sync"
)

// SafeServer pairs an *http.Server with a lock guarding access to it.
type SafeServer struct {
    server *http.Server
    mu     sync.RWMutex // guards the server field
}

// Serve accepts connections on l using the wrapped server and returns
// whatever (*http.Server).Serve returns.
func (s *SafeServer) Serve(l net.Listener) error {
    // Only the read of s.server needs the lock. Serve blocks for as long as
    // the listener is open, so holding mu across the call would stall every
    // other user of the lock until the server stops.
    s.mu.RLock()
    srv := s.server
    s.mu.RUnlock()

    return srv.Serve(l)
}

3. 升级Go版本

Go 1.12.7相对较旧,升级到更新的版本(至少1.13+)可能修复已知的运行时问题:

# Upgrade Go. Delete the old tree before unpacking: extracting a release over
# an existing /usr/local/go mixes files from two versions and is known to
# produce a broken installation.
wget https://go.dev/dl/go1.19.linux-amd64.tar.gz
sudo rm -rf /usr/local/go
sudo tar -C /usr/local -xzf go1.19.linux-amd64.tar.gz

4. 添加恢复机制

在Serve调用周围添加recover(注意:recover只能捕获普通的panic,无法拦截本例中由运行时抛出的 fatal error,因此它只能作为兜底手段,并不能解决这次崩溃):

import (
    "log"
    "net"
    "net/http"
    "runtime/debug"
)

// safeServe runs server.Serve(l) and logs, rather than propagates, both a
// panic raised on the calling goroutine and any unexpected error returned by
// Serve.
//
// The deferred recover only sees ordinary panics. Fatal runtime errors (the
// "fatal error: ..." kind) abort the process and cannot be recovered.
func safeServe(server *http.Server, l net.Listener) {
    defer func() {
        if r := recover(); r != nil {
            log.Printf("Recovered from panic in Serve: %v\n%s", r, debug.Stack())
        }
    }()

    // Serve always returns a non-nil error. http.ErrServerClosed is what it
    // returns after Shutdown or Close, i.e. a normal stop, so it is not
    // reported as a failure.
    if err := server.Serve(l); err != nil && err != http.ErrServerClosed {
        log.Printf("Serve error: %v", err)
    }
}

5. 检查endless库

从堆栈看到使用了github.com/fvbock/endless。检查该库的版本,考虑替换为标准库的优雅关闭(注意:http.Server.Shutdown 只提供优雅关闭,并不提供endless那样的零停机重启):

// Alternative to endless: graceful shutdown with the standard library only.
import (
    "context"
    "log"
    "net/http"
    "os"
    "os/signal"
    "syscall"
    "time"
)

// gracefulShutdown blocks until the process receives SIGINT or SIGTERM, then
// asks server to shut down, giving in-flight requests up to five seconds to
// finish. If Shutdown reports an error (for example because the deadline
// passed) the process exits through log.Fatalf.
func gracefulShutdown(server *http.Server) {
    quit := make(chan os.Signal, 1)
    signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
    <-quit
    // Stop relaying to quit. If nothing else in the program is registered
    // for these signals, a second one during shutdown is then handled the
    // default way instead of being silently dropped.
    signal.Stop(quit)

    ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
    err := server.Shutdown(ctx)
    // Release the timer before a possible log.Fatalf, which exits without
    // running deferred calls.
    cancel()

    if err != nil {
        log.Fatalf("Server forced to shutdown: %v", err)
    }
}

6. 内存分析

添加内存分析来观察内存分配情况(pprof本身不能直接检测内存损坏,但有助于发现异常的分配行为):

import (
    "net/http"
    "net/http/pprof"
    "runtime"
)

// enablePProf mounts the net/http/pprof endpoints under /debug/pprof/ on
// server while keeping the handler the server already had, and raises the
// heap profiling rate so that every allocation is sampled.
//
// NOTE(review): this exposes profiling data on the same listener as the
// application; restrict access if that listener is reachable from outside.
func enablePProf(server *http.Server) {
    // Requests outside /debug/pprof/ keep going to the previous handler.
    // A nil Handler means http.DefaultServeMux, as it does for http.Server.
    app := server.Handler
    if app == nil {
        app = http.DefaultServeMux
    }

    mux := http.NewServeMux()
    mux.Handle("/", app)
    // Index serves the listing and every named runtime profile (heap,
    // goroutine, ...), so those need no routes of their own. The four
    // handlers below are not runtime profiles and must be registered
    // explicitly, otherwise Index answers them with 404.
    mux.HandleFunc("/debug/pprof/", pprof.Index)
    mux.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline)
    mux.HandleFunc("/debug/pprof/profile", pprof.Profile)
    mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol)
    mux.HandleFunc("/debug/pprof/trace", pprof.Trace)

    // Sample every allocation. This is expensive, and the runtime docs ask
    // for the rate to be changed once, as early in the program as possible.
    runtime.MemProfileRate = 1

    server.Handler = mux
}

调试建议

  1. 启用更详细的内存检查
export GODEBUG=allocfreetrace=1
go run -race main.go
  2. 使用GDB调试(如果问题可复现):
gdb ./your-program
(gdb) catch syscall exit_group
(gdb) run
  3. 检查系统限制:
ulimit -a
# Make sure the file descriptor limit is high enough
ulimit -n 65535

这个panic表明运行时栈结构被破坏,最可能的原因是CGO代码中的内存损坏或并发访问问题。建议优先升级Go版本并检查所有CGO调用。

回到顶部