现网的资源采集组件出现以下致命报错:进程直接崩溃,且无法通过 recover 捕获(该错误由 runtime.throw 抛出,属于 fatal error 而非 panic,因此调用栈中的 TryCatch 也无法拦截)。初步判断有两个可能原因:

  • Golang程序的并发控制BUG。该程序使用Go 1.20.5编译。
  • 内存硬件故障。可能性较低。

需要进一步跟进问题。

unexpected fault address 0x0
fatal error: fault
[signal SIGSEGV: segmentation violation code=0x80 addr=0x0 pc=0x1219cfb]

goroutine 78941 [running]:
runtime.throw({0x1c94002?, 0x5a00000000000060?})
	runtime/panic.go:1047 +0x5d fp=0xc02b3a5498 sp=0xc02b3a5468 pc=0x438e7d
runtime.sigpanic()
	runtime/signal_unix.go:855 +0x28a fp=0xc02b3a54f8 sp=0xc02b3a5498 pc=0x44ffaa
github.com/prometheus/client_model/go.(*LabelPair).GetName(...)
	github.com/prometheus/client_model@v0.5.0/go/metrics.pb.go:154
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/metric-helper/common-metrics.(*Manager).PodStorageVolumeUsage(0xc011259480, {0x1fc18a0, 0xc000126000}, {0x0?, 0x0?, 0xc02bd64840?})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/metric-helper/common-metrics/common_metrics_pod_storage_volume_usage.go:92 +0x77b fp=0xc02b3a57d8 sp=0xc02b3a54f8 pc=0x1219cfb
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/metric-model.CommonMetrics.PodStorageVolumeUsage-fm({0x1fc18a0?, 0xc000126000?}, {0x0?, 0xc010b89d80?, 0x1b62fa0?})
	<autogenerated>:1 +0x50 fp=0xc02b3a5818 sp=0xc02b3a57d8 pc=0x121b750
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/metric-helper/common-metrics.(*Manager).GetBatch(0xc010b89d80?, {0x1fc18a0, 0xc000126000}, {0xc000ad6540, 0xc, 0xc02e98cc90?})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/metric-helper/common-metrics/common_metrics.go:110 +0xb0 fp=0xc02b3a58a0 sp=0xc02b3a5818 pc=0x120d030
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugins/libradb.(*Plugin).getUsedCommonMetrics(0x100000000741116?, {0x1fc18a0, 0xc000126000}, {{{0xc010b89d80, 0x1d}, {0xc012135140, 0xb}, 0xc02e98d7a0, 0xc02e98cc90, 0x4a817c800, ...}, ...})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugins/libradb/libradb_common_metrics.go:33 +0x584 fp=0xc02b3a5940 sp=0xc02b3a58a0 pc=0x18c36a4
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugins/libradb.(*Plugin).Run(0x747c6f?, {0x1fc18a0, 0xc000126000}, {{{0xc010b89d80, 0x1d}, {0xc012135140, 0xb}, 0xc02e98d7a0, 0xc02e98cc90, 0x4a817c800, ...}, ...})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugins/libradb/libradb.go:100 +0x350 fp=0xc02b3a5a40 sp=0xc02b3a5940 pc=0x18c2f50
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager.(*Manager).callPluginByScrapeItem.func1({0x1fc18a0, 0xc000126000})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager/plugin_manager_plugin.go:97 +0x83 fp=0xc02b3a5c00 sp=0xc02b3a5a40 pc=0x120bea3
github.com/gogf/gf/v2/util/gutil.Try({0x1fc18a0?, 0xc000126000?}, 0xc02e0d7cac?)
	github.com/gogf/gf/v2@v2.6.1/util/gutil/gutil_try_catch.go:36 +0x71 fp=0xc02b3a5c50 sp=0xc02b3a5c00 pc=0x7eff91
github.com/gogf/gf/v2/util/gutil.TryCatch({0x1fc18a0, 0xc000126000}, 0x4935d4?, 0xc02b3a5d00)
	github.com/gogf/gf/v2@v2.6.1/util/gutil/gutil_try_catch.go:49 +0x39 fp=0xc02b3a5c80 sp=0xc02b3a5c50 pc=0x7f0199
github.com/gogf/gf/v2/frame/g.TryCatch(...)
	github.com/gogf/gf/v2@v2.6.1/frame/g/g_func.go:95
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager.(*Manager).callPluginByScrapeItem(0x18e9e3ea368?, {0x1fc18a0?, 0xc000126000?}, {{0xc010b89d80, 0x1d}, {0xc012135140, 0xb}, {0xc0121351a8, 0x7}, 0x4a817c800, ...})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager/plugin_manager_plugin.go:96 +0x24e fp=0xc02b3a5ec8 sp=0xc02b3a5c80 pc=0x120bc0e
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager.(*Manager).startPluginExecutionTimely.func1.2({{0xc010b89d80, 0x1d}, {0xc012135140, 0xb}, {0xc0121351a8, 0x7}, 0x4a817c800, {0x1fc2a20, 0xc000128028}, 0xc007097140, ...})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager/plugin_manager_plugin.go:73 +0x58 fp=0xc02b3a5f60 sp=0xc02b3a5ec8 pc=0x120b998
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager.(*Manager).startPluginExecutionTimely.func1.3()
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager/plugin_manager_plugin.go:74 +0x42 fp=0xc02b3a5fe0 sp=0xc02b3a5f60 pc=0x120b902
runtime.goexit()
	runtime/asm_amd64.s:1598 +0x1 fp=0xc02b3a5fe8 sp=0xc02b3a5fe0 pc=0x46fde1
created by git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager.(*Manager).startPluginExecutionTimely.func1
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/pkg/plugin-manager/plugin_manager_plugin.go:71 +0x33b

goroutine 1 [chan receive, 74 minutes]:
runtime.gopark(0xc00049ed60?, 0xc00088edb8?, 0x93?, 0x76?, 0xc00088edb8?)
	runtime/proc.go:381 +0xd6 fp=0xc0006bad88 sp=0xc0006bad68 pc=0x43bbd6
runtime.chanrecv(0xc0000ce2a0, 0xc00088eee0, 0x1)
	runtime/chan.go:583 +0x49d fp=0xc0006bae18 sp=0xc0006bad88 pc=0x406f9d
runtime.chanrecv1(0xc0000ce2a0?, 0xc00003fc80?)
	runtime/chan.go:442 +0x18 fp=0xc0006bae40 sp=0xc0006bae18 pc=0x406a98
github.com/gogf/gf/v2/os/gproc.Listen()
	github.com/gogf/gf/v2@v2.6.1/os/gproc/gproc_signal.go:79 +0x9c fp=0xc0006baf28 sp=0xc0006bae40 pc=0x9dbcfc
github.com/gogf/gf/v2/frame/g.Listen(...)
	github.com/gogf/gf/v2@v2.6.1/frame/g/g_func.go:50
git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/app.(*Main).Run(0xc00049f190?, {0x1fc1910, 0xc0001ffb30}, {{}, {0x7ffe1177efee, 0x6}, {0x7ffe1177effe, 0xa}, {0x7ffe1177f014, 0xb}, ...})
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/internal/app/run.go:106 +0x30b fp=0xc0006bb050 sp=0xc0006baf28 pc=0x18d180b
runtime.call256(0xc000456180, 0xc000709b08, 0xc000265a40, 0xa0, 0xa0, 0xb8, 0xc0006bb670)
	runtime/asm_amd64.s:732 +0x6e fp=0xc0006bb160 sp=0xc0006bb050 pc=0x46e52e
runtime.reflectcall(0x1c00c20?, 0xc000328c80?, 0x4?, 0x1caf038?, 0x0?, 0x12?, 0x1c00c20?)
	<autogenerated>:1 +0x3c fp=0xc0006bb1a0 sp=0xc0006bb160 pc=0x47297c
reflect.Value.call({0x1a8b780?, 0x2e5c578?, 0x451676?}, {0x1c92964, 0x4}, {0xc000456150, 0x2, 0x2?})
	reflect/value.go:586 +0xb0b fp=0xc0006bb790 sp=0xc0006bb1a0 pc=0x4ed3cb
reflect.Value.Call({0x1a8b780?, 0x2e5c578?, 0xc0001ffb30?}, {0xc000456150?, 0x1c8a8a0?, 0x0?})
	reflect/value.go:370 +0xbc fp=0xc0006bb808 sp=0xc0006bb790 pc=0x4ec67c
github.com/gogf/gf/v2/os/gcmd.newCommandFromMethod.func1({0x1fc1910?, 0xc0001ff7a0?}, 0xc0007114c0)
	github.com/gogf/gf/v2@v2.6.1/os/gcmd/gcmd_command_object.go:324 +0x88d fp=0xc0006bba38 sp=0xc0006bb808 pc=0x96c34d
github.com/gogf/gf/v2/os/gcmd.(*Command).doRun(0xc0004fbee0, {0x1fc1910?, 0xc0001ff6b0?}, 0xc000018c00?)
	github.com/gogf/gf/v2@v2.6.1/os/gcmd/gcmd_command_run.go:152 +0xac2 fp=0xc0006bbd50 sp=0xc0006bba38 pc=0x96e8c2
github.com/gogf/gf/v2/os/gcmd.(*Command).RunWithValueError(0xc0004fbee0, {0x1fc1910, 0xc0001ff6b0})
	github.com/gogf/gf/v2@v2.6.1/os/gcmd/gcmd_command_run.go:82 +0x299 fp=0xc0006bbe20 sp=0xc0006bbd50 pc=0x96dd99
github.com/gogf/gf/v2/os/gcmd.(*Command).RunWithValue(0x1fca1e0?, {0x1fc1910, 0xc0001ff6b0})
	github.com/gogf/gf/v2@v2.6.1/os/gcmd/gcmd_command_run.go:41 +0x45 fp=0xc0006bbf28 sp=0xc0006bbe20 pc=0x96d645
github.com/gogf/gf/v2/os/gcmd.(*Command).Run(0x1fc18a0?, {0x1fc1910?, 0xc0001ff6b0?})
	github.com/gogf/gf/v2@v2.6.1/os/gcmd/gcmd_command_run.go:35 +0x25 fp=0xc0006bbf50 sp=0xc0006bbf28 pc=0x96d5c5
main.main()
	git.woa.com/khaos/platform/modules/khaos-metrics-agent/main.go:30 +0x55 fp=0xc0006bbf80 sp=0xc0006bbf50 pc=0x18d2135
runtime.main()
	runtime/proc.go:250 +0x207 fp=0xc0006bbfe0 sp=0xc0006bbf80 pc=0x43b7a7
runtime.goexit()
	runtime/asm_amd64.s:1598 +0x1 fp=0xc0006bbfe8 sp=0xc0006bbfe0 pc=0x46fde1

...




  • No labels

1 Comment

  1. 改用 Go 1.21 版本编译,并降低并发数执行后,最近一个月该问题未再复现。