go源码剖析笔记
01 Sep 2020
|
|
环境
go version #go version go1.10.4 linux/amd64
lsb_release -d #Description: Ubuntu 18.04.1 LTS
gdb --version #GNU gdb (Ubuntu 8.1-0ubuntu3.2) 8.1.0.20180409-git
引导
测试代码 test.go
package main
// main prints a fixed greeting with the built-in println; it is the minimal
// program used for the gdb bootstrap walk-through.
func main() {
	println("hello, world")
}
go build -gcflags "-N -l" -o test test.go
gdb test
(gdb) info files
Symbols from "/mnt/c/Program Files/cmder/test".
Local exec file:
`/mnt/c/Program Files/cmder/test', file type elf64-x86-64.
Entry point: 0x4477c0
0x0000000000401000 - 0x000000000044c213 is .text
0x000000000044d000 - 0x00000000004757a3 is .rodata
0x00000000004758e0 - 0x0000000000475f80 is .typelink
0x0000000000475f80 - 0x0000000000475f88 is .itablink
0x0000000000475f88 - 0x0000000000475f88 is .gosymtab
0x0000000000475fa0 - 0x00000000004a3630 is .gopclntab
0x00000000004a4000 - 0x00000000004a4a08 is .noptrdata
0x00000000004a4a20 - 0x00000000004a65b0 is .data
0x00000000004a65c0 - 0x00000000004c2888 is .bss
0x00000000004c28a0 - 0x00000000004c4e58 is .noptrbss
0x0000000000400f9c - 0x0000000000401000 is .note.go.buildid
(gdb) b *0x4477c0
Breakpoint 1 at 0x4477c0: file /usr/lib/go-1.10/src/runtime/rt0_linux_amd64.s, line 8.
版本对应的汇编有变化,没有明显的main,但是入口肯定是_rt0_amd64
#include "textflag.h"
TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8
JMP _rt0_amd64(SB)
TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0
JMP _rt0_amd64_lib(SB)
(gdb) b _rt0_amd64
Breakpoint 2 at 0x444100: file /usr/lib/go-1.10/src/runtime/asm_amd64.s, line 15.
对应汇编是书里的runtime.rt0_go
TEXT _rt0_amd64(SB),NOSPLIT,$-8
MOVQ 0(SP), DI // argc
LEAQ 8(SP), SI // argv
JMP runtime·rt0_go(SB)
b runtime.rt0_go
Breakpoint 3 at 0x444110: file /usr/lib/go-1.10/src/runtime/asm_amd64.s, line 89.
;前面有很多对于汇编指令cpu类型的判断,参数入栈等等
// create a new goroutine to start program
MOVQ $runtime·mainPC(SB), AX // entry
PUSHQ AX
PUSHQ $0 // arg size
CALL runtime·newproc(SB)
POPQ AX
POPQ AX
// start this M
CALL runtime·mstart(SB)
MOVL $0xf1, 0xf1 // crash
RET
DATA runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL runtime·mainPC(SB),RODATA,$8
b runtime.schedinit
Breakpoint 6 at 0x423a60: file /usr/lib/go-1.10/src/runtime/proc.go, line 477.
b runtime.main
Breakpoint 4 at 0x4228b0: file /usr/lib/go-1.10/src/runtime/proc.go, line 109.
schedinit 入口
// The bootstrap sequence is:
//
// call osinit
// call schedinit
// make & queue new G
// call runtime·mstart
//
// The new G calls runtime·main.
func schedinit() {
// raceinit must be the first call to race detector.
// In particular, it must be done before mallocinit below calls racemapshadow.
_g_ := getg()
if raceenabled {
_g_.racectx, raceprocctx0 = raceinit()
}
sched.maxmcount = 10000
tracebackinit()
moduledataverify()
stackinit()
mallocinit()
mcommoninit(_g_.m)
alginit() // maps must not be used before this call
modulesinit() // provides activeModules
typelinksinit() // uses maps, activeModules
itabsinit() // uses activeModules
msigsave(_g_.m)
initSigmask = _g_.m.sigmask
goargs()
goenvs()
//处理GODEBUG、GOTRACEBACK环境变量
parsedebugvars()
//垃圾回收器初始化
gcinit()
sched.lastpoll = uint64(nanotime())
//通过CPU core和GOMAXPROCS确定P数量
procs := ncpu
if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
procs = n
}
// 调整P数量
if procresize(procs) != nil {
throw("unknown runnable goroutine during bootstrap")
}
// For cgocheck > 1, we turn on the write barrier at all times
// and check all pointer writes. We can't do this until after
// procresize because the write barrier needs a P.
if debug.cgocheck > 1 {
writeBarrier.cgo = true
writeBarrier.enabled = true
for _, p := range allp {
p.wbBuf.reset()
}
}
下一步是runtime.main
// The main goroutine.
func main() {
g := getg()
// Racectx of m0->g0 is used only as the parent of the main goroutine.
// It must not be used for anything else.
g.m.g0.racectx = 0
// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
// Using decimal instead of binary GB and MB because
// they look nicer in the stack overflow failure message.
if sys.PtrSize == 8 {
maxstacksize = 1000000000
} else {
maxstacksize = 250000000
}
// Allow newproc to start new Ms.
//启动系统后台监控/定期垃圾回收,并发任务调度相关
mainStarted = true
systemstack(func() {
newm(sysmon, nil)
})
// Lock the main goroutine onto this, the main OS thread,
// during initialization. Most programs won't care, but a few
// do require certain calls to be made by the main thread.
// Those can arrange for main.main to run in the main thread
// by calling runtime.LockOSThread during initialization
// to preserve the lock.
lockOSThread()
if g.m != &m0 {
throw("runtime.main not on m0")
}
runtime_init() // must be before defer
if nanotime() == 0 {
throw("nanotime returning zero")
}
// Defer unlock so that runtime.Goexit during init does the unlock too.
needUnlock := true
defer func() {
if needUnlock {
unlockOSThread()
}
}()
// Record when the world started. Must be after runtime_init
// because nanotime on some platforms depends on startNano.
runtimeInitTime = nanotime()
gcenable()
main_init_done = make(chan bool)
if iscgo {
if _cgo_thread_start == nil {
throw("_cgo_thread_start missing")
}
if GOOS != "windows" {
if _cgo_setenv == nil {
throw("_cgo_setenv missing")
}
if _cgo_unsetenv == nil {
throw("_cgo_unsetenv missing")
}
}
if _cgo_notify_runtime_init_done == nil {
throw("_cgo_notify_runtime_init_done missing")
}
// Start the template thread in case we enter Go from
// a C-created thread and need to create a new thread.
startTemplateThread()
cgocall(_cgo_notify_runtime_init_done, nil)
}
fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
fn()
close(main_init_done)
needUnlock = false
unlockOSThread()
if isarchive || islibrary {
// A program compiled with -buildmode=c-archive or c-shared
// has a main, but it is not executed.
return
}
fn = main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
fn()
if raceenabled {
racefini()
}
// Make racy client program work: if panicking on
// another goroutine at the same time as main returns,
// let the other goroutine finish printing the panic trace.
// Once it does, it will exit. See issues 3934 and 20018.
if atomic.Load(&runningPanicDefers) != 0 {
// Running deferred functions should not take long.
for c := 0; c < 1000; c++ {
if atomic.Load(&runningPanicDefers) == 0 {
break
}
Gosched()
}
}
if atomic.Load(&panicking) != 0 {
gopark(nil, nil, "panicwait", traceEvGoStop, 1)
}
exit(0)
//? 这啥 —— 兜底逻辑:exit(0)正常不会返回;万一返回了,就通过向nil指针写入来强制进程崩溃
for {
var x *int32
*x = 0
}
一个复杂示例
//cat lib/sum.go
package lib
// init prints "sum.init" when package lib is initialized, so the position of
// this package in the initialization order shows up in the program output.
func init() {
println("sum.init")
}
// Sum returns the total of all the given integers. Called with no arguments
// it returns 0.
func Sum(x ...int) int {
	total := 0
	for idx := 0; idx < len(x); idx++ {
		total += x[idx]
	}
	return total
}
//cat test.go
package main
import (
"./lib"
)
// init prints "test.init" when this file's initializer runs.
func init() {
println("test.init")
}
// test prints the value returned by lib.Sum for the arguments 1, 2 and 3.
func test() {
println(lib.Sum(1,2,3))
}
//cat main.go
package main
import (
_ "net/http"
)
// init is the first initializer declared in main.go; it prints "main.init.2".
func init() {
println("main.init.2")
}
// main calls test, which is declared in test.go of the same package.
func main() {
test()
}
// init is the second initializer declared in main.go; it prints "main.init.1".
func init() {
println("main.init.1")
}
执行结果
go build -gcflags "-N -l" -o test
./test
sum.init
main.init.2
main.init.1
test.init
6
查看反汇编
;go tool objdump -s "runtime\.init\b" test
TEXT runtime.init.0(SB) /usr/lib/go-1.10/src/runtime/cpuflags_amd64.go
TEXT runtime.init.1(SB) /usr/lib/go-1.10/src/runtime/mgcwork.go
mgcwork.go:25 0x420860 c3 RET
TEXT runtime.init.2(SB) /usr/lib/go-1.10/src/runtime/mstats.go
mstats.go:438 0x4260d0 64488b0c25f8ffffff MOVQ
TEXT runtime.init.3(SB) /usr/lib/go-1.10/src/runtime/panic.go
TEXT runtime.init.4(SB) /usr/lib/go-1.10/src/runtime/proc.go
TEXT runtime.init.5(SB) /usr/lib/go-1.10/src/runtime/signal_unix.go
signal_unix.go:64 0x43e450 c3 RET
TEXT runtime.init(SB) <autogenerated>
;go tool objdump -s "main\.init\b" test
TEXT main.init.0(SB) /mnt/c/Program Files/cmder/main.go
TEXT main.init.1(SB) /mnt/c/Program Files/cmder/main.go
TEXT main.init.2(SB) /mnt/c/Program Files/cmder/test.go
TEXT main.init(SB) <autogenerated>
<autogenerated>:1 0x5e31ec e81f63ffff CALL net/http.init(SB)
<autogenerated>:1 0x5e31f1 e83afdffff CALL _/mnt/c/Program_Files/cmder/lib.init(SB)
<autogenerated>:1 0x5e31f6 e895fdffff CALL main.init.0(SB)
<autogenerated>:1 0x5e31fb e820feffff CALL main.init.1(SB)
<autogenerated>:1 0x5e3200 e87bfeffff CALL main.init.2(SB)
<autogenerated>:1 0x5e3205 c605822a1f0002 MOVB $0x2, main.initdone.(SB)
<autogenerated>:1 0x5e320c 488b2c24 MOVQ 0(SP), BP
<autogenerated>:1 0x5e3210 4883c408 ADDQ $0x8, SP
<autogenerated>:1 0x5e3214 c3 RET
<autogenerated>:1 0x5e3215 e80600e7ff CALL runtime.morestack_noctxt(SB)
<autogenerated>:1 0x5e321a eb84 JMP main.init(SB)
结论
所有init都会在同一个goroutine执行
所有init函数结束后才会执行main.main
内存分配
基本策略
- 每次从操作系统申请一大块内存,减少系统调用
- 内存分配器
- 大块内存预先切成小块构成链表
- 分配就从链表里提取一块
- 回收就放回链表
- 空闲过多会归还给系统降低整体开销
内存块
- span page 大块内存
- object切分span多个小块
- 哦,抄的tcmalloc
初始化动作
三个数组组成内存管理结构
- spans,管理span的,按页对应,地址按页对齐能快速定位(?这里的原理不太清楚,我对页这些东西计算一直处于一知半解水平)
- bitmap 为每个对象提供4bit标记位,保存指针,GC标记
- arena 申请内存,用户可分配上限
arena和spans bitmap存在映射关系,三者可以按需同步线性扩张
都用mheap维护,在mallocinit里初始化
来个示例
//test.go
package main
import(
"fmt"
"os"
"github.com/shirou/gopsutil/process"
)
var ps *process.Process
func mem(n int) {
if ps == nil {
p, err := process.NewProcess(int32(os.Getpid()))
if err != nil {
panic(err)
}
ps = p
}
mem, _ := ps.MemoryInfoEx()
fmt.Printf("%d, VMS:%d MB, RSS:%d MB\n", n, mem,.VMS>>20, mem.RSS>>20)
}
// main reports process memory usage at three points: before allocating,
// right after allocating a 10 x 1 MB byte array, and after each 1 MB row of
// that array has been written.
func main() {
	mem(1)

	// Short variable declaration needs ":="; a bare ":" does not compile.
	data := new([10][1024 * 1024]byte)
	mem(2)

	// Write every byte of one row at a time and report after each row.
	for i := range data {
		for x, n := 0, len(data[i]); x < n; x++ {
			data[i][x] = 1
		}
		mem(3)
	}
}
分配
不要以为new一定会分配在堆上:随着编译器的内联和逃逸分析优化,对象可能根本不在堆上分配(下面禁用内联时调用了runtime.newobject,默认构建时该调用被优化掉)
package main
import ()
// test stores 0xAABB in an int obtained from new(int) and returns the pointer.
func test() *int {
x := new(int)
*x = 0xAABB
return x
}
// main prints the int that the pointer returned by test refers to.
func main() {
println(*test())
}
go build -gcflags "-l" -o test test.go
go tool objdump -s "main\.test" test
TEXT main.test(SB) /mnt/c/Program Files/cmder/test.go
test.go:4 0x44c150 64488b0c25f8ffffff MOVQ FS:0xfffffff8, CX
test.go:4 0x44c159 483b6110 CMPQ 0x10(CX), SP
test.go:4 0x44c15d 7639 JBE 0x44c198
test.go:4 0x44c15f 4883ec18 SUBQ $0x18, SP
test.go:4 0x44c163 48896c2410 MOVQ BP, 0x10(SP)
test.go:4 0x44c168 488d6c2410 LEAQ 0x10(SP), BP
test.go:5 0x44c16d 488d05acac0000 LEAQ 0xacac(IP), AX
test.go:5 0x44c174 48890424 MOVQ AX, 0(SP)
test.go:5 0x44c178 e8a3effbff CALL runtime.newobject(SB)
test.go:5 0x44c17d 488b442408 MOVQ 0x8(SP), AX
test.go:6 0x44c182 48c700bbaa0000 MOVQ $0xaabb, 0(AX)
test.go:7 0x44c189 4889442420 MOVQ AX, 0x20(SP)
test.go:7 0x44c18e 488b6c2410 MOVQ 0x10(SP), BP
test.go:7 0x44c193 4883c418 ADDQ $0x18, SP
test.go:7 0x44c197 c3 RET
test.go:4 0x44c198 e8d383ffff CALL runtime.morestack_noctxt(SB)
test.go:4 0x44c19d ebb1 JMP main.test(SB)
go build -o test test.go
go tool objdump -s "main\.main" test
TEXT main.main(SB) /mnt/c/Program Files/cmder/test.go
test.go:10 0x44c150 64488b0c25f8ffffff MOVQ FS:0xfffffff8, CX
test.go:10 0x44c159 483b6110 CMPQ 0x10(CX), SP
test.go:10 0x44c15d 7634 JBE 0x44c193
test.go:10 0x44c15f 4883ec10 SUBQ $0x10, SP
test.go:10 0x44c163 48896c2408 MOVQ BP, 0x8(SP)
test.go:10 0x44c168 488d6c2408 LEAQ 0x8(SP), BP
test.go:11 0x44c16d e88e59fdff CALL runtime.printlock(SB)
test.go:11 0x44c172 48c70424bbaa0000 MOVQ $0xaabb, 0(SP)
test.go:11 0x44c17a e80161fdff CALL runtime.printint(SB)
test.go:11 0x44c17f e80c5cfdff CALL runtime.printnl(SB)
test.go:11 0x44c184 e8f759fdff CALL runtime.printunlock(SB)
test.go:12 0x44c189 488b6c2408 MOVQ 0x8(SP), BP
test.go:12 0x44c18e 4883c410 ADDQ $0x10, SP
test.go:12 0x44c192 c3 RET
test.go:10 0x44c193 e8d883ffff CALL runtime.morestack_noctxt(SB)
test.go:10 0x44c198 ebb6 JMP main.main(SB)
逃逸分析:go build -gcflags "-m"
分配思路 malloc.go
- 大对象heap
- 小对象cache.alloc[sizeclass].freelist object
- 微小对象使用cache.tiny object
回收
回收以span为单位
释放
sysmon监控任务来搞
具体释放是madvise(v, n, _MADV_DONTNEED)
系统来决定。如果物理内存资源充足,就不会回收避免无谓的损耗,不过再次使用肯定会pagefault然后分配新的内存
垃圾回收
缩短STW时间
抑制堆增长 充分利用CPU资源
- 三色标记和写屏障
- 所有都是白色
- 扫描出所有可达对象,标记成灰色,放入待处理队列
- 队列提取出灰色对象,将其引用对象标记为灰色放入队列,自身标记为黑色
- 写屏障监视对象内存修改,重新标色或放回队列
gcController控制
辅助回收,避免分配速度大于后台标记导致的堆恶性扩张