环境

go version #go version go1.10.4 linux/amd64
lsb_release -d #Description:    Ubuntu 18.04.1 LTS
gdb --version #GNU gdb (Ubuntu 8.1-0ubuntu3.2) 8.1.0.20180409-git

引导

测试代码 test.go

package main
func main() {
    println("hello, world");
}
go build -gcflags "-N -l" -o test test.go
gdb test
(gdb) info files
Symbols from "/mnt/c/Program Files/cmder/test".
Local exec file:
        `/mnt/c/Program Files/cmder/test', file type elf64-x86-64.
        Entry point: 0x4477c0
        0x0000000000401000 - 0x000000000044c213 is .text
        0x000000000044d000 - 0x00000000004757a3 is .rodata
        0x00000000004758e0 - 0x0000000000475f80 is .typelink
        0x0000000000475f80 - 0x0000000000475f88 is .itablink
        0x0000000000475f88 - 0x0000000000475f88 is .gosymtab
        0x0000000000475fa0 - 0x00000000004a3630 is .gopclntab
        0x00000000004a4000 - 0x00000000004a4a08 is .noptrdata
        0x00000000004a4a20 - 0x00000000004a65b0 is .data
        0x00000000004a65c0 - 0x00000000004c2888 is .bss
        0x00000000004c28a0 - 0x00000000004c4e58 is .noptrbss
        0x0000000000400f9c - 0x0000000000401000 is .note.go.buildid
(gdb) b *0x4477c0
Breakpoint 1 at 0x4477c0: file /usr/lib/go-1.10/src/runtime/rt0_linux_amd64.s, line 8.

版本对应的汇编有变化,没有明显的main,但是入口肯定是_rt0_amd64

#include "textflag.h"

TEXT _rt0_amd64_linux(SB),NOSPLIT,$-8
        JMP     _rt0_amd64(SB)

TEXT _rt0_amd64_linux_lib(SB),NOSPLIT,$0
        JMP     _rt0_amd64_lib(SB)
        
        
  
(gdb) b _rt0_amd64
Breakpoint 2 at 0x444100: file /usr/lib/go-1.10/src/runtime/asm_amd64.s, line 15.

对应汇编是书里的runtime.rt0_go

TEXT _rt0_amd64(SB),NOSPLIT,$-8
        MOVQ    0(SP), DI       // argc
        LEAQ    8(SP), SI       // argv
        JMP     runtime·rt0_go(SB)
b runtime.rt0_go
Breakpoint 3 at 0x444110: file /usr/lib/go-1.10/src/runtime/asm_amd64.s, line 89.
       ;前面有很多对于汇编指令cpu类型的判断,参数入栈等等
       // create a new goroutine to start program
        MOVQ    $runtime·mainPC(SB), AX                // entry
        PUSHQ   AX
        PUSHQ   $0                      // arg size
        CALL    runtime·newproc(SB)
        POPQ    AX
        POPQ    AX

        // start this M
        CALL    runtime·mstart(SB)

        MOVL    $0xf1, 0xf1  // crash
        RET

DATA    runtime·mainPC+0(SB)/8,$runtime·main(SB)
GLOBL   runtime·mainPC(SB),RODATA,$8
b runtime.schedinit
Breakpoint 6 at 0x423a60: file /usr/lib/go-1.10/src/runtime/proc.go, line 477.
b runtime.main
Breakpoint 4 at 0x4228b0: file /usr/lib/go-1.10/src/runtime/proc.go, line 109.

schedinit 入口

// The bootstrap sequence is:
//
//      call osinit
//      call schedinit
//      make & queue new G
//      call runtime·mstart
//
// The new G calls runtime·main.
func schedinit() {
        // raceinit must be the first call to race detector.
        // In particular, it must be done before mallocinit below calls racemapshadow.
        _g_ := getg()
        if raceenabled {
                _g_.racectx, raceprocctx0 = raceinit()
        }

        sched.maxmcount = 10000

        tracebackinit()
        moduledataverify()
        stackinit()
        mallocinit()
        mcommoninit(_g_.m)
        alginit()       // maps must not be used before this call
        modulesinit()   // provides activeModules
        typelinksinit() // uses maps, activeModules
        itabsinit()     // uses activeModules
        
        msigsave(_g_.m)
        initSigmask = _g_.m.sigmask

        goargs()
        goenvs()
        //处理GODEBUG GOTRACEBACK宏
        parsedebugvars()
        //垃圾回收器初始化
        gcinit()

        sched.lastpoll = uint64(nanotime())
        //通过CPU core和GOMAXPROCS确定P数量
        procs := ncpu
        if n, ok := atoi32(gogetenv("GOMAXPROCS")); ok && n > 0 {
                procs = n
        }
        // 调整P数量
        if procresize(procs) != nil {
                throw("unknown runnable goroutine during bootstrap")
        }

        // For cgocheck > 1, we turn on the write barrier at all times
        // and check all pointer writes. We can't do this until after
        // procresize because the write barrier needs a P.
        if debug.cgocheck > 1 {
                writeBarrier.cgo = true
                writeBarrier.enabled = true
                for _, p := range allp {
                        p.wbBuf.reset()
                }
        }


下一步是runtime.main

// The main goroutine.
func main() {
        g := getg()

        // Racectx of m0->g0 is used only as the parent of the main goroutine.
        // It must not be used for anything else.
        g.m.g0.racectx = 0

        // Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
        // Using decimal instead of binary GB and MB because
        // they look nicer in the stack overflow failure message.
        if sys.PtrSize == 8 {
                maxstacksize = 1000000000
        } else {
                maxstacksize = 250000000
        }

        // Allow newproc to start new Ms.
        //启动系统后台监控/定期垃圾回收,并发任务调度相关
        mainStarted = true
        systemstack(func() {
                newm(sysmon, nil)
        })

        // Lock the main goroutine onto this, the main OS thread,
        // during initialization. Most programs won't care, but a few
        // do require certain calls to be made by the main thread.
        // Those can arrange for main.main to run in the main thread
        // by calling runtime.LockOSThread during initialization
        // to preserve the lock.
        lockOSThread()

        if g.m != &m0 {
                throw("runtime.main not on m0")
        }

        runtime_init() // must be before defer
        if nanotime() == 0 {
                throw("nanotime returning zero")
        }

        // Defer unlock so that runtime.Goexit during init does the unlock too.
        needUnlock := true
        defer func() {
                if needUnlock {
                        unlockOSThread()
                }
        }()
            // Record when the world started. Must be after runtime_init
        // because nanotime on some platforms depends on startNano.
        runtimeInitTime = nanotime()

        gcenable()

        main_init_done = make(chan bool)
        if iscgo {
                if _cgo_thread_start == nil {
                        throw("_cgo_thread_start missing")
                }
                if GOOS != "windows" {
                        if _cgo_setenv == nil {
                                throw("_cgo_setenv missing")
                        }
                        if _cgo_unsetenv == nil {
                                throw("_cgo_unsetenv missing")
                        }
                }
                if _cgo_notify_runtime_init_done == nil {
                        throw("_cgo_notify_runtime_init_done missing")
                }
                // Start the template thread in case we enter Go from
                // a C-created thread and need to create a new thread.
                startTemplateThread()
                cgocall(_cgo_notify_runtime_init_done, nil)
        }

        fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
        fn()
        close(main_init_done)

        needUnlock = false
        unlockOSThread()

        if isarchive || islibrary {
                // A program compiled with -buildmode=c-archive or c-shared
                // has a main, but it is not executed.
                return
        }
        fn = main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
        fn()
        if raceenabled {
                racefini()
        }
        // Make racy client program work: if panicking on
        // another goroutine at the same time as main returns,
        // let the other goroutine finish printing the panic trace.
        // Once it does, it will exit. See issues 3934 and 20018.
        if atomic.Load(&runningPanicDefers) != 0 {
                // Running deferred functions should not take long.
                for c := 0; c < 1000; c++ {
                        if atomic.Load(&runningPanicDefers) == 0 {
                                break
                        }
                        Gosched()
                }
        }
        if atomic.Load(&panicking) != 0 {
                gopark(nil, nil, "panicwait", traceEvGoStop, 1)
        }

        exit(0)
        //? 这啥
        for {
                var x *int32
                *x = 0
        }

一个复杂示例

//cat lib/sum.go
package lib
func init() {
    println("sum.init")
}

func Sum(x ...int) int {
    n  := 0
    for _, i := range x{
        n += i
    }
    return n
}
//cat test.go
package main
import (
    "./lib"
)
func init() {
    println("test.init")
}

func test() {
    println(lib.Sum(1,2,3))
}

//cat main.go
package main

import (
        _ "net/http"
)

func init() {
    println("main.init.2")
}

func main() {
    test()
}

func init() {
    println("main.init.1")
}

执行结果

go build -gcflags "-N -l" -o test
./test
sum.init
main.init.2
main.init.1
test.init
6

查看反汇编

;go tool objdump -s "runtime\.init\b" test
TEXT runtime.init.0(SB) /usr/lib/go-1.10/src/runtime/cpuflags_amd64.go
TEXT runtime.init.1(SB) /usr/lib/go-1.10/src/runtime/mgcwork.go
  mgcwork.go:25         0x420860                c3                      RET
TEXT runtime.init.2(SB) /usr/lib/go-1.10/src/runtime/mstats.go
  mstats.go:438         0x4260d0                64488b0c25f8ffffff      MOVQ 
TEXT runtime.init.3(SB) /usr/lib/go-1.10/src/runtime/panic.go
TEXT runtime.init.4(SB) /usr/lib/go-1.10/src/runtime/proc.go
TEXT runtime.init.5(SB) /usr/lib/go-1.10/src/runtime/signal_unix.go
  signal_unix.go:64     0x43e450                c3                      RET
TEXT runtime.init(SB) <autogenerated>


;go tool objdump -s "main\.init\b" test
TEXT main.init.0(SB) /mnt/c/Program Files/cmder/main.go
TEXT main.init.1(SB) /mnt/c/Program Files/cmder/main.go
TEXT main.init.2(SB) /mnt/c/Program Files/cmder/test.go
TEXT main.init(SB) <autogenerated>
 <autogenerated>:1     0x5e31ec                e81f63ffff              CALL net/http.init(SB)

  <autogenerated>:1     0x5e31f1                e83afdffff              CALL _/mnt/c/Program_Files/cmder/lib.init(SB)
  <autogenerated>:1     0x5e31f6                e895fdffff              CALL main.init.0(SB)

  <autogenerated>:1     0x5e31fb                e820feffff              CALL main.init.1(SB)

  <autogenerated>:1     0x5e3200                e87bfeffff              CALL main.init.2(SB)

  <autogenerated>:1     0x5e3205                c605822a1f0002          MOVB $0x2, main.initdone.(SB)

  <autogenerated>:1     0x5e320c                488b2c24                MOVQ 0(SP), BP

  <autogenerated>:1     0x5e3210                4883c408                ADDQ $0x8, SP

  <autogenerated>:1     0x5e3214                c3                      RET

  <autogenerated>:1     0x5e3215                e80600e7ff              CALL runtime.morestack_noctxt(SB)

  <autogenerated>:1     0x5e321a                eb84                    JMP main.init(SB)

结论

所有init都会在同一个goroutine执行

所有init函数结束后才会执行main.main

内存分配

基本策略

  • 每次从操作系统申请一大块内存,减少系统调用
  • 内存分配器
    • 大块内存预先切成小块构成链表
    • 分配就从链表里提取一块
    • 回收旧放回链表
    • 空闲过多会归还给系统降低整体开销

内存块

  • span page 大块内存
  • object切分span多个小块
  • 哦,抄的tcmalloc

初始化动作

三个数组组成内存管理结构

  • spans,管理span的,按页对应,地址按页对齐能快速定位(?这里的原理不太清楚,我对页这些东西计算一直处于一知半解水平)
  • bitmap 为每个对象提供4bit标记为,保存指针,GC标记
  • arena 申请内存,用户可分配上限

arena和spans bitmap存在映射关系,三者可以按需同步线性扩张

都用mheap维护,在mallocinit里初始化

来个示例

//test.go
package main

import(
    "fmt"
    "os"
    "github.com/shirou/gosutil/process"
)

var ps *process.Process

func mem(n int) {
    if ps == nil {
            p, err := process.NewProcess(int32(os.Getpid()))
                if err != nil {
                    panic(err)
                }

                ps = p
        }

        mem, _ := ps.MemoryInfoEx()
        fmt.Printf("%d, VMS:%d MB, RSS:%d MB\n", n, mem,.VMS>>20, mem.RSS>>20)
}

func main(){
    mem(1)
        data : new([10][1024*1024]byte)
    mem(2)

    for i := range data {
            for x, n := 0, len(data[i]); x<n; x++ {
                    data[i][x] = 1
                }
                mem(3)
        }
}

分配

不要以为new一定会分配在堆上,随着优化内联

package main
import ()
func test() *int {
    x := new(int)
    *x = 0xAABB
    return x
}
func main() {
    println(*test())
}
go build -gcflags "-l" -o test test.go
go tool objdump -s "main\.test" test
TEXT main.test(SB) /mnt/c/Program Files/cmder/test.go
  test.go:4             0x44c150                64488b0c25f8ffffff      MOVQ FS:0xfffffff8, CX
  test.go:4             0x44c159                483b6110                CMPQ 0x10(CX), SP
  test.go:4             0x44c15d                7639                    JBE 0x44c198
  test.go:4             0x44c15f                4883ec18                SUBQ $0x18, SP
  test.go:4             0x44c163                48896c2410              MOVQ BP, 0x10(SP)
  test.go:4             0x44c168                488d6c2410              LEAQ 0x10(SP), BP
  test.go:5             0x44c16d                488d05acac0000          LEAQ 0xacac(IP), AX
  test.go:5             0x44c174                48890424                MOVQ AX, 0(SP)
  test.go:5             0x44c178                e8a3effbff              CALL runtime.newobject(SB)
  test.go:5             0x44c17d                488b442408              MOVQ 0x8(SP), AX
  test.go:6             0x44c182                48c700bbaa0000          MOVQ $0xaabb, 0(AX)
  test.go:7             0x44c189                4889442420              MOVQ AX, 0x20(SP)
  test.go:7             0x44c18e                488b6c2410              MOVQ 0x10(SP), BP
  test.go:7             0x44c193                4883c418                ADDQ $0x18, SP
  test.go:7             0x44c197                c3                      RET
  test.go:4             0x44c198                e8d383ffff              CALL runtime.morestack_noctxt(SB)
  test.go:4             0x44c19d                ebb1                    JMP main.test(SB)
  


go build -o test test.go
go tool objdump -s "main\.main" test
TEXT main.main(SB) /mnt/c/Program Files/cmder/test.go
  test.go:10            0x44c150                64488b0c25f8ffffff      MOVQ FS:0xfffffff8, CX
  test.go:10            0x44c159                483b6110                CMPQ 0x10(CX), SP
  test.go:10            0x44c15d                7634                    JBE 0x44c193
  test.go:10            0x44c15f                4883ec10                SUBQ $0x10, SP
  test.go:10            0x44c163                48896c2408              MOVQ BP, 0x8(SP)
  test.go:10            0x44c168                488d6c2408              LEAQ 0x8(SP), BP
  test.go:11            0x44c16d                e88e59fdff              CALL runtime.printlock(SB)
  test.go:11            0x44c172                48c70424bbaa0000        MOVQ $0xaabb, 0(SP)
  test.go:11            0x44c17a                e80161fdff              CALL runtime.printint(SB)
  test.go:11            0x44c17f                e80c5cfdff              CALL runtime.printnl(SB)
  test.go:11            0x44c184                e8f759fdff              CALL runtime.printunlock(SB)
  test.go:12            0x44c189                488b6c2408              MOVQ 0x8(SP), BP
  test.go:12            0x44c18e                4883c410                ADDQ $0x10, SP
  test.go:12            0x44c192                c3                      RET
  test.go:10            0x44c193                e8d883ffff              CALL runtime.morestack_noctxt(SB)
  test.go:10            0x44c198                ebb6                    JMP main.main(SB)

逃逸分析-gcflag “-m”

分配思路 malloc.go

  • 大对象heap
  • 小对象cache.alloc[sizeclass].freelist object
  • 微小对象使用cache.tiny object

回收

回收以span为单位

释放

sysmon监控任务来搞

具体释放是madvie(v, n, _MADV_DONTNEED) 系统来决定。如果物理内存资源充足,就不会回收避免无谓的损耗,不过再次使用肯定会pagefault然后分配新的内存

垃圾回收

缩短STW时间

抑制堆增长 充分利用CPU资源

  • 三色标记和写屏障
    • 所有都是白色
    • 扫描出所有可达对象,标记成灰色,放出待处理队列
    • 队列提取出灰色对象,将其引用对象标记为灰色放入队列,自身标记为黑色
    • 写屏障监视对象内崔修改,重新标色或放回队列

gcController控制

辅助回收,避免分配速度大于后台标记导致的堆恶性扩张

ref