From 967a33bdb583e05a570b4c7e6809b1495ef2e25c Mon Sep 17 00:00:00 2001 From: Filippo Valsorda Date: Wed, 16 Dec 2020 00:34:15 +0100 Subject: [PATCH] goversion: rewrite on top of cmd/go/internal/version Imported the cmd/go/internal/version implementation at c32140fa94cfc51a2152855825f57e27ae3ba133, and restored Symbols and TextRange to implement the gccgo and crypto checks. It looks like the amd64 matching had broken, but it should be more stable to just track what we do upstream with minimal modifications, so I replaced the core mechanism rather than fixing the matcher. Fixes #14 Fixes #12 Fixes #11 Fixes #7 Closes #13 Closes #9 --- README.md | 4 +- go.mod | 3 + main.go | 4 - version/asm.go | 349 ------------------------------------------------ version/exe.go | 209 +++++++++++++---------------- version/read.go | 178 +++++++++--------------- 6 files changed, 159 insertions(+), 588 deletions(-) create mode 100644 go.mod delete mode 100644 version/asm.go diff --git a/README.md b/README.md index aaa0563..4da3756 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,10 @@ Goversion scans inside of tar or gzipped tar archives that it finds (named The `-crypto` flag causes goversion to print additional information about the crypto libraries linked into each executable. -The -m flag causes goversion to print the list of modules +The `-m` flag causes goversion to print the list of modules found in the executable, along with version information. -The -mh flag causes goversion to print the list of modules +The `-mh` flag causes goversion to print the list of modules found in the executable, along with version and hash information. 
The `-v` flag causes goversion to print information about every file it diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..151255f --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module rsc.io/goversion + +go 1.14 diff --git a/main.go b/main.go index 74f7b22..0379f3b 100644 --- a/main.go +++ b/main.go @@ -71,10 +71,6 @@ var ( verbose = flag.Bool("v", false, "print verbose information") ) -func init() { - flag.BoolVar(&version.DebugMatch, "d", version.DebugMatch, "print debug information") -} - func usage() { fmt.Fprintf(os.Stderr, "usage: goversion [-crypto] [-v] path...\n") flag.PrintDefaults() diff --git a/version/asm.go b/version/asm.go deleted file mode 100644 index 3d5122b..0000000 --- a/version/asm.go +++ /dev/null @@ -1,349 +0,0 @@ -// Copyright 2017 The Go Authors. All Rights Reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package version - -import ( - "encoding/binary" - "fmt" - "os" -) - -type matcher [][]uint32 - -const ( - pWild uint32 = 0xff00 - pAddr uint32 = 0x10000 - pEnd uint32 = 0x20000 - pRelAddr uint32 = 0x30000 - - opMaybe = 1 + iota - opMust - opDone - opAnchor = 0x100 - opSub8 = 0x200 - opFlags = opAnchor | opSub8 -) - -var amd64Matcher = matcher{ - {opMaybe | opAnchor, - // __rt0_amd64_darwin: - // JMP __rt0_amd64 - 0xe9, pWild | pAddr, pWild, pWild, pWild | pEnd, 0xcc, 0xcc, 0xcc, - }, - {opMaybe, - // _rt0_amd64_linux: - // lea 0x8(%rsp), %rsi - // mov (%rsp), %rdi - // lea ADDR(%rip), %rax # main - // jmpq *%rax - 0x48, 0x8d, 0x74, 0x24, 0x08, - 0x48, 0x8b, 0x3c, 0x24, 0x48, - 0x8d, 0x05, pWild | pAddr, pWild, pWild, pWild | pEnd, - 0xff, 0xe0, - }, - {opMaybe, - // _rt0_amd64_linux: - // lea 0x8(%rsp), %rsi - // mov (%rsp), %rdi - // mov $ADDR, %eax # main - // jmpq *%rax - 0x48, 0x8d, 0x74, 0x24, 0x08, - 0x48, 0x8b, 0x3c, 0x24, - 0xb8, pWild | pAddr, pWild, pWild, pWild, - 0xff, 0xe0, - }, - {opMaybe, - // __rt0_amd64: - // mov (%rsp), %rdi - // lea 
8(%rsp), %rsi - // jmp runtime.rt0_g0 - 0x48, 0x8b, 0x3c, 0x24, - 0x48, 0x8d, 0x74, 0x24, 0x08, - 0xe9, pWild | pAddr, pWild, pWild, pWild | pEnd, - 0xcc, 0xcc, - }, - {opMaybe, - // _start (toward end) - // lea __libc_csu_fini(%rip), %r8 - // lea __libc_csu_init(%rip), %rcx - // lea ADDR(%rip), %rdi # main - // callq *xxx(%rip) - 0x4c, 0x8d, 0x05, pWild, pWild, pWild, pWild, - 0x48, 0x8d, 0x0d, pWild, pWild, pWild, pWild, - 0x48, 0x8d, 0x3d, pWild | pAddr, pWild, pWild, pWild | pEnd, - 0xff, 0x15, - }, - {opMaybe, - // _start (toward end) - // push %rsp (1) - // mov $__libc_csu_fini, %r8 (7) - // mov $__libc_csu_init, %rcx (7) - // mov $ADDR, %rdi # main (7) - // callq *xxx(%rip) - 0x54, - 0x49, 0xc7, 0xc0, pWild, pWild, pWild, pWild, - 0x48, 0xc7, 0xc1, pWild, pWild, pWild, pWild, - 0x48, 0xc7, 0xc7, pAddr | pWild, pWild, pWild, pWild, - }, - {opMaybe | opAnchor, - // main: - // lea ADDR(%rip), %rax # rt0_go - // jmpq *%rax - 0x48, 0x8d, 0x05, pWild | pAddr, pWild, pWild, pWild | pEnd, - 0xff, 0xe0, - }, - {opMaybe | opAnchor, - // main: - // mov $ADDR, %eax - // jmpq *%rax - 0xb8, pWild | pAddr, pWild, pWild, pWild, - 0xff, 0xe0, - }, - {opMaybe | opAnchor, - // main: - // JMP runtime.rt0_go(SB) - 0xe9, pWild | pAddr, pWild, pWild, pWild | pEnd, 0xcc, 0xcc, 0xcc, - }, - {opMust | opAnchor, - // rt0_go: - // mov %rdi, %rax - // mov %rsi, %rbx - // sub %0x27, %rsp - // and $0xfffffffffffffff0,%rsp - // mov %rax,0x10(%rsp) - // mov %rbx,0x18(%rsp) - 0x48, 0x89, 0xf8, - 0x48, 0x89, 0xf3, - 0x48, 0x83, 0xec, 0x27, - 0x48, 0x83, 0xe4, 0xf0, - 0x48, 0x89, 0x44, 0x24, 0x10, - 0x48, 0x89, 0x5c, 0x24, 0x18, - }, - {opMust, - // later in rt0_go: - // mov %eax, (%rsp) - // mov 0x18(%rsp), %rax - // mov %rax, 0x8(%rsp) - // callq runtime.args - // callq runtime.osinit - // callq runtime.schedinit (ADDR) - 0x89, 0x04, 0x24, - 0x48, 0x8b, 0x44, 0x24, 0x18, - 0x48, 0x89, 0x44, 0x24, 0x08, - 0xe8, pWild, pWild, pWild, pWild, - 0xe8, pWild, pWild, pWild, pWild, - 0xe8, pWild, 
pWild, pWild, pWild, - }, - {opMaybe, - // later in rt0_go: - // mov %eax, (%rsp) - // mov 0x18(%rsp), %rax - // mov %rax, 0x8(%rsp) - // callq runtime.args - // callq runtime.osinit - // callq runtime.schedinit (ADDR) - // lea other(%rip), %rdi - 0x89, 0x04, 0x24, - 0x48, 0x8b, 0x44, 0x24, 0x18, - 0x48, 0x89, 0x44, 0x24, 0x08, - 0xe8, pWild, pWild, pWild, pWild, - 0xe8, pWild, pWild, pWild, pWild, - 0xe8, pWild | pAddr, pWild, pWild, pWild | pEnd, - 0x48, 0x8d, 0x05, - }, - {opMaybe, - // later in rt0_go: - // mov %eax, (%rsp) - // mov 0x18(%rsp), %rax - // mov %rax, 0x8(%rsp) - // callq runtime.args - // callq runtime.osinit - // callq runtime.hashinit - // callq runtime.schedinit (ADDR) - // pushq $main.main - 0x89, 0x04, 0x24, - 0x48, 0x8b, 0x44, 0x24, 0x18, - 0x48, 0x89, 0x44, 0x24, 0x08, - 0xe8, pWild, pWild, pWild, pWild, - 0xe8, pWild, pWild, pWild, pWild, - 0xe8, pWild, pWild, pWild, pWild, - 0xe8, pWild | pAddr, pWild, pWild, pWild | pEnd, - 0x68, - }, - {opDone | opSub8, - // schedinit (toward end) - // mov ADDR(%rip), %rax - // test %rax, %rax - // jne - // movq $0x7, ADDR(%rip) - // - 0x48, 0x8b, 0x05, pWild, pWild, pWild, pWild, - 0x48, 0x85, 0xc0, - 0x75, pWild, - 0x48, 0xc7, 0x05, pWild | pAddr, pWild, pWild, pWild, 0x07, 0x00, 0x00, 0x00 | pEnd, - }, - {opDone | opSub8, - // schedinit (toward end) - // mov ADDR(%rip), %rbx - // cmp $0x0, %rbx - // jne - // lea "unknown"(%rip), %rbx - // mov %rbx, ADDR(%rip) - // movq $7, (ADDR+8)(%rip) - 0x48, 0x8b, 0x1d, pWild, pWild, pWild, pWild, - 0x48, 0x83, 0xfb, 0x00, - 0x75, pWild, - 0x48, 0x8d, 0x1d, pWild, pWild, pWild, pWild, - 0x48, 0x89, 0x1d, pWild, pWild, pWild, pWild, - 0x48, 0xc7, 0x05, pWild | pAddr, pWild, pWild, pWild, 0x07, 0x00, 0x00, 0x00 | pEnd, - }, - {opDone, - // schedinit (toward end) - // cmpq $0x0, ADDR(%rip) - // jne - // lea "unknown"(%rip), %rax - // mov %rax, ADDR(%rip) - // lea ADDR(%rip), %rax - // movq $7, 8(%rax) - 0x48, 0x83, 0x3d, pWild | pAddr, pWild, pWild, pWild, 0x00, - 
0x75, pWild, - 0x48, 0x8d, 0x05, pWild, pWild, pWild, pWild, - 0x48, 0x89, 0x05, pWild, pWild, pWild, pWild, - 0x48, 0x8d, 0x05, pWild | pAddr, pWild, pWild, pWild | pEnd, - 0x48, 0xc7, 0x40, 0x08, 0x07, 0x00, 0x00, 0x00, - }, - {opDone, - // schedinit (toward end) - // cmpq $0x0, ADDR(%rip) - // jne - // movq $0x7, ADDR(%rip) - 0x48, 0x83, 0x3d, pWild | pAddr, pWild, pWild, pWild, 0x00, - 0x75, pWild, - 0x48, 0xc7, 0x05 | pEnd, pWild | pAddr, pWild, pWild, pWild, 0x07, 0x00, 0x00, 0x00, - }, - {opDone, - // test %eax, %eax - // jne - // lea "unknown"(RIP), %rax - // mov %rax, ADDR(%rip) - 0x48, 0x85, 0xc0, 0x75, pWild, 0x48, 0x8d, 0x05, pWild, pWild, pWild, pWild, 0x48, 0x89, 0x05, pWild | pAddr, pWild, pWild, pWild | pEnd, - }, - {opDone, - // schedinit (toward end) - // mov ADDR(%rip), %rcx - // test %rcx, %rcx - // jne - // movq $0x7, ADDR(%rip) - // - 0x48, 0x8b, 0x0d, pWild, pWild, pWild, pWild, - 0x48, 0x85, 0xc9, - 0x75, pWild, - 0x48, 0xc7, 0x05 | pEnd, pWild | pAddr, pWild, pWild, pWild, 0x07, 0x00, 0x00, 0x00, - }, -} - -var DebugMatch bool - -func (m matcher) match(f exe, addr uint64) (uint64, bool) { - data, err := f.ReadData(addr, 512) - if DebugMatch { - fmt.Fprintf(os.Stderr, "data @%#x: %x\n", addr, data[:16]) - } - if err != nil { - if DebugMatch { - fmt.Fprintf(os.Stderr, "match: %v\n", err) - } - return 0, false - } - if DebugMatch { - fmt.Fprintf(os.Stderr, "data: %x\n", data[:32]) - } -Matchers: - for pc, p := range m { - op := p[0] - p = p[1:] - Search: - for i := 0; i <= len(data)-len(p); i++ { - a := -1 - e := -1 - if i > 0 && op&opAnchor != 0 { - break - } - for j := 0; j < len(p); j++ { - b := byte(p[j]) - m := byte(p[j] >> 8) - if data[i+j]&^m != b { - continue Search - } - if p[j]&pAddr != 0 { - a = j - } - if p[j]&pEnd != 0 { - e = j + 1 - } - } - // matched - if DebugMatch { - fmt.Fprintf(os.Stderr, "match (%d) %#x+%d %x %x\n", pc, addr, i, p, data[i:i+len(p)]) - } - if a != -1 { - val := 
uint64(int32(binary.LittleEndian.Uint32(data[i+a:]))) - if e == -1 { - addr = val - } else { - addr += uint64(i+e) + val - } - if op&opSub8 != 0 { - addr -= 8 - } - } - if op&^opFlags == opDone { - if DebugMatch { - fmt.Fprintf(os.Stderr, "done %x\n", addr) - } - return addr, true - } - if a != -1 { - // changed addr, so reload - data, err = f.ReadData(addr, 512) - if err != nil { - return 0, false - } - if DebugMatch { - fmt.Fprintf(os.Stderr, "reload @%#x: %x\n", addr, data[:32]) - } - } - continue Matchers - } - // not matched - if DebugMatch { - fmt.Fprintf(os.Stderr, "no match (%d) %#x %x %x\n", pc, addr, p, data[:32]) - } - if op&^opFlags == opMust { - return 0, false - } - } - // ran off end of matcher - return 0, false -} - -func readBuildVersionX86Asm(f exe) (isGo bool, buildVersion string) { - entry := f.Entry() - if entry == 0 { - if DebugMatch { - fmt.Fprintf(os.Stderr, "missing entry!\n") - } - return - } - addr, ok := amd64Matcher.match(f, entry) - if !ok { - return - } - v, err := readBuildVersion(f, addr, 16) - if err != nil { - return - } - return true, v -} diff --git a/version/exe.go b/version/exe.go index dc87129..a86bb21 100644 --- a/version/exe.go +++ b/version/exe.go @@ -9,30 +9,30 @@ import ( "debug/elf" "debug/macho" "debug/pe" - "encoding/binary" "fmt" "io" "os" ) -type sym struct { - Name string - Addr uint64 - Size uint64 -} - +// An exe is a generic interface to an OS executable (ELF, Mach-O, PE). type exe interface { - AddrSize() int // bytes - ReadData(addr, size uint64) ([]byte, error) - Symbols() ([]sym, error) - SectionNames() []string + // Close closes the underlying file. Close() error - ByteOrder() binary.ByteOrder - Entry() uint64 + + // Symbols returns the names of the symbols in the table. + Symbols() ([]string, error) + + // ReadData reads and returns up to size bytes starting at virtual address addr. + ReadData(addr, size uint64) ([]byte, error) + + // DataStart returns the writable data segment start address. 
+ DataStart() uint64 + + // TextRange returns the text section start and end address. TextRange() (uint64, uint64) - RODataRange() (uint64, uint64) } +// openExe opens file and returns it as an exe. func openExe(file string) (exe, error) { f, err := os.Open(file) if err != nil { @@ -70,25 +70,24 @@ func openExe(file string) (exe, error) { return nil, fmt.Errorf("unrecognized executable format") } +// elfExe is the ELF implementation of the exe interface. type elfExe struct { os *os.File f *elf.File } -func (x *elfExe) AddrSize() int { return 0 } - -func (x *elfExe) ByteOrder() binary.ByteOrder { return x.f.ByteOrder } - func (x *elfExe) Close() error { return x.os.Close() } -func (x *elfExe) Entry() uint64 { return x.f.Entry } - func (x *elfExe) ReadData(addr, size uint64) ([]byte, error) { - data := make([]byte, size) for _, prog := range x.f.Progs { - if prog.Vaddr <= addr && addr+size-1 <= prog.Vaddr+prog.Filesz-1 { + if prog.Vaddr <= addr && addr <= prog.Vaddr+prog.Filesz-1 { + n := prog.Vaddr + prog.Filesz - addr + if n > size { + n = size + } + data := make([]byte, n) _, err := prog.ReadAt(data, int64(addr-prog.Vaddr)) if err != nil { return nil, err @@ -99,26 +98,18 @@ func (x *elfExe) ReadData(addr, size uint64) ([]byte, error) { return nil, fmt.Errorf("address not mapped") } -func (x *elfExe) Symbols() ([]sym, error) { +func (x *elfExe) Symbols() ([]string, error) { syms, err := x.f.Symbols() if err != nil { return nil, err } - var out []sym + var out []string for _, s := range syms { - out = append(out, sym{s.Name, s.Value, s.Size}) + out = append(out, s.Name) } return out, nil } -func (x *elfExe) SectionNames() []string { - var names []string - for _, sect := range x.f.Sections { - names = append(names, sect.Name) - } - return names -} - func (x *elfExe) TextRange() (uint64, uint64) { for _, p := range x.f.Progs { if p.Type == elf.PT_LOAD && p.Flags&elf.PF_X != 0 { @@ -128,63 +119,49 @@ func (x *elfExe) TextRange() (uint64, uint64) { return 0, 0 } -func 
(x *elfExe) RODataRange() (uint64, uint64) { - for _, p := range x.f.Progs { - if p.Type == elf.PT_LOAD && p.Flags&(elf.PF_R|elf.PF_W|elf.PF_X) == elf.PF_R { - return p.Vaddr, p.Vaddr + p.Filesz +func (x *elfExe) DataStart() uint64 { + for _, s := range x.f.Sections { + if s.Name == ".go.buildinfo" { + return s.Addr } } for _, p := range x.f.Progs { - if p.Type == elf.PT_LOAD && p.Flags&(elf.PF_R|elf.PF_W|elf.PF_X) == (elf.PF_R|elf.PF_X) { - return p.Vaddr, p.Vaddr + p.Filesz + if p.Type == elf.PT_LOAD && p.Flags&(elf.PF_X|elf.PF_W) == elf.PF_W { + return p.Vaddr } } - return 0, 0 + return 0 } +// peExe is the PE (Windows Portable Executable) implementation of the exe interface. type peExe struct { os *os.File f *pe.File } -func (x *peExe) imageBase() uint64 { - switch oh := x.f.OptionalHeader.(type) { - case *pe.OptionalHeader32: - return uint64(oh.ImageBase) - case *pe.OptionalHeader64: - return oh.ImageBase - } - return 0 -} - -func (x *peExe) AddrSize() int { - if x.f.Machine == pe.IMAGE_FILE_MACHINE_AMD64 { - return 8 - } - return 4 -} - -func (x *peExe) ByteOrder() binary.ByteOrder { return binary.LittleEndian } - func (x *peExe) Close() error { return x.os.Close() } -func (x *peExe) Entry() uint64 { +func (x *peExe) imageBase() uint64 { switch oh := x.f.OptionalHeader.(type) { case *pe.OptionalHeader32: - return uint64(oh.ImageBase + oh.AddressOfEntryPoint) + return uint64(oh.ImageBase) case *pe.OptionalHeader64: - return oh.ImageBase + uint64(oh.AddressOfEntryPoint) + return oh.ImageBase } return 0 } func (x *peExe) ReadData(addr, size uint64) ([]byte, error) { addr -= x.imageBase() - data := make([]byte, size) for _, sect := range x.f.Sections { - if uint64(sect.VirtualAddress) <= addr && addr+size-1 <= uint64(sect.VirtualAddress+sect.Size-1) { + if uint64(sect.VirtualAddress) <= addr && addr <= uint64(sect.VirtualAddress+sect.Size-1) { + n := uint64(sect.VirtualAddress+sect.Size) - addr + if n > size { + n = size + } + data := make([]byte, n) _, err := 
sect.ReadAt(data, int64(addr-uint64(sect.VirtualAddress))) if err != nil { return nil, err @@ -195,27 +172,17 @@ func (x *peExe) ReadData(addr, size uint64) ([]byte, error) { return nil, fmt.Errorf("address not mapped") } -func (x *peExe) Symbols() ([]sym, error) { - base := x.imageBase() - var out []sym +func (x *peExe) Symbols() ([]string, error) { + var out []string for _, s := range x.f.Symbols { if s.SectionNumber <= 0 || int(s.SectionNumber) > len(x.f.Sections) { continue } - sect := x.f.Sections[s.SectionNumber-1] - out = append(out, sym{s.Name, uint64(s.Value) + base + uint64(sect.VirtualAddress), 0}) + out = append(out, s.Name) } return out, nil } -func (x *peExe) SectionNames() []string { - var names []string - for _, sect := range x.f.Sections { - names = append(names, sect.Name) - } - return names -} - func (x *peExe) TextRange() (uint64, uint64) { // Assume text is first non-empty section. for _, sect := range x.f.Sections { @@ -226,55 +193,53 @@ func (x *peExe) TextRange() (uint64, uint64) { return 0, 0 } -func (x *peExe) RODataRange() (uint64, uint64) { - return x.TextRange() +func (x *peExe) DataStart() uint64 { + // Assume data is first writable section. + const ( + IMAGE_SCN_CNT_CODE = 0x00000020 + IMAGE_SCN_CNT_INITIALIZED_DATA = 0x00000040 + IMAGE_SCN_CNT_UNINITIALIZED_DATA = 0x00000080 + IMAGE_SCN_MEM_EXECUTE = 0x20000000 + IMAGE_SCN_MEM_READ = 0x40000000 + IMAGE_SCN_MEM_WRITE = 0x80000000 + IMAGE_SCN_MEM_DISCARDABLE = 0x2000000 + IMAGE_SCN_LNK_NRELOC_OVFL = 0x1000000 + IMAGE_SCN_ALIGN_32BYTES = 0x600000 + ) + for _, sect := range x.f.Sections { + if sect.VirtualAddress != 0 && sect.Size != 0 && + sect.Characteristics&^IMAGE_SCN_ALIGN_32BYTES == IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_MEM_READ|IMAGE_SCN_MEM_WRITE { + return uint64(sect.VirtualAddress) + x.imageBase() + } + } + return 0 } +// machoExe is the Mach-O (Apple macOS/iOS) implementation of the exe interface. 
type machoExe struct { os *os.File f *macho.File } -func (x *machoExe) AddrSize() int { - if x.f.Cpu&0x01000000 != 0 { - return 8 - } - return 4 -} - -func (x *machoExe) ByteOrder() binary.ByteOrder { return x.f.ByteOrder } - func (x *machoExe) Close() error { return x.os.Close() } -func (x *machoExe) Entry() uint64 { - for _, load := range x.f.Loads { - b, ok := load.(macho.LoadBytes) - if !ok { - continue - } - bo := x.f.ByteOrder - const x86_THREAD_STATE64 = 4 - cmd, siz := macho.LoadCmd(bo.Uint32(b[0:4])), bo.Uint32(b[4:8]) - if cmd == macho.LoadCmdUnixThread && siz == 184 && bo.Uint32(b[8:12]) == x86_THREAD_STATE64 { - return bo.Uint64(b[144:]) - } - } - return 0 -} - func (x *machoExe) ReadData(addr, size uint64) ([]byte, error) { - data := make([]byte, size) for _, load := range x.f.Loads { seg, ok := load.(*macho.Segment) if !ok { continue } - if seg.Addr <= addr && addr+size-1 <= seg.Addr+seg.Filesz-1 { + if seg.Addr <= addr && addr <= seg.Addr+seg.Filesz-1 { if seg.Name == "__PAGEZERO" { continue } + n := seg.Addr + seg.Filesz - addr + if n > size { + n = size + } + data := make([]byte, n) _, err := seg.ReadAt(data, int64(addr-seg.Addr)) if err != nil { return nil, err @@ -285,22 +250,14 @@ func (x *machoExe) ReadData(addr, size uint64) ([]byte, error) { return nil, fmt.Errorf("address not mapped") } -func (x *machoExe) Symbols() ([]sym, error) { - var out []sym +func (x *machoExe) Symbols() ([]string, error) { + var out []string for _, s := range x.f.Symtab.Syms { - out = append(out, sym{s.Name, s.Value, 0}) + out = append(out, s.Name) } return out, nil } -func (x *machoExe) SectionNames() []string { - var names []string - for _, sect := range x.f.Sections { - names = append(names, sect.Name) - } - return names -} - func (x *machoExe) TextRange() (uint64, uint64) { // Assume text is first non-empty segment. 
for _, load := range x.f.Loads { @@ -312,6 +269,20 @@ func (x *machoExe) TextRange() (uint64, uint64) { return 0, 0 } -func (x *machoExe) RODataRange() (uint64, uint64) { - return x.TextRange() +func (x *machoExe) DataStart() uint64 { + // Look for section named "__go_buildinfo". + for _, sec := range x.f.Sections { + if sec.Name == "__go_buildinfo" { + return sec.Addr + } + } + // Try the first non-empty writable segment. + const RW = 3 + for _, load := range x.f.Loads { + seg, ok := load.(*macho.Segment) + if ok && seg.Addr != 0 && seg.Filesz != 0 && seg.Prot == RW && seg.Maxprot == RW { + return seg.Addr + } + } + return 0 } diff --git a/version/read.go b/version/read.go index 3792818..7fe84b2 100644 --- a/version/read.go +++ b/version/read.go @@ -7,6 +7,7 @@ package version import ( "bytes" + "encoding/binary" "encoding/hex" "errors" "fmt" @@ -32,30 +33,16 @@ func ReadExe(file string) (Version, error) { return v, err } defer f.Close() - isGo := false - for _, name := range f.SectionNames() { - if name == ".note.go.buildid" { - isGo = true - } - } - syms, symsErr := f.Symbols() - isGccgo := false - for _, sym := range syms { - name := sym.Name - if name == "runtime.main" || name == "main.main" { - isGo = true - } - if strings.HasPrefix(name, "runtime.") && strings.HasSuffix(name, "$descriptor") { - isGccgo = true - } - if name == "runtime.buildVersion" { - isGo = true - release, err := readBuildVersion(f, sym.Addr, sym.Size) - if err != nil { - return v, err - } - v.Release = release + syms, _ := f.Symbols() + for _, name := range syms { + if strings.HasPrefix(name, "runtime.") && strings.HasSuffix(name, "$descriptor") { + defer func() { + if v.Release == "" || v.Release == "unknown Go version" { + v.Release = "gccgo (version unknown)" + err = nil + } + }() } if strings.Contains(name, "_Cfunc__goboringcrypto_") || name == "crypto/internal/boring/sig.BoringCrypto" { v.BoringCrypto = true @@ -73,39 +60,73 @@ func ReadExe(file string) (Version, error) { } } - if 
DebugMatch { - v.Release = "" + if err := findCryptoSigs(&v, f); err != nil { + return v, err } - if err := findModuleInfo(&v, f); err != nil { + + // The build info blob left by the linker is identified by + // a 16-byte header, consisting of buildInfoMagic (14 bytes), + // the binary's pointer size (1 byte), + // and whether the binary is big endian (1 byte). + var buildInfoMagic = []byte("\xff Go buildinf:") + + // Read the first 64kB of the data segment to find the build info blob. + text := f.DataStart() + data, err := f.ReadData(text, 64*1024) + if err != nil { return v, err } - if v.Release == "" { - g, release := readBuildVersionX86Asm(f) - if g { - isGo = true - v.Release = release - if err := findCryptoSigs(&v, f); err != nil { - return v, err - } + for ; !bytes.HasPrefix(data, buildInfoMagic); data = data[32:] { + if len(data) < 32 { + return v, errors.New("not a Go executable") } } - if isGccgo && v.Release == "" { - isGo = true - v.Release = "gccgo (version unknown)" - } - if !isGo && symsErr != nil { - return v, symsErr - } - if !isGo { - return v, errors.New("not a Go executable") + // Decode the blob. + ptrSize := int(data[14]) + bigEndian := data[15] != 0 + var bo binary.ByteOrder + if bigEndian { + bo = binary.BigEndian + } else { + bo = binary.LittleEndian } + var readPtr func([]byte) uint64 + if ptrSize == 4 { + readPtr = func(b []byte) uint64 { return uint64(bo.Uint32(b)) } + } else { + readPtr = bo.Uint64 + } + v.Release = readString(f, ptrSize, readPtr, readPtr(data[16:])) if v.Release == "" { v.Release = "unknown Go version" } + v.ModuleInfo = readString(f, ptrSize, readPtr, readPtr(data[16+ptrSize:])) + if len(v.ModuleInfo) >= 33 && v.ModuleInfo[len(v.ModuleInfo)-17] == '\n' { + // Strip module framing. + v.ModuleInfo = v.ModuleInfo[16 : len(v.ModuleInfo)-16] + } else { + v.ModuleInfo = "" + } + return v, nil } +// readString returns the string at address addr in the executable x. 
+func readString(x exe, ptrSize int, readPtr func([]byte) uint64, addr uint64) string { + hdr, err := x.ReadData(addr, uint64(2*ptrSize)) + if err != nil || len(hdr) < 2*ptrSize { + return "" + } + dataAddr := readPtr(hdr) + dataLen := readPtr(hdr[ptrSize:]) + data, err := x.ReadData(dataAddr, dataLen) + if err != nil || uint64(len(data)) < dataLen { + return "" + } + return string(data) +} + var re = regexp.MustCompile var standardCryptoNames = []*regexp.Regexp{ @@ -116,36 +137,6 @@ var standardCryptoNames = []*regexp.Regexp{ re(`^crypto/rsa\.decrypt$`), } -func readBuildVersion(f exe, addr, size uint64) (string, error) { - if size == 0 { - size = uint64(f.AddrSize() * 2) - } - if size != 8 && size != 16 { - return "", fmt.Errorf("invalid size for runtime.buildVersion") - } - data, err := f.ReadData(addr, size) - if err != nil { - return "", fmt.Errorf("reading runtime.buildVersion: %v", err) - } - - if size == 8 { - addr = uint64(f.ByteOrder().Uint32(data)) - size = uint64(f.ByteOrder().Uint32(data[4:])) - } else { - addr = f.ByteOrder().Uint64(data) - size = f.ByteOrder().Uint64(data[8:]) - } - if size > 1000 { - return "", fmt.Errorf("implausible string size %d for runtime.buildVersion", size) - } - - data, err = f.ReadData(addr, size) - if err != nil { - return "", fmt.Errorf("reading runtime.buildVersion string data: %v", err) - } - return string(data), nil -} - // Code signatures that indicate BoringCrypto or crypto/internal/fipsonly. 
// These are not byte literals in order to avoid the actual // byte signatures appearing in the goversion binary, @@ -200,44 +191,3 @@ func haveSig(data, sig []byte) bool { data = data[(i+align-1)&^(align-1):] } } - -func findModuleInfo(v *Version, f exe) error { - const maxModInfo = 128 << 10 - start, end := f.RODataRange() - for addr := start; addr < end; { - size := uint64(4 << 20) - if end-addr < size { - size = end - addr - } - data, err := f.ReadData(addr, size) - if err != nil { - return fmt.Errorf("reading text: %v", err) - } - if haveModuleInfo(data, v) { - return nil - } - if addr+size < end { - size -= maxModInfo - } - addr += size - } - return nil -} - -var ( - infoStart, _ = hex.DecodeString("3077af0c9274080241e1c107e6d618e6") - infoEnd, _ = hex.DecodeString("f932433186182072008242104116d8f2") -) - -func haveModuleInfo(data []byte, v *Version) bool { - i := bytes.Index(data, infoStart) - if i < 0 { - return false - } - j := bytes.Index(data[i:], infoEnd) - if j < 0 { - return false - } - v.ModuleInfo = string(data[i+len(infoStart) : i+j]) - return true -}