Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor and speed up _realName code #893

Merged
merged 7 commits into from
Dec 1, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 59 additions & 0 deletions bench_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
_ "embed"
"flag"
"fmt"
"math/rand/v2"
"os"
"os/exec"
"path/filepath"
Expand Down Expand Up @@ -166,3 +167,61 @@ func BenchmarkBuild(b *testing.B) {
}
b.ReportMetric(float64(info.Size()), "bin-B")
}

func BenchmarkAbiOriginalNames(b *testing.B) {
// Benchmark two thousand obfuscated names in _originalNamePairs
// and a variety of input strings to reverse.
// As an example, the cmd/go binary ends up with about 2200 entries
// in _originalNamePairs as of November 2024, so it's a realistic figure.
// Structs with tens of fields are also relatively normal.
salt := []byte("some salt bytes")
for n := range 2000 {
name := fmt.Sprintf("name_%d", n)
garbled := hashWithCustomSalt(salt, name)
_originalNamePairs = append(_originalNamePairs, [2]string{garbled, name})
}
// Pick twenty names at random to use as inputs below.
// Use a deterministic random source so it's stable between benchmark runs.
rnd := rand.New(rand.NewPCG(1, 2))
var chosen []string
for _, pair := range _originalNamePairs {
chosen = append(chosen, pair[0])
}
rnd.Shuffle(len(chosen), func(i, j int) {
chosen[i], chosen[j] = chosen[j], chosen[i]
})
chosen = chosen[:20]

inputs := []string{
// non-obfuscated names and types
"Error",
"int",
"*[]*interface {}",
"*map[uint64]bool",
// an obfuscated name
chosen[0],
// an obfuscated *pkg.Name
fmt.Sprintf("*%s.%s", chosen[1], chosen[2]),
// big struct with more than a dozen string field types
fmt.Sprintf("struct { %s string }", strings.Join(chosen[3:], " string ")),
}

var inputBytes int
for _, input := range inputs {
inputBytes += len(input)
}
b.SetBytes(int64(inputBytes))
b.ReportAllocs()
b.ResetTimer()

// We use a parallel benchmark because internal/abi's Name method
// is meant to be called by any goroutine at any time.
b.RunParallel(func(pb *testing.PB) {
for pb.Next() {
for _, input := range inputs {
_originalNames(input)
}
}
})
_originalNamePairs = [][2]string{}
}
62 changes: 62 additions & 0 deletions reflect_abi_code.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
package main

// The "name" that internal/abi passes to this function is not always a simple "someName".
// It can also be for function names like "*pkgName.FuncName" (obfuscated),
// or, for structs, the entire struct definition, like
//
// *struct { AQ45rr68K string; ipq5aQSIqN string; hNfiW5O5LVq struct { gPTbGR00hu string } }
//
// Therefore all obfuscated names which occur within name need to be replaced with their original equivalents.
// The code below does a more efficient version of:
//
// func _originalNames(name string) string {
// for _, pair := range _originalNamePairs {
// name = strings.ReplaceAll(name, pair[0], pair[1])
// }
// return name
// }
//
// The linknames below are only turned on when the code is injected,
// so that we can test and benchmark this code normally.

// Injected code below this line.

// _originalNames returns name with every obfuscated name listed in
// _originalNamePairs replaced by its original counterpart.
// It scans name left to right, and at each position tries the pairs in order,
// applying the first one whose obfuscated name matches there.
//
//disabledgo:linkname _originalNames internal/abi._originalNames
func _originalNames(name string) string {
	pos := 0
	// Stop as soon as the remaining bytes cannot fit another obfuscated name.
	for pos <= len(name)-minHashLength {
		switch name[pos] {
		case ' ', '.', '*', '{', '}', '[', ']':
			// Punctuation and spaces never begin an obfuscated name.
			pos++
			continue
		}
		rest := name[pos:]
		// When nothing matches here we move forward by a single byte;
		// after a replacement we jump past the inserted original name instead.
		advance := 1
		for _, pair := range _originalNamePairs {
			obfuscated, original := pair[0], pair[1]
			if len(obfuscated) > len(rest) {
				// The pairs are sorted from shortest to longest obfuscated name,
				// so none of the remaining ones can fit either.
				break
			}
			if rest[:len(obfuscated)] != obfuscated {
				continue
			}
			name = name[:pos] + original + rest[len(obfuscated):]
			advance = len(original)
			break
		}
		pos += advance
	}
	return name
}

// _originalNamePairs is the table _originalNames uses to undo obfuscation.
// Each pair is the obfuscated and then the real name.
// The slice is sorted from shortest to longest obfuscated name,
// which lets _originalNames stop scanning as soon as a pair no longer fits.
//
// It is declared empty here; reflectMainPostPatch later fills it in by
// textually matching this declaration, so the `[][2]string{}` composite
// literal must keep this exact spelling.
var _originalNamePairs = [][2]string{}
84 changes: 27 additions & 57 deletions reflect_abi_patch.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@ package main

import (
"bytes"
"cmp"
_ "embed"
"fmt"
"maps"
"os"
"slices"
"strconv"
"strings"
)

Expand All @@ -16,21 +19,21 @@ func abiNamePatch(path string) (string, error) {
}

find := `return unsafe.String(n.DataChecked(1+i, "non-empty string"), l)`
replace := `return _realName(unsafe.String(n.DataChecked(1+i, "non-empty string"), l))`
replace := `return _originalNames(unsafe.String(n.DataChecked(1+i, "non-empty string"), l))`

str := strings.Replace(string(data), find, replace, 1)

realname := `
//go:linkname _realName
func _realName(name string) string
originalNames := `
//go:linkname _originalNames
func _originalNames(name string) string
`

return str + realname, nil
return str + originalNames, nil
}

var reflectPatchFile = ""

// reflectMainPrePatch adds the initial empty name mapping and _realName implementation
// reflectMainPrePatch adds the initial empty name mapping and _originalNames implementation
// to a file in the main package. The name mapping will be populated later after
// analyzing the main package, since we need to know all obfuscated names that need mapping.
// We split this into pre/post steps so that all variable names in the generated code
Expand All @@ -46,65 +49,32 @@ func reflectMainPrePatch(path string) ([]byte, error) {
if err != nil {
return nil, err
}

nameMap := "\nvar _nameMap = map[string]string{}"

return append(content, []byte(realNameCode+nameMap)...), nil
_, code, _ := strings.Cut(reflectAbiCode, "// Injected code below this line.")
code = strings.ReplaceAll(code, "//disabledgo:", "//go:")
// This constant is declared in our hash.go file.
code = strings.ReplaceAll(code, "minHashLength", strconv.Itoa(minHashLength))
return append(content, []byte(code)...), nil
}

// reflectMainPostPatch populates the name mapping with the final obfuscated->real name
// mappings after all packages have been analyzed.
func reflectMainPostPatch(file []byte, lpkg *listedPackage, pkg pkgCache) []byte {
obfMapName := hashWithPackage(lpkg, "_nameMap")
nameMap := fmt.Sprintf("%s = map[string]string{", obfMapName)
obfVarName := hashWithPackage(lpkg, "_originalNamePairs")
namePairs := fmt.Appendf(nil, "%s = [][2]string{", obfVarName)

var b strings.Builder
keys := slices.Sorted(maps.Keys(pkg.ReflectObjectNames))
keys := slices.SortedFunc(maps.Keys(pkg.ReflectObjectNames), func(a, b string) int {
if c := cmp.Compare(len(a), len(b)); c != 0 {
return c
}
return cmp.Compare(a, b)
})
namePairsFilled := bytes.Clone(namePairs)
for _, obf := range keys {
b.WriteString(fmt.Sprintf(`"%s": "%s",`, obf, pkg.ReflectObjectNames[obf]))
namePairsFilled = fmt.Appendf(namePairsFilled, "{%q, %q},", obf, pkg.ReflectObjectNames[obf])
}

return bytes.Replace(file, []byte(nameMap), []byte(nameMap+b.String()), 1)
return bytes.Replace(file, namePairs, namePairsFilled, 1)
}

// The "name" internal/abi passes to this function doesn't have to be a simple "someName"
// it can also be for function names:
// "*pkgName.FuncName" (obfuscated)
// or for structs the entire struct definition:
// "*struct { AQ45rr68K string; ipq5aQSIqN string; hNfiW5O5LVq struct { gPTbGR00hu string } }"
//
// Therefore all obfuscated names which occur within name need to be replaced with their "real" equivalents.
//
// The code below does a more efficient version of:
//
// func _realName(name string) string {
// for obfName, real := range _nameMap {
// name = strings.ReplaceAll(name, obfName, real)
// }
//
// return name
// }
const realNameCode = `
//go:linkname _realName internal/abi._realName
func _realName(name string) string {
for i := 0; i < len(name); {
remLen := len(name[i:])
found := false
for obfName, real := range _nameMap {
keyLen := len(obfName)
if keyLen > remLen {
continue
}
if name[i:i+keyLen] == obfName {
name = name[:i] + real + name[i+keyLen:]
found = true
i += len(real)
break
}
}
if !found {
i++
}
}
return name
}`
//go:embed reflect_abi_code.go
var reflectAbiCode string
mvdan marked this conversation as resolved.
Show resolved Hide resolved