diff --git a/bench_test.go b/bench_test.go
index 5b8aad11..af5005b3 100644
--- a/bench_test.go
+++ b/bench_test.go
@@ -7,6 +7,7 @@ import (
 	_ "embed"
 	"flag"
 	"fmt"
+	"math/rand/v2"
 	"os"
 	"os/exec"
 	"path/filepath"
@@ -166,3 +167,70 @@ func BenchmarkBuild(b *testing.B) {
 	}
 	b.ReportMetric(float64(info.Size()), "bin-B")
 }
+
+func BenchmarkAbiOriginalNames(b *testing.B) {
+	// Benchmark two thousand obfuscated names in _originalNamePairs
+	// and a variety of input strings to reverse.
+	// As an example, the cmd/go binary ends up with about 2200 entries
+	// in _originalNamePairs as of November 2024, so it's a realistic figure.
+	// Structs with tens of fields are also relatively normal.
+	salt := []byte("some salt bytes")
+	// _originalNames stops scanning at the first pair that is too long to fit,
+	// so the pairs must be ordered from shortest to longest obfuscated name.
+	// Bucket the generated pairs by obfuscated length to keep that invariant.
+	byLen := make(map[int][][2]string)
+	maxLen := 0
+	for n := range 2000 {
+		name := fmt.Sprintf("name_%d", n)
+		garbled := hashWithCustomSalt(salt, name)
+		byLen[len(garbled)] = append(byLen[len(garbled)], [2]string{garbled, name})
+		maxLen = max(maxLen, len(garbled))
+	}
+	for l := 0; l <= maxLen; l++ {
+		_originalNamePairs = append(_originalNamePairs, byLen[l]...)
+	}
+	// Pick twenty names at random to use as inputs below.
+	// Use a deterministic random source so it's stable between benchmark runs.
+	rnd := rand.New(rand.NewPCG(1, 2))
+	var chosen []string
+	for _, pair := range _originalNamePairs {
+		chosen = append(chosen, pair[0])
+	}
+	rnd.Shuffle(len(chosen), func(i, j int) {
+		chosen[i], chosen[j] = chosen[j], chosen[i]
+	})
+	chosen = chosen[:20]
+
+	inputs := []string{
+		// non-obfuscated names and types
+		"Error",
+		"int",
+		"*[]*interface {}",
+		"*map[uint64]bool",
+		// an obfuscated name
+		chosen[0],
+		// an obfuscated *pkg.Name
+		fmt.Sprintf("*%s.%s", chosen[1], chosen[2]),
+		// big struct with more than a dozen string field types
+		fmt.Sprintf("struct { %s string }", strings.Join(chosen[3:], " string ")),
+	}
+
+	var inputBytes int
+	for _, input := range inputs {
+		inputBytes += len(input)
+	}
+	b.SetBytes(int64(inputBytes))
+	b.ReportAllocs()
+	b.ResetTimer()
+
+	// We use a parallel benchmark because internal/abi's Name method
+	// is meant to be called by any goroutine at any time.
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			for _, input := range inputs {
+				_originalNames(input)
+			}
+		}
+	})
+	_originalNamePairs = [][2]string{}
+}
diff --git a/reflect_abi_code.go b/reflect_abi_code.go
new file mode 100644
index 00000000..ce7b8781
--- /dev/null
+++ b/reflect_abi_code.go
@@ -0,0 +1,61 @@
+package main
+
+// The "name" internal/abi passes to this function doesn't have to be a simple "someName";
+// it can also be for function names like "*pkgName.FuncName" (obfuscated)
+// or for structs the entire struct definition, like
+//
+//	*struct { AQ45rr68K string; ipq5aQSIqN string; hNfiW5O5LVq struct { gPTbGR00hu string } }
+//
+// Therefore all obfuscated names which occur within name need to be replaced with their original equivalents.
+// The code below does a more efficient version of:
+//
+//	func _originalNames(name string) string {
+//		for _, pair := range _originalNamePairs {
+//			name = strings.ReplaceAll(name, pair[0], pair[1])
+//		}
+//		return name
+//	}
+//
+// The linknames below are only turned on when the code is injected,
+// so that we can test and benchmark this code normally.
+
+// Injected code below this line.
+
+//disabledgo:linkname _originalNames internal/abi._originalNames
+func _originalNames(name string) string {
+	// We can stop once there aren't enough bytes to fit another obfuscated name.
+	for i := 0; i <= len(name)-minHashLength; {
+		switch name[i] {
+		case ' ', '.', '*', '{', '}', '[', ']':
+			// These characters never start an obfuscated name.
+			i++
+			continue
+		}
+		remLen := len(name[i:])
+		found := false
+		for _, pair := range _originalNamePairs {
+			obfName := pair[0]
+			real := pair[1]
+			keyLen := len(obfName)
+			if remLen < keyLen {
+				// Since the pairs are sorted from shortest to longest name,
+				// we know that the rest of the pairs are at least just as long.
+				break
+			}
+			if name[i:i+keyLen] == obfName {
+				name = name[:i] + real + name[i+keyLen:]
+				found = true
+				i += len(real)
+				break
+			}
+		}
+		if !found {
+			i++
+		}
+	}
+	return name
+}
+
+// Each pair is the obfuscated and then the real name.
+// The slice is sorted from shortest to longest obfuscated name.
+var _originalNamePairs = [][2]string{}
diff --git a/reflect_abi_patch.go b/reflect_abi_patch.go
index e934cb37..1ecd80f4 100644
--- a/reflect_abi_patch.go
+++ b/reflect_abi_patch.go
@@ -2,10 +2,13 @@ package main
 
 import (
 	"bytes"
+	"cmp"
+	_ "embed"
 	"fmt"
 	"maps"
 	"os"
 	"slices"
+	"strconv"
 	"strings"
 )
 
@@ -16,21 +19,21 @@ func abiNamePatch(path string) (string, error) {
 	}
 
 	find := `return unsafe.String(n.DataChecked(1+i, "non-empty string"), l)`
-	replace := `return _realName(unsafe.String(n.DataChecked(1+i, "non-empty string"), l))`
+	replace := `return _originalNames(unsafe.String(n.DataChecked(1+i, "non-empty string"), l))`
 
 	str := strings.Replace(string(data), find, replace, 1)
 
-	realname := `
-//go:linkname _realName
-func _realName(name string) string
+	originalNames := `
+//go:linkname _originalNames
+func _originalNames(name string) string
 `
 
-	return str + realname, nil
+	return str + originalNames, nil
 }
 
 var reflectPatchFile = ""
 
-// reflectMainPrePatch adds the initial empty name mapping and _realName implementation
+// reflectMainPrePatch adds the initial empty name mapping and _originalNames implementation
 // to a file in the main package. The name mapping will be populated later after
 // analyzing the main package, since we need to know all obfuscated names that need mapping.
 // We split this into pre/post steps so that all variable names in the generated code
@@ -46,65 +49,37 @@ func reflectMainPrePatch(path string) ([]byte, error) {
 	if err != nil {
 		return nil, err
 	}
-
-	nameMap := "\nvar _nameMap = map[string]string{}"
-
-	return append(content, []byte(realNameCode+nameMap)...), nil
+	// Only the code after the marker is injected; fail loudly if the marker
+	// is missing rather than silently injecting nothing.
+	_, code, ok := strings.Cut(reflectAbiCode, "// Injected code below this line.")
+	if !ok {
+		return nil, fmt.Errorf("injection marker not found in embedded reflect_abi_code.go")
+	}
+	code = strings.ReplaceAll(code, "//disabledgo:", "//go:")
+	// This constant is declared in our hash.go file.
+	code = strings.ReplaceAll(code, "minHashLength", strconv.Itoa(minHashLength))
+	return append(content, []byte(code)...), nil
 }
 
 // reflectMainPostPatch populates the name mapping with the final obfuscated->real name
 // mappings after all packages have been analyzed.
 func reflectMainPostPatch(file []byte, lpkg *listedPackage, pkg pkgCache) []byte {
-	obfMapName := hashWithPackage(lpkg, "_nameMap")
-	nameMap := fmt.Sprintf("%s = map[string]string{", obfMapName)
+	obfVarName := hashWithPackage(lpkg, "_originalNamePairs")
+	namePairs := fmt.Appendf(nil, "%s = [][2]string{", obfVarName)
 
-	var b strings.Builder
-	keys := slices.Sorted(maps.Keys(pkg.ReflectObjectNames))
+	keys := slices.SortedFunc(maps.Keys(pkg.ReflectObjectNames), func(a, b string) int {
+		if c := cmp.Compare(len(a), len(b)); c != 0 {
+			return c
+		}
+		return cmp.Compare(a, b)
+	})
+	namePairsFilled := bytes.Clone(namePairs)
 	for _, obf := range keys {
-		b.WriteString(fmt.Sprintf(`"%s": "%s",`, obf, pkg.ReflectObjectNames[obf]))
+		namePairsFilled = fmt.Appendf(namePairsFilled, "{%q, %q},", obf, pkg.ReflectObjectNames[obf])
 	}
 
-	return bytes.Replace(file, []byte(nameMap), []byte(nameMap+b.String()), 1)
+	return bytes.Replace(file, namePairs, namePairsFilled, 1)
 }
 
-// The "name" internal/abi passes to this function doesn't have to be a simple "someName"
-// it can also be for function names:
-// "*pkgName.FuncName" (obfuscated)
-// or for structs the entire struct definition:
-// "*struct { AQ45rr68K string; ipq5aQSIqN string; hNfiW5O5LVq struct { gPTbGR00hu string } }"
-//
-// Therefore all obfuscated names which occur within name need to be replaced with their "real" equivalents.
-//
-// The code below does a more efficient version of:
-//
-//	func _realName(name string) string {
-//		for obfName, real := range _nameMap {
-//			name = strings.ReplaceAll(name, obfName, real)
-//		}
-//
-//		return name
-//	}
-const realNameCode = `
-//go:linkname _realName internal/abi._realName
-func _realName(name string) string {
-	for i := 0; i < len(name); {
-		remLen := len(name[i:])
-		found := false
-		for obfName, real := range _nameMap {
-			keyLen := len(obfName)
-			if keyLen > remLen {
-				continue
-			}
-			if name[i:i+keyLen] == obfName {
-				name = name[:i] + real + name[i+keyLen:]
-				found = true
-				i += len(real)
-				break
-			}
-		}
-		if !found {
-			i++
-		}
-	}
-	return name
-}`
+//go:embed reflect_abi_code.go
+var reflectAbiCode string