Skip to content

Commit

Permalink
Include a fingerprint of the specific arguments in mangled names. (#4771
Browse files Browse the repository at this point in the history
)

Instead of including the raw index of the specific, which is unstable
across files and across unrelated changes, use a fingerprint of the
constant values of the specific arguments. This is a placeholder until
we decide on how we want to mangle specific functions.
  • Loading branch information
zygoloid authored Jan 8, 2025
1 parent ab12da7 commit 9a5f2d7
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 25 deletions.
12 changes: 9 additions & 3 deletions toolchain/lower/mangler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,10 +139,16 @@ auto Mangler::Mangle(SemIR::FunctionId function_id,

MangleInverseQualifiedNameScope(os, function.parent_scope_id);

// TODO: Add proper support for generic entities. The ID we emit here will not
// be consistent across object files.
// TODO: Add proper support for mangling generic entities. For now we use a
// fingerprint of the specific arguments, which should be stable across files,
// but isn't necessarily stable across toolchain changes.
if (specific_id.is_valid()) {
os << "." << specific_id.index;
os << ".";
llvm::write_hex(
os,
fingerprinter_.GetOrCompute(
&sem_ir(), sem_ir().specifics().Get(specific_id).args_id),
llvm::HexPrintStyle::Lower, 16);
}

return os.str();
Expand Down
5 changes: 5 additions & 0 deletions toolchain/lower/mangler.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "toolchain/lower/file_context.h"
#include "toolchain/sem_ir/constant.h"
#include "toolchain/sem_ir/ids.h"
#include "toolchain/sem_ir/inst_fingerprinter.h"

namespace Carbon::Lower {

Expand Down Expand Up @@ -49,6 +50,10 @@ class Mangler {
}

FileContext& file_context_;

// TODO: If `file_context_` has an `InstNamer`, we could share its
// fingerprinter.
SemIR::InstFingerprinter fingerprinter_;
};

} // namespace Carbon::Lower
Expand Down
16 changes: 8 additions & 8 deletions toolchain/lower/testdata/function/generic/call.carbon
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,11 @@ fn G() {
// CHECK:STDOUT: call void @llvm.memcpy.p0.p0.i64(ptr align 1 %d.var, ptr align 1 @D.val.loc19_16, i64 0, i1 false), !dbg !9
// CHECK:STDOUT: call void @llvm.lifetime.start.p0(i64 4, ptr %n.var), !dbg !7
// CHECK:STDOUT: store i32 0, ptr %n.var, align 4, !dbg !10
// CHECK:STDOUT: call void @_CF.Main.129(ptr %c.var), !dbg !11
// CHECK:STDOUT: call void @_CF.Main.130(ptr %d.var), !dbg !12
// CHECK:STDOUT: call void @_CF.Main.15b1f98bd9cc0c5b(ptr %c.var), !dbg !11
// CHECK:STDOUT: call void @_CF.Main.2cc450fc05045897(ptr %d.var), !dbg !12
// CHECK:STDOUT: %.loc24 = load i32, ptr %n.var, align 4, !dbg !13
// CHECK:STDOUT: call void @_CF.Main.131(i32 %.loc24), !dbg !14
// CHECK:STDOUT: call void @_CF.Main.132(%type zeroinitializer), !dbg !15
// CHECK:STDOUT: call void @_CF.Main.9b813875c98e31f1(i32 %.loc24), !dbg !14
// CHECK:STDOUT: call void @_CF.Main.5754c7a55c7cbe4a(%type zeroinitializer), !dbg !15
// CHECK:STDOUT: ret void, !dbg !16
// CHECK:STDOUT: }
// CHECK:STDOUT:
Expand All @@ -58,13 +58,13 @@ fn G() {
// CHECK:STDOUT: ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
// CHECK:STDOUT: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
// CHECK:STDOUT:
// CHECK:STDOUT: declare void @_CF.Main.129(ptr)
// CHECK:STDOUT: declare void @_CF.Main.15b1f98bd9cc0c5b(ptr)
// CHECK:STDOUT:
// CHECK:STDOUT: declare void @_CF.Main.130(ptr)
// CHECK:STDOUT: declare void @_CF.Main.2cc450fc05045897(ptr)
// CHECK:STDOUT:
// CHECK:STDOUT: declare void @_CF.Main.131(i32)
// CHECK:STDOUT: declare void @_CF.Main.9b813875c98e31f1(i32)
// CHECK:STDOUT:
// CHECK:STDOUT: declare void @_CF.Main.132(%type)
// CHECK:STDOUT: declare void @_CF.Main.5754c7a55c7cbe4a(%type)
// CHECK:STDOUT:
// CHECK:STDOUT: ; uselistorder directives
// CHECK:STDOUT: uselistorder ptr @llvm.lifetime.start.p0, { 2, 1, 0 }
Expand Down
4 changes: 2 additions & 2 deletions toolchain/lower/testdata/function/generic/call_method.carbon
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ fn CallF() -> i32 {
// CHECK:STDOUT: call void @llvm.lifetime.start.p0(i64 4, ptr %n.var), !dbg !7
// CHECK:STDOUT: store i32 0, ptr %n.var, align 4, !dbg !9
// CHECK:STDOUT: %.loc20_14 = load i32, ptr %n.var, align 4, !dbg !10
// CHECK:STDOUT: %F.call = call i32 @_CF.C.Main.129(ptr %c.var, i32 %.loc20_14), !dbg !11
// CHECK:STDOUT: %F.call = call i32 @_CF.C.Main.9b813875c98e31f1(ptr %c.var, i32 %.loc20_14), !dbg !11
// CHECK:STDOUT: ret i32 %F.call, !dbg !12
// CHECK:STDOUT: }
// CHECK:STDOUT:
Expand All @@ -44,7 +44,7 @@ fn CallF() -> i32 {
// CHECK:STDOUT: ; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
// CHECK:STDOUT: declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #1
// CHECK:STDOUT:
// CHECK:STDOUT: declare i32 @_CF.C.Main.129(ptr, i32)
// CHECK:STDOUT: declare i32 @_CF.C.Main.9b813875c98e31f1(ptr, i32)
// CHECK:STDOUT:
// CHECK:STDOUT: ; uselistorder directives
// CHECK:STDOUT: uselistorder ptr @llvm.lifetime.start.p0, { 1, 0 }
Expand Down
55 changes: 43 additions & 12 deletions toolchain/sem_ir/inst_fingerprinter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ namespace Carbon::SemIR {
namespace {
struct Worklist {
// The file containing the instruction we're currently processing.
const File* sem_ir;
const File* sem_ir = nullptr;
// The instructions we need to compute fingerprints for.
llvm::SmallVector<std::pair<const File*, InstId>> todo;
// The contents of the current instruction as accumulated so far. This is used
Expand All @@ -28,6 +28,15 @@ struct Worklist {
// the cache if not already present.
Map<std::pair<const File*, InstId>, uint64_t>* fingerprints;

// Prepare to fingerprint a new instruction.
auto Prepare(const File* file) -> void {
sem_ir = file;
contents.clear();
}

// Finish fingerprinting and compute the fingerprint.
auto Finish() -> uint64_t { return llvm::stable_hash_combine(contents); }

// Add an invalid marker to the contents. This is used when the entity
// contains an invalid ID. This uses an arbitrary fixed value that is assumed
// to be unlikely to collide with a valid value.
Expand Down Expand Up @@ -286,15 +295,22 @@ struct Worklist {
Table[kind.ToIndex()](*this, arg);
}

// Ensure all the instructions on the todo list have fingerprints.
auto Run() -> void {
while (!todo.empty()) {
// Ensure all the instructions on the todo list have fingerprints. To avoid a
// re-lookup, returns the fingerprint of the first instruction on the todo
// list, and requires the todo list to be non-empty.
auto Run() -> uint64_t {
CARBON_CHECK(!todo.empty());
while (true) {
auto [next_sem_ir, next_inst_id] = todo.back();

// If we already have a fingerprint for this instruction, we have nothing
// to do. Just pop it from `todo`.
if (fingerprints->Contains(std::pair(next_sem_ir, next_inst_id))) {
if (auto lookup =
fingerprints->Lookup(std::pair(next_sem_ir, next_inst_id))) {
todo.pop_back();
if (todo.empty()) {
return lookup.value();
}
continue;
}

Expand All @@ -306,8 +322,7 @@ struct Worklist {
auto [arg0_kind, arg1_kind] = inst.ArgKinds();

// Prepare to fingerprint this instruction.
sem_ir = next_sem_ir;
contents.clear();
Prepare(next_sem_ir);

// Add the instruction's fields to the contents.
Add(inst.kind());
Expand All @@ -326,9 +341,12 @@ struct Worklist {
// pop it from the todo list. Otherwise, we leave it on the todo list so
// we can compute its fingerprint once we've finished the work we added.
if (todo.size() == init_size) {
auto fingerprint = llvm::stable_hash_combine(contents);
uint64_t fingerprint = Finish();
fingerprints->Insert(std::pair(next_sem_ir, next_inst_id), fingerprint);
todo.pop_back();
if (todo.empty()) {
return fingerprint;
}
}
}
}
Expand All @@ -337,11 +355,24 @@ struct Worklist {

auto InstFingerprinter::GetOrCompute(const File* file, InstId inst_id)
-> uint64_t {
Worklist worklist = {.sem_ir = nullptr,
.todo = {{file, inst_id}},
Worklist worklist = {.todo = {{file, inst_id}},
.fingerprints = &fingerprints_};
worklist.Run();
return fingerprints_.Lookup(std::pair(file, inst_id)).value();
return worklist.Run();
}

auto InstFingerprinter::GetOrCompute(const File* file,
InstBlockId inst_block_id) -> uint64_t {
Worklist worklist = {.todo = {}, .fingerprints = &fingerprints_};
worklist.Prepare(file);
worklist.Add(inst_block_id);
if (!worklist.todo.empty()) {
worklist.Run();
worklist.Prepare(file);
worklist.Add(inst_block_id);
}
CARBON_CHECK(worklist.todo.empty(),
"Should not require more than two passes.");
return worklist.Finish();
}

} // namespace Carbon::SemIR
9 changes: 9 additions & 0 deletions toolchain/sem_ir/inst_fingerprinter.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,18 @@ class InstFingerprinter {
// Gets or computes a fingerprint for the given instruction.
auto GetOrCompute(const File* file, InstId inst_id) -> uint64_t;

// Gets or computes a fingerprint for the given instruction block.
auto GetOrCompute(const File* file, InstBlockId inst_block_id) -> uint64_t;

private:
// The fingerprint for each instruction that has had its fingerprint computed,
// indexed by the InstId's index.
//
// TODO: Experiment with also caching fingerprints for instruction blocks once
// we can get realistic performance measurements for this. This would simplify
// the `GetOrCompute` overload for `InstBlockId`s, and may save some work if
// the same canonical inst block is used by multiple instructions, for example
// as a specific argument list.
Map<std::pair<const File*, InstId>, uint64_t> fingerprints_;
};

Expand Down

0 comments on commit 9a5f2d7

Please sign in to comment.