Skip to content

Commit fd72dbd

Browse files
committed
fix: stabilize toolchain fingerprint identity
1 parent 6482b5a commit fd72dbd

7 files changed

Lines changed: 427 additions & 3 deletions

File tree

.agents/docs/2026-05-15-fingerprint-stability-and-fastpath-coherence.md

Lines changed: 300 additions & 0 deletions
Large diffs are not rendered by default.

src/toolchain/detect.cppm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ detect(const std::filesystem::path& explicit_compiler) {
4747
if (!ver_r) return std::unexpected(ver_r.error());
4848

4949
const auto& vstr = *ver_r;
50+
tc.driverIdent = normalize_driver_output(vstr);
5051
auto head = first_line_of(vstr);
5152
auto headLower = lower_copy(head);
5253
auto fullLower = lower_copy(vstr);

src/toolchain/fingerprint.cppm

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
//
33
// Per docs/06-toolchain-and-fingerprint.md, the fingerprint MUST cover:
44
// 1. compiler id 2. compiler version
5-
// 3. compiler binary hash 4. target triple
5+
// 3. compiler driver identity 4. target triple
66
// 5. stdlib id+version 6. C++ standard
77
// 7. compile flags hash 8. mcpp version
88
// 9. dependency lock hash 10. std module BMI hash
@@ -93,7 +93,9 @@ Fingerprint compute_fingerprint(const FingerprintInputs& in) {
9393

9494
fp.parts[0] = std::string(tc.compiler_name());
9595
fp.parts[1] = tc.version;
96-
fp.parts[2] = tc.binaryPath.empty() ? "" : hash_file(tc.binaryPath);
96+
fp.parts[2] = !tc.driverIdent.empty()
97+
? hash_string(tc.driverIdent)
98+
: (tc.binaryPath.empty() ? "" : hash_file(tc.binaryPath));
9799
fp.parts[3] = tc.targetTriple;
98100
fp.parts[4] = std::format("{} {}", tc.stdlibId, tc.stdlibVersion);
99101
fp.parts[5] = in.cppStandard;

src/toolchain/model.cppm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ struct Toolchain {
1212
CompilerId compiler = CompilerId::Unknown;
1313
std::string version; // "15.1.0"
1414
std::filesystem::path binaryPath;
15+
std::string driverIdent; // normalized --version output
1516
std::string targetTriple; // "x86_64-linux-gnu"
1617
std::string stdlibId; // "libstdc++"
1718
std::string stdlibVersion;

src/toolchain/probe.cppm

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ std::string extract_version(std::string_view s);
1818
std::string first_line_of(std::string_view s);
1919
std::string lower_copy(std::string_view s);
2020
std::string trim_line(std::string s);
21+
std::string normalize_driver_output(std::string_view s);
2122

2223
std::vector<std::filesystem::path>
2324
discover_compiler_runtime_dirs(const std::filesystem::path& compilerBin);
@@ -128,6 +129,42 @@ std::string trim_line(std::string s) {
128129
return s;
129130
}
130131

132+
std::string normalize_driver_output(std::string_view s) {
133+
auto replace_local_paths = [](std::string line) {
134+
static constexpr std::array<std::string_view, 3> prefixes{
135+
"/home/", "/tmp/", "/var/"
136+
};
137+
for (auto prefix : prefixes) {
138+
std::size_t pos = 0;
139+
while ((pos = line.find(prefix, pos)) != std::string::npos) {
140+
auto end = pos;
141+
while (end < line.size()) {
142+
unsigned char c = static_cast<unsigned char>(line[end]);
143+
if (std::isspace(c) || line[end] == '\'' || line[end] == '"')
144+
break;
145+
++end;
146+
}
147+
line.replace(pos, end - pos, "<PATH>");
148+
pos += std::string_view("<PATH>").size();
149+
}
150+
}
151+
return line;
152+
};
153+
154+
std::string out;
155+
std::istringstream is(std::string{s});
156+
std::string line;
157+
while (std::getline(is, line)) {
158+
line = trim_line(std::move(line));
159+
if (line.empty()) continue;
160+
if (line.starts_with("PWD=")) continue;
161+
line = replace_local_paths(std::move(line));
162+
if (!out.empty()) out.push_back('\n');
163+
out += line;
164+
}
165+
return out;
166+
}
167+
131168
std::vector<std::filesystem::path>
132169
discover_compiler_runtime_dirs(const std::filesystem::path& compilerBin) {
133170
std::vector<std::filesystem::path> dirs;

tests/unit/test_fingerprint.cpp

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ FingerprintInputs baseline() {
1313
in.toolchain.compiler = CompilerId::GCC;
1414
in.toolchain.version = "16.1.0";
1515
in.toolchain.binaryPath = "/usr/bin/g++";
16+
in.toolchain.driverIdent = "g++ (xim-x-gcc 16.1.0) 16.1.0";
1617
in.toolchain.targetTriple = "x86_64-linux-gnu";
1718
in.toolchain.stdlibId = "libstdc++";
1819
in.toolchain.stdlibVersion = "16.1.0";
@@ -52,7 +53,7 @@ TEST(Fingerprint, ProducesSixteenHexChars) {
5253
TEST(Fingerprint, AllTenFieldsAffectHash) {
5354
EXPECT_DIFFERENT(in.toolchain.compiler = CompilerId::Clang);
5455
EXPECT_DIFFERENT(in.toolchain.version = "16.0.0");
55-
EXPECT_DIFFERENT(in.toolchain.binaryPath = "/elsewhere/g++");
56+
EXPECT_DIFFERENT(in.toolchain.driverIdent = "g++ (xim-x-gcc 15.1.0) 15.1.0");
5657
EXPECT_DIFFERENT(in.toolchain.targetTriple = "aarch64-linux-gnu");
5758
EXPECT_DIFFERENT(in.toolchain.stdlibId = "libc++");
5859
EXPECT_DIFFERENT(in.cppStandard = "c++26");
@@ -62,6 +63,15 @@ TEST(Fingerprint, AllTenFieldsAffectHash) {
6263
EXPECT_DIFFERENT(in.stdBmiHash = "ffffffffffffffff");
6364
}
6465

66+
// Two inputs that differ only in Toolchain::binaryPath (both keep the
// driverIdent set by baseline()) must yield the same fingerprint hex.
TEST(Fingerprint, StableAcrossBinaryPathsWhenDriverIdentMatches) {
    auto underMcpp = baseline();
    auto underXlings = baseline();

    underMcpp.toolchain.binaryPath =
        "/home/speak/.mcpp/registry/data/xpkgs/xim-x-gcc/16.1.0/bin/g++";
    underXlings.toolchain.binaryPath =
        "/home/speak/.xlings/data/xpkgs/xim-x-mcpp/0.0.14/registry/data/xpkgs/xim-x-gcc/16.1.0/bin/g++";

    const auto hexMcpp = compute_fingerprint(underMcpp).hex;
    const auto hexXlings = compute_fingerprint(underXlings).hex;
    EXPECT_EQ(hexMcpp, hexXlings);
}
74+
6575
TEST(Fingerprint, HashStringMatchesHashFile) {
6676
auto h1 = hash_string("hello");
6777
auto h2 = hash_string("hello");

tests/unit/test_toolchain_detect.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import std;
44
import mcpp.toolchain.detect;
5+
import mcpp.toolchain.probe;
56

67
using namespace mcpp::toolchain;
78

@@ -65,3 +66,75 @@ TEST(ToolchainDetect, ClangVersionOutputIsNotMisclassifiedByGccPaths) {
6566
EXPECT_EQ(tc->stdlibId, "libc++");
6667
EXPECT_FALSE(tc->hasImportStd);
6768
}
69+
70+
// ─── normalize_driver_output: path-free semantic identity ─────────────
71+
//
72+
// Background: the toolchain fingerprint used to hash the compiler binary
73+
// content (hash_file). When the same xim-x-gcc package is installed under
74+
// two different prefixes (~/.mcpp/... vs ~/.xlings/.../xim-x-mcpp/...),
75+
// the on-disk binaries can have different MD5s (build metadata, strip,
76+
// etc.) yet behave identically. We now use a normalized `--version`
77+
// string as the path-free identity instead.
78+
79+
// Leading/trailing whitespace on each line is removed and blank lines vanish;
// the remaining lines are joined by '\n' without a trailing newline.
TEST(NormalizeDriverOutput, TrimsWhitespaceAndCollapsesBlankLines) {
    const std::string noisy =
        " g++ (xim-x-gcc 16.1.0) 16.1.0\n"
        "\n"
        "Copyright (C) 2023 Free Software Foundation, Inc. \n"
        "\n\n"
        "This is free software; ...\n";

    const auto normalized = normalize_driver_output(noisy);

    EXPECT_EQ(normalized,
              "g++ (xim-x-gcc 16.1.0) 16.1.0\n"
              "Copyright (C) 2023 Free Software Foundation, Inc.\n"
              "This is free software; ...");
}
92+
93+
// The same gcc package installed at two different locations prints the same
// --version text, so both captures must normalize to the same identity.
// NOTE(review): the two captures below are byte-identical, so this check only
// pins that normalization is deterministic for equal input.
TEST(NormalizeDriverOutput, IsStableAcrossInstallPrefixes) {
    const std::string firstInstall =
        "g++ (xim-x-gcc 16.1.0) 16.1.0\n"
        "Copyright (C) 2023 Free Software Foundation, Inc.\n";
    const std::string secondInstall =
        "g++ (xim-x-gcc 16.1.0) 16.1.0\n"
        "Copyright (C) 2023 Free Software Foundation, Inc.\n";

    const auto identFirst = normalize_driver_output(firstInstall);
    const auto identSecond = normalize_driver_output(secondInstall);
    EXPECT_EQ(identFirst, identSecond);
}
105+
106+
// Two clang outputs whose only difference is the /home/... location of the
// configuration file must normalize identically, and no "/home/" fragment may
// survive in the normalized text.
TEST(NormalizeDriverOutput, ReplacesLocalInstallPaths) {
    const std::string viaMcpp =
        "clang version 20.1.7\n"
        "Configuration file: /home/speak/.mcpp/registry/data/xpkgs/llvm/bin/clang.cfg\n";
    const std::string viaXlings =
        "clang version 20.1.7\n"
        "Configuration file: /home/speak/.xlings/data/xpkgs/llvm/bin/clang.cfg\n";

    const auto identMcpp = normalize_driver_output(viaMcpp);
    const auto identXlings = normalize_driver_output(viaXlings);

    EXPECT_EQ(identMcpp, identXlings);
    EXPECT_EQ(identMcpp.find("/home/"), std::string::npos);
}
117+
118+
// Outputs that report different compiler versions must not collapse to the
// same normalized identity.
TEST(NormalizeDriverOutput, DistinguishesDifferentVersions) {
    const std::string gcc16 = "g++ (xim-x-gcc 16.1.0) 16.1.0\n";
    const std::string gcc15 = "g++ (xim-x-gcc 15.1.0) 15.1.0\n";

    const auto ident16 = normalize_driver_output(gcc16);
    const auto ident15 = normalize_driver_output(gcc15);
    EXPECT_NE(ident16, ident15);
}
123+
124+
// An empty string, or input made only of newlines, normalizes to "".
TEST(NormalizeDriverOutput, EmptyInputProducesEmpty) {
    const auto fromNothing = normalize_driver_output("");
    EXPECT_EQ(fromNothing, "");

    const auto fromBlankLines = normalize_driver_output("\n\n\n");
    EXPECT_EQ(fromBlankLines, "");
}
128+
129+
// ─── detect() populates driverIdent ─────────────────────────────────
130+
// detect() on the fake clang must succeed and leave a non-empty
// Toolchain::driverIdent that contains the "clang version 20.1.7" header.
TEST(ToolchainDetect, PopulatesDriverIdentFromVersionOutput) {
    auto fakeClang = make_fake_clang();
    TempDirGuard cleanup{fakeClang.parent_path()};

    auto detected = detect(fakeClang);
    ASSERT_TRUE(detected.has_value()) << detected.error().message;

    const auto& ident = detected->driverIdent;
    EXPECT_FALSE(ident.empty())
        << "detect() should populate Toolchain::driverIdent from --version output";
    EXPECT_NE(ident.find("clang version 20.1.7"), std::string::npos)
        << "driverIdent should contain the --version header: " << ident;
}

0 commit comments

Comments
 (0)