From 9d81f37d87900f0d399ea78d062cf4371e18cf73 Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 20 May 2026 11:56:15 +0200 Subject: [PATCH 1/2] feat(label_namer): skip allocation for already-compliant labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a single forward-scan predicate canFastPathLabel that detects when LabelNamer.Build would return its input unchanged. On a hit, Build returns the input string directly with zero heap allocations, addressing the "already-compliant labels are wastefully reallocated" half of #68. The predicate covers the four LabelNamer config combinations and the reserved-label round-trip (__...__) where the inner range sanitizes to itself. A property test asserts predicate-Build agreement across every case in labelTestCases × the four configs; testing.AllocsPerRun confirms the fast path is allocation-free. Benchmarks on Apple M4 Pro: already-valid label: 15.77 ns/op 0 B/op 0 allocs/op LabelWithCapitalLetters: 18.67 ns/op 0 B/op 0 allocs/op _label_starting_with_underscore (default config): 25.27 ns/op 0 B/op 0 allocs/op Signed-off-by: Arve Knudsen --- go.mod | 2 +- label_namer.go | 4 +++ label_namer_bench_test.go | 7 +++- label_namer_test.go | 72 +++++++++++++++++++++++++++++++++++++++ strconv.go | 50 +++++++++++++++++++++++++++ 5 files changed, 133 insertions(+), 2 deletions(-) diff --git a/go.mod b/go.mod index 9969a02..f151e1a 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,5 @@ module github.com/prometheus/otlptranslator -go 1.23.0 +go 1.24.0 toolchain go1.24.1 diff --git a/label_namer.go b/label_namer.go index 368ceda..9f71c9d 100644 --- a/label_namer.go +++ b/label_namer.go @@ -74,6 +74,10 @@ func (ln *LabelNamer) Build(label string) (string, error) { return label, nil } + if canFastPathLabel(label, ln.PreserveMultipleUnderscores, ln.UnderscoreLabelSanitization) { + return label, nil + } + normalizedName := sanitizeLabelName(label, ln.PreserveMultipleUnderscores) // If label starts with a number, prepend with "key_". diff --git a/label_namer_bench_test.go b/label_namer_bench_test.go index d39f2ab..ed8291f 100644 --- a/label_namer_bench_test.go +++ b/label_namer_bench_test.go @@ -10,6 +10,10 @@ var labelBenchmarkInputs = []struct { name: "empty label", label: "", }, + { + name: "already-valid label", + label: "http_method_total", + }, { name: "label with colons", label: "label:with:colons", @@ -48,7 +52,8 @@ func BenchmarkNormalizeLabel(b *testing.B) { labelNamer := LabelNamer{UTF8Allowed: false} for _, input := range labelBenchmarkInputs { b.Run(input.name, func(b *testing.B) { - for i := 0; i < b.N; i++ { + b.ReportAllocs() + for b.Loop() { //nolint:errcheck labelNamer.Build(input.label) } diff --git a/label_namer_test.go b/label_namer_test.go index 6e3d5ce..50c32ca 100644 --- a/label_namer_test.go +++ b/label_namer_test.go @@ -40,6 +40,11 @@ var labelTestCases = []struct { sanitizedMultipleUnderscores: "label_with_foreign_characters___", }, {label: "label.with.dots", sanitized: "label_with_dots"}, + { + label: ".foo", + sanitized: "_foo", + sanitizedUnderscoreLabelSanitization: "key_foo", + }, {label: "123label", sanitized: "key_123label"}, { label: "_label_starting_with_underscore", @@ -215,3 +220,70 @@ func TestBuildLabel_UTF8Allowed(t *testing.T) { }) } } + +// TestCanFastPathLabel verifies that the fast-path predicate agrees with +// LabelNamer.Build across every entry in labelTestCases × the four configs. +// Whenever canFastPathLabel returns true, Build must succeed and return the +// input unchanged; whenever Build returns the input unchanged for a valid +// input, canFastPathLabel must return true. +func TestCanFastPathLabel(t *testing.T) { + configs := []struct { + name string + preserveMultipleUnderscores bool + underscoreLabelSanitization bool + }{ + {"default", false, false}, + {"preserve multiple underscores", true, false}, + {"underscore label sanitization", false, true}, + {"both", true, true}, + } + for _, cfg := range configs { + t.Run(cfg.name, func(t *testing.T) { + for _, tt := range labelTestCases { + t.Run(tt.label, func(t *testing.T) { + if tt.label == "" { + return + } + + namer := LabelNamer{ + PreserveMultipleUnderscores: cfg.preserveMultipleUnderscores, + UnderscoreLabelSanitization: cfg.underscoreLabelSanitization, + } + got, err := namer.Build(tt.label) + fast := canFastPathLabel(tt.label, cfg.preserveMultipleUnderscores, cfg.underscoreLabelSanitization) + + if fast { + if err != nil { + t.Fatalf("canFastPathLabel=true but Build returned error: %s", err) + } + if got != tt.label { + t.Fatalf("canFastPathLabel=true but Build returned %q, want input %q", got, tt.label) + } + return + } + + // fast == false: nothing to assert about Build, except that if + // it succeeded with output equal to input, the predicate missed a + // no-op case (a false negative). False negatives are correct but + // suboptimal; we flag them so the predicate stays tight. + if err == nil && got == tt.label { + t.Fatalf("canFastPathLabel=false but Build returned input unchanged (false negative)") + } + }) + } + }) + } +} + +// TestLabelNamerBuildZeroAlloc asserts that Build is allocation-free on the +// fast path. The chosen input is already Prometheus-compliant; it must be +// returned unchanged with no heap allocations. +func TestLabelNamerBuildZeroAlloc(t *testing.T) { + namer := LabelNamer{} + got := testing.AllocsPerRun(100, func() { + _, _ = namer.Build("http_method_total") + }) + if got > 0 { + t.Fatalf("Build allocated %f times per run on the fast path, want 0", got) + } +} diff --git a/strconv.go b/strconv.go index 9040432..7090130 100644 --- a/strconv.go +++ b/strconv.go @@ -21,6 +21,7 @@ package otlptranslator import ( "strings" + "unicode" ) // sanitizeLabelName replaces any characters not valid according to the @@ -75,6 +76,55 @@ func isValidCompliantLabelChar(r rune) bool { return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || (r >= '0' && r <= '9') } +// canFastPathLabel reports whether LabelNamer.Build would return label unchanged when UTF8Allowed is false. +// When it returns true, the label can be returned directly. The predicate must remain +// consistent with sanitizeLabelName and the post-sanitize prefix logic in LabelNamer.Build. +func canFastPathLabel(label string, preserveMultipleUnderscores, underscoreLabelSanitization bool) bool { + n := len(label) + if n == 0 { + return false + } + + // Leading digit triggers a "key_" prepend. + if unicode.IsDigit(rune(label[0])) { + return false + } + // Single leading '_' under sanitization triggers a "key" prepend. + if underscoreLabelSanitization && strings.HasPrefix(label, "_") && !strings.HasPrefix(label, "__") { + return false + } + + // Reserved labels (__...__) under !preserveMultipleUnderscores get stripped, + // sanitized, then re-wrapped. The output equals the input iff the inner range + // already sanitizes to itself. + start, end := 0, n + if !preserveMultipleUnderscores && n >= 4 && strings.HasPrefix(label, "__") && strings.HasSuffix(label, "__") { + start, end = 2, n-2 + } + + prevWasUnderscore := false + sawNonUnderscore := false + for i := start; i < end; i++ { + c := label[i] + if !isValidCompliantLabelChar(rune(c)) && c != '_' { + // Non-ASCII bytes (lead/continuation of multi-byte runes) fall here. + return false + } + if c == '_' { + if !preserveMultipleUnderscores && prevWasUnderscore { + return false + } + prevWasUnderscore = true + } else { + prevWasUnderscore = false + sawNonUnderscore = true + } + } + // An all-underscore (or empty inner) result would hit Build's hasUnderscoresOnly + // error path; let the slow path produce the error. + return sawNonUnderscore +} + // isReservedLabel checks if a label is a reserved label. // Reserved labels are labels that start and end with exactly __. // The returned label name is the label name without the __ prefix and suffix. From f06b2efc6ea8f82d8b27120cd950ea61679dd30e Mon Sep 17 00:00:00 2001 From: Arve Knudsen Date: Wed, 20 May 2026 14:08:49 +0200 Subject: [PATCH 2/2] perf(label_namer): fold reserved-label wrappers into builder MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit sanitizeLabelName's reserved-label branch returned "__" + b.String() + "__", which produced two allocations: one for the builder buffer and one for the string concatenation that wraps it. Write the "__" wrappers directly into the strings.Builder before and after the sanitize loop instead; b.Grow(nameLength) already accounts for the worst-case length (stripped inner ≤ nameLength-4, plus 4 wrapper bytes). Add "__reserved__label__name__" as a benchmark input so the change is visible. benchstat (Apple M4 Pro, -count=10): reserved_label: 84.71 ns/op → 66.84 ns/op (-21.1%) 56 B/op → 32 B/op (-42.9%) 2 allocs/op → 1 allocs/op (-50.0%) Other rows unchanged. Signed-off-by: Arve Knudsen --- label_namer_bench_test.go | 4 ++++ strconv.go | 5 ++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/label_namer_bench_test.go b/label_namer_bench_test.go index ed8291f..ea6e9c5 100644 --- a/label_namer_bench_test.go +++ b/label_namer_bench_test.go @@ -46,6 +46,10 @@ var labelBenchmarkInputs = []struct { name: "label starting with 2 underscores", label: "__label_starting_with_2underscores", }, + { + name: "reserved label", + label: "__reserved__label__name__", + }, } func BenchmarkNormalizeLabel(b *testing.B) { diff --git a/strconv.go b/strconv.go index 7090130..fd29215 100644 --- a/strconv.go +++ b/strconv.go @@ -53,6 +53,9 @@ func sanitizeLabelName(name string, preserveMultipleUnderscores bool) string { // Collapse multiple underscores while replacing invalid characters. var b strings.Builder b.Grow(nameLength) + if isReserved { + b.WriteString("__") + } prevWasUnderscore := false for _, r := range name { @@ -66,7 +69,7 @@ func sanitizeLabelName(name string, preserveMultipleUnderscores bool) string { } } if isReserved { - return "__" + b.String() + "__" + b.WriteString("__") } return b.String() }