From cf539cca6af43ff7a0f2c601d6f5d3b8ea9a6bc1 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Apr 2026 10:46:22 -0400 Subject: [PATCH 1/3] fix: correct off-by-one bounds check in data event region lookup Context: Data events with nesting level n are logically attached to the region at regionStack[n-2]. The prior bounds check only tested whether the stack was shorter than rWant, missing two cases: a negative rWant (when mf_nesting is 0 or 1, which should have been caught by the nesting <= 1 guard but might slip through) and the case where rWant equals the stack length exactly, which would be an out-of-bounds index. Justification: Adding `rWant < 0` makes the guard explicit about the lower bound. Changing `<` to `<=` corrects the upper bound: a valid index into a slice of length L is 0..L-1, so `len >= rWant+1`, i.e. `rWant < len`, i.e. reject when `rWant >= len`, which is `len <= rWant`. The fix ensures the event is silently ignored (with a TODO log warning) rather than causing an out-of-bounds panic. Implementation: Single comparison change in apply__data_generic. No behavior change for well-formed event streams; only prevents a potential panic on malformed or unexpectedly nested data events. --- evt_apply.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/evt_apply.go b/evt_apply.go index 0119470..ba4d24d 100644 --- a/evt_apply.go +++ b/evt_apply.go @@ -844,7 +844,7 @@ func apply__data_generic(tr2 *trace2Dataset, evt *TrEvent) (err error) { return nil } rWant := evt.pm_generic_data.mf_nesting - 2 - if int64(len(th.regionStack)) < rWant { + if rWant < 0 || int64(len(th.regionStack)) <= rWant { // TODO log debug warning. 
return nil } From 2b956982f9b4e0215c5cafd33ea2ccecba2682b8 Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Apr 2026 14:15:15 -0400 Subject: [PATCH 2/3] feat: Add important_events capture to filter settings Context: Operators monitoring Git processes that run gvfs-helper subcommands need visibility into specific data event values (error strings, curl codes, etc.) even when verbose telemetry is disabled. At dl:summary level all data events are currently dropped, so there is no way to guarantee critical diagnostic values surface in the OTEL process span without enabling expensive full-verbose collection site-wide. Justification: The rules live in filter.yml, not summary.yml, because capture is a filtering concern: it determines which data events are guaranteed to appear regardless of detail level, paralleling how filter rules control verbosity. The captured values are stored in their own OTEL attribute (trace2.process.important_events) separate from trace2.process.summary, keeping aggregated metrics distinct from verbatim strings. The capture call in apply__data_generic runs before any early-return paths so that orphaned nested events (nesting > 1 with an empty region stack) are never silently dropped. Implementation: ImportantEventRule (category exact match, key_prefix prefix match, field_name output key) is added to FilterSettings and validated in parseFilterSettingsFromBuffer. TrProcess gains an importantEvents map that is allocated only when rules exist, so the nil check in apply__important_events serves as a fast no-op when the feature is unconfigured. apply__important_events in filter_settings.go matches each data event against the configured rules and appends matching values. emitProcessSpan emits trace2.process.important_events as a JSON object whenever the map is non-empty, at all detail levels. 
Co-Authored-By: Claude Sonnet 4.6 --- evt_apply.go | 8 + filter_settings.go | 71 +++- filter_settings_test.go | 75 ++++ important_events_test.go | 727 +++++++++++++++++++++++++++++++++++++++ trace2dataset.go | 15 +- trace2emitotlp.go | 8 +- trace2semconv.go | 9 +- 7 files changed, 900 insertions(+), 13 deletions(-) create mode 100644 important_events_test.go diff --git a/evt_apply.go b/evt_apply.go index ba4d24d..6b69000 100644 --- a/evt_apply.go +++ b/evt_apply.go @@ -830,6 +830,14 @@ func apply__data_generic(tr2 *trace2Dataset, evt *TrEvent) (err error) { // nesting level n-1 is stored at regionStack[n-2] (assuming the // Git process properly sets things up). + // Capture matching data values into importantEvents regardless of + // nesting level. This must run before any early returns below + // so that capture is not skipped when region attachment fails. + apply__important_events(tr2, + evt.pm_generic_data.mf_category, + evt.pm_generic_data.mf_key, + evt.pm_generic_data.mf_generic_value) + if evt.pm_generic_data.mf_nesting <= 1 { tr2.process.setGenericDataValue(evt.pm_generic_data.mf_category, evt.pm_generic_data.mf_key, evt.pm_generic_data.mf_generic_value) diff --git a/filter_settings.go b/filter_settings.go index 61c1ae0..95bf568 100644 --- a/filter_settings.go +++ b/filter_settings.go @@ -10,16 +10,36 @@ import ( // look for in the Trace2 event stream to help us decide how to // filter data for a particular command. 
type FilterSettings struct { - Keynames FilterKeynames `mapstructure:"keynames"` - Nicknames FilterNicknames `mapstructure:"nicknames"` - Rulesets FilterRulesets `mapstructure:"rulesets"` - Defaults FilterDefaults `mapstructure:"defaults"` + Keynames FilterKeynames `mapstructure:"keynames"` + Nicknames FilterNicknames `mapstructure:"nicknames"` + Rulesets FilterRulesets `mapstructure:"rulesets"` + Defaults FilterDefaults `mapstructure:"defaults"` + ImportantEvents []ImportantEventRule `mapstructure:"important_events"` // The set of custom rulesets defined in YML are each parsed // and loaded into definitions so that we can use them. rulesetDefs map[string]*RulesetDefinition } +// ImportantEventRule defines a rule for promoting values from data events +// that match a specific (category, key prefix) pair into the process +// summary, regardless of the active detail level. This lets operators +// guarantee that certain data event values are always captured and +// surfaced in the OTEL process span even when verbose telemetry is +// disabled. Multiple matching values are collected into an array. +type ImportantEventRule struct { + // Category is the data event category to match (exact match) + Category string `mapstructure:"category"` + + // KeyPrefix is the string prefix to match at the beginning of + // the data event's key field + KeyPrefix string `mapstructure:"key_prefix"` + + // FieldName is the name of the field in the summary object + // where matched values will be stored (always as an array) + FieldName string `mapstructure:"field_name"` +} + // FilterKeynames defines the names of the Git config settings that // will be used in `def_param` events to send repository/worktree // data to us. 
This lets a site have their own namespace for @@ -100,9 +120,52 @@ func parseFilterSettingsFromBuffer(data []byte, path string) (*FilterSettings, e } } + fieldNames := make(map[string]bool) + for i, rule := range fs.ImportantEvents { + if len(rule.Category) == 0 { + return nil, fmt.Errorf("important_events[%d]: category cannot be empty", i) + } + if len(rule.KeyPrefix) == 0 { + return nil, fmt.Errorf("important_events[%d]: key_prefix cannot be empty", i) + } + if len(rule.FieldName) == 0 { + return nil, fmt.Errorf("important_events[%d]: field_name cannot be empty", i) + } + if fieldNames[rule.FieldName] { + return nil, fmt.Errorf("important_events[%d]: duplicate field_name '%s'", i, rule.FieldName) + } + fieldNames[rule.FieldName] = true + } + return fs, nil } +// apply__important_events checks if a data event matches any configured +// important_events rules and appends the event's value to the +// importantEvents map if a match is found. Matching events are captured +// regardless of nesting level or detail level. +func apply__important_events(tr2 *trace2Dataset, category string, key string, value interface{}) { + if tr2.process.importantEvents == nil { + return + } + + if tr2.rcvr_base == nil || tr2.rcvr_base.RcvrConfig == nil { + return + } + + fs := tr2.rcvr_base.RcvrConfig.filterSettings + if fs == nil { + return + } + + for _, rule := range fs.ImportantEvents { + if category == rule.Category && strings.HasPrefix(key, rule.KeyPrefix) { + tr2.process.importantEvents[rule.FieldName] = append( + tr2.process.importantEvents[rule.FieldName], value) + } + } +} + // Add a ruleset to the filter settings. This is primarily for writing test code. 
func (fs *FilterSettings) addRuleset(rs_name string, path string, rsdef *RulesetDefinition) { if fs.Rulesets == nil { diff --git a/filter_settings_test.go b/filter_settings_test.go index c5abeff..68ce188 100644 --- a/filter_settings_test.go +++ b/filter_settings_test.go @@ -290,6 +290,81 @@ func x_TryLoadRuleset(t *testing.T, fs *FilterSettings, name string, path string // ////////////////////////////////////////////////////////////// +// ////////////////////////////////////////////////////////////// +// important_events validation tests + +func Test_ImportantEvents_Valid(t *testing.T) { + yml := ` +important_events: + - category: "gvfs-helper" + key_prefix: "error/" + field_name: "gvfs_helper_errors" + - category: "network" + key_prefix: "timeout/" + field_name: "network_timeouts" +` + fs, err := parseFilterSettingsFromBuffer([]byte(yml), "test.yml") + assert.NoError(t, err) + assert.NotNil(t, fs) + assert.Equal(t, 2, len(fs.ImportantEvents)) + assert.Equal(t, "gvfs-helper", fs.ImportantEvents[0].Category) + assert.Equal(t, "error/", fs.ImportantEvents[0].KeyPrefix) + assert.Equal(t, "gvfs_helper_errors", fs.ImportantEvents[0].FieldName) +} + +func Test_ImportantEvents_EmptyCategory_Rejected(t *testing.T) { + yml := ` +important_events: + - category: "" + key_prefix: "error/" + field_name: "errors" +` + _, err := parseFilterSettingsFromBuffer([]byte(yml), "test.yml") + assert.Error(t, err) + assert.Contains(t, err.Error(), "category cannot be empty") +} + +func Test_ImportantEvents_EmptyKeyPrefix_Rejected(t *testing.T) { + yml := ` +important_events: + - category: "gvfs-helper" + key_prefix: "" + field_name: "errors" +` + _, err := parseFilterSettingsFromBuffer([]byte(yml), "test.yml") + assert.Error(t, err) + assert.Contains(t, err.Error(), "key_prefix cannot be empty") +} + +func Test_ImportantEvents_EmptyFieldName_Rejected(t *testing.T) { + yml := ` +important_events: + - category: "gvfs-helper" + key_prefix: "error/" + field_name: "" +` + _, err := 
parseFilterSettingsFromBuffer([]byte(yml), "test.yml") + assert.Error(t, err) + assert.Contains(t, err.Error(), "field_name cannot be empty") +} + +func Test_ImportantEvents_DuplicateFieldName_Rejected(t *testing.T) { + yml := ` +important_events: + - category: "gvfs-helper" + key_prefix: "error/" + field_name: "shared_name" + - category: "network" + key_prefix: "timeout/" + field_name: "shared_name" +` + _, err := parseFilterSettingsFromBuffer([]byte(yml), "test.yml") + assert.Error(t, err) + assert.Contains(t, err.Error(), "duplicate field_name") +} + +// ////////////////////////////////////////////////////////////// + func Test_Nil_Nil_FilterSettings(t *testing.T) { dl, dl_debug := computeDetailLevel(nil, nil, x_qn) diff --git a/important_events_test.go b/important_events_test.go new file mode 100644 index 0000000..b89e740 --- /dev/null +++ b/important_events_test.go @@ -0,0 +1,727 @@ +package trace2receiver + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" +) + +// Test important_events rule matching with basic rules +func Test_ImportantEvents_Match_Basic(t *testing.T) { + fs := &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + } + + tr2 := &trace2Dataset{ + process: TrProcess{ + importantEvents: make(map[string][]interface{}), + }, + } + tr2.rcvr_base = &Rcvr_Base{ + RcvrConfig: &Config{ + filterSettings: fs, + }, + } + + apply__important_events(tr2, "gvfs-helper", "error/curl", "(curl:35) SSL connect error [hard_fail]") + apply__important_events(tr2, "gvfs-helper", "error/http", "(http:503) Service Unavailable") + + values, ok := tr2.process.importantEvents["gvfs_helper_errors"] + assert.True(t, ok) + assert.Equal(t, 2, len(values)) + assert.Equal(t, "(curl:35) SSL connect error [hard_fail]", values[0]) + assert.Equal(t, "(http:503) Service Unavailable", values[1]) +} + +// Test important_events rule matching with no match +func 
Test_ImportantEvents_Match_NoMatch(t *testing.T) { + fs := &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + } + + tr2 := &trace2Dataset{ + process: TrProcess{ + importantEvents: make(map[string][]interface{}), + }, + } + tr2.rcvr_base = &Rcvr_Base{ + RcvrConfig: &Config{ + filterSettings: fs, + }, + } + + // Wrong category + apply__important_events(tr2, "other-helper", "error/curl", "some error") + // Right category, wrong key prefix + apply__important_events(tr2, "gvfs-helper", "status/ok", "all good") + + assert.Equal(t, 0, len(tr2.process.importantEvents)) +} + +// Test important_events rule matching when no rules are configured +func Test_ImportantEvents_Match_NoConfig(t *testing.T) { + tr2 := &trace2Dataset{ + process: TrProcess{ + importantEvents: nil, + }, + } + tr2.rcvr_base = &Rcvr_Base{ + RcvrConfig: &Config{ + filterSettings: nil, + }, + } + + // Should not crash + apply__important_events(tr2, "gvfs-helper", "error/curl", "some error") +} + +// Test important_events values appear in their own attribute at dl:summary +func Test_ImportantEvents_EmittedAtSummaryLevel(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + rcvr := &Rcvr_Base{ + RcvrConfig: cfg, + } + + tr2 := NewTrace2Dataset(rcvr) + + if tr2.process.importantEvents == nil { + t.Fatal("importantEvents should be initialized") + } + + tr2.process.importantEvents["gvfs_helper_errors"] = append( + tr2.process.importantEvents["gvfs_helper_errors"], + "(curl:35) SSL connect error [hard_fail]") + + tr2.process.paramSetValues = make(map[string]string) + tr2.process.mainThread.lifetime.startTime = mustParseTime(t, "2024-01-01T10:00:00Z") + tr2.process.mainThread.lifetime.endTime = mustParseTime(t, "2024-01-01T10:00:10Z") + + traces := 
tr2.ToTraces(DetailLevelSummary, FilterKeynames{}) + + assert.Equal(t, 1, traces.ResourceSpans().Len()) + rs := traces.ResourceSpans().At(0) + ss := rs.ScopeSpans().At(0) + assert.Greater(t, ss.Spans().Len(), 0) + + processSpan := ss.Spans().At(0) + attrs := processSpan.Attributes() + + val, found := attrs.Get("trace2.process.important_events") + assert.True(t, found, "important_events should be present at dl:summary level") + + jsonStr := val.Str() + assert.Contains(t, jsonStr, "gvfs_helper_errors") + assert.Contains(t, jsonStr, "SSL connect error") +} + +// Test important_events with an integer value (data events can carry int64) +func Test_ImportantEvents_Match_IntValue(t *testing.T) { + fs := &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "perf", KeyPrefix: "count/", FieldName: "perf_counts"}, + }, + } + + tr2 := &trace2Dataset{ + process: TrProcess{ + importantEvents: make(map[string][]interface{}), + }, + } + tr2.rcvr_base = &Rcvr_Base{ + RcvrConfig: &Config{ + filterSettings: fs, + }, + } + + apply__important_events(tr2, "perf", "count/objects", int64(42)) + + values := tr2.process.importantEvents["perf_counts"] + assert.Equal(t, 1, len(values)) + assert.Equal(t, int64(42), values[0]) +} + +// Test end-to-end: parse a raw data event JSON and verify the value +// is captured into important_events, even when the region stack is empty +// (nesting > 1 with no matching region). +func Test_ImportantEvents_EndToEnd_NestedEvent(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + rcvr := &Rcvr_Base{ + RcvrConfig: cfg, + } + tr2 := NewTrace2Dataset(rcvr) + + // Feed the exact JSON from trace.json through parse+apply. + // This event has nesting=2 but no region stack, which previously + // would have caused the value to be silently dropped. 
+ raw := `{"event":"data","sid":"20260420T161123.706538Z-H27d9ce02-P0000f630","thread":"main","time":"2026-04-20T16:11:29.201996Z","file":"gvfs-helper.c","line":1079,"t_abs":5.306816,"t_rel":0.148534,"nesting":2,"category":"gvfs-helper","key":"error/curl","value":"(curl:35) SSL connect error [hard_fail]"}` + evt, err := parse_json([]byte(raw)) + assert.NoError(t, err) + assert.NotNil(t, evt) + + err = evt_apply(tr2, evt) + assert.NoError(t, err) + + assert.NotNil(t, tr2.process.importantEvents) + values, ok := tr2.process.importantEvents["gvfs_helper_errors"] + assert.True(t, ok, "gvfs_helper_errors should be in importantEvents") + if ok { + assert.Equal(t, 1, len(values)) + assert.Equal(t, "(curl:35) SSL connect error [hard_fail]", values[0]) + } +} + +// ================================================================ +// Full pipeline E2E tests: raw trace2 JSON events -> parse -> apply +// -> prepareDataset -> ToTraces -> extract OTLP span attributes -> +// verify important_events JSON content. +// ================================================================ + +// load_test_dataset_with_config is like load_test_dataset in +// evt_apply_test.go but accepts a Config so that important_events rules +// are active during event processing. 
+func load_test_dataset_with_config(t *testing.T, cfg *Config, events []string) (tr2 *trace2Dataset, sufficient bool) { + t.Helper() + + rcvr := &Rcvr_Base{ + RcvrConfig: cfg, + } + tr2 = NewTrace2Dataset(rcvr) + + for _, s := range events { + evt, err := parse_json([]byte(s)) + if err != nil { + t.Fatalf("parse of '%s' failed: %s", s, err.Error()) + } + if evt == nil { + continue + } + err = evt_apply(tr2, evt) + if err != nil { + if _, ok := err.(*RejectClientError); ok { + t.Fatalf("rejected: %s", err.Error()) + } + t.Fatalf("apply of '%s' failed: %s", s, err.Error()) + } + } + + sufficient = tr2.prepareDataset() + return tr2, sufficient +} + +// extractSummaryJSON runs the full OTLP conversion at a given detail +// level and returns the parsed summary JSON from the process span. +// Returns nil if the summary attribute is absent. +func extractSummaryJSON(t *testing.T, tr2 *trace2Dataset, dl FilterDetailLevel) map[string]interface{} { + t.Helper() + + traces := tr2.ToTraces(dl, FilterKeynames{}) + if traces.ResourceSpans().Len() == 0 { + t.Fatal("no resource spans") + } + ss := traces.ResourceSpans().At(0).ScopeSpans().At(0) + if ss.Spans().Len() == 0 { + t.Fatal("no spans") + } + + processSpan := ss.Spans().At(0) + attrs := processSpan.Attributes() + + summaryVal, found := attrs.Get(string(Trace2ProcessSummary)) + if !found { + return nil + } + + var result map[string]interface{} + err := json.Unmarshal([]byte(summaryVal.Str()), &result) + if err != nil { + t.Fatalf("failed to parse summary JSON: %s", err.Error()) + } + return result +} + +// extractImportantEventsJSON runs the full OTLP conversion at a given +// detail level and returns the parsed important_events JSON from the +// process span. Returns nil if the attribute is absent. 
+func extractImportantEventsJSON(t *testing.T, tr2 *trace2Dataset, dl FilterDetailLevel) map[string]interface{} { + t.Helper() + + traces := tr2.ToTraces(dl, FilterKeynames{}) + if traces.ResourceSpans().Len() == 0 { + t.Fatal("no resource spans") + } + ss := traces.ResourceSpans().At(0).ScopeSpans().At(0) + if ss.Spans().Len() == 0 { + t.Fatal("no spans") + } + + processSpan := ss.Spans().At(0) + attrs := processSpan.Attributes() + + val, found := attrs.Get(string(Trace2ProcessImportantEvents)) + if !found { + return nil + } + + var result map[string]interface{} + err := json.Unmarshal([]byte(val.Str()), &result) + if err != nil { + t.Fatalf("failed to parse important_events JSON: %s", err.Error()) + } + return result +} + +// Test: full pipeline with a process-level data event (nesting=1) +// captured and visible in the OTLP span at dl:summary. +func Test_E2E_ImportantEvents_ProcessLevel_AtSummaryDetailLevel(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_data_string(x_main, 1, "gvfs-helper", "error/curl", "(curl:35) SSL connect error"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie, "important_events should be present at dl:summary") + + raw, ok := ie["gvfs_helper_errors"] + assert.True(t, ok, "gvfs_helper_errors should be in important_events") + arr := raw.([]interface{}) + assert.Equal(t, 1, len(arr)) + assert.Equal(t, "(curl:35) SSL connect error", arr[0]) +} + +// Test: data event inside a region (nesting=2) is captured even when +// the region stack is properly set up. 
The value should appear in the +// important_events AND the region should have it in its own data. +func Test_E2E_ImportantEvents_InsideRegion(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_region_enter(x_main, 1, "gvfs-helper", "fetch", "fetching"), + x_make_data_string(x_main, 2, "gvfs-helper", "error/curl", "(curl:35) SSL connect error"), + x_make_region_leave(x_main, 1, "gvfs-helper", "fetch", "fetching"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + // important_events should have the captured value + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + raw, ok := ie["gvfs_helper_errors"] + assert.True(t, ok, "captured value should be in important_events") + arr := raw.([]interface{}) + assert.Equal(t, "(curl:35) SSL connect error", arr[0]) + + // The region should also have it in its own data + assert.Equal(t, 1, len(tr2.completedRegions)) + r := tr2.completedRegions[0] + assert.NotNil(t, r.dataValues) + assert.Equal(t, "(curl:35) SSL connect error", r.dataValues["gvfs-helper"]["error/curl"]) +} + +// Test: data event at nesting=2 with NO region on the stack (orphaned). +// Value should still be captured; region attachment fails silently. 
+func Test_E2E_ImportantEvents_OrphanedNesting(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + // No region_enter, so the region stack is empty + x_make_data_string(x_main, 2, "gvfs-helper", "error/curl", "(curl:35) SSL connect error"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + raw, ok := ie["gvfs_helper_errors"] + assert.True(t, ok, "captured value should be in important_events even without region") + arr := raw.([]interface{}) + assert.Equal(t, 1, len(arr)) + assert.Equal(t, "(curl:35) SSL connect error", arr[0]) +} + +// Test: multiple data events matching the same rule accumulate all values. 
+func Test_E2E_ImportantEvents_MultipleValues(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_data_string(x_main, 1, "gvfs-helper", "error/curl", "first error"), + x_make_data_string(x_main, 1, "gvfs-helper", "error/http", "second error"), + x_make_data_string(x_main, 1, "gvfs-helper", "error/tls", "third error"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + arr := ie["gvfs_helper_errors"].([]interface{}) + assert.Equal(t, 3, len(arr)) + assert.Equal(t, "first error", arr[0]) + assert.Equal(t, "second error", arr[1]) + assert.Equal(t, "third error", arr[2]) +} + +// Test: data events that do NOT match the pattern should NOT appear +// in the important_events. 
+func Test_E2E_ImportantEvents_NonMatchingEventsExcluded(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + // Wrong category + x_make_data_string(x_main, 1, "other-helper", "error/curl", "wrong category"), + // Right category, wrong key prefix + x_make_data_string(x_main, 1, "gvfs-helper", "status/ok", "wrong prefix"), + // Right category, key doesn't start with prefix + x_make_data_string(x_main, 1, "gvfs-helper", "warn/timeout", "wrong prefix too"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.Nil(t, ie, "non-matching events should not produce important_events attribute") +} + +// Test: integer values (data events can carry int64). +func Test_E2E_ImportantEvents_IntegerValue(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "perf", KeyPrefix: "count/", FieldName: "perf_counts"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_data_intmax(x_main, 1, "perf", "count/objects", 42), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + arr := ie["perf_counts"].([]interface{}) + assert.Equal(t, 1, len(arr)) + // JSON numbers unmarshal as float64 + assert.Equal(t, float64(42), arr[0]) +} + +// Test: important_events coexists with message_patterns and region_timers +// in the same output without interference. 
+func Test_E2E_ImportantEvents_CoexistsWithOtherRuleTypes(t *testing.T) { + cfg := &Config{ + summary: &SummarySettings{ + MessagePatterns: []MessagePatternRule{ + {Prefix: "error:", FieldName: "error_msg_count"}, + }, + RegionTimers: []RegionTimerRule{ + {Category: "gvfs-helper", Label: "fetch", CountField: "fetch_count"}, + }, + }, + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + // Trigger message pattern + x_make_error("error: something broke", "error: %s"), + // Trigger region timer + x_make_region_enter(x_main, 1, "gvfs-helper", "fetch", "fetching"), + // Trigger important_events inside region + x_make_data_string(x_main, 2, "gvfs-helper", "error/curl", "(curl:35) fail"), + x_make_region_leave(x_main, 1, "gvfs-helper", "fetch", "fetching"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + summary := extractSummaryJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, summary) + + // Message pattern count + assert.Equal(t, float64(1), summary["error_msg_count"]) + // Region timer count + assert.Equal(t, float64(1), summary["fetch_count"]) + + // important_events value appears in its separate attribute + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + arr := ie["gvfs_helper_errors"].([]interface{}) + assert.Equal(t, 1, len(arr)) + assert.Equal(t, "(curl:35) fail", arr[0]) +} + +// Test: captured values appear at ALL detail levels, not just verbose. 
+func Test_E2E_ImportantEvents_AllDetailLevels(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_data_string(x_main, 1, "gvfs-helper", "error/curl", "(curl:35) fail"), + x_make_atexit(), + } + + for _, dl := range []FilterDetailLevel{DetailLevelSummary, DetailLevelProcess, DetailLevelVerbose} { + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, dl) + assert.NotNil(t, ie, "important_events should be present at detail level %d", dl) + arr := ie["gvfs_helper_errors"].([]interface{}) + assert.Equal(t, 1, len(arr)) + assert.Equal(t, "(curl:35) fail", arr[0]) + } +} + +// Test: when no important_events are configured, data events do NOT +// create the attribute (no spurious empty arrays). +func Test_E2E_ImportantEvents_NoPatternsConfigured(t *testing.T) { + cfg := &Config{ + summary: &SummarySettings{ + MessagePatterns: []MessagePatternRule{ + {Prefix: "error:", FieldName: "error_count"}, + }, + }, + filterSettings: &FilterSettings{}, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_data_string(x_main, 1, "gvfs-helper", "error/curl", "some error"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.Nil(t, ie, "should not have important_events when no rules configured") +} + +// Test: with no summary config at all (summary is nil), data events +// are processed without crashing. 
+func Test_E2E_ImportantEvents_NoSummaryConfig(t *testing.T) { + cfg := &Config{ + summary: nil, + filterSettings: &FilterSettings{}, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_data_string(x_main, 1, "gvfs-helper", "error/curl", "some error"), + x_make_data_string(x_main, 2, "gvfs-helper", "error/http", "nested error"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + summary := extractSummaryJSON(t, tr2, DetailLevelSummary) + assert.Nil(t, summary, "no summary when not configured") + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.Nil(t, ie, "no important_events when no rules configured") +} + +// Test: data events on a non-main thread are still captured. +func Test_E2E_ImportantEvents_NonMainThread(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_thread_start("worker01"), + x_make_region_enter("worker01", 1, "gvfs-helper", "fetch", "fetching"), + x_make_data_string("worker01", 2, "gvfs-helper", "error/curl", "thread error"), + x_make_region_leave("worker01", 1, "gvfs-helper", "fetch", "fetching"), + x_make_thread_exit("worker01"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + arr := ie["gvfs_helper_errors"].([]interface{}) + assert.Equal(t, 1, len(arr)) + assert.Equal(t, "thread error", arr[0]) +} + +// Test: deeply nested data event (nesting=5) with only partial region +// stack. The value should still be captured even though region attachment fails. 
+func Test_E2E_ImportantEvents_DeepNesting_PartialRegionStack(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_helper_errors"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + // Only push one region, but data claims nesting=5 + x_make_region_enter(x_main, 1, "gvfs-helper", "fetch", "fetching"), + x_make_data_string(x_main, 5, "gvfs-helper", "error/curl", "deep nested error"), + x_make_region_leave(x_main, 1, "gvfs-helper", "fetch", "fetching"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + arr := ie["gvfs_helper_errors"].([]interface{}) + assert.Equal(t, 1, len(arr)) + assert.Equal(t, "deep nested error", arr[0]) +} + +// Test: multiple rules matching different categories in the same +// event stream produce independent fields. 
+func Test_E2E_ImportantEvents_MultipleRules(t *testing.T) { + cfg := &Config{ + filterSettings: &FilterSettings{ + ImportantEvents: []ImportantEventRule{ + {Category: "gvfs-helper", KeyPrefix: "error/", FieldName: "gvfs_errors"}, + {Category: "network", KeyPrefix: "timeout/", FieldName: "network_timeouts"}, + }, + }, + } + + events := []string{ + x_make_version(), + x_make_start(), + x_make_cmd_name(), + x_make_data_string(x_main, 1, "gvfs-helper", "error/curl", "curl error"), + x_make_data_string(x_main, 1, "network", "timeout/dns", "DNS timeout"), + x_make_data_string(x_main, 1, "network", "timeout/connect", "connect timeout"), + // This matches neither rule + x_make_data_string(x_main, 1, "unrelated", "error/foo", "should not appear"), + x_make_atexit(), + } + + tr2, sufficient := load_test_dataset_with_config(t, cfg, events) + assert.True(t, sufficient) + + ie := extractImportantEventsJSON(t, tr2, DetailLevelSummary) + assert.NotNil(t, ie) + + gvfs := ie["gvfs_errors"].([]interface{}) + assert.Equal(t, 1, len(gvfs)) + assert.Equal(t, "curl error", gvfs[0]) + + net := ie["network_timeouts"].([]interface{}) + assert.Equal(t, 2, len(net)) + assert.Equal(t, "DNS timeout", net[0]) + assert.Equal(t, "connect timeout", net[1]) + + _, ok := ie["unrelated"] + assert.False(t, ok, "unrelated category should not appear") +} diff --git a/trace2dataset.go b/trace2dataset.go index c3f267a..499faff 100644 --- a/trace2dataset.go +++ b/trace2dataset.go @@ -131,6 +131,10 @@ type TrProcess struct { summary *SummaryAccumulator + // importantEvents holds verbatim-captured values from data events that + // matched important_events rules in the filter settings. 
+ importantEvents map[string][]interface{} + qualifiedNames QualifiedNames } @@ -267,9 +271,14 @@ func NewTrace2Dataset(rcvr_base *Rcvr_Base) *trace2Dataset { tr2.pii = make(map[string]string) tr2.exec = make(map[int64]*TrExec) - // Initialize summary accumulator if configured - if rcvr_base != nil && rcvr_base.RcvrConfig != nil && rcvr_base.RcvrConfig.summary != nil { - tr2.process.summary = configuredSummary(rcvr_base.RcvrConfig.summary) + if rcvr_base != nil && rcvr_base.RcvrConfig != nil { + cfg := rcvr_base.RcvrConfig + if cfg.summary != nil { + tr2.process.summary = configuredSummary(cfg.summary) + } + if cfg.filterSettings != nil && len(cfg.filterSettings.ImportantEvents) > 0 { + tr2.process.importantEvents = make(map[string][]interface{}) + } } return tr2 diff --git a/trace2emitotlp.go b/trace2emitotlp.go index cf289a2..2ba826f 100644 --- a/trace2emitotlp.go +++ b/trace2emitotlp.go @@ -277,8 +277,8 @@ func emitProcessSpan(span *ptrace.Span, tr2 *trace2Dataset, dl FilterDetailLevel } } - // Emit summary regardless of detail level since it's user-configured - // and opt-in (not generic noise like all process timers/counters) + // Emit summary and important_events regardless of detail level since + // both are user-configured and opt-in. 
if tr2.process.summary != nil { summaryMap := tr2.process.summary.toMap() if len(summaryMap) > 0 { @@ -286,6 +286,10 @@ func emitProcessSpan(span *ptrace.Span, tr2 *trace2Dataset, dl FilterDetailLevel sm.PutStr(string(Trace2ProcessSummary), string(jargs)) } } + if len(tr2.process.importantEvents) > 0 { + jargs, _ := json.Marshal(tr2.process.importantEvents) + sm.PutStr(string(Trace2ProcessImportantEvents), string(jargs)) + } if WantProcessTimersCountersAndData(dl) { if tr2.process.dataValues != nil && len(tr2.process.dataValues) > 0 { diff --git a/trace2semconv.go b/trace2semconv.go index be72beb..287552c 100644 --- a/trace2semconv.go +++ b/trace2semconv.go @@ -101,10 +101,11 @@ const ( Trace2RepoNickname = attribute.Key("trace2.repo.nickname") - Trace2ProcessData = attribute.Key("trace2.process.data") - Trace2ProcessTimers = attribute.Key("trace2.process.timers") - Trace2ProcessCounters = attribute.Key("trace2.process.counters") - Trace2ProcessSummary = attribute.Key("trace2.process.summary") + Trace2ProcessData = attribute.Key("trace2.process.data") + Trace2ProcessTimers = attribute.Key("trace2.process.timers") + Trace2ProcessCounters = attribute.Key("trace2.process.counters") + Trace2ProcessSummary = attribute.Key("trace2.process.summary") + Trace2ProcessImportantEvents = attribute.Key("trace2.process.important_events") Trace2ThreadTimers = attribute.Key("trace2.thread.timers") Trace2ThreadCounters = attribute.Key("trace2.thread.counters") From 4bc91290851bc0e58a58ca5322672bd1e0dd5afc Mon Sep 17 00:00:00 2001 From: Derrick Stolee Date: Fri, 24 Apr 2026 14:15:30 -0400 Subject: [PATCH 3/3] docs: Document important_events filter configuration Context: The important_events feature added in the previous commit has no user-facing documentation. Operators need to know the YAML syntax, the semantics of each field, the output attribute name, and how the feature relates to summary.yml so they can distinguish verbatim capture from aggregated metrics. 
Implementation: config-filter-settings.md gains a new "Important Events" section with YAML syntax, field descriptions, and a worked example showing gvfs-helper error capture. The filter syntax summary at the bottom is extended with the important_events block so the complete schema appears in one place. configure-custom-collector.md documents the summary: pathname key (previously undocumented in the receiver config example) and adds a cross-reference to important_events. The summary example YAML adds a note directing readers to filter.yml for verbatim value capture, distinguishing it from the aggregated summary output. Co-Authored-By: Claude Sonnet 4.6 --- Docs/Examples/summary_example.yml | 3 ++ Docs/config-filter-settings.md | 47 ++++++++++++++++++++++++++++++ Docs/configure-custom-collector.md | 21 +++++++++++++ 3 files changed, 71 insertions(+) diff --git a/Docs/Examples/summary_example.yml b/Docs/Examples/summary_example.yml index c02a237..9b4a584 100644 --- a/Docs/Examples/summary_example.yml +++ b/Docs/Examples/summary_example.yml @@ -44,3 +44,6 @@ region_timers: # "batchTime": 12.3 # } # } +# +# To also capture specific data event values verbatim (in the separate +# trace2.process.important_events attribute), add important_events rules to your filter.yml. diff --git a/Docs/config-filter-settings.md b/Docs/config-filter-settings.md index 86be966..d78a63b 100644 --- a/Docs/config-filter-settings.md +++ b/Docs/config-filter-settings.md @@ -254,6 +254,47 @@ the `ruleset_key`.) +## Important Events + +In addition to controlling verbosity, the `filter.yml` file can +declare a list of data events that should always be captured verbatim, +regardless of the active detail level. This lets operators guarantee +that specific Trace2 data values are always surfaced in the OTEL +process span even when verbose telemetry is disabled. + +Each rule matches data events by `category` (exact match) and +`key_prefix` (prefix match on the event's key field).
All matching +values are collected into an array under the specified `field_name` in +the `trace2.process.important_events` span attribute. + +``` +important_events: + - category: + key_prefix: + field_name: + ... +``` + +For example, to always capture error details from a `gvfs-helper` +subprocess regardless of how verbosity is configured: + +``` +important_events: + - category: "gvfs-helper" + key_prefix: "error/" + field_name: "gvfs_helper_errors" +``` + +This would produce the following in the OTEL process span: + +``` +"trace2.process.important_events": { + "gvfs_helper_errors": ["(curl:35) SSL connect error [hard_fail]"] +} +``` + + + ## Filter Settings Syntax Now that all of the concepts have been introduced, we can describe @@ -277,6 +318,12 @@ rulesets: defaults: ruleset: | + +important_events: + - category: + key_prefix: + field_name: + ... ``` The value of the `defaults.ruleset` parameter will be used when a Git diff --git a/Docs/configure-custom-collector.md b/Docs/configure-custom-collector.md index ea5547d..6e4667f 100644 --- a/Docs/configure-custom-collector.md +++ b/Docs/configure-custom-collector.md @@ -46,6 +46,7 @@ receivers: pipe: pii: filter: + summary: ``` For example: @@ -57,6 +58,7 @@ receivers: pipe: "//./pipe/my-collector.pipe" pii: "/usr/local/my-collector/pii.yml" filter: "/usr/local/my-collector/filter.yml" + summary: "/usr/local/my-collector/summary.yml" ``` ### `` (Required on Unix) @@ -117,3 +119,22 @@ generated OTEL telemetry data. This is optional. If omitted, summary-level telemetry will be emitted. See [config filter settings](./config-filter-settings.md) for details. + +### `` (Optional) + +The pathname to a `summary.yml` file controlling which trace2 events +are aggregated into the `trace2.process.summary` attribute on the OTEL +process span. This is optional. If omitted, no aggregated summary +metrics are emitted. 
+ +The summary is emitted at all detail levels (including `dl:summary`), +making it useful for surfacing aggregated statistics without requiring +verbose telemetry. + +See the [summary example](./Examples/summary_example.yml) for a +complete example configuration. + +To capture specific data event values verbatim (emitted in a separate +`trace2.process.important_events` span attribute), use the +`important_events` section of the filter settings. See +[config filter settings](./config-filter-settings.md) for details.