From 81dc810d58a26ddfbfac3063ee3868d9c8431489 Mon Sep 17 00:00:00 2001 From: chatton Date: Wed, 25 Feb 2026 12:16:03 +0000 Subject: [PATCH 01/19] refactor: move spamoor benchmark into testify suite in test/e2e/benchmark - Create test/e2e/benchmark/ subpackage with SpamoorSuite (testify/suite) - Move spamoor smoke test into suite as TestSpamoorSmoke - Split helpers into focused files: traces.go, output.go, metrics.go - Introduce resultWriter for defer-based benchmark JSON output - Export shared symbols from evm_test_common.go for cross-package use - Restructure CI to fan-out benchmark jobs and fan-in publishing - Run benchmarks on PRs only when benchmark-related files change --- .github/workflows/benchmark.yml | 111 ++++++++- test/e2e/benchmark/metrics.go | 48 ++++ test/e2e/benchmark/output.go | 77 +++++++ test/e2e/benchmark/spamoor_smoke_test.go | 114 +++++++++ test/e2e/benchmark/suite_test.go | 144 ++++++++++++ test/e2e/benchmark/traces.go | 67 ++++++ test/e2e/evm_contract_bench_test.go | 6 +- test/e2e/evm_force_inclusion_e2e_test.go | 6 +- test/e2e/evm_full_node_e2e_test.go | 10 +- test/e2e/evm_spamoor_smoke_test.go | 281 ----------------------- test/e2e/evm_test_common.go | 75 +++--- test/e2e/failover_e2e_test.go | 4 +- 12 files changed, 605 insertions(+), 338 deletions(-) create mode 100644 test/e2e/benchmark/metrics.go create mode 100644 test/e2e/benchmark/output.go create mode 100644 test/e2e/benchmark/spamoor_smoke_test.go create mode 100644 test/e2e/benchmark/suite_test.go create mode 100644 test/e2e/benchmark/traces.go delete mode 100644 test/e2e/evm_spamoor_smoke_test.go diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 15cb1a4ff6..2f8fcdb1c7 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -5,6 +5,15 @@ permissions: {} push: branches: - main + pull_request: + branches: + - main + paths: + - 'test/e2e/benchmark/**' + - 'test/e2e/evm_contract_bench_test.go' + - 'test/e2e/evm_test_common.go' + - 'test/e2e/sut_helper.go' + - '.github/workflows/benchmark.yml' workflow_dispatch: jobs: @@ -12,9 +21,6 @@ jobs: name: EVM Contract Benchmark runs-on: ubuntu-latest timeout-minutes: 30 - permissions: - contents: write - issues: write steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 - name: Set up Go @@ -29,30 +35,113 @@ jobs: run: | cd test/e2e && go test -tags evm -bench=. -benchmem -run='^$' \ -timeout=10m --evm-binary=../../build/evm | tee output.txt - - name: Store benchmark result + - name: Run Block Executor benchmarks + run: | + go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \ + ./block/internal/executing/... > block_executor_output.txt + - name: Upload benchmark results + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: evm-benchmark-results + path: | + test/e2e/output.txt + block_executor_output.txt + + spamoor-benchmark: + name: Spamoor Trace Benchmark + runs-on: ubuntu-latest + timeout-minutes: 30 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version-file: ./go.mod + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 + - name: Build binaries + run: make build-evm build-da + - name: Run Spamoor smoke test + run: | + cd test/e2e && BENCH_JSON_OUTPUT=benchmark/spamoor_bench.json go test -tags evm \ + -run='^TestSpamoorSuite$/^TestSpamoorSmoke$' -v -timeout=15m \ + --evm-binary=../../build/evm ./benchmark/ + - name: Upload benchmark results + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: spamoor-benchmark-results + path: test/e2e/benchmark/spamoor_bench.json + + # single job to push all results to gh-pages sequentially, avoiding race conditions + publish-benchmarks: + name: Publish Benchmark Results + needs: [evm-benchmark, spamoor-benchmark] + runs-on: ubuntu-latest + permissions: + contents: write + issues: write + pull-requests: write + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Download EVM benchmark results + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: evm-benchmark-results + - name: Download Spamoor benchmark results + uses: actions/download-artifact@d3f86a106a0bac45b974a628896c90dbdf5c8093 # v4.3.0 + with: + name: spamoor-benchmark-results + path: test/e2e/benchmark/ + + # only update the benchmark baseline on push/dispatch, not on PRs + - name: Store EVM Contract Roundtrip result + if: always() uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 with: name: EVM Contract Roundtrip tool: 'go' output-file-path: test/e2e/output.txt - auto-push: true + auto-push: ${{ github.event_name != 'pull_request' }} + save-data-file: ${{ github.event_name != 'pull_request' }} github-token: ${{ secrets.GITHUB_TOKEN }} alert-threshold: '150%' fail-on-alert: true comment-on-alert: true - - name: Run Block Executor benchmarks - run: | - go test -bench=BenchmarkProduceBlock -benchmem -run='^$' \ - ./block/internal/executing/... > block_executor_output.txt - - name: Store Block Executor benchmark result + # delete local gh-pages so the next benchmark action step fetches fresh from remote + - name: Reset local gh-pages branch + if: always() + run: git branch -D gh-pages || true + + - name: Store Block Executor result + if: always() uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 with: name: Block Executor Benchmark tool: 'go' output-file-path: block_executor_output.txt - auto-push: true + auto-push: ${{ github.event_name != 'pull_request' }} + save-data-file: ${{ github.event_name != 'pull_request' }} github-token: ${{ secrets.GITHUB_TOKEN }} alert-threshold: '150%' fail-on-alert: true comment-on-alert: true + + # delete local gh-pages so the next benchmark action step fetches fresh from remote + - name: Reset local gh-pages branch + if: always() + run: git branch -D gh-pages || true + + - name: Store Spamoor Trace result + if: always() + uses: benchmark-action/github-action-benchmark@4bdcce38c94cec68da58d012ac24b7b1155efe8b # v1.20.7 + with: + name: Spamoor Trace Benchmarks + tool: 'customSmallerIsBetter' + output-file-path: test/e2e/benchmark/spamoor_bench.json + auto-push: ${{ github.event_name != 'pull_request' }} + save-data-file: ${{ github.event_name != 'pull_request' }} + github-token: ${{ secrets.GITHUB_TOKEN }} + alert-threshold: '150%' + fail-on-alert: false + comment-on-alert: true diff --git a/test/e2e/benchmark/metrics.go b/test/e2e/benchmark/metrics.go new file mode 100644 index 0000000000..fcc51af865 --- /dev/null +++ b/test/e2e/benchmark/metrics.go @@ -0,0 +1,48 @@ +//go:build evm + +package benchmark + +import ( + "fmt" + "net/http" + "testing" + "time" + + dto "github.com/prometheus/client_model/go" +) + +// requireHTTP polls a URL until it returns a 2xx status code or the timeout expires. +func requireHTTP(t testing.TB, url string, timeout time.Duration) { + t.Helper() + client := &http.Client{Timeout: 200 * time.Millisecond} + deadline := time.Now().Add(timeout) + var lastErr error + for time.Now().Before(deadline) { + resp, err := client.Get(url) + if err == nil { + _ = resp.Body.Close() + if resp.StatusCode >= 200 && resp.StatusCode < 300 { + return + } + lastErr = fmt.Errorf("status %d", resp.StatusCode) + } else { + lastErr = err + } + time.Sleep(100 * time.Millisecond) + } + t.Fatalf("daemon not ready at %s: %v", url, lastErr) +} + +// sumCounter sums all counter values in a prometheus MetricFamily. +func sumCounter(f *dto.MetricFamily) float64 { + if f == nil || f.GetType() != dto.MetricType_COUNTER { + return 0 + } + var sum float64 + for _, m := range f.GetMetric() { + if m.GetCounter() != nil && m.GetCounter().Value != nil { + sum += m.GetCounter().GetValue() + } + } + return sum +} diff --git a/test/e2e/benchmark/output.go b/test/e2e/benchmark/output.go new file mode 100644 index 0000000000..0d05ebd4ec --- /dev/null +++ b/test/e2e/benchmark/output.go @@ -0,0 +1,77 @@ +//go:build evm + +package benchmark + +import ( + "encoding/json" + "fmt" + "os" + "sort" + "testing" + + e2e "github.com/evstack/ev-node/test/e2e" + "github.com/stretchr/testify/require" +) + +// entry matches the customSmallerIsBetter format for github-action-benchmark. +type entry struct { + Name string `json:"name"` + Unit string `json:"unit"` + Value float64 `json:"value"` +} + +// resultWriter accumulates benchmark entries and writes them to a JSON file +// when flush is called. Create one early in a test and defer flush so results +// are written regardless of where the test exits. +type resultWriter struct { + t testing.TB + label string + entries []entry +} + +func newResultWriter(t testing.TB, label string) *resultWriter { + return &resultWriter{t: t, label: label} +} + +// addSpans aggregates trace spans into per-operation avg duration entries. +func (w *resultWriter) addSpans(spans []e2e.TraceSpan) { + m := e2e.AggregateSpanStats(spans) + if len(m) == 0 { + return + } + + names := make([]string, 0, len(m)) + for name := range m { + names = append(names, name) + } + sort.Strings(names) + + for _, name := range names { + s := m[name] + avg := float64(s.Total.Microseconds()) / float64(s.Count) + w.entries = append(w.entries, entry{ + Name: fmt.Sprintf("%s - %s (avg)", w.label, name), + Unit: "us", + Value: avg, + }) + } +} + +// addEntry appends a custom entry to the results. +func (w *resultWriter) addEntry(e entry) { + w.entries = append(w.entries, e) +} + +// flush writes accumulated entries to the path in BENCH_JSON_OUTPUT. +// It is a no-op when the env var is unset or no entries were added. +func (w *resultWriter) flush() { + outputPath := os.Getenv("BENCH_JSON_OUTPUT") + if outputPath == "" || len(w.entries) == 0 { + return + } + + data, err := json.MarshalIndent(w.entries, "", " ") + require.NoError(w.t, err, "failed to marshal benchmark JSON") + require.NoError(w.t, os.WriteFile(outputPath, data, 0644), "failed to write benchmark JSON to %s", outputPath) + w.t.Logf("wrote %d benchmark entries to %s", len(w.entries), outputPath) +} diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go new file mode 100644 index 0000000000..1f56f26874 --- /dev/null +++ b/test/e2e/benchmark/spamoor_smoke_test.go @@ -0,0 +1,114 @@ +//go:build evm + +package benchmark + +import ( + "time" + + "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" + e2e "github.com/evstack/ev-node/test/e2e" +) + +// TestSpamoorSmoke spins up reth + sequencer and a Spamoor node, starts a few +// basic spammers, waits briefly, then validates trace spans and prints a concise +// metrics summary. +func (s *SpamoorSuite) TestSpamoorSmoke() { + t := s.T() + w := newResultWriter(t, "SpamoorSmoke") + defer w.flush() + + e := s.setupEnv(config{ + rethTag: "pr-140", + serviceName: "ev-node-smoke", + }) + api := e.spamoorAPI + + eoatx := map[string]any{ + "throughput": 100, + "total_count": 3000, + "max_pending": 4000, + "max_wallets": 300, + "amount": 100, + "random_amount": true, + "random_target": true, + "base_fee": 20, + "tip_fee": 2, + "refill_amount": "1000000000000000000", + "refill_balance": "500000000000000000", + "refill_interval": 600, + } + + gasburner := map[string]any{ + "throughput": 25, + "total_count": 2000, + "max_pending": 8000, + "max_wallets": 500, + "gas_units_to_burn": 3000000, + "base_fee": 20, + "tip_fee": 5, + "rebroadcast": 5, + "refill_amount": "5000000000000000000", + "refill_balance": "2000000000000000000", + "refill_interval": 300, + } + + var ids []int + id, err := api.CreateSpammer("smoke-eoatx", spamoor.ScenarioEOATX, eoatx, true) + s.Require().NoError(err, "failed to create eoatx spammer") + ids = append(ids, id) + id, err = api.CreateSpammer("smoke-gasburner", spamoor.ScenarioGasBurnerTX, gasburner, true) + s.Require().NoError(err, "failed to create gasburner spammer") + ids = append(ids, id) + + for _, id := range ids { + idToDelete := id + t.Cleanup(func() { _ = api.DeleteSpammer(idToDelete) }) + } + + // allow spamoor enough time to generate transaction throughput + // so that the expected tracing spans appear in Jaeger. + time.Sleep(60 * time.Second) + + // fetch parsed metrics and print a concise summary. + metrics, err := api.GetMetrics() + s.Require().NoError(err, "failed to get metrics") + sent := sumCounter(metrics["spamoor_transactions_sent_total"]) + fail := sumCounter(metrics["spamoor_transactions_failed_total"]) + + // collect traces + evNodeSpans := s.collectServiceTraces(e, "ev-node-smoke") + evRethSpans := s.collectServiceTraces(e, "ev-reth") + e2e.PrintTraceReport(t, "ev-node-smoke", evNodeSpans) + e2e.PrintTraceReport(t, "ev-reth", evRethSpans) + + w.addSpans(append(evNodeSpans, evRethSpans...)) + + // assert expected ev-node span names + assertSpanNames(t, evNodeSpans, []string{ + "BlockExecutor.ProduceBlock", + "BlockExecutor.ApplyBlock", + "BlockExecutor.CreateBlock", + "BlockExecutor.RetrieveBatch", + "Executor.ExecuteTxs", + "Executor.SetFinal", + "Engine.ForkchoiceUpdated", + "Engine.NewPayload", + "Engine.GetPayload", + "Eth.GetBlockByNumber", + "Sequencer.GetNextBatch", + "DASubmitter.SubmitHeaders", + "DASubmitter.SubmitData", + "DA.Submit", + }, "ev-node-smoke") + + // assert expected ev-reth span names + assertSpanNames(t, evRethSpans, []string{ + "build_payload", + "execute_tx", + "try_build", + "validate_transaction", + }, "ev-reth") + + s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") + s.Require().Zero(fail, "no transactions should have failed") +} diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go new file mode 100644 index 0000000000..a43f25890c --- /dev/null +++ b/test/e2e/benchmark/suite_test.go @@ -0,0 +1,144 @@ +//go:build evm + +package benchmark + +import ( + "context" + "path/filepath" + "testing" + "time" + + tastoradocker "github.com/celestiaorg/tastora/framework/docker" + "github.com/celestiaorg/tastora/framework/docker/evstack/reth" + "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" + "github.com/celestiaorg/tastora/framework/docker/jaeger" + tastoratypes "github.com/celestiaorg/tastora/framework/types" + "github.com/ethereum/go-ethereum/ethclient" + "github.com/stretchr/testify/suite" + "go.uber.org/zap/zaptest" + + e2e "github.com/evstack/ev-node/test/e2e" +) + +// SpamoorSuite groups benchmarks that use Spamoor for load generation +// and Jaeger for distributed tracing. Docker client and network are shared +// across all tests in the suite. +type SpamoorSuite struct { + suite.Suite + dockerCli tastoratypes.TastoraDockerClient + networkID string +} + +func TestSpamoorSuite(t *testing.T) { + suite.Run(t, new(SpamoorSuite)) +} + +func (s *SpamoorSuite) SetupTest() { + s.dockerCli, s.networkID = tastoradocker.Setup(s.T()) +} + +// env holds a fully-wired environment created by setupEnv. +type env struct { + jaeger *jaeger.Node + evmEnv *e2e.EVMEnv + sut *e2e.SystemUnderTest + spamoorAPI *spamoor.API + ethClient *ethclient.Client +} + +// config parameterizes the per-test environment setup. +type config struct { + rethTag string + serviceName string +} + +// setupEnv creates a Jaeger + reth + sequencer + Spamoor environment for +// a single test. Each call spins up isolated infrastructure so tests +// can't interfere with each other. +func (s *SpamoorSuite) setupEnv(cfg config) *env { + t := s.T() + ctx := t.Context() + sut := e2e.NewSystemUnderTest(t) + + // jaeger + jcfg := jaeger.Config{Logger: zaptest.NewLogger(t), DockerClient: s.dockerCli, DockerNetworkID: s.networkID} + jg, err := jaeger.New(ctx, jcfg, t.Name(), 0) + s.Require().NoError(err, "failed to create jaeger node") + t.Cleanup(func() { _ = jg.Remove(t.Context()) }) + s.Require().NoError(jg.Start(ctx), "failed to start jaeger node") + + // reth + local DA with OTLP tracing to Jaeger + evmEnv := e2e.SetupCommonEVMEnv(t, sut, s.dockerCli, s.networkID, + e2e.WithRethOpts(func(b *reth.NodeBuilder) { + b.WithTag(cfg.rethTag).WithEnv( + "OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", + "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", + "OTEL_EXPORTER_OTLP_PROTOCOL=http", + "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http", + "RUST_LOG=debug", + "OTEL_SDK_DISABLED=false", + ) + }), + ) + + // sequencer with tracing + sequencerHome := filepath.Join(t.TempDir(), "sequencer") + otlpHTTP := jg.External.IngestHTTPEndpoint() + e2e.SetupSequencerNode(t, sut, sequencerHome, evmEnv.SequencerJWT, evmEnv.GenesisHash, evmEnv.Endpoints, + "--evnode.instrumentation.tracing=true", + "--evnode.instrumentation.tracing_endpoint", otlpHTTP, + "--evnode.instrumentation.tracing_sample_rate", "1.0", + "--evnode.instrumentation.tracing_service_name", cfg.serviceName, + ) + t.Log("sequencer node is up") + + // eth client + ethClient, err := ethclient.Dial(evmEnv.Endpoints.GetSequencerEthURL()) + s.Require().NoError(err, "failed to dial sequencer eth endpoint") + t.Cleanup(func() { ethClient.Close() }) + + // spamoor + ni, err := evmEnv.RethNode.GetNetworkInfo(ctx) + s.Require().NoError(err, "failed to get reth network info") + internalRPC := "http://" + ni.Internal.RPCAddress() + + spBuilder := spamoor.NewNodeBuilder(t.Name()). + WithDockerClient(evmEnv.RethNode.DockerClient). + WithDockerNetworkID(evmEnv.RethNode.NetworkID). + WithLogger(evmEnv.RethNode.Logger). + WithRPCHosts(internalRPC). + WithPrivateKey(e2e.TestPrivateKey) + + spNode, err := spBuilder.Build(ctx) + s.Require().NoError(err, "failed to build spamoor node") + t.Cleanup(func() { _ = spNode.Remove(t.Context()) }) + s.Require().NoError(spNode.Start(ctx), "failed to start spamoor node") + + spInfo, err := spNode.GetNetworkInfo(ctx) + s.Require().NoError(err, "failed to get spamoor network info") + apiAddr := "http://127.0.0.1:" + spInfo.External.Ports.HTTP + requireHTTP(t, apiAddr+"/api/spammers", 30*time.Second) + + return &env{ + jaeger: jg, + evmEnv: evmEnv, + sut: sut, + spamoorAPI: spNode.API(), + ethClient: ethClient, + } +} + +// collectServiceTraces fetches traces from Jaeger for the given service and returns the spans. +func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.TraceSpan { + ctx, cancel := context.WithTimeout(s.T().Context(), 3*time.Minute) + defer cancel() + + ok, err := e.jaeger.External.WaitForTraces(ctx, serviceName, 1, 2*time.Second) + s.Require().NoError(err, "error waiting for %s traces; UI: %s", serviceName, e.jaeger.External.QueryURL()) + s.Require().True(ok, "expected at least one trace from %s; UI: %s", serviceName, e.jaeger.External.QueryURL()) + + traces, err := e.jaeger.External.Traces(ctx, serviceName, 10000) + s.Require().NoError(err, "failed to fetch %s traces", serviceName) + + return toTraceSpans(extractSpansFromTraces(traces)) +} diff --git a/test/e2e/benchmark/traces.go b/test/e2e/benchmark/traces.go new file mode 100644 index 0000000000..76f160dfa2 --- /dev/null +++ b/test/e2e/benchmark/traces.go @@ -0,0 +1,67 @@ +//go:build evm + +package benchmark + +import ( + "testing" + "time" + + e2e "github.com/evstack/ev-node/test/e2e" + "github.com/stretchr/testify/require" +) + +// jaegerSpan holds the fields we extract from Jaeger's untyped JSON response. +type jaegerSpan struct { + operationName string + duration float64 // microseconds +} + +func (j jaegerSpan) SpanName() string { return j.operationName } +func (j jaegerSpan) SpanDuration() time.Duration { return time.Duration(j.duration) * time.Microsecond } + +// extractSpansFromTraces walks Jaeger's []any response and pulls out span operation names and durations. +func extractSpansFromTraces(traces []any) []jaegerSpan { + var out []jaegerSpan + for _, t := range traces { + traceMap, ok := t.(map[string]any) + if !ok { + continue + } + spans, ok := traceMap["spans"].([]any) + if !ok { + continue + } + for _, s := range spans { + spanMap, ok := s.(map[string]any) + if !ok { + continue + } + name, _ := spanMap["operationName"].(string) + dur, _ := spanMap["duration"].(float64) + if name != "" { + out = append(out, jaegerSpan{operationName: name, duration: dur}) + } + } + } + return out +} + +func toTraceSpans(spans []jaegerSpan) []e2e.TraceSpan { + out := make([]e2e.TraceSpan, len(spans)) + for i, s := range spans { + out[i] = s + } + return out +} + +// assertSpanNames verifies that all expected span names appear in the trace data. +func assertSpanNames(t testing.TB, spans []e2e.TraceSpan, expected []string, label string) { + t.Helper() + opNames := make(map[string]struct{}, len(spans)) + for _, span := range spans { + opNames[span.SpanName()] = struct{}{} + } + for _, name := range expected { + require.Contains(t, opNames, name, "expected span %q not found in %s traces", name, label) + } +} diff --git a/test/e2e/evm_contract_bench_test.go b/test/e2e/evm_contract_bench_test.go index 7fd67dcc5f..006a8ea1ce 100644 --- a/test/e2e/evm_contract_bench_test.go +++ b/test/e2e/evm_contract_bench_test.go @@ -202,7 +202,7 @@ func (c *otlpCollector) getSpans() []*tracepb.Span { return cp } -// otlpSpanAdapter wraps an OTLP protobuf span to implement traceSpan. +// otlpSpanAdapter wraps an OTLP protobuf span to implement TraceSpan. type otlpSpanAdapter struct { span *tracepb.Span } @@ -215,11 +215,11 @@ func (a otlpSpanAdapter) SpanDuration() time.Duration { func printCollectedTraceReport(b testing.TB, collector *otlpCollector) { b.Helper() raw := collector.getSpans() - spans := make([]traceSpan, len(raw)) + spans := make([]TraceSpan, len(raw)) for i, s := range raw { spans[i] = otlpSpanAdapter{span: s} } - printTraceReport(b, "ev-node", spans) + PrintTraceReport(b, "ev-node", spans) } // waitForReceipt polls for a transaction receipt until it is available. diff --git a/test/e2e/evm_force_inclusion_e2e_test.go b/test/e2e/evm_force_inclusion_e2e_test.go index 00046e3b20..bc04eba6a7 100644 --- a/test/e2e/evm_force_inclusion_e2e_test.go +++ b/test/e2e/evm_force_inclusion_e2e_test.go @@ -79,7 +79,7 @@ func setupSequencerWithForceInclusion(t *testing.T, sut *SystemUnderTest, nodeHo // Use common setup (no full node needed initially) dcli, netID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dcli, netID) + env := SetupCommonEVMEnv(t, sut, dcli, netID) // Create passphrase file passphraseFile := createPassphraseFile(t, nodeHome) @@ -195,7 +195,7 @@ func TestEvmFullNodeForceInclusionE2E(t *testing.T) { // We manually setup sequencer here because we need the force inclusion flag, // and we need to capture variables for full node setup. dockerClient, networkID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) + env := SetupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) passphraseFile := createPassphraseFile(t, sequencerHome) jwtSecretFile := createJWTSecretFile(t, sequencerHome, env.SequencerJWT) @@ -288,7 +288,7 @@ func setupMaliciousSequencer(t *testing.T, sut *SystemUnderTest, nodeHome string // Use common setup with full node support dockerClient, networkID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) + env := SetupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) // Use env fields inline below to reduce local vars passphraseFile := createPassphraseFile(t, nodeHome) diff --git a/test/e2e/evm_full_node_e2e_test.go b/test/e2e/evm_full_node_e2e_test.go index 23a3dbcf82..98d07fe815 100644 --- a/test/e2e/evm_full_node_e2e_test.go +++ b/test/e2e/evm_full_node_e2e_test.go @@ -215,10 +215,10 @@ func setupSequencerWithFullNode(t *testing.T, sut *SystemUnderTest, sequencerHom // Common setup for both sequencer and full node dcli, netID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dcli, netID, WithFullNode()) + env := SetupCommonEVMEnv(t, sut, dcli, netID, WithFullNode()) // Setup sequencer - setupSequencerNode(t, sut, sequencerHome, env.SequencerJWT, env.GenesisHash, env.Endpoints) + SetupSequencerNode(t, sut, sequencerHome, env.SequencerJWT, env.GenesisHash, env.Endpoints) t.Log("Sequencer node is up") // Get P2P address and setup full node @@ -648,7 +648,7 @@ func setupSequencerWithFullNodeLazy(t *testing.T, sut *SystemUnderTest, sequence // Common setup for both sequencer and full node dcli, netID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dcli, netID, WithFullNode()) + env := SetupCommonEVMEnv(t, sut, dcli, netID, WithFullNode()) t.Logf("Generated test endpoints - Rollkit RPC: %s, P2P: %s, Full Node RPC: %s, P2P: %s, DA Port: %s", env.Endpoints.RollkitRPCPort, env.Endpoints.RollkitP2PPort, env.Endpoints.FullNodeRPCPort, env.Endpoints.FullNodeP2PPort, env.Endpoints.DAPort) @@ -1042,14 +1042,14 @@ func testSequencerFullNodeRestart(t *testing.T, initialLazyMode, restartLazyMode t.Logf("Test mode: initial_lazy=%t, restart_lazy=%t", initialLazyMode, restartLazyMode) dcli, netID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dcli, netID, WithFullNode()) + env := SetupCommonEVMEnv(t, sut, dcli, netID, WithFullNode()) // Setup sequencer based on initial mode if initialLazyMode { setupSequencerNodeLazy(t, sut, sequencerHome, env.SequencerJWT, env.GenesisHash, env.Endpoints) t.Log("Sequencer node (lazy mode) is up") } else { - setupSequencerNode(t, sut, sequencerHome, env.SequencerJWT, env.GenesisHash, env.Endpoints) + SetupSequencerNode(t, sut, sequencerHome, env.SequencerJWT, env.GenesisHash, env.Endpoints) t.Log("Sequencer node is up") } diff --git a/test/e2e/evm_spamoor_smoke_test.go b/test/e2e/evm_spamoor_smoke_test.go deleted file mode 100644 index ca172948a0..0000000000 --- a/test/e2e/evm_spamoor_smoke_test.go +++ /dev/null @@ -1,281 +0,0 @@ -//go:build evm - -package e2e - -import ( - "context" - "fmt" - "net/http" - "path/filepath" - "testing" - "time" - - tastoradocker "github.com/celestiaorg/tastora/framework/docker" - reth "github.com/celestiaorg/tastora/framework/docker/evstack/reth" - spamoor "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" - jaeger "github.com/celestiaorg/tastora/framework/docker/jaeger" - dto "github.com/prometheus/client_model/go" - "github.com/stretchr/testify/require" - "go.uber.org/zap/zaptest" -) - -// TestSpamoorSmoke spins up reth + sequencer and a Spamoor node, starts a few -// basic spammers, waits briefly, then prints a concise metrics summary. -func TestSpamoorSmoke(t *testing.T) { - t.Parallel() - - sut := NewSystemUnderTest(t) - // Prepare a shared docker client and network for Jaeger and reth. - ctx := t.Context() - dcli, netID := tastoradocker.Setup(t) - jcfg := jaeger.Config{Logger: zaptest.NewLogger(t), DockerClient: dcli, DockerNetworkID: netID} - jg, err := jaeger.New(ctx, jcfg, t.Name(), 0) - require.NoError(t, err, "failed to create jaeger node") - t.Cleanup(func() { _ = jg.Remove(t.Context()) }) - require.NoError(t, jg.Start(ctx), "failed to start jaeger node") - - // Bring up reth + local DA on the same docker network as Jaeger so reth can export traces. - env := setupCommonEVMEnv(t, sut, dcli, netID, - WithRethOpts(func(b *reth.NodeBuilder) { - b.WithEnv( - "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", - "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http", - "RUST_LOG=info", - "OTEL_SDK_DISABLED=false", - ) - }), - ) - sequencerHome := filepath.Join(t.TempDir(), "sequencer") - - // ev-node runs on the host, so use Jaeger's external OTLP/HTTP endpoint. - otlpHTTP := jg.External.IngestHTTPEndpoint() - - // Start sequencer with tracing to Jaeger collector. - setupSequencerNode(t, sut, sequencerHome, env.SequencerJWT, env.GenesisHash, env.Endpoints, - "--evnode.instrumentation.tracing=true", - "--evnode.instrumentation.tracing_endpoint", otlpHTTP, - "--evnode.instrumentation.tracing_sample_rate", "1.0", - "--evnode.instrumentation.tracing_service_name", "ev-node-smoke", - ) - t.Log("Sequencer node is up") - - // Start Spamoor within the same Docker network, targeting reth internal RPC. - ni, err := env.RethNode.GetNetworkInfo(ctx) - require.NoError(t, err, "failed to get network info") - - internalRPC := "http://" + ni.Internal.RPCAddress() - - spBuilder := spamoor.NewNodeBuilder(t.Name()). - WithDockerClient(env.RethNode.DockerClient). - WithDockerNetworkID(env.RethNode.NetworkID). - WithLogger(env.RethNode.Logger). - WithRPCHosts(internalRPC). - WithPrivateKey(TestPrivateKey) - - spNode, err := spBuilder.Build(ctx) - require.NoError(t, err, "failed to build sp node") - - t.Cleanup(func() { _ = spNode.Remove(t.Context()) }) - require.NoError(t, spNode.Start(ctx), "failed to start spamoor node") - - // Wait for daemon readiness. - spInfo, err := spNode.GetNetworkInfo(ctx) - require.NoError(t, err, "failed to get network info") - - apiAddr := "http://127.0.0.1:" + spInfo.External.Ports.HTTP - requireHTTP(t, apiAddr+"/api/spammers", 30*time.Second) - api := spNode.API() - - // Basic scenarios (structs that YAML-marshal into the daemon config). - eoatx := map[string]any{ - "throughput": 100, - "total_count": 3000, - "max_pending": 4000, - "max_wallets": 300, - "amount": 100, - "random_amount": true, - "random_target": true, - "base_fee": 20, // gwei - "tip_fee": 2, // gwei - "refill_amount": "1000000000000000000", - "refill_balance": "500000000000000000", - "refill_interval": 600, - } - - gasburner := map[string]any{ - "throughput": 25, - "total_count": 2000, - "max_pending": 8000, - "max_wallets": 500, - "gas_units_to_burn": 3000000, - "base_fee": 20, - "tip_fee": 5, - "rebroadcast": 5, - "refill_amount": "5000000000000000000", - "refill_balance": "2000000000000000000", - "refill_interval": 300, - } - - var ids []int - id, err := api.CreateSpammer("smoke-eoatx", spamoor.ScenarioEOATX, eoatx, true) - require.NoError(t, err, "failed to create eoatx spammer") - ids = append(ids, id) - id, err = api.CreateSpammer("smoke-gasburner", spamoor.ScenarioGasBurnerTX, gasburner, true) - require.NoError(t, err, "failed to create gasburner spammer") - ids = append(ids, id) - - for _, id := range ids { - idToDelete := id - t.Cleanup(func() { _ = api.DeleteSpammer(idToDelete) }) - } - - // allow spamoor enough time to generate transaction throughput - // so that the expected tracing spans appear in Jaeger. - time.Sleep(60 * time.Second) - - // Fetch parsed metrics and print a concise summary. - metrics, err := api.GetMetrics() - require.NoError(t, err, "failed to get metrics") - sent := sumCounter(metrics["spamoor_transactions_sent_total"]) - fail := sumCounter(metrics["spamoor_transactions_failed_total"]) - - // Verify Jaeger received traces from ev-node. - // Service name is set above via --evnode.instrumentation.tracing_service_name "ev-node-smoke". - traceCtx, cancel := context.WithTimeout(ctx, 3*time.Minute) - defer cancel() - ok, err := jg.External.WaitForTraces(traceCtx, "ev-node-smoke", 1, 2*time.Second) - require.NoError(t, err, "error while waiting for Jaeger traces; UI: %s", jg.External.QueryURL()) - require.True(t, ok, "expected at least one trace in Jaeger; UI: %s", jg.External.QueryURL()) - - // Also wait for traces from ev-reth and print a small sample. - ok, err = jg.External.WaitForTraces(traceCtx, "ev-reth", 1, 2*time.Second) - require.NoError(t, err, "error while waiting for ev-reth traces; UI: %s", jg.External.QueryURL()) - require.True(t, ok, "expected at least one trace from ev-reth; UI: %s", jg.External.QueryURL()) - - // fetch traces and print reports for both services. - // use a large limit to fetch all traces from the test run. - evNodeTraces, err := jg.External.Traces(traceCtx, "ev-node-smoke", 10000) - require.NoError(t, err, "failed to fetch ev-node-smoke traces from Jaeger") - evNodeSpans := extractSpansFromTraces(evNodeTraces) - printTraceReport(t, "ev-node-smoke", toTraceSpans(evNodeSpans)) - - evRethTraces, err := jg.External.Traces(traceCtx, "ev-reth", 10000) - require.NoError(t, err, "failed to fetch ev-reth traces from Jaeger") - evRethSpans := extractSpansFromTraces(evRethTraces) - printTraceReport(t, "ev-reth", toTraceSpans(evRethSpans)) - - // assert expected ev-node span names are present. - // these spans reliably appear during block production with transactions flowing. - expectedSpans := []string{ - "BlockExecutor.ProduceBlock", - "BlockExecutor.ApplyBlock", - "BlockExecutor.CreateBlock", - "BlockExecutor.RetrieveBatch", - "Executor.ExecuteTxs", - "Executor.SetFinal", - "Engine.ForkchoiceUpdated", - "Engine.NewPayload", - "Engine.GetPayload", - "Eth.GetBlockByNumber", - "Sequencer.GetNextBatch", - "DASubmitter.SubmitHeaders", - "DASubmitter.SubmitData", - "DA.Submit", - } - opNames := make(map[string]struct{}, len(evNodeSpans)) - for _, s := range evNodeSpans { - opNames[s.operationName] = struct{}{} - } - for _, name := range expectedSpans { - require.Contains(t, opNames, name, "expected span %q not found in ev-node-smoke traces", name) - } - - // ev-reth span names are internal to the Rust OTLP exporter and may change - // across versions, so we only assert that spans were collected at all. - // TODO: check for more specific spans once implemented. - require.NotEmpty(t, evRethSpans, "expected at least one span from ev-reth") - - require.Greater(t, sent, float64(0), "at least one transaction should have been sent") - require.Zero(t, fail, "no transactions should have failed") -} - -// --- helpers --- - -func requireHTTP(t *testing.T, url string, timeout time.Duration) { - t.Helper() - client := &http.Client{Timeout: 200 * time.Millisecond} - deadline := time.Now().Add(timeout) - var lastErr error - for time.Now().Before(deadline) { - resp, err := client.Get(url) - if err == nil { - _ = resp.Body.Close() - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - return - } - lastErr = fmt.Errorf("status %d", resp.StatusCode) - } else { - lastErr = err - } - time.Sleep(100 * time.Millisecond) - } - t.Fatalf("daemon not ready at %s: %v", url, lastErr) -} - -// Metric family helpers. -func sumCounter(f *dto.MetricFamily) float64 { - if f == nil || f.GetType() != dto.MetricType_COUNTER { - return 0 - } - var sum float64 - for _, m := range f.GetMetric() { - if m.GetCounter() != nil && m.GetCounter().Value != nil { - sum += m.GetCounter().GetValue() - } - } - return sum -} - -// jaegerSpan holds the fields we extract from Jaeger's untyped JSON response. -type jaegerSpan struct { - operationName string - duration float64 // microseconds -} - -func (j jaegerSpan) SpanName() string { return j.operationName } -func (j jaegerSpan) SpanDuration() time.Duration { return time.Duration(j.duration) * time.Microsecond } - -// extractSpansFromTraces walks Jaeger's []any response and pulls out span operation names and durations. -func extractSpansFromTraces(traces []any) []jaegerSpan { - var out []jaegerSpan - for _, t := range traces { - traceMap, ok := t.(map[string]any) - if !ok { - continue - } - spans, ok := traceMap["spans"].([]any) - if !ok { - continue - } - for _, s := range spans { - spanMap, ok := s.(map[string]any) - if !ok { - continue - } - name, _ := spanMap["operationName"].(string) - dur, _ := spanMap["duration"].(float64) - if name != "" { - out = append(out, jaegerSpan{operationName: name, duration: dur}) - } - } - } - return out -} - -func toTraceSpans(spans []jaegerSpan) []traceSpan { - out := make([]traceSpan, len(spans)) - for i, s := range spans { - out[i] = s - } - return out -} diff --git a/test/e2e/evm_test_common.go b/test/e2e/evm_test_common.go index 5ddaf935b0..161ff89349 100644 --- a/test/e2e/evm_test_common.go +++ b/test/e2e/evm_test_common.go @@ -315,7 +315,7 @@ func getNodeP2PAddress(t testing.TB, sut *SystemUnderTest, nodeHome string, rpcP // - jwtSecret: JWT secret for authenticating with EVM engine // - genesisHash: Hash of the genesis block for chain validation // - endpoints: TestEndpoints struct containing unique port assignments -func setupSequencerNode(t testing.TB, sut *SystemUnderTest, sequencerHome, jwtSecret, genesisHash string, endpoints *TestEndpoints, extraArgs ...string) { +func SetupSequencerNode(t testing.TB, sut *SystemUnderTest, sequencerHome, jwtSecret, genesisHash string, endpoints *TestEndpoints, extraArgs ...string) { t.Helper() // Create passphrase file @@ -531,7 +531,7 @@ func WithRethOpts(opts ...evmtest.RethNodeOpt) SetupOpt { // setupCommonEVMEnv creates and initializes ev-reth instances, while also initializing the local ev-node instance // managed by sut. If a full node is also required, we can use the WithFullNode() additional option. -func setupCommonEVMEnv(t testing.TB, sut *SystemUnderTest, client tastoratypes.TastoraDockerClient, networkID string, opts ...SetupOpt) *EVMEnv { +func SetupCommonEVMEnv(t testing.TB, sut *SystemUnderTest, client tastoratypes.TastoraDockerClient, networkID string, opts ...SetupOpt) *EVMEnv { t.Helper() // Configuration via functional options @@ -665,9 +665,9 @@ func setupSequencerOnlyTest(t testing.TB, sut *SystemUnderTest, nodeHome string, t.Helper() // Use common setup (no full node needed) - env := setupCommonEVMEnv(t, sut, client, networkID) + env := SetupCommonEVMEnv(t, sut, client, networkID) // Initialize and start sequencer node - setupSequencerNode(t, sut, nodeHome, env.SequencerJWT, env.GenesisHash, env.Endpoints, extraArgs...) + SetupSequencerNode(t, sut, nodeHome, env.SequencerJWT, env.GenesisHash, env.Endpoints, extraArgs...) t.Log("Sequencer node is up") return env.GenesisHash, env.Endpoints.GetSequencerEthURL() @@ -849,27 +849,23 @@ func verifyNoBlockProduction(t testing.TB, client *ethclient.Client, duration ti t.Logf("✅ %s maintained height %d for %v (no new blocks produced)", nodeName, initialHeight, duration) } -// traceSpan is a common interface for span data from different sources (OTLP collector, Jaeger API). -type traceSpan interface { +// TraceSpan is a common interface for span data from different sources (OTLP collector, Jaeger API). +type TraceSpan interface { SpanName() string SpanDuration() time.Duration } -// printTraceReport aggregates spans by operation name and prints a timing breakdown. -func printTraceReport(t testing.TB, label string, spans []traceSpan) { - t.Helper() - if len(spans) == 0 { - t.Logf("WARNING: no spans found for %s", label) - return - } +// SpanStats holds aggregated timing statistics for a single span operation. +type SpanStats struct { + Count int + Total time.Duration + Min time.Duration + Max time.Duration +} - type stats struct { - count int - total time.Duration - min time.Duration - max time.Duration - } - m := make(map[string]*stats) +// AggregateSpanStats groups spans by operation name and computes count, total, min, max. +func AggregateSpanStats(spans []TraceSpan) map[string]*SpanStats { + m := make(map[string]*SpanStats) for _, span := range spans { d := span.SpanDuration() if d <= 0 { @@ -878,45 +874,57 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) { name := span.SpanName() s, ok := m[name] if !ok { - s = &stats{min: d, max: d} + s = &SpanStats{Min: d, Max: d} m[name] = s } - s.count++ - s.total += d - if d < s.min { - s.min = d + s.Count++ + s.Total += d + if d < s.Min { + s.Min = d } - if d > s.max { - s.max = d + if d > s.Max { + s.Max = d } } + return m +} + +// PrintTraceReport aggregates spans by operation name and prints a timing breakdown. +func PrintTraceReport(t testing.TB, label string, spans []TraceSpan) { + t.Helper() + if len(spans) == 0 { + t.Logf("WARNING: no spans found for %s", label) + return + } + + m := AggregateSpanStats(spans) names := make([]string, 0, len(m)) for name := range m { names = append(names, name) } sort.Slice(names, func(i, j int) bool { - return m[names[i]].total > m[names[j]].total + return m[names[i]].Total > m[names[j]].Total }) var overallTotal time.Duration for _, s := range m { - overallTotal += s.total + overallTotal += s.Total } t.Logf("\n--- %s Trace Breakdown (%d spans) ---", label, len(spans)) t.Logf("%-40s %6s %12s %12s %12s %7s", "OPERATION", "COUNT", "AVG", "MIN", "MAX", "% TOTAL") for _, name := range names { s := m[name] - avg := s.total / time.Duration(s.count) - pct := float64(s.total) / float64(overallTotal) * 100 - t.Logf("%-40s %6d %12s %12s %12s %6.1f%%", name, s.count, avg, s.min, s.max, pct) + avg := s.Total / time.Duration(s.Count) + pct := float64(s.Total) / float64(overallTotal) * 100 + t.Logf("%-40s %6d %12s %12s %12s %6.1f%%", name, s.Count, avg, s.Min, s.Max, pct) } t.Logf("\n--- %s Time Distribution ---", label) for _, name := range names { s := m[name] - pct := float64(s.total) / float64(overallTotal) * 100 + pct := float64(s.Total) / float64(overallTotal) * 100 bar := "" for range int(pct / 2) { bar += "█" @@ -924,3 +932,4 @@ func printTraceReport(t testing.TB, label string, spans []traceSpan) { t.Logf("%-40s %5.1f%% %s", name, pct, bar) } } + diff --git a/test/e2e/failover_e2e_test.go b/test/e2e/failover_e2e_test.go index 905b280cf4..26333a2dd0 100644 --- a/test/e2e/failover_e2e_test.go +++ b/test/e2e/failover_e2e_test.go @@ -56,7 +56,7 @@ func TestLeaseFailoverE2E(t *testing.T) { // Get JWT secrets and setup common components first dockerClient, networkID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) + env := SetupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) // Use a fresh reth node on the same Docker network as used by the env setup. rethFn := evmtest.SetupTestRethNode(t, dockerClient, networkID) jwtSecret3 := rethFn.JWTSecretHex() @@ -257,7 +257,7 @@ func TestHASequencerRollingRestartE2E(t *testing.T) { // Get Docker and common environment dockerClient, networkID := tastoradocker.Setup(t) - env := setupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) + env := SetupCommonEVMEnv(t, sut, dockerClient, networkID, WithFullNode()) rethFn := evmtest.SetupTestRethNode(t, dockerClient, networkID) jwtSecret3 := rethFn.JWTSecretHex() fnInfo, err := rethFn.GetNetworkInfo(context.Background()) From 18fc15a6a4a72c8c2f153dff9f2a749728f0010c Mon Sep 17 00:00:00 2001 From: chatton Date: Wed, 25 Feb 2026 12:40:33 +0000 Subject: [PATCH 02/19] fix: correct BENCH_JSON_OUTPUT path for spamoor benchmark go test sets the working directory to the package under test, so the env var should be relative to test/e2e/benchmark/, not test/e2e/. --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 2f8fcdb1c7..bb2bd296df 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -63,7 +63,7 @@ jobs: run: make build-evm build-da - name: Run Spamoor smoke test run: | - cd test/e2e && BENCH_JSON_OUTPUT=benchmark/spamoor_bench.json go test -tags evm \ + cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \ -run='^TestSpamoorSuite$/^TestSpamoorSmoke$' -v -timeout=15m \ --evm-binary=../../build/evm ./benchmark/ - name: Upload benchmark results From fccd9db21a00243d01b821ae8adb7f7ddc75c5f9 Mon Sep 17 00:00:00 2001 From: chatton Date: Wed, 25 Feb 2026 12:50:50 +0000 Subject: [PATCH 03/19] fix: place package pattern before test binary flags in benchmark CI go test treats all arguments after an unknown flag (--evm-binary) as test binary args, so ./benchmark/ was never recognized as a package pattern. --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index bb2bd296df..c4be10227b 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -65,7 +65,7 @@ jobs: run: | cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \ -run='^TestSpamoorSuite$/^TestSpamoorSmoke$' -v -timeout=15m \ - --evm-binary=../../build/evm ./benchmark/ + ./benchmark/ --evm-binary=../../build/evm - name: Upload benchmark results uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: From ae525cae6fe048844f4fa5eb01180c0a534a7f5c Mon Sep 17 00:00:00 2001 From: chatton Date: Wed, 25 Feb 2026 13:48:41 +0000 Subject: [PATCH 04/19] fix: adjust evm-binary path for benchmark subpackage working directory go test sets the cwd to the package directory (test/e2e/benchmark/), so the binary path needs an extra parent traversal. --- .github/workflows/benchmark.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index c4be10227b..b3a157a0b1 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -65,7 +65,7 @@ jobs: run: | cd test/e2e && BENCH_JSON_OUTPUT=spamoor_bench.json go test -tags evm \ -run='^TestSpamoorSuite$/^TestSpamoorSmoke$' -v -timeout=15m \ - ./benchmark/ --evm-binary=../../build/evm + ./benchmark/ --evm-binary=../../../build/evm - name: Upload benchmark results uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 with: From 039eaf75c5401296236b3f56683c4e59d46ca040 Mon Sep 17 00:00:00 2001 From: chatton Date: Wed, 25 Feb 2026 14:30:13 +0000 Subject: [PATCH 05/19] wip: erc20 benchmark test --- test/e2e/benchmark/helpers.go | 108 ++++++++++++++++ test/e2e/benchmark/spamoor_erc20_test.go | 152 +++++++++++++++++++++++ 2 files changed, 260 insertions(+) create mode 100644 test/e2e/benchmark/helpers.go create mode 100644 test/e2e/benchmark/spamoor_erc20_test.go diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go new file mode 100644 index 0000000000..4577407d75 --- /dev/null +++ b/test/e2e/benchmark/helpers.go @@ -0,0 +1,108 @@ +//go:build evm + +package benchmark + +import ( + "context" + "fmt" + "math/big" + "time" + + "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" + "github.com/ethereum/go-ethereum/ethclient" +) + +// blockMetrics holds aggregated gas and transaction data across a range of blocks. +type blockMetrics struct { + StartBlock uint64 + EndBlock uint64 + BlockCount int + TotalGasUsed uint64 + TotalTxCount int + GasPerBlock []uint64 + TxPerBlock []int +} + +// avgGasPerBlock returns the mean gas used per block. +func (m *blockMetrics) avgGasPerBlock() float64 { + if m.BlockCount == 0 { + return 0 + } + return float64(m.TotalGasUsed) / float64(m.BlockCount) +} + +// avgTxPerBlock returns the mean transaction count per block. +func (m *blockMetrics) avgTxPerBlock() float64 { + if m.BlockCount == 0 { + return 0 + } + return float64(m.TotalTxCount) / float64(m.BlockCount) +} + +// mgasPerSec calculates the achieved throughput in megagas per second. +func mgasPerSec(totalGasUsed uint64, elapsed time.Duration) float64 { + if elapsed <= 0 { + return 0 + } + return float64(totalGasUsed) / elapsed.Seconds() / 1e6 +} + +// waitForSpamoorDone polls spamoor metrics until the total sent count reaches +// the target or the context is cancelled. It returns the final sent and failed +// counts. +func waitForSpamoorDone(ctx context.Context, api *spamoor.API, targetCount int, pollInterval time.Duration) (sent, failed float64, err error) { + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return sent, failed, fmt.Errorf("timed out waiting for spamoor to send %d txs (sent %.0f): %w", targetCount, sent, ctx.Err()) + case <-ticker.C: + metrics, mErr := api.GetMetrics() + if mErr != nil { + continue + } + sent = sumCounter(metrics["spamoor_transactions_sent_total"]) + failed = sumCounter(metrics["spamoor_transactions_failed_total"]) + if sent >= float64(targetCount) { + return sent, failed, nil + } + } + } +} + +// collectBlockMetrics fetches block headers from startBlock to endBlock +// (inclusive) and aggregates gas and transaction counts. Blocks with zero +// transactions are skipped to avoid skewing averages with empty blocks. +func collectBlockMetrics(ctx context.Context, client *ethclient.Client, startBlock, endBlock uint64) (*blockMetrics, error) { + if endBlock < startBlock { + return nil, fmt.Errorf("endBlock %d < startBlock %d", endBlock, startBlock) + } + + m := &blockMetrics{ + StartBlock: startBlock, + EndBlock: endBlock, + } + + for n := startBlock; n <= endBlock; n++ { + block, err := client.BlockByNumber(ctx, new(big.Int).SetUint64(n)) + if err != nil { + return nil, fmt.Errorf("failed to fetch block %d: %w", n, err) + } + + txCount := len(block.Transactions()) + if txCount == 0 { + continue + } + + gasUsed := block.GasUsed() + m.BlockCount++ + m.TotalGasUsed += gasUsed + m.TotalTxCount += txCount + m.GasPerBlock = append(m.GasPerBlock, gasUsed) + m.TxPerBlock = append(m.TxPerBlock, txCount) + } + + return m, nil +} diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go new file mode 100644 index 0000000000..daab436cac --- /dev/null +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -0,0 +1,152 @@ +//go:build evm + +package benchmark + +import ( + "context" + "time" + + "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" + e2e "github.com/evstack/ev-node/test/e2e" +) + +// TestERC20Throughput measures ERC-20 token transfer throughput in isolation. +// Each tx is ~65K gas (contract call with storage reads/writes) +// +// Primary metric: achieved MGas/s. +// Diagnostic metrics: per-span latency breakdown, gas/block, tx/block. +func (s *SpamoorSuite) TestERC20Throughput() { + const ( + totalCount = 3000 + serviceName = "ev-node-erc20" + waitTimeout = 3 * time.Minute + ) + + t := s.T() + ctx := t.Context() + w := newResultWriter(t, "ERC20Throughput") + defer w.flush() + + e := s.setupEnv(config{ + rethTag: "pr-140", + serviceName: serviceName, + }) + + erc20Config := map[string]any{ + "throughput": 100, + "total_count": totalCount, + "max_pending": 4000, + "max_wallets": 300, + "base_fee": 20, + "tip_fee": 3, + "refill_amount": "5000000000000000000", + "refill_balance": "2000000000000000000", + "refill_interval": 600, + } + + // record the starting block before generating load + startHeader, err := e.ethClient.HeaderByNumber(ctx, nil) + s.Require().NoError(err, "failed to get start block header") + startBlock := startHeader.Number.Uint64() + loadStart := time.Now() + + id, err := e.spamoorAPI.CreateSpammer("bench-erc20", spamoor.ScenarioERC20TX, erc20Config, true) + s.Require().NoError(err, "failed to create erc20 spammer") + t.Cleanup(func() { _ = e.spamoorAPI.DeleteSpammer(id) }) + + // wait for spamoor to finish sending all transactions + waitCtx, cancel := context.WithTimeout(ctx, waitTimeout) + defer cancel() + sent, failed, err := waitForSpamoorDone(waitCtx, e.spamoorAPI, totalCount, 2*time.Second) + s.Require().NoError(err, "spamoor did not finish in time") + + // allow a short settle period for remaining txs to be included in blocks + time.Sleep(5 * time.Second) + wallClock := time.Since(loadStart) + + endHeader, err := e.ethClient.HeaderByNumber(ctx, nil) + s.Require().NoError(err, "failed to get end block header") + endBlock := endHeader.Number.Uint64() + + // collect block-level gas/tx metrics + bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock) + s.Require().NoError(err, "failed to collect block metrics") + + achievedMGas := mgasPerSec(bm.TotalGasUsed, wallClock) + achievedTPS := float64(bm.TotalTxCount) / wallClock.Seconds() + + t.Logf("ERC20 throughput: %.2f MGas/s, %.1f TPS over %s (%d blocks, %d txs, %.2f avg gas/block)", + achievedMGas, achievedTPS, wallClock.Round(time.Millisecond), + bm.BlockCount, bm.TotalTxCount, bm.avgGasPerBlock()) + + // collect and report traces + evNodeSpans := s.collectServiceTraces(e, serviceName) + evRethSpans := s.collectServiceTraces(e, "ev-reth") + e2e.PrintTraceReport(t, serviceName, evNodeSpans) + e2e.PrintTraceReport(t, "ev-reth", evRethSpans) + + // compute ev-node overhead ratio + spanStats := e2e.AggregateSpanStats(evNodeSpans) + if produceBlock, ok := spanStats["BlockExecutor.ProduceBlock"]; ok { + if executeTxs, ok2 := spanStats["Executor.ExecuteTxs"]; ok2 { + produceAvg := float64(produceBlock.Total.Microseconds()) / float64(produceBlock.Count) + executeAvg := float64(executeTxs.Total.Microseconds()) / float64(executeTxs.Count) + if produceAvg > 0 { + overhead := (produceAvg - executeAvg) / produceAvg * 100 + t.Logf("ev-node overhead: %.1f%%", overhead) + w.addEntry(entry{ + Name: "ERC20Throughput - ev-node overhead", + Unit: "%", + Value: overhead, + }) + } + } + } + + // write all metrics to benchmark output + w.addEntry(entry{ + Name: "ERC20Throughput - MGas/s", + Unit: "MGas/s", + Value: achievedMGas, + }) + w.addEntry(entry{ + Name: "ERC20Throughput - TPS", + Unit: "tx/s", + Value: achievedTPS, + }) + w.addEntry(entry{ + Name: "ERC20Throughput - avg gas/block", + Unit: "gas", + Value: bm.avgGasPerBlock(), + }) + w.addEntry(entry{ + Name: "ERC20Throughput - avg tx/block", + Unit: "count", + Value: bm.avgTxPerBlock(), + }) + w.addEntry(entry{ + Name: "ERC20Throughput - blocks/s", + Unit: "blocks/s", + Value: float64(bm.BlockCount) / wallClock.Seconds(), + }) + + // add per-span avg latencies + w.addSpans(append(evNodeSpans, evRethSpans...)) + + // assertions + s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") + s.Require().Zero(failed, "no transactions should have failed") + + // assert expected ev-node spans are present + assertSpanNames(t, evNodeSpans, []string{ + "BlockExecutor.ProduceBlock", + "BlockExecutor.ApplyBlock", + "Executor.ExecuteTxs", + "Executor.SetFinal", + "Engine.ForkchoiceUpdated", + "Engine.NewPayload", + "Engine.GetPayload", + "Sequencer.GetNextBatch", + "DA.Submit", + }, serviceName) +} From 85c9d2d693fa32a7195598562d3cfaeadae90e35 Mon Sep 17 00:00:00 2001 From: chatton Date: Wed, 25 Feb 2026 14:45:37 +0000 Subject: [PATCH 06/19] fix: exclude benchmark subpackage from make test-e2e The benchmark package doesn't define the --binary flag that test-e2e passes. It has its own CI workflow so it doesn't need to run here. --- scripts/test.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/test.mk b/scripts/test.mk index f9f76c1ab3..524f8a4cdb 100644 --- a/scripts/test.mk +++ b/scripts/test.mk @@ -24,7 +24,7 @@ test-integration: ## test-e2e: Running e2e tests test-e2e: build build-da build-evm docker-build-if-local @echo "--> Running e2e tests" - @cd test/e2e && go test -mod=readonly -failfast -timeout=15m -tags='e2e evm' ./... --binary=../../build/testapp --evm-binary=../../build/evm + @cd test/e2e && go test -mod=readonly -failfast -timeout=15m -tags='e2e evm' $$(go list -tags='e2e evm' ./... | grep -v /benchmark) --binary=../../build/testapp --evm-binary=../../build/evm .PHONY: test-e2e ## test-integration-cover: generate code coverage report for integration tests. From fe8d166a32c07841b3f2686853e7ea63a8a53c75 Mon Sep 17 00:00:00 2001 From: chatton Date: Thu, 26 Feb 2026 13:03:06 +0000 Subject: [PATCH 07/19] fix: replace FilterLogs with header iteration and optimize spamoor config collectBlockMetrics hit reth's 20K FilterLogs limit at high tx volumes. Replace with direct header iteration over [startBlock, endBlock] and add Phase 1 metrics: non-empty ratio, block interval p50/p99, gas/block and tx/block p50/p99. Optimize spamoor configuration for 100ms block time: - --slot-duration 100ms, --startup-delay 0 on daemon - throughput=50 per 100ms slot (500 tx/s per spammer) - max_pending=50000 to avoid 3s block poll backpressure - 5 staggered spammers with 50K txs each Results: 55 MGas/s, 1414 TPS, 19.8% non-empty blocks (up from 6%). --- test/e2e/benchmark/helpers.go | 159 +++++++++++++++++++---- test/e2e/benchmark/spamoor_erc20_test.go | 106 ++++++++++++--- test/e2e/benchmark/suite_test.go | 44 ++++++- test/e2e/go.mod | 1 + test/e2e/go.sum | 2 - 5 files changed, 263 insertions(+), 49 deletions(-) diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go index 4577407d75..d25b06d7b5 100644 --- a/test/e2e/benchmark/helpers.go +++ b/test/e2e/benchmark/helpers.go @@ -5,7 +5,9 @@ package benchmark import ( "context" "fmt" + "math" "math/big" + "sort" "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" @@ -14,13 +16,25 @@ import ( // blockMetrics holds aggregated gas and transaction data across a range of blocks. type blockMetrics struct { - StartBlock uint64 - EndBlock uint64 - BlockCount int - TotalGasUsed uint64 - TotalTxCount int - GasPerBlock []uint64 - TxPerBlock []int + StartBlock uint64 + EndBlock uint64 + BlockCount int // non-empty blocks only + TotalBlockCount int // all blocks in range including empty + TotalGasUsed uint64 + TotalTxCount int + GasPerBlock []uint64 // non-empty blocks only + TxPerBlock []int // non-empty blocks only + BlockIntervals []time.Duration // intervals between all consecutive blocks (for drift) + FirstBlockTime time.Time + LastBlockTime time.Time +} + +// steadyStateDuration returns the time between the first and last active blocks. +func (m *blockMetrics) steadyStateDuration() time.Duration { + if m.FirstBlockTime.IsZero() || m.LastBlockTime.IsZero() { + return 0 + } + return m.LastBlockTime.Sub(m.FirstBlockTime) } // avgGasPerBlock returns the mean gas used per block. @@ -39,6 +53,74 @@ func (m *blockMetrics) avgTxPerBlock() float64 { return float64(m.TotalTxCount) / float64(m.BlockCount) } +// nonEmptyRatio returns the percentage of blocks that contained transactions. +func (m *blockMetrics) nonEmptyRatio() float64 { + if m.TotalBlockCount == 0 { + return 0 + } + return float64(m.BlockCount) / float64(m.TotalBlockCount) * 100 +} + +// blockIntervalStats returns p50, p99, and max of block intervals. +func (m *blockMetrics) blockIntervalStats() (p50, p99, max time.Duration) { + if len(m.BlockIntervals) == 0 { + return 0, 0, 0 + } + sorted := make([]float64, len(m.BlockIntervals)) + for i, d := range m.BlockIntervals { + sorted[i] = float64(d) + } + sort.Float64s(sorted) + return time.Duration(percentile(sorted, 0.50)), + time.Duration(percentile(sorted, 0.99)), + time.Duration(sorted[len(sorted)-1]) +} + +// gasPerBlockStats returns p50 and p99 of gas used per non-empty block. +func (m *blockMetrics) gasPerBlockStats() (p50, p99 float64) { + if len(m.GasPerBlock) == 0 { + return 0, 0 + } + sorted := make([]float64, len(m.GasPerBlock)) + for i, g := range m.GasPerBlock { + sorted[i] = float64(g) + } + sort.Float64s(sorted) + return percentile(sorted, 0.50), percentile(sorted, 0.99) +} + +// txPerBlockStats returns p50 and p99 of tx count per non-empty block. +func (m *blockMetrics) txPerBlockStats() (p50, p99 float64) { + if len(m.TxPerBlock) == 0 { + return 0, 0 + } + sorted := make([]float64, len(m.TxPerBlock)) + for i, c := range m.TxPerBlock { + sorted[i] = float64(c) + } + sort.Float64s(sorted) + return percentile(sorted, 0.50), percentile(sorted, 0.99) +} + +// percentile returns the p-th percentile from a pre-sorted float64 slice +// using linear interpolation. +func percentile(sorted []float64, p float64) float64 { + if len(sorted) == 0 { + return 0 + } + if len(sorted) == 1 { + return sorted[0] + } + rank := p * float64(len(sorted)-1) + lower := int(math.Floor(rank)) + upper := lower + 1 + if upper >= len(sorted) { + return sorted[len(sorted)-1] + } + frac := rank - float64(lower) + return sorted[lower] + frac*(sorted[upper]-sorted[lower]) +} + // mgasPerSec calculates the achieved throughput in megagas per second. func mgasPerSec(totalGasUsed uint64, elapsed time.Duration) float64 { if elapsed <= 0 { @@ -48,12 +130,15 @@ func mgasPerSec(totalGasUsed uint64, elapsed time.Duration) float64 { } // waitForSpamoorDone polls spamoor metrics until the total sent count reaches -// the target or the context is cancelled. It returns the final sent and failed -// counts. -func waitForSpamoorDone(ctx context.Context, api *spamoor.API, targetCount int, pollInterval time.Duration) (sent, failed float64, err error) { +// the target or the context is cancelled. It logs the send rate at each poll +// interval and returns the final sent and failed counts. +func waitForSpamoorDone(ctx context.Context, log func(string, ...any), api *spamoor.API, targetCount int, pollInterval time.Duration) (sent, failed float64, err error) { ticker := time.NewTicker(pollInterval) defer ticker.Stop() + start := time.Now() + var prevSent float64 + for { select { case <-ctx.Done(): @@ -65,6 +150,14 @@ func waitForSpamoorDone(ctx context.Context, api *spamoor.API, targetCount int, } sent = sumCounter(metrics["spamoor_transactions_sent_total"]) failed = sumCounter(metrics["spamoor_transactions_failed_total"]) + + delta := sent - prevSent + rate := delta / pollInterval.Seconds() + elapsed := time.Since(start).Round(time.Second) + log("spamoor progress: %.0f/%.0f sent (%.0f tx/s instant, %.0f tx/s avg, %.0f failed) [%s]", + sent, float64(targetCount), rate, sent/time.Since(start).Seconds(), failed, elapsed) + prevSent = sent + if sent >= float64(targetCount) { return sent, failed, nil } @@ -72,36 +165,52 @@ func waitForSpamoorDone(ctx context.Context, api *spamoor.API, targetCount int, } } -// collectBlockMetrics fetches block headers from startBlock to endBlock -// (inclusive) and aggregates gas and transaction counts. Blocks with zero -// transactions are skipped to avoid skewing averages with empty blocks. +// collectBlockMetrics iterates all headers in [startBlock, endBlock] to collect +// gas and transaction metrics. Empty blocks are skipped for gas/tx aggregation +// but included in block interval tracking. func collectBlockMetrics(ctx context.Context, client *ethclient.Client, startBlock, endBlock uint64) (*blockMetrics, error) { if endBlock < startBlock { return nil, fmt.Errorf("endBlock %d < startBlock %d", endBlock, startBlock) } - m := &blockMetrics{ - StartBlock: startBlock, - EndBlock: endBlock, - } + m := &blockMetrics{StartBlock: startBlock, EndBlock: endBlock} + var prevBlockTime time.Time for n := startBlock; n <= endBlock; n++ { - block, err := client.BlockByNumber(ctx, new(big.Int).SetUint64(n)) + header, err := client.HeaderByNumber(ctx, new(big.Int).SetUint64(n)) if err != nil { - return nil, fmt.Errorf("failed to fetch block %d: %w", n, err) + return nil, fmt.Errorf("failed to fetch header %d: %w", n, err) + } + + blockTime := time.Unix(int64(header.Time), 0) + m.TotalBlockCount++ + + // track intervals between all consecutive blocks + if !prevBlockTime.IsZero() { + m.BlockIntervals = append(m.BlockIntervals, blockTime.Sub(prevBlockTime)) + } + prevBlockTime = blockTime + + txCount, err := client.TransactionCount(ctx, header.Hash()) + if err != nil { + return nil, fmt.Errorf("failed to fetch tx count for block %d: %w", n, err) } - txCount := len(block.Transactions()) if txCount == 0 { continue } - gasUsed := block.GasUsed() + // non-empty block: aggregate gas and tx metrics + if m.BlockCount == 0 { + m.FirstBlockTime = blockTime + } + m.LastBlockTime = blockTime + m.BlockCount++ - m.TotalGasUsed += gasUsed - m.TotalTxCount += txCount - m.GasPerBlock = append(m.GasPerBlock, gasUsed) - m.TxPerBlock = append(m.TxPerBlock, txCount) + m.TotalGasUsed += header.GasUsed + m.TotalTxCount += int(txCount) + m.GasPerBlock = append(m.GasPerBlock, header.GasUsed) + m.TxPerBlock = append(m.TxPerBlock, int(txCount)) } return m, nil diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go index daab436cac..5cd601a670 100644 --- a/test/e2e/benchmark/spamoor_erc20_test.go +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -4,6 +4,7 @@ package benchmark import ( "context" + "fmt" "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" @@ -17,9 +18,11 @@ import ( // Diagnostic metrics: per-span latency breakdown, gas/block, tx/block. func (s *SpamoorSuite) TestERC20Throughput() { const ( - totalCount = 3000 - serviceName = "ev-node-erc20" - waitTimeout = 3 * time.Minute + numSpammers = 5 + countPerSpammer = 50000 + totalCount = numSpammers * countPerSpammer + serviceName = "ev-node-erc20" + waitTimeout = 5 * time.Minute ) t := s.T() @@ -33,10 +36,10 @@ func (s *SpamoorSuite) TestERC20Throughput() { }) erc20Config := map[string]any{ - "throughput": 100, - "total_count": totalCount, - "max_pending": 4000, - "max_wallets": 300, + "throughput": 50, // 50 tx per 100ms slot = 500 tx/s per spammer + "total_count": countPerSpammer, + "max_pending": 50000, + "max_wallets": 200, "base_fee": 20, "tip_fee": 3, "refill_amount": "5000000000000000000", @@ -50,18 +53,28 @@ func (s *SpamoorSuite) TestERC20Throughput() { startBlock := startHeader.Number.Uint64() loadStart := time.Now() - id, err := e.spamoorAPI.CreateSpammer("bench-erc20", spamoor.ScenarioERC20TX, erc20Config, true) - s.Require().NoError(err, "failed to create erc20 spammer") - t.Cleanup(func() { _ = e.spamoorAPI.DeleteSpammer(id) }) + // stagger spammer launches so their warm-up phases (contract deploy + wallet + // funding) complete at different times, producing a more continuous tx stream + // instead of synchronized bursts. + const staggerDelay = 5 * time.Second + for i := range numSpammers { + if i > 0 { + time.Sleep(staggerDelay) + } + name := fmt.Sprintf("bench-erc20-%d", i) + id, err := e.spamoorAPI.CreateSpammer(name, spamoor.ScenarioERC20TX, erc20Config, true) + s.Require().NoError(err, "failed to create spammer %s", name) + t.Cleanup(func() { _ = e.spamoorAPI.DeleteSpammer(id) }) + } // wait for spamoor to finish sending all transactions waitCtx, cancel := context.WithTimeout(ctx, waitTimeout) defer cancel() - sent, failed, err := waitForSpamoorDone(waitCtx, e.spamoorAPI, totalCount, 2*time.Second) + sent, failed, err := waitForSpamoorDone(waitCtx, t.Logf, e.spamoorAPI, totalCount, 2*time.Second) s.Require().NoError(err, "spamoor did not finish in time") - // allow a short settle period for remaining txs to be included in blocks - time.Sleep(5 * time.Second) + // allow pending txs to drain from mempool into blocks + time.Sleep(20 * time.Second) wallClock := time.Since(loadStart) endHeader, err := e.ethClient.HeaderByNumber(ctx, nil) @@ -72,18 +85,34 @@ func (s *SpamoorSuite) TestERC20Throughput() { bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock) s.Require().NoError(err, "failed to collect block metrics") - achievedMGas := mgasPerSec(bm.TotalGasUsed, wallClock) - achievedTPS := float64(bm.TotalTxCount) / wallClock.Seconds() + // use steady-state window (first active block to last active block) for + // throughput calculation, excluding warm-up and cool-down periods + steadyState := bm.steadyStateDuration() + s.Require().Greater(steadyState, time.Duration(0), "expected non-zero steady-state duration") + + achievedMGas := mgasPerSec(bm.TotalGasUsed, steadyState) + achievedTPS := float64(bm.TotalTxCount) / steadyState.Seconds() - t.Logf("ERC20 throughput: %.2f MGas/s, %.1f TPS over %s (%d blocks, %d txs, %.2f avg gas/block)", - achievedMGas, achievedTPS, wallClock.Round(time.Millisecond), - bm.BlockCount, bm.TotalTxCount, bm.avgGasPerBlock()) + intervalP50, intervalP99, intervalMax := bm.blockIntervalStats() + gasP50, gasP99 := bm.gasPerBlockStats() + txP50, txP99 := bm.txPerBlockStats() + + t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)", + startBlock, endBlock, bm.TotalBlockCount, bm.BlockCount, bm.nonEmptyRatio()) + t.Logf("block intervals: p50=%s, p99=%s, max=%s", + intervalP50.Round(time.Millisecond), intervalP99.Round(time.Millisecond), intervalMax.Round(time.Millisecond)) + t.Logf("gas/block (non-empty): avg=%.0f, p50=%.0f, p99=%.0f", bm.avgGasPerBlock(), gasP50, gasP99) + t.Logf("tx/block (non-empty): avg=%.1f, p50=%.0f, p99=%.0f", bm.avgTxPerBlock(), txP50, txP99) + t.Logf("ERC20 throughput: %.2f MGas/s, %.1f TPS over %s steady-state (%s wall clock)", + achievedMGas, achievedTPS, steadyState.Round(time.Millisecond), wallClock.Round(time.Millisecond)) // collect and report traces evNodeSpans := s.collectServiceTraces(e, serviceName) - evRethSpans := s.collectServiceTraces(e, "ev-reth") + evRethSpans := s.tryCollectServiceTraces(e, "ev-reth") e2e.PrintTraceReport(t, serviceName, evNodeSpans) - e2e.PrintTraceReport(t, "ev-reth", evRethSpans) + if len(evRethSpans) > 0 { + e2e.PrintTraceReport(t, "ev-reth", evRethSpans) + } // compute ev-node overhead ratio spanStats := e2e.AggregateSpanStats(evNodeSpans) @@ -127,7 +156,42 @@ func (s *SpamoorSuite) TestERC20Throughput() { w.addEntry(entry{ Name: "ERC20Throughput - blocks/s", Unit: "blocks/s", - Value: float64(bm.BlockCount) / wallClock.Seconds(), + Value: float64(bm.BlockCount) / steadyState.Seconds(), + }) + w.addEntry(entry{ + Name: "ERC20Throughput - non-empty block ratio", + Unit: "%", + Value: bm.nonEmptyRatio(), + }) + w.addEntry(entry{ + Name: "ERC20Throughput - block interval p50", + Unit: "ms", + Value: float64(intervalP50.Milliseconds()), + }) + w.addEntry(entry{ + Name: "ERC20Throughput - block interval p99", + Unit: "ms", + Value: float64(intervalP99.Milliseconds()), + }) + w.addEntry(entry{ + Name: "ERC20Throughput - gas/block p50", + Unit: "gas", + Value: gasP50, + }) + w.addEntry(entry{ + Name: "ERC20Throughput - gas/block p99", + Unit: "gas", + Value: gasP99, + }) + w.addEntry(entry{ + Name: "ERC20Throughput - tx/block p50", + Unit: "count", + Value: txP50, + }) + w.addEntry(entry{ + Name: "ERC20Throughput - tx/block p99", + Unit: "count", + Value: txP99, }) // add per-span avg latencies diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index a43f25890c..b96cc67b3a 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -107,7 +107,8 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { WithDockerNetworkID(evmEnv.RethNode.NetworkID). WithLogger(evmEnv.RethNode.Logger). WithRPCHosts(internalRPC). - WithPrivateKey(e2e.TestPrivateKey) + WithPrivateKey(e2e.TestPrivateKey). + WithAdditionalStartArgs("--slot-duration", "100ms", "--startup-delay", "0") spNode, err := spBuilder.Build(ctx) s.Require().NoError(err, "failed to build spamoor node") @@ -142,3 +143,44 @@ func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.Tr return toTraceSpans(extractSpansFromTraces(traces)) } + +// tryCollectServiceTraces attempts to fetch all traces by requesting them in +// batches to avoid overwhelming Jaeger with a single large response. +// Returns nil instead of failing the test if traces are unavailable. +func (s *SpamoorSuite) tryCollectServiceTraces(e *env, serviceName string) []e2e.TraceSpan { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 3*time.Minute) + defer cancel() + + ok, err := e.jaeger.External.WaitForTraces(ctx, serviceName, 1, 2*time.Second) + if err != nil || !ok { + t.Logf("warning: could not collect %s traces (err=%v, ok=%v)", serviceName, err, ok) + return nil + } + + const batchSize = 200 + var allSpans []e2e.TraceSpan + for batch := 0; ; batch++ { + traces, err := e.jaeger.External.Traces(ctx, serviceName, batchSize) + if err != nil { + if batch == 0 { + t.Logf("warning: failed to fetch %s traces: %v", serviceName, err) + return nil + } + // got some data before failing, use what we have + t.Logf("warning: %s trace fetch stopped after %d batches (%d spans): %v", serviceName, batch, len(allSpans), err) + break + } + spans := toTraceSpans(extractSpansFromTraces(traces)) + if len(spans) == 0 { + break + } + allSpans = append(allSpans, spans...) + if len(traces) < batchSize { + break // got fewer traces than requested, no more available + } + } + + t.Logf("collected %d %s spans", len(allSpans), serviceName) + return allSpans +} diff --git a/test/e2e/go.mod b/test/e2e/go.mod index 46710aa54f..f2a24355dc 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -22,6 +22,7 @@ require ( ) replace ( + github.com/celestiaorg/tastora => /Users/chatton/checkouts/celestiaorg/tastora github.com/evstack/ev-node => ../../ github.com/evstack/ev-node/core => ../../core github.com/evstack/ev-node/execution/evm => ../../execution/evm diff --git a/test/e2e/go.sum b/test/e2e/go.sum index 782994eb29..0d66192173 100644 --- a/test/e2e/go.sum +++ b/test/e2e/go.sum @@ -145,8 +145,6 @@ github.com/celestiaorg/go-square/v3 v3.0.2 h1:eSQOgNII8inK9IhiBZ+6GADQeWbRq4HYY7 github.com/celestiaorg/go-square/v3 v3.0.2/go.mod h1:oFReMLsSDMRs82ICFEeFQFCqNvwdsbIM1BzCcb0f7dM= github.com/celestiaorg/nmt v0.24.2 h1:LlpJSPOd6/Lw1Ig6HUhZuqiINHLka/ZSRTBzlNJpchg= github.com/celestiaorg/nmt v0.24.2/go.mod h1:vgLBpWBi8F5KLxTdXSwb7AU4NhiIQ1AQRGa+PzdcLEA= -github.com/celestiaorg/tastora v0.16.0 h1:V4ctGcvVR8thy4ulvrHagrTfdNfuCHOTsCYoKVRQ75U= -github.com/celestiaorg/tastora v0.16.0/go.mod h1:C867PBm6Ne6e/1JlmsRqcLeJ6RHAuMoMRCvwJzV/q8g= github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= From 99e3e42d64d47c0caa885b85ca85ead11f859dd7 Mon Sep 17 00:00:00 2001 From: chatton Date: Thu, 26 Feb 2026 15:31:07 +0000 Subject: [PATCH 08/19] fix: improve benchmark measurement window and reliability - Move startBlock capture after spammer creation to exclude warm-up - Replace 20s drain sleep with smart poll (waitForDrain) - Add deleteAllSpammers cleanup to handle stale spamoor DB entries - Lower trace sample rate to 10% to prevent Jaeger OOM --- test/e2e/benchmark/helpers.go | 59 ++++++++++++++++++++++++ test/e2e/benchmark/spamoor_erc20_test.go | 32 +++++++------ test/e2e/benchmark/suite_test.go | 4 +- 3 files changed, 79 insertions(+), 16 deletions(-) diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go index d25b06d7b5..bfc83a9eee 100644 --- a/test/e2e/benchmark/helpers.go +++ b/test/e2e/benchmark/helpers.go @@ -165,6 +165,65 @@ func waitForSpamoorDone(ctx context.Context, log func(string, ...any), api *spam } } +// deleteAllSpammers removes any pre-existing spammers from the daemon. +// This prevents stale spammers (from previous failed runs) being restored +// from the spamoor SQLite database. +func deleteAllSpammers(api *spamoor.API) error { + existing, err := api.ListSpammers() + if err != nil { + return fmt.Errorf("list spammers: %w", err) + } + for _, sp := range existing { + if err := api.DeleteSpammer(sp.ID); err != nil { + return fmt.Errorf("delete spammer %d: %w", sp.ID, err) + } + } + return nil +} + +// waitForDrain polls the latest block until consecutiveEmpty consecutive empty +// blocks are observed, indicating the mempool has drained. +func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclient.Client, consecutiveEmpty int) { + var emptyRun int + var lastBlock uint64 + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + log("drain timeout after %d consecutive empty blocks (needed %d)", emptyRun, consecutiveEmpty) + return + case <-ticker.C: + header, err := client.HeaderByNumber(ctx, nil) + if err != nil { + continue + } + num := header.Number.Uint64() + if num == lastBlock { + continue + } + + txCount, err := client.TransactionCount(ctx, header.Hash()) + if err != nil { + continue + } + + lastBlock = num + if txCount == 0 { + emptyRun++ + } else { + emptyRun = 0 + } + + if emptyRun >= consecutiveEmpty { + log("mempool drained: %d consecutive empty blocks at block %d", emptyRun, num) + return + } + } + } +} + // collectBlockMetrics iterates all headers in [startBlock, endBlock] to collect // gas and transaction metrics. Empty blocks are skipped for gas/tx aggregation // but included in block interval tracking. diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go index 5cd601a670..c9ba95b375 100644 --- a/test/e2e/benchmark/spamoor_erc20_test.go +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -18,7 +18,7 @@ import ( // Diagnostic metrics: per-span latency breakdown, gas/block, tx/block. func (s *SpamoorSuite) TestERC20Throughput() { const ( - numSpammers = 5 + numSpammers = 5 countPerSpammer = 50000 totalCount = numSpammers * countPerSpammer serviceName = "ev-node-erc20" @@ -47,34 +47,36 @@ func (s *SpamoorSuite) TestERC20Throughput() { "refill_interval": 600, } - // record the starting block before generating load - startHeader, err := e.ethClient.HeaderByNumber(ctx, nil) - s.Require().NoError(err, "failed to get start block header") - startBlock := startHeader.Number.Uint64() - loadStart := time.Now() + // clear any stale spammers restored from the persistent spamoor database + s.Require().NoError(deleteAllSpammers(e.spamoorAPI), "failed to delete stale spammers") - // stagger spammer launches so their warm-up phases (contract deploy + wallet - // funding) complete at different times, producing a more continuous tx stream - // instead of synchronized bursts. - const staggerDelay = 5 * time.Second + // launch all spammers before recording startBlock so warm-up + // (contract deploy + wallet funding) is excluded from the measurement window. for i := range numSpammers { - if i > 0 { - time.Sleep(staggerDelay) - } name := fmt.Sprintf("bench-erc20-%d", i) id, err := e.spamoorAPI.CreateSpammer(name, spamoor.ScenarioERC20TX, erc20Config, true) s.Require().NoError(err, "failed to create spammer %s", name) t.Cleanup(func() { _ = e.spamoorAPI.DeleteSpammer(id) }) } + // record the starting block after all spammers are launched so the + // measurement window excludes the warm-up period. + startHeader, err := e.ethClient.HeaderByNumber(ctx, nil) + s.Require().NoError(err, "failed to get start block header") + startBlock := startHeader.Number.Uint64() + loadStart := time.Now() + // wait for spamoor to finish sending all transactions waitCtx, cancel := context.WithTimeout(ctx, waitTimeout) defer cancel() sent, failed, err := waitForSpamoorDone(waitCtx, t.Logf, e.spamoorAPI, totalCount, 2*time.Second) s.Require().NoError(err, "spamoor did not finish in time") - // allow pending txs to drain from mempool into blocks - time.Sleep(20 * time.Second) + // wait for pending txs to drain: once we see several consecutive empty + // blocks, the mempool is drained and we can stop. + drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second) + defer drainCancel() + waitForDrain(drainCtx, t.Logf, e.ethClient, 10) wallClock := time.Since(loadStart) endHeader, err := e.ethClient.HeaderByNumber(ctx, nil) diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index b96cc67b3a..f22d522478 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -75,6 +75,8 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", "OTEL_EXPORTER_OTLP_PROTOCOL=http", "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http", + "OTEL_TRACES_SAMPLER=parentbased_traceidratio", + "OTEL_TRACES_SAMPLER_ARG=0.1", "RUST_LOG=debug", "OTEL_SDK_DISABLED=false", ) @@ -87,7 +89,7 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { e2e.SetupSequencerNode(t, sut, sequencerHome, evmEnv.SequencerJWT, evmEnv.GenesisHash, evmEnv.Endpoints, "--evnode.instrumentation.tracing=true", "--evnode.instrumentation.tracing_endpoint", otlpHTTP, - "--evnode.instrumentation.tracing_sample_rate", "1.0", + "--evnode.instrumentation.tracing_sample_rate", "0.1", "--evnode.instrumentation.tracing_service_name", cfg.serviceName, ) t.Log("sequencer node is up") From e4e06c5f232361ffc58545c9a0cc45f17d28e732 Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 09:36:47 +0000 Subject: [PATCH 09/19] fix: address PR review feedback for benchmark suite - make reth tag configurable via EV_RETH_TAG env var (default pr-140) - fix OTLP config: remove duplicate env vars, use http/protobuf protocol - use require.Eventually for host readiness polling - rename requireHTTP to requireHostUp - use non-fatal logging in resultWriter.flush deferred context - fix stale doc comment (setupCommonEVMEnv -> SetupCommonEVMEnv) - rename loop variable to avoid shadowing testing.TB convention - add block/internal/executing/** to CI path trigger - remove unused require import from output.go --- .github/workflows/benchmark.yml | 1 + test/e2e/benchmark/metrics.go | 27 +++++++++--------------- test/e2e/benchmark/output.go | 11 +++++++--- test/e2e/benchmark/spamoor_smoke_test.go | 8 ++++++- test/e2e/benchmark/suite_test.go | 9 ++++---- test/e2e/benchmark/traces.go | 4 ++-- test/e2e/evm_test_common.go | 2 +- 7 files changed, 34 insertions(+), 28 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index b3a157a0b1..203ff03e22 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -13,6 +13,7 @@ permissions: {} - 'test/e2e/evm_contract_bench_test.go' - 'test/e2e/evm_test_common.go' - 'test/e2e/sut_helper.go' + - 'block/internal/executing/**' - '.github/workflows/benchmark.yml' workflow_dispatch: diff --git a/test/e2e/benchmark/metrics.go b/test/e2e/benchmark/metrics.go index fcc51af865..24ddd7506a 100644 --- a/test/e2e/benchmark/metrics.go +++ b/test/e2e/benchmark/metrics.go @@ -3,34 +3,27 @@ package benchmark import ( - "fmt" "net/http" "testing" "time" + "github.com/stretchr/testify/require" + dto "github.com/prometheus/client_model/go" ) -// requireHTTP polls a URL until it returns a 2xx status code or the timeout expires. -func requireHTTP(t testing.TB, url string, timeout time.Duration) { +// requireHostUp polls a URL until it returns a 2xx status code or the timeout expires. +func requireHostUp(t testing.TB, url string, timeout time.Duration) { t.Helper() client := &http.Client{Timeout: 200 * time.Millisecond} - deadline := time.Now().Add(timeout) - var lastErr error - for time.Now().Before(deadline) { + require.Eventually(t, func() bool { resp, err := client.Get(url) - if err == nil { - _ = resp.Body.Close() - if resp.StatusCode >= 200 && resp.StatusCode < 300 { - return - } - lastErr = fmt.Errorf("status %d", resp.StatusCode) - } else { - lastErr = err + if err != nil { + return false } - time.Sleep(100 * time.Millisecond) - } - t.Fatalf("daemon not ready at %s: %v", url, lastErr) + _ = resp.Body.Close() + return resp.StatusCode >= 200 && resp.StatusCode < 300 + }, timeout, 100*time.Millisecond, "daemon not ready at %s", url) } // sumCounter sums all counter values in a prometheus MetricFamily. diff --git a/test/e2e/benchmark/output.go b/test/e2e/benchmark/output.go index 0d05ebd4ec..39ccbe6d69 100644 --- a/test/e2e/benchmark/output.go +++ b/test/e2e/benchmark/output.go @@ -10,7 +10,6 @@ import ( "testing" e2e "github.com/evstack/ev-node/test/e2e" - "github.com/stretchr/testify/require" ) // entry matches the customSmallerIsBetter format for github-action-benchmark. @@ -71,7 +70,13 @@ func (w *resultWriter) flush() { } data, err := json.MarshalIndent(w.entries, "", " ") - require.NoError(w.t, err, "failed to marshal benchmark JSON") - require.NoError(w.t, os.WriteFile(outputPath, data, 0644), "failed to write benchmark JSON to %s", outputPath) + if err != nil { + w.t.Logf("WARNING: failed to marshal benchmark JSON: %v", err) + return + } + if err := os.WriteFile(outputPath, data, 0644); err != nil { + w.t.Logf("WARNING: failed to write benchmark JSON to %s: %v", outputPath, err) + return + } w.t.Logf("wrote %d benchmark entries to %s", len(w.entries), outputPath) } diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go index 1f56f26874..fa62583736 100644 --- a/test/e2e/benchmark/spamoor_smoke_test.go +++ b/test/e2e/benchmark/spamoor_smoke_test.go @@ -3,6 +3,7 @@ package benchmark import ( + "os" "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" @@ -17,8 +18,13 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { w := newResultWriter(t, "SpamoorSmoke") defer w.flush() + // TODO: temporary hardcoded tag, will be replaced with a proper release tag + rethTag := os.Getenv("EV_RETH_TAG") + if rethTag == "" { + rethTag = "pr-140" + } e := s.setupEnv(config{ - rethTag: "pr-140", + rethTag: rethTag, serviceName: "ev-node-smoke", }) api := e.spamoorAPI diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index a43f25890c..bed6b6c19e 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -71,10 +71,11 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { evmEnv := e2e.SetupCommonEVMEnv(t, sut, s.dockerCli, s.networkID, e2e.WithRethOpts(func(b *reth.NodeBuilder) { b.WithTag(cfg.rethTag).WithEnv( + // ev-reth reads OTEL_EXPORTER_OTLP_ENDPOINT and passes it directly + // to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does + // not auto-append /v1/traces, so the full path is required. "OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", - "OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", - "OTEL_EXPORTER_OTLP_PROTOCOL=http", - "OTEL_EXPORTER_OTLP_TRACES_PROTOCOL=http", + "OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf", "RUST_LOG=debug", "OTEL_SDK_DISABLED=false", ) @@ -117,7 +118,7 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { spInfo, err := spNode.GetNetworkInfo(ctx) s.Require().NoError(err, "failed to get spamoor network info") apiAddr := "http://127.0.0.1:" + spInfo.External.Ports.HTTP - requireHTTP(t, apiAddr+"/api/spammers", 30*time.Second) + requireHostUp(t, apiAddr+"/api/spammers", 30*time.Second) return &env{ jaeger: jg, diff --git a/test/e2e/benchmark/traces.go b/test/e2e/benchmark/traces.go index 76f160dfa2..459dd16a1a 100644 --- a/test/e2e/benchmark/traces.go +++ b/test/e2e/benchmark/traces.go @@ -22,8 +22,8 @@ func (j jaegerSpan) SpanDuration() time.Duration { return time.Duration(j.durati // extractSpansFromTraces walks Jaeger's []any response and pulls out span operation names and durations. func extractSpansFromTraces(traces []any) []jaegerSpan { var out []jaegerSpan - for _, t := range traces { - traceMap, ok := t.(map[string]any) + for _, traceEntry := range traces { + traceMap, ok := traceEntry.(map[string]any) if !ok { continue } diff --git a/test/e2e/evm_test_common.go b/test/e2e/evm_test_common.go index 161ff89349..3ec25b6633 100644 --- a/test/e2e/evm_test_common.go +++ b/test/e2e/evm_test_common.go @@ -529,7 +529,7 @@ func WithRethOpts(opts ...evmtest.RethNodeOpt) SetupOpt { } } -// setupCommonEVMEnv creates and initializes ev-reth instances, while also initializing the local ev-node instance +// SetupCommonEVMEnv creates and initializes ev-reth instances, while also initializing the local ev-node instance // managed by sut. If a full node is also required, we can use the WithFullNode() additional option. func SetupCommonEVMEnv(t testing.TB, sut *SystemUnderTest, client tastoratypes.TastoraDockerClient, networkID string, opts ...SetupOpt) *EVMEnv { t.Helper() From 03b9239a074c54675fad96918fbcd21d9711856b Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 09:50:57 +0000 Subject: [PATCH 10/19] chore: specify http --- test/e2e/benchmark/suite_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index bed6b6c19e..34c0bc57a1 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -75,7 +75,7 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { // to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does // not auto-append /v1/traces, so the full path is required. "OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", - "OTEL_EXPORTER_OTLP_PROTOCOL=http/protobuf", + "OTEL_EXPORTER_OTLP_PROTOCOL=http", "RUST_LOG=debug", "OTEL_SDK_DISABLED=false", ) From fe3ca23cc6aee89ec481078447fb4ba0e60f2491 Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 10:17:50 +0000 Subject: [PATCH 11/19] chore: filter out benchmark tests from test-e2e --- .just/test.just | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.just/test.just b/.just/test.just index c0360d43fb..2b35df6e4e 100644 --- a/.just/test.just +++ b/.just/test.just @@ -25,7 +25,7 @@ test-integration: [group('test')] test-e2e: build build-da build-evm docker-build-if-local @echo "--> Running e2e tests" - @cd test/e2e && go test -mod=readonly -failfast -timeout=15m -tags='e2e evm' ./... --binary=../../build/testapp --evm-binary=../../build/evm + @cd test/e2e && go test -mod=readonly -failfast -timeout=15m -tags='e2e evm' $(go list -tags='e2e evm' ./... | grep -v /benchmark) --binary=../../build/testapp --evm-binary=../../build/evm # Run integration tests with coverage [group('test')] From 054f2c62d25663528ddb8cf2ad9f772bd04adef4 Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 12:51:43 +0000 Subject: [PATCH 12/19] refactor: centralize reth config and lower ERC20 spammer count move EV_RETH_TAG resolution and rpc connection limits into setupEnv so all benchmark tests share the same reth configuration. lower ERC20 spammer count from 5 to 2 to reduce resource contention on local hardware while keeping the loop for easy scaling on dedicated infra. --- test/e2e/benchmark/spamoor_erc20_test.go | 3 +- test/e2e/benchmark/spamoor_smoke_test.go | 7 ----- test/e2e/benchmark/suite_test.go | 36 +++++++++++++++++------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go index c9ba95b375..10c6271179 100644 --- a/test/e2e/benchmark/spamoor_erc20_test.go +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -18,7 +18,7 @@ import ( // Diagnostic metrics: per-span latency breakdown, gas/block, tx/block. func (s *SpamoorSuite) TestERC20Throughput() { const ( - numSpammers = 5 + numSpammers = 2 countPerSpammer = 50000 totalCount = numSpammers * countPerSpammer serviceName = "ev-node-erc20" @@ -31,7 +31,6 @@ func (s *SpamoorSuite) TestERC20Throughput() { defer w.flush() e := s.setupEnv(config{ - rethTag: "pr-140", serviceName: serviceName, }) diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go index fa62583736..82c8bbf375 100644 --- a/test/e2e/benchmark/spamoor_smoke_test.go +++ b/test/e2e/benchmark/spamoor_smoke_test.go @@ -3,7 +3,6 @@ package benchmark import ( - "os" "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" @@ -18,13 +17,7 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { w := newResultWriter(t, "SpamoorSmoke") defer w.flush() - // TODO: temporary hardcoded tag, will be replaced with a proper release tag - rethTag := os.Getenv("EV_RETH_TAG") - if rethTag == "" { - rethTag = "pr-140" - } e := s.setupEnv(config{ - rethTag: rethTag, serviceName: "ev-node-smoke", }) api := e.spamoorAPI diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index db8854b363..410f815500 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -4,6 +4,7 @@ package benchmark import ( "context" + "os" "path/filepath" "testing" "time" @@ -48,10 +49,19 @@ type env struct { // config parameterizes the per-test environment setup. type config struct { - rethTag string serviceName string } +// TODO: temporary hardcoded tag, will be replaced with a proper release tag +const defaultRethTag = "pr-140" + +func rethTag() string { + if tag := os.Getenv("EV_RETH_TAG"); tag != "" { + return tag + } + return defaultRethTag +} + // setupEnv creates a Jaeger + reth + sequencer + Spamoor environment for // a single test. Each call spins up isolated infrastructure so tests // can't interfere with each other. @@ -70,15 +80,21 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { // reth + local DA with OTLP tracing to Jaeger evmEnv := e2e.SetupCommonEVMEnv(t, sut, s.dockerCli, s.networkID, e2e.WithRethOpts(func(b *reth.NodeBuilder) { - b.WithTag(cfg.rethTag).WithEnv( - // ev-reth reads OTEL_EXPORTER_OTLP_ENDPOINT and passes it directly - // to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does - // not auto-append /v1/traces, so the full path is required. - "OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", - "OTEL_EXPORTER_OTLP_PROTOCOL=http", - "RUST_LOG=debug", - "OTEL_SDK_DISABLED=false", - ) + b.WithTag(rethTag()). + // increase values to facilitate spamoor. + WithAdditionalStartArgs( + "--rpc.max-connections", "5000", + "--rpc.max-tracing-requests", "1000", + ). + WithEnv( + // ev-reth reads OTEL_EXPORTER_OTLP_ENDPOINT and passes it directly + // to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does + // not auto-append /v1/traces, so the full path is required. + "OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", + "OTEL_EXPORTER_OTLP_PROTOCOL=http", + "RUST_LOG=debug", + "OTEL_SDK_DISABLED=false", + ) }), ) From 26bb1170356edc5d4ebee55b4dce511892f4222e Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 13:11:38 +0000 Subject: [PATCH 13/19] chore: collect all traces at once --- test/e2e/benchmark/suite_test.go | 35 +++++++++----------------------- 1 file changed, 10 insertions(+), 25 deletions(-) diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index 410f815500..7201bde991 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -104,6 +104,7 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { e2e.SetupSequencerNode(t, sut, sequencerHome, evmEnv.SequencerJWT, evmEnv.GenesisHash, evmEnv.Endpoints, "--evnode.instrumentation.tracing=true", "--evnode.instrumentation.tracing_endpoint", otlpHTTP, + // TODO: setting this to 1 produced too many spans for the local Jaeger deployment alongside everything else. "--evnode.instrumentation.tracing_sample_rate", "0.1", "--evnode.instrumentation.tracing_service_name", cfg.serviceName, ) @@ -161,9 +162,8 @@ func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.Tr return toTraceSpans(extractSpansFromTraces(traces)) } -// tryCollectServiceTraces attempts to fetch all traces by requesting them in -// batches to avoid overwhelming Jaeger with a single large response. -// Returns nil instead of failing the test if traces are unavailable. +// tryCollectServiceTraces fetches traces from Jaeger for the given service, +// returning nil instead of failing the test if traces are unavailable. func (s *SpamoorSuite) tryCollectServiceTraces(e *env, serviceName string) []e2e.TraceSpan { t := s.T() ctx, cancel := context.WithTimeout(t.Context(), 3*time.Minute) @@ -175,28 +175,13 @@ func (s *SpamoorSuite) tryCollectServiceTraces(e *env, serviceName string) []e2e return nil } - const batchSize = 200 - var allSpans []e2e.TraceSpan - for batch := 0; ; batch++ { - traces, err := e.jaeger.External.Traces(ctx, serviceName, batchSize) - if err != nil { - if batch == 0 { - t.Logf("warning: failed to fetch %s traces: %v", serviceName, err) - return nil - } - t.Logf("warning: %s trace fetch stopped after %d batches (%d spans): %v", serviceName, batch, len(allSpans), err) - break - } - spans := toTraceSpans(extractSpansFromTraces(traces)) - if len(spans) == 0 { - break - } - allSpans = append(allSpans, spans...) - if len(traces) < batchSize { - break - } + traces, err := e.jaeger.External.Traces(ctx, serviceName, 10000) + if err != nil { + t.Logf("warning: failed to fetch %s traces: %v", serviceName, err) + return nil } - t.Logf("collected %d %s spans", len(allSpans), serviceName) - return allSpans + spans := toTraceSpans(extractSpansFromTraces(traces)) + t.Logf("collected %d %s spans", len(spans), serviceName) + return spans } From b88cae3a6860a02d415f3ea34a654f215fc64867 Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 13:21:24 +0000 Subject: [PATCH 14/19] chore: self review --- test/e2e/benchmark/helpers.go | 1 + test/e2e/benchmark/spamoor_smoke_test.go | 24 +++++++++++++++--------- test/e2e/go.mod | 3 +-- test/e2e/go.sum | 2 ++ 4 files changed, 19 insertions(+), 11 deletions(-) diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go index bfc83a9eee..5f26a404ab 100644 --- a/test/e2e/benchmark/helpers.go +++ b/test/e2e/benchmark/helpers.go @@ -146,6 +146,7 @@ func waitForSpamoorDone(ctx context.Context, log func(string, ...any), api *spam case <-ticker.C: metrics, mErr := api.GetMetrics() if mErr != nil { + log("failed to get spamoor metrics: %v", mErr) continue } sent = sumCounter(metrics["spamoor_transactions_sent_total"]) diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go index 82c8bbf375..8b0dbab35b 100644 --- a/test/e2e/benchmark/spamoor_smoke_test.go +++ b/test/e2e/benchmark/spamoor_smoke_test.go @@ -22,6 +22,8 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { }) api := e.spamoorAPI + s.Require().NoError(deleteAllSpammers(api), "failed to delete stale spammers") + eoatx := map[string]any{ "throughput": 100, "total_count": 3000, @@ -76,9 +78,11 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { // collect traces evNodeSpans := s.collectServiceTraces(e, "ev-node-smoke") - evRethSpans := s.collectServiceTraces(e, "ev-reth") + evRethSpans := s.tryCollectServiceTraces(e, "ev-reth") e2e.PrintTraceReport(t, "ev-node-smoke", evNodeSpans) - e2e.PrintTraceReport(t, "ev-reth", evRethSpans) + if len(evRethSpans) > 0 { + e2e.PrintTraceReport(t, "ev-reth", evRethSpans) + } w.addSpans(append(evNodeSpans, evRethSpans...)) @@ -100,13 +104,15 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { "DA.Submit", }, "ev-node-smoke") - // assert expected ev-reth span names - assertSpanNames(t, evRethSpans, []string{ - "build_payload", - "execute_tx", - "try_build", - "validate_transaction", - }, "ev-reth") + // assert expected ev-reth span names when traces are available + if len(evRethSpans) > 0 { + assertSpanNames(t, evRethSpans, []string{ + "build_payload", + "execute_tx", + "try_build", + "validate_transaction", + }, "ev-reth") + } s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") s.Require().Zero(fail, "no transactions should have failed") diff --git a/test/e2e/go.mod b/test/e2e/go.mod index f2a24355dc..784e92e5a1 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -5,7 +5,7 @@ go 1.25.6 require ( cosmossdk.io/math v1.5.3 github.com/celestiaorg/go-square/v3 v3.0.2 - github.com/celestiaorg/tastora v0.16.0 + github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb github.com/cosmos/cosmos-sdk v0.53.6 github.com/cosmos/ibc-go/v8 v8.8.0 github.com/ethereum/go-ethereum v1.17.0 @@ -22,7 +22,6 @@ require ( ) replace ( - github.com/celestiaorg/tastora => /Users/chatton/checkouts/celestiaorg/tastora github.com/evstack/ev-node => ../../ github.com/evstack/ev-node/core => ../../core github.com/evstack/ev-node/execution/evm => ../../execution/evm diff --git a/test/e2e/go.sum b/test/e2e/go.sum index 0d66192173..588dd5b985 100644 --- a/test/e2e/go.sum +++ b/test/e2e/go.sum @@ -145,6 +145,8 @@ github.com/celestiaorg/go-square/v3 v3.0.2 h1:eSQOgNII8inK9IhiBZ+6GADQeWbRq4HYY7 github.com/celestiaorg/go-square/v3 v3.0.2/go.mod h1:oFReMLsSDMRs82ICFEeFQFCqNvwdsbIM1BzCcb0f7dM= github.com/celestiaorg/nmt v0.24.2 h1:LlpJSPOd6/Lw1Ig6HUhZuqiINHLka/ZSRTBzlNJpchg= github.com/celestiaorg/nmt v0.24.2/go.mod h1:vgLBpWBi8F5KLxTdXSwb7AU4NhiIQ1AQRGa+PzdcLEA= +github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb h1:rTVChmvo3+SYi0dwd5W2yz2zyuAid8cXVAeROEY5ecs= +github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb/go.mod h1:C867PBm6Ne6e/1JlmsRqcLeJ6RHAuMoMRCvwJzV/q8g= github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= From 272ab711b7a10a9347c697439ce12e44386a4b8c Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 13:54:08 +0000 Subject: [PATCH 15/19] refactor: extract benchmark helpers to slim down ERC20 test body - add blockMetricsSummary with summarize(), log(), and entries() methods - add evNodeOverhead() for computing ProduceBlock vs ExecuteTxs overhead - add collectTraces() suite method to deduplicate trace collection pattern - add addEntries() convenience method on resultWriter - slim TestERC20Throughput from ~217 to ~119 lines - reuse collectTraces in TestSpamoorSmoke --- test/e2e/benchmark/helpers.go | 100 +++++++++++++++++++ test/e2e/benchmark/output.go | 5 + test/e2e/benchmark/spamoor_erc20_test.go | 118 ++--------------------- test/e2e/benchmark/spamoor_smoke_test.go | 17 +--- test/e2e/benchmark/suite_test.go | 26 +++++ 5 files changed, 146 insertions(+), 120 deletions(-) diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go index 5f26a404ab..136f5daac4 100644 --- a/test/e2e/benchmark/helpers.go +++ b/test/e2e/benchmark/helpers.go @@ -8,10 +8,12 @@ import ( "math" "math/big" "sort" + "testing" "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" "github.com/ethereum/go-ethereum/ethclient" + e2e "github.com/evstack/ev-node/test/e2e" ) // blockMetrics holds aggregated gas and transaction data across a range of blocks. @@ -225,6 +227,104 @@ func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclie } } +// blockMetricsSummary holds the computed stats from a blockMetrics measurement window. +type blockMetricsSummary struct { + SteadyState time.Duration + AchievedMGas float64 + AchievedTPS float64 + IntervalP50 time.Duration + IntervalP99 time.Duration + IntervalMax time.Duration + GasP50 float64 + GasP99 float64 + TxP50 float64 + TxP99 float64 + AvgGas float64 + AvgTx float64 + BlocksPerSec float64 + NonEmptyRatio float64 +} + +// summarize computes all derived stats from the raw block metrics. +func (m *blockMetrics) summarize() *blockMetricsSummary { + ss := m.steadyStateDuration() + intervalP50, intervalP99, intervalMax := m.blockIntervalStats() + gasP50, gasP99 := m.gasPerBlockStats() + txP50, txP99 := m.txPerBlockStats() + + var blocksPerSec float64 + if ss > 0 { + blocksPerSec = float64(m.BlockCount) / ss.Seconds() + } + + return &blockMetricsSummary{ + SteadyState: ss, + AchievedMGas: mgasPerSec(m.TotalGasUsed, ss), + AchievedTPS: float64(m.TotalTxCount) / ss.Seconds(), + IntervalP50: intervalP50, + IntervalP99: intervalP99, + IntervalMax: intervalMax, + GasP50: gasP50, + GasP99: gasP99, + TxP50: txP50, + TxP99: txP99, + AvgGas: m.avgGasPerBlock(), + AvgTx: m.avgTxPerBlock(), + BlocksPerSec: blocksPerSec, + NonEmptyRatio: m.nonEmptyRatio(), + } +} + +// log prints a concise summary of the block metrics to the test log. +func (s *blockMetricsSummary) log(t testing.TB, startBlock, endBlock uint64, totalBlocks, nonEmptyBlocks int, wallClock time.Duration) { + t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)", + startBlock, endBlock, totalBlocks, nonEmptyBlocks, s.NonEmptyRatio) + t.Logf("block intervals: p50=%s, p99=%s, max=%s", + s.IntervalP50.Round(time.Millisecond), s.IntervalP99.Round(time.Millisecond), s.IntervalMax.Round(time.Millisecond)) + t.Logf("gas/block (non-empty): avg=%.0f, p50=%.0f, p99=%.0f", s.AvgGas, s.GasP50, s.GasP99) + t.Logf("tx/block (non-empty): avg=%.1f, p50=%.0f, p99=%.0f", s.AvgTx, s.TxP50, s.TxP99) + t.Logf("throughput: %.2f MGas/s, %.1f TPS over %s steady-state (%s wall clock)", + s.AchievedMGas, s.AchievedTPS, s.SteadyState.Round(time.Millisecond), wallClock.Round(time.Millisecond)) +} + +// entries returns all summary metrics as result writer entries with the given prefix. +func (s *blockMetricsSummary) entries(prefix string) []entry { + return []entry{ + {Name: prefix + " - MGas/s", Unit: "MGas/s", Value: s.AchievedMGas}, + {Name: prefix + " - TPS", Unit: "tx/s", Value: s.AchievedTPS}, + {Name: prefix + " - avg gas/block", Unit: "gas", Value: s.AvgGas}, + {Name: prefix + " - avg tx/block", Unit: "count", Value: s.AvgTx}, + {Name: prefix + " - blocks/s", Unit: "blocks/s", Value: s.BlocksPerSec}, + {Name: prefix + " - non-empty block ratio", Unit: "%", Value: s.NonEmptyRatio}, + {Name: prefix + " - block interval p50", Unit: "ms", Value: float64(s.IntervalP50.Milliseconds())}, + {Name: prefix + " - block interval p99", Unit: "ms", Value: float64(s.IntervalP99.Milliseconds())}, + {Name: prefix + " - gas/block p50", Unit: "gas", Value: s.GasP50}, + {Name: prefix + " - gas/block p99", Unit: "gas", Value: s.GasP99}, + {Name: prefix + " - tx/block p50", Unit: "count", Value: s.TxP50}, + {Name: prefix + " - tx/block p99", Unit: "count", Value: s.TxP99}, + } +} + +// evNodeOverhead computes the overhead percentage of ev-node's ProduceBlock +// over the inner ExecuteTxs span. Returns the overhead and whether it was computable. +func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) { + stats := e2e.AggregateSpanStats(spans) + produce, ok := stats["BlockExecutor.ProduceBlock"] + if !ok { + return 0, false + } + execute, ok := stats["Executor.ExecuteTxs"] + if !ok { + return 0, false + } + produceAvg := float64(produce.Total.Microseconds()) / float64(produce.Count) + executeAvg := float64(execute.Total.Microseconds()) / float64(execute.Count) + if produceAvg <= 0 { + return 0, false + } + return (produceAvg - executeAvg) / produceAvg * 100, true +} + // collectBlockMetrics iterates all headers in [startBlock, endBlock] to collect // gas and transaction metrics. Empty blocks are skipped for gas/tx aggregation // but included in block interval tracking. diff --git a/test/e2e/benchmark/output.go b/test/e2e/benchmark/output.go index 39ccbe6d69..06c60e7646 100644 --- a/test/e2e/benchmark/output.go +++ b/test/e2e/benchmark/output.go @@ -61,6 +61,11 @@ func (w *resultWriter) addEntry(e entry) { w.entries = append(w.entries, e) } +// addEntries appends multiple entries to the results. +func (w *resultWriter) addEntries(entries []entry) { + w.entries = append(w.entries, entries...) +} + // flush writes accumulated entries to the path in BENCH_JSON_OUTPUT. // It is a no-op when the env var is unset or no entries were added. func (w *resultWriter) flush() { diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go index 10c6271179..357c5c4f3f 100644 --- a/test/e2e/benchmark/spamoor_erc20_test.go +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -8,7 +8,6 @@ import ( "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" - e2e "github.com/evstack/ev-node/test/e2e" ) // TestERC20Throughput measures ERC-20 token transfer throughput in isolation. @@ -86,124 +85,27 @@ func (s *SpamoorSuite) TestERC20Throughput() { bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock) s.Require().NoError(err, "failed to collect block metrics") - // use steady-state window (first active block to last active block) for - // throughput calculation, excluding warm-up and cool-down periods - steadyState := bm.steadyStateDuration() - s.Require().Greater(steadyState, time.Duration(0), "expected non-zero steady-state duration") - - achievedMGas := mgasPerSec(bm.TotalGasUsed, steadyState) - achievedTPS := float64(bm.TotalTxCount) / steadyState.Seconds() - - intervalP50, intervalP99, intervalMax := bm.blockIntervalStats() - gasP50, gasP99 := bm.gasPerBlockStats() - txP50, txP99 := bm.txPerBlockStats() - - t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)", - startBlock, endBlock, bm.TotalBlockCount, bm.BlockCount, bm.nonEmptyRatio()) - t.Logf("block intervals: p50=%s, p99=%s, max=%s", - intervalP50.Round(time.Millisecond), intervalP99.Round(time.Millisecond), intervalMax.Round(time.Millisecond)) - t.Logf("gas/block (non-empty): avg=%.0f, p50=%.0f, p99=%.0f", bm.avgGasPerBlock(), gasP50, gasP99) - t.Logf("tx/block (non-empty): avg=%.1f, p50=%.0f, p99=%.0f", bm.avgTxPerBlock(), txP50, txP99) - t.Logf("ERC20 throughput: %.2f MGas/s, %.1f TPS over %s steady-state (%s wall clock)", - achievedMGas, achievedTPS, steadyState.Round(time.Millisecond), wallClock.Round(time.Millisecond)) + summary := bm.summarize() + s.Require().Greater(summary.SteadyState, time.Duration(0), "expected non-zero steady-state duration") + summary.log(t, startBlock, endBlock, bm.TotalBlockCount, bm.BlockCount, wallClock) // collect and report traces - evNodeSpans := s.collectServiceTraces(e, serviceName) - evRethSpans := s.tryCollectServiceTraces(e, "ev-reth") - e2e.PrintTraceReport(t, serviceName, evNodeSpans) - if len(evRethSpans) > 0 { - e2e.PrintTraceReport(t, "ev-reth", evRethSpans) - } + traces := s.collectTraces(e, serviceName) - // compute ev-node overhead ratio - spanStats := e2e.AggregateSpanStats(evNodeSpans) - if produceBlock, ok := spanStats["BlockExecutor.ProduceBlock"]; ok { - if executeTxs, ok2 := spanStats["Executor.ExecuteTxs"]; ok2 { - produceAvg := float64(produceBlock.Total.Microseconds()) / float64(produceBlock.Count) - executeAvg := float64(executeTxs.Total.Microseconds()) / float64(executeTxs.Count) - if produceAvg > 0 { - overhead := (produceAvg - executeAvg) / produceAvg * 100 - t.Logf("ev-node overhead: %.1f%%", overhead) - w.addEntry(entry{ - Name: "ERC20Throughput - ev-node overhead", - Unit: "%", - Value: overhead, - }) - } - } + if overhead, ok := evNodeOverhead(traces.evNode); ok { + t.Logf("ev-node overhead: %.1f%%", overhead) + w.addEntry(entry{Name: "ERC20Throughput - ev-node overhead", Unit: "%", Value: overhead}) } - // write all metrics to benchmark output - w.addEntry(entry{ - Name: "ERC20Throughput - MGas/s", - Unit: "MGas/s", - Value: achievedMGas, - }) - w.addEntry(entry{ - Name: "ERC20Throughput - TPS", - Unit: "tx/s", - Value: achievedTPS, - }) - w.addEntry(entry{ - Name: "ERC20Throughput - avg gas/block", - Unit: "gas", - Value: bm.avgGasPerBlock(), - }) - w.addEntry(entry{ - Name: "ERC20Throughput - avg tx/block", - Unit: "count", - Value: bm.avgTxPerBlock(), - }) - w.addEntry(entry{ - Name: "ERC20Throughput - blocks/s", - Unit: "blocks/s", - Value: float64(bm.BlockCount) / steadyState.Seconds(), - }) - w.addEntry(entry{ - Name: "ERC20Throughput - non-empty block ratio", - Unit: "%", - Value: bm.nonEmptyRatio(), - }) - w.addEntry(entry{ - Name: "ERC20Throughput - block interval p50", - Unit: "ms", - Value: float64(intervalP50.Milliseconds()), - }) - w.addEntry(entry{ - Name: "ERC20Throughput - block interval p99", - Unit: "ms", - Value: float64(intervalP99.Milliseconds()), - }) - w.addEntry(entry{ - Name: "ERC20Throughput - gas/block p50", - Unit: "gas", - Value: gasP50, - }) - w.addEntry(entry{ - Name: "ERC20Throughput - gas/block p99", - Unit: "gas", - Value: gasP99, - }) - w.addEntry(entry{ - Name: "ERC20Throughput - tx/block p50", - Unit: "count", - Value: txP50, - }) - w.addEntry(entry{ - Name: "ERC20Throughput - tx/block p99", - Unit: "count", - Value: txP99, - }) - - // add per-span avg latencies - w.addSpans(append(evNodeSpans, evRethSpans...)) + w.addEntries(summary.entries("ERC20Throughput")) + w.addSpans(traces.allSpans()) // assertions s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") s.Require().Zero(failed, "no transactions should have failed") // assert expected ev-node spans are present - assertSpanNames(t, evNodeSpans, []string{ + assertSpanNames(t, traces.evNode, []string{ "BlockExecutor.ProduceBlock", "BlockExecutor.ApplyBlock", "Executor.ExecuteTxs", diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go index 8b0dbab35b..39a89f961c 100644 --- a/test/e2e/benchmark/spamoor_smoke_test.go +++ b/test/e2e/benchmark/spamoor_smoke_test.go @@ -6,7 +6,6 @@ import ( "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" - e2e "github.com/evstack/ev-node/test/e2e" ) // TestSpamoorSmoke spins up reth + sequencer and a Spamoor node, starts a few @@ -77,17 +76,11 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { fail := sumCounter(metrics["spamoor_transactions_failed_total"]) // collect traces - evNodeSpans := s.collectServiceTraces(e, "ev-node-smoke") - evRethSpans := s.tryCollectServiceTraces(e, "ev-reth") - e2e.PrintTraceReport(t, "ev-node-smoke", evNodeSpans) - if len(evRethSpans) > 0 { - e2e.PrintTraceReport(t, "ev-reth", evRethSpans) - } - - w.addSpans(append(evNodeSpans, evRethSpans...)) + traces := s.collectTraces(e, "ev-node-smoke") + w.addSpans(traces.allSpans()) // assert expected ev-node span names - assertSpanNames(t, evNodeSpans, []string{ + assertSpanNames(t, traces.evNode, []string{ "BlockExecutor.ProduceBlock", "BlockExecutor.ApplyBlock", "BlockExecutor.CreateBlock", @@ -105,8 +98,8 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { }, "ev-node-smoke") // assert expected ev-reth span names when traces are available - if len(evRethSpans) > 0 { - assertSpanNames(t, evRethSpans, []string{ + if len(traces.evReth) > 0 { + assertSpanNames(t, traces.evReth, []string{ "build_payload", "execute_tx", "try_build", diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index 7201bde991..bb9993c587 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -147,6 +147,32 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { } } +// traceResult holds the collected spans from ev-node and (optionally) ev-reth. +type traceResult struct { + evNode []e2e.TraceSpan + evReth []e2e.TraceSpan +} + +// allSpans returns ev-node and ev-reth spans concatenated. +func (tr *traceResult) allSpans() []e2e.TraceSpan { + return append(tr.evNode, tr.evReth...) +} + +// collectTraces fetches ev-node traces (required) and ev-reth traces (optional) +// from Jaeger, then prints reports for both. +func (s *SpamoorSuite) collectTraces(e *env, serviceName string) *traceResult { + t := s.T() + tr := &traceResult{ + evNode: s.collectServiceTraces(e, serviceName), + evReth: s.tryCollectServiceTraces(e, "ev-reth"), + } + e2e.PrintTraceReport(t, serviceName, tr.evNode) + if len(tr.evReth) > 0 { + e2e.PrintTraceReport(t, "ev-reth", tr.evReth) + } + return tr +} + // collectServiceTraces fetches traces from Jaeger for the given service and returns the spans. func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.TraceSpan { ctx, cancel := context.WithTimeout(s.T().Context(), 3*time.Minute) From 676e0d146d4ad909388e5c9355f93c83ee37c88b Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 14:05:08 +0000 Subject: [PATCH 16/19] docs: add detailed documentation to benchmark helper methods --- test/e2e/benchmark/helpers.go | 123 +++++++++++++++++++++++----------- 1 file changed, 83 insertions(+), 40 deletions(-) diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go index 136f5daac4..bdeaeb05be 100644 --- a/test/e2e/benchmark/helpers.go +++ b/test/e2e/benchmark/helpers.go @@ -17,21 +17,26 @@ import ( ) // blockMetrics holds aggregated gas and transaction data across a range of blocks. +// Only blocks containing at least one transaction are counted in BlockCount, +// GasPerBlock, and TxPerBlock. All blocks (including empty) are counted in +// TotalBlockCount and contribute to BlockIntervals. type blockMetrics struct { - StartBlock uint64 - EndBlock uint64 - BlockCount int // non-empty blocks only - TotalBlockCount int // all blocks in range including empty - TotalGasUsed uint64 - TotalTxCount int - GasPerBlock []uint64 // non-empty blocks only - TxPerBlock []int // non-empty blocks only - BlockIntervals []time.Duration // intervals between all consecutive blocks (for drift) - FirstBlockTime time.Time - LastBlockTime time.Time + StartBlock uint64 + EndBlock uint64 + BlockCount int // non-empty blocks only + TotalBlockCount int // all blocks in range including empty + TotalGasUsed uint64 // cumulative gas across non-empty blocks + TotalTxCount int // cumulative tx count across non-empty blocks + GasPerBlock []uint64 // per-block gas for non-empty blocks only + TxPerBlock []int // per-block tx count for non-empty blocks only + BlockIntervals []time.Duration // time between all consecutive blocks (including empty) + FirstBlockTime time.Time // timestamp of the first non-empty block + LastBlockTime time.Time // timestamp of the last non-empty block } -// steadyStateDuration returns the time between the first and last active blocks. +// steadyStateDuration returns the wall-clock time between the first and last +// non-empty blocks. This excludes warm-up (empty blocks before the first tx) +// and cool-down (empty blocks after the last tx) from throughput calculations. func (m *blockMetrics) steadyStateDuration() time.Duration { if m.FirstBlockTime.IsZero() || m.LastBlockTime.IsZero() { return 0 @@ -39,7 +44,8 @@ func (m *blockMetrics) steadyStateDuration() time.Duration { return m.LastBlockTime.Sub(m.FirstBlockTime) } -// avgGasPerBlock returns the mean gas used per block. +// avgGasPerBlock returns TotalGasUsed / BlockCount, i.e. the mean gas used +// per non-empty block. func (m *blockMetrics) avgGasPerBlock() float64 { if m.BlockCount == 0 { return 0 @@ -47,7 +53,8 @@ func (m *blockMetrics) avgGasPerBlock() float64 { return float64(m.TotalGasUsed) / float64(m.BlockCount) } -// avgTxPerBlock returns the mean transaction count per block. +// avgTxPerBlock returns TotalTxCount / BlockCount, i.e. the mean transaction +// count per non-empty block. func (m *blockMetrics) avgTxPerBlock() float64 { if m.BlockCount == 0 { return 0 @@ -55,7 +62,8 @@ func (m *blockMetrics) avgTxPerBlock() float64 { return float64(m.TotalTxCount) / float64(m.BlockCount) } -// nonEmptyRatio returns the percentage of blocks that contained transactions. +// nonEmptyRatio returns (BlockCount / TotalBlockCount) * 100, i.e. the +// percentage of blocks in the range that contained at least one transaction. func (m *blockMetrics) nonEmptyRatio() float64 { if m.TotalBlockCount == 0 { return 0 @@ -63,7 +71,9 @@ func (m *blockMetrics) nonEmptyRatio() float64 { return float64(m.BlockCount) / float64(m.TotalBlockCount) * 100 } -// blockIntervalStats returns p50, p99, and max of block intervals. +// blockIntervalStats computes percentile statistics over the time gaps between +// all consecutive blocks (including empty ones). This measures block production +// cadence and jitter rather than transaction throughput. func (m *blockMetrics) blockIntervalStats() (p50, p99, max time.Duration) { if len(m.BlockIntervals) == 0 { return 0, 0, 0 @@ -78,7 +88,8 @@ func (m *blockMetrics) blockIntervalStats() (p50, p99, max time.Duration) { time.Duration(sorted[len(sorted)-1]) } -// gasPerBlockStats returns p50 and p99 of gas used per non-empty block. +// gasPerBlockStats returns the 50th and 99th percentile of gas used across +// non-empty blocks. Shows the distribution of per-block gas consumption. func (m *blockMetrics) gasPerBlockStats() (p50, p99 float64) { if len(m.GasPerBlock) == 0 { return 0, 0 @@ -91,7 +102,8 @@ func (m *blockMetrics) gasPerBlockStats() (p50, p99 float64) { return percentile(sorted, 0.50), percentile(sorted, 0.99) } -// txPerBlockStats returns p50 and p99 of tx count per non-empty block. +// txPerBlockStats returns the 50th and 99th percentile of transaction counts +// across non-empty blocks. Shows the distribution of per-block tx throughput. func (m *blockMetrics) txPerBlockStats() (p50, p99 float64) { if len(m.TxPerBlock) == 0 { return 0, 0 @@ -104,8 +116,9 @@ func (m *blockMetrics) txPerBlockStats() (p50, p99 float64) { return percentile(sorted, 0.50), percentile(sorted, 0.99) } -// percentile returns the p-th percentile from a pre-sorted float64 slice -// using linear interpolation. +// percentile returns the p-th percentile from a pre-sorted float64 slice using +// linear interpolation between adjacent ranks. For example, p=0.50 returns the +// median and p=0.99 returns the value below which 99% of observations fall. func percentile(sorted []float64, p float64) float64 { if len(sorted) == 0 { return 0 @@ -123,7 +136,8 @@ func percentile(sorted []float64, p float64) float64 { return sorted[lower] + frac*(sorted[upper]-sorted[lower]) } -// mgasPerSec calculates the achieved throughput in megagas per second. +// mgasPerSec computes totalGasUsed / elapsed / 1e6, giving throughput in +// millions of gas units per second (MGas/s). func mgasPerSec(totalGasUsed uint64, elapsed time.Duration) float64 { if elapsed <= 0 { return 0 @@ -227,25 +241,41 @@ func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclie } } -// blockMetricsSummary holds the computed stats from a blockMetrics measurement window. +// blockMetricsSummary holds all derived statistics from a blockMetrics measurement +// window. Every field is computed from the raw block data by summarize(). type blockMetricsSummary struct { - SteadyState time.Duration - AchievedMGas float64 - AchievedTPS float64 - IntervalP50 time.Duration - IntervalP99 time.Duration - IntervalMax time.Duration - GasP50 float64 - GasP99 float64 - TxP50 float64 - TxP99 float64 - AvgGas float64 - AvgTx float64 - BlocksPerSec float64 + // SteadyState is the wall-clock duration between the first and last non-empty + // blocks, used as the denominator for throughput calculations. + SteadyState time.Duration + // AchievedMGas is total gas / steady-state seconds / 1e6 (megagas per second). + AchievedMGas float64 + // AchievedTPS is total tx count / steady-state seconds. + AchievedTPS float64 + // IntervalP50, IntervalP99, IntervalMax are percentile and max statistics + // over the time between all consecutive blocks (including empty). + IntervalP50 time.Duration + IntervalP99 time.Duration + IntervalMax time.Duration + // GasP50, GasP99 are the 50th/99th percentile of gas used per non-empty block. + GasP50 float64 + GasP99 float64 + // TxP50, TxP99 are the 50th/99th percentile of tx count per non-empty block. + TxP50 float64 + TxP99 float64 + // AvgGas is the mean gas per non-empty block (TotalGasUsed / BlockCount). + AvgGas float64 + // AvgTx is the mean tx count per non-empty block (TotalTxCount / BlockCount). + AvgTx float64 + // BlocksPerSec is non-empty blocks / steady-state seconds. + BlocksPerSec float64 + // NonEmptyRatio is (non-empty blocks / total blocks) * 100. NonEmptyRatio float64 } -// summarize computes all derived stats from the raw block metrics. +// summarize computes all derived statistics from the raw block-level data in one +// pass. It delegates to the individual stat methods (steadyStateDuration, +// blockIntervalStats, gasPerBlockStats, etc.) and packages the results into a +// blockMetricsSummary for logging and result-writing. func (m *blockMetrics) summarize() *blockMetricsSummary { ss := m.steadyStateDuration() intervalP50, intervalP99, intervalMax := m.blockIntervalStats() @@ -275,7 +305,10 @@ func (m *blockMetrics) summarize() *blockMetricsSummary { } } -// log prints a concise summary of the block metrics to the test log. +// log prints the block range, interval stats, per-block gas/tx stats, and +// throughput (MGas/s + TPS) to the test log. startBlock/endBlock and +// totalBlocks/nonEmptyBlocks are passed separately because they live on the +// raw blockMetrics, not the summary. func (s *blockMetricsSummary) log(t testing.TB, startBlock, endBlock uint64, totalBlocks, nonEmptyBlocks int, wallClock time.Duration) { t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)", startBlock, endBlock, totalBlocks, nonEmptyBlocks, s.NonEmptyRatio) @@ -287,7 +320,10 @@ func (s *blockMetricsSummary) log(t testing.TB, startBlock, endBlock uint64, tot s.AchievedMGas, s.AchievedTPS, s.SteadyState.Round(time.Millisecond), wallClock.Round(time.Millisecond)) } -// entries returns all summary metrics as result writer entries with the given prefix. +// entries returns all summary metrics as result writer entries in the +// customSmallerIsBetter format expected by github-action-benchmark. Each entry +// is prefixed with the given label (e.g. "ERC20Throughput") so results from +// different tests are distinguishable in the same output file. func (s *blockMetricsSummary) entries(prefix string) []entry { return []entry{ {Name: prefix + " - MGas/s", Unit: "MGas/s", Value: s.AchievedMGas}, @@ -305,8 +341,15 @@ func (s *blockMetricsSummary) entries(prefix string) []entry { } } -// evNodeOverhead computes the overhead percentage of ev-node's ProduceBlock -// over the inner ExecuteTxs span. Returns the overhead and whether it was computable. +// evNodeOverhead computes the fraction of block production time spent outside +// EVM execution. It looks up the average durations of BlockExecutor.ProduceBlock +// (the outer span covering the full block lifecycle) and Executor.ExecuteTxs +// (the inner span covering only EVM tx execution), then returns: +// +// overhead% = (avgProduce - avgExecute) / avgProduce * 100 +// +// This captures time spent on sequencing, DA submission, header construction, +// and other ev-node orchestration work. Returns false if either span is missing. func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) { stats := e2e.AggregateSpanStats(spans) produce, ok := stats["BlockExecutor.ProduceBlock"] From f4949a1be922e46c12d1763f537526925d92fe8b Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 14:18:00 +0000 Subject: [PATCH 17/19] ci: add ERC20 throughput benchmark job --- .github/workflows/benchmark.yml | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index dea4217677..4675250d4b 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -77,6 +77,29 @@ jobs: name: spamoor-benchmark-results path: test/e2e/benchmark/spamoor_bench.json + # TODO: wire up to publish results once additional tests are in place. + erc20-benchmark: + name: ERC20 Throughput Benchmark + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version-file: ./go.mod + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 + - name: Install just + uses: extractions/setup-just@v3 + - name: Build binaries + run: just build-evm build-da + - name: Run ERC20 throughput test + run: | + cd test/e2e && go test -tags evm \ + -run='^TestSpamoorSuite$/^TestERC20Throughput$' -v -timeout=15m \ + ./benchmark/ --evm-binary=../../../build/evm + # single job to push all results to gh-pages sequentially, avoiding race conditions publish-benchmarks: name: Publish Benchmark Results From 4c7b7e17c6c5c6eef0712df96f94627c8a19d964 Mon Sep 17 00:00:00 2001 From: chatton Date: Mon, 2 Mar 2026 14:30:14 +0000 Subject: [PATCH 18/19] chore: remove span assertions --- test/e2e/benchmark/spamoor_erc20_test.go | 14 ------------ test/e2e/benchmark/spamoor_smoke_test.go | 28 ------------------------ test/e2e/benchmark/traces.go | 16 +------------- 3 files changed, 1 insertion(+), 57 deletions(-) diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go index 357c5c4f3f..bba7fd156c 100644 --- a/test/e2e/benchmark/spamoor_erc20_test.go +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -100,20 +100,6 @@ func (s *SpamoorSuite) TestERC20Throughput() { w.addEntries(summary.entries("ERC20Throughput")) w.addSpans(traces.allSpans()) - // assertions s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") s.Require().Zero(failed, "no transactions should have failed") - - // assert expected ev-node spans are present - assertSpanNames(t, traces.evNode, []string{ - "BlockExecutor.ProduceBlock", - "BlockExecutor.ApplyBlock", - "Executor.ExecuteTxs", - "Executor.SetFinal", - "Engine.ForkchoiceUpdated", - "Engine.NewPayload", - "Engine.GetPayload", - "Sequencer.GetNextBatch", - "DA.Submit", - }, serviceName) } diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go index 39a89f961c..d09057462b 100644 --- a/test/e2e/benchmark/spamoor_smoke_test.go +++ b/test/e2e/benchmark/spamoor_smoke_test.go @@ -79,34 +79,6 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { traces := s.collectTraces(e, "ev-node-smoke") w.addSpans(traces.allSpans()) - // assert expected ev-node span names - assertSpanNames(t, traces.evNode, []string{ - "BlockExecutor.ProduceBlock", - "BlockExecutor.ApplyBlock", - "BlockExecutor.CreateBlock", - "BlockExecutor.RetrieveBatch", - "Executor.ExecuteTxs", - "Executor.SetFinal", - "Engine.ForkchoiceUpdated", - "Engine.NewPayload", - "Engine.GetPayload", - "Eth.GetBlockByNumber", - "Sequencer.GetNextBatch", - "DASubmitter.SubmitHeaders", - "DASubmitter.SubmitData", - "DA.Submit", - }, "ev-node-smoke") - - // assert expected ev-reth span names when traces are available - if len(traces.evReth) > 0 { - assertSpanNames(t, traces.evReth, []string{ - "build_payload", - "execute_tx", - "try_build", - "validate_transaction", - }, "ev-reth") - } - s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") s.Require().Zero(fail, "no transactions should have failed") } diff --git a/test/e2e/benchmark/traces.go b/test/e2e/benchmark/traces.go index 459dd16a1a..c5269a1993 100644 --- a/test/e2e/benchmark/traces.go +++ b/test/e2e/benchmark/traces.go @@ -3,11 +3,9 @@ package benchmark import ( - "testing" "time" e2e "github.com/evstack/ev-node/test/e2e" - "github.com/stretchr/testify/require" ) // jaegerSpan holds the fields we extract from Jaeger's untyped JSON response. @@ -16,7 +14,7 @@ type jaegerSpan struct { duration float64 // microseconds } -func (j jaegerSpan) SpanName() string { return j.operationName } +func (j jaegerSpan) SpanName() string { return j.operationName } func (j jaegerSpan) SpanDuration() time.Duration { return time.Duration(j.duration) * time.Microsecond } // extractSpansFromTraces walks Jaeger's []any response and pulls out span operation names and durations. @@ -53,15 +51,3 @@ func toTraceSpans(spans []jaegerSpan) []e2e.TraceSpan { } return out } - -// assertSpanNames verifies that all expected span names appear in the trace data. -func assertSpanNames(t testing.TB, spans []e2e.TraceSpan, expected []string, label string) { - t.Helper() - opNames := make(map[string]struct{}, len(spans)) - for _, span := range spans { - opNames[span.SpanName()] = struct{}{} - } - for _, name := range expected { - require.Contains(t, opNames, name, "expected span %q not found in %s traces", name, label) - } -} From b14085e3a40096f3d3288708cb89198d67791a0a Mon Sep 17 00:00:00 2001 From: chatton Date: Tue, 3 Mar 2026 09:37:54 +0000 Subject: [PATCH 19/19] fix: guard against drain timeout and zero-duration TPS division - waitForDrain returns an error on timeout instead of silently logging - guard AchievedTPS computation when steady-state duration is zero --- test/e2e/benchmark/helpers.go | 12 ++++++------ test/e2e/benchmark/spamoor_erc20_test.go | 4 +++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go index bdeaeb05be..83d32aa92b 100644 --- a/test/e2e/benchmark/helpers.go +++ b/test/e2e/benchmark/helpers.go @@ -200,7 +200,7 @@ func deleteAllSpammers(api *spamoor.API) error { // waitForDrain polls the latest block until consecutiveEmpty consecutive empty // blocks are observed, indicating the mempool has drained. -func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclient.Client, consecutiveEmpty int) { +func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclient.Client, consecutiveEmpty int) error { var emptyRun int var lastBlock uint64 ticker := time.NewTicker(200 * time.Millisecond) @@ -209,8 +209,7 @@ func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclie for { select { case <-ctx.Done(): - log("drain timeout after %d consecutive empty blocks (needed %d)", emptyRun, consecutiveEmpty) - return + return fmt.Errorf("drain timeout after %d consecutive empty blocks (needed %d): %w", emptyRun, consecutiveEmpty, ctx.Err()) case <-ticker.C: header, err := client.HeaderByNumber(ctx, nil) if err != nil { @@ -235,7 +234,7 @@ func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclie if emptyRun >= consecutiveEmpty { log("mempool drained: %d consecutive empty blocks at block %d", emptyRun, num) - return + return nil } } } @@ -282,15 +281,16 @@ func (m *blockMetrics) summarize() *blockMetricsSummary { gasP50, gasP99 := m.gasPerBlockStats() txP50, txP99 := m.txPerBlockStats() - var blocksPerSec float64 + var blocksPerSec, achievedTPS float64 if ss > 0 { blocksPerSec = float64(m.BlockCount) / ss.Seconds() + achievedTPS = float64(m.TotalTxCount) / ss.Seconds() } return &blockMetricsSummary{ SteadyState: ss, AchievedMGas: mgasPerSec(m.TotalGasUsed, ss), - AchievedTPS: float64(m.TotalTxCount) / ss.Seconds(), + AchievedTPS: achievedTPS, IntervalP50: intervalP50, IntervalP99: intervalP99, IntervalMax: intervalMax, diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go index bba7fd156c..d0d079355a 100644 --- a/test/e2e/benchmark/spamoor_erc20_test.go +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -74,7 +74,9 @@ func (s *SpamoorSuite) TestERC20Throughput() { // blocks, the mempool is drained and we can stop. drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second) defer drainCancel() - waitForDrain(drainCtx, t.Logf, e.ethClient, 10) + if err := waitForDrain(drainCtx, t.Logf, e.ethClient, 10); err != nil { + t.Logf("warning: %v", err) + } wallClock := time.Since(loadStart) endHeader, err := e.ethClient.HeaderByNumber(ctx, nil)