diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 40b031d6e..0ad11ecf0 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -77,6 +77,29 @@ jobs: name: spamoor-benchmark-results path: test/e2e/benchmark/spamoor_bench.json + # TODO: wire up to publish results once additional tests are in place. + erc20-benchmark: + name: ERC20 Throughput Benchmark + runs-on: ubuntu-latest + timeout-minutes: 20 + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version-file: ./go.mod + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0 + - name: Install just + uses: extractions/setup-just@v3 + - name: Build binaries + run: just build-evm build-da + - name: Run ERC20 throughput test + run: | + cd test/e2e && go test -tags evm \ + -run='^TestSpamoorSuite$/^TestERC20Throughput$' -v -timeout=15m \ + ./benchmark/ --evm-binary=../../../build/evm + # single job to push all results to gh-pages sequentially, avoiding race conditions publish-benchmarks: name: Publish Benchmark Results diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go new file mode 100644 index 000000000..83d32aa92 --- /dev/null +++ b/test/e2e/benchmark/helpers.go @@ -0,0 +1,420 @@ +//go:build evm + +package benchmark + +import ( + "context" + "fmt" + "math" + "math/big" + "sort" + "testing" + "time" + + "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" + "github.com/ethereum/go-ethereum/ethclient" + e2e "github.com/evstack/ev-node/test/e2e" +) + +// blockMetrics holds aggregated gas and transaction data across a range of blocks. +// Only blocks containing at least one transaction are counted in BlockCount, +// GasPerBlock, and TxPerBlock. All blocks (including empty) are counted in +// TotalBlockCount and contribute to BlockIntervals. +type blockMetrics struct { + StartBlock uint64 + EndBlock uint64 + BlockCount int // non-empty blocks only + TotalBlockCount int // all blocks in range including empty + TotalGasUsed uint64 // cumulative gas across non-empty blocks + TotalTxCount int // cumulative tx count across non-empty blocks + GasPerBlock []uint64 // per-block gas for non-empty blocks only + TxPerBlock []int // per-block tx count for non-empty blocks only + BlockIntervals []time.Duration // time between all consecutive blocks (including empty) + FirstBlockTime time.Time // timestamp of the first non-empty block + LastBlockTime time.Time // timestamp of the last non-empty block +} + +// steadyStateDuration returns the wall-clock time between the first and last +// non-empty blocks. This excludes warm-up (empty blocks before the first tx) +// and cool-down (empty blocks after the last tx) from throughput calculations. +func (m *blockMetrics) steadyStateDuration() time.Duration { + if m.FirstBlockTime.IsZero() || m.LastBlockTime.IsZero() { + return 0 + } + return m.LastBlockTime.Sub(m.FirstBlockTime) +} + +// avgGasPerBlock returns TotalGasUsed / BlockCount, i.e. the mean gas used +// per non-empty block. +func (m *blockMetrics) avgGasPerBlock() float64 { + if m.BlockCount == 0 { + return 0 + } + return float64(m.TotalGasUsed) / float64(m.BlockCount) +} + +// avgTxPerBlock returns TotalTxCount / BlockCount, i.e. the mean transaction +// count per non-empty block. +func (m *blockMetrics) avgTxPerBlock() float64 { + if m.BlockCount == 0 { + return 0 + } + return float64(m.TotalTxCount) / float64(m.BlockCount) +} + +// nonEmptyRatio returns (BlockCount / TotalBlockCount) * 100, i.e. the +// percentage of blocks in the range that contained at least one transaction. +func (m *blockMetrics) nonEmptyRatio() float64 { + if m.TotalBlockCount == 0 { + return 0 + } + return float64(m.BlockCount) / float64(m.TotalBlockCount) * 100 +} + +// blockIntervalStats computes percentile statistics over the time gaps between +// all consecutive blocks (including empty ones). This measures block production +// cadence and jitter rather than transaction throughput. +func (m *blockMetrics) blockIntervalStats() (p50, p99, max time.Duration) { + if len(m.BlockIntervals) == 0 { + return 0, 0, 0 + } + sorted := make([]float64, len(m.BlockIntervals)) + for i, d := range m.BlockIntervals { + sorted[i] = float64(d) + } + sort.Float64s(sorted) + return time.Duration(percentile(sorted, 0.50)), + time.Duration(percentile(sorted, 0.99)), + time.Duration(sorted[len(sorted)-1]) +} + +// gasPerBlockStats returns the 50th and 99th percentile of gas used across +// non-empty blocks. Shows the distribution of per-block gas consumption. +func (m *blockMetrics) gasPerBlockStats() (p50, p99 float64) { + if len(m.GasPerBlock) == 0 { + return 0, 0 + } + sorted := make([]float64, len(m.GasPerBlock)) + for i, g := range m.GasPerBlock { + sorted[i] = float64(g) + } + sort.Float64s(sorted) + return percentile(sorted, 0.50), percentile(sorted, 0.99) +} + +// txPerBlockStats returns the 50th and 99th percentile of transaction counts +// across non-empty blocks. Shows the distribution of per-block tx throughput. +func (m *blockMetrics) txPerBlockStats() (p50, p99 float64) { + if len(m.TxPerBlock) == 0 { + return 0, 0 + } + sorted := make([]float64, len(m.TxPerBlock)) + for i, c := range m.TxPerBlock { + sorted[i] = float64(c) + } + sort.Float64s(sorted) + return percentile(sorted, 0.50), percentile(sorted, 0.99) +} + +// percentile returns the p-th percentile from a pre-sorted float64 slice using +// linear interpolation between adjacent ranks. For example, p=0.50 returns the +// median and p=0.99 returns the value below which 99% of observations fall. +func percentile(sorted []float64, p float64) float64 { + if len(sorted) == 0 { + return 0 + } + if len(sorted) == 1 { + return sorted[0] + } + rank := p * float64(len(sorted)-1) + lower := int(math.Floor(rank)) + upper := lower + 1 + if upper >= len(sorted) { + return sorted[len(sorted)-1] + } + frac := rank - float64(lower) + return sorted[lower] + frac*(sorted[upper]-sorted[lower]) +} + +// mgasPerSec computes totalGasUsed / elapsed / 1e6, giving throughput in +// millions of gas units per second (MGas/s). +func mgasPerSec(totalGasUsed uint64, elapsed time.Duration) float64 { + if elapsed <= 0 { + return 0 + } + return float64(totalGasUsed) / elapsed.Seconds() / 1e6 +} + +// waitForSpamoorDone polls spamoor metrics until the total sent count reaches +// the target or the context is cancelled. It logs the send rate at each poll +// interval and returns the final sent and failed counts. +func waitForSpamoorDone(ctx context.Context, log func(string, ...any), api *spamoor.API, targetCount int, pollInterval time.Duration) (sent, failed float64, err error) { + ticker := time.NewTicker(pollInterval) + defer ticker.Stop() + + start := time.Now() + var prevSent float64 + + for { + select { + case <-ctx.Done(): + return sent, failed, fmt.Errorf("timed out waiting for spamoor to send %d txs (sent %.0f): %w", targetCount, sent, ctx.Err()) + case <-ticker.C: + metrics, mErr := api.GetMetrics() + if mErr != nil { + log("failed to get spamoor metrics: %v", mErr) + continue + } + sent = sumCounter(metrics["spamoor_transactions_sent_total"]) + failed = sumCounter(metrics["spamoor_transactions_failed_total"]) + + delta := sent - prevSent + rate := delta / pollInterval.Seconds() + elapsed := time.Since(start).Round(time.Second) + log("spamoor progress: %.0f/%.0f sent (%.0f tx/s instant, %.0f tx/s avg, %.0f failed) [%s]", + sent, float64(targetCount), rate, sent/time.Since(start).Seconds(), failed, elapsed) + prevSent = sent + + if sent >= float64(targetCount) { + return sent, failed, nil + } + } + } +} + +// deleteAllSpammers removes any pre-existing spammers from the daemon. +// This prevents stale spammers (from previous failed runs) being restored +// from the spamoor SQLite database. +func deleteAllSpammers(api *spamoor.API) error { + existing, err := api.ListSpammers() + if err != nil { + return fmt.Errorf("list spammers: %w", err) + } + for _, sp := range existing { + if err := api.DeleteSpammer(sp.ID); err != nil { + return fmt.Errorf("delete spammer %d: %w", sp.ID, err) + } + } + return nil +} + +// waitForDrain polls the latest block until consecutiveEmpty consecutive empty +// blocks are observed, indicating the mempool has drained. +func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclient.Client, consecutiveEmpty int) error { + var emptyRun int + var lastBlock uint64 + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + return fmt.Errorf("drain timeout after %d consecutive empty blocks (needed %d): %w", emptyRun, consecutiveEmpty, ctx.Err()) + case <-ticker.C: + header, err := client.HeaderByNumber(ctx, nil) + if err != nil { + continue + } + num := header.Number.Uint64() + if num == lastBlock { + continue + } + + txCount, err := client.TransactionCount(ctx, header.Hash()) + if err != nil { + continue + } + + lastBlock = num + if txCount == 0 { + emptyRun++ + } else { + emptyRun = 0 + } + + if emptyRun >= consecutiveEmpty { + log("mempool drained: %d consecutive empty blocks at block %d", emptyRun, num) + return nil + } + } + } +} + +// blockMetricsSummary holds all derived statistics from a blockMetrics measurement +// window. Every field is computed from the raw block data by summarize(). +type blockMetricsSummary struct { + // SteadyState is the wall-clock duration between the first and last non-empty + // blocks, used as the denominator for throughput calculations. + SteadyState time.Duration + // AchievedMGas is total gas / steady-state seconds / 1e6 (megagas per second). + AchievedMGas float64 + // AchievedTPS is total tx count / steady-state seconds. + AchievedTPS float64 + // IntervalP50, IntervalP99, IntervalMax are percentile and max statistics + // over the time between all consecutive blocks (including empty). + IntervalP50 time.Duration + IntervalP99 time.Duration + IntervalMax time.Duration + // GasP50, GasP99 are the 50th/99th percentile of gas used per non-empty block. + GasP50 float64 + GasP99 float64 + // TxP50, TxP99 are the 50th/99th percentile of tx count per non-empty block. + TxP50 float64 + TxP99 float64 + // AvgGas is the mean gas per non-empty block (TotalGasUsed / BlockCount). + AvgGas float64 + // AvgTx is the mean tx count per non-empty block (TotalTxCount / BlockCount). + AvgTx float64 + // BlocksPerSec is non-empty blocks / steady-state seconds. + BlocksPerSec float64 + // NonEmptyRatio is (non-empty blocks / total blocks) * 100. + NonEmptyRatio float64 +} + +// summarize computes all derived statistics from the raw block-level data in one +// pass. It delegates to the individual stat methods (steadyStateDuration, +// blockIntervalStats, gasPerBlockStats, etc.) and packages the results into a +// blockMetricsSummary for logging and result-writing. +func (m *blockMetrics) summarize() *blockMetricsSummary { + ss := m.steadyStateDuration() + intervalP50, intervalP99, intervalMax := m.blockIntervalStats() + gasP50, gasP99 := m.gasPerBlockStats() + txP50, txP99 := m.txPerBlockStats() + + var blocksPerSec, achievedTPS float64 + if ss > 0 { + blocksPerSec = float64(m.BlockCount) / ss.Seconds() + achievedTPS = float64(m.TotalTxCount) / ss.Seconds() + } + + return &blockMetricsSummary{ + SteadyState: ss, + AchievedMGas: mgasPerSec(m.TotalGasUsed, ss), + AchievedTPS: achievedTPS, + IntervalP50: intervalP50, + IntervalP99: intervalP99, + IntervalMax: intervalMax, + GasP50: gasP50, + GasP99: gasP99, + TxP50: txP50, + TxP99: txP99, + AvgGas: m.avgGasPerBlock(), + AvgTx: m.avgTxPerBlock(), + BlocksPerSec: blocksPerSec, + NonEmptyRatio: m.nonEmptyRatio(), + } +} + +// log prints the block range, interval stats, per-block gas/tx stats, and +// throughput (MGas/s + TPS) to the test log. startBlock/endBlock and +// totalBlocks/nonEmptyBlocks are passed separately because they live on the +// raw blockMetrics, not the summary. +func (s *blockMetricsSummary) log(t testing.TB, startBlock, endBlock uint64, totalBlocks, nonEmptyBlocks int, wallClock time.Duration) { + t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)", + startBlock, endBlock, totalBlocks, nonEmptyBlocks, s.NonEmptyRatio) + t.Logf("block intervals: p50=%s, p99=%s, max=%s", + s.IntervalP50.Round(time.Millisecond), s.IntervalP99.Round(time.Millisecond), s.IntervalMax.Round(time.Millisecond)) + t.Logf("gas/block (non-empty): avg=%.0f, p50=%.0f, p99=%.0f", s.AvgGas, s.GasP50, s.GasP99) + t.Logf("tx/block (non-empty): avg=%.1f, p50=%.0f, p99=%.0f", s.AvgTx, s.TxP50, s.TxP99) + t.Logf("throughput: %.2f MGas/s, %.1f TPS over %s steady-state (%s wall clock)", + s.AchievedMGas, s.AchievedTPS, s.SteadyState.Round(time.Millisecond), wallClock.Round(time.Millisecond)) +} + +// entries returns all summary metrics as result writer entries in the +// customSmallerIsBetter format expected by github-action-benchmark. Each entry +// is prefixed with the given label (e.g. "ERC20Throughput") so results from +// different tests are distinguishable in the same output file. +func (s *blockMetricsSummary) entries(prefix string) []entry { + return []entry{ + {Name: prefix + " - MGas/s", Unit: "MGas/s", Value: s.AchievedMGas}, + {Name: prefix + " - TPS", Unit: "tx/s", Value: s.AchievedTPS}, + {Name: prefix + " - avg gas/block", Unit: "gas", Value: s.AvgGas}, + {Name: prefix + " - avg tx/block", Unit: "count", Value: s.AvgTx}, + {Name: prefix + " - blocks/s", Unit: "blocks/s", Value: s.BlocksPerSec}, + {Name: prefix + " - non-empty block ratio", Unit: "%", Value: s.NonEmptyRatio}, + {Name: prefix + " - block interval p50", Unit: "ms", Value: float64(s.IntervalP50.Milliseconds())}, + {Name: prefix + " - block interval p99", Unit: "ms", Value: float64(s.IntervalP99.Milliseconds())}, + {Name: prefix + " - gas/block p50", Unit: "gas", Value: s.GasP50}, + {Name: prefix + " - gas/block p99", Unit: "gas", Value: s.GasP99}, + {Name: prefix + " - tx/block p50", Unit: "count", Value: s.TxP50}, + {Name: prefix + " - tx/block p99", Unit: "count", Value: s.TxP99}, + } +} + +// evNodeOverhead computes the fraction of block production time spent outside +// EVM execution. It looks up the average durations of BlockExecutor.ProduceBlock +// (the outer span covering the full block lifecycle) and Executor.ExecuteTxs +// (the inner span covering only EVM tx execution), then returns: +// +// overhead% = (avgProduce - avgExecute) / avgProduce * 100 +// +// This captures time spent on sequencing, DA submission, header construction, +// and other ev-node orchestration work. Returns false if either span is missing. +func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) { + stats := e2e.AggregateSpanStats(spans) + produce, ok := stats["BlockExecutor.ProduceBlock"] + if !ok { + return 0, false + } + execute, ok := stats["Executor.ExecuteTxs"] + if !ok { + return 0, false + } + produceAvg := float64(produce.Total.Microseconds()) / float64(produce.Count) + executeAvg := float64(execute.Total.Microseconds()) / float64(execute.Count) + if produceAvg <= 0 { + return 0, false + } + return (produceAvg - executeAvg) / produceAvg * 100, true +} + +// collectBlockMetrics iterates all headers in [startBlock, endBlock] to collect +// gas and transaction metrics. Empty blocks are skipped for gas/tx aggregation +// but included in block interval tracking. +func collectBlockMetrics(ctx context.Context, client *ethclient.Client, startBlock, endBlock uint64) (*blockMetrics, error) { + if endBlock < startBlock { + return nil, fmt.Errorf("endBlock %d < startBlock %d", endBlock, startBlock) + } + + m := &blockMetrics{StartBlock: startBlock, EndBlock: endBlock} + + var prevBlockTime time.Time + for n := startBlock; n <= endBlock; n++ { + header, err := client.HeaderByNumber(ctx, new(big.Int).SetUint64(n)) + if err != nil { + return nil, fmt.Errorf("failed to fetch header %d: %w", n, err) + } + + blockTime := time.Unix(int64(header.Time), 0) + m.TotalBlockCount++ + + // track intervals between all consecutive blocks + if !prevBlockTime.IsZero() { + m.BlockIntervals = append(m.BlockIntervals, blockTime.Sub(prevBlockTime)) + } + prevBlockTime = blockTime + + txCount, err := client.TransactionCount(ctx, header.Hash()) + if err != nil { + return nil, fmt.Errorf("failed to fetch tx count for block %d: %w", n, err) + } + + if txCount == 0 { + continue + } + + // non-empty block: aggregate gas and tx metrics + if m.BlockCount == 0 { + m.FirstBlockTime = blockTime + } + m.LastBlockTime = blockTime + + m.BlockCount++ + m.TotalGasUsed += header.GasUsed + m.TotalTxCount += int(txCount) + m.GasPerBlock = append(m.GasPerBlock, header.GasUsed) + m.TxPerBlock = append(m.TxPerBlock, int(txCount)) + } + + return m, nil +} diff --git a/test/e2e/benchmark/output.go b/test/e2e/benchmark/output.go index 39ccbe6d6..06c60e764 100644 --- a/test/e2e/benchmark/output.go +++ b/test/e2e/benchmark/output.go @@ -61,6 +61,11 @@ func (w *resultWriter) addEntry(e entry) { w.entries = append(w.entries, e) } +// addEntries appends multiple entries to the results. +func (w *resultWriter) addEntries(entries []entry) { + w.entries = append(w.entries, entries...) +} + // flush writes accumulated entries to the path in BENCH_JSON_OUTPUT. // It is a no-op when the env var is unset or no entries were added. func (w *resultWriter) flush() { diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go new file mode 100644 index 000000000..d0d079355 --- /dev/null +++ b/test/e2e/benchmark/spamoor_erc20_test.go @@ -0,0 +1,107 @@ +//go:build evm + +package benchmark + +import ( + "context" + "fmt" + "time" + + "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" +) + +// TestERC20Throughput measures ERC-20 token transfer throughput in isolation. +// Each tx is ~65K gas (contract call with storage reads/writes) +// +// Primary metric: achieved MGas/s. +// Diagnostic metrics: per-span latency breakdown, gas/block, tx/block. +func (s *SpamoorSuite) TestERC20Throughput() { + const ( + numSpammers = 2 + countPerSpammer = 50000 + totalCount = numSpammers * countPerSpammer + serviceName = "ev-node-erc20" + waitTimeout = 5 * time.Minute + ) + + t := s.T() + ctx := t.Context() + w := newResultWriter(t, "ERC20Throughput") + defer w.flush() + + e := s.setupEnv(config{ + serviceName: serviceName, + }) + + erc20Config := map[string]any{ + "throughput": 50, // 50 tx per 100ms slot = 500 tx/s per spammer + "total_count": countPerSpammer, + "max_pending": 50000, + "max_wallets": 200, + "base_fee": 20, + "tip_fee": 3, + "refill_amount": "5000000000000000000", + "refill_balance": "2000000000000000000", + "refill_interval": 600, + } + + // clear any stale spammers restored from the persistent spamoor database + s.Require().NoError(deleteAllSpammers(e.spamoorAPI), "failed to delete stale spammers") + + // launch all spammers before recording startBlock so warm-up + // (contract deploy + wallet funding) is excluded from the measurement window. + for i := range numSpammers { + name := fmt.Sprintf("bench-erc20-%d", i) + id, err := e.spamoorAPI.CreateSpammer(name, spamoor.ScenarioERC20TX, erc20Config, true) + s.Require().NoError(err, "failed to create spammer %s", name) + t.Cleanup(func() { _ = e.spamoorAPI.DeleteSpammer(id) }) + } + + // record the starting block after all spammers are launched so the + // measurement window excludes the warm-up period. + startHeader, err := e.ethClient.HeaderByNumber(ctx, nil) + s.Require().NoError(err, "failed to get start block header") + startBlock := startHeader.Number.Uint64() + loadStart := time.Now() + + // wait for spamoor to finish sending all transactions + waitCtx, cancel := context.WithTimeout(ctx, waitTimeout) + defer cancel() + sent, failed, err := waitForSpamoorDone(waitCtx, t.Logf, e.spamoorAPI, totalCount, 2*time.Second) + s.Require().NoError(err, "spamoor did not finish in time") + + // wait for pending txs to drain: once we see several consecutive empty + // blocks, the mempool is drained and we can stop. + drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second) + defer drainCancel() + if err := waitForDrain(drainCtx, t.Logf, e.ethClient, 10); err != nil { + t.Logf("warning: %v", err) + } + wallClock := time.Since(loadStart) + + endHeader, err := e.ethClient.HeaderByNumber(ctx, nil) + s.Require().NoError(err, "failed to get end block header") + endBlock := endHeader.Number.Uint64() + + // collect block-level gas/tx metrics + bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock) + s.Require().NoError(err, "failed to collect block metrics") + + summary := bm.summarize() + s.Require().Greater(summary.SteadyState, time.Duration(0), "expected non-zero steady-state duration") + summary.log(t, startBlock, endBlock, bm.TotalBlockCount, bm.BlockCount, wallClock) + + // collect and report traces + traces := s.collectTraces(e, serviceName) + + if overhead, ok := evNodeOverhead(traces.evNode); ok { + t.Logf("ev-node overhead: %.1f%%", overhead) + w.addEntry(entry{Name: "ERC20Throughput - ev-node overhead", Unit: "%", Value: overhead}) + } + + w.addEntries(summary.entries("ERC20Throughput")) + w.addSpans(traces.allSpans()) + + s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") + s.Require().Zero(failed, "no transactions should have failed") +} diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go index fa6258373..d09057462 100644 --- a/test/e2e/benchmark/spamoor_smoke_test.go +++ b/test/e2e/benchmark/spamoor_smoke_test.go @@ -3,11 +3,9 @@ package benchmark import ( - "os" "time" "github.com/celestiaorg/tastora/framework/docker/evstack/spamoor" - e2e "github.com/evstack/ev-node/test/e2e" ) // TestSpamoorSmoke spins up reth + sequencer and a Spamoor node, starts a few @@ -18,17 +16,13 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { w := newResultWriter(t, "SpamoorSmoke") defer w.flush() - // TODO: temporary hardcoded tag, will be replaced with a proper release tag - rethTag := os.Getenv("EV_RETH_TAG") - if rethTag == "" { - rethTag = "pr-140" - } e := s.setupEnv(config{ - rethTag: rethTag, serviceName: "ev-node-smoke", }) api := e.spamoorAPI + s.Require().NoError(deleteAllSpammers(api), "failed to delete stale spammers") + eoatx := map[string]any{ "throughput": 100, "total_count": 3000, @@ -82,38 +76,8 @@ func (s *SpamoorSuite) TestSpamoorSmoke() { fail := sumCounter(metrics["spamoor_transactions_failed_total"]) // collect traces - evNodeSpans := s.collectServiceTraces(e, "ev-node-smoke") - evRethSpans := s.collectServiceTraces(e, "ev-reth") - e2e.PrintTraceReport(t, "ev-node-smoke", evNodeSpans) - e2e.PrintTraceReport(t, "ev-reth", evRethSpans) - - w.addSpans(append(evNodeSpans, evRethSpans...)) - - // assert expected ev-node span names - assertSpanNames(t, evNodeSpans, []string{ - "BlockExecutor.ProduceBlock", - "BlockExecutor.ApplyBlock", - "BlockExecutor.CreateBlock", - "BlockExecutor.RetrieveBatch", - "Executor.ExecuteTxs", - "Executor.SetFinal", - "Engine.ForkchoiceUpdated", - "Engine.NewPayload", - "Engine.GetPayload", - "Eth.GetBlockByNumber", - "Sequencer.GetNextBatch", - "DASubmitter.SubmitHeaders", - "DASubmitter.SubmitData", - "DA.Submit", - }, "ev-node-smoke") - - // assert expected ev-reth span names - assertSpanNames(t, evRethSpans, []string{ - "build_payload", - "execute_tx", - "try_build", - "validate_transaction", - }, "ev-reth") + traces := s.collectTraces(e, "ev-node-smoke") + w.addSpans(traces.allSpans()) s.Require().Greater(sent, float64(0), "at least one transaction should have been sent") s.Require().Zero(fail, "no transactions should have failed") diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go index 34c0bc57a..bb9993c58 100644 --- a/test/e2e/benchmark/suite_test.go +++ b/test/e2e/benchmark/suite_test.go @@ -4,6 +4,7 @@ package benchmark import ( "context" + "os" "path/filepath" "testing" "time" @@ -48,10 +49,19 @@ type env struct { // config parameterizes the per-test environment setup. type config struct { - rethTag string serviceName string } +// TODO: temporary hardcoded tag, will be replaced with a proper release tag +const defaultRethTag = "pr-140" + +func rethTag() string { + if tag := os.Getenv("EV_RETH_TAG"); tag != "" { + return tag + } + return defaultRethTag +} + // setupEnv creates a Jaeger + reth + sequencer + Spamoor environment for // a single test. Each call spins up isolated infrastructure so tests // can't interfere with each other. @@ -70,15 +80,21 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { // reth + local DA with OTLP tracing to Jaeger evmEnv := e2e.SetupCommonEVMEnv(t, sut, s.dockerCli, s.networkID, e2e.WithRethOpts(func(b *reth.NodeBuilder) { - b.WithTag(cfg.rethTag).WithEnv( - // ev-reth reads OTEL_EXPORTER_OTLP_ENDPOINT and passes it directly - // to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does - // not auto-append /v1/traces, so the full path is required. - "OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", - "OTEL_EXPORTER_OTLP_PROTOCOL=http", - "RUST_LOG=debug", - "OTEL_SDK_DISABLED=false", - ) + b.WithTag(rethTag()). + // increase values to facilitate spamoor. + WithAdditionalStartArgs( + "--rpc.max-connections", "5000", + "--rpc.max-tracing-requests", "1000", + ). + WithEnv( + // ev-reth reads OTEL_EXPORTER_OTLP_ENDPOINT and passes it directly + // to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does + // not auto-append /v1/traces, so the full path is required. + "OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces", + "OTEL_EXPORTER_OTLP_PROTOCOL=http", + "RUST_LOG=debug", + "OTEL_SDK_DISABLED=false", + ) }), ) @@ -88,7 +104,8 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { e2e.SetupSequencerNode(t, sut, sequencerHome, evmEnv.SequencerJWT, evmEnv.GenesisHash, evmEnv.Endpoints, "--evnode.instrumentation.tracing=true", "--evnode.instrumentation.tracing_endpoint", otlpHTTP, - "--evnode.instrumentation.tracing_sample_rate", "1.0", + // TODO: setting this to 1 produced too many spans for the local Jaeger deployment alongside everything else. + "--evnode.instrumentation.tracing_sample_rate", "0.1", "--evnode.instrumentation.tracing_service_name", cfg.serviceName, ) t.Log("sequencer node is up") @@ -108,7 +125,8 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { WithDockerNetworkID(evmEnv.RethNode.NetworkID). WithLogger(evmEnv.RethNode.Logger). WithRPCHosts(internalRPC). - WithPrivateKey(e2e.TestPrivateKey) + WithPrivateKey(e2e.TestPrivateKey). + WithAdditionalStartArgs("--slot-duration", "100ms", "--startup-delay", "0") spNode, err := spBuilder.Build(ctx) s.Require().NoError(err, "failed to build spamoor node") @@ -129,6 +147,32 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env { } } +// traceResult holds the collected spans from ev-node and (optionally) ev-reth. +type traceResult struct { + evNode []e2e.TraceSpan + evReth []e2e.TraceSpan +} + +// allSpans returns ev-node and ev-reth spans concatenated. +func (tr *traceResult) allSpans() []e2e.TraceSpan { + return append(tr.evNode, tr.evReth...) +} + +// collectTraces fetches ev-node traces (required) and ev-reth traces (optional) +// from Jaeger, then prints reports for both. +func (s *SpamoorSuite) collectTraces(e *env, serviceName string) *traceResult { + t := s.T() + tr := &traceResult{ + evNode: s.collectServiceTraces(e, serviceName), + evReth: s.tryCollectServiceTraces(e, "ev-reth"), + } + e2e.PrintTraceReport(t, serviceName, tr.evNode) + if len(tr.evReth) > 0 { + e2e.PrintTraceReport(t, "ev-reth", tr.evReth) + } + return tr +} + // collectServiceTraces fetches traces from Jaeger for the given service and returns the spans. func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.TraceSpan { ctx, cancel := context.WithTimeout(s.T().Context(), 3*time.Minute) @@ -143,3 +187,27 @@ func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.Tr return toTraceSpans(extractSpansFromTraces(traces)) } + +// tryCollectServiceTraces fetches traces from Jaeger for the given service, +// returning nil instead of failing the test if traces are unavailable. +func (s *SpamoorSuite) tryCollectServiceTraces(e *env, serviceName string) []e2e.TraceSpan { + t := s.T() + ctx, cancel := context.WithTimeout(t.Context(), 3*time.Minute) + defer cancel() + + ok, err := e.jaeger.External.WaitForTraces(ctx, serviceName, 1, 2*time.Second) + if err != nil || !ok { + t.Logf("warning: could not collect %s traces (err=%v, ok=%v)", serviceName, err, ok) + return nil + } + + traces, err := e.jaeger.External.Traces(ctx, serviceName, 10000) + if err != nil { + t.Logf("warning: failed to fetch %s traces: %v", serviceName, err) + return nil + } + + spans := toTraceSpans(extractSpansFromTraces(traces)) + t.Logf("collected %d %s spans", len(spans), serviceName) + return spans +} diff --git a/test/e2e/benchmark/traces.go b/test/e2e/benchmark/traces.go index 459dd16a1..c5269a199 100644 --- a/test/e2e/benchmark/traces.go +++ b/test/e2e/benchmark/traces.go @@ -3,11 +3,9 @@ package benchmark import ( - "testing" "time" e2e "github.com/evstack/ev-node/test/e2e" - "github.com/stretchr/testify/require" ) // jaegerSpan holds the fields we extract from Jaeger's untyped JSON response. @@ -16,7 +14,7 @@ type jaegerSpan struct { duration float64 // microseconds } -func (j jaegerSpan) SpanName() string { return j.operationName } +func (j jaegerSpan) SpanName() string { return j.operationName } func (j jaegerSpan) SpanDuration() time.Duration { return time.Duration(j.duration) * time.Microsecond } // extractSpansFromTraces walks Jaeger's []any response and pulls out span operation names and durations. @@ -53,15 +51,3 @@ func toTraceSpans(spans []jaegerSpan) []e2e.TraceSpan { } return out } - -// assertSpanNames verifies that all expected span names appear in the trace data. -func assertSpanNames(t testing.TB, spans []e2e.TraceSpan, expected []string, label string) { - t.Helper() - opNames := make(map[string]struct{}, len(spans)) - for _, span := range spans { - opNames[span.SpanName()] = struct{}{} - } - for _, name := range expected { - require.Contains(t, opNames, name, "expected span %q not found in %s traces", name, label) - } -} diff --git a/test/e2e/go.mod b/test/e2e/go.mod index fb254230c..a65841be8 100644 --- a/test/e2e/go.mod +++ b/test/e2e/go.mod @@ -5,7 +5,7 @@ go 1.25.6 require ( cosmossdk.io/math v1.5.3 github.com/celestiaorg/go-square/v3 v3.0.2 - github.com/celestiaorg/tastora v0.16.0 + github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb github.com/cosmos/cosmos-sdk v0.53.6 github.com/cosmos/ibc-go/v8 v8.8.0 github.com/ethereum/go-ethereum v1.17.0 diff --git a/test/e2e/go.sum b/test/e2e/go.sum index 163b5e5d5..b218f0094 100644 --- a/test/e2e/go.sum +++ b/test/e2e/go.sum @@ -145,8 +145,8 @@ github.com/celestiaorg/go-square/v3 v3.0.2 h1:eSQOgNII8inK9IhiBZ+6GADQeWbRq4HYY7 github.com/celestiaorg/go-square/v3 v3.0.2/go.mod h1:oFReMLsSDMRs82ICFEeFQFCqNvwdsbIM1BzCcb0f7dM= github.com/celestiaorg/nmt v0.24.2 h1:LlpJSPOd6/Lw1Ig6HUhZuqiINHLka/ZSRTBzlNJpchg= github.com/celestiaorg/nmt v0.24.2/go.mod h1:vgLBpWBi8F5KLxTdXSwb7AU4NhiIQ1AQRGa+PzdcLEA= -github.com/celestiaorg/tastora v0.16.0 h1:V4ctGcvVR8thy4ulvrHagrTfdNfuCHOTsCYoKVRQ75U= -github.com/celestiaorg/tastora v0.16.0/go.mod h1:C867PBm6Ne6e/1JlmsRqcLeJ6RHAuMoMRCvwJzV/q8g= +github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb h1:rTVChmvo3+SYi0dwd5W2yz2zyuAid8cXVAeROEY5ecs= +github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb/go.mod h1:C867PBm6Ne6e/1JlmsRqcLeJ6RHAuMoMRCvwJzV/q8g= github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM= github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=