diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 40b031d6e..0ad11ecf0 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -77,6 +77,29 @@ jobs:
           name: spamoor-benchmark-results
           path: test/e2e/benchmark/spamoor_bench.json
 
+  # TODO: wire up to publish results once additional tests are in place.
+  erc20-benchmark:
+    name: ERC20 Throughput Benchmark
+    runs-on: ubuntu-latest
+    timeout-minutes: 20
+    steps:
+      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+      - name: Set up Go
+        uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0
+        with:
+          go-version-file: ./go.mod
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@b5ca514318bd6ebac0fb2aedd5d36ec1b5c232a2 # v3.10.0
+      - name: Install just
+        uses: extractions/setup-just@v3
+      - name: Build binaries
+        run: just build-evm build-da
+      - name: Run ERC20 throughput test
+        run: |
+          cd test/e2e && go test -tags evm \
+            -run='^TestSpamoorSuite$/^TestERC20Throughput$' -v -timeout=15m \
+            ./benchmark/ --evm-binary=../../../build/evm
+
   # single job to push all results to gh-pages sequentially, avoiding race conditions
   publish-benchmarks:
     name: Publish Benchmark Results
diff --git a/test/e2e/benchmark/helpers.go b/test/e2e/benchmark/helpers.go
new file mode 100644
index 000000000..83d32aa92
--- /dev/null
+++ b/test/e2e/benchmark/helpers.go
@@ -0,0 +1,420 @@
+//go:build evm
+
+package benchmark
+
+import (
+	"context"
+	"fmt"
+	"math"
+	"math/big"
+	"sort"
+	"testing"
+	"time"
+
+	"github.com/celestiaorg/tastora/framework/docker/evstack/spamoor"
+	"github.com/ethereum/go-ethereum/ethclient"
+	e2e "github.com/evstack/ev-node/test/e2e"
+)
+
+// blockMetrics holds aggregated gas and transaction data across a range of blocks.
+// Only blocks containing at least one transaction are counted in BlockCount,
+// GasPerBlock, and TxPerBlock. All blocks (including empty) are counted in
+// TotalBlockCount and contribute to BlockIntervals.
+type blockMetrics struct {
+	StartBlock      uint64
+	EndBlock        uint64
+	BlockCount      int             // non-empty blocks only
+	TotalBlockCount int             // all blocks in range including empty
+	TotalGasUsed    uint64          // cumulative gas across non-empty blocks
+	TotalTxCount    int             // cumulative tx count across non-empty blocks
+	GasPerBlock     []uint64        // per-block gas for non-empty blocks only
+	TxPerBlock      []int           // per-block tx count for non-empty blocks only
+	BlockIntervals  []time.Duration // time between all consecutive blocks (including empty)
+	FirstBlockTime  time.Time       // timestamp of the first non-empty block
+	LastBlockTime   time.Time       // timestamp of the last non-empty block
+}
+
+// steadyStateDuration returns the wall-clock time between the first and last
+// non-empty blocks. This excludes warm-up (empty blocks before the first tx)
+// and cool-down (empty blocks after the last tx) from throughput calculations.
+func (m *blockMetrics) steadyStateDuration() time.Duration {
+	if m.FirstBlockTime.IsZero() || m.LastBlockTime.IsZero() {
+		return 0
+	}
+	return m.LastBlockTime.Sub(m.FirstBlockTime)
+}
+
+// avgGasPerBlock returns TotalGasUsed / BlockCount, i.e. the mean gas used
+// per non-empty block.
+func (m *blockMetrics) avgGasPerBlock() float64 {
+	if m.BlockCount == 0 {
+		return 0
+	}
+	return float64(m.TotalGasUsed) / float64(m.BlockCount)
+}
+
+// avgTxPerBlock returns TotalTxCount / BlockCount, i.e. the mean transaction
+// count per non-empty block.
+func (m *blockMetrics) avgTxPerBlock() float64 {
+	if m.BlockCount == 0 {
+		return 0
+	}
+	return float64(m.TotalTxCount) / float64(m.BlockCount)
+}
+
+// nonEmptyRatio returns (BlockCount / TotalBlockCount) * 100, i.e. the
+// percentage of blocks in the range that contained at least one transaction.
+func (m *blockMetrics) nonEmptyRatio() float64 {
+	if m.TotalBlockCount == 0 {
+		return 0
+	}
+	return float64(m.BlockCount) / float64(m.TotalBlockCount) * 100
+}
+
+// blockIntervalStats computes percentile statistics over the time gaps between
+// all consecutive blocks (including empty ones). This measures block production
+// cadence and jitter rather than transaction throughput.
+func (m *blockMetrics) blockIntervalStats() (p50, p99, max time.Duration) {
+	if len(m.BlockIntervals) == 0 {
+		return 0, 0, 0
+	}
+	sorted := make([]float64, len(m.BlockIntervals))
+	for i, d := range m.BlockIntervals {
+		sorted[i] = float64(d)
+	}
+	sort.Float64s(sorted)
+	return time.Duration(percentile(sorted, 0.50)),
+		time.Duration(percentile(sorted, 0.99)),
+		time.Duration(sorted[len(sorted)-1])
+}
+
+// gasPerBlockStats returns the 50th and 99th percentile of gas used across
+// non-empty blocks. Shows the distribution of per-block gas consumption.
+func (m *blockMetrics) gasPerBlockStats() (p50, p99 float64) {
+	if len(m.GasPerBlock) == 0 {
+		return 0, 0
+	}
+	sorted := make([]float64, len(m.GasPerBlock))
+	for i, g := range m.GasPerBlock {
+		sorted[i] = float64(g)
+	}
+	sort.Float64s(sorted)
+	return percentile(sorted, 0.50), percentile(sorted, 0.99)
+}
+
+// txPerBlockStats returns the 50th and 99th percentile of transaction counts
+// across non-empty blocks. Shows the distribution of per-block tx throughput.
+func (m *blockMetrics) txPerBlockStats() (p50, p99 float64) {
+	if len(m.TxPerBlock) == 0 {
+		return 0, 0
+	}
+	sorted := make([]float64, len(m.TxPerBlock))
+	for i, c := range m.TxPerBlock {
+		sorted[i] = float64(c)
+	}
+	sort.Float64s(sorted)
+	return percentile(sorted, 0.50), percentile(sorted, 0.99)
+}
+
+// percentile returns the p-th percentile from a pre-sorted float64 slice using
+// linear interpolation between adjacent ranks. For example, p=0.50 returns the
+// median and p=0.99 returns the value below which 99% of observations fall.
+func percentile(sorted []float64, p float64) float64 {
+	if len(sorted) == 0 {
+		return 0
+	}
+	if len(sorted) == 1 {
+		return sorted[0]
+	}
+	rank := p * float64(len(sorted)-1)
+	lower := int(math.Floor(rank))
+	upper := lower + 1
+	if upper >= len(sorted) {
+		return sorted[len(sorted)-1]
+	}
+	frac := rank - float64(lower)
+	return sorted[lower] + frac*(sorted[upper]-sorted[lower])
+}
+
+// mgasPerSec computes totalGasUsed / elapsed / 1e6, giving throughput in
+// millions of gas units per second (MGas/s).
+func mgasPerSec(totalGasUsed uint64, elapsed time.Duration) float64 {
+	if elapsed <= 0 {
+		return 0
+	}
+	return float64(totalGasUsed) / elapsed.Seconds() / 1e6
+}
+
+// waitForSpamoorDone polls spamoor metrics until the total sent count reaches
+// the target or the context is cancelled. It logs the send rate at each poll
+// interval and returns the final sent and failed counts.
+func waitForSpamoorDone(ctx context.Context, log func(string, ...any), api *spamoor.API, targetCount int, pollInterval time.Duration) (sent, failed float64, err error) {
+	ticker := time.NewTicker(pollInterval)
+	defer ticker.Stop()
+
+	start := time.Now()
+	var prevSent float64
+
+	for {
+		select {
+		case <-ctx.Done():
+			return sent, failed, fmt.Errorf("timed out waiting for spamoor to send %d txs (sent %.0f): %w", targetCount, sent, ctx.Err())
+		case <-ticker.C:
+			metrics, mErr := api.GetMetrics()
+			if mErr != nil {
+				log("failed to get spamoor metrics: %v", mErr)
+				continue
+			}
+			sent = sumCounter(metrics["spamoor_transactions_sent_total"])
+			failed = sumCounter(metrics["spamoor_transactions_failed_total"])
+
+			delta := sent - prevSent
+			rate := delta / pollInterval.Seconds()
+			elapsed := time.Since(start).Round(time.Second)
+			log("spamoor progress: %.0f/%.0f sent (%.0f tx/s instant, %.0f tx/s avg, %.0f failed) [%s]",
+				sent, float64(targetCount), rate, sent/time.Since(start).Seconds(), failed, elapsed)
+			prevSent = sent
+
+			if sent >= float64(targetCount) {
+				return sent, failed, nil
+			}
+		}
+	}
+}
+
+// deleteAllSpammers removes any pre-existing spammers from the daemon.
+// This prevents stale spammers (from previous failed runs) being restored
+// from the spamoor SQLite database.
+func deleteAllSpammers(api *spamoor.API) error {
+	existing, err := api.ListSpammers()
+	if err != nil {
+		return fmt.Errorf("list spammers: %w", err)
+	}
+	for _, sp := range existing {
+		if err := api.DeleteSpammer(sp.ID); err != nil {
+			return fmt.Errorf("delete spammer %d: %w", sp.ID, err)
+		}
+	}
+	return nil
+}
+
+// waitForDrain polls the latest block until consecutiveEmpty consecutive empty
+// blocks are observed, indicating the mempool has drained.
+func waitForDrain(ctx context.Context, log func(string, ...any), client *ethclient.Client, consecutiveEmpty int) error {
+	var emptyRun int
+	var lastBlock uint64
+	ticker := time.NewTicker(200 * time.Millisecond)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return fmt.Errorf("drain timeout after %d consecutive empty blocks (needed %d): %w", emptyRun, consecutiveEmpty, ctx.Err())
+		case <-ticker.C:
+			header, err := client.HeaderByNumber(ctx, nil)
+			if err != nil {
+				continue
+			}
+			num := header.Number.Uint64()
+			if num == lastBlock {
+				continue
+			}
+
+			txCount, err := client.TransactionCount(ctx, header.Hash())
+			if err != nil {
+				continue
+			}
+
+			lastBlock = num
+			if txCount == 0 {
+				emptyRun++
+			} else {
+				emptyRun = 0
+			}
+
+			if emptyRun >= consecutiveEmpty {
+				log("mempool drained: %d consecutive empty blocks at block %d", emptyRun, num)
+				return nil
+			}
+		}
+	}
+}
+
+// blockMetricsSummary holds all derived statistics from a blockMetrics measurement
+// window. Every field is computed from the raw block data by summarize().
+type blockMetricsSummary struct {
+	// SteadyState is the wall-clock duration between the first and last non-empty
+	// blocks, used as the denominator for throughput calculations.
+	SteadyState time.Duration
+	// AchievedMGas is total gas / steady-state seconds / 1e6 (megagas per second).
+	AchievedMGas float64
+	// AchievedTPS is total tx count / steady-state seconds.
+	AchievedTPS float64
+	// IntervalP50, IntervalP99, IntervalMax are percentile and max statistics
+	// over the time between all consecutive blocks (including empty).
+	IntervalP50 time.Duration
+	IntervalP99 time.Duration
+	IntervalMax time.Duration
+	// GasP50, GasP99 are the 50th/99th percentile of gas used per non-empty block.
+	GasP50 float64
+	GasP99 float64
+	// TxP50, TxP99 are the 50th/99th percentile of tx count per non-empty block.
+	TxP50 float64
+	TxP99 float64
+	// AvgGas is the mean gas per non-empty block (TotalGasUsed / BlockCount).
+	AvgGas float64
+	// AvgTx is the mean tx count per non-empty block (TotalTxCount / BlockCount).
+	AvgTx float64
+	// BlocksPerSec is non-empty blocks / steady-state seconds.
+	BlocksPerSec float64
+	// NonEmptyRatio is (non-empty blocks / total blocks) * 100.
+	NonEmptyRatio float64
+}
+
+// summarize computes all derived statistics from the raw block-level data in one
+// pass. It delegates to the individual stat methods (steadyStateDuration,
+// blockIntervalStats, gasPerBlockStats, etc.) and packages the results into a
+// blockMetricsSummary for logging and result-writing.
+func (m *blockMetrics) summarize() *blockMetricsSummary {
+	ss := m.steadyStateDuration()
+	intervalP50, intervalP99, intervalMax := m.blockIntervalStats()
+	gasP50, gasP99 := m.gasPerBlockStats()
+	txP50, txP99 := m.txPerBlockStats()
+
+	var blocksPerSec, achievedTPS float64
+	if ss > 0 {
+		blocksPerSec = float64(m.BlockCount) / ss.Seconds()
+		achievedTPS = float64(m.TotalTxCount) / ss.Seconds()
+	}
+
+	return &blockMetricsSummary{
+		SteadyState:   ss,
+		AchievedMGas:  mgasPerSec(m.TotalGasUsed, ss),
+		AchievedTPS:   achievedTPS,
+		IntervalP50:   intervalP50,
+		IntervalP99:   intervalP99,
+		IntervalMax:   intervalMax,
+		GasP50:        gasP50,
+		GasP99:        gasP99,
+		TxP50:         txP50,
+		TxP99:         txP99,
+		AvgGas:        m.avgGasPerBlock(),
+		AvgTx:         m.avgTxPerBlock(),
+		BlocksPerSec:  blocksPerSec,
+		NonEmptyRatio: m.nonEmptyRatio(),
+	}
+}
+
+// log prints the block range, interval stats, per-block gas/tx stats, and
+// throughput (MGas/s + TPS) to the test log. startBlock/endBlock and
+// totalBlocks/nonEmptyBlocks are passed separately because they live on the
+// raw blockMetrics, not the summary.
+func (s *blockMetricsSummary) log(t testing.TB, startBlock, endBlock uint64, totalBlocks, nonEmptyBlocks int, wallClock time.Duration) {
+	t.Logf("block range: %d-%d (%d total, %d non-empty, %.1f%% non-empty)",
+		startBlock, endBlock, totalBlocks, nonEmptyBlocks, s.NonEmptyRatio)
+	t.Logf("block intervals: p50=%s, p99=%s, max=%s",
+		s.IntervalP50.Round(time.Millisecond), s.IntervalP99.Round(time.Millisecond), s.IntervalMax.Round(time.Millisecond))
+	t.Logf("gas/block (non-empty): avg=%.0f, p50=%.0f, p99=%.0f", s.AvgGas, s.GasP50, s.GasP99)
+	t.Logf("tx/block (non-empty): avg=%.1f, p50=%.0f, p99=%.0f", s.AvgTx, s.TxP50, s.TxP99)
+	t.Logf("throughput: %.2f MGas/s, %.1f TPS over %s steady-state (%s wall clock)",
+		s.AchievedMGas, s.AchievedTPS, s.SteadyState.Round(time.Millisecond), wallClock.Round(time.Millisecond))
+}
+
+// entries returns all summary metrics as result writer entries in the
+// customSmallerIsBetter format expected by github-action-benchmark. Each entry
+// is prefixed with the given label (e.g. "ERC20Throughput") so results from
+// different tests are distinguishable in the same output file.
+func (s *blockMetricsSummary) entries(prefix string) []entry {
+	return []entry{
+		{Name: prefix + " - MGas/s", Unit: "MGas/s", Value: s.AchievedMGas},
+		{Name: prefix + " - TPS", Unit: "tx/s", Value: s.AchievedTPS},
+		{Name: prefix + " - avg gas/block", Unit: "gas", Value: s.AvgGas},
+		{Name: prefix + " - avg tx/block", Unit: "count", Value: s.AvgTx},
+		{Name: prefix + " - blocks/s", Unit: "blocks/s", Value: s.BlocksPerSec},
+		{Name: prefix + " - non-empty block ratio", Unit: "%", Value: s.NonEmptyRatio},
+		{Name: prefix + " - block interval p50", Unit: "ms", Value: float64(s.IntervalP50.Milliseconds())},
+		{Name: prefix + " - block interval p99", Unit: "ms", Value: float64(s.IntervalP99.Milliseconds())},
+		{Name: prefix + " - gas/block p50", Unit: "gas", Value: s.GasP50},
+		{Name: prefix + " - gas/block p99", Unit: "gas", Value: s.GasP99},
+		{Name: prefix + " - tx/block p50", Unit: "count", Value: s.TxP50},
+		{Name: prefix + " - tx/block p99", Unit: "count", Value: s.TxP99},
+	}
+}
+
+// evNodeOverhead computes the fraction of block production time spent outside
+// EVM execution. It looks up the average durations of BlockExecutor.ProduceBlock
+// (the outer span covering the full block lifecycle) and Executor.ExecuteTxs
+// (the inner span covering only EVM tx execution), then returns:
+//
+//	overhead% = (avgProduce - avgExecute) / avgProduce * 100
+//
+// This captures time spent on sequencing, DA submission, header construction,
+// and other ev-node orchestration work. Returns false if either span is missing.
+func evNodeOverhead(spans []e2e.TraceSpan) (float64, bool) {
+	stats := e2e.AggregateSpanStats(spans)
+	produce, ok := stats["BlockExecutor.ProduceBlock"]
+	if !ok {
+		return 0, false
+	}
+	execute, ok := stats["Executor.ExecuteTxs"]
+	if !ok {
+		return 0, false
+	}
+	produceAvg := float64(produce.Total.Microseconds()) / float64(produce.Count)
+	executeAvg := float64(execute.Total.Microseconds()) / float64(execute.Count)
+	if produceAvg <= 0 {
+		return 0, false
+	}
+	return (produceAvg - executeAvg) / produceAvg * 100, true
+}
+
+// collectBlockMetrics iterates all headers in [startBlock, endBlock] to collect
+// gas and transaction metrics. Empty blocks are skipped for gas/tx aggregation
+// but included in block interval tracking.
+func collectBlockMetrics(ctx context.Context, client *ethclient.Client, startBlock, endBlock uint64) (*blockMetrics, error) {
+	if endBlock < startBlock {
+		return nil, fmt.Errorf("endBlock %d < startBlock %d", endBlock, startBlock)
+	}
+
+	m := &blockMetrics{StartBlock: startBlock, EndBlock: endBlock}
+
+	var prevBlockTime time.Time
+	for n := startBlock; n <= endBlock; n++ {
+		header, err := client.HeaderByNumber(ctx, new(big.Int).SetUint64(n))
+		if err != nil {
+			return nil, fmt.Errorf("failed to fetch header %d: %w", n, err)
+		}
+
+		blockTime := time.Unix(int64(header.Time), 0)
+		m.TotalBlockCount++
+
+		// track intervals between all consecutive blocks
+		if !prevBlockTime.IsZero() {
+			m.BlockIntervals = append(m.BlockIntervals, blockTime.Sub(prevBlockTime))
+		}
+		prevBlockTime = blockTime
+
+		txCount, err := client.TransactionCount(ctx, header.Hash())
+		if err != nil {
+			return nil, fmt.Errorf("failed to fetch tx count for block %d: %w", n, err)
+		}
+
+		if txCount == 0 {
+			continue
+		}
+
+		// non-empty block: aggregate gas and tx metrics
+		if m.BlockCount == 0 {
+			m.FirstBlockTime = blockTime
+		}
+		m.LastBlockTime = blockTime
+
+		m.BlockCount++
+		m.TotalGasUsed += header.GasUsed
+		m.TotalTxCount += int(txCount)
+		m.GasPerBlock = append(m.GasPerBlock, header.GasUsed)
+		m.TxPerBlock = append(m.TxPerBlock, int(txCount))
+	}
+
+	return m, nil
+}
diff --git a/test/e2e/benchmark/output.go b/test/e2e/benchmark/output.go
index 39ccbe6d6..06c60e764 100644
--- a/test/e2e/benchmark/output.go
+++ b/test/e2e/benchmark/output.go
@@ -61,6 +61,11 @@ func (w *resultWriter) addEntry(e entry) {
 	w.entries = append(w.entries, e)
 }
 
+// addEntries appends multiple entries to the results.
+func (w *resultWriter) addEntries(entries []entry) {
+	w.entries = append(w.entries, entries...)
+}
+
 // flush writes accumulated entries to the path in BENCH_JSON_OUTPUT.
 // It is a no-op when the env var is unset or no entries were added.
 func (w *resultWriter) flush() {
diff --git a/test/e2e/benchmark/spamoor_erc20_test.go b/test/e2e/benchmark/spamoor_erc20_test.go
new file mode 100644
index 000000000..d0d079355
--- /dev/null
+++ b/test/e2e/benchmark/spamoor_erc20_test.go
@@ -0,0 +1,107 @@
+//go:build evm
+
+package benchmark
+
+import (
+	"context"
+	"fmt"
+	"time"
+
+	"github.com/celestiaorg/tastora/framework/docker/evstack/spamoor"
+)
+
+// TestERC20Throughput measures ERC-20 token transfer throughput in isolation.
+// Each tx is ~65K gas (contract call with storage reads/writes)
+//
+// Primary metric: achieved MGas/s.
+// Diagnostic metrics: per-span latency breakdown, gas/block, tx/block.
+func (s *SpamoorSuite) TestERC20Throughput() {
+	const (
+		numSpammers     = 2
+		countPerSpammer = 50000
+		totalCount      = numSpammers * countPerSpammer
+		serviceName     = "ev-node-erc20"
+		waitTimeout     = 5 * time.Minute
+	)
+
+	t := s.T()
+	ctx := t.Context()
+	w := newResultWriter(t, "ERC20Throughput")
+	defer w.flush()
+
+	e := s.setupEnv(config{
+		serviceName: serviceName,
+	})
+
+	erc20Config := map[string]any{
+		"throughput":      50, // 50 tx per 100ms slot = 500 tx/s per spammer
+		"total_count":     countPerSpammer,
+		"max_pending":     50000,
+		"max_wallets":     200,
+		"base_fee":        20,
+		"tip_fee":         3,
+		"refill_amount":   "5000000000000000000",
+		"refill_balance":  "2000000000000000000",
+		"refill_interval": 600,
+	}
+
+	// clear any stale spammers restored from the persistent spamoor database
+	s.Require().NoError(deleteAllSpammers(e.spamoorAPI), "failed to delete stale spammers")
+
+	// launch all spammers before recording startBlock so warm-up
+	// (contract deploy + wallet funding) is excluded from the measurement window.
+	for i := range numSpammers {
+		name := fmt.Sprintf("bench-erc20-%d", i)
+		id, err := e.spamoorAPI.CreateSpammer(name, spamoor.ScenarioERC20TX, erc20Config, true)
+		s.Require().NoError(err, "failed to create spammer %s", name)
+		t.Cleanup(func() { _ = e.spamoorAPI.DeleteSpammer(id) })
+	}
+
+	// record the starting block after all spammers are launched so the
+	// measurement window excludes the warm-up period.
+	startHeader, err := e.ethClient.HeaderByNumber(ctx, nil)
+	s.Require().NoError(err, "failed to get start block header")
+	startBlock := startHeader.Number.Uint64()
+	loadStart := time.Now()
+
+	// wait for spamoor to finish sending all transactions
+	waitCtx, cancel := context.WithTimeout(ctx, waitTimeout)
+	defer cancel()
+	sent, failed, err := waitForSpamoorDone(waitCtx, t.Logf, e.spamoorAPI, totalCount, 2*time.Second)
+	s.Require().NoError(err, "spamoor did not finish in time")
+
+	// wait for pending txs to drain: once we see several consecutive empty
+	// blocks, the mempool is drained and we can stop.
+	drainCtx, drainCancel := context.WithTimeout(ctx, 30*time.Second)
+	defer drainCancel()
+	if err := waitForDrain(drainCtx, t.Logf, e.ethClient, 10); err != nil {
+		t.Logf("warning: %v", err)
+	}
+	wallClock := time.Since(loadStart)
+
+	endHeader, err := e.ethClient.HeaderByNumber(ctx, nil)
+	s.Require().NoError(err, "failed to get end block header")
+	endBlock := endHeader.Number.Uint64()
+
+	// collect block-level gas/tx metrics
+	bm, err := collectBlockMetrics(ctx, e.ethClient, startBlock, endBlock)
+	s.Require().NoError(err, "failed to collect block metrics")
+
+	summary := bm.summarize()
+	s.Require().Greater(summary.SteadyState, time.Duration(0), "expected non-zero steady-state duration")
+	summary.log(t, startBlock, endBlock, bm.TotalBlockCount, bm.BlockCount, wallClock)
+
+	// collect and report traces
+	traces := s.collectTraces(e, serviceName)
+
+	if overhead, ok := evNodeOverhead(traces.evNode); ok {
+		t.Logf("ev-node overhead: %.1f%%", overhead)
+		w.addEntry(entry{Name: "ERC20Throughput - ev-node overhead", Unit: "%", Value: overhead})
+	}
+
+	w.addEntries(summary.entries("ERC20Throughput"))
+	w.addSpans(traces.allSpans())
+
+	s.Require().Greater(sent, float64(0), "at least one transaction should have been sent")
+	s.Require().Zero(failed, "no transactions should have failed")
+}
diff --git a/test/e2e/benchmark/spamoor_smoke_test.go b/test/e2e/benchmark/spamoor_smoke_test.go
index fa6258373..d09057462 100644
--- a/test/e2e/benchmark/spamoor_smoke_test.go
+++ b/test/e2e/benchmark/spamoor_smoke_test.go
@@ -3,11 +3,9 @@
 package benchmark
 
 import (
-	"os"
 	"time"
 
 	"github.com/celestiaorg/tastora/framework/docker/evstack/spamoor"
-	e2e "github.com/evstack/ev-node/test/e2e"
 )
 
 // TestSpamoorSmoke spins up reth + sequencer and a Spamoor node, starts a few
@@ -18,17 +16,13 @@ func (s *SpamoorSuite) TestSpamoorSmoke() {
 	w := newResultWriter(t, "SpamoorSmoke")
 	defer w.flush()
 
-	// TODO: temporary hardcoded tag, will be replaced with a proper release tag
-	rethTag := os.Getenv("EV_RETH_TAG")
-	if rethTag == "" {
-		rethTag = "pr-140"
-	}
 	e := s.setupEnv(config{
-		rethTag:     rethTag,
 		serviceName: "ev-node-smoke",
 	})
 	api := e.spamoorAPI
 
+	s.Require().NoError(deleteAllSpammers(api), "failed to delete stale spammers")
+
 	eoatx := map[string]any{
 		"throughput":      100,
 		"total_count":     3000,
@@ -82,38 +76,8 @@ func (s *SpamoorSuite) TestSpamoorSmoke() {
 	fail := sumCounter(metrics["spamoor_transactions_failed_total"])
 
 	// collect traces
-	evNodeSpans := s.collectServiceTraces(e, "ev-node-smoke")
-	evRethSpans := s.collectServiceTraces(e, "ev-reth")
-	e2e.PrintTraceReport(t, "ev-node-smoke", evNodeSpans)
-	e2e.PrintTraceReport(t, "ev-reth", evRethSpans)
-
-	w.addSpans(append(evNodeSpans, evRethSpans...))
-
-	// assert expected ev-node span names
-	assertSpanNames(t, evNodeSpans, []string{
-		"BlockExecutor.ProduceBlock",
-		"BlockExecutor.ApplyBlock",
-		"BlockExecutor.CreateBlock",
-		"BlockExecutor.RetrieveBatch",
-		"Executor.ExecuteTxs",
-		"Executor.SetFinal",
-		"Engine.ForkchoiceUpdated",
-		"Engine.NewPayload",
-		"Engine.GetPayload",
-		"Eth.GetBlockByNumber",
-		"Sequencer.GetNextBatch",
-		"DASubmitter.SubmitHeaders",
-		"DASubmitter.SubmitData",
-		"DA.Submit",
-	}, "ev-node-smoke")
-
-	// assert expected ev-reth span names
-	assertSpanNames(t, evRethSpans, []string{
-		"build_payload",
-		"execute_tx",
-		"try_build",
-		"validate_transaction",
-	}, "ev-reth")
+	traces := s.collectTraces(e, "ev-node-smoke")
+	w.addSpans(traces.allSpans())
 
 	s.Require().Greater(sent, float64(0), "at least one transaction should have been sent")
 	s.Require().Zero(fail, "no transactions should have failed")
diff --git a/test/e2e/benchmark/suite_test.go b/test/e2e/benchmark/suite_test.go
index 34c0bc57a..bb9993c58 100644
--- a/test/e2e/benchmark/suite_test.go
+++ b/test/e2e/benchmark/suite_test.go
@@ -4,6 +4,7 @@ package benchmark
 
 import (
 	"context"
+	"os"
 	"path/filepath"
 	"testing"
 	"time"
@@ -48,10 +49,19 @@ type env struct {
 
 // config parameterizes the per-test environment setup.
 type config struct {
-	rethTag     string
 	serviceName string
 }
 
+// TODO: temporary hardcoded tag, will be replaced with a proper release tag
+const defaultRethTag = "pr-140"
+
+func rethTag() string {
+	if tag := os.Getenv("EV_RETH_TAG"); tag != "" {
+		return tag
+	}
+	return defaultRethTag
+}
+
 // setupEnv creates a Jaeger + reth + sequencer + Spamoor environment for
 // a single test. Each call spins up isolated infrastructure so tests
 // can't interfere with each other.
@@ -70,15 +80,21 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env {
 	// reth + local DA with OTLP tracing to Jaeger
 	evmEnv := e2e.SetupCommonEVMEnv(t, sut, s.dockerCli, s.networkID,
 		e2e.WithRethOpts(func(b *reth.NodeBuilder) {
-			b.WithTag(cfg.rethTag).WithEnv(
-				// ev-reth reads OTEL_EXPORTER_OTLP_ENDPOINT and passes it directly
-				// to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does
-				// not auto-append /v1/traces, so the full path is required.
-				"OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces",
-				"OTEL_EXPORTER_OTLP_PROTOCOL=http",
-				"RUST_LOG=debug",
-				"OTEL_SDK_DISABLED=false",
-			)
+			b.WithTag(rethTag()).
+				// increase values to facilitate spamoor.
+				WithAdditionalStartArgs(
+					"--rpc.max-connections", "5000",
+					"--rpc.max-tracing-requests", "1000",
+				).
+				WithEnv(
+					// ev-reth reads OTEL_EXPORTER_OTLP_ENDPOINT and passes it directly
+					// to with_endpoint(). opentelemetry-otlp v0.31 HTTP exporter does
+					// not auto-append /v1/traces, so the full path is required.
+					"OTEL_EXPORTER_OTLP_ENDPOINT="+jg.Internal.IngestHTTPEndpoint()+"/v1/traces",
+					"OTEL_EXPORTER_OTLP_PROTOCOL=http",
+					"RUST_LOG=debug",
+					"OTEL_SDK_DISABLED=false",
+				)
 		}),
 	)
 
@@ -88,7 +104,8 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env {
 	e2e.SetupSequencerNode(t, sut, sequencerHome, evmEnv.SequencerJWT, evmEnv.GenesisHash, evmEnv.Endpoints,
 		"--evnode.instrumentation.tracing=true",
 		"--evnode.instrumentation.tracing_endpoint", otlpHTTP,
-		"--evnode.instrumentation.tracing_sample_rate", "1.0",
+		// TODO: setting this to 1 produced too many spans for the local Jaeger deployment alongside everything else.
+		"--evnode.instrumentation.tracing_sample_rate", "0.1",
 		"--evnode.instrumentation.tracing_service_name", cfg.serviceName,
 	)
 	t.Log("sequencer node is up")
@@ -108,7 +125,8 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env {
 		WithDockerNetworkID(evmEnv.RethNode.NetworkID).
 		WithLogger(evmEnv.RethNode.Logger).
 		WithRPCHosts(internalRPC).
-		WithPrivateKey(e2e.TestPrivateKey)
+		WithPrivateKey(e2e.TestPrivateKey).
+		WithAdditionalStartArgs("--slot-duration", "100ms", "--startup-delay", "0")
 
 	spNode, err := spBuilder.Build(ctx)
 	s.Require().NoError(err, "failed to build spamoor node")
@@ -129,6 +147,32 @@ func (s *SpamoorSuite) setupEnv(cfg config) *env {
 	}
 }
 
+// traceResult holds the collected spans from ev-node and (optionally) ev-reth.
+type traceResult struct {
+	evNode []e2e.TraceSpan
+	evReth []e2e.TraceSpan
+}
+
+// allSpans returns ev-node and ev-reth spans concatenated.
+func (tr *traceResult) allSpans() []e2e.TraceSpan {
+	return append(tr.evNode, tr.evReth...)
+}
+
+// collectTraces fetches ev-node traces (required) and ev-reth traces (optional)
+// from Jaeger, then prints reports for both.
+func (s *SpamoorSuite) collectTraces(e *env, serviceName string) *traceResult {
+	t := s.T()
+	tr := &traceResult{
+		evNode: s.collectServiceTraces(e, serviceName),
+		evReth: s.tryCollectServiceTraces(e, "ev-reth"),
+	}
+	e2e.PrintTraceReport(t, serviceName, tr.evNode)
+	if len(tr.evReth) > 0 {
+		e2e.PrintTraceReport(t, "ev-reth", tr.evReth)
+	}
+	return tr
+}
+
 // collectServiceTraces fetches traces from Jaeger for the given service and returns the spans.
 func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.TraceSpan {
 	ctx, cancel := context.WithTimeout(s.T().Context(), 3*time.Minute)
@@ -143,3 +187,27 @@ func (s *SpamoorSuite) collectServiceTraces(e *env, serviceName string) []e2e.Tr
 
 	return toTraceSpans(extractSpansFromTraces(traces))
 }
+
+// tryCollectServiceTraces fetches traces from Jaeger for the given service,
+// returning nil instead of failing the test if traces are unavailable.
+func (s *SpamoorSuite) tryCollectServiceTraces(e *env, serviceName string) []e2e.TraceSpan {
+	t := s.T()
+	ctx, cancel := context.WithTimeout(t.Context(), 3*time.Minute)
+	defer cancel()
+
+	ok, err := e.jaeger.External.WaitForTraces(ctx, serviceName, 1, 2*time.Second)
+	if err != nil || !ok {
+		t.Logf("warning: could not collect %s traces (err=%v, ok=%v)", serviceName, err, ok)
+		return nil
+	}
+
+	traces, err := e.jaeger.External.Traces(ctx, serviceName, 10000)
+	if err != nil {
+		t.Logf("warning: failed to fetch %s traces: %v", serviceName, err)
+		return nil
+	}
+
+	spans := toTraceSpans(extractSpansFromTraces(traces))
+	t.Logf("collected %d %s spans", len(spans), serviceName)
+	return spans
+}
diff --git a/test/e2e/benchmark/traces.go b/test/e2e/benchmark/traces.go
index 459dd16a1..c5269a199 100644
--- a/test/e2e/benchmark/traces.go
+++ b/test/e2e/benchmark/traces.go
@@ -3,11 +3,9 @@
 package benchmark
 
 import (
-	"testing"
 	"time"
 
 	e2e "github.com/evstack/ev-node/test/e2e"
-	"github.com/stretchr/testify/require"
 )
 
 // jaegerSpan holds the fields we extract from Jaeger's untyped JSON response.
@@ -16,7 +14,7 @@ type jaegerSpan struct {
 	duration      float64 // microseconds
 }
 
-func (j jaegerSpan) SpanName() string           { return j.operationName }
+func (j jaegerSpan) SpanName() string            { return j.operationName }
 func (j jaegerSpan) SpanDuration() time.Duration { return time.Duration(j.duration) * time.Microsecond }
 
 // extractSpansFromTraces walks Jaeger's []any response and pulls out span operation names and durations.
@@ -53,15 +51,3 @@ func toTraceSpans(spans []jaegerSpan) []e2e.TraceSpan {
 	}
 	return out
 }
-
-// assertSpanNames verifies that all expected span names appear in the trace data.
-func assertSpanNames(t testing.TB, spans []e2e.TraceSpan, expected []string, label string) {
-	t.Helper()
-	opNames := make(map[string]struct{}, len(spans))
-	for _, span := range spans {
-		opNames[span.SpanName()] = struct{}{}
-	}
-	for _, name := range expected {
-		require.Contains(t, opNames, name, "expected span %q not found in %s traces", name, label)
-	}
-}
diff --git a/test/e2e/go.mod b/test/e2e/go.mod
index fb254230c..a65841be8 100644
--- a/test/e2e/go.mod
+++ b/test/e2e/go.mod
@@ -5,7 +5,7 @@ go 1.25.6
 require (
 	cosmossdk.io/math v1.5.3
 	github.com/celestiaorg/go-square/v3 v3.0.2
-	github.com/celestiaorg/tastora v0.16.0
+	github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb
 	github.com/cosmos/cosmos-sdk v0.53.6
 	github.com/cosmos/ibc-go/v8 v8.8.0
 	github.com/ethereum/go-ethereum v1.17.0
diff --git a/test/e2e/go.sum b/test/e2e/go.sum
index 163b5e5d5..b218f0094 100644
--- a/test/e2e/go.sum
+++ b/test/e2e/go.sum
@@ -145,8 +145,8 @@ github.com/celestiaorg/go-square/v3 v3.0.2 h1:eSQOgNII8inK9IhiBZ+6GADQeWbRq4HYY7
 github.com/celestiaorg/go-square/v3 v3.0.2/go.mod h1:oFReMLsSDMRs82ICFEeFQFCqNvwdsbIM1BzCcb0f7dM=
 github.com/celestiaorg/nmt v0.24.2 h1:LlpJSPOd6/Lw1Ig6HUhZuqiINHLka/ZSRTBzlNJpchg=
 github.com/celestiaorg/nmt v0.24.2/go.mod h1:vgLBpWBi8F5KLxTdXSwb7AU4NhiIQ1AQRGa+PzdcLEA=
-github.com/celestiaorg/tastora v0.16.0 h1:V4ctGcvVR8thy4ulvrHagrTfdNfuCHOTsCYoKVRQ75U=
-github.com/celestiaorg/tastora v0.16.0/go.mod h1:C867PBm6Ne6e/1JlmsRqcLeJ6RHAuMoMRCvwJzV/q8g=
+github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb h1:rTVChmvo3+SYi0dwd5W2yz2zyuAid8cXVAeROEY5ecs=
+github.com/celestiaorg/tastora v0.16.1-0.20260302131806-2816c7b82bfb/go.mod h1:C867PBm6Ne6e/1JlmsRqcLeJ6RHAuMoMRCvwJzV/q8g=
 github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
 github.com/cenkalti/backoff v2.2.1+incompatible/go.mod h1:90ReRw6GdpyfrHakVjL/QHaoyV4aDUVVkXQJJJ3NXXM=
 github.com/cenkalti/backoff/v4 v4.1.1/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw=