diff --git a/.gitignore b/.gitignore
index 8a8b7cf..5f18a33 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,9 @@ Rplots.pdf
 /docs
 /doc/
 /Meta/
+
+# Profiling artifacts (examples/profile_writetfl.R writes to tempdir(),
+# but ignore these in case anyone redirects them here)
+*.Rprof
+examples/profvis_*.html
+examples/profile_output/
diff --git a/DESCRIPTION b/DESCRIPTION
index a60b7ec..d4ead32 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,10 +26,12 @@ Imports:
     glue,
     rlang
 Suggests:
+    bench,
     flextable,
     formatters,
     ggtibble,
     gt,
+    profvis,
     rtables,
     table1,
     testthat (>= 3.0.0),
diff --git a/R/table_utils.R b/R/table_utils.R
index 6323055..b81b17c 100644
--- a/R/table_utils.R
+++ b/R/table_utils.R
@@ -260,7 +260,11 @@
 # accept a gp argument in all grid versions.
 .measure_max_string_width <- function(strings, gp) {
   if (length(strings) == 0L) return(0)
-  max(vapply(strings, function(s) {
+  # Dedupe up front: real-world callers pass cell-string vectors where the
+  # same value typically appears in many rows (e.g. category labels, NA
+  # strings), so this saves grid round-trips with no behaviour change.
+  uniq <- unique(strings)
+  max(vapply(uniq, function(s) {
     lines <- strsplit(s, "\n", fixed = TRUE)[[1L]]
     max(vapply(lines, function(ln) {
       grob <- grid::textGrob(ln, gp = gp)
diff --git a/R/wrap.R b/R/wrap.R
index 9054861..7fd9acf 100644
--- a/R/wrap.R
+++ b/R/wrap.R
@@ -155,9 +155,18 @@ wrap_breaks_default <- function() {
 # ---------------------------------------------------------------------------
 
 # Width of a string under the active viewport's font context, in inches.
-.measure_text_width_in <- function(s, gp) {
+#
+# `cache`, if supplied, is an environment used as a (string -> width) memo
+# scoped to the caller's lifetime.  The caller is responsible for ensuring
+# every cache entry was measured under the same `gp`.
+.measure_text_width_in <- function(s, gp, cache = NULL) {
   if (!nzchar(s)) return(0)
-  .width_in(grid::grobWidth(grid::textGrob(s, gp = gp)))
+  if (!is.null(cache) && exists(s, envir = cache, inherits = FALSE)) {
+    return(get(s, envir = cache, inherits = FALSE))
+  }
+  w <- .width_in(grid::grobWidth(grid::textGrob(s, gp = gp)))
+  if (!is.null(cache)) assign(s, w, envir = cache)
+  w
 }
 
 #' Wrap text to fit a target width, preserving paragraph breaks.
@@ -182,15 +191,21 @@ wrap_breaks_default <- function() {
   if (is.null(text) || !nzchar(text)) return(text)
   if (is.null(breaks)) breaks <- wrap_breaks_default()
 
+  # Per-call width memo: within one wrap the same gp is used throughout, and
+  # the greedy walk re-measures many overlapping `cand` strings.  Sharing one
+  # cache across paragraphs deduplicates those calls.
+  width_cache <- new.env(hash = TRUE, parent = emptyenv())
+
   paragraphs <- strsplit(text, "\n", fixed = TRUE)[[1L]]
   wrapped    <- vapply(paragraphs, function(para) {
     if (!nzchar(para)) return("")
-    .wrap_paragraph(para, available_w_in, gp, breaks)
+    .wrap_paragraph(para, available_w_in, gp, breaks, width_cache)
   }, character(1L))
   paste(wrapped, collapse = "\n")
 }
 
-.wrap_paragraph <- function(para, available_w_in, gp, breaks) {
+.wrap_paragraph <- function(para, available_w_in, gp, breaks,
+                            width_cache = NULL) {
   tokens <- .tokenize_for_wrap(para, breaks)
   if (length(tokens) == 0L) return("")
 
@@ -203,7 +218,8 @@ wrap_breaks_default <- function() {
       next
     }
     cand <- paste0(current_line, tok$lead, tok$text)
-    if (.measure_text_width_in(cand, gp) <= available_w_in + 1e-6) {
+    if (.measure_text_width_in(cand, gp, width_cache) <=
+        available_w_in + 1e-6) {
       current_line <- cand
     } else {
       lines        <- c(lines, current_line)
@@ -246,6 +262,10 @@ wrap_breaks_default <- function() {
 #' @keywords internal
 .column_min_token_width_in <- function(strings, gp, breaks) {
   if (length(strings) == 0L) return(0)
+  # Single shared cache across the column: tokens like "the", units, and
+  # other short repeats appear in many cells and would otherwise each
+  # incur a fresh textGrob+grobWidth+convertWidth round-trip.
+  cache <- new.env(hash = TRUE, parent = emptyenv())
   max(vapply(strings, function(s) {
     if (!nzchar(s)) return(0)
     paragraphs <- strsplit(s, "\n", fixed = TRUE)[[1L]]
@@ -253,7 +273,7 @@ wrap_breaks_default <- function() {
       tokens <- .tokenize_for_wrap(p, breaks)
       if (length(tokens) == 0L) return(0)
       max(vapply(tokens, function(tok) {
-        .measure_text_width_in(tok$text, gp)
+        .measure_text_width_in(tok$text, gp, cache)
       }, numeric(1L)))
     }, numeric(1L)))
   }, numeric(1L)))
diff --git a/design/DECISIONS.md b/design/DECISIONS.md
index 7468310..f73f8c1 100644
--- a/design/DECISIONS.md
+++ b/design/DECISIONS.md
@@ -1299,3 +1299,105 @@ project owner's stance, no backward-compat constraint is in force.
 Single-page tables (Case A or B) produce identical output under both
 strategies (verified by a regression test).  `wrap_first` is preserved
 as an opt-in for empirical comparison and as an escape hatch.
+
+---
+
+## D-43: Token-width memoization in word-wrap measurement
+
+**Decision:** Add per-call string-width caches inside `.wrap_string()` and
+`.column_min_token_width_in()`, and dedupe inputs in
+`.measure_max_string_width()`.  The optional `cache` argument to
+`.measure_text_width_in()` lets a caller share one memo across many
+measurement calls.
+
+**Context (profiling):** With the wrap module from issue #35 landed, profile
+`tfl_table` / `export_tfl` on representative inputs and ship only
+optimisations that show a real wall-clock win without obscuring the wrap
+algorithms.  Harness lives at `examples/profile_writetfl.R`; a side-by-side
+benchmark lives at `examples/bench_compare.R`.  Both are build-ignored.
+
+Baseline self-time was dominated by `grid::grobWidth` and `grid::textGrob`
+validation paths.  `Rprof(line.profiling = TRUE)` on the 18-demo
+`wrap_demos.R` sweep showed `wrap.R#160` (the `grobWidth(textGrob(s, gp))`
+call inside `.measure_text_width_in()`) accounting for **73.7%** of total
+time, and `wrap.R#255` (per-token measurement inside
+`.column_min_token_width_in()`) accounting for **34.7%**.  Inside
+`.height_balance_widths_impl()` the existing per-(column, width) cache
+already amortises height measurement; the remaining cost was text-width
+re-measurement of repeated tokens and overlapping `cand` substrings inside
+the greedy wrapper.
+
+**Change:**
+- `R/wrap.R` — `.measure_text_width_in()` gains an optional `cache` env
+  parameter.  `.wrap_string()` creates one cache per call and passes it
+  through to `.wrap_paragraph()`.  `.column_min_token_width_in()` creates
+  one cache shared across all strings in the column.
+- `R/table_utils.R` — `.measure_max_string_width()` runs `unique()` on its
+  input vector before measuring.
+
+The cache pattern mirrors the existing `memo` env in
+`measure_row_heights_tbl()` (`R/table_rows.R:38-46`) and the per-(j, width)
+cache in `.height_balance_widths_impl()` (`R/wrap.R:713`).  Each cache is
+scoped to a single function call so lifetimes are obvious and no global
+state leaks.
+
+**Measured improvement** (medians from `examples/bench_compare.R`, 15
+iterations for the core scenarios, 3 for `wrap_demos`):
+
+| Scenario        | Before    | After     | Δ       |
+|-----------------|-----------|-----------|---------|
+| `core_small`    | 231 ms    | 198 ms    | ~14% ↓  |
+| `core_wrap`     | 334 ms    | 225 ms    | ~33% ↓  |
+| `core_paginate` | 583 ms    | 610 ms    | within noise (min-of-mins ~5% ↓) |
+| `figure_multi`  | 342 ms    | 326 ms    | within noise |
+| `wrap_demos`    | 9.88 s    | 3.43 s    | **~65% ↓** |
+
+`wrap_demos` is the broadest signal because it exercises every wrap and
+column-split code path in 18 different configurations.  `core_wrap` is the
+targeted single-scenario probe (clinical fixture with
+`wrap_balance = "height"`).  Both exceed the ≥10% bar by a large margin.
+`core_paginate` runs `tfl_table(iris)` which exercises the Pass-1 natural
+width measurement (where `.measure_max_string_width()` dedup helps) but
+spends most of its time in row drawing; differences fall inside run-to-run
+variance.
+
+**Alternatives considered and rejected:**
+- *Global LRU cache* — confuses lifetimes (when does a cache entry become
+  stale?) and risks leaking state across unrelated calls; the per-call
+  pattern already in the codebase is clearer.
+- *Closure-based `make_width_cache(gp)`* — would bind the `gp` to the cache
+  but requires changing every measurement call site from
+  `.measure_text_width_in(s, gp)` to `measure(s)`.  Equal effect, more
+  surface area than an optional `cache` arg.
+- *Vectorising `.wrap_paragraph()` with width estimates* — the algorithm is
+  intentionally a readable greedy walk; a width-estimate pre-screen would
+  add a separate code path with subtle correctness conditions.  Profile
+  did not justify the cost.
+- *Pre-screening tokens by `nchar` in `.column_min_token_width_in()`* —
+  width is not strictly monotonic in `nchar` once gpar changes are
+  considered, so this requires a lower-bound argument that is easy to get
+  wrong.  Memoization gets the same speedup without changing the algorithm.
+
+**Out of scope / not done:**
+- Touching `.compute_wrapped_widths()`, the water-fill loop, or
+  `.height_balance_widths_impl()`'s search — these had no measurable
+  bottleneck beyond what the existing per-(j, width) cache already handles.
+- Rewriting `drawDetails.tfl_table_grob()` — its fallback branch at
+  `R/table_draw.R:193-209` already short-circuits when the pipeline
+  precomputed `row_heights_in`, which is the normal path.
+
+**Files touched:**
+- `R/wrap.R` — `.measure_text_width_in()`, `.wrap_string()`,
+  `.wrap_paragraph()`, `.column_min_token_width_in()` (+~25 lines)
+- `R/table_utils.R` — `.measure_max_string_width()` (+~5 lines)
+- `examples/profile_writetfl.R` (new) — Rprof + profvis harness
+- `examples/bench_compare.R` (new) — stash-friendly before/after timer
+- `DESCRIPTION` — added `bench`, `profvis` to Suggests
+- `.gitignore` — added `*.Rprof`, `examples/profvis_*.html`,
+  `examples/profile_output/`
+
+**Verification:**
+- Full `devtools::test()` green before and after.
+- `wrap_demos.R` produces identical page counts and PDF byte sizes
+  (visual inspection of `21_*` family files).
+- `examples/bench_compare.R` reproduces the table above.
diff --git a/examples/bench_compare.R b/examples/bench_compare.R
new file mode 100644
index 0000000..e88c218
--- /dev/null
+++ b/examples/bench_compare.R
@@ -0,0 +1,87 @@
+# examples/bench_compare.R
+#
+# Standalone benchmark used to compare two code versions side-by-side.
+# Run with `git stash` flipping the optimisation on/off so both timings come
+# from the same R session settings.
+#
+# Usage:
+#   Rscript examples/bench_compare.R              # all scenarios
+#   Rscript examples/bench_compare.R core_wrap    # one scenario
+
+suppressPackageStartupMessages({
+  devtools::load_all(quiet = TRUE)
+})
+
+args <- commandArgs(trailingOnly = TRUE)
+selected <- if (length(args) > 0L) args else
+  c("core_small", "core_wrap", "core_paginate", "figure_multi", "wrap_demos")
+
+make_clinical_df <- function() {
+  data.frame(
+    ae_term      = rep(paste(rep("Headache mild moderate severe related",
+                                 4), collapse = " "), 10),
+    ae_action    = rep(paste(rep("Drug withdrawn temporarily",
+                                 4), collapse = " "), 10),
+    ae_notes     = rep(paste(rep("Patient continued safely",
+                                 4), collapse = " "), 10),
+    onset_day    = 1:10,
+    duration_day = 11:20,
+    stringsAsFactors = FALSE
+  )
+}
+
+scenarios <- list(
+  core_small = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(head(mtcars, 20)), file = out)
+    unlink(out)
+  },
+  core_wrap = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(make_clinical_df(),
+                         col_split_strategy = "balanced",
+                         wrap_balance       = "height"),
+               file = out, pg_width = 6, pg_height = 8.5,
+               min_content_height = grid::unit(1, "inches"))
+    unlink(out)
+  },
+  core_paginate = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(iris), file = out)
+    unlink(out)
+  },
+  figure_multi = function() {
+    pages <- lapply(seq_len(5L), function(i)
+      list(content = ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
+             ggplot2::geom_point(),
+           header_left = sprintf("Figure %d.1", i)))
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(pages, file = out)
+    unlink(out)
+  },
+  wrap_demos = function() {
+    src <- file.path("examples", "wrap_demos.R")
+    env <- new.env(parent = globalenv())
+    sf  <- tempfile()
+    con <- file(sf, open = "wt")
+    sink(con); sink(con, type = "message")
+    on.exit({ sink(type = "message"); sink(); close(con); unlink(sf) }, add = TRUE)
+    sys.source(src, envir = env)
+  }
+)
+
+iter <- list(core_small = 15L, core_wrap = 15L, core_paginate = 15L,
+             figure_multi = 15L, wrap_demos = 3L)
+
+for (name in selected) {
+  fn <- scenarios[[name]]
+  invisible(fn())  # warmup
+  bm <- bench::mark(fn(), iterations = iter[[name]], check = FALSE,
+                    filter_gc = FALSE, memory = FALSE)
+  cat(sprintf("%-15s  min=%-9s  median=%-9s  mean=%-9s  n=%d\n",
+              name,
+              format(bm$min),
+              format(bm$median),
+              format(bm$mean),
+              bm$n_itr))
+}
diff --git a/examples/ggplot_stress.R b/examples/ggplot_stress.R
new file mode 100644
index 0000000..dd37dd1
--- /dev/null
+++ b/examples/ggplot_stress.R
@@ -0,0 +1,57 @@
+# examples/ggplot_stress.R
+#
+# The original crash happened inside the figure_multi Rprof block which was
+# 20 invocations of an export that drew 5 ggplots each = 100 ggplot renders
+# under Rprof sampling.  Phase 1 here is 5x that volume.  Phases 2/3 push the
+# sampling and viewport-churn axes.
+#
+# Per ggplot, internally: ~200-500 grob ops + many push/pop viewport pairs.
+# Phase 1 = 500 renders ~= 100k-250k grob ops in the ggplot rendering path.
+
+suppressPackageStartupMessages({
+  library(ggplot2)
+})
+
+one_export <- function() {
+  pages <- lapply(1:5, function(i) {
+    list(content = ggplot(mtcars, aes(hp, mpg)) + geom_point() +
+                     ggtitle(sprintf("Page %d", i)),
+         header_left = sprintf("Figure %d", i))
+  })
+  out <- tempfile(fileext = ".pdf")
+  writetfl::export_tfl(pages, file = out)
+  unlink(out)
+}
+
+devtools::load_all(quiet = TRUE)
+
+cat("Phase 1: 100 figure-export invocations under Rprof@0.01\n")
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.01, line.profiling = TRUE, gc.profiling = TRUE)
+t1 <- system.time({
+  for (k in 1:100) one_export()
+})
+Rprof(NULL)
+print(t1)
+cat("  samples:", nrow(summaryRprof(prof)$by.self), "\n")
+
+cat("\nPhase 2: 200 figure-export invocations under Rprof@0.001 (aggressive)\n")
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.001, line.profiling = TRUE, gc.profiling = TRUE)
+t2 <- system.time({
+  for (k in 1:200) one_export()
+})
+Rprof(NULL)
+print(t2)
+cat("  samples:", nrow(summaryRprof(prof)$by.self), "\n")
+
+cat("\nPhase 3: 200 figure-export, no Rprof but force gc every 10 iters\n")
+t3 <- system.time({
+  for (k in 1:200) {
+    one_export()
+    if (k %% 10L == 0L) gc(verbose = FALSE)
+  }
+})
+print(t3)
+
+cat("\nNo segfault.\n")
diff --git a/examples/grob_stress.R b/examples/grob_stress.R
new file mode 100644
index 0000000..261c32e
--- /dev/null
+++ b/examples/grob_stress.R
@@ -0,0 +1,105 @@
+# examples/grob_stress.R
+#
+# Stress test: many rapid grob-width measurements under Rprof, testing the
+# hypothesis that the SIGSEGV in profile_writetfl.R was a memory/race issue
+# in grid triggered by sheer measurement volume rather than by any specific
+# writetfl code path.
+#
+# Each phase pushes a different axis:
+#   1. Many measurements, no device, no Rprof -- baseline
+#   2. Many measurements, pdf device open, no Rprof
+#   3. Many measurements, pdf device open, Rprof at default 0.02 interval
+#   4. Many measurements, pdf device open, Rprof at aggressive 0.001 interval
+#   5. Many measurements, repeated device open/close cycles, Rprof active
+#
+# Volume target: ~50k textGrob+grobWidth calls per phase, roughly an order of
+# magnitude more than profile_writetfl.R's figure_multi block produces.
+
+N <- 50000L
+
+# Vary the strings so grid cannot incidentally short-circuit on identity.
+make_strings <- function(n) {
+  pool <- c(
+    "the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
+    "alpha beta gamma", "delta epsilon zeta", "eta theta iota",
+    "1.234", "5.678", "9.012", "NA",
+    "Subject 001", "Subject 002", "Subject 003",
+    "Headache mild", "Drug withdrawn", "Patient continued safely"
+  )
+  sample(pool, n, replace = TRUE)
+}
+
+measure_block <- function(strings, gp) {
+  for (s in strings) {
+    grid::convertWidth(grid::grobWidth(grid::textGrob(s, gp = gp)),
+                       "inches", valueOnly = TRUE)
+  }
+}
+
+gp <- grid::gpar(fontfamily = "", fontface = "plain",
+                 fontsize = 10, lineheight = 1.0)
+strs <- make_strings(N)
+
+cat("Phase 1: ", N, " grob measurements, no device, no Rprof\n", sep = "")
+t1 <- system.time(measure_block(strs, gp))
+print(t1)
+
+cat("\nPhase 2: ", N, " measurements, pdf device open, no Rprof\n", sep = "")
+pdf_file <- tempfile(fileext = ".pdf")
+grDevices::pdf(pdf_file, width = 7, height = 5)
+grid::pushViewport(grid::viewport())
+t2 <- system.time(measure_block(strs, gp))
+grid::popViewport()
+grDevices::dev.off()
+unlink(pdf_file)
+print(t2)
+
+cat("\nPhase 3: ", N, " measurements, pdf device, Rprof @ 0.02s interval\n",
+    sep = "")
+pdf_file <- tempfile(fileext = ".pdf")
+grDevices::pdf(pdf_file, width = 7, height = 5)
+grid::pushViewport(grid::viewport())
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.02, line.profiling = TRUE, gc.profiling = TRUE)
+t3 <- system.time(measure_block(strs, gp))
+Rprof(NULL)
+grid::popViewport()
+grDevices::dev.off()
+unlink(pdf_file)
+print(t3)
+cat("  Rprof samples:", nrow(summaryRprof(prof)$by.self), "rows\n")
+
+cat("\nPhase 4: ", N, " measurements, pdf device, Rprof @ 0.001s interval\n",
+    sep = "")
+pdf_file <- tempfile(fileext = ".pdf")
+grDevices::pdf(pdf_file, width = 7, height = 5)
+grid::pushViewport(grid::viewport())
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.001, line.profiling = TRUE, gc.profiling = TRUE)
+t4 <- system.time(measure_block(strs, gp))
+Rprof(NULL)
+grid::popViewport()
+grDevices::dev.off()
+unlink(pdf_file)
+print(t4)
+cat("  Rprof samples:", nrow(summaryRprof(prof)$by.self), "rows\n")
+
+cat("\nPhase 5: repeated open/close + measure cycles, Rprof @ 0.001s\n")
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.001, line.profiling = TRUE, gc.profiling = TRUE)
+t5 <- system.time({
+  for (cyc in 1:50) {
+    pdf_file <- tempfile(fileext = ".pdf")
+    grDevices::pdf(pdf_file, width = 7, height = 5)
+    grid::pushViewport(grid::viewport())
+    measure_block(strs[seq_len(N / 50)], gp)
+    grid::popViewport()
+    grDevices::dev.off()
+    unlink(pdf_file)
+  }
+})
+Rprof(NULL)
+print(t5)
+cat("  Rprof samples:", nrow(summaryRprof(prof)$by.self), "rows\n")
+
+cat("\nNo segfault.\n")
diff --git a/examples/profile_writetfl.R b/examples/profile_writetfl.R
new file mode 100644
index 0000000..368f018
--- /dev/null
+++ b/examples/profile_writetfl.R
@@ -0,0 +1,277 @@
+# examples/profile_writetfl.R
+#
+# Profiling harness for writetfl.  Drives the package through representative
+# inputs and reports where wall-clock time is spent.
+#
+# Usage (from the worktree root):
+#   "C:/Program Files/R/R-4.5.2/bin/Rscript.exe" examples/profile_writetfl.R
+#   Rscript examples/profile_writetfl.R --scenario core_wrap
+#   Rscript examples/profile_writetfl.R --quick     # core scenarios only
+#
+# Scenarios:
+#   core_small    head(mtcars, 20) -> tfl_table -> 1 page
+#   core_wrap     issue35_clinical_df with wrap_balance="height"
+#                   -> exercises wrap floor, water-fill, height-balance
+#   core_paginate iris (150 rows) -> tfl_table -> multi-page row pagination
+#   figure_multi  5-page ggplot list -> isolates export_tfl_page overhead
+#   wrap_demos    source examples/wrap_demos.R (all 18 demos, aggregate)
+#
+# Output:
+#   - bench::mark distribution per scenario (stdout)
+#   - Top-20 self-time functions per scenario (stdout)
+#   - .Rprof and profvis .html files in {tempdir()}/, paths printed
+#   - results saved to {tempdir()}/profile_results_<timestamp>.rds
+
+suppressPackageStartupMessages({
+  devtools::load_all(quiet = TRUE)
+})
+
+have_bench   <- requireNamespace("bench",   quietly = TRUE)
+have_profvis <- requireNamespace("profvis", quietly = TRUE)
+
+if (!have_bench)   message("[note] 'bench' not installed; using system.time() instead")
+if (!have_profvis) message("[note] 'profvis' not installed; skipping HTML traces")
+
+# ---------------------------------------------------------------------------
+# CLI parsing
+# ---------------------------------------------------------------------------
+args <- commandArgs(trailingOnly = TRUE)
+opt_scenario <- NULL
+opt_quick    <- FALSE
+i <- 1L
+while (i <= length(args)) {
+  a <- args[[i]]
+  if (a == "--scenario" && i < length(args)) {
+    opt_scenario <- args[[i + 1L]]
+    i <- i + 2L
+  } else if (a == "--quick") {
+    opt_quick <- TRUE
+    i <- i + 1L
+  } else {
+    stop("Unknown arg: ", a)
+  }
+}
+
+# ---------------------------------------------------------------------------
+# Scenarios
+# ---------------------------------------------------------------------------
+
+scenario_core_small <- function() {
+  out <- tempfile(fileext = ".pdf")
+  tbl <- tfl_table(head(mtcars, 20))
+  export_tfl(tbl, file = out)
+  out
+}
+
+# Mirror of issue35_clinical_df from examples/wrap_demos.R:546-555.
+# Three wrap-eligible string cols + two numeric cols, 10 rows.  With
+# wrap_balance = "height" we exercise the full height-balance greedy search.
+make_clinical_df <- function() {
+  data.frame(
+    ae_term      = rep(paste(rep("Headache mild moderate severe related",
+                                 4), collapse = " "), 10),
+    ae_action    = rep(paste(rep("Drug withdrawn temporarily",
+                                 4), collapse = " "), 10),
+    ae_notes     = rep(paste(rep("Patient continued safely",
+                                 4), collapse = " "), 10),
+    onset_day    = 1:10,
+    duration_day = 11:20,
+    stringsAsFactors = FALSE
+  )
+}
+
+scenario_core_wrap <- function() {
+  out <- tempfile(fileext = ".pdf")
+  tbl <- tfl_table(make_clinical_df(),
+                   col_split_strategy = "balanced",
+                   wrap_balance       = "height")
+  export_tfl(tbl, file = out,
+             pg_width = 6, pg_height = 8.5,
+             min_content_height = grid::unit(1, "inches"))
+  out
+}
+
+scenario_core_paginate <- function() {
+  out <- tempfile(fileext = ".pdf")
+  tbl <- tfl_table(iris)
+  export_tfl(tbl, file = out)
+  out
+}
+
+scenario_figure_multi <- function() {
+  pages <- lapply(seq_len(5L), function(i) {
+    list(content = ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
+                     ggplot2::geom_point() +
+                     ggplot2::ggtitle(sprintf("Page %d", i)),
+         header_left = sprintf("Figure %d.1", i))
+  })
+  out <- tempfile(fileext = ".pdf")
+  export_tfl(pages, file = out)
+  out
+}
+
+scenario_wrap_demos <- function() {
+  # Source wrap_demos.R into an isolated env.  It writes PDFs to a temp
+  # directory and prints progress.  We don't care about the outputs; we
+  # care about the Rprof samples taken while it runs.
+  src <- file.path("examples", "wrap_demos.R")
+  if (!file.exists(src)) stop("wrap_demos.R not found at ", src)
+  env <- new.env(parent = globalenv())
+  sink_file <- tempfile(fileext = ".log")
+  con <- file(sink_file, open = "wt")
+  sink(con); sink(con, type = "message")
+  on.exit({ sink(type = "message"); sink(); close(con) }, add = TRUE)
+  sys.source(src, envir = env)
+  invisible(NULL)
+}
+
+scenarios <- list(
+  core_small    = scenario_core_small,
+  core_wrap     = scenario_core_wrap,
+  core_paginate = scenario_core_paginate,
+  figure_multi  = scenario_figure_multi,
+  wrap_demos    = scenario_wrap_demos
+)
+
+# bench iterations: wrap_demos runs all 18 demos (slow) so iterate once;
+# core scenarios are fast so iterate 5 times to get a usable distribution.
+iterations <- list(
+  core_small    = 5L,
+  core_wrap     = 5L,
+  core_paginate = 5L,
+  figure_multi  = 5L,
+  wrap_demos    = 1L
+)
+
+# ---------------------------------------------------------------------------
+# Scenario selection
+# ---------------------------------------------------------------------------
+if (!is.null(opt_scenario)) {
+  if (!opt_scenario %in% names(scenarios)) {
+    stop("--scenario must be one of: ", paste(names(scenarios), collapse = ", "))
+  }
+  selected <- opt_scenario
+} else if (opt_quick) {
+  selected <- c("core_small", "core_wrap", "core_paginate", "figure_multi")
+} else {
+  selected <- names(scenarios)
+}
+
+# ---------------------------------------------------------------------------
+# Runner: bench + Rprof + profvis for one scenario
+# ---------------------------------------------------------------------------
+run_scenario <- function(name) {
+  fn   <- scenarios[[name]]
+  iter <- iterations[[name]]
+  cat(sprintf("\n================ scenario: %s (%d iterations) ================\n",
+              name, iter))
+
+  # Warm-up: makes timing more representative by paying first-touch costs
+  # (devtools::load_all, grid namespace, etc.) outside the timed window.
+  invisible(fn())
+
+  out <- list(name = name)
+
+  # 1. Wall-clock distribution
+  if (have_bench) {
+    bm <- bench::mark(fn(),
+                      iterations = iter,
+                      check      = FALSE,
+                      filter_gc  = FALSE,
+                      memory     = FALSE)
+    cat("\n-- bench::mark --\n")
+    print(bm[, c("min", "median", "mem_alloc", "n_itr", "n_gc")])
+    out$bench <- bm
+  } else {
+    t <- system.time(for (k in seq_len(iter)) fn())
+    cat("\n-- system.time (total over", iter, "iter) --\n")
+    print(t)
+    out$system_time <- t
+  }
+
+  # 2. Rprof samples.  Save under examples/profile_output/ so files survive
+  # past the Rscript session's tempdir cleanup.
+  stable_dir <- file.path("examples", "profile_output")
+  dir.create(stable_dir, showWarnings = FALSE, recursive = TRUE)
+  prof_file <- file.path(stable_dir,
+                         sprintf("profile_%s_%s.Rprof", name,
+                                 format(Sys.time(), "%Y%m%d_%H%M%S")))
+  # Loop the timed run so Rprof gets enough samples on fast scenarios.
+  # Target ~5 s of profiling per scenario; wrap_demos already gets ~10 s so
+  # skip the repeat there.
+  reps <- if (name == "wrap_demos") 1L else 20L
+  Rprof(prof_file, interval = 0.01, line.profiling = TRUE,
+        gc.profiling = TRUE)
+  for (k in seq_len(reps)) fn()
+  Rprof(NULL)
+  s <- summaryRprof(prof_file, lines = "both")
+  out$rprof_path    <- prof_file
+  out$rprof_summary <- s
+  cat("\n-- top 20 by self.pct (Rprof) --\n")
+  if (nrow(s$by.self) > 0L) {
+    print(utils::head(s$by.self[, c("self.time", "self.pct", "total.time",
+                                    "total.pct")], 20L))
+  } else {
+    cat("(no samples — scenario completed faster than profiling interval)\n")
+  }
+  cat("\n-- top 15 writetfl source lines by self.pct --\n")
+  bs <- s$by.self
+  ix <- grep("^[a-z_]+\\.R#", rownames(bs))
+  if (length(ix) > 0L) {
+    sub <- bs[ix, c("self.time", "self.pct", "total.time", "total.pct"),
+              drop = FALSE]
+    print(utils::head(sub[order(-sub$self.pct), ], 15L))
+  } else {
+    cat("(no source-line samples)\n")
+  }
+  cat("Rprof file:", prof_file, "\n")
+
+  # 3. profvis HTML (optional)
+  if (have_profvis) {
+    html_file <- tempfile(pattern = paste0("profvis_", name, "_"),
+                          fileext = ".html")
+    pv <- profvis::profvis(fn(), interval = 0.01)
+    htmlwidgets::saveWidget(pv, html_file, selfcontained = TRUE)
+    out$profvis_path <- html_file
+    cat("profvis HTML:", html_file, "\n")
+  }
+
+  out
+}
+
+# ---------------------------------------------------------------------------
+# Execute
+# ---------------------------------------------------------------------------
+results <- list()
+for (name in selected) {
+  results[[name]] <- tryCatch(run_scenario(name),
+                              error = function(e) {
+                                cat("[FAIL]", name, ":", conditionMessage(e), "\n")
+                                list(name = name, error = conditionMessage(e))
+                              })
+}
+
+# ---------------------------------------------------------------------------
+# Summary table
+# ---------------------------------------------------------------------------
+cat("\n================ wall-clock summary ================\n")
+for (name in selected) {
+  r <- results[[name]]
+  if (!is.null(r$bench)) {
+    cat(sprintf("  %-15s  median = %s  (n = %d)\n",
+                name,
+                format(r$bench$median),
+                r$bench$n_itr))
+  } else if (!is.null(r$system_time)) {
+    cat(sprintf("  %-15s  elapsed = %.3f s\n",
+                name, r$system_time[["elapsed"]]))
+  } else {
+    cat(sprintf("  %-15s  [error]\n", name))
+  }
+}
+
+# Save raw results for follow-up analysis
+ts  <- format(Sys.time(), "%Y%m%d_%H%M%S")
+rds <- file.path(tempdir(), sprintf("profile_results_%s.rds", ts))
+saveRDS(results, file = rds)
+cat("\nResults saved:", rds, "\n")
diff --git a/examples/segfault_repro_attempt.R b/examples/segfault_repro_attempt.R
new file mode 100644
index 0000000..09f2648
--- /dev/null
+++ b/examples/segfault_repro_attempt.R
@@ -0,0 +1,92 @@
+# examples/segfault_repro_attempt.R
+#
+# **Status: could NOT reproduce the segfault despite multiple stress patterns.**
+#
+# The original crash:
+#   $ Rscript examples/profile_writetfl.R
+#   ... (core_small, core_wrap, core_paginate, figure_multi bench::mark output) ...
+#   /usr/bin/bash: line 1: 671 Segmentation fault   Rscript examples/profile_writetfl.R
+#   exit code 139 (= 128 + 11 = SIGSEGV)
+#
+# The crash landed after figure_multi's bench::mark had printed but before its
+# Rprof block produced any "top 20" output -- so inside:
+#
+#   Rprof(prof_file, interval = 0.01, line.profiling = TRUE, gc.profiling = TRUE)
+#   for (k in seq_len(20L)) figure_scenario()
+#   Rprof(NULL)
+#
+# Reproduction attempts (none triggered the crash):
+#   1. Run examples/profile_writetfl.R end-to-end x3      -> all exit 0
+#   2. Phase 1-3 of this script x5                        -> all exit 0
+#   3. Phase 1-3 with sampling interval reduced to 0.001  -> exit 0
+#   4. examples/grob_stress.R    -> 50,000 textGrob+grobWidth calls per
+#        phase x 5 phases (no device / pdf device / Rprof@0.02 / Rprof@0.001
+#        / 50 device cycles), all exit 0.  Volume hypothesis ruled out at
+#        10x the original scenario's count.
+#   5. examples/ggplot_stress.R  -> 100 + 200 + 200 figure-exports (each
+#        rendering 5 ggplots = up to 1000 internal grob ops apiece) with
+#        Rprof@0.01, Rprof@0.001, and manual gc().  All exit 0.
+#
+# Hypothesised cause: a transient race between Rprof's signal-based sampling
+# and grid's pdf-device teardown.  Each figure_scenario() opens a pdf device,
+# draws 5 ggplots (which push/pop many viewports), and closes the device.
+# Rprof sampling can land mid-teardown when device state is being mutated by
+# grid C code.  A single mis-timed sample, combined with line.profiling
+# walking the call stack into a half-finalised viewport tree, could plausibly
+# produce SIGSEGV without R-level errors.  This is speculative -- without a
+# native-stack crash dump we cannot confirm.
+#
+# What to do if it recurs:
+#   * Set `_R_CRASH_REPORT_=1` (R >= 4.4) before running so the crash writes
+#     /tmp/Rcrash.log with a native stack trace.
+#   * Try `Rprof(interval = 0.02)` (less aggressive sampling) to see if the
+#     crash rate drops.
+#   * Try `Rprof(gc.profiling = FALSE)` to rule out gc-stack-walk + device
+#     teardown interaction.
+#   * Run figure_multi alone (no prior scenarios) under Rprof many times.
+#     If it doesn't crash standalone, the crash depends on cross-scenario
+#     state -- likely a leaked device handle or accumulated grid scratch
+#     state.
+#
+# Until reproducible, no upstream report is actionable.
+
+suppressPackageStartupMessages({
+  devtools::load_all(quiet = TRUE)
+})
+
+figure_scenario <- function() {
+  pages <- lapply(seq_len(5L), function(i)
+    list(content = ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
+                     ggplot2::geom_point() +
+                     ggplot2::ggtitle(sprintf("Page %d", i)),
+         header_left = sprintf("Figure %d.1", i)))
+  out <- tempfile(fileext = ".pdf")
+  export_tfl(pages, file = out)
+  unlink(out)
+}
+
+table_scenario <- function() {
+  out <- tempfile(fileext = ".pdf")
+  export_tfl(tfl_table(iris), file = out)
+  unlink(out)
+}
+
+cat("Phase 1: warm-up the session (5 iris tables + 5 figure exports)\n")
+for (i in 1:5)  table_scenario()
+for (i in 1:5)  figure_scenario()
+
+cat("Phase 2: 20-rep Rprof of figure_scenario (mirrors the harness block)\n")
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.01, line.profiling = TRUE, gc.profiling = TRUE)
+for (k in seq_len(20L)) figure_scenario()
+Rprof(NULL)
+cat("  ok\n")
+
+cat("Phase 3: aggressive sampling (interval = 0.001) on the same block\n")
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.001, line.profiling = TRUE, gc.profiling = TRUE)
+for (k in seq_len(20L)) figure_scenario()
+Rprof(NULL)
+cat("  ok\n")
+
+cat("\nNo segfault.\n")