humanpred · billdenney · May 12, 2026 · May 11, 2026
diff --git a/.gitignore b/.gitignore
@@ -16,3 +16,9 @@ Rplots.pdf
 /docs
 /doc/
 /Meta/
+
+# Profiling artifacts (examples/profile_writetfl.R writes to tempdir(),
+# but ignore these in case anyone redirects them here)
+*.Rprof
+examples/profvis_*.html
+examples/profile_output/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -26,10 +26,12 @@ Imports:
     glue,
     rlang
 Suggests:
+    bench,
     flextable,
     formatters,
     ggtibble,
     gt,
+    profvis,
     rtables,
     table1,
     testthat (>= 3.0.0),

diff --git a/R/table_utils.R b/R/table_utils.R
@@ -260,7 +260,11 @@
 # accept a gp argument in all grid versions.
 .measure_max_string_width <- function(strings, gp) {
   if (length(strings) == 0L) return(0)
-  max(vapply(strings, function(s) {
+  # Dedupe up front: real-world callers pass cell-string vectors where the
+  # same value typically appears in many rows (e.g. category labels, NA
+  # strings), so this saves grid round-trips with no behaviour change.
+  uniq <- unique(strings)
+  max(vapply(uniq, function(s) {
     lines <- strsplit(s, "\n", fixed = TRUE)[[1L]]
     max(vapply(lines, function(ln) {
       grob <- grid::textGrob(ln, gp = gp)

diff --git a/R/wrap.R b/R/wrap.R
@@ -155,9 +155,18 @@ wrap_breaks_default <- function() {
 # ---------------------------------------------------------------------------
 
 # Width of a string under the active viewport's font context, in inches.
-.measure_text_width_in <- function(s, gp) {
+#
+# `cache`, if supplied, is an environment used as a (string -> width) memo
+# scoped to the caller's lifetime.  The caller is responsible for ensuring
+# every cache entry was measured under the same `gp`.
+.measure_text_width_in <- function(s, gp, cache = NULL) {
   if (!nzchar(s)) return(0)
-  .width_in(grid::grobWidth(grid::textGrob(s, gp = gp)))
+  if (!is.null(cache) && exists(s, envir = cache, inherits = FALSE)) {
+    return(get(s, envir = cache, inherits = FALSE))
+  }
+  w <- .width_in(grid::grobWidth(grid::textGrob(s, gp = gp)))
+  if (!is.null(cache)) assign(s, w, envir = cache)
+  w
 }
 
 #' Wrap text to fit a target width, preserving paragraph breaks.
@@ -182,15 +191,21 @@ wrap_breaks_default <- function() {
   if (is.null(text) || !nzchar(text)) return(text)
   if (is.null(breaks)) breaks <- wrap_breaks_default()
 
+  # Per-call width memo: within one wrap the same gp is used throughout, and
+  # the greedy walk re-measures many overlapping `cand` strings.  Sharing one
+  # cache across paragraphs deduplicates those calls.
+  width_cache <- new.env(hash = TRUE, parent = emptyenv())
+
   paragraphs <- strsplit(text, "\n", fixed = TRUE)[[1L]]
   wrapped    <- vapply(paragraphs, function(para) {
     if (!nzchar(para)) return("")
-    .wrap_paragraph(para, available_w_in, gp, breaks)
+    .wrap_paragraph(para, available_w_in, gp, breaks, width_cache)
   }, character(1L))
   paste(wrapped, collapse = "\n")
 }
 
-.wrap_paragraph <- function(para, available_w_in, gp, breaks) {
+.wrap_paragraph <- function(para, available_w_in, gp, breaks,
+                            width_cache = NULL) {
   tokens <- .tokenize_for_wrap(para, breaks)
   if (length(tokens) == 0L) return("")
 
@@ -203,7 +218,8 @@ wrap_breaks_default <- function() {
       next
     }
     cand <- paste0(current_line, tok$lead, tok$text)
-    if (.measure_text_width_in(cand, gp) <= available_w_in + 1e-6) {
+    if (.measure_text_width_in(cand, gp, width_cache) <=
+        available_w_in + 1e-6) {
       current_line <- cand
     } else {
       lines        <- c(lines, current_line)
@@ -246,14 +262,18 @@ wrap_breaks_default <- function() {
 #' @keywords internal
 .column_min_token_width_in <- function(strings, gp, breaks) {
   if (length(strings) == 0L) return(0)
+  # Single shared cache across the column: tokens like "the", units, and
+  # other short repeats appear in many cells and would otherwise each
+  # incur a fresh textGrob+grobWidth+convertWidth round-trip.
+  cache <- new.env(hash = TRUE, parent = emptyenv())
   max(vapply(strings, function(s) {
     if (!nzchar(s)) return(0)
     paragraphs <- strsplit(s, "\n", fixed = TRUE)[[1L]]
     max(vapply(paragraphs, function(p) {
       tokens <- .tokenize_for_wrap(p, breaks)
       if (length(tokens) == 0L) return(0)
       max(vapply(tokens, function(tok) {
-        .measure_text_width_in(tok$text, gp)
+        .measure_text_width_in(tok$text, gp, cache)
       }, numeric(1L)))
     }, numeric(1L)))
   }, numeric(1L)))

diff --git a/design/DECISIONS.md b/design/DECISIONS.md
@@ -1299,3 +1299,105 @@ project owner's stance, no backward-compat constraint is in force.
 Single-page tables (Case A or B) produce identical output under both
 strategies (verified by a regression test).  `wrap_first` is preserved
 as an opt-in for empirical comparison and as an escape hatch.
+
+---
+
+## D-43: Token-width memoization in word-wrap measurement
+
+**Decision:** Add per-call string-width caches inside `.wrap_string()` and
+`.column_min_token_width_in()`, and dedupe inputs in
+`.measure_max_string_width()`.  The optional `cache` argument to
+`.measure_text_width_in()` lets a caller share one memo across many
+measurement calls.
+
+**Context (profiling):** With the wrap module from issue #35 landed, profile
+`tfl_table` / `export_tfl` on representative inputs and ship only
+optimisations that show a real wall-clock win without obscuring the wrap
+algorithms.  Harness lives at `examples/profile_writetfl.R`; a side-by-side
+benchmark lives at `examples/bench_compare.R`.  Both are build-ignored.
+
+Baseline self-time was dominated by `grid::grobWidth` and `grid::textGrob`
+validation paths.  `Rprof(line.profiling = TRUE)` on the 18-demo
+`wrap_demos.R` sweep showed `wrap.R#160` (the `grobWidth(textGrob(s, gp))`
+call inside `.measure_text_width_in()`) accounting for **73.7%** of total
+time, and `wrap.R#255` (per-token measurement inside
+`.column_min_token_width_in()`) accounting for **34.7%**.  Inside
+`.height_balance_widths_impl()` the existing per-(column, width) cache
+already amortises height measurement; the remaining cost was text-width
+re-measurement of repeated tokens and overlapping `cand` substrings inside
+the greedy wrapper.
+
+**Change:**
+- `R/wrap.R` — `.measure_text_width_in()` gains an optional `cache` env
+  parameter.  `.wrap_string()` creates one cache per call and passes it
+  through to `.wrap_paragraph()`.  `.column_min_token_width_in()` creates
+  one cache shared across all strings in the column.
+- `R/table_utils.R` — `.measure_max_string_width()` runs `unique()` on its
+  input vector before measuring.
+
+The cache pattern mirrors the existing `memo` env in
+`measure_row_heights_tbl()` (`R/table_rows.R:38-46`) and the per-(j, width)
+cache in `.height_balance_widths_impl()` (`R/wrap.R:713`).  Each cache is
+scoped to a single function call so lifetimes are obvious and no global
+state leaks.
+
+**Measured improvement** (medians from `examples/bench_compare.R`, 15
+iterations for the core scenarios, 3 for `wrap_demos`):
+
+| Scenario        | Before    | After     | Δ       |
+|-----------------|-----------|-----------|---------|
+| `core_small`    | 231 ms    | 198 ms    | ~14% ↓  |
+| `core_wrap`     | 334 ms    | 225 ms    | ~33% ↓  |
+| `core_paginate` | 583 ms    | 610 ms    | within noise (min-of-mins ~5% ↓) |
+| `figure_multi`  | 342 ms    | 326 ms    | within noise |
+| `wrap_demos`    | 9.88 s    | 3.43 s    | **~65% ↓** |
+
+`wrap_demos` is the broadest signal because it exercises every wrap and
+column-split code path in 18 different configurations.  `core_wrap` is the
+targeted single-scenario probe (clinical fixture with
+`wrap_balance = "height"`).  Both exceed the ≥10% bar by a large margin.
+`core_paginate` runs `tfl_table(iris)` which exercises the Pass-1 natural
+width measurement (where `.measure_max_string_width()` dedup helps) but
+spends most of its time in row drawing; differences fall inside run-to-run
+variance.
+
+**Alternatives considered and rejected:**
+- *Global LRU cache* — confuses lifetimes (when does a cache entry become
+  stale?) and risks leaking state across unrelated calls; the per-call
+  pattern already in the codebase is clearer.
+- *Closure-based `make_width_cache(gp)`* — would bind the `gp` to the cache
+  but requires changing every measurement call site from
+  `.measure_text_width_in(s, gp)` to `measure(s)`.  Equal effect, more
+  surface area than an optional `cache` arg.
+- *Vectorising `.wrap_paragraph()` with width estimates* — the algorithm is
+  intentionally a readable greedy walk; a width-estimate pre-screen would
+  add a separate code path with subtle correctness conditions.  Profile
+  did not justify the cost.
+- *Pre-screening tokens by `nchar` in `.column_min_token_width_in()`* —
+  width is not strictly monotonic in `nchar` once gpar changes are
+  considered, so this requires a lower-bound argument that is easy to get
+  wrong.  Memoization gets the same speedup without changing the algorithm.
+
+**Out of scope / not done:**
+- Touching `.compute_wrapped_widths()`, the water-fill loop, or
+  `.height_balance_widths_impl()`'s search — these had no measurable
+  bottleneck beyond what the existing per-(j, width) cache already handles.
+- Rewriting `drawDetails.tfl_table_grob()` — its fallback branch at
+  `R/table_draw.R:193-209` already short-circuits when the pipeline
+  precomputed `row_heights_in`, which is the normal path.
+
+**Files touched:**
+- `R/wrap.R` — `.measure_text_width_in()`, `.wrap_string()`,
+  `.wrap_paragraph()`, `.column_min_token_width_in()` (+~25 lines)
+- `R/table_utils.R` — `.measure_max_string_width()` (+~5 lines)
+- `examples/profile_writetfl.R` (new) — Rprof + profvis harness
+- `examples/bench_compare.R` (new) — stash-friendly before/after timer
+- `DESCRIPTION` — added `bench`, `profvis` to Suggests
+- `.gitignore` — added `*.Rprof`, `examples/profvis_*.html`,
+  `examples/profile_output/`
+
+**Verification:**
+- Full `devtools::test()` green before and after.
+- `wrap_demos.R` produces identical page counts and PDF byte sizes
+  (visual inspection of `21_*` family files).
+- `examples/bench_compare.R` reproduces the table above.
diff --git a/examples/bench_compare.R b/examples/bench_compare.R
@@ -0,0 +1,87 @@
+# examples/bench_compare.R
+#
+# Standalone benchmark used to compare two code versions side-by-side.
+# Run with `git stash` flipping the optimisation on/off so both timings come
+# from the same R session settings.
+#
+# Usage:
+#   Rscript examples/bench_compare.R              # all scenarios
+#   Rscript examples/bench_compare.R core_wrap    # one scenario
+
+suppressPackageStartupMessages({
+  devtools::load_all(quiet = TRUE)
+})
+
+args <- commandArgs(trailingOnly = TRUE)
+selected <- if (length(args) > 0L) args else
+  c("core_small", "core_wrap", "core_paginate", "figure_multi", "wrap_demos")
+
+make_clinical_df <- function() {
+  data.frame(
+    ae_term      = rep(paste(rep("Headache mild moderate severe related",
+                                 4), collapse = " "), 10),
+    ae_action    = rep(paste(rep("Drug withdrawn temporarily",
+                                 4), collapse = " "), 10),
+    ae_notes     = rep(paste(rep("Patient continued safely",
+                                 4), collapse = " "), 10),
+    onset_day    = 1:10,
+    duration_day = 11:20,
+    stringsAsFactors = FALSE
+  )
+}
+
+scenarios <- list(
+  core_small = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(head(mtcars, 20)), file = out)
+    unlink(out)
+  },
+  core_wrap = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(make_clinical_df(),
+                         col_split_strategy = "balanced",
+                         wrap_balance       = "height"),
+               file = out, pg_width = 6, pg_height = 8.5,
+               min_content_height = grid::unit(1, "inches"))
+    unlink(out)
+  },
+  core_paginate = function() {
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(tfl_table(iris), file = out)
+    unlink(out)
+  },
+  figure_multi = function() {
+    pages <- lapply(seq_len(5L), function(i)
+      list(content = ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
+             ggplot2::geom_point(),
+           header_left = sprintf("Figure %d.1", i)))
+    out <- tempfile(fileext = ".pdf")
+    export_tfl(pages, file = out)
+    unlink(out)
+  },
+  wrap_demos = function() {
+    src <- file.path("examples", "wrap_demos.R")
+    env <- new.env(parent = globalenv())
+    sf  <- tempfile()
+    con <- file(sf, open = "wt")
+    sink(con); sink(con, type = "message")
+    on.exit({ sink(type = "message"); sink(); close(con); unlink(sf) }, add = TRUE)
+    sys.source(src, envir = env)
+  }
+)
+
+iter <- list(core_small = 15L, core_wrap = 15L, core_paginate = 15L,
+             figure_multi = 15L, wrap_demos = 3L)
+
+for (name in selected) {
+  fn <- scenarios[[name]]
+  invisible(fn())  # warmup
+  bm <- bench::mark(fn(), iterations = iter[[name]], check = FALSE,
+                    filter_gc = FALSE, memory = FALSE)
+  cat(sprintf("%-15s  min=%-9s  median=%-9s  mean=%-9s  n=%d\n",
+              name,
+              format(bm$min),
+              format(bm$median),
+              format(bm$mean),
+              bm$n_itr))
+}
diff --git a/examples/ggplot_stress.R b/examples/ggplot_stress.R
@@ -0,0 +1,57 @@
+# examples/ggplot_stress.R
+#
+# The original crash happened inside the figure_multi Rprof block which was
+# 20 invocations of an export that drew 5 ggplots each = 100 ggplot renders
+# under Rprof sampling.  Phase 1 here is 5x that volume.  Phases 2/3 push the
+# sampling and viewport-churn axes.
+#
+# Per ggplot, internally: ~200-500 grob ops + many push/pop viewport pairs.
+# Phase 1 = 500 renders ~= 100k-250k grob ops in the ggplot rendering path.
+
+suppressPackageStartupMessages({
+  library(ggplot2)
+})
+
+one_export <- function() {
+  pages <- lapply(1:5, function(i) {
+    list(content = ggplot(mtcars, aes(hp, mpg)) + geom_point() +
+                     ggtitle(sprintf("Page %d", i)),
+         header_left = sprintf("Figure %d", i))
+  })
+  out <- tempfile(fileext = ".pdf")
+  writetfl::export_tfl(pages, file = out)
+  unlink(out)
+}
+
+devtools::load_all(quiet = TRUE)
+
+cat("Phase 1: 100 figure-export invocations under Rprof@0.01\n")
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.01, line.profiling = TRUE, gc.profiling = TRUE)
+t1 <- system.time({
+  for (k in 1:100) one_export()
+})
+Rprof(NULL)
+print(t1)
+cat("  samples:", nrow(summaryRprof(prof)$by.self), "\n")
+
+cat("\nPhase 2: 200 figure-export invocations under Rprof@0.001 (aggressive)\n")
+prof <- tempfile(fileext = ".Rprof")
+Rprof(prof, interval = 0.001, line.profiling = TRUE, gc.profiling = TRUE)
+t2 <- system.time({
+  for (k in 1:200) one_export()
+})
+Rprof(NULL)
+print(t2)
+cat("  samples:", nrow(summaryRprof(prof)$by.self), "\n")
+
+cat("\nPhase 3: 200 figure-export, no Rprof but force gc every 10 iters\n")
+t3 <- system.time({
+  for (k in 1:200) {
+    one_export()
+    if (k %% 10L == 0L) gc(verbose = FALSE)
+  }
+})
+print(t3)
+
+cat("\nNo segfault.\n")