Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -16,3 +16,9 @@ Rplots.pdf
/docs
/doc/
/Meta/

# Profiling artifacts (examples/profile_writetfl.R writes to tempdir(),
# but ignore these in case anyone redirects them here)
*.Rprof
examples/profvis_*.html
examples/profile_output/
2 changes: 2 additions & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,12 @@ Imports:
glue,
rlang
Suggests:
bench,
flextable,
formatters,
ggtibble,
gt,
profvis,
rtables,
table1,
testthat (>= 3.0.0),
Expand Down
6 changes: 5 additions & 1 deletion R/table_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,11 @@
# accept a gp argument in all grid versions.
.measure_max_string_width <- function(strings, gp) {
if (length(strings) == 0L) return(0)
max(vapply(strings, function(s) {
# Dedupe up front: real-world callers pass cell-string vectors where the
# same value typically appears in many rows (e.g. category labels, NA
# strings), so this saves grid round-trips with no behaviour change.
uniq <- unique(strings)
max(vapply(uniq, function(s) {
lines <- strsplit(s, "\n", fixed = TRUE)[[1L]]
max(vapply(lines, function(ln) {
grob <- grid::textGrob(ln, gp = gp)
Expand Down
32 changes: 26 additions & 6 deletions R/wrap.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,9 +155,18 @@ wrap_breaks_default <- function() {
# ---------------------------------------------------------------------------

# Width of a string under the active viewport's font context, in inches.
.measure_text_width_in <- function(s, gp) {
#
# `cache`, if supplied, is an environment used as a (string -> width) memo
# scoped to the caller's lifetime. The caller is responsible for ensuring
# every cache entry was measured under the same `gp`.
.measure_text_width_in <- function(s, gp, cache = NULL) {
if (!nzchar(s)) return(0)
.width_in(grid::grobWidth(grid::textGrob(s, gp = gp)))
if (!is.null(cache) && exists(s, envir = cache, inherits = FALSE)) {
return(get(s, envir = cache, inherits = FALSE))
}
w <- .width_in(grid::grobWidth(grid::textGrob(s, gp = gp)))
if (!is.null(cache)) assign(s, w, envir = cache)
w
}

#' Wrap text to fit a target width, preserving paragraph breaks.
Expand All @@ -182,15 +191,21 @@ wrap_breaks_default <- function() {
if (is.null(text) || !nzchar(text)) return(text)
if (is.null(breaks)) breaks <- wrap_breaks_default()

# Per-call width memo: within one wrap the same gp is used throughout, and
# the greedy walk re-measures many overlapping `cand` strings. Sharing one
# cache across paragraphs deduplicates those calls.
width_cache <- new.env(hash = TRUE, parent = emptyenv())

paragraphs <- strsplit(text, "\n", fixed = TRUE)[[1L]]
wrapped <- vapply(paragraphs, function(para) {
if (!nzchar(para)) return("")
.wrap_paragraph(para, available_w_in, gp, breaks)
.wrap_paragraph(para, available_w_in, gp, breaks, width_cache)
}, character(1L))
paste(wrapped, collapse = "\n")
}

.wrap_paragraph <- function(para, available_w_in, gp, breaks) {
.wrap_paragraph <- function(para, available_w_in, gp, breaks,
width_cache = NULL) {
tokens <- .tokenize_for_wrap(para, breaks)
if (length(tokens) == 0L) return("")

Expand All @@ -203,7 +218,8 @@ wrap_breaks_default <- function() {
next
}
cand <- paste0(current_line, tok$lead, tok$text)
if (.measure_text_width_in(cand, gp) <= available_w_in + 1e-6) {
if (.measure_text_width_in(cand, gp, width_cache) <=
available_w_in + 1e-6) {
current_line <- cand
} else {
lines <- c(lines, current_line)
Expand Down Expand Up @@ -246,14 +262,18 @@ wrap_breaks_default <- function() {
#' @keywords internal
.column_min_token_width_in <- function(strings, gp, breaks) {
if (length(strings) == 0L) return(0)
# Single shared cache across the column: tokens like "the", units, and
# other short repeats appear in many cells and would otherwise each
# incur a fresh textGrob+grobWidth+convertWidth round-trip.
cache <- new.env(hash = TRUE, parent = emptyenv())
max(vapply(strings, function(s) {
if (!nzchar(s)) return(0)
paragraphs <- strsplit(s, "\n", fixed = TRUE)[[1L]]
max(vapply(paragraphs, function(p) {
tokens <- .tokenize_for_wrap(p, breaks)
if (length(tokens) == 0L) return(0)
max(vapply(tokens, function(tok) {
.measure_text_width_in(tok$text, gp)
.measure_text_width_in(tok$text, gp, cache)
}, numeric(1L)))
}, numeric(1L)))
}, numeric(1L)))
Expand Down
102 changes: 102 additions & 0 deletions design/DECISIONS.md
Original file line number Diff line number Diff line change
Expand Up @@ -1299,3 +1299,105 @@ project owner's stance, no backward-compat constraint is in force.
Single-page tables (Case A or B) produce identical output under both
strategies (verified by a regression test). `wrap_first` is preserved
as an opt-in for empirical comparison and as an escape hatch.

---

## D-43: Token-width memoization in word-wrap measurement

**Decision:** Add per-call string-width caches inside `.wrap_string()` and
`.column_min_token_width_in()`, and dedupe inputs in
`.measure_max_string_width()`. The optional `cache` argument to
`.measure_text_width_in()` lets a caller share one memo across many
measurement calls.

**Context (profiling):** With the wrap module from issue #35 landed, profile
`tfl_table` / `export_tfl` on representative inputs and ship only
optimisations that show a real wall-clock win without obscuring the wrap
algorithms. Harness lives at `examples/profile_writetfl.R`; a side-by-side
benchmark lives at `examples/bench_compare.R`. Both are build-ignored.

Baseline self-time was dominated by `grid::grobWidth` and `grid::textGrob`
validation paths. `Rprof(line.profiling = TRUE)` on the 18-demo
`wrap_demos.R` sweep showed `wrap.R#160` (the `grobWidth(textGrob(s, gp))`
call inside `.measure_text_width_in()`) accounting for **73.7%** of total
time, and `wrap.R#255` (per-token measurement inside
`.column_min_token_width_in()`) accounting for **34.7%**. Inside
`.height_balance_widths_impl()` the existing per-(column, width) cache
already amortises height measurement; the remaining cost was text-width
re-measurement of repeated tokens and overlapping `cand` substrings inside
the greedy wrapper.

**Change:**
- `R/wrap.R` — `.measure_text_width_in()` gains an optional `cache` env
parameter. `.wrap_string()` creates one cache per call and passes it
through to `.wrap_paragraph()`. `.column_min_token_width_in()` creates
one cache shared across all strings in the column.
- `R/table_utils.R` — `.measure_max_string_width()` runs `unique()` on its
input vector before measuring.

The cache pattern mirrors the existing `memo` env in
`measure_row_heights_tbl()` (`R/table_rows.R:38-46`) and the per-(j, width)
cache in `.height_balance_widths_impl()` (`R/wrap.R:713`). Each cache is
scoped to a single function call so lifetimes are obvious and no global
state leaks.

**Measured improvement** (medians from `examples/bench_compare.R`, 15
iterations for the core scenarios, 3 for `wrap_demos`):

| Scenario | Before | After | Δ |
|-----------------|-----------|-----------|---------|
| `core_small` | 231 ms | 198 ms | ~14% ↓ |
| `core_wrap` | 334 ms | 225 ms | ~33% ↓ |
| `core_paginate` | 583 ms | 610 ms | within noise (min-of-mins ~5% ↓) |
| `figure_multi` | 342 ms | 326 ms | within noise |
| `wrap_demos` | 9.88 s | 3.43 s | **~65% ↓** |

`wrap_demos` is the broadest signal because it exercises every wrap and
column-split code path in 18 different configurations. `core_wrap` is the
targeted single-scenario probe (clinical fixture with
`wrap_balance = "height"`). Both exceed the ≥10% bar by a large margin.
`core_paginate` runs `tfl_table(iris)` which exercises the Pass-1 natural
width measurement (where `.measure_max_string_width()` dedup helps) but
spends most of its time in row drawing; differences fall inside run-to-run
variance.

**Alternatives considered and rejected:**
- *Global LRU cache* — confuses lifetimes (when does a cache entry become
stale?) and risks leaking state across unrelated calls; the per-call
pattern already in the codebase is clearer.
- *Closure-based `make_width_cache(gp)`* — would bind the `gp` to the cache
but requires changing every measurement call site from
`.measure_text_width_in(s, gp)` to `measure(s)`. Equal effect, more
surface area than an optional `cache` arg.
- *Vectorising `.wrap_paragraph()` with width estimates* — the algorithm is
intentionally a readable greedy walk; a width-estimate pre-screen would
add a separate code path with subtle correctness conditions. Profile
did not justify the cost.
- *Pre-screening tokens by `nchar` in `.column_min_token_width_in()`* —
width is not strictly monotonic in `nchar` once gpar changes are
considered, so this requires a lower-bound argument that is easy to get
wrong. Memoization gets the same speedup without changing the algorithm.

**Out of scope / not done:**
- Touching `.compute_wrapped_widths()`, the water-fill loop, or
`.height_balance_widths_impl()`'s search — these had no measurable
bottleneck beyond what the existing per-(j, width) cache already handles.
- Rewriting `drawDetails.tfl_table_grob()` — its fallback branch at
`R/table_draw.R:193-209` already short-circuits when the pipeline
precomputed `row_heights_in`, which is the normal path.

**Files touched:**
- `R/wrap.R` — `.measure_text_width_in()`, `.wrap_string()`,
`.wrap_paragraph()`, `.column_min_token_width_in()` (+~25 lines)
- `R/table_utils.R` — `.measure_max_string_width()` (+~5 lines)
- `examples/profile_writetfl.R` (new) — Rprof + profvis harness
- `examples/bench_compare.R` (new) — stash-friendly before/after timer
- `DESCRIPTION` — added `bench`, `profvis` to Suggests
- `.gitignore` — added `*.Rprof`, `examples/profvis_*.html`,
`examples/profile_output/`

**Verification:**
- Full `devtools::test()` green before and after.
- `wrap_demos.R` produces identical page counts and PDF byte sizes
(visual inspection of `21_*` family files).
- `examples/bench_compare.R` reproduces the table above.
87 changes: 87 additions & 0 deletions examples/bench_compare.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# examples/bench_compare.R
#
# Standalone benchmark used to compare two code versions side-by-side.
# Run with `git stash` flipping the optimisation on/off so both timings come
# from the same R session settings.
#
# Usage:
# Rscript examples/bench_compare.R # all scenarios
# Rscript examples/bench_compare.R core_wrap # one scenario

suppressPackageStartupMessages({
devtools::load_all(quiet = TRUE)
})

args <- commandArgs(trailingOnly = TRUE)
selected <- if (length(args) > 0L) args else
c("core_small", "core_wrap", "core_paginate", "figure_multi", "wrap_demos")

make_clinical_df <- function() {
data.frame(
ae_term = rep(paste(rep("Headache mild moderate severe related",
4), collapse = " "), 10),
ae_action = rep(paste(rep("Drug withdrawn temporarily",
4), collapse = " "), 10),
ae_notes = rep(paste(rep("Patient continued safely",
4), collapse = " "), 10),
onset_day = 1:10,
duration_day = 11:20,
stringsAsFactors = FALSE
)
}

scenarios <- list(
core_small = function() {
out <- tempfile(fileext = ".pdf")
export_tfl(tfl_table(head(mtcars, 20)), file = out)
unlink(out)
},
core_wrap = function() {
out <- tempfile(fileext = ".pdf")
export_tfl(tfl_table(make_clinical_df(),
col_split_strategy = "balanced",
wrap_balance = "height"),
file = out, pg_width = 6, pg_height = 8.5,
min_content_height = grid::unit(1, "inches"))
unlink(out)
},
core_paginate = function() {
out <- tempfile(fileext = ".pdf")
export_tfl(tfl_table(iris), file = out)
unlink(out)
},
figure_multi = function() {
pages <- lapply(seq_len(5L), function(i)
list(content = ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
ggplot2::geom_point(),
header_left = sprintf("Figure %d.1", i)))
out <- tempfile(fileext = ".pdf")
export_tfl(pages, file = out)
unlink(out)
},
wrap_demos = function() {
src <- file.path("examples", "wrap_demos.R")
env <- new.env(parent = globalenv())
sf <- tempfile()
con <- file(sf, open = "wt")
sink(con); sink(con, type = "message")
on.exit({ sink(type = "message"); sink(); close(con); unlink(sf) }, add = TRUE)
sys.source(src, envir = env)
}
)

iter <- list(core_small = 15L, core_wrap = 15L, core_paginate = 15L,
figure_multi = 15L, wrap_demos = 3L)

for (name in selected) {
fn <- scenarios[[name]]
invisible(fn()) # warmup
bm <- bench::mark(fn(), iterations = iter[[name]], check = FALSE,
filter_gc = FALSE, memory = FALSE)
cat(sprintf("%-15s min=%-9s median=%-9s mean=%-9s n=%d\n",
name,
format(bm$min),
format(bm$median),
format(bm$mean),
bm$n_itr))
}
57 changes: 57 additions & 0 deletions examples/ggplot_stress.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# examples/ggplot_stress.R
#
# The original crash happened inside the figure_multi Rprof block which was
# 20 invocations of an export that drew 5 ggplots each = 100 ggplot renders
# under Rprof sampling. Phase 1 here is 5x that volume. Phases 2/3 push the
# sampling and viewport-churn axes.
#
# Per ggplot, internally: ~200-500 grob ops + many push/pop viewport pairs.
# Phase 1 = 500 renders ~= 100k-250k grob ops in the ggplot rendering path.

suppressPackageStartupMessages({
library(ggplot2)
})

one_export <- function() {
pages <- lapply(1:5, function(i) {
list(content = ggplot(mtcars, aes(hp, mpg)) + geom_point() +
ggtitle(sprintf("Page %d", i)),
header_left = sprintf("Figure %d", i))
})
out <- tempfile(fileext = ".pdf")
writetfl::export_tfl(pages, file = out)
unlink(out)
}

devtools::load_all(quiet = TRUE)

cat("Phase 1: 100 figure-export invocations under Rprof@0.01\n")
prof <- tempfile(fileext = ".Rprof")
Rprof(prof, interval = 0.01, line.profiling = TRUE, gc.profiling = TRUE)
t1 <- system.time({
for (k in 1:100) one_export()
})
Rprof(NULL)
print(t1)
cat(" samples:", nrow(summaryRprof(prof)$by.self), "\n")

cat("\nPhase 2: 200 figure-export invocations under Rprof@0.001 (aggressive)\n")
prof <- tempfile(fileext = ".Rprof")
Rprof(prof, interval = 0.001, line.profiling = TRUE, gc.profiling = TRUE)
t2 <- system.time({
for (k in 1:200) one_export()
})
Rprof(NULL)
print(t2)
cat(" samples:", nrow(summaryRprof(prof)$by.self), "\n")

cat("\nPhase 3: 200 figure-export, no Rprof but force gc every 10 iters\n")
t3 <- system.time({
for (k in 1:200) {
one_export()
if (k %% 10L == 0L) gc(verbose = FALSE)
}
})
print(t3)

cat("\nNo segfault.\n")
Loading
Loading