Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
469 changes: 382 additions & 87 deletions R/table_columns.R

Large diffs are not rendered by default.

212 changes: 136 additions & 76 deletions R/table_pagelist.R
Original file line number Diff line number Diff line change
Expand Up @@ -137,89 +137,149 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots,
resolved_cols <- resolve_col_specs(tbl)
n_group_cols <- length(tbl$group_vars)

# --- Step 4: Compute column widths and determine column groups ---
# Keep a pre-width copy of resolved_cols in case a second pass is needed.
resolved_cols_0 <- resolved_cols
col_result <- compute_col_widths(
resolved_cols, tbl$data, cw, tbl, pg_width, pg_height, margins,
overflow_action = overflow_action
)
resolved_cols <- col_result$resolved_cols # widths now set in inches
col_groups <- col_result$col_groups # list of integer vectors
has_col_split <- length(col_groups) > 1L

# Second pass: if a column split was detected and col_cont_msg labels will
# appear, reserve half a character-height at each labelled viewport edge so
# table content does not overlap the rotated annotations. Labels appear on:
# left side — every col page that is NOT the first (col_cont_msg[[1L]])
# right side — every col page that is NOT the last (col_cont_msg[[2L]])
# Both conditions arise whenever n_col_groups > 1, so reduce cw by the
# relevant label half-widths and re-compute with the tighter constraint.
if (has_col_split && !is.null(tbl$col_cont_msg)) {
hw <- col_result$col_cont_label_half_w
cw_adj <- cw
if (!is.null(tbl$col_cont_msg[[1L]])) cw_adj <- cw_adj - hw
if (!is.null(tbl$col_cont_msg[[2L]])) cw_adj <- cw_adj - hw
col_result <- compute_col_widths(
resolved_cols_0, tbl$data, cw_adj, tbl, pg_width, pg_height, margins,
overflow_action = overflow_action,
validate_overflow = FALSE # first pass already validated
)
resolved_cols <- col_result$resolved_cols
col_groups <- col_result$col_groups
has_col_split <- length(col_groups) > 1L
}

# --- Step 5: Measure row heights ---
# Open scratch PDF device for height measurement
scratch_file_rh <- tempfile(fileext = ".pdf")
grDevices::pdf(scratch_file_rh, width = pg_width, height = pg_height)
outer_vp <- .make_outer_vp(margins)
grid::pushViewport(outer_vp)
on.exit({
grid::popViewport()
grDevices::dev.off()
unlink(scratch_file_rh)
}, add = TRUE)

breaks <- tbl$wrap_breaks %||% wrap_breaks_default()
# --- Step 4-6: Compute column widths, measure row heights, paginate ---
# Under col_split_strategy = "balanced", a row whose wrapped height exceeds
# the available page content height triggers a retry: the bottleneck
# column's minimum width is raised by `step_in` and the whole width-
# measurement-pagination loop runs again. Up to `row_overflow_max_retries`
# retries (default 5; 0L disables). After the cap the final paginate_rows
# call is made with the user's overflow_action so the standard error/warn
# path fires.
resolved_cols_0 <- resolved_cols # pre-width snapshot for re-runs
breaks <- tbl$wrap_breaks %||% wrap_breaks_default()
wrap_extra_pad_in <- if (!is.null(tbl$wrap_extra_padding)) {
.height_in(tbl$wrap_extra_padding)
} else 0
strategy <- tbl$col_split_strategy %||% "balanced"
max_retries <- as.integer(tbl$row_overflow_max_retries %||% 5L)
use_retry_loop <- identical(strategy, "balanced") && max_retries > 0L
floor_step_in <- 0.25 # how much to widen on each retry
floor_overrides <- numeric(0L)
names(floor_overrides) <- character(0L)
retries <- 0L

# Per-iteration helper: opens a fresh row-height scratch device, runs
# the measurement + pagination phase, closes the scratch device, and
# returns (row_pages, cell_h_mat, cont_row_h). The scratch device's
# lifecycle is fully contained inside this helper so the surrounding
# retry loop never holds a viewport across a compute_col_widths()
# call (compute_col_widths() opens its own scratch devices internally).
.run_pagination_iter <- function(resolved_cols, collect_overflows) {
scratch_file_rh <- tempfile(fileext = ".pdf")
grDevices::pdf(scratch_file_rh, width = pg_width, height = pg_height)
rh_outer_vp <- .make_outer_vp(margins)
grid::pushViewport(rh_outer_vp)
on.exit({
grid::popViewport()
grDevices::dev.off()
unlink(scratch_file_rh)
}, add = TRUE)

header_row_h <- if (tbl$show_col_names) {
.measure_header_row_height(resolved_cols, tbl$gp, tbl$cell_padding,
tbl$line_height, breaks = breaks,
wrap_extra_pad_in = wrap_extra_pad_in)
} else 0

cell_h_mat <- measure_row_heights_tbl(
tbl$data, resolved_cols, tbl$gp, tbl$cell_padding,
tbl$na_string, tbl$line_height, tbl$max_measure_rows,
breaks = breaks,
wrap_extra_pad_in = wrap_extra_pad_in
)

header_row_h <- if (tbl$show_col_names) {
.measure_header_row_height(resolved_cols, tbl$gp, tbl$cell_padding,
tbl$line_height, breaks = breaks,
wrap_extra_pad_in = wrap_extra_pad_in)
} else 0
cont_row_h <- max(
.measure_cont_row_height(tbl$row_cont_msg[[1L]], tbl$gp, tbl$cell_padding,
tbl$line_height),
.measure_cont_row_height(tbl$row_cont_msg[[2L]], tbl$gp, tbl$cell_padding,
tbl$line_height)
)

cell_h_mat <- measure_row_heights_tbl(
tbl$data, resolved_cols, tbl$gp, tbl$cell_padding,
tbl$na_string, tbl$line_height, tbl$max_measure_rows,
breaks = breaks,
wrap_extra_pad_in = wrap_extra_pad_in
)
pr_args <- list(
tbl$data, cell_h_mat, resolved_cols, tbl$group_vars,
cont_row_h, header_row_h, ch,
tbl$row_cont_msg, tbl$group_rule,
suppress_repeated_groups = isTRUE(tbl$suppress_repeated_groups),
collect_overflows = collect_overflows
)
if (!collect_overflows) {
pr_args$overflow_action <- overflow_action
}
pr_res <- do.call(paginate_rows, pr_args)
list(
pr_res = pr_res,
cell_h_mat = cell_h_mat,
cont_row_h = cont_row_h
)
}

# cont_row_h: height of a (continued) row — measure the cont message text
cont_row_h <- max(
.measure_cont_row_height(tbl$row_cont_msg[[1L]], tbl$gp, tbl$cell_padding,
tbl$line_height),
.measure_cont_row_height(tbl$row_cont_msg[[2L]], tbl$gp, tbl$cell_padding,
tbl$line_height)
)
repeat {
col_result <- compute_col_widths(
resolved_cols_0, tbl$data, cw, tbl, pg_width, pg_height, margins,
overflow_action = overflow_action,
floor_overrides = floor_overrides
)
resolved_cols <- col_result$resolved_cols
col_groups <- col_result$col_groups
has_col_split <- length(col_groups) > 1L

# Rule heights: rules are drawn within existing space (0 height), but
# we need to know if we should budget for them when computing page capacity.
# Approach: rules are infinitesimally thin — they don't consume row space.

# --- Step 6: Paginate rows ---
row_pages <- paginate_rows(
tbl$data, cell_h_mat, resolved_cols, tbl$group_vars,
cont_row_h, header_row_h, ch,
tbl$row_cont_msg, tbl$group_rule,
suppress_repeated_groups = isTRUE(tbl$suppress_repeated_groups),
overflow_action = overflow_action
)
# Second pass: if a column split was detected and col_cont_msg labels
# will appear, reserve half a character-height at each labelled
# viewport edge so table content does not overlap the rotated
# annotations. Labels appear on every column page that is NOT first
# (left side) and NOT last (right side); both conditions arise
# whenever n_col_groups > 1, so reduce cw by the relevant label
# half-widths and re-compute with the tighter constraint.
if (has_col_split && !is.null(tbl$col_cont_msg)) {
hw <- col_result$col_cont_label_half_w
cw_adj <- cw
if (!is.null(tbl$col_cont_msg[[1L]])) cw_adj <- cw_adj - hw
if (!is.null(tbl$col_cont_msg[[2L]])) cw_adj <- cw_adj - hw
col_result <- compute_col_widths(
resolved_cols_0, tbl$data, cw_adj, tbl, pg_width, pg_height, margins,
overflow_action = overflow_action,
validate_overflow = FALSE,
floor_overrides = floor_overrides
)
resolved_cols <- col_result$resolved_cols
col_groups <- col_result$col_groups
has_col_split <- length(col_groups) > 1L
}

if (use_retry_loop && retries < max_retries) {
iter_res <- .run_pagination_iter(resolved_cols, collect_overflows = TRUE)
if (length(iter_res$pr_res$overflows) == 0L) {
row_pages <- iter_res$pr_res$pages
cell_h_mat <- iter_res$cell_h_mat
cont_row_h <- iter_res$cont_row_h
break
}
# Raise the bottleneck column's floor for the next retry.
for (ev in iter_res$pr_res$overflows) {
bot_j <- ev$bottleneck_col
if (bot_j < 1L || bot_j > length(resolved_cols)) next
cs <- resolved_cols[[bot_j]]
cur_w <- cs$width_in
new_floor <- cur_w + floor_step_in
prev <- if (cs$col %in% names(floor_overrides)) {
floor_overrides[[cs$col]]
} else {
0
}
floor_overrides[cs$col] <- max(prev, new_floor)
}
retries <- retries + 1L
# Loop back: recompute widths with the new floors.
} else {
# No retries left (or wrap_first mode): make the final call with the
# user's overflow_action so error/warn fires through the normal path.
iter_res <- .run_pagination_iter(resolved_cols, collect_overflows = FALSE)
row_pages <- iter_res$pr_res
cell_h_mat <- iter_res$cell_h_mat
cont_row_h <- iter_res$cont_row_h
break
}
}

# --- Step 7: Assemble page specs ---
n_rp <- length(row_pages)
Expand Down
61 changes: 49 additions & 12 deletions R/table_rows.R
Original file line number Diff line number Diff line change
Expand Up @@ -235,27 +235,44 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding,
#' exceeds the available page content height (a row that wraps to taller
#' than one page is almost always a sign of input that needs to change).
#' The same knob downgrades column-overflow events; see [export_tfl_page()].
#' @return A list of row-page specs, each with `$rows`, `$is_cont_top`,
#' `$is_cont_bottom`, `$group_starts`, and `$row_heights_in` (the committed
#' per-row heights for that page in inches).
#' Ignored when `collect_overflows = TRUE`.
#' @param collect_overflows Logical. When `FALSE` (default) the function
#' behaves as before: row-overflow events are routed through
#' `overflow_action`. When `TRUE` the function does *not* signal on
#' overflow but instead collects the events and returns them alongside
#' the page specs, so a caller can iterate (see the row-overflow retry
#' loop in `.tfl_table_to_pagelist_default()`).
#' @return When `collect_overflows = FALSE` (default), a list of row-page
#' specs, each with `$rows`, `$is_cont_top`, `$is_cont_bottom`,
#' `$group_starts`, and `$row_heights_in` (the committed per-row heights
#' for that page in inches).
#' When `collect_overflows = TRUE`, a 2-element list:
#' `$pages` (as above) and `$overflows` (a list of overflow events,
#' each `list(row, bottleneck_col, cell_height_in)`; empty if no
#' row overflowed).
#' @keywords internal
paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars,
cont_row_h, header_row_h, content_height_in,
row_cont_msg, group_rule,
suppress_repeated_groups = TRUE,
overflow_action = "error") {
overflow_action = "error",
collect_overflows = FALSE) {
n_rows <- nrow(data)

# Group boundaries in the *full* data — used for the page-spec $group_starts
# field that the drawing code consults for group-rule placement.
group_starts <- .compute_group_starts(data, group_vars)

pages <- list()
overflows <- list()
cur_rows <- integer(0L)
committed_rh <- numeric(0L) # heights for cur_rows after last successful add
is_cont_top <- FALSE
errors <- character(0L)

wrap_elig <- vapply(resolved_cols, function(cs) isTRUE(cs$wrap),
logical(1L))

flush_page <- function(rows, row_heights_in, is_cont_top, is_cont_bottom) {
pages[[length(pages) + 1L]] <<- list(
rows = rows,
Expand Down Expand Up @@ -313,14 +330,30 @@ paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars,
(if (is_cont_top) cont_row_h else 0) +
sum(rh)
if (min_required > content_height_in + 1e-6) {
msg <- sprintf(
paste0("Row %d of the table wraps to a height (%.3g in) that ",
"exceeds the available page content height (%.3g in). ",
"Reduce the cell content, increase the page height, widen ",
"the column, or set the column to wrap less aggressively."),
i, sum(rh), content_height_in
)
errors <- .overflow_signal(msg, overflow_action, errors)
if (collect_overflows) {
# Identify the bottleneck column: the wrap-eligible column whose
# cell in row i has the greatest measured height. Fall back to
# the tallest cell overall if no wrap-eligible column is present
# in this row (the caller's retry loop will still know which row
# was the problem).
cell_heights <- cell_h_mat[i, ]
cand_h <- cell_heights * as.numeric(wrap_elig)
bot_j <- if (any(cand_h > 0)) which.max(cand_h) else which.max(cell_heights)
overflows[[length(overflows) + 1L]] <- list(
row = i,
bottleneck_col = bot_j,
cell_height_in = cell_heights[[bot_j]]
)
} else {
msg <- sprintf(
paste0("Row %d of the table wraps to a height (%.3g in) that ",
"exceeds the available page content height (%.3g in). ",
"Reduce the cell content, increase the page height, widen ",
"the column, or set the column to wrap less aggressively."),
i, sum(rh), content_height_in
)
errors <- .overflow_signal(msg, overflow_action, errors)
}
}
# Fall through to commit the row.
}
Expand All @@ -335,6 +368,10 @@ paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars,
flush_page(cur_rows, committed_rh, is_cont_top, is_cont_bottom = FALSE)
}

if (collect_overflows) {
return(list(pages = pages, overflows = overflows))
}

if (length(errors) > 0L) {
rlang::abort(paste(errors, collapse = "\n"))
}
Expand Down
40 changes: 40 additions & 0 deletions R/tfl_table.R
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,29 @@ tfl_colspec <- function(col,
#' wrapped or `\n`-broken text. Default `unit(0.5, "lines")`. Set to
#' `unit(0, "lines")` to disable. Only multi-line cells receive the extra;
#' single-line cells are unaffected.
#' @param col_split_strategy Either `"balanced"` (default) or `"wrap_first"`.
#' Controls the order in which text wrapping and page-column-split
#' interact when a table is wider than one page.
#'
#' * `"balanced"` (default, introduced for issue #35): page-split using
#' the **minimum** survivable column widths for capacity planning, then
#' water-fill within each page so columns can be as wide as that page's
#' horizontal slack allows. Group columns are pinned at their minimum
#' width so per-page data columns get the most slack. Multi-page tables
#' end up with less-wrapped columns per page than the legacy strategy.
#' * `"wrap_first"` (pre-issue-35 behaviour): water-fill the whole table
#' down to one page's content width, then page-split using the post-
#' wrap widths. Every page-column-split page shows the same heavily
#' wrapped columns. Kept temporarily for empirical comparison; will be
#' removed in a future release if the `"balanced"` default proves
#' consistently better.
#' @param row_overflow_max_retries Non-negative integer. Maximum number of
#' times the `"balanced"` strategy will retry when `paginate_rows()`
#' reports a row whose wrapped height exceeds the page. Each retry
#' raises the bottleneck column's minimum width by 0.25 inches and
#' re-runs the width pipeline. `0L` disables the retry loop entirely
#' (the first row-overflow goes straight to `overflow_action`). Default
#' `5L`. Ignored when `col_split_strategy = "wrap_first"`.
#' @param max_measure_rows Positive numeric or `Inf` (default). Maximum number
#' of unique cell strings sampled per column when computing content-based
#' column widths. Strings are sampled in descending order of `nchar()` so
Expand Down Expand Up @@ -314,6 +337,8 @@ tfl_table <- function(x,
cell_padding = grid::unit(c(0.2, 0.5), "lines"),
line_height = 1.05,
wrap_extra_padding = grid::unit(0.5, "lines"),
col_split_strategy = c("balanced", "wrap_first"),
row_overflow_max_retries = 5L,
max_measure_rows = Inf) {

# --- Validate x ---
Expand Down Expand Up @@ -417,6 +442,19 @@ tfl_table <- function(x,
}
wrap_balance <- match.arg(wrap_balance)

# --- Validate col_split_strategy ---
if (!is.character(col_split_strategy) ||
length(col_split_strategy) == 0L ||
!all(col_split_strategy %in% c("balanced", "wrap_first"))) {
rlang::abort('`col_split_strategy` must be "balanced" or "wrap_first".')
}
col_split_strategy <- match.arg(col_split_strategy)

# --- Validate row_overflow_max_retries ---
checkmate::assert_int(row_overflow_max_retries, lower = 0L,
.var.name = "row_overflow_max_retries")
row_overflow_max_retries <- as.integer(row_overflow_max_retries)

# --- Validate min_col_width ---
checkmate::assert_class(min_col_width, "unit", .var.name = "min_col_width")

Expand Down Expand Up @@ -517,6 +555,8 @@ tfl_table <- function(x,
cell_padding = cell_padding,
line_height = line_height,
wrap_extra_padding = wrap_extra_padding,
col_split_strategy = col_split_strategy,
row_overflow_max_retries = row_overflow_max_retries,
max_measure_rows = max_measure_rows
),
class = "tfl_table"
Expand Down
Loading
Loading