diff --git a/.Rbuildignore b/.Rbuildignore index b2c217a..b77d519 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -2,6 +2,7 @@ ^CLAUDE\.md$ ^docs$ ^design$ +^examples$ ^build_docs\.R$ ^.*\.pdf$ ^\.github$ diff --git a/R/table_columns.R b/R/table_columns.R index d04ed48..d3d437f 100644 --- a/R/table_columns.R +++ b/R/table_columns.R @@ -3,8 +3,11 @@ # Functions: # resolve_col_specs() — merge tfl_colspec + flat tfl_table() args per column # compute_col_widths() — auto-size, relative, fixed, wrap; returns widths + groups -# .apply_col_wrapping() — iteratively narrow wrap-eligible columns to fit # paginate_cols() — split column indices into per-page groups +# +# The text-wrap narrowing pass that used to live here as .apply_col_wrapping() +# now lives in R/wrap.R as .compute_wrapped_widths() so it can also be used +# (and disabled) as a coherent module. # --------------------------------------------------------------------------- # resolve_col_specs() — merge tfl_colspec + flat args into unified list @@ -38,14 +41,27 @@ resolve_col_specs <- function(tbl) { align <- spec$align %||% .nlookup(tbl$col_align, cn) %||% .default_align(tbl$data[[cn]]) - # Wrap: tfl_colspec > wrap_cols flat arg - wrap <- if (!is.null(spec$wrap)) { - spec$wrap + # Wrap: tfl_colspec > wrap_cols flat arg. + # Result is logical of length 1: TRUE / FALSE / NA. NA means "auto-detect + # based on whether the column contains a break character" and is resolved + # to TRUE / FALSE inside compute_col_widths() once the data and break spec + # are in scope. + spec_wrap <- spec$wrap + wrap <- if (!is.null(spec_wrap) && !is.na(spec_wrap)) { + as.logical(spec_wrap) } else { w <- tbl$wrap_cols - if (isTRUE(w)) !is_group_col # TRUE = all data cols eligible - else if (isFALSE(w)) FALSE - else cn %in% w + if (identical(w, "auto")) { + if (is_group_col) FALSE else NA + } else if (isTRUE(w)) { + !is_group_col + } else if (isFALSE(w)) { + FALSE + } else if (is.character(w)) { + !is_group_col && cn %in% w + } else { + FALSE # nocov - validated upstream + } } # gp: tfl_colspec$gp (group cols only, already validated at construction) @@ -114,13 +130,22 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, } else if (is.numeric(w) && !is.null(w)) { NA_real_ # relative weight — resolved in second pass } else { - # NULL / missing — auto-size from content + # NULL / missing - auto-size from content. Header labels are rendered + # with the header_row gpar (typically bold) and cells with the cell + # gpar (regular); measuring a bold header with the regular-weight + # cell gpar undersizes the column and makes the rendered header bleed + # into the next column. Measure each kind of text with its actual + # rendering gpar and take the larger of the two as the natural width. cell_gp <- .gp_with_lineheight( .resolve_table_cell_gp(tbl$gp, cs$is_group_col), tbl$line_height ) - strings <- .collect_col_strings(data[[cs$col]], cs$label, na_str, max_rows) - w_max <- .measure_max_string_width(strings, cell_gp) - max(min_in, w_max + h_pad_in) + hdr_gp <- .gp_with_lineheight( + .resolve_table_gp(tbl$gp, "header_row"), tbl$line_height + ) + parts <- .split_col_strings(data[[cs$col]], cs$label, na_str, max_rows) + w_data <- .measure_max_string_width(parts$data, cell_gp) + w_hdr <- .measure_max_string_width(parts$header, hdr_gp) + max(min_in, max(w_data, w_hdr) + h_pad_in) } }, numeric(1L)) @@ -160,13 +185,44 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, }, numeric(1L)) } + # --- Resolve auto-detect wrap eligibility (cs$wrap == NA) --- + # The "auto" mode marks data columns as NA in resolve_col_specs(); we + # promote each NA to TRUE / FALSE here based on whether the column actually + # contains a break character. Skipping a column with no breakable text + # avoids wasting a wrap pass on numeric / single-token columns where it + # could not narrow the width anyway. + breaks <- tbl$wrap_breaks %||% wrap_breaks_default() + for (j in seq_len(n_cols)) { + cs_j <- resolved_cols[[j]] + if (is.logical(cs_j$wrap) && length(cs_j$wrap) == 1L && is.na(cs_j$wrap)) { + strings <- .collect_col_strings(data[[cs_j$col]], cs_j$label, + na_str, max_rows) + resolved_cols[[j]]$wrap <- .column_has_breakable_text(strings, breaks) + } + } + # --- Attempt word-wrap if total exceeds content width --- total_w <- sum(widths_in) if (total_w > content_width_in + 1e-6) { - widths_in <- .apply_col_wrapping( + widths_in <- .compute_wrapped_widths( widths_in, resolved_cols, data, tbl, content_width_in, - min_in, h_pad_in, na_str, max_rows, pg_width, pg_height, margins + h_pad_in, min_in, pg_width, pg_height, margins + ) + total_w <- sum(widths_in) + } + + # --- Optional height-balance pass (opt-in via wrap_balance = "height") --- + # Runs unconditionally when opted in; the algorithm itself is a no-op if + # there is no improvement available. Falls back silently to the input + # widths on any error or invariant violation, so opting in cannot worsen + # the result. + if (identical(tbl$wrap_balance, "height")) { + widths_in <- .height_balance_widths( + widths_in, resolved_cols, data, tbl, + h_pad_in = h_pad_in, na_str = na_str, max_rows = max_rows, + breaks = breaks, pg_width = pg_width, pg_height = pg_height, + margins = margins ) total_w <- sum(widths_in) } @@ -273,55 +329,6 @@ compute_col_widths <- function(resolved_cols, data, content_width_in, col_cont_label_half_w = col_cont_label_half_w) } -# --------------------------------------------------------------------------- -# .apply_col_wrapping() -# --------------------------------------------------------------------------- - -#' Iteratively narrow wrap-eligible columns until total fits or all at min -#' @keywords internal -.apply_col_wrapping <- function(widths_in, resolved_cols, data, tbl, - content_width_in, min_in, h_pad_in, - na_str, max_rows, pg_width, pg_height, margins) { - n <- length(widths_in) - wrap_eligible <- vapply(resolved_cols, `[[`, logical(1L), "wrap") - - if (!any(wrap_eligible)) return(widths_in) - - scratch_file <- tempfile(fileext = ".pdf") - grDevices::pdf(scratch_file, width = pg_width, height = pg_height) - outer_vp <- .make_outer_vp(margins) - grid::pushViewport(outer_vp) - on.exit({ - grid::popViewport() - grDevices::dev.off() - unlink(scratch_file) - }, add = TRUE) - - # Repeat reduction passes until fits or no more room - for (pass in seq_len(100L)) { - total <- sum(widths_in) - if (total <= content_width_in + 1e-6) break - - excess <- total - content_width_in - # Find widest eligible column that is still above min - eligible_above_min <- which(wrap_eligible & widths_in > min_in + 1e-6) - if (length(eligible_above_min) == 0L) break - - target_j <- eligible_above_min[which.max(widths_in[eligible_above_min])] - - # Try wrapping text in that column to a narrower target - new_w <- max(min_in, widths_in[target_j] - excess) - # Re-measure: what is the minimum content width needed after wrapping? - cs <- resolved_cols[[target_j]] - cell_gp <- .resolve_table_cell_gp(tbl$gp, cs$is_group_col) - strings <- .collect_col_strings(data[[cs$col]], cs$label, na_str, max_rows) - # Use new_w as the wrap target: accept it (word-wrap will reflow at draw time) - widths_in[target_j] <- new_w - } - - widths_in -} - # --------------------------------------------------------------------------- # paginate_cols() — split data column indices into groups # --------------------------------------------------------------------------- diff --git a/R/table_draw.R b/R/table_draw.R index 8e4faf4..121f9fc 100644 --- a/R/table_draw.R +++ b/R/table_draw.R @@ -132,6 +132,10 @@ drawDetails.tfl_table_grob <- function(x, recording) { na_str <- tbl$na_string gp_tbl <- tbl$gp v_pad_in <- v_top_in + v_bot_in + breaks <- tbl$wrap_breaks %||% wrap_breaks_default() + wrap_extra_pad_in <- if (!is.null(tbl$wrap_extra_padding)) { + .height_in(tbl$wrap_extra_padding) + } else 0 # Use cached heights from the pagination phase (ensures layout consistency). # Fall back to re-measurement only when cache is absent. @@ -140,16 +144,11 @@ drawDetails.tfl_table_grob <- function(x, recording) { lh <- tbl$line_height %||% 1.05 # defensive fallback for old grob objects - # Header row height + # Header row height (delegates to the same helper used during pagination so + # any auto-wrapping of column labels is accounted for here too). header_row_h <- if (tbl$show_col_names) { - hdr_gp <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"), lh) - max(vapply(page_cols, function(cs) { - nlines <- max(1L, length(strsplit(cs$label, "\n", fixed = TRUE)[[1L]])) - grob <- grid::textGrob(cs$label, gp = hdr_gp) - h1 <- .height_in(grid::grobHeight(grob)) - h2 <- nlines * .height_in(grid::stringHeight("M")) - max(h1, h2) - }, numeric(1L))) + v_pad_in + .measure_header_row_height(page_cols, gp_tbl, cp, lh, breaks = breaks, + wrap_extra_pad_in = wrap_extra_pad_in) } else 0 # Continuation row height — prefer cached value @@ -198,14 +197,15 @@ drawDetails.tfl_table_grob <- function(x, recording) { ) for (ri in seq_len(n_rows)) { s <- .fmt_cell(data[[cs$col]][rows[[ri]]], na_str) - disp_s <- if (cs$wrap && !is.null(cs$width_in)) { - .wrap_text(s, cs$width_in - h_lft_in - h_rgt_in, gp_c) + disp_s <- if (isTRUE(cs$wrap) && !is.null(cs$width_in)) { + .wrap_string(s, cs$width_in - h_lft_in - h_rgt_in, gp_c, breaks) } else s nlines <- max(1L, length(strsplit(disp_s, "\n", fixed = TRUE)[[1L]])) grob <- grid::textGrob(disp_s, gp = gp_c) h1 <- .height_in(grid::grobHeight(grob)) h2 <- nlines * .height_in(grid::stringHeight("M")) - fallback_mat[ri, j] <- max(h1, h2) + v_pad_in + extra <- if (nlines > 1L) wrap_extra_pad_in else 0 + fallback_mat[ri, j] <- max(h1, h2) + v_pad_in + extra } } .compute_page_row_heights( @@ -260,7 +260,8 @@ drawDetails.tfl_table_grob <- function(x, recording) { } .draw_header_row(page_cols, col_x_left, col_x_right, col_widths_in, y_cursor, header_row_h, vp_w, vp_h, - h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh) + h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh, + breaks = breaks) y_cursor <- y_cursor + header_row_h # Column header rule — spans table width only @@ -361,9 +362,13 @@ drawDetails.tfl_table_grob <- function(x, recording) { # Resolve cell gpar (with lineheight applied) cell_gp <- .gp_with_lineheight(.resolve_table_cell_gp(gp_tbl, cs$is_group_col), lh) - # For wrap-eligible columns, apply word-wrapping before drawing - display_str <- if (cs$wrap && nzchar(cell_str) && !is.null(cs$width_in)) { - .wrap_text(cell_str, cs$width_in - h_lft_in - h_rgt_in, cell_gp) + # For wrap-eligible columns, apply word-wrapping before drawing using + # the table's wrap_breaks spec (which may include keep_before chars + # like "-"). + display_str <- if (isTRUE(cs$wrap) && nzchar(cell_str) && + !is.null(cs$width_in)) { + .wrap_string(cell_str, cs$width_in - h_lft_in - h_rgt_in, + cell_gp, breaks) } else { cell_str } @@ -470,14 +475,25 @@ drawDetails.tfl_table_grob <- function(x, recording) { # Drawing helpers # --------------------------------------------------------------------------- -# Draw the column header row +# Draw the column header row. +# +# When `breaks` is non-NULL and a column is wrap-eligible (`cs$wrap == TRUE`) +# with a resolved width, the label is auto-wrapped to fit the column before +# drawing, so a long header in a narrow column reflows onto multiple lines +# rather than overflowing. .draw_header_row <- function(page_cols, col_x_left, col_x_right, col_widths_in, y_top_in, row_h, vp_w, vp_h, - h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh) { + h_lft_in, h_rgt_in, v_top_in, gp_tbl, lh, + breaks = NULL) { hdr_gp <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"), lh) for (j in seq_along(page_cols)) { - cs <- page_cols[[j]] - .draw_cell_text(cs$label, "centre", + cs <- page_cols[[j]] + label <- cs$label + if (!is.null(breaks) && isTRUE(cs$wrap) && !is.null(cs$width_in)) { + label <- .wrap_label_for_width(label, cs$width_in, + h_lft_in + h_rgt_in, hdr_gp, breaks) + } + .draw_cell_text(label, "centre", col_x_left[[j]], col_x_right[[j]], y_top_in, row_h, vp_w, vp_h, h_lft_in, h_rgt_in, v_top_in, @@ -534,14 +550,25 @@ drawDetails.tfl_table_grob <- function(x, recording) { just <- c("centre", "top") } - # Re-measure text width in the current (rendering) device and use the wider - - # of the cached column width and the measured text width. This prevents - # clipping when font metrics differ between the PDF scratch device used for - # column-width measurement and the device used for actual rendering (e.g. - # a PNG device in knitr/RStudio preview mode). - text_w <- .width_in(grid::stringWidth(text)) - clip_w <- max(col_width_in, text_w + h_lft_in + h_rgt_in) + # Re-measure text width in the current (rendering) device using the + # actual rendering gpar (grid::stringWidth() picks up only the active + # viewport's gp, which is wrong when `gp` is, e.g., a bold header + # gpar and the active vp is regular weight). This corrects font-metric + # variance between the PDF scratch device used for column-width + # measurement and the device used for actual rendering (e.g. a PNG + # device in knitr / RStudio preview mode). + # + # Important: cap the clip width at a small tolerance past `col_width_in` + # so a column that is genuinely too narrow for its content (user set a + # fixed width below the longest unbreakable token, or a bold header + # whose measured width exceeded the regular-weight column-width pass) + # cannot bleed text into the neighboring column and hide its content. + # Anything past the tolerance gets visually clipped at the column edge, + # which is a far less destructive failure mode than overlap. + text_w <- .width_in(grid::grobWidth(grid::textGrob(text, gp = gp))) + needed <- text_w + h_lft_in + h_rgt_in + bleed_tol_in <- 0.05 + clip_w <- min(col_width_in + bleed_tol_in, max(col_width_in, needed)) # Clip to column width by using a clipping viewport vp_clip <- grid::viewport( diff --git a/R/table_pagelist.R b/R/table_pagelist.R index d32065e..5a77c92 100644 --- a/R/table_pagelist.R +++ b/R/table_pagelist.R @@ -182,14 +182,22 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, unlink(scratch_file_rh) }, add = TRUE) + breaks <- tbl$wrap_breaks %||% wrap_breaks_default() + wrap_extra_pad_in <- if (!is.null(tbl$wrap_extra_padding)) { + .height_in(tbl$wrap_extra_padding) + } else 0 + header_row_h <- if (tbl$show_col_names) { .measure_header_row_height(resolved_cols, tbl$gp, tbl$cell_padding, - tbl$line_height) + tbl$line_height, breaks = breaks, + wrap_extra_pad_in = wrap_extra_pad_in) } else 0 cell_h_mat <- measure_row_heights_tbl( tbl$data, resolved_cols, tbl$gp, tbl$cell_padding, - tbl$na_string, tbl$line_height, tbl$max_measure_rows + tbl$na_string, tbl$line_height, tbl$max_measure_rows, + breaks = breaks, + wrap_extra_pad_in = wrap_extra_pad_in ) # cont_row_h: height of a (continued) row — measure the cont message text @@ -209,7 +217,8 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, tbl$data, cell_h_mat, resolved_cols, tbl$group_vars, cont_row_h, header_row_h, ch, tbl$row_cont_msg, tbl$group_rule, - suppress_repeated_groups = isTRUE(tbl$suppress_repeated_groups) + suppress_repeated_groups = isTRUE(tbl$suppress_repeated_groups), + overflow_action = overflow_action ) # --- Step 7: Assemble page specs --- diff --git a/R/table_rows.R b/R/table_rows.R index 66e2341..df70ee5 100644 --- a/R/table_rows.R +++ b/R/table_rows.R @@ -25,7 +25,8 @@ #' @return Numeric matrix `[nrow(data) × length(resolved_cols)]` of inches. #' @keywords internal measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, - na_string, line_height, max_measure_rows) { + na_string, line_height, max_measure_rows, + breaks = NULL, wrap_extra_pad_in = 0) { n_rows <- nrow(data) n_cols <- length(resolved_cols) v_pad_in <- .height_in(cell_padding[["top"]]) + @@ -72,15 +73,20 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, for (i in sample_rows) { cell_str <- .fmt_cell(data[[cs$col]][i], na_string) - display_str <- if (cs$wrap && !is.null(cs$width_in)) { - .wrap_text(cell_str, avail_w, cell_gp) + display_str <- if (isTRUE(cs$wrap) && !is.null(cs$width_in)) { + if (is.null(breaks)) { + .wrap_text(cell_str, avail_w, cell_gp) + } else { + .wrap_string(cell_str, avail_w, cell_gp, breaks) + } } else { cell_str } nlines <- max(1L, length(strsplit(display_str, "\n", fixed = TRUE)[[1L]])) h_grob <- .memo_str_height(display_str, gp_key, cell_gp) h_line <- nlines * .height_in(grid::stringHeight("M")) - cell_h_mat[i, j] <- max(h_grob, h_line) + v_pad_in + extra <- if (nlines > 1L) wrap_extra_pad_in else 0 + cell_h_mat[i, j] <- max(h_grob, h_line) + v_pad_in + extra } } @@ -224,6 +230,11 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, #' @param group_rule Logical — are group rules drawn? (Reserved for future #' use; currently does not affect pagination because rules are 0-height.) #' @param suppress_repeated_groups Logical, from `tbl$suppress_repeated_groups`. +#' @param overflow_action One of `"error"` (default) or `"warn"`. Controls how +#' the row-overflow guard reports a single row whose committed height +#' exceeds the available page content height (a row that wraps to taller +#' than one page is almost always a sign of input that needs to change). +#' The same knob downgrades column-overflow events; see [export_tfl_page()]. #' @return A list of row-page specs, each with `$rows`, `$is_cont_top`, #' `$is_cont_bottom`, `$group_starts`, and `$row_heights_in` (the committed #' per-row heights for that page in inches). @@ -231,7 +242,8 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars, cont_row_h, header_row_h, content_height_in, row_cont_msg, group_rule, - suppress_repeated_groups = TRUE) { + suppress_repeated_groups = TRUE, + overflow_action = "error") { n_rows <- nrow(data) # Group boundaries in the *full* data — used for the page-spec $group_starts @@ -242,6 +254,7 @@ paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars, cur_rows <- integer(0L) committed_rh <- numeric(0L) # heights for cur_rows after last successful add is_cont_top <- FALSE + errors <- character(0L) flush_page <- function(rows, row_heights_in, is_cont_top, is_cont_bottom) { pages[[length(pages) + 1L]] <<- list( @@ -267,28 +280,50 @@ paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars, sum(rh) + cont_row_h # reserve bottom continuation marker - if (total > content_height_in + 1e-6 && length(cur_rows) > 0L) { - # Warn whenever a group is split across pages (row i and the last row - # on the current page belong to the same group). - if (length(group_vars) > 0L) { - last_in_page <- cur_rows[[length(cur_rows)]] - same_group <- all(vapply(group_vars, function(gv) { - identical(data[[gv]][last_in_page], data[[gv]][i]) - }, logical(1L))) - if (same_group) { - rlang::warn(sprintf( - paste0("Row %d belongs to a group that spans more than one page. ", - "A '(continued)' marker will be added at the boundary."), i - )) + if (total > content_height_in + 1e-6) { + if (length(cur_rows) > 0L) { + # Warn whenever a group is split across pages (row i and the last row + # on the current page belong to the same group). + if (length(group_vars) > 0L) { + last_in_page <- cur_rows[[length(cur_rows)]] + same_group <- all(vapply(group_vars, function(gv) { + identical(data[[gv]][last_in_page], data[[gv]][i]) + }, logical(1L))) + if (same_group) { + rlang::warn(sprintf( + paste0("Row %d belongs to a group that spans more than one page. ", + "A '(continued)' marker will be added at the boundary."), i + )) + } } - } - flush_page(cur_rows, committed_rh, is_cont_top, is_cont_bottom = TRUE) + flush_page(cur_rows, committed_rh, is_cont_top, is_cont_bottom = TRUE) - cur_rows <- integer(0L) - committed_rh <- numeric(0L) - is_cont_top <- TRUE - next # re-process row i on a fresh page + cur_rows <- integer(0L) + committed_rh <- numeric(0L) + is_cont_top <- TRUE + next # re-process row i on a fresh page + } else { + # Row i alone is being committed. `total` is the conservative budget + # including a *reserved* bottom continuation marker that may not be + # drawn when this row turns out to be the last on the page. Only + # signal a true overflow when the row exceeds the page height even + # without that reserve - then no amount of pagination can rescue it. + min_required <- header_row_h + + (if (is_cont_top) cont_row_h else 0) + + sum(rh) + if (min_required > content_height_in + 1e-6) { + msg <- sprintf( + paste0("Row %d of the table wraps to a height (%.3g in) that ", + "exceeds the available page content height (%.3g in). ", + "Reduce the cell content, increase the page height, widen ", + "the column, or set the column to wrap less aggressively."), + i, sum(rh), content_height_in + ) + errors <- .overflow_signal(msg, overflow_action, errors) + } + # Fall through to commit the row. + } } cur_rows <- candidate @@ -300,5 +335,9 @@ paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars, flush_page(cur_rows, committed_rh, is_cont_top, is_cont_bottom = FALSE) } + if (length(errors) > 0L) { + rlang::abort(paste(errors, collapse = "\n")) + } + pages } diff --git a/R/table_utils.R b/R/table_utils.R index 1884093..6323055 100644 --- a/R/table_utils.R +++ b/R/table_utils.R @@ -40,20 +40,38 @@ # Row height measurement helpers # --------------------------------------------------------------------------- -# Measure column header row height (max across all column labels) +# Measure column header row height (max across all column labels). +# +# When `breaks` is non-NULL and a column is wrap-eligible (`cs$wrap == TRUE`) +# with a resolved width, the label is run through .wrap_label_for_width() +# before measurement so headers get the same auto-line-breaking treatment as +# cell content. +# +# `wrap_extra_pad_in` is an inches scalar of extra height added at the +# bottom of any column whose (post-wrap) header is multi-line, so the gap +# between the header row and the first data row is more obvious. .measure_header_row_height <- function(resolved_cols, gp_tbl, cell_padding, - line_height) { + line_height, breaks = NULL, + wrap_extra_pad_in = 0) { v_pad_in <- .height_in(cell_padding[["top"]]) + .height_in(cell_padding[["bottom"]]) + h_lft_in <- .width_in(cell_padding[["left"]]) + h_rgt_in <- .width_in(cell_padding[["right"]]) hdr_gp <- .gp_with_lineheight(.resolve_table_gp(gp_tbl, "header_row"), line_height) max(vapply(resolved_cols, function(cs) { - nlines <- max(1L, length(strsplit(cs$label, "\n", fixed = TRUE)[[1L]])) - grob <- grid::textGrob(cs$label, gp = hdr_gp) + label <- cs$label + if (!is.null(breaks) && isTRUE(cs$wrap) && !is.null(cs$width_in)) { + label <- .wrap_label_for_width(label, cs$width_in, + h_lft_in + h_rgt_in, hdr_gp, breaks) + } + nlines <- max(1L, length(strsplit(label, "\n", fixed = TRUE)[[1L]])) + grob <- grid::textGrob(label, gp = hdr_gp) h_grob <- .height_in(grid::grobHeight(grob)) h_line <- nlines * .height_in(grid::stringHeight("M")) - max(h_grob, h_line) + extra <- if (nlines > 1L) wrap_extra_pad_in else 0 + max(h_grob, h_line) + extra }, numeric(1L))) + v_pad_in } @@ -192,6 +210,26 @@ ifelse(is.na(vec), na_string, as.character(vec)) } +# Split a column's strings into header lines and (deduped, sampled) data +# values. Used when each kind of text needs to be measured with a different +# gpar (the header_row gpar is typically bold while cells use a regular +# weight; a header rendered in bold is wider than the same string measured +# in regular weight, so a column auto-sized against the regular-weight +# measurement undersizes its bold header). +.split_col_strings <- function(col_vec, label, na_string, max_rows) { + data_strs <- unique(.fmt_cell_vec(col_vec, na_string)) + if (is.finite(max_rows) && length(data_strs) > max_rows) { + data_strs <- data_strs[order(nchar(data_strs), decreasing = TRUE)[ + seq_len(max_rows)]] + } + hdr_lines <- if (is.null(label) || !nzchar(label)) { + character(0L) + } else { + strsplit(label, "\n", fixed = TRUE)[[1L]] + } + list(header = hdr_lines, data = data_strs) +} + # Collect unique strings for a column (header + data), limited by max_rows .collect_col_strings <- function(col_vec, label, na_string, max_rows) { data_strs <- unique(.fmt_cell_vec(col_vec, na_string)) @@ -232,36 +270,17 @@ } # Word-wrap a string to fit within available_w_in inches. -# Preserves explicit \n (paragraph breaks) and greedily breaks on spaces. +# +# Default-breaks shim around .wrap_string() (R/wrap.R). Used by callers that +# do not have a tfl_table in scope - the page-level character-content path +# in R/draw.R::draw_content() and the caption / footnote wrapper in +# R/normalize.R::wrap_normalized_text(). tfl_table cell and header +# rendering call .wrap_string() directly with tbl$wrap_breaks so the +# user-configured break spec applies. +# # Must be called while a viewport with the target font context is active. .wrap_text <- function(text, available_w_in, gp) { - if (!nzchar(text)) return(text) - - paragraphs <- strsplit(text, "\n", fixed = TRUE)[[1L]] - - wrapped_pars <- vapply(paragraphs, function(para) { - if (!nzchar(para)) return("") - words <- strsplit(para, " ")[[1L]] - words <- words[nzchar(words)] - if (length(words) == 0L) return("") - - lines <- character(0L) - current_line <- words[[1L]] - - for (k in seq_along(words)[-1L]) { - test <- paste0(current_line, " ", words[[k]]) - w <- .width_in(grid::grobWidth(grid::textGrob(test, gp = gp))) - if (w > available_w_in + 1e-6) { - lines <- c(lines, current_line) - current_line <- words[[k]] - } else { - current_line <- test - } - } - paste(c(lines, current_line), collapse = "\n") - }, character(1L)) - - paste(wrapped_pars, collapse = "\n") + .wrap_string(text, available_w_in, gp, wrap_breaks_default()) } # --------------------------------------------------------------------------- diff --git a/R/tfl_table.R b/R/tfl_table.R index b67eac0..c3ddef1 100644 --- a/R/tfl_table.R +++ b/R/tfl_table.R @@ -26,8 +26,14 @@ #' placed). `NULL` triggers content-based auto-sizing. #' @param align Character scalar: `"left"`, `"right"`, or `"centre"`. `NULL` #' defaults to `"right"` for numeric columns and `"left"` otherwise. -#' @param wrap Logical. Whether this column is eligible for word-wrapping when -#' total column widths exceed available width. +#' @param wrap Logical of length 1: `TRUE`, `FALSE`, or `NA`. Controls +#' text-wrapping eligibility *within this column*. `NA` (the default) means +#' "inherit from the table-level [tfl_table()]'s `wrap_cols` setting"; under +#' the default `wrap_cols = "auto"` that resolves to `TRUE` when any cell or +#' the header contains a break character (see `wrap_breaks`). `TRUE` / +#' `FALSE` are explicit overrides. This is **text wrap** inside a column; +#' for splitting a too-wide table across pages see `allow_col_split` in +#' [tfl_table()]. #' @param gp A `gpar()` object to override [tfl_table()]'s `gp$group_col` for #' this specific column. Only valid for row-header (group) columns; an error #' is raised if applied to a data column. @@ -40,7 +46,7 @@ tfl_colspec <- function(col, label = NULL, width = NULL, align = NULL, - wrap = FALSE, + wrap = NA, gp = NULL) { checkmate::assert_string(col, min.chars = 1, .var.name = "col") checkmate::assert_string(label, null.ok = TRUE, .var.name = "label") @@ -52,7 +58,9 @@ tfl_colspec <- function(col, if (!is.null(align)) { align <- match.arg(align, c("left", "right", "centre")) } - checkmate::assert_flag(wrap, .var.name = "wrap") + if (!is.logical(wrap) || length(wrap) != 1L) { + rlang::abort("`wrap` must be TRUE, FALSE, or NA (inherit from the table-level wrap_cols).") + } checkmate::assert_class(gp, "gpar", null.ok = TRUE, .var.name = "gp") structure( @@ -96,9 +104,39 @@ tfl_colspec <- function(col, #' multiline column headers. Overridden per-column by `tfl_colspec(label)`. #' @param col_align Named character vector. Each element is `"left"`, #' `"right"`, or `"centre"`. Overridden per-column by `tfl_colspec(align)`. -#' @param wrap_cols Column-wrapping eligibility. `TRUE` = all non-group -#' columns eligible; `FALSE` = none eligible; character vector = those -#' specific column names. Overridden per-column by `tfl_colspec(wrap)`. +#' @param wrap_cols Text-wrap eligibility *within columns*. Controls whether +#' long cell text and column-header labels may be broken across multiple +#' lines so that the column can be narrower. **This is not the same thing +#' as splitting a too-wide table across pages** — see `allow_col_split` +#' for that. +#' +#' * `"auto"` (default) — every non-group column whose data or header +#' contains a `wrap_breaks` character is eligible. Numeric / single-token +#' columns are skipped because they can't break. +#' * `TRUE` — all non-group columns eligible regardless of content. +#' * `FALSE` — disable the text-wrap module entirely. +#' * Character vector of column names — only those columns are eligible. +#' +#' Overridden per-column by `tfl_colspec(wrap)`. +#' @param wrap_breaks A `wrap_breaks()` object specifying the characters at +#' which the wrap module is allowed to break. The default, +#' `wrap_breaks(drop = c(" ", "\t"), keep_before = character(0))`, breaks on +#' whitespace and consumes the whitespace at the break point. Pass +#' `wrap_breaks(keep_before = "-")` to also break after `-` (the `-` stays +#' on the left of the break). +#' @param wrap_balance Either `"width"` (default) or `"height"`. Controls +#' what the wrap-narrowing pass optimises for: +#' +#' * `"width"` — balance widths between wrap-eligible columns so the +#' widest columns shrink together (water-from-top). Cheap and +#' deterministic; produces visually balanced columns. +#' * `"height"` — opt-in heuristic that runs after the width-balance pass +#' and redistributes width between wrap-eligible columns to lower the +#' total table height (more rows per page). Time-budgeted at ~1 s; if +#' the search fails or overruns, the result silently falls back to +#' the width-balanced widths so opting in cannot produce a *worse* +#' table than the default. Useful when string columns have very +#' different content density. #' @param min_col_width Minimum column width as a `unit` object. #' @param allow_col_split Logical. If `FALSE`, an error is raised when total #' column width still exceeds available width after wrapping. If `TRUE` @@ -207,6 +245,12 @@ tfl_colspec <- function(col, #' adds a small 5% breathing room. If a `gpar()` supplied through the `gp` #' argument already contains an explicit `lineheight` field for a particular #' section, that value takes precedence over this parameter. +#' @param wrap_extra_padding A `unit` object specifying additional vertical +#' space added at the bottom of any multi-line cell so the visual gap +#' between consecutive rows is more obvious when one or both contain +#' wrapped or `\n`-broken text. Default `unit(0.5, "lines")`. Set to +#' `unit(0, "lines")` to disable. Only multi-line cells receive the extra; +#' single-line cells are unaffected. #' @param max_measure_rows Positive numeric or `Inf` (default). Maximum number #' of unique cell strings sampled per column when computing content-based #' column widths. Strings are sampled in descending order of `nchar()` so @@ -226,8 +270,7 @@ tfl_colspec <- function(col, #' tbl <- tfl_table( #' df, #' col_labels = c(mpg = "MPG", hp = "Horse-\npower"), -#' col_align = c(mpg = "right", hp = "right"), -#' wrap_cols = FALSE +#' col_align = c(mpg = "right", hp = "right") #' ) #' #' export_tfl(tbl, @@ -245,7 +288,9 @@ tfl_table <- function(x, col_widths = NULL, col_labels = NULL, col_align = NULL, - wrap_cols = FALSE, + wrap_cols = "auto", + wrap_breaks = NULL, + wrap_balance = c("width", "height"), min_col_width = grid::unit(0.5, "inches"), allow_col_split = TRUE, balance_col_pages = FALSE, @@ -268,6 +313,7 @@ tfl_table <- function(x, gp = list(), cell_padding = grid::unit(c(0.2, 0.5), "lines"), line_height = 1.05, + wrap_extra_padding = grid::unit(0.5, "lines"), max_measure_rows = Inf) { # --- Validate x --- @@ -334,16 +380,43 @@ tfl_table <- function(x, # --- Validate wrap_cols --- if (!is.logical(wrap_cols) && !is.character(wrap_cols)) { - rlang::abort('`wrap_cols` must be TRUE, FALSE, or a character vector of column names.') + rlang::abort('`wrap_cols` must be TRUE, FALSE, "auto", or a character vector of column names.') + } + if (is.logical(wrap_cols) && (length(wrap_cols) != 1L || is.na(wrap_cols))) { + rlang::abort("`wrap_cols` must be a single TRUE or FALSE when logical.") } if (is.character(wrap_cols)) { - bad <- setdiff(wrap_cols, col_names) - if (length(bad) > 0L) { - rlang::abort(paste0("wrap_cols names not found in `x`: ", - paste(bad, collapse = ", "))) + if (length(wrap_cols) == 1L && identical(wrap_cols, "auto")) { + # ok + } else { + bad <- setdiff(wrap_cols, col_names) + if (length(bad) > 0L) { + rlang::abort(paste0("wrap_cols names not found in `x`: ", + paste(bad, collapse = ", "), + '. Use "auto" for auto-detect, TRUE for all data ', + "columns, or FALSE to disable.")) + } } } + # --- Validate wrap_breaks --- + # Default is NULL because using `wrap_breaks()` as the default expression + # would shadow the constructor with the parameter and trigger a recursive + # promise evaluation when the default is materialised. + if (is.null(wrap_breaks)) { + wrap_breaks <- wrap_breaks_default() + } + if (!.is_wrap_breaks(wrap_breaks)) { + rlang::abort('`wrap_breaks` must be a wrap_breaks() object.') + } + + # --- Validate wrap_balance --- + if (!is.character(wrap_balance) || length(wrap_balance) == 0L || + !all(wrap_balance %in% c("width", "height"))) { + rlang::abort('`wrap_balance` must be "width" or "height".') + } + wrap_balance <- match.arg(wrap_balance) + # --- Validate min_col_width --- checkmate::assert_class(min_col_width, "unit", .var.name = "min_col_width") @@ -400,6 +473,13 @@ tfl_table <- function(x, checkmate::assert_number(line_height, lower = .Machine$double.eps, finite = TRUE, .var.name = "line_height") + # --- Validate wrap_extra_padding --- + checkmate::assert_class(wrap_extra_padding, "unit", + .var.name = "wrap_extra_padding") + if (length(wrap_extra_padding) != 1L) { + rlang::abort("`wrap_extra_padding` must be a unit of length 1.") + } + # --- Validate max_measure_rows --- checkmate::assert_number(max_measure_rows, lower = 1, .var.name = "max_measure_rows") @@ -413,6 +493,8 @@ tfl_table <- function(x, col_labels = col_labels, col_align = col_align, wrap_cols = wrap_cols, + wrap_breaks = wrap_breaks, + wrap_balance = wrap_balance, min_col_width = min_col_width, allow_col_split = allow_col_split, balance_col_pages = balance_col_pages, @@ -434,6 +516,7 @@ tfl_table <- function(x, gp = gp, cell_padding = cell_padding, line_height = line_height, + wrap_extra_padding = wrap_extra_padding, max_measure_rows = max_measure_rows ), class = "tfl_table" diff --git a/R/wrap.R b/R/wrap.R new file mode 100644 index 0000000..fa7cf17 --- /dev/null +++ b/R/wrap.R @@ -0,0 +1,648 @@ +# wrap.R - Word wrap module for tfl_table() +# +# Owns text wrapping inside table cells and column headers, plus the +# water-from-top algorithm that narrows wrap-eligible columns when the +# table is wider than the page. +# +# All functions are internal. The page-level character-content path in +# R/draw.R and the caption / footnote wrapper in R/normalize.R reach the +# core algorithm via the .wrap_text() shim in R/table_utils.R, which +# forwards to .wrap_string() with the package default breaks. +# +# Disable: tfl_table(wrap_cols = FALSE) bypasses the whole module. +# +# Public-internal entry points: +# wrap_breaks(drop, keep_before) - constructor +# wrap_breaks_default() - package default +# .is_wrap_breaks(x) - predicate +# .wrap_string(text, avail_in, gp, breaks) +# .column_has_breakable_text(strings, breaks) +# .column_min_token_width_in(strings, gp, breaks) +# .compute_wrapped_widths(widths_in, resolved_cols, ...) +# .wrap_label_for_width(label, width_in, gp, breaks) + +# --------------------------------------------------------------------------- +# wrap_breaks() - break-character spec +# --------------------------------------------------------------------------- + +#' Specify how strings are broken when wrapping table text +#' +#' A `wrap_breaks` object lists the characters at which `.wrap_string()` is +#' allowed to insert a line break. Two modes are supported: +#' +#' * `drop` characters are consumed at the break point. The default +#' (`" "` and `"\t"`) means runs of whitespace disappear when a wrap +#' occurs there but stay inline otherwise. +#' * `keep_before` characters stay on the left of the break - the character +#' is preserved at the end of the upper line and the *next* character +#' starts the new line. Typical use: `"-"` so that a hyphenated term +#' like `"placebo-controlled"` can wrap to `"placebo-\ncontrolled"`. +#' +#' `drop` and `keep_before` must be disjoint single-character vectors. +#' +#' @param drop Character vector of single-character break points consumed +#' at the break. Defaults to `c(" ", "\t")`. +#' @param keep_before Character vector of single-character break points +#' preserved on the left of the break. Defaults to `character(0)`. +#' +#' @return A list of class `"wrap_breaks"` with components `drop` and +#' `keep_before`. +#' +#' @keywords internal +wrap_breaks <- function(drop = c(" ", "\t"), + keep_before = character(0L)) { + drop <- if (is.null(drop)) character(0L) else drop + keep_before <- if (is.null(keep_before)) character(0L) else keep_before + if (!is.character(drop) || anyNA(drop)) { + rlang::abort("`drop` must be a character vector with no NAs.") + } + if (!is.character(keep_before) || anyNA(keep_before)) { + rlang::abort("`keep_before` must be a character vector with no NAs.") + } + if (any(nchar(drop) != 1L)) { + rlang::abort("Every `drop` element must be a single character.") + } + if (any(nchar(keep_before) != 1L)) { + rlang::abort("Every `keep_before` element must be a single character.") + } + overlap <- intersect(drop, keep_before) + if (length(overlap) > 0L) { + rlang::abort(paste0( + "`drop` and `keep_before` must be disjoint. Overlapping characters: ", + paste(shQuote(overlap), collapse = ", ") + )) + } + structure( + list(drop = unique(drop), keep_before = unique(keep_before)), + class = "wrap_breaks" + ) +} + +#' Package-default break spec (whitespace only). +#' @keywords internal +wrap_breaks_default <- function() { + wrap_breaks(drop = c(" ", "\t"), keep_before = character(0L)) +} + +#' Predicate for wrap_breaks objects. +#' @keywords internal +.is_wrap_breaks <- function(x) inherits(x, "wrap_breaks") + +# --------------------------------------------------------------------------- +# Tokenizer +# --------------------------------------------------------------------------- + +# Tokenize one paragraph (no embedded \n) into a list of break-delimited +# chunks. Each chunk is a list with: +# $text - the substring that becomes part of a rendered line +# $lead - the separator that prepends this chunk when continuing on the +# same line, dropped when this chunk starts a fresh line +# +# A `drop` character produces a chunk boundary AND becomes the next chunk's +# $lead. A `keep_before` character is appended to the preceding chunk's +# $text and forces a boundary; the following chunk has $lead = "". +# +# The first chunk always has $lead = "". An empty input returns list(). +.tokenize_for_wrap <- function(s, breaks) { + if (!nzchar(s)) return(list()) + drop_chars <- breaks$drop + keep_chars <- breaks$keep_before + + chars <- strsplit(s, "", fixed = TRUE)[[1L]] + n <- length(chars) + if (n == 0L) return(list()) + + tokens <- vector("list", n) # over-allocate; trim at end + k <- 0L + cur_buf <- character(n) + cur_n <- 0L + pending <- "" # lead for the next emitted token + + flush <- function() { + if (cur_n > 0L) { + k <<- k + 1L + tokens[[k]] <<- list( + text = paste(cur_buf[seq_len(cur_n)], collapse = ""), + lead = pending + ) + cur_n <<- 0L + pending <<- "" + } + } + + for (i in seq_len(n)) { + ch <- chars[[i]] + if (length(drop_chars) > 0L && ch %in% drop_chars) { + flush() + pending <- ch + } else if (length(keep_chars) > 0L && ch %in% keep_chars) { + cur_n <- cur_n + 1L + cur_buf[cur_n] <- ch + flush() + pending <- "" + } else { + cur_n <- cur_n + 1L + cur_buf[cur_n] <- ch + } + } + flush() + + if (k == 0L) list() else tokens[seq_len(k)] +} + +# --------------------------------------------------------------------------- +# .wrap_string() - wrap one string to fit a width +# --------------------------------------------------------------------------- + +# Width of a string under the active viewport's font context, in inches. +.measure_text_width_in <- function(s, gp) { + if (!nzchar(s)) return(0) + .width_in(grid::grobWidth(grid::textGrob(s, gp = gp))) +} + +#' Wrap text to fit a target width, preserving paragraph breaks. +#' +#' Greedy left-to-right packing. Paragraphs (separated by `\n` in `text`) +#' are wrapped independently and the results re-joined with `\n`. Within +#' a paragraph the break-character spec controls where breaks may occur; +#' a single token wider than `available_w_in` is emitted unchanged on its +#' own line because there is no valid break point inside it. +#' +#' @param text Single character string. +#' @param available_w_in Numeric, available width in inches. +#' @param gp A `gpar()` for measurement font context. +#' @param breaks A `wrap_breaks` object; if `NULL`, the package default. +#' +#' @return A single character string, possibly with `\n` inserted at break +#' points. +#' +#' @keywords internal +.wrap_string <- function(text, available_w_in, gp, + breaks = wrap_breaks_default()) { + if (is.null(text) || !nzchar(text)) return(text) + if (is.null(breaks)) breaks <- wrap_breaks_default() + + paragraphs <- strsplit(text, "\n", fixed = TRUE)[[1L]] + wrapped <- vapply(paragraphs, function(para) { + if (!nzchar(para)) return("") + .wrap_paragraph(para, available_w_in, gp, breaks) + }, character(1L)) + paste(wrapped, collapse = "\n") +} + +.wrap_paragraph <- function(para, available_w_in, gp, breaks) { + tokens <- .tokenize_for_wrap(para, breaks) + if (length(tokens) == 0L) return("") + + lines <- character(0L) + current_line <- "" + + for (tok in tokens) { + if (!nzchar(current_line)) { + current_line <- tok$text + next + } + cand <- paste0(current_line, tok$lead, tok$text) + if (.measure_text_width_in(cand, gp) <= available_w_in + 1e-6) { + current_line <- cand + } else { + lines <- c(lines, current_line) + current_line <- tok$text + } + } + if (nzchar(current_line)) lines <- c(lines, current_line) + paste(lines, collapse = "\n") +} + +# --------------------------------------------------------------------------- +# Auto-detection and floor computation +# --------------------------------------------------------------------------- + +#' Does any string in `strings` contain a break character? +#' +#' Used by the `wrap_cols = "auto"` path: a column with no breakable text +#' is skipped because no amount of narrowing can wrap it. +#' +#' @keywords internal +.column_has_breakable_text <- function(strings, breaks) { + if (length(strings) == 0L) return(FALSE) + break_chars <- c(breaks$drop, breaks$keep_before) + if (length(break_chars) == 0L) return(FALSE) + pat <- paste0("[", paste(vapply(break_chars, .regex_escape_char, ""), + collapse = ""), "]") + any(grepl(pat, strings, perl = TRUE)) +} + +# Escape a single character for use inside a regex character class. +.regex_escape_char <- function(ch) { + if (ch %in% c("\\", "]", "^", "-")) paste0("\\", ch) else ch +} + +#' Width (inches) of the widest unbreakable token across a column's strings. +#' +#' This is the wrapping floor: a column cannot be narrowed below the width +#' needed to render its longest single token. +#' +#' @keywords internal +.column_min_token_width_in <- function(strings, gp, breaks) { + if (length(strings) == 0L) return(0) + max(vapply(strings, function(s) { + if (!nzchar(s)) return(0) + paragraphs <- strsplit(s, "\n", fixed = TRUE)[[1L]] + max(vapply(paragraphs, function(p) { + tokens <- .tokenize_for_wrap(p, breaks) + if (length(tokens) == 0L) return(0) + max(vapply(tokens, function(tok) { + .measure_text_width_in(tok$text, gp) + }, numeric(1L))) + }, numeric(1L))) + }, numeric(1L))) +} + +# --------------------------------------------------------------------------- +# Header / cell label wrapping helper +# --------------------------------------------------------------------------- + +#' Wrap a column-header label (or any single string) to a target width +#' minus left+right horizontal padding. +#' +#' @keywords internal +.wrap_label_for_width <- function(label, width_in, h_pad_in, gp, breaks) { + if (is.null(label) || !nzchar(label)) return(label) + inner <- max(0, width_in - h_pad_in) + .wrap_string(label, inner, gp, breaks) +} + +# --------------------------------------------------------------------------- +# .compute_wrapped_widths() - water-from-top column narrowing +# --------------------------------------------------------------------------- + +#' Iteratively narrow wrap-eligible columns to fit `content_width_in`. +#' +#' Replaces the older single-target `.apply_col_wrapping()` with a fairer +#' "water-from-top" pass: each iteration finds the widest set of +#' wrap-eligible columns above their floor and shrinks them together +#' until the next-lower competitor or a floor is hit. Floors honour +#' `min_col_width` AND the longest unbreakable token in the column. +#' +#' Deterministic, O(n^2) in column count, n <= ~30 in practice. +#' +#' @param widths_in Numeric vector of current per-column widths in inches. +#' @param resolved_cols The `resolve_col_specs()` output. +#' @param data The full data frame from `tbl$data`. +#' @param tbl A `tfl_table` object (used for `gp`, `cell_padding`, +#' `line_height`, `na_string`, `max_measure_rows`, `min_col_width`, +#' `wrap_breaks`). +#' @param content_width_in Numeric target total width in inches. +#' @param h_pad_in Horizontal padding (left+right) in inches. +#' @param min_in `min_col_width` resolved to inches. +#' @param pg_width,pg_height,margins Forwarded to the scratch device. +#' +#' @return Updated `widths_in`. +#' +#' @keywords internal +.compute_wrapped_widths <- function(widths_in, resolved_cols, data, tbl, + content_width_in, h_pad_in, min_in, + pg_width, pg_height, margins) { + n <- length(widths_in) + wrap_eligible <- vapply(resolved_cols, `[[`, logical(1L), "wrap") + if (!any(wrap_eligible)) return(widths_in) + + breaks <- tbl$wrap_breaks %||% wrap_breaks_default() + na_str <- tbl$na_string + max_rows <- tbl$max_measure_rows + + scratch_file <- tempfile(fileext = ".pdf") + grDevices::pdf(scratch_file, width = pg_width, height = pg_height) + outer_vp <- .make_outer_vp(margins) + grid::pushViewport(outer_vp) + on.exit({ + grid::popViewport() + grDevices::dev.off() + unlink(scratch_file) + }, add = TRUE) + + # Compute per-column floors (only meaningful for wrap-eligible cols). + # Headers are rendered with the header_row gpar (typically bold) and data + # cells with the cell gpar (regular). Measuring the floor with only one + # of those gpars under-counts the other - a bold header token may be + # rendered wider than its regular-weight measurement, and the wrap module + # would then promise the column a width the renderer cannot honour. + floors <- widths_in + for (j in which(wrap_eligible)) { + cs <- resolved_cols[[j]] + cell_gp <- .gp_with_lineheight( + .resolve_table_cell_gp(tbl$gp, cs$is_group_col), tbl$line_height + ) + hdr_gp <- .gp_with_lineheight( + .resolve_table_gp(tbl$gp, "header_row"), tbl$line_height + ) + parts <- .split_col_strings(data[[cs$col]], cs$label, na_str, max_rows) + t_data <- .column_min_token_width_in(parts$data, cell_gp, breaks) + t_hdr <- .column_min_token_width_in(parts$header, hdr_gp, breaks) + floors[[j]] <- max(min_in, max(t_data, t_hdr) + h_pad_in) + if (floors[[j]] > widths_in[[j]]) floors[[j]] <- widths_in[[j]] + } + + # Water-from-top + eps <- 1e-6 + max_iter <- 2L * n + 50L + for (iter in seq_len(max_iter)) { + excess <- sum(widths_in) - content_width_in + if (excess <= eps) break + + active <- which(wrap_eligible & widths_in > floors + eps) + if (length(active) == 0L) break + + max_w <- max(widths_in[active]) + at_max <- active[widths_in[active] >= max_w - eps] + + others <- setdiff(active, at_max) + next_comp <- if (length(others) > 0L) max(widths_in[others]) else -Inf + floor_max <- max(floors[at_max]) + step_floor <- max_w - floor_max + step_compete <- max_w - next_comp + step_excess <- excess / length(at_max) + step <- min(step_floor, step_compete, step_excess) + if (step <= eps) break + + widths_in[at_max] <- widths_in[at_max] - step + } + + widths_in +} + +# --------------------------------------------------------------------------- +# .height_balance_widths() - opt-in pass that redistributes width between +# wrap-eligible columns to reduce total table height. +# --------------------------------------------------------------------------- + +#' Redistribute widths between wrap-eligible columns to lower total height +#' +#' Opt-in pass triggered by `tfl_table(wrap_balance = "height")`. Runs after +#' `.compute_wrapped_widths()` (water-fill) and uses a bounded greedy local +#' search: at each iteration find the row whose cells are tallest, identify +#' the wrap-eligible column that drives that row's height (the "bottleneck") +#' and the wrap-eligible column with the most slack (shortest cell content +#' in that row, with room to give up width down to its floor), and try +#' transferring a width delta from slack to bottleneck. Accept the move +#' that reduces the *total table height* (sum of per-row heights, plus +#' header). Stop when no transfer at any tested delta improves total +#' height, when `max_iter` is reached, or when `budget_seconds` of +#' wall-time is exhausted. +#' +#' Cell heights at each `(column, width)` pair are cached so repeat +#' measurements during the search are free; with cell-string deduplication +#' inside one column, the total measurement cost is bounded by +#' `n_unique_cells * n_unique_widths_explored` per column. +#' +#' Invariants: +#' * Total width is preserved exactly (every move is a transfer). +#' * No column shrinks below its floor (the larger of `min_col_width` and +#' the rendered width of its longest unbreakable token under either the +#' cell or header gpar). +#' * No column grows past its natural width (max content width including +#' bold-rendered header tokens). +#' * Any error or invariant violation falls back silently to the input +#' widths, so opting in cannot produce a *worse* table than the default. +#' +#' Approximation: the cost function ignores the rowspan-style group-cell +#' suppression handled by `.compute_page_row_heights()` - group columns are +#' typically not wrap-eligible (auto-detect skips them), so they don't +#' participate in moves; the approximation only marginally affects which +#' move is "best" when group columns happen to dominate a row's height. +#' +#' @keywords internal +.height_balance_widths <- function(widths_in, resolved_cols, data, tbl, + h_pad_in, na_str, max_rows, breaks, + pg_width, pg_height, margins, + budget_seconds = 1.0, + max_iter = 20L) { + original <- widths_in + + wrap_eligible <- vapply(resolved_cols, `[[`, logical(1L), "wrap") + if (sum(wrap_eligible) < 2L) return(original) + + cell_padding <- tbl$cell_padding + v_pad_in <- .height_in(cell_padding[["top"]]) + + .height_in(cell_padding[["bottom"]]) + wrap_extra_pad_in <- if (!is.null(tbl$wrap_extra_padding)) { + .height_in(tbl$wrap_extra_padding) + } else 0 + line_height <- tbl$line_height %||% 1.05 + min_in <- .width_in(tbl$min_col_width) + + # Open scratch device once. Closing happens via on.exit so it runs even + # under tryCatch failure inside the search loop. + scratch_file <- tempfile(fileext = ".pdf") + grDevices::pdf(scratch_file, width = pg_width, height = pg_height) + outer_vp <- .make_outer_vp(margins) + grid::pushViewport(outer_vp) + on.exit({ + grid::popViewport() + grDevices::dev.off() + unlink(scratch_file) + }, add = TRUE) + + result <- tryCatch({ + .height_balance_widths_impl( + widths_in = widths_in, + resolved_cols = resolved_cols, + data = data, tbl = tbl, + wrap_eligible = wrap_eligible, + h_pad_in = h_pad_in, + v_pad_in = v_pad_in, + wrap_extra_pad_in = wrap_extra_pad_in, + line_height = line_height, + min_in = min_in, + na_str = na_str, max_rows = max_rows, breaks = breaks, + budget_seconds = budget_seconds, + max_iter = max_iter + ) + }, error = function(e) original) + + # Defensive sanity check before returning. If anything looks off + # (length changed, NAs, total width drifted), return the original + # widths so the caller sees water-fill behavior unchanged. + if (length(result) != length(original) || anyNA(result) || + abs(sum(result) - sum(original)) > 1e-3) { + return(original) + } + result +} + +# Search loop kept in its own function for readability; assumes the +# scratch device is already open and an outer viewport is active. +.height_balance_widths_impl <- function(widths_in, resolved_cols, data, tbl, + wrap_eligible, h_pad_in, v_pad_in, + wrap_extra_pad_in, line_height, + min_in, na_str, max_rows, breaks, + budget_seconds, max_iter) { + n <- length(widths_in) + eps <- 1e-6 + + # Per-column gpars - constant across iterations, so resolve once. + cell_gps <- lapply(resolved_cols, function(cs) { + .gp_with_lineheight( + .resolve_table_cell_gp(tbl$gp, cs$is_group_col), line_height + ) + }) + hdr_gp <- .gp_with_lineheight( + .resolve_table_gp(tbl$gp, "header_row"), line_height + ) + + # Pre-extract per-column cell-string vectors (one per column). + cell_strs_list <- lapply(resolved_cols, function(cs) { + .fmt_cell_vec(data[[cs$col]], na_str) + }) + + # Per-column floors and natural widths. Floors prevent shrinking past + # the column's longest unbreakable token; naturals prevent growing past + # the column's full content width (no benefit past that). + floors <- widths_in + natural <- widths_in + for (j in which(wrap_eligible)) { + cs <- resolved_cols[[j]] + parts <- .split_col_strings(data[[cs$col]], cs$label, na_str, max_rows) + t_data <- .column_min_token_width_in(parts$data, cell_gps[[j]], breaks) + t_hdr <- .column_min_token_width_in(parts$header, hdr_gp, breaks) + floors[[j]] <- max(min_in, max(t_data, t_hdr) + h_pad_in) + w_data <- .measure_max_string_width(parts$data, cell_gps[[j]]) + w_hdr <- .measure_max_string_width(parts$header, hdr_gp) + natural[[j]] <- max(min_in, max(w_data, w_hdr) + h_pad_in) + # No clamp on natural: a column whose post-water-fill width is below its + # natural width SHOULD be allowed to grow back up to natural during + # height-balance. The headroom check (natural - widths_in) handles the + # already-at-or-above-natural case by falling out via headroom <= eps. + # If floor is above current (an artifact of upstream invariants), + # clamp so the algorithm doesn't try to shrink an already-narrow column. + if (floors[[j]] > widths_in[[j]]) floors[[j]] <- widths_in[[j]] + } + + # Cache key: paste0(j, "|", round(width, 3)). Value: list with header + # height (no v_pad) and cell heights vector (no v_pad). + cache <- new.env(hash = TRUE, parent = emptyenv()) + + measure_col <- function(j, width) { + key <- paste0(j, "|", sprintf("%.3f", width)) + if (exists(key, envir = cache, inherits = FALSE)) { + return(get(key, envir = cache, inherits = FALSE)) + } + cs <- resolved_cols[[j]] + cgp <- cell_gps[[j]] + avail <- max(0, width - h_pad_in) + + # Header + hdr_label <- cs$label %||% "" + if (isTRUE(cs$wrap) && nzchar(hdr_label)) { + hdr_label <- .wrap_string(hdr_label, avail, hdr_gp, breaks) + } + h_nlines <- max(1L, length(strsplit(hdr_label, "\n", fixed = TRUE)[[1L]])) + h_grob <- grid::textGrob(hdr_label, gp = hdr_gp) + h_g <- .height_in(grid::grobHeight(h_grob)) + h_l <- h_nlines * .height_in(grid::stringHeight("M")) + hdr_extra <- if (h_nlines > 1L) wrap_extra_pad_in else 0 + header_h <- max(h_g, h_l) + hdr_extra + + # Cells - dedupe to amortise grob measurement cost across repeats. + strs <- cell_strs_list[[j]] + unique_strs <- unique(strs) + h_map <- vapply(unique_strs, function(s) { + disp <- if (isTRUE(cs$wrap) && nzchar(s)) { + .wrap_string(s, avail, cgp, breaks) + } else s + nl <- max(1L, length(strsplit(disp, "\n", fixed = TRUE)[[1L]])) + g <- grid::textGrob(disp, gp = cgp) + hg <- .height_in(grid::grobHeight(g)) + hl <- nl * .height_in(grid::stringHeight("M")) + ex <- if (nl > 1L) wrap_extra_pad_in else 0 + max(hg, hl) + ex + }, numeric(1L)) + names(h_map) <- unique_strs + cell_h <- as.numeric(h_map[strs]) + + out <- list(header_h = header_h, cell_h = cell_h) + assign(key, out, envir = cache) + out + } + + # Estimate total table height (header_row_h + sum of row heights). + # v_pad_in is added once per row and once for the header, since every + # cell in a row contributes the same v_pad. + estimate_total <- function(w) { + per_col <- lapply(seq_along(w), function(j) measure_col(j, w[[j]])) + hdr_h <- max(vapply(per_col, function(x) x$header_h, numeric(1L))) + + v_pad_in + cell_h_mat <- do.call(cbind, + lapply(per_col, function(x) x$cell_h)) + if (is.null(dim(cell_h_mat))) { + # Single row: cbind of length-1 numeric vectors gives a 1xN matrix + # but lapply across rows in a column-of-1 case yields 1-vectors. + # Defensive reshape so apply() works. + cell_h_mat <- matrix(cell_h_mat, nrow = 1L) + } + row_h_vec <- apply(cell_h_mat, 1L, max) + v_pad_in + hdr_h + sum(row_h_vec) + } + + current_h <- estimate_total(widths_in) + start_t <- Sys.time() + + eligible_idx <- which(wrap_eligible) + + for (iter in seq_len(max_iter)) { + if (as.numeric(difftime(Sys.time(), start_t, units = "secs")) > + budget_seconds) break + + # Identify the row with the maximum cell height (across all columns). + per_col <- lapply(seq_along(widths_in), + function(j) measure_col(j, widths_in[[j]])) + cell_h_mat <- do.call(cbind, + lapply(per_col, function(x) x$cell_h)) + if (is.null(dim(cell_h_mat))) cell_h_mat <- matrix(cell_h_mat, nrow = 1L) + row_h_vec <- apply(cell_h_mat, 1L, max) + if (length(row_h_vec) == 0L) break + worst_row <- which.max(row_h_vec) + row_cells <- cell_h_mat[worst_row, ] + + # Bottleneck = the wrap-eligible column whose cell drives the row max. + eligible_in_row <- intersect(eligible_idx, which(row_cells > 0)) + if (length(eligible_in_row) == 0L) break + cell_in_row <- row_cells[eligible_in_row] + bottleneck <- eligible_in_row[which.max(cell_in_row)] + + # Slack = the wrap-eligible column with the SHORTEST cell in this row + # (the column whose cell is most likely to absorb wrapping if narrowed). + slack_candidates <- setdiff(eligible_idx, bottleneck) + if (length(slack_candidates) == 0L) break + slack_cells <- row_cells[slack_candidates] + slack <- slack_candidates[which.min(slack_cells)] + + # Constraint check: bottleneck must have headroom; slack must have + # room to give. + headroom_b <- natural[[bottleneck]] - widths_in[[bottleneck]] + give_room_s <- widths_in[[slack]] - floors[[slack]] + if (headroom_b <= eps || give_room_s <= eps) break + + best_h <- current_h + best_w <- NULL + for (delta in c(0.5, 0.25, 0.1, 0.05)) { + d <- min(delta, headroom_b, give_room_s) + if (d <= eps) next + new_w <- widths_in + new_w[[slack]] <- new_w[[slack]] - d + new_w[[bottleneck]] <- new_w[[bottleneck]] + d + new_h <- estimate_total(new_w) + if (new_h < best_h - eps) { + best_h <- new_h + best_w <- new_w + } + } + + if (is.null(best_w)) break + widths_in <- best_w + current_h <- best_h + } + + widths_in +} diff --git a/README.md b/README.md index 65b0528..c26d049 100644 --- a/README.md +++ b/README.md @@ -279,20 +279,34 @@ it automatically across as many pages as needed: - **Column widths** — auto-sized from content, fixed (`unit()`), or relative-weight numeric. A floor is applied via `min_col_width`. -- **Word wrapping** — set `wrap_cols` to a column name (or `TRUE` for all data - columns) to reflow long text within a fixed column width. +- **Word wrapping (default-on)** — `wrap_cols = "auto"` (the default) + auto-detects which columns can wrap by looking for break characters in + cell or header content; columns that can't break (numeric, single-token) + are left at their natural width. Override with `wrap_cols = TRUE` (all + data columns), `FALSE` (disable), or a character vector of column names. + Configure break behavior via `wrap_breaks()` — the default breaks on + whitespace; pass `keep_before = "-"` to also break after hyphens. +- **Wrap optimization** — `wrap_balance = "height"` opts into a bounded + pass that redistributes width between wrap-eligible columns to reduce + total table height (more rows per page) when content density is uneven. + Falls back silently to the default width-balanced layout on any error. - **Row pagination** — rows are split across pages with optional continuation markers (`row_cont_msg`). Groups are kept together where possible; a warning - is issued when a group must be split. + is issued when a group must be split. A row whose wrapped height exceeds + one page is rejected with a clear error (downgradable to a warning via + `overflow_action = "warn"`). - **Column pagination** — if total column width exceeds the page, columns are split across pages. Set `balance_col_pages = TRUE` to distribute columns - evenly rather than packing left-to-right. + evenly rather than packing left-to-right. This is independent from + text-wrap; the two compose freely. - **Group columns** — use `dplyr::group_by()` before passing to `tfl_table()`. Group columns repeat as row headers on every column-split page; repeated values in consecutive rows are suppressed by default. - **Typography and spacing** — `cell_padding` controls space inside each cell (vertical and horizontal independently); `line_height` controls inter-line - spacing in wrapped cells. Both can be overridden per section via `gp`. + spacing in wrapped cells; `wrap_extra_padding` (default `0.5` lines) adds + visual separation below multi-line cells. All overridable per section via + `gp`. - **Column specs** — use `tfl_colspec()` for per-column control of label, width, alignment, and wrapping in a single object. diff --git a/design/ARCHITECTURE.md b/design/ARCHITECTURE.md index d4f4574..82104e9 100644 --- a/design/ARCHITECTURE.md +++ b/design/ARCHITECTURE.md @@ -144,8 +144,13 @@ export_tfl(x = tfl_table_obj, ...) [exported] │ scratch device + outer_vp to measure annotation heights ├── resolve_col_specs(tbl) — table_columns.R ├── compute_col_widths(resolved_cols, ...) — table_columns.R - │ └── .apply_col_wrapping(...) - │ paginate_cols(...) + │ ├── auto-detect wrap eligibility via — wrap.R + │ │ .column_has_breakable_text(strings, breaks) + │ ├── .compute_wrapped_widths(...) — wrap.R + │ │ water-from-top narrowing using + │ │ .column_min_token_width_in(strings, gp, breaks) + │ │ as the per-column floor + │ └── paginate_cols(...) ├── [scratch device + outer_vp] measure heights: │ .measure_header_row_height() — table_utils.R │ measure_row_heights_tbl() → cell_h_mat — table_rows.R @@ -344,7 +349,8 @@ export_tfl(x = list_of_table1, ...) [exported] | `R/table1.R` | `export_tfl.table1()`, `table1_to_pagelist()`, `.extract_table1_annotations()`, `.table1_variable_groups()`, `.paginate_table1()`, `.paginate_oversized_group()` | | `R/reexports.R` | `%||%` from rlang | | `R/tfl_table.R` | `tfl_colspec()`, `tfl_table()`, `print.tfl_table()`, `.check_named_subset()` | -| `R/table_columns.R` | `resolve_col_specs()`, `compute_col_widths()`, `.apply_col_wrapping()`, `paginate_cols()` | +| `R/table_columns.R` | `resolve_col_specs()`, `compute_col_widths()`, `paginate_cols()` | +| `R/wrap.R` | `wrap_breaks()`, `wrap_breaks_default()`, `.is_wrap_breaks()`, `.tokenize_for_wrap()`, `.wrap_string()`, `.column_has_breakable_text()`, `.column_min_token_width_in()`, `.wrap_label_for_width()`, `.compute_wrapped_widths()` | | `R/table_rows.R` | `measure_row_heights_tbl()` (returns per-cell matrix), `.compute_page_row_heights()`, `paginate_rows()` | | `R/table_draw.R` | `build_table_grob()`, `drawDetails.tfl_table_grob()`, `.compute_cell_suppression()`, `.draw_header_row()`, `.draw_cont_row()`, `.draw_cell_text()` | | `R/table_pagelist.R` | `tfl_table_to_pagelist()`, `compute_table_content_area()` | diff --git a/design/DECISIONS.md b/design/DECISIONS.md index bf2d2e2..c70db8a 100644 --- a/design/DECISIONS.md +++ b/design/DECISIONS.md @@ -1034,3 +1034,134 @@ that did not use multi-line group labels render identically (same row heights, same pagination). Tables that did use them now render in less vertical space, which may *increase* the number of rows on a page (and correspondingly *decrease* the total page count). + +--- + +## D-41: Column word-wrap module (issue #28) + +**Decision:** Move all text-wrap logic for `tfl_table` into a dedicated +module file `R/wrap.R` and make the module default-on under +`wrap_cols = "auto"`. Auto-detect promotes a column to wrap-eligible +when (and only when) its data or header contains a configured break +character. A new `wrap_breaks()` argument lets the user configure which +characters count as breaks, with whitespace as the default and an +opt-in `keep_before` slot for characters like `-` or `/` that stay on +the left of the break. Header labels are auto-wrapped using the same +mechanism as cell content. A single row whose wrapped height exceeds +one page is now flagged via the same `overflow_action = "error" / "warn"` +switch added in D-39 — input that would silently overflow becomes an +explicit failure. + +**User need (from issue #28):** "Any column that has spaces in any cell +of its contents should be considered for word wrapping ... start with +the widest column and consider it first then when it starts getting +shrunk too much start including the narrower columns." Plus +"characters to wrap on should be an argument defaulting to removing any +whitespace in favor of wrapping but other breaking characters like `-` +should also be usable where the `-` is broken after." Plus the +clarification that text-wrap and page-column-split are distinct +concepts that should be named and documented unambiguously. + +**Algorithm — water-from-top.** Replaces the prior "narrow the widest +column by the full excess" loop in `.apply_col_wrapping()`. Per +wrap-eligible column we compute a *floor* equal to +`max(min_col_width, longest_unbreakable_token_in_column)`. Each +iteration finds the maximal set of wrap-eligible columns above their +floor, computes the largest step that either (a) brings them down to +the next-widest competitor, (b) hits a floor, or (c) absorbs all +remaining excess, and applies that step uniformly to the whole set. +This keeps the widest columns balanced as they shrink rather than +crushing one column to its floor before considering the rest. +Deterministic, O(n²) in column count (n is the number of data +columns, typically << 30), terminates in at most `2n + 50` iterations +because each iteration either reduces excess by at least `eps`, +expands the active set, or saturates at a floor. + +**Break-character spec.** `wrap_breaks(drop, keep_before)` where: +- `drop` characters separate tokens *and* are consumed at the break + point (whitespace). Default: `c(" ", "\t")`. +- `keep_before` characters stay on the **left** of the break — the + character ends a token, and the next character starts a new token. + Useful for hyphenated terms (`-`) and path separators (`/`). + Default: `character(0)`. + +The two slots must be disjoint single-character vectors. The +constructor returns a `wrap_breaks` S3 object so the validator can +distinguish a valid spec from a raw list. + +**Auto-detect (`wrap_cols = "auto"`).** In `resolve_col_specs()` a +column whose effective `wrap` is unspecified (NULL on the colspec, NA +on `wrap_cols = "auto"`) is marked `wrap = NA`. +`compute_col_widths()` resolves NA to TRUE/FALSE in a single pass: a +column is auto-eligible iff it is non-group AND its strings contain +any character from `breaks$drop` or `breaks$keep_before`. Numeric +columns and single-token strings stay at FALSE because no narrowing +could break them. Explicit `wrap = TRUE` / `wrap = FALSE` always wins. + +**Header wrapping.** `.measure_header_row_height()` and +`.draw_header_row()` accept the `wrap_breaks` spec and call a new +helper `.wrap_label_for_width()` to reflow labels onto multiple lines +*before* measuring (so row heights are correct) and *before* drawing +(so the rendered layout matches). Headers wrap iff the column is +wrap-eligible AND has a resolved width — same condition as cell +wrapping. + +**Row-overflow guard.** `paginate_rows()` now accepts an +`overflow_action` argument and signals via `.overflow_signal()`. When +the first (and only) row on a page has a committed height larger than +the page, the algorithm fires the guard. The guard is suppressed for +the conservative bottom-continuation reserve case — i.e. when the row +would actually fit if no continuation marker were drawn after it — +because the existing pagination loop pessimistically reserves that +space whether or not it ends up being drawn. + +**Naming clarification.** The user pointed out that "wrap_cols" could +be misread as "split columns across pages". In writetfl the names are +already disjoint (`wrap_cols` controls text-wrap *within* a column; +`allow_col_split` controls splitting columns *across* pages) but the +roxygen and the new vignette section now spell out the distinction +explicitly. Both concepts are independent and freely composable. + +**Alternatives considered and rejected:** +- *Per-column `wrap_breaks` on `tfl_colspec`* — useful for a future + scenario where one column is a path and another is hyphenated, but + not needed for the issue. Easy follow-up because the table-level + spec already threads through `tbl$wrap_breaks`. +- *base R `strwrap()` instead of a custom algorithm* — `strwrap()` is + device-agnostic (counts characters, not rendered width) and does not + accept a `gpar()`. In a font-aware PDF context that's the wrong + layer. +- *Wrap headers always (regardless of `cs$wrap`)* — surprising for + users who explicitly set `wrap = FALSE` on a column to lock it. + Tying header-wrap to cell-wrap keeps one knob. +- *Make `wrap_cols = TRUE` mean "auto-detect"* — collapses two distinct + meanings. Keeping `TRUE` as "all data cols, no detection" matches + the prior semantics; `"auto"` is the new mode. + +**Files touched:** +- New: `R/wrap.R` (the module — break spec, tokenizer, `.wrap_string`, + auto-detect predicate, longest-token-floor measurement, water-from-top, + header-label helper). +- Modified: `R/tfl_table.R` (new `wrap_breaks` arg, `wrap_cols = "auto"` + default, `tfl_colspec(wrap = NA)` default, validation); `R/table_columns.R` + (auto-detect resolution, switch to `.compute_wrapped_widths()`, removed + `.apply_col_wrapping()`); `R/table_utils.R` (`.wrap_text()` is now a + default-breaks shim, `.measure_header_row_height()` accepts a `breaks` + arg); `R/table_draw.R` (drawDetails reads `tbl$wrap_breaks` and threads + through; `.draw_header_row()` wraps labels); `R/table_rows.R` + (`measure_row_heights_tbl()` accepts `breaks`, `paginate_rows()` adds + the row-overflow guard with `overflow_action`); `R/table_pagelist.R` + (passes `breaks` and `overflow_action` to the helpers). +- New tests: `tests/testthat/test-wrap.R` (47 unit tests for the + module). Extended `tests/testthat/test-tfl_table.R` with end-to-end + cases including auto-detect, header wrapping, `keep_before`, and the + row-overflow guard at both `error` and `warn` settings. +- New: `examples/wrap_demos.R` generating 14 demo PDFs and a README + for hands-on review. +- New vignette section in `vignettes/v03-tfl_table_styling.Rmd`. + +**Backward compatibility:** the `wrap_cols` default flips from `FALSE` +to `"auto"`, which can wrap previously-overflowing tables. Per the +project owner's confirmation, no backward-compatibility constraint is +in force at this development stage. Tables that already fit see no +behavioural change. diff --git a/design/DESIGN.md b/design/DESIGN.md index 484b212..b46cdb6 100644 --- a/design/DESIGN.md +++ b/design/DESIGN.md @@ -292,7 +292,95 @@ only function that actually knows the difference between `"error"` and `"warn"`. Each call site composes its own message; the helper handles dispatch and appends the diagnostic-mode hint. +The same knob now also gates the row-overflow guard added with the wrap +module (issue #28): a single row whose wrapped height exceeds one page is +fundamentally the same family of fault — output the user wrote that +cannot fit the layout they configured. Re-using `overflow_action` keeps +the user model symmetric across width and height. + This also keeps the API surface aligned with `min_content_height` (a single top-level argument controlling a single layout invariant) rather than with `overlap_warn_mm` (a tuning knob in `...`). Width overflow is a correctness-affecting condition, not a tuning detail. + +## Why a separate `R/wrap.R` module instead of leaving the algorithm in `R/table_columns.R`? + +The wrap algorithm is a self-contained concern with three responsibilities +that are independent of column-width computation: tokenize a string under +a configurable break spec, decide whether a column is wrap-eligible, and +narrow wrap-eligible columns under a fairness rule. Bundling these in a +single file makes the disable case (`wrap_cols = FALSE`) trivial to +reason about — every code path that consults `tbl$wrap_breaks` lives in +one place — and makes the read-and-review unit one file rather than three. + +The user explicitly asked for this as a robustness lever: "make it a +separate module that can be easily disabled in case it does something in +a way that the user would not want." Keeping the file boundary clean +makes future replacement (e.g. swapping in a Knuth-style optimal-break +algorithm) a single-file change. + +## Why "water-from-top" instead of one-column-at-a-time narrowing? + +The prior `.apply_col_wrapping()` shrank the *single* widest +wrap-eligible column by the entire excess every iteration. That converges +but produces an unfair distribution: the widest column repeatedly takes +the whole hit until it bottoms out at `min_col_width`, then the next +widest takes the remainder, and so on. For a table with three columns of +similar widths this leaves one column at minimum and two essentially +unchanged. + +Water-from-top instead identifies *all* columns at the current maximum +width and shrinks them together, stopping the step at whichever comes +first: the next-lower competitor (so the active set grows), a column +floor, or the remaining excess. This produces a balanced shrink — the +widest columns end up tied just above the next-widest, and so on, +matching the user's stated preference: "start with the widest column and +consider it first then when it starts getting shrunk too much start +including the narrower columns." + +The algorithm is still O(n²) and deterministic, but each iteration makes +strictly more progress in the visible-fairness sense. + +## Why a per-column wrap *floor* equal to the longest unbreakable token? + +A column cannot be narrower than its widest unbreakable token (after +break characters are applied). Setting only `min_col_width` as the floor +allows the algorithm to shrink the column below the rendered width of +its own content, after which the cell renders with overflow at draw +time. The pagination decisions (row heights, page splits) are then +based on a width that is impossible to honour visually. + +Instead, the wrap module computes per wrap-eligible column the rendered +width of the longest unbreakable token across the column's strings (data ++ header) and uses `max(min_col_width, longest_token_width + h_pad)` as +the floor. This keeps the algorithm honest: a width the algorithm +chooses is a width the renderer can actually deliver. + +For the user this also means setting a small `min_col_width` is safe — +the wrap module will never shrink a column below what its content +literally requires. + +## Why is text-wrap the default but page-column-split is independent? + +The two concepts answer different user questions: + +- **Text-wrap (`wrap_cols`):** "Can this column be narrower so the + table fits one page width?" +- **Page-column-split (`allow_col_split`):** "If the table cannot fit on + one page width, can I spread its columns across multiple pages?" + +Text-wrap is reversible at the data level (the cell content survives; +only the line breaks change). Page-column-split is a layout decision +that fragments the visual unit. Defaulting text-wrap on but leaving +page-split's existing default (`TRUE`) preserves the prior auto-split +behaviour while removing the single biggest cause of unintended overflow +— a wrappable string column wider than the page. + +The two also compose naturally: text-wrap runs first, narrowing what it +can; if the result still does not fit and `allow_col_split = TRUE`, the +splitter spreads the (narrower) columns across pages. + +The user observed casually that the names of the two concepts could be +read as the same thing. The vignette and the roxygen for both arguments +now spell out the distinction explicitly so the names cannot be +misread. diff --git a/design/TESTING.md b/design/TESTING.md index 7be4bb2..7d2c184 100644 --- a/design/TESTING.md +++ b/design/TESTING.md @@ -23,7 +23,8 @@ One test file per source file — `tests/testthat/test-.R` covers | `test-grob_builders.R` | `build_text_grob()`, `build_section_grobs()` | | `test-export_tfl.R` | `export_tfl()` — file validation, return values, preview mode, device lifecycle, tfl_table coercion, argument merging | | `test-export_tfl_page.R` | `export_tfl_page()` — argument resolution from x, overlap_warn_mm, page_i prefix, section presence, rules, page-level grob overflow under `overflow_action` (issue #30) | -| `test-table_utils.R` | `.compute_group_sizes()`, `.collect_col_strings()`, `.measure_max_string_width()`, `.wrap_text()` | +| `test-table_utils.R` | `.compute_group_sizes()`, `.collect_col_strings()`, `.measure_max_string_width()`, `.wrap_text()` (now a default-breaks shim) | +| `test-wrap.R` | `wrap_breaks()` constructor + validation; `.tokenize_for_wrap()` (drop / keep_before / mixed); `.wrap_string()` (paragraphs, single token, keep_before); `.column_has_breakable_text()`; `.column_min_token_width_in()` (floor calculation, keep_before reduces floor); `.wrap_label_for_width()`; `.compute_wrapped_widths()` (no-eligible no-op, water-from-top widest-first, longest-token floor) | | `test-table_draw.R` | `build_table_grob()`, `drawDetails.tfl_table_grob()` (uncached fallback, wrap branch, rotated col_cont_msg labels, first_data fallback) | | `test-tfl_table.R` | `tfl_colspec()`, `tfl_table()`, column/row pagination, column width calculation, col_cont_msg flags, `tfl_table_to_pagelist()` | | `test-sub_tfl.R` | `.compute_sub_tfl_groups()`, `.format_sub_tfl_caption()`, `.apply_sub_tfl_caption()`, `.strip_sub_tfl_cols()`, `.resolve_col_label()`, `tfl_table_to_pagelist()` sub_tfl branch (factor ordering, multi-column suffix, NULL caption, group_vars overlap, custom sep/collapse/prefix, label resolution via colspec) | @@ -238,7 +239,6 @@ Key areas covered: `footer_center` (new behaviour post D-23) - `paginate_cols()`: n_data == 0, balance paths, overflow fallback, single page, odd column counts, many columns (20), multiple group cols prepended -- `.apply_col_wrapping()`: no-eligible break path - `measure_row_heights_tbl()`: `max_measure_rows` sampling, wrap path - `tfl_table_to_pagelist()`: full pipeline smoke test, group validation, allow_col_split = FALSE error @@ -249,6 +249,17 @@ Key areas covered: wider than the page is reported with a `"Group column"` prefix; every abort and warning message includes the literal `overflow_action = "warn"` diagnostic hint. +- **Word-wrap module end-to-end (issue #28)**: `wrap_cols = "auto"` default + marks string columns wrap-eligible while leaving numeric columns alone; + a deliberately-too-wide table renders to a single PDF page when wrap is + on; `wrap_cols = FALSE` + `allow_col_split = FALSE` emits a clear + width-overflow error on the same input; long header text in a narrow + column auto-wraps without overflow; `wrap_breaks(keep_before = "-")` + causes hyphenated cell content to break after the hyphen; a single + cell whose wrapped height exceeds one page errors via the row-overflow + guard at the default `overflow_action = "error"` and produces a PDF + + warning under `overflow_action = "warn"`; `wrap_breaks` validation + rejects non-`wrap_breaks` input. --- diff --git a/examples/wrap_demos.R b/examples/wrap_demos.R new file mode 100644 index 0000000..2abcfd2 --- /dev/null +++ b/examples/wrap_demos.R @@ -0,0 +1,499 @@ +# examples/wrap_demos.R +# +# Hands-on demonstration of every behaviour in the column word-wrap module +# added for issue #28. Generates one PDF per scenario into a temporary +# directory plus a README.md summarising what each PDF shows and any +# captured warning text (for the row-overflow guard demo). +# +# Run from the worktree root: +# "C:/Program Files/R/R-4.5.2/bin/Rscript.exe" examples/wrap_demos.R + +suppressPackageStartupMessages({ + devtools::load_all(quiet = TRUE) +}) + +# --------------------------------------------------------------------------- +# Output directory +# --------------------------------------------------------------------------- + +# Place under the OS user-temp dir so the directory persists after this +# Rscript session exits (R's session tempdir() is wiped on shutdown). +.persistent_temp <- function() { + for (v in c("TEMP", "TMP", "TMPDIR")) { + val <- Sys.getenv(v, unset = "") + if (nzchar(val) && dir.exists(val)) return(val) + } + tempdir() +} +out_dir <- file.path(.persistent_temp(), + paste0("writetfl_wrap_demos_", + format(Sys.time(), "%Y%m%d_%H%M%S"))) +dir.create(out_dir, showWarnings = FALSE, recursive = TRUE) +readme <- file.path(out_dir, "wrap_demos_README.md") +cat("# wrap module demos (issue #28)\n\n", + "_Generated:_ ", format(Sys.time()), "\n\n", + "Each PDF below demonstrates one configuration of the new word-wrap ", + "module on the same family of inputs.\n\n", + file = readme, sep = "") + +# Helper: append a section to the README and run a generator that may +# also surface warnings or errors that we want recorded next to the PDF. +add_section <- function(file, title, blurb, generator) { + cat("## ", file, "\n\n", title, "\n\n", blurb, "\n\n", + file = readme, append = TRUE, sep = "") + msgs <- character(0L) + res <- tryCatch( + withCallingHandlers( + generator(), + warning = function(w) { + msgs <<- c(msgs, paste0("WARNING: ", conditionMessage(w))) + invokeRestart("muffleWarning") + } + ), + error = function(e) { + msgs <<- c(msgs, paste0("ERROR: ", conditionMessage(e))) + NULL + } + ) + if (length(msgs) > 0L) { + cat("```\n", paste(msgs, collapse = "\n"), "\n```\n\n", + file = readme, append = TRUE, sep = "") + } + invisible(res) +} + +p <- function(name) file.path(out_dir, name) + +# --------------------------------------------------------------------------- +# Common test data +# --------------------------------------------------------------------------- + +# Sized so each column individually fits a 6-inch page when not wrapped +# (so the page-column-split path can succeed) but the two together exceed +# the content area (so wrapping or splitting is needed). +wide_df <- data.frame( + alpha = rep(paste(rep("alpha", 7), collapse = " "), 3), + bravo = rep(paste(rep("bravo", 7), collapse = " "), 3), + count = c(101L, 202L, 303L) +) + +# A column whose contents only break on "-". +hyphen_df <- data.frame( + term = c("placebo-controlled-extension", + "double-blind-randomised-trial", + "open-label-rollover-study"), + n = c(120L, 88L, 64L) +) + +path_df <- data.frame( + path = c("/var/log/system/messages", + "/etc/cron.daily/backup-rotate", + "/usr/share/doc/writetfl/NEWS.md"), + size = c(1024L, 8456L, 612L) +) + +medclass_df <- data.frame( + `Concomitant Medication Class` = c("Statin", "ACE inhibitor", + "Beta blocker"), + n = c(45L, 78L, 22L), + check.names = FALSE +) + +# Three escalating-width string columns to show water-fill balance. +balance_df <- data.frame( + short = rep(paste(rep("aa bb", 4), collapse = " "), 3), + middle = rep(paste(rep("aa bb cc dd", 4), collapse = " "), 3), + longer = rep(paste(rep("aa bb cc dd ee ff gg hh", 4), collapse = " "), 3) +) + +# --------------------------------------------------------------------------- +# 01 — wrap off, allow_col_split = FALSE -> error +# --------------------------------------------------------------------------- + +add_section( + "01_off_overflows.pdf", + "Wrap off + page-column-split off on a too-wide table", + paste0("With `wrap_cols = FALSE` and `allow_col_split = FALSE` the ", + "package signals a clear width-overflow error rather than letting ", + "the table fall off the page. No PDF is produced; the captured ", + "error message is below."), + function() { + tbl <- tfl_table(wide_df, wrap_cols = FALSE, allow_col_split = FALSE) + export_tfl(tbl, file = p("01_off_overflows.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 02 — wrap_cols = "auto" (the new default) wraps the string cols +# --------------------------------------------------------------------------- + +add_section( + "02_auto_default.pdf", + "Default `wrap_cols = \"auto\"` auto-detects breakable columns", + paste0("Same data as 01. No `wrap_cols` argument means `\"auto\"` is in ", + "force: the two `alpha` / `bravo` string columns auto-wrap, the ", + "numeric `count` column is left at its natural width. The whole ", + "table now fits one page width."), + function() { + tbl <- tfl_table(wide_df, allow_col_split = FALSE) + export_tfl(tbl, file = p("02_auto_default.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 03 — wrap_cols = TRUE: identical visual outcome but explicit +# --------------------------------------------------------------------------- + +add_section( + "03_wrap_true_explicit.pdf", + "`wrap_cols = TRUE` (all data columns eligible)", + paste0("Marks every non-group column as wrap-eligible regardless of ", + "content. For this data the visual outcome matches 02 because ", + "the numeric column does not contain a break character; the ", + "wrap algorithm leaves it alone."), + function() { + tbl <- tfl_table(wide_df, wrap_cols = TRUE, allow_col_split = FALSE) + export_tfl(tbl, file = p("03_wrap_true_explicit.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 04 — wrap only named columns +# --------------------------------------------------------------------------- + +add_section( + "04_wrap_named_cols.pdf", + "`wrap_cols = c(\"alpha\")` - only `alpha` may wrap", + paste0("Only the `alpha` column is wrap-eligible; `bravo` keeps its full ", + "natural width. Because `alpha` alone has to absorb the entire ", + "page-width deficit, it ends up visibly narrower (more wrapped ", + "lines per cell) than in 02 where both string columns shared the ", + "burden. The whole table still fits on a single page here ", + "because the deficit is small enough that `alpha` does not hit ", + "its longest-token floor."), + function() { + tbl <- tfl_table(wide_df, wrap_cols = c("alpha")) + export_tfl(tbl, file = p("04_wrap_named_cols.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 05 — per-colspec override: opt one column out of "auto" +# --------------------------------------------------------------------------- + +add_section( + "05_per_colspec_override.pdf", + "Per-column override via `tfl_colspec(wrap = FALSE)`", + paste0("`wrap_cols = \"auto\"` would normally mark both string columns ", + "wrap-eligible; the per-column spec for `bravo` says \"no, never ", + "wrap me\". So only `alpha` wraps - same shape of result as 04 ", + "but the mechanism is different. The point: `tfl_colspec(wrap = ", + "FALSE)` is a hard veto that beats the table-level `wrap_cols`."), + function() { + tbl <- tfl_table( + wide_df, + cols = list(tfl_colspec("bravo", wrap = FALSE)) + ) + export_tfl(tbl, file = p("05_per_colspec_override.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 06 — keep_before = "-" on a hyphenated column +# --------------------------------------------------------------------------- + +add_section( + "06_keep_before_dash.pdf", + "`wrap_breaks(keep_before = \"-\")` breaks AFTER hyphens", + paste0("Each row of the `term` column is one long hyphenated phrase with ", + "no spaces. Default whitespace-only `wrap_breaks` cannot break it ", + "at all; with `keep_before = \"-\"` the wrap module breaks after ", + "each `-` and keeps the `-` on the upper line."), + function() { + tbl <- tfl_table( + hyphen_df, + wrap_breaks = wrap_breaks(drop = " ", keep_before = "-"), + cols = list(tfl_colspec("term", width = grid::unit(1.0, "inches"), + wrap = TRUE)) + ) + export_tfl(tbl, file = p("06_keep_before_dash.pdf"), + pg_width = 5, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 07 — keep_before = c("-", "/") on a path-like column +# --------------------------------------------------------------------------- + +add_section( + "07_keep_before_slash.pdf", + "`wrap_breaks(keep_before = c(\"-\", \"/\"))` for path-like content", + paste0("Demonstrates that multiple `keep_before` characters work ", + "together. Lines may end in `/` or `-`; either way the next ", + "character starts a new line."), + function() { + tbl <- tfl_table( + path_df, + wrap_breaks = wrap_breaks(drop = " ", keep_before = c("-", "/")), + cols = list(tfl_colspec("path", width = grid::unit(1.0, "inches"), + wrap = TRUE)) + ) + export_tfl(tbl, file = p("07_keep_before_slash.pdf"), + pg_width = 5, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 08 — header wraps when the column is narrow +# --------------------------------------------------------------------------- + +add_section( + "08_header_wraps.pdf", + "Header text auto-wraps when the column is wrap-eligible", + paste0("The header `Concomitant Medication Class` is much wider than the ", + "1.3-inch column but each individual word fits. The wrap module ", + "reflows the header onto three lines (`Concomitant` / ", + "`Medication` / `Class`) instead of letting it overflow. The ", + "column width is chosen to be wider than the longest single ", + "word in bold so wrap actually has somewhere to break - a ", + "narrower column would hit the longest-unbreakable-token floor ", + "and the bold-aware width measurement would refuse to undersize."), + function() { + tbl <- tfl_table( + medclass_df, + cols = list(tfl_colspec("Concomitant Medication Class", + width = grid::unit(1.3, "inches"), + wrap = TRUE)) + ) + export_tfl(tbl, file = p("08_header_wraps.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 09 — water-from-top balance: shrink widest, then equal pair +# --------------------------------------------------------------------------- + +add_section( + "09_water_fill_balance.pdf", + "Water-from-top algorithm: widest first, then equal pair", + paste0("Three escalating-width string columns. The wrap module shrinks ", + "the widest column first; once it matches the next-widest, the ", + "two shrink together. All three contribute fairly to absorbing ", + "the page-width deficit."), + function() { + tbl <- tfl_table(balance_df, allow_col_split = FALSE) + export_tfl(tbl, file = p("09_water_fill_balance.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 10 — column floor = longest unbreakable token +# --------------------------------------------------------------------------- + +add_section( + "10_floor_unbreakable.pdf", + "Wrap floor = widest unbreakable token", + paste0("The `drug` column contains a single long token with no break ", + "characters. No matter how aggressively the algorithm narrows ", + "the column, it cannot drop below the rendered width of that ", + "token. Notice the `drug` column ends up *wider* than ", + "`min_col_width` because the token sets the floor."), + function() { + df <- data.frame( + drug = rep("Cyclosporine_Microemulsion_Capsules_100mg", 3), + n = 1:3, + stringsAsFactors = FALSE + ) + tbl <- tfl_table(df, + min_col_width = grid::unit(0.2, "inches"), + allow_col_split = FALSE) + export_tfl(tbl, file = p("10_floor_unbreakable.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 11 — row-overflow guard: error vs warn +# --------------------------------------------------------------------------- + +add_section( + "11_row_overflow_error.pdf", + "Row-overflow guard fires when a wrapped cell exceeds page height", + paste0("A single cell containing a 6,500-character essay forced into a ", + "0.8-inch column wraps to a height larger than the page. The ", + "fail-safe in `paginate_rows()` rejects this with a clear error ", + "(no PDF written). See the captured stderr below."), + function() { + long_essay <- paste(rep(paste(rep("aa bb cc dd ee ff", 3), + collapse = " "), 120), + collapse = " ") + df <- data.frame(notes = long_essay, stringsAsFactors = FALSE) + tbl <- tfl_table(df, + cols = list(tfl_colspec("notes", + width = grid::unit(0.8, "inches"), + wrap = TRUE))) + export_tfl(tbl, file = p("11_row_overflow_error.pdf"), + pg_width = 4, pg_height = 8.5, + min_content_height = grid::unit(0.5, "inches")) + } +) + +add_section( + "11b_row_overflow_warn.pdf", + "Same input under `overflow_action = \"warn\"`", + paste0("Identical input to 11 but the user opts in to `overflow_action ", + "= \"warn\"`. The PDF is produced (with the over-tall row clipped ", + "by the page) and the warning is captured. Use this only as a ", + "diagnostic - the output is still wrong; the input needs to ", + "change."), + function() { + long_essay <- paste(rep(paste(rep("aa bb cc dd ee ff", 3), + collapse = " "), 120), + collapse = " ") + df <- data.frame(notes = long_essay, stringsAsFactors = FALSE) + tbl <- tfl_table(df, + cols = list(tfl_colspec("notes", + width = grid::unit(0.8, "inches"), + wrap = TRUE))) + export_tfl(tbl, file = p("11b_row_overflow_warn.pdf"), + pg_width = 4, pg_height = 8.5, + min_content_height = grid::unit(0.5, "inches"), + overflow_action = "warn") + } +) + +# --------------------------------------------------------------------------- +# 12 — text-wrap vs page-column-split independence (composable) +# --------------------------------------------------------------------------- + +add_section( + "12_text_wrap_only.pdf", + "Text-wrap on, page-split off (composability part 1)", + "Wide table. `wrap_cols = TRUE`, `allow_col_split = FALSE`. Text-wrap alone resolves it.", + function() { + tbl <- tfl_table(wide_df, wrap_cols = TRUE, allow_col_split = FALSE) + export_tfl(tbl, file = p("12_text_wrap_only.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +add_section( + "12_page_split_only.pdf", + "Text-wrap off, page-split on (composability part 2)", + "Wide table. `wrap_cols = FALSE`, `allow_col_split = TRUE`. Page-column-split alone resolves it.", + function() { + tbl <- tfl_table(wide_df, wrap_cols = FALSE, allow_col_split = TRUE) + export_tfl(tbl, file = p("12_page_split_only.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +add_section( + "12_both.pdf", + "Text-wrap and page-split both on (composability part 3)", + paste0("Wide table. Both `wrap_cols = TRUE` and `allow_col_split = TRUE`. ", + "Text-wrap runs first; the page-split fallback runs only if ", + "wrapping does not fit everything. The two are independent ", + "concepts and freely composable."), + function() { + tbl <- tfl_table(wide_df, wrap_cols = TRUE, allow_col_split = TRUE) + export_tfl(tbl, file = p("12_both.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# 13 — module fully disabled +# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# 14 - height-balance opt-in vs default width-balance +# --------------------------------------------------------------------------- + +# notes_a (24 alpha tokens) is dense; notes_b (5 alpha tokens) is sparse. +# Water-fill puts both columns at near-equal widths. At those widths +# notes_a wraps to one more line than notes_b, and that one extra line +# applied to every row makes the table spill across two pages. +asym_df <- data.frame( + notes_a = rep(paste(rep("alpha", 24), collapse = " "), 7), + notes_b = rep(paste(rep("alpha", 5), collapse = " "), 7) +) + +add_section( + "14_balance_width.pdf", + "Default `wrap_balance = \"width\"` on asymmetric content", + paste0("Water-fill makes the two wrap-eligible columns roughly equal in ", + "width. notes_a (24 dense tokens) wraps to 5 lines per cell; ", + "notes_b (5 dense tokens) wraps to 1 line. The row height is ", + "5 lines and the table needs two pages."), + function() { + tbl <- tfl_table(asym_df, allow_col_split = FALSE, + wrap_balance = "width") + export_tfl(tbl, file = p("14_balance_width.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +add_section( + "14_balance_height.pdf", + "Opt-in `wrap_balance = \"height\"` on the same input", + paste0("Same data and page as 14_balance_width. The opt-in height-", + "balance pass shifts width from notes_b (which had only 5 tokens ", + "per cell - its 1 line of content barely needed half its width) ", + "to notes_a, dropping notes_a from 5 lines to 4 while notes_b ", + "becomes 2 lines. Total table height drops by ~20%, and the ", + "table now fits on a single page."), + function() { + tbl <- tfl_table(asym_df, allow_col_split = FALSE, + wrap_balance = "height") + export_tfl(tbl, file = p("14_balance_height.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +add_section( + "13_disabled_module.pdf", + "Module disabled with `wrap_cols = FALSE`", + paste0("`wrap_cols = FALSE` is the escape hatch. No text-wrap is ", + "attempted; the page-column-split fallback handles too-wide ", + "tables. Visually identical to pre-PR behaviour."), + function() { + tbl <- tfl_table(wide_df, wrap_cols = FALSE) + export_tfl(tbl, file = p("13_disabled_module.pdf"), + pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")) + } +) + +# --------------------------------------------------------------------------- +# Summary +# --------------------------------------------------------------------------- + +cat("Demos written to:\n ", normalizePath(out_dir, mustWork = FALSE), "\n", + sep = "") +cat("Open the README at:\n ", + normalizePath(readme, mustWork = FALSE), "\n", sep = "") +cat("\nOn Windows: start \"\" \"", normalizePath(out_dir, mustWork = FALSE), + "\"\n", sep = "") diff --git a/man/dot-apply_col_wrapping.Rd b/man/dot-apply_col_wrapping.Rd deleted file mode 100644 index e754b71..0000000 --- a/man/dot-apply_col_wrapping.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/table_columns.R -\name{.apply_col_wrapping} -\alias{.apply_col_wrapping} -\title{Iteratively narrow wrap-eligible columns until total fits or all at min} -\usage{ -.apply_col_wrapping( - widths_in, - resolved_cols, - data, - tbl, - content_width_in, - min_in, - h_pad_in, - na_str, - max_rows, - pg_width, - pg_height, - margins -) -} -\description{ -Iteratively narrow wrap-eligible columns until total fits or all at min -} -\keyword{internal} diff --git a/man/dot-column_has_breakable_text.Rd b/man/dot-column_has_breakable_text.Rd new file mode 100644 index 0000000..ab6fdb5 --- /dev/null +++ b/man/dot-column_has_breakable_text.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{.column_has_breakable_text} +\alias{.column_has_breakable_text} +\title{Does any string in \code{strings} contain a break character?} +\usage{ +.column_has_breakable_text(strings, breaks) +} +\description{ +Used by the \code{wrap_cols = "auto"} path: a column with no breakable text +is skipped because no amount of narrowing can wrap it. +} +\keyword{internal} diff --git a/man/dot-column_min_token_width_in.Rd b/man/dot-column_min_token_width_in.Rd new file mode 100644 index 0000000..d6d1e84 --- /dev/null +++ b/man/dot-column_min_token_width_in.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{.column_min_token_width_in} +\alias{.column_min_token_width_in} +\title{Width (inches) of the widest unbreakable token across a column's strings.} +\usage{ +.column_min_token_width_in(strings, gp, breaks) +} +\description{ +This is the wrapping floor: a column cannot be narrowed below the width +needed to render its longest single token. +} +\keyword{internal} diff --git a/man/dot-compute_wrapped_widths.Rd b/man/dot-compute_wrapped_widths.Rd new file mode 100644 index 0000000..d33fa01 --- /dev/null +++ b/man/dot-compute_wrapped_widths.Rd @@ -0,0 +1,52 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{.compute_wrapped_widths} +\alias{.compute_wrapped_widths} +\title{Iteratively narrow wrap-eligible columns to fit \code{content_width_in}.} +\usage{ +.compute_wrapped_widths( + widths_in, + resolved_cols, + data, + tbl, + content_width_in, + h_pad_in, + min_in, + pg_width, + pg_height, + margins +) +} +\arguments{ +\item{widths_in}{Numeric vector of current per-column widths in inches.} + +\item{resolved_cols}{The \code{resolve_col_specs()} output.} + +\item{data}{The full data frame from \code{tbl$data}.} + +\item{tbl}{A \code{tfl_table} object (used for \code{gp}, \code{cell_padding}, +\code{line_height}, \code{na_string}, \code{max_measure_rows}, \code{min_col_width}, +\code{wrap_breaks}).} + +\item{content_width_in}{Numeric target total width in inches.} + +\item{h_pad_in}{Horizontal padding (left+right) in inches.} + +\item{min_in}{\code{min_col_width} resolved to inches.} + +\item{pg_width, pg_height, margins}{Forwarded to the scratch device.} +} +\value{ +Updated \code{widths_in}. +} +\description{ +Replaces the older single-target \code{.apply_col_wrapping()} with a fairer +"water-from-top" pass: each iteration finds the widest set of +wrap-eligible columns above their floor and shrinks them together +until the next-lower competitor or a floor is hit. Floors honour +\code{min_col_width} AND the longest unbreakable token in the column. +} +\details{ +Deterministic, O(n^2) in column count, n <= ~30 in practice. +} +\keyword{internal} diff --git a/man/dot-height_balance_widths.Rd b/man/dot-height_balance_widths.Rd new file mode 100644 index 0000000..fba5597 --- /dev/null +++ b/man/dot-height_balance_widths.Rd @@ -0,0 +1,60 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{.height_balance_widths} +\alias{.height_balance_widths} +\title{Redistribute widths between wrap-eligible columns to lower total height} +\usage{ +.height_balance_widths( + widths_in, + resolved_cols, + data, + tbl, + h_pad_in, + na_str, + max_rows, + breaks, + pg_width, + pg_height, + margins, + budget_seconds = 1, + max_iter = 20L +) +} +\description{ +Opt-in pass triggered by \code{tfl_table(wrap_balance = "height")}. Runs after +\code{.compute_wrapped_widths()} (water-fill) and uses a bounded greedy local +search: at each iteration find the row whose cells are tallest, identify +the wrap-eligible column that drives that row's height (the "bottleneck") +and the wrap-eligible column with the most slack (shortest cell content +in that row, with room to give up width down to its floor), and try +transferring a width delta from slack to bottleneck. Accept the move +that reduces the \emph{total table height} (sum of per-row heights, plus +header). Stop when no transfer at any tested delta improves total +height, when \code{max_iter} is reached, or when \code{budget_seconds} of +wall-time is exhausted. +} +\details{ +Cell heights at each \verb{(column, width)} pair are cached so repeat +measurements during the search are free; with cell-string deduplication +inside one column, the total measurement cost is bounded by +\code{n_unique_cells * n_unique_widths_explored} per column. + +Invariants: +\itemize{ +\item Total width is preserved exactly (every move is a transfer). +\item No column shrinks below its floor (the larger of \code{min_col_width} and +the rendered width of its longest unbreakable token under either the +cell or header gpar). +\item No column grows past its natural width (max content width including +bold-rendered header tokens). +\item Any error or invariant violation falls back silently to the input +widths, so opting in cannot produce a \emph{worse} table than the default. +} + +Approximation: the cost function ignores the rowspan-style group-cell +suppression handled by \code{.compute_page_row_heights()} - group columns are +typically not wrap-eligible (auto-detect skips them), so they don't +participate in moves; the approximation only marginally affects which +move is "best" when group columns happen to dominate a row's height. +} +\keyword{internal} diff --git a/man/dot-is_wrap_breaks.Rd b/man/dot-is_wrap_breaks.Rd new file mode 100644 index 0000000..c2a9932 --- /dev/null +++ b/man/dot-is_wrap_breaks.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{.is_wrap_breaks} +\alias{.is_wrap_breaks} +\title{Predicate for wrap_breaks objects.} +\usage{ +.is_wrap_breaks(x) +} +\description{ +Predicate for wrap_breaks objects. +} +\keyword{internal} diff --git a/man/dot-wrap_label_for_width.Rd b/man/dot-wrap_label_for_width.Rd new file mode 100644 index 0000000..2d15ec2 --- /dev/null +++ b/man/dot-wrap_label_for_width.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{.wrap_label_for_width} +\alias{.wrap_label_for_width} +\title{Wrap a column-header label (or any single string) to a target width +minus left+right horizontal padding.} +\usage{ +.wrap_label_for_width(label, width_in, h_pad_in, gp, breaks) +} +\description{ +Wrap a column-header label (or any single string) to a target width +minus left+right horizontal padding. +} +\keyword{internal} diff --git a/man/dot-wrap_string.Rd b/man/dot-wrap_string.Rd new file mode 100644 index 0000000..22aa076 --- /dev/null +++ b/man/dot-wrap_string.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{.wrap_string} +\alias{.wrap_string} +\title{Wrap text to fit a target width, preserving paragraph breaks.} +\usage{ +.wrap_string(text, available_w_in, gp, breaks = wrap_breaks_default()) +} +\arguments{ +\item{text}{Single character string.} + +\item{available_w_in}{Numeric, available width in inches.} + +\item{gp}{A \code{gpar()} for measurement font context.} + +\item{breaks}{A \code{wrap_breaks} object; if \code{NULL}, the package default.} +} +\value{ +A single character string, possibly with \verb{\\n} inserted at break +points. +} +\description{ +Greedy left-to-right packing. Paragraphs (separated by \verb{\\n} in \code{text}) +are wrapped independently and the results re-joined with \verb{\\n}. Within +a paragraph the break-character spec controls where breaks may occur; +a single token wider than \code{available_w_in} is emitted unchanged on its +own line because there is no valid break point inside it. +} +\keyword{internal} diff --git a/man/measure_row_heights_tbl.Rd b/man/measure_row_heights_tbl.Rd index e67ecc9..8ad051c 100644 --- a/man/measure_row_heights_tbl.Rd +++ b/man/measure_row_heights_tbl.Rd @@ -11,7 +11,9 @@ measure_row_heights_tbl( cell_padding, na_string, line_height, - max_measure_rows + max_measure_rows, + breaks = NULL, + wrap_extra_pad_in = 0 ) } \arguments{ diff --git a/man/paginate_rows.Rd b/man/paginate_rows.Rd index 8b47a7a..7aec55f 100644 --- a/man/paginate_rows.Rd +++ b/man/paginate_rows.Rd @@ -14,7 +14,8 @@ paginate_rows( content_height_in, row_cont_msg, group_rule, - suppress_repeated_groups = TRUE + suppress_repeated_groups = TRUE, + overflow_action = "error" ) } \arguments{ @@ -39,6 +40,12 @@ non-group columns).} use; currently does not affect pagination because rules are 0-height.)} \item{suppress_repeated_groups}{Logical, from \code{tbl$suppress_repeated_groups}.} + +\item{overflow_action}{One of \code{"error"} (default) or \code{"warn"}. Controls how +the row-overflow guard reports a single row whose committed height +exceeds the available page content height (a row that wraps to taller +than one page is almost always a sign of input that needs to change). +The same knob downgrades column-overflow events; see \code{\link[=export_tfl_page]{export_tfl_page()}}.} } \value{ A list of row-page specs, each with \verb{$rows}, \verb{$is_cont_top}, diff --git a/man/tfl_colspec.Rd b/man/tfl_colspec.Rd index 7a6e424..0f43c5a 100644 --- a/man/tfl_colspec.Rd +++ b/man/tfl_colspec.Rd @@ -9,7 +9,7 @@ tfl_colspec( label = NULL, width = NULL, align = NULL, - wrap = FALSE, + wrap = NA, gp = NULL ) } @@ -28,8 +28,14 @@ placed). \code{NULL} triggers content-based auto-sizing.} \item{align}{Character scalar: \code{"left"}, \code{"right"}, or \code{"centre"}. \code{NULL} defaults to \code{"right"} for numeric columns and \code{"left"} otherwise.} -\item{wrap}{Logical. Whether this column is eligible for word-wrapping when -total column widths exceed available width.} +\item{wrap}{Logical of length 1: \code{TRUE}, \code{FALSE}, or \code{NA}. Controls +text-wrapping eligibility \emph{within this column}. \code{NA} (the default) means +"inherit from the table-level \code{\link[=tfl_table]{tfl_table()}}'s \code{wrap_cols} setting"; under +the default \code{wrap_cols = "auto"} that resolves to \code{TRUE} when any cell or +the header contains a break character (see \code{wrap_breaks}). \code{TRUE} / +\code{FALSE} are explicit overrides. This is \strong{text wrap} inside a column; +for splitting a too-wide table across pages see \code{allow_col_split} in +\code{\link[=tfl_table]{tfl_table()}}.} \item{gp}{A \code{gpar()} object to override \code{\link[=tfl_table]{tfl_table()}}'s \code{gp$group_col} for this specific column. Only valid for row-header (group) columns; an error diff --git a/man/tfl_table.Rd b/man/tfl_table.Rd index 85c8079..8ea4acc 100644 --- a/man/tfl_table.Rd +++ b/man/tfl_table.Rd @@ -10,7 +10,9 @@ tfl_table( col_widths = NULL, col_labels = NULL, col_align = NULL, - wrap_cols = FALSE, + wrap_cols = "auto", + wrap_breaks = NULL, + wrap_balance = c("width", "height"), min_col_width = grid::unit(0.5, "inches"), allow_col_split = TRUE, balance_col_pages = FALSE, @@ -32,6 +34,7 @@ tfl_table( gp = list(), cell_padding = grid::unit(c(0.2, 0.5), "lines"), line_height = 1.05, + wrap_extra_padding = grid::unit(0.5, "lines"), max_measure_rows = Inf ) } @@ -54,9 +57,43 @@ multiline column headers. Overridden per-column by \code{tfl_colspec(label)}.} \item{col_align}{Named character vector. Each element is \code{"left"}, \code{"right"}, or \code{"centre"}. Overridden per-column by \code{tfl_colspec(align)}.} -\item{wrap_cols}{Column-wrapping eligibility. \code{TRUE} = all non-group -columns eligible; \code{FALSE} = none eligible; character vector = those -specific column names. Overridden per-column by \code{tfl_colspec(wrap)}.} +\item{wrap_cols}{Text-wrap eligibility \emph{within columns}. Controls whether +long cell text and column-header labels may be broken across multiple +lines so that the column can be narrower. \strong{This is not the same thing +as splitting a too-wide table across pages} — see \code{allow_col_split} +for that. +\itemize{ +\item \code{"auto"} (default) — every non-group column whose data or header +contains a \code{wrap_breaks} character is eligible. Numeric / single-token +columns are skipped because they can't break. +\item \code{TRUE} — all non-group columns eligible regardless of content. +\item \code{FALSE} — disable the text-wrap module entirely. +\item Character vector of column names — only those columns are eligible. +} + +Overridden per-column by \code{tfl_colspec(wrap)}.} + +\item{wrap_breaks}{A \code{wrap_breaks()} object specifying the characters at +which the wrap module is allowed to break. The default, +\code{wrap_breaks(drop = c(" ", "\\t"), keep_before = character(0))}, breaks on +whitespace and consumes the whitespace at the break point. Pass +\code{wrap_breaks(keep_before = "-")} to also break after \code{-} (the \code{-} stays +on the left of the break).} + +\item{wrap_balance}{Either \code{"width"} (default) or \code{"height"}. Controls +what the wrap-narrowing pass optimises for: +\itemize{ +\item \code{"width"} — balance widths between wrap-eligible columns so the +widest columns shrink together (water-from-top). Cheap and +deterministic; produces visually balanced columns. +\item \code{"height"} — opt-in heuristic that runs after the width-balance pass +and redistributes width between wrap-eligible columns to lower the +total table height (more rows per page). Time-budgeted at ~1 s; if +the search fails or overruns, the result silently falls back to +the width-balanced widths so opting in cannot produce a \emph{worse} +table than the default. Useful when string columns have very +different content density. +}} \item{min_col_width}{Minimum column width as a \code{unit} object.} @@ -189,6 +226,13 @@ adds a small 5\% breathing room. If a \code{gpar()} supplied through the \code{g argument already contains an explicit \code{lineheight} field for a particular section, that value takes precedence over this parameter.} +\item{wrap_extra_padding}{A \code{unit} object specifying additional vertical +space added at the bottom of any multi-line cell so the visual gap +between consecutive rows is more obvious when one or both contain +wrapped or \verb{\\n}-broken text. Default \code{unit(0.5, "lines")}. Set to +\code{unit(0, "lines")} to disable. Only multi-line cells receive the extra; +single-line cells are unaffected.} + \item{max_measure_rows}{Positive numeric or \code{Inf} (default). Maximum number of unique cell strings sampled per column when computing content-based column widths. Strings are sampled in descending order of \code{nchar()} so @@ -222,8 +266,7 @@ df <- group_by(mtcars, cyl) tbl <- tfl_table( df, col_labels = c(mpg = "MPG", hp = "Horse-\npower"), - col_align = c(mpg = "right", hp = "right"), - wrap_cols = FALSE + col_align = c(mpg = "right", hp = "right") ) export_tfl(tbl, diff --git a/man/wrap_breaks.Rd b/man/wrap_breaks.Rd new file mode 100644 index 0000000..2f6f124 --- /dev/null +++ b/man/wrap_breaks.Rd @@ -0,0 +1,37 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{wrap_breaks} +\alias{wrap_breaks} +\title{Specify how strings are broken when wrapping table text} +\usage{ +wrap_breaks(drop = c(" ", "\\t"), keep_before = character(0L)) +} +\arguments{ +\item{drop}{Character vector of single-character break points consumed +at the break. Defaults to \code{c(" ", "\\t")}.} + +\item{keep_before}{Character vector of single-character break points +preserved on the left of the break. Defaults to \code{character(0)}.} +} +\value{ +A list of class \code{"wrap_breaks"} with components \code{drop} and +\code{keep_before}. +} +\description{ +A \code{wrap_breaks} object lists the characters at which \code{.wrap_string()} is +allowed to insert a line break. Two modes are supported: +} +\details{ +\itemize{ +\item \code{drop} characters are consumed at the break point. The default +(\code{" "} and \code{"\\t"}) means runs of whitespace disappear when a wrap +occurs there but stay inline otherwise. +\item \code{keep_before} characters stay on the left of the break - the character +is preserved at the end of the upper line and the \emph{next} character +starts the new line. Typical use: \code{"-"} so that a hyphenated term +like \code{"placebo-controlled"} can wrap to \code{"placebo-\\ncontrolled"}. +} + +\code{drop} and \code{keep_before} must be disjoint single-character vectors. +} +\keyword{internal} diff --git a/man/wrap_breaks_default.Rd b/man/wrap_breaks_default.Rd new file mode 100644 index 0000000..bb1ed49 --- /dev/null +++ b/man/wrap_breaks_default.Rd @@ -0,0 +1,12 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/wrap.R +\name{wrap_breaks_default} +\alias{wrap_breaks_default} +\title{Package-default break spec (whitespace only).} +\usage{ +wrap_breaks_default() +} +\description{ +Package-default break spec (whitespace only). +} +\keyword{internal} diff --git a/tests/testthat/test-tfl_table.R b/tests/testthat/test-tfl_table.R index 4704f9f..1628e3f 100644 --- a/tests/testthat/test-tfl_table.R +++ b/tests/testthat/test-tfl_table.R @@ -46,7 +46,7 @@ test_that("tfl_colspec creates object with correct class", { expect_null(cs$label) expect_null(cs$width) expect_null(cs$align) - expect_false(cs$wrap) + expect_true(is.na(cs$wrap)) # default NA = inherit from tfl_table(wrap_cols) expect_null(cs$gp) }) @@ -711,6 +711,339 @@ test_that("wrap_cols reduces wide column widths", { expect_lte(result$resolved_cols[[a_idx]]$width_in, 4) }) +# --------------------------------------------------------------------------- +# End-to-end wrap behaviour (issue #28) +# --------------------------------------------------------------------------- + +test_that("wrap_cols default 'auto' marks string columns with spaces eligible and skips numeric columns", { + df <- data.frame( + a = rep(paste(rep("word", 30), collapse = " "), 3), + b = c(1.234, 5.678, 9.012), + stringsAsFactors = FALSE + ) + tbl <- tfl_table(df) # uses default wrap_cols = "auto" + result <- compute_col_widths( + resolve_col_specs(tbl), tbl$data, content_width_in = 4, + tbl, pg_width = 11, pg_height = 8.5, + margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + a_spec <- result$resolved_cols[[which(vapply(result$resolved_cols, `[[`, "", "col") == "a")]] + b_spec <- result$resolved_cols[[which(vapply(result$resolved_cols, `[[`, "", "col") == "b")]] + expect_true(isTRUE(a_spec$wrap)) + expect_false(isTRUE(b_spec$wrap)) +}) + +test_that("a deliberately-too-wide table renders to a single PDF page when wrap is on", { + df <- data.frame( + a = rep(paste(rep("alpha", 12), collapse = " "), 3), + b = rep(paste(rep("bravo", 12), collapse = " "), 3), + c = 1:3, + stringsAsFactors = FALSE + ) + tbl <- tfl_table(df, allow_col_split = FALSE) # wrap_cols defaults to "auto" + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_no_error(export_tfl(tbl, file = f, pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches"))) + expect_true(file.exists(f)) +}) + +test_that("wrap_cols = FALSE + allow_col_split = FALSE errors with a clear message on a too-wide table", { + df <- data.frame( + a = rep(paste(rep("alpha", 12), collapse = " "), 3), + b = rep(paste(rep("bravo", 12), collapse = " "), 3), + c = 1:3, + stringsAsFactors = FALSE + ) + tbl <- tfl_table(df, wrap_cols = FALSE, allow_col_split = FALSE) + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_error( + export_tfl(tbl, file = f, pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches")), + regexp = "exceeds available content width" + ) +}) + +test_that("header text wraps when the column is wrap-eligible and the header is too long", { + df <- data.frame( + `Concomitant Medication Class` = c("Statin", "ACE inhibitor"), + n = c(10L, 20L), + check.names = FALSE + ) + # Force a narrow column for the long header so wrapping is required. + tbl <- tfl_table( + df, + cols = list(tfl_colspec("Concomitant Medication Class", + width = grid::unit(0.8, "inches"), + wrap = TRUE)) + ) + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_no_error(export_tfl(tbl, file = f, pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches"))) +}) + +test_that("wrap_breaks(keep_before = '-') breaks AFTER hyphens in cell content", { + df <- data.frame( + term = rep("placebo-controlled-extension", 3), + n = 1:3, + stringsAsFactors = FALSE + ) + tbl <- tfl_table(df, + wrap_breaks = wrap_breaks(drop = " ", keep_before = "-"), + cols = list(tfl_colspec("term", + width = grid::unit(1.0, "inches"), + wrap = TRUE))) + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_no_error(export_tfl(tbl, file = f, pg_width = 6, pg_height = 8.5, + min_content_height = grid::unit(1, "inches"))) +}) + +test_that("a single cell that wraps to taller than the page errors with overflow_action default 'error'", { + long_essay <- paste(rep(paste(rep("aa bb cc dd ee ff", 3), collapse = " "), + 120), + collapse = " ") + df <- data.frame(notes = long_essay, stringsAsFactors = FALSE) + tbl <- tfl_table(df, + cols = list(tfl_colspec("notes", + width = grid::unit(0.8, "inches"), + wrap = TRUE))) + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_error( + export_tfl(tbl, file = f, pg_width = 4, pg_height = 8.5, + min_content_height = grid::unit(0.5, "inches")), + regexp = "exceeds the available page content height" + ) +}) + +test_that("the same single-cell-too-tall input renders under overflow_action = 'warn'", { + long_essay <- paste(rep(paste(rep("aa bb cc dd ee ff", 3), collapse = " "), + 120), + collapse = " ") + df <- data.frame(notes = long_essay, stringsAsFactors = FALSE) + tbl <- tfl_table(df, + cols = list(tfl_colspec("notes", + width = grid::unit(0.8, "inches"), + wrap = TRUE))) + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_warning( + export_tfl(tbl, file = f, pg_width = 4, pg_height = 8.5, + min_content_height = grid::unit(0.5, "inches"), + overflow_action = "warn"), + regexp = "exceeds the available page content height" + ) +}) + +test_that("tfl_table validates wrap_breaks must be a wrap_breaks() object", { + expect_error(tfl_table(make_simple_df(), wrap_breaks = list(drop = " ")), + regexp = "wrap_breaks") +}) + +test_that("auto-sized column width accounts for bold header (header_row gpar)", { + # A column whose data is short numbers but whose label is a long word will + # be auto-sized to fit the label. When the header_row gpar is bold (the + # default), the bold rendered width must drive the auto-size, not the + # narrower regular-weight measurement. + df <- data.frame(Concomitant = c(1L, 2L, 3L)) # one-word bold header + tbl <- tfl_table(df) + result <- compute_col_widths( + resolve_col_specs(tbl), tbl$data, content_width_in = 6, + tbl, pg_width = 11, pg_height = 8.5, + margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + bold_w <- { + f <- tempfile(fileext = ".pdf") + grDevices::pdf(f, width = 11, height = 8.5) + grid::pushViewport(grid::viewport()) + bw <- writetfl:::.width_in(grid::grobWidth(grid::textGrob( + "Concomitant", gp = grid::gpar(fontface = "bold") + ))) + grid::popViewport() + grDevices::dev.off() + unlink(f) + bw + } + # Width should be at least the bold-rendered header plus left + right + # cell padding (default 0.5 lines each). + expect_gte(result$resolved_cols[[1]]$width_in, bold_w) +}) + +test_that("wrap_extra_padding adds bottom space to multi-line cells but not single-line", { + # Two identical-data tables; the second has wrap_extra_padding = 0.5 lines. + # The multi-line column should be visibly taller in the second table; a + # numeric (single-line) column should be unchanged. + df <- data.frame( + notes = c("a b c d e f g h i j k l m n o p q r s t u v w x y z", + "short"), + n = c(1L, 2L) + ) + tbl1 <- tfl_table(df, wrap_extra_padding = grid::unit(0, "lines"), + cols = list(tfl_colspec("notes", + width = grid::unit(1, "inches"), + wrap = TRUE))) + tbl2 <- tfl_table(df, wrap_extra_padding = grid::unit(0.5, "lines"), + cols = list(tfl_colspec("notes", + width = grid::unit(1, "inches"), + wrap = TRUE))) + + measure <- function(tbl) { + rcs <- writetfl:::resolve_col_specs(tbl) + cwr <- writetfl:::compute_col_widths( + rcs, tbl$data, content_width_in = 6, tbl, pg_width = 11, + pg_height = 8.5, margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + f <- tempfile(fileext = ".pdf") + grDevices::pdf(f, width = 11, height = 8.5) + grid::pushViewport(writetfl:::.make_outer_vp( + grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + )) + on.exit({ + grid::popViewport() + grDevices::dev.off() + unlink(f) + }) + writetfl:::measure_row_heights_tbl( + tbl$data, cwr$resolved_cols, tbl$gp, tbl$cell_padding, + tbl$na_string, tbl$line_height, tbl$max_measure_rows, + breaks = tbl$wrap_breaks, + wrap_extra_pad_in = writetfl:::.height_in(tbl$wrap_extra_padding) + ) + } + m1 <- measure(tbl1) + m2 <- measure(tbl2) + notes_idx <- 1L + n_idx <- 2L + # Multi-line row: notes cell taller in tbl2 than in tbl1 by ~0.5 lines. + expect_gt(m2[1L, notes_idx], m1[1L, notes_idx]) + # Single-line row: notes cell same height in both tables (no extra applied). + expect_equal(m2[2L, notes_idx], m1[2L, notes_idx]) + # Single-line numeric column unchanged in either row. + expect_equal(m2[1L, n_idx], m1[1L, n_idx]) + expect_equal(m2[2L, n_idx], m1[2L, n_idx]) +}) + +test_that("wrap_balance = \"height\" reduces total table height vs \"width\" on asymmetric content", { + # notes_a is much denser than notes_b. Water-fill shrinks notes_a alone + # (notes_b is naturally narrow); height-balance moves width back from + # notes_b to notes_a so notes_a wraps to fewer lines. + df <- data.frame( + notes_a = rep(paste(rep("alpha", 24), collapse = " "), 7), + notes_b = rep(paste(rep("alpha", 5), collapse = " "), 7), + stringsAsFactors = FALSE + ) + measure <- function(mode) { + tbl <- tfl_table(df, allow_col_split = FALSE, wrap_balance = mode) + rcs <- writetfl:::resolve_col_specs(tbl) + cwr <- writetfl:::compute_col_widths( + rcs, tbl$data, content_width_in = 5, tbl, pg_width = 6, pg_height = 8.5, + margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + f <- tempfile(fileext = ".pdf") + grDevices::pdf(f, width = 6, height = 8.5) + grid::pushViewport(writetfl:::.make_outer_vp( + grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + )) + on.exit({ grid::popViewport(); grDevices::dev.off(); unlink(f) }) + m <- writetfl:::measure_row_heights_tbl( + df, cwr$resolved_cols, tbl$gp, tbl$cell_padding, + tbl$na_string, tbl$line_height, tbl$max_measure_rows, + breaks = tbl$wrap_breaks, + wrap_extra_pad_in = writetfl:::.height_in(tbl$wrap_extra_padding) + ) + list(widths = vapply(cwr$resolved_cols, "[[", numeric(1L), "width_in"), + total_h = sum(apply(m, 1L, max))) + } + w <- measure("width") + h <- measure("height") + # Total width must be preserved. + expect_equal(sum(h$widths), sum(w$widths), tolerance = 1e-3) + # Height-balance must produce a strictly lower total table height. + expect_lt(h$total_h, w$total_h) + # And it must do so within the documented runtime budget (~1 second). + t0 <- Sys.time() + measure("height") + expect_lt(as.numeric(difftime(Sys.time(), t0, units = "secs")), 2) +}) + +test_that("wrap_balance = \"height\" is a no-op when only one column is wrap-eligible", { + # Single-eligible-column table: there is no second column to swap with, + # so height-balance must return the input widths unchanged. + df <- data.frame( + notes = rep(paste(rep("alpha", 8), collapse = " "), 5), + n = 1:5, + stringsAsFactors = FALSE + ) + tbl_w <- tfl_table(df, allow_col_split = FALSE, wrap_balance = "width") + tbl_h <- tfl_table(df, allow_col_split = FALSE, wrap_balance = "height") + rcs_w <- writetfl:::resolve_col_specs(tbl_w) + rcs_h <- writetfl:::resolve_col_specs(tbl_h) + cwr_w <- writetfl:::compute_col_widths( + rcs_w, tbl_w$data, content_width_in = 5, tbl_w, pg_width = 6, + pg_height = 8.5, margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + cwr_h <- writetfl:::compute_col_widths( + rcs_h, tbl_h$data, content_width_in = 5, tbl_h, pg_width = 6, + pg_height = 8.5, margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + w_widths <- vapply(cwr_w$resolved_cols, "[[", numeric(1L), "width_in") + h_widths <- vapply(cwr_h$resolved_cols, "[[", numeric(1L), "width_in") + expect_equal(h_widths, w_widths, tolerance = 1e-6) +}) + +test_that("tfl_table validates wrap_balance must be \"width\" or \"height\"", { + expect_error(tfl_table(make_simple_df(), wrap_balance = "tall"), + regexp = "wrap_balance") +}) + +test_that("tfl_table validates wrap_extra_padding must be a length-1 unit", { + expect_error(tfl_table(make_simple_df(), wrap_extra_padding = 0.25), + regexp = "wrap_extra_padding") + expect_error(tfl_table(make_simple_df(), + wrap_extra_padding = grid::unit(c(0, 0), "lines")), + regexp = "wrap_extra_padding") +}) + +test_that("longest-unbreakable-token floor accounts for bold header", { + # When wrap is on and the data is unbreakable but the header is one + # long bold word, the wrap floor must reflect the bold header width so + # the algorithm cannot promise a width the renderer cannot honour. + df <- data.frame(`Concomitant Medication Class` = 1:3, check.names = FALSE) + tbl <- tfl_table(df) + rcs <- resolve_col_specs(tbl) + rcs[[1]]$wrap <- TRUE # force eligibility for the floor calculation + bold_token_w <- { + f <- tempfile(fileext = ".pdf") + grDevices::pdf(f, width = 11, height = 8.5) + grid::pushViewport(grid::viewport()) + bw <- writetfl:::.width_in(grid::grobWidth(grid::textGrob( + "Concomitant", gp = grid::gpar(fontface = "bold") + ))) + grid::popViewport() + grDevices::dev.off() + unlink(f) + bw + } + result <- writetfl:::.compute_wrapped_widths( + widths_in = c(10), # very wide; force narrowing + resolved_cols = rcs, + data = tbl$data, + tbl = tbl, + content_width_in = 0.5, + h_pad_in = 0, + min_in = 0.1, + pg_width = 11, pg_height = 8.5, + margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + # The floor on the wrap-eligible column is at least the bold-rendered + # longest header token; the algorithm must refuse to drop the column + # below this width. + expect_gte(result[[1]], bold_token_w - 1e-3) +}) + # --------------------------------------------------------------------------- # tfl_colspec() — additional validation (R/tfl_table.R lines 49, 60) # --------------------------------------------------------------------------- @@ -720,9 +1053,14 @@ test_that("tfl_colspec errors when label is not a single character string", { expect_error(tfl_colspec("x", label = c("a", "b")), regexp = "label") }) +test_that("tfl_colspec accepts NA for wrap (the inherit-from-table sentinel)", { + cs <- tfl_colspec("x", wrap = NA) + expect_true(is.na(cs$wrap)) +}) + test_that("tfl_colspec errors when wrap is not a scalar logical", { - expect_error(tfl_colspec("x", wrap = NA), regexp = "wrap") expect_error(tfl_colspec("x", wrap = "yes"), regexp = "wrap") + expect_error(tfl_colspec("x", wrap = c(TRUE, FALSE)), regexp = "wrap") }) # --------------------------------------------------------------------------- diff --git a/tests/testthat/test-wrap.R b/tests/testthat/test-wrap.R new file mode 100644 index 0000000..4f6eeb0 --- /dev/null +++ b/tests/testthat/test-wrap.R @@ -0,0 +1,300 @@ +# test-wrap.R - Tests for R/wrap.R (the text-wrap module) + +# Reuse the with_vp helper pattern from test-table_utils.R for measurement. +with_vp <- function(expr) { + f <- tempfile(fileext = ".pdf") + grDevices::pdf(f, width = 11, height = 8.5) + vp <- grid::viewport(width = grid::unit(10, "inches"), + height = grid::unit(7.5, "inches")) + grid::pushViewport(vp) + on.exit({ + grid::popViewport() + grDevices::dev.off() + unlink(f) + }) + force(expr) +} + +# wrap_breaks() constructor ---------------------------------------------------- + +test_that("wrap_breaks() returns a wrap_breaks object with whitespace defaults", { + b <- writetfl:::wrap_breaks() + expect_s3_class(b, "wrap_breaks") + expect_setequal(b$drop, c(" ", "\t")) + expect_equal(b$keep_before, character(0L)) +}) + +test_that("wrap_breaks_default() returns the same as wrap_breaks()", { + expect_equal(writetfl:::wrap_breaks_default(), writetfl:::wrap_breaks()) +}) + +test_that(".is_wrap_breaks recognises wrap_breaks objects", { + expect_true(writetfl:::.is_wrap_breaks(writetfl:::wrap_breaks())) + expect_false(writetfl:::.is_wrap_breaks(list(drop = " "))) + expect_false(writetfl:::.is_wrap_breaks(NULL)) +}) + +test_that("wrap_breaks rejects non-character drop / keep_before", { + expect_error(writetfl:::wrap_breaks(drop = 1L), regexp = "drop") + expect_error(writetfl:::wrap_breaks(keep_before = 1L), regexp = "keep_before") +}) + +test_that("wrap_breaks rejects NA values", { + expect_error(writetfl:::wrap_breaks(drop = c(" ", NA)), regexp = "NA") + expect_error(writetfl:::wrap_breaks(keep_before = NA_character_), regexp = "NA") +}) + +test_that("wrap_breaks rejects multi-character entries", { + expect_error(writetfl:::wrap_breaks(drop = c(" ", "ab")), + regexp = "single character") + expect_error(writetfl:::wrap_breaks(keep_before = "--"), + regexp = "single character") +}) + +test_that("wrap_breaks rejects overlap between drop and keep_before", { + expect_error(writetfl:::wrap_breaks(drop = c(" ", "-"), keep_before = "-"), + regexp = "disjoint") +}) + +# .tokenize_for_wrap() -------------------------------------------------------- + +test_that(".tokenize_for_wrap splits on space (drop) - space is the lead of the next token", { + toks <- writetfl:::.tokenize_for_wrap("a bb ccc", + writetfl:::wrap_breaks_default()) + expect_equal(length(toks), 3L) + expect_equal(vapply(toks, `[[`, "", "text"), c("a", "bb", "ccc")) + expect_equal(vapply(toks, `[[`, "", "lead"), c("", " ", " ")) +}) + +test_that(".tokenize_for_wrap keeps the keep_before character on the preceding token", { + b <- writetfl:::wrap_breaks(keep_before = "-") + toks <- writetfl:::.tokenize_for_wrap("alpha-beta-gamma", b) + expect_equal(vapply(toks, `[[`, "", "text"), + c("alpha-", "beta-", "gamma")) + expect_equal(vapply(toks, `[[`, "", "lead"), c("", "", "")) +}) + +test_that(".tokenize_for_wrap handles mixed drop and keep_before", { + b <- writetfl:::wrap_breaks(drop = " ", keep_before = "-") + toks <- writetfl:::.tokenize_for_wrap("aa bb-cc dd", b) + expect_equal(vapply(toks, `[[`, "", "text"), + c("aa", "bb-", "cc", "dd")) + expect_equal(vapply(toks, `[[`, "", "lead"), + c("", " ", "", " ")) +}) + +test_that(".tokenize_for_wrap returns an empty list for an empty string", { + expect_equal(writetfl:::.tokenize_for_wrap("", + writetfl:::wrap_breaks_default()), + list()) +}) + +# .wrap_string() core behavior ----------------------------------------------- + +test_that(".wrap_string returns NULL / empty input unchanged", { + with_vp({ + expect_null(writetfl:::.wrap_string(NULL, 1, grid::gpar())) + expect_equal(writetfl:::.wrap_string("", 1, grid::gpar()), "") + }) +}) + +test_that(".wrap_string preserves explicit \\n as paragraph breaks", { + with_vp({ + out <- writetfl:::.wrap_string("first\nsecond", 5, grid::gpar()) + expect_equal(out, "first\nsecond") + }) +}) + +test_that(".wrap_string greedily breaks long text on whitespace", { + with_vp({ + text <- paste(rep("word", 20), collapse = " ") + out <- writetfl:::.wrap_string(text, 0.4, grid::gpar(fontsize = 10)) + expect_true(grepl("\n", out, fixed = TRUE)) + # No line should still contain a space if breaking was needed + lines <- strsplit(out, "\n", fixed = TRUE)[[1L]] + expect_true(length(lines) > 1L) + }) +}) + +test_that(".wrap_string returns a single unbreakable token unchanged", { + with_vp({ + token <- paste(rep("X", 200), collapse = "") + out <- writetfl:::.wrap_string(token, 0.01, grid::gpar(fontsize = 10)) + expect_equal(out, token) + }) +}) + +test_that(".wrap_string with keep_before breaks AFTER the keep char", { + with_vp({ + b <- writetfl:::wrap_breaks(keep_before = "-") + # A wide enough font + tight width forces a break. + out <- writetfl:::.wrap_string("alpha-beta-gamma", 0.5, + grid::gpar(fontsize = 14), b) + lines <- strsplit(out, "\n", fixed = TRUE)[[1L]] + expect_gte(length(lines), 2L) + # Each line that is not the last must end with "-" + for (ln in lines[-length(lines)]) { + expect_match(ln, "-$") + } + }) +}) + +test_that(".wrap_string with NULL breaks falls back to defaults", { + with_vp({ + out_null <- writetfl:::.wrap_string("aa bb cc", 0.3, + grid::gpar(fontsize = 12), + breaks = NULL) + out_def <- writetfl:::.wrap_string("aa bb cc", 0.3, + grid::gpar(fontsize = 12)) + expect_equal(out_null, out_def) + }) +}) + +# .column_has_breakable_text() ------------------------------------------------ + +test_that(".column_has_breakable_text detects whitespace by default", { + b <- writetfl:::wrap_breaks_default() + expect_true(writetfl:::.column_has_breakable_text(c("hello world"), b)) + expect_false(writetfl:::.column_has_breakable_text(c("noBreak", "1.23"), b)) + expect_false(writetfl:::.column_has_breakable_text(character(0L), b)) +}) + +test_that(".column_has_breakable_text detects keep_before chars", { + b <- writetfl:::wrap_breaks(drop = character(0L), keep_before = "-") + expect_true(writetfl:::.column_has_breakable_text(c("a-b"), b)) + expect_false(writetfl:::.column_has_breakable_text(c("ab", "cd"), b)) +}) + +# .column_min_token_width_in() ----------------------------------------------- + +test_that(".column_min_token_width_in returns 0 for an empty input", { + with_vp({ + w <- writetfl:::.column_min_token_width_in(character(0L), grid::gpar(), + writetfl:::wrap_breaks_default()) + expect_equal(w, 0) + }) +}) + +test_that(".column_min_token_width_in returns the widest unbreakable token", { + with_vp({ + gp <- grid::gpar(fontsize = 12) + b <- writetfl:::wrap_breaks_default() + short <- writetfl:::.column_min_token_width_in("aa bb", gp, b) + long <- writetfl:::.column_min_token_width_in("aa LongUnbreakableToken", gp, b) + expect_gt(long, short) + }) +}) + +test_that(".column_min_token_width_in counts keep_before char as part of the left token", { + with_vp({ + gp <- grid::gpar(fontsize = 12) + b <- writetfl:::wrap_breaks(keep_before = "-") + # "alpha-beta" tokenises to "alpha-" and "beta"; the longest is "alpha-" + w_with_dash <- writetfl:::.column_min_token_width_in("alpha-beta", gp, b) + w_no_dash <- writetfl:::.column_min_token_width_in("alphabeta", gp, + writetfl:::wrap_breaks_default()) + expect_lt(w_with_dash, w_no_dash) # break after "-" reduces the floor + }) +}) + +# .wrap_label_for_width() ---------------------------------------------------- + +test_that(".wrap_label_for_width returns NULL / empty input unchanged", { + expect_null(writetfl:::.wrap_label_for_width(NULL, 1, 0.1, grid::gpar(), + writetfl:::wrap_breaks_default())) + expect_equal( + writetfl:::.wrap_label_for_width("", 1, 0.1, grid::gpar(), + writetfl:::wrap_breaks_default()), + "" + ) +}) + +test_that(".wrap_label_for_width subtracts horizontal padding from the available width", { + with_vp({ + gp <- grid::gpar(fontsize = 12) + b <- writetfl:::wrap_breaks_default() + out <- writetfl:::.wrap_label_for_width("Concomitant Medication Class", + width_in = 1.0, + h_pad_in = 0.2, + gp = gp, breaks = b) + expect_true(grepl("\n", out, fixed = TRUE)) + }) +}) + +# .compute_wrapped_widths() water-fill --------------------------------------- + +test_that(".compute_wrapped_widths is a no-op when no column is wrap-eligible", { + resolved <- list( + list(col = "a", label = "a", wrap = FALSE, is_group_col = FALSE), + list(col = "b", label = "b", wrap = FALSE, is_group_col = FALSE) + ) + data <- data.frame(a = "aa bb", b = "cc dd", stringsAsFactors = FALSE) + tbl <- list(gp = list(), wrap_breaks = writetfl:::wrap_breaks_default(), + line_height = 1.05, na_string = "", max_measure_rows = Inf, + cell_padding = grid::unit(c(0, 0, 0, 0), "inches")) + out <- writetfl:::.compute_wrapped_widths( + widths_in = c(2, 2), + resolved_cols = resolved, + data = data, tbl = tbl, + content_width_in = 3, # would force narrowing if anything were eligible + h_pad_in = 0, min_in = 0.5, + pg_width = 11, pg_height = 8.5, + margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + expect_equal(out, c(2, 2)) +}) + +test_that(".compute_wrapped_widths narrows the widest wrap-eligible col first", { + resolved <- list( + list(col = "a", label = "a", wrap = TRUE, is_group_col = FALSE), + list(col = "b", label = "b", wrap = TRUE, is_group_col = FALSE) + ) + # both columns have only " " breaks; lots of small words -> floor is small + data <- data.frame(a = paste(rep("xx", 30), collapse = " "), + b = paste(rep("yy", 30), collapse = " "), + stringsAsFactors = FALSE) + tbl <- list(gp = list(), wrap_breaks = writetfl:::wrap_breaks_default(), + line_height = 1.05, na_string = "", max_measure_rows = Inf, + cell_padding = grid::unit(c(0, 0, 0, 0), "inches")) + out <- writetfl:::.compute_wrapped_widths( + widths_in = c(4, 2), # widest = a + resolved_cols = resolved, + data = data, tbl = tbl, + content_width_in = 3, + h_pad_in = 0, min_in = 0.2, + pg_width = 11, pg_height = 8.5, + margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + # Total fits within the target. + expect_lte(sum(out), 3 + 1e-6) + # The widest column shrank. + expect_lt(out[[1]], 4) +}) + +test_that(".compute_wrapped_widths respects the longest-token floor", { + resolved <- list( + list(col = "a", label = "a", wrap = TRUE, is_group_col = FALSE) + ) + # A single unbreakable token whose rendered width sets the floor. + long <- paste(rep("X", 60), collapse = "") + data <- data.frame(a = long, stringsAsFactors = FALSE) + tbl <- list(gp = list(), wrap_breaks = writetfl:::wrap_breaks_default(), + line_height = 1.05, na_string = "", max_measure_rows = Inf, + cell_padding = grid::unit(c(0, 0, 0, 0), "inches")) + out <- writetfl:::.compute_wrapped_widths( + widths_in = c(8), + resolved_cols = resolved, + data = data, tbl = tbl, + content_width_in = 0.5, # tiny target - much less than the token width + h_pad_in = 0, min_in = 0.1, + pg_width = 11, pg_height = 8.5, + margins = grid::unit(c(0.5, 0.5, 0.5, 0.5), "inches") + ) + # The result is bounded below by the widest token's rendered width. + with_vp({ + floor_w <- writetfl:::.column_min_token_width_in( + long, grid::gpar(), writetfl:::wrap_breaks_default() + ) + }) + expect_gte(out[[1]], floor_w - 1e-6) +}) diff --git a/vignettes/v02-tfl_table_intro.Rmd b/vignettes/v02-tfl_table_intro.Rmd index 50a2407..e9edf23 100644 --- a/vignettes/v02-tfl_table_intro.Rmd +++ b/vignettes/v02-tfl_table_intro.Rmd @@ -152,12 +152,20 @@ Valid alignment values are `"left"`, `"right"`, and `"center"`. ### Word-wrapping -`wrap_cols` accepts column names (or `TRUE`/`FALSE`) marking columns as -eligible for greedy word-wrapping. Wrap-eligible columns are sized to fit -within their assigned width, with cell text wrapped on word boundaries. -This is useful for free-text columns (narrative descriptions, verbatim -terms) that would otherwise force very wide pages or illegible small -fonts. +`writetfl` ships with text wrapping turned on by default. `wrap_cols` +controls which columns are eligible: + +| `wrap_cols` value | Meaning | +|---|---| +| `"auto"` (default) | Eligible if the column's data or header contains a break character (whitespace by default). Numeric and single-token columns are skipped automatically. | +| `TRUE` | All non-group columns are eligible regardless of content. | +| `FALSE` | Disable text wrapping entirely. | +| Character vector of column names | Only the named columns are eligible. | + +The default ("auto") is the recommended setting for clinical reports — +free-text columns (narrative descriptions, verbatim terms) wrap to fit +their column without you having to specify; numeric columns are left at +their natural width. ```{r wrap-cols, fig.width = 11, fig.height = 8.5, out.width = "100%"} ae_verbatim <- data.frame( @@ -173,6 +181,7 @@ ae_verbatim <- data.frame( stringsAsFactors = FALSE ) +# wrap_cols = "auto" is the default; specifying it is shown for clarity. tbl <- tfl_table( ae_verbatim, col_labels = c( @@ -184,8 +193,7 @@ tbl <- tfl_table( subject_id = unit(0.8, "inches"), ae_term = unit(3.5, "inches"), onset_day = NULL - ), - wrap_cols = "ae_term" + ) ) export_tfl( @@ -196,6 +204,60 @@ export_tfl( ) ``` +#### Per-column override + +Set `tfl_colspec(wrap = TRUE)` or `tfl_colspec(wrap = FALSE)` to override +the table-level setting for one column. `wrap = NA` (the default) means +"inherit from `wrap_cols`". + +#### Custom break characters — `wrap_breaks()` + +By default the wrap algorithm breaks on whitespace and consumes the +whitespace at the break. `wrap_breaks()` lets you opt in to break-after +characters that *stay* on the upper line: + +```{r wrap-breaks, eval = FALSE} +# Break after hyphens for hyphenated terms like "placebo-controlled" +tbl <- tfl_table( + hyphen_df, + wrap_breaks = wrap_breaks(drop = " ", keep_before = "-") +) + +# Break after "/" or "-" for path-like content +tbl <- tfl_table( + path_df, + wrap_breaks = wrap_breaks(drop = " ", keep_before = c("-", "/")) +) +``` + +#### Optimising for total table height — `wrap_balance` + +The default `wrap_balance = "width"` keeps the widths of wrap-eligible +columns balanced (water-from-top), which is fast and produces visually +tidy columns. When two wrap-eligible columns have very different content +density, the dense one wraps to many more lines than the sparse one even +at the same width — and the resulting tall rows can spill across pages. +Set `wrap_balance = "height"` to opt in to a bounded extra pass that +redistributes width between wrap-eligible columns to lower the total +table height (more rows per page). The pass is time-budgeted and falls +back silently to the default if it can't find an improvement. + +#### Row-overflow guard + +A row whose wrapped height exceeds one page is almost always a sign of +input that needs to change (e.g. a 5,000-character cell forced into a +0.5-inch column). The package raises a clear error in this case; +`overflow_action = "warn"` on `export_tfl()` downgrades it to a warning +and produces output for diagnosis. See +`vignette("v04-troubleshooting")` for the full overflow story. + +#### Visual separation between multi-line cells + +`wrap_extra_padding` (default `unit(0.5, "lines")`) adds extra vertical +space at the bottom of any cell whose displayed text spans more than one +line, so consecutive multi-line rows don't visually run together. Set to +`unit(0, "lines")` to disable. + ### Per-column specification with `tfl_colspec()` For complex tables it can be cleaner to specify each column separately @@ -603,7 +665,9 @@ see `vignette("v03-tfl_table_styling")`. | `col_widths` | `NULL` (auto) | Named list of `unit()`, plain numeric, or `NULL` per column | | `col_labels` | column names | Named character vector of header labels; supports `\n` | | `col_align` | type-based | Named vector: `"left"`, `"right"`, or `"center"` | -| `wrap_cols` | `NULL` | Names of columns to word-wrap | +| `wrap_cols` | `"auto"` | `"auto"` (auto-detect), `TRUE` (all data cols), `FALSE` (off), or character vector of column names | +| `wrap_breaks` | `wrap_breaks()` | Break-character spec — defaults to whitespace; opt into `keep_before` chars like `-` or `/` | +| `wrap_balance` | `"width"` | `"width"` (fast water-fill) or `"height"` (opt-in pass that lowers total table height) | | `min_col_width` | `unit(0.5, "inches")` | Floor applied to auto-sized columns | | `allow_col_split` | `TRUE` | If `FALSE`, error when columns exceed page width | | `balance_col_pages` | `FALSE` | Redistribute columns evenly across column-split pages | @@ -628,4 +692,5 @@ see `vignette("v03-tfl_table_styling")`. | `fill_by` | `"row"` | `"row"` or `"group"` for cell fill cycling | | `cell_padding` | `unit(c(0.2, 0.5), "lines")` | Vertical and horizontal padding inside each cell | | `line_height` | `1.05` | Inter-line spacing multiplier for word-wrapped cells | +| `wrap_extra_padding` | `unit(0.5, "lines")` | Extra space below multi-line cells so rows are visually distinguishable; `unit(0, "lines")` to disable | | `max_measure_rows` | `Inf` | Number of rows sampled when measuring auto column widths | diff --git a/vignettes/v03-tfl_table_styling.Rmd b/vignettes/v03-tfl_table_styling.Rmd index 3fe99fd..93a7ace 100644 --- a/vignettes/v03-tfl_table_styling.Rmd +++ b/vignettes/v03-tfl_table_styling.Rmd @@ -475,6 +475,170 @@ export_tfl(tbl, preview = TRUE, --- +## Word wrapping — `wrap_cols`, `wrap_breaks` + +`wrap_cols` and `wrap_breaks` control **text wrapping inside columns** — +the table reflows long cell text and column-header labels onto multiple +lines so a column can be narrower. Distinct from +[**page-column-split**](#multi-page-accessories), which handles a table +that is too wide *as a whole* by spreading its columns over more than +one PDF page. The two run in order: text-wrap first; then page-column-split +as a fallback if the table still doesn't fit. + +### Default behaviour — `wrap_cols = "auto"` + +Out of the box, every non-group column whose cells or header contain a +break character (whitespace by default) is eligible for wrapping. Numeric +columns and single-token strings are skipped because no amount of +narrowing can make them break. The wrap pass only runs when the natural +column widths exceed the available page width. + +```{r wrap-default, eval = FALSE} +notes_df <- data.frame( + visit = c("Baseline", "Week 4", "Week 12"), + notes = c("Patient enrolled and signed informed consent", + "Mild headache reported, resolved within 24 hours", + "All scheduled assessments completed without issue") +) +tbl <- tfl_table(notes_df) # wrap_cols = "auto" by default +``` + +### Disabling wrap entirely + +Pass `wrap_cols = FALSE`. The wrap module is bypassed; if the table is +wider than the page it will fall through to page-column-split (or error +when `allow_col_split = FALSE`). + +```{r wrap-off, eval = FALSE} +tbl <- tfl_table(notes_df, wrap_cols = FALSE) +``` + +### Per-column control + +`tfl_colspec(wrap = ...)` overrides the table-level setting: + +- `wrap = TRUE` — always eligible, even when no break character is present. +- `wrap = FALSE` — never eligible, even when `wrap_cols = "auto"` would mark it. +- `wrap = NA` (default) — inherit from `wrap_cols`. + +```{r wrap-per-col, eval = FALSE} +tbl <- tfl_table( + notes_df, + cols = list( + tfl_colspec("notes", width = unit(2, "inches"), wrap = TRUE) + ) +) +``` + +### Custom break characters — `wrap_breaks()` + +By default the wrap module breaks on whitespace (space, tab) and consumes +the whitespace at the break point. Pass a `wrap_breaks()` object to +configure additional break characters: + +- `drop` — characters consumed at the break (whitespace; the default). +- `keep_before` — characters that stay on the **left** of the break, with + the *next* character starting the new line. Useful for hyphenated terms + or path-like strings. + +```{r wrap-breaks, eval = FALSE} +# Break after "-" (hyphenated terms) +tbl <- tfl_table( + hyphen_df, + wrap_breaks = wrap_breaks(drop = " ", keep_before = "-") +) + +# Break after "/" or "-" (path-like content) +tbl <- tfl_table( + path_df, + wrap_breaks = wrap_breaks(drop = " ", keep_before = c("-", "/")) +) +``` + +### Algorithm in one paragraph + +The wrap module computes a *floor* per wrap-eligible column equal to the +larger of `min_col_width` and the rendered width of that column's longest +unbreakable token. It then runs a "water-from-top" pass: at each step it +finds the widest set of wrap-eligible columns above their floor and +shrinks them together until they meet the next-widest column or hit a +floor — repeating until the total fits or every wrap-eligible column has +hit its floor. Deterministic and bounded. + +### Optimising for height — `wrap_balance = "height"` + +The default narrowing pass (`wrap_balance = "width"`, water-from-top) +balances the *widths* of wrap-eligible columns. That's content-blind +and fast, but on tables with very different content density per +column it can leave one column wrapping to many more lines than its +neighbour. When the goal is to fit more rows per page, opt in to +`wrap_balance = "height"`: + +```{r wrap-balance-height, eval = FALSE} +tbl <- tfl_table(my_df, wrap_balance = "height") +``` + +The height pass runs *after* water-fill, takes width away from +columns whose cells have slack (short content) and gives it to +columns whose cells are the bottleneck (long content). It accepts a +move only if the resulting *total table height* is smaller, so the +result is never worse than the default. The pass is time-budgeted at +~1 second; on any error or budget overrun the result silently falls +back to water-fill widths. + +Use the height pass when: + +- You have multiple wrap-eligible string columns of obviously + different content density. +- The default `wrap_balance = "width"` is producing one column with + many wrapped lines next to a column with one line of content. +- You'd rather fit more rows on a page than have visually balanced + column widths. + +The default stays `"width"` because it's deterministic, fast, +visually tidy, and produces good results on most clinical TFLs. + +### Visual gap between adjacent multi-line cells — `wrap_extra_padding` + +When two consecutive rows both contain wrapped (multi-line) cells, the +bottom of one row's wrapped text can sit visually flush against the top +of the next row's wrapped text — making it hard to see where one row +ends and the next begins. `wrap_extra_padding` adds a configurable +amount of vertical space *only* at the bottom of multi-line cells. + +```{r wrap-extra-padding, eval = FALSE} +# Default: 0.5 lines of extra space below every multi-line cell. +tbl <- tfl_table(notes_df) + +# Disable: pack rows tightly even when cells wrap. +tbl <- tfl_table(notes_df, wrap_extra_padding = unit(0, "lines")) + +# More breathing room. +tbl <- tfl_table(notes_df, wrap_extra_padding = unit(0.5, "lines")) +``` + +The extra applies to any cell whose displayed text spans more than one +line — whether the lines come from the wrap algorithm or from explicit +`\n` in the data. Single-line cells are unaffected, so the padding does +not inflate compact tables. + +### Failsafe — row-overflow guard + +A row whose wrapped height exceeds one page is almost always a sign of +input that needs to change (e.g. a 5,000-character cell forced into a +0.5-inch column). `paginate_rows()` raises an error in this case via the +same `overflow_action` switch as the column-overflow guard: + +```r +# Raise error (default) +export_tfl(tbl, file = "out.pdf") + +# Downgrade to a warning and still produce diagnostic output +export_tfl(tbl, file = "out.pdf", overflow_action = "warn") +``` + +--- + ## Multi-page accessories ### Column continuation message — `col_cont_msg` diff --git a/vignettes/v04-troubleshooting.Rmd b/vignettes/v04-troubleshooting.Rmd index 4be7a75..05b1858 100644 --- a/vignettes/v04-troubleshooting.Rmd +++ b/vignettes/v04-troubleshooting.Rmd @@ -61,13 +61,24 @@ per-column width breakdown for cases (2) and (3). **Solutions:** -- Enable column splitting: `tfl_table(..., allow_col_split = TRUE)` -- Enable word wrapping on wide columns: - `tfl_table(..., wrap_cols = c("wide_col1", "wide_col2"))` -- Set narrower fixed widths on specific columns via `tfl_colspec()` -- Increase page width or decrease margins +- Word wrapping is on by default (`wrap_cols = "auto"`), but a column with + a single long unbreakable token (e.g. a 40-character drug code) can't + shrink below that token's rendered width. Either: + - Pass a `wrap_breaks()` spec that lets the algorithm break inside the + token: e.g. `wrap_breaks(drop = " ", keep_before = c("-", "_"))` for + a hyphen- or underscore-separated identifier. + - Or split the column into more shorter columns. +- Enable column splitting: `tfl_table(..., allow_col_split = TRUE)`. +- Force wrap on a column the auto-detect skipped: + `tfl_table(..., wrap_cols = c("wide_col1", "wide_col2"))`. +- Set narrower fixed widths on specific columns via `tfl_colspec()`. +- Increase page width or decrease margins. - Reduce the number of group columns (group columns repeat on every - column-paginated page and reserve space for themselves on every page) + column-paginated page and reserve space for themselves on every page). +- For uneven content density across wrap-eligible columns, opt in to + `wrap_balance = "height"` — it redistributes width between columns to + reduce total table height (often fewer pages overall). Falls back + silently to the default if no improvement is found. ### Diagnosing by inspecting the broken layout @@ -91,6 +102,37 @@ export_tfl(tbl, file = "out.pdf", overflow_action = "warn") Use `"warn"` for diagnosis only — it does not fix the layout; it just lets you see what the renderer was forced to clip. +## Row wrapped to taller than one page + +**Error:** `Row N of the table wraps to a height (X.XX in) that exceeds the available page content height (Y.YY in)…` + +A row's content wrapped to more vertical space than fits on a single +page. Almost always caused by an outsize cell (long narrative pasted +into a column whose width is set very narrow, an unbroken token forced +to wrap to many lines, etc.). No amount of pagination can rescue this +case — text-wrap can't make the row shorter than its content allows, and +splitting it across pages would leave words mid-sentence at the page +break. + +**Solutions (in order of preference):** + +- Reduce the offending cell's content (the input data is almost always + the right thing to fix here). +- Widen the column so the cell wraps to fewer lines. +- Increase `pg_height` so a multi-line row fits. +- For diagnosis only, pass `overflow_action = "warn"` to `export_tfl()` + to see what the renderer would produce. The output PDF will be + visibly broken (text clipped at the page edge), but it tells you + exactly which row was responsible. + +```r +export_tfl(tbl, file = "out.pdf", overflow_action = "warn") +#> Warning: Row 3 of the table wraps to a height (12.4 in) that exceeds +#> the available page content height (7.3 in). Reduce the cell content, +#> increase the page height, widen the column, or set the column to +#> wrap less aggressively. +``` + ## Overlap warnings **Warning:** `Header/footer elements may overlap` diff --git a/vignettes/writetfl.Rmd b/vignettes/writetfl.Rmd index dbd3409..5c797f6 100644 --- a/vignettes/writetfl.Rmd +++ b/vignettes/writetfl.Rmd @@ -168,8 +168,11 @@ pk_data |> pages; set `balance_col_pages = TRUE` to distribute columns evenly. - **Column widths** — auto-sized from content, fixed (`unit()`), or relative-weight numeric. -- **Word wrapping** — `wrap_cols` reflows long text within a fixed column - width. +- **Word wrapping (default-on)** — `wrap_cols = "auto"` auto-detects + wrap-eligible columns; configure break characters with `wrap_breaks()` + (whitespace by default; opt into `keep_before = "-"` for hyphenated + content). Use `wrap_balance = "height"` to optimise widths for fewer + pages. For the complete table reference — column specs, continuation messages, cell padding, line height, and more — see `vignette("v02-tfl_table_intro")`.