diff --git a/R/table_draw.R b/R/table_draw.R index 9ae50b9..8e4faf4 100644 --- a/R/table_draw.R +++ b/R/table_draw.R @@ -63,24 +63,25 @@ #' @keywords internal build_table_grob <- function(row_page, col_group_idx, n_group_cols, resolved_cols, tbl, - row_heights_in = NULL, - cont_row_h_in = NULL, - is_first_col_page = TRUE, - is_last_col_page = TRUE) { + cell_heights_in_mat = NULL, + cont_row_h_in = NULL, + is_first_col_page = TRUE, + is_last_col_page = TRUE) { # Subset to display columns for this page page_cols <- resolved_cols[col_group_idx] grid::gTree( - row_page = row_page, - col_group_idx = col_group_idx, - n_group_cols = n_group_cols, - page_cols = page_cols, - tbl = tbl, - row_heights_in = row_heights_in, # cached from paginate phase - cont_row_h_in = cont_row_h_in, # cached from paginate phase - is_first_col_page = is_first_col_page, # FALSE when prior col pages exist - is_last_col_page = is_last_col_page, # FALSE when more col pages follow - cl = "tfl_table_grob" + row_page = row_page, + col_group_idx = col_group_idx, + n_group_cols = n_group_cols, + page_cols = page_cols, + resolved_cols = resolved_cols, # full list, for span recompute + tbl = tbl, + cell_heights_in_mat = cell_heights_in_mat, # cached full matrix + cont_row_h_in = cont_row_h_in, # cached from paginate phase + is_first_col_page = is_first_col_page, # FALSE when prior col pages exist + is_last_col_page = is_last_col_page, # FALSE when more col pages follow + cl = "tfl_table_grob" ) } @@ -165,15 +166,38 @@ drawDetails.tfl_table_grob <- function(x, recording) { max(vapply(tbl$row_cont_msg, .cont_h, numeric(1L))) } - # Data row heights — prefer cached values - row_h_vec <- if (!is.null(x$row_heights_in) && - length(x$row_heights_in) >= (if (n_rows > 0L) max(rows) else 0L)) { - x$row_heights_in[rows] + group_vars <- tbl$group_vars + + # Precompute the per-page suppression matrix. Drives cell-content + # blanking, span-aware row heights, span clipping for non-suppressed + # group cells, and within-span row-rule suppression. + suppress_mat <- if (isTRUE(tbl$suppress_repeated_groups) && + length(group_vars) > 0L) { + .compute_cell_suppression(data, group_vars, rows) + } else NULL + + # Per-page row heights — prefer the heights that pagination committed; if + # absent, recompute from the cached cell-height matrix using the same + # algorithm pagination uses. As a final fallback, build a per-page + # matrix on the fly (covers grobs assembled outside the normal pipeline). + row_h_vec <- if (!is.null(row_page$row_heights_in) && + length(row_page$row_heights_in) == n_rows) { + row_page$row_heights_in + } else if (!is.null(x$cell_heights_in_mat) && !is.null(x$resolved_cols)) { + .compute_page_row_heights( + x$cell_heights_in_mat, rows, x$resolved_cols, group_vars, suppress_mat + ) } else { - vapply(rows, function(i) { - max(vapply(page_cols, function(cs) { - s <- .fmt_cell(data[[cs$col]][i], na_str) - gp_c <- .gp_with_lineheight(.resolve_table_cell_gp(gp_tbl, cs$is_group_col), lh) + # Per-page fallback: build a small matrix for just the rows on this page + # using page_cols, then apply the algorithm. + fallback_mat <- matrix(0, nrow = n_rows, ncol = length(page_cols)) + for (j in seq_along(page_cols)) { + cs <- page_cols[[j]] + gp_c <- .gp_with_lineheight( + .resolve_table_cell_gp(gp_tbl, cs$is_group_col), lh + ) + for (ri in seq_len(n_rows)) { + s <- .fmt_cell(data[[cs$col]][rows[[ri]]], na_str) disp_s <- if (cs$wrap && !is.null(cs$width_in)) { .wrap_text(s, cs$width_in - h_lft_in - h_rgt_in, gp_c) } else s @@ -181,15 +205,35 @@ drawDetails.tfl_table_grob <- function(x, recording) { grob <- grid::textGrob(disp_s, gp = gp_c) h1 <- .height_in(grid::grobHeight(grob)) h2 <- nlines * .height_in(grid::stringHeight("M")) - max(h1, h2) - }, numeric(1L))) + v_pad_in - }, numeric(1L)) + fallback_mat[ri, j] <- max(h1, h2) + v_pad_in + } + } + .compute_page_row_heights( + fallback_mat, seq_len(n_rows), page_cols, group_vars, suppress_mat + ) } - # Precompute group sizes — group rules are suppressed for single-row groups - group_vars <- tbl$group_vars - group_sizes <- if (tbl$group_rule && length(group_vars) > 0L) { - .compute_group_sizes(data, group_vars) + # Group-rule metadata: outermost-changing-level size + level for each + # group_start. Drawing reads $levels to set the rule's left-edge column + # (so unchanged outer columns aren't sliced through by the rule line). + group_rule_info <- if (tbl$group_rule && length(group_vars) > 0L) { + .compute_group_rule_info(data, group_vars) + } else NULL + + # Precompute span ends per group column on this page so non-suppressed + # group cells can be drawn with a clip viewport that covers the whole + # span. span_end_mat[ri, g] is the last row index in the same span as + # ri for group column g. + span_end_mat <- if (!is.null(suppress_mat)) { + se <- matrix(NA_integer_, nrow = n_rows, ncol = length(group_vars)) + for (g in seq_along(group_vars)) { + starts <- which(!suppress_mat[, g]) + if (length(starts) > 0L) { + ends <- c(starts[-1L] - 1L, n_rows) + se[starts, g] <- ends + } + } + se } else NULL # --- Build row y-positions (top-to-bottom, in inches from top of vp) --- @@ -240,11 +284,7 @@ drawDetails.tfl_table_grob <- function(x, recording) { } # Group boundaries (track previous group key to detect changes) - grp_starts <- row_page$group_starts - # Precompute which group cells to suppress (hierarchical: outer change resets inner) - suppress_mat <- if (tbl$suppress_repeated_groups && length(group_vars) > 0L) { - .compute_cell_suppression(data, group_vars, rows) - } else NULL + grp_starts <- row_page$group_starts # Data row background fill setup data_row_gp <- .resolve_table_gp(gp_tbl, "data_row") @@ -256,16 +296,21 @@ drawDetails.tfl_table_grob <- function(x, recording) { i <- rows[[ri]] row_h <- row_h_vec[[ri]] - # Group rule before this row (if it starts a group, not the first visible row, - # and the group has more than one row in the full data) + # Group rule before this row (if it starts a group and is not the first + # visible row). The rule starts at the column corresponding to the + # outermost group_var level that actually changed at this transition, + # so unchanged outer columns through which the label is flowing + # aren't sliced. Drawn at every transition. if (i %in% grp_starts && ri > 1L) group_fill_idx <- group_fill_idx + 1L - if (tbl$group_rule && i %in% grp_starts && y_cursor > header_row_h + 1e-6) { - gs <- if (!is.null(group_sizes)) group_sizes[as.character(i)] else NA_integer_ - if (is.na(gs) || gs > 1L) { - rule_gp <- .resolve_table_gp(gp_tbl, "group_rule") - y_rule_npc <- 1 - y_cursor / vp_h - x_left_npc <- col_x_left[[1L]] / vp_w - x_right_npc <- col_x_right[[n_disp_cols]] / vp_w + if (tbl$group_rule && i %in% grp_starts && y_cursor > header_row_h + 1e-6 && + !is.null(group_rule_info)) { + gk <- group_rule_info$levels[as.character(i)] + if (!is.na(gk)) { + rule_start_col <- min(as.integer(gk), n_disp_cols) + rule_gp <- .resolve_table_gp(gp_tbl, "group_rule") + y_rule_npc <- 1 - y_cursor / vp_h + x_left_npc <- col_x_left[[rule_start_col]] / vp_w + x_right_npc <- col_x_right[[n_disp_cols]] / vp_w grid::grid.lines(x = grid::unit(c(x_left_npc, x_right_npc), "npc"), y = grid::unit(c(y_rule_npc, y_rule_npc), "npc"), gp = rule_gp) @@ -294,10 +339,23 @@ drawDetails.tfl_table_grob <- function(x, recording) { raw_val <- data[[cs$col]][i] cell_str <- .fmt_cell(raw_val, na_str) - # Group repeat suppression + # Group repeat suppression and span detection + clip_h <- row_h if (!is.null(suppress_mat) && cs$is_group_col) { col_pos <- match(cs$col, group_vars, nomatch = 0L) - if (col_pos > 0L && suppress_mat[[ri, col_pos]]) cell_str <- "" + if (col_pos > 0L) { + if (suppress_mat[[ri, col_pos]]) { + cell_str <- "" + } else if (!is.null(span_end_mat)) { + # Non-suppressed group cell: clip to the full span height so the + # (possibly multi-line) label can flow into the suppressed rows + # below it (HTML rowspan-style). + ri_end <- span_end_mat[[ri, col_pos]] + if (!is.na(ri_end) && ri_end > ri) { + clip_h <- sum(row_h_vec[ri:ri_end]) + } + } + } } # Resolve cell gpar (with lineheight applied) @@ -312,15 +370,20 @@ drawDetails.tfl_table_grob <- function(x, recording) { .draw_cell_text(display_str, cs$align, col_x_left[[j]], col_x_right[[j]], - y_cursor, row_h, vp_w, vp_h, + y_cursor, clip_h, vp_w, vp_h, h_lft_in, h_rgt_in, v_top_in, cell_gp, cs$width_in) } y_cursor <- y_cursor + row_h - # Row rule between data rows (not after last) - if (tbl$row_rule && ri < n_rows) { + # Row rule between data rows (not after last). Suppress the rule if + # the next row is part of a multi-row group span starting at or + # before this row — drawing a horizontal line through a label that + # flows downward would visually slice it. + rule_inside_span <- !is.null(suppress_mat) && ri < n_rows && + any(suppress_mat[ri + 1L, ]) + if (tbl$row_rule && ri < n_rows && !rule_inside_span) { rule_gp <- .resolve_table_gp(gp_tbl, "row_rule") y_rule_npc <- 1 - y_cursor / vp_h x_left_npc <- col_x_left[[1L]] / vp_w diff --git a/R/table_pagelist.R b/R/table_pagelist.R index f080273..d32065e 100644 --- a/R/table_pagelist.R +++ b/R/table_pagelist.R @@ -187,7 +187,7 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, tbl$line_height) } else 0 - row_heights <- measure_row_heights_tbl( + cell_h_mat <- measure_row_heights_tbl( tbl$data, resolved_cols, tbl$gp, tbl$cell_padding, tbl$na_string, tbl$line_height, tbl$max_measure_rows ) @@ -206,8 +206,10 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, # --- Step 6: Paginate rows --- row_pages <- paginate_rows( - tbl$data, row_heights, cont_row_h, header_row_h, ch, - tbl$group_vars, tbl$row_cont_msg, tbl$group_rule + tbl$data, cell_h_mat, resolved_cols, tbl$group_vars, + cont_row_h, header_row_h, ch, + tbl$row_cont_msg, tbl$group_rule, + suppress_repeated_groups = isTRUE(tbl$suppress_repeated_groups) ) # --- Step 7: Assemble page specs --- @@ -219,15 +221,15 @@ tfl_table_to_pagelist <- function(tbl, pg_width, pg_height, dots, for (rp in seq_len(n_rp)) { for (cg in seq_len(n_cg)) { grob <- build_table_grob( - row_page = row_pages[[rp]], - col_group_idx = col_groups[[cg]], - n_group_cols = n_group_cols, - resolved_cols = resolved_cols, - tbl = tbl, - row_heights_in = row_heights, - cont_row_h_in = cont_row_h, - is_first_col_page = (cg == 1L), - is_last_col_page = (cg == n_cg) + row_page = row_pages[[rp]], + col_group_idx = col_groups[[cg]], + n_group_cols = n_group_cols, + resolved_cols = resolved_cols, + tbl = tbl, + cell_heights_in_mat = cell_h_mat, + cont_row_h_in = cont_row_h, + is_first_col_page = (cg == 1L), + is_last_col_page = (cg == n_cg) ) page_spec <- list(content = grob) pages[[idx]] <- page_spec diff --git a/R/table_rows.R b/R/table_rows.R index a88fd38..66e2341 100644 --- a/R/table_rows.R +++ b/R/table_rows.R @@ -1,29 +1,39 @@ # table_rows.R — Row height measurement and group-aware row pagination # # Functions: -# measure_row_heights_tbl() — memoised per-row height measurement -# paginate_rows() — split rows into pages respecting group boundaries +# measure_row_heights_tbl() — per-cell height matrix +# .compute_page_row_heights() — resolve per-row heights with group spanning +# paginate_rows() — split rows into pages (span-aware) # --------------------------------------------------------------------------- -# measure_row_heights_tbl() — memoised row height measurement +# measure_row_heights_tbl() — per-cell height matrix # --------------------------------------------------------------------------- -#' Measure the rendered height of each data row in inches +#' Measure the rendered height of each table cell in inches #' #' Must be called while a viewport is active. #' Uses a memoised string-height function to avoid re-measuring repeated values. #' -#' @return Numeric vector of row heights in inches (length = nrow(data)). +#' Returns a matrix of cell heights (rows = data rows, cols = resolved_cols). +#' Each entry is the rendered height of that cell in inches, **including the +#' top + bottom cell padding (`v_pad_in`)** so that the per-row height is +#' simply `max(cell_h_mat[i, ])` without further adjustment. +#' +#' @param max_measure_rows Maximum number of rows to measure individually. +#' Non-sampled rows take the per-column max of the sampled rows (a +#' conservative estimate that mirrors prior behaviour). +#' @return Numeric matrix `[nrow(data) × length(resolved_cols)]` of inches. #' @keywords internal measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, na_string, line_height, max_measure_rows) { n_rows <- nrow(data) + n_cols <- length(resolved_cols) v_pad_in <- .height_in(cell_padding[["top"]]) + .height_in(cell_padding[["bottom"]]) h_lft_in <- .width_in(cell_padding[["left"]]) h_rgt_in <- .width_in(cell_padding[["right"]]) - # Memoised height function: (string, gp_key) -> height_in + # Memoised per-cell-text-height function: (string, gp_key) -> height_in memo <- new.env(hash = TRUE, parent = emptyenv()) .memo_str_height <- function(s, gp_key, gp) { key <- paste0(gp_key, "\x01", s) @@ -48,35 +58,138 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, seq_len(n_rows) } - # Measure sampled rows - sampled_heights <- vapply(sample_rows, function(i) { - max(vapply(resolved_cols, function(cs) { + # Build the matrix. Iterate column-major so we can resolve gpar once per + # column rather than once per cell. + cell_h_mat <- matrix(0, nrow = n_rows, ncol = n_cols) + for (j in seq_len(n_cols)) { + cs <- resolved_cols[[j]] + base_gp <- .resolve_table_cell_gp(gp_tbl, cs$is_group_col) + cell_gp <- .gp_with_lineheight(base_gp, line_height) + gp_key <- paste0(if (cs$is_group_col) "group_col" else "data_row", + "_lh", line_height) + avail_w <- if (!is.null(cs$width_in)) cs$width_in - h_lft_in - h_rgt_in + else NA_real_ + + for (i in sample_rows) { cell_str <- .fmt_cell(data[[cs$col]][i], na_string) - base_gp <- .resolve_table_cell_gp(gp_tbl, cs$is_group_col) - cell_gp <- .gp_with_lineheight(base_gp, line_height) - gp_key <- paste0(if (cs$is_group_col) "group_col" else "data_row", - "_lh", line_height) - # For wrap-eligible columns, wrap the text to the column width first display_str <- if (cs$wrap && !is.null(cs$width_in)) { - avail_w <- cs$width_in - h_lft_in - h_rgt_in .wrap_text(cell_str, avail_w, cell_gp) } else { cell_str } - # Count lines for conservative estimate - nlines <- max(1L, length(strsplit(display_str, "\n", fixed = TRUE)[[1L]])) - h_grob <- .memo_str_height(display_str, gp_key, cell_gp) - h_line <- nlines * .height_in(grid::stringHeight("M")) - max(h_grob, h_line) - }, numeric(1L))) + v_pad_in - }, numeric(1L)) - - max_sampled <- max(sampled_heights) - - # Build full height vector - heights <- rep(max_sampled, n_rows) - heights[sample_rows] <- sampled_heights - heights + nlines <- max(1L, length(strsplit(display_str, "\n", fixed = TRUE)[[1L]])) + h_grob <- .memo_str_height(display_str, gp_key, cell_gp) + h_line <- nlines * .height_in(grid::stringHeight("M")) + cell_h_mat[i, j] <- max(h_grob, h_line) + v_pad_in + } + } + + # For non-sampled rows, fill each column with the per-column max-of-sampled + # so that conservative heights are preserved. + if (length(sample_rows) < n_rows) { + not_sampled <- setdiff(seq_len(n_rows), sample_rows) + if (length(sample_rows) > 0L) { + col_max <- apply(cell_h_mat[sample_rows, , drop = FALSE], 2L, max) + } else { + col_max <- rep(0, n_cols) # nocov + } + for (j in seq_len(n_cols)) { + cell_h_mat[not_sampled, j] <- col_max[[j]] + } + } + + cell_h_mat +} + +# --------------------------------------------------------------------------- +# .compute_page_row_heights() — resolve per-row heights for one page +# --------------------------------------------------------------------------- + +# Compute the per-row heights for the rows on a single page. +# +# A single rule, dispatched on whether suppression is active: +# +# * **`suppress_mat` is `NULL`** — suppression is off, so every group +# cell renders on every row. Row height is the per-row max over +# every cell (group and non-group alike). +# +# * **`suppress_mat` is non-NULL** — suppression is on. Group columns +# never inflate row heights. Initialise row_h from non-group cells +# only, then walk group_vars innermost-first and, for each span, +# grow row_h[span_start] only if the label exceeds the cumulative +# span height — which lets multi-line labels flow downward through +# the blanked cells (HTML-`rowspan` behaviour) instead of inflating +# just the labelled row. Innermost-first so outer spans can borrow +# space inner spans already pushed for. First-row growth matches +# the label's top-aligned drawing. +# +# @param cell_h_mat Full matrix of cell heights including v_pad (inches). +# @param page_rows Integer vector of data-row indices visible on this page. +# @param resolved_cols List of resolved column specs (full list, not just page). +# @param group_vars Character vector of group column names. +# @param suppress_mat Logical matrix [length(page_rows) × length(group_vars)] +# from .compute_cell_suppression(), or NULL when suppression is disabled. +# @return Numeric vector of length(page_rows), heights in inches. +#' @keywords internal +.compute_page_row_heights <- function(cell_h_mat, page_rows, resolved_cols, + group_vars, suppress_mat) { + n_pr <- length(page_rows) + if (n_pr == 0L) return(numeric(0L)) + + n_grp <- length(group_vars) + + col_names <- vapply(resolved_cols, function(cs) cs$col, character(1L)) + is_group <- vapply(resolved_cols, function(cs) isTRUE(cs$is_group_col), + logical(1L)) + + # Per-row max over non-group cells. Falls back to the full-matrix max + # if every column is somehow a group column (degenerate input). + non_group_idx <- which(!is_group) + ng_max <- if (length(non_group_idx) > 0L) { + apply(cell_h_mat[page_rows, non_group_idx, drop = FALSE], 1L, max) + } else { + apply(cell_h_mat[page_rows, , drop = FALSE], 1L, max) # nocov + } + row_h <- as.numeric(ng_max) + + if (n_grp == 0L) return(row_h) + group_col_idx <- match(group_vars, col_names) + + if (is.null(suppress_mat)) { + # No suppression: every group cell is rendered fully on its row, so + # group cells contribute to the row max alongside non-group cells. + for (g in seq_len(n_grp)) { + j_mat <- group_col_idx[[g]] + if (is.na(j_mat)) next # safety; shouldn't happen + row_h <- pmax(row_h, cell_h_mat[page_rows, j_mat]) + } + return(row_h) + } + + # Suppression on: group columns never inflate rows. For each span the + # label is amortised across the span and only grows the start row when + # the deficit is positive (so a multi-line label flows downward into + # the suppressed cells). Innermost-first lets outer spans borrow + # whatever growth inner spans already produced. + for (g in rev(seq_len(n_grp))) { + j_mat <- group_col_idx[[g]] + if (is.na(j_mat)) next # safety; shouldn't happen + + starts <- which(!suppress_mat[, g]) + if (length(starts) == 0L) next # entire column suppressed (shouldn't happen) + ends <- c(starts[-1L] - 1L, n_pr) + + for (s_idx in seq_along(starts)) { + ri_start <- starts[[s_idx]] + ri_end <- ends[[s_idx]] + label_h <- cell_h_mat[page_rows[[ri_start]], j_mat] + avail <- sum(row_h[ri_start:ri_end]) + if (label_h > avail + 1e-9) { + row_h[[ri_start]] <- row_h[[ri_start]] + (label_h - avail) + } + } + } + row_h } # --------------------------------------------------------------------------- @@ -85,60 +198,80 @@ measure_row_heights_tbl <- function(data, resolved_cols, gp_tbl, cell_padding, #' Split rows into pages, respecting group boundaries #' +#' Uses a per-page tentative recompute of `.compute_page_row_heights()` so that +#' span-aware row heights drive the page-fit decision. Two non-obvious +#' properties of this scheme are preserved by the algorithm: +#' +#' * Adding a row to an existing group-span on the current page may leave the +#' total page height unchanged (the span absorbs deficit that previously +#' inflated earlier rows), so more rows can fit than a per-row scalar sum +#' would predict. +#' * When only the first row of a multi-row group lands on the current page +#' (group orphan), that row's span on the page is length 1 and the row is +#' grown to fit the full label height. `committed_rh` snapshots heights +#' after each successful append, so the orphan-correct heights are what gets +#' flushed to the page spec. +#' #' @param data Data frame. -#' @param row_heights_in Numeric vector of row heights in inches. +#' @param cell_h_mat Per-cell height matrix from `measure_row_heights_tbl()`. +#' @param resolved_cols Full list of resolved column specs (used to identify +#' non-group columns). +#' @param group_vars Character vector of group column names. #' @param cont_row_h Height of a continuation-marker row in inches. #' @param header_row_h Height of the column header row (0 if suppressed). #' @param content_height_in Available content height per page. -#' @param group_vars Character vector of group column names. #' @param row_cont_msg Text for continuation-marker rows. -#' @param group_rule Logical — are group rules drawn? -#' @return A list of row-page specs (see internal structure below). +#' @param group_rule Logical — are group rules drawn? (Reserved for future +#' use; currently does not affect pagination because rules are 0-height.) +#' @param suppress_repeated_groups Logical, from `tbl$suppress_repeated_groups`. +#' @return A list of row-page specs, each with `$rows`, `$is_cont_top`, +#' `$is_cont_bottom`, `$group_starts`, and `$row_heights_in` (the committed +#' per-row heights for that page in inches). #' @keywords internal -paginate_rows <- function(data, row_heights_in, cont_row_h, header_row_h, - content_height_in, group_vars, row_cont_msg, - group_rule) { - n_rows <- nrow(data) - n_grp <- length(group_vars) +paginate_rows <- function(data, cell_h_mat, resolved_cols, group_vars, + cont_row_h, header_row_h, content_height_in, + row_cont_msg, group_rule, + suppress_repeated_groups = TRUE) { + n_rows <- nrow(data) - # Identify group boundaries: rows that start a new group + # Group boundaries in the *full* data — used for the page-spec $group_starts + # field that the drawing code consults for group-rule placement. group_starts <- .compute_group_starts(data, group_vars) - pages <- list() - cur_rows <- integer(0L) - cur_h <- header_row_h - # Rule heights: rules are drawn within existing row boundaries, 0 extra height - # (they render at the row boundary, not consuming additional space) + pages <- list() + cur_rows <- integer(0L) + committed_rh <- numeric(0L) # heights for cur_rows after last successful add + is_cont_top <- FALSE - flush_page <- function(rows, is_cont_top, is_cont_bottom) { + flush_page <- function(rows, row_heights_in, is_cont_top, is_cont_bottom) { pages[[length(pages) + 1L]] <<- list( rows = rows, is_cont_top = is_cont_top, is_cont_bottom = is_cont_bottom, - group_starts = intersect(group_starts, rows) + group_starts = intersect(group_starts, rows), + row_heights_in = row_heights_in ) } i <- 1L - is_cont_top <- FALSE # does this page start with a (continued) row? - while (i <= n_rows) { - rh <- row_heights_in[[i]] - - # Would this row fit? - extra_h <- rh - if (is_cont_top) extra_h <- extra_h # cont row already counted below - - needs_group_rule <- group_rule && i %in% group_starts && length(cur_rows) > 0L - # Group rule uses no additional height (drawn at boundary) - - # Does adding this row overflow? - if (cur_h + extra_h + cont_row_h > content_height_in + 1e-6 && length(cur_rows) > 0L) { + candidate <- c(cur_rows, i) + sup <- if (suppress_repeated_groups && length(group_vars) > 0L) { + .compute_cell_suppression(data, group_vars, candidate) + } else NULL + rh <- .compute_page_row_heights( + cell_h_mat, candidate, resolved_cols, group_vars, sup + ) + total <- header_row_h + + (if (is_cont_top) cont_row_h else 0) + + sum(rh) + + cont_row_h # reserve bottom continuation marker + if (total > content_height_in + 1e-6 && length(cur_rows) > 0L) { # Warn whenever a group is split across pages (row i and the last row - # on the current page belong to the same group) - if (length(group_vars) > 0L && length(cur_rows) > 0L) { - last_in_page <- cur_rows[length(cur_rows)] + # on the current page belong to the same group). + if (length(group_vars) > 0L) { + last_in_page <- cur_rows[[length(cur_rows)]] same_group <- all(vapply(group_vars, function(gv) { identical(data[[gv]][last_in_page], data[[gv]][i]) }, logical(1L))) @@ -150,23 +283,21 @@ paginate_rows <- function(data, row_heights_in, cont_row_h, header_row_h, } } - flush_page(cur_rows, is_cont_top, is_cont_bottom = TRUE) + flush_page(cur_rows, committed_rh, is_cont_top, is_cont_bottom = TRUE) - # Re-init next page with cont_top marker cur_rows <- integer(0L) - cur_h <- header_row_h + cont_row_h # top cont row + committed_rh <- numeric(0L) is_cont_top <- TRUE - - next # restart loop iteration to re-add row i + next # re-process row i on a fresh page } - cur_rows <- c(cur_rows, i) - cur_h <- cur_h + rh - i <- i + 1L + cur_rows <- candidate + committed_rh <- rh + i <- i + 1L } if (length(cur_rows) > 0L) { - flush_page(cur_rows, is_cont_top, is_cont_bottom = FALSE) + flush_page(cur_rows, committed_rh, is_cont_top, is_cont_bottom = FALSE) } pages diff --git a/R/table_utils.R b/R/table_utils.R index 85359a7..1884093 100644 --- a/R/table_utils.R +++ b/R/table_utils.R @@ -106,7 +106,8 @@ # Compute group sizes (number of rows per group) from data and group vars. # Returns a named integer vector: name = row index of group start (as string), -# value = number of rows in that group. +# value = number of rows in that group. Group is defined by the *full* +# group_vars vector — i.e. the most-specific (innermost) grouping. .compute_group_sizes <- function(data, group_vars) { if (length(group_vars) == 0L || nrow(data) == 0L) return(integer(0L)) all_starts <- .compute_group_starts(data, group_vars) @@ -115,6 +116,68 @@ stats::setNames(sizes, as.character(all_starts)) } +# Per-group-start metadata for group-rule visibility and width. +# +# Returns a list with two named integer vectors keyed by group_start row +# index (as string): +# +# $sizes — for each group_start row i, the size of the group at the +# *outermost* level that changed between rows i-1 and i (the +# count of rows in `data` whose group_vars[1..k] all match +# row i's values, where k is the outermost changing level). +# The first group_start is NA (no transition before it). +# $levels — the column index k (1-based, into group_vars) of the +# outermost changing level. NA for the first group_start. +# +# This differs from .compute_group_sizes(), which always uses the full +# group_vars vector for sizing. When a transition crosses an outer-group +# boundary but the new innermost group has only a single row (e.g. Cohort +# changes from 1 to 2 and Cohort 2 happens to start with a one-row Visit), +# .compute_group_sizes() returns 1 and the rule gets suppressed; here the +# outer Cohort group size is returned so a meaningful boundary still gets +# a rule. $levels lets callers draw a partial-width rule that starts at +# the changing column instead of always spanning the full table width. +# +# Used only when tbl$simplify_rowspan is TRUE. +.compute_group_rule_info <- function(data, group_vars) { + if (length(group_vars) == 0L || nrow(data) == 0L) { + return(list(sizes = integer(0L), levels = integer(0L))) + } + starts <- .compute_group_starts(data, group_vars) + sizes <- rep(NA_integer_, length(starts)) + levels <- rep(NA_integer_, length(starts)) + names(sizes) <- as.character(starts) + names(levels) <- as.character(starts) + if (length(starts) <= 1L) return(list(sizes = sizes, levels = levels)) + + for (idx in seq_along(starts)[-1L]) { + i <- starts[[idx]] + i_prev <- i - 1L + for (k in seq_along(group_vars)) { + if (!identical(data[[group_vars[[k]]]][[i_prev]], + data[[group_vars[[k]]]][[i]])) { + # Outermost changing level is k; count rows whose group_vars[1..k] + # all equal row i's values. + cols <- group_vars[seq_len(k)] + mask <- rep(TRUE, nrow(data)) + for (gv in cols) { + v <- data[[gv]] + target <- data[[gv]][[i]] + if (is.na(target)) { + mask <- mask & is.na(v) + } else { + mask <- mask & !is.na(v) & v == target + } + } + sizes[[idx]] <- sum(mask) + levels[[idx]] <- k + break + } + } + } + list(sizes = sizes, levels = levels) +} + # --------------------------------------------------------------------------- # String / cell formatting helpers # --------------------------------------------------------------------------- diff --git a/R/tfl_table.R b/R/tfl_table.R index b14ed51..b67eac0 100644 --- a/R/tfl_table.R +++ b/R/tfl_table.R @@ -113,7 +113,17 @@ tfl_colspec <- function(col, #' @param suppress_repeated_groups Logical. When `TRUE` (default), group column #' cells whose value equals the immediately preceding rendered row on the #' same page are left blank. The first data row on each page always shows -#' the group value. +#' the group value. When suppression is active, multi-line group labels +#' render HTML-`rowspan`-style: the label's full vertical extent flows +#' downward through the blanked cells below it instead of inflating the +#' labelled row alone. Row heights are computed per page (a row may +#' render at different heights on different pages when a group is split, +#' because the first row on a page re-shows the label and may need to +#' grow to fit it alone). `row_rule` lines that would slice through a +#' flowing label are suppressed, and `group_rule` lines start at the +#' first column whose group value is actually changing at that +#' boundary. Set `suppress_repeated_groups = FALSE` to render every +#' group cell on every row instead. #' @param sub_tfl Character vector of column names in `x`, or `NULL` (default). #' When non-NULL, the table is split into one sub-table per unique combination #' of values in these columns. Each sub-table's caption gains a suffix of the diff --git a/design/ARCHITECTURE.md b/design/ARCHITECTURE.md index 19a5984..d4f4574 100644 --- a/design/ARCHITECTURE.md +++ b/design/ARCHITECTURE.md @@ -148,27 +148,42 @@ export_tfl(x = tfl_table_obj, ...) [exported] │ paginate_cols(...) ├── [scratch device + outer_vp] measure heights: │ .measure_header_row_height() — table_utils.R - │ measure_row_heights_tbl() — table_rows.R + │ measure_row_heights_tbl() → cell_h_mat — table_rows.R + │ Per-cell height matrix [nrow × ncol]; each entry includes + │ the v_pad_in (top + bottom padding). This is the input to + │ the per-page row-height resolver below. │ .measure_cont_row_height() — table_utils.R ├── paginate_rows(...) — table_rows.R + │ Span-aware pagination: per-page tentative recompute via + │ .compute_page_row_heights() to drive page-fit decisions. Each + │ committed page spec carries $row_heights_in (orphan-correct). └── for rp × cg: build_table_grob(row_page, col_group_idx, — table_draw.R n_group_cols, resolved_cols, tbl, - row_heights_in, cont_row_h_in, + cell_heights_in_mat, cont_row_h_in, is_first_col_page, is_last_col_page) → gTree of class "tfl_table_grob" [grob passed as x$content to export_tfl_page()] drawDetails.tfl_table_grob(x, recording) — table_draw.R - ├── Reuse or recompute: header_row_h, cont_row_h, row_h_vec + ├── Reuse or recompute: header_row_h, cont_row_h, suppress_mat + ├── row_h_vec ← row_page$row_heights_in (committed by pagination), or + │ recompute via .compute_page_row_heights(cell_heights_in_mat, …). + ├── span_end_mat (per group column, per row) — last row index in the + │ span starting at each non-suppressed row; lets non-suppressed group + │ cells be drawn with a clip viewport spanning the full span height + │ so multi-line labels flow into suppressed rows below (rowspan-style). ├── Draw column header row (.draw_header_row) ├── Draw col_header_rule (grid.lines) ├── Draw top continuation row (.draw_cont_row) ├── for each data row: │ group rule before row (grid.lines) - │ draw each cell (.draw_cell_text) - │ row rule after row (grid.lines, if row_rule && not last) + │ draw each cell (.draw_cell_text); for non-suppressed + │ group cells whose span > 1 row, pass span_h instead of row_h + │ so the clip viewport extends over the whole span. + │ row rule after row (grid.lines), suppressed when row ri+1 + │ has any suppressed group column (rule would slice a label). ├── group_rule_after_last (grid.lines) ├── Draw bottom continuation row (.draw_cont_row) ├── Draw row_header_sep (grid.lines) @@ -330,11 +345,11 @@ export_tfl(x = list_of_table1, ...) [exported] | `R/reexports.R` | `%||%` from rlang | | `R/tfl_table.R` | `tfl_colspec()`, `tfl_table()`, `print.tfl_table()`, `.check_named_subset()` | | `R/table_columns.R` | `resolve_col_specs()`, `compute_col_widths()`, `.apply_col_wrapping()`, `paginate_cols()` | -| `R/table_rows.R` | `measure_row_heights_tbl()`, `paginate_rows()` | -| `R/table_draw.R` | `build_table_grob()`, `drawDetails.tfl_table_grob()`, `.draw_header_row()`, `.draw_cont_row()`, `.draw_cell_text()` | +| `R/table_rows.R` | `measure_row_heights_tbl()` (returns per-cell matrix), `.compute_page_row_heights()`, `paginate_rows()` | +| `R/table_draw.R` | `build_table_grob()`, `drawDetails.tfl_table_grob()`, `.compute_cell_suppression()`, `.draw_header_row()`, `.draw_cont_row()`, `.draw_cell_text()` | | `R/table_pagelist.R` | `tfl_table_to_pagelist()`, `compute_table_content_area()` | | `R/sub_tfl.R` | `.compute_sub_tfl_groups()`, `.format_sub_tfl_caption()`, `.apply_sub_tfl_caption()`, `.strip_sub_tfl_cols()`, `.resolve_col_label()` | -| `R/table_utils.R` | `.make_outer_vp()`, `.width_in()`, `.height_in()`, `.measure_header_row_height()`, `.measure_cont_row_height()`, `.gp_with_lineheight()`, `.compute_group_starts()`, `.compute_group_sizes()`, `.collect_col_strings()`, `.fmt_cell()`, `.fmt_cell_vec()`, `.measure_max_string_width()`, `.resolve_table_gp()`, `.resolve_table_cell_gp()`, `.default_align()`, `.wrap_text()` | +| `R/table_utils.R` | `.make_outer_vp()`, `.width_in()`, `.height_in()`, `.measure_header_row_height()`, `.measure_cont_row_height()`, `.gp_with_lineheight()`, `.compute_group_starts()`, `.compute_group_sizes()`, `.compute_group_rule_info()` (used when `simplify_rowspan = TRUE` for outer-level rule visibility and partial-width start column), `.collect_col_strings()`, `.fmt_cell()`, `.fmt_cell_vec()`, `.measure_max_string_width()`, `.resolve_table_gp()`, `.resolve_table_cell_gp()`, `.default_align()`, `.wrap_text()` | --- @@ -607,16 +622,37 @@ paginate_cols(col_indices, col_widths_in, group_col_indices, ``` measure_row_heights_tbl(data, resolved_cols, gp_tbl, cell_padding, na_string, line_height, max_measure_rows) - → numeric vector of length nrow(data), heights in inches + → numeric MATRIX [nrow(data) × length(resolved_cols)] of heights in inches + Each entry includes the v_pad_in (top + bottom padding) so that + per-row max(matrix[i, ]) is the row height when no spanning happens. Uses memoised string-height to avoid re-measuring repeated values. max_measure_rows: sample the longest rows to cap measurement cost. - -paginate_rows(data, row_heights, cont_row_h, header_row_h, - avail_h, group_vars, row_cont_msg, group_rule) + Non-sampled rows take the per-column max-of-sampled value. + +.compute_page_row_heights(cell_h_mat, page_rows, resolved_cols, + group_vars, suppress_mat) + → numeric vector of length(page_rows) + Span-aware per-page resolver. Initialises row_h[ri] = max over + non-group columns of cell_h_mat[page_rows[ri], col]. Then for each + group column from innermost (last) to outermost (first), finds spans + in suppress_mat and grows row_h[ri_start] by any deficit between the + label height (cell_h_mat[page_rows[ri_start], col_g]) and the sum of + the span's row heights. Innermost-first ensures outer groups can + borrow the space inner groups already pushed for. Early-exit when + no group columns or suppress_mat = NULL — returns per-row max over + *all* columns (no flow when suppression is disabled). + +paginate_rows(data, cell_h_mat, resolved_cols, group_vars, + cont_row_h, header_row_h, content_height_in, + row_cont_msg, group_rule, suppress_repeated_groups) → list of row_page structs: - { rows, is_cont_top, is_cont_bottom, group_starts } - Group-aware: tries to keep group blocks together; places - continuation-marker rows at page boundaries. + { rows, is_cont_top, is_cont_bottom, group_starts, row_heights_in } + Span-aware pagination via per-page tentative recompute: each candidate + row addition recomputes suppress_mat and .compute_page_row_heights for + c(cur_rows, i) and checks the total against content_height_in. When + overflow is detected, the previously committed (cur_rows, committed_rh) + pair is flushed — committed_rh captures the orphan-correct heights for + the row that landed alone at the page boundary. ``` --- diff --git a/design/DECISIONS.md b/design/DECISIONS.md index 19d9617..bf2d2e2 100644 --- a/design/DECISIONS.md +++ b/design/DECISIONS.md @@ -900,3 +900,137 @@ bug being fixed. Users who want the old behavior can pass grob check), `R/table_columns.R` (`compute_col_widths()` per-column + total-width checks), `R/table_pagelist.R` (thread the arg through), `R/export_tfl.R` (`@inheritDotParams`). + +--- + +## D-40: Group-label rowspan-style flow (issue #29) + +**Decision:** When a group column's value is multi-line, do not force its +row to be tall enough to fit the whole label. Instead let the label flow +through the suppressed (blanked) cells in the rows below it the same way +HTML `` reserves a single visually-spanning cell. + +The implementation has three parts: + +1. **Per-cell height matrix.** `measure_row_heights_tbl()` now returns a + `[nrow(data) × length(resolved_cols)]` matrix of cell heights instead + of a per-row scalar vector. Each entry includes `v_pad_in` so the + per-row max equals the row height when no spanning happens. + +2. **Span-aware per-page resolver `.compute_page_row_heights()`.** Given + the matrix, the rows on a page, the resolved columns, the group + variable list, and the per-page suppression matrix, walk group columns + from innermost to outermost and grow the first row of each span by any + deficit between the label height and the sum of the span's row + heights. Innermost-first lets outer spans borrow inner-pass growth. + First-row growth matches the label's top-anchored alignment in + `.draw_cell_text()`. + +3. **Per-page tentative recompute in `paginate_rows()`.** The fit check + uses `sum(.compute_page_row_heights(c(cur_rows, i), …))` rather than a + running scalar accumulator. This is required because span heights are + non-monotone in row count (adding a row to an open span can leave the + total unchanged or even shrink earlier-row contributions as the span's + `avail` grows), and because the orphan case — when only the first row + of a multi-row group lands on the current page — must size that row + to fit the full label by itself. `committed_rh` snapshots heights + after each successful append so the flush at overflow uses the + orphan-correct heights. + +**User need (from issue #29):** "If there is a grouping column in a table +which will have empty rows under it, and the grouping column has multiple +rows of text, do not reserve space for the grouping column more than is +required. Allow it to flow into the empty space below like a rowspan." +Plus: "if there is a grouping column on one page and different behavior +on the next page... the handling of the reserved height for column A will +differ between the pages." + +**Default behaviour whenever suppression is active.** The four +behavioural changes below all turn on together whenever +`suppress_repeated_groups = TRUE` (the package default). No separate +opt-in flag — if the user has asked for suppression, they have asked +for the behaviour that makes suppression visually coherent: +1. span-aware row heights — group columns never inflate row heights; + multi-line labels flow downward through the blanked cells below; +2. row-rule suppression within a multi-row span — a horizontal line + that would slice a flowing label is skipped; +3. partial-width group rules — the rule line starts at the column for + the outermost group_var level that actually changed at the + transition, so unchanged outer columns through which the label is + flowing aren't visually divided; +4. group rules drawn at *every* transition — the historical "skip rule + when the new innermost group has size 1" check is bypassed because + partial widths and label-flow alignment make single-row transitions + visually unambiguous. + +**Opt-out via `suppress_repeated_groups = FALSE`.** Setting suppression +itself to `FALSE` reverts to the strict per-row layout: every group +cell renders fully on every row and each row's height is the per-row +max over every cell. Group rules also revert to full-width. This is +the only "off switch" — the design treats the rowspan flow as the +natural visual rendering of suppression, not a separate feature. + +An earlier iteration of this branch added a `simplify_rowspan` flag +defaulting to `FALSE` (opt-in for the flow). After review feedback +that the row-height behaviour should be the default whenever +suppression is on, the flag was removed: keeping it bifurcated the +mental model into three modes when one suffices. + +**Suppression-aware row rule.** The `row_rule` between data rows is +suppressed when the next row is part of a multi-row group span (any +suppressed group column on row `ri+1`). A horizontal line slicing +through a label that flows downward would visually fragment it; HTML +rowspan also has no internal borders. Group rules and +`group_rule_after_last` are unaffected because they only fire at group +boundaries (which are also span boundaries). + +**Partial-width group rules.** The group rule line starts at the +column corresponding to the *outermost* group-var level that actually +changed at the transition, not at column 1. A new helper +`.compute_group_rule_info()` returns both the size and the +outermost-changing level per group_start; drawing reads the level to +set the line's left edge. Concrete result for nested +`group_vars = c("Cohort", "Visit")`: + +| transition | outermost changer | rule columns | +| ---------- | ----------------- | ------------------- | +| Visit only | Visit (level 2) | Visit, Value | +| Cohort | Cohort (level 1) | Cohort, Visit, Value| + +Partial-width rules apply regardless of `suppress_repeated_groups`, +because the rule semantically marks a change at the outermost-changing +level whether or not the unchanged levels' cells are suppressed. + +**Alternatives considered and rejected:** +- *Distribute deficit evenly across rows of the span* — leaves wasted + space below the (top-anchored) label in early rows. First-row growth + is both visually minimal and consistent with the alignment. +- *Vertically centre labels in the span* — aesthetic change orthogonal + to the height-management problem. Not in scope; can be added later + via a `tfl_table` argument if a use case appears. +- *Span-aware row-fill rectangles* — currently each row paints its own + background. Painting a multi-row block under a span would be visually + consistent with the label flow but conflicts with stripe shading + (`fill_by = "row"`). Out of scope; the per-row stripe is consistent + with the body cells still being one-row each. +- *Cache only the per-page committed heights, not the full matrix* — + pagination needs the matrix for its tentative recompute, drawing + needs it for the fallback path. Caching both the matrix on the grob + and the committed heights on each page spec is cheap (matrices are + small) and lets each consumer pick whichever is cheaper. + +**Files touched:** +- Modified: `R/table_rows.R` (matrix output, `.compute_page_row_heights()`, + span-aware `paginate_rows()`); `R/table_draw.R` (per-page row-h source, + span-end matrix, span-aware clipping height in `.draw_cell_text()` calls, + row-rule suppression predicate, renamed `cell_heights_in_mat` cache); + `R/table_pagelist.R` (pass the matrix and `suppress_repeated_groups`). +- New tests: `tests/testthat/test-row_span.R` exercising the algorithm, + pagination's free-row property, the orphan case, the per-page reset, and + end-to-end rendering. + +**Backward compatibility:** no exported API changes. Existing tables +that did not use multi-line group labels render identically (same row +heights, same pagination). Tables that did use them now render in less +vertical space, which may *increase* the number of rows on a page (and +correspondingly *decrease* the total page count). diff --git a/design/DESIGN.md b/design/DESIGN.md index 3d0e6fe..484b212 100644 --- a/design/DESIGN.md +++ b/design/DESIGN.md @@ -172,14 +172,57 @@ final output) without writing to disk. --- -## Why store `row_heights_in` and `cont_row_h_in` in the gTree? +## Why store `cell_heights_in_mat` and `cont_row_h_in` in the gTree? The `drawDetails` method is called by `grid` at render time, potentially long -after paginate time. Pre-computing row heights during pagination (when a +after paginate time. Pre-computing cell heights during pagination (when a scratch device is already open) and caching them in the grob avoids opening another device at draw time and ensures layout consistency: the heights used for pagination and the heights used for drawing are identical. +The grob caches the *full* per-cell height matrix rather than per-row +scalars because the per-row height for a given page depends on which other +rows are on that page (suppression resets per page; multi-row group spans +absorb deficit jointly). Each page spec separately carries its committed +`row_heights_in` so drawing reads the exact same heights pagination decided +on, while the matrix supports the fallback path if that cache is missing. + +--- + +## Why a per-cell height matrix instead of per-row scalars? + +Issue #29 introduced HTML-`rowspan`-style flow for multi-line group labels. +The label of a group column should be allowed to flow downward through the +suppressed cells beneath it, so a 2-line label spanning two single-line +rows costs 2 lines, not 3. Implementing that requires answering, for any +(row, column) pair, "what is the natural height of this cell ignoring its +neighbours?" — i.e. a per-cell measurement. Per-row scalars cannot +represent this without losing the column dimension. + +The matrix also lets the per-page row-height resolver recompute heights +when suppression boundaries shift between pages (e.g. when a group is +split across pages and the label re-appears on the second page), without +re-doing the expensive `grobHeight()` measurements. + +--- + +## Why innermost-group first in `.compute_page_row_heights()`? + +Outer-group spans are always supersets of (or equal to) inner-group spans +because `.compute_cell_suppression()` resets the inner `last_val` whenever +any outer column changes. Processing inner spans first means the row +heights already absorb whatever growth the inner labels demanded by the +time outer spans are evaluated; the outer label "borrows" any extra space +the inner pass added. Reversing the order would compute outer growth +against pre-inner heights and then over-grow when inner labels later need +more space. + +The deficit always lands on the *first row* of the span because +`.draw_cell_text()` anchors labels to the top-of-cell (`just = c(.., "top")`). +Growing a later row in the span would not make the top-anchored label any +more visible — extra space would simply appear below the label inside an +already-drawn row. + --- ## Why use rotated side labels for `col_cont_msg` instead of `footer_center`? diff --git a/man/build_table_grob.Rd b/man/build_table_grob.Rd index 862dc1a..eced8aa 100644 --- a/man/build_table_grob.Rd +++ b/man/build_table_grob.Rd @@ -10,7 +10,7 @@ build_table_grob( n_group_cols, resolved_cols, tbl, - row_heights_in = NULL, + cell_heights_in_mat = NULL, cont_row_h_in = NULL, is_first_col_page = TRUE, is_last_col_page = TRUE diff --git a/man/measure_row_heights_tbl.Rd b/man/measure_row_heights_tbl.Rd index 22313ce..e67ecc9 100644 --- a/man/measure_row_heights_tbl.Rd +++ b/man/measure_row_heights_tbl.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/table_rows.R \name{measure_row_heights_tbl} \alias{measure_row_heights_tbl} -\title{Measure the rendered height of each data row in inches} +\title{Measure the rendered height of each table cell in inches} \usage{ measure_row_heights_tbl( data, @@ -14,11 +14,22 @@ measure_row_heights_tbl( max_measure_rows ) } +\arguments{ +\item{max_measure_rows}{Maximum number of rows to measure individually. +Non-sampled rows take the per-column max of the sampled rows (a +conservative estimate that mirrors prior behaviour).} +} \value{ -Numeric vector of row heights in inches (length = nrow(data)). +Numeric matrix \verb{[nrow(data) × length(resolved_cols)]} of inches. } \description{ Must be called while a viewport is active. Uses a memoised string-height function to avoid re-measuring repeated values. } +\details{ +Returns a matrix of cell heights (rows = data rows, cols = resolved_cols). +Each entry is the rendered height of that cell in inches, \strong{including the +top + bottom cell padding (\code{v_pad_in})} so that the per-row height is +simply \code{max(cell_h_mat[i, ])} without further adjustment. +} \keyword{internal} diff --git a/man/paginate_rows.Rd b/man/paginate_rows.Rd index 501a9e5..8b47a7a 100644 --- a/man/paginate_rows.Rd +++ b/man/paginate_rows.Rd @@ -6,19 +6,26 @@ \usage{ paginate_rows( data, - row_heights_in, + cell_h_mat, + resolved_cols, + group_vars, cont_row_h, header_row_h, content_height_in, - group_vars, row_cont_msg, - group_rule + group_rule, + suppress_repeated_groups = TRUE ) } \arguments{ \item{data}{Data frame.} -\item{row_heights_in}{Numeric vector of row heights in inches.} +\item{cell_h_mat}{Per-cell height matrix from \code{measure_row_heights_tbl()}.} + +\item{resolved_cols}{Full list of resolved column specs (used to identify +non-group columns).} + +\item{group_vars}{Character vector of group column names.} \item{cont_row_h}{Height of a continuation-marker row in inches.} @@ -26,16 +33,34 @@ paginate_rows( \item{content_height_in}{Available content height per page.} -\item{group_vars}{Character vector of group column names.} - \item{row_cont_msg}{Text for continuation-marker rows.} -\item{group_rule}{Logical — are group rules drawn?} +\item{group_rule}{Logical — are group rules drawn? (Reserved for future +use; currently does not affect pagination because rules are 0-height.)} + +\item{suppress_repeated_groups}{Logical, from \code{tbl$suppress_repeated_groups}.} } \value{ -A list of row-page specs (see internal structure below). +A list of row-page specs, each with \verb{$rows}, \verb{$is_cont_top}, +\verb{$is_cont_bottom}, \verb{$group_starts}, and \verb{$row_heights_in} (the committed +per-row heights for that page in inches). } \description{ -Split rows into pages, respecting group boundaries +Uses a per-page tentative recompute of \code{.compute_page_row_heights()} so that +span-aware row heights drive the page-fit decision. Two non-obvious +properties of this scheme are preserved by the algorithm: +} +\details{ +\itemize{ +\item Adding a row to an existing group-span on the current page may leave the +total page height unchanged (the span absorbs deficit that previously +inflated earlier rows), so more rows can fit than a per-row scalar sum +would predict. +\item When only the first row of a multi-row group lands on the current page +(group orphan), that row's span on the page is length 1 and the row is +grown to fit the full label height. \code{committed_rh} snapshots heights +after each successful append, so the orphan-correct heights are what gets +flushed to the page spec. +} } \keyword{internal} diff --git a/man/tfl_table.Rd b/man/tfl_table.Rd index 580d884..85c8079 100644 --- a/man/tfl_table.Rd +++ b/man/tfl_table.Rd @@ -75,7 +75,17 @@ Default \code{FALSE}.} \item{suppress_repeated_groups}{Logical. When \code{TRUE} (default), group column cells whose value equals the immediately preceding rendered row on the same page are left blank. The first data row on each page always shows -the group value.} +the group value. When suppression is active, multi-line group labels +render HTML-\code{rowspan}-style: the label's full vertical extent flows +downward through the blanked cells below it instead of inflating the +labelled row alone. Row heights are computed per page (a row may +render at different heights on different pages when a group is split, +because the first row on a page re-shows the label and may need to +grow to fit it alone). \code{row_rule} lines that would slice through a +flowing label are suppressed, and \code{group_rule} lines start at the +first column whose group value is actually changing at that +boundary. Set \code{suppress_repeated_groups = FALSE} to render every +group cell on every row instead.} \item{sub_tfl}{Character vector of column names in \code{x}, or \code{NULL} (default). When non-NULL, the table is split into one sub-table per unique combination diff --git a/tests/testthat/test-row_span.R b/tests/testthat/test-row_span.R new file mode 100644 index 0000000..47e768e --- /dev/null +++ b/tests/testthat/test-row_span.R @@ -0,0 +1,272 @@ +# test-row_span.R — HTML-rowspan-style flow for multi-line group labels +# +# Issue #29: a multi-line value in a group column should not force its row to +# be tall enough to fit all label lines. Instead, the label flows into the +# suppressed (blanked) cells in the rows below it, the same way HTML +# reserves a single cell that visually spans N rows. +# +# This is the default behaviour whenever suppression is active +# (suppress_repeated_groups = TRUE). When suppression is itself off, every +# group cell renders fully on every row and the per-row max over all cells +# is used (the historical layout). + +# ---- helpers --------------------------------------------------------------- + +# Build a minimal resolved_cols list for .compute_page_row_heights(). +.spec <- function(col, is_group_col = FALSE) { + list(col = col, is_group_col = is_group_col) +} + +# ---- .compute_page_row_heights() ------------------------------------------- + +test_that("two-row span: label flows; both rows stay at non-group height", { + cell_h_mat <- matrix(c(2, 2, # column A (label = 2 lines) + 1, 1), # column B (1 line each) + nrow = 2, byrow = FALSE) + resolved_cols <- list(.spec("A", is_group_col = TRUE), .spec("B")) + suppress_mat <- matrix(c(FALSE, TRUE), nrow = 2, ncol = 1, + dimnames = list(NULL, "A")) + + row_h <- writetfl:::.compute_page_row_heights( + cell_h_mat, page_rows = 1:2, resolved_cols, + group_vars = "A", suppress_mat = suppress_mat + ) + expect_equal(row_h, c(1, 1)) +}) + +test_that("single-row span: row grows to fit the multi-line label", { + cell_h_mat <- matrix(c(2, 1), nrow = 1, byrow = FALSE) + resolved_cols <- list(.spec("A", is_group_col = TRUE), .spec("B")) + suppress_mat <- matrix(FALSE, nrow = 1, ncol = 1, + dimnames = list(NULL, "A")) + + row_h <- writetfl:::.compute_page_row_heights( + cell_h_mat, page_rows = 1L, resolved_cols, + group_vars = "A", suppress_mat = suppress_mat + ) + expect_equal(row_h, 2) +}) + +test_that("nested groups: inner span absorbed first, outer borrows remainder", { + cell_h_mat <- matrix(c(2, 2, 2, # A (outer) + 2, 2, 1, # B (inner) + 1, 1, 1), # C + nrow = 3, byrow = FALSE) + resolved_cols <- list( + .spec("A", is_group_col = TRUE), + .spec("B", is_group_col = TRUE), + .spec("C") + ) + suppress_mat <- matrix(c(FALSE, TRUE, TRUE, + FALSE, TRUE, FALSE), + nrow = 3, ncol = 2, + dimnames = list(NULL, c("A", "B"))) + row_h <- writetfl:::.compute_page_row_heights( + cell_h_mat, page_rows = 1:3, resolved_cols, + group_vars = c("A", "B"), suppress_mat = suppress_mat + ) + expect_equal(row_h, c(1, 1, 1)) +}) + +test_that("nested groups: inner growth feeds outer span availability", { + cell_h_mat <- matrix(c(5, 5, 5, # A + 4, 4, 4, # B + 1, 1, 1), # C + nrow = 3, byrow = FALSE) + resolved_cols <- list( + .spec("A", is_group_col = TRUE), + .spec("B", is_group_col = TRUE), + .spec("C") + ) + suppress_mat <- matrix(c(FALSE, TRUE, TRUE, + FALSE, TRUE, TRUE), + nrow = 3, ncol = 2, + dimnames = list(NULL, c("A", "B"))) + row_h <- writetfl:::.compute_page_row_heights( + cell_h_mat, page_rows = 1:3, resolved_cols, + group_vars = c("A", "B"), suppress_mat = suppress_mat + ) + expect_equal(row_h, c(3, 1, 1)) +}) + +test_that("suppress_mat = NULL: every cell counts (per-row max over all)", { + # Suppression disabled — every group cell renders fully on every row, so + # the row max is taken over all cells (group and non-group alike). + cell_h_mat <- matrix(c(2, 2, # A (group, but rendered every row) + 1, 1), # B + nrow = 2, byrow = FALSE) + resolved_cols <- list(.spec("A", is_group_col = TRUE), .spec("B")) + expect_equal( + writetfl:::.compute_page_row_heights( + cell_h_mat, page_rows = 1:2, resolved_cols, + group_vars = "A", suppress_mat = NULL + ), + c(2, 2) + ) +}) + +test_that("no group_vars: per-row max over all columns", { + cell_h_mat <- matrix(c(3, 1, + 1, 4), + nrow = 2, byrow = FALSE) + resolved_cols <- list(.spec("A"), .spec("B")) + row_h <- writetfl:::.compute_page_row_heights( + cell_h_mat, page_rows = 1:2, resolved_cols, + group_vars = character(0L), suppress_mat = NULL + ) + expect_equal(row_h, c(3, 4)) +}) + +test_that("zero-row page returns numeric(0)", { + expect_equal( + writetfl:::.compute_page_row_heights( + matrix(numeric(0L), nrow = 0L, ncol = 2L), + page_rows = integer(0L), + resolved_cols = list(.spec("A", is_group_col = TRUE), .spec("B")), + group_vars = "A", suppress_mat = NULL + ), + numeric(0L) + ) +}) + +# ---- paginate_rows() — span-aware fit + orphan handling -------------------- + +test_that("3-row group, 3-line label fits on one page (free-row property)", { + # Adding rows 2 and 3 to the open span doesn't change the page total — + # the span absorbs the deficit that initially inflated row 1. + data <- data.frame(grp = rep("L1\nL2\nL3", 3L), + val = c("a", "b", "c"), + stringsAsFactors = FALSE) + cell_h_mat <- matrix(c(3, 3, 3, # grp + 1, 1, 1), # val + nrow = 3, byrow = FALSE) + resolved_cols <- list(.spec("grp", is_group_col = TRUE), .spec("val")) + + pages <- writetfl:::paginate_rows( + data, cell_h_mat, resolved_cols, + group_vars = "grp", + cont_row_h = 0, header_row_h = 0, + content_height_in = 3, + row_cont_msg = c("(continued above)", "(continued below)"), + group_rule = FALSE + ) + expect_length(pages, 1L) + expect_equal(pages[[1L]]$rows, 1:3) + expect_equal(pages[[1L]]$row_heights_in, c(1, 1, 1)) +}) + +test_that("group orphan: lone first-row on a page grows to fit full label", { + # 4-row group with label = 4 lines, val = 1 line for rows 1-3 and 2 lines + # for row 4. Adding row 4 to the [1..3] page would push the total over + # the budget, so row 4 gets flushed to its own page where, as a single- + # row span, it must grow to fit the 4-line label. + data <- data.frame(grp = rep("L1\nL2\nL3\nL4", 4L), + val = c("a", "b", "c", "d\ne"), + stringsAsFactors = FALSE) + cell_h_mat <- matrix(c(4, 4, 4, 4, # grp + 1, 1, 1, 2), # val + nrow = 4, byrow = FALSE) + resolved_cols <- list(.spec("grp", is_group_col = TRUE), .spec("val")) + + warns <- character(0) + pages <- withCallingHandlers( + writetfl:::paginate_rows( + data, cell_h_mat, resolved_cols, + group_vars = "grp", + cont_row_h = 1, header_row_h = 0, + content_height_in = 5, + row_cont_msg = c("(continued above)", "(continued below)"), + group_rule = FALSE + ), + warning = function(w) { + warns <<- c(warns, conditionMessage(w)); invokeRestart("muffleWarning") + } + ) + expect_true(any(grepl("continued", warns))) + expect_length(pages, 2L) + expect_equal(pages[[1L]]$rows, 1:3) + expect_equal(pages[[1L]]$row_heights_in, c(2, 1, 1)) + expect_equal(pages[[2L]]$rows, 4L) + expect_equal(pages[[2L]]$row_heights_in, 4) +}) + +test_that("pagination reset per page: orphan re-shows the label and grows", { + # The same data renders at different row heights on different pages + # because suppression resets at every page boundary. + data <- data.frame(grp = rep("L1\nL2\nL3\nL4", 4L), + val = c("a", "b", "c", "d\ne"), + stringsAsFactors = FALSE) + cell_h_mat <- matrix(c(4, 4, 4, 4, + 1, 1, 1, 2), + nrow = 4, byrow = FALSE) + resolved_cols <- list(.spec("grp", is_group_col = TRUE), .spec("val")) + + pages <- suppressWarnings(writetfl:::paginate_rows( + data, cell_h_mat, resolved_cols, + group_vars = "grp", + cont_row_h = 1, header_row_h = 0, + content_height_in = 5, + row_cont_msg = c("(continued above)", "(continued below)"), + group_rule = FALSE + )) + expect_equal(pages[[2L]]$row_heights_in, 4) +}) + +test_that("paginate_rows: suppress_repeated_groups = FALSE inflates every row", { + # If suppression is itself disabled, every group cell renders on every + # row, so each row inflates to label height. This is the strict + # historical layout and the only way to opt out of the rowspan flow. + data <- data.frame(grp = rep("L1\nL2\nL3", 3L), + val = c("a", "b", "c"), + stringsAsFactors = FALSE) + cell_h_mat <- matrix(c(3, 3, 3, + 1, 1, 1), + nrow = 3, byrow = FALSE) + resolved_cols <- list(.spec("grp", is_group_col = TRUE), .spec("val")) + + pages <- writetfl:::paginate_rows( + data, cell_h_mat, resolved_cols, + group_vars = "grp", + cont_row_h = 0, header_row_h = 0, + content_height_in = 99, + row_cont_msg = c("(continued above)", "(continued below)"), + group_rule = FALSE, + suppress_repeated_groups = FALSE + ) + expect_equal(pages[[1L]]$row_heights_in, c(3, 3, 3)) +}) + +# ---- end-to-end via export_tfl() ------------------------------------------- + +test_that("end-to-end: user's two-page rowspan example renders without error", { + df <- dplyr::group_by( + data.frame(A = rep("B\nC", 3L), + D = c("E", "F", "G"), + stringsAsFactors = FALSE), + A + ) + tbl <- tfl_table(df) + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_no_error( + suppressWarnings( # the deliberate group split fires the (continued) warn + export_tfl(tbl, file = f, pg_width = 11, pg_height = 8.5, + min_content_height = grid::unit(0.5, "inches")) + ) + ) + expect_true(file.exists(f)) + expect_gt(file.info(f)$size, 0) +}) + +test_that("end-to-end: row_rule with a multi-row span renders without error", { + df <- dplyr::group_by( + data.frame(A = c("X\nY", "X\nY", "X\nY"), + B = c("p", "q", "r"), + stringsAsFactors = FALSE), + A + ) + tbl <- tfl_table(df, row_rule = TRUE) + f <- tempfile(fileext = ".pdf") + on.exit(unlink(f)) + expect_no_error(export_tfl(tbl, file = f)) +}) diff --git a/tests/testthat/test-table_draw.R b/tests/testthat/test-table_draw.R index 371fdc1..94b3a4f 100644 --- a/tests/testthat/test-table_draw.R +++ b/tests/testthat/test-table_draw.R @@ -2,12 +2,14 @@ library(dplyr, warn.conflicts = FALSE) -# drawDetails — uncached height fallback (lines 112-141) ---------------------- +# drawDetails — uncached height fallback -------------------------------------- # -# build_table_grob() accepts row_heights_in = NULL and cont_row_h_in = NULL. -# When the grob is drawn, drawDetails falls back to recomputing those heights -# on the fly. The test exercises both fallback branches and, via a wrap- -# eligible column, also the .wrap_text branch inside the row-height loop. +# build_table_grob() accepts cell_heights_in_mat = NULL and cont_row_h_in = NULL. +# When the grob is drawn, drawDetails falls back to building a per-page cell +# matrix on the fly and applying .compute_page_row_heights(). The row_page +# also lacks $row_heights_in, forcing the recompute path. The test exercises +# both fallback branches and, via a wrap-eligible column, also the .wrap_text +# branch inside the per-cell measurement loop. test_that("drawDetails recomputes cont_row_h and row_h_vec when not cached", { df <- data.frame( @@ -34,16 +36,17 @@ test_that("drawDetails recomputes cont_row_h and row_h_vec when not cached", { is_cont_top = TRUE, # forces the uncached cont_row_h branch is_cont_bottom = FALSE, group_starts = integer(0L) + # row_heights_in deliberately omitted to force the recompute path ) grob <- writetfl:::build_table_grob( - row_page = row_page, - col_group_idx = seq_along(resolved), - n_group_cols = 0L, - resolved_cols = resolved, - tbl = tbl, - row_heights_in = NULL, # force uncached row-height path - cont_row_h_in = NULL # force uncached cont-row-height path + row_page = row_page, + col_group_idx = seq_along(resolved), + n_group_cols = 0L, + resolved_cols = resolved, + tbl = tbl, + cell_heights_in_mat = NULL, # force the per-page fallback measurement + cont_row_h_in = NULL # force uncached cont-row-height path ) f <- tempfile(fileext = ".pdf") diff --git a/tests/testthat/test-table_utils.R b/tests/testthat/test-table_utils.R index bffd6eb..89129f4 100644 --- a/tests/testthat/test-table_utils.R +++ b/tests/testthat/test-table_utils.R @@ -50,6 +50,60 @@ test_that(".compute_group_sizes returns integer(0) when group_vars is empty", { expect_equal(result, integer(0L)) }) +# .compute_group_rule_info() -------------------------------------------------- + +test_that(".compute_group_rule_info returns NA size and level for the first group_start", { + df <- data.frame(g = c("A", "A", "B"), stringsAsFactors = FALSE) + res <- writetfl:::.compute_group_rule_info(df, "g") + expect_equal(unname(res$sizes[[1L]]), NA_integer_) + expect_equal(unname(res$levels[[1L]]), NA_integer_) +}) + +test_that(".compute_group_rule_info reports outer level + size when outer changes", { + # Two-level group: Cohort, Visit. Cohort 1 has 2 visits (each 2 rows); + # Cohort 2 has 2 visits (each 1 row). At the boundary between the last + # Cohort 1 row and the first Cohort 2 row, the inner (Cohort=2, Baseline) + # group has 1 row but the outer Cohort=2 group has 2 rows. + df <- data.frame( + Cohort = c(1, 1, 1, 1, 2, 2), + Visit = c("A", "A", "B", "B", "A", "B"), + stringsAsFactors = FALSE + ) + res <- writetfl:::.compute_group_rule_info(df, c("Cohort", "Visit")) + # group_starts are rows c(1, 3, 5, 6). + expect_equal(names(res$sizes), c("1", "3", "5", "6")) + expect_equal(names(res$levels), c("1", "3", "5", "6")) + expect_equal(unname(res$sizes), + c(NA_integer_, # row 1: no transition before + 2L, # row 3: Visit changed within Cohort 1 → (Cohort=1, Visit=B) has 2 rows + 2L, # row 5: Cohort changed → (Cohort=2) outer group has 2 rows + 1L)) # row 6: Visit changed within Cohort 2 → (Cohort=2, Visit=B) has 1 row + expect_equal(unname(res$levels), + c(NA_integer_, # row 1: no transition before + 2L, # Visit (level 2) is outermost changer + 1L, # Cohort (level 1) is outermost changer + 2L)) # Visit (level 2) is outermost changer +}) + +test_that(".compute_group_rule_info single-level: level = 1 always", { + df <- data.frame(g = c("A", "A", "B", "C", "C", "C"), stringsAsFactors = FALSE) + res <- writetfl:::.compute_group_rule_info(df, "g") + expect_equal(names(res$sizes), c("1", "3", "4")) + expect_equal(unname(res$sizes), c(NA_integer_, 1L, 3L)) + expect_equal(unname(res$levels), c(NA_integer_, 1L, 1L)) +}) + +test_that(".compute_group_rule_info returns integer(0) fields for empty inputs", { + res1 <- writetfl:::.compute_group_rule_info( + data.frame(g = character(0L), stringsAsFactors = FALSE), "g" + ) + expect_equal(res1$sizes, integer(0L)) + expect_equal(res1$levels, integer(0L)) + res2 <- writetfl:::.compute_group_rule_info(data.frame(a = 1:3), character(0L)) + expect_equal(res2$sizes, integer(0L)) + expect_equal(res2$levels, integer(0L)) +}) + # .collect_col_strings() ------------------------------------------------------ test_that(".collect_col_strings truncates to max_rows unique strings", { diff --git a/tests/testthat/test-tfl_table.R b/tests/testthat/test-tfl_table.R index ee72d7c..4704f9f 100644 --- a/tests/testthat/test-tfl_table.R +++ b/tests/testthat/test-tfl_table.R @@ -381,14 +381,22 @@ make_row_page_inputs <- function(n = 5, group_every = NULL) { value = seq_len(n), stringsAsFactors = FALSE ) - list(data = data, heights = rep(1, n)) + # New paginate_rows takes a per-cell matrix and resolved_cols. + # All-uniform 1-inch cells reproduce the prior "heights = rep(1, n)" behaviour. + cell_h_mat <- matrix(1, nrow = n, ncol = 2L) + resolved_cols <- list( + list(col = "grp", is_group_col = FALSE), + list(col = "value", is_group_col = FALSE) + ) + list(data = data, cell_h_mat = cell_h_mat, resolved_cols = resolved_cols) } test_that("paginate_rows fits all rows on one page", { inp <- make_row_page_inputs(3) - pages <- paginate_rows(inp$data, inp$heights, cont_row_h = 0.5, - header_row_h = 0.5, content_height_in = 10, + pages <- paginate_rows(inp$data, inp$cell_h_mat, inp$resolved_cols, group_vars = character(0L), + cont_row_h = 0.5, header_row_h = 0.5, + content_height_in = 10, row_cont_msg = "(continued)", group_rule = FALSE) expect_length(pages, 1L) expect_equal(pages[[1L]]$rows, 1:3) @@ -398,9 +406,10 @@ test_that("paginate_rows fits all rows on one page", { test_that("paginate_rows splits across pages", { inp <- make_row_page_inputs(5) - pages <- paginate_rows(inp$data, inp$heights, cont_row_h = 0.2, - header_row_h = 0.5, content_height_in = 3, + pages <- paginate_rows(inp$data, inp$cell_h_mat, inp$resolved_cols, group_vars = character(0L), + cont_row_h = 0.2, header_row_h = 0.5, + content_height_in = 3, row_cont_msg = "(continued)", group_rule = FALSE) expect_gt(length(pages), 1L) # All row indices covered exactly once @@ -410,9 +419,10 @@ test_that("paginate_rows splits across pages", { test_that("paginate_rows marks cont_top/cont_bottom on splits", { inp <- make_row_page_inputs(5) - pages <- paginate_rows(inp$data, inp$heights, cont_row_h = 0.2, - header_row_h = 0.5, content_height_in = 2.5, + pages <- paginate_rows(inp$data, inp$cell_h_mat, inp$resolved_cols, group_vars = character(0L), + cont_row_h = 0.2, header_row_h = 0.5, + content_height_in = 2.5, row_cont_msg = "(continued)", group_rule = FALSE) if (length(pages) >= 2L) { expect_true(pages[[1L]]$is_cont_bottom) @@ -424,14 +434,19 @@ test_that("paginate_rows warns when a group spans multiple pages", { data <- data.frame(grp = c("A","A","A","A","A"), val = 1:5, stringsAsFactors = FALSE) gdf <- dplyr::group_by(data, grp) - heights <- rep(1, 5) + cell_h_mat <- matrix(1, nrow = 5, ncol = 2L) + resolved_cols <- list( + list(col = "grp", is_group_col = TRUE), + list(col = "val", is_group_col = FALSE) + ) # Multiple page breaks may fire multiple warnings; capture all and check # at least one matches warns <- character(0) withCallingHandlers( - paginate_rows(gdf, heights, cont_row_h = 0.2, - header_row_h = 0.5, content_height_in = 3, + paginate_rows(gdf, cell_h_mat, resolved_cols, group_vars = "grp", + cont_row_h = 0.2, header_row_h = 0.5, + content_height_in = 3, row_cont_msg = "(continued)", group_rule = FALSE), warning = function(w) { warns <<- c(warns, conditionMessage(w))