diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 899acee2..16b79ca5 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -46,15 +46,16 @@ #' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for #' related functionalities. #' -#' @examples +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Preparing the Kaplan-Meier Plot -#' use_lung <- survival::lung +#' library(survival) +#' use_lung <- lung #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) #' -#' formula <- survival::Surv(time, status) ~ arm -#' fit_kmg01 <- survival::survfit(formula, use_lung) +#' formula <- Surv(time, status) ~ arm +#' fit_kmg01 <- survfit(formula, use_lung) #' surv_plot_data <- process_survfit(fit_kmg01) #' #' plt_kmg01 <- gg_km(surv_plot_data) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 66c1080c..325d35d2 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -26,16 +26,22 @@ #' @param ties (`character`)\cr #' A string specifying the method for tie handling in the Cox model. #' Must be one of "exact", "efron", or "breslow". +#' Default is "exact". #' @param test (`character`)\cr #' A string specifying the type of test to compute the p-value. #' Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto", #' "prentice" (modified peto), "fleming-harrington", or "likelihood-ratio". +#' @param ... Additional arguments passed to `survival::coxph()` and `summary()` +#' two arguments are supported: +#' `conf.int` (default `0.95`) to adjust the CI level; +#' `robust = TRUE` to compute robust standard errors; #' #' @return A `data.frame` with one row per comparison arm (stored as rownames). #' The columns are: #' \itemize{ #' \item `HR`: The Hazard Ratio formatted to two decimal places. -#' \item `95% CI`: The 95\% confidence interval as `"(lower, upper)"`. +#' \item `95% CI`: The 95% Wald confidence interval as `"(lower, upper)"`. +#' Setting `robust = TRUE` results in robust sandwich CIs. #' \item `p-value ()`: The p-value from the selected `test`, where #' `` is the title-cased test name (e.g., `"p-value (log-rank)"`). #' } @@ -44,28 +50,29 @@ #' data to the current arm and the reference arm, and then: #' \itemize{ #' \item Fits a Cox model using `survival::coxph()`. -#' \item Computes a p-value via [.estimate_p_value()], which dispatches +#' \item Computes a p-value, which dispatches #' to `coin::logrank_test()` for weighted log-rank variants or to -#' `survival::survreg()` for the likelihood-ratio test. -#' } +#' a nested `survival::coxph()` LRT for the likelihood-ratio test. +#' } #' #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`, #' `coin::logrank_test()`. #' -#' @examples +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Example data setup (assuming 'time' is event time, 'status' #' # is event indicator (1=event), and 'arm' is the treatment group) -#' library(dplyr) # For better data handling -#' +#' # for data handling +#' library(dplyr) +#' library(survival) #' # Prepare data in a modern dplyr-friendly way -#' surv_data <- survival::lung |> +#' surv_data <- lung |> #' mutate( #' arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), #' status = status - 1 # Convert status to 0/1 #' ) |> #' filter(if_all(everything(), ~ !is.na(.))) #' -#' formula <- survival::Surv(time, status) ~ arm +#' formula <- Surv(time, status) ~ arm #' #' # Example 1: Default usage (ties = "exact", test = "log-rank") #' results_default <- get_cox_pairwise_df( @@ -113,16 +120,46 @@ get_cox_pairwise_df <- function( "prentice", "fleming-harrington", "likelihood-ratio" - ) + ), + ... ) { set_cli_abort_call() + + # Parse the dot-dot-dot list + dots <- list(...) + + # Extract explicit parameters, applying defaults if missing + conf_int <- if ("conf.int" %in% names(dots)) dots[["conf.int"]] else 0.95 + robust <- if ("robust" %in% names(dots)) dots[["robust"]] else FALSE + + invalid_dots <- setdiff(names(dots), c("robust", "conf.int")) # Input checks + + # Check for invalid ... arguments + if (length(invalid_dots) > 0) { + cli::cli_abort( + c( + "Invalid argument{?s} passed via {.code ...}.", + "i" = "Only {.arg robust} and {.arg conf.int} are supported at the moment.", + "x" = "Unrecognized argument{?s}: {.arg {invalid_dots}}." + ), + call = get_cli_abort_call() + ) + } + + check_numeric(conf_int) + check_scalar_range(conf_int, c(0, 1), include_bounds = FALSE) + check_logical(robust) + if (!rlang::is_formula(model_formula)) { cli::cli_abort( "{.arg model_formula} must be a {.cls formula}.", call = get_cli_abort_call() ) } + + .check_formula_for_namespace(model_formula) + if (!is.factor(data[[arm]])) { cli::cli_abort( "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.", @@ -131,8 +168,23 @@ get_cox_pairwise_df <- function( } ties <- match.arg(ties) + if (robust && ties == "exact") { + cli::cli_abort( + c( + "Robust standard errors cannot be calculated with the {.val exact} tie-handling method.", + "i" = "Please change {.arg ties} to {.val efron} or {.val breslow} when using {.code robust = TRUE}." + ), + call = get_cli_abort_call() + ) + } + test <- match.arg(test) + # Prepare remaining dots for coxph (remove conf.int so it doesn't crash coxph) + coxph_dots <- dots + coxph_dots[["conf.int"]] <- NULL + coxph_dots[["robust"]] <- robust + # Determine reference and comparison groups ref_group <- if (!is.null(ref_group)) { ref_group @@ -153,19 +205,23 @@ get_cox_pairwise_df <- function( call = get_cli_abort_call() ) } + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] - comp_df[[arm]] <- droplevels(comp_df[[arm]]) + # Explicitly relevel the factor so the reference group is ALWAYS first. + comp_df[[arm]] <- factor(comp_df[[arm]], levels = c(ref_group, current_arm)) - suppressWarnings( - coxph_ans <- survival::coxph( - formula = model_formula, - data = comp_df, - ties = ties - ) |> summary() + # Safely inject the remaining dots into the coxph call + cox_args <- c( + list(formula = model_formula, data = comp_df, ties = ties), + coxph_dots ) + fit_full <- do.call(survival::coxph, cox_args) - log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm) + # Extract summary with our specified conf.int + coxph_ans <- summary(fit_full, conf.int = conf_int) + # Pass the 'ties' argument down so the LRT matches the main Cox estimate + log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm, ties) conf_int_row <- paste0(arm, current_arm) current_row <- data.frame( @@ -187,7 +243,7 @@ get_cox_pairwise_df <- function( names(res) <- c( "HR", - "95% CI", + paste0(conf_int * 100, "% CI"), paste0("p-value (", test_name, ")") ) res @@ -195,65 +251,135 @@ get_cox_pairwise_df <- function( #' Estimate p-value for a pairwise survival comparison #' -#' Dispatches to `coin::logrank_test()` for weighted log-rank variants -#' or to a likelihood-ratio test via `survival::survreg()`. +#' Dispatches to `survival::survdiff()` for standard log-rank, +#' `coin::logrank_test()` for weighted log-rank variants, +#' or to a nested likelihood-ratio test via `survival::coxph()`. #' #' @param formula (`formula`)\cr survival formula, e.g. `Surv(time, status) ~ arm`. #' @param data (`data.frame`)\cr subset containing exactly two arm levels. #' @param test (`string`)\cr test name as accepted by [get_cox_pairwise_df()]. #' @param arm (`string`)\cr column name of the arm variable in `data`. +#' @param ties (`character`)\cr tie handling method for the Cox LRT. #' #' @returns A single numeric p-value. -#' @keywords internal -.estimate_p_value <- function(formula, data, test, arm) { - test_type <- switch(test, - "log-rank" = "logrank", - "gehan-breslow" = "Gehan-Breslow", - "tarone" = "Tarone-Ware", - "peto" = "Peto-Peto", - "prentice" = "Prentice", - "fleming-harrington" = "Fleming-Harrington", - "likelihood-ratio" = "lr" - ) +#' @noRd +.estimate_p_value <- function(formula, data, test, arm, ties) { + if (test == "log-rank") { + # --- 1. Standard Log-Rank via survival (Matches SAS & rtables) --- + sdiff <- survival::survdiff(formula, data = data) + + # Calculate the asymptotic p-value using the chi-square statistic + # Degrees of freedom is (number of groups - 1) + p_value <- stats::pchisq(sdiff$chisq, df = length(sdiff$n) - 1, lower.tail = FALSE) + } else if (test == "likelihood-ratio") { + # --- 2. Likelihood-Ratio Test via survival --- + # Fit the full Cox model + fit_full <- survival::coxph(formula, data = data, ties = ties) + + # Safely create the reduced formula by dropping the arm variable + reduced_formula <- stats::update(formula, paste(". ~ . -", arm)) + + # Fit the reduced model explicitly to avoid drop1 environment scope errors + fit_reduced <- survival::coxph(reduced_formula, data = data, ties = ties) - if (test_type != "lr") { - rlang::check_installed( - "coin", - reason = paste("to run log-rank tests using", test_type, "method") + # Execute the nested Likelihood-Ratio Test + anova_res <- stats::anova(fit_reduced, fit_full, test = "Chisq") + + # Extract the p-value for the second row (the full model comparison) + p_value <- anova_res[["Pr(>|Chi|)"]][[2]] + } else { + # --- 3. Weighted Variants via coin --- + test_type <- switch(test, + "gehan-breslow" = "Gehan-Breslow", + "tarone" = "Tarone-Ware", + "peto" = "Peto-Peto", + "prentice" = "Prentice", + "fleming-harrington" = "Fleming-Harrington" ) - if (length(levels(data[[arm]])) != 2) { - cli::cli_warn( + if (!requireNamespace("coin", quietly = TRUE)) { + cli::cli_abort( paste( - "{.arg arm} does not contain exactly 2 levels!", - "This will result in unexpected behavior of pairwise test." + "The {.pkg coin} package is required to run", + "log-rank tests using the {test_type} method." ) ) } + coin_formula <- .check_and_rewrite_formula(formula, arm) + test_result <- coin::logrank_test( - formula = formula, + formula = coin_formula, data = data, type = test_type ) p_value <- as.numeric(coin::pvalue(test_result)) - } else { - # SAS LR test assumes an exponential distribution - # fit the model - fit_cov <- survival::survreg(formula, data = data, dist = "exponential") - - # fit the null model - drop_arm_formula <- stats::as.formula(paste(". ~ . -", arm)) - # to account for possible covariates - null_formula <- stats::update(formula, drop_arm_formula) - fit_null <- survival::survreg(null_formula, data = data, dist = "exponential") - - # calculate the difference and estimate the statistics - lrt_stat <- 2 * (fit_cov$loglik[2] - fit_null$loglik[1]) - df <- fit_cov$df - fit_null$df - p_value <- stats::pchisq(lrt_stat, df, lower.tail = FALSE) } p_value } + +#' Rewrite survival formula with strata() for specific models +#' +#' @description +#' Parses a survival formula and safely translates `strata()` wrappers into +#' syntaxes supported by target modeling engines. +#' +#' @param formula (`formula`)\cr The original survival formula. +#' @param arm (`character`)\cr The name of the primary arm variable. +#' +#' @returns A modified `formula` with `strata()` terms properly translated. +#' @noRd +.check_and_rewrite_formula <- function(formula, arm) { + f_terms <- stats::terms(formula) + term_labels <- attr(f_terms, "term.labels") + + # Safely identify strata calls directly from the right-hand side labels + strata_idx <- grep("^strata\\(", term_labels) + strata_calls <- term_labels[strata_idx] + + # --- 1. GUARDRAIL VALIDATION --- + valid_terms <- c(arm, strata_calls) + invalid_terms <- setdiff(term_labels, valid_terms) + + if (length(invalid_terms) > 0) { + cli::cli_abort( + paste( + "This log-rank test method does not support covariate adjustment", + "for: {.var {invalid_terms}}.", + "Please use stratification (e.g., {.code ~ {arm} + strata(var)})", + "or switch to {.code test = 'likelihood-ratio'}." + ) + ) + } + + # If there are no strata terms, return the formula completely untouched + if (length(strata_idx) == 0) { + return(formula) + } + + # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments + strata_vars <- unlist(lapply(strata_calls, function(x) { + sapply(as.list(str2lang(x))[-1], deparse) + })) + + lhs_terms <- arm + + # If there are multiple strata variables, wrap them in interaction() + # If single, force as.factor() to satisfy coin::logrank_test requirements + if (length(strata_vars) > 1) { + block_str <- paste0("interaction(", paste(strata_vars, collapse = ", "), ")") + } else { + block_str <- paste0("as.factor(", strata_vars[1], ")") + } + + rhs_str <- paste(lhs_terms, "|", block_str) + + # Rebuild the formula preserving the exact LHS response and original environment + stats::reformulate( + termlabels = rhs_str, + response = formula[[2]], + env = environment(formula) + ) +} diff --git a/R/gg_km.R b/R/gg_km.R index 3eaf7340..8d12ab3f 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -28,16 +28,17 @@ NULL #' Data setup assumes `"time"` is event time, `"status"` is event indicator (`1` represents an event), #' while `"arm"` is the treatment group. #' -#' @examples +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Data preparation for KM plot -#' use_lung <- survival::lung +#' library(survival) +#' use_lung <- lung #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) #' #' # Fit Kaplan-Meier model -#' formula <- survival::Surv(time, status) ~ arm -#' fit_kmg01 <- survival::survfit(formula, use_lung) +#' formula <- Surv(time, status) ~ arm +#' fit_kmg01 <- survfit(formula, use_lung) #' #' # Process survfit data for plotting #' surv_plot_data <- process_survfit(fit_kmg01) diff --git a/R/tbl_coxph.R b/R/tbl_coxph.R index d4f46b7c..40a1c96c 100644 --- a/R/tbl_coxph.R +++ b/R/tbl_coxph.R @@ -15,17 +15,17 @@ #' #' @seealso `get_cox_pairwise_df()`. #' -#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Setup sample survival data #' library(survival) -#' surv_data <- survival::lung |> +#' surv_data <- lung |> #' dplyr::mutate( #' arm = factor(sample(c("A", "B", "C"), dplyr::n(), replace = TRUE)), #' status = status - 1 #' ) |> #' dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) #' -#' formula <- survival::Surv(time, status) ~ arm +#' formula <- Surv(time, status) ~ arm #' #' # Generate the pairwise statistics data.frame #' pairwise_results <- get_cox_pairwise_df( diff --git a/R/utils.R b/R/utils.R index c3e96964..05e4a3b7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -55,3 +55,16 @@ case_switch <- function(..., .default = NULL) { return(0) } } + +#' Check if formula contains namespace +#' @keywords internal +#' @noRd +.check_formula_for_namespace <- function(formula) { + formula_str <- paste(deparse(formula), collapse = " ") + if (grepl("\\b[a-zA-Z][a-zA-Z0-9.]*::", formula_str)) { + cli::cli_abort( + "{.arg formula} must be specified without namespace.", + call = get_cli_abort_call() + ) + } +} diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 0b13bcf7..baefc74f 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -107,18 +107,20 @@ Proportional Hazards summary table as an annotation box. }} \examples{ +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Preparing the Kaplan-Meier Plot -use_lung <- survival::lung +library(survival) +use_lung <- lung use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) -formula <- survival::Surv(time, status) ~ arm -fit_kmg01 <- survival::survfit(formula, use_lung) +formula <- Surv(time, status) ~ arm +fit_kmg01 <- survfit(formula, use_lung) surv_plot_data <- process_survfit(fit_kmg01) plt_kmg01 <- gg_km(surv_plot_data) - +\dontshow{\}) # examplesIf} # Annotate Plot with Numbers at Risk Table annotate_riskdf(plt_kmg01, fit_kmg01) diff --git a/man/dot-estimate_p_value.Rd b/man/dot-estimate_p_value.Rd deleted file mode 100644 index fc640885..00000000 --- a/man/dot-estimate_p_value.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_cox_pairwise_df.R -\name{.estimate_p_value} -\alias{.estimate_p_value} -\title{Estimate p-value for a pairwise survival comparison} -\usage{ -.estimate_p_value(formula, data, test, arm) -} -\arguments{ -\item{formula}{(\code{formula})\cr survival formula, e.g. \code{Surv(time, status) ~ arm}.} - -\item{data}{(\code{data.frame})\cr subset containing exactly two arm levels.} - -\item{test}{(\code{string})\cr test name as accepted by \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}}.} - -\item{arm}{(\code{string})\cr column name of the arm variable in \code{data}.} -} -\value{ -A single numeric p-value. -} -\description{ -Dispatches to \code{coin::logrank_test()} for weighted log-rank variants -or to a likelihood-ratio test via \code{survival::survreg()}. -} -\keyword{internal} diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 118edfc2..ce4297e9 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -11,7 +11,8 @@ get_cox_pairwise_df( ref_group = NULL, ties = c("exact", "efron", "breslow"), test = c("log-rank", "gehan-breslow", "tarone", "peto", "prentice", - "fleming-harrington", "likelihood-ratio") + "fleming-harrington", "likelihood-ratio"), + ... ) } \arguments{ @@ -38,19 +39,26 @@ selected as the reference group.} \item{ties}{(\code{character})\cr A string specifying the method for tie handling in the Cox model. -Must be one of "exact", "efron", or "breslow".} +Must be one of "exact", "efron", or "breslow". +Default is "exact".} \item{test}{(\code{character})\cr A string specifying the type of test to compute the p-value. Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto", "prentice" (modified peto), "fleming-harrington", or "likelihood-ratio".} + +\item{...}{Additional arguments passed to \code{survival::coxph()} and \code{summary()} +two arguments are supported: +\code{conf.int} (default \code{0.95}) to adjust the CI level; +\code{robust = TRUE} to compute robust standard errors;} } \value{ A \code{data.frame} with one row per comparison arm (stored as rownames). The columns are: \itemize{ \item \code{HR}: The Hazard Ratio formatted to two decimal places. -\item \verb{95\% CI}: The 95\\% confidence interval as \code{"(lower, upper)"}. +\item \verb{95\% CI}: The 95\% Wald confidence interval as \code{"(lower, upper)"}. +Setting \code{robust = TRUE} results in robust sandwich CIs. \item \verb{p-value ()}: The p-value from the selected \code{test}, where \verb{} is the title-cased test name (e.g., \code{"p-value (log-rank)"}). } @@ -66,25 +74,27 @@ The function iterates through each non-reference arm, subsets the data to the current arm and the reference arm, and then: \itemize{ \item Fits a Cox model using \code{survival::coxph()}. -\item Computes a p-value via \code{\link[=.estimate_p_value]{.estimate_p_value()}}, which dispatches +\item Computes a p-value, which dispatches to \code{coin::logrank_test()} for weighted log-rank variants or to -\code{survival::survreg()} for the likelihood-ratio test. +a nested \code{survival::coxph()} LRT for the likelihood-ratio test. } } \examples{ +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Example data setup (assuming 'time' is event time, 'status' # is event indicator (1=event), and 'arm' is the treatment group) -library(dplyr) # For better data handling - +# for data handling +library(dplyr) +library(survival) # Prepare data in a modern dplyr-friendly way -surv_data <- survival::lung |> +surv_data <- lung |> mutate( arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), status = status - 1 # Convert status to 0/1 ) |> filter(if_all(everything(), ~ !is.na(.))) -formula <- survival::Surv(time, status) ~ arm +formula <- Surv(time, status) ~ arm # Example 1: Default usage (ties = "exact", test = "log-rank") results_default <- get_cox_pairwise_df( @@ -116,7 +126,7 @@ results_lr <- get_cox_pairwise_df( test = "likelihood-ratio" ) print(results_lr) - +\dontshow{\}) # examplesIf} } \seealso{ \code{annotate_gg_km()}, \code{gg_km()}, \code{survival::coxph()}, diff --git a/man/gg_km.Rd b/man/gg_km.Rd index baf45186..f16ad0a6 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -92,20 +92,22 @@ support for various customizations like censoring marks, Confidence Intervals ( }} \examples{ +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Data preparation for KM plot -use_lung <- survival::lung +library(survival) +use_lung <- lung use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) # Fit Kaplan-Meier model -formula <- survival::Surv(time, status) ~ arm -fit_kmg01 <- survival::survfit(formula, use_lung) +formula <- Surv(time, status) ~ arm +fit_kmg01 <- survfit(formula, use_lung) # Process survfit data for plotting surv_plot_data <- process_survfit(fit_kmg01) head(surv_plot_data) - +\dontshow{\}) # examplesIf} # Example of making the KM plot plt_kmg01 <- gg_km(surv_plot_data) diff --git a/man/tbl_coxph.Rd b/man/tbl_coxph.Rd index da048f1f..91ea1b92 100644 --- a/man/tbl_coxph.Rd +++ b/man/tbl_coxph.Rd @@ -22,17 +22,17 @@ presenting the p-value, Hazard Ratio, and 95\% Confidence Interval in a stacked layout where statistics form the rows of the table. } \examples{ -\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Setup sample survival data library(survival) -surv_data <- survival::lung |> +surv_data <- lung |> dplyr::mutate( arm = factor(sample(c("A", "B", "C"), dplyr::n(), replace = TRUE)), status = status - 1 ) |> dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) -formula <- survival::Surv(time, status) ~ arm +formula <- Surv(time, status) ~ arm # Generate the pairwise statistics data.frame pairwise_results <- get_cox_pairwise_df( diff --git a/tests/testthat/test-check_formula_for_namespace.R b/tests/testthat/test-check_formula_for_namespace.R new file mode 100644 index 00000000..eafd8477 --- /dev/null +++ b/tests/testthat/test-check_formula_for_namespace.R @@ -0,0 +1,60 @@ +# test-check_formula_for_namespace.R + +test_that(".check_formula_for_namespace() passes valid formulas", { + # Simple formula + expect_no_error(.check_formula_for_namespace(y ~ x)) + + # Survival formula without namespace + expect_no_error(.check_formula_for_namespace(Surv(time, status) ~ arm)) + + # Complex formula with strata + expect_no_error(.check_formula_for_namespace(Surv(time, status) ~ arm + strata(site))) +}) + +test_that(".check_formula_for_namespace() aborts when a namespace is present", { + expected_msg <- "must be specified without namespace" + + # Namespace on a function call (e.g., survival::Surv) + expect_error( + .check_formula_for_namespace(survival::Surv(time, status) ~ arm), + regexp = expected_msg + ) + + # Namespace on the Right-Hand Side variable + expect_error( + .check_formula_for_namespace(Surv(time, status) ~ mypkg::arm), + regexp = expected_msg + ) + + # Namespace on the Left-Hand Side variable + expect_error( + .check_formula_for_namespace(mypkg::y ~ x), + regexp = expected_msg + ) + + # Namespace buried inside a strata() call + expect_error( + .check_formula_for_namespace(Surv(time, status) ~ arm + strata(mypkg::site)), + regexp = expected_msg + ) +}) + +test_that(".check_formula_for_namespace() correctly parses long formulas", { + # deparse() splits long formulas into a character vector of multiple lines. + # This tests that `paste(..., collapse = " ")` effectively reconstructs it + # so the regex doesn't miss namespaces split across lines. + + # Generate a very long valid formula + long_rhs <- paste(paste0("covar_", 1:50), collapse = " + ") + long_formula_valid <- stats::as.formula(paste("Surv(time, status) ~", long_rhs)) + + expect_no_error(.check_formula_for_namespace(long_formula_valid)) + + # Generate a very long invalid formula with a namespace at the very end + long_formula_invalid <- stats::as.formula(paste("Surv(time, status) ~", long_rhs, "+ pkg::bad_var")) + + expect_error( + .check_formula_for_namespace(long_formula_invalid), + regexp = "must be specified without namespace" + ) +}) diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index fc0a6fc9..f0ab3b8b 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -2,29 +2,27 @@ skip_if_pkg_not_installed(c("survival", "dplyr", "coin")) # Setup shared test data set.seed(42) -surv_data_2arm <- survival::lung |> +test_df_2grp <- survival::veteran |> dplyr::mutate( - arm = factor(sample(c("A", "B"), dplyr::n(), replace = TRUE)), - status = status - 1 + treatment = factor(sample(c("DrugX", "Placebo"), dplyr::n(), replace = TRUE)), + event = status ) |> dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) -surv_data_3arm <- survival::lung |> +test_df_3grp <- survival::veteran |> dplyr::mutate( - arm = factor(sample(c("A", "B", "C"), dplyr::n(), replace = TRUE)), - status = status - 1 + treatment = factor(sample(c("DrugX", "DrugY", "Placebo"), dplyr::n(), replace = TRUE)), + event = status ) |> dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) test_that("get_cox_pairwise_df() works with two arms", { expect_no_error( - suppressWarnings( - result <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", - ref_group = "A" - ) + result <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + ref_group = "Placebo" ) ) expect_s3_class(result, "data.frame") @@ -38,20 +36,20 @@ test_that( "get_cox_pairwise_df() returns non-NA hazard ratios with more than two arms", { expect_no_error( - suppressWarnings( - result <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_3arm, - arm = "arm", - ref_group = "A" - ) + result <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment", + ref_group = "Placebo" ) ) expect_s3_class(result, "data.frame") expect_equal(nrow(result), 2L) - expect_equal(rownames(result), c("B", "C")) + # The output rownames should be the non-reference groups + expect_true(all(c("DrugX", "DrugY") %in% rownames(result))) expect_named(result, c("HR", "95% CI", "p-value (log-rank)")) - # All hazard ratios and CIs must be non-NA (this was the bug) + + # All hazard ratios and CIs must be non-NA expect_false(anyNA(result[["HR"]])) expect_false(anyNA(result[["95% CI"]])) expect_false(anyNA(result[["p-value (log-rank)"]])) @@ -60,37 +58,37 @@ test_that( test_that("get_cox_pairwise_df() uses first factor level as default ref_group", { expect_no_error( - suppressWarnings( - result <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_3arm, - arm = "arm" - ) + result <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment" ) ) expect_equal(nrow(result), 2L) - # Default ref is "A", so comparisons are "B" and "C" - expect_equal(rownames(result), c("B", "C")) + + # Default ref is the first level, so it should not be in the output rownames + first_lvl <- levels(test_df_3grp$treatment)[1] + expect_false(first_lvl %in% rownames(result)) }) test_that("get_cox_pairwise_df() errors with non-formula model_formula", { expect_error( get_cox_pairwise_df( - model_formula = "Surv(time, status) ~ arm", - data = surv_data_2arm, - arm = "arm" + model_formula = "Surv(time, event) ~ treatment", + data = test_df_2grp, + arm = "treatment" ) ) }) test_that("get_cox_pairwise_df() errors when arm column is not a factor", { - bad_data <- surv_data_2arm - bad_data[["arm"]] <- as.character(bad_data[["arm"]]) + bad_data <- test_df_2grp + bad_data[["treatment"]] <- as.character(bad_data[["treatment"]]) expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, event) ~ treatment, data = bad_data, - arm = "arm" + arm = "treatment" ) ) }) @@ -101,11 +99,11 @@ test_that(paste0( ), { expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_3arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment", # Passing multiple reference groups to force length > 2 - ref_group = c("A", "B") + ref_group = c("Placebo", "DrugX") ), "must contain exactly 2 arms/groups" ) @@ -117,9 +115,9 @@ test_that("get_cox_pairwise_df() works with all valid 'ties' methods", { for (t_method in ties_methods) { expect_no_error( res <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", ties = t_method ) ) @@ -140,13 +138,11 @@ test_that("get_cox_pairwise_df() works with all valid 'test' methods", { for (t_method in test_methods) { expect_no_error( - suppressWarnings( - res <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", - test = t_method - ) + res <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + test = t_method ) ) @@ -165,9 +161,9 @@ test_that("get_cox_pairwise_df() works with all valid 'test' methods", { test_that("get_cox_pairwise_df() catches invalid 'ties' and 'test' arguments", { expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", ties = "invalid_tie_method" ), "should be one of" @@ -175,11 +171,113 @@ test_that("get_cox_pairwise_df() catches invalid 'ties' and 'test' arguments", { expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", test = "invalid_test_method" ), "should be one of" ) }) + +test_that("get_cox_pairwise_df() works for formula with covariates via likelihood-ratio", { + # Likelihood-ratio natively handles right-hand side continuous covariates + expect_no_error( + res_covariate <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + karno, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio" + ) + ) + + expect_s3_class(res_covariate, "data.frame") + expect_false(anyNA(res_covariate[["HR"]])) + expect_false(anyNA(res_covariate[["p-value (Likelihood-Ratio)"]])) +}) + +test_that("get_cox_pairwise_df() works for formula with complex strata()", { + # 1. Test log-rank with a single strata variable (uses coin engine) + expect_no_error( + res_strata_lr <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior), + data = test_df_2grp, + arm = "treatment", + ties = "efron", + test = "log-rank" + ) + ) + expect_s3_class(res_strata_lr, "data.frame") + expect_false(anyNA(res_strata_lr[["p-value (log-rank)"]])) + + # 2. Test log-rank with multiple strata variables inside a single strata() call + # We use 'prior' and 'trt' (2 levels each) to avoid <2 observation block sparsity + expect_no_error( + res_strata_multi <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior, trt), + data = test_df_2grp, + arm = "treatment", + ties = "efron", + test = "log-rank" + ) + ) + expect_s3_class(res_strata_multi, "data.frame") + expect_false(anyNA(res_strata_multi[["p-value (log-rank)"]])) + + # 3. Ensure the likelihood-ratio test properly handles complex strata natively + expect_no_error( + res_strata_cox <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior) + karno, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio" + ) + ) + expect_s3_class(res_strata_cox, "data.frame") + expect_false(anyNA(res_strata_cox[["p-value (Likelihood-Ratio)"]])) +}) + +test_that("get_cox_pairwise_df() respects conf.int passed via ...", { + # Run with default 95% CI + res_95 <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment" + ) + + # Run with explicit 99% CI + expect_no_error( + res_99 <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + conf.int = 0.99 + ) + ) + + # Check that column names updated dynamically + expect_true("99% CI" %in% names(res_99)) + expect_false("95% CI" %in% names(res_99)) + + # The 99% CI should be wider/different from the 95% CI + expect_false(res_95[["95% CI"]] == res_99[["99% CI"]]) +}) + +test_that("get_cox_pairwise_df() handles robust = TRUE correctly via ...", { + # We test with the likelihood-ratio test to ensure the `robust` argument + # successfully propagates down into the nested LRT models inside .estimate_p_value + expect_no_error( + res_robust <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio", + ties = "efron", + robust = TRUE + ) + ) + + expect_s3_class(res_robust, "data.frame") + expect_false(anyNA(res_robust[["HR"]])) + expect_false(anyNA(res_robust[["p-value (Likelihood-Ratio)"]])) +}) diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index b839d384..4808401a 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -1,3 +1,4 @@ +skip_if_pkg_not_installed("survival") # pre-processing the km fit anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) @@ -9,7 +10,7 @@ anl[[by]] <- factor(anl[[by]], levels = c( )) group_sym <- rlang::sym(by) model_formula <- rlang::new_formula( - lhs = rlang::expr(survival::Surv(AVAL, is_event)), + lhs = rlang::expr(Surv(AVAL, is_event)), rhs = rlang::expr(!!group_sym) ) fit_kmg01 <- survival::survfit(model_formula, anl) diff --git a/tests/testthat/test-tbl_coxph.R b/tests/testthat/test-tbl_coxph.R index 50803e2f..d3faf194 100644 --- a/tests/testthat/test-tbl_coxph.R +++ b/tests/testthat/test-tbl_coxph.R @@ -20,14 +20,14 @@ surv_data_3arm <- survival::lung |> # Generate baseline pairwise dataframes to use in the tests pairwise_df_2arm <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, status) ~ arm, data = surv_data_2arm, arm = "arm", ref_group = "A" ) pairwise_df_3arm <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, status) ~ arm, data = surv_data_3arm, arm = "arm", ref_group = "A" @@ -108,7 +108,7 @@ test_that("tbl_coxph() works with only p-value (HR and CI removed)", { test_that("tbl_coxph() dynamically adopts different p-value test labels", { # Generate a pairwise dataframe using a different test (e.g., Wilcoxon) pairwise_wilcoxon <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, status) ~ arm, data = surv_data_2arm, arm = "arm", test = "gehan-breslow"