From 5841abc4522a8ddb7c1976449819d221903614c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Mon, 25 May 2026 13:38:18 +0200 Subject: [PATCH 01/23] Adding discrete ties method to get_cox_pairwise_df() --- R/get_cox_pairwise_df.R | 103 ++++++++++++++++++---- inst/WORDLIST | 1 + man/dot-get_discrete_cox.Rd | 24 +++++ man/get_cox_pairwise_df.Rd | 5 +- tests/testthat/test-get_cox_pairwise_df.R | 17 ++-- 5 files changed, 122 insertions(+), 28 deletions(-) create mode 100644 man/dot-get_discrete_cox.Rd diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 66c1080c..a9a3de7b 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -25,7 +25,8 @@ #' selected as the reference group. #' @param ties (`character`)\cr #' A string specifying the method for tie handling in the Cox model. -#' Must be one of "exact", "efron", or "breslow". +#' Must be one of "discrete", "exact", "efron", or "breslow". +#' Default is "discrete". #' @param test (`character`)\cr #' A string specifying the type of test to compute the p-value. #' Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto", @@ -104,7 +105,7 @@ get_cox_pairwise_df <- function( data, arm, ref_group = NULL, - ties = c("exact", "efron", "breslow"), + ties = c("discrete", "exact", "efron", "breslow"), test = c( "log-rank", "gehan-breslow", @@ -157,13 +158,17 @@ get_cox_pairwise_df <- function( comp_df[[arm]] <- droplevels(comp_df[[arm]]) - suppressWarnings( - coxph_ans <- survival::coxph( - formula = model_formula, - data = comp_df, - ties = ties - ) |> summary() - ) + if (ties == "discrete") { + coxph_ans <- .get_discrete_cox(formula = model_formula, data = comp_df) + } else { + suppressWarnings( + coxph_ans <- survival::coxph( + formula = model_formula, + data = comp_df, + ties = ties + ) |> summary() + ) + } log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm) @@ -222,15 +227,6 @@ get_cox_pairwise_df <- function( reason = paste("to run log-rank tests using", test_type, "method") ) - if (length(levels(data[[arm]])) != 2) { - cli::cli_warn( - paste( - "{.arg arm} does not contain exactly 2 levels!", - "This will result in unexpected behavior of pairwise test." - ) - ) - } - test_result <- coin::logrank_test( formula = formula, data = data, @@ -250,10 +246,79 @@ get_cox_pairwise_df <- function( fit_null <- survival::survreg(null_formula, data = data, dist = "exponential") # calculate the difference and estimate the statistics - lrt_stat <- 2 * (fit_cov$loglik[2] - fit_null$loglik[1]) + lrt_stat <- 2 * (fit_cov$loglik[2] - fit_null$loglik[2]) df <- fit_cov$df - fit_null$df p_value <- stats::pchisq(lrt_stat, df, lower.tail = FALSE) } p_value } + +#' Fit a discrete-time Cox model (pooled logistic regression) +#' +#' @description +#' Helper function to simulate SAS's `TIES=DISCRETE` option. It expands the data +#' into person-time format, fits a logistic regression model, and calculates +#' Wald confidence intervals. +#' +#' @param formula (`formula`)\cr +#' The original survival formula. +#' @param data (`data.frame`)\cr +#' The subsetted data containing exactly two arms. +#' +#' @returns A list mimicking `summary(coxph)` containing a `conf.int` data.frame +#' @keywords internal +.get_discrete_cox <- function(formula, data) { + # lhs - Surv function + # rhs - arm + covariates + # to allow smooth parsing of the Surv formulation + lhs_str <- paste(deparse(formula[[2]]), collapse = " ") + rhs_str <- paste(deparse(formula[[3]]), collapse = " ") + + # clean `suvrival::`` from the formula + clean_lhs_str <- sub(".*Surv\\(", "Surv(", lhs_str) + clean_formula <- stats::as.formula(paste(clean_lhs_str, "~", rhs_str)) + + # Fetch cut points dynamically avoiding hardcoded indices for robust retrieval + mf <- stats::model.frame(clean_formula, data = data) + surv_resp <- stats::model.response(mf) + + status_var <- dimnames(surv_resp)[[2]][2] + event_times <- sort(unique(surv_resp[, 1][surv_resp[, 2] == 1])) + + # Transforming to person-time guarantees proper risk sets for tied intervals + data_long <- survival::survSplit( + formula = clean_formula, + data = data, + cut = event_times, + episode = ".time_int" + ) + + glm_formula <- stats::as.formula( + paste(status_var, "~", rhs_str, "+ as.factor(.time_int)") + ) + + glm_fit <- stats::glm( + glm_formula, + data = data_long, + family = stats::binomial(link = "logit") + ) + + coefs <- stats::coef(glm_fit) + wald_ci_log <- stats::confint.default(glm_fit) + + lower_log_ci <- wald_ci_log[, "2.5 %"] + upper_log_ci <- wald_ci_log[, "97.5 %"] + hr <- exp(coefs) + + conf_int_df <- data.frame( + "exp(coef)" = hr, + "exp(-coef)" = 1 / hr, + "lower .95" = exp(lower_log_ci), + "upper .95" = exp(upper_log_ci), + row.names = names(coefs), + check.names = FALSE + ) + + list(conf.int = conf_int_df) +} diff --git a/inst/WORDLIST b/inst/WORDLIST index 0a17e3c7..118175e9 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -19,6 +19,7 @@ RMP RMPT RStudio Rua +SAS's SOCs Tidyverse analyte diff --git a/man/dot-get_discrete_cox.Rd b/man/dot-get_discrete_cox.Rd new file mode 100644 index 00000000..6272bcf0 --- /dev/null +++ b/man/dot-get_discrete_cox.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_cox_pairwise_df.R +\name{.get_discrete_cox} +\alias{.get_discrete_cox} +\title{Fit a discrete-time Cox model (pooled logistic regression)} +\usage{ +.get_discrete_cox(formula, data) +} +\arguments{ +\item{formula}{(\code{formula})\cr +The original survival formula.} + +\item{data}{(\code{data.frame})\cr +The subsetted data containing exactly two arms.} +} +\value{ +A list mimicking \code{summary(coxph)} containing a \code{conf.int} data.frame +} +\description{ +Helper function to simulate SAS's \code{TIES=DISCRETE} option. It expands the data +into person-time format, fits a logistic regression model, and calculates +Wald confidence intervals. +} +\keyword{internal} diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 118edfc2..da139e1c 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -9,7 +9,7 @@ get_cox_pairwise_df( data, arm, ref_group = NULL, - ties = c("exact", "efron", "breslow"), + ties = c("discrete", "exact", "efron", "breslow"), test = c("log-rank", "gehan-breslow", "tarone", "peto", "prentice", "fleming-harrington", "likelihood-ratio") ) @@ -38,7 +38,8 @@ selected as the reference group.} \item{ties}{(\code{character})\cr A string specifying the method for tie handling in the Cox model. -Must be one of "exact", "efron", or "breslow".} +Must be one of "discrete", "exact", "efron", or "breslow". +Default is "discrete".} \item{test}{(\code{character})\cr A string specifying the type of test to compute the p-value. diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index fc0a6fc9..d18ab18f 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -51,7 +51,8 @@ test_that( expect_equal(nrow(result), 2L) expect_equal(rownames(result), c("B", "C")) expect_named(result, c("HR", "95% CI", "p-value (log-rank)")) - # All hazard ratios and CIs must be non-NA (this was the bug) + + # All hazard ratios and CIs must be non-NA expect_false(anyNA(result[["HR"]])) expect_false(anyNA(result[["95% CI"]])) expect_false(anyNA(result[["p-value (log-rank)"]])) @@ -112,15 +113,17 @@ test_that(paste0( }) test_that("get_cox_pairwise_df() works with all valid 'ties' methods", { - ties_methods <- c("exact", "efron", "breslow") + ties_methods <- c("discrete", "exact", "efron", "breslow") for (t_method in ties_methods) { expect_no_error( - res <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", - ties = t_method + suppressWarnings( + res <- get_cox_pairwise_df( + model_formula = survival::Surv(time, status) ~ arm, + data = surv_data_2arm, + arm = "arm", + ties = t_method + ) ) ) expect_s3_class(res, "data.frame") From 82e55630ba7ebd1824147af69e9904e547118d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Mon, 25 May 2026 13:56:36 +0200 Subject: [PATCH 02/23] Small fix to work with gg_km --- R/get_cox_pairwise_df.R | 47 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 23 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index a9a3de7b..96ae7565 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -258,67 +258,68 @@ get_cox_pairwise_df <- function( #' #' @description #' Helper function to simulate SAS's `TIES=DISCRETE` option. It expands the data -#' into person-time format, fits a logistic regression model, and calculates +#' into person-time format, fits a logistic regression model, and calculates #' Wald confidence intervals. #' -#' @param formula (`formula`)\cr +#' @param formula (`formula`)\cr #' The original survival formula. -#' @param data (`data.frame`)\cr +#' @param data (`data.frame`)\cr #' The subsetted data containing exactly two arms. #' #' @returns A list mimicking `summary(coxph)` containing a `conf.int` data.frame #' @keywords internal .get_discrete_cox <- function(formula, data) { - # lhs - Surv function - # rhs - arm + covariates - # to allow smooth parsing of the Surv formulation + # Sanitize to allow smooth parsing of the Surv formulation lhs_str <- paste(deparse(formula[[2]]), collapse = " ") rhs_str <- paste(deparse(formula[[3]]), collapse = " ") - - # clean `suvrival::`` from the formula + clean_lhs_str <- sub(".*Surv\\(", "Surv(", lhs_str) clean_formula <- stats::as.formula(paste(clean_lhs_str, "~", rhs_str)) - - # Fetch cut points dynamically avoiding hardcoded indices for robust retrieval + mf <- stats::model.frame(clean_formula, data = data) surv_resp <- stats::model.response(mf) - - status_var <- dimnames(surv_resp)[[2]][2] + + # Extract the actual status variable name directly from the formula. + # Surv() forces its internal matrix column names to "time" and "status", + # so we must bypass it and read the original variable name (e.g., 'is_event'). + lhs_vars <- all.vars(formula[[2]]) + status_var <- lhs_vars[length(lhs_vars)] + event_times <- sort(unique(surv_resp[, 1][surv_resp[, 2] == 1])) - + # Transforming to person-time guarantees proper risk sets for tied intervals data_long <- survival::survSplit( formula = clean_formula, data = data, cut = event_times, - episode = ".time_int" + episode = ".time_int" ) - + glm_formula <- stats::as.formula( paste(status_var, "~", rhs_str, "+ as.factor(.time_int)") ) - + glm_fit <- stats::glm( - glm_formula, - data = data_long, + glm_formula, + data = data_long, family = stats::binomial(link = "logit") ) - + coefs <- stats::coef(glm_fit) wald_ci_log <- stats::confint.default(glm_fit) - + lower_log_ci <- wald_ci_log[, "2.5 %"] upper_log_ci <- wald_ci_log[, "97.5 %"] hr <- exp(coefs) - + conf_int_df <- data.frame( "exp(coef)" = hr, "exp(-coef)" = 1 / hr, "lower .95" = exp(lower_log_ci), "upper .95" = exp(upper_log_ci), row.names = names(coefs), - check.names = FALSE + check.names = FALSE ) - + list(conf.int = conf_int_df) } From 93da9de65cac815fb774084c4fb803f2fa3c438d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Mon, 25 May 2026 13:57:37 +0200 Subject: [PATCH 03/23] style --- R/get_cox_pairwise_df.R | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 96ae7565..dbcaec15 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -258,12 +258,12 @@ get_cox_pairwise_df <- function( #' #' @description #' Helper function to simulate SAS's `TIES=DISCRETE` option. It expands the data -#' into person-time format, fits a logistic regression model, and calculates +#' into person-time format, fits a logistic regression model, and calculates #' Wald confidence intervals. #' -#' @param formula (`formula`)\cr +#' @param formula (`formula`)\cr #' The original survival formula. -#' @param data (`data.frame`)\cr +#' @param data (`data.frame`)\cr #' The subsetted data containing exactly two arms. #' #' @returns A list mimicking `summary(coxph)` containing a `conf.int` data.frame @@ -272,54 +272,54 @@ get_cox_pairwise_df <- function( # Sanitize to allow smooth parsing of the Surv formulation lhs_str <- paste(deparse(formula[[2]]), collapse = " ") rhs_str <- paste(deparse(formula[[3]]), collapse = " ") - + clean_lhs_str <- sub(".*Surv\\(", "Surv(", lhs_str) clean_formula <- stats::as.formula(paste(clean_lhs_str, "~", rhs_str)) - + mf <- stats::model.frame(clean_formula, data = data) surv_resp <- stats::model.response(mf) - + # Extract the actual status variable name directly from the formula. - # Surv() forces its internal matrix column names to "time" and "status", + # Surv() forces its internal matrix column names to "time" and "status", # so we must bypass it and read the original variable name (e.g., 'is_event'). lhs_vars <- all.vars(formula[[2]]) status_var <- lhs_vars[length(lhs_vars)] - + event_times <- sort(unique(surv_resp[, 1][surv_resp[, 2] == 1])) - + # Transforming to person-time guarantees proper risk sets for tied intervals data_long <- survival::survSplit( formula = clean_formula, data = data, cut = event_times, - episode = ".time_int" + episode = ".time_int" ) - + glm_formula <- stats::as.formula( paste(status_var, "~", rhs_str, "+ as.factor(.time_int)") ) - + glm_fit <- stats::glm( - glm_formula, - data = data_long, + glm_formula, + data = data_long, family = stats::binomial(link = "logit") ) - + coefs <- stats::coef(glm_fit) wald_ci_log <- stats::confint.default(glm_fit) - + lower_log_ci <- wald_ci_log[, "2.5 %"] upper_log_ci <- wald_ci_log[, "97.5 %"] hr <- exp(coefs) - + conf_int_df <- data.frame( "exp(coef)" = hr, "exp(-coef)" = 1 / hr, "lower .95" = exp(lower_log_ci), "upper .95" = exp(upper_log_ci), row.names = names(coefs), - check.names = FALSE + check.names = FALSE ) - + list(conf.int = conf_int_df) } From e66f180a730847224746047aa8cc2e99163a6a58 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 10:24:01 +0200 Subject: [PATCH 04/23] Applying reviewer comments --- R/get_cox_pairwise_df.R | 72 ++++++++------ inst/WORDLIST | 1 - man/dot-estimate_p_value.Rd | 25 ----- man/dot-get_discrete_cox.Rd | 24 ----- man/get_cox_pairwise_df.Rd | 12 ++- tests/testthat/test-get_cox_pairwise_df.R | 115 +++++++++++++--------- 6 files changed, 120 insertions(+), 129 deletions(-) delete mode 100644 man/dot-estimate_p_value.Rd delete mode 100644 man/dot-get_discrete_cox.Rd diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index dbcaec15..f56a0313 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -36,7 +36,7 @@ #' The columns are: #' \itemize{ #' \item `HR`: The Hazard Ratio formatted to two decimal places. -#' \item `95% CI`: The 95\% confidence interval as `"(lower, upper)"`. +#' \item `95% CI`: The 95% confidence interval as `"(lower, upper)"`. #' \item `p-value ()`: The p-value from the selected `test`, where #' `` is the title-cased test name (e.g., `"p-value (log-rank)"`). #' } @@ -56,17 +56,21 @@ #' @examples #' # Example data setup (assuming 'time' is event time, 'status' #' # is event indicator (1=event), and 'arm' is the treatment group) -#' library(dplyr) # For better data handling +#' # for data handling +#' library(dplyr) +#' # to not specify formula using `survival::Surv` - it is a bad practice and +#' # can lead to unexpected results +#' library(survival) #' #' # Prepare data in a modern dplyr-friendly way -#' surv_data <- survival::lung |> +#' surv_data <- lung |> #' mutate( #' arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), #' status = status - 1 # Convert status to 0/1 #' ) |> #' filter(if_all(everything(), ~ !is.na(.))) #' -#' formula <- survival::Surv(time, status) ~ arm +#' formula <- Surv(time, status) ~ arm #' #' # Example 1: Default usage (ties = "exact", test = "log-rank") #' results_default <- get_cox_pairwise_df( @@ -124,6 +128,15 @@ get_cox_pairwise_df <- function( call = get_cli_abort_call() ) } + + formula_str <- paste(deparse(model_formula), collapse = " ") + if (grepl("\\b[a-zA-Z][a-zA-Z0-9.]*::", formula_str)) { + cli::cli_abort( + "{.arg model_formula} must be specified without namespace.", + call = get_cli_abort_call() + ) + } + if (!is.factor(data[[arm]])) { cli::cli_abort( "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.", @@ -161,13 +174,11 @@ get_cox_pairwise_df <- function( if (ties == "discrete") { coxph_ans <- .get_discrete_cox(formula = model_formula, data = comp_df) } else { - suppressWarnings( - coxph_ans <- survival::coxph( - formula = model_formula, - data = comp_df, - ties = ties - ) |> summary() - ) + coxph_ans <- survival::coxph( + formula = model_formula, + data = comp_df, + ties = ties + ) |> summary() } log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm) @@ -209,7 +220,7 @@ get_cox_pairwise_df <- function( #' @param arm (`string`)\cr column name of the arm variable in `data`. #' #' @returns A single numeric p-value. -#' @keywords internal +#' @noRd .estimate_p_value <- function(formula, data, test, arm) { test_type <- switch(test, "log-rank" = "logrank", @@ -222,10 +233,14 @@ get_cox_pairwise_df <- function( ) if (test_type != "lr") { - rlang::check_installed( - "coin", - reason = paste("to run log-rank tests using", test_type, "method") - ) + if (!requireNamespace("coin", quietly = TRUE)) { + cli::cli_abort( + paste( + "The {.pkg coin} package is required to run", + "log-rank tests using the {test_type} method." + ) + ) + } test_result <- coin::logrank_test( formula = formula, @@ -267,16 +282,9 @@ get_cox_pairwise_df <- function( #' The subsetted data containing exactly two arms. #' #' @returns A list mimicking `summary(coxph)` containing a `conf.int` data.frame -#' @keywords internal +#' @noRd .get_discrete_cox <- function(formula, data) { - # Sanitize to allow smooth parsing of the Surv formulation - lhs_str <- paste(deparse(formula[[2]]), collapse = " ") - rhs_str <- paste(deparse(formula[[3]]), collapse = " ") - - clean_lhs_str <- sub(".*Surv\\(", "Surv(", lhs_str) - clean_formula <- stats::as.formula(paste(clean_lhs_str, "~", rhs_str)) - - mf <- stats::model.frame(clean_formula, data = data) + mf <- stats::model.frame(formula, data = data) surv_resp <- stats::model.response(mf) # Extract the actual status variable name directly from the formula. @@ -289,16 +297,24 @@ get_cox_pairwise_df <- function( # Transforming to person-time guarantees proper risk sets for tied intervals data_long <- survival::survSplit( - formula = clean_formula, + formula = formula, data = data, cut = event_times, episode = ".time_int" ) - glm_formula <- stats::as.formula( - paste(status_var, "~", rhs_str, "+ as.factor(.time_int)") + # Construct the update template dynamically using the extracted status variable name + # This creates: status_var ~ . + as.factor(.time_int) + update_pattern <- stats::reformulate( + termlabels = c(".", "as.factor(.time_int)"), + response = status_var ) + # Apply the update to the original formula + # This safely replaces the Surv(...) LHS with the raw status variable, + # retains all RHS covariates, and adds the discrete time intervals. + glm_formula <- stats::update(formula, update_pattern) + glm_fit <- stats::glm( glm_formula, data = data_long, diff --git a/inst/WORDLIST b/inst/WORDLIST index 118175e9..0a17e3c7 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -19,7 +19,6 @@ RMP RMPT RStudio Rua -SAS's SOCs Tidyverse analyte diff --git a/man/dot-estimate_p_value.Rd b/man/dot-estimate_p_value.Rd deleted file mode 100644 index fc640885..00000000 --- a/man/dot-estimate_p_value.Rd +++ /dev/null @@ -1,25 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_cox_pairwise_df.R -\name{.estimate_p_value} -\alias{.estimate_p_value} -\title{Estimate p-value for a pairwise survival comparison} -\usage{ -.estimate_p_value(formula, data, test, arm) -} -\arguments{ -\item{formula}{(\code{formula})\cr survival formula, e.g. \code{Surv(time, status) ~ arm}.} - -\item{data}{(\code{data.frame})\cr subset containing exactly two arm levels.} - -\item{test}{(\code{string})\cr test name as accepted by \code{\link[=get_cox_pairwise_df]{get_cox_pairwise_df()}}.} - -\item{arm}{(\code{string})\cr column name of the arm variable in \code{data}.} -} -\value{ -A single numeric p-value. -} -\description{ -Dispatches to \code{coin::logrank_test()} for weighted log-rank variants -or to a likelihood-ratio test via \code{survival::survreg()}. -} -\keyword{internal} diff --git a/man/dot-get_discrete_cox.Rd b/man/dot-get_discrete_cox.Rd deleted file mode 100644 index 6272bcf0..00000000 --- a/man/dot-get_discrete_cox.Rd +++ /dev/null @@ -1,24 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/get_cox_pairwise_df.R -\name{.get_discrete_cox} -\alias{.get_discrete_cox} -\title{Fit a discrete-time Cox model (pooled logistic regression)} -\usage{ -.get_discrete_cox(formula, data) -} -\arguments{ -\item{formula}{(\code{formula})\cr -The original survival formula.} - -\item{data}{(\code{data.frame})\cr -The subsetted data containing exactly two arms.} -} -\value{ -A list mimicking \code{summary(coxph)} containing a \code{conf.int} data.frame -} -\description{ -Helper function to simulate SAS's \code{TIES=DISCRETE} option. It expands the data -into person-time format, fits a logistic regression model, and calculates -Wald confidence intervals. -} -\keyword{internal} diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index da139e1c..3242f048 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -51,7 +51,7 @@ A \code{data.frame} with one row per comparison arm (stored as rownames). The columns are: \itemize{ \item \code{HR}: The Hazard Ratio formatted to two decimal places. -\item \verb{95\% CI}: The 95\\% confidence interval as \code{"(lower, upper)"}. +\item \verb{95\% CI}: The 95\% confidence interval as \code{"(lower, upper)"}. \item \verb{p-value ()}: The p-value from the selected \code{test}, where \verb{} is the title-cased test name (e.g., \code{"p-value (log-rank)"}). } @@ -75,17 +75,21 @@ to \code{coin::logrank_test()} for weighted log-rank variants or to \examples{ # Example data setup (assuming 'time' is event time, 'status' # is event indicator (1=event), and 'arm' is the treatment group) -library(dplyr) # For better data handling +# for data handling +library(dplyr) +# to not specify formula using `survival::Surv` - it is a bad practice and +# can lead to unexpected results +library(survival) # Prepare data in a modern dplyr-friendly way -surv_data <- survival::lung |> +surv_data <- lung |> mutate( arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), status = status - 1 # Convert status to 0/1 ) |> filter(if_all(everything(), ~ !is.na(.))) -formula <- survival::Surv(time, status) ~ arm +formula <- Surv(time, status) ~ arm # Example 1: Default usage (ties = "exact", test = "log-rank") results_default <- get_cox_pairwise_df( diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index d18ab18f..12fe02de 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -1,30 +1,28 @@ skip_if_pkg_not_installed(c("survival", "dplyr", "coin")) -# Setup shared test data +# Setup shared test data using completely different data and formulas set.seed(42) -surv_data_2arm <- survival::lung |> +test_df_2grp <- survival::veteran |> dplyr::mutate( - arm = factor(sample(c("A", "B"), dplyr::n(), replace = TRUE)), - status = status - 1 + treatment = factor(sample(c("DrugX", "Placebo"), dplyr::n(), replace = TRUE)), + event = status ) |> dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) -surv_data_3arm <- survival::lung |> +test_df_3grp <- survival::veteran |> dplyr::mutate( - arm = factor(sample(c("A", "B", "C"), dplyr::n(), replace = TRUE)), - status = status - 1 + treatment = factor(sample(c("DrugX", "DrugY", "Placebo"), dplyr::n(), replace = TRUE)), + event = status ) |> dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) test_that("get_cox_pairwise_df() works with two arms", { expect_no_error( - suppressWarnings( - result <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", - ref_group = "A" - ) + result <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + ref_group = "Placebo" ) ) expect_s3_class(result, "data.frame") @@ -40,16 +38,17 @@ test_that( expect_no_error( suppressWarnings( result <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_3arm, - arm = "arm", - ref_group = "A" + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment", + ref_group = "Placebo" ) ) ) expect_s3_class(result, "data.frame") expect_equal(nrow(result), 2L) - expect_equal(rownames(result), c("B", "C")) + # The output rownames should be the non-reference groups + expect_true(all(c("DrugX", "DrugY") %in% rownames(result))) expect_named(result, c("HR", "95% CI", "p-value (log-rank)")) # All hazard ratios and CIs must be non-NA @@ -63,35 +62,37 @@ test_that("get_cox_pairwise_df() uses first factor level as default ref_group", expect_no_error( suppressWarnings( result <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_3arm, - arm = "arm" + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment" ) ) ) expect_equal(nrow(result), 2L) - # Default ref is "A", so comparisons are "B" and "C" - expect_equal(rownames(result), c("B", "C")) + + # Default ref is the first level, so it should not be in the output rownames + first_lvl <- levels(test_df_3grp$treatment)[1] + expect_false(first_lvl %in% rownames(result)) }) test_that("get_cox_pairwise_df() errors with non-formula model_formula", { expect_error( get_cox_pairwise_df( - model_formula = "Surv(time, status) ~ arm", - data = surv_data_2arm, - arm = "arm" + model_formula = "Surv(time, event) ~ treatment", + data = test_df_2grp, + arm = "treatment" ) ) }) test_that("get_cox_pairwise_df() errors when arm column is not a factor", { - bad_data <- surv_data_2arm - bad_data[["arm"]] <- as.character(bad_data[["arm"]]) + bad_data <- test_df_2grp + bad_data[["treatment"]] <- as.character(bad_data[["treatment"]]) expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, event) ~ treatment, data = bad_data, - arm = "arm" + arm = "treatment" ) ) }) @@ -102,11 +103,11 @@ test_that(paste0( ), { expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_3arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment", # Passing multiple reference groups to force length > 2 - ref_group = c("A", "B") + ref_group = c("Placebo", "DrugX") ), "must contain exactly 2 arms/groups" ) @@ -119,9 +120,9 @@ test_that("get_cox_pairwise_df() works with all valid 'ties' methods", { expect_no_error( suppressWarnings( res <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", ties = t_method ) ) @@ -145,9 +146,9 @@ test_that("get_cox_pairwise_df() works with all valid 'test' methods", { expect_no_error( suppressWarnings( res <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", test = t_method ) ) @@ -168,9 +169,9 @@ test_that("get_cox_pairwise_df() works with all valid 'test' methods", { test_that("get_cox_pairwise_df() catches invalid 'ties' and 'test' arguments", { expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", ties = "invalid_tie_method" ), "should be one of" @@ -178,11 +179,31 @@ test_that("get_cox_pairwise_df() catches invalid 'ties' and 'test' arguments", { expect_error( get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, - data = surv_data_2arm, - arm = "arm", + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", test = "invalid_test_method" ), "should be one of" ) }) + +test_that("get_cox_pairwise_df() works for formula with covariates", { + # Added 'karno' as a covariate. + # Likelihood-ratio is utilized because standard coin::logrank_test + # syntax does not support continuous right-hand side covariates. + expect_no_error( + suppressWarnings( + res_covariate <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + karno, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio" + ) + ) + ) + + expect_s3_class(res_covariate, "data.frame") + expect_false(anyNA(res_covariate[["HR"]])) + expect_false(anyNA(res_covariate[["p-value (Likelihood-Ratio)"]])) +}) From 59447a46b978f36597b131d1ec20bd3d0933049a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 10:37:08 +0200 Subject: [PATCH 05/23] docs --- R/get_cox_pairwise_df.R | 2 +- man/get_cox_pairwise_df.Rd | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index f56a0313..4db995af 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -45,7 +45,7 @@ #' data to the current arm and the reference arm, and then: #' \itemize{ #' \item Fits a Cox model using `survival::coxph()`. -#' \item Computes a p-value via [.estimate_p_value()], which dispatches +#' \item Computes a p-value via, which dispatches #' to `coin::logrank_test()` for weighted log-rank variants or to #' `survival::survreg()` for the likelihood-ratio test. #' } diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 3242f048..81f1df60 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -67,7 +67,7 @@ The function iterates through each non-reference arm, subsets the data to the current arm and the reference arm, and then: \itemize{ \item Fits a Cox model using \code{survival::coxph()}. -\item Computes a p-value via \code{\link[=.estimate_p_value]{.estimate_p_value()}}, which dispatches +\item Computes a p-value via, which dispatches to \code{coin::logrank_test()} for weighted log-rank variants or to \code{survival::survreg()} for the likelihood-ratio test. } @@ -76,8 +76,8 @@ to \code{coin::logrank_test()} for weighted log-rank variants or to # Example data setup (assuming 'time' is event time, 'status' # is event indicator (1=event), and 'arm' is the treatment group) # for data handling -library(dplyr) -# to not specify formula using `survival::Surv` - it is a bad practice and +library(dplyr) +# to not specify formula using `survival::Surv` - it is a bad practice and # can lead to unexpected results library(survival) From 6900964f907369d5636c51a5d7edd33934a5ea16 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 10:54:36 +0200 Subject: [PATCH 06/23] Add functionality to allow for strata() in model formula --- R/get_cox_pairwise_df.R | 81 ++++++++++++++++++++--- tests/testthat/test-get_cox_pairwise_df.R | 32 +++++++++ 2 files changed, 105 insertions(+), 8 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 4db995af..535582cb 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -48,7 +48,7 @@ #' \item Computes a p-value via, which dispatches #' to `coin::logrank_test()` for weighted log-rank variants or to #' `survival::survreg()` for the likelihood-ratio test. -#' } +#' } #' #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`, #' `coin::logrank_test()`. @@ -241,23 +241,25 @@ get_cox_pairwise_df <- function( ) ) } + coin_formula <- .rewrite_strata_term(formula, model = "coin") test_result <- coin::logrank_test( - formula = formula, + formula = coin_formula, data = data, type = test_type ) p_value <- as.numeric(coin::pvalue(test_result)) } else { + clean_formula <- .rewrite_strata_term(formula, model = "linear") # SAS LR test assumes an exponential distribution # fit the model - fit_cov <- survival::survreg(formula, data = data, dist = "exponential") + fit_cov <- survival::survreg(clean_formula, data = data, dist = "exponential") # fit the null model drop_arm_formula <- stats::as.formula(paste(". ~ . -", arm)) # to account for possible covariates - null_formula <- stats::update(formula, drop_arm_formula) + null_formula <- stats::update(clean_formula, drop_arm_formula) fit_null <- survival::survreg(null_formula, data = data, dist = "exponential") # calculate the difference and estimate the statistics @@ -284,20 +286,23 @@ get_cox_pairwise_df <- function( #' @returns A list mimicking `summary(coxph)` containing a `conf.int` data.frame #' @noRd .get_discrete_cox <- function(formula, data) { - mf <- stats::model.frame(formula, data = data) + # Flatten strata terms into standard GLM covariates + clean_formula <- .rewrite_strata_term(formula, model = "linear") + + mf <- stats::model.frame(clean_formula, data = data) surv_resp <- stats::model.response(mf) # Extract the actual status variable name directly from the formula. # Surv() forces its internal matrix column names to "time" and "status", # so we must bypass it and read the original variable name (e.g., 'is_event'). - lhs_vars <- all.vars(formula[[2]]) + lhs_vars <- all.vars(clean_formula[[2]]) status_var <- lhs_vars[length(lhs_vars)] event_times <- sort(unique(surv_resp[, 1][surv_resp[, 2] == 1])) # Transforming to person-time guarantees proper risk sets for tied intervals data_long <- survival::survSplit( - formula = formula, + formula = clean_formula, data = data, cut = event_times, episode = ".time_int" @@ -313,7 +318,7 @@ get_cox_pairwise_df <- function( # Apply the update to the original formula # This safely replaces the Surv(...) LHS with the raw status variable, # retains all RHS covariates, and adds the discrete time intervals. - glm_formula <- stats::update(formula, update_pattern) + glm_formula <- stats::update(clean_formula, update_pattern) glm_fit <- stats::glm( glm_formula, @@ -339,3 +344,63 @@ get_cox_pairwise_df <- function( list(conf.int = conf_int_df) } + +#' Rewrite survival formula with strata() for specific models +#' +#' @description +#' Parses a survival formula and safely translates `strata()` wrappers into +#' syntaxes supported by target modeling engines. +#' +#' @param formula (`formula`)\cr The original survival formula. +#' @param model (`string`)\cr The target model: `"coin"` or `"linear"`. +#' +#' @returns A modified `formula` with `strata()` terms properly translated. +#' @noRd +.rewrite_strata_term <- function(formula, model = c("coin", "linear")) { + model <- match.arg(model) + + f_terms <- stats::terms(formula) + term_labels <- attr(f_terms, "term.labels") + + # Safely identify strata calls directly from the right-hand side labels + strata_idx <- grep("^strata\\(", term_labels) + + # If there are no strata terms, return the formula completely untouched + if (length(strata_idx) == 0) { + return(formula) + } + + strata_calls <- term_labels[strata_idx] + non_strata_labels <- term_labels[-strata_idx] + + # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments + strata_vars <- unlist(lapply(strata_calls, function(x) { + sapply(as.list(str2lang(x))[-1], deparse) + })) + + if (model == "coin") { + # coin requires block syntax: response ~ predictors | strata + lhs_terms <- if (length(non_strata_labels) > 0) { + paste(non_strata_labels, collapse = " + ") + } else { + "1" + } + rhs_str <- paste(lhs_terms, "|", paste(strata_vars, collapse = " + ")) + + } else { + # glm treats strata purely as normal fixed-effect covariates + all_labels <- c(non_strata_labels, strata_vars) + rhs_str <- if (length(all_labels) > 0) { + paste(all_labels, collapse = " + ") + } else { + "1" + } + } + + # Rebuild the formula preserving the exact LHS response and original environment + stats::reformulate( + termlabels = rhs_str, + response = formula[[2]], + env = environment(formula) + ) +} diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index 12fe02de..4dcd75ae 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -207,3 +207,35 @@ test_that("get_cox_pairwise_df() works for formula with covariates", { expect_false(anyNA(res_covariate[["HR"]])) expect_false(anyNA(res_covariate[["p-value (Likelihood-Ratio)"]])) }) + +test_that("get_cox_pairwise_df() works for formula with strata()", { + # 1. Test log-rank with strata (uses coin engine) + expect_no_error( + suppressWarnings( + res_strata_lr <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(celltype), + data = test_df_2grp, + arm = "treatment", + ties = "efron", + test = "log-rank" + ) + ) + ) + expect_s3_class(res_strata_lr, "data.frame") + expect_false(anyNA(res_strata_lr[["p-value (log-rank)"]])) + + # 2. Test likelihood-ratio with strata (uses updated parametric survreg engine) + expect_no_error( + suppressWarnings( + res_strata_lrt <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(celltype), + data = test_df_2grp, + arm = "treatment", + ties = "efron", + test = "likelihood-ratio" + ) + ) + ) + expect_s3_class(res_strata_lrt, "data.frame") + expect_false(anyNA(res_strata_lrt[["p-value (Likelihood-Ratio)"]])) +}) From ed7124b6a610352e1b340264accfedf2fb1c4fa0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 10:55:01 +0200 Subject: [PATCH 07/23] style --- R/get_cox_pairwise_df.R | 23 +++++++++++------------ tests/testthat/test-get_cox_pairwise_df.R | 4 ++-- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 535582cb..eca0240d 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -348,36 +348,36 @@ get_cox_pairwise_df <- function( #' Rewrite survival formula with strata() for specific models #' #' @description -#' Parses a survival formula and safely translates `strata()` wrappers into +#' Parses a survival formula and safely translates `strata()` wrappers into #' syntaxes supported by target modeling engines. #' #' @param formula (`formula`)\cr The original survival formula. #' @param model (`string`)\cr The target model: `"coin"` or `"linear"`. -#' +#' #' @returns A modified `formula` with `strata()` terms properly translated. #' @noRd .rewrite_strata_term <- function(formula, model = c("coin", "linear")) { model <- match.arg(model) - + f_terms <- stats::terms(formula) term_labels <- attr(f_terms, "term.labels") - + # Safely identify strata calls directly from the right-hand side labels strata_idx <- grep("^strata\\(", term_labels) - + # If there are no strata terms, return the formula completely untouched if (length(strata_idx) == 0) { return(formula) } - + strata_calls <- term_labels[strata_idx] non_strata_labels <- term_labels[-strata_idx] - + # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments strata_vars <- unlist(lapply(strata_calls, function(x) { sapply(as.list(str2lang(x))[-1], deparse) })) - + if (model == "coin") { # coin requires block syntax: response ~ predictors | strata lhs_terms <- if (length(non_strata_labels) > 0) { @@ -386,7 +386,6 @@ get_cox_pairwise_df <- function( "1" } rhs_str <- paste(lhs_terms, "|", paste(strata_vars, collapse = " + ")) - } else { # glm treats strata purely as normal fixed-effect covariates all_labels <- c(non_strata_labels, strata_vars) @@ -396,11 +395,11 @@ get_cox_pairwise_df <- function( "1" } } - + # Rebuild the formula preserving the exact LHS response and original environment stats::reformulate( - termlabels = rhs_str, - response = formula[[2]], + termlabels = rhs_str, + response = formula[[2]], env = environment(formula) ) } diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index 4dcd75ae..ca3c5af8 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -217,7 +217,7 @@ test_that("get_cox_pairwise_df() works for formula with strata()", { data = test_df_2grp, arm = "treatment", ties = "efron", - test = "log-rank" + test = "log-rank" ) ) ) @@ -232,7 +232,7 @@ test_that("get_cox_pairwise_df() works for formula with strata()", { data = test_df_2grp, arm = "treatment", ties = "efron", - test = "likelihood-ratio" + test = "likelihood-ratio" ) ) ) From 3e62d5489520d3b09cd1d72635e46d5938c7b1e8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 11:46:01 +0200 Subject: [PATCH 08/23] Update functions that depend on get_cox_pairwise_df --- R/get_cox_pairwise_df.R | 5 +---- R/tbl_coxph.R | 4 ++-- man/get_cox_pairwise_df.Rd | 6 ++---- man/tbl_coxph.Rd | 4 ++-- tests/testthat/test-gg_km.R | 3 ++- tests/testthat/test-tbl_coxph.R | 6 +++--- 6 files changed, 12 insertions(+), 16 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index eca0240d..63b259f9 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -53,14 +53,11 @@ #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`, #' `coin::logrank_test()`. #' -#' @examples +#' @examplesIf requireNamespace("survival", quietly = TRUE) #' # Example data setup (assuming 'time' is event time, 'status' #' # is event indicator (1=event), and 'arm' is the treatment group) #' # for data handling #' library(dplyr) -#' # to not specify formula using `survival::Surv` - it is a bad practice and -#' # can lead to unexpected results -#' library(survival) #' #' # Prepare data in a modern dplyr-friendly way #' surv_data <- lung |> diff --git a/R/tbl_coxph.R b/R/tbl_coxph.R index d4f46b7c..cb45a0a4 100644 --- a/R/tbl_coxph.R +++ b/R/tbl_coxph.R @@ -18,14 +18,14 @@ #' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) #' # Setup sample survival data #' library(survival) -#' surv_data <- survival::lung |> +#' surv_data <- lung |> #' dplyr::mutate( #' arm = factor(sample(c("A", "B", "C"), dplyr::n(), replace = TRUE)), #' status = status - 1 #' ) |> #' dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) #' -#' formula <- survival::Surv(time, status) ~ arm +#' formula <- Surv(time, status) ~ arm #' #' # Generate the pairwise statistics data.frame #' pairwise_results <- get_cox_pairwise_df( diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 81f1df60..3bcaaea2 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -73,13 +73,11 @@ to \code{coin::logrank_test()} for weighted log-rank variants or to } } \examples{ +\dontshow{if (requireNamespace("survival", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Example data setup (assuming 'time' is event time, 'status' # is event indicator (1=event), and 'arm' is the treatment group) # for data handling library(dplyr) -# to not specify formula using `survival::Surv` - it is a bad practice and -# can lead to unexpected results -library(survival) # Prepare data in a modern dplyr-friendly way surv_data <- lung |> @@ -121,7 +119,7 @@ results_lr <- get_cox_pairwise_df( test = "likelihood-ratio" ) print(results_lr) - +\dontshow{\}) # examplesIf} } \seealso{ \code{annotate_gg_km()}, \code{gg_km()}, \code{survival::coxph()}, diff --git a/man/tbl_coxph.Rd b/man/tbl_coxph.Rd index da048f1f..670f0fe3 100644 --- a/man/tbl_coxph.Rd +++ b/man/tbl_coxph.Rd @@ -25,14 +25,14 @@ stacked layout where statistics form the rows of the table. \dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Setup sample survival data library(survival) -surv_data <- survival::lung |> +surv_data <- lung |> dplyr::mutate( arm = factor(sample(c("A", "B", "C"), dplyr::n(), replace = TRUE)), status = status - 1 ) |> dplyr::filter(dplyr::if_all(dplyr::everything(), ~ !is.na(.))) -formula <- survival::Surv(time, status) ~ arm +formula <- Surv(time, status) ~ arm # Generate the pairwise statistics data.frame pairwise_results <- get_cox_pairwise_df( diff --git a/tests/testthat/test-gg_km.R b/tests/testthat/test-gg_km.R index b839d384..4808401a 100644 --- a/tests/testthat/test-gg_km.R +++ b/tests/testthat/test-gg_km.R @@ -1,3 +1,4 @@ +skip_if_pkg_not_installed("survival") # pre-processing the km fit anl <- cards::ADTTE |> dplyr::mutate(is_event = CNSR == 0) @@ -9,7 +10,7 @@ anl[[by]] <- factor(anl[[by]], levels = c( )) group_sym <- rlang::sym(by) model_formula <- rlang::new_formula( - lhs = rlang::expr(survival::Surv(AVAL, is_event)), + lhs = rlang::expr(Surv(AVAL, is_event)), rhs = rlang::expr(!!group_sym) ) fit_kmg01 <- survival::survfit(model_formula, anl) diff --git a/tests/testthat/test-tbl_coxph.R b/tests/testthat/test-tbl_coxph.R index 50803e2f..d3faf194 100644 --- a/tests/testthat/test-tbl_coxph.R +++ b/tests/testthat/test-tbl_coxph.R @@ -20,14 +20,14 @@ surv_data_3arm <- survival::lung |> # Generate baseline pairwise dataframes to use in the tests pairwise_df_2arm <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, status) ~ arm, data = surv_data_2arm, arm = "arm", ref_group = "A" ) pairwise_df_3arm <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, status) ~ arm, data = surv_data_3arm, arm = "arm", ref_group = "A" @@ -108,7 +108,7 @@ test_that("tbl_coxph() works with only p-value (HR and CI removed)", { test_that("tbl_coxph() dynamically adopts different p-value test labels", { # Generate a pairwise dataframe using a different test (e.g., Wilcoxon) pairwise_wilcoxon <- get_cox_pairwise_df( - model_formula = survival::Surv(time, status) ~ arm, + model_formula = Surv(time, status) ~ arm, data = surv_data_2arm, arm = "arm", test = "gehan-breslow" From 33fa1184a0e96f9947db822c8c2c1502782a7ba0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 11:57:51 +0200 Subject: [PATCH 09/23] Updating examples --- R/annotate_gg_km.R | 9 +++++---- R/gg_km.R | 9 +++++---- man/annotate_gg_km.Rd | 10 ++++++---- man/gg_km.Rd | 10 ++++++---- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 899acee2..23ee7779 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -46,15 +46,16 @@ #' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for #' related functionalities. #' -#' @examples +#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) #' # Preparing the Kaplan-Meier Plot -#' use_lung <- survival::lung +#' library(survival) +#' use_lung <- lung #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) #' -#' formula <- survival::Surv(time, status) ~ arm -#' fit_kmg01 <- survival::survfit(formula, use_lung) +#' formula <- Surv(time, status) ~ arm +#' fit_kmg01 <- survfit(formula, use_lung) #' surv_plot_data <- process_survfit(fit_kmg01) #' #' plt_kmg01 <- gg_km(surv_plot_data) diff --git a/R/gg_km.R b/R/gg_km.R index 3eaf7340..2198871b 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -28,16 +28,17 @@ NULL #' Data setup assumes `"time"` is event time, `"status"` is event indicator (`1` represents an event), #' while `"arm"` is the treatment group. #' -#' @examples +#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) #' # Data preparation for KM plot -#' use_lung <- survival::lung +#' library(survival) +#' use_lung <- lung #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1 #' use_lung <- na.omit(use_lung) #' #' # Fit Kaplan-Meier model -#' formula <- survival::Surv(time, status) ~ arm -#' fit_kmg01 <- survival::survfit(formula, use_lung) +#' formula <- Surv(time, status) ~ arm +#' fit_kmg01 <- survfit(formula, use_lung) #' #' # Process survfit data for plotting #' surv_plot_data <- process_survfit(fit_kmg01) diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index 0b13bcf7..a4a73f4f 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -107,18 +107,20 @@ Proportional Hazards summary table as an annotation box. }} \examples{ +\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Preparing the Kaplan-Meier Plot -use_lung <- survival::lung +library(survival) +use_lung <- lung use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) -formula <- survival::Surv(time, status) ~ arm -fit_kmg01 <- survival::survfit(formula, use_lung) +formula <- Surv(time, status) ~ arm +fit_kmg01 <- survfit(formula, use_lung) surv_plot_data <- process_survfit(fit_kmg01) plt_kmg01 <- gg_km(surv_plot_data) - +\dontshow{\}) # examplesIf} # Annotate Plot with Numbers at Risk Table annotate_riskdf(plt_kmg01, fit_kmg01) diff --git a/man/gg_km.Rd b/man/gg_km.Rd index baf45186..8ddbf0d3 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -92,20 +92,22 @@ support for various customizations like censoring marks, Confidence Intervals ( }} \examples{ +\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Data preparation for KM plot -use_lung <- survival::lung +library(survival) +use_lung <- lung use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE)) use_lung$status <- use_lung$status - 1 # Convert status to 0/1 use_lung <- na.omit(use_lung) # Fit Kaplan-Meier model -formula <- survival::Surv(time, status) ~ arm -fit_kmg01 <- survival::survfit(formula, use_lung) +formula <- Surv(time, status) ~ arm +fit_kmg01 <- survfit(formula, use_lung) # Process survfit data for plotting surv_plot_data <- process_survfit(fit_kmg01) head(surv_plot_data) - +\dontshow{\}) # examplesIf} # Example of making the KM plot plt_kmg01 <- gg_km(surv_plot_data) From 71b22102ae787480edd259d6c45c7cebeeacf3a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 12:05:25 +0200 Subject: [PATCH 10/23] more docs --- R/get_cox_pairwise_df.R | 2 +- man/get_cox_pairwise_df.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 63b259f9..e392e659 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -58,7 +58,7 @@ #' # is event indicator (1=event), and 'arm' is the treatment group) #' # for data handling #' library(dplyr) -#' +#' library(survival) #' # Prepare data in a modern dplyr-friendly way #' surv_data <- lung |> #' mutate( diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 3bcaaea2..94293c44 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -78,7 +78,7 @@ to \code{coin::logrank_test()} for weighted log-rank variants or to # is event indicator (1=event), and 'arm' is the treatment group) # for data handling library(dplyr) - +library(survival) # Prepare data in a modern dplyr-friendly way surv_data <- lung |> mutate( From 32c059c2883dccae563bc3784da8b8211bc0dc89 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 12:16:08 +0200 Subject: [PATCH 11/23] Comments about Wald CIs --- R/get_cox_pairwise_df.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index e392e659..1f36cb7f 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -36,7 +36,7 @@ #' The columns are: #' \itemize{ #' \item `HR`: The Hazard Ratio formatted to two decimal places. -#' \item `95% CI`: The 95% confidence interval as `"(lower, upper)"`. +#' \item `95% CI`: The 95% Wald confidence interval as `"(lower, upper)"`. #' \item `p-value ()`: The p-value from the selected `test`, where #' `` is the title-cased test name (e.g., `"p-value (log-rank)"`). #' } @@ -168,6 +168,7 @@ get_cox_pairwise_df <- function( comp_df[[arm]] <- droplevels(comp_df[[arm]]) + # Wald CIs are calculated with all ties methods if (ties == "discrete") { coxph_ans <- .get_discrete_cox(formula = model_formula, data = comp_df) } else { From 0da95c4b59e9d337b0d0a4428525829fa63226f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Tue, 26 May 2026 12:31:40 +0200 Subject: [PATCH 12/23] docs again --- man/get_cox_pairwise_df.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 94293c44..24f9c252 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -51,7 +51,7 @@ A \code{data.frame} with one row per comparison arm (stored as rownames). The columns are: \itemize{ \item \code{HR}: The Hazard Ratio formatted to two decimal places. -\item \verb{95\% CI}: The 95\% confidence interval as \code{"(lower, upper)"}. +\item \verb{95\% CI}: The 95\% Wald confidence interval as \code{"(lower, upper)"}. \item \verb{p-value ()}: The p-value from the selected \code{test}, where \verb{} is the title-cased test name (e.g., \code{"p-value (log-rank)"}). } From 51b1c814f12b224ad68ec3bddb0eab51867361ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 10:22:49 +0200 Subject: [PATCH 13/23] Remove `ties = discrete` --- R/get_cox_pairwise_df.R | 111 ++++------------------ man/get_cox_pairwise_df.Rd | 6 +- tests/testthat/test-get_cox_pairwise_df.R | 3 +- 3 files changed, 23 insertions(+), 97 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 1f36cb7f..0a38f6e3 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -25,8 +25,8 @@ #' selected as the reference group. #' @param ties (`character`)\cr #' A string specifying the method for tie handling in the Cox model. -#' Must be one of "discrete", "exact", "efron", or "breslow". -#' Default is "discrete". +#' Must be one of "exact", "efron", or "breslow". +#' Default is "exact". #' @param test (`character`)\cr #' A string specifying the type of test to compute the p-value. #' Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto", @@ -106,7 +106,7 @@ get_cox_pairwise_df <- function( data, arm, ref_group = NULL, - ties = c("discrete", "exact", "efron", "breslow"), + ties = c("exact", "efron", "breslow"), test = c( "log-rank", "gehan-breslow", @@ -166,18 +166,16 @@ get_cox_pairwise_df <- function( } comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] - comp_df[[arm]] <- droplevels(comp_df[[arm]]) + # Explicitly relevel the factor so the reference group is ALWAYS first. + # This correctly replaces droplevels() while enforcing the right baseline. + comp_df[[arm]] <- factor(comp_df[[arm]], levels = c(ref_group, current_arm)) # Wald CIs are calculated with all ties methods - if (ties == "discrete") { - coxph_ans <- .get_discrete_cox(formula = model_formula, data = comp_df) - } else { - coxph_ans <- survival::coxph( - formula = model_formula, - data = comp_df, - ties = ties - ) |> summary() - } + coxph_ans <- survival::coxph( + formula = model_formula, + data = comp_df, + ties = ties + ) |> summary() log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm) @@ -239,7 +237,7 @@ get_cox_pairwise_df <- function( ) ) } - coin_formula <- .rewrite_strata_term(formula, model = "coin") + coin_formula <- .rewrite_strata_term(formula, model = "log-rank") test_result <- coin::logrank_test( formula = coin_formula, @@ -249,7 +247,7 @@ get_cox_pairwise_df <- function( p_value <- as.numeric(coin::pvalue(test_result)) } else { - clean_formula <- .rewrite_strata_term(formula, model = "linear") + clean_formula <- .rewrite_strata_term(formula, model = "likelihood-ratio") # SAS LR test assumes an exponential distribution # fit the model fit_cov <- survival::survreg(clean_formula, data = data, dist = "exponential") @@ -269,80 +267,6 @@ get_cox_pairwise_df <- function( p_value } -#' Fit a discrete-time Cox model (pooled logistic regression) -#' -#' @description -#' Helper function to simulate SAS's `TIES=DISCRETE` option. It expands the data -#' into person-time format, fits a logistic regression model, and calculates -#' Wald confidence intervals. -#' -#' @param formula (`formula`)\cr -#' The original survival formula. -#' @param data (`data.frame`)\cr -#' The subsetted data containing exactly two arms. -#' -#' @returns A list mimicking `summary(coxph)` containing a `conf.int` data.frame -#' @noRd -.get_discrete_cox <- function(formula, data) { - # Flatten strata terms into standard GLM covariates - clean_formula <- .rewrite_strata_term(formula, model = "linear") - - mf <- stats::model.frame(clean_formula, data = data) - surv_resp <- stats::model.response(mf) - - # Extract the actual status variable name directly from the formula. - # Surv() forces its internal matrix column names to "time" and "status", - # so we must bypass it and read the original variable name (e.g., 'is_event'). - lhs_vars <- all.vars(clean_formula[[2]]) - status_var <- lhs_vars[length(lhs_vars)] - - event_times <- sort(unique(surv_resp[, 1][surv_resp[, 2] == 1])) - - # Transforming to person-time guarantees proper risk sets for tied intervals - data_long <- survival::survSplit( - formula = clean_formula, - data = data, - cut = event_times, - episode = ".time_int" - ) - - # Construct the update template dynamically using the extracted status variable name - # This creates: status_var ~ . + as.factor(.time_int) - update_pattern <- stats::reformulate( - termlabels = c(".", "as.factor(.time_int)"), - response = status_var - ) - - # Apply the update to the original formula - # This safely replaces the Surv(...) LHS with the raw status variable, - # retains all RHS covariates, and adds the discrete time intervals. - glm_formula <- stats::update(clean_formula, update_pattern) - - glm_fit <- stats::glm( - glm_formula, - data = data_long, - family = stats::binomial(link = "logit") - ) - - coefs <- stats::coef(glm_fit) - wald_ci_log <- stats::confint.default(glm_fit) - - lower_log_ci <- wald_ci_log[, "2.5 %"] - upper_log_ci <- wald_ci_log[, "97.5 %"] - hr <- exp(coefs) - - conf_int_df <- data.frame( - "exp(coef)" = hr, - "exp(-coef)" = 1 / hr, - "lower .95" = exp(lower_log_ci), - "upper .95" = exp(upper_log_ci), - row.names = names(coefs), - check.names = FALSE - ) - - list(conf.int = conf_int_df) -} - #' Rewrite survival formula with strata() for specific models #' #' @description @@ -350,11 +274,11 @@ get_cox_pairwise_df <- function( #' syntaxes supported by target modeling engines. #' #' @param formula (`formula`)\cr The original survival formula. -#' @param model (`string`)\cr The target model: `"coin"` or `"linear"`. +#' @param model (`string`)\cr The target model: `"log-rank"` or `"likelihood-ratio"`. #' #' @returns A modified `formula` with `strata()` terms properly translated. #' @noRd -.rewrite_strata_term <- function(formula, model = c("coin", "linear")) { +.rewrite_strata_term <- function(formula, model = c("log-rank", "likelihood-ratio")) { model <- match.arg(model) f_terms <- stats::terms(formula) @@ -376,8 +300,9 @@ get_cox_pairwise_df <- function( sapply(as.list(str2lang(x))[-1], deparse) })) - if (model == "coin") { - # coin requires block syntax: response ~ predictors | strata + if (model == "log-rank") { + # coin is used for log-rank and + # requires block syntax: response ~ predictors | strata lhs_terms <- if (length(non_strata_labels) > 0) { paste(non_strata_labels, collapse = " + ") } else { diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 24f9c252..e9d494b3 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -9,7 +9,7 @@ get_cox_pairwise_df( data, arm, ref_group = NULL, - ties = c("discrete", "exact", "efron", "breslow"), + ties = c("exact", "efron", "breslow"), test = c("log-rank", "gehan-breslow", "tarone", "peto", "prentice", "fleming-harrington", "likelihood-ratio") ) @@ -38,8 +38,8 @@ selected as the reference group.} \item{ties}{(\code{character})\cr A string specifying the method for tie handling in the Cox model. -Must be one of "discrete", "exact", "efron", or "breslow". -Default is "discrete".} +Must be one of "exact", "efron", or "breslow". +Default is "exact".} \item{test}{(\code{character})\cr A string specifying the type of test to compute the p-value. diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index ca3c5af8..0f1d037d 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -114,7 +114,8 @@ test_that(paste0( }) test_that("get_cox_pairwise_df() works with all valid 'ties' methods", { - ties_methods <- c("discrete", "exact", "efron", "breslow") + # REMOVED "discrete" from this list + ties_methods <- c("exact", "efron", "breslow") for (t_method in ties_methods) { expect_no_error( From 2d09123433c177b60590c241b5cdc7db89e9d0d0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 10:58:05 +0200 Subject: [PATCH 14/23] Apply suggestion from @jszczypinski MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan Szczypiński --- R/get_cox_pairwise_df.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 0a38f6e3..27fe4334 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -45,7 +45,7 @@ #' data to the current arm and the reference arm, and then: #' \itemize{ #' \item Fits a Cox model using `survival::coxph()`. -#' \item Computes a p-value via, which dispatches +#' \item Computes a p-value, which dispatches #' to `coin::logrank_test()` for weighted log-rank variants or to #' `survival::survreg()` for the likelihood-ratio test. #' } From e8ff931d4bb4c0ccee79da8f674c96eab90c4b0c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 10:59:17 +0200 Subject: [PATCH 15/23] Apply suggestion from @jszczypinski MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan Szczypiński --- R/get_cox_pairwise_df.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 27fe4334..e86c03bc 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -53,7 +53,7 @@ #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`, #' `coin::logrank_test()`. #' -#' @examplesIf requireNamespace("survival", quietly = TRUE) +#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) #' # Example data setup (assuming 'time' is event time, 'status' #' # is event indicator (1=event), and 'arm' is the treatment group) #' # for data handling From f482bfa2cfb409f45a33a67faa759bb3767309b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 11:00:16 +0200 Subject: [PATCH 16/23] Apply suggestion from @jszczypinski MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jan Szczypiński --- tests/testthat/test-get_cox_pairwise_df.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index 0f1d037d..8f589a9b 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -1,6 +1,6 @@ skip_if_pkg_not_installed(c("survival", "dplyr", "coin")) -# Setup shared test data using completely different data and formulas +# Setup shared test data set.seed(42) test_df_2grp <- survival::veteran |> dplyr::mutate( From 39054ae6a051515d111491003761255deeaa88ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 12:28:12 +0200 Subject: [PATCH 17/23] Changes to cox ph p value calculation --- R/get_cox_pairwise_df.R | 156 ++++++++++++++++------ man/get_cox_pairwise_df.Rd | 4 +- tests/testthat/test-get_cox_pairwise_df.R | 62 +++++++-- 3 files changed, 165 insertions(+), 57 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index e86c03bc..67c02717 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -47,7 +47,7 @@ #' \item Fits a Cox model using `survival::coxph()`. #' \item Computes a p-value, which dispatches #' to `coin::logrank_test()` for weighted log-rank variants or to -#' `survival::survreg()` for the likelihood-ratio test. +#' a nested `survival::coxph()` LRT for the likelihood-ratio test. #' } #' #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`, @@ -118,6 +118,7 @@ get_cox_pairwise_df <- function( ) ) { set_cli_abort_call() + # Input checks if (!rlang::is_formula(model_formula)) { cli::cli_abort( @@ -164,10 +165,10 @@ get_cox_pairwise_df <- function( call = get_cli_abort_call() ) } + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] # Explicitly relevel the factor so the reference group is ALWAYS first. - # This correctly replaces droplevels() while enforcing the right baseline. comp_df[[arm]] <- factor(comp_df[[arm]], levels = c(ref_group, current_arm)) # Wald CIs are calculated with all ties methods @@ -177,7 +178,8 @@ get_cox_pairwise_df <- function( ties = ties ) |> summary() - log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm) + # Pass the 'ties' argument down so the LRT matches the main Cox estimate + log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm, ties) conf_int_row <- paste0(arm, current_arm) current_row <- data.frame( @@ -208,16 +210,17 @@ get_cox_pairwise_df <- function( #' Estimate p-value for a pairwise survival comparison #' #' Dispatches to `coin::logrank_test()` for weighted log-rank variants -#' or to a likelihood-ratio test via `survival::survreg()`. +#' or to a nested likelihood-ratio test via `survival::coxph()`. #' #' @param formula (`formula`)\cr survival formula, e.g. `Surv(time, status) ~ arm`. #' @param data (`data.frame`)\cr subset containing exactly two arm levels. #' @param test (`string`)\cr test name as accepted by [get_cox_pairwise_df()]. #' @param arm (`string`)\cr column name of the arm variable in `data`. +#' @param ties (`character`)\cr tie handling method for the Cox LRT. #' #' @returns A single numeric p-value. #' @noRd -.estimate_p_value <- function(formula, data, test, arm) { +.estimate_p_value <- function(formula, data, test, arm, ties) { test_type <- switch(test, "log-rank" = "logrank", "gehan-breslow" = "Gehan-Breslow", @@ -237,7 +240,8 @@ get_cox_pairwise_df <- function( ) ) } - coin_formula <- .rewrite_strata_term(formula, model = "log-rank") + + coin_formula <- .check_and_rewrite_formula(formula, arm) test_result <- coin::logrank_test( formula = coin_formula, @@ -246,22 +250,22 @@ get_cox_pairwise_df <- function( ) p_value <- as.numeric(coin::pvalue(test_result)) + } else { - clean_formula <- .rewrite_strata_term(formula, model = "likelihood-ratio") - # SAS LR test assumes an exponential distribution - # fit the model - fit_cov <- survival::survreg(clean_formula, data = data, dist = "exponential") + # Fit the full Cox model + fit_full <- survival::coxph(formula, data = data, ties = ties) - # fit the null model - drop_arm_formula <- stats::as.formula(paste(". ~ . -", arm)) - # to account for possible covariates - null_formula <- stats::update(clean_formula, drop_arm_formula) - fit_null <- survival::survreg(null_formula, data = data, dist = "exponential") - - # calculate the difference and estimate the statistics - lrt_stat <- 2 * (fit_cov$loglik[2] - fit_null$loglik[2]) - df <- fit_cov$df - fit_null$df - p_value <- stats::pchisq(lrt_stat, df, lower.tail = FALSE) + # Safely create the reduced formula by dropping the arm variable + reduced_formula <- stats::update(formula, paste(". ~ . -", arm)) + + # Fit the reduced model explicitly to avoid drop1 environment scope errors + fit_reduced <- survival::coxph(reduced_formula, data = data, ties = ties) + + # Execute the nested Likelihood-Ratio Test + anova_res <- stats::anova(fit_reduced, fit_full, test = "Chisq") + + # Extract the p-value for the second row (the full model comparison) + p_value <- anova_res[["Pr(>|Chi|)"]][[2]] } p_value @@ -274,50 +278,118 @@ get_cox_pairwise_df <- function( #' syntaxes supported by target modeling engines. #' #' @param formula (`formula`)\cr The original survival formula. -#' @param model (`string`)\cr The target model: `"log-rank"` or `"likelihood-ratio"`. +#' @param arm (`character`)\cr The name of the primary arm variable. #' #' @returns A modified `formula` with `strata()` terms properly translated. #' @noRd -.rewrite_strata_term <- function(formula, model = c("log-rank", "likelihood-ratio")) { - model <- match.arg(model) - +.check_and_rewrite_formula <- function(formula, arm) { f_terms <- stats::terms(formula) term_labels <- attr(f_terms, "term.labels") # Safely identify strata calls directly from the right-hand side labels strata_idx <- grep("^strata\\(", term_labels) + strata_calls <- term_labels[strata_idx] + + # --- 1. GUARDRAIL VALIDATION --- + valid_terms <- c(arm, strata_calls) + invalid_terms <- setdiff(term_labels, valid_terms) + + if (length(invalid_terms) > 0) { + cli::cli_abort( + paste( + "This log-rank test method does not support covariate adjustment", + "for: {.var {invalid_terms}}.", + "Please use stratification (e.g., {.code ~ {arm} + strata(var)})", + "or switch to {.code test = 'likelihood-ratio'}." + ) + ) + } # If there are no strata terms, return the formula completely untouched if (length(strata_idx) == 0) { return(formula) } + # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments + strata_vars <- unlist(lapply(strata_calls, function(x) { + sapply(as.list(str2lang(x))[-1], deparse) + })) + + lhs_terms <- arm + + # If there are multiple strata variables, wrap them in interaction() + # If single, force as.factor() to satisfy coin::logrank_test requirements + if (length(strata_vars) > 1) { + block_str <- paste0("interaction(", paste(strata_vars, collapse = ", "), ")") + } else { + block_str <- paste0("as.factor(", strata_vars[1], ")") + } + + rhs_str <- paste(lhs_terms, "|", block_str) + + # Rebuild the formula preserving the exact LHS response and original environment + stats::reformulate( + termlabels = rhs_str, + response = formula[[2]], + env = environment(formula) + ) +} + +#' Rewrite survival formula with strata() for specific models +#' +#' @description +#' Parses a survival formula and safely translates `strata()` wrappers into +#' syntaxes supported by target modeling engines. +#' +#' @param formula (`formula`)\cr The original survival formula. +#' @param arm (`character`)\cr The name of the primary arm variable. +#' +#' @returns A modified `formula` with `strata()` terms properly translated. +#' @noRd +.check_and_rewrite_formula <- function(formula, arm) { + f_terms <- stats::terms(formula) + term_labels <- attr(f_terms, "term.labels") + + # Safely identify strata calls directly from the right-hand side labels + strata_idx <- grep("^strata\\(", term_labels) strata_calls <- term_labels[strata_idx] - non_strata_labels <- term_labels[-strata_idx] + + # --- 1. GUARDRAIL VALIDATION --- + valid_terms <- c(arm, strata_calls) + invalid_terms <- setdiff(term_labels, valid_terms) + + if (length(invalid_terms) > 0) { + cli::cli_abort( + paste( + "This log-rank test method does not support covariate adjustment", + "for: {.var {invalid_terms}}.", + "Please use stratification (e.g., {.code ~ {arm} + strata(var)})", + "or switch to {.code test = 'likelihood-ratio'}." + ) + ) + } + + # If there are no strata terms, return the formula completely untouched + if (length(strata_idx) == 0) { + return(formula) + } # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments strata_vars <- unlist(lapply(strata_calls, function(x) { sapply(as.list(str2lang(x))[-1], deparse) })) - if (model == "log-rank") { - # coin is used for log-rank and - # requires block syntax: response ~ predictors | strata - lhs_terms <- if (length(non_strata_labels) > 0) { - paste(non_strata_labels, collapse = " + ") - } else { - "1" - } - rhs_str <- paste(lhs_terms, "|", paste(strata_vars, collapse = " + ")) + lhs_terms <- arm + + # If there are multiple strata variables, wrap them in interaction() + # If single, force as.factor() to satisfy coin::logrank_test requirements + if (length(strata_vars) > 1) { + block_str <- paste0("interaction(", paste(strata_vars, collapse = ", "), ")") } else { - # glm treats strata purely as normal fixed-effect covariates - all_labels <- c(non_strata_labels, strata_vars) - rhs_str <- if (length(all_labels) > 0) { - paste(all_labels, collapse = " + ") - } else { - "1" - } + block_str <- paste0("as.factor(", strata_vars[1], ")") } + + rhs_str <- paste(lhs_terms, "|", block_str) # Rebuild the formula preserving the exact LHS response and original environment stats::reformulate( @@ -325,4 +397,4 @@ get_cox_pairwise_df <- function( response = formula[[2]], env = environment(formula) ) -} +} \ No newline at end of file diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index e9d494b3..a160d412 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -67,9 +67,9 @@ The function iterates through each non-reference arm, subsets the data to the current arm and the reference arm, and then: \itemize{ \item Fits a Cox model using \code{survival::coxph()}. -\item Computes a p-value via, which dispatches +\item Computes a p-value, which dispatches to \code{coin::logrank_test()} for weighted log-rank variants or to -\code{survival::survreg()} for the likelihood-ratio test. +a nested \code{survival::coxph()} LRT for the likelihood-ratio test. } } \examples{ diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index 8f589a9b..5e8b2c89 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -114,7 +114,6 @@ test_that(paste0( }) test_that("get_cox_pairwise_df() works with all valid 'ties' methods", { - # REMOVED "discrete" from this list ties_methods <- c("exact", "efron", "breslow") for (t_method in ties_methods) { @@ -189,10 +188,32 @@ test_that("get_cox_pairwise_df() catches invalid 'ties' and 'test' arguments", { ) }) -test_that("get_cox_pairwise_df() works for formula with covariates", { - # Added 'karno' as a covariate. - # Likelihood-ratio is utilized because standard coin::logrank_test - # syntax does not support continuous right-hand side covariates. +test_that("get_cox_pairwise_df() enforces guardrail against covariates for non-parametric tests", { + # coin log-rank should reject formulas with continuous adjustment covariates + expect_error( + get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + karno, + data = test_df_2grp, + arm = "treatment", + test = "log-rank" + ), + "does not support covariate adjustment for: `karno`" + ) + + # coin log-rank should reject even if strata is present alongside a covariate + expect_error( + get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(celltype) + age, + data = test_df_2grp, + arm = "treatment", + test = "tarone" + ), + "does not support covariate adjustment for: `age`" + ) +}) + +test_that("get_cox_pairwise_df() works for formula with covariates via likelihood-ratio", { + # Likelihood-ratio natively handles right-hand side continuous covariates expect_no_error( suppressWarnings( res_covariate <- get_cox_pairwise_df( @@ -209,12 +230,12 @@ test_that("get_cox_pairwise_df() works for formula with covariates", { expect_false(anyNA(res_covariate[["p-value (Likelihood-Ratio)"]])) }) -test_that("get_cox_pairwise_df() works for formula with strata()", { - # 1. Test log-rank with strata (uses coin engine) +test_that("get_cox_pairwise_df() works for formula with complex strata()", { + # 1. Test log-rank with a single strata variable (uses coin engine) expect_no_error( suppressWarnings( res_strata_lr <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + strata(celltype), + model_formula = Surv(time, event) ~ treatment + strata(prior), data = test_df_2grp, arm = "treatment", ties = "efron", @@ -225,18 +246,33 @@ test_that("get_cox_pairwise_df() works for formula with strata()", { expect_s3_class(res_strata_lr, "data.frame") expect_false(anyNA(res_strata_lr[["p-value (log-rank)"]])) - # 2. Test likelihood-ratio with strata (uses updated parametric survreg engine) + # 2. Test log-rank with multiple strata variables inside a single strata() call + # We use 'prior' and 'trt' (2 levels each) to avoid <2 observation block sparsity expect_no_error( suppressWarnings( - res_strata_lrt <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + strata(celltype), + res_strata_multi <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior, trt), data = test_df_2grp, arm = "treatment", ties = "efron", + test = "log-rank" + ) + ) + ) + expect_s3_class(res_strata_multi, "data.frame") + expect_false(anyNA(res_strata_multi[["p-value (log-rank)"]])) + + # 3. Ensure the likelihood-ratio test properly handles complex strata natively + expect_no_error( + suppressWarnings( + res_strata_cox <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior) + karno, + data = test_df_2grp, + arm = "treatment", test = "likelihood-ratio" ) ) ) - expect_s3_class(res_strata_lrt, "data.frame") - expect_false(anyNA(res_strata_lrt[["p-value (Likelihood-Ratio)"]])) + expect_s3_class(res_strata_cox, "data.frame") + expect_false(anyNA(res_strata_cox[["p-value (Likelihood-Ratio)"]])) }) From 822e50b6239fe06de11e59f4ce97162e5dc81c9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 12:30:35 +0200 Subject: [PATCH 18/23] Docs + style --- R/get_cox_pairwise_df.R | 33 +++++++++++------------ man/get_cox_pairwise_df.Rd | 2 +- tests/testthat/test-get_cox_pairwise_df.R | 6 ++--- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 67c02717..c64bf59e 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -118,7 +118,7 @@ get_cox_pairwise_df <- function( ) ) { set_cli_abort_call() - + # Input checks if (!rlang::is_formula(model_formula)) { cli::cli_abort( @@ -165,7 +165,7 @@ get_cox_pairwise_df <- function( call = get_cli_abort_call() ) } - + comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] # Explicitly relevel the factor so the reference group is ALWAYS first. @@ -240,7 +240,7 @@ get_cox_pairwise_df <- function( ) ) } - + coin_formula <- .check_and_rewrite_formula(formula, arm) test_result <- coin::logrank_test( @@ -250,20 +250,19 @@ get_cox_pairwise_df <- function( ) p_value <- as.numeric(coin::pvalue(test_result)) - } else { - # Fit the full Cox model + # Fit the full Cox model fit_full <- survival::coxph(formula, data = data, ties = ties) # Safely create the reduced formula by dropping the arm variable reduced_formula <- stats::update(formula, paste(". ~ . -", arm)) - + # Fit the reduced model explicitly to avoid drop1 environment scope errors fit_reduced <- survival::coxph(reduced_formula, data = data, ties = ties) - + # Execute the nested Likelihood-Ratio Test anova_res <- stats::anova(fit_reduced, fit_full, test = "Chisq") - + # Extract the p-value for the second row (the full model comparison) p_value <- anova_res[["Pr(>|Chi|)"]][[2]] } @@ -289,11 +288,11 @@ get_cox_pairwise_df <- function( # Safely identify strata calls directly from the right-hand side labels strata_idx <- grep("^strata\\(", term_labels) strata_calls <- term_labels[strata_idx] - + # --- 1. GUARDRAIL VALIDATION --- valid_terms <- c(arm, strata_calls) invalid_terms <- setdiff(term_labels, valid_terms) - + if (length(invalid_terms) > 0) { cli::cli_abort( paste( @@ -316,7 +315,7 @@ get_cox_pairwise_df <- function( })) lhs_terms <- arm - + # If there are multiple strata variables, wrap them in interaction() # If single, force as.factor() to satisfy coin::logrank_test requirements if (length(strata_vars) > 1) { @@ -324,7 +323,7 @@ get_cox_pairwise_df <- function( } else { block_str <- paste0("as.factor(", strata_vars[1], ")") } - + rhs_str <- paste(lhs_terms, "|", block_str) # Rebuild the formula preserving the exact LHS response and original environment @@ -353,11 +352,11 @@ get_cox_pairwise_df <- function( # Safely identify strata calls directly from the right-hand side labels strata_idx <- grep("^strata\\(", term_labels) strata_calls <- term_labels[strata_idx] - + # --- 1. GUARDRAIL VALIDATION --- valid_terms <- c(arm, strata_calls) invalid_terms <- setdiff(term_labels, valid_terms) - + if (length(invalid_terms) > 0) { cli::cli_abort( paste( @@ -380,7 +379,7 @@ get_cox_pairwise_df <- function( })) lhs_terms <- arm - + # If there are multiple strata variables, wrap them in interaction() # If single, force as.factor() to satisfy coin::logrank_test requirements if (length(strata_vars) > 1) { @@ -388,7 +387,7 @@ get_cox_pairwise_df <- function( } else { block_str <- paste0("as.factor(", strata_vars[1], ")") } - + rhs_str <- paste(lhs_terms, "|", block_str) # Rebuild the formula preserving the exact LHS response and original environment @@ -397,4 +396,4 @@ get_cox_pairwise_df <- function( response = formula[[2]], env = environment(formula) ) -} \ No newline at end of file +} diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index a160d412..49bbeaeb 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -73,7 +73,7 @@ a nested \code{survival::coxph()} LRT for the likelihood-ratio test. } } \examples{ -\dontshow{if (requireNamespace("survival", quietly = TRUE)) withAutoprint(\{ # examplesIf} +\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Example data setup (assuming 'time' is event time, 'status' # is event indicator (1=event), and 'arm' is the treatment group) # for data handling diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index 5e8b2c89..13de7dae 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -1,6 +1,6 @@ skip_if_pkg_not_installed(c("survival", "dplyr", "coin")) -# Setup shared test data +# Setup shared test data set.seed(42) test_df_2grp <- survival::veteran |> dplyr::mutate( @@ -199,7 +199,7 @@ test_that("get_cox_pairwise_df() enforces guardrail against covariates for non-p ), "does not support covariate adjustment for: `karno`" ) - + # coin log-rank should reject even if strata is present alongside a covariate expect_error( get_cox_pairwise_df( @@ -261,7 +261,7 @@ test_that("get_cox_pairwise_df() works for formula with complex strata()", { ) expect_s3_class(res_strata_multi, "data.frame") expect_false(anyNA(res_strata_multi[["p-value (log-rank)"]])) - + # 3. Ensure the likelihood-ratio test properly handles complex strata natively expect_no_error( suppressWarnings( From 56145036dfa0af3746ee1c8b90eae57e511f5546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 12:33:52 +0200 Subject: [PATCH 19/23] remove duplicated helper --- R/get_cox_pairwise_df.R | 64 ----------------------------------------- 1 file changed, 64 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index c64bf59e..6a40596b 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -333,67 +333,3 @@ get_cox_pairwise_df <- function( env = environment(formula) ) } - -#' Rewrite survival formula with strata() for specific models -#' -#' @description -#' Parses a survival formula and safely translates `strata()` wrappers into -#' syntaxes supported by target modeling engines. -#' -#' @param formula (`formula`)\cr The original survival formula. -#' @param arm (`character`)\cr The name of the primary arm variable. -#' -#' @returns A modified `formula` with `strata()` terms properly translated. -#' @noRd -.check_and_rewrite_formula <- function(formula, arm) { - f_terms <- stats::terms(formula) - term_labels <- attr(f_terms, "term.labels") - - # Safely identify strata calls directly from the right-hand side labels - strata_idx <- grep("^strata\\(", term_labels) - strata_calls <- term_labels[strata_idx] - - # --- 1. GUARDRAIL VALIDATION --- - valid_terms <- c(arm, strata_calls) - invalid_terms <- setdiff(term_labels, valid_terms) - - if (length(invalid_terms) > 0) { - cli::cli_abort( - paste( - "This log-rank test method does not support covariate adjustment", - "for: {.var {invalid_terms}}.", - "Please use stratification (e.g., {.code ~ {arm} + strata(var)})", - "or switch to {.code test = 'likelihood-ratio'}." - ) - ) - } - - # If there are no strata terms, return the formula completely untouched - if (length(strata_idx) == 0) { - return(formula) - } - - # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments - strata_vars <- unlist(lapply(strata_calls, function(x) { - sapply(as.list(str2lang(x))[-1], deparse) - })) - - lhs_terms <- arm - - # If there are multiple strata variables, wrap them in interaction() - # If single, force as.factor() to satisfy coin::logrank_test requirements - if (length(strata_vars) > 1) { - block_str <- paste0("interaction(", paste(strata_vars, collapse = ", "), ")") - } else { - block_str <- paste0("as.factor(", strata_vars[1], ")") - } - - rhs_str <- paste(lhs_terms, "|", block_str) - - # Rebuild the formula preserving the exact LHS response and original environment - stats::reformulate( - termlabels = rhs_str, - response = formula[[2]], - env = environment(formula) - ) -} From b51db835338c4c6ca966fc3a85e83d4389c920e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 14:17:35 +0200 Subject: [PATCH 20/23] Changes based on a review --- R/annotate_gg_km.R | 2 +- R/get_cox_pairwise_df.R | 10 ++-------- R/gg_km.R | 2 +- R/import-standalone-checks.R | 13 +++++++++++++ R/tbl_coxph.R | 2 +- man/annotate_gg_km.Rd | 2 +- man/get_cox_pairwise_df.Rd | 2 +- man/gg_km.Rd | 2 +- man/tbl_coxph.Rd | 2 +- 9 files changed, 22 insertions(+), 15 deletions(-) diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R index 23ee7779..16b79ca5 100644 --- a/R/annotate_gg_km.R +++ b/R/annotate_gg_km.R @@ -46,7 +46,7 @@ #' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for #' related functionalities. #' -#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Preparing the Kaplan-Meier Plot #' library(survival) #' use_lung <- lung diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 6a40596b..1fc5f0e1 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -53,7 +53,7 @@ #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`, #' `coin::logrank_test()`. #' -#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Example data setup (assuming 'time' is event time, 'status' #' # is event indicator (1=event), and 'arm' is the treatment group) #' # for data handling @@ -127,13 +127,7 @@ get_cox_pairwise_df <- function( ) } - formula_str <- paste(deparse(model_formula), collapse = " ") - if (grepl("\\b[a-zA-Z][a-zA-Z0-9.]*::", formula_str)) { - cli::cli_abort( - "{.arg model_formula} must be specified without namespace.", - call = get_cli_abort_call() - ) - } + check_formula_for_namespace(model_formula) if (!is.factor(data[[arm]])) { cli::cli_abort( diff --git a/R/gg_km.R b/R/gg_km.R index 2198871b..8d12ab3f 100644 --- a/R/gg_km.R +++ b/R/gg_km.R @@ -28,7 +28,7 @@ NULL #' Data setup assumes `"time"` is event time, `"status"` is event indicator (`1` represents an event), #' while `"arm"` is the treatment group. #' -#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Data preparation for KM plot #' library(survival) #' use_lung <- lung diff --git a/R/import-standalone-checks.R b/R/import-standalone-checks.R index 7d94634d..4d4f1dde 100644 --- a/R/import-standalone-checks.R +++ b/R/import-standalone-checks.R @@ -604,5 +604,18 @@ check_numeric <- function(x, invisible(x) } +#' Check if formula contains namespace +#' @keywords internal +#' @noRd +check_formula_for_namespace <- function(formula) { + formula_str <- paste(deparse(model_formula), collapse = " ") + if (grepl("\\b[a-zA-Z][a-zA-Z0-9.]*::", formula_str)) { + cli::cli_abort( + "{.arg model_formula} must be specified without namespace.", + call = get_cli_abort_call() + ) + } +} + # nocov end # styler: on diff --git a/R/tbl_coxph.R b/R/tbl_coxph.R index cb45a0a4..40a1c96c 100644 --- a/R/tbl_coxph.R +++ b/R/tbl_coxph.R @@ -15,7 +15,7 @@ #' #' @seealso `get_cox_pairwise_df()`. #' -#' @examplesIf requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE) +#' @examplesIf requireNamespace("coin", quietly = TRUE) #' # Setup sample survival data #' library(survival) #' surv_data <- lung |> diff --git a/man/annotate_gg_km.Rd b/man/annotate_gg_km.Rd index a4a73f4f..baefc74f 100644 --- a/man/annotate_gg_km.Rd +++ b/man/annotate_gg_km.Rd @@ -107,7 +107,7 @@ Proportional Hazards summary table as an annotation box. }} \examples{ -\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Preparing the Kaplan-Meier Plot library(survival) use_lung <- lung diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index 49bbeaeb..f4fcd977 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -73,7 +73,7 @@ a nested \code{survival::coxph()} LRT for the likelihood-ratio test. } } \examples{ -\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Example data setup (assuming 'time' is event time, 'status' # is event indicator (1=event), and 'arm' is the treatment group) # for data handling diff --git a/man/gg_km.Rd b/man/gg_km.Rd index 8ddbf0d3..f16ad0a6 100644 --- a/man/gg_km.Rd +++ b/man/gg_km.Rd @@ -92,7 +92,7 @@ support for various customizations like censoring marks, Confidence Intervals ( }} \examples{ -\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Data preparation for KM plot library(survival) use_lung <- lung diff --git a/man/tbl_coxph.Rd b/man/tbl_coxph.Rd index 670f0fe3..91ea1b92 100644 --- a/man/tbl_coxph.Rd +++ b/man/tbl_coxph.Rd @@ -22,7 +22,7 @@ presenting the p-value, Hazard Ratio, and 95\% Confidence Interval in a stacked layout where statistics form the rows of the table. } \examples{ -\dontshow{if (requireNamespace("survival", quietly = TRUE) && requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} +\dontshow{if (requireNamespace("coin", quietly = TRUE)) withAutoprint(\{ # examplesIf} # Setup sample survival data library(survival) surv_data <- lung |> From fc73f3f76dd064c4234ddc121d2191e24a5faa25 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 14:29:25 +0200 Subject: [PATCH 21/23] Update `check_formula_for_namespace()` --- R/import-standalone-checks.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/import-standalone-checks.R b/R/import-standalone-checks.R index 4d4f1dde..703ad855 100644 --- a/R/import-standalone-checks.R +++ b/R/import-standalone-checks.R @@ -608,10 +608,10 @@ check_numeric <- function(x, #' @keywords internal #' @noRd check_formula_for_namespace <- function(formula) { - formula_str <- paste(deparse(model_formula), collapse = " ") + formula_str <- paste(deparse(formula), collapse = " ") if (grepl("\\b[a-zA-Z][a-zA-Z0-9.]*::", formula_str)) { cli::cli_abort( - "{.arg model_formula} must be specified without namespace.", + "{.arg formula} must be specified without namespace.", call = get_cli_abort_call() ) } From b948396a9584020cc7ea29bf5f5c68df8d8bd240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 15:20:11 +0200 Subject: [PATCH 22/23] Use survival for logrank, add ... to get_cox_pairwise_df --- R/get_cox_pairwise_df.R | 126 ++++++++++++++++------ R/import-standalone-checks.R | 13 --- R/utils.R | 13 +++ man/get_cox_pairwise_df.Rd | 9 +- tests/testthat/test-get_cox_pairwise_df.R | 71 +++++++----- 5 files changed, 159 insertions(+), 73 deletions(-) diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R index 1fc5f0e1..325d35d2 100644 --- a/R/get_cox_pairwise_df.R +++ b/R/get_cox_pairwise_df.R @@ -31,12 +31,17 @@ #' A string specifying the type of test to compute the p-value. #' Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto", #' "prentice" (modified peto), "fleming-harrington", or "likelihood-ratio". +#' @param ... Additional arguments passed to `survival::coxph()` and `summary()` +#' two arguments are supported: +#' `conf.int` (default `0.95`) to adjust the CI level; +#' `robust = TRUE` to compute robust standard errors; #' #' @return A `data.frame` with one row per comparison arm (stored as rownames). #' The columns are: #' \itemize{ #' \item `HR`: The Hazard Ratio formatted to two decimal places. #' \item `95% CI`: The 95% Wald confidence interval as `"(lower, upper)"`. +#' Setting `robust = TRUE` results in robust sandwich CIs. #' \item `p-value ()`: The p-value from the selected `test`, where #' `` is the title-cased test name (e.g., `"p-value (log-rank)"`). #' } @@ -115,11 +120,37 @@ get_cox_pairwise_df <- function( "prentice", "fleming-harrington", "likelihood-ratio" - ) + ), + ... ) { set_cli_abort_call() + # Parse the dot-dot-dot list + dots <- list(...) + + # Extract explicit parameters, applying defaults if missing + conf_int <- if ("conf.int" %in% names(dots)) dots[["conf.int"]] else 0.95 + robust <- if ("robust" %in% names(dots)) dots[["robust"]] else FALSE + + invalid_dots <- setdiff(names(dots), c("robust", "conf.int")) # Input checks + + # Check for invalid ... arguments + if (length(invalid_dots) > 0) { + cli::cli_abort( + c( + "Invalid argument{?s} passed via {.code ...}.", + "i" = "Only {.arg robust} and {.arg conf.int} are supported at the moment.", + "x" = "Unrecognized argument{?s}: {.arg {invalid_dots}}." + ), + call = get_cli_abort_call() + ) + } + + check_numeric(conf_int) + check_scalar_range(conf_int, c(0, 1), include_bounds = FALSE) + check_logical(robust) + if (!rlang::is_formula(model_formula)) { cli::cli_abort( "{.arg model_formula} must be a {.cls formula}.", @@ -127,7 +158,7 @@ get_cox_pairwise_df <- function( ) } - check_formula_for_namespace(model_formula) + .check_formula_for_namespace(model_formula) if (!is.factor(data[[arm]])) { cli::cli_abort( @@ -137,8 +168,23 @@ get_cox_pairwise_df <- function( } ties <- match.arg(ties) + if (robust && ties == "exact") { + cli::cli_abort( + c( + "Robust standard errors cannot be calculated with the {.val exact} tie-handling method.", + "i" = "Please change {.arg ties} to {.val efron} or {.val breslow} when using {.code robust = TRUE}." + ), + call = get_cli_abort_call() + ) + } + test <- match.arg(test) + # Prepare remaining dots for coxph (remove conf.int so it doesn't crash coxph) + coxph_dots <- dots + coxph_dots[["conf.int"]] <- NULL + coxph_dots[["robust"]] <- robust + # Determine reference and comparison groups ref_group <- if (!is.null(ref_group)) { ref_group @@ -165,13 +211,15 @@ get_cox_pairwise_df <- function( # Explicitly relevel the factor so the reference group is ALWAYS first. comp_df[[arm]] <- factor(comp_df[[arm]], levels = c(ref_group, current_arm)) - # Wald CIs are calculated with all ties methods - coxph_ans <- survival::coxph( - formula = model_formula, - data = comp_df, - ties = ties - ) |> summary() + # Safely inject the remaining dots into the coxph call + cox_args <- c( + list(formula = model_formula, data = comp_df, ties = ties), + coxph_dots + ) + fit_full <- do.call(survival::coxph, cox_args) + # Extract summary with our specified conf.int + coxph_ans <- summary(fit_full, conf.int = conf_int) # Pass the 'ties' argument down so the LRT matches the main Cox estimate log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm, ties) @@ -195,7 +243,7 @@ get_cox_pairwise_df <- function( names(res) <- c( "HR", - "95% CI", + paste0(conf_int * 100, "% CI"), paste0("p-value (", test_name, ")") ) res @@ -203,7 +251,8 @@ get_cox_pairwise_df <- function( #' Estimate p-value for a pairwise survival comparison #' -#' Dispatches to `coin::logrank_test()` for weighted log-rank variants +#' Dispatches to `survival::survdiff()` for standard log-rank, +#' `coin::logrank_test()` for weighted log-rank variants, #' or to a nested likelihood-ratio test via `survival::coxph()`. #' #' @param formula (`formula`)\cr survival formula, e.g. `Surv(time, status) ~ arm`. @@ -215,17 +264,39 @@ get_cox_pairwise_df <- function( #' @returns A single numeric p-value. #' @noRd .estimate_p_value <- function(formula, data, test, arm, ties) { - test_type <- switch(test, - "log-rank" = "logrank", - "gehan-breslow" = "Gehan-Breslow", - "tarone" = "Tarone-Ware", - "peto" = "Peto-Peto", - "prentice" = "Prentice", - "fleming-harrington" = "Fleming-Harrington", - "likelihood-ratio" = "lr" - ) + if (test == "log-rank") { + # --- 1. Standard Log-Rank via survival (Matches SAS & rtables) --- + sdiff <- survival::survdiff(formula, data = data) + + # Calculate the asymptotic p-value using the chi-square statistic + # Degrees of freedom is (number of groups - 1) + p_value <- stats::pchisq(sdiff$chisq, df = length(sdiff$n) - 1, lower.tail = FALSE) + } else if (test == "likelihood-ratio") { + # --- 2. Likelihood-Ratio Test via survival --- + # Fit the full Cox model + fit_full <- survival::coxph(formula, data = data, ties = ties) + + # Safely create the reduced formula by dropping the arm variable + reduced_formula <- stats::update(formula, paste(". ~ . -", arm)) + + # Fit the reduced model explicitly to avoid drop1 environment scope errors + fit_reduced <- survival::coxph(reduced_formula, data = data, ties = ties) + + # Execute the nested Likelihood-Ratio Test + anova_res <- stats::anova(fit_reduced, fit_full, test = "Chisq") + + # Extract the p-value for the second row (the full model comparison) + p_value <- anova_res[["Pr(>|Chi|)"]][[2]] + } else { + # --- 3. Weighted Variants via coin --- + test_type <- switch(test, + "gehan-breslow" = "Gehan-Breslow", + "tarone" = "Tarone-Ware", + "peto" = "Peto-Peto", + "prentice" = "Prentice", + "fleming-harrington" = "Fleming-Harrington" + ) - if (test_type != "lr") { if (!requireNamespace("coin", quietly = TRUE)) { cli::cli_abort( paste( @@ -244,21 +315,6 @@ get_cox_pairwise_df <- function( ) p_value <- as.numeric(coin::pvalue(test_result)) - } else { - # Fit the full Cox model - fit_full <- survival::coxph(formula, data = data, ties = ties) - - # Safely create the reduced formula by dropping the arm variable - reduced_formula <- stats::update(formula, paste(". ~ . -", arm)) - - # Fit the reduced model explicitly to avoid drop1 environment scope errors - fit_reduced <- survival::coxph(reduced_formula, data = data, ties = ties) - - # Execute the nested Likelihood-Ratio Test - anova_res <- stats::anova(fit_reduced, fit_full, test = "Chisq") - - # Extract the p-value for the second row (the full model comparison) - p_value <- anova_res[["Pr(>|Chi|)"]][[2]] } p_value diff --git a/R/import-standalone-checks.R b/R/import-standalone-checks.R index 703ad855..7d94634d 100644 --- a/R/import-standalone-checks.R +++ b/R/import-standalone-checks.R @@ -604,18 +604,5 @@ check_numeric <- function(x, invisible(x) } -#' Check if formula contains namespace -#' @keywords internal -#' @noRd -check_formula_for_namespace <- function(formula) { - formula_str <- paste(deparse(formula), collapse = " ") - if (grepl("\\b[a-zA-Z][a-zA-Z0-9.]*::", formula_str)) { - cli::cli_abort( - "{.arg formula} must be specified without namespace.", - call = get_cli_abort_call() - ) - } -} - # nocov end # styler: on diff --git a/R/utils.R b/R/utils.R index c3e96964..05e4a3b7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -55,3 +55,16 @@ case_switch <- function(..., .default = NULL) { return(0) } } + +#' Check if formula contains namespace +#' @keywords internal +#' @noRd +.check_formula_for_namespace <- function(formula) { + formula_str <- paste(deparse(formula), collapse = " ") + if (grepl("\\b[a-zA-Z][a-zA-Z0-9.]*::", formula_str)) { + cli::cli_abort( + "{.arg formula} must be specified without namespace.", + call = get_cli_abort_call() + ) + } +} diff --git a/man/get_cox_pairwise_df.Rd b/man/get_cox_pairwise_df.Rd index f4fcd977..ce4297e9 100644 --- a/man/get_cox_pairwise_df.Rd +++ b/man/get_cox_pairwise_df.Rd @@ -11,7 +11,8 @@ get_cox_pairwise_df( ref_group = NULL, ties = c("exact", "efron", "breslow"), test = c("log-rank", "gehan-breslow", "tarone", "peto", "prentice", - "fleming-harrington", "likelihood-ratio") + "fleming-harrington", "likelihood-ratio"), + ... ) } \arguments{ @@ -45,6 +46,11 @@ Default is "exact".} A string specifying the type of test to compute the p-value. Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto", "prentice" (modified peto), "fleming-harrington", or "likelihood-ratio".} + +\item{...}{Additional arguments passed to \code{survival::coxph()} and \code{summary()} +two arguments are supported: +\code{conf.int} (default \code{0.95}) to adjust the CI level; +\code{robust = TRUE} to compute robust standard errors;} } \value{ A \code{data.frame} with one row per comparison arm (stored as rownames). @@ -52,6 +58,7 @@ The columns are: \itemize{ \item \code{HR}: The Hazard Ratio formatted to two decimal places. \item \verb{95\% CI}: The 95\% Wald confidence interval as \code{"(lower, upper)"}. +Setting \code{robust = TRUE} results in robust sandwich CIs. \item \verb{p-value ()}: The p-value from the selected \code{test}, where \verb{} is the title-cased test name (e.g., \code{"p-value (log-rank)"}). } diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index 13de7dae..360240cd 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -188,30 +188,6 @@ test_that("get_cox_pairwise_df() catches invalid 'ties' and 'test' arguments", { ) }) -test_that("get_cox_pairwise_df() enforces guardrail against covariates for non-parametric tests", { - # coin log-rank should reject formulas with continuous adjustment covariates - expect_error( - get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + karno, - data = test_df_2grp, - arm = "treatment", - test = "log-rank" - ), - "does not support covariate adjustment for: `karno`" - ) - - # coin log-rank should reject even if strata is present alongside a covariate - expect_error( - get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + strata(celltype) + age, - data = test_df_2grp, - arm = "treatment", - test = "tarone" - ), - "does not support covariate adjustment for: `age`" - ) -}) - test_that("get_cox_pairwise_df() works for formula with covariates via likelihood-ratio", { # Likelihood-ratio natively handles right-hand side continuous covariates expect_no_error( @@ -276,3 +252,50 @@ test_that("get_cox_pairwise_df() works for formula with complex strata()", { expect_s3_class(res_strata_cox, "data.frame") expect_false(anyNA(res_strata_cox[["p-value (Likelihood-Ratio)"]])) }) + +test_that("get_cox_pairwise_df() respects conf.int passed via ...", { + # Run with default 95% CI + res_95 <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment" + ) + + # Run with explicit 99% CI + expect_no_error( + res_99 <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + conf.int = 0.99 + ) + ) + + # Check that column names updated dynamically + expect_true("99% CI" %in% names(res_99)) + expect_false("95% CI" %in% names(res_99)) + + # The 99% CI should be wider/different from the 95% CI + expect_false(res_95[["95% CI"]] == res_99[["99% CI"]]) +}) + +test_that("get_cox_pairwise_df() handles robust = TRUE correctly via ...", { + # We test with the likelihood-ratio test to ensure the `robust` argument + # successfully propagates down into the nested LRT models inside .estimate_p_value + expect_no_error( + suppressWarnings( + res_robust <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio", + ties = "efron", + robust = TRUE + ) + ) + ) + + expect_s3_class(res_robust, "data.frame") + expect_false(anyNA(res_robust[["HR"]])) + expect_false(anyNA(res_robust[["p-value (Likelihood-Ratio)"]])) +}) From 30497770ee8d67cf20d9af902123b272ce5614de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Szczypi=C5=84ski?= Date: Wed, 27 May 2026 15:32:12 +0200 Subject: [PATCH 23/23] Add a test for formula check + remove supressWarnings --- .../test-check_formula_for_namespace.R | 60 +++++++++ tests/testthat/test-get_cox_pairwise_df.R | 114 ++++++++---------- 2 files changed, 108 insertions(+), 66 deletions(-) create mode 100644 tests/testthat/test-check_formula_for_namespace.R diff --git a/tests/testthat/test-check_formula_for_namespace.R b/tests/testthat/test-check_formula_for_namespace.R new file mode 100644 index 00000000..eafd8477 --- /dev/null +++ b/tests/testthat/test-check_formula_for_namespace.R @@ -0,0 +1,60 @@ +# test-check_formula_for_namespace.R + +test_that(".check_formula_for_namespace() passes valid formulas", { + # Simple formula + expect_no_error(.check_formula_for_namespace(y ~ x)) + + # Survival formula without namespace + expect_no_error(.check_formula_for_namespace(Surv(time, status) ~ arm)) + + # Complex formula with strata + expect_no_error(.check_formula_for_namespace(Surv(time, status) ~ arm + strata(site))) +}) + +test_that(".check_formula_for_namespace() aborts when a namespace is present", { + expected_msg <- "must be specified without namespace" + + # Namespace on a function call (e.g., survival::Surv) + expect_error( + .check_formula_for_namespace(survival::Surv(time, status) ~ arm), + regexp = expected_msg + ) + + # Namespace on the Right-Hand Side variable + expect_error( + .check_formula_for_namespace(Surv(time, status) ~ mypkg::arm), + regexp = expected_msg + ) + + # Namespace on the Left-Hand Side variable + expect_error( + .check_formula_for_namespace(mypkg::y ~ x), + regexp = expected_msg + ) + + # Namespace buried inside a strata() call + expect_error( + .check_formula_for_namespace(Surv(time, status) ~ arm + strata(mypkg::site)), + regexp = expected_msg + ) +}) + +test_that(".check_formula_for_namespace() correctly parses long formulas", { + # deparse() splits long formulas into a character vector of multiple lines. + # This tests that `paste(..., collapse = " ")` effectively reconstructs it + # so the regex doesn't miss namespaces split across lines. + + # Generate a very long valid formula + long_rhs <- paste(paste0("covar_", 1:50), collapse = " + ") + long_formula_valid <- stats::as.formula(paste("Surv(time, status) ~", long_rhs)) + + expect_no_error(.check_formula_for_namespace(long_formula_valid)) + + # Generate a very long invalid formula with a namespace at the very end + long_formula_invalid <- stats::as.formula(paste("Surv(time, status) ~", long_rhs, "+ pkg::bad_var")) + + expect_error( + .check_formula_for_namespace(long_formula_invalid), + regexp = "must be specified without namespace" + ) +}) diff --git a/tests/testthat/test-get_cox_pairwise_df.R b/tests/testthat/test-get_cox_pairwise_df.R index 360240cd..f0ab3b8b 100644 --- a/tests/testthat/test-get_cox_pairwise_df.R +++ b/tests/testthat/test-get_cox_pairwise_df.R @@ -36,13 +36,11 @@ test_that( "get_cox_pairwise_df() returns non-NA hazard ratios with more than two arms", { expect_no_error( - suppressWarnings( - result <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment, - data = test_df_3grp, - arm = "treatment", - ref_group = "Placebo" - ) + result <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment", + ref_group = "Placebo" ) ) expect_s3_class(result, "data.frame") @@ -60,12 +58,10 @@ test_that( test_that("get_cox_pairwise_df() uses first factor level as default ref_group", { expect_no_error( - suppressWarnings( - result <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment, - data = test_df_3grp, - arm = "treatment" - ) + result <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_3grp, + arm = "treatment" ) ) expect_equal(nrow(result), 2L) @@ -118,13 +114,11 @@ test_that("get_cox_pairwise_df() works with all valid 'ties' methods", { for (t_method in ties_methods) { expect_no_error( - suppressWarnings( - res <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment, - data = test_df_2grp, - arm = "treatment", - ties = t_method - ) + res <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + ties = t_method ) ) expect_s3_class(res, "data.frame") @@ -144,13 +138,11 @@ test_that("get_cox_pairwise_df() works with all valid 'test' methods", { for (t_method in test_methods) { expect_no_error( - suppressWarnings( - res <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment, - data = test_df_2grp, - arm = "treatment", - test = t_method - ) + res <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + test = t_method ) ) @@ -191,13 +183,11 @@ test_that("get_cox_pairwise_df() catches invalid 'ties' and 'test' arguments", { test_that("get_cox_pairwise_df() works for formula with covariates via likelihood-ratio", { # Likelihood-ratio natively handles right-hand side continuous covariates expect_no_error( - suppressWarnings( - res_covariate <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + karno, - data = test_df_2grp, - arm = "treatment", - test = "likelihood-ratio" - ) + res_covariate <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + karno, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio" ) ) @@ -209,14 +199,12 @@ test_that("get_cox_pairwise_df() works for formula with covariates via likelihoo test_that("get_cox_pairwise_df() works for formula with complex strata()", { # 1. Test log-rank with a single strata variable (uses coin engine) expect_no_error( - suppressWarnings( - res_strata_lr <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + strata(prior), - data = test_df_2grp, - arm = "treatment", - ties = "efron", - test = "log-rank" - ) + res_strata_lr <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior), + data = test_df_2grp, + arm = "treatment", + ties = "efron", + test = "log-rank" ) ) expect_s3_class(res_strata_lr, "data.frame") @@ -225,14 +213,12 @@ test_that("get_cox_pairwise_df() works for formula with complex strata()", { # 2. Test log-rank with multiple strata variables inside a single strata() call # We use 'prior' and 'trt' (2 levels each) to avoid <2 observation block sparsity expect_no_error( - suppressWarnings( - res_strata_multi <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + strata(prior, trt), - data = test_df_2grp, - arm = "treatment", - ties = "efron", - test = "log-rank" - ) + res_strata_multi <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior, trt), + data = test_df_2grp, + arm = "treatment", + ties = "efron", + test = "log-rank" ) ) expect_s3_class(res_strata_multi, "data.frame") @@ -240,13 +226,11 @@ test_that("get_cox_pairwise_df() works for formula with complex strata()", { # 3. Ensure the likelihood-ratio test properly handles complex strata natively expect_no_error( - suppressWarnings( - res_strata_cox <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment + strata(prior) + karno, - data = test_df_2grp, - arm = "treatment", - test = "likelihood-ratio" - ) + res_strata_cox <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment + strata(prior) + karno, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio" ) ) expect_s3_class(res_strata_cox, "data.frame") @@ -283,15 +267,13 @@ test_that("get_cox_pairwise_df() handles robust = TRUE correctly via ...", { # We test with the likelihood-ratio test to ensure the `robust` argument # successfully propagates down into the nested LRT models inside .estimate_p_value expect_no_error( - suppressWarnings( - res_robust <- get_cox_pairwise_df( - model_formula = Surv(time, event) ~ treatment, - data = test_df_2grp, - arm = "treatment", - test = "likelihood-ratio", - ties = "efron", - robust = TRUE - ) + res_robust <- get_cox_pairwise_df( + model_formula = Surv(time, event) ~ treatment, + data = test_df_2grp, + arm = "treatment", + test = "likelihood-ratio", + ties = "efron", + robust = TRUE ) )