insightsengineering · jszczypinski · May 25, 2026 · May 25, 2026 · May 25, 2026 · May 25, 2026
diff --git a/R/annotate_gg_km.R b/R/annotate_gg_km.R
@@ -46,15 +46,16 @@
 #' @seealso [gg_km()], [process_survfit()], and [get_cox_pairwise_df()] for
 #'   related functionalities.
 #'
-#' @examples
+#' @examplesIf requireNamespace("coin", quietly = TRUE)
 #' # Preparing the Kaplan-Meier Plot
-#' use_lung <- survival::lung
+#' library(survival)
+#' use_lung <- lung
 #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE))
 #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1
 #' use_lung <- na.omit(use_lung)
 #'
-#' formula <- survival::Surv(time, status) ~ arm
-#' fit_kmg01 <- survival::survfit(formula, use_lung)
+#' formula <- Surv(time, status) ~ arm
+#' fit_kmg01 <- survfit(formula, use_lung)
 #' surv_plot_data <- process_survfit(fit_kmg01)
 #'
 #' plt_kmg01 <- gg_km(surv_plot_data)

diff --git a/R/get_cox_pairwise_df.R b/R/get_cox_pairwise_df.R
@@ -26,16 +26,22 @@
 #' @param ties (`character`)\cr
 #'   A string specifying the method for tie handling in the Cox model.
 #'   Must be one of "exact", "efron", or "breslow".
+#'   Default is "exact".
 #' @param test (`character`)\cr
 #'   A string specifying the type of test to compute the p-value.
 #'   Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto",
 #'   "prentice" (modified peto), "fleming-harrington", or "likelihood-ratio".
+#' @param ... Additional arguments passed to `survival::coxph()` and `summary()`
+#'    two arguments are supported:
+#'    `conf.int` (default `0.95`) to adjust the CI level;
+#'   `robust = TRUE` to compute robust standard errors;
 #'
 #' @return A `data.frame` with one row per comparison arm (stored as rownames).
 #' The columns are:
 #' \itemize{
 #'   \item `HR`: The Hazard Ratio formatted to two decimal places.
-#'   \item `95% CI`: The 95\% confidence interval as `"(lower, upper)"`.
+#'   \item `95% CI`: The 95% Wald confidence interval as `"(lower, upper)"`.
+#'      Setting `robust = TRUE` results in robust sandwich CIs.
 #'   \item `p-value (<test>)`: The p-value from the selected `test`, where
 #'     `<test>` is the title-cased test name (e.g., `"p-value (log-rank)"`).
 #' }
@@ -44,28 +50,29 @@
 #'   data to the current arm and the reference arm, and then:
 #'   \itemize{
 #'     \item Fits a Cox model using `survival::coxph()`.
-#'     \item Computes a p-value via [.estimate_p_value()], which dispatches
+#'     \item Computes a p-value, which dispatches
 #'       to `coin::logrank_test()` for weighted log-rank variants or to
-#'       `survival::survreg()` for the likelihood-ratio test.
-#'   }
+#'       a nested `survival::coxph()` LRT for the likelihood-ratio test.
+#' }
 #'
 #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`,
 #'   `coin::logrank_test()`.
 #'
-#' @examples
+#' @examplesIf requireNamespace("coin", quietly = TRUE)
 #' # Example data setup (assuming 'time' is event time, 'status'
 #' # is event indicator (1=event), and 'arm' is the treatment group)
-#' library(dplyr) # For better data handling
-#'
+#' # for data handling
+#' library(dplyr)
+#' library(survival)
 #' # Prepare data in a modern dplyr-friendly way
-#' surv_data <- survival::lung |>
+#' surv_data <- lung |>
 #'   mutate(
 #'     arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)),
 #'     status = status - 1 # Convert status to 0/1
 #'   ) |>
 #'   filter(if_all(everything(), ~ !is.na(.)))
 #'
-#' formula <- survival::Surv(time, status) ~ arm
+#' formula <- Surv(time, status) ~ arm
 #'
 #' # Example 1: Default usage (ties = "exact", test = "log-rank")
 #' results_default <- get_cox_pairwise_df(
@@ -113,16 +120,46 @@ get_cox_pairwise_df <- function(
     "prentice",
     "fleming-harrington",
     "likelihood-ratio"
-  )
+  ),
+  ...
 ) {
   set_cli_abort_call()
+
+  # Parse the dot-dot-dot list
+  dots <- list(...)
+
+  # Extract explicit parameters, applying defaults if missing
+  conf_int <- if ("conf.int" %in% names(dots)) dots[["conf.int"]] else 0.95
+  robust <- if ("robust" %in% names(dots)) dots[["robust"]] else FALSE
+
+  invalid_dots <- setdiff(names(dots), c("robust", "conf.int"))
   # Input checks
+
+  # Check for invalid ... arguments
+  if (length(invalid_dots) > 0) {
+    cli::cli_abort(
+      c(
+        "Invalid argument{?s} passed via {.code ...}.",
+        "i" = "Only {.arg robust} and {.arg conf.int} are supported at the moment.",
+        "x" = "Unrecognized argument{?s}: {.arg {invalid_dots}}."
+      ),
+      call = get_cli_abort_call()
+    )
+  }
+
+  check_numeric(conf_int)
+  check_scalar_range(conf_int, c(0, 1), include_bounds = FALSE)
+  check_logical(robust)
+
   if (!rlang::is_formula(model_formula)) {
     cli::cli_abort(
       "{.arg model_formula} must be a {.cls formula}.",
       call = get_cli_abort_call()
     )
   }
+
+  .check_formula_for_namespace(model_formula)
+
   if (!is.factor(data[[arm]])) {
     cli::cli_abort(
       "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.",
@@ -131,8 +168,23 @@ get_cox_pairwise_df <- function(
   }
 
   ties <- match.arg(ties)
+  if (robust && ties == "exact") {
+    cli::cli_abort(
+      c(
+        "Robust standard errors cannot be calculated with the {.val exact} tie-handling method.",
+        "i" = "Please change {.arg ties} to {.val efron} or {.val breslow} when using {.code robust = TRUE}."
+      ),
+      call = get_cli_abort_call()
+    )
+  }
+
   test <- match.arg(test)
 
+  # Prepare remaining dots for coxph (remove conf.int so it doesn't crash coxph)
+  coxph_dots <- dots
+  coxph_dots[["conf.int"]] <- NULL
+  coxph_dots[["robust"]] <- robust
+
   # Determine reference and comparison groups
   ref_group <- if (!is.null(ref_group)) {
     ref_group
@@ -153,19 +205,23 @@ get_cox_pairwise_df <- function(
         call = get_cli_abort_call()
       )
     }
+
     comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ]
 
-    comp_df[[arm]] <- droplevels(comp_df[[arm]])
+    # Explicitly relevel the factor so the reference group is ALWAYS first.
+    comp_df[[arm]] <- factor(comp_df[[arm]], levels = c(ref_group, current_arm))
 
-    suppressWarnings(
-      coxph_ans <- survival::coxph(
-        formula = model_formula,
-        data = comp_df,
-        ties = ties
-      ) |> summary()
+    # Safely inject the remaining dots into the coxph call
+    cox_args <- c(
+      list(formula = model_formula, data = comp_df, ties = ties),
+      coxph_dots
     )
+    fit_full <- do.call(survival::coxph, cox_args)
 
-    log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm)
+    # Extract summary with our specified conf.int
+    coxph_ans <- summary(fit_full, conf.int = conf_int)
+    # Pass the 'ties' argument down so the LRT matches the main Cox estimate
+    log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm, ties)
 
     conf_int_row <- paste0(arm, current_arm)
     current_row <- data.frame(
@@ -187,73 +243,143 @@ get_cox_pairwise_df <- function(
 
   names(res) <- c(
     "HR",
-    "95% CI",
+    paste0(conf_int * 100, "% CI"),
     paste0("p-value (", test_name, ")")
   )
   res
 }
 
 #' Estimate p-value for a pairwise survival comparison
 #'
-#' Dispatches to `coin::logrank_test()` for weighted log-rank variants
-#' or to a likelihood-ratio test via `survival::survreg()`.
+#' Dispatches to `survival::survdiff()` for standard log-rank,
+#' `coin::logrank_test()` for weighted log-rank variants,
+#' or to a nested likelihood-ratio test via `survival::coxph()`.
 #'
 #' @param formula (`formula`)\cr survival formula, e.g. `Surv(time, status) ~ arm`.
 #' @param data (`data.frame`)\cr subset containing exactly two arm levels.
 #' @param test (`string`)\cr test name as accepted by [get_cox_pairwise_df()].
 #' @param arm (`string`)\cr column name of the arm variable in `data`.
+#' @param ties (`character`)\cr tie handling method for the Cox LRT.
 #'
 #' @returns A single numeric p-value.
-#' @keywords internal
-.estimate_p_value <- function(formula, data, test, arm) {
-  test_type <- switch(test,
-    "log-rank" = "logrank",
-    "gehan-breslow" = "Gehan-Breslow",
-    "tarone" = "Tarone-Ware",
-    "peto" = "Peto-Peto",
-    "prentice" = "Prentice",
-    "fleming-harrington" = "Fleming-Harrington",
-    "likelihood-ratio" = "lr"
-  )
+#' @noRd
+.estimate_p_value <- function(formula, data, test, arm, ties) {
+  if (test == "log-rank") {
+    # --- 1. Standard Log-Rank via survival (Matches SAS & rtables) ---
+    sdiff <- survival::survdiff(formula, data = data)
+
+    # Calculate the asymptotic p-value using the chi-square statistic
+    # Degrees of freedom is (number of groups - 1)
+    p_value <- stats::pchisq(sdiff$chisq, df = length(sdiff$n) - 1, lower.tail = FALSE)
+  } else if (test == "likelihood-ratio") {
+    # --- 2. Likelihood-Ratio Test via survival ---
+    # Fit the full Cox model
+    fit_full <- survival::coxph(formula, data = data, ties = ties)
+
+    # Safely create the reduced formula by dropping the arm variable
+    reduced_formula <- stats::update(formula, paste(". ~ . -", arm))
+
+    # Fit the reduced model explicitly to avoid drop1 environment scope errors
+    fit_reduced <- survival::coxph(reduced_formula, data = data, ties = ties)
 
-  if (test_type != "lr") {
-    rlang::check_installed(
-      "coin",
-      reason = paste("to run log-rank tests using", test_type, "method")
+    # Execute the nested Likelihood-Ratio Test
+    anova_res <- stats::anova(fit_reduced, fit_full, test = "Chisq")
+
+    # Extract the p-value for the second row (the full model comparison)
+    p_value <- anova_res[["Pr(>|Chi|)"]][[2]]
+  } else {
+    # --- 3. Weighted Variants via coin ---
+    test_type <- switch(test,
+      "gehan-breslow" = "Gehan-Breslow",
+      "tarone" = "Tarone-Ware",
+      "peto" = "Peto-Peto",
+      "prentice" = "Prentice",
+      "fleming-harrington" = "Fleming-Harrington"
     )
 
-    if (length(levels(data[[arm]])) != 2) {
-      cli::cli_warn(
+    if (!requireNamespace("coin", quietly = TRUE)) {
+      cli::cli_abort(
         paste(
-          "{.arg arm} does not contain exactly 2 levels!",
-          "This will result in unexpected behavior of pairwise test."
+          "The {.pkg coin} package is required to run",
+          "log-rank tests using the {test_type} method."
         )
       )
     }
 
+    coin_formula <- .check_and_rewrite_formula(formula, arm)
+
     test_result <- coin::logrank_test(
-      formula = formula,
+      formula = coin_formula,
       data = data,
       type = test_type
     )
 
     p_value <- as.numeric(coin::pvalue(test_result))
-  } else {
-    # SAS LR test assumes an exponential distribution
-    # fit the model
-    fit_cov <- survival::survreg(formula, data = data, dist = "exponential")
-
-    # fit the null model
-    drop_arm_formula <- stats::as.formula(paste(". ~ . -", arm))
-    # to account for possible covariates
-    null_formula <- stats::update(formula, drop_arm_formula)
-    fit_null <- survival::survreg(null_formula, data = data, dist = "exponential")
-
-    # calculate the difference and estimate the statistics
-    lrt_stat <- 2 * (fit_cov$loglik[2] - fit_null$loglik[1])
-    df <- fit_cov$df - fit_null$df
-    p_value <- stats::pchisq(lrt_stat, df, lower.tail = FALSE)
   }
 
   p_value
 }
+
+#' Rewrite survival formula with strata() for specific models
+#'
+#' @description
+#' Parses a survival formula and safely translates `strata()` wrappers into
+#' syntaxes supported by target modeling engines.
+#'
+#' @param formula (`formula`)\cr The original survival formula.
+#' @param arm (`character`)\cr The name of the primary arm variable.
+#'
+#' @returns A modified `formula` with `strata()` terms properly translated.
+#' @noRd
+.check_and_rewrite_formula <- function(formula, arm) {
+  f_terms <- stats::terms(formula)
+  term_labels <- attr(f_terms, "term.labels")
+
+  # Safely identify strata calls directly from the right-hand side labels
+  strata_idx <- grep("^strata\\(", term_labels)
+  strata_calls <- term_labels[strata_idx]
+
+  # --- 1. GUARDRAIL VALIDATION ---
+  valid_terms <- c(arm, strata_calls)
+  invalid_terms <- setdiff(term_labels, valid_terms)
+
+  if (length(invalid_terms) > 0) {
+    cli::cli_abort(
+      paste(
+        "This log-rank test method does not support covariate adjustment",
+        "for: {.var {invalid_terms}}.",
+        "Please use stratification (e.g., {.code ~ {arm} + strata(var)})",
+        "or switch to {.code test = 'likelihood-ratio'}."
+      )
+    )
+  }
+
+  # If there are no strata terms, return the formula completely untouched
+  if (length(strata_idx) == 0) {
+    return(formula)
+  }
+
+  # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments
+  strata_vars <- unlist(lapply(strata_calls, function(x) {
+    sapply(as.list(str2lang(x))[-1], deparse)
+  }))
+
+  lhs_terms <- arm
+
+  # If there are multiple strata variables, wrap them in interaction()
+  # If single, force as.factor() to satisfy coin::logrank_test requirements
+  if (length(strata_vars) > 1) {
+    block_str <- paste0("interaction(", paste(strata_vars, collapse = ", "), ")")
+  } else {
+    block_str <- paste0("as.factor(", strata_vars[1], ")")
+  }
+
+  rhs_str <- paste(lhs_terms, "|", block_str)
+
+  # Rebuild the formula preserving the exact LHS response and original environment
+  stats::reformulate(
+    termlabels = rhs_str,
+    response = formula[[2]],
+    env = environment(formula)
+  )
+}
diff --git a/R/gg_km.R b/R/gg_km.R
@@ -28,16 +28,17 @@ NULL
 #' Data setup assumes `"time"` is event time, `"status"` is event indicator (`1` represents an event),
 #' while `"arm"` is the treatment group.
 #'
-#' @examples
+#' @examplesIf requireNamespace("coin", quietly = TRUE)
 #' # Data preparation for KM plot
-#' use_lung <- survival::lung
+#' library(survival)
+#' use_lung <- lung
 #' use_lung$arm <- factor(sample(c("A", "B", "C"), nrow(use_lung), replace = TRUE))
 #' use_lung$status <- use_lung$status - 1 # Convert status to 0/1
 #' use_lung <- na.omit(use_lung)
 #'
 #' # Fit Kaplan-Meier model
-#' formula <- survival::Surv(time, status) ~ arm
-#' fit_kmg01 <- survival::survfit(formula, use_lung)
+#' formula <- Surv(time, status) ~ arm
+#' fit_kmg01 <- survfit(formula, use_lung)
 #'
 #' # Process survfit data for plotting
 #' surv_plot_data <- process_survfit(fit_kmg01)