-
-
Notifications
You must be signed in to change notification settings - Fork 4
256 get cox pairwise df strata in formulas #258
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
5841abc
82e5563
93da9de
c760f77
e66f180
59447a4
6900964
ed7124b
3e62d54
33fa118
71b2210
32c059c
510f066
0da95c4
51b1c81
2d09123
e8ff931
f482bfa
39054ae
822e50b
5614503
b51db83
fc73f3f
b948396
3049777
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -26,16 +26,22 @@ | |
| #' @param ties (`character`)\cr | ||
| #' A string specifying the method for tie handling in the Cox model. | ||
| #' Must be one of "exact", "efron", or "breslow". | ||
| #' Default is "exact". | ||
| #' @param test (`character`)\cr | ||
| #' A string specifying the type of test to compute the p-value. | ||
| #' Must be one of "log-rank", "gehan-breslow" (wilcoxon), "tarone", "peto", | ||
| #' "prentice" (modified peto), "fleming-harrington", or "likelihood-ratio". | ||
| #' @param ... Additional arguments passed to `survival::coxph()` and `summary()` | ||
| #' two arguments are supported: | ||
| #' `conf.int` (default `0.95`) to adjust the CI level; | ||
| #' `robust = TRUE` to compute robust standard errors; | ||
| #' | ||
| #' @return A `data.frame` with one row per comparison arm (stored as rownames). | ||
| #' The columns are: | ||
| #' \itemize{ | ||
| #' \item `HR`: The Hazard Ratio formatted to two decimal places. | ||
| #' \item `95% CI`: The 95\% confidence interval as `"(lower, upper)"`. | ||
| #' \item `95% CI`: The 95% Wald confidence interval as `"(lower, upper)"`. | ||
| #' Setting `robust = TRUE` results in robust sandwich CIs. | ||
| #' \item `p-value (<test>)`: The p-value from the selected `test`, where | ||
| #' `<test>` is the title-cased test name (e.g., `"p-value (log-rank)"`). | ||
| #' } | ||
|
|
@@ -44,28 +50,29 @@ | |
| #' data to the current arm and the reference arm, and then: | ||
| #' \itemize{ | ||
| #' \item Fits a Cox model using `survival::coxph()`. | ||
| #' \item Computes a p-value via [.estimate_p_value()], which dispatches | ||
| #' \item Computes a p-value, which dispatches | ||
| #' to `coin::logrank_test()` for weighted log-rank variants or to | ||
| #' `survival::survreg()` for the likelihood-ratio test. | ||
| #' } | ||
| #' a nested `survival::coxph()` LRT for the likelihood-ratio test. | ||
| #' } | ||
| #' | ||
| #' @seealso `annotate_gg_km()`, `gg_km()`, `survival::coxph()`, | ||
| #' `coin::logrank_test()`. | ||
| #' | ||
| #' @examples | ||
| #' @examplesIf requireNamespace("coin", quietly = TRUE) | ||
| #' # Example data setup (assuming 'time' is event time, 'status' | ||
| #' # is event indicator (1=event), and 'arm' is the treatment group) | ||
| #' library(dplyr) # For better data handling | ||
| #' | ||
| #' # for data handling | ||
| #' library(dplyr) | ||
| #' library(survival) | ||
| #' # Prepare data in a modern dplyr-friendly way | ||
| #' surv_data <- survival::lung |> | ||
| #' surv_data <- lung |> | ||
| #' mutate( | ||
| #' arm = factor(sample(c("A", "B", "C"), n(), replace = TRUE)), | ||
| #' status = status - 1 # Convert status to 0/1 | ||
| #' ) |> | ||
| #' filter(if_all(everything(), ~ !is.na(.))) | ||
| #' | ||
| #' formula <- survival::Surv(time, status) ~ arm | ||
| #' formula <- Surv(time, status) ~ arm | ||
| #' | ||
| #' # Example 1: Default usage (ties = "exact", test = "log-rank") | ||
| #' results_default <- get_cox_pairwise_df( | ||
|
|
@@ -113,16 +120,46 @@ get_cox_pairwise_df <- function( | |
| "prentice", | ||
| "fleming-harrington", | ||
| "likelihood-ratio" | ||
| ) | ||
| ), | ||
| ... | ||
| ) { | ||
| set_cli_abort_call() | ||
|
|
||
| # Parse the dot-dot-dot list | ||
| dots <- list(...) | ||
|
|
||
| # Extract explicit parameters, applying defaults if missing | ||
| conf_int <- if ("conf.int" %in% names(dots)) dots[["conf.int"]] else 0.95 | ||
| robust <- if ("robust" %in% names(dots)) dots[["robust"]] else FALSE | ||
|
|
||
| invalid_dots <- setdiff(names(dots), c("robust", "conf.int")) | ||
| # Input checks | ||
|
|
||
| # Check for invalid ... arguments | ||
| if (length(invalid_dots) > 0) { | ||
| cli::cli_abort( | ||
| c( | ||
| "Invalid argument{?s} passed via {.code ...}.", | ||
| "i" = "Only {.arg robust} and {.arg conf.int} are supported at the moment.", | ||
| "x" = "Unrecognized argument{?s}: {.arg {invalid_dots}}." | ||
| ), | ||
| call = get_cli_abort_call() | ||
| ) | ||
| } | ||
|
|
||
| check_numeric(conf_int) | ||
| check_scalar_range(conf_int, c(0, 1), include_bounds = FALSE) | ||
| check_logical(robust) | ||
|
|
||
| if (!rlang::is_formula(model_formula)) { | ||
| cli::cli_abort( | ||
| "{.arg model_formula} must be a {.cls formula}.", | ||
| call = get_cli_abort_call() | ||
| ) | ||
| } | ||
|
|
||
| .check_formula_for_namespace(model_formula) | ||
|
|
||
| if (!is.factor(data[[arm]])) { | ||
| cli::cli_abort( | ||
| "Column {.arg {data}[[\"{.var {arm}}\"]]} must be a {.cls factor}.", | ||
|
|
@@ -131,8 +168,23 @@ get_cox_pairwise_df <- function( | |
| } | ||
|
|
||
| ties <- match.arg(ties) | ||
| if (robust && ties == "exact") { | ||
| cli::cli_abort( | ||
| c( | ||
| "Robust standard errors cannot be calculated with the {.val exact} tie-handling method.", | ||
| "i" = "Please change {.arg ties} to {.val efron} or {.val breslow} when using {.code robust = TRUE}." | ||
| ), | ||
| call = get_cli_abort_call() | ||
| ) | ||
| } | ||
|
|
||
| test <- match.arg(test) | ||
|
|
||
| # Prepare remaining dots for coxph (remove conf.int so it doesn't crash coxph) | ||
| coxph_dots <- dots | ||
| coxph_dots[["conf.int"]] <- NULL | ||
| coxph_dots[["robust"]] <- robust | ||
|
|
||
| # Determine reference and comparison groups | ||
| ref_group <- if (!is.null(ref_group)) { | ||
| ref_group | ||
|
|
@@ -153,19 +205,23 @@ get_cox_pairwise_df <- function( | |
| call = get_cli_abort_call() | ||
| ) | ||
| } | ||
|
|
||
| comp_df <- data[as.character(data[[arm]]) %in% subset_arm, ] | ||
|
|
||
| comp_df[[arm]] <- droplevels(comp_df[[arm]]) | ||
| # Explicitly relevel the factor so the reference group is ALWAYS first. | ||
| comp_df[[arm]] <- factor(comp_df[[arm]], levels = c(ref_group, current_arm)) | ||
|
|
||
| suppressWarnings( | ||
| coxph_ans <- survival::coxph( | ||
| formula = model_formula, | ||
| data = comp_df, | ||
| ties = ties | ||
| ) |> summary() | ||
| # Safely inject the remaining dots into the coxph call | ||
| cox_args <- c( | ||
| list(formula = model_formula, data = comp_df, ties = ties), | ||
| coxph_dots | ||
| ) | ||
| fit_full <- do.call(survival::coxph, cox_args) | ||
|
|
||
| log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm) | ||
| # Extract summary with our specified conf.int | ||
| coxph_ans <- summary(fit_full, conf.int = conf_int) | ||
| # Pass the 'ties' argument down so the LRT matches the main Cox estimate | ||
| log_rank_pvalue <- .estimate_p_value(model_formula, comp_df, test, arm, ties) | ||
|
|
||
| conf_int_row <- paste0(arm, current_arm) | ||
| current_row <- data.frame( | ||
|
|
@@ -187,73 +243,143 @@ get_cox_pairwise_df <- function( | |
|
|
||
| names(res) <- c( | ||
| "HR", | ||
| "95% CI", | ||
| paste0(conf_int * 100, "% CI"), | ||
| paste0("p-value (", test_name, ")") | ||
| ) | ||
| res | ||
| } | ||
|
|
||
| #' Estimate p-value for a pairwise survival comparison | ||
| #' | ||
| #' Dispatches to `coin::logrank_test()` for weighted log-rank variants | ||
| #' or to a likelihood-ratio test via `survival::survreg()`. | ||
| #' Dispatches to `survival::survdiff()` for standard log-rank, | ||
| #' `coin::logrank_test()` for weighted log-rank variants, | ||
| #' or to a nested likelihood-ratio test via `survival::coxph()`. | ||
| #' | ||
| #' @param formula (`formula`)\cr survival formula, e.g. `Surv(time, status) ~ arm`. | ||
| #' @param data (`data.frame`)\cr subset containing exactly two arm levels. | ||
| #' @param test (`string`)\cr test name as accepted by [get_cox_pairwise_df()]. | ||
| #' @param arm (`string`)\cr column name of the arm variable in `data`. | ||
| #' @param ties (`character`)\cr tie handling method for the Cox LRT. | ||
| #' | ||
| #' @returns A single numeric p-value. | ||
| #' @keywords internal | ||
| .estimate_p_value <- function(formula, data, test, arm) { | ||
| test_type <- switch(test, | ||
| "log-rank" = "logrank", | ||
| "gehan-breslow" = "Gehan-Breslow", | ||
| "tarone" = "Tarone-Ware", | ||
| "peto" = "Peto-Peto", | ||
| "prentice" = "Prentice", | ||
| "fleming-harrington" = "Fleming-Harrington", | ||
| "likelihood-ratio" = "lr" | ||
| ) | ||
| #' @noRd | ||
| .estimate_p_value <- function(formula, data, test, arm, ties) { | ||
| if (test == "log-rank") { | ||
| # --- 1. Standard Log-Rank via survival (Matches SAS & rtables) --- | ||
| sdiff <- survival::survdiff(formula, data = data) | ||
|
|
||
| # Calculate the asymptotic p-value using the chi-square statistic | ||
| # Degrees of freedom is (number of groups - 1) | ||
| p_value <- stats::pchisq(sdiff$chisq, df = length(sdiff$n) - 1, lower.tail = FALSE) | ||
| } else if (test == "likelihood-ratio") { | ||
| # --- 2. Likelihood-Ratio Test via survival --- | ||
| # Fit the full Cox model | ||
| fit_full <- survival::coxph(formula, data = data, ties = ties) | ||
|
|
||
| # Safely create the reduced formula by dropping the arm variable | ||
| reduced_formula <- stats::update(formula, paste(". ~ . -", arm)) | ||
|
|
||
| # Fit the reduced model explicitly to avoid drop1 environment scope errors | ||
| fit_reduced <- survival::coxph(reduced_formula, data = data, ties = ties) | ||
|
Comment on lines
+267
to
+283
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have reverted to survival for computing log-rank p value, since coin is using permutation test for this. This resulted in discrepancy between crane and tern results.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to talk about the LRT engine change ( Just flagging this because I think it deserves a note in the PR description / NEWS. The old LRT used However, we could preserve both: use the original Either way, worth a note in NEWS since |
||
|
|
||
| if (test_type != "lr") { | ||
| rlang::check_installed( | ||
| "coin", | ||
| reason = paste("to run log-rank tests using", test_type, "method") | ||
| # Execute the nested Likelihood-Ratio Test | ||
| anova_res <- stats::anova(fit_reduced, fit_full, test = "Chisq") | ||
|
|
||
| # Extract the p-value for the second row (the full model comparison) | ||
| p_value <- anova_res[["Pr(>|Chi|)"]][[2]] | ||
| } else { | ||
| # --- 3. Weighted Variants via coin --- | ||
| test_type <- switch(test, | ||
| "gehan-breslow" = "Gehan-Breslow", | ||
| "tarone" = "Tarone-Ware", | ||
| "peto" = "Peto-Peto", | ||
| "prentice" = "Prentice", | ||
| "fleming-harrington" = "Fleming-Harrington" | ||
| ) | ||
|
|
||
| if (length(levels(data[[arm]])) != 2) { | ||
| cli::cli_warn( | ||
| if (!requireNamespace("coin", quietly = TRUE)) { | ||
| cli::cli_abort( | ||
| paste( | ||
| "{.arg arm} does not contain exactly 2 levels!", | ||
| "This will result in unexpected behavior of pairwise test." | ||
| "The {.pkg coin} package is required to run", | ||
| "log-rank tests using the {test_type} method." | ||
| ) | ||
| ) | ||
| } | ||
|
|
||
| coin_formula <- .check_and_rewrite_formula(formula, arm) | ||
|
|
||
| test_result <- coin::logrank_test( | ||
| formula = formula, | ||
| formula = coin_formula, | ||
| data = data, | ||
| type = test_type | ||
| ) | ||
|
|
||
| p_value <- as.numeric(coin::pvalue(test_result)) | ||
| } else { | ||
| # SAS LR test assumes an exponential distribution | ||
| # fit the model | ||
| fit_cov <- survival::survreg(formula, data = data, dist = "exponential") | ||
|
|
||
| # fit the null model | ||
| drop_arm_formula <- stats::as.formula(paste(". ~ . -", arm)) | ||
| # to account for possible covariates | ||
| null_formula <- stats::update(formula, drop_arm_formula) | ||
| fit_null <- survival::survreg(null_formula, data = data, dist = "exponential") | ||
|
|
||
| # calculate the difference and estimate the statistics | ||
| lrt_stat <- 2 * (fit_cov$loglik[2] - fit_null$loglik[1]) | ||
| df <- fit_cov$df - fit_null$df | ||
| p_value <- stats::pchisq(lrt_stat, df, lower.tail = FALSE) | ||
| } | ||
|
|
||
| p_value | ||
| } | ||
|
|
||
| #' Rewrite survival formula with strata() for specific models | ||
| #' | ||
| #' @description | ||
| #' Parses a survival formula and safely translates `strata()` wrappers into | ||
| #' syntaxes supported by target modeling engines. | ||
| #' | ||
| #' @param formula (`formula`)\cr The original survival formula. | ||
| #' @param arm (`character`)\cr The name of the primary arm variable. | ||
| #' | ||
| #' @returns A modified `formula` with `strata()` terms properly translated. | ||
| #' @noRd | ||
| .check_and_rewrite_formula <- function(formula, arm) { | ||
| f_terms <- stats::terms(formula) | ||
| term_labels <- attr(f_terms, "term.labels") | ||
|
|
||
| # Safely identify strata calls directly from the right-hand side labels | ||
| strata_idx <- grep("^strata\\(", term_labels) | ||
| strata_calls <- term_labels[strata_idx] | ||
|
|
||
| # --- 1. GUARDRAIL VALIDATION --- | ||
| valid_terms <- c(arm, strata_calls) | ||
| invalid_terms <- setdiff(term_labels, valid_terms) | ||
|
|
||
| if (length(invalid_terms) > 0) { | ||
| cli::cli_abort( | ||
| paste( | ||
| "This log-rank test method does not support covariate adjustment", | ||
| "for: {.var {invalid_terms}}.", | ||
| "Please use stratification (e.g., {.code ~ {arm} + strata(var)})", | ||
| "or switch to {.code test = 'likelihood-ratio'}." | ||
| ) | ||
| ) | ||
| } | ||
|
|
||
| # If there are no strata terms, return the formula completely untouched | ||
| if (length(strata_idx) == 0) { | ||
| return(formula) | ||
| } | ||
|
|
||
| # Use R's Abstract Syntax Tree (AST) to safely extract strata arguments | ||
| strata_vars <- unlist(lapply(strata_calls, function(x) { | ||
| sapply(as.list(str2lang(x))[-1], deparse) | ||
| })) | ||
|
|
||
| lhs_terms <- arm | ||
|
|
||
| # If there are multiple strata variables, wrap them in interaction() | ||
| # If single, force as.factor() to satisfy coin::logrank_test requirements | ||
| if (length(strata_vars) > 1) { | ||
| block_str <- paste0("interaction(", paste(strata_vars, collapse = ", "), ")") | ||
| } else { | ||
| block_str <- paste0("as.factor(", strata_vars[1], ")") | ||
| } | ||
|
|
||
| rhs_str <- paste(lhs_terms, "|", block_str) | ||
|
|
||
| # Rebuild the formula preserving the exact LHS response and original environment | ||
| stats::reformulate( | ||
| termlabels = rhs_str, | ||
| response = formula[[2]], | ||
| env = environment(formula) | ||
| ) | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you change the method if you want? If not we need to be upfront about it
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
By default,
coxph()calculates the variance using the standard Fisher information matrix, and the resulting CIs extracted viasummary()orconfint()are Wald confidence intervals.If you want to change the underlying variance method to calculate Robust (Sandwich) Confidence Intervals, you need to use the
robust = TRUEargument inside your model call.So we can add an argument
robustand additionally specify the confidence limit in another argument?However these will be two additional arguments in the function call. What is your take on this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could we use
...to pass these additional arguments consistently to each method?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I have added the code to do that.