Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
cb9aea6
Removing susie.cs.ht() because it should be taken directly from the f…
Aug 25, 2025
76712b6
Have customizable post susie QC parameters
Aug 25, 2025
77455fb
Nevermind - function copied and pasted from gitlab, temporary solutio…
Aug 26, 2025
36ab363
Back to sourcing function rather than calling it from flanders R pack…
Aug 26, 2025
57c28ed
Forgot to remove susie_qc_cs_lbf_thr parameter from here
Aug 26, 2025
e375904
Added susie QC parameters to the nextflow schema
Aug 26, 2025
e0883d5
Rearraning post susie QC, so that loci disappearing becuase of QC are…
Sep 1, 2025
14923f8
Adding locusbreaker
bruno-ariano Sep 3, 2025
bd26bb8
Adding locus size parameter
bruno-ariano Sep 3, 2025
2767343
Add report collecting loci that were re-finemapped with L=1 after bei…
Sep 4, 2025
8401ffc
Wrapped all code to go from susie output to rds list of dataframes ob…
Sep 5, 2025
e239bd6
Adding credible set expansion and using function from susie output to…
Sep 5, 2025
5bef08f
Remove some parameters from list of those mandatory - if not specifie…
ariannalandini Sep 8, 2025
1623586
Merge branch 'main' into update_susie_qc
ariannalandini Sep 8, 2025
34cf953
Do not hardcode L=10, but rather use the assigned variable
ariannalandini Sep 8, 2025
3168bec
Ok nevermind, reverting previous commit and adding also post susie QC…
ariannalandini Sep 8, 2025
9675594
Removing hardcoding of L=10 for easier maintenance
ariannalandini Sep 9, 2025
a7103e0
Forgot to close parenthesis
Sep 9, 2025
f4686d8
Temporarely copy and paste functions from gitlab R package version
Sep 9, 2025
5c0b224
Merge branch 'tiledb_locusbreaker' into update_susie_qc
ariannalandini Sep 10, 2025
90b869b
Merge pull request #100 from Biostatistics-Unit-HT/update_susie_qc
ariannalandini Sep 10, 2025
c123ffc
Computing also cs logsum - and adding it to the anndata obs
Sep 10, 2025
2059698
Merge branch 'main' into cs_expansion
ariannalandini Sep 10, 2025
40d48b2
Fixed unmatching parenthesis
Sep 10, 2025
9a0416d
tile_lb_input parameters defined but not used. Assigning correct file…
Sep 10, 2025
d0e5eaf
Removing tuple since it's only one element
Sep 10, 2025
fc15e3b
Merge branch 'tiledb_locusbreaker' into cs_expansion
ariannalandini Sep 10, 2025
664c6f4
Merge pull request #101 from Biostatistics-Unit-HT/cs_expansion
ariannalandini Sep 11, 2025
4633052
Revert "Add credible set expansion, logsum and wrapping up susie refo…
ariannalandini Sep 11, 2025
9978ee7
Merge pull request #102 from Biostatistics-Unit-HT/revert-101-cs_expa…
ariannalandini Sep 11, 2025
d62c4c0
Adding cs expansion
ariannalandini Sep 11, 2025
2009de9
Add Credible Set Expansion feature to Fine-Mapping (#99) (#103)
ariannalandini Sep 11, 2025
24011dd
Replacing plink bfile with pfile - LD reference files expected to be …
Sep 11, 2025
2eadeed
Latest fixes inspired by UKBB Cardinal (#105)
ariannalandini Sep 26, 2025
e8462e7
Adding modification to run on scQTLs on Sanger (#107)
bruno-ariano Sep 30, 2025
5eed09d
Skipping .rds intermediate step and storing susie results directly in…
ariannalandini Oct 2, 2025
510999d
Adding analysis label identifier to the anndata and cs name
Oct 2, 2025
09d7d78
Calculate cond beta and se internally at the function
Oct 3, 2025
09b5eab
Store finemapping exceptions in a single df before saving. Add metada…
Oct 3, 2025
2a8bba7
Fixing tiledb parameters - removing not used, giving correct naming, …
Oct 6, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
284 changes: 282 additions & 2 deletions bin/funs_locus_breaker_cojo_finemap_all_at_once.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ run_dentist <- function(D=dataset_aligned
write(locus_only.snp, ncol=1,file=paste0(random.number,"_locus_only.snp.list"))

# Prepare subset of plink LD files
exit_status = system(paste0("plink2 --bfile ", bfile," --extract ",random.number,"_locus_only.snp.list --maf ", maf.thresh, " --make-bed --out ", random.number))
exit_status = system(paste0("plink2 --pfile ", bfile," --extract ",random.number,"_locus_only.snp.list --maf ", maf.thresh, " --make-bed --out ", random.number))

# Raise an error if the external command fails
if (exit_status != 0) {
Expand Down Expand Up @@ -104,7 +104,7 @@ prep_susie_ld <- function(
}

### --export A include-alt --> creates a new fileset, after sample/variant filters have been applied - A: sample-major additive (0/1/2) coding, suitable for loading from R
exit_status = system(paste0("plink2 --bfile ", bfile, " --extract ", random.number, "_locus_only.snp.list --maf ", maf.thresh, " --export A include-alt --out ", random.number))
exit_status = system(paste0("plink2 --pfile ", bfile, " --extract ", random.number, "_locus_only.snp.list --maf ", maf.thresh, " --export A include-alt --out ", random.number))

# Check if the command failed
if (exit_status != 0) {
Expand Down Expand Up @@ -508,3 +508,283 @@ run_susie_w_retries <- function(

return(fitted_rss)
}


#' Expand SuSiE Credible Sets by Thresholding
#'
#' Expands each credible set (CS) from a fitted \code{susie} or \code{susie_rss}
#' object by applying a thresholding rule to log Bayes factors (\code{lbf_variable}).
#' The expanded set is defined as the union of the original CS and
#' SNPs identified via \code{\link{find_threshold}}.
#'
#' @param fitted A \code{susie} or \code{susie_rss} object, typically the
#' output of \code{susieR::susie_rss()}.
#'
#' @return A named list of integer vectors, one per credible set.
#' Each vector contains SNP indices for the union of the original and
#' expanded credible sets, with SNP IDs as names.
#'
#' @details
#' The function retrieves original CSs, then applies \code{find_threshold()}
#' to the corresponding lBF values. The expanded CS is the union of both sets.
#'
#' @seealso \code{\link{find_threshold}}
#' @export
#'
#' @examples
#' \dontrun{
#' fitted <- susieR::susie_rss(z, R, n = N)
#' expanded <- expand_cs(fitted)
#' names(expanded)
#' }
expand_cs <- function(fitted) {
cs_list <- fitted$sets$cs
snp_names <- colnames(fitted$lbf_variable)

expanded <- lapply(names(cs_list), function(i) {
idx_original <- cs_list[[i]]
names(idx_original) <- snp_names[idx_original] # name with SNP IDs
i_num <- as.integer(gsub("L", "", i))

lbf_vec <- fitted$lbf_variable[i_num, ]
idx_expanded <- find_threshold(lbf_vec, original_idx = idx_original)

# Union of both
idx_union <- union(idx_original, idx_expanded)
names(idx_union) <- snp_names[idx_union]

idx_union
})
names(expanded) <- names(cs_list)
return(expanded)
}



#' Identify Expanded Credible Set SNPs via Thresholding
#'
#' Given a vector of SNP scores (e.g. lABFs), this function determines an
#' optimal threshold that best separates the values into two groups.
#' SNPs above the threshold are considered part of the expanded credible set.
#'
#' @param vector Numeric vector of SNP-level scores.
#' @param original_idx Optional integer vector of SNP indices corresponding
#' to the original credible set. If no expansion is possible
#' (e.g. variance of \code{vector} is zero), these indices are returned instead.
#'
#' @return An integer vector of SNP indices corresponding to the expanded credible set.
#' If no expansion is possible, the original indices are returned.
#'
#' @seealso \code{\link{expand_cs}}
#' @export
#'
#' @examples
#' lbf_vec <- rnorm(100)
#' cs_idx <- which(lbf_vec > 2)
#' find_threshold(lbf_vec, original_idx = cs_idx)
find_threshold <- function(vector, original_idx = NULL) {
if (var(vector) != 0) {
thres <- optim(
fn = function(th) thresholder(th, vector),
p = 0,
method = "Brent",
lower = min(vector),
upper = max(vector)
)
idx <- which(vector > thres$par)
return(idx)
} else {
# Return original indices if no expansion is possible
return(original_idx)
}
}


#' Objective Function for Threshold Optimization
#'
#' Internal helper used by \code{\link{find_threshold}}.
#' Given a numeric threshold and a vector of SNP scores, it splits the vector
#' into two groups and returns the variance of residuals from a simple linear model.
#'
#' @param threshold Numeric. Threshold value used to split the vector into groups.
#' @param vector Numeric vector of SNP scores (e.g. lABFs).
#'
#' @return A single numeric value: the variance of residuals from the fitted model.
#'
#' @note This function is not exported. It is used internally by \code{find_threshold()}.
#'
#' @keywords internal
thresholder <- function(threshold, vector) {
var(residuals(lm(vector ~ ifelse(vector > threshold, "A", "B"))))
}


#' Transfer SuSiE RSS fine-mapping results into an AnnData object
#'
#' This function takes a list of fine-mapping results from susie and associated metadata,
#' attaches conditional effect size estimates, and organizes the results into an
#' [anndata::AnnData] object with sparse matrices for lABFs, betas, and standard
#' errors. Metadata about credible sets and variants is stored in the `obs` and
#' `var` slots of the output object.
#'
#' @param finemap_list A named list of susie fine-mapping results. Each element is
#' expected to contain at least:
#' - `lbf_variable`: a matrix of log Bayes factors
#' - `sets`: credible set information (`cs_index`, `cs`, `purity`,
#' `requested_coverage`)
#' - `metadata`: a data frame with study, phenotype, and variant annotations
#' - `comment_section`: optional notes from the fine-mapping run.
#' @param cs_indices A named list parallel to `finemap_list`, containing indices
#' of expanded credible set SNPs to be used instead of the original indices.
#' @param analysis_id A label identifying te whole fine-mapping analysis
#'
#' @details
#' The function performs the following steps for each fine-mapping result:
#' - Identifies and flags credible SNPs according to credible set expansion
#' - Computes and combines metadata about credible sets
#' - Builds sparse matrices of lABFs, betas, and SEs
#' - Constructs an [anndata::AnnData] object with:
#' - `X`: sparse lABF matrix
#' - `layers`: beta and SE sparse matrices
#' - `obs`: credible set metadata
#' - `var`: variant metadata
#'
#' @return An [anndata::AnnData] object containing fine-mapping summary
#' statistics and metadata, ready for downstream analysis or visualization.
#'
#' @importFrom dplyr mutate select filter pull
#' @importFrom tidyr unnest_longer
#' @importFrom Matrix sparseMatrix
#' @importFrom anndata AnnData
#' @examples
#' \dontrun{
#' ad <- from_susie_to_anndata(finemap_list=finemap_list, cs_indices=cs_indices, analysis_id=analysis_id)
#' }
#'
from_susie_to_anndata <- function(finemap_list=NULL, cs_indices=NULL, analysis_id=NULL) {

lABFs_list <- list()
min_res_labf_vec <- c()
top_pvalue_vec <- c()
purity_df <- data.frame()
comment_section <- c()
metadata_df <- data.frame()

for (finemap_name in names(finemap_list)) {
finemap <- finemap_list[[finemap_name]]
cs_index <- cs_indices[[finemap_name]]

# Compute lABFs and conditional beta and se
lABFs <- lapply(finemap$sets$cs_index, function(cs) {
data.frame(
SNP = colnames(finemap$lbf_variable),
lABF = finemap$lbf_variable[cs, ],
is_cs = FALSE,
bC = get_beta_se_susie(finemap, cs)$beta,
bC_se = get_beta_se_susie(finemap, cs)$se
)
})

# Get credible SNPs for each CS
credible_snps <- lapply(finemap$sets$cs_index, function(cs) {
index <- paste0("L", cs)
#susie_get_cs(finemap, coverage = finemap$sets$requested_coverage)$cs[[index]]
cs_index[[index]] ### take indeces of expanded cs rather than original one!
})

# Mark credible SNPs
for (i in seq_along(lABFs)) {
lABFs[[i]]$is_cs[credible_snps[[i]]] <- TRUE
}

# Add info to metadata, collapse study and pheno ID in a single TRAITID
finemap$metadata <- finemap$metadata |>
dplyr::mutate(
analysis_id = opt$analysis_id,
snp = list(sapply(lABFs, function(x) x$SNP[which.max(x$lABF)])),
L_index = list(names(finemap$sets$cs)),
trait = ifelse(unique(TYPE=="gwas"), study_id, paste0(study_id, ":", phenotype_id))
) |>
tidyr::unnest_longer(c(snp, L_index))

min_res_labf <- sapply(lABFs, function(x) unique(min(x$lABF)))
top_pvalue <- sapply(lABFs, function(x) min(2 * pnorm(abs(x$bC) / x$bC_se, lower.tail = FALSE)))
logsum.logABF <- sapply(lABFs, function(x) coloc:::logsum(x$lABF))

# Store results
# names(lABFs) <- credible_set_names
lABFs_list <- c(lABFs_list, lABFs)
min_res_labf_vec <- c(min_res_labf_vec, min_res_labf)
top_pvalue_vec <- c(top_pvalue_vec, top_pvalue)
purity_df <- rbind(purity_df, finemap$sets$purity |> dplyr::mutate(logsum.logABF=logsum.logABF))
comment_section <- c(comment_section, rep(finemap$comment_section, length(finemap$sets$cs_index)))
comment_section[is.na(comment_section)] <- "NaN"
metadata_df <- rbind(metadata_df, finemap$metadata)
}

#### Create names for cs ####
cs_names_vec <- paste(
paste0("chr", metadata_df$chr),
metadata_df$analysis_id,
metadata_df$trait,
metadata_df$snp,
metadata_df$L_index,
sep = "::"
)

# Prepare `obs_df` metadata
obs_df <- metadata_df |> dplyr::select(-L_index) |> dplyr::rename(type=TYPE)
obs_df$chr <- paste0("chr", obs_df$chr)
obs_df$top_pvalue <- top_pvalue_vec
obs_df$min_res_labf <- min_res_labf_vec
# obs_df$panel <- NaN
obs_df$cs_name <- cs_names_vec
names(lABFs_list) <- obs_df$cs_name ### important for extracton of beta and se
obs_df$a1 <- gsub(".*:(\\w+):(\\w+)$", "\\1", obs_df$snp)
obs_df$a0 <- gsub(".*:(\\w+):(\\w+)$", "\\2", obs_df$snp)
obs_df$freq <- NaN

obs_df <- obs_df |>
dplyr::mutate(
bC = map2_dbl(cs_name, snp, ~ lABFs_list[[.x]] |> filter(SNP == .y) %>% pull(bC)),
bC_se = map2_dbl(cs_name, snp, ~ lABFs_list[[.x]] |> filter(SNP == .y) %>% pull(bC_se))
)

obs_df <- cbind(obs_df, purity_df)
obs_df$comment_section <- comment_section

# Prepare sparse matrices
all_snps <- unique(unlist(lapply(lABFs_list, function(x) x$SNP)))
element_indices <- setNames(seq_along(all_snps), all_snps)

create_sparse_matrix <- function(value_name, credible_sets=cs_names_vec) {
Matrix::sparseMatrix(
i = unlist(lapply(seq_along(credible_sets), function(i) rep(i, length(lABFs_list[[credible_sets[i]]]$SNP)))),
j = unlist(lapply(credible_sets, function(cs) element_indices[lABFs_list[[cs]]$SNP])),
x = unlist(lapply(lABFs_list, function(x) x[[value_name]])),
dims = c(length(credible_sets), length(all_snps)),
dimnames = list(credible_sets, all_snps)
)
}

lABF_matrix_sparse <- create_sparse_matrix("lABF")
beta_matrix_sparse <- create_sparse_matrix("bC")
se_matrix_sparse <- create_sparse_matrix("bC_se")

# Create AnnData object
ad <- anndata::AnnData(X = lABF_matrix_sparse)
ad$layers[["beta"]] <- beta_matrix_sparse
ad$layers[["se"]] <- se_matrix_sparse
rownames(obs_df) <- cs_names_vec
ad$obs <- obs_df

var_df <- data.frame(
snp = all_snps,
chr = gsub(".*(\\d+):(\\d+):\\w+:\\w+", "\\1", all_snps),
pos = gsub(".*(\\d+):(\\d+):\\w+:\\w+", "\\2", all_snps)
)
rownames(var_df) <- var_df$snp
ad$var <- var_df

return(ad)
}
Loading