Skip to content

Commit 32eaa20

Browse files
bschilderclaude
andcommitted
Fix 16 test failures: add GOSHIFTER source files, fix clean_granges and add_mb
- Add GOSHIFTER R source and man pages (were untracked, causing 'not found' errors) - Remove deprecated IMPACT module (replaced by GOSHIFTER) - Fix clean_granges: add drop=FALSE to prevent DataFrame→vector coercion - Fix add_mb: fall back to start() when pos_col not in GRanges mcols - Update DESCRIPTION with new Suggests/Remotes for GOSHIFTER dependencies - Update NAMESPACE with GOSHIFTER exports and new imports Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 95e6d04 commit 32eaa20

35 files changed

Lines changed: 2007 additions & 415 deletions

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,4 +42,6 @@ vignettes/*.R
4242
*.utf8.md
4343
*.knit.md
4444
# R Environment Variables
45-
.Renviron
45+
.Renviron
46+
/doc/
47+
/Meta/

DESCRIPTION

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@ Imports:
5050
rtracklayer,
5151
S4Vectors,
5252
BiocGenerics,
53-
IRanges
53+
IRanges,
54+
rlang
5455
Suggests:
5556
XGR,
5657
markdown,
@@ -73,11 +74,18 @@ Suggests:
7374
SNPlocs.Hsapiens.dbSNP144.GRCh38,
7475
BSgenome.Hsapiens.UCSC.hg19,
7576
BSgenome.Hsapiens.UCSC.hg38,
76-
BiocParallel
77+
BiocParallel,
78+
echoconda,
79+
echoLD,
80+
heatmaply,
81+
reshape2,
82+
R.utils
7783
Remotes:
7884
github::RajLabMSSM/echodata,
7985
github::RajLabMSSM/echotabix,
80-
github::RajLabMSSM/downloadR
86+
github::RajLabMSSM/downloadR,
87+
github::RajLabMSSM/echoconda,
88+
github::RajLabMSSM/echoLD
8189
RoxygenNote: 7.3.3
8290
VignetteBuilder: knitr
8391
License: GPL-3

NAMESPACE

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,11 @@ export(CORCES2020_get_ATAC_peak_overlap)
44
export(CORCES2020_get_hichip_fithichip_overlap)
55
export(CS_bin_plot)
66
export(CS_counts_plot)
7-
export(IMPACT_query)
7+
export(GOSHIFTER)
8+
export(GOSHIFTER_heatmap)
9+
export(GOSHIFTER_histograms_SNPgroups)
10+
export(GOSHIFTER_histograms_pvals)
11+
export(GOSHIFTER_run)
812
export(MOTIFBREAKR)
913
export(MOTIFBREAKR_calc_pvals)
1014
export(MOTIFBREAKR_filter)
@@ -70,9 +74,12 @@ importFrom(data.table,fwrite)
7074
importFrom(data.table,melt.data.table)
7175
importFrom(data.table,merge.data.table)
7276
importFrom(data.table,rbindlist)
77+
importFrom(data.table,set)
7378
importFrom(data.table,transpose)
79+
importFrom(data.table,tstrsplit)
7480
importFrom(downloadR,zenodo_upload)
7581
importFrom(dplyr,arrange)
82+
importFrom(dplyr,count)
7683
importFrom(dplyr,desc)
7784
importFrom(dplyr,group_by)
7885
importFrom(dplyr,mutate)
@@ -81,7 +88,6 @@ importFrom(dplyr,rename)
8188
importFrom(dplyr,sample_n)
8289
importFrom(dplyr,select)
8390
importFrom(dplyr,slice)
84-
importFrom(dplyr,slice_head)
8591
importFrom(dplyr,slice_max)
8692
importFrom(dplyr,summarise)
8793
importFrom(dplyr,tally)
@@ -93,15 +99,19 @@ importFrom(echodata,get_CS_bins)
9399
importFrom(echodata,get_CS_counts)
94100
importFrom(echodata,get_data)
95101
importFrom(echodata,get_header)
102+
importFrom(echodata,granges_to_bed)
96103
importFrom(echodata,is_ggbio)
97104
importFrom(echodata,is_granges)
98105
importFrom(echodata,snp_group_colorDict)
99106
importFrom(echodata,snp_group_filters)
100107
importFrom(echotabix,construct_query)
101-
importFrom(echotabix,convert)
102108
importFrom(echotabix,liftover)
103109
importFrom(echotabix,query)
104110
importFrom(ggbio,ggbio)
111+
importFrom(ggplot2,aes)
112+
importFrom(ggplot2,geom_histogram)
113+
importFrom(ggplot2,ggplot)
114+
importFrom(ggplot2,theme_bw)
105115
importFrom(ggplot2,xlim)
106116
importFrom(grDevices,dev.off)
107117
importFrom(grDevices,png)
@@ -112,6 +122,7 @@ importFrom(patchwork,plot_annotation)
112122
importFrom(patchwork,plot_layout)
113123
importFrom(patchwork,plot_spacer)
114124
importFrom(patchwork,wrap_plots)
125+
importFrom(rlang,.data)
115126
importFrom(rtracklayer,import)
116127
importFrom(rtracklayer,import.bw)
117128
importFrom(stats,as.formula)
@@ -122,5 +133,5 @@ importFrom(stats,setNames)
122133
importFrom(stringr,str_split)
123134
importFrom(tidyr,separate)
124135
importFrom(tools,R_user_dir)
125-
importFrom(utils,data)
136+
importFrom(utils,download.file)
126137
importFrom(utils,zip)

R/GOSHIFTER.R

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
#' Run GoShifter enrichment pipeline
2+
#'
3+
#' Run the \href{https://github.com/immunogenomics/goshifter}{GoShifter}
4+
#' locus-specific enrichment pipeline. This function orchestrates the full
5+
#' workflow: preparing SNP maps and LD files, querying ROADMAP annotations,
6+
#' running the GoShifter permutation test, and collecting results.
7+
#'
8+
#' @param locus_dir Path to the locus-level results directory.
9+
#' @param dat A \code{data.table} or \code{data.frame} with columns
10+
#' \code{SNP}, \code{CHR}, \code{POS}, and \code{P}.
11+
#' @param SNP_group Character string labelling this group of SNPs
12+
#' (default \code{""}).
13+
#' @param goshifter_path Path to the directory containing
14+
#' \code{goshifter.py}. If \code{NULL}, the bundled copy shipped with
15+
#' \pkg{echolocatoR} is used (see \code{\link{GOSHIFTER_find_folder}}).
16+
#' @param permutations Number of permutations for the enrichment test
17+
#' (default \code{1000}).
18+
#' @param ROADMAP_search A keyword query passed to
19+
#' \code{\link{ROADMAP_query}} to filter ROADMAP annotations
20+
#' (e.g. \code{"monocyte"}).
21+
#' @param chromatin_states Character vector of chromatin state abbreviations
22+
#' to test enrichment for (default \code{c("TssA")}).
23+
#' @param R2_filter LD r-squared threshold for GoShifter
24+
#' (default \code{0.8}).
25+
#' @param overlap_threshold Minimum number of overlapping SNPs required
26+
#' before running GoShifter on an annotation (default \code{1}).
27+
#' @param force_new_goshifter If \code{TRUE}, re-run even if results
28+
#' already exist on disk (default \code{FALSE}).
29+
#' @param remove_tmps Remove intermediate GoShifter output files after
30+
#' collecting results (default \code{TRUE}).
31+
#' @param verbose Print messages (default \code{TRUE}).
32+
#' @param save_results Write combined results to a tab-delimited file
33+
#' (default \code{TRUE}).
34+
#'
35+
#' @returns A \code{data.table} of GoShifter enrichment results across
36+
#' all requested chromatin states.
37+
#'
38+
#' @source
39+
#' \href{https://github.com/immunogenomics/goshifter}{GoShifter GitHub}
40+
#' \href{https://pubmed.ncbi.nlm.nih.gov/26140449/}{
41+
#' Trynka et al. (2015) \emph{Am J Hum Genet}}
42+
#'
43+
#' @family GOSHIFTER
44+
#' @export
45+
#' @importFrom data.table fread rbindlist fwrite
46+
#' @examples
47+
#' \dontrun{
48+
#' locus_dir <- echodata::locus_dir
49+
#' dat <- echodata::BST1
50+
#' gs_out <- GOSHIFTER(locus_dir = locus_dir, dat = dat)
51+
#' }
52+
GOSHIFTER <- function(locus_dir,
53+
dat,
54+
SNP_group = "",
55+
goshifter_path = NULL,
56+
permutations = 1000,
57+
ROADMAP_search = "",
58+
chromatin_states = c("TssA"),
59+
R2_filter = 0.8,
60+
overlap_threshold = 1,
61+
force_new_goshifter = FALSE,
62+
remove_tmps = TRUE,
63+
verbose = TRUE,
64+
save_results = TRUE) {
65+
66+
## Resolve GoShifter install path
67+
goshifter_path <- GOSHIFTER_find_folder(goshifter = goshifter_path)
68+
## Clean up any leftover .pyc files
69+
pyc_files <- file.path(
70+
goshifter_path,
71+
c("chromtree.pyc", "data.pyc", "docopt.pyc", "functions.pyc")
72+
)
73+
suppressWarnings(file.remove(pyc_files))
74+
75+
## Chromatin-state key
76+
chromState_key <- data.table::fread(
77+
system.file("extdata/ROADMAP",
78+
"ROADMAP_chromatinState_HMM.tsv",
79+
package = "echoannot")
80+
)
81+
82+
## Create GoShifter results directory
83+
GS_results_path <- file.path(locus_dir, "GoShifter")
84+
dir.create(GS_results_path, showWarnings = FALSE, recursive = TRUE)
85+
results_file <- file.path(
86+
GS_results_path,
87+
paste0("GOSHIFTER.",
88+
paste(chromatin_states, collapse = "-"),
89+
".", SNP_group, ".txt")
90+
)
91+
92+
## Prepare SNP-map file
93+
GOSHIFTER_create_snpmap(
94+
dat = dat,
95+
GS_results = GS_results_path,
96+
verbose = verbose
97+
)
98+
99+
## Prepare LD files
100+
GOSHIFTER_create_LD(
101+
locus_dir = locus_dir,
102+
dat = dat,
103+
verbose = verbose
104+
)
105+
106+
## Query ROADMAP annotations
107+
grl_roadmap <- ROADMAP_query(
108+
query_dat = dat,
109+
keyword_query = ROADMAP_search,
110+
nThread = 1,
111+
verbose = verbose
112+
)
113+
114+
## Use cached results if available
115+
if (file.exists(results_file) && !force_new_goshifter) {
116+
GS_RESULTS <- data.table::fread(results_file)
117+
} else {
118+
## Run GoShifter for each chromatin state
119+
GS_RESULTS <- lapply(chromatin_states, function(cs) {
120+
tryCatch({
121+
messager("+ GoShifter:: Running on chromatin state subset:",
122+
cs, v = verbose)
123+
GOSHIFTER_run(
124+
goshifter_path = goshifter_path,
125+
locus_dir = locus_dir,
126+
GRlist = grl_roadmap,
127+
dat = dat,
128+
chromatin_state = cs,
129+
R2_filter = R2_filter,
130+
permutations = permutations,
131+
remove_tmps = remove_tmps,
132+
verbose = verbose,
133+
overlap_threshold = overlap_threshold
134+
)
135+
}, error = function(e) {
136+
messager("GoShifter:: Error for chromatin state ",
137+
cs, ": ", conditionMessage(e), v = verbose)
138+
NULL
139+
})
140+
}) |> data.table::rbindlist(fill = TRUE)
141+
}
142+
143+
## Save results
144+
if (save_results) {
145+
data.table::fwrite(GS_RESULTS, results_file,
146+
sep = "\t", quote = FALSE)
147+
}
148+
149+
## Clean up intermediate output files
150+
if (remove_tmps) {
151+
tmp_suffixes <- c(".nperm1000.enrich",
152+
".nperm1000.locusscore",
153+
".nperm1000.snpoverlap")
154+
suppressWarnings(
155+
file.remove(file.path(GS_results_path,
156+
paste0("GS_results", tmp_suffixes)))
157+
)
158+
}
159+
160+
return(GS_RESULTS)
161+
}

0 commit comments

Comments
 (0)