Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
78 commits
Select commit Hold shift + click to select a range
553366c
refactoring core gdb methods/functions + >90% coverage
Sep 30, 2025
4bdb033
varSet refactoring
Oct 20, 2025
a50297d
refactored varSet unit tests (>90% coverage)
Oct 20, 2025
a1fda07
genoMatrix refactoring
Oct 20, 2025
9d74812
genoMatrix unit test refactoring (>90%)
Oct 20, 2025
90ed689
refactoring summariseGeno
Oct 20, 2025
526e65c
add asserts
Oct 20, 2025
94b9d59
replace aggregateFile and associated methods with aggDb
Oct 20, 2025
ce5482c
refactored gdb-aggregate method (using aggDb)
Oct 20, 2025
d025cdd
assocTest refactoring
Oct 20, 2025
67cf889
refactor assocTest_gdb
Oct 20, 2025
eb0f2d4
replace assoctest_aggregateFile with assoctest_aggdb + refactor
Oct 20, 2025
2a1b1e3
refactor rvatResult
Oct 20, 2025
24ee3a1
update nullmodelGSA to aggdb
Oct 20, 2025
6d6bb79
update internal data
Oct 20, 2025
3700348
update cli
Oct 20, 2025
51aed93
minor fixes in geneSetAssoc tests
Oct 20, 2025
4343ffa
replace aggregateFile tests with aggdb tests
Oct 20, 2025
5eaa201
minor updates cli tests
Oct 20, 2025
0435360
add test helper functions
Oct 20, 2025
79d3d00
minor updates unit tests
Oct 20, 2025
761d2aa
updates DESCRIPTION/NAMESPACE and documentation
Oct 20, 2025
b17f17e
small fix unit tests
Oct 20, 2025
cdf19f6
add test data
Oct 20, 2025
39dea48
small fix in unit tests
Oct 20, 2025
d88c383
minor fixes
Oct 20, 2025
3508f50
refactor genoMatrix unit tests (>90% coverage)
pjhop Oct 26, 2025
6994358
refactor summariseGeno unit tests (>90% coverage)
pjhop Nov 2, 2025
977e3c0
minor updates aggdb + updated unit tests aggdb and related methods (>…
pjhop Nov 9, 2025
7f7730f
minor updates assocTest + refactoring assocTest unit tests
pjhop Dec 10, 2025
0b19300
refactoring of rvatResult unit tests
pjhop Dec 10, 2025
abe7d83
reformat mapToCDS.R
pjhop Dec 10, 2025
2a122dd
refactor mapToCDS
pjhop Dec 17, 2025
925c015
reformat vcfUtils.R
pjhop Dec 17, 2025
a974c0e
linting mapToCDS
pjhop Dec 17, 2025
9c8bb40
vcfUtils: reformatting
pjhop Dec 17, 2025
764ef17
spatialClust.R: reformat
pjhop Dec 17, 2025
678320f
spatialClust linting
pjhop Dec 17, 2025
603fab7
refactoring of spatialClust
pjhop Dec 17, 2025
c5183bf
reformat geneSetAssoc.R
pjhop Dec 17, 2025
8a9748a
move geneSet functions and methods to separate file
pjhop Dec 17, 2025
456d86b
geneSet.R linting
pjhop Dec 18, 2025
2737ad6
geneSet.R refactoring
pjhop Dec 21, 2025
af21f12
reformat geneSet unit tests
pjhop Dec 21, 2025
8814c27
update geneSet unit tests
pjhop Dec 23, 2025
13c84aa
format geneSetAssoc unit tests
pjhop Dec 28, 2025
d6ebb33
reorganize geneSetAssoc unit tests
pjhop Dec 29, 2025
15a8502
geneSetAssoc: linting
pjhop Dec 31, 2025
cfa229c
refactor geneSetAssoc unit tests + bump to >80% coverage
pjhop Jan 3, 2026
befde00
geneSetAssoc: refactoring
pjhop Jan 6, 2026
b06799f
geneSetAssoc: refactor core GSA parts
pjhop Jan 10, 2026
9169557
geneSetAssoc: refactor conditional GSA and cell-type enrichments
pjhop Jan 10, 2026
aa5b528
geneSetAssoc: minor updates refactoring + cleanup
pjhop Jan 12, 2026
33c9b3a
remove rvatViewer code (will be moved to separate package)
pjhop Jan 12, 2026
7d34802
fix aggdb documentation/examples
pjhop Jan 12, 2026
b0442ad
update documentation + move examples to separate files
pjhop Jan 17, 2026
61b7238
remove skat_fwe tests; add aggregate documentation; add writeVcf to c…
pjhop Feb 8, 2026
3959bba
reformat allClasses/allGenerics
pjhop Feb 8, 2026
8c635d1
fix writing to output in assocTest
pjhop Feb 8, 2026
d8a9889
consistent file naming
pjhop Feb 8, 2026
338eb3b
minor fix gdb and related methods
pjhop Feb 16, 2026
d21b34d
minor fixes + file renamings
pjhop Feb 18, 2026
81efb14
minor fixes documentation
pjhop Feb 19, 2026
39ee4f6
fix data paths in unit tests
pjhop Feb 19, 2026
98ceee4
version bump + format DESCRIPTION
pjhop Feb 19, 2026
27a0829
minor fixes unit tests
pjhop Feb 19, 2026
a8d8dee
minor fixes
pjhop Feb 20, 2026
27b595b
update air formatting
pjhop Feb 24, 2026
653e516
update man
pjhop Feb 25, 2026
672a75b
remaining formatting
pjhop Mar 10, 2026
26f034f
minor fixes
pjhop Mar 10, 2026
fd013ee
add snapshots + R CMD check workflow
pjhop Mar 13, 2026
3b6fa0d
avoid opening all aggdb connections simultaneously in aggdbList
pjhop Mar 26, 2026
a62427d
minor update
pjhop Mar 29, 2026
d9ca5db
add rvatData to remotes in DESCRIPTION
pjhop Mar 29, 2026
a8df225
minor unit test fix
pjhop Mar 29, 2026
5c8eb74
add v.0.4.0 to changelog
pjhop Mar 30, 2026
eee1e68
couple of minor fixes
pjhop Mar 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Workflow that runs the r-lib `check-r-package` action (R CMD check) on the
# package.
name: R-CMD-check

# Triggers: pushes and pull requests, each limited to the main and dev branches.
on:
push:
branches: [main, dev]
pull_request:
branches: [main, dev]

# The workflow token only gets read access to the repository contents.
permissions:
contents: read

jobs:
R-CMD-check:
runs-on: ubuntu-latest

# GITHUB_PAT is populated from the workflow's built-in token.
# NOTE(review): presumably this is what lets the dependency step fetch
# GitHub-hosted packages (DESCRIPTION lists a `Remotes:` entry) — confirm.
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes

steps:
# Check out the repository.
- uses: actions/checkout@v4

# Set up R, with the public RSPM repository option switched on.
- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

# Set up the package dependencies, plus rcmdcheck as an extra package.
- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

# Run the check with --no-manual and --as-cran.
# NOTE(review): `error-on: "error"` presumably means only check errors (not
# warnings or notes) fail the job — confirm against the action's docs.
- uses: r-lib/actions/check-r-package@v2
with:
error-on: '"error"'
args: 'c("--no-manual", "--as-cran")'
92 changes: 72 additions & 20 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,49 +1,101 @@
Package: rvat
Title: Rare variant analysis toolkit
Version: 0.3.4
Version: 0.4.0
Comment thread
pjhop marked this conversation as resolved.
Authors@R: c(person(given='Kevin',family='Kenna',email='K.P.Kenna@umcutrecht.nl',role = c("aut")),person(given='Paul',family='Hop',email='P.J.Hop-2@umcutrecht.nl',role = c("aut", "cre")))
Description: RVAT is an R package that allows for the conversion of large genetic datasets from variant call format (.vcf) to a compressed SQLite representation (.gdb). Gdb files allow for rapid loading of compressed sample genotype strings into R BioConductor-compatible classes. They also allow for the upload and integration of variant and sample annotation data and complex data querying. RVAT includes a range of methods for SQL and non-SQL based querying, as well as methods for single variant association testing, aggregate association testing, gene set analyses, (interactive) visualization and the generation of various population genetic summary statistics. RVAT methods can be called directly in R sessions or used outside of the R environment through a provided command line tool.
License: GPL-3
Encoding: UTF-8
biocViews:
Imports: magrittr, dplyr, ggplot2 (>= 3.3.0), logistf (>= 1.24), RSQLite, SKAT, MASS, Matrix, zoo, shiny,WGCNA,GenomicRanges,IRanges,DBI,methods,SPAtest,DT,ggrepel,stringr,stringi,tibble,tidyr,rtracklayer,plotly,BiocGenerics,GENESIS,GenomeInfoDb,GWASTools,matrixcalc,R.utils,optparse,readr,ggh4x
Depends: R (>= 3.4.1), SummarizedExperiment,S4Vectors
Imports:
magrittr,
dplyr,
ggplot2 (>= 3.3.0),
logistf (>= 1.24),
RSQLite,
SKAT,
MASS,
Matrix,
zoo,
WGCNA,
GenomicRanges,
IRanges,
DBI,
methods,
SPAtest,
ggrepel,
stringr,
stringi,
tibble,
tidyr,
rtracklayer,
plotly,
BiocGenerics,
GENESIS,
GenomeInfoDb,
GWASTools,
matrixcalc,
R.utils,
optparse,
readr,
ggh4x,
data.table,
rlang (>= 1.1.0)
Depends:
R (>= 3.4.1),
SummarizedExperiment,
S4Vectors
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.3.2
RoxygenNote: 7.3.3
Collate:
'allInternalData.R'
'allGenerics.R'
'allClasses.R'
'aggregateFile.R'
'aggDb-collapse.R'
'aggDb-list.R'
'aggDb-merge.R'
'asserts-types-check.R'
'asserts-obj-type.R'
'aggDb.R'
'assocTest-aggdb.R'
'assocTest-gdb.R'
'assocTest-helper.R'
'assocTest-stattests.R'
'assocTest.R'
'assocTest_aggregateFile.R'
'assocTest_gdb.R'
'assocTest_helper.R'
'dump.R'
'gdb-aggregate.R'
'gdb-anno-cohort.R'
'gdb-buildGdb.R'
'gdb-getGT.R'
'gdb-mapVariants.R'
'gdb-shared-helpers.R'
'gdb-summariseGeno.R'
'gdb-utils.R'
'gdb.R'
'gdbUtils.R'
'geneSet.R'
'rvatResult.R'
'gsaResult.R'
'geneSetAssoc.R'
'genoMatrix.R'
'manhattanly.R'
'genoMatrix-aggregate.R'
'genoMatrix-constructor.R'
'genoMatrix-core.R'
'genoMatrix-getCarriers.R'
'genoMatrix-getters.R'
'genoMatrix-summariseGeno.R'
'genoMatrix-updaters.R'
'mapToCDS.R'
'mapVariants.R'
'mutationPlot.R'
'nullmodelGSA.R'
'pipe.R'
'resampling.R'
'rvatViewer.R'
'rvatViewerClass.R'
'rvat_cli.R'
'rvat_cli_help.R'
'rvat_cli_options.R'
'rvat_cli_helpers.R'
'shiny_functions.R'
'rvat-cli-help.R'
'rvat-cli-options.R'
'rvat-cli-helpers.R'
'rvat-cli.R'
'spatialClust.R'
'utils.R'
'varSet.R'
'vcfUtils.R'
Suggests:
testthat (>= 3.0.0), rvatData, withr
Remotes:
KennaLab/rvatData
Config/testthat/edition: 3
20 changes: 10 additions & 10 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
export("%$%")
export("%>%")
export(ACAT)
export(aggregateFile)
export(aggregateFileList)
export(aggdb)
export(aggdbList)
export(as.geneSetList)
export(assocTest)
export(buildCorMatrix)
export(buildGdb)
export(buildGeneSet)
export(buildResamplingFile)
export(buildVarSet)
export(collapseAggregateFiles)
export(collapseAggDbs)
export(collapseVarSetList)
export(concatGdb)
export(densityPlot)
Expand Down Expand Up @@ -54,6 +54,7 @@ export(listAnno)
export(listCohort)
export(listGeneSets)
export(listMetadata)
export(listParams)
export(listSamples)
export(listUnits)
export(listVarSets)
Expand All @@ -62,15 +63,14 @@ export(listWeights)
export(manhattan)
export(mapToCDS)
export(mapVariants)
export(mergeAggregateFiles)
export(mergeAggDbs)
export(mutationPlot)
export(qqplot)
export(readGMT)
export(readResults)
export(recode)
export(remapIDs)
export(resamplingFile)
export(rvatViewer)
export(rvbResult)
export(singlevarResult)
export(spatialClust)
Expand All @@ -85,8 +85,8 @@ export(varSetList)
export(vcfInfo2Table)
export(writeResult)
export(writeVcf)
exportClasses(aggregateFile)
exportClasses(aggregateFileList)
exportClasses(aggdb)
exportClasses(aggdbList)
exportClasses(gdb)
exportClasses(geneSet)
exportClasses(geneSetFile)
Expand All @@ -112,7 +112,7 @@ exportMethods(assocTest)
exportMethods(buildCorMatrix)
exportMethods(buildVarSet)
exportMethods(close)
exportMethods(collapseAggregateFiles)
exportMethods(collapseAggDbs)
exportMethods(collapseVarSetList)
exportMethods(densityPlot)
exportMethods(dropTable)
Expand Down Expand Up @@ -149,6 +149,7 @@ exportMethods(listAnno)
exportMethods(listCohort)
exportMethods(listGeneSets)
exportMethods(listMetadata)
exportMethods(listParams)
exportMethods(listSamples)
exportMethods(listUnits)
exportMethods(listVarSets)
Expand All @@ -158,7 +159,7 @@ exportMethods(manhattan)
exportMethods(mapToCDS)
exportMethods(mapVariants)
exportMethods(merge)
exportMethods(mergeAggregateFiles)
exportMethods(mergeAggDbs)
exportMethods(metadata)
exportMethods(names)
exportMethods(qqplot)
Expand All @@ -182,7 +183,6 @@ import(GenomicRanges)
import(S4Vectors)
import(SummarizedExperiment)
import(methods)
import(shiny)
importClassesFrom(RSQLite,SQLiteConnection)
importClassesFrom(S4Vectors,DFrame)
importClassesFrom(S4Vectors,DataFrame)
Expand Down
104 changes: 104 additions & 0 deletions R/aggDb-collapse.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#' @include allClasses.R
#' @include allGenerics.R
#' @include allInternalData.R

#' @rdname collapseAggDbs
#' @usage NULL
#' @export
setMethod(
  "collapseAggDbs",
  signature = signature(object = "aggdbList"),
  definition = function(
    object,
    output = NULL,
    overWrite = FALSE,
    verbose = TRUE
  ) {
    # Collapse every aggdb referenced by an aggdbList into a single aggregate
    # value per sample: for each database, all units are retrieved, summed per
    # column (ignoring NAs), and added to a running total.
    #
    # Arguments:
    #   object    aggdbList; its `paths` slot lists the aggdb files to process.
    #   output    NULL, or a single file path to write the result to.
    #   overWrite flag forwarded to `.check_output()`.
    #   verbose   if TRUE, progress messages are emitted.
    #
    # Returns a data.frame with columns `IID` and `aggregate` when `output` is
    # NULL. Otherwise the table is written tab-separated through a gzfile
    # connection and `invisible(NULL)` is returned.
    #
    # NOTE(review): assumes `getUnit()` returns a units x samples matrix whose
    # columns follow the order of `listSamples(object)` - confirm.

    # input validation; the flags are validated first because
    # `.check_output()` consumes both of them
    check_bool(overWrite)
    check_bool(verbose)
    check_character(output, allow_null = TRUE)
    check_length(output, equal = 1L, allow_null = TRUE)
    if (!is.null(output)) {
      .check_output(output, overWrite = overWrite, verbose = verbose)
    }

    # initialize the running per-sample total
    samples <- listSamples(object)
    agg_merged <- numeric(length(samples))
    n_dbs <- length(object@paths)

    # loop through databases, opening one connection at a time
    for (i in seq_len(n_dbs)) {
      path <- object@paths[i]
      if (verbose) {
        message(sprintf(
          "Processing aggdb %d/%d: '%s'",
          i,
          n_dbs,
          basename(path)
        ))
      }

      con <- NULL
      tryCatch(
        {
          # connect
          con <- aggdb(path)

          # retrieve aggregates for all units in this database
          agg_current <- getUnit(con, unit = listUnits(con))

          # sum across units and add to the running total
          agg_merged <- agg_merged + colSums(agg_current, na.rm = TRUE)
        },
        error = function(e) {
          # re-raise with the offending file name for easier debugging
          stop(
            sprintf(
              "Could not process '%s': Error: %s",
              basename(path),
              conditionMessage(e)
            ),
            call. = FALSE
          )
        },
        finally = {
          # always release the connection, also when processing failed
          if (!is.null(con) && DBI::dbIsValid(con)) {
            close(con)
          }
        }
      )
    }

    # finalize
    agg_merged <- data.frame(
      IID = samples,
      aggregate = agg_merged,
      stringsAsFactors = FALSE,
      row.names = NULL
    )

    # return data.frame if output is not specified
    if (is.null(output)) {
      return(agg_merged)
    }

    # write to output if specified
    if (verbose) {
      message(sprintf("Writing merged aggregates to: %s", output))
    }
    output_con <- gzfile(output, "wb")
    on.exit(close(output_con), add = TRUE)
    write.table(
      agg_merged,
      file = output_con,
      quote = FALSE,
      sep = "\t",
      row.names = FALSE
    )

    invisible(NULL)
  }
)
Loading
Loading