Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,8 @@
^inst/benchmarks/
^inst/profiling/
^inst/vignettes/
^nextflow

^.git
^\.git

^src/Makevars(?!.in$|.win$)
^src/Cogaps.o
Expand Down Expand Up @@ -66,3 +65,11 @@
^src/gibbs_sampler/SparseNormalModel.o

^src/math/VectorMath\.o$

^nextflow*
^main\.nf
^work*
\.nextflow
\.cirro
^tests/nextflow

5 changes: 5 additions & 0 deletions .cirro/process-compute.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Compute settings for the `process` configuration scope of this pipeline.
// NOTE(review): this file lives under .cirro/, so it is presumably picked up by
// the Cirro platform when it launches the workflow — confirm against Cirro docs.
process {
// Run tasks through the AWS Batch executor.
executor = 'awsbatch'
// When a task fails, re-submit it rather than aborting the whole run.
errorStrategy = 'retry'
// Upper bound on re-submissions of a failing task (only meaningful with 'retry').
maxRetries = 2
}
42 changes: 42 additions & 0 deletions .cirro/process-form.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"form": {
"type": "object",
"properties": {
"npatterns": {
"title": "npatterns",
"description": "number of patterns to find: a single value or a comma-separated list, e.g. 5 or 5,6,7",
"type": "string"
},
"niterations": {
"title": "niterations",
"description": "number of iterations to run",
"type": "integer",
"default": 1000
},
"n_top_genes": {
"title": "n_top_genes",
"description": "number of most variable genes (ranked by variance) to use",
"type": "integer",
"default": 1000
},
"distributed": {
"title": "distributed",
"description": "none for a single run; genome-wide or single-cell for a distributed run",
"type": "string",
"enum": [
"none",
"genome-wide",
"single-cell"
]
},
"nsets": {
"title": "nsets",
"description": "If distributed, number of sets to split into",
"type": "integer",
"default": 4
}
},
"required": []
},
"ui": {}
}
10 changes: 10 additions & 0 deletions .cirro/process-input.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

{
"niterations": "$.dataset.params.niterations",
"n_top_genes": "$.dataset.params.n_top_genes",
"distributed": "$.dataset.params.distributed",
"nsets": "$.dataset.params.nsets",
"outdir": "$.dataset.dataPath",
"input": "$.inputs.[*].dataPath",
"npatterns": "$.dataset.params.npatterns"
}
3 changes: 3 additions & 0 deletions .cirro/process-output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"commands": []
}
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
Package: CoGAPS
Version: 3.27.4
Version: 3.27.5
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Certainly this PR is worthy of a minor increment!

Date: 2025-03-11
Title: Coordinated Gene Activity in Pattern Sets
Author: Jeanette Johnson, Ashley Tsang, Jacob Mitchell, Thomas Sherman, Wai-shing Lee, Conor Kelton, Ondrej Maxian, Jacob Carey,
Expand Down
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,23 @@ devtools::install_github("FertigLab/CoGAPS")
# Using CoGAPS

Follow the vignette [here](https://github.com/FertigLab/CoGAPS/blob/master/vignettes/CoGAPS.Rmd) and available as static html [here](https://rpubs.com/jeanettejohnson/1018399)

# Run as a Nextflow pipeline
The example below runs CoGAPS once for each requested number of patterns (here 3 and 4) on every `.rds` and `.h5ad` file found in the input folder (`tests/nextflow`).
```
nextflow run main.nf --input tests/nextflow --outdir out -c nextflow.config -profile docker --max_memory 10GB --npatterns 3,4
```

Supported command-line parameters (passed as `--name value`) and their defaults:

npatterns = "5"
nsets = 1
niterations = 100
sparse = 0
seed = 42
distributed = "null"
nthreads = 1
max_memory = '128.GB'
max_cpus = 8
max_time = '72.h'
n_top_genes = 5000
69 changes: 49 additions & 20 deletions nextflow/main.nf → main.nf
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
process COGAPS {
tag "$prefix"
label 'process_medium'
label 'process_high'
label 'process_long'
container 'ghcr.io/fertiglab/cogaps:master'

Expand Down Expand Up @@ -31,6 +31,14 @@ process COGAPS {
mkdir -p "${prefix}"
Rscript -e 'library("CoGAPS");
sparse <- readRDS("$dgCMatrix");
#select the most variable genes (count is set by params.n_top_genes)
message("finding top ", ${params.n_top_genes}, " genes");
vars <- apply(sparse, 1, var);
ngenes <- min(length(vars),${params.n_top_genes});
top_genes <- order(vars, decreasing=TRUE)[1:ngenes];
sparse <- sparse[top_genes,];
message("selected top ", length(top_genes), " genes of ", length(vars));

data <- as.matrix(sparse);
#avoid errors with distributed params
dist_param <- NULL;
Expand All @@ -42,10 +50,18 @@ process COGAPS {
sparseOptimization = as.logical($cparams.sparse),
distributed=dist_param);
if (!(is.null(dist_param))){
params <- setDistributedParams(params, nSets = $cparams.nsets);
nsets <- $cparams.nsets;
allow_cpus <- as.numeric($task.cpus);
if( allow_cpus < 2){
stop("Error: distributed mode requires at least 2 cpus")
}
if (nsets > allow_cpus){
message("Warning: nsets is greater than available cpus. Setting nsets to ", allow_cpus);
}
params <- setDistributedParams(params, nSets = min(nsets,allow_cpus));
};
cogapsResult <- CoGAPS(data = data, params = params, nThreads = $cparams.nthreads,
outputFrequency = floor($cparams.niterations/10));
outputFrequency = 100);
saveRDS(cogapsResult, file = "${prefix}/cogapsResult.rds")'

cat <<-END_VERSIONS > versions.yml
Expand Down Expand Up @@ -88,7 +104,6 @@ process COGAPS_TENX2DGC {
mkdir "${prefix}"

Rscript -e 'res <- Seurat::Read10X("$data/filtered_feature_bc_matrix/");
res <- Seurat::NormalizeData(res);
saveRDS(res, file="${prefix}/dgCMatrix.rds")';

cat <<-END_VERSIONS > versions.yml
Expand All @@ -101,7 +116,7 @@ process COGAPS_TENX2DGC {

process COGAPS_ADATA2DGC {
tag "$meta.id"
label 'process_low'
label 'process_medium'
container 'docker.io/satijalab/seurat:5.0.0'

input:
Expand Down Expand Up @@ -157,9 +172,6 @@ process COGAPS_ADATA2DGC {
if(transpose){
res <- Matrix::t(res)
};

message("Normalizing data");
res <- Seurat::NormalizeData(res);
message("Saving dgCMatrix");
saveRDS(res, file="${prefix}/dgCMatrix.rds")';

Expand All @@ -173,22 +185,39 @@ process COGAPS_ADATA2DGC {
"""
}

//example channel with data folders, for example
ch_data = Channel.fromPath('./test/**gist.rds')
.map { tuple([id:it.getParent().getName()], it)}

//example channel with cparams
ch_cparams = Channel.of([npatterns: 7, niterations: 100, sparse: 1, distributed: 'null', nsets:1, nthreads:1],
[npatterns: 7, niterations: 100, sparse: 0, distributed: 'null', nsets:1, nthreads:1])
//example workflow
workflow {
//channel of .h5ad files found under the input folder (params.input)
ch_adata = Channel.fromPath("${params.input}/**.h5ad")
.map { tuple([id:it.getName().replace('.', '-')], it)}

// combine the two channels as input to CoGAPS
ch_input = ch_data.combine(ch_cparams)
ch_rds = Channel.fromPath("${params.input}/**.rds")
.map { tuple([id:it.getName().replace('.', '-')], it)}

//make a channel with desired pattern number
def patterns = params.npatterns.split(',').collect { it.toInteger() }
ch_patterns = Channel.from(patterns)

//example channel with cparams
ch_fixed_params = Channel.of([niterations: params.niterations, sparse: params.sparse, distributed: params.distributed, nsets:params.nsets, nthreads:1])
Copy link

Copilot AI Jul 9, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nthreads is statically set to 1, so any CLI override of --nthreads will be ignored. Use params.nthreads instead of a hardcoded value.

Suggested change
ch_fixed_params = Channel.of([niterations: params.niterations, sparse: params.sparse, distributed: params.distributed, nsets:params.nsets, nthreads:1])
ch_fixed_params = Channel.of([niterations: params.niterations, sparse: params.sparse, distributed: params.distributed, nsets:params.nsets, nthreads: params.nthreads])

Copilot uses AI. Check for mistakes.
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nthreads is tricky (works with multithreaded only) and never used, would prefer leaving it hardcoded


ch_cparams = ch_patterns
.combine(ch_fixed_params)
.map { tuple([id:it[0].toString(), npatterns:it[0], niterations:it[1].niterations, sparse:it[1].sparse, distributed:it[1].distributed, nsets:it[1].nsets, nthreads:it[1].nthreads]) }

// convert adata to dgCMatrix
COGAPS_ADATA2DGC(ch_adata)

// ch_cogaps_input of converted adatas and rdses
ch_input = COGAPS_ADATA2DGC.out.dgCMatrix
ch_input = ch_input.mix(ch_rds)

// combine the two channels as input to CoGAPS
ch_input = ch_input.combine(ch_cparams)

//run the workflow
workflow {
COGAPS(ch_input)
}

//example:
//nextflow run main.nf -profile docker -resume
//nextflow run main.nf -profile slurm -resume
//nextflow run main.nf --input tests/nextflow --outdir out -c nextflow.config -profile docker
20 changes: 10 additions & 10 deletions nextflow/nextflow.config → nextflow.config
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
// default params
params {
outdir = 'out'
input = ''
outdir = null
input = null

//cogaps params
npatterns = 7
npatterns = "5"
nsets = 1
niterations = 100
sparse = 1
sparse = 0
seed = 42
distributed = 'null'
distributed = "null"
nthreads = 1

max_memory = '8.GB'
max_cpus = 4
max_time = '48.h'
max_memory = '128.GB'
max_cpus = 8
max_time = '72.h'

n_top_genes = 5000
}

//reporting
Expand Down Expand Up @@ -151,8 +153,6 @@ profiles {
singularity.autoMounts = true
process {
executor = 'slurm'
cpus = 10
memory = '10 GB'
}
}
test {
Expand Down
Binary file added tests/nextflow/gist.h5ad
Binary file not shown.
Binary file added tests/nextflow/gist.rds
Binary file not shown.
Loading