-
Notifications
You must be signed in to change notification settings - Fork 16
Nextflow #144
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Nextflow #144
Changes from all commits
06b9297
5ef6b3f
98e570d
8edd58c
abb66ca
c2e020f
83cc13c
cafd7a2
a47c121
3acecbb
dae6ad4
453c447
b4e10fc
364d823
859f1e3
9ee7261
ecee6a4
583a868
d1caed3
852e993
40f49d9
7b7eca1
61fb147
3dacec4
a8a8060
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| process { | ||
| executor = 'awsbatch' | ||
| errorStrategy = 'retry' | ||
| maxRetries = 2 | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,42 @@ | ||
| { | ||
| "form": { | ||
| "type": "object", | ||
| "properties": { | ||
| "npatterns": { | ||
| "title": "npatterns", | ||
| "description": "number of patterns to find, e.g. 5 or 5,6,7", | ||
| "type": "string" | ||
| }, | ||
| "niterations": { | ||
| "title": "niterations", | ||
| "description": "number of iterations to run", | ||
| "type": "integer", | ||
| "default": 1000 | ||
| }, | ||
| "n_top_genes": { | ||
| "title": "n_top_genes", | ||
| "description": "number of top genes (by var) to use", | ||
| "type": "integer", | ||
| "default": 1000 | ||
| }, | ||
| "distributed": { | ||
| "title": "distributed", | ||
| "description": "none for a single run; genome-wide or single-cell for a distributed run", | ||
| "type": "string", | ||
| "enum": [ | ||
| "none", | ||
| "genome-wide", | ||
| "single-cell" | ||
| ] | ||
| }, | ||
| "nsets": { | ||
| "title": "nsets", | ||
| "description": "If distributed, number of sets to split into", | ||
| "type": "integer", | ||
| "default": 4 | ||
| } | ||
| }, | ||
| "required": [] | ||
| }, | ||
| "ui": {} | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,10 @@ | ||
|
|
||
| { | ||
| "niterations": "$.dataset.params.niterations", | ||
| "n_top_genes": "$.dataset.params.n_top_genes", | ||
| "distributed": "$.dataset.params.distributed", | ||
| "nsets": "$.dataset.params.nsets", | ||
| "outdir": "$.dataset.dataPath", | ||
| "input": "$.inputs.[*].dataPath", | ||
| "npatterns": "$.dataset.params.npatterns" | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "commands": [] | ||
| } |
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -1,6 +1,6 @@ | ||||||
| process COGAPS { | ||||||
| tag "$prefix" | ||||||
| label 'process_medium' | ||||||
| label 'process_high' | ||||||
| label 'process_long' | ||||||
| container 'ghcr.io/fertiglab/cogaps:master' | ||||||
|
|
||||||
|
|
@@ -31,6 +31,14 @@ process COGAPS { | |||||
| mkdir -p "${prefix}" | ||||||
| Rscript -e 'library("CoGAPS"); | ||||||
| sparse <- readRDS("$dgCMatrix"); | ||||||
| #select the n_top_genes most variable genes (capped at the number of genes available) | ||||||
| message("finding top ", ${params.n_top_genes}, " genes"); | ||||||
| vars <- apply(sparse, 1, var); | ||||||
| ngenes <- min(length(vars),${params.n_top_genes}); | ||||||
| top_genes <- order(vars, decreasing=TRUE)[1:ngenes]; | ||||||
| sparse <- sparse[top_genes,]; | ||||||
| message("selected top ", length(top_genes), " genes of ", length(vars)); | ||||||
|
|
||||||
| data <- as.matrix(sparse); | ||||||
| #avoid errors with distributed params | ||||||
| dist_param <- NULL; | ||||||
|
|
@@ -42,10 +50,18 @@ process COGAPS { | |||||
| sparseOptimization = as.logical($cparams.sparse), | ||||||
| distributed=dist_param); | ||||||
| if (!(is.null(dist_param))){ | ||||||
| params <- setDistributedParams(params, nSets = $cparams.nsets); | ||||||
| nsets <- $cparams.nsets; | ||||||
| allow_cpus <- as.numeric($task.cpus); | ||||||
| if( allow_cpus < 2){ | ||||||
| stop("Error: distributed mode requires at least 2 cpus") | ||||||
| } | ||||||
| if (nsets > allow_cpus){ | ||||||
| message("Warning: nsets is greater than available cpus. Setting nsets to ", allow_cpus); | ||||||
| } | ||||||
| params <- setDistributedParams(params, nSets = min(nsets,allow_cpus)); | ||||||
| }; | ||||||
| cogapsResult <- CoGAPS(data = data, params = params, nThreads = $cparams.nthreads, | ||||||
| outputFrequency = floor($cparams.niterations/10)); | ||||||
| outputFrequency = 100); | ||||||
| saveRDS(cogapsResult, file = "${prefix}/cogapsResult.rds")' | ||||||
|
|
||||||
| cat <<-END_VERSIONS > versions.yml | ||||||
|
|
@@ -88,7 +104,6 @@ process COGAPS_TENX2DGC { | |||||
| mkdir "${prefix}" | ||||||
|
|
||||||
| Rscript -e 'res <- Seurat::Read10X("$data/filtered_feature_bc_matrix/"); | ||||||
| res <- Seurat::NormalizeData(res); | ||||||
| saveRDS(res, file="${prefix}/dgCMatrix.rds")'; | ||||||
|
|
||||||
| cat <<-END_VERSIONS > versions.yml | ||||||
|
|
@@ -101,7 +116,7 @@ process COGAPS_TENX2DGC { | |||||
|
|
||||||
| process COGAPS_ADATA2DGC { | ||||||
| tag "$meta.id" | ||||||
| label 'process_low' | ||||||
| label 'process_medium' | ||||||
| container 'docker.io/satijalab/seurat:5.0.0' | ||||||
|
|
||||||
| input: | ||||||
|
|
@@ -157,9 +172,6 @@ process COGAPS_ADATA2DGC { | |||||
| if(transpose){ | ||||||
| res <- Matrix::t(res) | ||||||
| }; | ||||||
|
|
||||||
| message("Normalizing data"); | ||||||
| res <- Seurat::NormalizeData(res); | ||||||
| message("Saving dgCMatrix"); | ||||||
| saveRDS(res, file="${prefix}/dgCMatrix.rds")'; | ||||||
|
|
||||||
|
|
@@ -173,22 +185,39 @@ process COGAPS_ADATA2DGC { | |||||
| """ | ||||||
| } | ||||||
|
|
||||||
| //example channel with data folders, for example | ||||||
| ch_data = Channel.fromPath('./test/**gist.rds') | ||||||
| .map { tuple([id:it.getParent().getName()], it)} | ||||||
|
|
||||||
| //example channel with cparams | ||||||
| ch_cparams = Channel.of([npatterns: 7, niterations: 100, sparse: 1, distributed: 'null', nsets:1, nthreads:1], | ||||||
| [npatterns: 7, niterations: 100, sparse: 0, distributed: 'null', nsets:1, nthreads:1]) | ||||||
| //example workflow | ||||||
| workflow { | ||||||
| //channel of .h5ad files found recursively under params.input, keyed by file name | ||||||
| ch_adata = Channel.fromPath("${params.input}/**.h5ad") | ||||||
| .map { tuple([id:it.getName().replace('.', '-')], it)} | ||||||
|
|
||||||
| // combine the two channels as input to CoGAPS | ||||||
| ch_input = ch_data.combine(ch_cparams) | ||||||
| ch_rds = Channel.fromPath("${params.input}/**.rds") | ||||||
| .map { tuple([id:it.getName().replace('.', '-')], it)} | ||||||
|
|
||||||
| //make a channel with desired pattern number | ||||||
| def patterns = params.npatterns.split(',').collect { it.toInteger() } | ||||||
| ch_patterns = Channel.from(patterns) | ||||||
|
|
||||||
| //example channel with cparams | ||||||
| ch_fixed_params = Channel.of([niterations: params.niterations, sparse: params.sparse, distributed: params.distributed, nsets:params.nsets, nthreads:1]) | ||||||
|
||||||
| ch_fixed_params = Channel.of([niterations: params.niterations, sparse: params.sparse, distributed: params.distributed, nsets:params.nsets, nthreads:1]) | |
| ch_fixed_params = Channel.of([niterations: params.niterations, sparse: params.sparse, distributed: params.distributed, nsets:params.nsets, nthreads: params.nthreads]) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nthreads is tricky (it works with multithreaded only) and is never used, so I would prefer leaving it hardcoded.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Certainly this PR is worthy of a minor increment!