Skip to content

Commit cdfe210

Browse files
Merge pull request #494 from WrightonLabCSU/feature/dbcan3-test-version
Feature/dbcan3 test version
2 parents f03804b + 0c3dc40 commit cdfe210

15 files changed

Lines changed: 152 additions & 15 deletions

CHANGELOG.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,24 @@
22

33
All notable changes to this project will be documented in this file.
44

5+
## 2.0.0-beta27 - 2026-03-18
6+
7+
[f03804b](https://github.com/WrightonLabCSU/DRAM/commit/f03804bca43b15e55731316c00b1c34ac328c62c)...[7d9a12d](https://github.com/WrightonLabCSU/DRAM/commit/7d9a12d225c577a6b2fb0c4d7b1ba60a5588e1e8)
8+
9+
### Features
10+
11+
- Add a test version of dbcan3 to compare against dbcan2 ([efb3cc2](https://github.com/WrightonLabCSU/DRAM/commit/efb3cc23a5478f85e449099ec37285138cc5f8b7))
12+
13+
dbcan3 and dbcan3-sub test versions, will run both if run_dbcan3
14+
option is present.
15+
16+
- Switch hmmsearch to using PyHMMER search ([7d9a12d](https://github.com/WrightonLabCSU/DRAM/commit/7d9a12d225c577a6b2fb0c4d7b1ba60a5588e1e8))
17+
18+
PyHMMER has better parallelism support, directly calling
19+
the lower level C bindings for HMMER and rewriting how
20+
it parallelizes. This means that with the cpus=4 arg, it can run in
21+
1/3 of the walltime with the exact same result.
22+
523
## 2.0.0-beta26 - 2026-03-09
624

725
[605d4f5](https://github.com/WrightonLabCSU/DRAM/commit/605d4f5d619d9f373352c8f400128066edcf58ef)...[91edea7](https://github.com/WrightonLabCSU/DRAM/commit/91edea7e6974be47da036f0f8af247d3d033326a)

bin/hmm_search.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#!/usr/bin/env python
2+
import time
3+
import pyhmmer
4+
import click
5+
from pathlib import Path
6+
7+
alphabet = pyhmmer.easel.Alphabet.amino()
8+
9+
10+
@click.command()
@click.option(
    "--hmm",
    type=str,
    required=True,
    help="Path glob to the HMM db.",
)
@click.option(
    "--input_file",
    type=click.Path(exists=True),
    required=True,
    help="Path to the input fasta to search against",
)
@click.option(
    "--e_value",
    type=float,
    default=10.0,
    show_default=True,
    help="e value cutoff for filtering",
)
@click.option(
    "--output_file",
    type=click.Path(),
    required=True,
    help="Path to output file",
)
@click.option(
    "--cpus",
    type=int,
    default=0,
    show_default=True,
    help="number of cpu core to run HMMER with",
)
def main(hmm, input_file, e_value, output_file, cpus):
    """Search a FASTA file against every HMM matched by a path glob.

    All HMMs from the files matching ``hmm`` are loaded into memory, the
    sequences in ``input_file`` are loaded as a digital sequence block, and
    the hits of each query HMM are appended to ``output_file`` in HMMER's
    per-domain table format (header written once, before the first query).

    Args:
        hmm: Path whose final component may contain glob wildcards
            (e.g. ``db/*.hmm``); the wildcard is expanded here, so it must
            reach this script unexpanded or match a single file.
        input_file: FASTA file with the target sequences.
        e_value: E-value cutoff forwarded to the search as ``E``.
        output_file: Destination of the domain table (overwritten).
        cpus: Worker count forwarded to ``pyhmmer.hmmer.hmmsearch``.

    Raises:
        click.ClickException: If the ``hmm`` glob matches no file.
    """
    start = time.perf_counter()

    # Only the last path component is treated as a glob pattern; sort the
    # matches so the order of queries in the output is reproducible.
    pattern = Path(hmm)
    hmm_paths = sorted(pattern.parent.glob(pattern.name))
    if not hmm_paths:
        # Fail loudly instead of silently writing an empty, header-less table.
        raise click.ClickException(f"No HMM files matched the pattern: {hmm}")

    hmms = []
    for path in hmm_paths:
        with pyhmmer.plan7.HMMFile(path) as hmm_file:
            hmms.extend(hmm_file)
    print(f"Loaded {len(hmms)} HMMs from {len(hmm_paths)} file(s)")

    # Load all target sequences up front so the FASTA handle can be closed
    # before the search, and so a bad input never leaves a truncated output.
    with pyhmmer.easel.SequenceFile(
        input_file, digital=True, alphabet=alphabet
    ) as seq_file:
        seqs = pyhmmer.easel.DigitalSequenceBlock(alphabet)
        seqs.extend(seq_file)

    with open(output_file, "wb") as out_fh:
        write_header = True
        for hits in pyhmmer.hmmer.hmmsearch(hmms, seqs, cpus=cpus, E=e_value):
            # One table per query HMM; emit the column header only once.
            hits.write(out_fh, format="domains", header=write_header)
            write_header = False

    # ".3f" (fixed-point) rather than ".3", which switches to scientific
    # notation for long runs (e.g. "1.23e+03").
    print(f"pyhmmer search completed in {time.perf_counter() - start:.3f} seconds")


if __name__ == "__main__":
    main()

modules/local/annotate/add_sql_descriptions.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process ADD_SQL_DESCRIPTIONS {
44
errorStrategy 'finish'
55

66
conda "${moduleDir}/environment.yml"
7-
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"
7+
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:0a22b52d960467a9"
88

99
input:
1010
tuple val(input_fasta), path(hits_file)

modules/local/annotate/combine_annotations.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process COMBINE_ANNOTATIONS {
44
errorStrategy 'finish'
55

66
conda "${moduleDir}/environment.yml"
7-
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"
7+
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:0a22b52d960467a9"
88

99
input:
1010
path(fastas, stageAs: "annotations/*" )

modules/local/annotate/environment.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ dependencies:
1010
- scikit-bio=0.7.1
1111
- scipy<2
1212
- click<9.0
13+
- pyhmmer

modules/local/annotate/gene_locs.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process GENE_LOCS {
44
errorStrategy 'finish'
55

66
conda "${moduleDir}/environment.yml"
7-
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"
7+
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:0a22b52d960467a9"
88

99
tag { input_fasta }
1010

modules/local/annotate/hmmsearch.nf

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process HMM_SEARCH {
44
errorStrategy 'finish'
55

66
conda "${moduleDir}/environment.yml"
7-
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"
7+
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:0a22b52d960467a9"
88

99
tag { input_fasta }
1010

@@ -24,12 +24,12 @@ process HMM_SEARCH {
2424
def ec_flag = ec_from_info ? "--ec_from_info" : ""
2525

2626
"""
27-
hmmsearch \\
28-
-E ${e_value} \\
29-
--domtblout ${input_fasta}_hmmsearch.out \\
30-
--cpu ${task.cpus} \\
31-
${database_loc}/*.hmm \\
32-
${fasta} > /dev/null
27+
hmm_search.py \\
28+
--hmm ${database_loc}/*.hmm \\
29+
--input_file ${fasta} \\
30+
--e_value ${e_value} \\
31+
--output_file ${input_fasta}_hmmsearch.out \\
32+
--cpus ${task.cpus}
3333
3434
hmm_parser.py \\
3535
--hmm_domtbl ${input_fasta}_hmmsearch.out \\

modules/local/annotate/merge_annotations.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process MERGE_ANNOTATIONS {
44
errorStrategy 'finish'
55

66
conda "${moduleDir}/environment.yml"
7-
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"
7+
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:0a22b52d960467a9"
88

99
input:
1010
path( ch_annotations, stageAs: "annotations/*" )

modules/local/annotate/mmseqs_index.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process MMSEQS_INDEX{
44
errorStrategy 'finish'
55

66
conda "${moduleDir}/environment.yml"
7-
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"
7+
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:0a22b52d960467a9"
88

99
tag { input_fasta }
1010

modules/local/annotate/mmseqs_search.nf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ process MMSEQS_SEARCH {
44
errorStrategy 'finish'
55

66
conda "${moduleDir}/environment.yml"
7-
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:d2c88b719ab1322c"
7+
container "community.wave.seqera.io/library/python_pandas_hmmer_mmseqs2_pruned:0a22b52d960467a9"
88

99
tag { input_fasta }
1010

0 commit comments

Comments
 (0)