|
| 1 | +#!/usr/bin/env python |
| 2 | +import time |
| 3 | +import pyhmmer |
| 4 | +import click |
| 5 | +from pathlib import Path |
| 6 | + |
| 7 | +alphabet = pyhmmer.easel.Alphabet.amino() |
| 8 | + |
| 9 | + |
| 10 | +@click.command() |
| 11 | +@click.option( |
| 12 | + "--hmm", |
| 13 | + type=str, |
| 14 | + help="Path glob to the HMM db.", |
| 15 | +) |
| 16 | +@click.option( |
| 17 | + "--input_file", |
| 18 | + type=click.Path(exists=True), |
| 19 | + help="Path to the input fasta to search against", |
| 20 | +) |
| 21 | +@click.option("--e_value", type=float, help="e value cutoff for filtering") |
| 22 | +@click.option( |
| 23 | + "--output_file", |
| 24 | + type=click.Path(), |
| 25 | + help="Path to output file", |
| 26 | +) |
| 27 | +@click.option("--cpus", type=int, help="number of cpu core to run HMMER with") |
| 28 | +def main(hmm, input_file, e_value, output_file, cpus): |
| 29 | + t1 = time.time() |
| 30 | + |
| 31 | + hmm = Path(hmm) |
| 32 | + |
| 33 | + hmm_paths = hmm.parent.glob(hmm.name) |
| 34 | + |
| 35 | + hmms = [] |
| 36 | + for path in hmm_paths: |
| 37 | + with pyhmmer.plan7.HMMFile(path) as hmm_file: |
| 38 | + hmms.extend(hmm_file) |
| 39 | + |
| 40 | + print(hmms) |
| 41 | + |
| 42 | + with open(output_file, "wb") as out_fh: |
| 43 | + with pyhmmer.easel.SequenceFile( |
| 44 | + input_file, digital=True, alphabet=alphabet |
| 45 | + ) as sf: |
| 46 | + seqs = pyhmmer.easel.DigitalSequenceBlock(alphabet) |
| 47 | + seqs.extend(sf) |
| 48 | + first = True |
| 49 | + for hits in pyhmmer.hmmer.hmmsearch(hmms, seqs, cpus=cpus, E=e_value): |
| 50 | + hits.write(out_fh, format="domains", header=first) |
| 51 | + first = False |
| 52 | + # total = sum(len(hits) for hits in pyhmmer.hmmer.hmmsearch(hmms, seqs, cpus=8, E=1e-15)) |
| 53 | + print(f"pyhmmer search completed in {time.time() - t1:.3} seconds") |
| 54 | + |
| 55 | + |
| 56 | +if __name__ == "__main__": |
| 57 | + main() |
0 commit comments