From 384e41991901b999895c6b7818687085f356d856 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Tue, 17 Mar 2026 09:34:20 +0000 Subject: [PATCH 1/2] adding blast submodule to init py --- microbiorust-py/microbiorust/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microbiorust-py/microbiorust/__init__.py b/microbiorust-py/microbiorust/__init__.py index c19da46..3bf929f 100644 --- a/microbiorust-py/microbiorust/__init__.py +++ b/microbiorust-py/microbiorust/__init__.py @@ -7,7 +7,7 @@ from . import microbiorust as _base #list of submodules importable - submodules = ["gbk", "embl", "align", "seqmetrics"] + submodules = ["gbk", "embl", "align", "seqmetrics", "blast"] for sub_name in submodules: #get the submodule from the base binary From bf1729ba4084d1238a153869fcfa12a3053ca527 Mon Sep 17 00:00:00 2001 From: LCrossman Date: Wed, 18 Mar 2026 12:44:42 +0000 Subject: [PATCH 2/2] adding pytests for microbiorust-py from python --- .../test_mbr.cpython-310-pytest-9.0.2.pyc | Bin 0 -> 10231 bytes microbiorust-py/tests/test_mbr.py | 150 ++++++++++++++++++ 2 files changed, 150 insertions(+) create mode 100644 microbiorust-py/tests/__pycache__/test_mbr.cpython-310-pytest-9.0.2.pyc create mode 100644 microbiorust-py/tests/test_mbr.py diff --git a/microbiorust-py/tests/__pycache__/test_mbr.cpython-310-pytest-9.0.2.pyc b/microbiorust-py/tests/__pycache__/test_mbr.cpython-310-pytest-9.0.2.pyc new file mode 100644 index 0000000000000000000000000000000000000000..34f6f890fe2d8bb153af79eb9312337dc9512e10 GIT binary patch literal 10231 zcmcIq+ix3Jdgl!9)Wz2%&Stme3b`@6rbJni?XKm`graGcG%ZmKU9-C&MjA?{O_B7O zkuNG$Y}YAJED#jPQ-G!m8|aH(_Ni#m_XYYt%u|5^3j}@an_tr3@0%e-Qt~=kq!i}N zxqg@PUC#HNQDsNh@qcq%|$As1=je zDJyOzJ`2=NTNBnK-e;^SYZ~vf)+y^W-sh|{)>*vITj#9vc)wx2VZDj>o7P*_1-##~ zw2eUO?XN+3pp??w(3r&7LWZdiXJ$8Cvc!Sf#;f~zt9f!p7vbtp{50# zU{8BSh{65$vZ`S^4LfNjcbo0zuGvaHGFwe6nQyc|G20K4+s#%Zy&uCAC%v=zz^qIJ z0*R#mE8fn2y5ZOD&Ys$8)HD6D!TV}vF1gudbERZRZ>!mEm3VeA3?Av>laB-pXf|78lcll}0w3 z+t`qW$v#Dc!D_DT4<(Ne7IJs*+%B&OJ4f&~ZhuSAZhbef&^?D`(0v)ybcd$2%ec)WU(S!U8{?EC1PqfxIm+fh4a zyJ_z(&z!KQGp|{^+iBIEr`^W#48%G%d8g5C3}f5s>~>p?-A3E_5QJS?Tu8kJc&oFu zXV)EbXL)Ax(xthDG5A?eOam0(H~ax$zcRbku{!O#?U>40&b)?pGQT{dx~=jU)yJSZ z%52*$5$y8Jt(8Jy!zizo%SNSal*@(Udajs9y;!Z@ELYabH&^c(>(y0bt(4y=Rt;mL zQq2{NRU@}vEZnW+ONA8!ziPf4R zVx?RumR5{vu3ETRsuU|X@@wUl{9R)uf5%ui@)ZM8FP1iLRaf)XN-lq+Qh*W_tNCjF zZW)?U$d{_Cr6QDJq=3XjUsmF1uD)ujE#%#X{a#DF7p|8pcYoY}_rE*K%uj zfj?g@15veFt`u@cq3}A!zOC{s9Ra#sUd^wTju}@bS*aL#!^l?hM$*XVvsojbtyGd( zBb!GHjigb*Ap0vxyej#;fx#rotbs)pV;trZVPY00Wb>FuNR_N1;4vMmvY5c71jlNH z(2aaDo6qLa0Map-nfVM63TYK13BXF35F*@fLGD++Zh5-l+vkLaP1bcuQdXjj9d_X3{ zz{uIaBq+JRV=ynxkxo*Hd0Zm%0S_dD41kbb7U*b`ClCqZAVlmn5CFwEOvnf%kORzx z2v`+qpaR(un5ck^3nq^x${pJT0UU$5KpC+gn87=ghmr(!SV9e;a$y3u78<}R7~!5m zttdlu9^20a$cWPjldNGi5lEGsvktALg0ci45XW&*4AIW)xQrGlBT~T!yaS4_pX@^e z^pQ(4=~dYe3Jh$xfCmv{`?&-ek#Vd6v7poAmLZkT{BSji1IMxrR8{OY zH(VH@_KNTdtY z&_uyZNFr9zKu@j6IX4{Bup`_oETaw#;LXlTH5GCZWIu5pE3@lHtcHbP$xS>~O{Y<3 zzZ*ondlEs>6CY6^{4rX>ISXBh3LB$h78NWf1M9IERD?y8 z1^3J=CQXxU6CV?kZlze$9_;b}Bq$)TFYC=#lfF&;iCf{CPKZtk^^@_53D-e`#f(;;lufF~w+hAE4G6$HMBk1sNe?;wcXn3COyT8V>LFWQSepz7-O?*V7pn&|1< znl;J(ZT&^ani^m~B=%ku*ncM&Q3LiVsmH+O^jA8t#+m*1;!O?pgik?Ygn}3-I3*NJ z;D!J<3%!YBe19{-_aBH^`2HVa`_uTg0=OGETY9-^+VCY#^1TncPcNtJ#NYUpWINEEZTS zvba2Ab5K&b=&-4>xaSG{9sJ5KqX$rup7tnE#YN6TU9L;?T3B+&lGyj5?&|3|H^jS# zajTDCW6JWRtFHewp@vZCzj`yU{{bB!yNd9j7=fSL9_kzFsR-D8uccNw@`dd#tyZY`tov0+ds=4!m{MDx7C+Ywj18+?6sYg zE(^Iam-eTJ@M1p|_v6@ZksaBsAEF|=758@Qd4NVdvSSGcIk#zUIXfornZm8kvrf zb9J0>yQWizI9q00a%=VHKwLY0L=MD>opjICA;*@0=LLeP8b>s@&D5mmWi1H$-3e0b zZFXk3acQ`*=Bvt$rKy98oiO`o?OG^Zw z;OT1WD!NA0fT!HR&-XO?`TOwm^yQ!H8Vu;40#@*!^m$OPOA^fP1Vl$x0wCQ%zEZfbMg4czL?~mb1(zx@*iS{9#k+zM<6V4=~-Xj(CAnI)tBfjM-+z;klO`TF7 zU~FI}Bz|Dp>O-zgvG|C^Pgwkv#YGlr7PBbaP^a5yyCD*x=FpLv5KB{k!5JAA^DHj0 zSYW|mY}k0_h8qiNiNoSgJk!g}ja$#Df%U*2f#?QS*bIKA5g2``O=`c@X1-B3&@o~* zjQ5Mcs?rC&Os=V2=hHH5OWU&4f_ z6L}t$@aZDPV-5l&HIG`HHK<_};ZT3Zb4{biq8zbqB5ij zA%q$UX`rN_(3*e&LEYbm$})I?@i3_Q0`tSxlms=?{h&rLz zG#Z$LXCbH@MJ#e$!?a)_{UdmZHqdIc4@2I`{d3}>&CoKIUJf6{S3GmMvLBZ0#3Sh% zruI+vA)`gOfUHD-F$@@i1C**r3?b$>Uz0PON@9WMJ$~v)RqBHT$qjc^v+XEG{+{{K zyoOBj1ZzwsB(G67Ia4fY=&V~*E9f3Roh}bIt}0rFXRV8@jb&4w*&R{Z!5m4dXm-ET z?n9T=9rSH6_UZps3m+O3$aeqkvDyPc(CF+SMVfLSsCr0Z(IQ-w<2;{MVtwYPL3E@Pz)?A zUOrmdpX~0bordM5syiNo|Ml}>ci-7_j@*4}%1cZ|zzJDf-!Z%PcXzR=s3%nJqxvfr zC%M%VrFDe%G1l44lqa=m{VPq~MRmkIa*Wav_xNj6j|_Z!a^Hv)Eyd7awsp{mhUqAY4-Gpk%)|6dA*e zJQGL7H}O~GDe{5Ge%xfXA}#>d$wM-Z_}^}{?L9l?TmK+#7XyV6iO{BY8V+*>Z?2m- zf;VJ@uR0e~5l?6}&_5)v3Y(1d#;xnDcc=|xJis&w$P@Y#P5lBbhUkOe$ayvgcLD+R zLqUk8#8p0h2$iwQEVriWc`bPk1!sUi-1|ws->@k*lWag6OWs1tMj;SH}@Oj{ZraY?K-^w?6*5|u1h>e_fJ=j!)vR9DgCNCE^T>pTW717DZ6scf+1_fn;cZ+FGP8UK z3ti0~(=^HT{(-=WLI2f-!2bK9d-tANs?)vK*=%k#ou?-x#){W|$ah;Ul|-$7cukKk z{7&bUqDH3@xDIVRRCUzl&{AS0JghuZsj_zD^G-UP&Yi%*XILgzW+81F2NY5%Mv5ag zM29zUdVfH}v!VXA8ICF)(GIL>BobV5^yfH~BoYkwBSG}x(1QN)P2G=3Y!&8(&OrCW z7>%5y`@j?zOo7|{4v6>ypAHpkakNcf@| zUO*Q0yyT>be>LfNNw_@}r;MfvIKwPJa3hke+e4uSqZB0~J;^Pe5xEYd_zmX!QcZY4 z$9?pSn9qo3{1Tq==qPjWk2B&C{lmUqR+jk`ABiTwa7y_i>6fz z@v2?zaM7rS)?%AYN1QAN@oRpJr1S$+YE!ruvhfMVyTJcWt_o4X8Guj^ScuY|sFXbKbBQ$)01)|Q3p%FoAs)e4^1vl2ePLp86eY@`6rKP=42v=mt`{jd@8y|GJ z*f{AEJ9H!75-;K;|B5G?8|4LVtGTIo%(&6*<`ZX6HN32yUqw7VdF>=gv;_4MFG>s| zPE^t6gwmnYMIz8i!i-(p#ebW$8rPZR*zcp53B>TtB8KEV6iR5J1w0e@ijjz&#uJN0 KV$oP+GWLJGU3J6& literal 0 HcmV?d00001 diff --git a/microbiorust-py/tests/test_mbr.py b/microbiorust-py/tests/test_mbr.py new file mode 100644 index 0000000..89426bf --- /dev/null +++ b/microbiorust-py/tests/test_mbr.py @@ -0,0 +1,150 @@ +import pytest +import os +import microbiorust +import textwrap +from pathlib import Path + +# --- FIXTURES: Generating Mock Data --- + +@pytest.fixture +def mock_gbk(tmp_path): + """Creates a minimal valid GenBank file.""" + path = tmp_path / "test.gbk" + content = textwrap.dedent(""" + LOCUS source_1 910 bp DNA linear CON 01-NOV-2024 + DEFINITION Escherichia coli K-12 substr. MG1655. + ACCESSION source_1 + VERSION source_1 + KEYWORDS . + SOURCE Escherichia coli K-12 substr. MG1655 + ORGANISM Escherichia coli K-12 substr. MG1655 + FEATURES Location/Qualifiers + source 1..910 + /organism="K-12 substr. MG1655" + /mol_type="DNA" + gene complement(1..354) + /locus_tag="b3304" + CDS complement(1..354) + /locus_tag="b3304" + /codon_start=1 + /gene="rplR" + /translation="MDKKSARIRRATRARRKLQELGATRLVVHRTPRHIYAQVIAPNGSLVAASTVEKAIAEQLKYTGNKDAAAAVGKAVAERALEKGIKDVSFDRSGFQYHGRVQALDAAREAGLQ" + /product="50S ribosomal subunit protein L18" + gene complement(364..897) + /locus_tag="b3305" + CDS complement(364..897) + /locus_tag="b3305" + /codon_start=1 + /gene="rplF" + /translation="MSRVAKAPVVVPAGVDVKINGQVITIKGKNGELTRTLNDAVEVKHNTLTFGPRDGYADGWAQAGTARALLNSMVIGVTEGFTKKLQLVGVGYRAAVKGNVINLSGFSHPVDHQLPAGITAECPTQTEIVLKGADKQVIGQVAADLRAYRRPEPYKGKGVRYADVVRTKEAKK" + /product="50S ribosomal subunit protein L6" + ORIGIN + 1 TTAGAACTGA AGGCCAGCTT CACGGGCAGC ATCTGCCAGT GCCTGGACAC GACCATGATA + 61 TTGGAACCCG GAACGGTCAA AGGATACATC TTTGATGCCT TTTTCCAGAG CGCGTTCAGC + 121 GACAGCTTTA CCCACAGCTG CAGCCGCGTC TTTGTTACCG GTGTACTTCA GTTGTTCAGC + 181 GATAGCTTTT TCTACAGTAG AAGCAGCTAC CAGAACTTCA GAACCGTTCG GTGCAATTAC + 241 CTGTGCGTAA ATGTGACGCG GGGTACGATG TACCACCAGG CGAGTTGCGC CCAGCTCCTG + 301 GAGCTTGCGG CGTGCGCGGG TCGCACGACG GATACGAGCA GATTTCTTAT CCATAGTGTT + 361 ACCTTACTTC TTCTTAGCCT CTTTGGTACG CACGACTTCG TCGGCGTAAC GAACACCCTT + 421 GCCTTTATAA GGCTCAGGAC GACGGTAGGC GCGCAGATCC GCTGCAACCT GGCCGATCAC + 481 CTGCTTATCA GCGCCTTTCA GCACGATTTC AGTCTGAGTC GGACATTCAG CAGTGATACC + 541 CGCAGGCAGC TGATGGTCAA CAGGATGAGA GAAACCCAGA GACAGGTTAA TCACATTGCC + 601 TTTAACCGCT GCACGGTAAC CTACACCAAC CAGCTGCAGC TTCTTAGTGA AGCCTTCGGT + 661 AACACCGATA ACCATTGAGT TCAGCAGGGC ACGCGCGGTA CCAGCCTGTG CCCAACCGTC + 721 TGCGTAACCA TCACGCGGAC CGAAGGTCAG GGTATTATCT GCATGTTTAA CTTCAACAGC + 781 ATCGTTGAGA GTACGAGTCA GCTCGCCGTT TTTACCTTTG ATCGTAATAA CCTGACCGTT + 841 GATTTTTACG TCAACGCCGG CAGGAACAAC GACCGGTGCT TTAGCAACAC GAGACA + // + """) + path.write_text(content) + return str(path) + +@pytest.fixture +def mock_msa(tmp_path): + """Creates a mock FASTA alignment file.""" + path = tmp_path / "align.fasta" + content = ( + ">Seq1\nATGC--AT\n" + ">Seq2\nATGC--TT\n" + ">Seq3\nATGCGGTT\n" + ) + path.write_text(content) + return str(path) + +@pytest.fixture +def mock_blast_tab(tmp_path): + """Creates a mock BLAST tabular file.""" + path = tmp_path / "results.tab" + # qseqid sseqid pident length mismatch gapopen qstart qend sstart send evalue bitscore + content = "seqA\tseqB\t99.0\t100\t1\t0\t1\t100\t1\t100\t1e-10\t200.0\n" + path.write_text(content) + return str(path) + +#tests for parsers (gbk, embl) + +def test_gbk_conversions(mock_gbk): + #test protein sequence extraction + #with open(mock_gbk, 'rb') as f: + # print(f"\n[DEBUG] Raw File Content: {f.read()}") + faa = microbiorust.gbk_to_faa(mock_gbk) + assert os.path.exists(mock_gbk) + assert any("b3304" in line for line in faa) + + #test count + count = microbiorust.gbk_to_faa_count(mock_gbk) + assert count == 2 + + #test nucleotide sequence extraction + fna = microbiorust.gbk_to_fna(mock_gbk) + assert any("ttagaactga" in line.lower() for line in fna) + +def test_gbk_to_gff(mock_gbk): + #this function writes to {filename}.gff and reads again to check + microbiorust.gbk_to_gff(mock_gbk, dna=True) + gff_path = f"{mock_gbk}.gff" + assert os.path.exists(gff_path) + with open(gff_path, "r") as f: + assert "source_source_1_1" in f.read() + +#tests for multiple sequence alignment + +def test_subset_msa(mock_msa): + #subset mock alignment: Rows 0-2 (Seq1 & Seq2), Cols 0-4 (ATGC) + subset = microbiorust.subset_msa_alignment(mock_msa, (0, 2), (0, 4)) + print("subset", subset) + assert len(subset) == 2 # 2 headers + 2 sequences + assert ">Seq1" in subset[0] + assert "ATGC" in subset[1] + +def test_purge_gaps(mock_msa, tmp_path): + out_path = str(tmp_path / "purged.fasta") + #threshold 0.5 should remove the '--' columns in Seq1 and Seq2 and write to file + microbiorust.purge_gaps(mock_msa, out_path, 0.5) + assert os.path.exists(out_path) + +def test_get_consensus(mock_msa): + #given 'ATGC' is constant in the mock, it should be in consensus + consensus = microbiorust.get_consensus(mock_msa) + assert consensus.startswith("ATGC") + +#tests for Sequence Metrics + +def test_hydrophobicity(): + seq = "MALWMRLLPLLALLALWGPDPAAAFVN" + scores = microbiorust.hydrophobicity(seq, window_size=3) + assert len(scores) > 0 + assert all(isinstance(s, float) for s in scores) + +def test_amino_counts(): + seq = "MATAG" + counts = microbiorust.amino_counts(seq) + assert counts['M'] == 1 + assert counts['A'] == 2 + +#test for Async Tabular Parser + +def test_parse_tabular(mock_blast_tab): + results = microbiorust.parse_tabular(mock_blast_tab) + assert len(results) == 1 + assert results[0]['qseqid'] == "seqA" + assert results[0]['bitscore'] == 200.0