diff --git a/.github/workflows/production-release.yml b/.github/workflows/production-release.yml
index 3e385ce..aa4a905 100644
--- a/.github/workflows/production-release.yml
+++ b/.github/workflows/production-release.yml
@@ -24,6 +24,7 @@ jobs:
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.tag.outputs.tag }}
+ release_version: ${{ steps.tag.outputs.release_version }}
tag_exists: ${{ steps.check.outputs.exists }}
steps:
@@ -39,7 +40,24 @@ jobs:
echo "Error: Tag must be in format v1.0.0, 1.0.0, or CalVer like 25.5.28"
exit 1
fi
+
+ RELEASE_VERSION="${TAG#v}"
+ PACKAGE_VERSION=$(python3 - <<'PY'
+ import pathlib
+ import tomllib
+
+ pyproject = tomllib.loads(pathlib.Path("pyproject.toml").read_text())
+ print(pyproject["project"]["version"])
+ PY
+ )
+
+ if [ "$RELEASE_VERSION" != "$PACKAGE_VERSION" ]; then
+ echo "Error: Tag version ($RELEASE_VERSION) does not match pyproject.toml version ($PACKAGE_VERSION)"
+ exit 1
+ fi
+
echo "tag=$TAG" >> $GITHUB_OUTPUT
+ echo "release_version=$RELEASE_VERSION" >> $GITHUB_OUTPUT
- name: Check if tag exists
id: check
@@ -128,7 +146,7 @@ jobs:
echo "Checking if version exists on PyPI..."
# Check if package exists on PyPI
- if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.tag }}/json" | grep -q "Not Found"; then
+ if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.release_version }}/json" | grep -q "Not Found"; then
echo "❌ Version does not exist on PyPI - this is a real error"
exit 1
else
@@ -162,21 +180,31 @@ jobs:
- name: Generate release notes
id: release_notes
run: |
- # Get the previous tag
- PREV_TAG=$(git describe --tags --abbrev=0 ${{ needs.validate-tag.outputs.tag }}^)
+ # Get the previous tag if one exists (initial releases may not have one)
+ PREV_TAG=$(git tag --merged "${{ needs.validate-tag.outputs.tag }}" --sort=-creatordate | grep -vx "${{ needs.validate-tag.outputs.tag }}" | head -n 1 || true)
echo "Generating release notes for ${{ needs.validate-tag.outputs.tag }}"
- echo "Previous tag: $PREV_TAG"
+ if [ -n "$PREV_TAG" ]; then
+ echo "Previous tag: $PREV_TAG"
+ else
+ echo "Previous tag: none (initial release)"
+ fi
echo ""
# Generate changelog
echo "## Changes" > release_notes.md
echo "" >> release_notes.md
- # Get all commits between previous tag and current tag, excluding version bump commits
- git log --pretty=format:"- %s (%h)" $PREV_TAG..${{ needs.validate-tag.outputs.tag }} \
- --grep="bump version" --grep="version bump" --grep="update version" \
- --invert-grep >> release_notes.md
+ # Get all commits since the previous tag, excluding version bump commits
+ if [ -n "$PREV_TAG" ]; then
+ git log --pretty=format:"- %s (%h)" "$PREV_TAG..${{ needs.validate-tag.outputs.tag }}" \
+ --grep="bump version" --grep="version bump" --grep="update version" \
+ --invert-grep >> release_notes.md
+ else
+ git log --pretty=format:"- %s (%h)" "${{ needs.validate-tag.outputs.tag }}" \
+ --grep="bump version" --grep="version bump" --grep="update version" \
+ --invert-grep >> release_notes.md
+ fi
# Check if we have any commits
if [ ! -s release_notes.md ] || [ $(wc -l < release_notes.md) -le 2 ]; then
@@ -187,7 +215,11 @@ jobs:
echo "" >> release_notes.md
echo "" >> release_notes.md
- echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md
+ if [ -n "$PREV_TAG" ]; then
+ echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md
+ else
+ echo "**Full Changelog**: Initial release" >> release_notes.md
+ fi
echo "Generated release notes:"
cat release_notes.md
diff --git a/funannotate2_addons/emapper.py b/funannotate2_addons/emapper.py
index cd656a4..a42587b 100644
--- a/funannotate2_addons/emapper.py
+++ b/funannotate2_addons/emapper.py
@@ -2,7 +2,6 @@
import os
import sys
-import uuid
import subprocess
import argparse
import json
@@ -905,7 +904,7 @@ def run_emapper_cli(args):
input_file = get_input_file(args, "proteins") if args.input else args.file
if not input_file:
- log.error("No protein FASTA file found")
+ logger.error("No protein FASTA file found")
return
# Get output directory
@@ -922,8 +921,8 @@ def run_emapper_cli(args):
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)
- # Set output prefix
- output_prefix = os.path.join(output_dir, f"emapper_{uuid.uuid4().hex[:6]}")
+ # Use a stable prefix so repeated runs can reuse the same output set.
+ output_prefix = os.path.join(output_dir, "emapper")
# Set up logging
log_file = f"{output_prefix}.log"
diff --git a/funannotate2_addons/iprscan.py b/funannotate2_addons/iprscan.py
index 75a0039..cd9c6ef 100644
--- a/funannotate2_addons/iprscan.py
+++ b/funannotate2_addons/iprscan.py
@@ -93,7 +93,7 @@ def run_iprscan(
"--input",
input_file,
"-o",
- os.path.basename(f"{output_prefix}.{format.lower()}"),
+ f"{output_prefix}.{format.lower()}",
"-f",
format,
"-cpu",
@@ -169,15 +169,32 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
logger.error(f"Error parsing XML file: {e}")
return None
- # Define namespaces
- ns = {"ns": "http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5"}
+ # Detect namespace from the document so we can support current and older
+ # InterProScan XML schema URLs.
+ namespace = root.tag.split("}")[0].strip("{") if root.tag.startswith("{") else ""
+
+ def qname(tag):
+ return f"{{{namespace}}}{tag}" if namespace else tag
+
+ def unique_by_id(items, item_id):
+ return item_id not in [item["id"] for item in items]
# Parse annotations
annotations = {}
# Iterate through protein matches
- for protein in root.findall(".//ns:protein", ns):
+ for protein in root.findall(f".//{qname('protein')}"):
protein_id = protein.attrib.get("id", "")
+ if not protein_id:
+ xref = protein.find(qname("xref"))
+ if xref is not None:
+ protein_id = xref.attrib.get("id", "")
+ if not protein_id:
+ sequence = protein.find(qname("sequence"))
+ if sequence is not None and sequence.text:
+ protein_id = sequence.text.strip()
+ if not protein_id:
+ continue
if protein_id not in annotations:
annotations[protein_id] = {
@@ -187,33 +204,40 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
"signatures": [],
}
+ matches = protein.find(qname("matches"))
+ if matches is None:
+ continue
+
# Get matches
- for match in protein.findall(".//ns:match", ns):
- signature = match.find(".//ns:signature", ns)
+ for match in matches:
+ if not match.tag.split("}")[-1].endswith("-match"):
+ continue
+
+ signature = match.find(qname("signature"))
if signature is not None:
sig_acc = signature.attrib.get("ac", "")
sig_desc = signature.attrib.get("desc", "")
sig_name = signature.attrib.get("name", "")
# Add to signatures
- if sig_acc and sig_acc not in [
- s["id"] for s in annotations[protein_id]["signatures"]
- ]:
+ if sig_acc and unique_by_id(
+ annotations[protein_id]["signatures"], sig_acc
+ ):
annotations[protein_id]["signatures"].append(
{"id": sig_acc, "name": sig_name, "description": sig_desc}
)
# Get InterPro domains
- entry = match.find(".//ns:entry", ns)
+ entry = signature.find(qname("entry"))
if entry is not None:
entry_acc = entry.attrib.get("ac", "")
entry_desc = entry.attrib.get("desc", "")
entry_name = entry.attrib.get("name", "")
# Add to InterPro domains
- if entry_acc and entry_acc not in [
- d["id"] for d in annotations[protein_id]["interpro_domains"]
- ]:
+ if entry_acc and unique_by_id(
+ annotations[protein_id]["interpro_domains"], entry_acc
+ ):
annotations[protein_id]["interpro_domains"].append(
{
"id": entry_acc,
@@ -223,29 +247,35 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
)
# Get GO terms
- for go_term in match.findall(".//ns:go-term", ns):
+ go_terms = signature.findall(f".//{qname('go-xref')}")
+ if not go_terms:
+ go_terms = match.findall(f".//{qname('go-term')}")
+ for go_term in go_terms:
go_id = go_term.attrib.get("id", "")
go_name = go_term.attrib.get("name", "")
go_category = go_term.attrib.get("category", "")
# Add to GO terms
- if go_id and go_id not in [
- g["id"] for g in annotations[protein_id]["go_terms"]
- ]:
+ if go_id and unique_by_id(
+ annotations[protein_id]["go_terms"], go_id
+ ):
annotations[protein_id]["go_terms"].append(
{"id": go_id, "name": go_name, "category": go_category}
)
# Get pathways
- for pathway in match.findall(".//ns:pathway", ns):
+ pathways = signature.findall(f".//{qname('pathway-xref')}")
+ if not pathways:
+ pathways = match.findall(f".//{qname('pathway')}")
+ for pathway in pathways:
pathway_id = pathway.attrib.get("id", "")
pathway_name = pathway.attrib.get("name", "")
pathway_db = pathway.attrib.get("db", "")
# Add to pathways
- if pathway_id and pathway_id not in [
- p["id"] for p in annotations[protein_id]["pathways"]
- ]:
+ if pathway_id and unique_by_id(
+ annotations[protein_id]["pathways"], pathway_id
+ ):
annotations[protein_id]["pathways"].append(
{
"id": pathway_id,
diff --git a/funannotate2_addons/signalp6.py b/funannotate2_addons/signalp6.py
index d62f1ff..0f7b740 100644
--- a/funannotate2_addons/signalp6.py
+++ b/funannotate2_addons/signalp6.py
@@ -4,6 +4,7 @@
import subprocess
import argparse
import json
+import re
import uuid
from .log import startLogging
from .utils import (
@@ -17,6 +18,82 @@
logger = startLogging()
+def _signalp_annotation_value(prediction):
+ """Format a standardized SignalP annotation value."""
+ cleavage_pos = prediction.get("cleavage_site")
+ if cleavage_pos and "-" in cleavage_pos:
+ return f"SignalP:1-{cleavage_pos.split('-')[0]}"
+ return f"SignalP:{prediction['prediction']}"
+
+
+def _write_signalp_annotations(output_file, predictions):
+ """Write standardized SignalP annotations."""
+ with open(output_file, "w") as out:
+ out.write("#gene_id\tannotation_type\tannotation_value\n")
+ for protein_id, pred in predictions.items():
+ if pred["has_signal_peptide"]:
+ annotation = _signalp_annotation_value(pred)
+ out.write(f"{protein_id}\tnote\t{annotation}\n")
+
+
+def _normalize_signalp_prediction(prediction):
+ """Normalize SignalP prediction labels across text and JSON outputs."""
+ prediction_map = {
+ "other": "OTHER",
+ "sp": "SP",
+ "signal peptide (sec/spi)": "SP",
+ "signal peptide": "SP",
+ "lipoprotein signal peptide (sec/spii)": "LIPO",
+ "tat signal peptide (tat/spi)": "TAT",
+ "tat lipoprotein signal peptide (tat/spii)": "TATLIPO",
+ "pilin-like signal peptide (sec/spiii)": "PILIN",
+ "pilin-like signal peptide": "PILIN",
+ }
+ normalized = prediction_map.get(str(prediction).strip().lower())
+ if normalized:
+ return normalized
+ return str(prediction).strip().upper()
+
+
+def _parse_signalp6_sequence_json_entry(entry):
+ """Parse a real SignalP 6 JSON SEQUENCES entry."""
+ protein_id = entry["Name"]
+ prediction = _normalize_signalp_prediction(entry["Prediction"])
+ protein_types = entry.get("Protein_types", [])
+ likelihoods = entry.get("Likelihood", [])
+
+ probabilities = {}
+ for label, value in zip(protein_types, likelihoods):
+ probabilities[_normalize_signalp_prediction(label)] = float(value)
+
+ parsed = {
+ "prediction": prediction,
+ "probability": probabilities.get(
+ prediction,
+ max((float(value) for value in likelihoods), default=0.0),
+ ),
+ "has_signal_peptide": prediction != "OTHER",
+ }
+
+ if "OTHER" in probabilities:
+ parsed["other_prob"] = probabilities["OTHER"]
+ if "SP" in probabilities:
+ parsed["sp_prob"] = probabilities["SP"]
+
+ cleavage_info = str(entry.get("CS_pos", "")).strip()
+ if cleavage_info:
+ match = re.search(
+ r"Cleavage site between pos\.\s*(\d+)\s+and\s+(\d+)(?:\.\s*Probability\s*([0-9.]+))?",
+ cleavage_info,
+ )
+ if match:
+ parsed["cleavage_site"] = f"{match.group(1)}-{match.group(2)}"
+ if match.group(3):
+ parsed["cleavage_prob"] = float(match.group(3))
+
+ return protein_id, parsed
+
+
def get_version(signalp_path):
"""
Get the version of SignalP.
@@ -197,26 +274,7 @@ def parse_signalp(input_file, output_file=None, gene_dict=None):
# Write to output file if specified
if output_file:
- with open(output_file, "w") as out:
- # Write header
- out.write("#gene_id\tannotation_type\tannotation_value\n")
-
- for protein_id, pred in predictions.items():
- if pred["has_signal_peptide"]:
- if "cleavage_site" in pred:
- # Extract the cleavage position to determine signal peptide range
- # CS pos: 17-18 means cleavage between 17 and 18, so signal peptide is 1-17
- cleavage_pos = pred["cleavage_site"]
- if "-" in cleavage_pos:
- end_pos = cleavage_pos.split("-")[
- 0
- ] # Take the first number
- annotation = f"SignalP:1-{end_pos}"
- else:
- annotation = f"SignalP:{pred['prediction']}"
- else:
- annotation = f"SignalP:{pred['prediction']}"
- out.write(f"{protein_id}\tnote\t{annotation}\n")
+ _write_signalp_annotations(output_file, predictions)
return predictions
@@ -249,36 +307,34 @@ def parse_signalp_json(input_file, output_file=None, gene_dict=None):
with open(input_file, "r") as f:
data = json.load(f)
- for entry in data:
- protein_id = entry["ID"]
- prediction = entry["PREDICTION"]
- probability = float(entry["PROB"])
-
- # Store prediction
- predictions[protein_id] = {
- "prediction": prediction,
- "probability": probability,
- "has_signal_peptide": prediction != "OTHER",
- }
-
- # Add cleavage site if available
- if "CS_POS" in entry and entry["CS_POS"] != "-":
- predictions[protein_id]["cleavage_site"] = entry["CS_POS"]
+ if isinstance(data, list):
+ entries = data
+ real_signalp6_json = False
+ elif isinstance(data, dict) and isinstance(data.get("SEQUENCES"), dict):
+ entries = data["SEQUENCES"].values()
+ real_signalp6_json = True
+ else:
+ raise ValueError("Unsupported SignalP JSON format")
+
+ for entry in entries:
+ if real_signalp6_json:
+ protein_id, parsed = _parse_signalp6_sequence_json_entry(entry)
+ else:
+ protein_id = entry["ID"]
+ prediction = _normalize_signalp_prediction(entry["PREDICTION"])
+ parsed = {
+ "prediction": prediction,
+ "probability": float(entry["PROB"]),
+ "has_signal_peptide": prediction != "OTHER",
+ }
+ if "CS_POS" in entry and entry["CS_POS"] != "-":
+ parsed["cleavage_site"] = entry["CS_POS"]
+
+ predictions[protein_id] = parsed
# Write to output file if specified
if output_file:
- with open(output_file, "w") as out:
- # Write header
- out.write("#gene_id\tannotation_type\tannotation_value\n")
-
- for protein_id, pred in predictions.items():
- if pred["has_signal_peptide"]:
- out.write(
- f"{protein_id}\tnote\tSignalP:{pred['prediction']} prob={pred['probability']:.3f}"
- )
- if "cleavage_site" in pred:
- out.write(f" cleavage_site={pred['cleavage_site']}")
- out.write("\n")
+ _write_signalp_annotations(output_file, predictions)
return predictions
@@ -417,7 +473,8 @@ def run_signalp_cli(args):
# Parse SignalP results
annotations_file = os.path.join(output_dir, "signalp.annotations.txt")
- predictions = parse_signalp(args.parse, output_file=annotations_file)
+ parser = parse_signalp_json if args.parse.endswith(".json") else parse_signalp
+ predictions = parser(args.parse, output_file=annotations_file)
if predictions:
logger.info(f"Parsed {len(predictions)} predictions from {args.parse}")
diff --git a/tests/fixtures/iprscan/current_namespace_minimal.xml b/tests/fixtures/iprscan/current_namespace_minimal.xml
new file mode 100644
index 0000000..b1b6e78
--- /dev/null
+++ b/tests/fixtures/iprscan/current_namespace_minimal.xml
@@ -0,0 +1,23 @@
+
+
+
+ MSTNPKPQRIT
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tests/unit/test_antismash.py b/tests/unit/test_antismash.py
index 9e1b432..2732298 100644
--- a/tests/unit/test_antismash.py
+++ b/tests/unit/test_antismash.py
@@ -15,6 +15,13 @@
)
+ANTISMASH_LOCAL_GBK = os.environ.get(
+ "FUNANNOTATE2_ADDONS_ANTISMASH_GBK",
+ "/Users/jon/software/f2_ecosystem/funannotate2-addons/local_tests/antismash_pverr/antismash_fe857478/Pseudogymnoascus_verrucosus.gbk",
+)
+HAS_ANTISMASH_LOCAL_GBK = os.path.exists(ANTISMASH_LOCAL_GBK)
+
+
class TestAntismash(unittest.TestCase):
def setUp(self):
# Create a temporary directory for test files
@@ -153,5 +160,28 @@ def test_antismash_to_json(self):
self.assertIsInstance(clusters, dict)
+@unittest.skipUnless(
+ HAS_ANTISMASH_LOCAL_GBK,
+ "local antiSMASH GenBank fixture not available",
+)
+class TestAntismashLocalFixtures(unittest.TestCase):
+ def test_parse_real_antismash_genbank_fixture(self):
+ backbone_domains, backbone_subtype, backbone_enzymes, cluster_genes = (
+ parse_antismash_gbk(ANTISMASH_LOCAL_GBK)
+ )
+
+ self.assertEqual(len(cluster_genes), 28)
+ self.assertEqual(backbone_subtype["cluster_1"], ["T1PKS"])
+ self.assertEqual(backbone_subtype["cluster_28"], ["terpene"])
+ self.assertEqual(len(cluster_genes["cluster_1"]), 30)
+ self.assertIn("FUN2_000393", cluster_genes["cluster_1"])
+ self.assertIn("FUN2_000393", backbone_enzymes["cluster_1"])
+ self.assertIn("FUN2_011547", backbone_enzymes["cluster_28"])
+
+ annotations = "\n".join(backbone_domains["FUN2_000393"])
+ self.assertIn("SMCOG1093", annotations)
+ self.assertIn("antiSMASH domain: itr_KS", annotations)
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index e61b161..efb96d2 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
+import argparse
import unittest
import sys
from unittest import mock
@@ -9,6 +10,15 @@
class TestCLI(unittest.TestCase):
+ def _assert_main_dispatch(self, subparser_name, patch_target):
+ args = argparse.Namespace(subparser_name=subparser_name)
+
+ with mock.patch("funannotate2_addons.__main__.parse_args", return_value=args):
+ with mock.patch(patch_target) as mock_runner:
+ main()
+
+ mock_runner.assert_called_once_with(args)
+
def test_parse_args_no_args(self):
"""Test that no arguments raises SystemExit and prints help"""
with self.assertRaises(SystemExit) as cm:
@@ -61,30 +71,24 @@ def test_parse_args_signalp6(self):
@mock.patch("funannotate2_addons.__main__.run_emapper_cli")
def test_main_emapper(self, mock_run_emapper):
"""Test main function calls emapper CLI"""
- with mock.patch("sys.argv", ["funannotate2_addons", "emapper", "--help"]):
- with self.assertRaises(SystemExit):
- main()
+ self._assert_main_dispatch("emapper", "funannotate2_addons.__main__.run_emapper_cli")
@mock.patch("funannotate2_addons.__main__.run_iprscan_cli")
def test_main_iprscan(self, mock_run_iprscan):
"""Test main function calls iprscan CLI"""
- with mock.patch("sys.argv", ["funannotate2_addons", "iprscan", "--help"]):
- with self.assertRaises(SystemExit):
- main()
+ self._assert_main_dispatch("iprscan", "funannotate2_addons.__main__.run_iprscan_cli")
@mock.patch("funannotate2_addons.__main__.run_antismash_cli")
def test_main_antismash(self, mock_run_antismash):
"""Test main function calls antismash CLI"""
- with mock.patch("sys.argv", ["funannotate2_addons", "antismash", "--help"]):
- with self.assertRaises(SystemExit):
- main()
+ self._assert_main_dispatch(
+ "antismash", "funannotate2_addons.__main__.run_antismash_cli"
+ )
@mock.patch("funannotate2_addons.__main__.run_signalp_cli")
def test_main_signalp6(self, mock_run_signalp):
"""Test main function calls signalp6 CLI"""
- with mock.patch("sys.argv", ["funannotate2_addons", "signalp6", "--help"]):
- with self.assertRaises(SystemExit):
- main()
+ self._assert_main_dispatch("signalp6", "funannotate2_addons.__main__.run_signalp_cli")
if __name__ == "__main__":
diff --git a/tests/unit/test_emapper.py b/tests/unit/test_emapper.py
index 4fe16c7..4557081 100644
--- a/tests/unit/test_emapper.py
+++ b/tests/unit/test_emapper.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python3
+import argparse
import os
import unittest
from unittest import mock
@@ -11,6 +12,7 @@
get_version,
parse_emapper_annotations,
emapper_to_json,
+ run_emapper_cli,
)
@@ -99,6 +101,85 @@ def test_emapper_to_json(self):
self.assertEqual(data["gene2"]["gene_name"], "geneB")
self.assertEqual(data["gene2"]["description"], "Transcription factor")
+ @mock.patch("funannotate2_addons.emapper.logger.error")
+ @mock.patch("funannotate2_addons.emapper.get_input_file", return_value=None)
+ def test_run_emapper_cli_missing_input_file(self, mock_get_input_file, mock_logger):
+ args = mock.Mock(input="predict_dir", file=None, parse=None)
+
+ run_emapper_cli(args)
+
+ mock_get_input_file.assert_called_once_with(args, "proteins")
+ mock_logger.assert_called_once_with("No protein FASTA file found")
+
+ @mock.patch("funannotate2_addons.emapper.startLogging")
+ @mock.patch("funannotate2_addons.emapper.parse_emapper_annotations")
+ @mock.patch("funannotate2_addons.emapper.run_emapper")
+ @mock.patch("funannotate2_addons.emapper.get_input_file")
+ def test_run_emapper_cli_uses_stable_output_prefix(
+ self,
+ mock_get_input_file,
+ mock_run_emapper,
+ mock_parse_annotations,
+ mock_start_logging,
+ ):
+ input_file = os.path.join(self.test_dir, "proteins.fa")
+ with open(input_file, "w") as f:
+ f.write(">gene1\nMSTNPKPQRIT\n")
+
+ output_dir = os.path.join(self.test_dir, "output")
+ expected_prefix = os.path.join(output_dir, "emapper")
+ mock_get_input_file.return_value = input_file
+ mock_run_emapper.return_value = f"{expected_prefix}.emapper.annotations"
+ mock_parse_annotations.return_value = {"gene1": {"gene_name": "geneA"}}
+ mock_start_logging.return_value = mock.Mock()
+
+ args = argparse.Namespace(
+ input="predict_dir",
+ file=None,
+ parse=None,
+ output=output_dir,
+ emapper_path="emapper.py",
+ cpus=4,
+ database=None,
+ data_dir=None,
+ temp_dir=None,
+ resume=True,
+ override=False,
+ mode="diamond",
+ tax_scope="auto",
+ target_orthologs="all",
+ sensmode="sensitive",
+ no_annot=False,
+ no_search=False,
+ dmnd_db=None,
+ seed_orthologs=None,
+ report_orthologs=False,
+ go_evidence=None,
+ pfam_realign=False,
+ seed_ortholog_score=None,
+ seed_ortholog_evalue=None,
+ query_cover=None,
+ subject_cover=None,
+ matrix=None,
+ gapopen=None,
+ gapextend=None,
+ evalue=None,
+ pident=None,
+ query_sgcov=None,
+ subject_sgcov=None,
+ json=False,
+ )
+
+ run_emapper_cli(args)
+
+ self.assertTrue(os.path.isdir(output_dir))
+ self.assertEqual(mock_run_emapper.call_args.kwargs["output_prefix"], expected_prefix)
+ self.assertTrue(mock_run_emapper.call_args.kwargs["resume"])
+ mock_parse_annotations.assert_called_once_with(
+ f"{expected_prefix}.emapper.annotations",
+ output_file=f"{expected_prefix}.annotations.txt",
+ )
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/test_iprscan.py b/tests/unit/test_iprscan.py
index 9300a24..2a78f88 100644
--- a/tests/unit/test_iprscan.py
+++ b/tests/unit/test_iprscan.py
@@ -11,12 +11,25 @@
from funannotate2_addons.iprscan import (
get_version,
+ run_iprscan,
+ parse_iprscan,
parse_iprscan_tsv,
parse_iprscan_xml,
iprscan_to_json,
)
+CURRENT_NAMESPACE_XML_FIXTURE = os.path.abspath(
+ os.path.join(
+ os.path.dirname(__file__),
+ "..",
+ "fixtures",
+ "iprscan",
+ "current_namespace_minimal.xml",
+ )
+)
+
+
class TestIprscan(unittest.TestCase):
def setUp(self):
# Create a temporary directory for test files
@@ -103,6 +116,29 @@ def test_get_version_error(self, mock_run):
version = get_version("interproscan.sh")
self.assertIsNone(version)
+ @mock.patch("subprocess.run")
+ def test_run_iprscan_uses_full_output_path(self, mock_run):
+ input_file = os.path.join(self.test_dir, "proteins.fa")
+ with open(input_file, "w") as f:
+ f.write(">protein1\nMSTNPKPQRIT\n")
+
+ output_prefix = os.path.join(self.test_dir, "iprscan_out", "iprscan")
+ os.makedirs(os.path.dirname(output_prefix), exist_ok=True)
+ expected_output = f"{output_prefix}.xml"
+
+ def fake_run(cmd, **kwargs):
+ with open(expected_output, "w") as out:
+ out.write("")
+ return mock.Mock(returncode=0)
+
+ mock_run.side_effect = fake_run
+
+ result = run_iprscan(input_file=input_file, output_prefix=output_prefix)
+
+ self.assertEqual(result, expected_output)
+ called_cmd = mock_run.call_args.args[0]
+ self.assertEqual(called_cmd[called_cmd.index("-o") + 1], expected_output)
+
def test_parse_iprscan_tsv(self):
# Test parsing the sample TSV file
annotations = parse_iprscan_tsv(self.sample_tsv_file)
@@ -148,26 +184,23 @@ def test_parse_iprscan_xml(self):
# Test parsing the sample XML file
annotations = parse_iprscan_xml(self.sample_xml_file)
- # Check that annotations were parsed correctly
- self.assertIsInstance(annotations, dict)
- self.assertTrue(len(annotations) > 0)
-
- # Check that we have protein entries
- protein_ids = list(annotations.keys())
- self.assertTrue(len(protein_ids) > 0)
-
- # Check the structure of the first protein
- first_protein = annotations[protein_ids[0]]
- self.assertIn("signatures", first_protein)
- self.assertIn("interpro_domains", first_protein)
- self.assertIn("go_terms", first_protein)
- self.assertIn("pathways", first_protein)
-
- # Check that signatures are present
- self.assertIsInstance(first_protein["signatures"], list)
- self.assertIsInstance(first_protein["interpro_domains"], list)
- self.assertIsInstance(first_protein["go_terms"], list)
- self.assertIsInstance(first_protein["pathways"], list)
+ self.assertIn("protein1", annotations)
+ self.assertIn("protein2", annotations)
+ self.assertEqual(annotations["protein1"]["signatures"][0]["id"], "PF00001")
+ self.assertEqual(
+ annotations["protein1"]["interpro_domains"][0]["id"], "IPR000001"
+ )
+ self.assertEqual(annotations["protein1"]["go_terms"][0]["id"], "GO:0003677")
+
+ def test_parse_iprscan_xml_current_namespace_and_xref(self):
+ annotations = parse_iprscan_xml(CURRENT_NAMESPACE_XML_FIXTURE)
+
+ self.assertIn("FUN2_000001-T1", annotations)
+ protein = annotations["FUN2_000001-T1"]
+ self.assertEqual(protein["signatures"][0]["id"], "PF00001")
+ self.assertEqual(protein["interpro_domains"][0]["id"], "IPR000001")
+ self.assertEqual(protein["go_terms"][0]["id"], "GO:0003677")
+ self.assertEqual(protein["pathways"][0]["id"], "map00010")
def test_iprscan_to_json(self):
# Test converting TSV to JSON
@@ -194,6 +227,39 @@ def test_iprscan_to_json(self):
self.assertIn("interpro_domains", protein1_data)
self.assertIn("go_terms", protein1_data)
+ @mock.patch("funannotate2_addons.iprscan.parse_iprscan_xml")
+ def test_parse_iprscan_dispatches_xml(self, mock_parse_xml):
+ expected = {"protein1": {}}
+ mock_parse_xml.return_value = expected
+
+ result = parse_iprscan(self.sample_xml_file)
+
+ self.assertEqual(result, expected)
+ mock_parse_xml.assert_called_once_with(self.sample_xml_file, None, None)
+
+ @mock.patch("funannotate2_addons.iprscan.parse_iprscan_tsv")
+ def test_parse_iprscan_dispatches_tsv(self, mock_parse_tsv):
+ expected = {"protein1": {}}
+ mock_parse_tsv.return_value = expected
+
+ result = parse_iprscan(self.sample_tsv_file)
+
+ self.assertEqual(result, expected)
+ mock_parse_tsv.assert_called_once_with(self.sample_tsv_file, None, None)
+
+ @mock.patch("funannotate2_addons.iprscan.logger.error")
+ def test_parse_iprscan_rejects_unsupported_extension(self, mock_logger):
+ unsupported_file = os.path.join(self.test_dir, "sample.txt")
+ with open(unsupported_file, "w") as f:
+ f.write("unsupported")
+
+ result = parse_iprscan(unsupported_file)
+
+ self.assertIsNone(result)
+ mock_logger.assert_called_once_with(
+ f"Unsupported file format: {unsupported_file}"
+ )
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/unit/test_signalp6.py b/tests/unit/test_signalp6.py
index 65fdd7a..fc7530d 100644
--- a/tests/unit/test_signalp6.py
+++ b/tests/unit/test_signalp6.py
@@ -5,14 +5,32 @@
import shutil
import os
import json
+import argparse
import subprocess
from unittest import mock
from funannotate2_addons.signalp6 import (
get_version,
parse_signalp,
+ parse_signalp_json,
signalp_to_json,
run_signalp,
+ run_signalp_cli,
+)
+
+
+SIGNALP6_LOCAL_ROOT = os.environ.get(
+ "FUNANNOTATE2_ADDONS_SIGNALP6_LOCAL",
+ "/Users/jon/software/f2_ecosystem/funannotate2-addons/local_tests/signalp6_test",
+)
+SIGNALP6_LOCAL_TXT = os.path.join(SIGNALP6_LOCAL_ROOT, "test2", "prediction_results.txt")
+SIGNALP6_LOCAL_JSON = os.path.join(SIGNALP6_LOCAL_ROOT, "test2", "output.json")
+SIGNALP6_LOCAL_ANNOTATIONS = os.path.join(
+ SIGNALP6_LOCAL_ROOT, "test3", "signalp.annotations.txt"
+)
+HAS_SIGNALP6_LOCAL_FIXTURES = all(
+ os.path.exists(path)
+ for path in (SIGNALP6_LOCAL_TXT, SIGNALP6_LOCAL_JSON, SIGNALP6_LOCAL_ANNOTATIONS)
)
@@ -31,6 +49,16 @@ def setUp(self):
f.write("protein3\tSP\t0.007633\t0.992322\tCS pos: 23-24. Pr: 0.6512\n")
f.write("protein4\tOTHER\t1.000045\t0.000000\t\n")
+ self.sample_json_file = os.path.join(self.test_dir, "prediction_results.json")
+ with open(self.sample_json_file, "w") as f:
+ json.dump(
+ [
+ {"ID": "protein1", "PREDICTION": "OTHER", "PROB": "0.998", "CS_POS": "-"},
+ {"ID": "protein2", "PREDICTION": "SP", "PROB": "0.999", "CS_POS": "17-18"},
+ ],
+ f,
+ )
+
# Create a sample protein FASTA file
self.sample_fasta_file = os.path.join(self.test_dir, "proteins.fasta")
with open(self.sample_fasta_file, "w") as f:
@@ -151,6 +179,33 @@ def test_signalp_to_json(self):
self.assertTrue(protein2_data["has_signal_peptide"])
self.assertEqual(protein2_data["cleavage_site"], "17-18")
+ def test_parse_signalp_json(self):
+ predictions = parse_signalp_json(self.sample_json_file)
+
+ self.assertIn("protein1", predictions)
+ self.assertIn("protein2", predictions)
+ self.assertFalse(predictions["protein1"]["has_signal_peptide"])
+ self.assertTrue(predictions["protein2"]["has_signal_peptide"])
+ self.assertEqual(predictions["protein2"]["cleavage_site"], "17-18")
+
+ def test_run_signalp_cli_parse_json(self):
+ output_dir = os.path.join(self.test_dir, "signalp_json_output")
+ args = argparse.Namespace(parse=self.sample_json_file, output=output_dir, json=True)
+
+ run_signalp_cli(args)
+
+ annotations_file = os.path.join(output_dir, "signalp.annotations.txt")
+ json_file = os.path.join(output_dir, "signalp.json")
+
+ self.assertTrue(os.path.exists(annotations_file))
+ self.assertTrue(os.path.exists(json_file))
+
+ with open(annotations_file, "r") as f:
+ annotations = f.read()
+
+ self.assertIn("protein2\tnote\tSignalP:1-17", annotations)
+
+
@mock.patch("subprocess.run")
@mock.patch("funannotate2_addons.signalp6.log_command")
def test_run_signalp(self, mock_log_command, mock_run):
@@ -190,5 +245,46 @@ def test_run_signalp(self, mock_log_command, mock_run):
self.assertIn(output_dir, call_args)
+@unittest.skipUnless(
+ HAS_SIGNALP6_LOCAL_FIXTURES,
+ "local SignalP6 fixtures not available",
+)
+class TestSignalp6LocalFixtures(unittest.TestCase):
+ def test_parse_real_signalp_tabular_fixture(self):
+ predictions = parse_signalp(SIGNALP6_LOCAL_TXT)
+
+ self.assertEqual(len(predictions), 65)
+ self.assertFalse(predictions["FUN2_000001-T1 FUN2_000001"]["has_signal_peptide"])
+
+ protein = predictions["FUN2_000014-T1 FUN2_000014"]
+ self.assertEqual(protein["prediction"], "SP")
+ self.assertAlmostEqual(protein["probability"], 0.999747, places=6)
+ self.assertEqual(protein["cleavage_site"], "17-18")
+ self.assertAlmostEqual(protein["cleavage_prob"], 0.9751, places=4)
+
+ def test_parse_real_signalp_json_fixture_matches_expected_annotations(self):
+ output_file = os.path.join(
+ tempfile.mkdtemp(), "signalp_from_real_json.annotations.txt"
+ )
+ try:
+ predictions = parse_signalp_json(SIGNALP6_LOCAL_JSON, output_file=output_file)
+
+ self.assertEqual(len(predictions), 65)
+ self.assertFalse(predictions["FUN2_000001-T1 FUN2_000001"]["has_signal_peptide"])
+
+ protein = predictions["FUN2_000014-T1 FUN2_000014"]
+ self.assertEqual(protein["prediction"], "SP")
+ self.assertAlmostEqual(protein["probability"], 0.9997, places=4)
+ self.assertEqual(protein["cleavage_site"], "17-18")
+ self.assertAlmostEqual(protein["cleavage_prob"], 0.975137, places=6)
+
+ with open(output_file, "r") as observed, open(
+ SIGNALP6_LOCAL_ANNOTATIONS, "r"
+ ) as expected:
+ self.assertEqual(observed.read(), expected.read())
+ finally:
+ shutil.rmtree(os.path.dirname(output_file))
+
+
if __name__ == "__main__":
unittest.main()