From 33600440c28ec4324e7723035e2dce310307b340 Mon Sep 17 00:00:00 2001
From: Jon Palmer <nextgenusfs@gmail.com>
Date: Fri, 6 Mar 2026 15:50:11 -0800
Subject: [PATCH 1/4] test(iprscan): add committed current-namespace XML
 fixture

Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7
---
 .../iprscan/current_namespace_minimal.xml     |  23 ++++
 tests/unit/test_iprscan.py                    | 106 ++++++++++++++----
 2 files changed, 109 insertions(+), 20 deletions(-)
 create mode 100644 tests/fixtures/iprscan/current_namespace_minimal.xml
diff --git a/tests/fixtures/iprscan/current_namespace_minimal.xml b/tests/fixtures/iprscan/current_namespace_minimal.xml
new file mode 100644
index 0000000..b1b6e78
--- /dev/null
+++ b/tests/fixtures/iprscan/current_namespace_minimal.xml
@@ -0,0 +1,23 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<protein-matches xmlns="https://ftp.ebi.ac.uk/pub/software/unix/iprscan/5/schemas">
+    <protein>
+        <sequence md5="abc123">MSTNPKPQRIT</sequence>
+        <xref id="FUN2_000001-T1" name="FUN2_000001-T1 FUN2_000001"/>
+        <matches>
+            <hmmer3-match evalue="1.2E-15" score="15.2">
+                <signature ac="PF00001" desc="DNA binding domain" name="DNA_binding" type="DOMAIN">
+                    <entry ac="IPR000001" desc="DNA binding" name="DNA_binding" type="DOMAIN"/>
+                    <go-xrefs>
+                        <go-xref id="GO:0003677" name="DNA binding" category="molecular_function"/>
+                    </go-xrefs>
+                    <pathway-xrefs>
+                        <pathway-xref id="map00010" db="KEGG" name="Glycolysis"/>
+                    </pathway-xrefs>
+                </signature>
+                <locations>
+                    <hmmer3-location start="10" end="90" score="15.2" evalue="1.2E-15" representative="true"/>
+                </locations>
+            </hmmer3-match>
+        </matches>
+    </protein>
+</protein-matches>
\ No newline at end of file
diff --git a/tests/unit/test_iprscan.py b/tests/unit/test_iprscan.py
index 9300a24..2a78f88 100644
--- a/tests/unit/test_iprscan.py
+++ b/tests/unit/test_iprscan.py
@@ -11,12 +11,25 @@
 
 from funannotate2_addons.iprscan import (
     get_version,
+    run_iprscan,
+    parse_iprscan,
     parse_iprscan_tsv,
     parse_iprscan_xml,
     iprscan_to_json,
 )
 
 
+CURRENT_NAMESPACE_XML_FIXTURE = os.path.abspath(
+    os.path.join(
+        os.path.dirname(__file__),
+        "..",
+        "fixtures",
+        "iprscan",
+        "current_namespace_minimal.xml",
+    )
+)
+
+
 class TestIprscan(unittest.TestCase):
     def setUp(self):
         # Create a temporary directory for test files
@@ -103,6 +116,29 @@ def test_get_version_error(self, mock_run):
         version = get_version("interproscan.sh")
         self.assertIsNone(version)
 
+    @mock.patch("subprocess.run")
+    def test_run_iprscan_uses_full_output_path(self, mock_run):
+        input_file = os.path.join(self.test_dir, "proteins.fa")
+        with open(input_file, "w") as f:
+            f.write(">protein1\nMSTNPKPQRIT\n")
+
+        output_prefix = os.path.join(self.test_dir, "iprscan_out", "iprscan")
+        os.makedirs(os.path.dirname(output_prefix), exist_ok=True)
+        expected_output = f"{output_prefix}.xml"
+
+        def fake_run(cmd, **kwargs):
+            with open(expected_output, "w") as out:
+                out.write("<protein-matches />")
+            return mock.Mock(returncode=0)
+
+        mock_run.side_effect = fake_run
+
+        result = run_iprscan(input_file=input_file, output_prefix=output_prefix)
+
+        self.assertEqual(result, expected_output)
+        called_cmd = mock_run.call_args.args[0]
+        self.assertEqual(called_cmd[called_cmd.index("-o") + 1], expected_output)
+
     def test_parse_iprscan_tsv(self):
         # Test parsing the sample TSV file
         annotations = parse_iprscan_tsv(self.sample_tsv_file)
@@ -148,26 +184,23 @@ def test_parse_iprscan_xml(self):
         # Test parsing the sample XML file
         annotations = parse_iprscan_xml(self.sample_xml_file)
 
-        # Check that annotations were parsed correctly
-        self.assertIsInstance(annotations, dict)
-        self.assertTrue(len(annotations) > 0)
-
-        # Check that we have protein entries
-        protein_ids = list(annotations.keys())
-        self.assertTrue(len(protein_ids) > 0)
-
-        # Check the structure of the first protein
-        first_protein = annotations[protein_ids[0]]
-        self.assertIn("signatures", first_protein)
-        self.assertIn("interpro_domains", first_protein)
-        self.assertIn("go_terms", first_protein)
-        self.assertIn("pathways", first_protein)
-
-        # Check that signatures are present
-        self.assertIsInstance(first_protein["signatures"], list)
-        self.assertIsInstance(first_protein["interpro_domains"], list)
-        self.assertIsInstance(first_protein["go_terms"], list)
-        self.assertIsInstance(first_protein["pathways"], list)
+        self.assertIn("protein1", annotations)
+        self.assertIn("protein2", annotations)
+        self.assertEqual(annotations["protein1"]["signatures"][0]["id"], "PF00001")
+        self.assertEqual(
+            annotations["protein1"]["interpro_domains"][0]["id"], "IPR000001"
+        )
+        self.assertEqual(annotations["protein1"]["go_terms"][0]["id"], "GO:0003677")
+
+    def test_parse_iprscan_xml_current_namespace_and_xref(self):
+        annotations = parse_iprscan_xml(CURRENT_NAMESPACE_XML_FIXTURE)
+
+        self.assertIn("FUN2_000001-T1", annotations)
+        protein = annotations["FUN2_000001-T1"]
+        self.assertEqual(protein["signatures"][0]["id"], "PF00001")
+        self.assertEqual(protein["interpro_domains"][0]["id"], "IPR000001")
+        self.assertEqual(protein["go_terms"][0]["id"], "GO:0003677")
+        self.assertEqual(protein["pathways"][0]["id"], "map00010")
 
     def test_iprscan_to_json(self):
         # Test converting TSV to JSON
@@ -194,6 +227,39 @@ def test_iprscan_to_json(self):
         self.assertIn("interpro_domains", protein1_data)
         self.assertIn("go_terms", protein1_data)
 
+    @mock.patch("funannotate2_addons.iprscan.parse_iprscan_xml")
+    def test_parse_iprscan_dispatches_xml(self, mock_parse_xml):
+        expected = {"protein1": {}}
+        mock_parse_xml.return_value = expected
+
+        result = parse_iprscan(self.sample_xml_file)
+
+        self.assertEqual(result, expected)
+        mock_parse_xml.assert_called_once_with(self.sample_xml_file, None, None)
+
+    @mock.patch("funannotate2_addons.iprscan.parse_iprscan_tsv")
+    def test_parse_iprscan_dispatches_tsv(self, mock_parse_tsv):
+        expected = {"protein1": {}}
+        mock_parse_tsv.return_value = expected
+
+        result = parse_iprscan(self.sample_tsv_file)
+
+        self.assertEqual(result, expected)
+        mock_parse_tsv.assert_called_once_with(self.sample_tsv_file, None, None)
+
+    @mock.patch("funannotate2_addons.iprscan.logger.error")
+    def test_parse_iprscan_rejects_unsupported_extension(self, mock_logger):
+        unsupported_file = os.path.join(self.test_dir, "sample.txt")
+        with open(unsupported_file, "w") as f:
+            f.write("unsupported")
+
+        result = parse_iprscan(unsupported_file)
+
+        self.assertIsNone(result)
+        mock_logger.assert_called_once_with(
+            f"Unsupported file format: {unsupported_file}"
+        )
+
 
 if __name__ == "__main__":
     unittest.main()

From dcca2f240c6429413562d4d8dbebc0f07e8a23cf Mon Sep 17 00:00:00 2001
From: Jon Palmer <nextgenusfs@gmail.com>
Date: Fri, 6 Mar 2026 15:53:50 -0800
Subject: [PATCH 2/4] fix(tooling): land approved emapper, iprscan, and signalp
 updates

Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7
---
 funannotate2_addons/emapper.py  |   7 +-
 funannotate2_addons/iprscan.py  |  72 ++++++++++-----
 funannotate2_addons/signalp6.py | 153 ++++++++++++++++++++++----------
 tests/unit/test_emapper.py      |  81 +++++++++++++++++
 tests/unit/test_signalp6.py     |  96 ++++++++++++++++++++
 5 files changed, 336 insertions(+), 73 deletions(-)

diff --git a/funannotate2_addons/emapper.py b/funannotate2_addons/emapper.py
index cd656a4..a42587b 100644
--- a/funannotate2_addons/emapper.py
+++ b/funannotate2_addons/emapper.py
@@ -2,7 +2,6 @@
 
 import os
 import sys
-import uuid
 import subprocess
 import argparse
 import json
@@ -905,7 +904,7 @@ def run_emapper_cli(args):
     input_file = get_input_file(args, "proteins") if args.input else args.file
 
     if not input_file:
-        log.error("No protein FASTA file found")
+        logger.error("No protein FASTA file found")
         return
 
     # Get output directory
@@ -922,8 +921,8 @@ def run_emapper_cli(args):
     # Create output directory if it doesn't exist
     os.makedirs(output_dir, exist_ok=True)
 
-    # Set output prefix
-    output_prefix = os.path.join(output_dir, f"emapper_{uuid.uuid4().hex[:6]}")
+    # Use a stable prefix so repeated runs can reuse the same output set.
+    output_prefix = os.path.join(output_dir, "emapper")
 
     # Set up logging
     log_file = f"{output_prefix}.log"
diff --git a/funannotate2_addons/iprscan.py b/funannotate2_addons/iprscan.py
index 75a0039..cd9c6ef 100644
--- a/funannotate2_addons/iprscan.py
+++ b/funannotate2_addons/iprscan.py
@@ -93,7 +93,7 @@ def run_iprscan(
         "--input",
         input_file,
         "-o",
-        os.path.basename(f"{output_prefix}.{format.lower()}"),
+        f"{output_prefix}.{format.lower()}",
         "-f",
         format,
         "-cpu",
@@ -169,15 +169,32 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
         logger.error(f"Error parsing XML file: {e}")
         return None
 
-    # Define namespaces
-    ns = {"ns": "http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5"}
+    # Detect namespace from the document so we can support current and older
+    # InterProScan XML schema URLs.
+    namespace = root.tag.split("}")[0].strip("{") if root.tag.startswith("{") else ""
+
+    def qname(tag):
+        return f"{{{namespace}}}{tag}" if namespace else tag
+
+    def unique_by_id(items, item_id):
+        return item_id not in [item["id"] for item in items]
 
     # Parse annotations
     annotations = {}
 
     # Iterate through protein matches
-    for protein in root.findall(".//ns:protein", ns):
+    for protein in root.findall(f".//{qname('protein')}"):
         protein_id = protein.attrib.get("id", "")
+        if not protein_id:
+            xref = protein.find(qname("xref"))
+            if xref is not None:
+                protein_id = xref.attrib.get("id", "")
+        if not protein_id:
+            sequence = protein.find(qname("sequence"))
+            if sequence is not None and sequence.text:
+                protein_id = sequence.text.strip()
+        if not protein_id:
+            continue
 
         if protein_id not in annotations:
             annotations[protein_id] = {
@@ -187,33 +204,40 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
                 "signatures": [],
             }
 
+        matches = protein.find(qname("matches"))
+        if matches is None:
+            continue
+
         # Get matches
-        for match in protein.findall(".//ns:match", ns):
-            signature = match.find(".//ns:signature", ns)
+        for match in matches:
+            if not match.tag.split("}")[-1].endswith("-match"):
+                continue
+
+            signature = match.find(qname("signature"))
             if signature is not None:
                 sig_acc = signature.attrib.get("ac", "")
                 sig_desc = signature.attrib.get("desc", "")
                 sig_name = signature.attrib.get("name", "")
 
                 # Add to signatures
-                if sig_acc and sig_acc not in [
-                    s["id"] for s in annotations[protein_id]["signatures"]
-                ]:
+                if sig_acc and unique_by_id(
+                    annotations[protein_id]["signatures"], sig_acc
+                ):
                     annotations[protein_id]["signatures"].append(
                         {"id": sig_acc, "name": sig_name, "description": sig_desc}
                     )
 
                 # Get InterPro domains
-                entry = match.find(".//ns:entry", ns)
+                entry = signature.find(qname("entry"))
                 if entry is not None:
                     entry_acc = entry.attrib.get("ac", "")
                     entry_desc = entry.attrib.get("desc", "")
                     entry_name = entry.attrib.get("name", "")
 
                     # Add to InterPro domains
-                    if entry_acc and entry_acc not in [
-                        d["id"] for d in annotations[protein_id]["interpro_domains"]
-                    ]:
+                    if entry_acc and unique_by_id(
+                        annotations[protein_id]["interpro_domains"], entry_acc
+                    ):
                         annotations[protein_id]["interpro_domains"].append(
                             {
                                 "id": entry_acc,
@@ -223,29 +247,35 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
                         )
 
                 # Get GO terms
-                for go_term in match.findall(".//ns:go-term", ns):
+                go_terms = signature.findall(f".//{qname('go-xref')}")
+                if not go_terms:
+                    go_terms = match.findall(f".//{qname('go-term')}")
+                for go_term in go_terms:
                     go_id = go_term.attrib.get("id", "")
                     go_name = go_term.attrib.get("name", "")
                     go_category = go_term.attrib.get("category", "")
 
                     # Add to GO terms
-                    if go_id and go_id not in [
-                        g["id"] for g in annotations[protein_id]["go_terms"]
-                    ]:
+                    if go_id and unique_by_id(
+                        annotations[protein_id]["go_terms"], go_id
+                    ):
                         annotations[protein_id]["go_terms"].append(
                             {"id": go_id, "name": go_name, "category": go_category}
                         )
 
                 # Get pathways
-                for pathway in match.findall(".//ns:pathway", ns):
+                pathways = signature.findall(f".//{qname('pathway-xref')}")
+                if not pathways:
+                    pathways = match.findall(f".//{qname('pathway')}")
+                for pathway in pathways:
                     pathway_id = pathway.attrib.get("id", "")
                     pathway_name = pathway.attrib.get("name", "")
                     pathway_db = pathway.attrib.get("db", "")
 
                     # Add to pathways
-                    if pathway_id and pathway_id not in [
-                        p["id"] for p in annotations[protein_id]["pathways"]
-                    ]:
+                    if pathway_id and unique_by_id(
+                        annotations[protein_id]["pathways"], pathway_id
+                    ):
                         annotations[protein_id]["pathways"].append(
                             {
                                 "id": pathway_id,
diff --git a/funannotate2_addons/signalp6.py b/funannotate2_addons/signalp6.py
index d62f1ff..0f7b740 100644
--- a/funannotate2_addons/signalp6.py
+++ b/funannotate2_addons/signalp6.py
@@ -4,6 +4,7 @@
 import subprocess
 import argparse
 import json
+import re
 import uuid
 from .log import startLogging
 from .utils import (
@@ -17,6 +18,82 @@
 logger = startLogging()
 
 
+def _signalp_annotation_value(prediction):
+    """Format a standardized SignalP annotation value."""
+    cleavage_pos = prediction.get("cleavage_site")
+    if cleavage_pos and "-" in cleavage_pos:
+        return f"SignalP:1-{cleavage_pos.split('-')[0]}"
+    return f"SignalP:{prediction['prediction']}"
+
+
+def _write_signalp_annotations(output_file, predictions):
+    """Write standardized SignalP annotations."""
+    with open(output_file, "w") as out:
+        out.write("#gene_id\tannotation_type\tannotation_value\n")
+        for protein_id, pred in predictions.items():
+            if pred["has_signal_peptide"]:
+                annotation = _signalp_annotation_value(pred)
+                out.write(f"{protein_id}\tnote\t{annotation}\n")
+
+
+def _normalize_signalp_prediction(prediction):
+    """Normalize SignalP prediction labels across text and JSON outputs."""
+    prediction_map = {
+        "other": "OTHER",
+        "sp": "SP",
+        "signal peptide (sec/spi)": "SP",
+        "signal peptide": "SP",
+        "lipoprotein signal peptide (sec/spii)": "LIPO",
+        "tat signal peptide (tat/spi)": "TAT",
+        "tat lipoprotein signal peptide (tat/spii)": "TATLIPO",
+        "pilin-like signal peptide (sec/spiii)": "PILIN",
+        "pilin-like signal peptide": "PILIN",
+    }
+    normalized = prediction_map.get(str(prediction).strip().lower())
+    if normalized:
+        return normalized
+    return str(prediction).strip().upper()
+
+
+def _parse_signalp6_sequence_json_entry(entry):
+    """Parse a real SignalP 6 JSON SEQUENCES entry."""
+    protein_id = entry["Name"]
+    prediction = _normalize_signalp_prediction(entry["Prediction"])
+    protein_types = entry.get("Protein_types", [])
+    likelihoods = entry.get("Likelihood", [])
+
+    probabilities = {}
+    for label, value in zip(protein_types, likelihoods):
+        probabilities[_normalize_signalp_prediction(label)] = float(value)
+
+    parsed = {
+        "prediction": prediction,
+        "probability": probabilities.get(
+            prediction,
+            max((float(value) for value in likelihoods), default=0.0),
+        ),
+        "has_signal_peptide": prediction != "OTHER",
+    }
+
+    if "OTHER" in probabilities:
+        parsed["other_prob"] = probabilities["OTHER"]
+    if "SP" in probabilities:
+        parsed["sp_prob"] = probabilities["SP"]
+
+    cleavage_info = str(entry.get("CS_pos", "")).strip()
+    if cleavage_info:
+        match = re.search(
+            r"Cleavage site between pos\.\s*(\d+)\s+and\s+(\d+)(?:\.\s*Probability\s*([0-9.]+))?",
+            cleavage_info,
+        )
+        if match:
+            parsed["cleavage_site"] = f"{match.group(1)}-{match.group(2)}"
+            if match.group(3):
+                parsed["cleavage_prob"] = float(match.group(3))
+
+    return protein_id, parsed
+
+
 def get_version(signalp_path):
     """
     Get the version of SignalP.
@@ -197,26 +274,7 @@ def parse_signalp(input_file, output_file=None, gene_dict=None):
 
         # Write to output file if specified
         if output_file:
-            with open(output_file, "w") as out:
-                # Write header
-                out.write("#gene_id\tannotation_type\tannotation_value\n")
-
-                for protein_id, pred in predictions.items():
-                    if pred["has_signal_peptide"]:
-                        if "cleavage_site" in pred:
-                            # Extract the cleavage position to determine signal peptide range
-                            # CS pos: 17-18 means cleavage between 17 and 18, so signal peptide is 1-17
-                            cleavage_pos = pred["cleavage_site"]
-                            if "-" in cleavage_pos:
-                                end_pos = cleavage_pos.split("-")[
-                                    0
-                                ]  # Take the first number
-                                annotation = f"SignalP:1-{end_pos}"
-                            else:
-                                annotation = f"SignalP:{pred['prediction']}"
-                        else:
-                            annotation = f"SignalP:{pred['prediction']}"
-                        out.write(f"{protein_id}\tnote\t{annotation}\n")
+            _write_signalp_annotations(output_file, predictions)
 
         return predictions
 
@@ -249,36 +307,34 @@ def parse_signalp_json(input_file, output_file=None, gene_dict=None):
         with open(input_file, "r") as f:
             data = json.load(f)
 
-            for entry in data:
-                protein_id = entry["ID"]
-                prediction = entry["PREDICTION"]
-                probability = float(entry["PROB"])
-
-                # Store prediction
-                predictions[protein_id] = {
-                    "prediction": prediction,
-                    "probability": probability,
-                    "has_signal_peptide": prediction != "OTHER",
-                }
-
-                # Add cleavage site if available
-                if "CS_POS" in entry and entry["CS_POS"] != "-":
-                    predictions[protein_id]["cleavage_site"] = entry["CS_POS"]
+            if isinstance(data, list):
+                entries = data
+                real_signalp6_json = False
+            elif isinstance(data, dict) and isinstance(data.get("SEQUENCES"), dict):
+                entries = data["SEQUENCES"].values()
+                real_signalp6_json = True
+            else:
+                raise ValueError("Unsupported SignalP JSON format")
+
+            for entry in entries:
+                if real_signalp6_json:
+                    protein_id, parsed = _parse_signalp6_sequence_json_entry(entry)
+                else:
+                    protein_id = entry["ID"]
+                    prediction = _normalize_signalp_prediction(entry["PREDICTION"])
+                    parsed = {
+                        "prediction": prediction,
+                        "probability": float(entry["PROB"]),
+                        "has_signal_peptide": prediction != "OTHER",
+                    }
+                    if "CS_POS" in entry and entry["CS_POS"] != "-":
+                        parsed["cleavage_site"] = entry["CS_POS"]
+
+                predictions[protein_id] = parsed
 
         # Write to output file if specified
         if output_file:
-            with open(output_file, "w") as out:
-                # Write header
-                out.write("#gene_id\tannotation_type\tannotation_value\n")
-
-                for protein_id, pred in predictions.items():
-                    if pred["has_signal_peptide"]:
-                        out.write(
-                            f"{protein_id}\tnote\tSignalP:{pred['prediction']} prob={pred['probability']:.3f}"
-                        )
-                        if "cleavage_site" in pred:
-                            out.write(f" cleavage_site={pred['cleavage_site']}")
-                        out.write("\n")
+            _write_signalp_annotations(output_file, predictions)
 
         return predictions
 
@@ -417,7 +473,8 @@ def run_signalp_cli(args):
 
         # Parse SignalP results
         annotations_file = os.path.join(output_dir, "signalp.annotations.txt")
-        predictions = parse_signalp(args.parse, output_file=annotations_file)
+        parser = parse_signalp_json if args.parse.endswith(".json") else parse_signalp
+        predictions = parser(args.parse, output_file=annotations_file)
 
         if predictions:
             logger.info(f"Parsed {len(predictions)} predictions from {args.parse}")
diff --git a/tests/unit/test_emapper.py b/tests/unit/test_emapper.py
index 4fe16c7..4557081 100644
--- a/tests/unit/test_emapper.py
+++ b/tests/unit/test_emapper.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+import argparse
 import os
 import unittest
 from unittest import mock
@@ -11,6 +12,7 @@
     get_version,
     parse_emapper_annotations,
     emapper_to_json,
+    run_emapper_cli,
 )
 
 
@@ -99,6 +101,85 @@ def test_emapper_to_json(self):
         self.assertEqual(data["gene2"]["gene_name"], "geneB")
         self.assertEqual(data["gene2"]["description"], "Transcription factor")
 
+    @mock.patch("funannotate2_addons.emapper.logger.error")
+    @mock.patch("funannotate2_addons.emapper.get_input_file", return_value=None)
+    def test_run_emapper_cli_missing_input_file(self, mock_get_input_file, mock_logger):
+        args = mock.Mock(input="predict_dir", file=None, parse=None)
+
+        run_emapper_cli(args)
+
+        mock_get_input_file.assert_called_once_with(args, "proteins")
+        mock_logger.assert_called_once_with("No protein FASTA file found")
+
+    @mock.patch("funannotate2_addons.emapper.startLogging")
+    @mock.patch("funannotate2_addons.emapper.parse_emapper_annotations")
+    @mock.patch("funannotate2_addons.emapper.run_emapper")
+    @mock.patch("funannotate2_addons.emapper.get_input_file")
+    def test_run_emapper_cli_uses_stable_output_prefix(
+        self,
+        mock_get_input_file,
+        mock_run_emapper,
+        mock_parse_annotations,
+        mock_start_logging,
+    ):
+        input_file = os.path.join(self.test_dir, "proteins.fa")
+        with open(input_file, "w") as f:
+            f.write(">gene1\nMSTNPKPQRIT\n")
+
+        output_dir = os.path.join(self.test_dir, "output")
+        expected_prefix = os.path.join(output_dir, "emapper")
+        mock_get_input_file.return_value = input_file
+        mock_run_emapper.return_value = f"{expected_prefix}.emapper.annotations"
+        mock_parse_annotations.return_value = {"gene1": {"gene_name": "geneA"}}
+        mock_start_logging.return_value = mock.Mock()
+
+        args = argparse.Namespace(
+            input="predict_dir",
+            file=None,
+            parse=None,
+            output=output_dir,
+            emapper_path="emapper.py",
+            cpus=4,
+            database=None,
+            data_dir=None,
+            temp_dir=None,
+            resume=True,
+            override=False,
+            mode="diamond",
+            tax_scope="auto",
+            target_orthologs="all",
+            sensmode="sensitive",
+            no_annot=False,
+            no_search=False,
+            dmnd_db=None,
+            seed_orthologs=None,
+            report_orthologs=False,
+            go_evidence=None,
+            pfam_realign=False,
+            seed_ortholog_score=None,
+            seed_ortholog_evalue=None,
+            query_cover=None,
+            subject_cover=None,
+            matrix=None,
+            gapopen=None,
+            gapextend=None,
+            evalue=None,
+            pident=None,
+            query_sgcov=None,
+            subject_sgcov=None,
+            json=False,
+        )
+
+        run_emapper_cli(args)
+
+        self.assertTrue(os.path.isdir(output_dir))
+        self.assertEqual(mock_run_emapper.call_args.kwargs["output_prefix"], expected_prefix)
+        self.assertTrue(mock_run_emapper.call_args.kwargs["resume"])
+        mock_parse_annotations.assert_called_once_with(
+            f"{expected_prefix}.emapper.annotations",
+            output_file=f"{expected_prefix}.annotations.txt",
+        )
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/tests/unit/test_signalp6.py b/tests/unit/test_signalp6.py
index 65fdd7a..fc7530d 100644
--- a/tests/unit/test_signalp6.py
+++ b/tests/unit/test_signalp6.py
@@ -5,14 +5,32 @@
 import shutil
 import os
 import json
+import argparse
 import subprocess
 from unittest import mock
 
 from funannotate2_addons.signalp6 import (
     get_version,
     parse_signalp,
+    parse_signalp_json,
     signalp_to_json,
     run_signalp,
+    run_signalp_cli,
+)
+
+
+SIGNALP6_LOCAL_ROOT = os.environ.get(
+    "FUNANNOTATE2_ADDONS_SIGNALP6_LOCAL",
+    "/Users/jon/software/f2_ecosystem/funannotate2-addons/local_tests/signalp6_test",
+)
+SIGNALP6_LOCAL_TXT = os.path.join(SIGNALP6_LOCAL_ROOT, "test2", "prediction_results.txt")
+SIGNALP6_LOCAL_JSON = os.path.join(SIGNALP6_LOCAL_ROOT, "test2", "output.json")
+SIGNALP6_LOCAL_ANNOTATIONS = os.path.join(
+    SIGNALP6_LOCAL_ROOT, "test3", "signalp.annotations.txt"
+)
+HAS_SIGNALP6_LOCAL_FIXTURES = all(
+    os.path.exists(path)
+    for path in (SIGNALP6_LOCAL_TXT, SIGNALP6_LOCAL_JSON, SIGNALP6_LOCAL_ANNOTATIONS)
 )
 
 
@@ -31,6 +49,16 @@ def setUp(self):
             f.write("protein3\tSP\t0.007633\t0.992322\tCS pos: 23-24. Pr: 0.6512\n")
             f.write("protein4\tOTHER\t1.000045\t0.000000\t\n")
 
+        self.sample_json_file = os.path.join(self.test_dir, "prediction_results.json")
+        with open(self.sample_json_file, "w") as f:
+            json.dump(
+                [
+                    {"ID": "protein1", "PREDICTION": "OTHER", "PROB": "0.998", "CS_POS": "-"},
+                    {"ID": "protein2", "PREDICTION": "SP", "PROB": "0.999", "CS_POS": "17-18"},
+                ],
+                f,
+            )
+
         # Create a sample protein FASTA file
         self.sample_fasta_file = os.path.join(self.test_dir, "proteins.fasta")
         with open(self.sample_fasta_file, "w") as f:
@@ -151,6 +179,33 @@ def test_signalp_to_json(self):
         self.assertTrue(protein2_data["has_signal_peptide"])
         self.assertEqual(protein2_data["cleavage_site"], "17-18")
 
+    def test_parse_signalp_json(self):
+        predictions = parse_signalp_json(self.sample_json_file)
+
+        self.assertIn("protein1", predictions)
+        self.assertIn("protein2", predictions)
+        self.assertFalse(predictions["protein1"]["has_signal_peptide"])
+        self.assertTrue(predictions["protein2"]["has_signal_peptide"])
+        self.assertEqual(predictions["protein2"]["cleavage_site"], "17-18")
+
+    def test_run_signalp_cli_parse_json(self):
+        output_dir = os.path.join(self.test_dir, "signalp_json_output")
+        args = argparse.Namespace(parse=self.sample_json_file, output=output_dir, json=True)
+
+        run_signalp_cli(args)
+
+        annotations_file = os.path.join(output_dir, "signalp.annotations.txt")
+        json_file = os.path.join(output_dir, "signalp.json")
+
+        self.assertTrue(os.path.exists(annotations_file))
+        self.assertTrue(os.path.exists(json_file))
+
+        with open(annotations_file, "r") as f:
+            annotations = f.read()
+
+        self.assertIn("protein2\tnote\tSignalP:1-17", annotations)
+
+
     @mock.patch("subprocess.run")
     @mock.patch("funannotate2_addons.signalp6.log_command")
     def test_run_signalp(self, mock_log_command, mock_run):
@@ -190,5 +245,46 @@ def test_run_signalp(self, mock_log_command, mock_run):
         self.assertIn(output_dir, call_args)
 
 
+@unittest.skipUnless(
+    HAS_SIGNALP6_LOCAL_FIXTURES,
+    "local SignalP6 fixtures not available",
+)
+class TestSignalp6LocalFixtures(unittest.TestCase):
+    def test_parse_real_signalp_tabular_fixture(self):
+        predictions = parse_signalp(SIGNALP6_LOCAL_TXT)
+
+        self.assertEqual(len(predictions), 65)
+        self.assertFalse(predictions["FUN2_000001-T1 FUN2_000001"]["has_signal_peptide"])
+
+        protein = predictions["FUN2_000014-T1 FUN2_000014"]
+        self.assertEqual(protein["prediction"], "SP")
+        self.assertAlmostEqual(protein["probability"], 0.999747, places=6)
+        self.assertEqual(protein["cleavage_site"], "17-18")
+        self.assertAlmostEqual(protein["cleavage_prob"], 0.9751, places=4)
+
+    def test_parse_real_signalp_json_fixture_matches_expected_annotations(self):
+        output_file = os.path.join(
+            tempfile.mkdtemp(), "signalp_from_real_json.annotations.txt"
+        )
+        try:
+            predictions = parse_signalp_json(SIGNALP6_LOCAL_JSON, output_file=output_file)
+
+            self.assertEqual(len(predictions), 65)
+            self.assertFalse(predictions["FUN2_000001-T1 FUN2_000001"]["has_signal_peptide"])
+
+            protein = predictions["FUN2_000014-T1 FUN2_000014"]
+            self.assertEqual(protein["prediction"], "SP")
+            self.assertAlmostEqual(protein["probability"], 0.9997, places=4)
+            self.assertEqual(protein["cleavage_site"], "17-18")
+            self.assertAlmostEqual(protein["cleavage_prob"], 0.975137, places=6)
+
+            with open(output_file, "r") as observed, open(
+                SIGNALP6_LOCAL_ANNOTATIONS, "r"
+            ) as expected:
+                self.assertEqual(observed.read(), expected.read())
+        finally:
+            shutil.rmtree(os.path.dirname(output_file))
+
+
 if __name__ == "__main__":
     unittest.main()

From deaf45b91361dcb55b5ee38161a926132ffdd9ae Mon Sep 17 00:00:00 2001
From: Jon Palmer <nextgenusfs@gmail.com>
Date: Fri, 6 Mar 2026 15:54:00 -0800
Subject: [PATCH 3/4] ci(release): harden production release workflow and CLI
 coverage

Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7
---
 .github/workflows/production-release.yml | 50 +++++++++++++++++++-----
 tests/unit/test_cli.py                   | 28 +++++++------
 2 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/.github/workflows/production-release.yml b/.github/workflows/production-release.yml
index 3e385ce..aa4a905 100644
--- a/.github/workflows/production-release.yml
+++ b/.github/workflows/production-release.yml
@@ -24,6 +24,7 @@ jobs:
     runs-on: ubuntu-latest
     outputs:
       tag: ${{ steps.tag.outputs.tag }}
+      release_version: ${{ steps.tag.outputs.release_version }}
       tag_exists: ${{ steps.check.outputs.exists }}
 
     steps:
@@ -39,7 +40,24 @@ jobs:
           echo "Error: Tag must be in format v1.0.0, 1.0.0, or CalVer like 25.5.28"
           exit 1
         fi
+
+        RELEASE_VERSION="${TAG#v}"
+        PACKAGE_VERSION=$(python3 - <<'PY'
+        import pathlib
+        import tomllib
+
+        pyproject = tomllib.loads(pathlib.Path("pyproject.toml").read_text())
+        print(pyproject["project"]["version"])
+        PY
+        )
+
+        if [ "$RELEASE_VERSION" != "$PACKAGE_VERSION" ]; then
+          echo "Error: Tag version ($RELEASE_VERSION) does not match pyproject.toml version ($PACKAGE_VERSION)"
+          exit 1
+        fi
+
         echo "tag=$TAG" >> $GITHUB_OUTPUT
+        echo "release_version=$RELEASE_VERSION" >> $GITHUB_OUTPUT
 
     - name: Check if tag exists
       id: check
@@ -128,7 +146,7 @@ jobs:
           echo "Checking if version exists on PyPI..."
 
           # Check if package exists on PyPI
-          if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.tag }}/json" | grep -q "Not Found"; then
+          if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.release_version }}/json" | grep -q "Not Found"; then
             echo "❌ Version does not exist on PyPI - this is a real error"
             exit 1
           else
@@ -162,21 +180,31 @@ jobs:
     - name: Generate release notes
       id: release_notes
       run: |
-        # Get the previous tag
-        PREV_TAG=$(git describe --tags --abbrev=0 ${{ needs.validate-tag.outputs.tag }}^)
+        # Get the previous tag if one exists (initial releases may not have one)
+        PREV_TAG=$(git tag --merged "${{ needs.validate-tag.outputs.tag }}" --sort=-creatordate | grep -vx "${{ needs.validate-tag.outputs.tag }}" | head -n 1 || true)
 
         echo "Generating release notes for ${{ needs.validate-tag.outputs.tag }}"
-        echo "Previous tag: $PREV_TAG"
+        if [ -n "$PREV_TAG" ]; then
+          echo "Previous tag: $PREV_TAG"
+        else
+          echo "Previous tag: none (initial release)"
+        fi
         echo ""
 
         # Generate changelog
         echo "## Changes" > release_notes.md
         echo "" >> release_notes.md
 
-        # Get all commits between previous tag and current tag, excluding version bump commits
-        git log --pretty=format:"- %s (%h)" $PREV_TAG..${{ needs.validate-tag.outputs.tag }} \
-          --grep="bump version" --grep="version bump" --grep="update version" \
-          --invert-grep >> release_notes.md
+        # Get all commits since the previous tag, excluding version bump commits
+        if [ -n "$PREV_TAG" ]; then
+          git log --pretty=format:"- %s (%h)" "$PREV_TAG..${{ needs.validate-tag.outputs.tag }}" \
+            --grep="bump version" --grep="version bump" --grep="update version" \
+            --invert-grep >> release_notes.md
+        else
+          git log --pretty=format:"- %s (%h)" "${{ needs.validate-tag.outputs.tag }}" \
+            --grep="bump version" --grep="version bump" --grep="update version" \
+            --invert-grep >> release_notes.md
+        fi
 
         # Check if we have any commits
         if [ ! -s release_notes.md ] || [ $(wc -l < release_notes.md) -le 2 ]; then
@@ -187,7 +215,11 @@ jobs:
 
         echo "" >> release_notes.md
         echo "" >> release_notes.md
-        echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md
+        if [ -n "$PREV_TAG" ]; then
+          echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md
+        else
+          echo "**Full Changelog**: Initial release" >> release_notes.md
+        fi
 
         echo "Generated release notes:"
         cat release_notes.md
diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py
index e61b161..efb96d2 100644
--- a/tests/unit/test_cli.py
+++ b/tests/unit/test_cli.py
@@ -1,5 +1,6 @@
 #!/usr/bin/env python3
 
+import argparse
 import unittest
 import sys
 from unittest import mock
@@ -9,6 +10,15 @@
 
 
 class TestCLI(unittest.TestCase):
+    def _assert_main_dispatch(self, subparser_name, patch_target):
+        args = argparse.Namespace(subparser_name=subparser_name)
+
+        with mock.patch("funannotate2_addons.__main__.parse_args", return_value=args):
+            with mock.patch(patch_target) as mock_runner:
+                main()
+
+        mock_runner.assert_called_once_with(args)
+
     def test_parse_args_no_args(self):
         """Test that no arguments raises SystemExit and prints help"""
         with self.assertRaises(SystemExit) as cm:
@@ -61,30 +71,24 @@ def test_parse_args_signalp6(self):
     @mock.patch("funannotate2_addons.__main__.run_emapper_cli")
     def test_main_emapper(self, mock_run_emapper):
         """Test main function calls emapper CLI"""
-        with mock.patch("sys.argv", ["funannotate2_addons", "emapper", "--help"]):
-            with self.assertRaises(SystemExit):
-                main()
+        self._assert_main_dispatch("emapper", "funannotate2_addons.__main__.run_emapper_cli")
 
     @mock.patch("funannotate2_addons.__main__.run_iprscan_cli")
     def test_main_iprscan(self, mock_run_iprscan):
         """Test main function calls iprscan CLI"""
-        with mock.patch("sys.argv", ["funannotate2_addons", "iprscan", "--help"]):
-            with self.assertRaises(SystemExit):
-                main()
+        self._assert_main_dispatch("iprscan", "funannotate2_addons.__main__.run_iprscan_cli")
 
     @mock.patch("funannotate2_addons.__main__.run_antismash_cli")
     def test_main_antismash(self, mock_run_antismash):
         """Test main function calls antismash CLI"""
-        with mock.patch("sys.argv", ["funannotate2_addons", "antismash", "--help"]):
-            with self.assertRaises(SystemExit):
-                main()
+        self._assert_main_dispatch(
+            "antismash", "funannotate2_addons.__main__.run_antismash_cli"
+        )
 
     @mock.patch("funannotate2_addons.__main__.run_signalp_cli")
     def test_main_signalp6(self, mock_run_signalp):
         """Test main function calls signalp6 CLI"""
-        with mock.patch("sys.argv", ["funannotate2_addons", "signalp6", "--help"]):
-            with self.assertRaises(SystemExit):
-                main()
+        self._assert_main_dispatch("signalp6", "funannotate2_addons.__main__.run_signalp_cli")
 
 
 if __name__ == "__main__":

From ca30d7ec7e53498209995aa19ec112a8356ea843 Mon Sep 17 00:00:00 2001
From: Jon Palmer <nextgenusfs@gmail.com>
Date: Fri, 6 Mar 2026 15:54:10 -0800
Subject: [PATCH 4/4] test(antismash): add approved local fixture regression
 coverage

Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7
---
 tests/unit/test_antismash.py | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/unit/test_antismash.py b/tests/unit/test_antismash.py
index 9e1b432..2732298 100644
--- a/tests/unit/test_antismash.py
+++ b/tests/unit/test_antismash.py
@@ -15,6 +15,13 @@
 )
 
 
+ANTISMASH_LOCAL_GBK = os.environ.get(
+    "FUNANNOTATE2_ADDONS_ANTISMASH_GBK",
+    "/Users/jon/software/f2_ecosystem/funannotate2-addons/local_tests/antismash_pverr/antismash_fe857478/Pseudogymnoascus_verrucosus.gbk",
+)
+HAS_ANTISMASH_LOCAL_GBK = os.path.exists(ANTISMASH_LOCAL_GBK)
+
+
 class TestAntismash(unittest.TestCase):
     def setUp(self):
         # Create a temporary directory for test files
@@ -153,5 +160,28 @@ def test_antismash_to_json(self):
         self.assertIsInstance(clusters, dict)
 
 
+@unittest.skipUnless(
+    HAS_ANTISMASH_LOCAL_GBK,
+    "local antiSMASH GenBank fixture not available",
+)
+class TestAntismashLocalFixtures(unittest.TestCase):
+    def test_parse_real_antismash_genbank_fixture(self):
+        backbone_domains, backbone_subtype, backbone_enzymes, cluster_genes = (
+            parse_antismash_gbk(ANTISMASH_LOCAL_GBK)
+        )
+
+        self.assertEqual(len(cluster_genes), 28)
+        self.assertEqual(backbone_subtype["cluster_1"], ["T1PKS"])
+        self.assertEqual(backbone_subtype["cluster_28"], ["terpene"])
+        self.assertEqual(len(cluster_genes["cluster_1"]), 30)
+        self.assertIn("FUN2_000393", cluster_genes["cluster_1"])
+        self.assertIn("FUN2_000393", backbone_enzymes["cluster_1"])
+        self.assertIn("FUN2_011547", backbone_enzymes["cluster_28"])
+
+        annotations = "\n".join(backbone_domains["FUN2_000393"])
+        self.assertIn("SMCOG1093", annotations)
+        self.assertIn("antiSMASH domain: itr_KS", annotations)
+
+
 if __name__ == "__main__":
     unittest.main()