From 33600440c28ec4324e7723035e2dce310307b340 Mon Sep 17 00:00:00 2001 From: Jon Palmer Date: Fri, 6 Mar 2026 15:50:11 -0800 Subject: [PATCH 1/4] test(iprscan): add committed current-namespace XML fixture Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7 --- .../iprscan/current_namespace_minimal.xml | 23 ++++ tests/unit/test_iprscan.py | 106 ++++++++++++++---- 2 files changed, 109 insertions(+), 20 deletions(-) create mode 100644 tests/fixtures/iprscan/current_namespace_minimal.xml diff --git a/tests/fixtures/iprscan/current_namespace_minimal.xml b/tests/fixtures/iprscan/current_namespace_minimal.xml new file mode 100644 index 0000000..b1b6e78 --- /dev/null +++ b/tests/fixtures/iprscan/current_namespace_minimal.xml @@ -0,0 +1,23 @@ + + + + MSTNPKPQRIT + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/tests/unit/test_iprscan.py b/tests/unit/test_iprscan.py index 9300a24..2a78f88 100644 --- a/tests/unit/test_iprscan.py +++ b/tests/unit/test_iprscan.py @@ -11,12 +11,25 @@ from funannotate2_addons.iprscan import ( get_version, + run_iprscan, + parse_iprscan, parse_iprscan_tsv, parse_iprscan_xml, iprscan_to_json, ) +CURRENT_NAMESPACE_XML_FIXTURE = os.path.abspath( + os.path.join( + os.path.dirname(__file__), + "..", + "fixtures", + "iprscan", + "current_namespace_minimal.xml", + ) +) + + class TestIprscan(unittest.TestCase): def setUp(self): # Create a temporary directory for test files @@ -103,6 +116,29 @@ def test_get_version_error(self, mock_run): version = get_version("interproscan.sh") self.assertIsNone(version) + @mock.patch("subprocess.run") + def test_run_iprscan_uses_full_output_path(self, mock_run): + input_file = os.path.join(self.test_dir, "proteins.fa") + with open(input_file, "w") as f: + f.write(">protein1\nMSTNPKPQRIT\n") + + output_prefix = os.path.join(self.test_dir, "iprscan_out", "iprscan") + os.makedirs(os.path.dirname(output_prefix), exist_ok=True) + expected_output = f"{output_prefix}.xml" + + def fake_run(cmd, **kwargs): + with open(expected_output, "w") as out: + out.write("") + return mock.Mock(returncode=0) + + mock_run.side_effect = fake_run + + result = run_iprscan(input_file=input_file, output_prefix=output_prefix) + + self.assertEqual(result, expected_output) + called_cmd = mock_run.call_args.args[0] + self.assertEqual(called_cmd[called_cmd.index("-o") + 1], expected_output) + def test_parse_iprscan_tsv(self): # Test parsing the sample TSV file annotations = parse_iprscan_tsv(self.sample_tsv_file) @@ -148,26 +184,23 @@ def test_parse_iprscan_xml(self): # Test parsing the sample XML file annotations = parse_iprscan_xml(self.sample_xml_file) - # Check that annotations were parsed correctly - self.assertIsInstance(annotations, dict) - self.assertTrue(len(annotations) > 0) - - # Check that we have protein entries - protein_ids = list(annotations.keys()) - self.assertTrue(len(protein_ids) > 0) - - # Check the structure of the first protein - first_protein = annotations[protein_ids[0]] - self.assertIn("signatures", first_protein) - self.assertIn("interpro_domains", first_protein) - self.assertIn("go_terms", first_protein) - self.assertIn("pathways", first_protein) - - # Check that signatures are present - self.assertIsInstance(first_protein["signatures"], list) - self.assertIsInstance(first_protein["interpro_domains"], list) - self.assertIsInstance(first_protein["go_terms"], list) - self.assertIsInstance(first_protein["pathways"], list) + self.assertIn("protein1", annotations) + self.assertIn("protein2", annotations) + self.assertEqual(annotations["protein1"]["signatures"][0]["id"], "PF00001") + self.assertEqual( + annotations["protein1"]["interpro_domains"][0]["id"], "IPR000001" + ) + self.assertEqual(annotations["protein1"]["go_terms"][0]["id"], "GO:0003677") + + def test_parse_iprscan_xml_current_namespace_and_xref(self): + annotations = parse_iprscan_xml(CURRENT_NAMESPACE_XML_FIXTURE) + + self.assertIn("FUN2_000001-T1", annotations) + protein = annotations["FUN2_000001-T1"] + self.assertEqual(protein["signatures"][0]["id"], "PF00001") + self.assertEqual(protein["interpro_domains"][0]["id"], "IPR000001") + self.assertEqual(protein["go_terms"][0]["id"], "GO:0003677") + self.assertEqual(protein["pathways"][0]["id"], "map00010") def test_iprscan_to_json(self): # Test converting TSV to JSON @@ -194,6 +227,39 @@ def test_iprscan_to_json(self): self.assertIn("interpro_domains", protein1_data) self.assertIn("go_terms", protein1_data) + @mock.patch("funannotate2_addons.iprscan.parse_iprscan_xml") + def test_parse_iprscan_dispatches_xml(self, mock_parse_xml): + expected = {"protein1": {}} + mock_parse_xml.return_value = expected + + result = parse_iprscan(self.sample_xml_file) + + self.assertEqual(result, expected) + mock_parse_xml.assert_called_once_with(self.sample_xml_file, None, None) + + @mock.patch("funannotate2_addons.iprscan.parse_iprscan_tsv") + def test_parse_iprscan_dispatches_tsv(self, mock_parse_tsv): + expected = {"protein1": {}} + mock_parse_tsv.return_value = expected + + result = parse_iprscan(self.sample_tsv_file) + + self.assertEqual(result, expected) + mock_parse_tsv.assert_called_once_with(self.sample_tsv_file, None, None) + + @mock.patch("funannotate2_addons.iprscan.logger.error") + def test_parse_iprscan_rejects_unsupported_extension(self, mock_logger): + unsupported_file = os.path.join(self.test_dir, "sample.txt") + with open(unsupported_file, "w") as f: + f.write("unsupported") + + result = parse_iprscan(unsupported_file) + + self.assertIsNone(result) + mock_logger.assert_called_once_with( + f"Unsupported file format: {unsupported_file}" + ) + if __name__ == "__main__": unittest.main() From dcca2f240c6429413562d4d8dbebc0f07e8a23cf Mon Sep 17 00:00:00 2001 From: Jon Palmer Date: Fri, 6 Mar 2026 15:53:50 -0800 Subject: [PATCH 2/4] fix(tooling): land approved emapper, iprscan, and signalp updates Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7 --- funannotate2_addons/emapper.py | 7 +- funannotate2_addons/iprscan.py | 72 ++++++++++----- funannotate2_addons/signalp6.py | 153 ++++++++++++++++++++++---------- tests/unit/test_emapper.py | 81 +++++++++++++++++ tests/unit/test_signalp6.py | 96 ++++++++++++++++++++ 5 files changed, 336 insertions(+), 73 deletions(-) diff --git a/funannotate2_addons/emapper.py b/funannotate2_addons/emapper.py index cd656a4..a42587b 100644 --- a/funannotate2_addons/emapper.py +++ b/funannotate2_addons/emapper.py @@ -2,7 +2,6 @@ import os import sys -import uuid import subprocess import argparse import json @@ -905,7 +904,7 @@ def run_emapper_cli(args): input_file = get_input_file(args, "proteins") if args.input else args.file if not input_file: - log.error("No protein FASTA file found") + logger.error("No protein FASTA file found") return # Get output directory @@ -922,8 +921,8 @@ def run_emapper_cli(args): # Create output directory if it doesn't exist os.makedirs(output_dir, exist_ok=True) - # Set output prefix - output_prefix = os.path.join(output_dir, f"emapper_{uuid.uuid4().hex[:6]}") + # Use a stable prefix so repeated runs can reuse the same output set. + output_prefix = os.path.join(output_dir, "emapper") # Set up logging log_file = f"{output_prefix}.log" diff --git a/funannotate2_addons/iprscan.py b/funannotate2_addons/iprscan.py index 75a0039..cd9c6ef 100644 --- a/funannotate2_addons/iprscan.py +++ b/funannotate2_addons/iprscan.py @@ -93,7 +93,7 @@ def run_iprscan( "--input", input_file, "-o", - os.path.basename(f"{output_prefix}.{format.lower()}"), + f"{output_prefix}.{format.lower()}", "-f", format, "-cpu", @@ -169,15 +169,32 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None): logger.error(f"Error parsing XML file: {e}") return None - # Define namespaces - ns = {"ns": "http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5"} + # Detect namespace from the document so we can support current and older + # InterProScan XML schema URLs. + namespace = root.tag.split("}")[0].strip("{") if root.tag.startswith("{") else "" + + def qname(tag): + return f"{{{namespace}}}{tag}" if namespace else tag + + def unique_by_id(items, item_id): + return item_id not in [item["id"] for item in items] # Parse annotations annotations = {} # Iterate through protein matches - for protein in root.findall(".//ns:protein", ns): + for protein in root.findall(f".//{qname('protein')}"): protein_id = protein.attrib.get("id", "") + if not protein_id: + xref = protein.find(qname("xref")) + if xref is not None: + protein_id = xref.attrib.get("id", "") + if not protein_id: + sequence = protein.find(qname("sequence")) + if sequence is not None and sequence.text: + protein_id = sequence.text.strip() + if not protein_id: + continue if protein_id not in annotations: annotations[protein_id] = { @@ -187,33 +204,40 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None): "signatures": [], } + matches = protein.find(qname("matches")) + if matches is None: + continue + # Get matches - for match in protein.findall(".//ns:match", ns): - signature = match.find(".//ns:signature", ns) + for match in matches: + if not match.tag.split("}")[-1].endswith("-match"): + continue + + signature = match.find(qname("signature")) if signature is not None: sig_acc = signature.attrib.get("ac", "") sig_desc = signature.attrib.get("desc", "") sig_name = signature.attrib.get("name", "") # Add to signatures - if sig_acc and sig_acc not in [ - s["id"] for s in annotations[protein_id]["signatures"] - ]: + if sig_acc and unique_by_id( + annotations[protein_id]["signatures"], sig_acc + ): annotations[protein_id]["signatures"].append( {"id": sig_acc, "name": sig_name, "description": sig_desc} ) # Get InterPro domains - entry = match.find(".//ns:entry", ns) + entry = signature.find(qname("entry")) if entry is not None: entry_acc = entry.attrib.get("ac", "") entry_desc = entry.attrib.get("desc", "") entry_name = entry.attrib.get("name", "") # Add to InterPro domains - if entry_acc and entry_acc not in [ - d["id"] for d in annotations[protein_id]["interpro_domains"] - ]: + if entry_acc and unique_by_id( + annotations[protein_id]["interpro_domains"], entry_acc + ): annotations[protein_id]["interpro_domains"].append( { "id": entry_acc, @@ -223,29 +247,35 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None): ) # Get GO terms - for go_term in match.findall(".//ns:go-term", ns): + go_terms = signature.findall(f".//{qname('go-xref')}") + if not go_terms: + go_terms = match.findall(f".//{qname('go-term')}") + for go_term in go_terms: go_id = go_term.attrib.get("id", "") go_name = go_term.attrib.get("name", "") go_category = go_term.attrib.get("category", "") # Add to GO terms - if go_id and go_id not in [ - g["id"] for g in annotations[protein_id]["go_terms"] - ]: + if go_id and unique_by_id( + annotations[protein_id]["go_terms"], go_id + ): annotations[protein_id]["go_terms"].append( {"id": go_id, "name": go_name, "category": go_category} ) # Get pathways - for pathway in match.findall(".//ns:pathway", ns): + pathways = signature.findall(f".//{qname('pathway-xref')}") + if not pathways: + pathways = match.findall(f".//{qname('pathway')}") + for pathway in pathways: pathway_id = pathway.attrib.get("id", "") pathway_name = pathway.attrib.get("name", "") pathway_db = pathway.attrib.get("db", "") # Add to pathways - if pathway_id and pathway_id not in [ - p["id"] for p in annotations[protein_id]["pathways"] - ]: + if pathway_id and unique_by_id( + annotations[protein_id]["pathways"], pathway_id + ): annotations[protein_id]["pathways"].append( { "id": pathway_id, diff --git a/funannotate2_addons/signalp6.py b/funannotate2_addons/signalp6.py index d62f1ff..0f7b740 100644 --- a/funannotate2_addons/signalp6.py +++ b/funannotate2_addons/signalp6.py @@ -4,6 +4,7 @@ import subprocess import argparse import json +import re import uuid from .log import startLogging from .utils import ( @@ -17,6 +18,82 @@ logger = startLogging() +def _signalp_annotation_value(prediction): + """Format a standardized SignalP annotation value.""" + cleavage_pos = prediction.get("cleavage_site") + if cleavage_pos and "-" in cleavage_pos: + return f"SignalP:1-{cleavage_pos.split('-')[0]}" + return f"SignalP:{prediction['prediction']}" + + +def _write_signalp_annotations(output_file, predictions): + """Write standardized SignalP annotations.""" + with open(output_file, "w") as out: + out.write("#gene_id\tannotation_type\tannotation_value\n") + for protein_id, pred in predictions.items(): + if pred["has_signal_peptide"]: + annotation = _signalp_annotation_value(pred) + out.write(f"{protein_id}\tnote\t{annotation}\n") + + +def _normalize_signalp_prediction(prediction): + """Normalize SignalP prediction labels across text and JSON outputs.""" + prediction_map = { + "other": "OTHER", + "sp": "SP", + "signal peptide (sec/spi)": "SP", + "signal peptide": "SP", + "lipoprotein signal peptide (sec/spii)": "LIPO", + "tat signal peptide (tat/spi)": "TAT", + "tat lipoprotein signal peptide (tat/spii)": "TATLIPO", + "pilin-like signal peptide (sec/spiii)": "PILIN", + "pilin-like signal peptide": "PILIN", + } + normalized = prediction_map.get(str(prediction).strip().lower()) + if normalized: + return normalized + return str(prediction).strip().upper() + + +def _parse_signalp6_sequence_json_entry(entry): + """Parse a real SignalP 6 JSON SEQUENCES entry.""" + protein_id = entry["Name"] + prediction = _normalize_signalp_prediction(entry["Prediction"]) + protein_types = entry.get("Protein_types", []) + likelihoods = entry.get("Likelihood", []) + + probabilities = {} + for label, value in zip(protein_types, likelihoods): + probabilities[_normalize_signalp_prediction(label)] = float(value) + + parsed = { + "prediction": prediction, + "probability": probabilities.get( + prediction, + max((float(value) for value in likelihoods), default=0.0), + ), + "has_signal_peptide": prediction != "OTHER", + } + + if "OTHER" in probabilities: + parsed["other_prob"] = probabilities["OTHER"] + if "SP" in probabilities: + parsed["sp_prob"] = probabilities["SP"] + + cleavage_info = str(entry.get("CS_pos", "")).strip() + if cleavage_info: + match = re.search( + r"Cleavage site between pos\.\s*(\d+)\s+and\s+(\d+)(?:\.\s*Probability\s*([0-9.]+))?", + cleavage_info, + ) + if match: + parsed["cleavage_site"] = f"{match.group(1)}-{match.group(2)}" + if match.group(3): + parsed["cleavage_prob"] = float(match.group(3)) + + return protein_id, parsed + + def get_version(signalp_path): """ Get the version of SignalP. @@ -197,26 +274,7 @@ def parse_signalp(input_file, output_file=None, gene_dict=None): # Write to output file if specified if output_file: - with open(output_file, "w") as out: - # Write header - out.write("#gene_id\tannotation_type\tannotation_value\n") - - for protein_id, pred in predictions.items(): - if pred["has_signal_peptide"]: - if "cleavage_site" in pred: - # Extract the cleavage position to determine signal peptide range - # CS pos: 17-18 means cleavage between 17 and 18, so signal peptide is 1-17 - cleavage_pos = pred["cleavage_site"] - if "-" in cleavage_pos: - end_pos = cleavage_pos.split("-")[ - 0 - ] # Take the first number - annotation = f"SignalP:1-{end_pos}" - else: - annotation = f"SignalP:{pred['prediction']}" - else: - annotation = f"SignalP:{pred['prediction']}" - out.write(f"{protein_id}\tnote\t{annotation}\n") + _write_signalp_annotations(output_file, predictions) return predictions @@ -249,36 +307,34 @@ def parse_signalp_json(input_file, output_file=None, gene_dict=None): with open(input_file, "r") as f: data = json.load(f) - for entry in data: - protein_id = entry["ID"] - prediction = entry["PREDICTION"] - probability = float(entry["PROB"]) - - # Store prediction - predictions[protein_id] = { - "prediction": prediction, - "probability": probability, - "has_signal_peptide": prediction != "OTHER", - } - - # Add cleavage site if available - if "CS_POS" in entry and entry["CS_POS"] != "-": - predictions[protein_id]["cleavage_site"] = entry["CS_POS"] + if isinstance(data, list): + entries = data + real_signalp6_json = False + elif isinstance(data, dict) and isinstance(data.get("SEQUENCES"), dict): + entries = data["SEQUENCES"].values() + real_signalp6_json = True + else: + raise ValueError("Unsupported SignalP JSON format") + + for entry in entries: + if real_signalp6_json: + protein_id, parsed = _parse_signalp6_sequence_json_entry(entry) + else: + protein_id = entry["ID"] + prediction = _normalize_signalp_prediction(entry["PREDICTION"]) + parsed = { + "prediction": prediction, + "probability": float(entry["PROB"]), + "has_signal_peptide": prediction != "OTHER", + } + if "CS_POS" in entry and entry["CS_POS"] != "-": + parsed["cleavage_site"] = entry["CS_POS"] + + predictions[protein_id] = parsed # Write to output file if specified if output_file: - with open(output_file, "w") as out: - # Write header - out.write("#gene_id\tannotation_type\tannotation_value\n") - - for protein_id, pred in predictions.items(): - if pred["has_signal_peptide"]: - out.write( - f"{protein_id}\tnote\tSignalP:{pred['prediction']} prob={pred['probability']:.3f}" - ) - if "cleavage_site" in pred: - out.write(f" cleavage_site={pred['cleavage_site']}") - out.write("\n") + _write_signalp_annotations(output_file, predictions) return predictions @@ -417,7 +473,8 @@ def run_signalp_cli(args): # Parse SignalP results annotations_file = os.path.join(output_dir, "signalp.annotations.txt") - predictions = parse_signalp(args.parse, output_file=annotations_file) + parser = parse_signalp_json if args.parse.endswith(".json") else parse_signalp + predictions = parser(args.parse, output_file=annotations_file) if predictions: logger.info(f"Parsed {len(predictions)} predictions from {args.parse}") diff --git a/tests/unit/test_emapper.py b/tests/unit/test_emapper.py index 4fe16c7..4557081 100644 --- a/tests/unit/test_emapper.py +++ b/tests/unit/test_emapper.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import argparse import os import unittest from unittest import mock @@ -11,6 +12,7 @@ get_version, parse_emapper_annotations, emapper_to_json, + run_emapper_cli, ) @@ -99,6 +101,85 @@ def test_emapper_to_json(self): self.assertEqual(data["gene2"]["gene_name"], "geneB") self.assertEqual(data["gene2"]["description"], "Transcription factor") + @mock.patch("funannotate2_addons.emapper.logger.error") + @mock.patch("funannotate2_addons.emapper.get_input_file", return_value=None) + def test_run_emapper_cli_missing_input_file(self, mock_get_input_file, mock_logger): + args = mock.Mock(input="predict_dir", file=None, parse=None) + + run_emapper_cli(args) + + mock_get_input_file.assert_called_once_with(args, "proteins") + mock_logger.assert_called_once_with("No protein FASTA file found") + + @mock.patch("funannotate2_addons.emapper.startLogging") + @mock.patch("funannotate2_addons.emapper.parse_emapper_annotations") + @mock.patch("funannotate2_addons.emapper.run_emapper") + @mock.patch("funannotate2_addons.emapper.get_input_file") + def test_run_emapper_cli_uses_stable_output_prefix( + self, + mock_get_input_file, + mock_run_emapper, + mock_parse_annotations, + mock_start_logging, + ): + input_file = os.path.join(self.test_dir, "proteins.fa") + with open(input_file, "w") as f: + f.write(">gene1\nMSTNPKPQRIT\n") + + output_dir = os.path.join(self.test_dir, "output") + expected_prefix = os.path.join(output_dir, "emapper") + mock_get_input_file.return_value = input_file + mock_run_emapper.return_value = f"{expected_prefix}.emapper.annotations" + mock_parse_annotations.return_value = {"gene1": {"gene_name": "geneA"}} + mock_start_logging.return_value = mock.Mock() + + args = argparse.Namespace( + input="predict_dir", + file=None, + parse=None, + output=output_dir, + emapper_path="emapper.py", + cpus=4, + database=None, + data_dir=None, + temp_dir=None, + resume=True, + override=False, + mode="diamond", + tax_scope="auto", + target_orthologs="all", + sensmode="sensitive", + no_annot=False, + no_search=False, + dmnd_db=None, + seed_orthologs=None, + report_orthologs=False, + go_evidence=None, + pfam_realign=False, + seed_ortholog_score=None, + seed_ortholog_evalue=None, + query_cover=None, + subject_cover=None, + matrix=None, + gapopen=None, + gapextend=None, + evalue=None, + pident=None, + query_sgcov=None, + subject_sgcov=None, + json=False, + ) + + run_emapper_cli(args) + + self.assertTrue(os.path.isdir(output_dir)) + self.assertEqual(mock_run_emapper.call_args.kwargs["output_prefix"], expected_prefix) + self.assertTrue(mock_run_emapper.call_args.kwargs["resume"]) + mock_parse_annotations.assert_called_once_with( + f"{expected_prefix}.emapper.annotations", + output_file=f"{expected_prefix}.annotations.txt", + ) + if __name__ == "__main__": unittest.main() diff --git a/tests/unit/test_signalp6.py b/tests/unit/test_signalp6.py index 65fdd7a..fc7530d 100644 --- a/tests/unit/test_signalp6.py +++ b/tests/unit/test_signalp6.py @@ -5,14 +5,32 @@ import shutil import os import json +import argparse import subprocess from unittest import mock from funannotate2_addons.signalp6 import ( get_version, parse_signalp, + parse_signalp_json, signalp_to_json, run_signalp, + run_signalp_cli, +) + + +SIGNALP6_LOCAL_ROOT = os.environ.get( + "FUNANNOTATE2_ADDONS_SIGNALP6_LOCAL", + "/Users/jon/software/f2_ecosystem/funannotate2-addons/local_tests/signalp6_test", +) +SIGNALP6_LOCAL_TXT = os.path.join(SIGNALP6_LOCAL_ROOT, "test2", "prediction_results.txt") +SIGNALP6_LOCAL_JSON = os.path.join(SIGNALP6_LOCAL_ROOT, "test2", "output.json") +SIGNALP6_LOCAL_ANNOTATIONS = os.path.join( + SIGNALP6_LOCAL_ROOT, "test3", "signalp.annotations.txt" +) +HAS_SIGNALP6_LOCAL_FIXTURES = all( + os.path.exists(path) + for path in (SIGNALP6_LOCAL_TXT, SIGNALP6_LOCAL_JSON, SIGNALP6_LOCAL_ANNOTATIONS) ) @@ -31,6 +49,16 @@ def setUp(self): f.write("protein3\tSP\t0.007633\t0.992322\tCS pos: 23-24. Pr: 0.6512\n") f.write("protein4\tOTHER\t1.000045\t0.000000\t\n") + self.sample_json_file = os.path.join(self.test_dir, "prediction_results.json") + with open(self.sample_json_file, "w") as f: + json.dump( + [ + {"ID": "protein1", "PREDICTION": "OTHER", "PROB": "0.998", "CS_POS": "-"}, + {"ID": "protein2", "PREDICTION": "SP", "PROB": "0.999", "CS_POS": "17-18"}, + ], + f, + ) + # Create a sample protein FASTA file self.sample_fasta_file = os.path.join(self.test_dir, "proteins.fasta") with open(self.sample_fasta_file, "w") as f: @@ -151,6 +179,33 @@ def test_signalp_to_json(self): self.assertTrue(protein2_data["has_signal_peptide"]) self.assertEqual(protein2_data["cleavage_site"], "17-18") + def test_parse_signalp_json(self): + predictions = parse_signalp_json(self.sample_json_file) + + self.assertIn("protein1", predictions) + self.assertIn("protein2", predictions) + self.assertFalse(predictions["protein1"]["has_signal_peptide"]) + self.assertTrue(predictions["protein2"]["has_signal_peptide"]) + self.assertEqual(predictions["protein2"]["cleavage_site"], "17-18") + + def test_run_signalp_cli_parse_json(self): + output_dir = os.path.join(self.test_dir, "signalp_json_output") + args = argparse.Namespace(parse=self.sample_json_file, output=output_dir, json=True) + + run_signalp_cli(args) + + annotations_file = os.path.join(output_dir, "signalp.annotations.txt") + json_file = os.path.join(output_dir, "signalp.json") + + self.assertTrue(os.path.exists(annotations_file)) + self.assertTrue(os.path.exists(json_file)) + + with open(annotations_file, "r") as f: + annotations = f.read() + + self.assertIn("protein2\tnote\tSignalP:1-17", annotations) + + @mock.patch("subprocess.run") @mock.patch("funannotate2_addons.signalp6.log_command") def test_run_signalp(self, mock_log_command, mock_run): @@ -190,5 +245,46 @@ def test_run_signalp(self, mock_log_command, mock_run): self.assertIn(output_dir, call_args) +@unittest.skipUnless( + HAS_SIGNALP6_LOCAL_FIXTURES, + "local SignalP6 fixtures not available", +) +class TestSignalp6LocalFixtures(unittest.TestCase): + def test_parse_real_signalp_tabular_fixture(self): + predictions = parse_signalp(SIGNALP6_LOCAL_TXT) + + self.assertEqual(len(predictions), 65) + self.assertFalse(predictions["FUN2_000001-T1 FUN2_000001"]["has_signal_peptide"]) + + protein = predictions["FUN2_000014-T1 FUN2_000014"] + self.assertEqual(protein["prediction"], "SP") + self.assertAlmostEqual(protein["probability"], 0.999747, places=6) + self.assertEqual(protein["cleavage_site"], "17-18") + self.assertAlmostEqual(protein["cleavage_prob"], 0.9751, places=4) + + def test_parse_real_signalp_json_fixture_matches_expected_annotations(self): + output_file = os.path.join( + tempfile.mkdtemp(), "signalp_from_real_json.annotations.txt" + ) + try: + predictions = parse_signalp_json(SIGNALP6_LOCAL_JSON, output_file=output_file) + + self.assertEqual(len(predictions), 65) + self.assertFalse(predictions["FUN2_000001-T1 FUN2_000001"]["has_signal_peptide"]) + + protein = predictions["FUN2_000014-T1 FUN2_000014"] + self.assertEqual(protein["prediction"], "SP") + self.assertAlmostEqual(protein["probability"], 0.9997, places=4) + self.assertEqual(protein["cleavage_site"], "17-18") + self.assertAlmostEqual(protein["cleavage_prob"], 0.975137, places=6) + + with open(output_file, "r") as observed, open( + SIGNALP6_LOCAL_ANNOTATIONS, "r" + ) as expected: + self.assertEqual(observed.read(), expected.read()) + finally: + shutil.rmtree(os.path.dirname(output_file)) + + if __name__ == "__main__": unittest.main() From deaf45b91361dcb55b5ee38161a926132ffdd9ae Mon Sep 17 00:00:00 2001 From: Jon Palmer Date: Fri, 6 Mar 2026 15:54:00 -0800 Subject: [PATCH 3/4] ci(release): harden production release workflow and CLI coverage Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7 --- .github/workflows/production-release.yml | 50 +++++++++++++++++++----- tests/unit/test_cli.py | 28 +++++++------ 2 files changed, 57 insertions(+), 21 deletions(-) diff --git a/.github/workflows/production-release.yml b/.github/workflows/production-release.yml index 3e385ce..aa4a905 100644 --- a/.github/workflows/production-release.yml +++ b/.github/workflows/production-release.yml @@ -24,6 +24,7 @@ jobs: runs-on: ubuntu-latest outputs: tag: ${{ steps.tag.outputs.tag }} + release_version: ${{ steps.tag.outputs.release_version }} tag_exists: ${{ steps.check.outputs.exists }} steps: @@ -39,7 +40,24 @@ jobs: echo "Error: Tag must be in format v1.0.0, 1.0.0, or CalVer like 25.5.28" exit 1 fi + + RELEASE_VERSION="${TAG#v}" + PACKAGE_VERSION=$(python3 - <<'PY' + import pathlib + import tomllib + + pyproject = tomllib.loads(pathlib.Path("pyproject.toml").read_text()) + print(pyproject["project"]["version"]) + PY + ) + + if [ "$RELEASE_VERSION" != "$PACKAGE_VERSION" ]; then + echo "Error: Tag version ($RELEASE_VERSION) does not match pyproject.toml version ($PACKAGE_VERSION)" + exit 1 + fi + echo "tag=$TAG" >> $GITHUB_OUTPUT + echo "release_version=$RELEASE_VERSION" >> $GITHUB_OUTPUT - name: Check if tag exists id: check @@ -128,7 +146,7 @@ jobs: echo "Checking if version exists on PyPI..." # Check if package exists on PyPI - if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.tag }}/json" | grep -q "Not Found"; then + if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.release_version }}/json" | grep -q "Not Found"; then echo "❌ Version does not exist on PyPI - this is a real error" exit 1 else @@ -162,21 +180,31 @@ jobs: - name: Generate release notes id: release_notes run: | - # Get the previous tag - PREV_TAG=$(git describe --tags --abbrev=0 ${{ needs.validate-tag.outputs.tag }}^) + # Get the previous tag if one exists (initial releases may not have one) + PREV_TAG=$(git tag --merged "${{ needs.validate-tag.outputs.tag }}" --sort=-creatordate | grep -vx "${{ needs.validate-tag.outputs.tag }}" | head -n 1 || true) echo "Generating release notes for ${{ needs.validate-tag.outputs.tag }}" - echo "Previous tag: $PREV_TAG" + if [ -n "$PREV_TAG" ]; then + echo "Previous tag: $PREV_TAG" + else + echo "Previous tag: none (initial release)" + fi echo "" # Generate changelog echo "## Changes" > release_notes.md echo "" >> release_notes.md - # Get all commits between previous tag and current tag, excluding version bump commits - git log --pretty=format:"- %s (%h)" $PREV_TAG..${{ needs.validate-tag.outputs.tag }} \ - --grep="bump version" --grep="version bump" --grep="update version" \ - --invert-grep >> release_notes.md + # Get all commits since the previous tag, excluding version bump commits + if [ -n "$PREV_TAG" ]; then + git log --pretty=format:"- %s (%h)" "$PREV_TAG..${{ needs.validate-tag.outputs.tag }}" \ + --grep="bump version" --grep="version bump" --grep="update version" \ + --invert-grep >> release_notes.md + else + git log --pretty=format:"- %s (%h)" "${{ needs.validate-tag.outputs.tag }}" \ + --grep="bump version" --grep="version bump" --grep="update version" \ + --invert-grep >> release_notes.md + fi # Check if we have any commits if [ ! -s release_notes.md ] || [ $(wc -l < release_notes.md) -le 2 ]; then @@ -187,7 +215,11 @@ jobs: echo "" >> release_notes.md echo "" >> release_notes.md - echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md + if [ -n "$PREV_TAG" ]; then + echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md + else + echo "**Full Changelog**: Initial release" >> release_notes.md + fi echo "Generated release notes:" cat release_notes.md diff --git a/tests/unit/test_cli.py b/tests/unit/test_cli.py index e61b161..efb96d2 100644 --- a/tests/unit/test_cli.py +++ b/tests/unit/test_cli.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 +import argparse import unittest import sys from unittest import mock @@ -9,6 +10,15 @@ class TestCLI(unittest.TestCase): + def _assert_main_dispatch(self, subparser_name, patch_target): + args = argparse.Namespace(subparser_name=subparser_name) + + with mock.patch("funannotate2_addons.__main__.parse_args", return_value=args): + with mock.patch(patch_target) as mock_runner: + main() + + mock_runner.assert_called_once_with(args) + def test_parse_args_no_args(self): """Test that no arguments raises SystemExit and prints help""" with self.assertRaises(SystemExit) as cm: @@ -61,30 +71,24 @@ def test_parse_args_signalp6(self): @mock.patch("funannotate2_addons.__main__.run_emapper_cli") def test_main_emapper(self, mock_run_emapper): """Test main function calls emapper CLI""" - with mock.patch("sys.argv", ["funannotate2_addons", "emapper", "--help"]): - with self.assertRaises(SystemExit): - main() + self._assert_main_dispatch("emapper", "funannotate2_addons.__main__.run_emapper_cli") @mock.patch("funannotate2_addons.__main__.run_iprscan_cli") def test_main_iprscan(self, mock_run_iprscan): """Test main function calls iprscan CLI""" - with mock.patch("sys.argv", ["funannotate2_addons", "iprscan", "--help"]): - with self.assertRaises(SystemExit): - main() + self._assert_main_dispatch("iprscan", "funannotate2_addons.__main__.run_iprscan_cli") @mock.patch("funannotate2_addons.__main__.run_antismash_cli") def test_main_antismash(self, mock_run_antismash): """Test main function calls antismash CLI""" - with mock.patch("sys.argv", ["funannotate2_addons", "antismash", "--help"]): - with self.assertRaises(SystemExit): - main() + self._assert_main_dispatch( + "antismash", "funannotate2_addons.__main__.run_antismash_cli" + ) @mock.patch("funannotate2_addons.__main__.run_signalp_cli") def test_main_signalp6(self, mock_run_signalp): """Test main function calls signalp6 CLI""" - with mock.patch("sys.argv", ["funannotate2_addons", "signalp6", "--help"]): - with self.assertRaises(SystemExit): - main() + self._assert_main_dispatch("signalp6", "funannotate2_addons.__main__.run_signalp_cli") if __name__ == "__main__": From ca30d7ec7e53498209995aa19ec112a8356ea843 Mon Sep 17 00:00:00 2001 From: Jon Palmer Date: Fri, 6 Mar 2026 15:54:10 -0800 Subject: [PATCH 4/4] test(antismash): add approved local fixture regression coverage Agent-Id: agent-884774f7-f25e-4edf-b925-15e28faa6df7 --- tests/unit/test_antismash.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/unit/test_antismash.py b/tests/unit/test_antismash.py index 9e1b432..2732298 100644 --- a/tests/unit/test_antismash.py +++ b/tests/unit/test_antismash.py @@ -15,6 +15,13 @@ ) +ANTISMASH_LOCAL_GBK = os.environ.get( + "FUNANNOTATE2_ADDONS_ANTISMASH_GBK", + "/Users/jon/software/f2_ecosystem/funannotate2-addons/local_tests/antismash_pverr/antismash_fe857478/Pseudogymnoascus_verrucosus.gbk", +) +HAS_ANTISMASH_LOCAL_GBK = os.path.exists(ANTISMASH_LOCAL_GBK) + + class TestAntismash(unittest.TestCase): def setUp(self): # Create a temporary directory for test files @@ -153,5 +160,28 @@ def test_antismash_to_json(self): self.assertIsInstance(clusters, dict) +@unittest.skipUnless( + HAS_ANTISMASH_LOCAL_GBK, + "local antiSMASH GenBank fixture not available", +) +class TestAntismashLocalFixtures(unittest.TestCase): + def test_parse_real_antismash_genbank_fixture(self): + backbone_domains, backbone_subtype, backbone_enzymes, cluster_genes = ( + parse_antismash_gbk(ANTISMASH_LOCAL_GBK) + ) + + self.assertEqual(len(cluster_genes), 28) + self.assertEqual(backbone_subtype["cluster_1"], ["T1PKS"]) + self.assertEqual(backbone_subtype["cluster_28"], ["terpene"]) + self.assertEqual(len(cluster_genes["cluster_1"]), 30) + self.assertIn("FUN2_000393", cluster_genes["cluster_1"]) + self.assertIn("FUN2_000393", backbone_enzymes["cluster_1"]) + self.assertIn("FUN2_011547", backbone_enzymes["cluster_28"]) + + annotations = "\n".join(backbone_domains["FUN2_000393"]) + self.assertIn("SMCOG1093", annotations) + self.assertIn("antiSMASH domain: itr_KS", annotations) + + if __name__ == "__main__": unittest.main()