Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 41 additions & 9 deletions .github/workflows/production-release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ jobs:
runs-on: ubuntu-latest
outputs:
tag: ${{ steps.tag.outputs.tag }}
release_version: ${{ steps.tag.outputs.release_version }}
tag_exists: ${{ steps.check.outputs.exists }}

steps:
Expand All @@ -39,7 +40,24 @@ jobs:
echo "Error: Tag must be in format v1.0.0, 1.0.0, or CalVer like 25.5.28"
exit 1
fi

RELEASE_VERSION="${TAG#v}"
PACKAGE_VERSION=$(python3 - <<'PY'
import pathlib
import tomllib

pyproject = tomllib.loads(pathlib.Path("pyproject.toml").read_text())
print(pyproject["project"]["version"])
PY
)

if [ "$RELEASE_VERSION" != "$PACKAGE_VERSION" ]; then
echo "Error: Tag version ($RELEASE_VERSION) does not match pyproject.toml version ($PACKAGE_VERSION)"
exit 1
fi

echo "tag=$TAG" >> $GITHUB_OUTPUT
echo "release_version=$RELEASE_VERSION" >> $GITHUB_OUTPUT

- name: Check if tag exists
id: check
Expand Down Expand Up @@ -128,7 +146,7 @@ jobs:
echo "Checking if version exists on PyPI..."

# Check if package exists on PyPI
if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.tag }}/json" | grep -q "Not Found"; then
if curl -s "https://pypi.org/pypi/funannotate2-addons/${{ needs.validate-tag.outputs.release_version }}/json" | grep -q "Not Found"; then
echo "❌ Version does not exist on PyPI - this is a real error"
exit 1
else
Expand Down Expand Up @@ -162,21 +180,31 @@ jobs:
- name: Generate release notes
id: release_notes
run: |
# Get the previous tag
PREV_TAG=$(git describe --tags --abbrev=0 ${{ needs.validate-tag.outputs.tag }}^)
# Get the previous tag if one exists (initial releases may not have one)
PREV_TAG=$(git tag --merged "${{ needs.validate-tag.outputs.tag }}" --sort=-creatordate | grep -vx "${{ needs.validate-tag.outputs.tag }}" | head -n 1 || true)

echo "Generating release notes for ${{ needs.validate-tag.outputs.tag }}"
echo "Previous tag: $PREV_TAG"
if [ -n "$PREV_TAG" ]; then
echo "Previous tag: $PREV_TAG"
else
echo "Previous tag: none (initial release)"
fi
echo ""

# Generate changelog
echo "## Changes" > release_notes.md
echo "" >> release_notes.md

# Get all commits between previous tag and current tag, excluding version bump commits
git log --pretty=format:"- %s (%h)" $PREV_TAG..${{ needs.validate-tag.outputs.tag }} \
--grep="bump version" --grep="version bump" --grep="update version" \
--invert-grep >> release_notes.md
# Get all commits since the previous tag, excluding version bump commits
if [ -n "$PREV_TAG" ]; then
git log --pretty=format:"- %s (%h)" "$PREV_TAG..${{ needs.validate-tag.outputs.tag }}" \
--grep="bump version" --grep="version bump" --grep="update version" \
--invert-grep >> release_notes.md
else
git log --pretty=format:"- %s (%h)" "${{ needs.validate-tag.outputs.tag }}" \
--grep="bump version" --grep="version bump" --grep="update version" \
--invert-grep >> release_notes.md
fi

# Check if we have any commits
if [ ! -s release_notes.md ] || [ $(wc -l < release_notes.md) -le 2 ]; then
Expand All @@ -187,7 +215,11 @@ jobs:

echo "" >> release_notes.md
echo "" >> release_notes.md
echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md
if [ -n "$PREV_TAG" ]; then
echo "**Full Changelog**: https://github.com/${{ github.repository }}/compare/$PREV_TAG...${{ needs.validate-tag.outputs.tag }}" >> release_notes.md
else
echo "**Full Changelog**: Initial release" >> release_notes.md
fi

echo "Generated release notes:"
cat release_notes.md
Expand Down
7 changes: 3 additions & 4 deletions funannotate2_addons/emapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import os
import sys
import uuid
import subprocess
import argparse
import json
Expand Down Expand Up @@ -905,7 +904,7 @@ def run_emapper_cli(args):
input_file = get_input_file(args, "proteins") if args.input else args.file

if not input_file:
log.error("No protein FASTA file found")
logger.error("No protein FASTA file found")
return

# Get output directory
Expand All @@ -922,8 +921,8 @@ def run_emapper_cli(args):
# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Set output prefix
output_prefix = os.path.join(output_dir, f"emapper_{uuid.uuid4().hex[:6]}")
# Use a stable prefix so repeated runs can reuse the same output set.
output_prefix = os.path.join(output_dir, "emapper")

# Set up logging
log_file = f"{output_prefix}.log"
Expand Down
72 changes: 51 additions & 21 deletions funannotate2_addons/iprscan.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def run_iprscan(
"--input",
input_file,
"-o",
os.path.basename(f"{output_prefix}.{format.lower()}"),
f"{output_prefix}.{format.lower()}",
"-f",
format,
"-cpu",
Expand Down Expand Up @@ -169,15 +169,32 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
logger.error(f"Error parsing XML file: {e}")
return None

# Define namespaces
ns = {"ns": "http://www.ebi.ac.uk/interpro/resources/schemas/interproscan5"}
# Detect namespace from the document so we can support current and older
# InterProScan XML schema URLs.
namespace = root.tag.split("}")[0].strip("{") if root.tag.startswith("{") else ""

def qname(tag):
return f"{{{namespace}}}{tag}" if namespace else tag

def unique_by_id(items, item_id):
return item_id not in [item["id"] for item in items]

# Parse annotations
annotations = {}

# Iterate through protein matches
for protein in root.findall(".//ns:protein", ns):
for protein in root.findall(f".//{qname('protein')}"):
protein_id = protein.attrib.get("id", "")
if not protein_id:
xref = protein.find(qname("xref"))
if xref is not None:
protein_id = xref.attrib.get("id", "")
if not protein_id:
sequence = protein.find(qname("sequence"))
if sequence is not None and sequence.text:
protein_id = sequence.text.strip()
if not protein_id:
continue

if protein_id not in annotations:
annotations[protein_id] = {
Expand All @@ -187,33 +204,40 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
"signatures": [],
}

matches = protein.find(qname("matches"))
if matches is None:
continue

# Get matches
for match in protein.findall(".//ns:match", ns):
signature = match.find(".//ns:signature", ns)
for match in matches:
if not match.tag.split("}")[-1].endswith("-match"):
continue

signature = match.find(qname("signature"))
if signature is not None:
sig_acc = signature.attrib.get("ac", "")
sig_desc = signature.attrib.get("desc", "")
sig_name = signature.attrib.get("name", "")

# Add to signatures
if sig_acc and sig_acc not in [
s["id"] for s in annotations[protein_id]["signatures"]
]:
if sig_acc and unique_by_id(
annotations[protein_id]["signatures"], sig_acc
):
annotations[protein_id]["signatures"].append(
{"id": sig_acc, "name": sig_name, "description": sig_desc}
)

# Get InterPro domains
entry = match.find(".//ns:entry", ns)
entry = signature.find(qname("entry"))
if entry is not None:
entry_acc = entry.attrib.get("ac", "")
entry_desc = entry.attrib.get("desc", "")
entry_name = entry.attrib.get("name", "")

# Add to InterPro domains
if entry_acc and entry_acc not in [
d["id"] for d in annotations[protein_id]["interpro_domains"]
]:
if entry_acc and unique_by_id(
annotations[protein_id]["interpro_domains"], entry_acc
):
annotations[protein_id]["interpro_domains"].append(
{
"id": entry_acc,
Expand All @@ -223,29 +247,35 @@ def parse_iprscan_xml(input_file, output_file=None, gene_dict=None):
)

# Get GO terms
for go_term in match.findall(".//ns:go-term", ns):
go_terms = signature.findall(f".//{qname('go-xref')}")
if not go_terms:
go_terms = match.findall(f".//{qname('go-term')}")
for go_term in go_terms:
go_id = go_term.attrib.get("id", "")
go_name = go_term.attrib.get("name", "")
go_category = go_term.attrib.get("category", "")

# Add to GO terms
if go_id and go_id not in [
g["id"] for g in annotations[protein_id]["go_terms"]
]:
if go_id and unique_by_id(
annotations[protein_id]["go_terms"], go_id
):
annotations[protein_id]["go_terms"].append(
{"id": go_id, "name": go_name, "category": go_category}
)

# Get pathways
for pathway in match.findall(".//ns:pathway", ns):
pathways = signature.findall(f".//{qname('pathway-xref')}")
if not pathways:
pathways = match.findall(f".//{qname('pathway')}")
for pathway in pathways:
pathway_id = pathway.attrib.get("id", "")
pathway_name = pathway.attrib.get("name", "")
pathway_db = pathway.attrib.get("db", "")

# Add to pathways
if pathway_id and pathway_id not in [
p["id"] for p in annotations[protein_id]["pathways"]
]:
if pathway_id and unique_by_id(
annotations[protein_id]["pathways"], pathway_id
):
annotations[protein_id]["pathways"].append(
{
"id": pathway_id,
Expand Down
Loading