From 31a2ad8c6e25efc72faf7a634f492a1f017eb05a Mon Sep 17 00:00:00 2001 From: Harshad Date: Mon, 21 Jul 2025 16:09:50 -0500 Subject: [PATCH 1/3] Add `ncbitaxon:has_division` property. --- src/ncbitaxon.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/ncbitaxon.py b/src/ncbitaxon.py index f734652..b919b8f 100755 --- a/src/ncbitaxon.py +++ b/src/ncbitaxon.py @@ -141,8 +141,9 @@ def convert_synonyms(tax_id, synonyms): return output -def convert_node(node, label, merged, synonyms, citations): +def convert_node(node, label, merged, synonyms, citations, divisions): """Given a node dictionary, a label string, and lists for merged, synonyms, and citations, + and a divisions dictionary mapping division IDs to names, return a Turtle string representing this tax_id.""" tax_id = node["tax_id"] output = [f"NCBITaxon:{tax_id} a owl:Class"] @@ -172,6 +173,11 @@ def convert_node(node, label, merged, synonyms, citations): gc_id = node["genetic_code_id"] if gc_id: output.append(f'; oboInOwl:hasDbXref "GC_ID:{gc_id}"^^xsd:string') + + div_id = node["division_id"] + if div_id and div_id in divisions: + division_name= escape_literal(divisions[div_id]) + output.append(f'; ncbitaxon:has_division "{division_name}"^^xsd:string') for merge in merged: output.append(f'; oboInOwl:hasAlternativeId "NCBITaxon:{merge}"^^xsd:string') @@ -203,6 +209,7 @@ def convert(taxdmp_path, output_path, taxa=None): synonyms = defaultdict(list) merged = defaultdict(list) citations = defaultdict(list) + divisions = defaultdict(str) with open(output_path, "w") as output: isodate = date.today().isoformat() ncbi_date = date.today().replace(day=1) @@ -266,7 +273,18 @@ def convert(taxdmp_path, output_path, taxa=None): rdfs:subPropertyOf oboInOwl:SynonymTypeProperty . """)) + output.write("""ncbitaxon:has_division a owl:AnnotationProperty +; rdfs:label "has division"^^xsd:string +; rdfs:comment "A metadata relation indicating taxonomic division (eg Bacteria, Eukaryota)"^^xsd:string +; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +. +""") with zipfile.ZipFile(taxdmp_path) as taxdmp: + with taxdmp.open("division.dmp") as dmp: + for line in io.TextIOWrapper(dmp): + div_id, _div_code, name, _comments , _ = split_line(line) + divisions[div_id] = name + with taxdmp.open("names.dmp") as dmp: for line in io.TextIOWrapper(dmp): tax_id, name, unique, name_class, _ = split_line(line) @@ -332,6 +350,7 @@ def convert(taxdmp_path, output_path, taxa=None): merged[tax_id], synonyms[tax_id], citations[tax_id], + divisions, ) output.write(result) From e08a603060cd9f23a8ab3840a80e1f60db0f387c Mon Sep 17 00:00:00 2001 From: Harshad Date: Tue, 22 Jul 2025 10:31:11 -0500 Subject: [PATCH 2/3] Switch has_division from `xsd:String` to URI --- src/ncbitaxon.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/ncbitaxon.py b/src/ncbitaxon.py index b919b8f..8f75378 100755 --- a/src/ncbitaxon.py +++ b/src/ncbitaxon.py @@ -176,8 +176,10 @@ def convert_node(node, label, merged, synonyms, citations, divisions): div_id = node["division_id"] if div_id and div_id in divisions: - division_name= escape_literal(divisions[div_id]) - output.append(f'; ncbitaxon:has_division "{division_name}"^^xsd:string') + # division_name= escape_literal(divisions[div_id]) + # output.append(f'; ncbitaxon:has_division "{division_name}"^^xsd:string') + division_id = label_to_id(divisions[div_id]) + output.append(f'; ncbitaxon:has_division NCBITaxon:{division_id}') for merge in merged: output.append(f'; oboInOwl:hasAlternativeId "NCBITaxon:{merge}"^^xsd:string') @@ -365,6 +367,12 @@ def convert(taxdmp_path, output_path, taxa=None): ; rdfs:comment "This is an abstract class for use with the NCBI taxonomy to name the depth of the node within the tree. The link between the node term and the rank is only visible if you are using an obo 1.3 aware browser/editor; otherwise this can be ignored."^^xsd:string ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string . + + a owl:Class +; rdfs:label "taxonomic division"^^xsd:string +; rdfs:comment "This is an abstract class for NCBI taxonomic divisions."^^xsd:string +; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +. """ ) for label in ranks: @@ -382,7 +390,17 @@ def convert(taxdmp_path, output_path, taxa=None): . """ ) - + # Add division classes + for division_id, division_name in divisions.items(): + division_class_id = label_to_id(division_name) + output.write( + f"""NCBITaxon:{division_class_id} a owl:Class +; rdfs:label "{division_name}"^^xsd:string +; rdfs:subClassOf +; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +. +""" + ) def main(): parser = argparse.ArgumentParser( From 85e3ba032d8e1e07dff6b122e18c1c85d6d721c7 Mon Sep 17 00:00:00 2001 From: Harshad Date: Wed, 23 Jul 2025 09:33:32 -0500 Subject: [PATCH 3/3] roll back last commit --- src/ncbitaxon.py | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/ncbitaxon.py b/src/ncbitaxon.py index 8f75378..6cbaa40 100755 --- a/src/ncbitaxon.py +++ b/src/ncbitaxon.py @@ -176,10 +176,10 @@ def convert_node(node, label, merged, synonyms, citations, divisions): div_id = node["division_id"] if div_id and div_id in divisions: - # division_name= escape_literal(divisions[div_id]) - # output.append(f'; ncbitaxon:has_division "{division_name}"^^xsd:string') - division_id = label_to_id(divisions[div_id]) - output.append(f'; ncbitaxon:has_division NCBITaxon:{division_id}') + division_name= escape_literal(divisions[div_id]) + output.append(f'; ncbitaxon:has_division "{division_name}"^^xsd:string') + # division_id = label_to_id(divisions[div_id]) + # output.append(f'; ncbitaxon:has_division NCBITaxon:{division_id}') for merge in merged: output.append(f'; oboInOwl:hasAlternativeId "NCBITaxon:{merge}"^^xsd:string') @@ -367,12 +367,6 @@ def convert(taxdmp_path, output_path, taxa=None): ; rdfs:comment "This is an abstract class for use with the NCBI taxonomy to name the depth of the node within the tree. The link between the node term and the rank is only visible if you are using an obo 1.3 aware browser/editor; otherwise this can be ignored."^^xsd:string ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string . - - a owl:Class -; rdfs:label "taxonomic division"^^xsd:string -; rdfs:comment "This is an abstract class for NCBI taxonomic divisions."^^xsd:string -; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string -. """ ) for label in ranks: @@ -389,17 +383,17 @@ def convert(taxdmp_path, output_path, taxa=None): ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string . """ - ) - # Add division classes - for division_id, division_name in divisions.items(): - division_class_id = label_to_id(division_name) - output.write( - f"""NCBITaxon:{division_class_id} a owl:Class -; rdfs:label "{division_name}"^^xsd:string -; rdfs:subClassOf -; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string -. -""" +# ) +# # Add division classes +# for division_id, division_name in divisions.items(): +# division_class_id = label_to_id(division_name) +# output.write( +# f"""NCBITaxon:{division_class_id} a owl:Class +# ; rdfs:label "{division_name}"^^xsd:string +# ; rdfs:subClassOf +# ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +# . +# """ ) def main(): @@ -424,3 +418,12 @@ def main(): if __name__ == "__main__": main() + + +# commented chunk lines 368 to 373 +# ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +# . + +# a owl:Class +# ; rdfs:label "taxonomic division"^^xsd:string +# ; rdfs:comment "This is an abstract class for NCBI taxonomic divisions."^^xsd:string