diff --git a/src/ncbitaxon.py b/src/ncbitaxon.py index e53be6f..aaa5628 100755 --- a/src/ncbitaxon.py +++ b/src/ncbitaxon.py @@ -154,8 +154,9 @@ def convert_synonyms(tax_id, synonyms): return output -def convert_node(node, label, merged, synonyms, citations): +def convert_node(node, label, merged, synonyms, citations, divisions): """Given a node dictionary, a label string, and lists for merged, synonyms, and citations, + and a divisions dictionary mapping division IDs to names, return a Turtle string representing this tax_id.""" tax_id = node["tax_id"] output = [f"NCBITaxon:{tax_id} a owl:Class"] @@ -193,6 +194,13 @@ def convert_node(node, label, merged, synonyms, citations): gc_id = node["genetic_code_id"] if gc_id: output.append(f'; oboInOwl:hasDbXref "GC_ID:{gc_id}"^^xsd:string') + + div_id = node["division_id"] + if div_id and div_id in divisions: + division_name= escape_literal(divisions[div_id]) + output.append(f'; ncbitaxon:has_division "{division_name}"^^xsd:string') + # division_id = label_to_id(divisions[div_id]) + # output.append(f'; ncbitaxon:has_division NCBITaxon:{division_id}') for merge in merged: output.append(f'; oboInOwl:hasAlternativeId "NCBITaxon:{merge}"^^xsd:string') @@ -224,6 +232,7 @@ def convert(taxdmp_path, output_path, taxa=None): synonyms = defaultdict(list) merged = defaultdict(list) citations = defaultdict(list) + divisions = defaultdict(str) with open(output_path, "w") as output: isodate = date.today().isoformat() ncbi_date = date.today().replace(day=1) @@ -299,7 +308,18 @@ def convert(taxdmp_path, output_path, taxa=None): rdfs:subPropertyOf oboInOwl:SynonymTypeProperty . """)) + output.write("""ncbitaxon:has_division a owl:AnnotationProperty +; rdfs:label "has division"^^xsd:string +; rdfs:comment "A metadata relation indicating taxonomic division (eg Bacteria, Eukaryota)"^^xsd:string +; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +. +""") with zipfile.ZipFile(taxdmp_path) as taxdmp: + with taxdmp.open("division.dmp") as dmp: + for line in io.TextIOWrapper(dmp): + div_id, _div_code, name, _comments , _ = split_line(line) + divisions[div_id] = name + with taxdmp.open("names.dmp") as dmp: for line in io.TextIOWrapper(dmp): tax_id, name, unique, name_class, _ = split_line(line) @@ -365,6 +385,7 @@ def convert(taxdmp_path, output_path, taxa=None): merged[tax_id], synonyms[tax_id], citations[tax_id], + divisions, ) output.write(result) @@ -424,6 +445,17 @@ def convert(taxdmp_path, output_path, taxa=None): ; obo:IAO_0100001 {rank_curie} . """ +# ) +# # Add division classes +# for division_id, division_name in divisions.items(): +# division_class_id = label_to_id(division_name) +# output.write( +# f"""NCBITaxon:{division_class_id} a owl:Class +# ; rdfs:label "{division_name}"^^xsd:string +# ; rdfs:subClassOf +# ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +# . +# """ ) parent_taxrank_id = "9000000" if label in pseudo_ranks else "0000000" @@ -460,3 +492,12 @@ def main(): if __name__ == "__main__": main() + + +# commented chunk lines 368 to 373 +# ; oboInOwl:hasOBONamespace "ncbi_taxonomy"^^xsd:string +# . + +# a owl:Class +# ; rdfs:label "taxonomic division"^^xsd:string +# ; rdfs:comment "This is an abstract class for NCBI taxonomic divisions."^^xsd:string