-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcall_og_structure.py
More file actions
55 lines (46 loc) · 1.8 KB
/
call_og_structure.py
File metadata and controls
55 lines (46 loc) · 1.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env python3
import csv
def classify_structure(n_sp):
    """Classify a per-species gene count as "none", "one" or "many".

    Args:
        n_sp: Number of genes one species contributes to an orthogroup.

    Returns:
        "none" when n_sp is 0, "one" when it is 1, and "many" when it is
        greater than 1.

    Raises:
        ValueError: If n_sp matches none of the cases above (for example a
            negative number).
    """
    if n_sp == 0:
        return "none"
    if n_sp == 1:
        return "one"
    if n_sp > 1:
        return "many"
    # Fail with a clear message instead of an UnboundLocalError on return.
    raise ValueError("cannot classify gene count {!r}".format(n_sp))
def classify_orthogroups(sp1, sp2, sp3, og_file):
    """Count genes per species in each orthogroup and label its structure.

    Each non-blank line of og_file is expected to look like
    ``<orthogroup id>: <seq> <seq> ...`` with sequence names separated by
    single spaces. A sequence is attributed to a species when its name
    starts with that species' prefix.

    Args:
        sp1: Sequence-name prefix identifying the first species.
        sp2: Sequence-name prefix identifying the second species.
        sp3: Sequence-name prefix identifying the third species.
        og_file: Path to the orthogroups text file.

    Returns:
        A dict keyed by orthogroup id. Each value is a dict with the keys
        "id", sp1, sp2 and sp3 (the per-species gene counts) and
        "structure" (the three classify_structure labels joined by ":",
        in species order).

    Raises:
        ValueError: If a non-blank line has no ": " separator.
    """
    prefixes = (sp1, sp2, sp3)
    orthogroups_dict = {}
    with open(og_file, 'r') as orthogroups:
        for line_no, raw_line in enumerate(orthogroups, start=1):
            line = raw_line.rstrip("\n")
            # Blank lines carry no orthogroup; skip them instead of failing
            # on the missing separator.
            if not line.strip():
                continue
            og_id, sep, members = line.partition(": ")
            if not sep:
                raise ValueError(
                    "{}: line {} has no ': ' separator: {!r}".format(
                        og_file, line_no, line
                    )
                )
            seqs = members.split(" ")
            # Count the number of genes for each species. Every prefix is
            # tested independently, so a sequence matching several prefixes
            # is counted once for each of them.
            n_sp1, n_sp2, n_sp3 = (
                sum(seq.startswith(prefix) for seq in seqs)
                for prefix in prefixes
            )
            # Classify structure
            og_struct = ":".join(
                classify_structure(n) for n in (n_sp1, n_sp2, n_sp3)
            )
            orthogroups_dict[og_id] = {
                "id": og_id,
                sp1: n_sp1,
                sp2: n_sp2,
                sp3: n_sp3,
                "structure": og_struct,
            }
    return orthogroups_dict
# Input orthogroups file and destination of the per-orthogroup summary CSV.
orthogroups = "/projects/b1059/projects/Ryan/ortholog_sims/NemaScan/input_data/all_species/orthogroups/02.21.22_orthogroups/Orthogroups.txt"
output = "/projects/b1059/projects/Ryan/ortholog_sims/20230614_pre_sim_ogs/data/orthogroups/orthogroups_structure.csv"

# Sequence-name prefixes used to attribute each sequence to a species.
sp1 = "Transcript" #elegans
sp2 = "QX1410"
sp3 = "transcript_CTROP"

og_dicts = classify_orthogroups(sp1, sp2, sp3, orthogroups)

# Column order of the CSV; the names match the keys of each orthogroup record.
col_name = ["id", sp1, sp2, sp3, "structure"]

# newline='' leaves line endings to the csv module, which otherwise produces
# doubled carriage returns on platforms that translate "\n" on write.
with open(output, 'w', newline='') as csvFile:
    wr = csv.DictWriter(csvFile, fieldnames=col_name)
    wr.writeheader()
    wr.writerows(og_dicts.values())