-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmycoplasma.py
More file actions
30 lines (24 loc) · 939 Bytes
/
mycoplasma.py
File metadata and controls
30 lines (24 loc) · 939 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# Load both inputs completely and release their file handles right away.
# `data` holds the whole FASTA file as one string; `data2` holds the CSV rows
# as a list of lines (line endings preserved).
with open("Mycoplasma_hominis.fasta", "r") as fasta_file:
    data = fasta_file.read()
with open("Mycoplasma_hominis.csv", "r") as csv_file:
    data2 = csv_file.readlines()

# The output handle stays open on purpose: the code further down writes the
# extracted gene records to it.
result = open("genes.fasta", "w")
# Watson-Crick pairing for the four bases plus the IUPAC ambiguity codes, in
# both upper and lower case so that soft-masked sequence keeps its case.
_BASES = "ACGTRYSWKMBDHVN"
_PAIRS = "TGCAYRSWMKVHDBN"
_COMPLEMENT = dict(zip(_BASES + _BASES.lower(), _PAIRS + _PAIRS.lower()))


def complement(dna):
    """Return the reverse complement of a DNA sequence.

    Despite the name, the result is reversed as well as complemented, i.e. it
    is the sequence of the opposite strand read in its own 5'->3' direction.

    Args:
        dna: Sequence of nucleotide symbols. A, C, G, T, the IUPAC ambiguity
            codes (R, Y, S, W, K, M, B, D, H, V, N) and their lower-case
            forms are accepted.

    Returns:
        A new string of the same length as ``dna``; an empty input yields an
        empty string.

    Raises:
        KeyError: If ``dna`` contains a symbol that is not a nucleotide code.
    """
    return "".join(_COMPLEMENT[nucleotide] for nucleotide in reversed(dna))
# Build contig_dict: each FASTA header line (kept verbatim, including the
# leading ">") maps to the concatenation of the sequence lines that follow it.
contig_dict = {}
lines = data.splitlines()

# Track the header currently being filled explicitly. Looking up "the last key
# of the dict" on every line is slow and picks the wrong contig when a header
# is repeated, because re-assigning an existing key keeps its old position.
current_header = None
sequence_parts = {}
for line in lines:
    if line.startswith(">"):
        current_header = line
        # A repeated header starts over, as a plain dict assignment would.
        sequence_parts[current_header] = []
    elif current_header is None:
        # Nothing to attach this text to; tolerate leading blank lines only.
        if line.strip():
            raise ValueError(
                "FASTA data contains sequence text before the first '>' header"
            )
    else:
        sequence_parts[current_header].append(line)

# Join once per contig instead of growing a string line by line.
contig_dict = {
    header: "".join(parts) for header, parts in sequence_parts.items()
}
# Write one FASTA record to the output file for every CSV row whose second
# column is "gene".
# Columns read from each row: 0 = contig id (matched against the FASTA header
# without its ">"), 2 and 3 = start and end coordinates (treated as 1-based,
# end inclusive), 4 = strand, 5 = a label copied into the record header.
try:
    for line in data2:
        line = line.split(",")
        print(line)
        # Blank rows have no feature-type column at all; skip them rather
        # than failing with IndexError.
        if len(line) < 2 or line[1] != "gene":
            continue
        start_pos = int(line[2]) - 1  # 1-based start -> 0-based slice index
        end_pos = int(line[3])  # inclusive end == exclusive slice bound
        # The row still carries its line ending; if column 5 is the last
        # column, that newline would otherwise split the header in two.
        label = line[5].rstrip("\r\n")
        result.write(f">{line[0]}|gene|{label}|{line[2]}:{line[3]}|{line[4]}\n")
        # Look the sequence up outside the f-string: re-using the same quote
        # character inside an f-string is a syntax error before Python 3.12.
        sequence = contig_dict[">" + line[0]][start_pos:end_pos]
        if line[4] == "-":
            # Minus-strand genes are emitted as the reverse complement.
            sequence = complement(sequence)
        result.write(f"{sequence}\n")
finally:
    # Flush and release the output file even if a row fails to parse.
    result.close()