forked from hades-k/bio_seq_analyzer
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsequence.py
More file actions
66 lines (54 loc) · 1.7 KB
/
sequence.py
File metadata and controls
66 lines (54 loc) · 1.7 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
from abc import ABC, abstractmethod
class Sequence(ABC):
    """Abstract base for sequence records.

    Concrete subclasses must implement the read-only ``sequence`` and
    ``length`` properties; the class cannot be instantiated until both
    are provided.
    """

    def __init__(self, df):
        """Keep a reference to the record this object was built from.

        :param df: pandas dataframe from Parser output
        """
        # Double-underscore name: mangled to ``_Sequence__info``, so
        # subclasses do not see it under the plain ``__info`` spelling.
        self.__info = df

    @property
    @abstractmethod
    def sequence(self):
        """Sequence held by the record; to be supplied by subclasses."""

    @property
    @abstractmethod
    def length(self):
        """Length of the sequence; to be supplied by subclasses."""
class MitochondrialDNA(Sequence):
    """Mitochondrial DNA record with a few basic sequence utilities.

    Built from one parsed record that can be indexed by the keys
    ``'seq'``, ``'length'``, ``'id'``, ``'name'`` and ``'description'``.
    """

    def __init__(self, df):
        """Copy the fields of one record into typed private attributes.

        :param df: a single record from the Parser output, indexable by
            'seq', 'length', 'id', 'name' and 'description'
        """
        super().__init__(df)
        self.__sequence = str(df['seq'])
        self.__length = int(df['length'])
        self.__id = str(df['id'])
        self.__name = str(df['name'])
        self.__description = str(df['description'])

    @property
    def sequence(self):
        """The sequence as a string, exactly as read from the record."""
        return self.__sequence

    @property
    def length(self):
        """The length value taken from the record's 'length' field."""
        return self.__length

    @property
    def name(self):
        """The record's name."""
        return self.__name

    @property
    def gc_content(self):
        """Percentage (0-100) of G and C bases in the sequence.

        Counting is case-insensitive so that lower-case sequences are
        handled the same way as in ``find_irregular_bases``.

        :return: GC percentage as a float; 0.0 when the length is zero
        """
        if not self.length:
            # Nothing to divide by; avoid ZeroDivisionError on empty records.
            return 0.0
        bases = self.sequence.upper()
        return ((bases.count('G') + bases.count('C')) / self.length) * 100

    def get_subsequence(self, start: int, end: int):
        """Return the slice ``sequence[start:end]`` (0-based, end-exclusive).

        :param start: index of the first base to include
        :param end: index one past the last base to include
        :raises ValueError: unless ``0 <= start <= end <= len(sequence)``
        """
        # A negative ``end`` or ``start > end`` would otherwise slice
        # silently and hand back a truncated or empty string.
        if not 0 <= start <= end <= len(self.sequence):
            raise ValueError(f"Subsequence indices out of range: start={start}, end={end}, length={self.length}")
        return self.sequence[start:end]

    def find_irregular_bases(self):
        """List every character that is not A, T, G or C.

        The sequence is upper-cased first, so the returned characters are
        upper-case; they appear in sequence order and repeats are kept.

        :return: list of single-character strings (empty if none found)
        """
        valid_bases = {'A', 'T', 'G', 'C'}
        return [base for base in self.sequence.upper() if base not in valid_bases]
if __name__ == "__main__":
    # Manual demo: parse the bundled FASTA file, wrap the record labelled 10
    # and print its GC percentage and any non-ATGC characters.
    from tools import Parser

    fasta_parser = Parser()
    parsed_records = fasta_parser.run("synthetic_mtDNA_dataset.fasta")

    record_row = parsed_records.loc[10]
    mt_dna = MitochondrialDNA(record_row)

    print(mt_dna.gc_content)
    print(mt_dna.find_irregular_bases())