-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathLengthDistribution.py
More file actions
executable file
·61 lines (47 loc) · 1.63 KB
/
LengthDistribution.py
File metadata and controls
executable file
·61 lines (47 loc) · 1.63 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/python
# encoding: utf-8
# author: Charles Joly Beauparlant
# 2012-07-23
"""
This script calculates the distribution of sequence's length from a fasta file
Usage:
cat joe.fasta | ./LengthDistribution.py <maxLength> > distribution.txt
maxLength: Maximum sequence length to compute
"""
class DistributionCalculator(object):
    """Accumulate a histogram of sequence lengths and print it as percentages.

    Lengths from 0 up to and including ``maxLength`` are tracked; longer
    sequences are ignored.
    """

    def __init__(self, maxLength):
        """Create an empty histogram.

        maxLength: largest sequence length to track (inclusive).
        """
        self.maxLength = maxLength
        self.clear()

    def clear(self):
        """Reset every length bucket to zero."""
        # One bucket per length in 0..maxLength inclusive, hence the +1;
        # without it a sequence of exactly maxLength would index past the end.
        self.distribution = [0] * (self.maxLength + 1)

    def processSequence(self, sequence, count):
        """Add ``count`` occurrences of the length of ``sequence``.

        sequence: the sequence whose length is recorded (only len() is used).
        count: how many times this sequence was observed.

        Sequences longer than ``maxLength`` are silently skipped.
        """
        length = len(sequence)
        if length <= self.maxLength:
            self.distribution[length] += count

    def printResults(self):
        """Print one tab-separated ``length  percent`` line per tracked length.

        ``percent`` is the share of all counted sequences that have that
        length. When nothing has been counted, every length is reported as
        0.0 instead of dividing by zero.
        """
        total = sum(self.distribution)
        for length, occurrences in enumerate(self.distribution):
            if total:
                percent = occurrences / float(total) * 100
            else:
                percent = 0.0
            # Single-argument parenthesised print behaves the same on
            # Python 2 and Python 3.
            print(str(length) + "\t" + str(percent))
import sys
if __name__ == "__main__":
    # Exactly one argument (maxLength) is required; otherwise show usage.
    if len(sys.argv) != 2:
        print(__doc__)
        sys.exit(1)
    try:
        maxLength = int(sys.argv[1])
    except ValueError:
        # A non-numeric maxLength is a usage error, not a crash.
        print(__doc__)
        sys.exit(1)

    distributionCalculator = DistributionCalculator(maxLength)

    # Input is consumed as alternating header / sequence lines. The header's
    # fourth whitespace-separated field is parsed as the sequence's count.
    # NOTE(review): this assumes single-line sequences and a header layout
    # carrying the count in field 4 -- confirm against the producer of the
    # fasta files.
    count = None  # None means "the next line is a header"
    for line in sys.stdin:
        if count is None:
            if not line.strip():
                # Tolerate blank lines between records / at end of input
                # instead of failing on the missing header fields.
                continue
            count = int(line.split()[3])
        else:
            distributionCalculator.processSequence(line.strip(), count)
            count = None

    distributionCalculator.printResults()