-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathGunvant.py
More file actions
48 lines (40 loc) · 1.28 KB
/
Gunvant.py
File metadata and controls
48 lines (40 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""This takes a GATK.vcf file as input and return the same file, but converst the Chr to just
numbers with no leading 0's. This was made for Gunvant for use in a specific software
by "Meesh" (mich0391@umn.edu)"""
#example use
#python3 Gunvant.py your.vcf newname.vcf
import sys
from itertools import takewhile, islice
def normalize_chromosome(name):
    """Return the CHROM label with its "Chr" prefix and leading zeros removed.

    Examples: "Chr01" -> "1", "Chr10" -> "10", "Chr010" -> "10".

    Names containing "scaf" (scaffolds) are returned unchanged, as are names
    that do not contain "Chr" at all or that have nothing after "Chr".
    Anything in front of the first "Chr" is dropped, matching the behaviour
    of the original script.
    """
    # Scaffolds keep their full name.
    if "scaf" in name:
        return name
    _, marker, suffix = name.partition("Chr")
    if not marker or not suffix:
        # No "Chr" marker (e.g. already numeric) or nothing after it:
        # there is nothing meaningful to strip, so pass the name through.
        return name
    # lstrip removes only the *leading* zeros, so an interior or trailing
    # zero ("Chr010", "Chr20") is preserved. An all-zero suffix maps to "0".
    return suffix.lstrip("0") or "0"


def convert_lines(lines):
    """Yield every VCF line with its first (CHROM) column normalized.

    lines: an iterable of text lines (e.g. an open file).

    Header lines (starting with "#") and blank lines are passed through
    unchanged apart from trailing whitespace being replaced by a single
    newline. For every other line the first tab-separated column is
    rewritten with normalize_chromosome(). A progress message is printed
    each time the chromosome label changes.
    """
    current = None
    for raw in lines:
        line = raw.rstrip()
        # Header and blank lines carry no CHROM column to rewrite.
        if not line or line.startswith("#"):
            yield line + "\n"
            continue
        columns = line.split("\t")
        chrom = normalize_chromosome(columns[0])
        if chrom != current:
            print("Working on Chromosome " + chrom)
            current = chrom
        columns[0] = chrom
        yield "\t".join(columns) + "\n"


def main(argv=None):
    """Convert the VCF named by argv[1] and write the result to argv[2].

    argv defaults to sys.argv. Exits with a usage message when fewer than
    two file arguments are supplied.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 3:
        sys.exit("usage: python3 Gunvant.py your.vcf newname.vcf")
    # The context manager closes both files even if a line fails to convert.
    with open(argv[1], "r") as seqfile, open(argv[2], "w") as output:
        output.writelines(convert_lines(seqfile))


if __name__ == "__main__":
    main()