forked from axelcournac/3C_tutorial
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathMatrice_Creator.py
More file actions
88 lines (80 loc) · 2.45 KB
/
Matrice_Creator.py
File metadata and controls
88 lines (80 loc) · 2.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# -*- coding: utf-8 -*-
"""
To convert an alignment output file into a matrice object.
author: Axel KournaK
"""
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
BIN = int(sys.argv[2])
# Size (in bp) of the bin to build the contacts map
mat = {}
# dictionary object to keep in memory all the contacts
maxi = {}
# dictionary object to keep in memory the lenghts of the chromosomes (in bins)
i = 0
with open(sys.argv[1]) as f: # open the file for reading output alignment file
for line in f: # iterate over each line
i = i + 1
if i % 1000000 == 0:
print(str(i) + " lines parsed.")
chr1, locus1, sens1, indice1, chr2, locus2, sens2, indice2 = line.split()
# split it by whitespace
locus1 = int(locus1)
sens1 = sens1
locus2 = int(locus2)
sens2 = sens2
bin1 = int(locus1 / BIN)
bin2 = int(locus2 / BIN)
key1 = (chr1, bin1, chr2, bin2)
key2 = (chr2, bin2, chr1, bin1)
if key1 in mat:
mat[key1] += 1
else:
mat[key1] = 1
if key2 in mat:
mat[key2] += 1
else:
mat[key2] = 1
if chr1 not in maxi:
maxi[chr1] = 0
if chr2 not in maxi:
maxi[chr2] = 0
if bin1 > maxi[chr1]:
maxi[chr1] = bin1
if bin2 > maxi[chr2]:
maxi[chr2] = bin2
# Check for the maximum of bins:
list_chr = sorted(list(maxi.keys()))
# by default all the chromosomes of the organism
# list_chr = ["chr3"]; # or list of subset of chromosomes you want to display
N_BINS = 0
for chr in list_chr:
N_BINS = N_BINS + maxi[chr] + 1
print(chr, maxi[chr])
#
print(N_BINS)
# Conversion of the Matrix in an numpy array object:
bin_mat1 = 0
bin_mat2 = 0
MATRICE = np.zeros((N_BINS, N_BINS))
for chr1 in list_chr:
for bin1 in range(0, maxi[chr1] + 1):
bin_mat2 = 0
for chr2 in list_chr:
for bin2 in range(0, maxi[chr2] + 1):
key1 = (chr1, bin1, chr2, bin2)
if key1 in mat:
mat[key1] = mat[key1]
MATRICE[bin_mat1, bin_mat2] = mat[key1]
else:
mat[key1] = 0
MATRICE[bin_mat1, bin_mat2] = 0
bin_mat2 += 1
bin_mat1 += 1
# Plot of the matrice and savings:
plt.imshow(MATRICE ** 0.2, interpolation="none")
plt.colorbar()
plt.savefig("MAT_RAW.png")
np.savetxt("MAT_RAW.txt", MATRICE)