-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathrun_VarSAn.sh
More file actions
155 lines (145 loc) · 8.98 KB
/
run_VarSAn.sh
File metadata and controls
155 lines (145 loc) · 8.98 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
#!/bin/sh
snplist=${1}
sg=${2:-all}
pw=${3:-Reactome} # User selected pathway
gg=${4:-yes}
weight=${5:-1}
is_random=${6:-0} # Is the query set a random query set?
en=${7} # What enrichment file to use
mkdir -p ./scratch/
case $sg in
all)
echo "SNP to gene connections: all tissue eQTLs"
if [ "${is_random}" = "0" ]; then
echo "Calculating tissue enrichment..."
./TissueEnrichment.sh ${snplist}
mv ${snplist}.enrich* ./scratch/
fi
sleep 3
for s in $(cat ./data/GTEx_eQTL/TissueList.txt ); do w=$(awk -v var=$s '($1==var){print $2}' ./scratch/${en}.enrich.pvalue.weight); perl snp_to_edge.pl ./gene_sets/${snplist}.txt ./data/GTEx_eQTL/$s.GTEx_eQTL.txt|awk -v var=$w '{print $1":"$2"\t"var}'; done >./scratch/${1}.edge.tmp
perl format_edge.pl ./scratch/${1}.edge.tmp sum|tr ":" "\t"|awk '{print $2"\t"$1"\t"$3"\tSNP-Gene\n"$1"\t"$2"\t0.000000000000001\tSNP-Gene"}' >./scratch/${1}.edge
if [ "${weight}" = "1" ]; then
echo "w eq 1"
perl SignWeight.pl ./scratch/${1}.edge >./scratch/${1}.query.tmp
else
echo "w not eq 1"
awk '($3 > 0.1){print $2"\t1"}' ./scratch/${1}.edge|sort -u >./scratch/${1}.query.tmp
fi
perl snp_to_edge.pl ./gene_sets/${1}.txt ./data/Polyphen/Polyphen_pair.txt >./scratch/${1}-coding.tmp
awk '{print $2"\t"$1"\t1\tSNP-Gene\n"$1"\t"$2"\t0.000000000000001\tSNP-Gene"}' ./scratch/${1}-coding.tmp >>./scratch/${1}.edge
cut -f 1 ./scratch/${1}-coding.tmp|sort -u|awk '{print $1"\t1"}' >>./scratch/${1}.query.tmp
perl format_query.pl ./scratch/${1}.query.tmp >./scratch/${1}.query.txt
awk '($3 > 0.1){print $1}' ./scratch/${1}.edge >./scratch/${1}-tmp1.txt
if [ "${gg}" = 'yes' ]; then
echo "PPI yes"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t1\t"$4}' ./data/PPI/hn_IntNet.edge |sort -u >>./scratch/${1}.edge
fi
if [ "${pw}" = 'Reactome' ]; then
echo "pw yes"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t0.000000000000001\t"$4}' ./data/Pathway/ReactomeMoreThan30Slim-1.edge >>./scratch/${1}.edge
if [ "${gg}" = 'yes' ]; then
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/PPI/hn_IntNet.gene ./data/Pathway/ReactomeMoreThan30Slim-1.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
else
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/Pathway/ReactomeMoreThan30Slim-1.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
fi
else
echo "pw KEGG"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t0.000000000000001\t"$4}' ./data/Pathway/KEGG.edge >>./scratch/${1}.edge
if [ "${gg}" = 'yes' ]; then
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/PPI/hn_IntNet.gene ./data/Pathway/KEGG.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
else
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/Pathway/KEGG.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
fi
fi
echo ${1}.query.txt >./scratch/${1}.list
#cd ./scratch/
Rscript run_DRaWR_query_specific_network.r ${1} ./results/
sleep 1
rm ./results/*.stats
;;
Proximity)
echo "SNP to gene connections: Proximity, 10kb"
echo "Currently not available!"
perl snp_to_edge.pl ./gene_sets/${1}.txt ../gene_sets/GWAS_29059683.proximity.edge|awk '{print $2"\t"$1"\t1\tSNP-Gene\n"$1"\t"$2"\t0.000000000000001\tSNP-Gene"}' >./scratch/${1}.edge
if [ "${weight}" = "1" ]; then
echo "w eq 1"
perl SignWeight.pl ./scratch/${1}.edge >./scratch/${1}.query.tmp
else
echo "w not eq 1"
awk '($3 > 0.1){print $2"\t1"}' ./scratch/${1}.edge|sort -u >./scratch/${1}.query.tmp
fi
perl snp_to_edge.pl ./gene_sets/${1}.txt ./data/Polyphen/Polyphen_pair.txt >./scratch/${1}-coding.tmp
awk '{print $2"\t"$1"\t1\tSNP-Gene\n"$1"\t"$2"\t0.000000000000001\tSNP-Gene"}' ./scratch/${1}-coding.tmp >>./scratch/${1}.edge
cut -f 1 ./scratch/${1}-coding.tmp|sort -u|awk '{print $1"\t1"}' >>./scratch/${1}.query.tmp
perl format_query.pl ./scratch/${1}.query.tmp >./scratch/${1}.query.txt
awk '($3 > 0.1){print $1}' ./scratch/${1}.edge >./scratch/${1}-tmp1.txt
if [ "${gg}" = 'yes' ]; then
echo "PPI yes"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t1\t"$4}' ./data/PPI/hn_IntNet.edge |sort -u >>./scratch/${1}.edge
fi
if [ "${pw}" = 'Reactome' ]; then
echo "pw yes"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t0.000000000000001\t"$4}' ./data/Pathway/ReactomeMoreThan30Slim-1.edge >>./scratch/${1}.edge
if [ "${gg}" = 'yes' ]; then
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/PPI/hn_IntNet.gene ./data/Pathway/ReactomeMoreThan30Slim-1.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
else
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/Pathway/ReactomeMoreThan30Slim-1.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
fi
else
echo "pw KEGG"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t0.000000000000001\t"$4}' ./data/Pathway/KEGG.edge >>./scratch/${1}.edge
if [ "${gg}" = 'yes' ]; then
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/PPI/hn_IntNet.gene ./data/Pathway/KEGG.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
else
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/Pathway/KEGG.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
fi
fi
echo ${1}.query.txt >./scratch/${1}.list
#cd ./scratch/
Rscript run_DRaWR_query_specific_network.r ${1} ./results/
sleep 1
rm ./results/*.stats
;;
*)
echo "SNP to gene connections: ${sg} tissue eQTLs"
perl snp_to_edge.pl ./gene_sets/${1}.txt ./data/GTEx_eQTL/${sg}.GTEx_eQTL.txt|awk '{print $2"\t"$1"\t1\tSNP-Gene\n"$1"\t"$2"\t0.000000000000001\tSNP-Gene"}' >./scratch/${1}.edge
if [ "${weight}" = "1" ]; then
echo "w eq 1"
perl SignWeight.pl ./scratch/${1}.edge >./scratch/${1}.query.tmp
else
echo "w not eq 1"
awk '($3 > 0.1){print $2"\t1"}' ./scratch/${1}.edge|sort -u >./scratch/${1}.query.tmp
fi
perl snp_to_edge.pl ./gene_sets/${1}.txt ./data/Polyphen/Polyphen_pair.txt >./scratch/${1}-coding.tmp
awk '{print $2"\t"$1"\t1\tSNP-Gene\n"$1"\t"$2"\t0.000000000000001\tSNP-Gene"}' ./scratch/${1}-coding.tmp >>./scratch/${1}.edge
cut -f 1 ./scratch/${1}-coding.tmp|sort -u|awk '{print $1"\t1"}' >>./scratch/${1}.query.tmp
perl format_query.pl ./scratch/${1}.query.tmp >./scratch/${1}.query.txt
awk '($3 > 0.1){print $1}' ./scratch/${1}.edge >./scratch/${1}-tmp1.txt
if [ "${gg}" = 'yes' ]; then
echo "PPI yes"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t1\t"$4}' ./data/PPI/hn_IntNet.edge |sort -u >>./scratch/${1}.edge
fi
if [ "${pw}" = 'Reactome' ]; then
echo "pw yes"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t0.000000000000001\t"$4}' ./data/Pathway/ReactomeMoreThan30Slim-1.edge >>./scratch/${1}.edge
if [ "${gg}" = 'yes' ]; then
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/PPI/hn_IntNet.gene ./data/Pathway/ReactomeMoreThan30Slim-1.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
else
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/Pathway/ReactomeMoreThan30Slim-1.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
fi
else
echo "pw KEGG"
awk '{print $1"\t"$2"\t1\t"$4"\n"$2"\t"$1"\t0.000000000000001\t"$4}' ./data/Pathway/KEGG.edge >>./scratch/${1}.edge
if [ "${gg}" = 'yes' ]; then
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/PPI/hn_IntNet.gene ./data/Pathway/KEGG.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
else
cat ./scratch/${1}.query.txt ./scratch/${1}-tmp1.txt ./data/Pathway/KEGG.gene | awk '{print $1"\t1"}'|sort -u >./scratch/${1}.node
fi
fi
echo ${1}.query.txt >./scratch/${1}.list
#cd ./scratch/
Rscript run_DRaWR_query_specific_network.r ${1} ./results/
sleep 1
rm ./results/*.stats
;;
esac