-
Notifications
You must be signed in to change notification settings - Fork 16
Expand file tree
/
Copy pathpredict.sh
More file actions
51 lines (45 loc) · 1.69 KB
/
predict.sh
File metadata and controls
51 lines (45 loc) · 1.69 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
#############ARGS#############
# Root directory that all data and source paths below are built from.
# Change this depending on your local path.
BASE="."
# Identifier of the case to predict; the file names below are derived from it.
ID="1FU0"
# Directory holding the input files for this ID.
DATADIR="${BASE}/data/predict_test_case/${ID}"
# Results go to the same per-ID directory (note the trailing slash).
OUTDIR="${DATADIR}/"
# Number of recycles to use for the prediction.
NUM_REC=3
# Parameter file (.npy).
PARAMS="${BASE}/data/params/params50000.npy"

# Two FASTA files are needed for each ID.
#
# 1) The sequence with the noncanonical amino acids (NCAAs) written in
#    three-letter code separated by hyphens, and the regular residues in
#    one-letter code, e.g.
# >1FU0_A
# MEKKEFHIVAETGIHARPATLLVQTASKFNSDINLEYKGKSVNLK-SEP-IMGVMSLGVGQGSDVTITVDGADEAEGMAAIVETLQKEGLA
FASTAFILE="${DATADIR}/${ID}.fasta"
#
# 2) The same sequence with each NCAA replaced by "X", used for the MSA
#    search, e.g.
# >1FU0_A
# MEKKEFHIVAETGIHARPATLLVQTASKFNSDINLEYKGKSVNLKXIMGVMSLGVGQGSDVTITVDGADEAEGMAAIVETLQKEGLA
FASTAWITHX="${DATADIR}/${ID}_X.fasta"
#########Step1: Create MSA with HHblits#########
#Database path handed to hhblits via -d
HHBLITSDB=$BASE/data/uniclust30_2018_08/uniclust30_2018_08
#MSA (a3m file) for this ID; passed to hhblits via -oa3m
MSA=$DATADIR/$ID'.a3m'
#Reuse an existing MSA, otherwise build it from the fasta where the NCAA
#is "X". All expansions are quoted so that paths containing spaces or
#glob characters are passed as single arguments.
if [ -f "$MSA" ]; then
  printf '%s exists\n' "$MSA"
else
  #Stop here when the search fails: the following steps read $MSA and
  #would otherwise run against a missing file.
  if ! "$BASE/hh-suite/build/bin/hhblits" -i "$FASTAWITHX" -d "$HHBLITSDB" \
    -E 0.001 -all -n 2 -oa3m "$MSA"; then
    echo "hhblits failed: could not create $MSA" >&2
    exit 1
  fi
fi
#########Step2: Make MSA features#########
#Here we also use "FASTAWITHX", the NCAA frames and features are mapped
#later in the predict script when the MSA is sampled
#The features are cached in $OUTDIR and only computed when the file is
#missing.
MSA_FEATS=$OUTDIR/msa_features.pkl
#Expansions are quoted so that paths containing spaces or glob characters
#are passed as single arguments.
if [ -f "$MSA_FEATS" ]; then
  printf '%s exists\n' "$MSA_FEATS"
else
  #Stop here when feature generation fails: the predict step reads
  #$MSA_FEATS and would otherwise run against a missing file.
  if ! python3 "$BASE/src/make_msa_seq_feats.py" \
    --input_fasta_path "$FASTAWITHX" \
    --input_msas "$MSA" --outdir "$OUTDIR"; then
    echo "make_msa_seq_feats.py failed: could not create $MSA_FEATS" >&2
    exit 1
  fi
fi
#########Step3: Predict#########
#The NCAAs will be saved to chain B in the prediction for easier visualisation
#The script is located through $BASE, like every other path in this file,
#so that changing BASE is enough to run from a different directory.
#Arguments are quoted so that paths containing spaces stay intact.
python3 "$BASE/src/predict_sc.py" --predict_id "$ID" \
  --MSA_feats "$MSA_FEATS" \
  --fasta "$FASTAFILE" \
  --num_recycles "$NUM_REC" \
  --params "$PARAMS" \
  --outdir "$OUTDIR"