-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathscript_train.sh
More file actions
executable file
·88 lines (77 loc) · 2.34 KB
/
script_train.sh
File metadata and controls
executable file
·88 lines (77 loc) · 2.34 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/bin/bash
# Configuration for launching training/train.py.
#
# Usage: script_train.sh <experiment_name> [extra train.py options...]
#   $1   experiment name; also selects the hparams file (see hparams_file)
#   $2+  forwarded to train.py by the launch command further down
#
# Reads:   SCRATCH (base directory holding the datasets), SLURM_PROCID (logged)
# Exports: XRT_WORKERS, XRT_DEVICE_MAP

# Legacy TPU settings, kept for reference.
#export TPU_IP_ADDRESS=10.8.195.90;
#export XRT_TPU_CONFIG="tpu_worker;0;$TPU_IP_ADDRESS:8470"
#export TPU_NAME="grpc://$TPU_IP_ADDRESS:8470"

# Local XRT service/device map exported to child processes
# (presumably read by the PyTorch/XLA runtime - TODO confirm).
export XRT_WORKERS="localservice:0;grpc://localhost:40934"
export XRT_DEVICE_MAP="CPU:0;/job:localservice/replica:0/task:0/device:XLA_CPU:0|GPU:0;/job:localservice/replica:0/task:0/device:XLA_GPU:0"

# Cluster-specific environment tweaks, disabled by default.
#export PYTHONPATH=$SCRATCH/:${PYTHONPATH}
#export PYTHONPATH=/gpfsscratch/rech/imi/usc19dv/lib/python3.7/site-packages:${PYTHONPATH}
#module load pytorch-gpu/py3/1.8.0

# Interpreter used to run the training entry point.
py=python3

# Dataset root. If SCRATCH is unset this resolves to "/data".
root_dir="${SCRATCH}/data"
#root_dir=data

# Experiment name: first positional argument (empty when not supplied).
exp="${1-}"

# Dataset / hparams selection: keep exactly one data_dir + hparams_file pair
# active; the other pairs stay commented out.
####aistpp_60hz
#data_dir=${root_dir}/scaled_features
#hparams_file=aistpp_60hz/${exp}
####aistpp_20hz
#data_dir=${root_dir}/aistpp_20hz
#hparams_file=aistpp_20hz/${exp}
####moglow_pos
#data_dir=${root_dir}/moglow_pos
#hparams_file=moglow_pos/${exp}
####dance_combined
#data_dir=${root_dir}/dance_combined
#data_dir=${root_dir}/dance_combined2
data_dir="${root_dir}/dance_combined3"
hparams_file="dance_combined/${exp}"
#hparams_file=neos/${exp}

# Log the experiment name and the SLURM process id (blank outside SLURM).
# printf with quoted expansions prints the values verbatim: no word
# splitting, no globbing, and no misreading of values such as "-n" as options.
printf '%s\n' "${exp}"
#echo $RANK
#echo $LOCAL_RANK
printf '%s\n' "${SLURM_PROCID:-}"

### UNCOMMENT THIS IF IN SLURM
#export LOCAL_RANK=$SLURM_LOCALID
# Launch training/train.py with the configuration chosen above.
#
# Refuse to start without an experiment name: it is needed both for
# --experiment_name and to locate the hparams YAML file.
if [[ -z "${exp:-}" ]]; then
  printf 'Usage: %s <experiment_name> [extra train.py options...]\n' "${0##*/}" >&2
  exit 2
fi

# Default options, one per array element. An array avoids backslash line
# continuations, which silently glue two options together when the space
# before the backslash is missing. When enabling one of the commented-out
# options listed below this block, add it here without a trailing backslash.
train_args=(
  --data_dir="${data_dir}"
  --max_epochs=10
  --hparams_file="training/hparams/${hparams_file}.yaml"
  --experiment_name="${exp}"
  --workers="$(nproc)"
  --gpus=-1
  --accelerator=cuda
)

# NOTE: arguments after the experiment name are appended last, so they can
# override experiment_name and any of the options above. "${@:2}" is quoted
# so each override reaches train.py as exactly one argument, even if it
# contains spaces or glob characters (e.g. --scales="[[16,0]]").
"${py}" training/train.py "${train_args[@]}" "${@:2}"
#--batch_size=32 \
#--accelerator=ddp \
#--plugins=deepspeed \
#--precision=16 \
#--gradient_clip_val=0.5 \
#--sync_batchnorm \
#--lr_policy=LinearWarmupCosineAnnealing \
#--auto_lr_find \
#--do_tuning \
#--learning_rate=7e-5 \
#--batch_size=84 \
#--num_nodes=4 \
#--output_lengths=3 \
#--dropout=0.1 \
#--vae_dhid=128 \
#--optimizer=madgrad \
#--learning_rate=1e-3 \
#--use_x_transformers \
#--use_rotary_pos_emb \
#--batch_size=84 \
#--lr_policy=reduceOnPlateau \
#--learning_rate=1e-4 \
#--use_pos_emb_output \
#--flow_dist=studentT \
#--gradient_clip_val=1 \
#--flow_dist=studentT \
#--fix_lengths \
#--use_x_transformers \
#--use_rotary_pos_emb \
#--output_lengths="3" \
#--scales="[[16,0]]" \
#--residual_scales="[[16,0]]"
# --glow_norm_layer="actnorm" \
#--use_pos_emb_output \
# --tpu_cores=8 \