IG-RL/settings.ini at master · FXDevailly/IG-RL · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# This file deliberately contains only the most important/interesting settings/hyperparameters; the complete set is defined in the .ini config files found under the config folder.
# A description of most of these settings is provided alongside each of them below.
# You can look for the corresponding setting in a complete experiment .ini file and tune it accordingly.


[BASE]
# General experiment settings: run mode, paths, durations and which road network is used.
# NOTE(review): several values carry trailing '# ...' text and Python-style quotes/lists; with
# Python's configparser defaults such trailing text stays part of the value, so the loader
# presumably evaluates values as Python expressions - confirm before reformatting any value line.

# Run mode: 'train' or 'test'
mode = 'test' # train, test (performing training or evaluation)

# PATH USED TO LOAD PARAMETERS IF PERFORMING EVALUATION
# NOTE(review): this path refers to 'BINARY_GEN_GCN_IQL' while exp_name below is
# 'BINARY_GEN_GCN_IQL_NO_DUELING' - confirm the mismatch is intended.
load_model_path : 'config/binary/GCN/Q_L/Train/DIVERSIFIED_NETS/results/BINARY_GEN_GCN_IQL/models_params'

# TYPE OF POLICY ('binary' is the one used in the paper; 'free' is the alternative)
policy : 'binary' # 'free'

# Frequency at which model parameters are saved (unit not stated in this file - TODO confirm)
save_params_frequency : 100
save_extended_training_stats:True

# DEFINES WHETHER TRIPS ARE GENERATED BEFORE THE EXPERIMENT (GENERATING THEM BEFOREHAND ENABLES USING THE SAME TRIPS ACROSS DIFFERENT EXPERIMENTS/METHODS)
gen_trips_before_exp : True

# Estimated arrival rate (used to influence traffic density)
period = 1

# False is faster but can cause memory overflow for large road networks and/or many self-play processes
sequential_computation : True

# Number of steps per episode
nb_steps_per_exp: 3600
# Total number of steps
exp_sim_duration = 3600
# Maximum real-time/wall-clock duration (unit not stated in this file - TODO confirm)
exp_real_duration = 1000000

# Used for debugging
print_time_gating:False

# For evaluation, 'greedy' is used for a trained method
# (other values listed by the author: 'strong_baseline', 'classic')
tests = ['greedy']   #  ['strong_baseline', 'classic']

# Number of environments running in parallel
n_tests = 25


# When generating random networks, this value defines the proportion of road networks which will be identical to the road network used in evaluation.
# Value between 0 and 1:
# if 0, the method is Generalist and is only trained on random road networks (which do not include the target/evaluation road network)
# if 1, the method is Specialist and is only trained on the road network used in evaluation
specialist : 1

# Number of times to run the entire experiment
n_exp = 1

# Uses a real road network
real_net = True
# Address (path) of the real road network file to be used
real_net_address : 'Manhattan.net.xml'
net_name :'Manhattan'

# Name of the experiment, used to create the corresponding directory tree ("arborescence")
exp_name : 'BINARY_GEN_GCN_IQL_NO_DUELING'


[INTERFACE]
# Rendering options.

### Enable rendering (only works if the code is run locally)
render : False
### Save the rendering (only works if the code is run locally)
save_render : False


[SIM]
# Simulation settings: vehicle insertion, signal timing and traffic-demand generation.

# Speed at which vehicles enter the network (unit not stated in this file - TODO confirm)
V_ENTER = 35

### WARM-UP (presumably the number of steps to wait before the experiment starts - TODO confirm)
wait_n_steps : 0


# Signal timing constraints
# NOTE(review): presumably the yellow-phase duration and the minimum delay between two
# consecutive actions; units are not stated in this file - confirm.
yellow_duration : 5
min_time_between_actions : 5

### USED FOR DYNAMIC TRAFFIC GENERATION (NOT THE METHOD USED IN THE PAPER)
N_VEH_SAMPLES : 50
PROB_VEH : 0.003

### USED FOR GENERATING TRIPS BEFORE RUNNING SIMULATIONS (METHOD USED IN THE PAPER)
# NOTE(review): the keys below are undocumented in this file; names such as fringe_factor
# resemble SUMO randomTrips options - verify against the trip-generation code.
target_velocity: 50
fringe_factor: 10
demand_duration : 120
demand_variance : 0
lane_demand_variance : 2
min_distance:2
Max_Speed: 50


[MODEL]
# USE GCN OR DNN (True selects the GCN; presumably False selects the DNN - confirm)
GCN : True

[STATE]
# State representation: which node types exist and which features each node carries.

# --- Lanes ---

# INCLUDE AGGREGATED DATA FOR LANE NODE EMBEDDINGS (FOR IG-RL-L)
lane_node_state : True

# FEATURES TO INCLUDE IN THE LANE NODE REPRESENTATION AND THEIR CORRESPONDING DIMENSIONALITIES
# (mapping: feature name -> dimensionality)
lane_vars : collections.OrderedDict({'length':1, 'nb_veh' :1, 'avg_speed' : 1})


# FEATURES TO INCLUDE IN THE CONNECTION NODE REPRESENTATION AND THEIR CORRESPONDING DIMENSIONALITIES
# (mapping: feature name -> dimensionality)
connection_vars : collections.OrderedDict({'open' :1, 'nb_switch_to_open' : 1, 'current_priority' : 1, 'priority_next_open':1})
# NOTE(review): undocumented; presumably the number of upcoming signal phases exposed in the state - confirm
num_observed_next_phases : 12


# INCLUDE VEHICLES AS NODES IN THE NETWORK (FOR IG-RL-V)
veh_as_nodes : False
# FEATURES TO INCLUDE IN THE VEHICLE NODE REPRESENTATION AND THEIR CORRESPONDING DIMENSIONALITIES
# (mapping: TraCI variable constant -> dimensionality)
veh_vars : collections.OrderedDict({traci.constants.VAR_SPEED : 1, traci.constants.VAR_LANEPOSITION : 1})


[REWARD]
# Reward definition and discounting.

# METRIC TO USE AS REWARD ('queue_length' is used in the paper)
reward_type = 'queue_length'

# Distance thresholds, in meters (see the trailing description on each line)
min_dist_delay = 25 # (length of entering area) : In training only, we add the delay of a vehicle to a lane only if it has at least traveled this value (in meters) along the corresponding lane
max_dist_queue = 50 # (length of lane sensors) : In both training and test, vehicles are counted in the queue if the distance which separates them to the intersection (in meters) is equal or inferior to this value


# IF distance_gamma IS 0, only rewards on neighbouring lanes are included in the MDP of a given intersection; if >0, other rewards are discounted and their values are included. (0 in the paper)
distance_gamma : 0

# RL discount factor
time_gamma : 0.95


# NOTE(review): the keys below (nn_layers_size, learning_rate, batch_size, accumulation_steps)
# belong to [REWARD] as parsed, because the [RL] header only appears after them.
# Confirm which section the loader reads them from before moving anything.

# DIMENSIONALITY OF THE FINAL MLP LAYER
nn_layers_size :32

learning_rate : 1e-3
batch_size : 16
# Enables gradient accumulation between training steps (frequency at which parameters are updated)
# Set to 1 to update parameters after every gradient computation.
accumulation_steps : 1
[RL]
# Reinforcement-learning settings: parameter synchronisation, action correction and DQN variants.

# FREQUENCY AT WHICH THE MODEL IN THE COMPUTATION ("comput") PROCESS RECEIVES UPDATED PARAMETERS FROM THE TRAIN PROCESS
update_comput_model_frequency : 100

# PERFORM ACTION-CORRECTION (True for IG-RL in general. Disable for IG-RL-no_correction)
correct_actions : True

# FREQUENCY AT WHICH THE TARGET MODEL IS UPDATED IN THE DOUBLE Q-LEARNING SETTING
target_model_update_frequency: 100
# Enable the double DQN / dueling DQN variants respectively
double_DQN : True
dueling_DQN : False