-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtileCoding.py
More file actions
155 lines (117 loc) · 4.72 KB
/
tileCoding.py
File metadata and controls
155 lines (117 loc) · 4.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# based on: https://github.com/lbarazza/Tile-Coding/blob/master/main.py
import sys
import gymnasium as gym
import argparse
import random
import time
import gzip
sys.path.append('Code/TileCoding/')
sys.path.append('Code/envs/')
from Code.TileCoding.config import *
from Code.TileCoding.agent import *
from Code.utils import save_log, save_model, load_model
def run_agent(env, agent, nEpisodes, env_name, seed=None, verbose=False, model_save=True, log_save=True, log_folder="results/"):
    """Train ``agent`` on ``env`` for ``nEpisodes`` episodes and log the run.

    Each episode resets the environment and then repeatedly asks the agent
    for an action, steps the environment, and calls ``agent.train`` on the
    transition until the environment reports ``done``.

    Args:
        env: Object exposing ``reset()`` (returns the initial state) and
            ``step(action)`` returning the 4-tuple
            ``(new_state, reward, done, success)``.
        agent: Object exposing ``choose_action(state)``,
            ``train(state, action, reward, new_state, done)`` and an
            ``epsilon`` attribute.
        nEpisodes: Number of episodes to run.
        env_name: Name recorded in the run summary under ``"env"``.
        seed: Seed passed to ``random.seed``. When ``None`` a seed in
            ``[0, 1000]`` is drawn first so it can still be recorded.
        verbose: When true, print a one-line summary after every episode.
        model_save: When true, call ``save_model(agent, log_info)``.
        log_save: When true, call
            ``save_log(log_data, log_info, folder=log_folder)``.
        log_folder: Folder forwarded to ``save_log``.

    Returns:
        The same ``agent`` object that was passed in.
    """
    # Per-episode metrics (one entry appended per episode).
    log_data = {key: [] for key in ("episode", "reward", "epochs", "epsilon", "success")}
    # Whole-run summary (one entry appended per key at the end).
    log_info = {
        key: []
        for key in ("seed", "time", "agent", "env", "episodes", "epochs", "success_rate")
    }

    if seed is None:
        seed = random.randint(0, 1000)
    # Only Python's ``random`` module is seeded here.
    random.seed(seed)

    start_time = time.time()
    for episode in range(nEpisodes):
        state = env.reset()
        ret = 0
        epochs = 0
        while True:
            action = agent.choose_action(state)
            new_state, reward, done, success = env.step(action)
            ret += reward
            epochs += 1
            agent.train(state, action, reward, new_state, done)
            state = new_state
            if done:
                break

        if verbose:
            print("_______________________________")
            print(
                f"Episode: {episode}\tReward: {ret}\tEpsilon: {agent.epsilon}"
                f"\tepochs: {epochs}\tsuccess: {success}"
            )

        log_data["episode"].append(episode)
        log_data["reward"].append(ret)
        log_data["epochs"].append(epochs)
        log_data["epsilon"].append(agent.epsilon)
        log_data["success"].append(success)
    elapsed = time.time() - start_time

    # Guard the division: with zero episodes there is nothing to average and
    # the unguarded expression would raise ZeroDivisionError.
    if nEpisodes:
        success_rate = sum(log_data["success"]) / nEpisodes
    else:
        success_rate = 0.0

    log_info["seed"].append(seed)
    log_info["time"].append(elapsed)
    log_info["agent"].append("TileCoding")
    log_info["env"].append(env_name)
    log_info["episodes"].append(nEpisodes)
    log_info["epochs"].append(sum(log_data["epochs"]))
    log_info["success_rate"].append(success_rate)

    if model_save:
        save_model(agent, log_info)
    if log_save:
        save_log(log_data, log_info, folder=log_folder)
    return agent
def show_model(env, agent, nEpisodes=1):
    """Render ``agent`` acting in ``env`` for ``nEpisodes`` episodes.

    For every episode the environment is reset, then rendered before each
    step while the agent picks actions, until ``env.step`` reports ``done``.
    The agent is not trained here.

    Args:
        env: Object exposing ``reset()``, ``render()`` and ``step(action)``
            returning the 4-tuple ``(new_state, reward, done, success)``.
        agent: Object exposing ``choose_action(state)``.
        nEpisodes: Number of episodes to play back. Defaults to 1, which is
            the previously hard-coded behaviour.
    """
    for _ in range(nEpisodes):
        state = env.reset()
        done = False
        while not done:
            env.render()
            action = agent.choose_action(state)
            # Reward and success flag are not needed for playback.
            state, _reward, done, _success = env.step(action)
if __name__ == "__main__":
    # Command-line entry point: optionally train a TileCoding agent on the
    # selected environment, then optionally render the trained (or loaded)
    # agent.
    parser = argparse.ArgumentParser(description='Set options for training and rendering TileCoding')
    parser.add_argument('-t', '--train', choices=['t', 'f'], default='t', help='Train the model')
    parser.add_argument('-r', '--render', choices=['t', 'f'], default='t', help='Render the model')
    parser.add_argument('-s', '--seed', type=int, default=None, help='Seed for the model. If rendering, provide the seed of the model to render')
    parser.add_argument('-v', '--verbose', choices=['t', 'f'], default='t', help='Verbose mode')
    # choose the environment to train and render
    parser.add_argument('-e', '--env', default='MountainCar', choices=['MountainCar', 'MountainCarContinuous','CartPole', 'LunarLander', 'Acrobot', 'Pendulum'], help='Choose the environment to train and render')
    args = parser.parse_args()

    train = args.train == 't'
    render = args.render == 't'
    verbose = args.verbose == 't'

    # Rendering without training loads a saved model, which is looked up by
    # seed. Fail fast, and with a non-zero exit status, when it is missing.
    if render and not train and args.seed is None:
        sys.exit("Please provide a seed to render the model")

    # Map each --env choice to its configuration object from
    # Code.TileCoding.config. argparse already rejects names outside
    # `choices`, so no fallback branch is needed.
    configs = {
        'MountainCar': MountainCar,
        'MountainCarContinuous': MountainCarContinuous,
        'CartPole': CartPole,
        'LunarLander': LunarLander,
        'Acrobot': Acrobot,
        'Pendulum': Pendulum,
    }
    config = configs[args.env]

    print("Environment: ", args.env)
    env = config['env']
    tiling_specs = config['tiling_specs']

    if train:
        print("Training the model")
        agent = Agent((env._action_space.n, env._env.observation_space.low, env._env.observation_space.high), tiling_specs, verbose=verbose)
        trained_agent = run_agent(env, agent, config["episodes"], config['map_name'], args.seed, verbose=verbose)

    if render:
        print("Rendering the model")
        if not train:
            trained_agent = load_model("TileCoding", config['map_name'], args.seed)
        # Rendering uses the dedicated render environment from the config.
        env = config['renderEnv']
        show_model(env, trained_agent)