-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathEnvironment.java
More file actions
107 lines (92 loc) · 1.92 KB
/
Environment.java
File metadata and controls
107 lines (92 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import java.util.ArrayList;
import java.util.Iterator;
/**
 * Holds a collection of {@link State} objects together with a discount factor
 * and offers a simple value update over the transitions of those states.
 *
 * <p>{@link #Train(State, ArrayList)} walks a fixed sequence of actions from a
 * start state and rewrites the {@code q} field of every transition it follows;
 * {@link #Play(State)} prints the path obtained by always following the
 * transition with the highest {@code q}.
 */
public class Environment {
    /** States registered through {@link #addState(State)}. */
    ArrayList<State> states;
    /** Factor applied to the best {@code q} of the destination state during training. */
    double gamma;

    /**
     * Creates an environment with no states.
     *
     * @param gamma factor multiplied with the destination state's best {@code q}
     *              in {@link #Train(State, ArrayList)}
     */
    public Environment(double gamma)
    {
        states = new ArrayList<State>();
        this.gamma = gamma;
    }

    /**
     * Appends a state to this environment.
     *
     * @param a the state to register
     */
    public void addState(State a)
    {
        states.add(a);
    }

    /**
     * Removes every registered state whose {@code name} equals {@code stateName}.
     *
     * <p>Only the environment's own list is modified. Transitions held by other
     * states that still point at a removed state are left untouched.
     *
     * @param stateName name of the state(s) to remove
     */
    public void removeState(String stateName)
    {
        // Iterator.remove is the only safe way to delete while iterating.
        Iterator<State> itr = states.iterator();
        while (itr.hasNext())
        {
            if (sameName(itr.next().name, stateName))
            {
                itr.remove();
            }
        }
    }

    /**
     * Replays a sequence of actions starting at {@code ini} and updates the
     * {@code q} of each transition taken.
     *
     * <p>For every action, the first transition of the current state whose
     * action has the same name is updated to
     * {@code reinforcement + gamma * max(destination)} and then followed.
     * An action with no matching transition is skipped and the current state
     * does not change.
     *
     * @param ini     state the replay starts from
     * @param actions actions to apply, in order
     */
    public void Train(State ini, ArrayList<Action> actions)
    {
        State currentState = ini;
        for (Action a : actions)
        {
            for (Transition t : currentState.transitions)
            {
                // Names are compared by value; '==' would only match when both
                // sides happen to be the very same object.
                if (sameName(t.a.name, a.name))
                {
                    t.q = t.reinforcement + gamma * max(t.destination);
                    currentState = t.destination;
                    break; // only the first matching transition is taken
                }
            }
        }
    }

    /**
     * Returns the largest {@code q} among the transitions of {@code s}.
     *
     * @param s state whose outgoing transitions are inspected
     * @return the highest {@code q}, or {@code 0.0} when {@code s} has no transitions
     */
    public double max(State s)
    {
        if (s.transitions.isEmpty())
        {
            return 0.0;
        }
        // Seed with a real value instead of 0.0 so that a state whose
        // transitions all have negative q reports its true maximum.
        double best = s.transitions.get(0).q;
        for (Transition t : s.transitions)
        {
            if (t.q > best)
            {
                best = t.q;
            }
        }
        return best;
    }

    /**
     * Prints one line per transition of every registered state to standard
     * output, in the form {@code <state name> <action name> <q>}.
     */
    public void Print()
    {
        for (State s : states)
        {
            for (Transition t : s.transitions)
            {
                System.out.println(s.name + " " + t.a.name + " " + t.q);
            }
        }
    }

    /**
     * Prints the path reached from {@code ini} by repeatedly following the
     * transition chosen by {@link #pickBest(ArrayList)}, with state names
     * separated by {@code " -> "}.
     *
     * <p>The walk ends when a state without transitions is reached. Because the
     * choice at each state is deterministic, arriving at an already expanded
     * state means the path would repeat forever; in that case {@code " -> ..."}
     * is printed and the walk stops.
     *
     * @param ini state the walk starts from
     */
    public void Play(State ini)
    {
        State current = ini;
        ArrayList<State> expanded = new ArrayList<State>();
        System.out.print(current.name);
        while (!current.transitions.isEmpty())
        {
            if (containsSame(expanded, current))
            {
                System.out.print(" -> ...");
                break;
            }
            expanded.add(current);
            current = pickBest(current.transitions).destination;
            System.out.print(" -> " + current.name);
        }
        System.out.println();
    }

    /**
     * Returns the transition with the highest {@code q}; on ties the earliest
     * one in the list wins.
     *
     * @param transitions candidates to choose from; must not be empty
     * @return the transition with the largest {@code q}
     * @throws IndexOutOfBoundsException if {@code transitions} is empty
     */
    public Transition pickBest(ArrayList<Transition> transitions)
    {
        Transition best = transitions.get(0);
        for (Transition t : transitions)
        {
            if (t.q > best.q)
            {
                best = t;
            }
        }
        return best;
    }

    /** Null-safe comparison of two names by value rather than by reference. */
    private static boolean sameName(Object first, Object second)
    {
        return first == null ? second == null : first.equals(second);
    }

    /** Tells whether {@code seen} already holds this exact {@code candidate} instance. */
    private static boolean containsSame(ArrayList<State> seen, State candidate)
    {
        for (State s : seen)
        {
            if (s == candidate)
            {
                return true;
            }
        }
        return false;
    }
}