# Source: RewardFunction.py from A-Raafat/Torcs---Reinforcement-Learning-using-Q-Learning (master branch)
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 09 13:49:03 2018

@author: Ahmed Raafat
"""
import numpy
import re


def ComputeReward(speed,trackpos,angle,dist):
    """Score one step from the car's speed, track position, angle and distance.

    The returned value is chosen in this order:

    * ``-2`` when the module-level stuck counter reaches 25 (the counter is
      then reset to 0);
    * half of the shaped reward when ``0.75 <= |trackpos| < 0.98``;
    * ``-|trackpos|`` when ``|trackpos| >= 0.98``, fixed at ``-1.5`` once
      ``|trackpos| >= 1.5``;
    * the full shaped reward otherwise.

    The shaped reward is the sum of three fourth-power terms: speed divided
    by 160 (weight 0.05), ``1 / (|trackpos| + 1)`` (weight 0.7) and
    ``1 / (|angle| / 40 + 1)`` (weight 0.25).

    Side effect: every call with ``|angle| >= 45 and speed < 10`` or with
    ``speed < 3 and dist > 20`` advances the counter kept on ``count``.
    """
    abs_pos = numpy.abs(trackpos)
    abs_angle = numpy.abs(angle)

    # The two position bands are mutually exclusive: "off track" wins.
    off_track = abs_pos >= 0.98
    near_edge = (not off_track) and abs_pos >= 0.75

    # Only the 25th stuck-looking call is reported as stuck.
    is_stuck = False
    if (abs_angle >= 45 and speed < 10) or (speed < 3 and dist > 20):
        if count() == 25:
            is_stuck = True
            count.counter = 0

    speed_term = numpy.power(speed / float(160), 4) * 0.05
    position_term = numpy.power(1 / (float(abs_pos) + 1), 4) * 0.7
    angle_term = numpy.power(1 / ((float(abs_angle) / 40) + 1), 4) * 0.25

    if is_stuck:
        return -2
    if near_edge:
        return (speed_term + position_term + angle_term) * 0.5
    if off_track:
        if abs_pos >= 1.5:
            return -1.5
        return abs_pos * (-1)
    return speed_term + position_term + angle_term

#maximum action index depending on Qmax value
def FindQmaxIndex(state,table):
    """Return the action index (1..15) holding the largest Q-value for *state*.

    *state* is turned into a row key with ``convert2string`` and
    ``Stateindex``; that row of *table* is then scanned over entries 1 to 15.
    When several entries share the largest value, the lowest index is returned.
    """
    row_key = Stateindex(convert2string(state))
    q_row = table.loc[row_key][:]
    # max() keeps the first candidate unless a later one is strictly greater,
    # so ties resolve to the lowest action index.
    return max(range(1, 16), key=lambda action: q_row[action])
'''

#Do After first iteration
def FindQmaxIndex(state,table):
    State=Stateindex(convert2string(state))
    print(State)
    maximum=table.iloc[State][1]

    for i in range (15):
        if table.iloc[State][i+1]>maximum:
            maximum=table.iloc[State][i+1]
            ActionIndex=i
        else:
            ActionIndex=0
    return ActionIndex
'''


def Stateindex(state):
    """Convert a partitioned state string into its integer table index.

    The first 11 runs of digits found in *state* are read as bits, most
    significant first, and the resulting binary number is returned. Any
    further numbers in the string are ignored.

    Args:
        state: Text such as ``"(0, 1, 0, 1)(1, 0, 0)(0, 1, 1, 0)"``.

    Returns:
        The integer value of the concatenated digits interpreted in base 2.

    Raises:
        IndexError: If *state* contains fewer than 11 numbers.
        ValueError: If the concatenated digits are not a valid base-2 literal.
    """
    # Raw string: "\d" inside a plain literal is an invalid escape sequence.
    tokens = re.findall(r'\d+', state)
    if len(tokens) < 11:
        raise IndexError(
            "state must contain at least 11 numbers, found %d" % len(tokens)
        )
    # int() normalises each token (e.g. "01" -> "1") before concatenation.
    bits = "".join(str(int(token)) for token in tokens[:11])
    return int(bits, 2)


def convert2string(state):
    """Render *state* as three concatenated tuple strings.

    The elements are grouped as positions 0-3, 4-6 and 7-10, giving text of
    the form ``(a, b, c, d)(e, f, g)(h, i, j, k)``.
    """
    cells = tuple(state)
    group_bounds = ((0, 4), (4, 7), (7, 11))
    return "".join(str(cells[start:stop]) for start, stop in group_bounds)


def count():
    """Advance the call counter stored on this function and return its new value."""
    count.counter = count.counter + 1
    return count.counter


# The counter lives as an attribute on the function itself; it starts at zero.
count.counter = 0