This repository has been archived by the owner on Dec 20, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathruagomesfreiregame2sol.py
executable file
·91 lines (76 loc) · 3.31 KB
/
ruagomesfreiregame2sol.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import random
import math
# LearningAgent to implement
# no knowledge about the environment can be used
# the code should work even with another environment
class LearningAgent:
    """Tabular Q-learning agent with an epsilon-greedy training policy.

    The agent keeps one value per (state, action index) pair in
    ``self.memory``.  Entries start at ``-math.inf``, which is used as a
    sentinel meaning "this action has never been offered/updated for this
    state"; such entries are ignored when looking for the best action and
    when bootstrapping from the next state.

    No knowledge about the environment is used: only the state indices,
    the number of offered actions and the observed rewards.
    """

    def __init__(self, nS, nA):
        """Create an agent with an empty value table.

        nS -- maximum number of states (rows of the table)
        nA -- maximum number of actions per state (columns of the table)
        """
        self.nS = nS
        self.nA = nA
        self.gama = 0.9  # discount applied to the best next-state value
        self.alfa = 0.6  # step size of each value update
        self.eps = 0.1   # probability of picking a random action while learning
        # One independent row per state; -inf marks "not seen yet".
        self.memory = [[-math.inf] * nA for _ in range(nS)]

    def _greedy(self, st, n_actions):
        """Return the index of the first highest-valued known action.

        Only the first ``n_actions`` entries of state ``st`` are considered.
        Returns -1 when none of them holds a value greater than ``-inf``
        (including when ``n_actions`` is 0).
        """
        row = self.memory[st]
        best_index = -1
        best_value = -math.inf
        for index in range(n_actions):
            # Strict comparison keeps the first action among ties.
            if row[index] > best_value:
                best_value = row[index]
                best_index = index
        return best_index

    def selectactiontolearn(self, st, aa):
        """Select one action, used when learning (epsilon-greedy).

        st -- the current state
        aa -- the set of possible actions; for a given state they are
              always given in the same order

        Returns the index into ``aa`` of the chosen action.
        """
        row = self.memory[st]
        # Actions offered for the first time get a neutral starting value so
        # they become eligible for greedy selection.
        for index in range(len(aa)):
            if row[index] == -math.inf:
                row[index] = 0

        if random.uniform(0, 1) < self.eps:
            # Explore: select a random action.
            return random.randint(0, len(aa) - 1)
        # Exploit: select the action with max value (future reward).
        return self._greedy(st, len(aa))

    def selectactiontoexecute(self, st, aa):
        """Select one action, used when evaluating (purely greedy).

        st -- the current state
        aa -- the set of possible actions; for a given state they are
              always given in the same order

        Returns the index into ``aa`` of the best known action.  If nothing
        has been learned for ``st`` yet, a random valid index is returned
        instead of the invalid index -1.  With an empty ``aa`` there is no
        valid index and -1 is returned.
        """
        best_index = self._greedy(st, len(aa))
        if best_index < 0 and len(aa) > 0:
            # State never seen during training: any valid action is as good
            # as another, and -1 would not be a legal answer.
            best_index = random.randrange(len(aa))
        return best_index

    def learn(self, ost, nst, a, r):
        """Update the value table; called after every action.

        ost -- original state
        nst -- next state
        a   -- the index of the action taken
        r   -- reward obtained
        """
        # Bootstrap only from next-state actions that have a real value.
        # A state with no known action (first visit, or a terminal state that
        # is never acted from) contributes 0 rather than -inf, which would
        # otherwise wipe out the value being updated.
        best_next = max(
            (value for value in self.memory[nst] if value != -math.inf),
            default=0.0,
        )
        current = self.memory[ost][a]
        if current == -math.inf:
            # Entry not initialised yet: start from 0 to avoid -inf + inf = NaN.
            current = 0.0
        self.memory[ost][a] = current + self.alfa * (
            r + self.gama * best_next - current
        )
        return