# ruagomesfreiregame2sol.py

import random
import math

# LearningAgent to implement.
# No knowledge about the environment may be used:
# the code should work even with another environment.
class LearningAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration.

    The agent keeps a table ``memory[state][action]`` of estimated action
    values.  An entry equal to ``-math.inf`` is a sentinel meaning "no
    estimate yet": either the state has never been offered to
    ``selectactiontolearn`` or the action index lies beyond the actions
    offered for that state.  Sentinel entries are treated as value 0 when
    they take part in a learning update and are never allowed to leak into
    the arithmetic (which would yield ``-inf``/``nan`` estimates).
    """

    def __init__(self, nS, nA, *, gama=0.9, alfa=0.6, eps=0.1):
        """Create an agent with an empty value table.

        Args:
            nS: maximum number of states (rows of the table).
            nA: maximum number of actions per state (columns of the table).
            gama: discount factor applied to the best next-state value.
            alfa: learning rate (step size of each update).
            eps: probability of picking a random action while learning.
        """
        self.nS = nS
        self.nA = nA
        self.gama = gama
        self.alfa = alfa
        self.eps = eps
        # One independent row per state; -inf marks "no estimate yet".
        self.memory = [[-math.inf] * nA for _ in range(nS)]

    def _greedy(self, st, n_actions):
        """Return the index of the highest-valued of the first n_actions.

        Ties are resolved in favour of the lowest index, so a state whose
        entries are all still the -inf sentinel yields index 0.  Returns -1
        only when ``n_actions`` is 0, i.e. there is no action to choose.
        """
        row = self.memory[st]
        return max(range(n_actions), key=row.__getitem__, default=-1)

    def selectactiontolearn(self, st, aa):
        """Select one action while learning (epsilon-greedy).

        Args:
            st: the current state.
            aa: the possible actions for ``st``; only its length is used.
                For a given state they are always given in the same order.

        Returns:
            The index into ``aa`` of the chosen action.
        """
        n_actions = len(aa)
        row = self.memory[st]

        # First time an action of this state is offered: start it at 0.
        for i in range(n_actions):
            if row[i] == -math.inf:
                row[i] = 0.0

        if random.uniform(0, 1) < self.eps:
            # Explore: uniformly random action.
            return random.randint(0, n_actions - 1)

        # Exploit: action with the highest current estimate.
        return self._greedy(st, n_actions)

    def selectactiontoexecute(self, st, aa):
        """Select one action while evaluating (purely greedy).

        Args:
            st: the current state.
            aa: the possible actions for ``st``; only its length is used.
                For a given state they are always given in the same order.

        Returns:
            The index into ``aa`` of the action with the highest estimate.
            For a state with no estimates yet this is index 0 rather than
            an invalid -1.
        """
        return self._greedy(st, len(aa))

    def learn(self, ost, nst, a, r):
        """Update the estimate for (ost, a) after observing one transition.

        Applies ``Q(ost, a) += alfa * (r + gama * max Q(nst, .) - Q(ost, a))``.

        Args:
            ost: original state.
            nst: next state.
            a: the index of the action taken in ``ost``.
            r: reward obtained.
        """
        # Only entries that hold a real estimate may contribute to the
        # bootstrap target.  A next state with no estimates at all (first
        # visit, or a terminal state that is never acted from) counts as 0;
        # using the raw -inf sentinel here would turn Q(ost, a) into
        # -inf or nan and discard the observed reward.
        known = [q for q in self.memory[nst] if q != -math.inf]
        best_next = max(known, default=0.0)

        current = self.memory[ost][a]
        if current == -math.inf:
            # Entry updated before it was ever initialised: start from 0.
            current = 0.0

        self.memory[ost][a] = current + self.alfa * (
            r + self.gama * best_next - current
        )