diff --git a/infant_abm/agents/infant/actions.py b/infant_abm/agents/infant/actions.py
index f1a2720..8082752 100644
--- a/infant_abm/agents/infant/actions.py
+++ b/infant_abm/agents/infant/actions.py
@@ -16,13 +16,13 @@ class InteractWithToy(Action):
     number = 3


-# class EvaluateToy(Action):
-#     def __init__(self, duration=0, metadata=None):
-#         super().__init__(metadata)
-#         self.duration = duration
+class EvaluateToy(Action):
+    def __init__(self, duration=0, metadata=None):
+        super().__init__(metadata)
+        self.duration = duration


-# class EvaluateThrow(Action):
-#     def __init__(self, duration=0, metadata=None):
-#         super().__init__(metadata)
-#         self.duration = duration
+class EvaluateThrow(Action):
+    def __init__(self, duration=0, metadata=None):
+        super().__init__(metadata)
+        self.duration = duration
diff --git a/infant_abm/agents/infant/q_learning_agent.py b/infant_abm/agents/infant/q_learning_agent.py
index b1bc569..4e6306f 100644
--- a/infant_abm/agents/infant/q_learning_agent.py
+++ b/infant_abm/agents/infant/q_learning_agent.py
@@ -4,52 +4,71 @@
 # [infant_looked_at_toy, parent_looked_at_toy, mutual_gaze]
 STATE_SPACE = np.array([2, 2, 2])
-STATE_SPACE_SIZE = np.mul.reduce(STATE_SPACE)
+STATE_SPACE_SIZE = np.multiply.reduce(STATE_SPACE)

 GOAL_STATE = np.array([1, 1, 1])


 class QLearningAgent:
     def __init__(self, model, actions, alpha=0.1, gamma=0.9, epsilon=0.1):
         self.model = model
-        self.q_table = np.zeros((STATE_SPACE_SIZE, len(actions)))
+        self.q_table = np.random.rand(STATE_SPACE_SIZE, len(actions))
+
+        print(f"init\n\n{self.q_table}")
+
         self.alpha = alpha
         self.gamma = gamma
         self.epsilon = epsilon

         self.actions = {action: num for num, action in enumerate(actions)}
         self.number_actions = {num: action for action, num in self.actions.items()}

-    def choose_action(self, infant):
-        state = self._get_state(infant)
+    def choose_action(self):
+        state = self.get_state()

         if np.random.rand() < self.epsilon:
-            return np.random.choice(self.actions.keys())  # Explore
+            return np.random.choice(list(self.actions.keys()))  # Explore
         else:
+            # print(
+            #     f"argmax {np.argmax(self.q_table[state])}\n num_actions {self.number_actions}"
+            # )
             return self.number_actions[np.argmax(self.q_table[state])]  # Exploit

     def update_q_table(self, state, action, reward, next_state):
-        state = self._to_number_state(state)
-        next_state = QLearningAgent._to_number_state(next_state)
-
+        action = self.actions[action]
         best_next_action = np.max(self.q_table[next_state])
-        self.q_table[state, action] += self.alpha * (
+
+        # print(
+        #     f"state {state}, action {action}, reward {reward}, best_next {best_next_action}, next_state {next_state}"
+        # )
+        # print(f"{self.q_table[state, action]}\n")
+
+        plus = self.alpha * (
             reward + self.gamma * best_next_action - self.q_table[state, action]
         )

+        # print(plus)
+
+        self.q_table[state, action] += plus
+
     def get_state(self):
         # [infant_looked_at_toy, parent_looked_at_toy, mutual_gaze]
-        return (
-            int(self._infant_looked_at_toy()),
-            int(self._parent_looked_at_toy_after_infant()),
-            int(),
+        raw_state = np.array(
+            [
+                int(self._infant_looked_at_toy()),
+                int(self._parent_looked_at_toy_after_infant()),
+                int(self._mutual_gaze()),
+            ]
         )

+        multiplier = np.array([4, 2, 1])
+
+        return np.sum(raw_state * multiplier)
+
     def _infant_looked_at_toy(self):
         return any([isinstance(obj, Toy) for obj in self.model.infant.gaze_directions])

     def _parent_looked_at_toy_after_infant(self):
         for i, obj in enumerate(self.model.infant.gaze_directions):
-            if obj in self.model.parent.gaze_directions[i:]:
+            if isinstance(obj, Toy) and obj in self.model.parent.gaze_directions[i:]:
                 return True

         return False
@@ -61,19 +80,7 @@ def _mutual_gaze(self):
         )

     def reward(self, state):
-        if state == GOAL_STATE:
+        if np.all(state == GOAL_STATE):
             return 1
         else:
             return 0
-
-    @staticmethod
-    def _to_number_state(state):
-        (previous_action, infant_gaze, parent_gaze) = state
-        return parent_gaze + infant_gaze * 6 + previous_action * 6 * 6
-
-    @staticmethod
-    def _get_agent_gaze_direction(agent):
-        if agent.gaze_direction is None:
-            return 0
-        else:
-            return min(agent.gaze_direction.unique_id, 5)
diff --git a/infant_abm/agents/infant/seq_vision_infant.py b/infant_abm/agents/infant/seq_vision_infant.py
index 6d41546..a4d6dee 100644
--- a/infant_abm/agents/infant/seq_vision_infant.py
+++ b/infant_abm/agents/infant/seq_vision_infant.py
@@ -15,7 +15,7 @@ class SeqVisionInfant(InfantBase):
     # TOY_EVALUATION_DURATION = 3
     # THROW_EVALUATION_DURATION = 20

-    # PERSISTENCE_BOOST_DURATION = 20
+    PERSISTENCE_BOOST_DURATION = 20

     # COORDINATION_BOOST_VALUE = 0.2
     # PERSISTENCE_BOOST_VALUE = 0.2
@@ -35,22 +35,36 @@ def __init__(self, unique_id, model, pos, params: Params):
         self.current_persistence_boost_duration = 0
         self.q_learning_state = None

-    def get_actions(self):
-        return [actions.LookForToy, actions.Crawl, actions.InteractWithToy]
+    def get_q_actions(self):
+        return [None, self.model.parent] + self.model.get_toys()

-    def before_step(self):
-        self.q_learning_state = self.model.q_learning_agent.get_state(self)
+    def _before_step(self):
+        self.q_learning_state = self.model.q_learning_agent.get_state()

-        self.gaze_directions.append(self.model.q_learning_agent.choose_action(self))
+        new_action = self.model.q_learning_agent.choose_action()
+
+        self.gaze_directions.append(new_action)
         self.gaze_directions.pop(0)

     def after_step(self):
-        next_state = self.model.q_learning_agent.get_state(self)
+        next_state = self.model.q_learning_agent.get_state()
         reward = self.model.q_learning_agent.reward(next_state)

         self.model.q_learning_agent.update_q_table(
-            self.q_learning_state, self.gaze_direction, reward, next_state
+            self.q_learning_state, self.gaze_directions[-1], reward, next_state
         )

+        # print(f"{self.gaze_directions[-2:]}, {self.model.parent.gaze_directions[-2:]}")
+
+        if np.random.rand() < 0.005:
+            # print(next_state)
+            # print(self.model.q_learning_agent.q_table)
+            print(
+                {
+                    state: np.argmax(self.model.q_learning_agent.q_table[state])
+                    for state in range(8)
+                }
+            )
+
     def _step_look_for_toy(self, _action):
         self.current_persistence_boost_duration = 0
         self.params.persistence.reset()
@@ -117,7 +131,8 @@ def _step_interact_with_toy(self, _action):
     def _step_crawl(self, _action):
         if self._target_in_range():
             self._start_evaluating_throw()
-            return actions.EvaluateThrow()
+            # return actions.EvaluateThrow()
+            return actions.InteractWithToy()

         if self._gets_distracted():
             self.target = None
diff --git a/infant_abm/agents/parent/vision_only_parent.py b/infant_abm/agents/parent/vision_only_parent.py
index 2c0fbd8..40518ed 100644
--- a/infant_abm/agents/parent/vision_only_parent.py
+++ b/infant_abm/agents/parent/vision_only_parent.py
@@ -53,7 +53,7 @@ def _random_gaze_direction(self):
             case 1:
                 return self.model.infant
             case 2:
-                if target is not None and 0.5 > np.random.rand:
+                if target is not None and 0.5 > np.random.rand():
                     return target
                 else:
                     toys = self.model.get_toys()
diff --git a/infant_abm/agents/parent_base.py b/infant_abm/agents/parent_base.py
index f291eb2..f9b4a3d 100644
--- a/infant_abm/agents/parent_base.py
+++ b/infant_abm/agents/parent_base.py
@@ -37,6 +37,8 @@ def __init__(self, unique_id, model, pos):
     def step(self):
         self.satisfaction = 0

+        self._before_step()
+
         match self.next_action:
             case Action.WAIT:
                 pass
diff --git a/infant_abm/model.py b/infant_abm/model.py
index efb1163..d0e4b53 100644
--- a/infant_abm/model.py
+++ b/infant_abm/model.py
@@ -83,13 +83,12 @@ def __init__(
         self.make_agents(infant_params)

         self.q_learning_agent = QLearningAgent(
-            model=self, actions=self.infant.get_actions()
+            model=self, actions=self.infant.get_q_actions()
         )
+        self.infant.q_learning_state = self.q_learning_agent.get_state()

         self.datacollector = mesa.DataCollector(
             model_reporters={
-                "parent-visible": lambda m: int(getattr(m.infant, "parent_visible", 0)),
-                "infant-visible": lambda m: int(m.parent.infant_visible) / 2,
                 "heading": lambda m: m.infant.params.persistence.e2,
                 "throwing": lambda m: m.infant.params.coordination.e2,
                 "goal_dist": lambda m: m.get_middle_dist(),
@@ -115,6 +114,8 @@ def step(self):

         self.datacollector.collect(self)

+        self.infant.after_step()
+
     def get_middle_dist(self) -> float:
         middle_point = (self.parent.pos + self.infant.pos) / 2

diff --git a/run_simulation.py b/run_simulation.py
index 86babf2..08a534a 100644
--- a/run_simulation.py
+++ b/run_simulation.py
@@ -109,11 +109,13 @@ def run_comparative_boost_simulation():
 if __name__ == "__main__":
     output_dir = "./results/model2.0/q-learn"

-    params = [{"infant_params": InfantParams.from_array(0.5, 0.5, 0.5)}]
+    params = [
+        {"infant_params": InfantParams.from_array([0.5, 0.5, 0.5]), "config": Config()}
+    ]

     run_basic_simulation(
         output_dir=output_dir,
         parameter_sets=params,
-        iterations=10000,
+        iterations=5000,
         repeats=1,
     )