Skip to content

Commit

Permalink
Merge pull request #12 from beyretb/dev-v0.4
Browse files Browse the repository at this point in the history
merge Dev v0.4 to master
  • Loading branch information
beyretb authored May 28, 2019
2 parents 5a23856 + 202bd71 commit ec9cb65
Show file tree
Hide file tree
Showing 22 changed files with 191 additions and 702 deletions.
4 changes: 1 addition & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,4 @@ env/*
__pycache__/
UnitySDK.log
/venv
testDevs.py
testDevs.yaml
.DS_Store
/dev
18 changes: 14 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,9 @@ Finally download the environment for your system:

| OS | Environment link |
| --- | --- |
| Linux | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_linux_v0.3.zip) |
| MacOS | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_mac_v0.3.zip) |
| Windows | [download v0.3](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_windows_v0.3.zip) |
| Linux | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_linux_v0.4.zip) |
| MacOS | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_mac_v0.4.zip) |
| Windows | [download v0.4](https://www.doc.ic.ac.uk/~bb1010/animalAI/env_windows_v0.4.zip) |

You can now unzip the content of the archive to the `env` folder and you're ready to go! Make sure the executable
`AnimalAI.*` is in `env/`. On linux you may have to make the file executable by running `chmod +x env/AnimalAI.x86_64`.
Expand Down Expand Up @@ -99,19 +99,29 @@ Occasional slow frame rates in play mode. Temporary fix: reduce screen size.

- [ ] Offer a gym wrapper for training
- [ ] Add protobuf for arena spawning feedback
- [ ] Improve the way the agent spawns
- [x] Improve the way the agent spawns
- [x] Add lights out configurations.
- [x] Improve environment framerates
- [x] Add moving food

## Version History

- v0.4 - Lights off moved to Unity, color configurations, proportional goals, bug fixes
- The light is now directly switched on/off within Unity, configuration files stay the same
- Blackouts now work with infinite episodes (`t=0`)
- The `rand_colors` configurations have been removed and the user can now pass `RGB` values, see [here](documentation/configFile.md#objects)
- Rewards for goals are now proportional to their size (except for the `DeathZone`), see [here](documentation/definitionsOfObjects.md#rewards)
- The agent is now a ball rather than a cube
- Increased safety for spawning the agent to avoid infinite loops
- Bug fixes

- v0.3 - Lights off, remove Beams and add cylinder
- We added the possibility to switch the lights off at given intervals, see [here](documentation/configFile.md#blackouts)
- visualizeLightsOff.py displays an example of lights off, from the agent's point of view
- Beams objects have been removed
- A `Cylinder` object has been added (similar behaviour to the `Woodlog`)
- The immovable `Cylinder` tunnel has been renamed `CylinderTunnel`
- `UnityEnvironment.reset()` parameter `config` renamed to `arenas_configurations_input`

- v0.2 - New moving food rewards, improved Unity performance and bug fixes
- Moving rewards have been added, two for each type of reward, see
Expand Down
27 changes: 27 additions & 0 deletions agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from animalai.envs.brain import BrainInfo

class Agent(object):
    """Template for a user-supplied agent evaluated in the AnimalAI environment.

    Implementors load their trained model in ``__init__``, produce one action
    per observation in ``step``, and release resources in ``destroy``.

    NOTE: annotations are quoted (PEP 484 forward references) so they are never
    evaluated at class-definition time — the original unquoted ``list[float]``
    raises ``TypeError`` on Python < 3.9, and quoting ``BrainInfo`` keeps the
    class importable even if that import is unavailable.
    """

    def __init__(self, configuration_to_load: str):
        """
        Load your agent here and initialize anything needed.

        :param configuration_to_load: path to the model to load
        """
        pass

    def step(self, brain_info: 'BrainInfo') -> 'list[float]':
        """
        Compute a single action from the current observation.

        :param brain_info: a single BrainInfo containing the observations and
            reward for a single step for one agent
        :return: a list of actions to execute (of size 2)
        """
        # Placeholder: a real agent replaces this with model inference.
        self.action = []

        return self.action

    def destroy(self):
        """Release any resources held by the agent (no-op by default)."""
        pass
37 changes: 19 additions & 18 deletions animalai/communicator_objects/arena_parameters_proto_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 22 additions & 29 deletions animalai/envs/arena_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import jsonpickle
import yaml
import copy
import numpy as np

from animalai.communicator_objects import UnityRLResetInput, ArenaParametersProto

Expand All @@ -26,44 +25,41 @@ def to_proto(self):
return res


class RGB(yaml.YAMLObject):
    """A YAML-serializable RGB color triple (tag ``!RGB``)."""

    yaml_tag = u'!RGB'

    def __init__(self, r=0, g=0, b=0):
        # Channels default to 0 and are stored as given (no clamping here).
        self.r, self.g, self.b = r, g, b

    def to_proto(self):
        """Pack this color into a protobuf Vector3, mapping r/g/b to x/y/z."""
        vec = ArenaParametersProto.ItemsToSpawn.Vector3()
        vec.x, vec.y, vec.z = self.r, self.g, self.b
        return vec


class Item(yaml.YAMLObject):
yaml_tag = u'!Item'

def __init__(self, name='', rand_color=False, positions=None, rotations=None, sizes=None):
def __init__(self, name='', positions=None, rotations=None, sizes=None, colors=None):
self.name = name
self.rand_color = rand_color
self.positions = positions if positions is not None else []
self.rotations = rotations if rotations is not None else []
self.sizes = sizes if sizes is not None else []
self.colors = colors if colors is not None else []


class Arena(yaml.YAMLObject):
yaml_tag = u'!Arena'

def __init__(self, t=1000, rand_all_colors=False, items=None, blackouts=None):
def __init__(self, t=1000, items=None, blackouts=None):
self.t = t
self.rand_all_colors = rand_all_colors
self.items = items if items is not None else {}
self.blackouts = blackouts if blackouts is not None else []
self.generate_blackout_steps()

def generate_blackout_steps(self):
# Transform a list of steps at which we turn on/off the light into a list of 1/0 of size t for each step

if self.blackouts is not None and len(self.blackouts) > 0 and self.t>0:
if self.blackouts[0] > 0:
self.blackouts_steps = np.ones(self.t)
light = True
for i in range(len(self.blackouts) - 1):
self.blackouts_steps[self.blackouts[i]:self.blackouts[i + 1]] = not light
light = not light
self.blackouts_steps[self.blackouts[-1]:] = not light
else:
flip_every = -self.blackouts[0]
self.blackouts_steps = np.array(
([1] * flip_every + [0] * flip_every) * (self.t // (2 * flip_every) + 1))[:self.t]
else:
self.blackouts_steps = np.ones(max(self.t, 1))


class ArenaConfig(yaml.YAMLObject):
Expand All @@ -73,8 +69,6 @@ def __init__(self, yaml_path=None):

if yaml_path is not None:
self.arenas = yaml.load(open(yaml_path, 'r'), Loader=yaml.Loader).arenas
for arena in self.arenas.values():
arena.generate_blackout_steps()
else:
self.arenas = {}

Expand All @@ -89,14 +83,14 @@ def dict_to_arena_config(self) -> UnityRLResetInput:
for k in self.arenas:
config_out.arenas[k].CopyFrom(ArenaParametersProto())
config_out.arenas[k].t = self.arenas[k].t
config_out.arenas[k].rand_all_colors = self.arenas[k].rand_all_colors
config_out.arenas[k].blackouts.extend(self.arenas[k].blackouts)
for item in self.arenas[k].items:
to_spawn = config_out.arenas[k].items.add()
to_spawn.name = item.name
to_spawn.rand_color = item.rand_color
to_spawn.positions.extend([v.to_proto() for v in item.positions])
to_spawn.rotations.extend(item.rotations)
to_spawn.sizes.extend([v.to_proto() for v in item.sizes])
to_spawn.colors.extend([v.to_proto() for v in item.colors])

return config_out

Expand All @@ -105,7 +99,6 @@ def update(self, arenas_configurations_input):
if arenas_configurations_input is not None:
for arena_i in arenas_configurations_input.arenas:
self.arenas[arena_i] = copy.copy(arenas_configurations_input.arenas[arena_i])
self.arenas[arena_i].generate_blackout_steps()


def constructor_arena(loader, node):
Expand Down
36 changes: 7 additions & 29 deletions animalai/envs/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ def __init__(self, file_name=None,
base_port=5005,
seed=0,
docker_training=False,
no_graphics=False,
n_arenas=1,
play=False,
arenas_configurations=None):
Expand All @@ -43,7 +42,6 @@ def __init__(self, file_name=None,
:int base_port: Baseline port number to connect to Unity environment over. worker_id increments over this.
:int worker_id: Number to add to communication port (5005) [0]. Used for asynchronous agent scenarios.
:param docker_training: Informs this class whether the process is being run within a container.
:param no_graphics: Whether to run the Unity simulator in no-graphics mode
"""

atexit.register(self._close)
Expand All @@ -56,10 +54,9 @@ def __init__(self, file_name=None,
self.proc1 = None # The process that is started. If None, no process was started
self.communicator = self.get_communicator(worker_id, base_port)
self.arenas_configurations = arenas_configurations if arenas_configurations is not None else ArenaConfig()
self.check_lights = True

if file_name is not None:
self.executable_launcher(file_name, docker_training, no_graphics)
self.executable_launcher(file_name, docker_training)
else:
logger.info("Start training by pressing the Play button in the Unity Editor.")
self._loaded = True
Expand Down Expand Up @@ -130,7 +127,7 @@ def brain_names(self):
def external_brain_names(self):
return self._external_brain_names

def executable_launcher(self, file_name, docker_training, no_graphics):
def executable_launcher(self, file_name, docker_training):
cwd = os.getcwd()
file_name = (file_name.strip()
.replace('.app', '').replace('.exe', '').replace('.x86_64', '').replace('.x86',
Expand Down Expand Up @@ -177,17 +174,12 @@ def executable_launcher(self, file_name, docker_training, no_graphics):
logger.debug("This is the launch string {}".format(launch_string))
# Launch Unity environment
if not docker_training:
if no_graphics:
if not self.play:
self.proc1 = subprocess.Popen(
[launch_string, '-nographics', '-batchmode',
'--port', str(self.port)])
[launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)])
else:
if not self.play:
self.proc1 = subprocess.Popen(
[launch_string, '--port', str(self.port), '--nArenas', str(self.n_arenas)])
else:
self.proc1 = subprocess.Popen(
[launch_string, '--port', str(self.port)])
self.proc1 = subprocess.Popen(
[launch_string, '--port', str(self.port)])

else:
"""
Expand All @@ -209,7 +201,7 @@ def executable_launcher(self, file_name, docker_training, no_graphics):
"""
docker_ls = ("exec xvfb-run --auto-servernum"
" --server-args='-screen 0 640x480x24'"
" {0} --port {1}").format(launch_string, str(self.port))
" {0} --port {1} --nArenas {2}").format(launch_string, str(self.port), str(self.n_arenas))
self.proc1 = subprocess.Popen(docker_ls,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
Expand All @@ -232,7 +224,6 @@ def reset(self, arenas_configurations_input=None, train_mode=True) -> AllBrainIn
"""
if self._loaded:
self.arenas_configurations.update(arenas_configurations_input)
self.check_lights = not np.all([e.blackouts for e in self.arenas_configurations.arenas.values()])

outputs = self.communicator.exchange(
self._generate_reset_input(train_mode, arenas_configurations_input)
Expand Down Expand Up @@ -388,8 +379,6 @@ def step(self, vector_action=None, memory=None, text_action=None, value=None, st
self._global_done = state[1]
for _b in self._external_brain_names:
self._n_agents[_b] = len(state[0][_b].agents)
if self.check_lights:
state = self._apply_lights(state, step_number)
return state[0]
elif not self._loaded:
raise UnityEnvironmentException("No Unity environment is loaded.")
Expand Down Expand Up @@ -436,17 +425,6 @@ def _flatten(cls, arr):
arr = [float(x) for x in arr]
return arr

def _apply_lights(self, state, step_number):
"""
Sets visual observations to zero for Arenas where the light should be off.
:return: the modified state
"""
if 'Learner' in state[0].keys():
mask = np.array([e.blackouts_steps[step_number % len(e.blackouts_steps)] \
for e in self.arenas_configurations.arenas.values()])
state[0]['Learner'].visual_observations[0] = (state[0]['Learner'].visual_observations[0].T * mask).T
return state

def _get_state(self, output: UnityRLOutput) -> (AllBrainInfo, bool):
"""
Collects experience information from all external brains in environment at current step.
Expand Down
Loading

0 comments on commit ec9cb65

Please sign in to comment.