use envpool in vizdoom example, update doc (#634)
Trinkle23897 authored May 8, 2022
1 parent 2a7c151 commit bf8f63f
Showing 8 changed files with 335 additions and 149 deletions.
27 changes: 27 additions & 0 deletions docs/api/tianshou.data.rst
@@ -88,3 +88,30 @@ AsyncCollector
    :members:
    :undoc-members:
    :show-inheritance:


Utils
-----

to_numpy
~~~~~~~~

.. autofunction:: tianshou.data.to_numpy

to_torch
~~~~~~~~

.. autofunction:: tianshou.data.to_torch

to_torch_as
~~~~~~~~~~~

.. autofunction:: tianshou.data.to_torch_as

SegmentTree
~~~~~~~~~~~

.. autoclass:: tianshou.data.SegmentTree
    :members:
    :undoc-members:
    :show-inheritance:
32 changes: 32 additions & 0 deletions docs/api/tianshou.env.rst
@@ -46,6 +46,26 @@ RayVectorEnv
    :show-inheritance:


Wrapper
-------

VectorEnvWrapper
~~~~~~~~~~~~~~~~

.. autoclass:: tianshou.env.VectorEnvWrapper
    :members:
    :undoc-members:
    :show-inheritance:

VectorEnvNormObs
~~~~~~~~~~~~~~~~

.. autoclass:: tianshou.env.VectorEnvNormObs
    :members:
    :undoc-members:
    :show-inheritance:


Worker
------

@@ -80,3 +100,15 @@ RayEnvWorker
    :members:
    :undoc-members:
    :show-inheritance:


Utils
-----

PettingZooEnv
~~~~~~~~~~~~~

.. autoclass:: tianshou.env.PettingZooEnv
    :members:
    :undoc-members:
    :show-inheritance:
1 change: 1 addition & 0 deletions docs/spelling_wordlist.txt
@@ -158,3 +158,4 @@ Enduro
Qbert
Seaquest
subnets
subprocesses
44 changes: 41 additions & 3 deletions docs/tutorials/cheatsheet.rst
@@ -123,7 +123,11 @@ EnvPool Integration

`EnvPool <https://github.com/sail-sg/envpool/>`_ is a C++-based vectorized environment implementation and is far faster than the solutions above. Its API is almost identical to that of the four classes above, so you can switch the vectorized environment to envpool directly and get an immediate speed-up.

Currently it supports Atari, VizDoom, toy_text and classic_control environments. For more information, please refer to `EnvPool's documentation <https://envpool.readthedocs.io/en/latest/>`_.
Currently it supports
`Atari <https://github.com/thu-ml/tianshou/tree/master/examples/atari#envpool>`_,
`Mujoco <https://github.com/thu-ml/tianshou/tree/master/examples/mujoco#envpool>`_,
`VizDoom <https://github.com/thu-ml/tianshou/tree/master/examples/vizdoom#envpool>`_,
toy_text and classic_control environments. For more information, please refer to `EnvPool's documentation <https://envpool.readthedocs.io/en/latest/>`_.

::

@@ -133,7 +137,7 @@ Currently it supports Atari, VizDoom, toy_text and classic_control environments.
    envs = envpool.make_gym("CartPole-v0", num_envs=10)
    collector = Collector(policy, envs, buffer)

Here are some examples: https://github.com/sail-sg/envpool/tree/master/examples/tianshou_examples
Here are some other `examples <https://github.com/sail-sg/envpool/tree/master/examples/tianshou_examples>`_.

.. _preprocess_fn:

@@ -177,7 +181,7 @@ For example, you can write your hook as:
        self.episode_log[i].append(kwargs['rew'][i])
        kwargs['rew'][i] -= self.baseline
    for i in range(n):
        if kwargs['done']:
        if kwargs['done'][i]:
            self.main_log.append(np.mean(self.episode_log[i]))
            self.episode_log[i] = []
    self.baseline = np.mean(self.main_log)
@@ -191,6 +195,40 @@ And finally,

Some examples are in `test/base/test_collector.py <https://github.com/thu-ml/tianshou/blob/master/test/base/test_collector.py>`_.

Another solution is to create a vector environment wrapper through :class:`~tianshou.env.VectorEnvWrapper`, e.g.
::

    import numpy as np
    from collections import deque
    from tianshou.env import VectorEnvWrapper

    class MyWrapper(VectorEnvWrapper):
        def __init__(self, venv, size=100):
            super().__init__(venv)  # required so that self.venv is set
            self.episode_log = None
            self.main_log = deque(maxlen=size)
            self.main_log.append(0)
            self.baseline = 0

        def step(self, action, env_id):
            obs, rew, done, info = self.venv.step(action, env_id)
            n = len(rew)
            if self.episode_log is None:
                self.episode_log = [[] for _ in range(n)]
            for i in range(n):
                self.episode_log[i].append(rew[i])
                rew[i] -= self.baseline
            for i in range(n):
                if done[i]:
                    self.main_log.append(np.mean(self.episode_log[i]))
                    self.episode_log[i] = []
            self.baseline = np.mean(self.main_log)
            return obs, rew, done, info

    env = MyWrapper(env, size=100)
    collector = Collector(policy, env, buffer)
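
Compared with the ``preprocess_fn`` hook above, the wrapper keeps the reward-shaping logic at the environment level, so the same collector setup works unchanged.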

We provide an observation normalization vector env wrapper: :class:`~tianshou.env.VectorEnvNormObs`.
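
A minimal usage sketch, mirroring how the Mujoco examples apply it (the ``update_obs_rms`` flag and the ``get_obs_rms``/``set_obs_rms`` pair are assumed from that usage):
::

    from tianshou.env import VectorEnvNormObs

    train_envs = VectorEnvNormObs(train_envs)
    # evaluate with frozen statistics, reusing the running mean/std fitted during training
    test_envs = VectorEnvNormObs(test_envs, update_obs_rms=False)
    test_envs.set_obs_rms(train_envs.get_obs_rms())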


.. _rnn_training:

14 changes: 13 additions & 1 deletion examples/vizdoom/README.md
@@ -2,12 +2,24 @@

[ViZDoom](https://github.com/mwydmuch/ViZDoom) is a popular RL environment based on the famous first-person shooter Doom. Here we provide some results and intuitions for this scenario.

## EnvPool

We highly recommend using envpool to run the following experiments. To install it on a Linux machine, run:

```bash
pip install envpool
```

After that, `make_vizdoom_env` will automatically switch to envpool's ViZDoom env. EnvPool's implementation is much faster than the Python vectorized env implementation (about 2\~3x faster in pure execution speed, and about 1.5x for the overall RL training pipeline).

For more information, please refer to EnvPool's [GitHub](https://github.com/sail-sg/envpool/) and [Docs](https://envpool.readthedocs.io/en/latest/api/vizdoom.html).
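
As a rough sketch, the helper defined in `env.py` (shown below) can be called like this; the argument values are illustrative defaults taken from this example, not required settings:

```python
from env import make_vizdoom_env

# one reference env plus vectorized train/test envs (envpool-backed when available)
env, train_envs, test_envs = make_vizdoom_env(
    "D1_basic", frame_skip=4, res=(4, 84, 84), save_lmp=False,
    seed=0, training_num=10, test_num=10,
)
```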

## Train

To train an agent:

```bash
python3 vizdoom_c51.py --task {D1_basic|D3_battle|D4_battle2}
python3 vizdoom_c51.py --task {D1_basic|D2_navigation|D3_battle|D4_battle2}
```

D1 (health gathering) should finish training (no death) in less than 500k env steps (5 epochs);
59 changes: 59 additions & 0 deletions examples/vizdoom/env.py
@@ -5,6 +5,13 @@
import numpy as np
import vizdoom as vzd

from tianshou.env import ShmemVectorEnv

try:
    import envpool
except ImportError:
    envpool = None


def normal_button_comb():
    actions = []
@@ -112,6 +119,58 @@ def close(self):
        self.game.close()


def make_vizdoom_env(task, frame_skip, res, save_lmp, seed, training_num, test_num):
    # don't spawn more test envs than spare CPU cores
    test_num = min(os.cpu_count() - 1, test_num)
    if envpool is not None:
        # e.g. "D1_basic" -> "D1Basic-v1", envpool's task naming scheme
        task_id = "".join([i.capitalize() for i in task.split("_")]) + "-v1"
        lmp_save_dir = "lmps/" if save_lmp else ""
        # reward weights per game variable (envpool reward config):
        # [weight applied to increases, weight applied to decreases]
        reward_config = {
            "KILLCOUNT": [20.0, -20.0],
            "HEALTH": [1.0, 0.0],
            "AMMO2": [1.0, -1.0],
        }
        if "battle" in task:
            # in battle scenarios, losing health is penalized as well
            reward_config["HEALTH"] = [1.0, -1.0]
        env = train_envs = envpool.make_gym(
            task_id,
            frame_skip=frame_skip,
            stack_num=res[0],
            seed=seed,
            num_envs=training_num,
            reward_config=reward_config,
            use_combined_action=True,
            max_episode_steps=2625,
            use_inter_area_resize=False,
        )
        test_envs = envpool.make_gym(
            task_id,
            frame_skip=frame_skip,
            stack_num=res[0],
            lmp_save_dir=lmp_save_dir,
            seed=seed,
            num_envs=test_num,
            reward_config=reward_config,
            use_combined_action=True,
            max_episode_steps=2625,
            use_inter_area_resize=False,
        )
    else:
        cfg_path = f"maps/{task}.cfg"
        env = Env(cfg_path, frame_skip, res)
        train_envs = ShmemVectorEnv(
            [lambda: Env(cfg_path, frame_skip, res) for _ in range(training_num)]
        )
        test_envs = ShmemVectorEnv(
            [
                lambda: Env(cfg_path, frame_skip, res, save_lmp)
                for _ in range(test_num)
            ]
        )
        train_envs.seed(seed)
        test_envs.seed(seed)
    return env, train_envs, test_envs


if __name__ == '__main__':
    # env = Env("maps/D1_basic.cfg", 4, (4, 84, 84))
    env = Env("maps/D3_battle.cfg", 4, (4, 84, 84))
