From 1714c7f2c7295e58c9b94f6dcdc270da23f3d74c Mon Sep 17 00:00:00 2001 From: Dominik Jain Date: Thu, 7 Mar 2024 17:57:11 +0100 Subject: [PATCH] High-level API: Fix number of test episodes being incorrectly scaled by number of envs (#1071) --- docs/04_contributing/05_contributors.rst | 2 +- docs/spelling_wordlist.txt | 1 + tianshou/highlevel/agent.py | 4 ++-- tianshou/highlevel/config.py | 9 --------- 4 files changed, 4 insertions(+), 12 deletions(-) diff --git a/docs/04_contributing/05_contributors.rst b/docs/04_contributing/05_contributors.rst index d48a7148f..715c24ab3 100644 --- a/docs/04_contributing/05_contributors.rst +++ b/docs/04_contributing/05_contributors.rst @@ -2,7 +2,7 @@ Contributors ============ We always welcome contributions to help make Tianshou better! -Tiashou was originally created by the `THU-ML Group `_ at Tsinghua University. +Tianshou was originally created by the `THU-ML Group `_ at Tsinghua University. Today, it is backed by the `appliedAI Institute for Europe `_, which is committed to making Tianshou the go-to resource for reinforcement learning research and development, diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt index 8ab0c2876..d3cd95a73 100644 --- a/docs/spelling_wordlist.txt +++ b/docs/spelling_wordlist.txt @@ -253,3 +253,4 @@ Dominik Tsinghua Tianshou appliedAI +Panchenko diff --git a/tianshou/highlevel/agent.py b/tianshou/highlevel/agent.py index 1a1a0bf76..39d367e5f 100644 --- a/tianshou/highlevel/agent.py +++ b/tianshou/highlevel/agent.py @@ -184,7 +184,7 @@ def create_trainer( max_epoch=sampling_config.num_epochs, step_per_epoch=sampling_config.step_per_epoch, repeat_per_collect=sampling_config.repeat_per_collect, - episode_per_test=sampling_config.num_test_episodes_per_test_env, + episode_per_test=sampling_config.num_test_episodes, batch_size=sampling_config.batch_size, step_per_collect=sampling_config.step_per_collect, save_best_fn=policy_persistence.get_save_best_fn(world), @@ -228,7 +228,7 @@ def create_trainer( max_epoch=sampling_config.num_epochs, step_per_epoch=sampling_config.step_per_epoch, step_per_collect=sampling_config.step_per_collect, - episode_per_test=sampling_config.num_test_episodes_per_test_env, + episode_per_test=sampling_config.num_test_episodes, batch_size=sampling_config.batch_size, save_best_fn=policy_persistence.get_save_best_fn(world), logger=world.logger, diff --git a/tianshou/highlevel/config.py b/tianshou/highlevel/config.py index 498247214..48dde374d 100644 --- a/tianshou/highlevel/config.py +++ b/tianshou/highlevel/config.py @@ -1,4 +1,3 @@ -import math import multiprocessing from dataclasses import dataclass @@ -9,7 +8,6 @@ class SamplingConfig(ToStringMixin): """Configuration of sampling, epochs, parallelization, buffers, collectors, and batching.""" - # TODO: What are the most reasonable defaults? num_epochs: int = 100 """ the number of epochs to run training for. An epoch is the outermost iteration level and each @@ -55,8 +53,6 @@ class SamplingConfig(ToStringMixin): num_test_episodes: int = 1 """the total number of episodes to collect in each test step (across all test environments). - This should be a multiple of the number of test environments; if it is not, the effective - number of episodes collected will be the nearest multiple (rounded up). """ buffer_size: int = 4096 @@ -129,8 +125,3 @@ class SamplingConfig(ToStringMixin): def __post_init__(self) -> None: if self.num_train_envs == -1: self.num_train_envs = multiprocessing.cpu_count() - - @property - def num_test_episodes_per_test_env(self) -> int: - """:return: the number of episodes to collect per test environment in every test step""" - return math.ceil(self.num_test_episodes / self.num_test_envs)