From 1714c7f2c7295e58c9b94f6dcdc270da23f3d74c Mon Sep 17 00:00:00 2001
From: Dominik Jain <d.jain@appliedai.de>
Date: Thu, 7 Mar 2024 17:57:11 +0100
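Subject: [PATCH] High-level API: Fix number of test episodes being incorrectly
 scaled by number of envs (#1071)

Pass SamplingConfig.num_test_episodes directly as the trainers'
episode_per_test argument, and remove the
num_test_episodes_per_test_env property, which returned
ceil(num_test_episodes / num_test_envs), along with the docstring note
about rounding up to a multiple of the number of test environments.

Also fix the "Tiashou" typo in the contributors page and add
"Panchenko" to the spelling word list.
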
---
 docs/04_contributing/05_contributors.rst | 2 +-
 docs/spelling_wordlist.txt               | 1 +
 tianshou/highlevel/agent.py              | 4 ++--
 tianshou/highlevel/config.py             | 9 ---------
 4 files changed, 4 insertions(+), 12 deletions(-)

diff --git a/docs/04_contributing/05_contributors.rst b/docs/04_contributing/05_contributors.rst
index d48a7148f..715c24ab3 100644
--- a/docs/04_contributing/05_contributors.rst
+++ b/docs/04_contributing/05_contributors.rst
@@ -2,7 +2,7 @@ Contributors
 ============
 
 We always welcome contributions to help make Tianshou better!
-Tiashou was originally created by the `THU-ML Group <https://ml.cs.tsinghua.edu.cn>`_ at Tsinghua University.
+Tianshou was originally created by the `THU-ML Group <https://ml.cs.tsinghua.edu.cn>`_ at Tsinghua University.
 
 Today, it is backed by the `appliedAI Institute for Europe <https://www.appliedai-institute.de/en/>`_,
 which is committed to making Tianshou the go-to resource for reinforcement learning research and development,
diff --git a/docs/spelling_wordlist.txt b/docs/spelling_wordlist.txt
index 8ab0c2876..d3cd95a73 100644
--- a/docs/spelling_wordlist.txt
+++ b/docs/spelling_wordlist.txt
@@ -253,3 +253,4 @@ Dominik
 Tsinghua
 Tianshou
 appliedAI
+Panchenko
diff --git a/tianshou/highlevel/agent.py b/tianshou/highlevel/agent.py
index 1a1a0bf76..39d367e5f 100644
--- a/tianshou/highlevel/agent.py
+++ b/tianshou/highlevel/agent.py
@@ -184,7 +184,7 @@ def create_trainer(
             max_epoch=sampling_config.num_epochs,
             step_per_epoch=sampling_config.step_per_epoch,
             repeat_per_collect=sampling_config.repeat_per_collect,
-            episode_per_test=sampling_config.num_test_episodes_per_test_env,
+            episode_per_test=sampling_config.num_test_episodes,
             batch_size=sampling_config.batch_size,
             step_per_collect=sampling_config.step_per_collect,
             save_best_fn=policy_persistence.get_save_best_fn(world),
@@ -228,7 +228,7 @@ def create_trainer(
             max_epoch=sampling_config.num_epochs,
             step_per_epoch=sampling_config.step_per_epoch,
             step_per_collect=sampling_config.step_per_collect,
-            episode_per_test=sampling_config.num_test_episodes_per_test_env,
+            episode_per_test=sampling_config.num_test_episodes,
             batch_size=sampling_config.batch_size,
             save_best_fn=policy_persistence.get_save_best_fn(world),
             logger=world.logger,
diff --git a/tianshou/highlevel/config.py b/tianshou/highlevel/config.py
index 498247214..48dde374d 100644
--- a/tianshou/highlevel/config.py
+++ b/tianshou/highlevel/config.py
@@ -1,4 +1,3 @@
-import math
 import multiprocessing
 from dataclasses import dataclass
 
@@ -9,7 +8,6 @@
 class SamplingConfig(ToStringMixin):
     """Configuration of sampling, epochs, parallelization, buffers, collectors, and batching."""
 
-    # TODO: What are the most reasonable defaults?
     num_epochs: int = 100
     """
     the number of epochs to run training for. An epoch is the outermost iteration level and each
@@ -55,8 +53,6 @@ class SamplingConfig(ToStringMixin):
 
     num_test_episodes: int = 1
     """the total number of episodes to collect in each test step (across all test environments).
-    This should be a multiple of the number of test environments; if it is not, the effective
-    number of episodes collected will be the nearest multiple (rounded up).
     """
 
     buffer_size: int = 4096
@@ -129,8 +125,3 @@ class SamplingConfig(ToStringMixin):
     def __post_init__(self) -> None:
         if self.num_train_envs == -1:
             self.num_train_envs = multiprocessing.cpu_count()
-
-    @property
-    def num_test_episodes_per_test_env(self) -> int:
-        """:return: the number of episodes to collect per test environment in every test step"""
-        return math.ceil(self.num_test_episodes / self.num_test_envs)