From 8ac5be73e053eaba47d0680ce19dfa9089dbdd94 Mon Sep 17 00:00:00 2001
From: Biagioni
Date: Mon, 15 Nov 2021 14:29:41 -0700
Subject: [PATCH] update readme for running rllib, and set default max episode
 steps to 250 to avoid wonky training behavior

---
 examples/marl/rllib/README.md             | 36 +++++++++++++++++++++++
 examples/marl/rllib/heterogeneous/args.py |  2 +-
 paper/fig3/README.md                      |  2 +-
 paper/fig3/args.py                        |  2 +-
 4 files changed, 39 insertions(+), 3 deletions(-)

diff --git a/examples/marl/rllib/README.md b/examples/marl/rllib/README.md
index 93f53c4..5dac136 100644
--- a/examples/marl/rllib/README.md
+++ b/examples/marl/rllib/README.md
@@ -6,3 +6,39 @@
 pip install -r requirements.txt
 
 to install the required dependencies in your environment.
+__Note:__ RLlib is a sophisticated software package, and there are many
+hyperparameters involved in running any given algorithm
+(see, e.g., [here](https://docs.ray.io/en/latest/rllib-algorithms.html#ppo)).
+Experiments for our paper were run using compute nodes on NREL's Eagle
+supercomputer, with 34 CPU rollout workers and 1 GPU learner.
+(Multi-node jobs are also possible but are architecture-dependent;
+we have Slurm-based scripts that we can share on request.) Most
+users running on a local machine won't have access to these kinds of resources,
+which may affect how you want to run the training. Some considerations:
+
+1. The `train_batch_size` parameter denotes the total number of environment steps
+used for each policy update. If this number is large (e.g., 10k) but `num_workers`
+is small (e.g., 4), it may take a very long time for the workers to collect each batch.
+In our example, each worker would need to collect 2500 steps, or
+10 complete episodes. Consider using a smaller `train_batch_size` in this case.
+
+2. The `rollout_fragment_length` hyperparameter is RLlib's way of letting you decide
+how to break up episodes before sending trajectories to the learner. We set this
+value equal to the episode length so that there are no "boundaries" in the training data,
+but this is not strictly necessary. A related parameter is `batch_mode`, which
+determines whether the agent will allow episodes to be cut short (`truncate_episodes`)
+or will require that each episode finish before performing policy updates
+(`complete_episodes`). The defaults we provide
+(`rollout_fragment_length = env.max_episode_steps` and
+`batch_mode = complete_episodes`) should work for local training, so long as
+`train_batch_size` is sufficiently small (see the previous bullet).
+
+3. Finally, the training batch size, i.e., the amount of data the agent uses for policy
+updates, turns out to be a significant factor in whether and how quickly the algorithm
+converges. You may have to tune other parameters, such as the learning rate (`lr`),
+that are known to be tightly coupled to batch size in terms of learning
+performance. See, e.g., https://arxiv.org/abs/1812.06162 for more discussion
+and references. You can use [Ray's Tune library](https://docs.ray.io/en/latest/tune/index.html)
+to help with hyperparameter tuning.
+
+Happy learning!
diff --git a/examples/marl/rllib/heterogeneous/args.py b/examples/marl/rllib/heterogeneous/args.py
index 8ccdde7..150b415 100644
--- a/examples/marl/rllib/heterogeneous/args.py
+++ b/examples/marl/rllib/heterogeneous/args.py
@@ -4,7 +4,7 @@
 parser.add_argument("--env-name", default="buildings", type=str)
 parser.add_argument("--system-load-rescale-factor", default=0.6, type=float)
-parser.add_argument("--max-episode-steps", default=None, type=int)
+parser.add_argument("--max-episode-steps", default=250, type=int)
 parser.add_argument("--local-dir", default="~/ray_result", type=str)
 parser.add_argument("--stop-timesteps", default=int(1e10), type=int)
 parser.add_argument("--stop-iters", default=int(1e10), type=int)
diff --git a/paper/fig3/README.md b/paper/fig3/README.md
index 942e16b..b4f992f 100644
--- a/paper/fig3/README.md
+++ b/paper/fig3/README.md
@@ -15,4 +15,4 @@ python -u train.py \
 ```
 
 See the `examples/marl/rllib/README.md` for instructions on how to install
-`rllib` in your environment.
+`rllib` in your environment, and other tips for running RLlib training.
diff --git a/paper/fig3/args.py b/paper/fig3/args.py
index 852bf29..2dcb82b 100644
--- a/paper/fig3/args.py
+++ b/paper/fig3/args.py
@@ -4,7 +4,7 @@
 parser.add_argument("--env-name", default="power-gridworld", type=str)
 parser.add_argument("--system-load-rescale-factor", default=0.6, type=float)
-parser.add_argument("--max-episode-steps", default=None, type=int)
+parser.add_argument("--max-episode-steps", default=250, type=int)
 parser.add_argument("--local-dir", default="~/ray_result", type=str)
 parser.add_argument("--stop-timesteps", default=int(1e10), type=int)
 parser.add_argument("--stop-iters", default=int(1e10), type=int)
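For reference, here is a minimal sketch of a local PPO training loop built around the hyperparameters discussed in the README hunk above. This is not the repository's training script: it assumes the pre-2.0 RLlib trainer API (`ray.rllib.agents.ppo.PPOTrainer`) that was current at the time of this commit, substitutes the built-in `CartPole-v0` Gym environment for the repo's gridworld environments, and uses placeholder values (4 workers, 200-step fragments, an 800-step `train_batch_size`) chosen only so that each worker contributes one complete episode per batch.

```python
# Illustrative sketch only (not the repo's train.py). Assumes the pre-2.0
# RLlib trainer API and CartPole-v0 as a stand-in environment.
import ray
from ray.rllib.agents.ppo import PPOTrainer

config = {
    "env": "CartPole-v0",               # placeholder environment (200-step episodes)
    "framework": "torch",
    "num_workers": 4,                   # rollout workers; scale to your machine
    "num_gpus": 0,                      # no GPU learner needed locally
    "rollout_fragment_length": 200,     # set equal to the episode length (bullet 2)
    "batch_mode": "complete_episodes",  # do not truncate episodes (bullet 2)
    "train_batch_size": 800,            # 4 workers x 1 episode each (bullet 1)
    "lr": 5e-5,                         # tune jointly with batch size (bullet 3)
}

ray.init()
trainer = PPOTrainer(config=config)
for i in range(10):
    result = trainer.train()
    print(i, result["episode_reward_mean"])
```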