From 5b9cb440f7f507948b0431077c36706a99100b78 Mon Sep 17 00:00:00 2001
From: Vincent Moens
Date: Thu, 2 May 2024 14:17:17 +0100
Subject: [PATCH] [Doc] Fix links in doc (#2151)

---
 docs/source/reference/data.rst               |  4 ++++
 docs/source/reference/envs.rst               |  4 ++--
 docs/source/reference/trainers.rst           |  2 ++
 tutorials/sphinx-tutorials/coding_dqn.py     | 12 ++++++------
 tutorials/sphinx-tutorials/coding_ppo.py     | 14 +++++++-------
 tutorials/sphinx-tutorials/multiagent_ppo.py |  4 ++--
 tutorials/sphinx-tutorials/pendulum.py       |  8 ++++----
 7 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/docs/source/reference/data.rst b/docs/source/reference/data.rst
index efb1a755d0e..3c249bcfbaa 100644
--- a/docs/source/reference/data.rst
+++ b/docs/source/reference/data.rst
@@ -24,6 +24,8 @@ widely used replay buffers:
 Composable Replay Buffers
 -------------------------
 
+.. _ref_buffers:
+
 We also give users the ability to compose a replay buffer.
 We provide a wide panel of solutions for replay buffer usage, including
 support for almost any data type; storage in memory, on device or on physical memory;
@@ -796,6 +798,8 @@ such that they can be stacked together during sampling.
 TensorSpec
 ----------
 
+.. _ref_specs:
+
 The `TensorSpec` parent class and subclasses define the basic properties of observations
 and actions in TorchRL, such as shape, device, dtype and domain.
 It is important that your environment specs match the input and output that it sends and receives, as
diff --git a/docs/source/reference/envs.rst b/docs/source/reference/envs.rst
index 5d4b6d0b7b5..3c5e69f221b 100644
--- a/docs/source/reference/envs.rst
+++ b/docs/source/reference/envs.rst
@@ -133,7 +133,7 @@ function.
 transform.
 
 
-Our environment `tutorial `_
+Our environment :ref:`tutorial `
 provides more information on how to design a custom environment from scratch.
 
 .. autosummary::
@@ -559,7 +559,7 @@ Transforms
 In most cases, the raw output of an environment must be treated before being passed to
 another object (such as a policy or a value operator). To do this, TorchRL provides a set of transforms
 that aim at reproducing the transform logic of `torch.distributions.Transform` and `torchvision.transforms`.
-Our environment `tutorial `_
+Our environment :ref:`tutorial `
 provides more information on how to design a custom transform.
 
 Transformed environments are build using the :class:`TransformedEnv` primitive.
diff --git a/docs/source/reference/trainers.rst b/docs/source/reference/trainers.rst
index e253ad7067e..2f0982257eb 100644
--- a/docs/source/reference/trainers.rst
+++ b/docs/source/reference/trainers.rst
@@ -3,6 +3,8 @@
 torchrl.trainers package
 ========================
 
+.. _ref_trainers:
+
 The trainer package provides utilities to write re-usable training scripts. The core idea is to use a
 trainer that implements a nested loop, where the outer loop runs the data collection steps and the inner
 loop the optimization steps. We believe this fits multiple RL training schemes, such as
diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py
index 36434435e81..3b9d712736a 100644
--- a/tutorials/sphinx-tutorials/coding_dqn.py
+++ b/tutorials/sphinx-tutorials/coding_dqn.py
@@ -35,7 +35,7 @@
 # - how to build an environment in TorchRL, including transforms (e.g. data
 #   normalization, frame concatenation, resizing and turning to grayscale)
 #   and parallel execution. Unlike what we did in the
-#   `DDPG tutorial `_, we
+#   :ref:`DDPG tutorial `, we
 #   will normalize the pixels and not the state vector.
 # - how to design a :class:`~torchrl.modules.QValueActor` object, i.e. an actor
 #   that estimates the action values and picks up the action with the highest
@@ -46,7 +46,7 @@
 # - and finally how to evaluate your model.
 #
 # **Prerequisites**: We encourage you to get familiar with torchrl through the
-# `PPO tutorial `_ first.
+# :ref:`PPO tutorial ` first.
 #
 # DQN
 # ---
@@ -393,8 +393,8 @@ def get_replay_buffer(buffer_size, n_optim, batch_size):
 # Data collector
 # ~~~~~~~~~~~~~~
 #
-# As in `PPO `_ and
-# `DDPG `_, we will be using
+# As in :ref:`PPO ` and
+# :ref:`DDPG `, we will be using
 # a data collector as a dataloader in the outer loop.
 #
 # We choose the following configuration: we will be running a series of
@@ -691,7 +691,7 @@ def get_loss_module(actor, gamma):
 # In this case, a location must be explicitly passed (). This method gives
 # more control over the location of the hook but it also requires more
 # understanding of the Trainer mechanism.
-# Check the `trainer documentation `_
+# Check the :ref:`trainer documentation `
 # for a detailed description of the trainer hooks.
 #
 trainer.register_op("post_optim", target_net_updater.step)
@@ -768,7 +768,7 @@ def print_csv_files_in_folder(folder_path):
 # - A prioritized replay buffer could also be used. This will give a
 #   higher priority to samples that have the worst value accuracy.
 #   Learn more on the
-#   `replay buffer section `_
+#   :ref:`replay buffer section `
 #   of the documentation.
 # - A distributional loss (see :class:`~torchrl.objectives.DistributionalDQNLoss`
 #   for more information).
diff --git a/tutorials/sphinx-tutorials/coding_ppo.py b/tutorials/sphinx-tutorials/coding_ppo.py
index 394af8e741b..eff6f31657b 100644
--- a/tutorials/sphinx-tutorials/coding_ppo.py
+++ b/tutorials/sphinx-tutorials/coding_ppo.py
@@ -26,12 +26,12 @@
 
 We will cover six crucial components of TorchRL:
 
-* `environments `__
-* `transforms `__
-* `models (policy and value function) `__
-* `loss modules `__
-* `data collectors `__
-* `replay buffers `__
+* :ref:`environments `
+* :ref:`transforms `
+* :ref:`models `
+* :ref:`loss modules `
+* :ref:`data collectors `
+* :ref:`replay buffers `
 
 """
 
@@ -478,7 +478,7 @@
 # Data collector
 # --------------
 #
-# TorchRL provides a set of `DataCollector classes `__.
+# TorchRL provides a set of :ref:`DataCollector classes `.
 # Briefly, these classes execute three operations: reset an environment,
 # compute an action given the latest observation, execute a step in the environment,
 # and repeat the last two steps until the environment signals a stop (or reaches
diff --git a/tutorials/sphinx-tutorials/multiagent_ppo.py b/tutorials/sphinx-tutorials/multiagent_ppo.py
index 39f19242648..f19fb270908 100644
--- a/tutorials/sphinx-tutorials/multiagent_ppo.py
+++ b/tutorials/sphinx-tutorials/multiagent_ppo.py
@@ -183,7 +183,7 @@
 # TorchRL API allows integrating various types of multi-agent environment flavours.
 # Some examples include environments with shared or individual agent rewards, done flags, and observations.
 # For more information on how the multi-agent environments API works in TorchRL, you can check out the dedicated
-# `doc section `_.
+# :ref:`doc section `.
 #
 # The VMAS simulator, in particular, models agents with individual rewards, info, observations, and actions, but
 # with a collective done flag.
@@ -784,7 +784,7 @@
 #
 # If you are interested in creating or wrapping your own multi-agent environments in TorchRL,
 # you can check out the dedicated
-# `doc section `_.
+# :ref:`doc section `.
 #
 # Finally, you can modify the parameters of this tutorial to try many other configurations and scenarios
 # to become a MARL master.
diff --git a/tutorials/sphinx-tutorials/pendulum.py b/tutorials/sphinx-tutorials/pendulum.py
index 5605af66a0c..4eda4ea8e91 100644
--- a/tutorials/sphinx-tutorials/pendulum.py
+++ b/tutorials/sphinx-tutorials/pendulum.py
@@ -33,9 +33,9 @@
 
 In the process, we will touch three crucial components of TorchRL:
 
-* `environments `__
-* `transforms `__
-* `models (policy and value function) `__
+* :ref:`environments `
+* :ref:`transforms `
+* :ref:`models `
 
 """
 
@@ -389,7 +389,7 @@ def _reset(self, tensordict):
 # convenient shortcuts to the content of the output and input spec containers.
 #
 # TorchRL offers multiple :class:`~torchrl.data.TensorSpec`
-# `subclasses `_ to
+# :ref:`subclasses ` to
 # encode the environment's input and output characteristics.
 #
 # Specs shape
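A note on the mechanism this patch relies on: a Sphinx label declared once with `.. _label-name:` can be cross-referenced from any page or sphinx-gallery tutorial docstring with the `:ref:` role, which is what lets these documents drop hard-coded URLs. Below is a minimal reStructuredText sketch of that pairing; the `ref_buffers` label is the one added to data.rst above, while the referencing sentence, its link text, and the explicit `<ref_buffers>` target are illustrative assumptions rather than lines taken from the patch.

    Composable Replay Buffers
    -------------------------

    .. _ref_buffers:

    We also give users the ability to compose a replay buffer.

    .. in any other .rst file or tutorial docstring (illustrative)

    Learn more in the :ref:`replay buffer section <ref_buffers>` of the documentation.

Because `:ref:` targets are resolved when the documentation is built, a renamed or removed label surfaces as a Sphinx warning rather than a silently stale hyperlink.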