[Doc] Fix (and deactivate) tutorials

ghstack-source-id: 56c7757c36a2d609688ce0777a49d54763d3e691 Pull Request resolved: #2785
pytorch · Feb 12, 2025 · f1c42e0 · f1c42e0 · github-actions · Feb 12, 2025
1 parent f5445a4
commit f1c42e0
Show file tree

Hide file tree

Showing 11 changed files with 147 additions and 29 deletions.
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
@@ -26,10 +26,11 @@ jobs:
   build-docs:
     strategy:
       matrix:
-        python_version: ["3.9"]
-        cuda_arch_version: ["12.4"]
+        python_version: [ "3.9" ]
+        cuda_arch_version: [ "12.4" ]
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
     with:
+      runner: linux.g5.4xlarge.nvidia.gpu
       repository: pytorch/rl
       upload-artifact: docs
       timeout: 120
@@ -38,7 +39,6 @@ jobs:
         set -v
         # apt-get update && apt-get install -y -f git wget gcc g++ dialog apt-utils
         yum makecache
-        # yum install -y glfw glew mesa-libGL mesa-libGL-devel mesa-libOSMesa-devel egl-utils freeglut
         # Install Mesa and OpenGL Libraries:
         yum install -y glfw mesa-libGL mesa-libGL-devel egl-utils freeglut mesa-libGLU mesa-libEGL
         # Install DRI Drivers:
@@ -112,7 +112,7 @@ jobs:
         cd ./docs
         # timeout 7m bash -ic "MUJOCO_GL=egl sphinx-build ./source _local_build" || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi
         # bash -ic "PYOPENGL_PLATFORM=egl MUJOCO_GL=egl sphinx-build ./source _local_build" || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi
-        PYOPENGL_PLATFORM=egl MUJOCO_GL=egl TORCHRL_CONSOLE_STREAM=stdout sphinx-build ./source _local_build -v
+        PYOPENGL_PLATFORM=egl MUJOCO_GL=egl TORCHRL_CONSOLE_STREAM=stdout sphinx-build ./source _local_build -v -j 4
         cd ..
 
         cp -r docs/_local_build/* "${RUNNER_ARTIFACT_DIR}"
@@ -123,8 +123,8 @@ jobs:
 
   upload:
     needs: build-docs
-    if: github.repository == 'pytorch/rl' && github.event_name == 'push' && 
-        ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
+    if: github.repository == 'pytorch/rl' && github.event_name == 'push' &&
+      ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag')
     permissions:
       contents: write
     uses: pytorch/test-infra/.github/workflows/linux_job.yml@main

diff --git a/docs/requirements.txt b/docs/requirements.txt
@@ -28,3 +28,4 @@ vmas
 onnxscript
 onnxruntime
 onnx
+psutil
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -28,8 +28,7 @@
 import pytorch_sphinx_theme
 import torchrl
 
-# Suppress warnings - TODO
-# suppress_warnings = [ 'misc.highlighting_failure' ]
+# Suppress warnings
 warnings.filterwarnings("ignore", category=UserWarning)
 
 project = "torchrl"
@@ -86,6 +85,21 @@
     "torchvision": ("https://pytorch.org/vision/stable/", None),
 }
 
+
+def kill_procs(gallery_conf, fname):
+    """Terminate every descendant process of the current process.
+
+    Registered in ``sphinx_gallery_conf["reset_modules"]`` below.
+
+    Args:
+        gallery_conf: unused; part of the reset-callable signature.
+        fname: unused; part of the reset-callable signature.
+    """
+    import os
+
+    import psutil
+
+    # Get the current process
+    current_proc = psutil.Process(os.getpid())
+    # Iterate over all child processes (recursive=True includes grandchildren)
+    for child in current_proc.children(recursive=True):
+        try:
+            # terminate() requests termination; it does not wait for the exit
+            child.terminate()
+        except psutil.NoSuchProcess:
+            # The child exited on its own between the listing and the signal
+            continue
+        print(f"Killed child process with PID {child.pid}")  # noqa: T201
+
+
 sphinx_gallery_conf = {
     "examples_dirs": "reference/generated/tutorials/",  # path to your example scripts
     "gallery_dirs": "tutorials",  # path to where to save gallery generated output
@@ -95,9 +109,12 @@
     "notebook_images": "reference/generated/tutorials/media/",  # images to parse
     "download_all_examples": True,
     "abort_on_example_error": True,
-    "show_memory": True,
+    # "show_memory": True,
+    "plot_gallery": "False",
     "capture_repr": ("_repr_html_", "__repr__"),  # capture representations
     "write_computation_times": True,
+    # "compress_images": ("images", "thumbnails"),
+    "reset_modules": (kill_procs, "matplotlib", "seaborn"),
 }
 
 napoleon_use_ivar = True

diff --git a/docs/source/content_generation.py b/docs/source/content_generation.py
@@ -83,10 +83,6 @@ def generate_tutorial_references(tutorial_path: str, file_type: str) -> None:
         for f in os.listdir(tutorial_path)
         if f.endswith((".py", ".rst", ".png"))
     ]
-    # Make rb_tutorial.py the first one
-    file_paths = [p for p in file_paths if p.endswith("rb_tutorial.py")] + [
-        p for p in file_paths if not p.endswith("rb_tutorial.py")
-    ]
 
     for file_path in file_paths:
         shutil.copyfile(file_path, os.path.join(target_path, Path(file_path).name))
diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py
@@ -1234,3 +1234,18 @@ def ceil_div(x, y):
 # - Using `@dispatch` (see `[Feature] Distpatch IQL loss module <https://github.com/pytorch/rl/pull/1230>`_.)
 # - Allowing flexible TensorDict keys.
 #
+
+# sphinx_gallery_start_ignore
+
+# Remove scratch dir
+try:
+    import shutil
+
+    # Use shutil.rmtree() to delete the directory and all its contents
+    shutil.rmtree(buffer_scratch_dir)
+    print(f"Directory '{buffer_scratch_dir}' deleted successfully.")
+except FileNotFoundError:
+    print(f"Directory '{buffer_scratch_dir}' not found.")
+except Exception as e:
+    print(f"Error deleting directory: {e}")
+# sphinx_gallery_end_ignore
diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py
@@ -380,10 +380,13 @@ def make_model(dummy_env):
 # time must always have the same shape.
 
 
+buffer_scratch_dir = tempfile.mkdtemp()  # deleted by the cleanup at the end
+
+
 def get_replay_buffer(buffer_size, n_optim, batch_size, device):
     replay_buffer = TensorDictReplayBuffer(
         batch_size=batch_size,
-        storage=LazyMemmapStorage(buffer_size),
+        storage=LazyMemmapStorage(buffer_size, scratch_dir=buffer_scratch_dir),
         prefetch=n_optim,
         transform=lambda td: td.to(device),
     )
@@ -777,3 +780,19 @@ def print_csv_files_in_folder(folder_path):
 # - A distributional loss (see :class:`~torchrl.objectives.DistributionalDQNLoss`
 #   for more information).
 # - More fancy exploration techniques, such as :class:`~torchrl.modules.NoisyLinear` layers and such.
+
+
+# sphinx_gallery_start_ignore
+
+# Remove scratch dir
+try:
+    import shutil
+
+    # Use shutil.rmtree() to delete the directory and all its contents
+    shutil.rmtree(buffer_scratch_dir)
+    print(f"Directory '{buffer_scratch_dir}' deleted successfully.")
+except FileNotFoundError:
+    print(f"Directory '{buffer_scratch_dir}' not found.")
+except Exception as e:
+    print(f"Error deleting directory: {e}")
+# sphinx_gallery_end_ignore
diff --git a/tutorials/sphinx-tutorials/dqn_with_rnn.py b/tutorials/sphinx-tutorials/dqn_with_rnn.py
@@ -20,6 +20,8 @@
       * tqdm
 """
 
+import tempfile
+
 #########################################################################
 # Overview
 # --------
@@ -393,9 +395,13 @@
 #   For the sake of efficiency, we're only running a few thousands iterations
 #   here. In a real setting, the total number of frames should be set to 1M.
 #
+buffer_scratch_dir = tempfile.mkdtemp()  # deleted by the cleanup at the end
+
 collector = SyncDataCollector(env, stoch_policy, frames_per_batch=50, total_frames=200)
 rb = TensorDictReplayBuffer(
-    storage=LazyMemmapStorage(20_000), batch_size=4, prefetch=10
+    storage=LazyMemmapStorage(20_000, scratch_dir=buffer_scratch_dir),
+    batch_size=4,
+    prefetch=10,
 )
 
 ######################################################################
@@ -470,3 +476,19 @@
 # ---------------
 #
 # - The TorchRL documentation can be found `here <https://pytorch.org/rl/>`_.
+
+
+# sphinx_gallery_start_ignore
+
+# Remove scratch dir
+try:
+    import shutil
+
+    # Use shutil.rmtree() to delete the directory and all its contents
+    shutil.rmtree(buffer_scratch_dir)
+    print(f"Directory '{buffer_scratch_dir}' deleted successfully.")
+except FileNotFoundError:
+    print(f"Directory '{buffer_scratch_dir}' not found.")
+except Exception as e:
+    print(f"Error deleting directory: {e}")
+# sphinx_gallery_end_ignore
diff --git a/tutorials/sphinx-tutorials/getting-started-3.py b/tutorials/sphinx-tutorials/getting-started-3.py
@@ -16,6 +16,8 @@
 
 """
 
+import tempfile
+
 #################################
 #
 # There is no learning without data. In supervised learning, users are
@@ -56,12 +58,12 @@
 
 import torch
 
-torch.manual_seed(0)
-
 from torchrl.collectors import SyncDataCollector
 from torchrl.envs import GymEnv
 from torchrl.envs.utils import RandomPolicy
 
+torch.manual_seed(0)
+
 env = GymEnv("CartPole-v1")
 env.set_seed(0)
 
@@ -141,7 +143,11 @@
 
 from torchrl.data.replay_buffers import LazyMemmapStorage, ReplayBuffer
 
-buffer = ReplayBuffer(storage=LazyMemmapStorage(max_size=1000))
+buffer_scratch_dir = tempfile.mkdtemp()  # deleted by the cleanup at the end
+
+buffer = ReplayBuffer(
+    storage=LazyMemmapStorage(max_size=1000, scratch_dir=buffer_scratch_dir)
+)
 
 #################################
 # Populating the buffer can be done via the
@@ -190,3 +196,18 @@
 #   batch-size in the constructor, then try to iterate over it. This is
 #   equivalent to calling ``rb.sample()`` within a loop!
 #
+
+# sphinx_gallery_start_ignore
+
+# Remove scratch dir
+try:
+    import shutil
+
+    # Use shutil.rmtree() to delete the directory and all its contents
+    shutil.rmtree(buffer_scratch_dir)
+    print(f"Directory '{buffer_scratch_dir}' deleted successfully.")
+except FileNotFoundError:
+    print(f"Directory '{buffer_scratch_dir}' not found.")
+except Exception as e:
+    print(f"Error deleting directory: {e}")
+# sphinx_gallery_end_ignore
diff --git a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py
@@ -661,11 +661,16 @@
 # This will result in faster sampling but is subject to the memory constraints of the selected device.
 #
 
+
 replay_buffers = {}
+scratch_dirs = []
 for group, _agents in env.group_map.items():
+    scratch_dir = tempfile.mkdtemp()  # deleted by the cleanup at the end
+    scratch_dirs.append(scratch_dir)
     replay_buffer = ReplayBuffer(
         storage=LazyMemmapStorage(
-            memory_size
+            memory_size,
+            scratch_dir=scratch_dir,
         ),  # We will store up to memory_size multi-agent transitions
         sampler=RandomSampler(),
         batch_size=train_batch_size,  # We will sample batches of this size
@@ -948,3 +953,19 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase:
 #
 #    Scenarios available in `VMAS <https://github.com/proroklab/VectorizedMultiAgentSimulator>`__
 #
+
+# sphinx_gallery_start_ignore
+
+# Remove scratch dir
+import shutil
+
+for scratch_dir in scratch_dirs:
+    # Handle errors per directory so one failure does not skip the others
+    try:
+        shutil.rmtree(scratch_dir)
+        print(f"Directory '{scratch_dir}' deleted successfully.")
+    except FileNotFoundError:
+        print(f"Directory '{scratch_dir}' not found.")
+    except Exception as e:
+        print(f"Error deleting directory: {e}")
+# sphinx_gallery_end_ignore
diff --git a/tutorials/sphinx-tutorials/pretrained_models.py b/tutorials/sphinx-tutorials/pretrained_models.py
@@ -3,6 +3,8 @@
 =======================
 This tutorial explains how to use pretrained models in TorchRL.
 """
+import tempfile
+
 ##############################################################################
 # At the end of this tutorial, you will be capable of using pretrained models
 # for efficient image representation, and fine-tune them.
@@ -66,7 +68,7 @@
 r3m = R3MTransform(
     "resnet50",
     in_keys=["pixels"],
-    download=True,
+    download=False,  # Turn to true for real-life testing
 )
 env_transformed = TransformedEnv(base_env, r3m)
 net = nn.Sequential(
@@ -114,7 +116,8 @@
 #
 from torchrl.data import LazyMemmapStorage, ReplayBuffer
 
-storage = LazyMemmapStorage(1000)
+buffer_scratch_dir = tempfile.mkdtemp()  # deleted by the cleanup at the end
+storage = LazyMemmapStorage(1000, scratch_dir=buffer_scratch_dir)
 rb = ReplayBuffer(storage=storage, transform=Compose(lambda td: td.to(device), r3m))
 
 ##############################################################################
@@ -140,7 +143,15 @@
 print("data after sampling:", batch)
 
 # sphinx_gallery_start_ignore
-import time
-
-time.sleep(10)
+# Remove scratch dir
+try:
+    import shutil
+
+    # Use shutil.rmtree() to delete the directory and all its contents
+    shutil.rmtree(buffer_scratch_dir)
+    print(f"Directory '{buffer_scratch_dir}' deleted successfully.")
+except FileNotFoundError:
+    print(f"Directory '{buffer_scratch_dir}' not found.")
+except Exception as e:
+    print(f"Error deleting directory: {e}")
 # sphinx_gallery_end_ignore
diff --git a/tutorials/sphinx-tutorials/rb_tutorial.py b/tutorials/sphinx-tutorials/rb_tutorial.py
@@ -56,11 +56,6 @@
 # example:
 #
 
-######################################################################
-# .. warning:: This tutorial build is temporarily disabled.
-
-exit(0)
-
 # sphinx_gallery_start_ignore
 import gc
-Original file line number
+Diff line change
@@ Expand Up / @@ -28,3 +28,4 @@ vmas @@
     onnxscript
     onnxruntime
     onnx
+    psutil