diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 5d6f06123d0..65e265b1145 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -26,10 +26,11 @@ jobs: build-docs: strategy: matrix: - python_version: ["3.9"] - cuda_arch_version: ["12.4"] + python_version: [ "3.9" ] + cuda_arch_version: [ "12.4" ] uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main with: + runner: linux.g5.4xlarge.nvidia.gpu repository: pytorch/rl upload-artifact: docs timeout: 120 @@ -38,7 +39,6 @@ jobs: set -v # apt-get update && apt-get install -y -f git wget gcc g++ dialog apt-utils yum makecache - # yum install -y glfw glew mesa-libGL mesa-libGL-devel mesa-libOSMesa-devel egl-utils freeglut # Install Mesa and OpenGL Libraries: yum install -y glfw mesa-libGL mesa-libGL-devel egl-utils freeglut mesa-libGLU mesa-libEGL # Install DRI Drivers: @@ -112,7 +112,7 @@ jobs: cd ./docs # timeout 7m bash -ic "MUJOCO_GL=egl sphinx-build ./source _local_build" || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi # bash -ic "PYOPENGL_PLATFORM=egl MUJOCO_GL=egl sphinx-build ./source _local_build" || code=$?; if [[ $code -ne 124 && $code -ne 0 ]]; then exit $code; fi - PYOPENGL_PLATFORM=egl MUJOCO_GL=egl TORCHRL_CONSOLE_STREAM=stdout sphinx-build ./source _local_build -v + PYOPENGL_PLATFORM=egl MUJOCO_GL=egl TORCHRL_CONSOLE_STREAM=stdout sphinx-build ./source _local_build -v -j 4 cd .. cp -r docs/_local_build/* "${RUNNER_ARTIFACT_DIR}" @@ -123,8 +123,8 @@ jobs: upload: needs: build-docs - if: github.repository == 'pytorch/rl' && github.event_name == 'push' && - ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag') + if: github.repository == 'pytorch/rl' && github.event_name == 'push' && + ((github.ref_type == 'branch' && github.ref_name == 'main') || github.ref_type == 'tag') permissions: contents: write uses: pytorch/test-infra/.github/workflows/linux_job.yml@main diff --git a/docs/requirements.txt b/docs/requirements.txt index e212cd942f4..966081ccc77 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -28,3 +28,4 @@ vmas onnxscript onnxruntime onnx +psutil diff --git a/docs/source/conf.py b/docs/source/conf.py index 96b8b193fc8..872700f2027 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,8 +28,7 @@ import pytorch_sphinx_theme import torchrl -# Suppress warnings - TODO -# suppress_warnings = [ 'misc.highlighting_failure' ] +# Suppress warnings warnings.filterwarnings("ignore", category=UserWarning) project = "torchrl" @@ -86,6 +85,21 @@ "torchvision": ("https://pytorch.org/vision/stable/", None), } + +def kill_procs(gallery_conf, fname): + import os + + import psutil + + # Get the current process + current_proc = psutil.Process(os.getpid()) + # Iterate over all child processes + for child in current_proc.children(recursive=True): + # Kill the child process + child.terminate() + print(f"Killed child process with PID {child.pid}") # noqa: T201 + + sphinx_gallery_conf = { "examples_dirs": "reference/generated/tutorials/", # path to your example scripts "gallery_dirs": "tutorials", # path to where to save gallery generated output @@ -95,9 +109,12 @@ "notebook_images": "reference/generated/tutorials/media/", # images to parse "download_all_examples": True, "abort_on_example_error": True, - "show_memory": True, + # "show_memory": True, + "plot_gallery": "False", "capture_repr": ("_repr_html_", "__repr__"), # capture representations "write_computation_times": True, + # "compress_images": ("images", "thumbnails"), + "reset_modules": (kill_procs, "matplotlib", "seaborn"), } napoleon_use_ivar = True diff --git a/docs/source/content_generation.py b/docs/source/content_generation.py index 29e1afff29d..e24dbd33a04 100644 --- a/docs/source/content_generation.py +++ b/docs/source/content_generation.py @@ -83,10 +83,6 @@ def generate_tutorial_references(tutorial_path: str, file_type: str) -> None: for f in os.listdir(tutorial_path) if f.endswith((".py", ".rst", ".png")) ] - # Make rb_tutorial.py the first one - file_paths = [p for p in file_paths if p.endswith("rb_tutorial.py")] + [ - p for p in file_paths if not p.endswith("rb_tutorial.py") - ] for file_path in file_paths: shutil.copyfile(file_path, os.path.join(target_path, Path(file_path).name)) diff --git a/tutorials/sphinx-tutorials/coding_ddpg.py b/tutorials/sphinx-tutorials/coding_ddpg.py index 734fed2d74f..c12777b443c 100644 --- a/tutorials/sphinx-tutorials/coding_ddpg.py +++ b/tutorials/sphinx-tutorials/coding_ddpg.py @@ -1234,3 +1234,18 @@ def ceil_div(x, y): # - Using `@dispatch` (see `[Feature] Distpatch IQL loss module `_.) # - Allowing flexible TensorDict keys. # + +# sphinx_gallery_start_ignore + +# Remove scratch dir +try: + import shutil + + # Use shutil.rmtree() to delete the directory and all its contents + shutil.rmtree(buffer_scratch_dir) + print(f"Directory '{buffer_scratch_dir}' deleted successfully.") +except FileNotFoundError: + print(f"Directory '{buffer_scratch_dir}' not found.") +except Exception as e: + print(f"Error deleting directory: {e}") +# sphinx_gallery_end_ignore diff --git a/tutorials/sphinx-tutorials/coding_dqn.py b/tutorials/sphinx-tutorials/coding_dqn.py index b0e244e4143..fc360114377 100644 --- a/tutorials/sphinx-tutorials/coding_dqn.py +++ b/tutorials/sphinx-tutorials/coding_dqn.py @@ -380,10 +380,13 @@ def make_model(dummy_env): # time must always have the same shape. +buffer_scratch_dir = tempfile.TemporaryDirectory().name + + def get_replay_buffer(buffer_size, n_optim, batch_size, device): replay_buffer = TensorDictReplayBuffer( batch_size=batch_size, - storage=LazyMemmapStorage(buffer_size), + storage=LazyMemmapStorage(buffer_size, scratch_dir=buffer_scratch_dir), prefetch=n_optim, transform=lambda td: td.to(device), ) @@ -777,3 +780,19 @@ def print_csv_files_in_folder(folder_path): # - A distributional loss (see :class:`~torchrl.objectives.DistributionalDQNLoss` # for more information). # - More fancy exploration techniques, such as :class:`~torchrl.modules.NoisyLinear` layers and such. + + +# sphinx_gallery_start_ignore + +# Remove scratch dir +try: + import shutil + + # Use shutil.rmtree() to delete the directory and all its contents + shutil.rmtree(buffer_scratch_dir) + print(f"Directory '{buffer_scratch_dir}' deleted successfully.") +except FileNotFoundError: + print(f"Directory '{buffer_scratch_dir}' not found.") +except Exception as e: + print(f"Error deleting directory: {e}") +# sphinx_gallery_end_ignore diff --git a/tutorials/sphinx-tutorials/dqn_with_rnn.py b/tutorials/sphinx-tutorials/dqn_with_rnn.py index 58c47f68321..e58fc933267 100644 --- a/tutorials/sphinx-tutorials/dqn_with_rnn.py +++ b/tutorials/sphinx-tutorials/dqn_with_rnn.py @@ -20,6 +20,8 @@ * tqdm """ +import tempfile + ######################################################################### # Overview # -------- @@ -393,9 +395,13 @@ # For the sake of efficiency, we're only running a few thousands iterations # here. In a real setting, the total number of frames should be set to 1M. # +buffer_scratch_dir = tempfile.TemporaryDirectory().name + collector = SyncDataCollector(env, stoch_policy, frames_per_batch=50, total_frames=200) rb = TensorDictReplayBuffer( - storage=LazyMemmapStorage(20_000), batch_size=4, prefetch=10 + storage=LazyMemmapStorage(20_000, scratch_dir=buffer_scratch_dir), + batch_size=4, + prefetch=10, ) ###################################################################### @@ -470,3 +476,19 @@ # --------------- # # - The TorchRL documentation can be found `here `_. + + +# sphinx_gallery_start_ignore + +# Remove scratch dir +try: + import shutil + + # Use shutil.rmtree() to delete the directory and all its contents + shutil.rmtree(buffer_scratch_dir) + print(f"Directory '{buffer_scratch_dir}' deleted successfully.") +except FileNotFoundError: + print(f"Directory '{buffer_scratch_dir}' not found.") +except Exception as e: + print(f"Error deleting directory: {e}") +# sphinx_gallery_end_ignore diff --git a/tutorials/sphinx-tutorials/getting-started-3.py b/tutorials/sphinx-tutorials/getting-started-3.py index 70ffe37a005..72b855348f5 100644 --- a/tutorials/sphinx-tutorials/getting-started-3.py +++ b/tutorials/sphinx-tutorials/getting-started-3.py @@ -16,6 +16,8 @@ """ +import tempfile + ################################# # # There is no learning without data. In supervised learning, users are @@ -56,12 +58,12 @@ import torch -torch.manual_seed(0) - from torchrl.collectors import SyncDataCollector from torchrl.envs import GymEnv from torchrl.envs.utils import RandomPolicy +torch.manual_seed(0) + env = GymEnv("CartPole-v1") env.set_seed(0) @@ -141,7 +143,11 @@ from torchrl.data.replay_buffers import LazyMemmapStorage, ReplayBuffer -buffer = ReplayBuffer(storage=LazyMemmapStorage(max_size=1000)) +buffer_scratch_dir = tempfile.TemporaryDirectory().name + +buffer = ReplayBuffer( + storage=LazyMemmapStorage(max_size=1000, scratch_dir=buffer_scratch_dir) +) ################################# # Populating the buffer can be done via the @@ -190,3 +196,18 @@ # batch-size in the constructor, then try to iterate over it. This is # equivalent to calling ``rb.sample()`` within a loop! # + +# sphinx_gallery_start_ignore + +# Remove scratch dir +try: + import shutil + + # Use shutil.rmtree() to delete the directory and all its contents + shutil.rmtree(buffer_scratch_dir) + print(f"Directory '{buffer_scratch_dir}' deleted successfully.") +except FileNotFoundError: + print(f"Directory '{buffer_scratch_dir}' not found.") +except Exception as e: + print(f"Error deleting directory: {e}") +# sphinx_gallery_end_ignore diff --git a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py index a7bd74a4deb..479b59ede53 100644 --- a/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py +++ b/tutorials/sphinx-tutorials/multiagent_competitive_ddpg.py @@ -661,11 +661,16 @@ # This will result in faster sampling but is subject to the memory constraints of the selected device. # + replay_buffers = {} +scratch_dirs = [] for group, _agents in env.group_map.items(): + scratch_dir = tempfile.TemporaryDirectory().name + scratch_dirs.append(scratch_dir) replay_buffer = ReplayBuffer( storage=LazyMemmapStorage( - memory_size + memory_size, + scratch_dir=scratch_dir, ), # We will store up to memory_size multi-agent transitions sampler=RandomSampler(), batch_size=train_batch_size, # We will sample batches of this size @@ -948,3 +953,19 @@ def process_batch(batch: TensorDictBase) -> TensorDictBase: # # Scenarios available in `VMAS `__ # + +# sphinx_gallery_start_ignore + +# Remove scratch dir +try: + import shutil + + for scratch_dir in scratch_dirs: + # Use shutil.rmtree() to delete the directory and all its contents + shutil.rmtree(scratch_dir) + print(f"Directory '{scratch_dir}' deleted successfully.") +except FileNotFoundError: + print(f"Directory '{scratch_dir}' not found.") +except Exception as e: + print(f"Error deleting directory: {e}") +# sphinx_gallery_end_ignore diff --git a/tutorials/sphinx-tutorials/pretrained_models.py b/tutorials/sphinx-tutorials/pretrained_models.py index 4de341c7378..d8e9cc16ba9 100644 --- a/tutorials/sphinx-tutorials/pretrained_models.py +++ b/tutorials/sphinx-tutorials/pretrained_models.py @@ -3,6 +3,8 @@ ======================= This tutorial explains how to use pretrained models in TorchRL. """ +import tempfile + ############################################################################## # At the end of this tutorial, you will be capable of using pretrained models # for efficient image representation, and fine-tune them. @@ -66,7 +68,7 @@ r3m = R3MTransform( "resnet50", in_keys=["pixels"], - download=True, + download=False, # Turn to true for real-life testing ) env_transformed = TransformedEnv(base_env, r3m) net = nn.Sequential( @@ -114,7 +116,8 @@ # from torchrl.data import LazyMemmapStorage, ReplayBuffer -storage = LazyMemmapStorage(1000) +buffer_scratch_dir = tempfile.TemporaryDirectory().name +storage = LazyMemmapStorage(1000, scratch_dir=buffer_scratch_dir) rb = ReplayBuffer(storage=storage, transform=Compose(lambda td: td.to(device), r3m)) ############################################################################## @@ -140,7 +143,15 @@ print("data after sampling:", batch) # sphinx_gallery_start_ignore -import time - -time.sleep(10) +# Remove scratch dir +try: + import shutil + + # Use shutil.rmtree() to delete the directory and all its contents + shutil.rmtree(buffer_scratch_dir) + print(f"Directory '{buffer_scratch_dir}' deleted successfully.") +except FileNotFoundError: + print(f"Directory '{buffer_scratch_dir}' not found.") +except Exception as e: + print(f"Error deleting directory: {e}") # sphinx_gallery_end_ignore diff --git a/tutorials/sphinx-tutorials/rb_tutorial.py b/tutorials/sphinx-tutorials/rb_tutorial.py index 3bc5adc7ce4..ea8201d9dfa 100644 --- a/tutorials/sphinx-tutorials/rb_tutorial.py +++ b/tutorials/sphinx-tutorials/rb_tutorial.py @@ -56,11 +56,6 @@ # example: # -###################################################################### -# .. warning:: This tutorial build is temporarily disabled. - -exit(0) - # sphinx_gallery_start_ignore import gc