Apply isort and black reformatting
Signed-off-by: erhoo82 <[email protected]>
erhoo82 committed Feb 5, 2025
1 parent a14004c commit 46dd5bf
Showing 7 changed files with 28 additions and 28 deletions.
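The commit is mechanical: isort reorders imports and black normalizes formatting; in the hunks shown, the only change is the spacing before inline comments, which black pads to at least two spaces per PEP 8. A minimal before/after sketch of that rule (hypothetical lines, not from this repository):

env_before = {"NCCL_NVLS_ENABLE": "0"} # one space before '#': black will reformat this line
env_after = {"NCCL_NVLS_ENABLE": "0"}  # two spaces before '#': already black-compliant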
12 changes: 6 additions & 6 deletions examples/llm/pretrain/default_executor.py
@@ -21,9 +21,9 @@

 def local_executor_torchrun(devices: int = 2) -> run.LocalExecutor:
     env_vars = {
-        "TRANSFORMERS_OFFLINE": "1", # Disable online downloads from HuggingFace
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
+        "TRANSFORMERS_OFFLINE": "1",  # Disable online downloads from HuggingFace
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
     }

     executor = run.LocalExecutor(ntasks_per_node=devices, launcher="torchrun", env_vars=env_vars)
@@ -55,9 +55,9 @@ def slurm_executor(
         mounts.extend(custom_mounts)

     env_vars = {
-        "TRANSFORMERS_OFFLINE": "1", # Disable online downloads from HuggingFace
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
+        "TRANSFORMERS_OFFLINE": "1",  # Disable online downloads from HuggingFace
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
     }
     if custom_env_vars:
         env_vars |= custom_env_vars
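The slurm_executor above merges caller overrides with env_vars |= custom_env_vars, the in-place dict union added in Python 3.9, in which the right-hand side wins on duplicate keys. A minimal sketch (values illustrative):

defaults = {"NCCL_NVLS_ENABLE": "0", "TRANSFORMERS_OFFLINE": "1"}
custom_env_vars = {"TRANSFORMERS_OFFLINE": "0"}  # caller re-enables HF downloads
defaults |= custom_env_vars  # right-hand side wins on duplicate keys
assert defaults == {"NCCL_NVLS_ENABLE": "0", "TRANSFORMERS_OFFLINE": "0"}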
10 changes: 5 additions & 5 deletions scripts/llm/performance/utils.py
@@ -57,12 +57,12 @@ def slurm_executor(
         sys.exit(1)

     env_vars = {
-        "TRANSFORMERS_OFFLINE": "1", # Disable online downloads from HuggingFace
+        "TRANSFORMERS_OFFLINE": "1",  # Disable online downloads from HuggingFace
         "TOKENIZERS_PARALLELISM": "False",  # Suppress tokenizer parallelism warnings
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
-        "NVTE_FLASH_ATTN": "1", # Enable Flash Attention, which is needed to enable cuDNN fused attention
-        "NVTE_FUSED_ATTN": "1", # Enable cuDNN fused attention
-        "NEMO_LOG_MEMORY_USAGE": "1", # Print memory allocation
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
+        "NVTE_FLASH_ATTN": "1",  # Enable Flash Attention, which is needed to enable cuDNN fused attention
+        "NVTE_FUSED_ATTN": "1",  # Enable cuDNN fused attention
+        "NEMO_LOG_MEMORY_USAGE": "1",  # Print memory allocation
         "NEMORUN_HOME": log_dir,
     }
     mounts = []
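The NVTE_* and NCCL_* flags above are read by Transformer Engine and NCCL when they initialize, so the executor exports them before the job process starts. For quick local experiments outside an executor, a hedged equivalent using only the standard library (flag values copied from utils.py above):

import os

perf_env = {
    "TRANSFORMERS_OFFLINE": "1",  # HuggingFace offline mode
    "NCCL_NVLS_ENABLE": "0",  # disable NVLink SHARP to save memory
    "NVTE_FLASH_ATTN": "1",  # Flash Attention backend
    "NVTE_FUSED_ATTN": "1",  # cuDNN fused attention backend
}
for key, value in perf_env.items():
    os.environ.setdefault(key, value)  # keep anything the user already exported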
14 changes: 7 additions & 7 deletions scripts/llm/pretraining.py
@@ -81,9 +81,9 @@ def slurm_executor(
         mounts.extend(custom_mounts)

     env_vars = {
-        "TRANSFORMERS_OFFLINE": "1", # Disable online downloads from HuggingFace
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
+        "TRANSFORMERS_OFFLINE": "1",  # Disable online downloads from HuggingFace
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
     }
     if custom_env_vars:
         env_vars |= custom_env_vars
@@ -116,10 +116,10 @@ def slurm_executor(

 def local_executor_torchrun(nodes: int = 1, devices: int = 2) -> run.LocalExecutor:
     env_vars = {
-        "TRANSFORMERS_OFFLINE": "1", # Disable online downloads from HuggingFace
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
-        "NVTE_FUSED_ATTN": "0", # Disable cuDNN fused attention
+        "TRANSFORMERS_OFFLINE": "1",  # Disable online downloads from HuggingFace
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
+        "NVTE_FUSED_ATTN": "0",  # Disable cuDNN fused attention
     }

     executor = run.LocalExecutor(ntasks_per_node=devices, launcher="torchrun", env_vars=env_vars)
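For context, a hedged sketch of how such an executor is typically consumed with NeMo-Run; the llama3_8b recipe and its arguments follow the NeMo 2.0 recipe API and are assumptions, not part of this commit:

import nemo_run as run
from nemo.collections import llm

# Build a pretraining recipe and hand it to the torchrun executor defined above.
recipe = llm.llama3_8b.pretrain_recipe(num_nodes=1, num_gpus_per_node=2)  # assumed recipe API
executor = local_executor_torchrun(nodes=1, devices=2)
run.run(recipe, executor=executor, name="llama3_8b_pretrain")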
4 changes: 2 additions & 2 deletions tests/collections/llm/hf/peft_nemorun.py
@@ -24,8 +24,8 @@
 def local_executor_torchrun(nodes: int = 1, devices: int = 2) -> run.LocalExecutor:
     # Env vars for jobs are configured here
     env_vars = {
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
     }

     executor = run.LocalExecutor(ntasks_per_node=devices, launcher="torchrun", env_vars=env_vars)
6 changes: 3 additions & 3 deletions tests/collections/llm/hf/pretrain_nemorun.py
@@ -25,9 +25,9 @@
 def local_executor_torchrun(nodes: int = 1, devices: int = 2) -> run.LocalExecutor:
     # Env vars for jobs are configured here
     env_vars = {
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
-        "NVTE_FUSED_ATTN": "0", # Disable cuDNN fused attention
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
+        "NVTE_FUSED_ATTN": "0",  # Disable cuDNN fused attention
     }

     executor = run.LocalExecutor(ntasks_per_node=devices, launcher="torchrun", env_vars=env_vars)
4 changes: 2 additions & 2 deletions tests/collections/llm/hf/sft_nemorun.py
@@ -25,8 +25,8 @@
 def local_executor_torchrun(nodes: int = 1, devices: int = 2) -> run.LocalExecutor:
     # Env vars for jobs are configured here
     env_vars = {
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
     }

     executor = run.LocalExecutor(ntasks_per_node=devices, launcher="torchrun", env_vars=env_vars)
6 changes: 3 additions & 3 deletions tests/collections/llm/hf/sft_nemorun_fsdp2.py
@@ -27,9 +27,9 @@
 def local_executor_torchrun(nodes: int = 1, devices: int = 2) -> run.LocalExecutor:
     # Env vars for jobs are configured here
     env_vars = {
-        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", # Disable caching NCCL communication buffer memory
-        "NCCL_NVLS_ENABLE": "0", # Disable NVLink SHARP to save memory
-        "NVTE_FUSED_ATTN": "0", # Disable cuDNN attention
+        "TORCH_NCCL_AVOID_RECORD_STREAMS": "1",  # Disable caching NCCL communication buffer memory
+        "NCCL_NVLS_ENABLE": "0",  # Disable NVLink SHARP to save memory
+        "NVTE_FUSED_ATTN": "0",  # Disable cuDNN attention
     }

     executor = run.LocalExecutor(ntasks_per_node=devices, launcher="torchrun", env_vars=env_vars)
