Remove tyro #1176

Merged · 42 commits · Jan 26, 2024

Commits
c88e5f6
refactor
vwxyzjn Dec 8, 2023
0ebeb38
Remove tyro in `ppo.py`
vwxyzjn Dec 11, 2023
527d96f
quick update
vwxyzjn Dec 11, 2023
789108a
update default args
vwxyzjn Dec 11, 2023
e7f9580
Merge branch 'main' into refactor3
vwxyzjn Jan 4, 2024
d7e66c3
quick push
vwxyzjn Jan 4, 2024
fa2a4c8
precommit
vwxyzjn Jan 4, 2024
67a8f53
refactor
vwxyzjn Jan 4, 2024
fe3f766
quick change
vwxyzjn Jan 9, 2024
34ea7d7
remove tyro
vwxyzjn Jan 9, 2024
af724f4
quick change
vwxyzjn Jan 9, 2024
800b9fd
precommit
vwxyzjn Jan 9, 2024
c257d85
quick change
vwxyzjn Jan 9, 2024
7960804
fix hello_world
vwxyzjn Jan 9, 2024
782fdd3
remove docstring diffences
vwxyzjn Jan 9, 2024
3c1f3a0
add `module load cuda/12.1`
vwxyzjn Jan 9, 2024
da745c8
push changes
vwxyzjn Jan 10, 2024
61506ff
precommit
vwxyzjn Jan 10, 2024
04f5a7b
make dpo runnable
vwxyzjn Jan 10, 2024
83ad452
fix circular import
vwxyzjn Jan 10, 2024
56bea91
quick fix
vwxyzjn Jan 10, 2024
5936fa9
refactor
vwxyzjn Jan 11, 2024
c636cfd
quick update
vwxyzjn Jan 11, 2024
0022af6
path change
vwxyzjn Jan 11, 2024
cca6940
update plots
vwxyzjn Jan 11, 2024
7b5e7b2
Merge branch 'main' into refactor3
vwxyzjn Jan 11, 2024
bfde066
fix docs
vwxyzjn Jan 11, 2024
a6addfd
quick change
vwxyzjn Jan 11, 2024
040e32d
Update trl/trainer/model_config.py
vwxyzjn Jan 12, 2024
348faf0
Update trl/trainer/model_config.py
vwxyzjn Jan 12, 2024
e646352
Update trl/trainer/utils.py
vwxyzjn Jan 12, 2024
02e3f3c
Update examples/scripts/dpo.py
vwxyzjn Jan 12, 2024
f4d0840
Merge branch 'main' into refactor3
vwxyzjn Jan 24, 2024
d355e38
address comments. use attn_implementation
vwxyzjn Jan 24, 2024
87fe921
precommit
vwxyzjn Jan 24, 2024
bc1a7aa
remove duplicate code
vwxyzjn Jan 24, 2024
161a102
update peft.py
vwxyzjn Jan 24, 2024
884aac1
fix test no op dep
vwxyzjn Jan 24, 2024
5a496f7
Update trl/trainer/utils.py
vwxyzjn Jan 26, 2024
bd5ab8c
Apply suggestions from code review
vwxyzjn Jan 26, 2024
455157e
precommit
vwxyzjn Jan 26, 2024
74834d2
add docs
vwxyzjn Jan 26, 2024
17 changes: 1 addition & 16 deletions benchmark/benchmark_and_report.sh
@@ -1,20 +1,5 @@
#### Step 1: create a work directory:
# this is necessary because another github action job will remove
# the entire directory, which slurm depends on.
# https://stackoverflow.com/questions/4632028/how-to-create-a-temporary-directory
MY_SLURM_TMP_DIR=/fsx/costa/slurm_tmpdir
mkdir -p $MY_SLURM_TMP_DIR
WORK_DIR=`mktemp -d -p "$MY_SLURM_TMP_DIR"`
cp -r "$PWD" "$WORK_DIR"
cd "$WORK_DIR/$(basename "$PWD")"
echo WORK_DIR: $WORK_DIR

#### Step 2: actual work starts:
echo PATH is $PATH
echo PYTHONPATH is $PYTHONPATH
echo whcih python is $(which python)

export WANDB_ENTITY=huggingface
export WANDB_PROJECT=trl
bash $BENCHMARK_SCRIPT > output.txt

# Extract Job IDs into an array
35 changes: 34 additions & 1 deletion benchmark/benchmark_level1.sh
@@ -1,6 +1,39 @@
# hello world experiment
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
--slurm-gpus-per-task 1 \
--slurm-ntasks 1 \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template

python benchmark/benchmark.py \
--command "python examples/scripts/dpo.py --model_name_or_path=gpt2 --per_device_train_batch_size 4 --max_steps 1000 --learning_rate 1e-3 --gradient_accumulation_steps 1 --logging_steps 10 --eval_steps 500 --output_dir="dpo_anthropic_hh" --optim adamw_torch --warmup_steps 150 --report_to wandb --bf16 --logging_first_step --no_remove_unused_columns" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
--slurm-gpus-per-task 1 \
--slurm-ntasks 1 \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template

python benchmark/benchmark.py \
--command "python examples/scripts/sft.py --model_name_or_path="facebook/opt-350m" --report_to="wandb" --learning_rate=1.41e-5 --per_device_train_batch_size=64 --gradient_accumulation_steps=16 --output_dir="sft_openassistant-guanaco" --logging_steps=1 --num_train_epochs=3 --max_steps=-1 --push_to_hub --gradient_checkpointing" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
--slurm-nodes 1 \
--slurm-gpus-per-task 1 \
--slurm-ntasks 1 \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template

python benchmark/benchmark.py \
--command "python examples/scripts/reward_modeling.py --model_name_or_path=facebook/opt-350m --output_dir="reward_modeling_anthropic_hh" --per_device_train_batch_size=64 --num_train_epochs=1 --gradient_accumulation_steps=16 --gradient_checkpointing=True --learning_rate=1.41e-5 --report_to="wandb" --remove_unused_columns=False --optim="adamw_torch" --logging_steps=10 --evaluation_strategy="steps" --max_length=512" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
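The flag renames in this file show the PR's core change: tyro parsed a nested `PPOConfig` field, so every option carried a `--ppo_config.` prefix, while the replacement parser exposes dataclass fields as flat flags. Below is a minimal sketch of the flat style, assuming an `HfArgumentParser`-based setup (the dataclass fields here are illustrative, not the scripts' real argument set):

```python
from dataclasses import dataclass, field
from typing import Optional

from transformers import HfArgumentParser


@dataclass
class ScriptArguments:
    # Illustrative fields only; the real scripts define their own argument sets.
    exp_name: str = field(default="ppo", metadata={"help": "experiment name"})
    model_name: str = field(default="gpt2", metadata={"help": "model to fine-tune"})
    log_with: Optional[str] = field(default=None, metadata={"help": "tracker, e.g. 'wandb'"})


parser = HfArgumentParser(ScriptArguments)
args = parser.parse_args_into_dataclasses()[0]
# Invoked as: python script.py --log_with wandb --model_name gpt2-xl
# (flat flags, no --ppo_config. prefix)
```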
32 changes: 31 additions & 1 deletion benchmark/benchmark_level1_plot.sh
@@ -9,7 +9,7 @@ python -m openrlbenchmark.rlops_multi_metrics \
--no-check-empty-runs \
--pc.ncols 2 \
--pc.ncols-legend 1 \
-    --output-filename benchmark/trl/$FOLDER_STRING/hello_world \
+    --output-filename benchmark/trl/$FOLDER_STRING/ppo \
--scan-history

python -m openrlbenchmark.rlops_multi_metrics \
--filters '?we=huggingface&wpn=trl&xaxis=_step&ceik=output_dir&cen=_name_or_path&metrics=train/rewards/accuracies&metrics=train/loss' \
"gpt2$TAGS_STRING" \
--env-ids dpo_anthropic_hh \
--no-check-empty-runs \
--pc.ncols 2 \
--pc.ncols-legend 1 \
--output-filename benchmark/trl/$FOLDER_STRING/dpo \
--scan-history

python -m openrlbenchmark.rlops_multi_metrics \
--filters '?we=huggingface&wpn=trl&xaxis=_step&ceik=output_dir&cen=_name_or_path&metrics=train/loss&metrics=eval/accuracy&metrics=eval/loss' \
"facebook/opt-350m$TAGS_STRING" \
--env-ids reward_modeling_anthropic_hh \
--no-check-empty-runs \
--pc.ncols 2 \
--pc.ncols-legend 1 \
--output-filename benchmark/trl/$FOLDER_STRING/reward_modeling \
--scan-history

python -m openrlbenchmark.rlops_multi_metrics \
--filters '?we=huggingface&wpn=trl&xaxis=_step&ceik=output_dir&cen=_name_or_path&metrics=train/loss' \
"facebook/opt-350m$TAGS_STRING" \
--env-ids sft_openassistant-guanaco \
--no-check-empty-runs \
--pc.ncols 2 \
--pc.ncols-legend 1 \
--output-filename benchmark/trl/$FOLDER_STRING/sft \
--scan-history

python benchmark/upload_benchmark.py \
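A reading of the `--filters` strings in the plotting commands above, based on openrlbenchmark's query conventions (an assumption; the PR itself doesn't document them): `we` is the Weights & Biases entity, `wpn` the project name, `xaxis` the step key, `ceik` the run-config key used as the environment id (here `output_dir`, which is why each training command sets a distinctive `--output_dir`), `cen` the key used as the experiment name (here `_name_or_path`), and each `metrics=` entry adds one curve to the plot.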
4 changes: 2 additions & 2 deletions benchmark/benchmark_level2.sh
@@ -1,6 +1,6 @@
# compound experiments: gpt2xl + grad_accu
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_gpt2xl_grad_accu --ppo_config.model_name gpt2-xl --ppo_config.mini_batch_size 16 --ppo_config.gradient_accumulation_steps 8 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_gpt2xl_grad_accu --model_name gpt2-xl --mini_batch_size 16 --gradient_accumulation_steps 8 --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -12,7 +12,7 @@ python benchmark/benchmark.py \

# compound experiments: Cerebras-GPT-6.7B + deepspeed zero2 + grad_accu
python benchmark/benchmark.py \
--command "accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml examples/scripts/ppo.py --ppo_config.exp_name ppo_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2 --ppo_config.batch_size 32 --ppo_config.mini_batch_size 32 --ppo_config.log_with wandb --ppo_config.model_name cerebras/Cerebras-GPT-6.7B --ppo_config.reward_model sentiment-analysis:cerebras/Cerebras-GPT-6.7B" \
--command "accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml examples/scripts/ppo.py --exp_name ppo_Cerebras-GPT-6.7B_grad_accu_deepspeed_stage2 --batch_size 32 --mini_batch_size 32 --log_with wandb --model_name cerebras/Cerebras-GPT-6.7B --reward_model sentiment-analysis:cerebras/Cerebras-GPT-6.7B" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
8 changes: 4 additions & 4 deletions benchmark/benchmark_level3.sh
@@ -1,6 +1,6 @@
## w/ and w/o gradient accumulation
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_step_grad_accu --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 128 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_step_grad_accu --mini_batch_size 1 --gradient_accumulation_steps 128 --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -12,7 +12,7 @@ python benchmark/benchmark.py \

## w/ different models (gpt2, gpt2-xl, falcon, llama2)
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_gpt2 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_gpt2 --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -22,7 +22,7 @@ python benchmark/benchmark.py \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_falcon_rw_1b --ppo_config.model_name tiiuae/falcon-rw-1b --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_falcon_rw_1b --model_name tiiuae/falcon-rw-1b --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
@@ -35,7 +35,7 @@ python benchmark/benchmark.py \

## w/ and w/o PEFT
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name ppo_peft --use_peft --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name ppo_peft --use_peft --log_with wandb" \
--num-seeds 3 \
--start-seed 1 \
--workers 10 \
2 changes: 1 addition & 1 deletion benchmark/post_github_comment.sbatch
@@ -1,6 +1,6 @@
#!/bin/bash
#SBATCH --job-name=trl
-#SBATCH --partition=production-cluster
+#SBATCH --partition=hopper-cpu
#SBATCH --ntasks=1
#SBATCH --output=slurm/logs/%x_%j.out

3 changes: 3 additions & 0 deletions benchmark/regression_test.sh
@@ -0,0 +1,3 @@
BENCHMARK_SCRIPT="benchmark/benchmark_level1.sh" \
BENCHMARK_PLOT_SCRIPT="benchmark/benchmark_level1_plot.sh" \
bash benchmark/benchmark_and_report.sh
9 changes: 6 additions & 3 deletions benchmark/trl.slurm_template
@@ -1,16 +1,19 @@
#!/bin/bash
#SBATCH --job-name=trl
-#SBATCH --partition=production-cluster
+#SBATCH --partition=hopper-prod
#SBATCH --gpus-per-task={{gpus_per_task}}
#SBATCH --cpus-per-gpu={{cpus_per_gpu}}
#SBATCH --ntasks={{ntasks}}
#SBATCH --output=slurm/logs/%x_%j.out
#SBATCH --array={{array}}
#SBATCH --exclude=ip-26-0-156-239,ip-26-0-148-151,ip-26-0-146-212,ip-26-0-145-137,ip-26-0-146-249,ip-26-0-146-149,ip-26-0-147-233,ip-26-0-145-154,ip-26-0-144-35,ip-26-0-144-189,ip-26-0-146-183,ip-26-0-147-120,ip-26-0-144-95,ip-26-0-145-193
##SBATCH --exclude=ip-26-0-149-199

module load cuda/12.1

{{nodes}}

seeds={{seeds}}
seed=${seeds[$SLURM_ARRAY_TASK_ID % {{len_seeds}}]}

echo "Running task $SLURM_ARRAY_TASK_ID with seed: $seed"
-srun {{command}} --ppo_config.seed $seed
+srun {{command}} --seed $seed
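The `{{...}}` tokens in this template are placeholders that the launcher substitutes before submitting the job. A hypothetical sketch of that step, assuming plain string substitution followed by `sbatch`; the real `benchmark/benchmark.py` may do this differently:

```python
import subprocess

# Hypothetical fill values; benchmark/benchmark.py derives these from its CLI flags.
fills = {
    "{{gpus_per_task}}": "1",
    "{{cpus_per_gpu}}": "12",
    "{{ntasks}}": "1",
    "{{array}}": "0-2",  # one array task per seed
    "{{nodes}}": "",
    "{{seeds}}": "(1 2 3)",  # bash array literal read by the template
    "{{len_seeds}}": "3",
    "{{command}}": "python examples/scripts/ppo.py --log_with wandb",
}

with open("benchmark/trl.slurm_template") as f:
    script = f.read()
for token, value in fills.items():
    script = script.replace(token, value)

with open("run.slurm", "w") as f:
    f.write(script)
subprocess.run(["sbatch", "run.slurm"], check=True)
```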
2 changes: 1 addition & 1 deletion docs/source/quickstart.mdx
@@ -30,7 +30,7 @@ tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

# 2. initialize trainer
ppo_config = {"batch_size": 1}
ppo_config = {"mini_batch_size": 1, "batch_size": 1}
config = PPOConfig(**ppo_config)
ppo_trainer = PPOTrainer(config, model, model_ref, tokenizer)

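The added `mini_batch_size` is not cosmetic: `PPOConfig` checks that `batch_size` is divisible by `mini_batch_size * gradient_accumulation_steps`, and the refactor presumably changes the default `mini_batch_size` away from `1`, so `{"batch_size": 1}` alone would now fail that check (the same fix appears in `examples/hello_world.py` below). A minimal sketch under that assumption:

```python
from trl import PPOConfig

# batch_size (1) == mini_batch_size (1) * gradient_accumulation_steps (default 1),
# so PPOConfig's divisibility check passes.
config = PPOConfig(mini_batch_size=1, batch_size=1)
```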
14 changes: 7 additions & 7 deletions docs/source/sentiment_tuning.mdx
@@ -25,7 +25,7 @@ accelerate launch examples/scripts/ppo.py # launches training
# 3. get help text and documentation
python examples/scripts/ppo.py --help
# 4. configure logging with wandb and, say, mini_batch_size=1 and gradient_accumulation_steps=16
-python examples/scripts/ppo.py --ppo_config.log_with wandb --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 16
+python examples/scripts/ppo.py --log_with wandb --mini_batch_size 1 --gradient_accumulation_steps 16
```

Note: if you don't want to log with `wandb` remove `log_with="wandb"` in the scripts/notebooks. You can also replace it with your favourite experiment tracker that's [supported by `accelerate`](https://huggingface.co/docs/accelerate/usage_guides/tracking).
@@ -42,7 +42,7 @@ Below are some benchmark results for `examples/scripts/ppo.py`. To reproduce loc

```bash
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -61,7 +61,7 @@ python benchmark/benchmark.py \

```bash
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_step_grad_accu --ppo_config.mini_batch_size 1 --ppo_config.gradient_accumulation_steps 128 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_step_grad_accu --mini_batch_size 1 --gradient_accumulation_steps 128 --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -79,7 +79,7 @@ python benchmark/benchmark.py \

```bash
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_gpt2 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_gpt2 --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -89,7 +89,7 @@ python benchmark/benchmark.py \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_gpt2xl_grad_accu --ppo_config.model_name gpt2-xl --ppo_config.mini_batch_size 16 --ppo_config.gradient_accumulation_steps 8 --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_gpt2xl_grad_accu --model_name gpt2-xl --mini_batch_size 16 --gradient_accumulation_steps 8 --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -99,7 +99,7 @@ python benchmark/benchmark.py \
--slurm-total-cpus 12 \
--slurm-template-path benchmark/trl.slurm_template
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_falcon_rw_1b --ppo_config.model_name tiiuae/falcon-rw-1b --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_falcon_rw_1b --model_name tiiuae/falcon-rw-1b --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
@@ -116,7 +116,7 @@ python benchmark/benchmark.py \

```
python benchmark/benchmark.py \
--command "python examples/scripts/ppo.py --ppo_config.exp_name sentiment_tuning_peft --use_peft --ppo_config.log_with wandb" \
--command "python examples/scripts/ppo.py --exp_name sentiment_tuning_peft --use_peft --log_with wandb" \
--num-seeds 5 \
--start-seed 1 \
--workers 10 \
43 changes: 41 additions & 2 deletions docs/source/sft_trainer.mdx
@@ -426,13 +426,13 @@ To use Flash Attention 2, first install the latest `flash-attn` package:
pip install -U flash-attn
```

-And add `use_flash_attention_2=True` when calling `from_pretrained`:
+And add `attn_implementation="flash_attention_2"` when calling `from_pretrained`:
Review comment (Contributor): Could you add a documentation section here to explain what ModelConfig does and how to use it together with the other utility methods that you have exposed on main init? 🙏

Reply (Contributor Author): Done!

```python
model = AutoModelForCausalLM.from_pretrained(
model_id,
load_in_4bit=True,
-    use_flash_attention_2=True
+    attn_implementation="flash_attention_2"
)
```

@@ -441,6 +441,45 @@ After loading your model, you can either train it as it is, or attach adapters a

In contrary to Flash Attention 1, the integration makes it possible to train your model on an arbitrary dataset that also includes padding tokens.
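The flag swap tracks upstream `transformers`, which deprecated `use_flash_attention_2=True` in favor of the more general `attn_implementation` argument (which also accepts, e.g., `"sdpa"`); the old flag still worked at the time but emitted a deprecation warning.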


### Using model creation utility

We included a utility function to create your model.

[[autodoc]] ModelConfig

```python
import torch
from transformers import AutoModelForCausalLM
from trl import ModelConfig, SFTTrainer, get_kbit_device_map, get_peft_config, get_quantization_config

model_config = ModelConfig(
    model_name_or_path="facebook/opt-350m",
    attn_implementation=None,  # or "flash_attention_2"
)
torch_dtype = (
    model_config.torch_dtype
    if model_config.torch_dtype in ["auto", None]
    else getattr(torch, model_config.torch_dtype)
)
quantization_config = get_quantization_config(model_config)
model_kwargs = dict(
    revision=model_config.model_revision,
    trust_remote_code=model_config.trust_remote_code,
    attn_implementation=model_config.attn_implementation,
    torch_dtype=torch_dtype,
    use_cache=False if training_args.gradient_checkpointing else True,  # training_args: your TrainingArguments
    device_map=get_kbit_device_map() if quantization_config is not None else None,
    quantization_config=quantization_config,
)
model = AutoModelForCausalLM.from_pretrained(model_config.model_name_or_path, **model_kwargs)
trainer = SFTTrainer(
    ...,
    model=model,  # pass the instantiated model rather than re-loading it by name
    peft_config=get_peft_config(model_config),
)
```
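One design note on the snippet: `get_peft_config(model_config)` returns `None` when PEFT is not enabled on the `ModelConfig` (e.g. `use_peft=False`), and a `None` `peft_config` makes the trainer train the full model, so a single script can serve both LoRA and full fine-tuning runs driven purely by CLI flags.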




### Enhance model's performances using NEFTune

NEFTune is a technique to boost the performance of chat models and was introduced by the paper ["NEFTune: Noisy Embeddings Improve Instruction Finetuning"](https://arxiv.org/abs/2310.05914) from Jain et al. it consists of adding noise to the embedding vectors during training. According to the abstract of the paper:
2 changes: 1 addition & 1 deletion examples/hello_world.py
@@ -12,7 +12,7 @@
tokenizer.pad_token = tokenizer.eos_token

# 2. initialize trainer
ppo_config = {"batch_size": 1}
ppo_config = {"mini_batch_size": 1, "batch_size": 1}
config = PPOConfig(**ppo_config)
ppo_trainer = PPOTrainer(config, model, model_ref, tokenizer)
