huggingface · qgallouedec · Feb 5, 2025 · Feb 1, 2025 · Feb 1, 2025 · Feb 2, 2025
diff --git a/trl/trainer/grpo_config.py b/trl/trainer/grpo_config.py
@@ -73,6 +73,10 @@ class GRPOConfig(TrainingArguments):
         vllm_dtype (`str`, *optional*, defaults to `"auto"`):
             Data type to use for vLLM generation. If set to `"auto"`, the data type will be automatically determined
             based on the model configuration. Find the supported values in the vLLM documentation.
+        vllm_max_model_len (`int` or `None`, *optional*, defaults to `None`):
+            If set, the `max_model_len` to use for vLLM. This could be useful when running with reduced
+            `vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model
+            context size, which might be much larger than the KV cache, leading to inefficiencies.
 
         > Parameters that control the training
 
@@ -181,6 +185,14 @@ class GRPOConfig(TrainingArguments):
             "determined based on the model configuration. Find the supported values in the vLLM documentation."
         },
     )
+    vllm_max_model_len: Optional[int] = field(
+        default=None,
+        metadata={
+            "help": "If set, the `max_model_len` to use for vLLM. This could be useful when running with reduced "
+            "`vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model "
+            "context size, which might be much larger than the KV cache, leading to inefficiencies."
+        },
+    )
 
     # Parameters that control the training
     learning_rate: float = field(

diff --git a/trl/trainer/grpo_trainer.py b/trl/trainer/grpo_trainer.py
@@ -322,6 +322,7 @@ def data_collator(features):  # No data collation is needed in GRPO
                         # directly reuse the KV cache if it shares the same prefix with one of the existing queries.
                         # This is particularly useful here because we generate completions from the same prompts.
                         enable_prefix_caching=True,
+                        max_model_len=self.args.vllm_max_model_len,
                     )
                 self.sampling_params = SamplingParams(
                     n=self.num_generations,