From 509c0ba49cc1157bbff6b3339143f7f7674f60bc Mon Sep 17 00:00:00 2001 From: xusenlin Date: Fri, 7 Jun 2024 23:46:40 +0800 Subject: [PATCH] add disable_custom_all_reduce for vllm multi-gpu inference --- api/config.py | 3 +++ api/models.py | 1 + 2 files changed, 4 insertions(+) diff --git a/api/config.py b/api/config.py index 2696ac0..2ca1ae1 100644 --- a/api/config.py +++ b/api/config.py @@ -224,6 +224,9 @@ class VLLMSetting(BaseModel): lora_modules: Optional[str] = Field( default=get_env("LORA_MODULES", ""), ) + disable_custom_all_reduce: Optional[bool] = Field( + default=get_bool_env("DISABLE_CUSTOM_ALL_REDUCE"), + ) vllm_disable_log_stats: Optional[bool] = Field( default=get_bool_env("VLLM_DISABLE_LOG_STATS", "true"), ) diff --git a/api/models.py b/api/models.py index cfeba0f..93d56bf 100644 --- a/api/models.py +++ b/api/models.py @@ -112,6 +112,7 @@ def create_vllm_engine(): "max_loras", "max_lora_rank", "lora_extra_vocab_size", + "disable_custom_all_reduce", } if vllm_version >= "0.4.3":