Skip to content

Commit

Permalink
Add `disable_custom_all_reduce` option for vLLM multi-GPU inference
Browse files — browse the repository at this point in the history
  • Loading branch information
xusenlin committed Jun 7, 2024
1 parent 016bdff commit 509c0ba
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 0 deletions.
3 changes: 3 additions & 0 deletions api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,9 @@ class VLLMSetting(BaseModel):
lora_modules: Optional[str] = Field(
default=get_env("LORA_MODULES", ""),
)
disable_custom_all_reduce: Optional[bool] = Field(
default=get_bool_env("DISABLE_CUSTOM_ALL_REDUCE"),
)
vllm_disable_log_stats: Optional[bool] = Field(
default=get_bool_env("VLLM_DISABLE_LOG_STATS", "true"),
)
Expand Down
1 change: 1 addition & 0 deletions api/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def create_vllm_engine():
"max_loras",
"max_lora_rank",
"lora_extra_vocab_size",
"disable_custom_all_reduce",
}

if vllm_version >= "0.4.3":
Expand Down

0 comments on commit 509c0ba

Please sign in to comment.