Skip to content

Commit

Permalink
ENH: pass the sampling parameter skip_special_tokens through to the vLLM backend
Browse files: browse the repository at this point in the history
  • Loading branch information
zjuyzj authored Dec 11, 2024
1 parent 472c7f1 commit 0fd2001
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 2 deletions.
4 changes: 2 additions & 2 deletions xinference/api/restful_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2044,7 +2044,6 @@ async def create_chat_completion(self, request: Request) -> Response:
)
if body.tools and body.stream:
is_vllm = await model.is_vllm_backend()

if not (
(is_vllm and model_family in QWEN_TOOL_CALL_FAMILY)
or (not is_vllm and model_family in GLM4_TOOL_CALL_FAMILY)
Expand All @@ -2054,7 +2053,8 @@ async def create_chat_completion(self, request: Request) -> Response:
detail="Streaming support for tool calls is available only when using "
"Qwen models with vLLM backend or GLM4-chat models without vLLM backend.",
)

if "skip_special_tokens" in raw_kwargs and await model.is_vllm_backend():
kwargs["skip_special_tokens"] = raw_kwargs["skip_special_tokens"]
if body.stream:

async def stream_results():
Expand Down
4 changes: 4 additions & 0 deletions xinference/model/llm/vllm/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class VLLMGenerateConfig(TypedDict, total=False):
stop: Optional[Union[str, List[str]]]
stream: bool # non-sampling param, should not be passed to the engine.
stream_options: Optional[Union[dict, None]]
skip_special_tokens: Optional[bool]
response_format: Optional[dict]
guided_json: Optional[Union[str, dict]]
guided_regex: Optional[str]
Expand Down Expand Up @@ -373,6 +374,9 @@ def _sanitize_generate_config(
sanitized.setdefault(
"stream_options", generate_config.get("stream_options", None)
)
sanitized.setdefault(
"skip_special_tokens", generate_config.get("skip_special_tokens", True)
)
sanitized.setdefault(
"guided_json", generate_config.get("guided_json", guided_json)
)
Expand Down

0 comments on commit 0fd2001

Please sign in to comment.