From 1a6c28c7487ef39ac76e6459ae911e9ef8dc06f6 Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 11:17:20 +0800
Subject: [PATCH 01/10] bugfix: add tokenizer_mode for
 benchmark/backend_request_func.get_tokenizer

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 34 +++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index b67849038cf0d..b9b211d5aa0ed 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -404,27 +404,45 @@ async def async_request_openai_chat_completions(
 
 
 def get_model(pretrained_model_name_or_path: str) -> str:
-    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
+    if os.getenv("VLLM_USE_MODELSCOPE", "False").lower() == "true":
         from modelscope import snapshot_download
 
         model_path = snapshot_download(
             model_id=pretrained_model_name_or_path,
             local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
-            ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
+            ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"],
+        )
 
         return model_path
     return pretrained_model_name_or_path
 
 
 def get_tokenizer(
-    pretrained_model_name_or_path: str, trust_remote_code: bool
+    pretrained_model_name_or_path: str,
+    tokenizer_mode: str = "auto",
+    trust_remote_code: bool = False,
+    **kwargs,
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     if pretrained_model_name_or_path is not None and not os.path.exists(
-            pretrained_model_name_or_path):
-        pretrained_model_name_or_path = get_model(
-            pretrained_model_name_or_path)
-    return AutoTokenizer.from_pretrained(pretrained_model_name_or_path,
-                                         trust_remote_code=trust_remote_code)
+        pretrained_model_name_or_path
+    ):
+        pretrained_model_name_or_path = get_model(pretrained_model_name_or_path)
+    if tokenizer_mode == "slow":
+        if kwargs.get("use_fast", False):
+            raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.")
+        kwargs["use_fast"] = False
+    if tokenizer_mode == "mistral":
+        vllm_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+        sys.path.append(vllm_dir)
+        from vllm.transformers_utils.tokenizers import MistralTokenizer
+
+        return MistralTokenizer.from_pretrained(str(pretrained_model_name_or_path))
+    else:
+        return AutoTokenizer.from_pretrained(
+            pretrained_model_name_or_path,
+            trust_remote_code=trust_remote_code,
+            **kwargs,
+        )
 
 
 ASYNC_REQUEST_FUNCS = {

From 1cf685fb1d3d92e544989d16b58526f67dadb999 Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 11:23:55 +0800
Subject: [PATCH 02/10] chore: Remove unrelated changes from auto-formatting

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index b9b211d5aa0ed..78057b4783818 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -404,14 +404,13 @@ async def async_request_openai_chat_completions(
 
 
 def get_model(pretrained_model_name_or_path: str) -> str:
-    if os.getenv("VLLM_USE_MODELSCOPE", "False").lower() == "true":
+    if os.getenv('VLLM_USE_MODELSCOPE', 'False').lower() == 'true':
         from modelscope import snapshot_download
 
         model_path = snapshot_download(
             model_id=pretrained_model_name_or_path,
             local_files_only=huggingface_hub.constants.HF_HUB_OFFLINE,
-            ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"],
-        )
+            ignore_file_pattern=[".*.pt", ".*.safetensors", ".*.bin"])
 
         return model_path
     return pretrained_model_name_or_path

From a222b4510abe7b6c72b42ad013589f9988b6f2d1 Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 12:10:26 +0800
Subject: [PATCH 03/10] chore: fix ci lint

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 78057b4783818..d882ddfee63e0 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -428,12 +428,16 @@ def get_tokenizer(
         pretrained_model_name_or_path = get_model(pretrained_model_name_or_path)
     if tokenizer_mode == "slow":
         if kwargs.get("use_fast", False):
-            raise ValueError("Cannot use the fast tokenizer in slow tokenizer mode.")
+            raise ValueError(
+                "Cannot use the fast tokenizer in slow tokenizer mode."
+            )
         kwargs["use_fast"] = False
     if tokenizer_mode == "mistral":
         vllm_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
         sys.path.append(vllm_dir)
-        from vllm.transformers_utils.tokenizers import MistralTokenizer
+        from vllm.transformers_utils.tokenizers import (
+            MistralTokenizer
+        )
 
         return MistralTokenizer.from_pretrained(str(pretrained_model_name_or_path))
     else:

From 16ee4b04499dcb020fd4b0a30fe18946986a8635 Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 12:12:40 +0800
Subject: [PATCH 04/10] chore: fix ci lint

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index d882ddfee63e0..4a84938e21703 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -423,14 +423,13 @@ def get_tokenizer(
     **kwargs,
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     if pretrained_model_name_or_path is not None and not os.path.exists(
-        pretrained_model_name_or_path
-    ):
-        pretrained_model_name_or_path = get_model(pretrained_model_name_or_path)
+        pretrained_model_name_or_path):
+        pretrained_model_name_or_path = get_model(
+            pretrained_model_name_or_path)
     if tokenizer_mode == "slow":
         if kwargs.get("use_fast", False):
             raise ValueError(
-                "Cannot use the fast tokenizer in slow tokenizer mode."
-            )
+                "Cannot use the fast tokenizer in slow tokenizer mode.")
         kwargs["use_fast"] = False
     if tokenizer_mode == "mistral":
         vllm_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -439,7 +438,8 @@ def get_tokenizer(
             MistralTokenizer
         )
 
-        return MistralTokenizer.from_pretrained(str(pretrained_model_name_or_path))
+        return MistralTokenizer.from_pretrained(
+            str(pretrained_model_name_or_path))
     else:
         return AutoTokenizer.from_pretrained(
             pretrained_model_name_or_path,

From 7707f2c7e770f219f9c84d2b5632ee90a687d588 Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 12:18:20 +0800
Subject: [PATCH 05/10] chore: fix ci lint

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 4a84938e21703..709ee9e62140d 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -12,6 +12,10 @@
 from transformers import (AutoTokenizer, PreTrainedTokenizer,
                           PreTrainedTokenizerFast)
 
+vllm_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(vllm_dir)
+from vllm.transformers_utils.tokenizers import MistralTokenizer
+
 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
 
 
@@ -432,12 +436,6 @@ def get_tokenizer(
                 "Cannot use the fast tokenizer in slow tokenizer mode.")
         kwargs["use_fast"] = False
     if tokenizer_mode == "mistral":
-        vllm_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-        sys.path.append(vllm_dir)
-        from vllm.transformers_utils.tokenizers import (
-            MistralTokenizer
-        )
-
         return MistralTokenizer.from_pretrained(
             str(pretrained_model_name_or_path))
     else:

From 971102de34b49af5a120cd07b9466016df22cd32 Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 12:35:53 +0800
Subject: [PATCH 06/10] chore: raise import error when using mistral tokenizer

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 709ee9e62140d..3b336512bbee5 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -12,10 +12,6 @@
 from transformers import (AutoTokenizer, PreTrainedTokenizer,
                           PreTrainedTokenizerFast)
 
-vllm_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-sys.path.append(vllm_dir)
-from vllm.transformers_utils.tokenizers import MistralTokenizer
-
 AIOHTTP_TIMEOUT = aiohttp.ClientTimeout(total=6 * 60 * 60)
 
 
@@ -436,6 +432,13 @@ def get_tokenizer(
                 "Cannot use the fast tokenizer in slow tokenizer mode.")
         kwargs["use_fast"] = False
     if tokenizer_mode == "mistral":
+        try:
+            from vllm.transformers_utils.tokenizer import (
+                MistralTokenizer)
+        except ImportError:
+            raise ImportError(
+                "MistralTokenizer requires vllm package.\n"
+                "Please install it with `pip install vllm` to use mistral tokenizer mode.")
         return MistralTokenizer.from_pretrained(
             str(pretrained_model_name_or_path))
     else:

From b870182d2db473f6835193b91416c9ed8ef83c6f Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 14:03:03 +0800
Subject: [PATCH 07/10] chore: fix ci lint

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 3b336512bbee5..d5489a2cfd408 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -423,7 +423,7 @@ def get_tokenizer(
     **kwargs,
 ) -> Union[PreTrainedTokenizer, PreTrainedTokenizerFast]:
     if pretrained_model_name_or_path is not None and not os.path.exists(
-        pretrained_model_name_or_path):
+            pretrained_model_name_or_path):
         pretrained_model_name_or_path = get_model(
             pretrained_model_name_or_path)
     if tokenizer_mode == "slow":
@@ -433,12 +433,13 @@ def get_tokenizer(
         kwargs["use_fast"] = False
     if tokenizer_mode == "mistral":
         try:
-            from vllm.transformers_utils.tokenizer import (
-                MistralTokenizer)
-        except ImportError:
+            from vllm.transformers_utils.tokenizer import MistralTokenizer
+        except ImportError as e:
             raise ImportError(
                 "MistralTokenizer requires vllm package.\n"
-                "Please install it with `pip install vllm` to use mistral tokenizer mode.")
+                "Please install it with `pip install vllm` "
+                "to use mistral tokenizer mode."
+            ) from e
         return MistralTokenizer.from_pretrained(
             str(pretrained_model_name_or_path))
     else:

From 1793462e04164795c939013eb578376f11f9da6b Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 14:07:21 +0800
Subject: [PATCH 08/10] chore: fix ci lint

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index d5489a2cfd408..03e8dd57bf29a 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -438,8 +438,7 @@ def get_tokenizer(
             raise ImportError(
                 "MistralTokenizer requires vllm package.\n"
                 "Please install it with `pip install vllm` "
-                "to use mistral tokenizer mode."
-            ) from e
+                "to use mistral tokenizer mode.") from e
         return MistralTokenizer.from_pretrained(
             str(pretrained_model_name_or_path))
     else:

From af33c6628db5433a044d65b6d1b2e976c22d6f8d Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 14:12:15 +0800
Subject: [PATCH 09/10] chore: fix ci lint

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index 03e8dd57bf29a..fa15f0cf331d1 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -435,8 +435,7 @@ def get_tokenizer(
         try:
             from vllm.transformers_utils.tokenizer import MistralTokenizer
         except ImportError as e:
-            raise ImportError(
-                "MistralTokenizer requires vllm package.\n"
+            raise ImportError("MistralTokenizer requires vllm package.\n"
                 "Please install it with `pip install vllm` "
                 "to use mistral tokenizer mode.") from e
         return MistralTokenizer.from_pretrained(

From e55fe57b842d9d283cb403aae4db7d8dd4af8bf9 Mon Sep 17 00:00:00 2001
From: elijah
Date: Mon, 13 Jan 2025 14:20:15 +0800
Subject: [PATCH 10/10] chore: fix ci lint

Signed-off-by: elijah
---
 benchmarks/backend_request_func.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmarks/backend_request_func.py b/benchmarks/backend_request_func.py
index fa15f0cf331d1..9d71e4ecc4a37 100644
--- a/benchmarks/backend_request_func.py
+++ b/benchmarks/backend_request_func.py
@@ -436,8 +436,8 @@ def get_tokenizer(
             from vllm.transformers_utils.tokenizer import MistralTokenizer
         except ImportError as e:
             raise ImportError("MistralTokenizer requires vllm package.\n"
-                "Please install it with `pip install vllm` "
-                "to use mistral tokenizer mode.") from e
+                              "Please install it with `pip install vllm` "
+                              "to use mistral tokenizer mode.") from e
         return MistralTokenizer.from_pretrained(
             str(pretrained_model_name_or_path))
     else:
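
Usage sketch for the get_tokenizer signature that results from this series
(illustrative only: the model names below are assumptions, and importing
backend_request_func assumes the script is run from the benchmarks/ directory):

    from backend_request_func import get_tokenizer

    # "auto" (the default) loads a fast tokenizer via AutoTokenizer.
    tok = get_tokenizer("meta-llama/Llama-3.1-8B", tokenizer_mode="auto")

    # "slow" forces use_fast=False; passing use_fast=True here raises ValueError.
    slow_tok = get_tokenizer("meta-llama/Llama-3.1-8B", tokenizer_mode="slow")

    # "mistral" uses vllm's MistralTokenizer and raises ImportError if the
    # vllm package is not installed.
    mistral_tok = get_tokenizer("mistralai/Mistral-7B-Instruct-v0.3",
                                tokenizer_mode="mistral")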