From 08afd3d7bc35d29ae0d56fd18899ba5117aabf93 Mon Sep 17 00:00:00 2001 From: Rishin Raj Date: Tue, 26 Nov 2024 06:26:29 +0000 Subject: [PATCH] Removed CB architecture support check Signed-off-by: Rishin Raj --- .../exporter/export_hf_to_cloud_ai_100.py | 8 -------- QEfficient/transformers/modeling_utils.py | 19 ------------------- tests/cloud/conftest.py | 7 ------- 3 files changed, 34 deletions(-) diff --git a/QEfficient/exporter/export_hf_to_cloud_ai_100.py b/QEfficient/exporter/export_hf_to_cloud_ai_100.py index 55f2ac3be..514e8455c 100644 --- a/QEfficient/exporter/export_hf_to_cloud_ai_100.py +++ b/QEfficient/exporter/export_hf_to_cloud_ai_100.py @@ -16,7 +16,6 @@ from QEfficient.base.common import AUTO_MODEL_MAP_TO_MODEL_TYPE_MAP, QEFF_MODEL_TYPE, QEFFCommonLoader from QEfficient.base.modeling_qeff import QEFFBaseModel from QEfficient.exporter.export_utils import export_onnx, fix_onnx_fp16, generate_input_files, run_model_on_ort -from QEfficient.transformers.modeling_utils import get_lists_of_cb_qeff_models from QEfficient.transformers.models.modeling_auto import QEFFAutoModelForCausalLM from QEfficient.utils import load_hf_tokenizer from QEfficient.utils.constants import QEFF_MODELS_DIR, Constants @@ -316,13 +315,6 @@ def export_for_cloud( seq_length: int = Constants.SEQ_LEN, full_batch_size: Optional[int] = None, ) -> str: - # Check if model architecture is supported for continuous batching. - if full_batch_size and qeff_model.model.config.architectures[0].lower() not in { - x.lower() for x in get_lists_of_cb_qeff_models.architectures - }: - raise NotImplementedError( - f"Continuous batching is not supported for {qeff_model.model.config.architectures[0]}" - ) # FIXME: move all this to class instead of here, and just call qeff_model.export here. if AUTO_MODEL_MAP_TO_MODEL_TYPE_MAP.get(qeff_model.__class__, None) == QEFF_MODEL_TYPE.CAUSALLM: # type: ignore diff --git a/QEfficient/transformers/modeling_utils.py b/QEfficient/transformers/modeling_utils.py index 91c886c5f..039504a48 100644 --- a/QEfficient/transformers/modeling_utils.py +++ b/QEfficient/transformers/modeling_utils.py @@ -137,25 +137,6 @@ # Required for the Automation tool ModelArchitectures = namedtuple("ModelArchitectures", ["architectures"]) -get_lists_of_cb_qeff_models = ModelArchitectures( - [ - LlamaForCausalLM.__name__, - GemmaForCausalLM.__name__, - Gemma2ForCausalLM.__name__, - MistralForCausalLM.__name__, - MixtralForCausalLM.__name__, - Starcoder2ForCausalLM.__name__, - Qwen2ForCausalLM.__name__, - Phi3ForCausalLM.__name__, - PhiForCausalLM.__name__, - CodeGenForCausalLM.__name__, - GPT2LMHeadModel.__name__, - GPTJForCausalLM.__name__, - MptForCausalLM.__name__, - FalconForCausalLM.__name__, - GPTBigCodeForCausalLM.__name__, - ] -) # Create an instance of the named tuple qeff_supported_architectures = ModelArchitectures( [ diff --git a/tests/cloud/conftest.py b/tests/cloud/conftest.py index d4960a4ad..d6b3702af 100644 --- a/tests/cloud/conftest.py +++ b/tests/cloud/conftest.py @@ -11,7 +11,6 @@ import pytest -from QEfficient.transformers.modeling_utils import get_lists_of_cb_qeff_models from QEfficient.utils import get_onnx_dir_name from QEfficient.utils.constants import QEFF_MODELS_DIR from QEfficient.utils.logging_utils import logger @@ -271,12 +270,6 @@ def pytest_collection_modifyitems(config, items): if item.module.__name__ in ["test_export", "test_compile", "test_execute", "test_infer"]: if hasattr(item, "callspec"): params = item.callspec.params - model_class = model_class_dict[params["model_name"]] - if ( - params["full_batch_size"] is not None - and model_class not in get_lists_of_cb_qeff_models.architectures - ): - item.add_marker(pytest.mark.skip(reason="Skipping because FULL BATCH SIZE does not support...")) if item.module.__name__ in ["test_export", "test_compile", "test_execute"]: if hasattr(item, "callspec"):