diff --git a/examples/local/txt2img-example.py b/examples/local/txt2img-example.py index a30fdfcb..edbc5963 100644 --- a/examples/local/txt2img-example.py +++ b/examples/local/txt2img-example.py @@ -14,6 +14,10 @@ mii_configs = { "tensor_parallel": 1, + "enable_cuda_graph": + True, + "replace_with_kernel_inject": + True, "dtype": "fp16", "hf_auth_token": diff --git a/mii/models/load_models.py b/mii/models/load_models.py index ca8505fd..8f5b283a 100644 --- a/mii/models/load_models.py +++ b/mii/models/load_models.py @@ -70,8 +70,6 @@ def load_models(task_name, model_name, task_name, mii_config) - inf_config["replace_with_kernel_inject"] = False #not supported yet - inf_config["enable_cuda_graph"] = True else: raise ValueError(f"Unknown model provider {provider}")