Commit

Move cpu_only to MCV class (#762)
* Initial changes

* Fix typo

* Updated single model ckpt

* Updated multi-model ckpt

* Updating plot manager

* Updating BLS checkpoint

* Ensemble unit tests passing

* Fixing BLS report manager unit testing

* Updating request rate checkpoint

* Updating checkpoints for L0 tests

* Changing tests to use add_sub

* Fixing num of output rows

* Removing backwards compat
nv-braf authored Sep 8, 2023
1 parent 07aaa4d commit 7ad3117
Showing 42 changed files with 30,366 additions and 34,534 deletions.

model_analyzer/config/generate/base_model_config_generator.py (6 changes: 2 additions & 4 deletions)

@@ -159,11 +159,10 @@ def _make_remote_model_config_variant(self) -> ModelConfigVariant:
         model_config = ModelConfig.create_from_triton_api(
             self._client, self._base_model_name, self._config.client_max_retries
         )
-        model_config.set_cpu_only(self._cpu_only)
         if not self._config.reload_model_disable:
             self._client.unload_model(self._base_model_name)

-        return ModelConfigVariant(model_config, self._base_model_name)
+        return ModelConfigVariant(model_config, self._base_model_name, self._cpu_only)

     def _make_direct_mode_model_config_variant(
         self, param_combo: Dict
@@ -217,9 +216,8 @@ def make_model_config_variant(

         model_config_dict["name"] = variant_name if c_api_mode else model_name
         model_config = ModelConfig.create_from_dictionary(model_config_dict)
-        model_config.set_cpu_only(model.cpu_only())

-        return ModelConfigVariant(model_config, variant_name)
+        return ModelConfigVariant(model_config, variant_name, model.cpu_only())

     @staticmethod
     def make_ensemble_model_config_variant(
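
Both hunks converge on the same pattern: cpu_only is forwarded to the ModelConfigVariant constructor instead of being set on the ModelConfig afterwards. A minimal runnable sketch of that routing, with stand-in classes (only the argument flow mirrors the diff; the names FakeModelSpec and the values are made up):

from dataclasses import dataclass
from typing import Any

@dataclass
class ModelConfigVariant:  # stand-in mirroring the extended dataclass in this commit
    model_config: Any
    variant_name: str
    cpu_only: bool = False

class FakeModelSpec:
    """Stand-in for the profiled model spec whose cpu_only() the direct path reads."""
    def cpu_only(self) -> bool:
        return True

# Remote mode forwards the generator's own _cpu_only state:
remote = ModelConfigVariant(None, "add_sub", cpu_only=False)
# Direct mode forwards the per-model flag from the spec:
direct = ModelConfigVariant(None, "add_sub_config_0", FakeModelSpec().cpu_only())
print(remote.cpu_only, direct.cpu_only)  # False True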

model_analyzer/config/run/model_run_config.py (24 changes: 0 additions & 24 deletions)

@@ -297,13 +297,6 @@ def from_dict(cls, model_run_config_dict):
             model_run_config_dict["_perf_config"]
         )

-        # TODO: TMA-1332: This is for backward compatibility with older checkpoints used in unit tests
-        if "_model_config" in model_run_config_dict:
-            model_config = ModelConfig.from_dict(model_run_config_dict["_model_config"])
-            model_run_config._model_config_variant = ModelConfigVariant(
-                model_config, model_config.get_field("name")
-            )
-
         if "_composing_config_variants" in model_run_config_dict:
             model_run_config._composing_config_variants = [
                 ModelConfigVariant(
@@ -317,21 +310,4 @@ def from_dict(cls, model_run_config_dict):
                 ]
             ]

-        # TODO: TMA-1332: This is for backward compatibility with older checkpoints used in unit tests
-        if "_composing_configs" in model_run_config_dict:
-            composing_configs = [
-                ModelConfig.from_dict(composing_config_dict)
-                for composing_config_dict in model_run_config_dict["_composing_configs"]
-            ]
-
-            composing_variant_names = [
-                composing_config.get_field("name")
-                for composing_config in composing_configs
-            ]
-
-            model_run_config._composing_config_variants = [
-                ModelConfigVariant(composing_config, composing_variant_names[i])
-                for i, composing_config in enumerate(composing_configs)
-            ]
-
         return model_run_config
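
With the two TMA-1332 fallbacks gone, from_dict honors only the new-style key; legacy checkpoints carrying _model_config or _composing_configs are simply ignored. A small sketch of the remaining key-gating (plain dicts and the entry shapes are stand-ins for the real checkpoint payload):

def composing_variant_names(model_run_config_dict):
    # Only "_composing_config_variants" is consulted after this commit;
    # the legacy "_model_config" / "_composing_configs" branches were removed.
    if "_composing_config_variants" not in model_run_config_dict:
        return []
    return [d["variant_name"] for d in model_run_config_dict["_composing_config_variants"]]

old_ckpt = {"_composing_configs": [{"name": "add"}]}  # pre-refactor shape: ignored
new_ckpt = {"_composing_config_variants": [{"variant_name": "add_config_0"}]}
print(composing_variant_names(old_ckpt))  # []
print(composing_variant_names(new_ckpt))  # ['add_config_0']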

model_analyzer/config/run/run_config.py (2 changes: 1 addition & 1 deletion)

@@ -85,7 +85,7 @@ def cpu_only(self):
         """
         return all(
             [
-                model_run_config.model_config().cpu_only()
+                model_run_config.model_config_variant().cpu_only
                 for model_run_config in self._model_run_configs
             ]
         )
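
The aggregation itself is untouched: a RunConfig is CPU-only exactly when every one of its model run configs is. What changed is the access path, from the removed ModelConfig.cpu_only() method call to the dataclass attribute on the variant (note: no parentheses). The all() semantics, with stand-in flags:

# Stand-in flags for each model run config's variant.cpu_only attribute:
print(all([True, True, True]))   # True: every model can run CPU-only
print(all([True, True, False]))  # False: one GPU-bound model flips the whole RunConfig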

model_analyzer/plots/detailed_plot.py (1 change: 0 additions & 1 deletion)

@@ -168,7 +168,6 @@ def plot_data(self):

         # Sort the data by request rate or concurrency
         if "request_rate" in self._data and self._data["request_rate"][0]:
-            print(f"\n\nFound request rate: {self._data['request_rate']}\n\n")
             sort_indices = list(
                 zip(*sorted(enumerate(self._data["request_rate"]), key=lambda x: x[1]))
             )[0]
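
Only the leftover debug print is dropped; the sort that follows it is an argsort idiom worth unpacking: enumerate pairs each value with its index, sorted orders the pairs by value, and zip(*...) transposes the result so the first tuple holds the ordered indices. A standalone run with made-up request rates:

request_rates = [200.0, 50.0, 100.0]  # made-up values standing in for self._data["request_rate"]
sort_indices = list(zip(*sorted(enumerate(request_rates), key=lambda x: x[1])))[0]
print(sort_indices)                              # (1, 2, 0)
print([request_rates[i] for i in sort_indices])  # [50.0, 100.0, 200.0]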

model_analyzer/triton/model/model_config.py (33 changes: 1 addition & 32 deletions)

@@ -50,24 +50,14 @@ def __init__(self, model_config):
         """

         self._model_config = model_config
-        self._cpu_only = False

     def to_dict(self):
         model_config_dict = json_format.MessageToDict(self._model_config)
-        model_config_dict["cpu_only"] = self._cpu_only
         return model_config_dict

     @classmethod
     def from_dict(cls, model_config_dict):
-        if "cpu_only" in model_config_dict:
-            cpu_only = model_config_dict["cpu_only"]
-            del model_config_dict["cpu_only"]
-            model_config = ModelConfig.create_from_dictionary(model_config_dict)
-            model_config._cpu_only = cpu_only
-        else:
-            model_config = ModelConfig.create_from_dictionary(model_config_dict)
-
-        return model_config
+        return ModelConfig.create_from_dictionary(model_config_dict)

     @staticmethod
     def create_model_config_dict(config, client, gpus, model_repository, model_name):
@@ -290,27 +280,6 @@ def create_from_profile_spec(

         return model_config

-    def set_cpu_only(self, cpu_only):
-        """
-        Parameters
-        ----------
-        bool
-            Whether this model config has only
-            CPU instance groups
-        """
-
-        self._cpu_only = cpu_only
-
-    def cpu_only(self):
-        """
-        Returns
-        -------
-        bool
-            Whether the model should be run on CPU only
-        """
-
-        return self._cpu_only
-
     def is_ensemble(self) -> bool:
         """
         Returns
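
With the flag and its accessors removed, ModelConfig serialization reduces to a plain protobuf round-trip. A hedged sketch of the resulting symmetry, with ordinary dicts standing in for json_format.MessageToDict and create_from_dictionary (the field names here are illustrative, not the real config schema):

def to_dict(model_config_fields):
    return dict(model_config_fields)  # no injected "cpu_only" key anymore

def from_dict(model_config_dict):
    return dict(model_config_dict)    # no pop-and-stash "cpu_only" branch anymore

fields = {"name": "add_sub", "max_batch_size": 8}
assert from_dict(to_dict(fields)) == fields  # clean round-trip
assert "cpu_only" not in to_dict(fields)     # the flag now lives on the variant
print("round-trip ok")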

model_analyzer/triton/model/model_config_variant.py (3 changes: 2 additions & 1 deletion)

@@ -21,8 +21,9 @@
 class ModelConfigVariant:
     """
     A dataclass that holds the ModelConfig as well as the variant name
-    for the model
+    and cpu_only flag for the model
     """

     model_config: ModelConfig
     variant_name: str
+    cpu_only: bool = False
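
A quick usage check of the extended dataclass, assuming a model-analyzer build containing this commit is installed; the False default keeps existing two-argument call sites valid, and callers read the flag as an attribute (as run_config.py above now does). None stands in for a real ModelConfig here:

from model_analyzer.triton.model.model_config_variant import ModelConfigVariant

legacy_style = ModelConfigVariant(None, "add_sub_config_0")  # two-arg form still works
cpu_variant = ModelConfigVariant(None, "add_sub_config_1", cpu_only=True)
print(legacy_style.cpu_only, cpu_variant.cpu_only)  # False True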

qa/L0_output_fields/config_generator.py (2 changes: 1 addition & 1 deletion)

@@ -20,7 +20,7 @@
 def _get_sweep_configs():
     sweep_configs = []
     model_config = {
-        "profile_models": ["vgg19_libtorch"],
+        "profile_models": ["add_sub"],
         "server_output_fields": [
             "model_name",
             "gpu_uuid",

qa/L0_output_fields/test.sh (8 changes: 4 additions & 4 deletions)

@@ -22,16 +22,16 @@ python3 config_generator.py
 # Set test parameters
 MODEL_ANALYZER="`which model-analyzer`"
 REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"}
-CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2022_08_02"}
+MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"}
+CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2023_09_07"}
 FILENAME_SERVER_ONLY="server-metrics.csv"
 FILENAME_INFERENCE_MODEL="model-metrics-inference.csv"
 FILENAME_GPU_MODEL="model-metrics-gpu.csv"
 GPUS=(`get_all_gpus_uuids`)
 OUTPUT_MODEL_REPOSITORY=${OUTPUT_MODEL_REPOSITORY:=`get_output_directory`}
 CHECKPOINT_DIRECTORY="."

-cp $CHECKPOINT_REPOSITORY/resnet50_vgg19.ckpt $CHECKPOINT_DIRECTORY/0.ckpt
+cp $CHECKPOINT_REPOSITORY/add_sub.ckpt $CHECKPOINT_DIRECTORY/0.ckpt

 MODEL_ANALYZER_ANALYZE_BASE_ARGS="--checkpoint-directory $CHECKPOINT_DIRECTORY --filename-server-only=$FILENAME_SERVER_ONLY"
 MODEL_ANALYZER_ANALYZE_BASE_ARGS="$MODEL_ANALYZER_ANALYZE_BASE_ARGS --filename-model-inference=$FILENAME_INFERENCE_MODEL --filename-model-gpu=$FILENAME_GPU_MODEL"
@@ -59,7 +59,7 @@ for CONFIG_FILE in ${LIST_OF_CONFIG_FILES[@]}; do

     MODEL_ANALYZER_ARGS="$MODEL_ANALYZER_ANALYZE_BASE_ARGS -e $EXPORT_PATH -f $CONFIG_FILE"

-    TEST_OUTPUT_NUM_ROWS=16
+    TEST_OUTPUT_NUM_ROWS=47
     run_analyzer
     if [ $? -ne 0 ]; then
         echo -e "\n***\n*** Test Failed. model-analyzer exited with non-zero exit code. \n***"

qa/L0_quick_search/test.sh (4 changes: 2 additions & 2 deletions)

@@ -20,8 +20,8 @@ create_logs_dir "L0_quick_search"
 # Set test parameters
 MODEL_ANALYZER="`which model-analyzer`"
 REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"}
-QA_MODELS="resnet50_libtorch"
+MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"}
+QA_MODELS="add_sub"
 MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')"
 TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"}
 CLIENT_PROTOCOL="grpc"

qa/L0_quick_search_multi_model/test.sh (4 changes: 2 additions & 2 deletions)

@@ -20,8 +20,8 @@ create_logs_dir "L0_quick_search_multi_model"
 # Set test parameters
 MODEL_ANALYZER="`which model-analyzer`"
 REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"}
-QA_MODELS="resnet50_libtorch,vgg19_libtorch"
+MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"}
+QA_MODELS="add,sub"
 MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')"
 TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"}
 CLIENT_PROTOCOL="grpc"

qa/L0_results/test.sh (8 changes: 4 additions & 4 deletions)

@@ -19,9 +19,9 @@ create_logs_dir "L0_results"
 # Set test parameters
 MODEL_ANALYZER="`which model-analyzer`"
 REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
-MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/libtorch_model_store"}
-CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2022_08_02"}
-QA_MODELS="resnet50_libtorch"
+MODEL_REPOSITORY=${MODEL_REPOSITORY:="/opt/triton-model-analyzer/examples/quick-start"}
+CHECKPOINT_REPOSITORY=${CHECKPOINT_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/model_analyzer_checkpoints/2023_09_07"}
+QA_MODELS="add_sub"
 MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')"
 FILENAME_SERVER_ONLY="server-metrics.csv"
 FILENAME_INFERENCE_MODEL="model-metrics-inference.csv"
@@ -33,7 +33,7 @@ GPUS=(`get_all_gpus_uuids`)
 OUTPUT_MODEL_REPOSITORY=${OUTPUT_MODEL_REPOSITORY:=`get_output_directory`}
 create_result_paths

-cp $CHECKPOINT_REPOSITORY/resnet50_vgg19.ckpt $CHECKPOINT_DIRECTORY/0.ckpt
+cp $CHECKPOINT_REPOSITORY/add_sub.ckpt $CHECKPOINT_DIRECTORY/0.ckpt
 rm -rf $OUTPUT_MODEL_REPOSITORY
(Diffs for the remaining 31 changed files are not shown.)
