Add input_data.json

triton-inference-server · Aug 21, 2023 · 8d4556c · 8d4556c
1 parent 6117f96
commit 8d4556c
Show file tree

Hide file tree

Showing 10 changed files with 72 additions and 56 deletions.
diff --git a/examples/quick-start/add/1/model.py b/examples/quick-start/add/1/model.py
@@ -49,12 +49,12 @@ def initialize(self, args):
         # You must parse model_config. JSON string is not parsed here
         self.model_config = model_config = json.loads(args["model_config"])
 
-        # Get OUTPUT0 configuration
-        output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
+        # Get OUTPUT configuration
+        output_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT")
 
         # Convert Triton types to numpy types
-        self.output0_dtype = pb_utils.triton_string_to_numpy(
-            output0_config["data_type"]
+        self.output_dtype = pb_utils.triton_string_to_numpy(
+            output_config["data_type"]
         )
 
     def execute(self, requests):
@@ -79,7 +79,7 @@ def execute(self, requests):
           be the same as `requests`
         """
 
-        output0_dtype = self.output0_dtype
+        output_dtype = self.output_dtype
 
         responses = []
 
@@ -95,7 +95,7 @@ def execute(self, requests):
 
             # Create output tensors. You need pb_utils.Tensor
             # objects to create pb_utils.InferenceResponse.
-            out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype))
+            out_tensor_0 = pb_utils.Tensor("OUTPUT", out_0.astype(output_dtype))
 
             # Create InferenceResponse. You can set an error here in case
             # there was a problem with handling this inference request.

diff --git a/examples/quick-start/add/config.pbtxt b/examples/quick-start/add/config.pbtxt
@@ -32,7 +32,7 @@ input [
 ]
 output [
   {
-    name: "OUTPUT0"
+    name: "OUTPUT"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }

diff --git a/examples/quick-start/bls/1/model.py b/examples/quick-start/bls/1/model.py
@@ -81,35 +81,27 @@ def execute(self, requests):
             # Get INPUT1
             in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
 
-            # Create inference request object for "add" model
-            infer_request_add = pb_utils.InferenceRequest(
-                model_name="add",
-                requested_output_names=["OUTPUT0"],
-                inputs=[in_0, in_1],
-            )
-
-            # Perform synchronous blocking inference request
-            infer_response_add = infer_request_add.exec()
+            # Get MODEL_NAME
+            model_name = pb_utils.get_input_tensor_by_name(request, "MODEL_NAME")
 
-            if infer_response_add.has_error():
-                raise pb_utils.TritonModelException(
-                    infer_response_add.error().message()
-                )
+            # Extract the model name from the first element of MODEL_NAME
+            model_name_string = model_name.as_numpy()[0]
 
-            # Create inference request object for "sub" model
-            infer_request_sub = pb_utils.InferenceRequest(
-                model_name="sub",
-                requested_output_names=["OUTPUT1"],
+            # Create inference request object
+            infer_request = pb_utils.InferenceRequest(
+                model_name=model_name_string,
+                requested_output_names=["OUTPUT"],
                 inputs=[in_0, in_1],
             )
 
             # Perform synchronous blocking inference request
-            infer_response_sub = infer_request_sub.exec()
+            infer_response = infer_request.exec()
 
-            if infer_response_sub.has_error():
-                raise pb_utils.TritonModelException(
-                    infer_response_sub.error().message()
-                )
+            # Make sure that the inference response doesn't have an error. If
+            # it has an error and you can't proceed with your model execution,
+            # you can raise an exception.
+            if infer_response.has_error():
+                raise pb_utils.TritonModelException(infer_response.error().message())
 
             # Create InferenceResponse. You can set an error here in case
             # there was a problem with handling this inference request.
@@ -123,10 +115,7 @@ def execute(self, requests):
             # outputs with correct output names, we can just pass the list
             # of outputs to the InferenceResponse object.
             inference_response = pb_utils.InferenceResponse(
-                output_tensors=[
-                    pb_utils.get_output_tensor_by_name(infer_response_add, "OUTPUT0"),
-                    pb_utils.get_output_tensor_by_name(infer_response_sub, "OUTPUT1"),
-                ]
+                output_tensors=infer_response.output_tensors()
             )
             responses.append(inference_response)
 

diff --git a/examples/quick-start/bls/config.pbtxt b/examples/quick-start/bls/config.pbtxt
@@ -16,29 +16,26 @@ name: "bls"
 backend: "python"
 
 input [
+  {
+    name: "MODEL_NAME"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  },
   {
     name: "INPUT0"
     data_type: TYPE_FP32
     dims: [ 4 ]
-  }
-]
-input [
+  },
   {
     name: "INPUT1"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }
 ]
+
 output [
   {
-    name: "OUTPUT0"
-    data_type: TYPE_FP32
-    dims: [ 4 ]
-  }
-]
-output [
-  {
-    name: "OUTPUT1"
+    name: "OUTPUT"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }

diff --git a/examples/quick-start/ensemble_add_sub/config.pbtxt b/examples/quick-start/ensemble_add_sub/config.pbtxt
@@ -56,7 +56,7 @@ ensemble_scheduling {
         value: "INPUT1"
       }
       output_map {
-        key: "OUTPUT0"
+        key: "OUTPUT"
         value: "OUTPUT0"
       }
     },
@@ -72,7 +72,7 @@ ensemble_scheduling {
         value: "INPUT1"
       }
       output_map {
-        key: "OUTPUT1"
+        key: "OUTPUT"
         value: "OUTPUT1"
       }
     }

diff --git a/examples/quick-start/sub/1/model.py b/examples/quick-start/sub/1/model.py
@@ -49,12 +49,12 @@ def initialize(self, args):
         # You must parse model_config. JSON string is not parsed here
         self.model_config = model_config = json.loads(args["model_config"])
 
-        # Get OUTPUT1 configuration
-        output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1")
+        # Get OUTPUT configuration
+        output_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT")
 
         # Convert Triton types to numpy types
-        self.output1_dtype = pb_utils.triton_string_to_numpy(
-            output1_config["data_type"]
+        self.output_dtype = pb_utils.triton_string_to_numpy(
+            output_config["data_type"]
         )
 
     def execute(self, requests):
@@ -79,7 +79,7 @@ def execute(self, requests):
           be the same as `requests`
         """
 
-        output1_dtype = self.output1_dtype
+        output_dtype = self.output_dtype
 
         responses = []
 
@@ -95,7 +95,7 @@ def execute(self, requests):
 
             # Create output tensors. You need pb_utils.Tensor
             # objects to create pb_utils.InferenceResponse.
-            out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype))
+            out_tensor_1 = pb_utils.Tensor("OUTPUT", out_1.astype(output_dtype))
 
             # Create InferenceResponse. You can set an error here in case
             # there was a problem with handling this inference request.

diff --git a/examples/quick-start/sub/config.pbtxt b/examples/quick-start/sub/config.pbtxt
@@ -32,7 +32,7 @@ input [
 ]
 output [
   {
-    name: "OUTPUT1"
+    name: "OUTPUT"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }

diff --git a/qa/L0_bls_model/input_data.json b/qa/L0_bls_model/input_data.json
@@ -0,0 +1,21 @@
+{
+	"data": [
+		{
+			"MODEL_NAME": [
+				"add"
+			],
+			"INPUT0": [
+				0.74106514,
+				0.7371813,
+				0.5274665,
+				0.13930903
+			],
+			"INPUT1": [
+				0.7845891,
+				0.88089234,
+				0.8466405,
+				0.55024815
+			]
+		}
+	]
+}
diff --git a/qa/L0_bls_model/test.sh b/qa/L0_bls_model/test.sh
@@ -22,7 +22,8 @@ MODEL_ANALYZER="$(which model-analyzer)"
 REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
 MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/qa_bls_model_repository/"}
 QA_MODELS="bls"
-BLS_COMPOSING_MODELS="add,sub"
+BLS_COMPOSING_MODELS="add"
+INPUT_JSON="$(pwd)/input_data.json"
 MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')"
 TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"}
 CLIENT_PROTOCOL="grpc"
@@ -38,7 +39,7 @@ rm -rf $OUTPUT_MODEL_REPOSITORY
 create_result_paths
 SERVER_LOG=$TEST_LOG_DIR/server.log
 
-python3 test_config_generator.py --profile-models $MODEL_NAMES --bls-composing-models $BLS_COMPOSING_MODELS
+python3 test_config_generator.py --profile-models $MODEL_NAMES --bls-composing-models $BLS_COMPOSING_MODELS -i $INPUT_JSON
 
 # Run the analyzer and check the results
 RET=0

diff --git a/qa/L0_bls_model/test_config_generator.py b/qa/L0_bls_model/test_config_generator.py
@@ -54,7 +54,14 @@ def setup(self):
             "--bls-composing-models",
             type=str,
             required=True,
-            help="Comma separated list of models to be profiled",
+            help="Comma separated list of BLS composing models",
+        )
+        parser.add_argument(
+            "-i",
+            "--input-json-file",
+            type=str,
+            required=True,
+            help="Input data JSON file path",
         )
 
         args = parser.parse_args()
@@ -63,6 +70,7 @@ def setup(self):
         self.config["bls_composing_models"] = sorted(
             args.bls_composing_models.split(",")
         )
+        self.config["perf_analyzer_flags"] = {"input-data": args.input_json_file}
 
     def generate_config(self):
         with open("config.yml", "w+") as f: