diff --git a/examples/quick-start/add/1/model.py b/examples/quick-start/add/1/model.py
index 96996301e..ea8944df8 100755
--- a/examples/quick-start/add/1/model.py
+++ b/examples/quick-start/add/1/model.py
@@ -49,12 +49,12 @@ def initialize(self, args):
         # You must parse model_config. JSON string is not parsed here
         self.model_config = model_config = json.loads(args["model_config"])
 
-        # Get OUTPUT0 configuration
-        output0_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT0")
+        # Get OUTPUT configuration
+        output_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT")
 
         # Convert Triton types to numpy types
-        self.output0_dtype = pb_utils.triton_string_to_numpy(
-            output0_config["data_type"]
+        self.output_dtype = pb_utils.triton_string_to_numpy(
+            output_config["data_type"]
         )
 
     def execute(self, requests):
@@ -79,7 +79,7 @@ def execute(self, requests):
           be the same as `requests`
         """
 
-        output0_dtype = self.output0_dtype
+        output_dtype = self.output_dtype
 
         responses = []
 
@@ -95,7 +95,7 @@ def execute(self, requests):
 
             # Create output tensors. You need pb_utils.Tensor
            # objects to create pb_utils.InferenceResponse.
-            out_tensor_0 = pb_utils.Tensor("OUTPUT0", out_0.astype(output0_dtype))
+            out_tensor_0 = pb_utils.Tensor("OUTPUT", out_0.astype(output_dtype))
 
             # Create InferenceResponse. You can set an error here in case
             # there was a problem with handling this inference request.
diff --git a/examples/quick-start/add/config.pbtxt b/examples/quick-start/add/config.pbtxt
old mode 100644
new mode 100755
index 51efce9da..7c3d2128c
--- a/examples/quick-start/add/config.pbtxt
+++ b/examples/quick-start/add/config.pbtxt
@@ -32,7 +32,7 @@ input [
 ]
 output [
   {
-    name: "OUTPUT0"
+    name: "OUTPUT"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }
diff --git a/examples/quick-start/bls/1/model.py b/examples/quick-start/bls/1/model.py
index 72e75678b..b45873037 100755
--- a/examples/quick-start/bls/1/model.py
+++ b/examples/quick-start/bls/1/model.py
@@ -81,35 +81,27 @@ def execute(self, requests):
             # Get INPUT1
             in_1 = pb_utils.get_input_tensor_by_name(request, "INPUT1")
 
-            # Create inference request object for "add" model
-            infer_request_add = pb_utils.InferenceRequest(
-                model_name="add",
-                requested_output_names=["OUTPUT0"],
-                inputs=[in_0, in_1],
-            )
-
-            # Perform synchronous blocking inference request
-            infer_response_add = infer_request_add.exec()
+            # Get Model Name
+            model_name = pb_utils.get_input_tensor_by_name(request, "MODEL_NAME")
 
-            if infer_response_add.has_error():
-                raise pb_utils.TritonModelException(
-                    infer_response_add.error().message()
-                )
+            # Model Name string
+            model_name_string = model_name.as_numpy()[0]
 
-            # Create inference request object for "sub" model
-            infer_request_sub = pb_utils.InferenceRequest(
-                model_name="sub",
-                requested_output_names=["OUTPUT1"],
+            # Create inference request object
+            infer_request = pb_utils.InferenceRequest(
+                model_name=model_name_string,
+                requested_output_names=["OUTPUT"],
                 inputs=[in_0, in_1],
             )
 
             # Perform synchronous blocking inference request
-            infer_response_sub = infer_request_sub.exec()
+            infer_response = infer_request.exec()
 
-            if infer_response_sub.has_error():
-                raise pb_utils.TritonModelException(
-                    infer_response_sub.error().message()
-                )
+            # Make sure that the inference response doesn't have an error. If
+            # it has an error and you can't proceed with your model execution
+            # you can raise an exception.
+            if infer_response.has_error():
+                raise pb_utils.TritonModelException(infer_response.error().message())
 
             # Create InferenceResponse. You can set an error here in case
             # there was a problem with handling this inference request.
@@ -123,10 +115,7 @@ def execute(self, requests):
             # outputs with correct output names, we can just pass the list
             # of outputs to the InferenceResponse object.
             inference_response = pb_utils.InferenceResponse(
-                output_tensors=[
-                    pb_utils.get_output_tensor_by_name(infer_response_add, "OUTPUT0"),
-                    pb_utils.get_output_tensor_by_name(infer_response_sub, "OUTPUT1"),
-                ]
+                output_tensors=infer_response.output_tensors()
             )
             responses.append(inference_response)
 
diff --git a/examples/quick-start/bls/config.pbtxt b/examples/quick-start/bls/config.pbtxt
old mode 100644
new mode 100755
index 0c8ee6e1f..aa977816b
--- a/examples/quick-start/bls/config.pbtxt
+++ b/examples/quick-start/bls/config.pbtxt
@@ -16,29 +16,26 @@
 name: "bls"
 backend: "python"
 input [
+  {
+    name: "MODEL_NAME"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  },
   {
     name: "INPUT0"
     data_type: TYPE_FP32
     dims: [ 4 ]
-  }
-]
-input [
+  },
   {
     name: "INPUT1"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }
 ]
+
 output [
   {
-    name: "OUTPUT0"
-    data_type: TYPE_FP32
-    dims: [ 4 ]
-  }
-]
-output [
-  {
-    name: "OUTPUT1"
+    name: "OUTPUT"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }
diff --git a/examples/quick-start/ensemble_add_sub/config.pbtxt b/examples/quick-start/ensemble_add_sub/config.pbtxt
old mode 100644
new mode 100755
index 14cd8f0bf..d3aea410f
--- a/examples/quick-start/ensemble_add_sub/config.pbtxt
+++ b/examples/quick-start/ensemble_add_sub/config.pbtxt
@@ -56,7 +56,7 @@ ensemble_scheduling {
         value: "INPUT1"
       }
       output_map {
-        key: "OUTPUT0"
+        key: "OUTPUT"
         value: "OUTPUT0"
       }
     },
@@ -72,7 +72,7 @@ ensemble_scheduling {
         value: "INPUT1"
      }
       output_map {
-        key: "OUTPUT1"
+        key: "OUTPUT"
         value: "OUTPUT1"
       }
     }
diff --git a/examples/quick-start/sub/1/model.py b/examples/quick-start/sub/1/model.py
index 4fc24c016..403d11e05 100755
--- a/examples/quick-start/sub/1/model.py
+++ b/examples/quick-start/sub/1/model.py
@@ -49,12 +49,12 @@ def initialize(self, args):
         # You must parse model_config. JSON string is not parsed here
         self.model_config = model_config = json.loads(args["model_config"])
 
-        # Get OUTPUT1 configuration
-        output1_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT1")
+        # Get OUTPUT configuration
+        output_config = pb_utils.get_output_config_by_name(model_config, "OUTPUT")
 
         # Convert Triton types to numpy types
-        self.output1_dtype = pb_utils.triton_string_to_numpy(
-            output1_config["data_type"]
+        self.output_dtype = pb_utils.triton_string_to_numpy(
+            output_config["data_type"]
         )
 
     def execute(self, requests):
@@ -79,7 +79,7 @@ def execute(self, requests):
           be the same as `requests`
         """
 
-        output1_dtype = self.output1_dtype
+        output_dtype = self.output_dtype
 
         responses = []
 
@@ -95,7 +95,7 @@ def execute(self, requests):
 
             # Create output tensors. You need pb_utils.Tensor
             # objects to create pb_utils.InferenceResponse.
-            out_tensor_1 = pb_utils.Tensor("OUTPUT1", out_1.astype(output1_dtype))
+            out_tensor_1 = pb_utils.Tensor("OUTPUT", out_1.astype(output_dtype))
 
             # Create InferenceResponse. You can set an error here in case
             # there was a problem with handling this inference request.
diff --git a/examples/quick-start/sub/config.pbtxt b/examples/quick-start/sub/config.pbtxt
old mode 100644
new mode 100755
index 548f6120c..f1248a71e
--- a/examples/quick-start/sub/config.pbtxt
+++ b/examples/quick-start/sub/config.pbtxt
@@ -32,7 +32,7 @@ input [
 ]
 output [
   {
-    name: "OUTPUT1"
+    name: "OUTPUT"
     data_type: TYPE_FP32
     dims: [ 4 ]
   }
diff --git a/qa/L0_bls_model/input_data.json b/qa/L0_bls_model/input_data.json
new file mode 100755
index 000000000..acab035c8
--- /dev/null
+++ b/qa/L0_bls_model/input_data.json
@@ -0,0 +1,21 @@
+{
+  "data": [
+    {
+      "MODEL_NAME": [
+        "add"
+      ],
+      "INPUT0": [
+        0.74106514,
+        0.7371813,
+        0.5274665,
+        0.13930903
+      ],
+      "INPUT1": [
+        0.7845891,
+        0.88089234,
+        0.8466405,
+        0.55024815
+      ]
+    }
+  ]
+}
\ No newline at end of file
diff --git a/qa/L0_bls_model/test.sh b/qa/L0_bls_model/test.sh
index ef5e45f2c..09bee4962 100755
--- a/qa/L0_bls_model/test.sh
+++ b/qa/L0_bls_model/test.sh
@@ -22,7 +22,8 @@ MODEL_ANALYZER="$(which model-analyzer)"
 REPO_VERSION=${NVIDIA_TRITON_SERVER_VERSION}
 MODEL_REPOSITORY=${MODEL_REPOSITORY:="/mnt/nvdl/datasets/inferenceserver/$REPO_VERSION/qa_bls_model_repository/"}
 QA_MODELS="bls"
-BLS_COMPOSING_MODELS="add,sub"
+BLS_COMPOSING_MODELS="add"
+INPUT_JSON="$(pwd)/input_data.json"
 MODEL_NAMES="$(echo $QA_MODELS | sed 's/ /,/g')"
 TRITON_LAUNCH_MODE=${TRITON_LAUNCH_MODE:="local"}
 CLIENT_PROTOCOL="grpc"
@@ -38,7 +39,7 @@ rm -rf $OUTPUT_MODEL_REPOSITORY
 create_result_paths
 SERVER_LOG=$TEST_LOG_DIR/server.log
 
-python3 test_config_generator.py --profile-models $MODEL_NAMES --bls-composing-models $BLS_COMPOSING_MODELS
+python3 test_config_generator.py --profile-models $MODEL_NAMES --bls-composing-models $BLS_COMPOSING_MODELS -i $INPUT_JSON
 
 # Run the analyzer and check the results
 RET=0
diff --git a/qa/L0_bls_model/test_config_generator.py b/qa/L0_bls_model/test_config_generator.py
index bc6be2e1c..67ab0edbe 100755
--- a/qa/L0_bls_model/test_config_generator.py
+++ b/qa/L0_bls_model/test_config_generator.py
@@ -54,7 +54,14 @@ def setup(self):
             "--bls-composing-models",
             type=str,
             required=True,
-            help="Comma separated list of models to be profiled",
+            help="Comma separated list of BLS composing models",
+        )
+        parser.add_argument(
+            "-i",
+            "--input-json-file",
+            type=str,
+            required=True,
+            help="Input data JSON file path",
         )
 
         args = parser.parse_args()
@@ -63,6 +70,7 @@ def setup(self):
         self.config["bls_composing_models"] = sorted(
             args.bls_composing_models.split(",")
         )
+        self.config["perf_analyzer_flags"] = {"input-data": args.input_json_file}
 
     def generate_config(self):
         with open("config.yml", "w+") as f:
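
Reviewer note: after this change, the "bls" model selects its composing model at request
time via the new MODEL_NAME string input, and the example models expose a single
"OUTPUT" tensor instead of the old OUTPUT0/OUTPUT1 pair. Below is a minimal client
sketch (not part of this patch) showing one way to exercise the new interface. It
assumes a locally running Triton server with HTTP on localhost:8000 and the
tritonclient package installed (pip install tritonclient[http]); the input values are
reused from input_data.json above.

import numpy as np
import tritonclient.http as httpclient

client = httpclient.InferenceServerClient(url="localhost:8000")

# MODEL_NAME routes the BLS request to a composing model ("add" or "sub").
# TYPE_STRING in the model config maps to the "BYTES" client datatype.
model_name = httpclient.InferInput("MODEL_NAME", [1], "BYTES")
model_name.set_data_from_numpy(np.array(["add"], dtype=np.object_))

input0 = httpclient.InferInput("INPUT0", [4], "FP32")
input0.set_data_from_numpy(
    np.array([0.74106514, 0.7371813, 0.5274665, 0.13930903], dtype=np.float32)
)

input1 = httpclient.InferInput("INPUT1", [4], "FP32")
input1.set_data_from_numpy(
    np.array([0.7845891, 0.88089234, 0.8466405, 0.55024815], dtype=np.float32)
)

# Request the single consolidated "OUTPUT" tensor.
result = client.infer(
    model_name="bls",
    inputs=[model_name, input0, input1],
    outputs=[httpclient.InferRequestedOutput("OUTPUT")],
)

# Element-wise sum when MODEL_NAME == "add", difference when "sub".
print(result.as_numpy("OUTPUT"))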