Workaround with L0_trt_reformat_free by removing shm checks
yinggeh committed Aug 4, 2024
1 parent dc90a52 commit 482409e
Showing 3 changed files with 110 additions and 135 deletions.
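For context on the commit title: the checks being removed sit on the shared-memory input path, where a request only references a pre-registered region rather than carrying the tensor bytes inline, so the server cannot validate the inline payload size. A minimal sketch of that path using the public tritonclient shared-memory helpers — the region name, key, and tensor shape here are illustrative assumptions, not taken from this commit:

    import numpy as np
    import tritonclient.grpc as tritongrpcclient
    import tritonclient.utils.shared_memory as shm

    client = tritongrpcclient.InferenceServerClient("localhost:8001")
    input_data = np.ones((1, 16), dtype=np.int32)
    byte_size = input_data.size * input_data.itemsize

    # Create a system shared-memory region, copy the tensor into it, and
    # register it with the server.
    shm_handle = shm.create_shared_memory_region("input0_data", "/input0_shm", byte_size)
    shm.set_shared_memory_region(shm_handle, [input_data])
    client.register_system_shared_memory("input0_data", "/input0_shm", byte_size)

    # The input references the region instead of carrying bytes inline, so
    # the request only declares a shape plus the registered byte size.
    infer_input = tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32")
    infer_input.set_shared_memory("input0_data", byte_size)

    # ... run inference, then clean up ...
    client.unregister_system_shared_memory("input0_data")
    shm.destroy_shared_memory_region(shm_handle)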
4 changes: 2 additions & 2 deletions Dockerfile.QA
@@ -149,8 +149,8 @@ RUN mkdir -p qa/common && \
  cp bin/triton_json_test qa/L0_json/. && \
  cp bin/backend_output_detail_test qa/L0_backend_output_detail/. && \
  cp -r deploy/mlflow-triton-plugin qa/L0_mlflow/. && \
- cp bin/input_byte_size_test qa/L0_input_validation/. && \
- cp -r docs/examples/model_repository/simple_identity qa/L0_input_validation/models
+ cp -r docs/examples/model_repository/{simple,simple_identity,simple_string} qa/L0_input_validation/models && \
+ cp bin/input_byte_size_test qa/L0_input_validation/.

  RUN mkdir -p qa/pkgs && \
      cp python/triton*.whl qa/pkgs/. && \
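The three model directories copied above back the rewritten tests below. The pattern those tests rely on is to attach a valid data buffer and then overwrite the declared shape, so the server has to notice that the declared element count no longer matches the data actually sent. A minimal standalone sketch of that pattern against the simple model, assuming a local server on the default gRPC port as in the tests:

    import numpy as np
    import tritonclient.grpc as tritongrpcclient
    from tritonclient.utils import InferenceServerException

    client = tritongrpcclient.InferenceServerClient("localhost:8001")

    # Attach valid [1, 16] INT32 buffers to both inputs of the "simple" model ...
    inputs = [
        tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"),
        tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"),
    ]
    inputs[0].set_data_from_numpy(np.arange(16, dtype=np.int32).reshape(1, 16))
    inputs[1].set_data_from_numpy(np.ones((1, 16), dtype=np.int32))

    # ... then claim a smaller shape. The 16-element buffers are still sent,
    # so the server should reject the request.
    inputs[0].set_shape([1, 8])
    inputs[1].set_shape([1, 8])

    try:
        client.infer(model_name="simple", inputs=inputs)
    except InferenceServerException as e:
        print(e)  # e.g. input 'INPUT0' got unexpected elements count 16, expected 8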
190 changes: 102 additions & 88 deletions qa/L0_input_validation/input_validation_test.py
@@ -34,8 +34,10 @@
  import infer_util as iu
  import numpy as np
  import tritonclient.grpc as tritongrpcclient
+ import tritonclient.http as tritonhttpclient
+ import tritonclient.utils as utils
  import tritonclient.utils.shared_memory as shm
- from tritonclient.utils import InferenceServerException, np_to_triton_dtype
+ from tritonclient.utils import InferenceServerException


  class InputValTest(unittest.TestCase):
@@ -116,101 +118,113 @@ def test_input_validation_all_optional(self):


  class InputShapeTest(unittest.TestCase):
-     def test_input_shape_validation(self):
-         input_size = 8
-         model_name = "pt_identity"
-         triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+     def test_client_input_shape_validation(self):
+         model_name = "simple"

-         # Pass
-         input_data = np.arange(input_size)[None].astype(np.float32)
-         inputs = [
-             tritongrpcclient.InferInput(
-                 "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-             )
-         ]
-         inputs[0].set_data_from_numpy(input_data)
-         triton_client.infer(model_name=model_name, inputs=inputs)

-         # Larger input byte size than expected
-         input_data = np.arange(input_size + 2)[None].astype(np.float32)
-         inputs = [
-             tritongrpcclient.InferInput(
-                 "INPUT0", input_data.shape, np_to_triton_dtype(input_data.dtype)
-             )
-         ]
-         inputs[0].set_data_from_numpy(input_data)
-         # Compromised input shape
-         inputs[0].set_shape((1, input_size))
-         with self.assertRaises(InferenceServerException) as e:
-             triton_client.infer(
-                 model_name=model_name,
-                 inputs=inputs,
-             )
+         for client_type in ["http", "grpc"]:
+             if client_type == "http":
+                 triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+             else:
+                 triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")

+             # Infer
+             inputs = []
+             if client_type == "http":
+                 inputs.append(tritonhttpclient.InferInput("INPUT0", [1, 16], "INT32"))
+                 inputs.append(tritonhttpclient.InferInput("INPUT1", [1, 16], "INT32"))
+             else:
+                 inputs.append(tritongrpcclient.InferInput("INPUT0", [1, 16], "INT32"))
+                 inputs.append(tritongrpcclient.InferInput("INPUT1", [1, 16], "INT32"))

+             # Create the data for the two input tensors. Initialize the first
+             # to unique integers and the second to all ones.
+             input0_data = np.arange(start=0, stop=16, dtype=np.int32)
+             input0_data = np.expand_dims(input0_data, axis=0)
+             input1_data = np.ones(shape=(1, 16), dtype=np.int32)

+             # Initialize the data
+             inputs[0].set_data_from_numpy(input0_data)
+             inputs[1].set_data_from_numpy(input1_data)

+             # Compromised input shapes
+             inputs[0].set_shape([2, 8])
+             inputs[1].set_shape([2, 8])

+             with self.assertRaises(InferenceServerException) as e:
+                 triton_client.infer(model_name=model_name, inputs=inputs)
+             err_str = str(e.exception)
+             self.assertIn(
+                 f"unexpected shape for input 'INPUT1' for model 'simple'. Expected [-1,16], got [2,8]",
+                 err_str,
+             )
-         err_str = str(e.exception)
-         self.assertIn(
-             "input byte size mismatch for input 'INPUT0' for model 'pt_identity'. Expected 32, got 40",
-             err_str,
-         )

-     def test_input_string_shape_validation(self):
-         input_size = 16
-         model_name = "graphdef_object_int32_int32"
-         np_dtype_string = np.dtype(object)
-         triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")
+             # Compromised input shapes
+             inputs[0].set_shape([1, 8])
+             inputs[1].set_shape([1, 8])

-         def get_input_array(input_size, np_dtype):
-             rinput_dtype = iu._range_repr_dtype(np_dtype)
-             input_array = np.random.randint(
-                 low=0, high=127, size=(1, input_size), dtype=rinput_dtype
-             )
+             with self.assertRaises(InferenceServerException) as e:
+                 triton_client.infer(model_name=model_name, inputs=inputs)
+             err_str = str(e.exception)
+             self.assertIn(
+                 f"input 'INPUT0' got unexpected elements count 16, expected 8",
+                 err_str,
+             )

-             # Convert to string type
-             inn = np.array(
-                 [str(x) for x in input_array.reshape(input_array.size)], dtype=object
-             )
-             input_array = inn.reshape(input_array.shape)
+     def test_client_input_string_shape_validation(self):
+         for client_type in ["http", "grpc"]:

-             inputs = []
-             inputs.append(
-                 tritongrpcclient.InferInput(
-                     "INPUT0", input_array.shape, np_to_triton_dtype(np_dtype)
-                 )
-             )
-             inputs.append(
-                 tritongrpcclient.InferInput(
-                     "INPUT1", input_array.shape, np_to_triton_dtype(np_dtype)
-                 )
-             )
+             def identity_inference(triton_client, np_array, binary_data):
+                 model_name = "simple_identity"

-             inputs[0].set_data_from_numpy(input_array)
-             inputs[1].set_data_from_numpy(input_array)
-             return inputs
+                 # Total elements no change
+                 inputs = []
+                 if client_type == "http":
+                     inputs.append(
+                         tritonhttpclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                     )
+                     inputs[0].set_data_from_numpy(np_array, binary_data=binary_data)
+                     inputs[0].set_shape([2, 8])
+                 else:
+                     inputs.append(
+                         tritongrpcclient.InferInput("INPUT0", np_array.shape, "BYTES")
+                     )
+                     inputs[0].set_data_from_numpy(np_array)
+                     inputs[0].set_shape([2, 8])
+                 triton_client.infer(model_name=model_name, inputs=inputs)

-         # Input size is less than expected
-         inputs = get_input_array(input_size - 2, np_dtype_string)
-         # Compromised input shape
-         inputs[0].set_shape((1, input_size))
-         inputs[1].set_shape((1, input_size))
-         with self.assertRaises(InferenceServerException) as e:
-             triton_client.infer(model_name=model_name, inputs=inputs)
-         err_str = str(e.exception)
-         self.assertIn(
-             f"expected {input_size} string elements for inference input 'INPUT1', got {input_size-2}",
-             err_str,
-         )
+                 # Compromised input shape
+                 inputs[0].set_shape([1, 8])

-         # Input size is greater than expected
-         inputs = get_input_array(input_size + 2, np_dtype_string)
-         # Compromised input shape
-         inputs[0].set_shape((1, input_size))
-         inputs[1].set_shape((1, input_size))
-         with self.assertRaises(InferenceServerException) as e:
-             triton_client.infer(model_name=model_name, inputs=inputs)
-         err_str = str(e.exception)
-         self.assertIn(
-             f"expected {input_size} string elements for inference input 'INPUT1', got {input_size+2}",
-             err_str,
-         )
+                 with self.assertRaises(InferenceServerException) as e:
+                     triton_client.infer(model_name=model_name, inputs=inputs)
+                 err_str = str(e.exception)
+                 self.assertIn(
+                     f"input 'INPUT0' got unexpected elements count 16, expected 8",
+                     err_str,
+                 )

+             if client_type == "http":
+                 triton_client = tritonhttpclient.InferenceServerClient("localhost:8000")
+             else:
+                 triton_client = tritongrpcclient.InferenceServerClient("localhost:8001")

+             # Example using BYTES input tensor with utf-8 encoded string that
+             # has an embedded null character.
+             null_chars_array = np.array(
+                 ["he\x00llo".encode("utf-8") for i in range(16)], dtype=np.object_
+             )
+             null_char_data = null_chars_array.reshape([1, 16])
+             identity_inference(triton_client, null_char_data, True)  # Using binary data
+             identity_inference(triton_client, null_char_data, False)  # Using JSON data

+             # Example using BYTES input tensor with 16 elements, where each
+             # element is a 4-byte binary blob with value 0x00010203. Can use
+             # dtype=np.bytes_ in this case.
+             bytes_data = [b"\x00\x01\x02\x03" for i in range(16)]
+             np_bytes_data = np.array(bytes_data, dtype=np.bytes_)
+             np_bytes_data = np_bytes_data.reshape([1, 16])
+             identity_inference(triton_client, np_bytes_data, True)  # Using binary data
+             identity_inference(triton_client, np_bytes_data, False)  # Using JSON data

      def test_wrong_input_shape_tensor_size(self):
          def inference_helper(model_name, batch_size=1):
@@ -246,12 +260,12 @@ def inference_helper(model_name, batch_size=1):
              tritongrpcclient.InferInput(
                  "DUMMY_INPUT0",
                  dummy_input_data.shape,
-                 np_to_triton_dtype(np.float32),
+                 utils.np_to_triton_dtype(np.float32),
              ),
              tritongrpcclient.InferInput(
                  "INPUT0",
                  shape_tensor_data.shape,
-                 np_to_triton_dtype(np.int32),
+                 utils.np_to_triton_dtype(np.int32),
              ),
          ]
          inputs[0].set_data_from_numpy(dummy_input_data)
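A note on why the string-input tests above assert on element counts rather than byte sizes: BYTES tensors are serialized element by element, each element as a 4-byte little-endian length prefix followed by its payload, so the total byte size depends on the element values and only the element count can be checked against the declared shape. A hand-rolled sketch of that framing — tritonclient.utils ships the real serialize/deserialize helpers; this is only for illustration:

    import struct

    import numpy as np

    def bytes_tensor_wire_size(arr: np.ndarray) -> int:
        # Each element: 4-byte little-endian length prefix + payload bytes.
        return sum(4 + len(elem) for elem in arr.flatten())

    def serialize_element(elem: bytes) -> bytes:
        return struct.pack("<I", len(elem)) + elem

    # The 16 four-byte blobs used in the test occupy 16 * (4 + 4) = 128 bytes
    # on the wire, while the validated element count is 16.
    blobs = np.array([b"\x00\x01\x02\x03" for _ in range(16)], dtype=np.bytes_)
    assert bytes_tensor_wire_size(blobs.reshape(1, 16)) == 128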
51 changes: 6 additions & 45 deletions qa/L0_input_validation/test.sh
@@ -68,6 +68,7 @@ set +e
  python3 -m pytest --junitxml="input_validation.report.xml" $TEST_PY::InputValTest >> $CLIENT_LOG 2>&1

  if [ $? -ne 0 ]; then
+     cat $CLIENT_LOG
      echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
      RET=1
  fi
@@ -80,49 +81,6 @@ wait $SERVER_PID
  pip install torch
  pip install pytest-asyncio

- mkdir -p models/pt_identity/1
- PYTHON_CODE=$(cat <<END
- import torch
- torch.jit.save(
-     torch.jit.script(torch.nn.Identity()),
-     "`pwd`/models/pt_identity/1/model.pt",
- )
- END
- )
- res="$(python3 -c "$PYTHON_CODE")"
-
- if [ $? -ne 0 ]; then
-     echo -e "\n***\n*** model "pt_identity" initialization FAILED. \n***"
-     echo $res
-     exit 1
- fi
-
- # Create the config.pbtxt file with the specified configuration
- cat > models/pt_identity/config.pbtxt << EOL
- name: "pt_identity"
- backend: "pytorch"
- max_batch_size: 8
- input [
-   {
-     name: "INPUT0"
-     data_type: TYPE_FP32
-     dims: [8]
-   }
- ]
- output [
-   {
-     name: "OUTPUT0"
-     data_type: TYPE_FP32
-     dims: [8]
-   }
- ]
- # ensure we batch requests together
- dynamic_batching {
-     max_queue_delay_microseconds: 1000000
- }
- EOL
-
- cp -r $DATADIR/qa_model_repository/graphdef_object_int32_int32 models/.
  cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
  cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.

@@ -138,6 +96,7 @@ set +e
  python3 -m pytest --junitxml="input_shape_validation.report.xml" $TEST_PY::InputShapeTest >> $CLIENT_LOG 2>&1

  if [ $? -ne 0 ]; then
+     cat $CLIENT_LOG
      echo -e "\n***\n*** input_validation_test.py FAILED. \n***"
      RET=1
  fi
@@ -147,18 +106,20 @@ kill $SERVER_PID
  wait $SERVER_PID

  # input_byte_size_test
+ cp -r /data/inferenceserver/${REPO_VERSION}/qa_identity_model_repository/{savedmodel_zero_1_float32,savedmodel_zero_1_object} ./models

  set +e
  LD_LIBRARY_PATH=/opt/tritonserver/lib:$LD_LIBRARY_PATH $TEST_EXEC >>$TEST_LOG 2>&1
  if [ $? -ne 0 ]; then
-     echo -e "\n***\n*** Query Unit Test Failed\n***"
      cat $TEST_LOG
+     echo -e "\n***\n*** input_byte_size_test FAILED\n***"
      RET=1
  fi
  set -e

  if [ $RET -eq 0 ]; then
      echo -e "\n***\n*** Input Validation Test Passed\n***"
  else
+     cat $CLIENT_LOG
+     cat $SERVER_LOG
      echo -e "\n***\n*** Input Validation Test FAILED\n***"
  fi