From 474ea93c0ae7aa26035a7458a740fc547ebb53b0 Mon Sep 17 00:00:00 2001
From: Matthew Kotila
Date: Fri, 21 Jul 2023 11:51:11 -0700
Subject: [PATCH] Calculate response throughput metric (#356)

* Calculate response throughput metric

* Address feedback

* Cleanup
---
 src/c++/library/common.h                      |   6 +-
 src/c++/library/grpc_client.cc                |  16 ++-
 src/c++/library/http_client.cc                |  12 ++
 .../client_backend/client_backend.h           |   9 +-
 .../client_backend/mock_client_backend.h      |   9 ++
 .../triton/triton_client_backend.cc           |   7 +
 .../triton/triton_client_backend.h            |   2 +
 src/c++/perf_analyzer/infer_context.cc        |  24 ++--
 src/c++/perf_analyzer/inference_profiler.cc   |  18 ++-
 src/c++/perf_analyzer/inference_profiler.h    |  23 ++--
 src/c++/perf_analyzer/load_manager.h          |   9 +-
 .../perf_analyzer/mock_inference_profiler.h   |  89 ++++++++++++-
 src/c++/perf_analyzer/mock_load_manager.h     |  37 ++++++
 src/c++/perf_analyzer/perf_utils.h            |   3 +-
 .../perf_analyzer/test_inference_profiler.cc  | 122 +++++++++++++++++-
 15 files changed, 355 insertions(+), 31 deletions(-)
 create mode 100644 src/c++/perf_analyzer/mock_load_manager.h

diff --git a/src/c++/library/common.h b/src/c++/library/common.h
index 1f0494000..ba98d82ca 100644
--- a/src/c++/library/common.h
+++ b/src/c++/library/common.h
@@ -513,10 +513,14 @@ class InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const = 0;
 
-  /// Get final response bool of the request which generated this response.
+  /// Get final response bool for this response.
   /// \return Error object indicating the success or failure.
   virtual Error IsFinalResponse(bool* is_final_response) const = 0;
 
+  /// Get null response bool for this response.
+  /// \return Error object indicating the success or failure.
+  virtual Error IsNullResponse(bool* is_null_response) const = 0;
+
   /// Get the result data as a vector of strings. The vector will
   /// receive a copy of result data. An error will be generated if
   /// the datatype of output is not 'BYTES'.
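
Context for the API change above: in decoupled (streaming) mode a Triton model can close a response stream with a final response that carries no output tensors. The new IsNullResponse() accessor lets callers tell such empty stream-closing responses apart from real ones. Below is a sketch of how application code might use the two accessors together in a streaming callback; the callback and its registration are hypothetical, only the accessors come from this patch:

    void OnStreamResponse(triton::client::InferResult* result)
    {
      bool is_null{false};
      bool is_final{false};
      // Both accessors return an Error; check IsOk() before trusting the flags.
      if (result->IsNullResponse(&is_null).IsOk() &&
          result->IsFinalResponse(&is_final).IsOk()) {
        if (!is_null) {
          // Process output tensors here; null responses carry none.
        }
        if (is_final) {
          // No more responses will arrive for this request.
        }
      }
      delete result;
    }
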
diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc
index eba18a33e..537608fba 100644
--- a/src/c++/library/grpc_client.cc
+++ b/src/c++/library/grpc_client.cc
@@ -1,4 +1,4 @@
-// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -190,6 +190,7 @@ class InferResultGrpc : public InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const override;
   Error IsFinalResponse(bool* is_final_response) const override;
+  Error IsNullResponse(bool* is_null_response) const override;
   Error StringData(
       const std::string& output_name,
       std::vector<std::string>* string_result) const override;
@@ -211,6 +212,7 @@ class InferResultGrpc : public InferResult {
   std::shared_ptr<inference::ModelStreamInferResponse> stream_response_;
   Error request_status_;
   bool is_final_response_{true};
+  bool is_null_response_{false};
 };
 
 Error
@@ -322,6 +324,16 @@ InferResultGrpc::IsFinalResponse(bool* is_final_response) const
   return Error::Success;
 }
 
+Error
+InferResultGrpc::IsNullResponse(bool* is_null_response) const
+{
+  if (is_null_response == nullptr) {
+    return Error("is_null_response cannot be nullptr");
+  }
+  *is_null_response = is_null_response_;
+  return Error::Success;
+}
+
 Error
 InferResultGrpc::StringData(
     const std::string& output_name,
@@ -384,6 +396,7 @@ InferResultGrpc::InferResultGrpc(
   if (is_final_response_itr != response_->parameters().end()) {
     is_final_response_ = is_final_response_itr->second.bool_param();
   }
+  is_null_response_ = response_->outputs().empty() && is_final_response_;
 }
 
 InferResultGrpc::InferResultGrpc(
@@ -409,6 +422,7 @@ InferResultGrpc::InferResultGrpc(
   if (is_final_response_itr != response_->parameters().end()) {
     is_final_response_ = is_final_response_itr->second.bool_param();
   }
+  is_null_response_ = response_->outputs().empty() && is_final_response_;
 }
 
 //==============================================================================
diff --git a/src/c++/library/http_client.cc b/src/c++/library/http_client.cc
index 4258ead12..eacf958da 100644
--- a/src/c++/library/http_client.cc
+++ b/src/c++/library/http_client.cc
@@ -740,6 +740,7 @@ class InferResultHttp : public InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const override;
   Error IsFinalResponse(bool* is_final_response) const override;
+  Error IsNullResponse(bool* is_null_response) const override;
   Error StringData(
       const std::string& output_name,
       std::vector<std::string>* string_result) const override;
@@ -769,6 +770,7 @@ class InferResultHttp : public InferResult {
 
   bool binary_data_{true};
   bool is_final_response_{true};
+  bool is_null_response_{false};
 };
 
 void
@@ -951,6 +953,16 @@ InferResultHttp::IsFinalResponse(bool* is_final_response) const
   return Error::Success;
 }
 
+Error
+InferResultHttp::IsNullResponse(bool* is_null_response) const
+{
+  if (is_null_response == nullptr) {
+    return Error("is_null_response cannot be nullptr");
+  }
+  *is_null_response = is_null_response_;
+  return Error::Success;
+}
+
 Error
 InferResultHttp::StringData(
     const std::string& output_name,
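
Both transport implementations derive the flag the same way: a response is considered null exactly when it carries no output tensors and is marked final. The HTTP result only gains the member and accessor (the flag keeps its default of false), since null responses arise from decoupled streaming, which the HTTP path does not use. An illustrative restatement of the gRPC rule, assuming the inference.ModelInferResponse protobuf type:

    // A decoupled stream can deliver three kinds of responses:
    //   outputs non-empty, final == false  -> intermediate response
    //   outputs non-empty, final == true   -> last real response
    //   outputs empty,     final == true   -> "null" stream-closing response
    bool IsNull(const inference::ModelInferResponse& response, bool is_final)
    {
      return response.outputs().empty() && is_final;  // mirrors the patch
    }
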
diff --git a/src/c++/perf_analyzer/client_backend/client_backend.h b/src/c++/perf_analyzer/client_backend/client_backend.h
index 4903da61d..c351dfe61 100644
--- a/src/c++/perf_analyzer/client_backend/client_backend.h
+++ b/src/c++/perf_analyzer/client_backend/client_backend.h
@@ -617,12 +617,19 @@ class InferResult {
       const std::string& output_name, const uint8_t** buf,
       size_t* byte_size) const = 0;
 
-  /// Get final response bool of the request which generated this response.
+  /// Get final response bool for this response.
   /// \return Error object indicating the success or failure.
   virtual Error IsFinalResponse(bool* is_final_response) const
   {
     return Error("InferResult::IsFinalResponse() not implemented");
   };
+
+  /// Get null response bool for this response.
+  /// \return Error object indicating the success or failure.
+  virtual Error IsNullResponse(bool* is_null_response) const
+  {
+    return Error("InferResult::IsNullResponse() not implemented");
+  };
 };
 
 }}}  // namespace triton::perfanalyzer::clientbackend
diff --git a/src/c++/perf_analyzer/client_backend/mock_client_backend.h b/src/c++/perf_analyzer/client_backend/mock_client_backend.h
index 28a568eb0..ddc14f663 100644
--- a/src/c++/perf_analyzer/client_backend/mock_client_backend.h
+++ b/src/c++/perf_analyzer/client_backend/mock_client_backend.h
@@ -127,6 +127,15 @@ class MockInferResult : public InferResult {
     return Error::Success;
   }
 
+  Error IsNullResponse(bool* is_null_response) const override
+  {
+    if (is_null_response == nullptr) {
+      return Error("is_null_response cannot be nullptr");
+    }
+    *is_null_response = false;
+    return Error::Success;
+  }
+
  private:
   std::string req_id_;
 };
diff --git a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc
index 7ac2188a6..1be578a95 100644
--- a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc
+++ b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.cc
@@ -827,6 +827,13 @@ TritonInferResult::IsFinalResponse(bool* is_final_response) const
   return Error::Success;
 }
 
+Error
+TritonInferResult::IsNullResponse(bool* is_null_response) const
+{
+  RETURN_IF_TRITON_ERROR(result_->IsNullResponse(is_null_response));
+  return Error::Success;
+}
+
 //==============================================================================
 
 }}}}  // namespace triton::perfanalyzer::clientbackend::tritonremote
diff --git a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h
index 5b25e8297..aab3c8028 100644
--- a/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h
+++ b/src/c++/perf_analyzer/client_backend/triton/triton_client_backend.h
@@ -331,6 +331,8 @@ class TritonInferResult : public InferResult {
       size_t* byte_size) const override;
   /// See InferResult::IsFinalResponse()
   Error IsFinalResponse(bool* is_final_response) const override;
+  /// See InferResult::IsNullResponse()
+  Error IsNullResponse(bool* is_null_response) const override;
 
  private:
   std::unique_ptr<tc::InferResult> result_;
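
The backend-facing interface deliberately supplies a non-abstract default that returns a "not implemented" Error, so only backends that understand the query need to override it. A hypothetical third-party backend whose protocol has no notion of null responses could mirror the mock and answer false unconditionally (MyBackendInferResult is invented for illustration):

    class MyBackendInferResult
        : public triton::perfanalyzer::clientbackend::InferResult {
     public:
      Error IsNullResponse(bool* is_null_response) const override
      {
        if (is_null_response == nullptr) {
          return Error("is_null_response cannot be nullptr");
        }
        *is_null_response = false;  // this backend never emits null responses
        return Error::Success;
      }
      // ...remaining InferResult overrides elided...
    };
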
diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc
index 7c4a36944..dc65c2adc 100644
--- a/src/c++/perf_analyzer/infer_context.cc
+++ b/src/c++/perf_analyzer/infer_context.cc
@@ -1,4 +1,4 @@
-// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -236,20 +236,26 @@ void
 InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
 {
   std::shared_ptr<cb::InferResult> result_ptr(result);
+  bool is_final_response{true};
   if (thread_stat_->cb_status_.IsOk()) {
     // Add the request timestamp to thread Timestamp vector with
     // proper locking
     std::lock_guard<std::mutex> lock(thread_stat_->mu_);
     thread_stat_->cb_status_ = result_ptr->RequestStatus();
     if (thread_stat_->cb_status_.IsOk()) {
-      std::chrono::time_point<std::chrono::system_clock> end_time_async;
-      end_time_async = std::chrono::system_clock::now();
       std::string request_id;
       thread_stat_->cb_status_ = result_ptr->Id(&request_id);
       const auto& it = async_req_map_.find(request_id);
       if (it != async_req_map_.end()) {
-        it->second.end_times.push_back(end_time_async);
-        bool is_final_response{false};
+        bool is_null_response{false};
+        thread_stat_->cb_status_ =
+            result_ptr->IsNullResponse(&is_null_response);
+        if (thread_stat_->cb_status_.IsOk() == false) {
+          return;
+        }
+        if (is_null_response == false) {
+          it->second.end_times.push_back(std::chrono::system_clock::now());
+        }
         thread_stat_->cb_status_ =
             result_ptr->IsFinalResponse(&is_final_response);
         if (thread_stat_->cb_status_.IsOk() == false) {
@@ -267,10 +273,12 @@ InferContext::AsyncCallbackFuncImpl(cb::InferResult* result)
     }
   }
 
-  total_ongoing_requests_--;
+  if (is_final_response) {
+    total_ongoing_requests_--;
 
-  if (async_callback_finalize_func_ != nullptr) {
-    async_callback_finalize_func_(id_);
+    if (async_callback_finalize_func_ != nullptr) {
+      async_callback_finalize_func_(id_);
+    }
   }
 }
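
The callback now does two things differently: a null response no longer contributes an end-time entry (so it is excluded from latency and response counting), and the in-flight bookkeeping runs only once per request, on its final response. A self-contained toy driver of that logic, with invented types standing in for the real callback plumbing:

    #include <vector>

    int main()
    {
      struct Response { bool is_null; bool is_final; };
      std::vector<int> end_times;      // stands in for it->second.end_times
      int total_ongoing_requests = 1;  // one decoupled request in flight

      // Two real responses, then a null, final, stream-closing response.
      for (Response r : {Response{false, false}, Response{false, false},
                         Response{true, true}}) {
        if (!r.is_null) { end_times.push_back(0); }    // timestamp real ones only
        if (r.is_final) { total_ongoing_requests--; }  // finalize exactly once
      }
      // Here end_times.size() == 2 and total_ongoing_requests == 0.
      return 0;
    }
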
diff --git a/src/c++/perf_analyzer/inference_profiler.cc b/src/c++/perf_analyzer/inference_profiler.cc
index b60006286..b0dd3f224 100644
--- a/src/c++/perf_analyzer/inference_profiler.cc
+++ b/src/c++/perf_analyzer/inference_profiler.cc
@@ -1012,6 +1012,8 @@ InferenceProfiler::MergePerfStatusReports(
         perf_status.client_stats.sequence_count;
     experiment_perf_status.client_stats.delayed_request_count +=
         perf_status.client_stats.delayed_request_count;
+    experiment_perf_status.client_stats.response_count +=
+        perf_status.client_stats.response_count;
     experiment_perf_status.client_stats.duration_ns +=
         perf_status.client_stats.duration_ns;
 
@@ -1079,6 +1081,8 @@ InferenceProfiler::MergePerfStatusReports(
       (experiment_perf_status.client_stats.request_count *
        experiment_perf_status.batch_size) /
       client_duration_sec;
+  experiment_perf_status.client_stats.responses_per_sec =
+      experiment_perf_status.client_stats.response_count / client_duration_sec;
 
   RETURN_IF_ERROR(SummarizeLatency(
       experiment_perf_status.client_stats.latencies, experiment_perf_status));
@@ -1211,18 +1215,20 @@ InferenceProfiler::Summarize(
 {
   size_t valid_sequence_count = 0;
   size_t delayed_request_count = 0;
+  size_t response_count = 0;
 
   // Get measurement from requests that fall within the time interval
   std::pair<uint64_t, uint64_t> valid_range{window_start_ns, window_end_ns};
   uint64_t window_duration_ns = valid_range.second - valid_range.first;
   std::vector<uint64_t> latencies;
   ValidLatencyMeasurement(
-      valid_range, valid_sequence_count, delayed_request_count, &latencies);
+      valid_range, valid_sequence_count, delayed_request_count, &latencies,
+      response_count);
 
   RETURN_IF_ERROR(SummarizeLatency(latencies, summary));
   RETURN_IF_ERROR(SummarizeClientStat(
       start_stat, end_stat, window_duration_ns, latencies.size(),
-      valid_sequence_count, delayed_request_count, summary));
+      valid_sequence_count, delayed_request_count, response_count,
+      summary));
   summary.client_stats.latencies = std::move(latencies);
 
   SummarizeOverhead(window_duration_ns, manager_->GetIdleTime(), summary);
@@ -1245,10 +1251,11 @@ void
 InferenceProfiler::ValidLatencyMeasurement(
     const std::pair<uint64_t, uint64_t>& valid_range,
     size_t& valid_sequence_count, size_t& delayed_request_count,
-    std::vector<uint64_t>* valid_latencies)
+    std::vector<uint64_t>* valid_latencies, size_t& response_count)
 {
   valid_latencies->clear();
   valid_sequence_count = 0;
+  response_count = 0;
   std::vector<size_t> erase_indices{};
   for (size_t i = 0; i < all_timestamps_.size(); i++) {
     const auto& timestamp = all_timestamps_[i];
@@ -1260,6 +1267,7 @@ InferenceProfiler::ValidLatencyMeasurement(
     if ((request_end_ns >= valid_range.first) &&
         (request_end_ns <= valid_range.second)) {
       valid_latencies->push_back(request_end_ns - request_start_ns);
+      response_count += std::get<1>(timestamp).size();
       erase_indices.push_back(i);
       // Just add the sequence_end flag here.
       if (std::get<2>(timestamp)) {
@@ -1358,7 +1366,7 @@ InferenceProfiler::SummarizeClientStat(
     const cb::InferStat& start_stat, const cb::InferStat& end_stat,
     const uint64_t duration_ns, const size_t valid_request_count,
     const size_t valid_sequence_count, const size_t delayed_request_count,
-    PerfStatus& summary)
+    const size_t response_count, PerfStatus& summary)
 {
   summary.on_sequence_model =
       ((parser_->SchedulerType() == ModelParser::SEQUENCE) ||
@@ -1367,6 +1375,7 @@ InferenceProfiler::SummarizeClientStat(
   summary.client_stats.request_count = valid_request_count;
   summary.client_stats.sequence_count = valid_sequence_count;
   summary.client_stats.delayed_request_count = delayed_request_count;
+  summary.client_stats.response_count = response_count;
   summary.client_stats.duration_ns = duration_ns;
   float client_duration_sec =
       (float)summary.client_stats.duration_ns / NANOS_PER_SECOND;
@@ -1374,6 +1383,7 @@ InferenceProfiler::SummarizeClientStat(
       valid_sequence_count / client_duration_sec;
   summary.client_stats.infer_per_sec =
       (valid_request_count * summary.batch_size) / client_duration_sec;
+  summary.client_stats.responses_per_sec = response_count / client_duration_sec;
 
   if (include_lib_stats_) {
     size_t completed_count =
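
The metric itself is a plain ratio: total responses observed in the measurement window divided by the client-side window duration, and merged reports simply pool both numerator and denominator. Worked through with the values the tests below use:

    double response_count = 8 + 10;          // two windows: 18 responses total
    double client_duration_sec = 2.0 + 4.0;  // 6 s of combined measurement time
    double responses_per_sec =
        response_count / client_duration_sec;  // == 3.0 responses/sec
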
diff --git a/src/c++/perf_analyzer/inference_profiler.h b/src/c++/perf_analyzer/inference_profiler.h
index 6a0ee625a..b07cd93ae 100644
--- a/src/c++/perf_analyzer/inference_profiler.h
+++ b/src/c++/perf_analyzer/inference_profiler.h
@@ -48,6 +48,7 @@ namespace triton { namespace perfanalyzer {
 
 #ifndef DOCTEST_CONFIG_DISABLE
+class NaggyMockInferenceProfiler;
 class TestInferenceProfiler;
 #endif
 
@@ -126,6 +127,8 @@ struct ClientSideStats {
   uint64_t sequence_count;
   // The number of requests that missed their schedule
   uint64_t delayed_request_count;
+  // The number of responses
+  uint64_t response_count;
   uint64_t duration_ns;
   uint64_t avg_latency_ns;
   // a ordered map of percentiles to be reported (<percentile, value> pair)
@@ -139,6 +142,7 @@ struct ClientSideStats {
   uint64_t avg_receive_time_ns;
   // Per sec stat
   double infer_per_sec;
+  double responses_per_sec;
   double sequence_per_sec;
 
   // Completed request count reported by the client library
@@ -440,16 +444,17 @@ class InferenceProfiler {
   /// sequence model.
   /// \param latencies Returns the vector of request latencies where the
   /// requests are completed within the measurement window.
-  void ValidLatencyMeasurement(
+  /// \param response_count Returns the number of responses
+  virtual void ValidLatencyMeasurement(
       const std::pair<uint64_t, uint64_t>& valid_range,
       size_t& valid_sequence_count, size_t& delayed_request_count,
-      std::vector<uint64_t>* latencies);
+      std::vector<uint64_t>* latencies, size_t& response_count);
 
   /// \param latencies The vector of request latencies collected.
   /// \param summary Returns the summary that the latency related fields are
   /// set.
   /// \return cb::Error object indicating success or failure.
-  cb::Error SummarizeLatency(
+  virtual cb::Error SummarizeLatency(
       const std::vector<uint64_t>& latencies, PerfStatus& summary);
 
   /// \param latencies The vector of request latencies collected.
@@ -466,14 +471,15 @@ class InferenceProfiler {
   /// \param valid_sequence_count The number of completed sequences recorded.
   /// \param delayed_request_count The number of requests that missed their
   /// schedule.
+  /// \param response_count The number of responses.
   /// \param summary Returns the summary that the fields recorded by
   /// client are set.
   /// \return cb::Error object indicating success or failure.
-  cb::Error SummarizeClientStat(
+  virtual cb::Error SummarizeClientStat(
       const cb::InferStat& start_stat, const cb::InferStat& end_stat,
       const uint64_t duration_ns, const size_t valid_request_count,
       const size_t delayed_request_count, const size_t valid_sequence_count,
-      PerfStatus& summary);
+      const size_t response_count, PerfStatus& summary);
 
   /// Adds the send request rate metric to the summary object.
   /// \param window_duration_s The duration of the window in seconds.
@@ -557,7 +563,7 @@ class InferenceProfiler {
   /// \param perf_status List of perf status reports to be merged.
   /// \param summary_status Final merged summary status.
   /// \return cb::Error object indicating success or failure.
-  cb::Error MergePerfStatusReports(
+  virtual cb::Error MergePerfStatusReports(
       std::deque<PerfStatus>& perf_status, PerfStatus& summary_status);
 
   /// Merge individual server side statistics into a single server side report.
   /// \param server_side_stats List of server side statistics reports to be
   /// merged.
   /// \param server_side_summary Final merged summary status.
   /// \return cb::Error object indicating success or failure.
-  cb::Error MergeServerSideStats(
+  virtual cb::Error MergeServerSideStats(
       std::vector<ServerSideStats>& server_side_stats,
       ServerSideStats& server_side_summary);
 
@@ -695,10 +701,11 @@ class InferenceProfiler {
   const double overhead_pct_threshold_{0.0};
 
 #ifndef DOCTEST_CONFIG_DISABLE
+  friend NaggyMockInferenceProfiler;
  friend TestInferenceProfiler;
 
  public:
-  InferenceProfiler(){};
+  InferenceProfiler() = default;
 #endif
 };
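
ClientSideStats now carries both the raw response_count and the derived responses_per_sec next to the existing per-second stats. A hypothetical consumer of the new fields (the patch itself adds no reporting code):

    #include <iostream>

    void PrintResponseThroughput(const triton::perfanalyzer::PerfStatus& status)
    {
      std::cout << "Response count: " << status.client_stats.response_count
                << ", throughput: " << status.client_stats.responses_per_sec
                << " responses/sec" << std::endl;
    }
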
diff --git a/src/c++/perf_analyzer/load_manager.h b/src/c++/perf_analyzer/load_manager.h
index ebcad5192..5a10ae592 100644
--- a/src/c++/perf_analyzer/load_manager.h
+++ b/src/c++/perf_analyzer/load_manager.h
@@ -40,6 +40,11 @@
 
 namespace triton { namespace perfanalyzer {
 
+#ifndef DOCTEST_CONFIG_DISABLE
+class NaggyMockLoadManager;
+#endif
+
 class LoadManager {
  public:
   virtual ~LoadManager() = default;
@@ -97,7 +102,7 @@ class LoadManager {
   const size_t GetAndResetNumSentRequests();
 
   /// \return the batch size used for the inference requests
-  size_t BatchSize() const { return batch_size_; }
+  virtual size_t BatchSize() const { return batch_size_; }
 
   /// Count the number of requests collected until now.
   uint64_t CountCollectedRequests();
@@ -165,6 +170,8 @@ class LoadManager {
       std::shared_ptr<DataLoader> data_loader);
 
 #ifndef DOCTEST_CONFIG_DISABLE
+  friend NaggyMockLoadManager;
+
  public:
   LoadManager() = default;
 #endif
diff --git a/src/c++/perf_analyzer/mock_inference_profiler.h b/src/c++/perf_analyzer/mock_inference_profiler.h
index b44d94959..a31485091 100644
--- a/src/c++/perf_analyzer/mock_inference_profiler.h
+++ b/src/c++/perf_analyzer/mock_inference_profiler.h
@@ -1,4 +1,4 @@
-// Copyright 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -30,9 +30,92 @@
 
 namespace triton { namespace perfanalyzer {
 
-class MockInferenceProfiler : public InferenceProfiler {
+class NaggyMockInferenceProfiler : public InferenceProfiler {
  public:
-  MockInferenceProfiler() = default;
+  NaggyMockInferenceProfiler()
+  {
+    ON_CALL(
+        *this, ValidLatencyMeasurement(
+                   testing::_, testing::_, testing::_, testing::_, testing::_))
+        .WillByDefault(
+            [this](
+                const std::pair<uint64_t, uint64_t>& valid_range,
+                size_t& valid_sequence_count, size_t& delayed_request_count,
+                std::vector<uint64_t>* latencies,
+                size_t& response_count) -> void {
+              this->InferenceProfiler::ValidLatencyMeasurement(
+                  valid_range, valid_sequence_count, delayed_request_count,
+                  latencies, response_count);
+            });
+    ON_CALL(*this, SummarizeLatency(testing::_, testing::_))
+        .WillByDefault(
+            [this](
+                const std::vector<uint64_t>& latencies,
+                PerfStatus& summary) -> cb::Error {
+              return this->InferenceProfiler::SummarizeLatency(
+                  latencies, summary);
+            });
+    ON_CALL(*this, MergePerfStatusReports(testing::_, testing::_))
+        .WillByDefault(
+            [this](
+                std::deque<PerfStatus>& perf_status,
+                PerfStatus& summary_status) -> cb::Error {
+              return this->InferenceProfiler::MergePerfStatusReports(
+                  perf_status, summary_status);
+            });
+    ON_CALL(*this, MergeServerSideStats(testing::_, testing::_))
+        .WillByDefault(
+            [this](
+                std::vector<ServerSideStats>& server_side_stats,
+                ServerSideStats& server_side_summary) -> cb::Error {
+              return this->InferenceProfiler::MergeServerSideStats(
+                  server_side_stats, server_side_summary);
+            });
+    ON_CALL(
+        *this, SummarizeClientStat(
+                   testing::_, testing::_, testing::_, testing::_, testing::_,
+                   testing::_, testing::_, testing::_))
+        .WillByDefault(
+            [this](
+                const cb::InferStat& start_stat, const cb::InferStat& end_stat,
+                const uint64_t duration_ns, const size_t valid_request_count,
+                const size_t delayed_request_count,
+                const size_t valid_sequence_count, const size_t response_count,
+                PerfStatus& summary) -> cb::Error {
+              return this->InferenceProfiler::SummarizeClientStat(
+                  start_stat, end_stat, duration_ns, valid_request_count,
+                  delayed_request_count, valid_sequence_count, response_count,
+                  summary);
+            });
+  };
+
   MOCK_METHOD0(IncludeServerStats, bool());
+  MOCK_METHOD(
+      void, ValidLatencyMeasurement,
+      ((const std::pair<uint64_t, uint64_t>&), size_t&, size_t&,
+       std::vector<uint64_t>*, size_t&),
+      (override));
+  MOCK_METHOD(
+      cb::Error, SummarizeLatency, (const std::vector<uint64_t>&, PerfStatus&),
+      (override));
+  MOCK_METHOD(
+      cb::Error, MergePerfStatusReports,
+      (std::deque<PerfStatus>&, PerfStatus&), (override));
+  MOCK_METHOD(
+      cb::Error, MergeServerSideStats,
+      (std::vector<ServerSideStats>&, ServerSideStats&), (override));
+  MOCK_METHOD(
+      cb::Error, SummarizeClientStat,
+      (const cb::InferStat&, const cb::InferStat&, const uint64_t, const size_t,
+       const size_t, const size_t, const size_t, PerfStatus&),
+      (override));
+
+  std::shared_ptr<ModelParser>& parser_{InferenceProfiler::parser_};
+  std::unique_ptr<LoadManager>& manager_{InferenceProfiler::manager_};
+  bool& include_lib_stats_{InferenceProfiler::include_lib_stats_};
+  TimestampVector& all_timestamps_{InferenceProfiler::all_timestamps_};
 };
+
+using MockInferenceProfiler = testing::NiceMock<NaggyMockInferenceProfiler>;
+
 }}  // namespace triton::perfanalyzer
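
The NaggyMock constructor wires every mocked method to delegate to the real InferenceProfiler implementation by default, so tests exercise production logic unless they explicitly stub a call with EXPECT_CALL. The same delegate-to-real pattern in miniature, with invented names:

    #include "gmock/gmock.h"

    class Real {
     public:
      virtual ~Real() = default;
      virtual int Work(int x) { return x * 2; }
    };

    class NaggyMockReal : public Real {
     public:
      NaggyMockReal()
      {
        // By default, fall through to the real implementation.
        ON_CALL(*this, Work(testing::_)).WillByDefault([this](int x) {
          return this->Real::Work(x);
        });
      }
      MOCK_METHOD(int, Work, (int), (override));
    };

    // NiceMock suppresses "uninteresting call" warnings for forwarded calls.
    using MockReal = testing::NiceMock<NaggyMockReal>;
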
diff --git a/src/c++/perf_analyzer/mock_load_manager.h b/src/c++/perf_analyzer/mock_load_manager.h
new file mode 100644
index 000000000..2088a4053
--- /dev/null
+++ b/src/c++/perf_analyzer/mock_load_manager.h
@@ -0,0 +1,37 @@
+// Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once
+
+#include "gmock/gmock.h"
+#include "load_manager.h"
+
+namespace triton { namespace perfanalyzer {
+
+class NaggyMockLoadManager : public LoadManager {};
+
+using MockLoadManager = testing::NiceMock<NaggyMockLoadManager>;
+
+}}  // namespace triton::perfanalyzer
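
The subclass body is empty because LoadManager's virtual surface (BatchSize() above) is all the tests need for now; the NiceMock alias exists so the profiler's manager_ slot can be filled without gmock noise. This is exactly how the tests below assemble their fixtures (MockModelParser comes from mock_model_parser.h, which the test file includes):

    MockInferenceProfiler mock_inference_profiler{};
    mock_inference_profiler.parser_ = std::make_shared<MockModelParser>();
    mock_inference_profiler.manager_ = std::make_unique<MockLoadManager>();
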
diff --git a/src/c++/perf_analyzer/perf_utils.h b/src/c++/perf_analyzer/perf_utils.h
index f11bf9815..1865b8430 100644
--- a/src/c++/perf_analyzer/perf_utils.h
+++ b/src/c++/perf_analyzer/perf_utils.h
@@ -55,7 +55,8 @@ constexpr uint64_t NANOS_PER_MILLIS = 1000000;
 //==============================================================================
 using TimestampVector = std::vector<std::tuple<
     std::chrono::time_point<std::chrono::system_clock>,
-    std::vector<std::chrono::time_point<std::chrono::system_clock>>, uint32_t, bool>>;
+    std::vector<std::chrono::time_point<std::chrono::system_clock>>, uint32_t,
+    bool>>;
 
 // Will use the characters specified here to construct random strings
 std::string const character_set =
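
Each TimestampVector entry bundles one request's lifetime: its send time, the end times of every response it produced (the vector whose size ValidLatencyMeasurement now sums into response_count), a flags word, and a trailing bool. Field meanings here are inferred from usage in inference_profiler.cc, not from separate documentation:

    // Assumes a populated TimestampVector named all_timestamps.
    const auto& entry = all_timestamps[0];
    auto request_start = std::get<0>(entry);         // request send time
    const auto& response_ends = std::get<1>(entry);  // one entry per response
    uint32_t flags = std::get<2>(entry);             // e.g. sequence-end flag
    bool last_flag = std::get<3>(entry);             // trailing bool element
    size_t responses_for_request = response_ends.size();
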
diff --git a/src/c++/perf_analyzer/test_inference_profiler.cc b/src/c++/perf_analyzer/test_inference_profiler.cc
index 27f75519f..71ed2eb89 100644
--- a/src/c++/perf_analyzer/test_inference_profiler.cc
+++ b/src/c++/perf_analyzer/test_inference_profiler.cc
@@ -27,6 +27,8 @@
 #include "doctest.h"
 #include "inference_profiler.h"
 #include "mock_inference_profiler.h"
+#include "mock_load_manager.h"
+#include "mock_model_parser.h"
 
 namespace triton { namespace perfanalyzer {
 
@@ -35,12 +37,14 @@ class TestInferenceProfiler : public InferenceProfiler {
   static void ValidLatencyMeasurement(
       const std::pair<uint64_t, uint64_t>& valid_range,
       size_t& valid_sequence_count, size_t& delayed_request_count,
-      std::vector<uint64_t>* latencies, TimestampVector& all_timestamps)
+      std::vector<uint64_t>* latencies, size_t& response_count,
+      TimestampVector& all_timestamps)
   {
     InferenceProfiler inference_profiler{};
     inference_profiler.all_timestamps_ = all_timestamps;
     inference_profiler.ValidLatencyMeasurement(
-        valid_range, valid_sequence_count, delayed_request_count, latencies);
+        valid_range, valid_sequence_count, delayed_request_count, latencies,
+        response_count);
   }
 
   static std::tuple<uint64_t, uint64_t> GetMeanAndStdDev(
@@ -162,6 +166,7 @@ TEST_CASE("testing the ValidLatencyMeasurement function")
   size_t valid_sequence_count{};
   size_t delayed_request_count{};
   std::vector<uint64_t> latencies{};
+  size_t response_count{};
 
   const std::pair<uint64_t, uint64_t> window{4, 17};
 
   using time_point = std::chrono::time_point<std::chrono::system_clock>;
@@ -201,7 +206,7 @@ TEST_CASE("testing the ValidLatencyMeasurement function")
 
     TestInferenceProfiler::ValidLatencyMeasurement(
         window, valid_sequence_count, delayed_request_count, &latencies,
-        all_timestamps);
+        response_count, all_timestamps);
 
     const auto& convert_timestamp_to_latency{
         [](std::tuple<time_point, std::vector<time_point>, uint32_t, bool> t) {
@@ -854,4 +859,115 @@ TEST_CASE("determine_stats_model_version: testing DetermineStatsModelVersion()")
   std::cerr.rdbuf(old);
 }
 
+TEST_CASE(
+    "valid_latency_measurement: testing the ValidLatencyMeasurement function")
+{
+  MockInferenceProfiler mock_inference_profiler{};
+
+  SUBCASE("testing logic relevant to response throughput metric")
+  {
+    auto clock_epoch{std::chrono::time_point<std::chrono::system_clock>()};
+
+    auto request1_timestamp{clock_epoch + std::chrono::nanoseconds(1)};
+    auto response1_timestamp{clock_epoch + std::chrono::nanoseconds(2)};
+    auto response2_timestamp{clock_epoch + std::chrono::nanoseconds(3)};
+    auto timestamp1{std::make_tuple(
+        request1_timestamp,
+        std::vector<std::chrono::time_point<std::chrono::system_clock>>{
+            response1_timestamp, response2_timestamp},
+        0, false)};
+
+    auto request2_timestamp{clock_epoch + std::chrono::nanoseconds(4)};
+    auto response3_timestamp{clock_epoch + std::chrono::nanoseconds(5)};
+    auto response4_timestamp{clock_epoch + std::chrono::nanoseconds(6)};
+    auto response5_timestamp{clock_epoch + std::chrono::nanoseconds(7)};
+    auto timestamp2{std::make_tuple(
+        request2_timestamp,
+        std::vector<std::chrono::time_point<std::chrono::system_clock>>{
+            response3_timestamp, response4_timestamp, response5_timestamp},
+        0, false)};
+
+    mock_inference_profiler.all_timestamps_ = {timestamp1, timestamp2};
+
+    const std::pair<uint64_t, uint64_t> valid_range{
+        std::make_pair(0, UINT64_MAX)};
+    size_t valid_sequence_count{0};
+    size_t delayed_request_count{0};
+    std::vector<uint64_t> valid_latencies{};
+    size_t response_count{0};
+
+    mock_inference_profiler.ValidLatencyMeasurement(
+        valid_range, valid_sequence_count, delayed_request_count,
+        &valid_latencies, response_count);
+
+    CHECK(response_count == 5);
+  }
+}
+
+TEST_CASE(
+    "merge_perf_status_reports: testing the MergePerfStatusReports function")
+{
+  MockInferenceProfiler mock_inference_profiler{};
+
+  SUBCASE("testing logic relevant to response throughput metric")
+  {
+    PerfStatus perf_status1{};
+    perf_status1.client_stats.response_count = 8;
+    perf_status1.client_stats.duration_ns = 2000000000;
+
+    PerfStatus perf_status2{};
+    perf_status2.client_stats.response_count = 10;
+    perf_status2.client_stats.duration_ns = 4000000000;
+
+    std::deque<PerfStatus> perf_status{perf_status1, perf_status2};
+    PerfStatus summary_status{};
+
+    cb::Error error{};
+
+    EXPECT_CALL(
+        mock_inference_profiler, MergeServerSideStats(testing::_, testing::_))
+        .WillOnce(testing::Return(cb::Error::Success));
+    EXPECT_CALL(
+        mock_inference_profiler, SummarizeLatency(testing::_, testing::_))
+        .WillOnce(testing::Return(cb::Error::Success));
+
+    error = mock_inference_profiler.MergePerfStatusReports(
+        perf_status, summary_status);
+
+    REQUIRE(error.IsOk() == true);
+    CHECK(summary_status.client_stats.response_count == 18);
+    CHECK(
+        summary_status.client_stats.responses_per_sec == doctest::Approx(3.0));
+  }
+}
+
+TEST_CASE("summarize_client_stat: testing the SummarizeClientStat function")
+{
+  MockInferenceProfiler mock_inference_profiler{};
+
+  SUBCASE("testing logic relevant to response throughput metric")
+  {
+    mock_inference_profiler.parser_ = std::make_shared<MockModelParser>();
+    mock_inference_profiler.manager_ = std::make_unique<MockLoadManager>();
+
+    const cb::InferStat start_stat{};
+    const cb::InferStat end_stat{};
+    const uint64_t duration_ns{2000000000};
+    const size_t valid_request_count{0};
+    const size_t delayed_request_count{0};
+    const size_t valid_sequence_count{0};
+    const size_t response_count{8};
+    PerfStatus summary{};
+
+    cb::Error error{};
+
+    error = mock_inference_profiler.SummarizeClientStat(
+        start_stat, end_stat, duration_ns, valid_request_count,
+        delayed_request_count, valid_sequence_count, response_count, summary);
+
+    REQUIRE(error.IsOk() == true);
+    CHECK(summary.client_stats.response_count == 8);
+    CHECK(summary.client_stats.responses_per_sec == doctest::Approx(4.0));
+  }
+}
 }}  // namespace triton::perfanalyzer
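
As a sanity check on the expected values: the merge test pools 8 + 10 = 18 responses over 2 s + 4 s = 6 s of measurement, giving the asserted 3.0 responses/sec, and the client-stat test records 8 responses over a 2 s window, giving 4.0 responses/sec.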