diff --git a/src/c++/perf_analyzer/periodic_concurrency_worker.cc b/src/c++/perf_analyzer/periodic_concurrency_worker.cc index 9fbaee3cc..630997903 100644 --- a/src/c++/perf_analyzer/periodic_concurrency_worker.cc +++ b/src/c++/perf_analyzer/periodic_concurrency_worker.cc @@ -53,8 +53,14 @@ PeriodicConcurrencyWorker::WorkerCallback(uint32_t infer_context_id) if (ctxs_.at(infer_context_id)->GetNumResponsesForCurrentRequest() == request_period_) { period_completed_callback_(); + period_completed_callback_called_ = true; } if (ctxs_.at(infer_context_id)->HasReceivedFinalResponse()) { + if (period_completed_callback_called_ == false) { + throw std::runtime_error( + "Request received final response before request period was reached. " + "Request period parameter must be less than or equal to max tokens."); + } request_completed_callback_(); } } diff --git a/src/c++/perf_analyzer/periodic_concurrency_worker.h b/src/c++/perf_analyzer/periodic_concurrency_worker.h index 7242219b9..b5ee3887c 100644 --- a/src/c++/perf_analyzer/periodic_concurrency_worker.h +++ b/src/c++/perf_analyzer/periodic_concurrency_worker.h @@ -75,6 +75,7 @@ class PeriodicConcurrencyWorker : public ConcurrencyWorker { std::function request_completed_callback_{nullptr}; std::function worker_callback_{std::bind( &PeriodicConcurrencyWorker::WorkerCallback, this, std::placeholders::_1)}; + bool period_completed_callback_called_{false}; }; }} // namespace triton::perfanalyzer