diff --git a/src/c++/perf_analyzer/periodic_concurrency_worker.cc b/src/c++/perf_analyzer/periodic_concurrency_worker.cc index 9fbaee3cc..f834ac533 100644 --- a/src/c++/perf_analyzer/periodic_concurrency_worker.cc +++ b/src/c++/perf_analyzer/periodic_concurrency_worker.cc @@ -54,7 +54,16 @@ PeriodicConcurrencyWorker::WorkerCallback(uint32_t infer_context_id) request_period_) { period_completed_callback_(); } + if (ctxs_.at(infer_context_id)->HasReceivedFinalResponse()) { + bool has_not_completed_period{ + ctxs_.at(infer_context_id)->GetNumResponsesForCurrentRequest() < + request_period_}; + if (has_not_completed_period) { + throw std::runtime_error( + "Request received final response before request period was reached. " + "Request period parameter must be less than or equal to max tokens."); + } request_completed_callback_(); } }