Fix decoupled gpu output error handling
kthui committed May 22, 2024
1 parent 89512e6 commit 5e972cf
Showing 2 changed files with 23 additions and 18 deletions.
src/pb_stub.cc (1 addition, 1 deletion)
@@ -735,7 +735,7 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
               "Failed to process the request(s) for model '" + name_ +
               "', message: ") +
           error_string;
-      LOG_INFO << err_message.c_str();
+      LOG_ERROR << err_message.c_str();
       response_batch_shm_ptr->has_error = true;
       error_string_shm = PbString::Create(shm_pool_, error_string);
       response_batch_shm_ptr->error = error_string_shm->ShmHandle();
src/response_sender.cc (22 additions, 17 deletions)
@@ -147,22 +147,38 @@ ResponseSender::Send(
   }

   if (has_gpu_output) {
+    ScopedDefer _([send_message_payload] {
+      bi::scoped_lock<bi::interprocess_mutex> guard{send_message_payload->mu};
+      send_message_payload->is_stub_turn = false;
+      send_message_payload->cv.notify_one();
+      while (!send_message_payload->is_stub_turn) {
+        // Wait for the stub process to send the response and populate error
+        // message if any.
+        send_message_payload->cv.wait(guard);
+      }
+    });
+
     AllocatedSharedMemory<GPUBuffersShm> gpu_buffers_handle =
         shm_pool_->Load<GPUBuffersShm>(
             send_message_payload->gpu_buffers_handle);
+    if (!gpu_buffers_handle.data_->success) {
+      std::unique_ptr<PbString> error = PbString::LoadFromSharedMemory(
+          shm_pool_, gpu_buffers_handle.data_->error);
+      throw PythonBackendException(
+          "Failed to load GPU buffers: " + error->String());
+    }

     AllocatedSharedMemory<bi::managed_external_buffer::handle_t>
         gpu_buffers_handle_shm =
             shm_pool_->Load<bi::managed_external_buffer::handle_t>(
                 gpu_buffers_handle.data_->buffers);
     uint64_t gpu_buffer_count = gpu_buffers_handle.data_->buffer_count;
     if (gpu_tensors.size() != gpu_buffer_count) {
-      LOG_ERROR
-          << (std::string(
-                  "GPU buffers size does not match the provided buffers: ") +
-              std::to_string(gpu_tensors.size()) +
-              " != " + std::to_string(gpu_buffer_count));
-      return;
+      throw PythonBackendException(
+          std::string(
+              "GPU buffers size does not match the provided buffers: ") +
+          std::to_string(gpu_tensors.size()) +
+          " != " + std::to_string(gpu_buffer_count));
     }

     std::vector<std::unique_ptr<PbMemory>> dst_buffers;
@@ -175,17 +191,6 @@ ResponseSender::Send(
       std::shared_ptr<PbTensor>& src_buffer = gpu_tensors[i];
       PbMemory::CopyBuffer(dst_buffers[i], src_buffer->Memory());
     }
-
-    {
-      bi::scoped_lock<bi::interprocess_mutex> guard{send_message_payload->mu};
-      send_message_payload->is_stub_turn = false;
-      send_message_payload->cv.notify_one();
-      while (!send_message_payload->is_stub_turn) {
-        // Wait for the stub process to send the response and populate error
-        // message if any.
-        send_message_payload->cv.wait(guard);
-      }
-    }
   }

   if (send_message_payload->has_error) {
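
Note on the fix: the handshake that hands the turn back to the stub process used to sit at the end of the has_gpu_output branch, so the old error path (LOG_ERROR followed by return) skipped it and could leave the stub blocked on is_stub_turn. Moving the handshake into a ScopedDefer declared at the top of the branch means it also runs during stack unwinding, so the new throw statements can propagate a PythonBackendException without stranding the stub. Below is a minimal standalone sketch of that RAII idiom; it illustrates the pattern only and is not the repo's actual ScopedDefer implementation.

    #include <functional>
    #include <iostream>
    #include <stdexcept>
    #include <utility>

    // Runs the captured callable when the object leaves scope, both on
    // normal exit and during stack unwinding after a throw.
    class ScopedDefer {
     public:
      explicit ScopedDefer(std::function<void()> task)
          : task_(std::move(task)) {}
      ~ScopedDefer() { task_(); }
      ScopedDefer(const ScopedDefer&) = delete;
      ScopedDefer& operator=(const ScopedDefer&) = delete;

     private:
      std::function<void()> task_;
    };

    int main() {
      try {
        ScopedDefer _([] { std::cout << "handshake with stub still runs\n"; });
        throw std::runtime_error("GPU buffers size does not match");
      } catch (const std::exception& e) {
        std::cout << "caught: " << e.what() << "\n";
      }
    }

Running this prints the deferred message before the exception is caught, which is exactly the property the commit relies on: the stub is released first, and the error then surfaces to the caller instead of being logged and swallowed.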
