Skip to content

Commit

Permalink
[enhancement](cloud) get delete bitmap api uses the unified rpc retry mechanism (#46848)
Browse files Browse the repository at this point in the history
  • Loading branch information
luwei16 authored and Your Name committed Jan 13, 2025
1 parent fe1bfe7 commit fad96b4
Showing 1 changed file with 11 additions and 34 deletions.
45 changes: 11 additions & 34 deletions be/src/cloud/cloud_meta_mgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,8 @@ static std::string debug_info(const Request& req) {
} else if constexpr (is_any_v<Request, RemoveDeleteBitmapUpdateLockRequest>) {
return fmt::format(" table_id={}, tablet_id={}, lock_id={}", req.table_id(),
req.tablet_id(), req.lock_id());
} else if constexpr (is_any_v<Request, GetDeleteBitmapRequest>) {
return fmt::format(" tablet_id={}", req.tablet_id());
} else {
static_assert(!sizeof(Request));
}
Expand Down Expand Up @@ -373,7 +375,11 @@ Status retry_rpc(std::string_view op_name, const Request& req, Response* res,
std::shared_ptr<MetaService_Stub> stub;
RETURN_IF_ERROR(proxy->get(&stub));
brpc::Controller cntl;
cntl.set_timeout_ms(config::meta_service_brpc_timeout_ms);
if (op_name == "get delete bitmap") {
cntl.set_timeout_ms(3 * config::meta_service_brpc_timeout_ms);
} else {
cntl.set_timeout_ms(config::meta_service_brpc_timeout_ms);
}
cntl.set_max_retry(kBrpcRetryTimes);
res->Clear();
(stub.get()->*method)(&cntl, &req, res, nullptr);
Expand Down Expand Up @@ -714,41 +720,12 @@ Status CloudMetaMgr::sync_tablet_delete_bitmap(CloudTablet* tablet, int64_t old_

VLOG_DEBUG << "send GetDeleteBitmapRequest: " << req.ShortDebugString();

int retry_times = 0;
MetaServiceProxy* proxy;
RETURN_IF_ERROR(MetaServiceProxy::get_proxy(&proxy));
auto start = std::chrono::high_resolution_clock::now();
while (true) {
std::shared_ptr<MetaService_Stub> stub;
RETURN_IF_ERROR(proxy->get(&stub));
// When there are many delete bitmaps that need to be synchronized, it
// may take a longer time, especially when loading the tablet for the
// first time, so set a relatively long timeout time.
brpc::Controller cntl;
cntl.set_timeout_ms(3 * config::meta_service_brpc_timeout_ms);
cntl.set_max_retry(kBrpcRetryTimes);
res.Clear();
stub->get_delete_bitmap(&cntl, &req, &res, nullptr);
if (cntl.Failed()) [[unlikely]] {
LOG_INFO("failed to get delete bitmap")
.tag("reason", cntl.ErrorText())
.tag("tablet_id", tablet->tablet_id())
.tag("partition_id", tablet->partition_id())
.tag("tried", retry_times);
proxy->set_unhealthy();
} else {
break;
}

if (++retry_times > config::delete_bitmap_rpc_retry_times) {
if (cntl.Failed()) {
return Status::RpcError("failed to get delete bitmap, tablet={} err={}",
tablet->tablet_id(), cntl.ErrorText());
}
break;
}
}
auto st = retry_rpc("get delete bitmap", req, &res, &MetaService_Stub::get_delete_bitmap);
auto end = std::chrono::high_resolution_clock::now();
if (st.code() == ErrorCode::THRIFT_RPC_ERROR) {
return st;
}

if (res.status().code() == MetaServiceCode::TABLET_NOT_FOUND) {
return Status::NotFound("failed to get delete bitmap: {}", res.status().msg());
Expand Down

0 comments on commit fad96b4

Please sign in to comment.