diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index a762d0de3928f0..4cd55d968060bf 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -1120,6 +1120,9 @@ DEFINE_mInt32(s3_writer_buffer_allocation_timeout_second, "60"); DEFINE_mBool(enable_column_type_check, "true"); +// Tolerance for the number of partition id 0 in rowset, default 0 +DEFINE_Int32(ignore_invalid_partition_id_rowset_num, "0"); + // clang-format off #ifdef BE_TEST // test s3 diff --git a/be/src/common/config.h b/be/src/common/config.h index f71aad99ab32b0..c4935bf8338fb4 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1176,6 +1176,9 @@ DECLARE_mInt32(s3_writer_buffer_allocation_timeout_second); DECLARE_mBool(enable_column_type_check); +// Tolerance for the number of partition id 0 in rowset, default 0 +DECLARE_Int32(ignore_invalid_partition_id_rowset_num); + #ifdef BE_TEST // test s3 DECLARE_String(test_s3_resource); diff --git a/be/src/olap/data_dir.cpp b/be/src/olap/data_dir.cpp index 47434bd82261a5..5517391541f3b4 100644 --- a/be/src/olap/data_dir.cpp +++ b/be/src/olap/data_dir.cpp @@ -383,6 +383,12 @@ Status DataDir::load() { if (rowset_meta->is_local()) { rowset_meta->set_fs(local_fs); } + + if (rowset_meta->partition_id() == 0) { + LOG(WARNING) << "rs tablet=" << rowset_meta->tablet_id() << " rowset_id=" << rowset_id + << " load from meta but partition id eq 0"; + } + dir_rowset_metas.push_back(rowset_meta); return true; }; @@ -470,6 +476,19 @@ Status DataDir::load() { }; TabletMetaManager::traverse_pending_publish(_meta, load_pending_publish_info_func); + int64_t rowset_partition_id_eq_0_num = 0; + for (auto rowset_meta : dir_rowset_metas) { + if (rowset_meta->partition_id() == 0) { + ++rowset_partition_id_eq_0_num; + } + } + if (rowset_partition_id_eq_0_num > config::ignore_invalid_partition_id_rowset_num) { + LOG(FATAL) << fmt::format( + "roswet partition id eq 0 bigger than config {}, be exit, plz check be.INFO", + config::ignore_invalid_partition_id_rowset_num); + exit(-1); + } + // traverse rowset // 1. add committed rowset to txn map // 2. add visible rowset to tablet @@ -486,6 +505,13 @@ Status DataDir::load() { continue; } + if (rowset_meta->partition_id() == 0) { + LOG(WARNING) << "skip tablet_id=" << tablet->tablet_id() + << " rowset: " << rowset_meta->rowset_id() + << " txn: " << rowset_meta->txn_id(); + continue; + } + RowsetSharedPtr rowset; Status create_status = tablet->create_rowset(rowset_meta, &rowset); if (!create_status) { @@ -499,8 +525,9 @@ Status DataDir::load() { rowset_meta->tablet_uid() == tablet->tablet_uid()) { if (!rowset_meta->tablet_schema()) { rowset_meta->set_tablet_schema(tablet->tablet_schema()); - RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), rowset_meta->rowset_id(), - rowset_meta->get_rowset_pb()); + RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), + rowset_meta->rowset_id(), + rowset_meta->get_rowset_pb(), false)); } Status commit_txn_status = _txn_manager->commit_txn( _meta, rowset_meta->partition_id(), rowset_meta->txn_id(), @@ -527,8 +554,9 @@ Status DataDir::load() { rowset_meta->tablet_uid() == tablet->tablet_uid()) { if (!rowset_meta->tablet_schema()) { rowset_meta->set_tablet_schema(tablet->tablet_schema()); - RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), rowset_meta->rowset_id(), - rowset_meta->get_rowset_pb()); + RETURN_IF_ERROR(RowsetMetaManager::save(_meta, rowset_meta->tablet_uid(), + rowset_meta->rowset_id(), + rowset_meta->get_rowset_pb(), false)); } Status publish_status = tablet->add_rowset(rowset); if (!publish_status && !publish_status.is()) { diff --git a/be/src/olap/rowset/rowset_meta_manager.cpp b/be/src/olap/rowset/rowset_meta_manager.cpp index 23682338ff9a07..f5dc8101ea0d17 100644 --- a/be/src/olap/rowset/rowset_meta_manager.cpp +++ b/be/src/olap/rowset/rowset_meta_manager.cpp @@ -34,6 +34,7 @@ #include "olap/olap_define.h" #include "olap/olap_meta.h" #include "olap/utils.h" +#include "util/debug_points.h" namespace doris { namespace { @@ -98,15 +99,22 @@ Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const Rowse // return Status::InternalError("invaid partition id {} tablet {}", // rowset_meta_pb.partition_id(), rowset_meta_pb.tablet_id()); } + DBUG_EXECUTE_IF("RowsetMetaManager::save::zero_partition_id", { + long partition_id = rowset_meta_pb.partition_id(); + auto& rs_pb = const_cast&>(rowset_meta_pb); + rs_pb.set_partition_id(0); + LOG(WARNING) << "set debug point RowsetMetaManager::save::zero_partition_id old=" + << partition_id << " new=" << rowset_meta_pb.DebugString(); + }); if (enable_binlog) { return _save_with_binlog(meta, tablet_uid, rowset_id, rowset_meta_pb); } else { - return save(meta, tablet_uid, rowset_id, rowset_meta_pb); + return _save(meta, tablet_uid, rowset_id, rowset_meta_pb); } } -Status RowsetMetaManager::save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, - const RowsetMetaPB& rowset_meta_pb) { +Status RowsetMetaManager::_save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, + const RowsetMetaPB& rowset_meta_pb) { std::string key = fmt::format("{}{}_{}", ROWSET_PREFIX, tablet_uid.to_string(), rowset_id.to_string()); std::string value; @@ -523,7 +531,7 @@ Status RowsetMetaManager::load_json_rowset_meta(OlapMeta* meta, } RowsetId rowset_id = rowset_meta.rowset_id(); TabletUid tablet_uid = rowset_meta.tablet_uid(); - Status status = save(meta, tablet_uid, rowset_id, rowset_meta.get_rowset_pb()); + Status status = save(meta, tablet_uid, rowset_id, rowset_meta.get_rowset_pb(), false); return status; } diff --git a/be/src/olap/rowset/rowset_meta_manager.h b/be/src/olap/rowset/rowset_meta_manager.h index 0c04cb686c5f31..ddf33aa055a44a 100644 --- a/be/src/olap/rowset/rowset_meta_manager.h +++ b/be/src/olap/rowset/rowset_meta_manager.h @@ -51,8 +51,6 @@ class RowsetMetaManager { // TODO(Drogon): refactor save && _save_with_binlog to one, adapt to ut temperately static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, const RowsetMetaPB& rowset_meta_pb, bool enable_binlog); - static Status save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, - const RowsetMetaPB& rowset_meta_pb); static std::vector get_binlog_filenames(OlapMeta* meta, TabletUid tablet_uid, std::string_view binlog_version, @@ -79,6 +77,8 @@ class RowsetMetaManager { static Status load_json_rowset_meta(OlapMeta* meta, const std::string& rowset_meta_path); private: + static Status _save(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, + const RowsetMetaPB& rowset_meta_pb); static Status _save_with_binlog(OlapMeta* meta, TabletUid tablet_uid, const RowsetId& rowset_id, const RowsetMetaPB& rowset_meta_pb); static Status _get_rowset_binlog_metas(OlapMeta* meta, const TabletUid tablet_uid, diff --git a/be/src/olap/tablet_manager.cpp b/be/src/olap/tablet_manager.cpp index 7aa5c52534f475..f1e8e6185fc0a0 100644 --- a/be/src/olap/tablet_manager.cpp +++ b/be/src/olap/tablet_manager.cpp @@ -844,6 +844,10 @@ Status TabletManager::load_tablet_from_meta(DataDir* data_dir, TTabletId tablet_ tablet_meta->set_tablet_state(TABLET_RUNNING); } + if (tablet_meta->partition_id() == 0) { + LOG(WARNING) << "tablet=" << tablet_id << " load from meta but partition id eq 0"; + } + TabletSharedPtr tablet = Tablet::create_tablet_from_meta(tablet_meta, data_dir); if (tablet == nullptr) { return Status::Error( diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index daec22246e04f6..9d6ea16d6ffb02 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -38,6 +38,7 @@ #include "olap/olap_define.h" #include "olap/tablet_meta_manager.h" #include "olap/utils.h" +#include "util/debug_points.h" #include "util/string_util.h" #include "util/time.h" #include "util/uid_util.h" @@ -471,6 +472,16 @@ Status TabletMeta::_save_meta(DataDir* data_dir) { Status TabletMeta::serialize(string* meta_binary) { TabletMetaPB tablet_meta_pb; to_meta_pb(&tablet_meta_pb); + if (tablet_meta_pb.partition_id() <= 0) { + LOG(WARNING) << "invalid partition id " << tablet_meta_pb.partition_id() << " tablet " + << tablet_meta_pb.tablet_id(); + } + DBUG_EXECUTE_IF("TabletMeta::serialize::zero_partition_id", { + long partition_id = tablet_meta_pb.partition_id(); + tablet_meta_pb.set_partition_id(0); + LOG(WARNING) << "set debug point TabletMeta::serialize::zero_partition_id old=" + << partition_id << " new=" << tablet_meta_pb.DebugString(); + }); bool serialize_success = tablet_meta_pb.SerializeToString(meta_binary); if (!serialize_success) { LOG(FATAL) << "failed to serialize meta " << full_name(); diff --git a/be/src/olap/txn_manager.cpp b/be/src/olap/txn_manager.cpp index da465b3fbd08b2..441cf93d4e4eb9 100644 --- a/be/src/olap/txn_manager.cpp +++ b/be/src/olap/txn_manager.cpp @@ -290,6 +290,13 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id, do { // get tx std::shared_lock rdlock(_get_txn_map_lock(transaction_id)); + auto rs_pb = rowset_ptr->rowset_meta()->get_rowset_pb(); + // TODO(dx): remove log after fix partition id eq 0 bug + if (!rs_pb.has_partition_id() || rs_pb.partition_id() == 0) { + rowset_ptr->rowset_meta()->set_partition_id(partition_id); + LOG(WARNING) << "cant get partition id from rs pb, get from func arg partition_id=" + << partition_id; + } txn_tablet_map_t& txn_tablet_map = _get_txn_tablet_map(transaction_id); auto it = txn_tablet_map.find(key); if (it == txn_tablet_map.end()) { @@ -335,15 +342,9 @@ Status TxnManager::commit_txn(OlapMeta* meta, TPartitionId partition_id, // save meta need access disk, it maybe very slow, so that it is not in global txn lock // it is under a single txn lock if (!is_recovery) { - auto rs_pb = rowset_ptr->rowset_meta()->get_rowset_pb(); - // TODO(dx): remove log after fix partition id eq 0 bug - if (!rs_pb.has_partition_id() || rs_pb.partition_id() == 0) { - rs_pb.set_partition_id(partition_id); - LOG(WARNING) << "cant get partition id from rs pb, get from func arg partition_id=" - << partition_id; - } Status save_status = - RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(), rs_pb); + RowsetMetaManager::save(meta, tablet_uid, rowset_ptr->rowset_id(), + rowset_ptr->rowset_meta()->get_rowset_pb(), false); DBUG_EXECUTE_IF("TxnManager.RowsetMetaManager.save_wait", { if (auto wait = dp->param("duration", 0); wait > 0) { LOG_WARNING("TxnManager.RowsetMetaManager.save_wait").tag("wait ms", wait); diff --git a/be/test/olap/rowset/rowset_meta_manager_test.cpp b/be/test/olap/rowset/rowset_meta_manager_test.cpp index a747d1fa2ca553..5875ba424b1349 100644 --- a/be/test/olap/rowset/rowset_meta_manager_test.cpp +++ b/be/test/olap/rowset/rowset_meta_manager_test.cpp @@ -103,7 +103,7 @@ TEST_F(RowsetMetaManagerTest, TestSaveAndGetAndRemove) { EXPECT_EQ(rowset_meta.rowset_id(), rowset_id); RowsetMetaPB rowset_meta_pb; rowset_meta.to_rowset_pb(&rowset_meta_pb); - Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id, rowset_meta_pb); + Status status = RowsetMetaManager::save(_meta, _tablet_uid, rowset_id, rowset_meta_pb, false); EXPECT_TRUE(status == Status::OK()); EXPECT_TRUE(RowsetMetaManager::check_rowset_meta(_meta, _tablet_uid, rowset_id)); std::string json_rowset_meta_read;