diff --git a/be/src/storage/lake/vacuum.cpp b/be/src/storage/lake/vacuum.cpp index 726480504052fa..44b0bf7b90c813 100644 --- a/be/src/storage/lake/vacuum.cpp +++ b/be/src/storage/lake/vacuum.cpp @@ -348,11 +348,15 @@ static Status collect_files_to_vacuum(TabletManager* tablet_mgr, std::string_vie auto t1 = butil::gettimeofday_ms(); g_metadata_travel_latency << (t1 - t0); - *vacuumed_version = final_retain_version; if (!skip_check_grace_timestamp) { // All tablet metadata files encountered were created after the grace timestamp, there were no files to delete + // The final_retain_version is set to min_retain_version or the minimum existing version which has garbage files. + // So we set vacuumed_version to `final_retain_version - 1`; otherwise the garbage files of final_retain_version + // could not be deleted + *vacuumed_version = final_retain_version - 1; return Status::OK(); } + *vacuumed_version = final_retain_version; DCHECK_LE(version, final_retain_version); for (auto v = version + 1; v < final_retain_version; v++) { RETURN_IF_ERROR(metafile_deleter->delete_file(join_path(meta_dir, tablet_metadata_filename(tablet_id, v)))); @@ -384,6 +388,7 @@ static Status vacuum_tablet_metadata(TabletManager* tablet_mgr, std::string_view auto metafile_delete_cb = [=](const std::vector& files) { erase_tablet_metadata_from_metacache(tablet_mgr, files); }; + int64_t final_vacuum_version = std::numeric_limits<int64_t>::max(); for (auto tablet_id : tablet_ids) { int64_t tablet_vacuumed_version = 0; AsyncFileDeleter datafile_deleter(config::lake_vacuum_min_batch_delete_size); @@ -393,13 +398,14 @@ static Status vacuum_tablet_metadata(TabletManager* tablet_mgr, std::string_view &tablet_vacuumed_version)); RETURN_IF_ERROR(datafile_deleter.finish()); RETURN_IF_ERROR(metafile_deleter.finish()); - if (*vacuumed_version == 0 || *vacuumed_version > tablet_vacuumed_version) { + if (final_vacuum_version > tablet_vacuumed_version) { // set partition vacuumed_version to min tablet vacuumed version - 
*vacuumed_version = tablet_vacuumed_version; + final_vacuum_version = tablet_vacuumed_version; } (*vacuumed_files) += datafile_deleter.delete_count(); (*vacuumed_files) += metafile_deleter.delete_count(); } + *vacuumed_version = final_vacuum_version; return Status::OK(); } diff --git a/be/test/storage/lake/vacuum_test.cpp b/be/test/storage/lake/vacuum_test.cpp index 5ca899b15e8fee..4ef224411b25a0 100644 --- a/be/test/storage/lake/vacuum_test.cpp +++ b/be/test/storage/lake/vacuum_test.cpp @@ -590,7 +590,7 @@ TEST_P(LakeVacuumTest, test_vacuum_3) { EXPECT_EQ(0, response.status().status_code()) << response.status().error_msgs(0); EXPECT_EQ(0, response.vacuumed_files()); EXPECT_EQ(0, response.vacuumed_file_size()); - EXPECT_EQ(2, response.vacuumed_version()); + EXPECT_EQ(1, response.vacuumed_version()); ensure_all_files_exist(); } @@ -1357,6 +1357,7 @@ TEST_P(LakeVacuumTest, test_vacuumed_version) { "data_size": 4096 } ], + "prev_garbage_version": 0, "commit_time": 1687331159 } )DEL"))); @@ -1412,6 +1413,21 @@ TEST_P(LakeVacuumTest, test_vacuumed_version) { } )DEL"))); + { + VacuumRequest request; + VacuumResponse response; + request.set_delete_txn_log(true); + request.add_tablet_ids(10001); + request.add_tablet_ids(10002); + request.set_min_retain_version(4); + request.set_grace_timestamp(1687331158); + request.set_min_active_txn_id(12344); + vacuum(_tablet_mgr.get(), request, &response); + ASSERT_TRUE(response.has_status()); + EXPECT_EQ(0, response.status().status_code()) << response.status().error_msgs(0); + EXPECT_EQ(1, response.vacuumed_version()); + } + { VacuumRequest request; VacuumResponse response; @@ -1439,7 +1455,7 @@ TEST_P(LakeVacuumTest, test_vacuumed_version) { vacuum(_tablet_mgr.get(), request, &response); ASSERT_TRUE(response.has_status()); EXPECT_EQ(0, response.status().status_code()) << response.status().error_msgs(0); - EXPECT_EQ(4, response.vacuumed_version()); + EXPECT_EQ(3, response.vacuumed_version()); } }