Skip to content

Commit

Permalink
[improvement](test)Use show_nested_index_file to check inverted index (
Browse files Browse the repository at this point in the history
…apache#38443)

Use show_nested_index_file to check inverted index instead of
calc_crc
  • Loading branch information
qidaye committed Aug 1, 2024
1 parent 338fa32 commit 45a902b
Show file tree
Hide file tree
Showing 11 changed files with 633 additions and 0 deletions.
80 changes: 80 additions & 0 deletions be/src/http/action/show_nested_index_file_action.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "http/action/show_nested_index_file_action.h"

#include <rapidjson/rapidjson.h>

#include <exception>
#include <string>

#include "common/status.h"
#include "http/http_channel.h"
#include "http/http_headers.h"
#include "http/http_request.h"
#include "http/http_status.h"
#include "olap/storage_engine.h"
#include "olap/tablet_manager.h"
#include "util/stopwatch.hpp"

namespace doris {
using namespace ErrorCode;

// Content-Type header value attached to the response of this action.
const static std::string HEADER_JSON = "application/json";

// Forwards the execution environment and the required privilege hierarchy/type
// to HttpHandlerWithAuth; the constructor body itself does nothing else.
ShowNestedIndexFileAction::ShowNestedIndexFileAction(ExecEnv* exec_env, TPrivilegeHier::type hier,
TPrivilegeType::type ptype)
: HttpHandlerWithAuth(exec_env, hier, ptype) {}

// Show the nested inverted index files of the tablet named by the request.
//
// Reads the TABLET_ID_KEY request parameter, resolves the tablet through ExecEnv and
// stores the JSON produced by Tablet::show_nested_index_file() in `json_meta`.
//
// @param req        incoming request; the JSON content-type header is always added to it.
// @param json_meta  output, written only when the returned status is OK.
// @return OK on success; an error status when the tablet id is not a plain decimal
//         number, the tablet cannot be found, the tablet is not a `Tablet` instance,
//         or collecting the index file information fails.
Status ShowNestedIndexFileAction::_handle_show_nested_index_file(HttpRequest* req,
                                                                 std::string* json_meta) {
    req->add_output_header(HttpHeaders::CONTENT_TYPE, HEADER_JSON.c_str());
    std::string req_tablet_id = req->param(TABLET_ID_KEY);

    // std::stoull() alone would accept "12abc" (parsed as 12) and "-1" (wrapped to a
    // huge value), so require the whole parameter to consist of decimal digits.
    if (req_tablet_id.empty() ||
        req_tablet_id.find_first_not_of("0123456789") != std::string::npos) {
        LOG(WARNING) << "invalid argument.tablet_id:" << req_tablet_id;
        return Status::InternalError("convert failed, tablet_id is not a decimal number: {}",
                                     req_tablet_id);
    }

    uint64_t tablet_id = 0;
    try {
        tablet_id = std::stoull(req_tablet_id);
    } catch (const std::exception& e) {
        // Only std::out_of_range can still be raised for an all-digit string.
        LOG(WARNING) << "invalid argument.tablet_id:" << req_tablet_id;
        return Status::InternalError("convert failed, {}", e.what());
    }

    auto base_tablet = DORIS_TRY(ExecEnv::get_tablet(tablet_id));
    // cast base tablet to tablet; the cast yields nullptr when the tablet is some other
    // BaseTablet implementation, which must not be dereferenced.
    auto tablet = std::dynamic_pointer_cast<Tablet>(base_tablet);
    if (tablet == nullptr) {
        LOG(WARNING) << "show_nested_index_file failed to cast tablet, tablet_id:" << tablet_id;
        return Status::InternalError("failed to cast tablet {} to Tablet", tablet_id);
    }
    RETURN_IF_ERROR(tablet->show_nested_index_file(json_meta));
    return Status::OK();
}

// HTTP entry point: runs the show_nested_index_file request, logs the outcome together
// with the elapsed time, and replies with either the JSON payload (200) or the JSON
// form of the failing status (500).
void ShowNestedIndexFileAction::handle(HttpRequest* req) {
    MonotonicStopWatch watch;
    watch.start();

    std::string payload;
    Status result = _handle_show_nested_index_file(req, &payload);
    std::string result_json = result.to_json();
    watch.stop();

    // The raw elapsed value is divided by 1000000 and reported with an "ms" suffix.
    const auto elapsed_ms = watch.elapsed_time() / 1000000;
    LOG(INFO) << "handle show_nested_index_file request finished, result:" << result_json
              << ", use time = " << elapsed_ms << "ms";

    if (!result.ok()) {
        HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, result_json);
        return;
    }
    HttpChannel::send_reply(req, HttpStatus::OK, payload);
}

} // end namespace doris
46 changes: 46 additions & 0 deletions be/src/http/action/show_nested_index_file_action.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include <stdint.h>

#include <string>

#include "common/status.h"
#include "http/http_handler_with_auth.h"

namespace doris {
class HttpRequest;
class BaseStorageEngine;
class ExecEnv;

// This action is used to show nested inverted index file in tablet.
// It is an authenticated HTTP handler: the privilege hierarchy and type given to the
// constructor are passed on to HttpHandlerWithAuth.
class ShowNestedIndexFileAction : public HttpHandlerWithAuth {
public:
    ShowNestedIndexFileAction(ExecEnv* exec_env, TPrivilegeHier::type hier,
                              TPrivilegeType::type ptype);

    ~ShowNestedIndexFileAction() override = default;

    // Handles one request and sends the reply on `req`.
    void handle(HttpRequest* req) override;

private:
    // Builds the JSON description for the tablet named in `req` and stores it in
    // `json_meta`; returns a non-OK status on failure.
    Status _handle_show_nested_index_file(HttpRequest* req, std::string* json_meta);
};

} // end namespace doris
128 changes: 128 additions & 0 deletions be/src/olap/rowset/beta_rowset.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include "olap/rowset/beta_rowset_reader.h"
#include "olap/rowset/segment_v2/inverted_index_cache.h"
#include "olap/rowset/segment_v2/inverted_index_desc.h"
#include "olap/rowset/segment_v2/inverted_index_file_reader.h"
#include "olap/tablet_schema.h"
#include "olap/utils.h"
#include "util/crc32c.h"
Expand Down Expand Up @@ -702,4 +703,131 @@ Status BetaRowset::calc_local_file_crc(uint32_t* crc_value, int64_t* file_count)
return Status::OK();
}

// Describes the inverted index files of every segment of this rowset as JSON.
//
// Members added to `rowset_value`:
//   "rowset_id"            - string form of the rowset id
//   "index_storage_format" - "V1" or "V2"
//   "segments"             - array with one object per segment, each carrying
//                            "segment_id" and an "indices" array
// For every format other than V1 the segment object additionally carries the
// "idx_file_path"/"idx_file_size" of the segment's index file; for V1 those two members
// are attached to each entry of "indices" instead. Every index entry holds "index_id",
// "index_suffix" and a "files" array of {"name", "size"} objects listed from the opened
// index reader.
//
// @param rowset_value  JSON object to fill; members are appended to it.
// @param allocator     rapidjson allocator used for every value created here.
// @return OK on success, otherwise the first error encountered (reader init, file size
//         lookup, directory enumeration or index open).
Status BetaRowset::show_nested_index_file(rapidjson::Value* rowset_value,
                                          rapidjson::Document::AllocatorType& allocator) {
    const auto& fs = _rowset_meta->fs();
    auto storage_format = _schema->get_inverted_index_storage_format();
    const auto* format_str = storage_format == InvertedIndexStorageFormatPB::V1 ? "V1" : "V2";
    auto rs_id = rowset_id().to_string();
    rowset_value->AddMember("rowset_id", rapidjson::Value(rs_id.c_str(), allocator), allocator);
    rowset_value->AddMember("index_storage_format", rapidjson::Value(format_str, allocator),
                            allocator);

    // Appends the path and the size reported by the file system for one index file.
    auto add_file_info_to_json = [&](const std::string& path,
                                     rapidjson::Value& json_value) -> Status {
        json_value.AddMember("idx_file_path", rapidjson::Value(path.c_str(), allocator),
                             allocator);
        int64_t idx_file_size = 0;
        auto st = fs->file_size(path, &idx_file_size);
        if (!st.ok()) {
            LOG(WARNING) << "show nested index file get file size error, file: " << path
                         << ", error: " << st.msg();
            return st;
        }
        json_value.AddMember("idx_file_size", rapidjson::Value(idx_file_size).Move(),
                             allocator);
        return Status::OK();
    };

    // Opens one index through `index_file_reader`, lists the files nested in it into
    // `index["files"]` and then moves `index` into `indices`.
    auto process_files = [&allocator](InvertedIndexFileReader& index_file_reader,
                                      const TabletIndex& index_meta, rapidjson::Value& indices,
                                      rapidjson::Value& index) -> Status {
        auto ret = index_file_reader.open(&index_meta);
        if (!ret.has_value()) {
            LOG(WARNING) << "InvertedIndexFileReader open error:" << ret.error();
            return Status::InternalError("InvertedIndexFileReader open error");
        }
        auto reader = std::move(ret).value();

        std::vector<std::string> files;
        reader->list(&files);
        rapidjson::Value files_value(rapidjson::kArrayType);
        for (const auto& file : files) {
            rapidjson::Value file_value(rapidjson::kObjectType);
            auto size = reader->fileLength(file.c_str());
            file_value.AddMember("name", rapidjson::Value(file.c_str(), allocator), allocator);
            file_value.AddMember("size", rapidjson::Value(size).Move(), allocator);
            files_value.PushBack(file_value, allocator);
        }
        index.AddMember("files", files_value, allocator);
        indices.PushBack(index, allocator);
        return Status::OK();
    };

    rapidjson::Value segments(rapidjson::kArrayType);
    for (int seg_id = 0; seg_id < num_segments(); ++seg_id) {
        rapidjson::Value segment(rapidjson::kObjectType);
        segment.AddMember("segment_id", rapidjson::Value(seg_id).Move(), allocator);

        auto seg_path = segment_file_path(seg_id);
        // Split the segment path into its directory and file name for the reader.
        const std::filesystem::path seg_fs_path(seg_path);
        auto seg_parent_path = seg_fs_path.parent_path();
        auto seg_file_name = seg_fs_path.filename().string();
        auto inverted_index_file_reader = std::make_unique<InvertedIndexFileReader>(
                fs, seg_parent_path, seg_file_name, storage_format);
        RETURN_IF_ERROR(inverted_index_file_reader->init());
        auto dirs = inverted_index_file_reader->get_all_directories();

        rapidjson::Value indices(rapidjson::kArrayType);
        if (storage_format != InvertedIndexStorageFormatPB::V1) {
            auto path = InvertedIndexDescriptor::get_index_file_name(seg_path);
            RETURN_IF_ERROR(add_file_info_to_json(path, segment));

            // NOTE(review): `dirs` is dereferenced below; guard against an empty/failed
            // result instead of dereferencing it blindly.
            if (!dirs) {
                LOG(WARNING) << "show nested index file get all directories error, segment: "
                             << seg_path;
                return Status::InternalError("InvertedIndexFileReader get_all_directories error");
            }
            for (const auto& dir : *dirs) {
                rapidjson::Value index(rapidjson::kObjectType);
                auto index_id = dir.first.first;
                const auto& index_suffix = dir.first.second;
                index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator);
                index.AddMember("index_suffix", rapidjson::Value(index_suffix.c_str(), allocator),
                                allocator);

                // Build a TabletIndex carrying only the id and suffix; that is all the
                // information available from the directory key.
                doris::TabletIndexPB index_pb;
                index_pb.set_index_id(index_id);
                index_pb.set_index_suffix_name(index_suffix);
                TabletIndex index_meta;
                index_meta.init_from_pb(index_pb);

                RETURN_IF_ERROR(
                        process_files(*inverted_index_file_reader, index_meta, indices, index));
            }
        } else {
            const auto& tablet_schema = _rowset_meta->tablet_schema();
            for (const auto& column : tablet_schema->columns()) {
                const auto* index_meta = tablet_schema->get_inverted_index(*column);
                if (index_meta == nullptr) {
                    // Column has no inverted index.
                    continue;
                }
                rapidjson::Value index(rapidjson::kObjectType);
                auto index_id = index_meta->index_id();
                const auto& index_suffix = index_meta->get_index_suffix();
                index.AddMember("index_id", rapidjson::Value(index_id).Move(), allocator);
                index.AddMember("index_suffix", rapidjson::Value(index_suffix.c_str(), allocator),
                                allocator);
                auto path = InvertedIndexDescriptor::get_index_file_name(seg_path, index_id,
                                                                         index_suffix);
                RETURN_IF_ERROR(add_file_info_to_json(path, index));
                RETURN_IF_ERROR(
                        process_files(*inverted_index_file_reader, *index_meta, indices, index));
            }
        }
        segment.AddMember("indices", indices, allocator);
        segments.PushBack(segment, allocator);
    }
    rowset_value->AddMember("segments", segments, allocator);
    return Status::OK();
}

} // namespace doris
3 changes: 3 additions & 0 deletions be/src/olap/rowset/beta_rowset.h
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ class BetaRowset final : public Rowset {

Status calc_local_file_crc(uint32_t* crc_value, int64_t* file_count);

// Fills `rowset_value` with this rowset's id, its inverted index storage format and a
// per-segment list of inverted index files; every JSON value is created with `allocator`.
Status show_nested_index_file(rapidjson::Value* rowset_value,
rapidjson::Document::AllocatorType& allocator);

protected:
BetaRowset(const TabletSchemaSPtr& schema, const std::string& tablet_path,
const RowsetMetaSharedPtr& rowset_meta);
Expand Down
36 changes: 36 additions & 0 deletions be/src/olap/tablet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4106,4 +4106,40 @@ Status Tablet::calc_local_file_crc(uint32_t* crc_value, int64_t start_version, i
return Status::OK();
}

// Serializes the nested inverted index file layout of this tablet into `json_meta`.
//
// Collects the rowsets whose version lies inside [0, max version], sorts them with
// Rowset::comparator, asks each one (as a BetaRowset) for its description and writes
// the resulting document -- {"tablet_id": ..., "rowsets": [...]} -- as pretty-printed
// JSON. Returns the first error reported by a rowset, otherwise OK.
Status Tablet::show_nested_index_file(std::string* json_meta) {
    Version full_range(0, max_version_unlocked().second);
    std::vector<RowsetSharedPtr> selected;
    traverse_rowsets([&](const auto& candidate) {
        // keep every rowset covered by the version range
        if (!full_range.contains(candidate->version())) {
            return;
        }
        selected.emplace_back(candidate);
    });
    std::sort(selected.begin(), selected.end(), Rowset::comparator);

    rapidjson::Document doc;
    doc.SetObject();
    auto& alloc = doc.GetAllocator();
    doc.AddMember("tablet_id", rapidjson::Value(tablet_id()).Move(), alloc);

    rapidjson::Value rowsets_json(rapidjson::kArrayType);
    for (const auto& rs : selected) {
        auto beta_rowset = std::static_pointer_cast<BetaRowset>(rs);
        rapidjson::Value entry(rapidjson::kObjectType);
        RETURN_IF_ERROR(beta_rowset->show_nested_index_file(&entry, alloc));
        rowsets_json.PushBack(entry, alloc);
    }
    doc.AddMember("rowsets", rowsets_json, alloc);

    rapidjson::StringBuffer out;
    rapidjson::PrettyWriter<rapidjson::StringBuffer> writer(out);
    doc.Accept(writer);
    json_meta->assign(out.GetString());

    return Status::OK();
}

} // namespace doris
1 change: 1 addition & 0 deletions be/src/olap/tablet.h
Original file line number Diff line number Diff line change
Expand Up @@ -605,6 +605,7 @@ class Tablet final : public BaseTablet {
void clear_cache();
Status calc_local_file_crc(uint32_t* crc_value, int64_t start_version, int64_t end_version,
int32_t* rowset_count, int64_t* file_count);
// Writes a pretty-printed JSON description of the inverted index files of this
// tablet's rowsets into `json_meta`.
Status show_nested_index_file(std::string* json_meta);

private:
Status _init_once_action();
Expand Down
6 changes: 6 additions & 0 deletions be/src/service/http_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "http/action/report_action.h"
#include "http/action/reset_rpc_channel_action.h"
#include "http/action/restore_tablet_action.h"
#include "http/action/show_nested_index_file_action.h"
#include "http/action/snapshot_action.h"
#include "http/action/stream_load.h"
#include "http/action/stream_load_2pc.h"
Expand Down Expand Up @@ -331,6 +332,11 @@ Status HttpService::start() {
_pool.add(new CalcFileCrcAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
_ev_http_server->register_handler(HttpMethod::GET, "/api/calc_crc", calc_crc_action);

ShowNestedIndexFileAction* show_nested_index_file_action = _pool.add(
new ShowNestedIndexFileAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN));
_ev_http_server->register_handler(HttpMethod::GET, "/api/show_nested_index_file",
show_nested_index_file_action);

ReportAction* report_task_action = _pool.add(
new ReportAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN, "REPORT_TASK"));
_ev_http_server->register_handler(HttpMethod::GET, "/api/report/task", report_task_action);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql --
4748

-- !sql --
1

-- !sql --
4748

-- !sql --
1

Loading

0 comments on commit 45a902b

Please sign in to comment.