From e590855461561bcc077d723cb892faa3e09e526d Mon Sep 17 00:00:00 2001 From: PSeitz Date: Tue, 5 Nov 2024 14:22:50 +0800 Subject: [PATCH 01/27] box warp routes (#5540) box warp routes to avoid super heavy types. Performance impact should be okay, since these heavy types are not cache friendly. This reduces type complexity significantly, but some really long type chains remain. I've identified two, but there may be more 1. Boxed `or` chains 2. Response type chains We can replace the boxed `or` chains with a Vec, since they all contain the same box type now. Such a type doesn't seem to exist in warp yet. `cargo install --path .` Compile time before 4m49s Compile time now 4m14s Tool to list heavy functions: ` CARGO_PROFILE_RELEASE_LTO=fat cargo llvm-lines --release --bin quickwit > llvm_lines ` Size down from 32MB to 22MB addresses #5539 --- quickwit/quickwit-codegen/example/src/lib.rs | 3 ++ .../src/cluster_api/rest_handler.rs | 1 + .../src/delete_task_api/handler.rs | 1 + .../quickwit-serve/src/developer_api/mod.rs | 2 +- .../src/elasticsearch_api/bulk.rs | 1 + .../src/elasticsearch_api/mod.rs | 3 ++ .../src/elasticsearch_api/rest_handler.rs | 11 ++++++++ .../src/index_api/rest_handler.rs | 21 ++++++++++++++ .../src/indexing_api/rest_handler.rs | 1 + .../src/ingest_api/rest_handler.rs | 4 +++ .../src/jaeger_api/rest_handler.rs | 1 + .../quickwit-serve/src/node_info_handler.rs | 1 + .../src/otlp_api/rest_handler.rs | 5 ++++ quickwit/quickwit-serve/src/rest.rs | 28 +++++++++++++++---- .../src/template_api/rest_handler.rs | 1 + quickwit/quickwit-serve/src/ui_handler.rs | 1 + quickwit/rust-toolchain.toml | 2 +- 17 files changed, 79 insertions(+), 8 deletions(-) diff --git a/quickwit/quickwit-codegen/example/src/lib.rs b/quickwit/quickwit-codegen/example/src/lib.rs index 31572dafd94..f1599a6a605 100644 --- a/quickwit/quickwit-codegen/example/src/lib.rs +++ b/quickwit/quickwit-codegen/example/src/lib.rs @@ -62,6 +62,7 @@ where S: Service } #[derive(Debug, Clone, Default)] 
+#[allow(dead_code)] struct CounterLayer { counter: Arc, } @@ -77,6 +78,7 @@ impl Layer for CounterLayer { } } +#[allow(dead_code)] fn spawn_ping_response_stream( mut request_stream: ServiceStream, ) -> ServiceStream> { @@ -114,6 +116,7 @@ fn spawn_ping_response_stream( } #[derive(Debug, Clone, Default)] +#[allow(dead_code)] struct HelloImpl { delay: Duration, } diff --git a/quickwit/quickwit-serve/src/cluster_api/rest_handler.rs b/quickwit/quickwit-serve/src/cluster_api/rest_handler.rs index ddd8f4ffa05..e4357faeebd 100644 --- a/quickwit/quickwit-serve/src/cluster_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/cluster_api/rest_handler.rs @@ -45,6 +45,7 @@ pub fn cluster_handler( .and(extract_format_from_qs()) .map(into_rest_api_response) .recover(recover_fn) + .boxed() } #[utoipa::path( diff --git a/quickwit/quickwit-serve/src/delete_task_api/handler.rs b/quickwit/quickwit-serve/src/delete_task_api/handler.rs index eae5d625160..c9eefb95a1c 100644 --- a/quickwit/quickwit-serve/src/delete_task_api/handler.rs +++ b/quickwit/quickwit-serve/src/delete_task_api/handler.rs @@ -65,6 +65,7 @@ pub fn delete_task_api_handlers( get_delete_tasks_handler(metastore.clone()) .or(post_delete_tasks_handler(metastore.clone())) .recover(recover_fn) + .boxed() } pub fn get_delete_tasks_handler( diff --git a/quickwit/quickwit-serve/src/developer_api/mod.rs b/quickwit/quickwit-serve/src/developer_api/mod.rs index 2438e93146e..537cbe76a08 100644 --- a/quickwit/quickwit-serve/src/developer_api/mod.rs +++ b/quickwit/quickwit-serve/src/developer_api/mod.rs @@ -46,7 +46,7 @@ pub(crate) fn developer_api_routes( warp::path!("api" / "developer" / ..) 
.and( debug_handler(cluster.clone()) - .or(log_level_handler(env_filter_reload_fn.clone())) + .or(log_level_handler(env_filter_reload_fn.clone()).boxed()) .or(pprof_handlers()), ) .recover(recover_fn) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs b/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs index 5bd5ab43829..c6723d8521e 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs @@ -76,6 +76,7 @@ pub fn es_compat_index_bulk_handler( .and(extract_format_from_qs()) .map(make_elastic_api_response) .recover(recover_fn) + .boxed() } async fn elastic_ingest_bulk( diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs b/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs index a3b156dedee..479e48687f4 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs @@ -67,6 +67,7 @@ pub fn elastic_api_handlers( ingest_service.clone(), ingest_router.clone(), )) + .boxed() .or(es_compat_index_bulk_handler(ingest_service, ingest_router)) .or(es_compat_index_search_handler(search_service.clone())) .or(es_compat_index_count_handler(search_service.clone())) @@ -75,6 +76,7 @@ pub fn elastic_api_handlers( .or(es_compat_index_field_capabilities_handler( search_service.clone(), )) + .boxed() .or(es_compat_index_stats_handler(metastore.clone())) .or(es_compat_delete_index_handler(index_service)) .or(es_compat_stats_handler(metastore.clone())) @@ -82,6 +84,7 @@ pub fn elastic_api_handlers( .or(es_compat_cat_indices_handler(metastore.clone())) .or(es_compat_resolve_index_handler(metastore.clone())) .recover(recover_fn) + .boxed() // Register newly created handlers here. 
} diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs index 4bca30add33..2c6fd23a40d 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/rest_handler.rs @@ -90,6 +90,7 @@ pub fn es_compat_cluster_info_handler( })) }, ) + .boxed() } /// GET or POST _elastic/_search @@ -135,6 +136,7 @@ pub fn es_compat_delete_index_handler( .and(with_arg(index_service)) .then(es_compat_delete_index) .map(|result| make_elastic_api_response(result, BodyFormat::default())) + .boxed() } /// GET _elastic/_stats @@ -146,6 +148,7 @@ pub fn es_compat_stats_handler( .then(es_compat_stats) .map(|result| make_elastic_api_response(result, BodyFormat::default())) .recover(recover_fn) + .boxed() } /// GET _elastic/{index}/_stats @@ -157,6 +160,7 @@ pub fn es_compat_index_stats_handler( .then(es_compat_index_stats) .map(|result| make_elastic_api_response(result, BodyFormat::default())) .recover(recover_fn) + .boxed() } /// GET _elastic/_cat/indices @@ -168,6 +172,7 @@ pub fn es_compat_cat_indices_handler( .then(es_compat_cat_indices) .map(|result| make_elastic_api_response(result, BodyFormat::default())) .recover(recover_fn) + .boxed() } /// GET _elastic/_cat/indices/{index} @@ -179,6 +184,7 @@ pub fn es_compat_index_cat_indices_handler( .then(es_compat_index_cat_indices) .map(|result| make_elastic_api_response(result, BodyFormat::default())) .recover(recover_fn) + .boxed() } /// GET _elastic/_resolve/index/{index} @@ -189,6 +195,7 @@ pub fn es_compat_resolve_index_handler( .and(with_arg(metastore_service)) .then(es_compat_resolve_index) .map(|result| make_elastic_api_response(result, BodyFormat::default())) + .boxed() } /// GET or POST _elastic/{index}/_search @@ -200,6 +207,7 @@ pub fn es_compat_index_search_handler( .then(es_compat_index_search) .map(|result| make_elastic_api_response(result, BodyFormat::default())) .recover(recover_fn) 
+ .boxed() } /// GET or POST _elastic/{index}/_count @@ -211,6 +219,7 @@ pub fn es_compat_index_count_handler( .then(es_compat_index_count) .map(|result| make_elastic_api_response(result, BodyFormat::default())) .recover(recover_fn) + .boxed() } /// POST _elastic/_msearch @@ -228,6 +237,7 @@ pub fn es_compat_index_multi_search_handler( RestApiResponse::new(&result, status_code, BodyFormat::default()) }) .recover(recover_fn) + .boxed() } /// GET or POST _elastic/_search/scroll @@ -239,6 +249,7 @@ pub fn es_compat_scroll_handler( .then(es_scroll) .map(|result| make_elastic_api_response(result, BodyFormat::default())) .recover(recover_fn) + .boxed() } fn build_request_for_es_api( diff --git a/quickwit/quickwit-serve/src/index_api/rest_handler.rs b/quickwit/quickwit-serve/src/index_api/rest_handler.rs index 832df922983..0b33e730182 100644 --- a/quickwit/quickwit-serve/src/index_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/index_api/rest_handler.rs @@ -95,10 +95,12 @@ pub fn index_management_handlers( .or(update_index_handler(index_service.metastore())) .or(clear_index_handler(index_service.clone())) .or(delete_index_handler(index_service.clone())) + .boxed() // Splits handlers .or(list_splits_handler(index_service.metastore())) .or(describe_index_handler(index_service.metastore())) .or(mark_splits_for_deletion_handler(index_service.metastore())) + .boxed() // Sources handlers. .or(reset_source_checkpoint_handler(index_service.metastore())) .or(toggle_source_handler(index_service.metastore())) @@ -106,11 +108,13 @@ pub fn index_management_handlers( .or(get_source_handler(index_service.metastore())) .or(delete_source_handler(index_service.metastore())) .or(get_source_shards_handler(index_service.metastore())) + .boxed() // Tokenizer handlers. .or(analyze_request_handler()) // Parse query into query AST handler. 
.or(parse_query_request_handler()) .recover(recover_fn) + .boxed() } fn json_body( @@ -127,6 +131,7 @@ pub fn get_index_metadata_handler( .then(get_index_metadata) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } async fn get_index_metadata( @@ -163,6 +168,7 @@ fn list_indexes_metadata_handler( .then(list_indexes_metadata) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } /// Describes an index with its main information and statistics. @@ -260,6 +266,7 @@ fn describe_index_handler( .then(describe_index) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } /// This struct represents the QueryString passed to @@ -377,6 +384,7 @@ fn list_splits_handler( .then(list_splits) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[derive(Deserialize, utoipa::ToSchema)] @@ -433,6 +441,7 @@ fn mark_splits_for_deletion_handler( .then(mark_splits_for_deletion) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -496,6 +505,7 @@ fn create_index_handler( .map(log_failure("failed to create index")) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -544,6 +554,7 @@ fn update_index_handler( .map(log_failure("failed to update index")) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -606,6 +617,7 @@ fn clear_index_handler( .then(clear_index) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -646,6 +658,7 @@ fn delete_index_handler( .then(delete_index) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -686,6 +699,7 @@ fn create_source_handler( .map(log_failure("failed to create source")) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -741,6 +755,7 @@ fn get_source_handler( .then(get_source) .and(extract_format_from_qs()) .map(into_rest_api_response) 
+ .boxed() } async fn get_source( @@ -774,6 +789,7 @@ fn reset_source_checkpoint_handler( .then(reset_source_checkpoint) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -821,6 +837,7 @@ fn toggle_source_handler( .then(toggle_source) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[derive(Deserialize, utoipa::ToSchema)] @@ -880,6 +897,7 @@ fn delete_source_handler( .then(delete_source) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } #[utoipa::path( @@ -930,6 +948,7 @@ fn get_source_shards_handler( .then(get_source_shards) .and(extract_format_from_qs()) .map(into_rest_api_response) + .boxed() } async fn get_source_shards( @@ -982,6 +1001,7 @@ fn analyze_request_handler() -> impl Filter impl Filter + Clone { diff --git a/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs b/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs index 544c30afda4..79f9b7fdc30 100644 --- a/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/jaeger_api/rest_handler.rs @@ -68,6 +68,7 @@ pub(crate) fn jaeger_api_handlers( .or(jaeger_traces_search_handler(jaeger_service_opt.clone())) .or(jaeger_traces_handler(jaeger_service_opt.clone())) .recover(recover_fn) + .boxed() } fn jaeger_api_path_filter() -> impl Filter,), Error = Rejection> + Clone { diff --git a/quickwit/quickwit-serve/src/node_info_handler.rs b/quickwit/quickwit-serve/src/node_info_handler.rs index b1791be9d6f..93a7e4f1ac4 100644 --- a/quickwit/quickwit-serve/src/node_info_handler.rs +++ b/quickwit/quickwit-serve/src/node_info_handler.rs @@ -38,6 +38,7 @@ pub fn node_info_handler( node_version_handler(build_info, runtime_info) .or(node_config_handler(config)) .recover(recover_fn) + .boxed() } #[utoipa::path(get, tag = "Node Info", path = "/version")] diff --git a/quickwit/quickwit-serve/src/otlp_api/rest_handler.rs b/quickwit/quickwit-serve/src/otlp_api/rest_handler.rs index 
c393c665507..f3ac77261b4 100644 --- a/quickwit/quickwit-serve/src/otlp_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/otlp_api/rest_handler.rs @@ -56,6 +56,7 @@ pub(crate) fn otlp_ingest_api_handlers( .or(otlp_default_traces_handler(otlp_traces_service.clone()).recover(recover_fn)) .or(otlp_logs_handler(otlp_logs_service).recover(recover_fn)) .or(otlp_ingest_traces_handler(otlp_traces_service).recover(recover_fn)) + .boxed() } /// Open Telemetry REST/Protobuf logs ingest endpoint. @@ -91,6 +92,7 @@ pub(crate) fn otlp_default_logs_handler( ) .and(with_arg(BodyFormat::default())) .map(into_rest_api_response) + .boxed() } /// Open Telemetry REST/Protobuf logs ingest endpoint. #[utoipa::path( @@ -116,6 +118,7 @@ pub(crate) fn otlp_logs_handler( .then(otlp_ingest_logs) .and(with_arg(BodyFormat::default())) .map(into_rest_api_response) + .boxed() } /// Open Telemetry REST/Protobuf traces ingest endpoint. @@ -151,6 +154,7 @@ pub(crate) fn otlp_default_traces_handler( ) .and(with_arg(BodyFormat::default())) .map(into_rest_api_response) + .boxed() } /// Open Telemetry REST/Protobuf traces ingest endpoint. #[utoipa::path( @@ -176,6 +180,7 @@ pub(crate) fn otlp_ingest_traces_handler( .then(otlp_ingest_traces) .and(with_arg(BodyFormat::default())) .map(into_rest_api_response) + .boxed() } #[derive(Debug, Clone, thiserror::Error, Serialize)] diff --git a/quickwit/quickwit-serve/src/rest.rs b/quickwit/quickwit-serve/src/rest.rs index 3c83c2d84f1..71601335874 100644 --- a/quickwit/quickwit-serve/src/rest.rs +++ b/quickwit/quickwit-serve/src/rest.rs @@ -145,33 +145,38 @@ pub(crate) async fn start_rest_server( let api_doc = warp::path("openapi.json") .and(warp::get()) .map(|| warp::reply::json(&crate::openapi::build_docs())) - .recover(recover_fn); + .recover(recover_fn) + .boxed(); // `/health/*` routes. 
let health_check_routes = health_check_handlers( quickwit_services.cluster.clone(), quickwit_services.indexing_service_opt.clone(), quickwit_services.janitor_service_opt.clone(), - ); + ) + .boxed(); // `/metrics` route. let metrics_routes = warp::path("metrics") .and(warp::get()) .map(metrics_handler) - .recover(recover_fn); + .recover(recover_fn) + .boxed(); // `/api/developer/*` route. let developer_routes = developer_api_routes( quickwit_services.cluster.clone(), quickwit_services.env_filter_reload_fn.clone(), - ); + ) + .boxed(); // `/api/v1/*` routes. let api_v1_root_route = api_v1_routes(quickwit_services.clone()); let redirect_root_to_ui_route = warp::path::end() .and(warp::get()) .map(|| redirect(http::Uri::from_static("/ui/search"))) - .recover(recover_fn); + .recover(recover_fn) + .boxed(); let extra_headers = warp::reply::with::headers( quickwit_services @@ -243,6 +248,7 @@ fn search_routes( .or(search_plan_post_handler(search_service.clone())) .or(search_stream_handler(search_service)) .recover(recover_fn) + .boxed() } fn api_v1_routes( @@ -259,37 +265,47 @@ fn api_v1_routes( quickwit_services.index_manager.clone(), ) .or(cluster_handler(quickwit_services.cluster.clone())) + .boxed() .or(node_info_handler( BuildInfo::get(), RuntimeInfo::get(), quickwit_services.node_config.clone(), )) + .boxed() .or(indexing_get_handler( quickwit_services.indexing_service_opt.clone(), )) + .boxed() .or(search_routes(quickwit_services.search_service.clone())) + .boxed() .or(ingest_api_handlers( quickwit_services.ingest_router_service.clone(), quickwit_services.ingest_service.clone(), quickwit_services.node_config.ingest_api_config.clone(), )) + .boxed() .or(otlp_ingest_api_handlers( quickwit_services.otlp_logs_service_opt.clone(), quickwit_services.otlp_traces_service_opt.clone(), )) + .boxed() .or(index_management_handlers( quickwit_services.index_manager.clone(), quickwit_services.node_config.clone(), )) + .boxed() .or(delete_task_api_handlers( 
quickwit_services.metastore_client.clone(), )) + .boxed() .or(jaeger_api_handlers( quickwit_services.jaeger_service_opt.clone(), )) + .boxed() .or(index_template_api_handlers( quickwit_services.metastore_client.clone(), - )), + )) + .boxed(), ) } diff --git a/quickwit/quickwit-serve/src/template_api/rest_handler.rs b/quickwit/quickwit-serve/src/template_api/rest_handler.rs index fca22b2b4a7..0b549bdf588 100644 --- a/quickwit/quickwit-serve/src/template_api/rest_handler.rs +++ b/quickwit/quickwit-serve/src/template_api/rest_handler.rs @@ -57,6 +57,7 @@ pub(crate) fn index_template_api_handlers( .or(delete_index_template_handler(metastore.clone())) .or(list_index_templates_handler(metastore.clone())) .recover(recover_fn) + .boxed() } fn create_index_template_handler( diff --git a/quickwit/quickwit-serve/src/ui_handler.rs b/quickwit/quickwit-serve/src/ui_handler.rs index 2c78712381c..ecd54b060aa 100644 --- a/quickwit/quickwit-serve/src/ui_handler.rs +++ b/quickwit/quickwit-serve/src/ui_handler.rs @@ -43,6 +43,7 @@ pub fn ui_handler() -> impl Filter Result { diff --git a/quickwit/rust-toolchain.toml b/quickwit/rust-toolchain.toml index 94de52665ce..5c077352cff 100644 --- a/quickwit/rust-toolchain.toml +++ b/quickwit/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "1.78" +channel = "1.81" components = ["cargo", "clippy", "rustfmt", "rust-docs"] From d4ad40db86c86172ad6c902e50a5de38b207fd3a Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Thu, 7 Nov 2024 15:57:46 +0900 Subject: [PATCH 02/27] Removing the last extern crate macro use old idiom (#5546) --- quickwit/quickwit-proto/build.rs | 7 +- quickwit/quickwit-proto/src/cluster/mod.rs | 1 + .../opentelemetry.proto.collector.logs.v1.rs | 6 +- ...pentelemetry.proto.collector.metrics.v1.rs | 6 +- .../opentelemetry.proto.collector.trace.v1.rs | 6 +- .../opentelemetry.proto.common.v1.rs | 12 +-- .../opentelemetry.proto.logs.v1.rs | 12 +-- .../opentelemetry.proto.metrics.v1.rs | 42 +++++----- 
.../opentelemetry.proto.resource.v1.rs | 2 +- .../opentelemetry.proto.trace.v1.rs | 18 ++-- .../src/codegen/quickwit/quickwit.search.rs | 84 +++++++++---------- .../quickwit-proto/src/control_plane/mod.rs | 1 + quickwit/quickwit-proto/src/developer/mod.rs | 2 +- quickwit/quickwit-proto/src/ingest/mod.rs | 1 + quickwit/quickwit-proto/src/lib.rs | 3 - 15 files changed, 103 insertions(+), 100 deletions(-) diff --git a/quickwit/quickwit-proto/build.rs b/quickwit/quickwit-proto/build.rs index 2a93593e84c..0a7d7566a3b 100644 --- a/quickwit/quickwit-proto/build.rs +++ b/quickwit/quickwit-proto/build.rs @@ -184,7 +184,10 @@ fn main() -> Result<(), Box> { tonic_build::configure() .enum_attribute(".", "#[serde(rename_all=\"snake_case\")]") - .type_attribute(".", "#[derive(Serialize, Deserialize, utoipa::ToSchema)]") + .type_attribute( + ".", + "#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)]", + ) .type_attribute("PartialHit", "#[derive(Eq, Hash)]") .type_attribute("PartialHit.sort_value", "#[derive(Copy)]") .type_attribute("SearchRequest", "#[derive(Eq, Hash)]") @@ -214,7 +217,7 @@ fn main() -> Result<(), Box> { let protos = find_protos("protos/third-party/opentelemetry"); tonic_build::configure() - .type_attribute(".", "#[derive(Serialize, Deserialize)]") + .type_attribute(".", "#[derive(serde::Serialize, serde::Deserialize)]") .type_attribute("StatusCode", r#"#[serde(rename_all = "snake_case")]"#) .type_attribute( "ExportLogsServiceResponse", diff --git a/quickwit/quickwit-proto/src/cluster/mod.rs b/quickwit/quickwit-proto/src/cluster/mod.rs index 48ee9dc0554..8edd07e83c7 100644 --- a/quickwit/quickwit-proto/src/cluster/mod.rs +++ b/quickwit/quickwit-proto/src/cluster/mod.rs @@ -19,6 +19,7 @@ use quickwit_common::rate_limited_error; use quickwit_common::tower::MakeLoadShedError; +use serde::{Deserialize, Serialize}; use thiserror; use crate::error::{ServiceError, ServiceErrorCode}; diff --git 
a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.logs.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.logs.v1.rs index 68fd0e1d329..7c35915e03c 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.logs.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.logs.v1.rs @@ -1,4 +1,4 @@ -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExportLogsServiceRequest { @@ -12,7 +12,7 @@ pub struct ExportLogsServiceRequest { super::super::super::logs::v1::ResourceLogs, >, } -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[derive(utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -35,7 +35,7 @@ pub struct ExportLogsServiceResponse { #[prost(message, optional, tag = "1")] pub partial_success: ::core::option::Option, } -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExportLogsPartialSuccess { diff --git a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.metrics.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.metrics.v1.rs index 3751a2dd800..53bbe9793cd 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.metrics.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.metrics.v1.rs @@ -1,4 +1,4 @@ -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExportMetricsServiceRequest { @@ 
-12,7 +12,7 @@ pub struct ExportMetricsServiceRequest { super::super::super::metrics::v1::ResourceMetrics, >, } -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExportMetricsServiceResponse { @@ -34,7 +34,7 @@ pub struct ExportMetricsServiceResponse { #[prost(message, optional, tag = "1")] pub partial_success: ::core::option::Option, } -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExportMetricsPartialSuccess { diff --git a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.trace.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.trace.v1.rs index 9215438d32b..b667cb51d41 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.trace.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.collector.trace.v1.rs @@ -1,4 +1,4 @@ -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExportTraceServiceRequest { @@ -12,7 +12,7 @@ pub struct ExportTraceServiceRequest { super::super::super::trace::v1::ResourceSpans, >, } -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExportTraceServiceResponse { @@ -34,7 +34,7 @@ pub struct ExportTraceServiceResponse { #[prost(message, optional, tag = "1")] pub partial_success: ::core::option::Option, } -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, 
::prost::Message)] pub struct ExportTracePartialSuccess { diff --git a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.common.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.common.v1.rs index 721f6bad294..abc40cf30e7 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.common.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.common.v1.rs @@ -1,7 +1,7 @@ /// AnyValue is used to represent any type of attribute value. AnyValue may contain a /// primitive value such as a string or integer or it may contain an arbitrary nested /// object containing arrays, key-value lists and primitives. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct AnyValue { @@ -14,7 +14,7 @@ pub struct AnyValue { pub mod any_value { /// The value is one of the listed fields. It is valid for all values to be unspecified /// in which case this AnyValue is considered to be "empty". - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum Value { @@ -36,7 +36,7 @@ pub mod any_value { } /// ArrayValue is a list of AnyValue messages. We need ArrayValue as a message /// since oneof in AnyValue does not allow repeated fields. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ArrayValue { @@ -49,7 +49,7 @@ pub struct ArrayValue { /// a list of KeyValue messages (e.g. in Span) we use `repeated KeyValue` directly to /// avoid unnecessary extra wrapping (which slows down the protocol). The 2 approaches /// are semantically equivalent. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct KeyValueList { @@ -62,7 +62,7 @@ pub struct KeyValueList { } /// KeyValue is a key-value pair that is used to store Span attributes, Link /// attributes, etc. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct KeyValue { @@ -73,7 +73,7 @@ pub struct KeyValue { } /// InstrumentationScope is a message representing the instrumentation scope information /// such as the fully qualified name and version. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct InstrumentationScope { diff --git a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.logs.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.logs.v1.rs index 895396cf183..ba30bf10a0d 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.logs.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.logs.v1.rs @@ -8,7 +8,7 @@ /// /// When new fields are added into this message, the OTLP request MUST be updated /// as well. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LogsData { @@ -21,7 +21,7 @@ pub struct LogsData { pub resource_logs: ::prost::alloc::vec::Vec, } /// A collection of ScopeLogs from a Resource. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ResourceLogs { @@ -38,7 +38,7 @@ pub struct ResourceLogs { pub schema_url: ::prost::alloc::string::String, } /// A collection of Logs produced by a Scope. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScopeLogs { @@ -56,7 +56,7 @@ pub struct ScopeLogs { } /// A log record according to OpenTelemetry Log Data Model: /// -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LogRecord { @@ -123,7 +123,7 @@ pub struct LogRecord { pub span_id: ::prost::alloc::vec::Vec, } /// Possible values for LogRecord.SeverityNumber. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum SeverityNumber { @@ -221,7 +221,7 @@ impl SeverityNumber { } } /// Masks for LogRecord.flags field. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum LogRecordFlags { diff --git a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.metrics.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.metrics.v1.rs index b723f424e49..0c2da7bda93 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.metrics.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.metrics.v1.rs @@ -8,7 +8,7 @@ /// /// When new fields are added into this message, the OTLP request MUST be updated /// as well. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct MetricsData { @@ -21,7 +21,7 @@ pub struct MetricsData { pub resource_metrics: ::prost::alloc::vec::Vec, } /// A collection of ScopeMetrics from a Resource. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ResourceMetrics { @@ -38,7 +38,7 @@ pub struct ResourceMetrics { pub schema_url: ::prost::alloc::string::String, } /// A collection of Metrics produced by an Scope. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScopeMetrics { @@ -139,7 +139,7 @@ pub struct ScopeMetrics { /// to support correct rate calculation. Although it may be omitted /// when the start time is truly unknown, setting StartTimeUnixNano is /// strongly encouraged. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Metric { @@ -164,7 +164,7 @@ pub mod metric { /// Data determines the aggregation type (if any) of the metric, what is the /// reported value type for the data points, as well as the relatationship to /// the time interval over which they are reported. - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum Data { @@ -189,7 +189,7 @@ pub mod metric { /// aggregation, regardless of aggregation temporalities. Therefore, /// AggregationTemporality is not included. Consequently, this also means /// "StartTimeUnixNano" is ignored for all data points. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Gauge { @@ -198,7 +198,7 @@ pub struct Gauge { } /// Sum represents the type of a scalar metric that is calculated as a sum of all /// reported measurements over a time interval. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Sum { @@ -214,7 +214,7 @@ pub struct Sum { } /// Histogram represents the type of a metric that is calculated by aggregating /// as a Histogram of all reported measurements over a time interval. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Histogram { @@ -227,7 +227,7 @@ pub struct Histogram { } /// ExponentialHistogram represents the type of a metric that is calculated by aggregating /// as a ExponentialHistogram of all reported double measurements over a time interval. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExponentialHistogram { @@ -244,7 +244,7 @@ pub struct ExponentialHistogram { /// data type. These data points cannot always be merged in a meaningful way. /// While they can be useful in some applications, histogram data points are /// recommended for new applications. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Summary { @@ -253,7 +253,7 @@ pub struct Summary { } /// NumberDataPoint is a single data point in a timeseries that describes the /// time-varying scalar value of a metric. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct NumberDataPoint { @@ -293,7 +293,7 @@ pub struct NumberDataPoint { pub mod number_data_point { /// The value itself. A point is considered invalid when one of the recognized /// value fields is not present inside this oneof. 
- #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum Value { @@ -313,7 +313,7 @@ pub mod number_data_point { /// If the histogram does not contain the distribution of values, then both /// "explicit_bounds" and "bucket_counts" must be omitted and only "count" and /// "sum" are known. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct HistogramDataPoint { @@ -395,7 +395,7 @@ pub struct HistogramDataPoint { /// summary statistics for a population of values, it may optionally contain the /// distribution of those values across a set of buckets. /// -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ExponentialHistogramDataPoint { @@ -485,7 +485,7 @@ pub struct ExponentialHistogramDataPoint { pub mod exponential_histogram_data_point { /// Buckets are a set of bucket counts, encoded in a contiguous array /// of counts. - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Buckets { @@ -509,7 +509,7 @@ pub mod exponential_histogram_data_point { } /// SummaryDataPoint is a single data point in a timeseries that describes the /// time-varying values of a Summary metric. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SummaryDataPoint { @@ -564,7 +564,7 @@ pub mod summary_data_point { /// /// See the following issue for more context: /// - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ValueAtQuantile { @@ -583,7 +583,7 @@ pub mod summary_data_point { /// Exemplars also hold information about the environment when the measurement /// was recorded, for example the span and trace ID of the active span when the /// exemplar was recorded. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Exemplar { @@ -621,7 +621,7 @@ pub mod exemplar { /// The value of the measurement that was recorded. An exemplar is /// considered invalid when one of the recognized value fields is not present /// inside this oneof. - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] pub enum Value { @@ -634,7 +634,7 @@ pub mod exemplar { /// AggregationTemporality defines how a metric aggregator reports aggregated /// values. It describes how those values relate to the time interval over /// which they are aggregated. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum AggregationTemporality { @@ -731,7 +731,7 @@ impl AggregationTemporality { /// /// (point.flags & FLAG_NO_RECORDED_VALUE) == FLAG_NO_RECORDED_VALUE /// -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] pub enum DataPointFlags { diff --git a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.resource.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.resource.v1.rs index 0d286e7b4d8..cca43e3c694 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.resource.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.resource.v1.rs @@ -1,5 +1,5 @@ /// Resource information. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Resource { diff --git a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.trace.v1.rs b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.trace.v1.rs index cbb578273f3..abb6ab679e9 100644 --- a/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.trace.v1.rs +++ b/quickwit/quickwit-proto/src/codegen/opentelemetry/opentelemetry.proto.trace.v1.rs @@ -8,7 +8,7 @@ /// /// When new fields are added into this message, the OTLP request MUST be updated /// as well. 
-#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct TracesData { @@ -21,7 +21,7 @@ pub struct TracesData { pub resource_spans: ::prost::alloc::vec::Vec, } /// A collection of ScopeSpans from a Resource. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ResourceSpans { @@ -38,7 +38,7 @@ pub struct ResourceSpans { pub schema_url: ::prost::alloc::string::String, } /// A collection of Spans produced by an InstrumentationScope. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScopeSpans { @@ -57,7 +57,7 @@ pub struct ScopeSpans { /// A Span represents a single operation performed by a single component of the system. /// /// The next available field id is 17. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Span { @@ -167,7 +167,7 @@ pub struct Span { pub mod span { /// Event is a time-stamped annotation of the span, consisting of user-supplied /// text description and key-value pairs. - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Event { @@ -194,7 +194,7 @@ pub mod span { /// different trace. For example, this can be used in batching operations, /// where a single batch handler processes multiple requests from different /// traces or when the handler receives a request from a different project. 
- #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Link { @@ -222,7 +222,7 @@ pub mod span { } /// SpanKind is the type of span. Can be used to specify additional relationships between spans /// in addition to a parent/child relationship. - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[derive( Clone, Copy, @@ -288,7 +288,7 @@ pub mod span { } /// The Status type defines a logical error model that is suitable for different /// programming environments, including REST APIs and RPC APIs. -#[derive(Serialize, Deserialize)] +#[derive(serde::Serialize, serde::Deserialize)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Status { @@ -303,7 +303,7 @@ pub struct Status { pub mod status { /// For the semantics of status codes see /// - #[derive(Serialize, Deserialize)] + #[derive(serde::Serialize, serde::Deserialize)] #[serde(rename_all = "snake_case")] #[derive( Clone, diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index 189019162f8..3fc4d5bdcaa 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -1,5 +1,5 @@ /// / Scroll Request -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ScrollRequest { @@ -9,7 +9,7 @@ pub struct ScrollRequest { #[prost(uint32, optional, tag = "2")] pub scroll_ttl_secs: ::core::option::Option, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] 
#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct PutKvRequest { @@ -20,25 +20,25 @@ pub struct PutKvRequest { #[prost(uint32, tag = "3")] pub ttl_secs: u32, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct PutKvResponse {} -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct GetKvRequest { #[prost(bytes = "vec", tag = "1")] pub key: ::prost::alloc::vec::Vec, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct GetKvResponse { #[prost(bytes = "vec", optional, tag = "1")] pub payload: ::core::option::Option<::prost::alloc::vec::Vec>, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ReportSplit { @@ -49,18 +49,18 @@ pub struct ReportSplit { #[prost(string, tag = "1")] pub storage_uri: ::prost::alloc::string::String, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ReportSplitsRequest { #[prost(message, repeated, tag = "1")] pub report_splits: ::prost::alloc::vec::Vec, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, 
PartialEq, ::prost::Message)] pub struct ReportSplitsResponse {} -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListFieldsRequest { @@ -79,7 +79,7 @@ pub struct ListFieldsRequest { #[prost(int64, optional, tag = "4")] pub end_timestamp: ::core::option::Option, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafListFieldsRequest { @@ -98,14 +98,14 @@ pub struct LeafListFieldsRequest { #[prost(string, repeated, tag = "4")] pub fields: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListFieldsResponse { #[prost(message, repeated, tag = "1")] pub fields: ::prost::alloc::vec::Vec, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListFieldsEntryResponse { @@ -135,14 +135,14 @@ pub struct ListFieldsEntryResponse { ::prost::alloc::string::String, >, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListFields { #[prost(message, repeated, tag = "1")] pub fields: ::prost::alloc::vec::Vec, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Eq, Hash)] 
#[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -192,7 +192,7 @@ pub struct SearchRequest { #[prost(enumeration = "CountHits", tag = "17")] pub count_hits: i32, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Eq, Hash)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -207,7 +207,7 @@ pub struct SortField { #[prost(enumeration = "SortDatetimeFormat", optional, tag = "3")] pub sort_datetime_format: ::core::option::Option, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SearchResponse { @@ -242,14 +242,14 @@ pub struct SearchResponse { #[prost(uint64, tag = "8")] pub num_successful_splits: u64, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SearchPlanResponse { #[prost(string, tag = "1")] pub result: ::prost::alloc::string::String, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SplitSearchError { @@ -265,7 +265,7 @@ pub struct SplitSearchError { } /// / A LeafSearchRequest can span multiple indices. 
/// / -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafSearchRequest { @@ -287,7 +287,7 @@ pub struct LeafSearchRequest { pub index_uris: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, } /// / LeafRequestRef references data in LeafSearchRequest to deduplicate data. -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafRequestRef { @@ -302,7 +302,7 @@ pub struct LeafRequestRef { #[prost(message, repeated, tag = "3")] pub split_offsets: ::prost::alloc::vec::Vec, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SplitIdAndFooterOffsets { @@ -339,7 +339,7 @@ pub struct SplitIdAndFooterOffsets { /// flattened by concatenating the path to the root. /// /// See `quickwit_search::convert_leaf_hit` -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafHit { @@ -353,7 +353,7 @@ pub struct LeafHit { #[prost(string, optional, tag = "3")] pub leaf_snippet_json: ::core::option::Option<::prost::alloc::string::String>, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct Hit { @@ -385,7 +385,7 @@ pub struct Hit { /// - the split_id, /// - the segment_ord, /// - the doc id. 
-#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Eq, Hash)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -404,7 +404,7 @@ pub struct PartialHit { #[prost(uint32, tag = "4")] pub doc_id: u32, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[derive(Ord, PartialOrd)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] @@ -414,7 +414,7 @@ pub struct SortByValue { } /// Nested message and enum types in `SortByValue`. pub mod sort_by_value { - #[derive(Serialize, Deserialize, utoipa::ToSchema)] + #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[serde(rename_all = "snake_case")] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Oneof)] @@ -429,7 +429,7 @@ pub mod sort_by_value { Boolean(bool), } } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafSearchResponse { @@ -458,7 +458,7 @@ pub struct LeafSearchResponse { ::prost::alloc::vec::Vec, >, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SnippetRequest { @@ -467,7 +467,7 @@ pub struct SnippetRequest { #[prost(string, tag = "2")] pub query_ast_resolved: ::prost::alloc::string::String, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct FetchDocsRequest { @@ -489,7 +489,7 @@ pub struct FetchDocsRequest { 
#[prost(string, tag = "6")] pub doc_mapper: ::prost::alloc::string::String, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct FetchDocsResponse { @@ -497,7 +497,7 @@ pub struct FetchDocsResponse { #[prost(message, repeated, tag = "1")] pub hits: ::prost::alloc::vec::Vec, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListTermsRequest { @@ -521,7 +521,7 @@ pub struct ListTermsRequest { #[prost(bytes = "vec", optional, tag = "8")] pub end_key: ::core::option::Option<::prost::alloc::vec::Vec>, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct ListTermsResponse { @@ -539,7 +539,7 @@ pub struct ListTermsResponse { #[prost(string, repeated, tag = "4")] pub errors: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafListTermsRequest { @@ -555,7 +555,7 @@ pub struct LeafListTermsRequest { #[prost(string, tag = "3")] pub index_uri: ::prost::alloc::string::String, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafListTermsResponse { @@ -572,7 +572,7 @@ pub struct LeafListTermsResponse { #[prost(uint64, tag = "4")] pub num_attempted_splits: u64, } 
-#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct SearchStreamRequest { @@ -600,7 +600,7 @@ pub struct SearchStreamRequest { #[prost(string, repeated, tag = "10")] pub snippet_fields: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafSearchStreamRequest { @@ -620,7 +620,7 @@ pub struct LeafSearchStreamRequest { #[prost(string, tag = "6")] pub index_uri: ::prost::alloc::string::String, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] pub struct LeafSearchStreamResponse { @@ -631,7 +631,7 @@ pub struct LeafSearchStreamResponse { #[prost(string, tag = "2")] pub split_id: ::prost::alloc::string::String, } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[serde(rename_all = "snake_case")] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] @@ -683,7 +683,7 @@ impl ListFieldType { } } } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[serde(rename_all = "snake_case")] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] @@ -714,7 +714,7 @@ impl CountHits { } } } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[serde(rename_all = "snake_case")] #[derive(Clone, Copy, Debug, PartialEq, Eq, 
Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] @@ -749,7 +749,7 @@ impl SortOrder { /// Sort value format for datetime field. /// We keep an enum with only one format /// for future extension. -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[serde(rename_all = "snake_case")] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] @@ -777,7 +777,7 @@ impl SortDatetimeFormat { } } } -#[derive(Serialize, Deserialize, utoipa::ToSchema)] +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[serde(rename_all = "snake_case")] #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord, ::prost::Enumeration)] #[repr(i32)] diff --git a/quickwit/quickwit-proto/src/control_plane/mod.rs b/quickwit/quickwit-proto/src/control_plane/mod.rs index 8184851845e..ce3b50dca9f 100644 --- a/quickwit/quickwit-proto/src/control_plane/mod.rs +++ b/quickwit/quickwit-proto/src/control_plane/mod.rs @@ -20,6 +20,7 @@ use quickwit_actors::AskError; use quickwit_common::rate_limited_error; use quickwit_common::tower::{MakeLoadShedError, RpcName, TimeoutExceeded}; +use serde::{Deserialize, Serialize}; use thiserror; use crate::metastore::{MetastoreError, OpenShardSubrequest}; diff --git a/quickwit/quickwit-proto/src/developer/mod.rs b/quickwit/quickwit-proto/src/developer/mod.rs index 2ed98190b17..a6472585042 100644 --- a/quickwit/quickwit-proto/src/developer/mod.rs +++ b/quickwit/quickwit-proto/src/developer/mod.rs @@ -25,7 +25,7 @@ include!("../codegen/quickwit/quickwit.developer.rs"); pub type DeveloperResult = std::result::Result; -#[derive(Debug, thiserror::Error, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Debug, thiserror::Error, Eq, PartialEq, serde::Serialize, serde::Deserialize)] #[serde(rename_all = "snake_case")] pub enum DeveloperError { #[error("internal error: {0}")] diff --git a/quickwit/quickwit-proto/src/ingest/mod.rs 
b/quickwit/quickwit-proto/src/ingest/mod.rs index 48a410cd5ba..72a66082421 100644 --- a/quickwit/quickwit-proto/src/ingest/mod.rs +++ b/quickwit/quickwit-proto/src/ingest/mod.rs @@ -23,6 +23,7 @@ use bytes::Bytes; use bytesize::ByteSize; use quickwit_common::rate_limited_error; use quickwit_common::tower::MakeLoadShedError; +use serde::{Deserialize, Serialize}; use self::ingester::{PersistFailureReason, ReplicateFailureReason}; use self::router::IngestFailureReason; diff --git a/quickwit/quickwit-proto/src/lib.rs b/quickwit/quickwit-proto/src/lib.rs index c5a2aa5034d..aa8f49fcb97 100644 --- a/quickwit/quickwit-proto/src/lib.rs +++ b/quickwit/quickwit-proto/src/lib.rs @@ -109,9 +109,6 @@ pub mod opentelemetry { } } -#[macro_use] -extern crate serde; - impl TryFrom for search::SearchRequest { type Error = anyhow::Error; From db1751bd444430cbce0020b9ec18d6c4a536e7d2 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Fri, 8 Nov 2024 13:32:48 +0100 Subject: [PATCH 03/27] restrict maturity period to retention (#5543) --- .../src/actors/indexing_pipeline.rs | 9 +- .../src/actors/indexing_service.rs | 3 + .../src/actors/merge_pipeline.rs | 4 + .../quickwit-indexing/src/actors/uploader.rs | 12 +++ .../quickwit-indexing/src/merge_policy/mod.rs | 2 +- .../src/models/split_attrs.rs | 101 ++++++++++++++++-- .../src/actors/delete_task_pipeline.rs | 1 + .../quickwit-metastore/src/split_metadata.rs | 19 +++- 8 files changed, 142 insertions(+), 9 deletions(-) diff --git a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs index b90f795d236..044a868f354 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_pipeline.rs @@ -30,7 +30,7 @@ use quickwit_actors::{ use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_common::KillSwitch; -use quickwit_config::{IndexingSettings, SourceConfig}; +use 
quickwit_config::{IndexingSettings, RetentionPolicy, SourceConfig}; use quickwit_doc_mapper::DocMapper; use quickwit_ingest::IngesterPool; use quickwit_proto::indexing::IndexingPipelineId; @@ -367,6 +367,7 @@ impl IndexingPipeline { UploaderType::IndexUploader, self.params.metastore.clone(), self.params.merge_policy.clone(), + self.params.retention_policy.clone(), self.params.split_store.clone(), SplitsUpdateMailbox::Sequencer(sequencer_mailbox), self.params.max_concurrent_split_uploads_index, @@ -585,6 +586,7 @@ pub struct IndexingPipelineParams { // Merge-related parameters pub merge_policy: Arc, + pub retention_policy: Option, pub merge_planner_mailbox: Mailbox, pub max_concurrent_split_uploads_merge: usize, @@ -717,6 +719,7 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), + retention_policy: None, queues_dir_path: PathBuf::from("./queues"), max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, @@ -831,6 +834,7 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), + retention_policy: None, max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, @@ -908,6 +912,7 @@ mod tests { metastore: metastore.clone(), split_store: split_store.clone(), merge_policy: default_merge_policy(), + retention_policy: None, max_concurrent_split_uploads: 2, merge_io_throughput_limiter_opt: None, merge_scheduler_service: universe.get_or_spawn_one(), @@ -930,6 +935,7 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), + retention_policy: None, max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, @@ -1057,6 +1063,7 @@ mod tests { storage, split_store, merge_policy: default_merge_policy(), + retention_policy: None, max_concurrent_split_uploads_index: 4, max_concurrent_split_uploads_merge: 5, cooperative_indexing_permits: None, diff --git 
a/quickwit/quickwit-indexing/src/actors/indexing_service.rs b/quickwit/quickwit-indexing/src/actors/indexing_service.rs index 5a180840a1e..697bfb57b62 100644 --- a/quickwit/quickwit-indexing/src/actors/indexing_service.rs +++ b/quickwit/quickwit-indexing/src/actors/indexing_service.rs @@ -287,6 +287,7 @@ impl IndexingService { })?; let merge_policy = crate::merge_policy::merge_policy_from_settings(&index_config.indexing_settings); + let retention_policy = index_config.retention_policy_opt.clone(); let split_store = IndexingSplitStore::new(storage.clone(), self.local_split_store.clone()); let doc_mapper = build_doc_mapper(&index_config.doc_mapping, &index_config.search_settings) @@ -301,6 +302,7 @@ impl IndexingService { split_store: split_store.clone(), merge_scheduler_service: self.merge_scheduler_service.clone(), merge_policy: merge_policy.clone(), + retention_policy: retention_policy.clone(), merge_io_throughput_limiter_opt: self.merge_io_throughput_limiter_opt.clone(), max_concurrent_split_uploads: self.max_concurrent_split_uploads, event_broker: self.event_broker.clone(), @@ -329,6 +331,7 @@ impl IndexingService { // Merge-related parameters merge_policy, + retention_policy, max_concurrent_split_uploads_merge, merge_planner_mailbox, diff --git a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs index acb0f00c3e3..97c57a79b31 100644 --- a/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs +++ b/quickwit/quickwit-indexing/src/actors/merge_pipeline.rs @@ -29,6 +29,7 @@ use quickwit_common::io::{IoControls, Limiter}; use quickwit_common::pubsub::EventBroker; use quickwit_common::temp_dir::TempDirectory; use quickwit_common::KillSwitch; +use quickwit_config::RetentionPolicy; use quickwit_doc_mapper::DocMapper; use quickwit_metastore::{ ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitMetadata, @@ -286,6 +287,7 @@ impl MergePipeline { UploaderType::MergeUploader, 
self.params.metastore.clone(), self.params.merge_policy.clone(), + self.params.retention_policy.clone(), self.params.split_store.clone(), merge_publisher_mailbox.into(), self.params.max_concurrent_split_uploads, @@ -572,6 +574,7 @@ pub struct MergePipelineParams { pub merge_scheduler_service: Mailbox, pub split_store: IndexingSplitStore, pub merge_policy: Arc, + pub retention_policy: Option, pub max_concurrent_split_uploads: usize, //< TODO share with the indexing pipeline. pub merge_io_throughput_limiter_opt: Option, pub event_broker: EventBroker, @@ -635,6 +638,7 @@ mod tests { merge_scheduler_service: universe.get_or_spawn_one(), split_store, merge_policy: default_merge_policy(), + retention_policy: None, max_concurrent_split_uploads: 2, merge_io_throughput_limiter_opt: None, event_broker: Default::default(), diff --git a/quickwit/quickwit-indexing/src/actors/uploader.rs b/quickwit/quickwit-indexing/src/actors/uploader.rs index f7d09dc3fd7..daa5caa140a 100644 --- a/quickwit/quickwit-indexing/src/actors/uploader.rs +++ b/quickwit/quickwit-indexing/src/actors/uploader.rs @@ -31,6 +31,7 @@ use once_cell::sync::OnceCell; use quickwit_actors::{Actor, ActorContext, ActorExitStatus, Handler, Mailbox, QueueCapacity}; use quickwit_common::pubsub::EventBroker; use quickwit_common::spawn_named_task; +use quickwit_config::RetentionPolicy; use quickwit_metastore::checkpoint::IndexCheckpointDelta; use quickwit_metastore::{SplitMetadata, StageSplitsRequestExt}; use quickwit_proto::metastore::{MetastoreService, MetastoreServiceClient, StageSplitsRequest}; @@ -166,6 +167,7 @@ pub struct Uploader { uploader_type: UploaderType, metastore: MetastoreServiceClient, merge_policy: Arc, + retention_policy: Option, split_store: IndexingSplitStore, split_update_mailbox: SplitsUpdateMailbox, max_concurrent_split_uploads: usize, @@ -174,10 +176,12 @@ pub struct Uploader { } impl Uploader { + #[allow(clippy::too_many_arguments)] pub fn new( uploader_type: UploaderType, metastore: 
MetastoreServiceClient, merge_policy: Arc, + retention_policy: Option, split_store: IndexingSplitStore, split_update_mailbox: SplitsUpdateMailbox, max_concurrent_split_uploads: usize, @@ -187,6 +191,7 @@ impl Uploader { uploader_type, metastore, merge_policy, + retention_policy, split_store, split_update_mailbox, max_concurrent_split_uploads, @@ -300,6 +305,7 @@ impl Handler for Uploader { let index_uid = batch.index_uid(); let ctx_clone = ctx.clone(); let merge_policy = self.merge_policy.clone(); + let retention_policy = self.retention_policy.clone(); debug!(split_ids=?split_ids, "start-stage-and-store-splits"); let event_broker = self.event_broker.clone(); spawn_named_task( @@ -324,6 +330,7 @@ impl Handler for Uploader { )?; let split_metadata = create_split_metadata( &merge_policy, + retention_policy.as_ref(), &packaged_split.split_attrs, packaged_split.tags.clone(), split_streamer.footer_range.start..split_streamer.footer_range.end, @@ -535,6 +542,7 @@ mod tests { UploaderType::IndexUploader, MetastoreServiceClient::from_mock(mock_metastore), merge_policy, + None, split_store, SplitsUpdateMailbox::Sequencer(sequencer_mailbox), 4, @@ -650,6 +658,7 @@ mod tests { UploaderType::IndexUploader, MetastoreServiceClient::from_mock(mock_metastore), merge_policy, + None, split_store, SplitsUpdateMailbox::Sequencer(sequencer_mailbox), 4, @@ -797,6 +806,7 @@ mod tests { UploaderType::IndexUploader, MetastoreServiceClient::from_mock(mock_metastore), merge_policy, + None, split_store, SplitsUpdateMailbox::Publisher(publisher_mailbox), 4, @@ -870,6 +880,7 @@ mod tests { UploaderType::IndexUploader, MetastoreServiceClient::from_mock(mock_metastore), default_merge_policy(), + None, split_store, SplitsUpdateMailbox::Sequencer(sequencer_mailbox), 4, @@ -974,6 +985,7 @@ mod tests { UploaderType::IndexUploader, MetastoreServiceClient::from_mock(mock_metastore), merge_policy, + None, split_store, SplitsUpdateMailbox::Publisher(publisher_mailbox), 4, diff --git 
a/quickwit/quickwit-indexing/src/merge_policy/mod.rs b/quickwit/quickwit-indexing/src/merge_policy/mod.rs index e916c9b6ffc..02f2249c5dc 100644 --- a/quickwit/quickwit-indexing/src/merge_policy/mod.rs +++ b/quickwit/quickwit-indexing/src/merge_policy/mod.rs @@ -396,7 +396,7 @@ pub mod tests { source_id: "test_source".to_string(), }; let split_attrs = merge_split_attrs(pipeline_id, merged_split_id, splits).unwrap(); - create_split_metadata(merge_policy, &split_attrs, tags, 0..0) + create_split_metadata(merge_policy, None, &split_attrs, tags, 0..0) } fn apply_merge( diff --git a/quickwit/quickwit-indexing/src/models/split_attrs.rs b/quickwit/quickwit-indexing/src/models/split_attrs.rs index 5ac0de40ff3..217f1bc331d 100644 --- a/quickwit/quickwit-indexing/src/models/split_attrs.rs +++ b/quickwit/quickwit-indexing/src/models/split_attrs.rs @@ -21,8 +21,9 @@ use std::collections::BTreeSet; use std::fmt; use std::ops::{Range, RangeInclusive}; use std::sync::Arc; +use std::time::Duration; -use quickwit_metastore::SplitMetadata; +use quickwit_metastore::{SplitMaturity, SplitMetadata}; use quickwit_proto::types::{DocMappingUid, IndexUid, NodeId, SourceId, SplitId}; use tantivy::DateTime; use time::OffsetDateTime; @@ -92,13 +93,27 @@ impl fmt::Debug for SplitAttrs { pub fn create_split_metadata( merge_policy: &Arc, + retention_policy: Option<&quickwit_config::RetentionPolicy>, split_attrs: &SplitAttrs, tags: BTreeSet, footer_offsets: Range, ) -> SplitMetadata { let create_timestamp = OffsetDateTime::now_utc().unix_timestamp(); - let maturity = + + let time_range = split_attrs + .time_range + .as_ref() + .map(|range| range.start().into_timestamp_secs()..=range.end().into_timestamp_secs()); + + let mut maturity = merge_policy.split_maturity(split_attrs.num_docs as usize, split_attrs.num_merge_ops); + if let Some(max_maturity) = max_maturity_before_end_of_retention( + retention_policy, + create_timestamp, + time_range.as_ref().map(|time_range| *time_range.end()), + ) { + 
maturity = maturity.min(max_maturity); + } SplitMetadata { node_id: split_attrs.node_id.to_string(), index_uid: split_attrs.index_uid.clone(), @@ -107,10 +122,7 @@ pub fn create_split_metadata( split_id: split_attrs.split_id.clone(), partition_id: split_attrs.partition_id, num_docs: split_attrs.num_docs as usize, - time_range: split_attrs - .time_range - .as_ref() - .map(|range| range.start().into_timestamp_secs()..=range.end().into_timestamp_secs()), + time_range, uncompressed_docs_size_in_bytes: split_attrs.uncompressed_docs_size_in_bytes, create_timestamp, maturity, @@ -120,3 +132,80 @@ pub fn create_split_metadata( num_merge_ops: split_attrs.num_merge_ops, } } + +/// reduce the maturity period of a split based on retention policy, so that it doesn't get merged +/// after it expires. +fn max_maturity_before_end_of_retention( + retention_policy: Option<&quickwit_config::RetentionPolicy>, + create_timestamp: i64, + time_range_end: Option, +) -> Option { + let time_range_end = time_range_end? as u64; + let retention_period_s = retention_policy?.retention_period().ok()?.as_secs(); + + let maturity = if let Some(maturation_period_s) = + (time_range_end + retention_period_s).checked_sub(create_timestamp as u64) + { + SplitMaturity::Immature { + maturation_period: Duration::from_secs(maturation_period_s), + } + } else { + // this split could be deleted as soon as it is created. Ideally we would + // handle that sooner. 
+ SplitMaturity::Mature + }; + Some(maturity) +} + +#[cfg(test)] +mod tests { + use std::time::Duration; + + use quickwit_metastore::SplitMaturity; + + use super::max_maturity_before_end_of_retention; + + #[test] + fn test_max_maturity_before_end_of_retention() { + let retention_policy = quickwit_config::RetentionPolicy { + evaluation_schedule: "daily".to_string(), + retention_period: "300 sec".to_string(), + }; + let create_timestamp = 1000; + + // this should be deleted asap, not subject to merge + assert_eq!( + max_maturity_before_end_of_retention( + Some(&retention_policy), + create_timestamp, + Some(200), + ), + Some(SplitMaturity::Mature) + ); + + // retention ends at 750 + 300 = 1050, which is 50s from now + assert_eq!( + max_maturity_before_end_of_retention( + Some(&retention_policy), + create_timestamp, + Some(750), + ), + Some(SplitMaturity::Immature { + maturation_period: Duration::from_secs(50) + }) + ); + + // no retention policy + assert_eq!( + max_maturity_before_end_of_retention(None, create_timestamp, Some(850),), + None, + ); + + // no timestamp_range.end but a retention policy, that's odd, don't change anything about + // the maturity period + assert_eq!( + max_maturity_before_end_of_retention(Some(&retention_policy), create_timestamp, None,), + None, + ); + } +} diff --git a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs index 5c25c7ae1c7..452e47bcb5b 100644 --- a/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs +++ b/quickwit/quickwit-janitor/src/actors/delete_task_pipeline.rs @@ -181,6 +181,7 @@ impl DeleteTaskPipeline { UploaderType::DeleteUploader, self.metastore.clone(), merge_policy, + index_config.retention_policy_opt.clone(), split_store.clone(), SplitsUpdateMailbox::Publisher(publisher_mailbox), self.max_concurrent_split_uploads, diff --git a/quickwit/quickwit-metastore/src/split_metadata.rs b/quickwit/quickwit-metastore/src/split_metadata.rs index 
53608f307d8..9af86c0a5de 100644 --- a/quickwit/quickwit-metastore/src/split_metadata.rs +++ b/quickwit/quickwit-metastore/src/split_metadata.rs @@ -344,7 +344,7 @@ impl FromStr for SplitState { /// or `Immature` with a given maturation period. /// The maturity is determined by the `MergePolicy`. #[serde_as] -#[derive(Clone, Copy, Debug, Default, Eq, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Copy, Debug, Default, Eq, Serialize, Deserialize, PartialEq, PartialOrd, Ord)] #[serde(tag = "type")] #[serde(rename_all = "snake_case")] pub enum SplitMaturity { @@ -439,4 +439,21 @@ mod tests { assert_eq!(format!("{:?}", split_metadata), expected_output); } + + #[test] + fn test_spit_maturity_order() { + assert!( + SplitMaturity::Mature + < SplitMaturity::Immature { + maturation_period: Duration::from_secs(0) + } + ); + assert!( + SplitMaturity::Immature { + maturation_period: Duration::from_secs(0) + } < SplitMaturity::Immature { + maturation_period: Duration::from_secs(1) + } + ); + } } From 23289a1b2f19d06334c891105a3eb9f2a69bc284 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 17:40:07 +0900 Subject: [PATCH 04/27] Bump codecov/codecov-action from 4 to 5 (#5549) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4 to 5. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4...v5) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index 00e495bf99f..308b022045d 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -160,7 +160,7 @@ jobs: working-directory: ./quickwit - name: Upload coverage to Codecov - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos files: ./quickwit/lcov.info From fe0f39adebd5024b968fff525bcfcca5b75d7eac Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Mon, 18 Nov 2024 12:41:32 +0100 Subject: [PATCH 05/27] wait for merge at end of local ingest (#5542) --- quickwit/quickwit-cli/src/tool.rs | 6 +++++- quickwit/quickwit-indexing/src/actors/mod.rs | 2 +- quickwit/quickwit-indexing/src/lib.rs | 4 ++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/quickwit/quickwit-cli/src/tool.rs b/quickwit/quickwit-cli/src/tool.rs index f5b2c512d33..cee9361bd6f 100644 --- a/quickwit/quickwit-cli/src/tool.rs +++ b/quickwit/quickwit-cli/src/tool.rs @@ -495,7 +495,11 @@ pub async fn local_ingest_docs_cli(args: LocalIngestDocsArgs) -> anyhow::Result< let statistics = start_statistics_reporting_loop(indexing_pipeline_handle, args.input_path_opt.is_none()) .await?; - merge_pipeline_handle.quit().await; + merge_pipeline_handle + .mailbox() + .ask(quickwit_indexing::FinishPendingMergesAndShutdownPipeline) + .await?; + merge_pipeline_handle.join().await; // Shutdown the indexing server. 
universe .send_exit_with_success(&indexing_server_mailbox) diff --git a/quickwit/quickwit-indexing/src/actors/mod.rs b/quickwit/quickwit-indexing/src/actors/mod.rs index ab70f06df35..31e6d32a032 100644 --- a/quickwit/quickwit-indexing/src/actors/mod.rs +++ b/quickwit/quickwit-indexing/src/actors/mod.rs @@ -41,7 +41,7 @@ pub use indexer::{Indexer, IndexerCounters}; pub use indexing_pipeline::{IndexingPipeline, IndexingPipelineParams}; pub use indexing_service::{IndexingService, IndexingServiceCounters, INDEXING_DIR_NAME}; pub use merge_executor::{combine_partition_ids, merge_split_attrs, MergeExecutor}; -pub use merge_pipeline::MergePipeline; +pub use merge_pipeline::{FinishPendingMergesAndShutdownPipeline, MergePipeline}; pub(crate) use merge_planner::{MergePlanner, RunFinalizeMergePolicyAndQuit}; pub use merge_scheduler_service::{schedule_merge, MergePermit, MergeSchedulerService}; pub use merge_split_downloader::MergeSplitDownloader; diff --git a/quickwit/quickwit-indexing/src/lib.rs b/quickwit/quickwit-indexing/src/lib.rs index 53a0b37b3a9..8d26d490110 100644 --- a/quickwit/quickwit-indexing/src/lib.rs +++ b/quickwit/quickwit-indexing/src/lib.rs @@ -31,8 +31,8 @@ use tracing::info; use crate::actors::MergeSchedulerService; pub use crate::actors::{ - IndexingError, IndexingPipeline, IndexingPipelineParams, IndexingService, PublisherType, - Sequencer, SplitsUpdateMailbox, + FinishPendingMergesAndShutdownPipeline, IndexingError, IndexingPipeline, + IndexingPipelineParams, IndexingService, PublisherType, Sequencer, SplitsUpdateMailbox, }; pub use crate::controlled_directory::ControlledDirectory; use crate::models::IndexingStatistics; From 6362633aab08b0477a659df01b4d679a80fea47b Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 22 Nov 2024 15:59:44 -0500 Subject: [PATCH 06/27] Update Rust version --- quickwit/rust-toolchain.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickwit/rust-toolchain.toml b/quickwit/rust-toolchain.toml index 
5c077352cff..b45c6886959 100644 --- a/quickwit/rust-toolchain.toml +++ b/quickwit/rust-toolchain.toml @@ -1,4 +1,4 @@ [toolchain] -channel = "1.81" +channel = "1.82" components = ["cargo", "clippy", "rustfmt", "rust-docs"] From 4b851079668b640cd20e80c7be9367677a099017 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 22 Nov 2024 16:04:17 -0500 Subject: [PATCH 07/27] Fix clippy warning --- quickwit/quickwit-search/src/collector.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index 78b40be925e..4b69348ecde 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -471,6 +471,7 @@ fn get_score_extractor( }) } +#[allow(clippy::large_enum_variant)] enum AggregationSegmentCollectors { FindTraceIdsSegmentCollector(Box), TantivyAggregationSegmentCollector(AggregationSegmentCollector), From 0e907c27fd3b9c81a5df3ec4edd7b3b8630b8f7b Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 22 Nov 2024 16:18:25 -0500 Subject: [PATCH 08/27] Upgrade VRL Removes the clippy warning triggered by the `anymap` beta dependency of VRL 0.8 --- quickwit/Cargo.toml | 2 +- quickwit/quickwit-indexing/src/actors/doc_processor.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index 82419148e2f..c3e3051470c 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -266,7 +266,7 @@ ulid = "1.1" username = "0.2" utoipa = { version = "4.2", features = ["time", "ulid"] } uuid = { version = "1.10", features = ["v4", "serde"] } -vrl = { version = "0.8.1", default-features = false, features = [ +vrl = { version = "0.19", default-features = false, features = [ "compiler", "diagnostic", "stdlib", diff --git a/quickwit/quickwit-indexing/src/actors/doc_processor.rs b/quickwit/quickwit-indexing/src/actors/doc_processor.rs index 174f2565bf0..a746058e38f 100644 --- 
a/quickwit/quickwit-indexing/src/actors/doc_processor.rs +++ b/quickwit/quickwit-indexing/src/actors/doc_processor.rs @@ -139,7 +139,7 @@ fn try_into_vrl_doc( SourceInputFormat::Json => serde_json::from_slice::(&raw_doc)?, SourceInputFormat::PlainText => { let mut map = std::collections::BTreeMap::new(); - let key = PLAIN_TEXT.to_string(); + let key = vrl::value::KeyString::from(PLAIN_TEXT); let value = VrlValue::Bytes(raw_doc); map.insert(key, value); VrlValue::Object(map) From bc2c8a907f1da07cf9be2bd2c75ba007900c87dc Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 22 Nov 2024 16:20:11 -0500 Subject: [PATCH 09/27] Upgrade dependencies --- quickwit/Cargo.lock | 1280 ++++++++++++++++++++++++++++++------------- 1 file changed, 911 insertions(+), 369 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index d39e91442fd..1662803ed0b 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -104,9 +104,9 @@ dependencies = [ [[package]] name = "allocator-api2" -version = "0.2.18" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +checksum = "45862d1c77f2228b9e10bc609d5bc203d86ebc9b87ad8d5d5167a6c9abf739d9" [[package]] name = "android-tzdata" @@ -150,9 +150,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.15" +version = "0.6.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64e15c1ab1f89faffbf04a634d5e1962e9074f2741eef6d97f3c4e322426d526" +checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" dependencies = [ "anstyle", "anstyle-parse", @@ -165,49 +165,43 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.8" +version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bec1de6f59aedf83baf9ff929c98f2ad654b97c9510f4e70cf6f661d49fd5b1" +checksum = 
"55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" [[package]] name = "anstyle-parse" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb47de1e80c2b463c735db5b217a0ddc39d612e7ac9e2e96a5aed1f57616c1cb" +checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d36fc52c7f6c869915e99412912f22093507da8d9e942ceaf66fe4b7c14422a" +checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.4" +version = "3.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bf74e1b6e971609db8ca7a9ce79fd5768ab6ae46441c572e46cf596f59e57f8" +checksum = "2109dbce0e72be3ec00bed26e6a7479ca384ad226efdd66db8fa2e3a38c83125" dependencies = [ "anstyle", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] name = "anyhow" -version = "1.0.89" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" - -[[package]] -name = "anymap" -version = "1.0.0-beta.2" +version = "1.0.93" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1f8f5a6f3d50d89e3797d7593a50f96bb2aaa20ca0cc7be1fb673232c91d72" +checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775" [[package]] name = "arc-swap" @@ -264,15 +258,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35" dependencies = [ "concurrent-queue", - "event-listener", + "event-listener 2.5.3", "futures-core", ] [[package]] name = "async-compression" -version = "0.4.13" +version = 
"0.4.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e614738943d3f68c628ae3dbce7c3daffb196665f82f8c8ea6b65de73c79429" +checksum = "0cb8f1d480b0ea3783ab015936d2a55c87e219676f0c0b7dec61494043f21857" dependencies = [ "flate2", "futures-core", @@ -283,6 +277,17 @@ dependencies = [ "zstd-safe 7.2.1", ] +[[package]] +name = "async-lock" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff6e472cdea888a4bd64f342f09b3f50e1886d32afe8df3d663c01140b811b18" +dependencies = [ + "event-listener 5.3.1", + "event-listener-strategy", + "pin-project-lite", +] + [[package]] name = "async-speed-limit" version = "0.4.2" @@ -314,7 +319,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -325,7 +330,7 @@ checksum = "721cae7de5c34fbb2acd27e21e6d2cf7b886dce0c27388d46c4e6c47ea4318dd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -345,9 +350,9 @@ checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "aws-config" -version = "1.5.8" +version = "1.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7198e6f03240fdceba36656d8be440297b6b82270325908c7381f37d826a74f6" +checksum = "9b49afaa341e8dd8577e1a2200468f98956d6eda50bcf4a53246cc00174ba924" dependencies = [ "aws-credential-types", "aws-runtime", @@ -362,7 +367,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand 2.1.1", + "fastrand 2.2.0", "hex", "http 0.2.12", "ring 0.17.8", @@ -401,7 +406,7 @@ dependencies = [ "aws-smithy-types", "aws-types", "bytes", - "fastrand 2.1.1", + "fastrand 2.2.0", "http 0.2.12", "http-body 0.4.6", "once_cell", @@ -413,9 +418,9 @@ dependencies = [ [[package]] name = "aws-sdk-kinesis" -version = "1.47.0" +version = "1.51.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba9516981ddb40ff46df4dbddf6ea3dcfe0a95a06b0bee88cda5a0d129ded773" +checksum = "ad48026d3d53881146469b36358d633f1b8c9ad6eb3033f348600f981f2f449b" dependencies = [ "aws-credential-types", "aws-runtime", @@ -435,11 +440,10 @@ dependencies = [ [[package]] name = "aws-sdk-s3" -version = "1.54.0" +version = "1.62.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f2a62020f3e06f9b352b2a23547f6e1d110b6bf1e18a6b588ae36114eaf6e2" +checksum = "83d3a2854c7490b4c63d2b0e8c3976d628c80afa3045d078a715b2edb2ee4e0a" dependencies = [ - "ahash 0.8.11", "aws-credential-types", "aws-runtime", "aws-sigv4", @@ -454,7 +458,7 @@ dependencies = [ "aws-smithy-xml", "aws-types", "bytes", - "fastrand 2.1.1", + "fastrand 2.2.0", "hex", "hmac", "http 0.2.12", @@ -470,9 +474,9 @@ dependencies = [ [[package]] name = "aws-sdk-sqs" -version = "1.45.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a92759d1d77fbbb99c8077d4cc6ef4e69f559ce3ed9aa41445bff391aa4de4d" +checksum = "073df10a6d1dbbfdb06c5a6a6d1ebf5bf799afe64586e0688bae08a3b1be553f" dependencies = [ "aws-credential-types", "aws-runtime", @@ -492,9 +496,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.45.0" +version = "1.49.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e33ae899566f3d395cbf42858e433930682cc9c1889fa89318896082fef45efb" +checksum = "09677244a9da92172c8dc60109b4a9658597d4d298b188dd0018b6a66b410ca4" dependencies = [ "aws-credential-types", "aws-runtime", @@ -514,9 +518,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.46.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f39c09e199ebd96b9f860b0fce4b6625f211e064ad7c8693b72ecf7ef03881e0" +checksum = "81fea2f3a8bb3bd10932ae7ad59cc59f65f270fc9183a7e91f501dc5efbef7ee" dependencies = [ "aws-credential-types", 
"aws-runtime", @@ -536,9 +540,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.45.0" +version = "1.50.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3d95f93a98130389eb6233b9d615249e543f6c24a68ca1f109af9ca5164a8765" +checksum = "6ada54e5f26ac246dc79727def52f7f8ed38915cb47781e2a72213957dc3a7d5" dependencies = [ "aws-credential-types", "aws-runtime", @@ -559,9 +563,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.2.4" +version = "1.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc8db6904450bafe7473c6ca9123f88cc11089e41a025408f992db4e22d3be68" +checksum = "5619742a0d8f253be760bfbb8e8e8368c69e3587e4637af5754e488a611499b1" dependencies = [ "aws-credential-types", "aws-smithy-eventstream", @@ -599,9 +603,9 @@ dependencies = [ [[package]] name = "aws-smithy-checksums" -version = "0.60.12" +version = "0.60.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598b1689d001c4d4dc3cb386adb07d37786783aee3ac4b324bcadac116bf3d23" +checksum = "ba1a71073fca26775c8b5189175ea8863afb1c9ea2cceb02a5de5ad9dfbaa795" dependencies = [ "aws-smithy-http", "aws-smithy-types", @@ -690,9 +694,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a065c0fe6fdbdf9f11817eb68582b2ab4aff9e9c39e986ae48f7ec576c6322db" +checksum = "be28bd063fa91fd871d131fc8b68d7cd4c5fa0869bea68daca50dcb1cbd76be2" dependencies = [ "aws-smithy-async", "aws-smithy-http", @@ -700,15 +704,15 @@ dependencies = [ "aws-smithy-runtime-api", "aws-smithy-types", "bytes", - "fastrand 2.1.1", + "fastrand 2.2.0", "h2", "http 0.2.12", "http-body 0.4.6", "http-body 1.0.1", "httparse", - "hyper 0.14.30", + "hyper 0.14.31", "hyper-rustls", - "indexmap 2.1.0", + "indexmap 2.6.0", "once_cell", "pin-project-lite", "pin-utils", @@ -722,9 +726,9 @@ dependencies = [ [[package]] name = 
"aws-smithy-runtime-api" -version = "1.7.2" +version = "1.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e086682a53d3aa241192aa110fa8dfce98f2f5ac2ead0de84d41582c7e8fdb96" +checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -739,9 +743,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.2.7" +version = "1.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "147100a7bea70fa20ef224a6bad700358305f5dc0f84649c53769761395b355b" +checksum = "4fbd94a32b3a7d55d3806fe27d98d3ad393050439dd05eb53ece36ec5e3d3510" dependencies = [ "base64-simd", "bytes", @@ -835,7 +839,7 @@ dependencies = [ "futures-util", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.31", "itoa", "matchit", "memchr", @@ -943,7 +947,7 @@ version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0" dependencies = [ - "fastrand 2.1.1", + "fastrand 2.2.0", "futures-core", "pin-project", "tokio", @@ -976,6 +980,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" +[[package]] +name = "base62" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48fa474cf7492f9a299ba6019fb99ec673e1739556d48e8a90eabaea282ef0e4" + [[package]] name = "base64" version = "0.13.1" @@ -1027,9 +1037,9 @@ dependencies = [ [[package]] name = "binggan" -version = "0.14.0" +version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b16cf7e26155ca336ba3a7220c817cdfe73a10f867fa352349fd425b43814bd9" +checksum = "da19f87f00cab3f37ed64c5bc7cdfe2bdbb9fef2b9406f764c7a0eed57918ebd" dependencies = [ "alloca", "bpu_trasher", @@ -1038,7 +1048,7 @@ dependencies = [ 
"perf-event", "rustc-hash 2.0.0", "rustop", - "unicode-width", + "unicode-width 0.1.14", "yansi", ] @@ -1113,9 +1123,9 @@ dependencies = [ [[package]] name = "borsh" -version = "1.5.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6362ed55def622cddc70a4746a68554d7b687713770de539e59a739b249f8ed" +checksum = "2506947f73ad44e344215ccd6403ac2ae18cd8e046e581a441bf8d199f257f03" dependencies = [ "borsh-derive", "cfg_aliases", @@ -1123,16 +1133,15 @@ dependencies = [ [[package]] name = "borsh-derive" -version = "1.5.1" +version = "1.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3ef8005764f53cd4dca619f5bf64cafd4664dada50ece25e4d81de54c80cc0b" +checksum = "c2593a3b8b938bd68373196c9832f516be11fa487ef4ae745eb282e6a56a7244" dependencies = [ "once_cell", "proc-macro-crate 3.2.0", "proc-macro2", "quote", - "syn 2.0.79", - "syn_derive", + "syn 2.0.89", ] [[package]] @@ -1189,9 +1198,9 @@ checksum = "5ce89b21cab1437276d2650d57e971f9d548a2d9037cc231abdc0562b97498ce" [[package]] name = "bytemuck" -version = "1.18.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94bbb0ad554ad961ddc5da507a12a29b14e4ae5bda06b19f575a3e6079d2e2ae" +checksum = "8b37c88a63ffd85d15b406896cc343916d7cf57838a847b3a6f2ca5d39a5695a" [[package]] name = "byteorder" @@ -1201,9 +1210,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.2" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "428d9aa8fbc0670b7b8d6030a7fadd0f86151cae55e4dbbece15f3780a3dfaf3" +checksum = "9ac0150caa2ae65ca5bd83f25c7de183dea78d4d366469f148435e2acfbad0da" dependencies = [ "serde", ] @@ -1293,9 +1302,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.28" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"2e80e3b6a3ab07840e1cae9b0666a63970dc28e8ed5ffbcdacbfc760c281bfc1" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" dependencies = [ "jobserver", "libc", @@ -1397,9 +1406,9 @@ dependencies = [ [[package]] name = "chrono-tz" -version = "0.8.6" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d59ae0466b83e838b81a54256c39d5d7c20b9d7daa10510a242d9b75abd5936e" +checksum = "cd6dd8046d00723a59a2f8c5f295c515b9bb9a331ee4f8f3d4dd49e428acd3b6" dependencies = [ "chrono", "chrono-tz-build", @@ -1408,12 +1417,11 @@ dependencies = [ [[package]] name = "chrono-tz-build" -version = "0.2.1" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +checksum = "e94fea34d77a245229e7746bd2beb786cd2a896f306ff491fb8cecb3074b10a7" dependencies = [ "parse-zoneinfo", - "phf", "phf_codegen", ] @@ -1444,17 +1452,21 @@ dependencies = [ "half", ] +[[package]] +name = "cidr" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bdf600c45bd958cf2945c445264471cca8b6c8e67bc87b71affd6d7e5682621" + [[package]] name = "cidr-utils" -version = "0.5.11" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2315f7119b7146d6a883de6acd63ddf96071b5f79d9d98d2adaa84d749f6abf1" +checksum = "25c0a9fb70c2c2cc2a520aa259b1d1345650046a07df1b6da1d3cefcd327f43e" dependencies = [ - "debug-helper", + "cidr", "num-bigint", "num-traits", - "once_cell", - "regex", ] [[package]] @@ -1470,18 +1482,18 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.20" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97f376d85a664d5837dbae44bf546e6477a679ff6610010f17276f686d867e8" +checksum = "fb3b4b9e5a7c7514dfa52869339ee98b3156b0bfb4e8a77c4ff4babb64b1604f" dependencies = [ "clap_builder", ] [[package]] name = 
"clap_builder" -version = "4.5.20" +version = "4.5.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19bc80abd44e4bed93ca373a0704ccbd1b710dc5749406201bb018272808dc54" +checksum = "b17a95aa67cc7b5ebd32aa5370189aa0d79069ef1c64ce893bd30fb24bff20ec" dependencies = [ "anstream", "anstyle", @@ -1491,9 +1503,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.2" +version = "0.7.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" +checksum = "afb84c814227b90d6895e01398aee0d8033c00e7466aca416fb6a8e0eb19d8a7" [[package]] name = "cmake" @@ -1528,14 +1540,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" dependencies = [ "termcolor", - "unicode-width", + "unicode-width 0.1.14", ] [[package]] name = "colorchoice" -version = "1.0.2" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" [[package]] name = "colored" @@ -1579,7 +1591,7 @@ dependencies = [ "encode_unicode", "lazy_static", "libc", - "unicode-width", + "unicode-width 0.1.14", "windows-sys 0.52.0", ] @@ -1589,8 +1601,8 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e" dependencies = [ - "prost", - "prost-types", + "prost 0.11.9", + "prost-types 0.11.9", "tonic", "tracing-core", ] @@ -1607,7 +1619,7 @@ dependencies = [ "futures", "hdrhistogram", "humantime", - "prost-types", + "prost-types 0.11.9", "serde", "serde_json", "thread_local", @@ -1637,6 +1649,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1664,9 +1685,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -1852,9 +1873,9 @@ dependencies = [ [[package]] name = "csv" -version = "1.3.0" +version = "1.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf" dependencies = [ "csv-core", "itoa", @@ -1925,7 +1946,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.11.1", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -1947,7 +1968,7 @@ checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ "darling_core 0.20.10", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -1981,12 +2002,6 @@ version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" -[[package]] -name = "debug-helper" -version = "0.3.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f578e8e2c440e7297e008bb5486a3a8a194775224bbc23729b0dbdfaeebf162e" - [[package]] name = "debugid" version = "0.8.0" @@ -2033,11 +2048,11 @@ version = "0.99.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"5f33878137e4dafd7fa914ad4e259e18a4e8e532b9617a2d0150262bf53abfce" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2", "quote", "rustc_version", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -2097,6 +2112,17 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "dns-lookup" version = "2.0.4" @@ -2115,6 +2141,24 @@ version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" +[[package]] +name = "domain" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64008666d9f3b6a88a63cd28ad8f3a5a859b8037e11bfb680c1b24945ea1c28d" +dependencies = [ + "bytes", + "futures-util", + "moka", + "octseq", + "rand 0.8.5", + "serde", + "smallvec", + "time", + "tokio", + "tracing", +] + [[package]] name = "dotenvy" version = "0.15.7" @@ -2336,7 +2380,7 @@ checksum = "a1ab991c1362ac86c61ab6f556cff143daa22e5a15e4e189df818b2fd19fe65b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -2385,6 +2429,27 @@ version = "2.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0" +[[package]] +name = "event-listener" +version = "5.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6032be9bd27023a771701cc49f9f053c751055f71efb2e0ae5c15809093675ba" +dependencies = [ + "concurrent-queue", + "parking", + "pin-project-lite", +] + +[[package]] +name = "event-listener-strategy" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0f214dc438f977e6d4e3500aaa277f5ad94ca83fbbd9b1a15713ce2344ccc5a1" +dependencies = [ + "event-listener 5.3.1", + "pin-project-lite", +] + [[package]] name = "extend" version = "0.1.2" @@ -2408,11 +2473,22 @@ dependencies = [ "rand 0.8.5", ] +[[package]] +name = "fancy-regex" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "531e46835a22af56d1e3b66f04844bed63158bc094a628bec1d321d9b4c44bf2" +dependencies = [ + "bit-set", + "regex-automata 0.4.9", + "regex-syntax 0.8.5", +] + [[package]] name = "fastdivide" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "59668941c55e5c186b8b58c391629af56774ec768f73c08bbcd56f09348eb00b" +checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" [[package]] name = "fastrand" @@ -2425,9 +2501,9 @@ dependencies = [ [[package]] name = "fastrand" -version = "2.1.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" +checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4" [[package]] name = "ff" @@ -2477,9 +2553,9 @@ checksum = "b3ea1ec5f8307826a5b71094dd91fc04d4ae75d5709b20ad351c7fb4815c86ec" [[package]] name = "flate2" -version = "1.0.34" +version = "1.0.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1b589b4dc103969ad3cf85c950899926ec64300a1a46d76c03a6072957036f0" +checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c" dependencies = [ "crc32fast", "miniz_oxide", @@ -2496,9 +2572,9 @@ dependencies = [ [[package]] name = "flume" -version = "0.11.0" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55ac459de2512911e4b674ce33cf20befaba382d05b62b008afc1c8b57cbf181" +checksum = "da0e4dd2a88388a1f4ccc7c9ce104604dab68d9f408dc34cd45823d5a9069095" dependencies = [ 
"futures-core", "futures-sink", @@ -2656,7 +2732,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -2786,8 +2862,8 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a3b24a3f57be08afc02344e693afb55e48172c9c2ab86ff3fdb8efff550e4b9" dependencies = [ - "prost", - "prost-types", + "prost 0.11.9", + "prost-types 0.11.9", "tonic", ] @@ -2814,7 +2890,7 @@ dependencies = [ "google-cloud-gax", "google-cloud-googleapis", "google-cloud-token", - "prost-types", + "prost-types 0.11.9", "thiserror", "tokio", "tokio-util", @@ -2863,7 +2939,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.1.0", + "indexmap 2.6.0", "slab", "tokio", "tokio-util", @@ -2901,9 +2977,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" dependencies = [ "allocator-api2", "equivalent", @@ -2965,6 +3041,12 @@ dependencies = [ "unicode-segmentation", ] +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.3.9" @@ -3021,6 +3103,17 @@ dependencies = [ "winapi 0.3.9", ] +[[package]] +name = "hostname" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9c7c7c8ac16c798734b8a24560c1362120597c40d5e1459f09498f8f6c8f2ba" +dependencies = [ + "cfg-if", + "libc", + "windows", +] + [[package]] name = "htmlescape" version = "0.3.1" @@ -3150,9 +3243,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" 
[[package]] name = "hyper" -version = "0.14.30" +version = "0.14.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a152ddd61dfaec7273fe8419ab357f33aee0d914c5f4efbf0d96fa749eea5ec9" +checksum = "8c08302e8fa335b151b788c775ff56e7a03ae64ff85c548ee820fecb70356e85" dependencies = [ "bytes", "futures-channel", @@ -3174,9 +3267,9 @@ dependencies = [ [[package]] name = "hyper" -version = "1.4.1" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50dfd22e0e76d0f662d429a5f80fcaf3855009297eab6a0a9f8543834744ba05" +checksum = "97818827ef4f364230e16705d4706e2897df2bb60617d6ca15d598025a3c481f" dependencies = [ "bytes", "futures-channel", @@ -3199,7 +3292,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.12", - "hyper 0.14.30", + "hyper 0.14.31", "log", "rustls", "rustls-native-certs", @@ -3213,7 +3306,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper 0.14.30", + "hyper 0.14.31", "pin-project-lite", "tokio", "tokio-io-timeout", @@ -3226,7 +3319,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper 0.14.30", + "hyper 0.14.31", "native-tls", "tokio", "tokio-native-tls", @@ -3234,16 +3327,16 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.9" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "41296eb09f183ac68eec06e03cdbea2e759633d4067b2f6552fc2e009bcad08b" +checksum = "df2dcfbe0677734ab2f3ffa7fa7bfd4706bfdc1ef393f2ee30184aed67e631b4" dependencies = [ "bytes", "futures-channel", "futures-util", "http 1.1.0", "http-body 1.0.1", - "hyper 1.4.1", + "hyper 1.5.1", "pin-project-lite", "socket2", "tokio", @@ 
-3283,6 +3376,124 @@ dependencies = [ "cc", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + 
"displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "ident_case" version = "1.0.1" @@ -3299,6 +3510,27 @@ dependencies = [ "unicode-normalization", ] +[[package]] +name = "idna" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -3312,26 +3544,26 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.1.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +checksum = 
"707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.14.5", + "hashbrown 0.15.1", "serde", ] [[package]] name = "indicatif" -version = "0.17.8" +version = "0.17.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "763a5a8f45087d6bcea4222e7b72c291a054edf80e4ef6efd2a4979878c7bea3" +checksum = "cbf675b85ed934d3c67b5c5469701eec7db22689d0a2139d856e0925fa28b281" dependencies = [ "console", - "instant", "number_prefix", "portable-atomic", - "unicode-width", + "unicode-width 0.2.0", + "web-time", ] [[package]] @@ -3353,7 +3585,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" dependencies = [ "ahash 0.8.11", - "indexmap 2.1.0", + "indexmap 2.6.0", "is-terminal", "itoa", "log", @@ -3364,6 +3596,19 @@ dependencies = [ "str_stack", ] +[[package]] +name = "influxdb-line-protocol" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22fa7ee6be451ea0b1912b962c91c8380835e97cf1584a77e18264e908448dcb" +dependencies = [ + "bytes", + "log", + "nom", + "smallvec", + "snafu 0.7.5", +] + [[package]] name = "inherent" version = "1.0.11" @@ -3372,7 +3617,7 @@ checksum = "0122b7114117e64a63ac49f752a5ca4624d534c7b1c7de796ac196381cd2d947" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -3467,9 +3712,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" +checksum = "540654e97a3f4470a492cd30ff187bc95d89557a903a2bbf112e2fae98104ef2" [[package]] name = "jobserver" @@ -3482,9 +3727,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.71" +version = "0.3.72" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"0cb94a0ffd3f3ee755c20f7d8752f45cac88605a4dcf808abcff72873296ec7b" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" dependencies = [ "wasm-bindgen", ] @@ -3552,7 +3797,7 @@ dependencies = [ "bit-set", "ena", "itertools 0.11.0", - "lalrpop-util", + "lalrpop-util 0.20.2", "petgraph", "regex", "regex-syntax 0.8.5", @@ -3568,8 +3813,14 @@ name = "lalrpop-util" version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553" + +[[package]] +name = "lalrpop-util" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "108dc8f5dabad92c65a03523055577d847f5dcc00f3e7d3a68bc4d48e01d8fe1" dependencies = [ - "regex-automata 0.4.8", + "regex-automata 0.4.9", ] [[package]] @@ -3585,7 +3836,7 @@ dependencies = [ "futures", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.31", "lambda_runtime 0.8.3", "mime", "percent-encoding", @@ -3609,7 +3860,7 @@ dependencies = [ "http 0.2.12", "http-body 0.4.6", "http-serde 1.1.3", - "hyper 0.14.30", + "hyper 0.14.31", "lambda_runtime_api_client 0.8.0", "serde", "serde_json", @@ -3634,7 +3885,7 @@ dependencies = [ "http-body 1.0.1", "http-body-util", "http-serde 2.1.1", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-util", "lambda_runtime_api_client 0.11.1", "pin-project", @@ -3655,7 +3906,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "690c5ae01f3acac8c9c3348b556fc443054e9b7f1deaf53e9ebab716282bf0ed" dependencies = [ "http 0.2.12", - "hyper 0.14.30", + "hyper 0.14.31", "tokio", "tower-service", ] @@ -3672,7 +3923,7 @@ dependencies = [ "http 1.1.0", "http-body 1.0.1", "http-body-util", - "hyper 1.4.1", + "hyper 1.5.1", "hyper-util", "tokio", "tower", @@ -3698,15 +3949,15 @@ checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" [[package]] name = "libc" -version = "0.2.159" +version = "0.2.164" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "561d97a539a36e26a9a5fad1ea11a3039a67714694aaa379433e580854bc3dc5" +checksum = "433bfe06b8c75da9b2e3fbea6e5329ff87748f0b144ef75306e674c3f6f7c13f" [[package]] name = "libm" -version = "0.2.8" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" +checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" [[package]] name = "libredox" @@ -3981,6 +4232,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "litemap" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" + [[package]] name = "lock_api" version = "0.4.12" @@ -4003,7 +4260,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.0", + "hashbrown 0.15.1", ] [[package]] @@ -4123,7 +4380,7 @@ checksum = "f3cd9f9bbedc1b92683a9847b8db12f3203cf32af6a11db085fa007708dc9555" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -4191,6 +4448,30 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "moka" +version = "0.12.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cf62eb4dd975d2dde76432fb1075c49e3ee2331cf36f1f8fd4b66550d32b6f" +dependencies = [ + "async-lock", + "async-trait", + "crossbeam-channel", + "crossbeam-epoch", + "crossbeam-utils", + "event-listener 5.3.1", + "futures-util", + "once_cell", + "parking_lot", + "quanta", + "rustc_version", + "smallvec", + "tagptr", + "thiserror", + "triomphe", + "uuid", +] + [[package]] name = "mrecordlog" version = 
"0.4.0" @@ -4452,7 +4733,7 @@ dependencies = [ "proc-macro-crate 1.3.1", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -4509,6 +4790,17 @@ dependencies = [ "memchr", ] +[[package]] +name = "octseq" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "126c3ca37c9c44cec575247f43a3e4374d8927684f129d2beeb0d2cef262fe12" +dependencies = [ + "bytes", + "serde", + "smallvec", +] + [[package]] name = "ofb" version = "0.6.1" @@ -4622,9 +4914,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.66" +version = "0.10.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9529f4786b70a3e8c61e11179af17ab6188ad8d0ded78c5529441ed39d4bd9c1" +checksum = "6174bc48f102d208783c2c84bf931bb75927a617866870de8a4ea85597f871f5" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -4643,7 +4935,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -4654,18 +4946,18 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-src" -version = "300.3.2+3.3.2" +version = "300.4.1+3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a211a18d945ef7e648cc6e0058f4c548ee46aab922ea203e0d30e966ea23647b" +checksum = "faa4eac4138c62414b5622d1b31c5c304f34b406b013c079c2bbc652fdd6678c" dependencies = [ "cc", ] [[package]] name = "openssl-sys" -version = "0.9.103" +version = "0.9.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f9e8deee91df40a943c71b917e5874b951d32a802526c85721ce3b776c929d6" +checksum = "45abf306cbf99debc8195b66b7346498d7b10c210de50418b5ccd7ceba08c741" dependencies = [ "cc", "libc", @@ -4711,7 +5003,7 @@ dependencies = [ "opentelemetry-semantic-conventions", "opentelemetry_api", "opentelemetry_sdk", - "prost", + "prost 0.11.9", "reqwest", "thiserror", 
"tokio", @@ -4726,7 +5018,7 @@ checksum = "b1e3f814aa9f8c905d0ee4bde026afd3b2577a97c10e1699912e3e44f0c4cbeb" dependencies = [ "opentelemetry_api", "opentelemetry_sdk", - "prost", + "prost 0.11.9", "tonic", ] @@ -4798,9 +5090,9 @@ dependencies = [ [[package]] name = "ordered-float" -version = "4.3.0" +version = "4.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44d501f1a72f71d3c063a6bbc8f7271fa73aa09fe5d6283b6571e2ed176a2537" +checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e" dependencies = [ "num-traits", ] @@ -4832,12 +5124,12 @@ version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39b0deead1528fd0e5947a8546a9642a9777c25f6e1e26f34c97b204bbb465bd" dependencies = [ - "heck", + "heck 0.4.1", "itertools 0.12.1", "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -4881,7 +5173,7 @@ dependencies = [ "ansitok", "bytecount", "fnv", - "unicode-width", + "unicode-width 0.1.14", ] [[package]] @@ -5028,9 +5320,9 @@ dependencies = [ [[package]] name = "pest" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdbef9d1d47087a895abd220ed25eb4ad973a5e26f6a4367b038c25e28dfc2d9" +checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", "thiserror", @@ -5039,9 +5331,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d3a6e3394ec80feb3b6393c725571754c6188490265c61aaf260810d6b95aa0" +checksum = "d214365f632b123a47fd913301e14c946c61d1c183ee245fa76eb752e59a02dd" dependencies = [ "pest", "pest_generator", @@ -5049,22 +5341,22 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"94429506bde1ca69d1b5601962c73f4172ab4726571a59ea95931218cb0e930e" +checksum = "eb55586734301717aea2ac313f50b2eb8f60d2fc3dc01d190eefa2e625f60c4e" dependencies = [ "pest", "pest_meta", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] name = "pest_meta" -version = "2.7.13" +version = "2.7.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac8a071862e93690b6e34e9a5fb8e33ff3734473ac0245b27232222c4906a33f" +checksum = "b75da2a70cf4d9cb76833c990ac9cd3923c9a8905a8929789ce347c84564d03d" dependencies = [ "once_cell", "pest", @@ -5078,7 +5370,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", - "indexmap 2.1.0", + "indexmap 2.6.0", ] [[package]] @@ -5130,29 +5422,29 @@ dependencies = [ [[package]] name = "pin-project" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf123a161dde1e524adf36f90bc5d8d3462824a9c43553ad07a8183161189ec" +checksum = "be57f64e946e500c8ee36ef6331845d40a93055567ec57e8fae13efd33759b95" dependencies = [ "pin-project-internal", ] [[package]] name = "pin-project-internal" -version = "1.1.6" +version = "1.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4502d8515ca9f32f1fb543d987f63d95a14934883db45bdb48060b6b69257f8" +checksum = "3c0f5fad0874fc7abcd4d750e76917eaebbecaa2c20bde22e1dbeeba8beb758c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] name = "pin-project-lite" -version = "0.2.14" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bda66fc9667c18cb2758a2ac84d1167245054bcf85d5d1aaa6923f45801bdd02" +checksum = "915a1e146535de9163f3987b8944ed8cf49a18bb0056bcebcdcece385cece4ff" [[package]] name = "pin-utils" @@ -5470,12 +5762,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = 
"0.2.22" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "479cf940fbbb3426c32c5d5176f62ad57549a0bb84773423ba8be9d089f5faba" +checksum = "64d1ec885c64d0457d564db4ec299b2dae3f9c02808b8ad9c3a089c591b18033" dependencies = [ "proc-macro2", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -5494,7 +5786,7 @@ version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b" dependencies = [ - "toml_edit 0.22.20", + "toml_edit 0.22.22", ] [[package]] @@ -5523,9 +5815,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.87" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3e4daa0dcf6feba26f985457cdf104d4b4256fc5a09547140f3631bb076b19a" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] @@ -5538,7 +5830,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", "version_check", "yansi", ] @@ -5610,7 +5902,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b82eaa1d779e9a4bc1c3217db8ffbeabaae1dca241bf70183242128d48681cd" dependencies = [ "bytes", - "prost-derive", + "prost-derive 0.11.9", +] + +[[package]] +name = "prost" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b0487d90e047de87f984913713b85c601c05609aad5b0df4b4573fbf69aa13f" +dependencies = [ + "bytes", + "prost-derive 0.13.3", ] [[package]] @@ -5620,15 +5922,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "119533552c9a7ffacc21e099c24a0ac8bb19c2a2a3f363de84cd9b844feab270" dependencies = [ "bytes", - "heck", + "heck 0.4.1", "itertools 0.10.5", "lazy_static", "log", "multimap", "petgraph", "prettyplease 0.1.25", - "prost", - "prost-types", 
+ "prost 0.11.9", + "prost-types 0.11.9", "regex", "syn 1.0.109", "tempfile", @@ -5648,13 +5950,46 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "prost-derive" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e9552f850d5f0964a4e4d0bf306459ac29323ddfbae05e35a7c0d35cb0803cc5" +dependencies = [ + "anyhow", + "itertools 0.13.0", + "proc-macro2", + "quote", + "syn 2.0.89", +] + +[[package]] +name = "prost-reflect" +version = "0.14.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b7535b02f0e5efe3e1dbfcb428be152226ed0c66cad9541f2274c8ba8d4cd40" +dependencies = [ + "once_cell", + "prost 0.13.3", + "prost-types 0.13.3", +] + [[package]] name = "prost-types" version = "0.11.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "213622a1460818959ac1181aaeb2dc9c7f63df720db7d788b3e24eacd1983e13" dependencies = [ - "prost", + "prost 0.11.9", +] + +[[package]] +name = "prost-types" +version = "0.13.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4759aa0d3a6232fb8dbdb97b61de2c20047c68aca932c7ed76da9d788508d670" +dependencies = [ + "prost 0.13.3", ] [[package]] @@ -5663,6 +5998,21 @@ version = "2.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "106dd99e98437432fed6519dedecfade6a06a73bb7b2a1e019fdd2bee5778d94" +[[package]] +name = "psl" +version = "2.1.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9b9bc1071e59efa5b4972b584daa069e80d5262ce1309718a2ab20dae5f84ca" +dependencies = [ + "psl-types", +] + +[[package]] +name = "psl-types" +version = "2.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33cb294fe86a74cbcf50d4445b37da762029549ebeea341421c7c70370f86cac" + [[package]] name = "ptr_meta" version = "0.1.4" @@ -5683,6 +6033,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "publicsuffix" +version = "2.3.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f42ea446cab60335f76979ec15e12619a2165b5ae2c12166bef27d283a9fadf" +dependencies = [ + "idna 1.0.3", + "psl-types", +] + [[package]] name = "pulsar" version = "5.1.1" @@ -5706,9 +6066,9 @@ dependencies = [ "oauth2", "openidconnect", "pem 1.1.1", - "prost", + "prost 0.11.9", "prost-build", - "prost-derive", + "prost-derive 0.11.9", "rand 0.8.5", "regex", "serde", @@ -5722,6 +6082,21 @@ dependencies = [ "zstd 0.11.2+zstd.1.5.2", ] +[[package]] +name = "quanta" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi 0.11.0+wasi-snapshot-preview1", + "web-sys", + "winapi 0.3.9", +] + [[package]] name = "query_map" version = "0.7.0" @@ -5801,7 +6176,7 @@ dependencies = [ "aws-smithy-runtime", "aws-types", "futures", - "hyper 0.14.30", + "hyper 0.14.31", "hyper-rustls", "quickwit-common", "tokio", @@ -5891,13 +6266,13 @@ version = "0.8.0" dependencies = [ "anyhow", "futures", - "heck", - "prettyplease 0.2.22", + "heck 0.4.1", + "prettyplease 0.2.25", "proc-macro2", "prost-build", "quote", "serde", - "syn 2.0.79", + "syn 2.0.89", "tonic-build", ] @@ -5911,7 +6286,7 @@ dependencies = [ "futures", "http 0.2.12", "mockall", - "prost", + "prost 0.11.9", "quickwit-actors", "quickwit-codegen", "quickwit-common", @@ -5939,9 +6314,9 @@ dependencies = [ "fnv", "futures", "home", - "hostname", + "hostname 0.3.1", "http 0.2.12", - "hyper 0.14.30", + "hyper 0.14.31", "itertools 0.13.0", "once_cell", "pin-project", @@ -6069,7 +6444,7 @@ dependencies = [ "binggan", "fnv", "hex", - "indexmap 2.1.0", + "indexmap 2.6.0", "itertools 0.13.0", "matches", "nom", @@ -6140,7 +6515,7 @@ dependencies = [ "oneshot", "openssl", "proptest", - "prost", + "prost 0.11.9", "pulsar", "quickwit-actors", "quickwit-aws", @@ -6188,7 +6563,7 @@ 
dependencies = [ "mockall", "mrecordlog", "once_cell", - "prost", + "prost 0.11.9", "quickwit-actors", "quickwit-cluster", "quickwit-codegen", @@ -6218,7 +6593,7 @@ dependencies = [ "anyhow", "aws-sdk-sqs", "futures-util", - "hyper 0.14.30", + "hyper 0.14.31", "itertools 0.13.0", "quickwit-actors", "quickwit-cli", @@ -6247,8 +6622,8 @@ dependencies = [ "async-trait", "itertools 0.13.0", "once_cell", - "prost", - "prost-types", + "prost 0.11.9", + "prost-types 0.11.9", "quickwit-actors", "quickwit-cluster", "quickwit-common", @@ -6355,7 +6730,7 @@ version = "0.8.0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -6410,7 +6785,7 @@ dependencies = [ "async-trait", "hex", "once_cell", - "prost", + "prost 0.11.9", "quickwit-common", "quickwit-config", "quickwit-ingest", @@ -6439,9 +6814,9 @@ dependencies = [ "http 0.2.12", "mockall", "opentelemetry", - "prost", + "prost 0.11.9", "prost-build", - "prost-types", + "prost-types 0.11.9", "quickwit-actors", "quickwit-codegen", "quickwit-common", @@ -6526,7 +6901,7 @@ dependencies = [ "once_cell", "postcard", "proptest", - "prost", + "prost 0.11.9", "quickwit-common", "quickwit-config", "quickwit-directories", @@ -6570,7 +6945,7 @@ dependencies = [ "hex", "http-serde 1.1.3", "humantime", - "hyper 0.14.30", + "hyper 0.14.31", "itertools 0.13.0", "mime_guess", "mockall", @@ -6578,8 +6953,8 @@ dependencies = [ "opentelemetry", "percent-encoding", "pprof", - "prost", - "prost-types", + "prost 0.11.9", + "prost-types 0.11.9", "quickwit-actors", "quickwit-cluster", "quickwit-common", @@ -6637,7 +7012,7 @@ dependencies = [ "bytesize", "fnv", "futures", - "hyper 0.14.30", + "hyper 0.14.31", "lru", "md5", "mockall", @@ -6672,7 +7047,7 @@ version = "0.8.0" dependencies = [ "async-trait", "encoding_rs", - "hostname", + "hostname 0.3.1", "md5", "once_cell", "quickwit-common", @@ -6796,6 +7171,15 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "raw-cpuid" +version = "11.2.0" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ab240315c661615f2ee9f0f2cd32d5a7343a84d5ebcccb99d46e6637565e7b0" +dependencies = [ + "bitflags 2.6.0", +] + [[package]] name = "rayon" version = "1.10.0" @@ -6872,13 +7256,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.11.0" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38200e5ee88914975b69f657f0801b6f6dccafd44fd9326302a4aaeecfacb1d8" +checksum = "b544ef1b4eac5dc2db33ea63606ae9ffcfac26c1416a2806ae0bf5f56b201191" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.8", + "regex-automata 0.4.9", "regex-syntax 0.8.5", ] @@ -6893,9 +7277,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.8" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368758f23274712b504848e9d5a6f010445cc8b87a7cdb4d7cbee666c1288da3" +checksum = "809e8dc61f6de73b46c85f4c96486310fe304c434cfa43669d7b40f711150908" dependencies = [ "aho-corasick", "memchr", @@ -6971,7 +7355,7 @@ dependencies = [ "h2", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.31", "hyper-rustls", "hyper-tls", "ipnet", @@ -7100,12 +7484,9 @@ dependencies = [ [[package]] name = "roxmltree" -version = "0.18.1" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862340e351ce1b271a378ec53f304a5558f7db87f3769dc655a8f6ecbb68b302" -dependencies = [ - "xmlparser", -] +checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" [[package]] name = "rsa" @@ -7148,7 +7529,7 @@ dependencies = [ "proc-macro2", "quote", "rust-embed-utils", - "syn 2.0.79", + "syn 2.0.89", "walkdir", ] @@ -7217,9 +7598,9 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.37" +version = "0.38.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" +checksum = 
"d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6" dependencies = [ "bitflags 2.6.0", "errno", @@ -7289,9 +7670,9 @@ checksum = "0b5a6a926633a8ce739286680df905e1d1d01db609fc0e09d28e9b901ac7b22f" [[package]] name = "rustversion" -version = "1.0.17" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" +checksum = "0e819f2bc632f285be6d7cd36e25940d45b2391dd6d9b939e79de557f7014248" [[package]] name = "rusty-fork" @@ -7343,18 +7724,18 @@ dependencies = [ [[package]] name = "scc" -version = "2.2.0" +version = "2.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836f1e0f4963ef5288b539b643b35e043e76a32d0f4e47e67febf69576527f50" +checksum = "66b202022bb57c049555430e11fc22fea12909276a80a4c3d368da36ac1d88ed" dependencies = [ "sdd", ] [[package]] name = "schannel" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01227be5826fa0690321a2ba6c5cd57a19cf3f6a09e76973b58e61de6ab9d1c1" +checksum = "1f29ebaa345f945cec9fbbc532eb307f0fdad8161f281b6369539c8d84876b3d" dependencies = [ "windows-sys 0.59.0", ] @@ -7394,9 +7775,9 @@ dependencies = [ [[package]] name = "sdd" -version = "3.0.3" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60a7b59a5d9b0099720b417b6325d91a52cbf5b3dcb5041d864be53eefa58abc" +checksum = "49c1eeaf4b6a87c7479688c6d52b9f1153cedd3c489300564f932b065c6eab95" [[package]] name = "sea-query" @@ -7425,10 +7806,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9834af2c4bd8c5162f00c89f1701fb6886119a88062cf76fe842ea9e232b9839" dependencies = [ "darling 0.20.10", - "heck", + "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", "thiserror", ] @@ -7467,9 +7848,9 @@ dependencies = [ [[package]] name = "security-framework-sys" -version = "2.12.0" +version = 
"2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea4a292869320c0272d7bc55a5a6aafaff59b4f63404a003887b679a2e05b4b6" +checksum = "fa39c7303dc58b5543c94d22c1766b0d31f2ee58306363ea622b10bbc075eaa2" dependencies = [ "core-foundation-sys", "libc", @@ -7489,9 +7870,9 @@ checksum = "f97841a747eef040fcd2e7b3b9a220a7205926e60488e673d9e4926d27772ce5" [[package]] name = "serde" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] @@ -7508,13 +7889,13 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.210" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -7529,12 +7910,13 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.112" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d1bd37ce2324cf3bf85e5a25f96eb4baf0d5aa6eba43e7ae8958870c4ec48ed" +checksum = "c7fceb2473b9166b2294ef05efcb65a3db80803f0b03ef86a5fc88a2b85ee377" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.6.0", "itoa", + "memchr", "ryu", "serde", ] @@ -7634,7 +8016,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.1.0", + "indexmap 2.6.0", "serde", "serde_derive", "serde_json", @@ -7663,16 +8045,16 @@ dependencies = [ "darling 0.20.10", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] name = "serde_yaml" -version = "0.9.30" +version = "0.9.34+deprecated" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b1bf28c79a99f70ee1f1d83d10c875d2e70618417fda01ad1785e027579d9d38" +checksum = "6a8b1a1a2ebf674015cc02edccce75287f1a0130d394307b36743c2f5d504b47" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.6.0", "itoa", "ryu", "serde", @@ -7681,9 +8063,9 @@ dependencies = [ [[package]] name = "serial_test" -version = "3.1.1" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4b4b487fe2acf240a021cf57c6b2b4903b1e78ca0ecd862a71b71d2a51fed77d" +checksum = "1b258109f244e1d6891bf1053a55d63a5cd4f8f4c30cf9a1280989f80e7a1fa9" dependencies = [ "fslock", "futures", @@ -7696,13 +8078,13 @@ dependencies = [ [[package]] name = "serial_test_derive" -version = "3.1.1" +version = "3.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82fe9db325bcef1fbcde82e078a5cc4efdf787e96b3b9cf45b50b529f2083d67" +checksum = "5d69265a08751de7844521fd15003ae0a888e035773ba05695c5c759a6f89eef" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -7863,7 +8245,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" dependencies = [ "doc-comment", - "snafu-derive", + "snafu-derive 0.7.5", +] + +[[package]] +name = "snafu" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "223891c85e2a29c3fe8fb900c1fae5e69c2e42415e3177752e8718475efa5019" +dependencies = [ + "snafu-derive 0.8.5", ] [[package]] @@ -7872,12 +8263,24 @@ version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro2", "quote", "syn 1.0.109", ] +[[package]] +name = "snafu-derive" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" 
+dependencies = [ + "heck 0.5.0", + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "snap" version = "1.1.1" @@ -7965,7 +8368,7 @@ dependencies = [ "crc", "crossbeam-queue", "either", - "event-listener", + "event-listener 2.5.3", "futures-channel", "futures-core", "futures-intrusive", @@ -7973,7 +8376,7 @@ dependencies = [ "futures-util", "hashlink", "hex", - "indexmap 2.1.0", + "indexmap 2.6.0", "log", "memchr", "once_cell", @@ -8016,7 +8419,7 @@ checksum = "5833ef53aaa16d860e92123292f1f6a3d53c34ba8b1969f152ef1a7bb803f3c8" dependencies = [ "dotenvy", "either", - "heck", + "heck 0.4.1", "hex", "once_cell", "proc-macro2", @@ -8211,9 +8614,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "symbolic-common" -version = "12.12.0" +version = "12.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "366f1b4c6baf6cfefc234bbd4899535fca0b06c74443039a73f6dfb2fad88d77" +checksum = "3d4d73159efebfb389d819fd479afb2dbd57dcb3e3f4b7fcfa0e675f5a46c1cb" dependencies = [ "debugid", "memmap2", @@ -8223,9 +8626,9 @@ dependencies = [ [[package]] name = "symbolic-demangle" -version = "12.12.0" +version = "12.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aba05ba5b9962ea5617baf556293720a8b2d0a282aa14ee4bf10e22efc7da8c8" +checksum = "a767859f6549c665011970874c3f541838b4835d5aaaa493d3ee383918be9f10" dependencies = [ "cpp_demangle", "rustc-demangle", @@ -8245,9 +8648,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.79" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89132cd0bf050864e1d38dc3bbc07a0eb8e7530af26344d3d2bbbef83499f590" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", @@ -8255,28 +8658,27 @@ dependencies = [ ] [[package]] -name = "syn_derive" -version = "0.1.8" +name = "sync_wrapper" +version = "0.1.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + +[[package]] +name = "synstructure" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1329189c02ff984e9736652b1631330da25eaa6bc639089ed4915d25446cbe7b" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" dependencies = [ - "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] -[[package]] -name = "sync_wrapper" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" - [[package]] name = "syslog_loose" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acf5252d1adec0a489a0225f867c1a7fd445e41674530a396d0629cff0c4b211" +checksum = "161028c00842709450114c39db3b29f44c898055ed8833bb9b535aba7facf30e" dependencies = [ "chrono", "nom", @@ -8313,7 +8715,7 @@ dependencies = [ "ansitok", "papergrid", "tabled_derive", - "unicode-width", + "unicode-width 0.1.14", ] [[package]] @@ -8322,13 +8724,19 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "99f688a08b54f4f02f0a3c382aefdb7884d3d69609f785bd253dc033243e3fe4" dependencies = [ - "heck", + "heck 0.4.1", "proc-macro-error", "proc-macro2", "quote", "syn 1.0.109", ] +[[package]] +name = "tagptr" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" + [[package]] name = "tantivy" version = "0.23.0" @@ -8473,9 +8881,9 @@ checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] name = "tar" -version = "0.4.42" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4ff6c40d3aedb5e06b57c6f669ad17ab063dd1e63d977c6a88e7f4dfa4f04020" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" dependencies = [ "filetime", "libc", @@ -8484,12 +8892,12 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.13.0" +version = "3.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0f2c9fc62d0beef6951ccffd757e241266a2c833136efbe35af6cd2567dca5b" +checksum = "28cce251fcbc87fac86a866eeb0d6c2d536fc16d06f184bb61aeae11aa4cee0c" dependencies = [ "cfg-if", - "fastrand 2.1.1", + "fastrand 2.2.0", "once_cell", "rustix", "windows-sys 0.59.0", @@ -8523,22 +8931,22 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "1.0.64" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84" +checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.64" +version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3" +checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -8640,6 +9048,16 @@ dependencies = [ "crunchy", ] +[[package]] +name = "tinystr" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" +dependencies = [ + "displaydoc", + "zerovec", +] + [[package]] name = "tinytemplate" version = "1.2.1" @@ -8667,9 +9085,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokio" -version = "1.40.0" +version = "1.41.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2b070231665d27ad9ec9b8df639893f46727666c6767db40317fbe920a5d998" +checksum = "22cfb5bee7a6a52939ca9224d6ac897bb669134078daa8735560897f69de4d33" dependencies = [ "backtrace", "bytes", @@ -8702,7 +9120,7 @@ checksum = "693d596312e88961bc67d7f1f97af8a70227d9f90c31bba5806eec004978d752" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -8816,7 +9234,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.6.0", "serde", "serde_spanned", "toml_datetime", @@ -8825,11 +9243,11 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.20" +version = "0.22.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" +checksum = "4ae48d6208a266e853d946088ed816055e556cc6028c5e8e2b84d9fa5dd7c7f5" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.6.0", "toml_datetime", "winnow 0.6.20", ] @@ -8851,11 +9269,11 @@ dependencies = [ "h2", "http 0.2.12", "http-body 0.4.6", - "hyper 0.14.30", + "hyper 0.14.31", "hyper-timeout", "percent-encoding", "pin-project", - "prost", + "prost 0.11.9", "rustls-pemfile", "tokio", "tokio-rustls", @@ -8953,7 +9371,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", ] [[package]] @@ -9034,6 +9452,12 @@ dependencies = [ "tracing-serde", ] +[[package]] +name = "triomphe" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859eb650cfee7434994602c3a68b25d77ad9e68c8a6cd491616ef86661382eb3" + [[package]] name = "try-lock" version = "0.2.5" @@ -9114,12 +9538,9 @@ checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" 
[[package]] name = "unicase" -version = "2.7.0" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] +checksum = "7e51b68083f157f853b6379db119d1c1be0e6e4dec98101079dec41f6f5cf6df" [[package]] name = "unicode-bidi" @@ -9129,9 +9550,9 @@ checksum = "5ab17db44d7388991a428b2ee655ce0c212e862eff1768a455c58f9aad6e7893" [[package]] name = "unicode-ident" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unicode-normalization" @@ -9160,6 +9581,12 @@ version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7dd6e30e90baa6f72411720665d41d89b9a3d039dc45b8faea1ddd07f617f6af" +[[package]] +name = "unicode-width" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fc81956842c57dac11422a97c3b8195a1ff727f06e85c84ed2e8aa277c9a0fd" + [[package]] name = "unicode-xid" version = "0.2.6" @@ -9202,12 +9629,12 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", - "idna", + "idna 1.0.3", "percent-encoding", "serde", ] @@ -9234,6 +9661,12 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + [[package]] name = "utf8-ranges" version = "1.0.5" @@ -9246,6 +9679,12 @@ version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "86bd8d4e895da8537e5315b8254664e6b769c4ff3db18321b297a1e7004392e3" +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -9258,7 +9697,7 @@ version = "4.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c5afb1a60e207dca502682537fefcfd9921e71d0b83e9576060f09abc6efab23" dependencies = [ - "indexmap 2.1.0", + "indexmap 2.6.0", "serde", "serde_json", "utoipa-gen", @@ -9273,15 +9712,15 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", "ulid", ] [[package]] name = "uuid" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81dfa00651efa65069b0b6b651f4aaa31ba9e3c3ce0137aaad053604ee7e0314" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" dependencies = [ "getrandom 0.2.15", "rand 0.8.5", @@ -9309,14 +9748,14 @@ checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" [[package]] name = "vrl" -version = "0.8.1" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a8a93ee342590c4df0ff63961d7d76a347e0c7b6e6c0be4c001317ca1ff11b53" +checksum = "5c22ec61cbd43e563df185521f9a2fb2f42f6ab96604a574c82f6564049fb431" dependencies = [ "aes", - "anymap", "base16", - "base64 0.21.7", + "base62", + "base64 0.22.1", "bytes", "cbc", "cfb-mode", @@ -9328,37 +9767,49 @@ dependencies = [ "cidr-utils", "codespan-reporting", "community-id", + "convert_case 0.6.0", 
"crypto_secretbox", "csv", "ctr", "data-encoding", + "digest", "dns-lookup", + "domain", "dyn-clone", + "fancy-regex", "flate2", "grok", "hex", "hmac", - "hostname", - "indexmap 2.1.0", + "hostname 0.4.0", + "iana-time-zone", + "idna 0.5.0", + "indexmap 2.6.0", "indoc", - "itertools 0.11.0", + "influxdb-line-protocol", + "itertools 0.13.0", "lalrpop", - "lalrpop-util", + "lalrpop-util 0.21.0", "md-5", "nom", "ofb", "once_cell", "onig", - "ordered-float 4.3.0", + "ordered-float 4.5.0", "paste", "peeking_take_while", "percent-encoding", "pest", "pest_derive", + "prost 0.13.3", + "prost-reflect", + "psl", + "psl-types", + "publicsuffix", "quoted_printable", "rand 0.8.5", "regex", - "roxmltree 0.18.1", + "roxmltree 0.20.0", "rust_decimal", "seahash", "serde", @@ -9366,11 +9817,13 @@ dependencies = [ "sha-1", "sha2", "sha3", - "snafu", + "snafu 0.8.5", + "snap", "strip-ansi-escapes", "syslog_loose", "termcolor", "thiserror", + "tokio", "tracing", "uaparser", "url", @@ -9463,7 +9916,7 @@ dependencies = [ "futures-util", "headers", "http 0.2.12", - "hyper 0.14.30", + "hyper 0.14.31", "log", "mime", "mime_guess", @@ -9510,9 +9963,9 @@ dependencies = [ [[package]] name = "wasm-bindgen" -version = "0.2.94" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef073ced962d62984fb38a36e5fdc1a2b23c9e0e1fa0689bb97afa4202ef6887" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" dependencies = [ "cfg-if", "once_cell", @@ -9521,24 +9974,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.94" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c4bfab14ef75323f4eb75fa52ee0a3fb59611977fd3240da19b2cf36ff85030e" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", "wasm-bindgen-shared", ] [[package]] name = 
"wasm-bindgen-futures" -version = "0.4.44" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65471f79c1022ffa5291d33520cbbb53b7687b01c2f8e83b57d102eed7ed479d" +checksum = "cc7ec4f8827a71586374db3e87abdb5a2bb3a15afed140221307c3ec06b1f63b" dependencies = [ "cfg-if", "js-sys", @@ -9548,9 +10001,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.94" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7bec9830f60924d9ceb3ef99d55c155be8afa76954edffbb5936ff4509474e7" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -9558,28 +10011,28 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.94" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c74f6e152a76a2ad448e223b0fc0b6b5747649c3d769cc6bf45737bf97d0ed6" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.94" +version = "0.2.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a42f6c679374623f295a8623adfe63d9284091245c3504bde47c17a3ce2777d9" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" [[package]] name = "wasm-streams" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e072d4e72f700fb3443d8fe94a39315df013eef1104903cdb0a2abd322bbecd" +checksum = "15053d8d85c7eccdbefef60f06769760a563c7f0a9d6902a13d35c7800b0ad65" dependencies = [ "futures-util", "js-sys", @@ -9590,9 +10043,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.71" +version = "0.3.72" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "44188d185b5bdcae1052d08bcbcf9091a5524038d4572cc4f4f2bb9d5554ddd9" +checksum = "f6488b90108c040df0fe62fa815cbdee25124641df01814dd7282749234c6112" dependencies = [ "js-sys", "wasm-bindgen", @@ -9693,6 +10146,16 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e48a53791691ab099e5e2ad123536d0fff50652600abaf43bbf952894110d0be" +dependencies = [ + "windows-core", + "windows-targets 0.52.6", +] + [[package]] name = "windows-core" version = "0.52.0" @@ -9891,7 +10354,7 @@ dependencies = [ "futures", "futures-timer", "http-types", - "hyper 0.14.30", + "hyper 0.14.31", "log", "once_cell", "regex", @@ -9910,6 +10373,18 @@ dependencies = [ "regex", ] +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + [[package]] name = "wyz" version = "0.5.1" @@ -9951,6 +10426,30 @@ dependencies = [ "is-terminal", ] +[[package]] +name = "yoke" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c5b1314b079b0930c31e3af543d8ee1757b1951ae1e1565ec704403a7240ca5" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28cc31741b18cb6f1d5ff12f5b7523e3d6eb0852bbbad19d73905511d9849b95" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", 
+ "synstructure", +] + [[package]] name = "zerocopy" version = "0.7.35" @@ -9969,7 +10468,28 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.79", + "syn 2.0.89", +] + +[[package]] +name = "zerofrom" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91ec111ce797d0e0784a1116d0ddcdbea84322cd79e5d5ad173daeba4f93ab55" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ea7b4a3637ea8669cedf0f1fd5c286a17f3de97b8dd5a70a6c167a1730e63a5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", + "synstructure", ] [[package]] @@ -9978,6 +10498,28 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ced3678a2879b30306d323f4542626697a464a97c0a07c9aebf7ebca65cd4dde" +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.89", +] + [[package]] name = "zip" version = "0.6.6" From b82ece120e602155b267f37891a84cec1c43d24e Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 22 Nov 2024 16:51:34 -0500 Subject: [PATCH 10/27] Allow Unicode-3.0 license --- quickwit/deny.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickwit/deny.toml b/quickwit/deny.toml index 139785b1c9a..260a0a9b583 100644 --- a/quickwit/deny.toml +++ b/quickwit/deny.toml @@ -62,7 +62,7 @@ allow = [ "MIT", "MPL-2.0", 
"OpenSSL", - "Unicode-DFS-2016", + "Unicode-3.0", "Unlicense", "Zlib", "zlib-acknowledgement", From 297aabe34a12dd67097ce457626bd574a63802c5 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Mon, 25 Nov 2024 09:08:55 +0100 Subject: [PATCH 11/27] Add missing Search API section header (#5556) --- docs/reference/rest-api.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md index cbb56fd2f05..b4ad3e853db 100644 --- a/docs/reference/rest-api.md +++ b/docs/reference/rest-api.md @@ -168,6 +168,8 @@ The formatting is based on the specified output format. On error, an "X-Stream-Error" header will be sent via the trailers channel with information about the error, and the stream will be closed via [`sender.abort()`](https://docs.rs/hyper/0.14.16/hyper/body/struct.Sender.html#method.abort). Depending on the client, the trailer header with error details may not be shown. The error will also be logged in quickwit ("Error when streaming search results"). 
+## Ingest API + ### Ingest data into an index ``` From cbf35fa5da5ca75c8bc2b7c895bbc045be256a14 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Mon, 25 Nov 2024 17:36:24 +0100 Subject: [PATCH 12/27] fix failing test after rustc and serde update (#5564) --- .../quickwit-common/src/rendezvous_hasher.rs | 2 +- .../quickwit-search/src/cluster_client.rs | 30 +++++++++---------- .../quickwit-search/src/search_job_placer.rs | 9 +++--- .../model/bulk_query_params.rs | 2 +- 4 files changed, 22 insertions(+), 21 deletions(-) diff --git a/quickwit/quickwit-common/src/rendezvous_hasher.rs b/quickwit/quickwit-common/src/rendezvous_hasher.rs index beec0e244d0..2d3c24efd3f 100644 --- a/quickwit/quickwit-common/src/rendezvous_hasher.rs +++ b/quickwit/quickwit-common/src/rendezvous_hasher.rs @@ -64,7 +64,7 @@ mod tests { let mut socket_set3 = vec![socket1, socket4]; sort_by_rendez_vous_hash(&mut socket_set3, "key"); - assert_eq!(socket_set1, &[socket1, socket2, socket3, socket4]); + assert_eq!(socket_set1, &[socket1, socket3, socket2, socket4]); assert_eq!(socket_set2, &[socket1, socket2, socket4]); assert_eq!(socket_set3, &[socket1, socket4]); } diff --git a/quickwit/quickwit-search/src/cluster_client.rs b/quickwit/quickwit-search/src/cluster_client.rs index d32ad92327c..b8042f03fb7 100644 --- a/quickwit/quickwit-search/src/cluster_client.rs +++ b/quickwit/quickwit-search/src/cluster_client.rs @@ -746,30 +746,30 @@ mod tests { #[tokio::test] async fn test_put_kv_happy_path() { // 3 servers 1, 2, 3 - // Targeted key has affinity [2, 3, 1]. + // Targeted key has affinity [3, 2, 1]. // // Put on 2 and 3 is successful - // Get succeeds on 2. + // Get succeeds on 3. let mock_search_service_1 = MockSearchService::new(); let mut mock_search_service_2 = MockSearchService::new(); - mock_search_service_2.expect_put_kv().once().returning( + // Due to the buffered call it is possible for the + // put request to 2 to be emitted too. 
+ mock_search_service_2 + .expect_put_kv() + .returning(|_put_req: quickwit_proto::search::PutKvRequest| {}); + let mut mock_search_service_3 = MockSearchService::new(); + mock_search_service_3.expect_put_kv().once().returning( |put_req: quickwit_proto::search::PutKvRequest| { assert_eq!(put_req.key, b"my_key"); assert_eq!(put_req.payload, b"my_payload"); }, ); - mock_search_service_2.expect_get_kv().once().returning( + mock_search_service_3.expect_get_kv().once().returning( |get_req: quickwit_proto::search::GetKvRequest| { assert_eq!(get_req.key, b"my_key"); Some(b"my_payload".to_vec()) }, ); - let mut mock_search_service_3 = MockSearchService::new(); - // Due to the buffered call it is possible for the - // put request to 3 to be emitted too. - mock_search_service_3 - .expect_put_kv() - .returning(|_put_req: quickwit_proto::search::PutKvRequest| {}); let searcher_pool = searcher_pool_for_test([ ("127.0.0.1:1001", mock_search_service_1), ("127.0.0.1:1002", mock_search_service_2), @@ -791,11 +791,11 @@ mod tests { #[tokio::test] async fn test_put_kv_failing_get() { // 3 servers 1, 2, 3 - // Targeted key has affinity [2, 3, 1]. + // Targeted key has affinity [3, 2, 1]. // // Put on 2 and 3 is successful - // Get fails on 2. - // Get succeeds on 3. + // Get fails on 3. + // Get succeeds on 2. 
let mock_search_service_1 = MockSearchService::new(); let mut mock_search_service_2 = MockSearchService::new(); mock_search_service_2.expect_put_kv().once().returning( @@ -807,7 +807,7 @@ mod tests { mock_search_service_2.expect_get_kv().once().returning( |get_req: quickwit_proto::search::GetKvRequest| { assert_eq!(get_req.key, b"my_key"); - None + Some(b"my_payload".to_vec()) }, ); let mut mock_search_service_3 = MockSearchService::new(); @@ -820,7 +820,7 @@ mod tests { mock_search_service_3.expect_get_kv().once().returning( |get_req: quickwit_proto::search::GetKvRequest| { assert_eq!(get_req.key, b"my_key"); - Some(b"my_payload".to_vec()) + None }, ); let searcher_pool = searcher_pool_for_test([ diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index 9914ffdd177..eb15513a76c 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -427,21 +427,22 @@ mod tests { let expected_searcher_addr_1: SocketAddr = ([127, 0, 0, 1], 1001).into(); let expected_searcher_addr_2: SocketAddr = ([127, 0, 0, 1], 1002).into(); + // on a small number of splits, we may be unbalanced let expected_assigned_jobs = vec![ ( expected_searcher_addr_1, vec![ - SearchJob::for_test("split5", 5), - SearchJob::for_test("split4", 4), SearchJob::for_test("split3", 3), + SearchJob::for_test("split2", 2), + SearchJob::for_test("split1", 1), ], ), ( expected_searcher_addr_2, vec![ SearchJob::for_test("split6", 6), - SearchJob::for_test("split2", 2), - SearchJob::for_test("split1", 1), + SearchJob::for_test("split5", 5), + SearchJob::for_test("split4", 4), ], ), ]; diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/model/bulk_query_params.rs b/quickwit/quickwit-serve/src/elasticsearch_api/model/bulk_query_params.rs index e9b415c8248..fbba0f739a6 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/model/bulk_query_params.rs +++ 
b/quickwit/quickwit-serve/src/elasticsearch_api/model/bulk_query_params.rs @@ -114,7 +114,7 @@ mod tests { serde_qs::from_str::("refresh=wait") .unwrap_err() .to_string(), - "unknown variant `wait`, expected one of `false`, `true`, `wait_for`" + "unknown variant `wait`, expected one of `false`, ``, `true`, `wait_for`" ); } } From 0a112fb2617d16f1602c760613ab2f1b4a43699c Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 25 Nov 2024 12:02:40 -0500 Subject: [PATCH 13/27] Bump cross-spawn from 7.0.3 to 7.0.6 in /quickwit/quickwit-ui (#5558) Bumps [cross-spawn](https://github.com/moxystudio/node-cross-spawn) from 7.0.3 to 7.0.6. - [Changelog](https://github.com/moxystudio/node-cross-spawn/blob/master/CHANGELOG.md) - [Commits](https://github.com/moxystudio/node-cross-spawn/compare/v7.0.3...v7.0.6) --- updated-dependencies: - dependency-name: cross-spawn dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- quickwit/quickwit-ui/yarn.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/quickwit/quickwit-ui/yarn.lock b/quickwit/quickwit-ui/yarn.lock index 8b61d510388..af8d1e6be22 100644 --- a/quickwit/quickwit-ui/yarn.lock +++ b/quickwit/quickwit-ui/yarn.lock @@ -5398,9 +5398,9 @@ create-jest@^29.7.0: prompts "^2.0.1" cross-spawn@^7.0.0, cross-spawn@^7.0.2, cross-spawn@^7.0.3: - version "7.0.3" - resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.3.tgz#f73a85b9d5d41d045551c177e2882d4ac85728a6" - integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== + version "7.0.6" + resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-7.0.6.tgz#8a58fe78f00dcd70c370451759dfbfaf03e8ee9f" + integrity sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA== dependencies: path-key "^3.1.0" 
shebang-command "^2.0.0" From 4ffbd53e4f4ee3dc45936df940cbd62a9d950039 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Tue, 26 Nov 2024 09:21:30 +0100 Subject: [PATCH 14/27] Fix krb5 transitive dependency cross build (#5555) * Fix krb5 transitive dependency cross build * Specify env flags in Cross.toml * Change fix target to gnu instead of musl * Remove deprecated Cross configurations * Fix variable case --- .github/actions/cross-build-binary/action.yml | 1 - quickwit/Cross.toml | 11 +++++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/actions/cross-build-binary/action.yml b/.github/actions/cross-build-binary/action.yml index 30b68da1023..d1916325d78 100644 --- a/.github/actions/cross-build-binary/action.yml +++ b/.github/actions/cross-build-binary/action.yml @@ -43,7 +43,6 @@ runs: QW_COMMIT_DATE: ${{ env.QW_COMMIT_DATE }} QW_COMMIT_HASH: ${{ env.QW_COMMIT_HASH }} QW_COMMIT_TAGS: ${{ env.QW_COMMIT_TAGS }} - working-directory: ./quickwit - name: Bundle archive run: | diff --git a/quickwit/Cross.toml b/quickwit/Cross.toml index d0392c714e0..89139a76aa4 100644 --- a/quickwit/Cross.toml +++ b/quickwit/Cross.toml @@ -10,11 +10,18 @@ image = "quickwit/cross:x86_64-unknown-linux-gnu" [target.x86_64-unknown-linux-musl] image = "quickwit/cross:x86_64-unknown-linux-musl" -RUSTFLAGS="LIB_LDFLAGS=-L/usr/lib/x86_64-linux-gnu CFLAGS=-I/usr/local/musl/include CC=musl-gcc" [target.aarch64-unknown-linux-gnu] image = "quickwit/cross:aarch64-unknown-linux-gnu" +[target.aarch64-unknown-linux-gnu.env] +# Fix build for transitive dependency rdkafka -> rdkafka-sys -> sasl2-sys -> krb5-src +# Introduced by https://github.com/MaterializeInc/rust-krb5-src/pull/27 +passthrough = [ + "krb5_cv_attr_constructor_destructor=yes", + "ac_cv_func_regcomp=yes", + "ac_cv_printf_positional=yes", +] + [target.aarch64-unknown-linux-musl] image = "quickwit/cross:aarch64-unknown-linux-musl" -linker = "aarch64-linux-musl-gcc" From aa600c91e4d020261d51ff81d826def3c2798d1c Mon 
Sep 17 00:00:00 2001 From: Adrien Guillo Date: Tue, 26 Nov 2024 14:18:24 -0500 Subject: [PATCH 15/27] Parse datetimes and timestamps with leading and/or trailing whitespace (#5544) --- .../src/date_time_parsing.rs | 36 ++++++++++--------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/quickwit/quickwit-datetime/src/date_time_parsing.rs b/quickwit/quickwit-datetime/src/date_time_parsing.rs index 54e8d4b88bb..ab5ec94304e 100644 --- a/quickwit/quickwit-datetime/src/date_time_parsing.rs +++ b/quickwit/quickwit-datetime/src/date_time_parsing.rs @@ -36,22 +36,24 @@ pub fn parse_date_time_str( date_time_str: &str, date_time_formats: &[DateTimeInputFormat], ) -> Result { + let trimmed_date_time_str = date_time_str.trim_ascii(); + for date_time_format in date_time_formats { let date_time_opt = match date_time_format { - DateTimeInputFormat::Iso8601 => parse_iso8601(date_time_str) + DateTimeInputFormat::Iso8601 => parse_iso8601(trimmed_date_time_str) .map(TantivyDateTime::from_utc) .ok(), - DateTimeInputFormat::Rfc2822 => parse_rfc2822(date_time_str) + DateTimeInputFormat::Rfc2822 => parse_rfc2822(trimmed_date_time_str) .map(TantivyDateTime::from_utc) .ok(), - DateTimeInputFormat::Rfc3339 => parse_rfc3339(date_time_str) + DateTimeInputFormat::Rfc3339 => parse_rfc3339(trimmed_date_time_str) .map(TantivyDateTime::from_utc) .ok(), DateTimeInputFormat::Strptime(parser) => parser - .parse_date_time(date_time_str) + .parse_date_time(trimmed_date_time_str) .map(TantivyDateTime::from_utc) .ok(), - DateTimeInputFormat::Timestamp => parse_timestamp_str(date_time_str), + DateTimeInputFormat::Timestamp => parse_timestamp_str(trimmed_date_time_str), }; if let Some(date_time) = date_time_opt { return Ok(date_time); @@ -80,7 +82,7 @@ pub fn parse_timestamp_float( )); } let duration_since_epoch = Duration::try_from_secs_f64(timestamp) - .map_err(|error| format!("Failed to parse datetime `{timestamp}`: {error}"))?; + .map_err(|error| format!("failed to parse datetime 
`{timestamp}`: {error}"))?; let timestamp_nanos = duration_since_epoch.as_nanos() as i64; Ok(TantivyDateTime::from_timestamp_nanos(timestamp_nanos)) } @@ -208,13 +210,13 @@ mod tests { fn test_parse_strptime() { let test_data = vec![ ( - "%Y-%m-%d %H:%M:%S", + " %Y-%m-%d %H:%M:%S ", "2012-05-21 12:09:14", datetime!(2012-05-21 12:09:14 UTC), ), ( "%Y-%m-%d %H:%M:%S %z", - "2012-05-21 12:09:14 +0000", + " 2012-05-21 12:09:14 +0000 ", datetime!(2012-05-21 12:09:14 UTC), ), ( @@ -260,15 +262,15 @@ mod tests { ), ]; for (fmt, date_time_str, expected) in test_data { - let parser = StrptimeParser::from_strptime(fmt).unwrap(); - let result = parser.parse_date_time(date_time_str); + let parser = DateTimeInputFormat::Strptime(StrptimeParser::from_strptime(fmt).unwrap()); + let result = parse_date_time_str(date_time_str, &[parser]); if let Err(error) = &result { panic!( "failed to parse `{date_time_str}` using the following strptime format \ `{fmt}`: {error}" ) } - assert_eq!(result.unwrap(), expected); + assert_eq!(result.unwrap(), TantivyDateTime::from_utc(expected)); } } @@ -291,14 +293,14 @@ mod tests { #[test] fn test_parse_date_time_str() { for date_time_str in [ - "20120521T120914Z", - "Mon, 21 May 2012 12:09:14 GMT", - "2012-05-21T12:09:14-00:00", + "20120521T120914Z ", + " Mon, 21 May 2012 12:09:14 GMT", + " 2012-05-21T12:09:14-00:00 ", "2012-05-21 12:09:14", - "2012/05/21 12:09:14", + " 2012/05/21 12:09:14", "2012/05/21 12:09:14 +00:00", - "1337602154", - "1337602154.0", + "1337602154 ", + " 1337602154.0 ", ] { let date_time = parse_date_time_str( date_time_str, From d2d1d4fa0457ecc0b04fd0231aeac5b7fb0fcfe5 Mon Sep 17 00:00:00 2001 From: Tomer Gabel Date: Thu, 28 Nov 2024 18:03:33 +0200 Subject: [PATCH 16/27] Fix metric name in docs (#5567) --- docs/reference/metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/metrics.md b/docs/reference/metrics.md index 77c8b7bd397..9df72335a05 100644 --- a/docs/reference/metrics.md +++ 
b/docs/reference/metrics.md @@ -34,7 +34,7 @@ Currently Quickwit exposes metrics for three caches: `fastfields`, `shortlived`, | Namespace | Metric Name | Description | Labels | Type | | --------- | ----------- | ----------- | ------ | ---- | | `quickwit_indexing` | `processed_docs_total`| Number of processed docs by index, source and processed status in [`valid`, `schema_error`, `parse_error`, `transform_error`] | [`index`, `source`, `docs_processed_status`] | `counter` | -| `quickwit_indexing` | `processed_docs_total`| Number of processed bytes by index, source and processed status in [`valid`, `schema_error`, `parse_error`, `transform_error`] | [`index`, `source`, `docs_processed_status`] | `counter` | +| `quickwit_indexing` | `processed_bytes`| Number of processed bytes by index, source and processed status in [`valid`, `schema_error`, `parse_error`, `transform_error`] | [`index`, `source`, `docs_processed_status`] | `counter` | | `quickwit_indexing` | `available_concurrent_upload_permits`| Number of available concurrent upload permits by component in [`merger`, `indexer`] | [`component`] | `gauge` | | `quickwit_indexing` | `ongoing_merge_operations`| Number of available concurrent upload permits by component in [`merger`, `indexer`]. 
| [`index`, `source`] | `gauge` | From f9dbc5e58da9df5a9d2a998c1ee1e7da84ab8902 Mon Sep 17 00:00:00 2001 From: Pierre Barre Date: Thu, 28 Nov 2024 17:04:55 +0100 Subject: [PATCH 17/27] Update azure multipart policy (#5553) --- .../src/object_storage/azure_blob_storage.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs index fbe042ee09f..394b61ed906 100644 --- a/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs +++ b/quickwit/quickwit-storage/src/object_storage/azure_blob_storage.rs @@ -107,7 +107,15 @@ impl AzureBlobStorage { container_client, uri, prefix: PathBuf::new(), - multipart_policy: MultiPartPolicy::default(), + multipart_policy: MultiPartPolicy { + // Azure max part size is 100MB + // https://azure.microsoft.com/en-us/blog/general-availability-larger-block-blobs-in-azure-storage/ + target_part_num_bytes: 100_000_000, + multipart_threshold_num_bytes: 100_000_000, + max_num_parts: 50_000, // Azure allows up to 50,000 blocks + max_object_num_bytes: 4_770_000_000_000u64, // Azure allows up to 4.77TB objects + max_concurrent_uploads: 100, + }, retry_params: RetryParams::aggressive(), } } From 53293792066aa7e5a0951d0be04fee94fc93cf0c Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Thu, 28 Nov 2024 17:41:28 +0100 Subject: [PATCH 18/27] Return 429 on ES API when no shards available (#5566) --- quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs b/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs index d6578c699bc..3a142b82b2e 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs @@ -273,6 +273,11 @@ fn make_elastic_bulk_response_v2( format!("shard rate limiting [{}]", 
failure.index_id), StatusCode::TOO_MANY_REQUESTS, ), + IngestFailureReason::NoShardsAvailable => ( + ElasticException::RateLimited, + format!("no shards available [{}]", failure.index_id), + StatusCode::TOO_MANY_REQUESTS, + ), reason => { let pretty_reason = reason .as_str_name() From 7ec03f9cd2afec9df8b5be405851ff860d4b32bd Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Mon, 9 Dec 2024 10:15:16 +0100 Subject: [PATCH 19/27] Use content_length_limit for ES bulk limit (#5573) --- .../quickwit-serve/src/elasticsearch_api/bulk.rs | 7 +++++-- .../src/elasticsearch_api/bulk_v2.rs | 16 +++++++++------- .../src/elasticsearch_api/filter.rs | 7 ++++--- .../quickwit-serve/src/elasticsearch_api/mod.rs | 8 +++++++- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs b/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs index c6723d8521e..eaffcda87a3 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/bulk.rs @@ -20,6 +20,7 @@ use std::collections::HashMap; use std::time::Instant; +use bytesize::ByteSize; use hyper::StatusCode; use quickwit_config::{disable_ingest_v1, enable_ingest_v2}; use quickwit_ingest::{ @@ -42,8 +43,9 @@ use crate::{with_arg, Body}; pub fn es_compat_bulk_handler( ingest_service: IngestServiceClient, ingest_router: IngestRouterServiceClient, + content_length_limit: ByteSize, ) -> impl Filter + Clone { - elastic_bulk_filter() + elastic_bulk_filter(content_length_limit) .and(with_arg(ingest_service)) .and(with_arg(ingest_router)) .then(|body, bulk_options, ingest_service, ingest_router| { @@ -58,8 +60,9 @@ pub fn es_compat_bulk_handler( pub fn es_compat_index_bulk_handler( ingest_service: IngestServiceClient, ingest_router: IngestRouterServiceClient, + content_length_limit: ByteSize, ) -> impl Filter + Clone { - elastic_index_bulk_filter() + elastic_index_bulk_filter(content_length_limit) .and(with_arg(ingest_service)) 
.and(with_arg(ingest_router)) .then( diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs b/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs index 3a142b82b2e..8fb114838d8 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/bulk_v2.rs @@ -351,6 +351,7 @@ fn remove_doc_handles( #[cfg(test)] mod tests { + use bytesize::ByteSize; use quickwit_proto::ingest::router::{ IngestFailure, IngestFailureReason, IngestResponseV2, IngestSuccess, MockIngestRouterService, @@ -399,8 +400,9 @@ mod tests { fn es_compat_bulk_handler_v2( ingest_router: IngestRouterServiceClient, + content_length_limit: ByteSize, ) -> impl Filter + Clone { - elastic_bulk_filter() + elastic_bulk_filter(content_length_limit) .and(with_arg(ingest_router)) .then(|body, bulk_options, ingest_router| { elastic_bulk_ingest_v2(None, body, bulk_options, ingest_router) @@ -459,7 +461,7 @@ mod tests { }) }); let ingest_router = IngestRouterServiceClient::from_mock(mock_ingest_router); - let handler = es_compat_bulk_handler_v2(ingest_router); + let handler = es_compat_bulk_handler_v2(ingest_router, ByteSize::mb(10)); let payload = r#" {"create": {"_index": "my-index-1", "_id" : "1"}} @@ -511,7 +513,7 @@ mod tests { #[tokio::test] async fn test_bulk_api_accepts_empty_requests() { let ingest_router = IngestRouterServiceClient::mocked(); - let handler = es_compat_bulk_handler_v2(ingest_router); + let handler = es_compat_bulk_handler_v2(ingest_router, ByteSize::mb(10)); let response = warp::test::request() .path("/_elastic/_bulk") @@ -556,7 +558,7 @@ mod tests { }) }); let ingest_router = IngestRouterServiceClient::from_mock(mock_ingest_router); - let handler = es_compat_bulk_handler_v2(ingest_router); + let handler = es_compat_bulk_handler_v2(ingest_router, ByteSize::mb(10)); let payload = r#" @@ -579,7 +581,7 @@ mod tests { #[tokio::test] async fn test_bulk_api_handles_malformed_requests() { let ingest_router = 
IngestRouterServiceClient::mocked(); - let handler = es_compat_bulk_handler_v2(ingest_router); + let handler = es_compat_bulk_handler_v2(ingest_router, ByteSize::mb(10)); let payload = r#" {"create": {"_index": "my-index-1", "_id" : "1"},} @@ -680,7 +682,7 @@ mod tests { }) }); let ingest_router = IngestRouterServiceClient::from_mock(mock_ingest_router); - let handler = es_compat_bulk_handler_v2(ingest_router); + let handler = es_compat_bulk_handler_v2(ingest_router, ByteSize::mb(10)); let payload = r#" {"index": {"_index": "my-index-1", "_id" : "1"}} @@ -822,7 +824,7 @@ mod tests { }) }); let ingest_router = IngestRouterServiceClient::from_mock(mock_ingest_router); - let handler = es_compat_bulk_handler_v2(ingest_router); + let handler = es_compat_bulk_handler_v2(ingest_router, ByteSize::mb(10)); let payload = r#" {"create": {"_index": "my-index-1", "_id" : "1"}} diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs b/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs index 968046ebcaf..bfffe3c9ff0 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/filter.rs @@ -35,7 +35,6 @@ use crate::search_api::{extract_index_id_patterns, extract_index_id_patterns_def use crate::Body; const BODY_LENGTH_LIMIT: ByteSize = ByteSize::mib(1); -const CONTENT_LENGTH_LIMIT: ByteSize = ByteSize::mib(10); // TODO: Make all elastic endpoint models `utoipa` compatible // and register them here. 
@@ -72,11 +71,12 @@ pub(crate) fn elasticsearch_filter( ) )] pub(crate) fn elastic_bulk_filter( + content_length_limit: ByteSize, ) -> impl Filter + Clone { warp::path!("_elastic" / "_bulk") .and(warp::post().or(warp::put()).unify()) .and(warp::body::content_length_limit( - CONTENT_LENGTH_LIMIT.as_u64(), + content_length_limit.as_u64(), )) .and(get_body_bytes()) .and(serde_qs::warp::query(serde_qs::Config::default())) @@ -95,11 +95,12 @@ pub(crate) fn elastic_bulk_filter( ) )] pub(crate) fn elastic_index_bulk_filter( + content_length_limit: ByteSize, ) -> impl Filter + Clone { warp::path!("_elastic" / String / "_bulk") .and(warp::post().or(warp::put()).unify()) .and(warp::body::content_length_limit( - CONTENT_LENGTH_LIMIT.as_u64(), + content_length_limit.as_u64(), )) .and(get_body_bytes()) .and(serde_qs::warp::query::( diff --git a/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs b/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs index 479e48687f4..77b686db537 100644 --- a/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs +++ b/quickwit/quickwit-serve/src/elasticsearch_api/mod.rs @@ -61,14 +61,20 @@ pub fn elastic_api_handlers( metastore: MetastoreServiceClient, index_service: IndexService, ) -> impl Filter + Clone { + let ingest_content_length_limit = node_config.ingest_api_config.content_length_limit; es_compat_cluster_info_handler(node_config, BuildInfo::get()) .or(es_compat_search_handler(search_service.clone())) .or(es_compat_bulk_handler( ingest_service.clone(), ingest_router.clone(), + ingest_content_length_limit, )) .boxed() - .or(es_compat_index_bulk_handler(ingest_service, ingest_router)) + .or(es_compat_index_bulk_handler( + ingest_service, + ingest_router, + ingest_content_length_limit, + )) .or(es_compat_index_search_handler(search_service.clone())) .or(es_compat_index_count_handler(search_service.clone())) .or(es_compat_scroll_handler(search_service.clone())) From fba56b86cfea62ab1a4c716cbed8438664cc52ea Mon Sep 17 00:00:00 2001 From: 
Adrien Guillo Date: Tue, 10 Dec 2024 17:08:05 -0500 Subject: [PATCH 20/27] Bump pulsar from 5.1.1 to 6.3 (#5584) --- quickwit/Cargo.lock | 321 +++++++++++++----- quickwit/Cargo.toml | 2 +- .../src/source/pulsar_source.rs | 2 - 3 files changed, 231 insertions(+), 94 deletions(-) diff --git a/quickwit/Cargo.lock b/quickwit/Cargo.lock index 1662803ed0b..27411942af1 100644 --- a/quickwit/Cargo.lock +++ b/quickwit/Cargo.lock @@ -580,7 +580,7 @@ dependencies = [ "http 0.2.12", "http 1.1.0", "once_cell", - "p256", + "p256 0.11.1", "percent-encoding", "ring 0.17.8", "sha2", @@ -823,7 +823,7 @@ dependencies = [ "serde", "serde_dynamo", "serde_json", - "serde_with 3.11.0", + "serde_with", ] [[package]] @@ -980,6 +980,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349a06037c7bf932dd7e7d1f653678b2038b9ad46a74102f1fc7bd7872678cce" +[[package]] +name = "base16ct" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c7f02d4ea65f2c1853089ffd8d2787bdbc63de2f0d29dedbcf8ccdfa0ccd4cf" + [[package]] name = "base62" version = "2.0.3" @@ -1498,7 +1504,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim 0.11.1", + "strsim", ] [[package]] @@ -1841,8 +1847,10 @@ version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0dc92fb57ca44df6db8059111ab3af99a63d5d0f8375d9972e319a379c6bab76" dependencies = [ + "generic-array", "rand_core 0.6.4", "subtle", + "zeroize", ] [[package]] @@ -1902,37 +1910,40 @@ dependencies = [ ] [[package]] -name = "darling" -version = "0.13.4" +name = "curve25519-dalek" +version = "4.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" dependencies = [ - "darling_core 0.13.4", - "darling_macro 0.13.4", + "cfg-if", + "cpufeatures", + 
"curve25519-dalek-derive", + "digest", + "fiat-crypto", + "rustc_version", + "subtle", + "zeroize", ] [[package]] -name = "darling" -version = "0.20.10" +name = "curve25519-dalek-derive" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" +checksum = "f46882e17999c6cc590af592290432be3bce0428cb0d5f8b6715e4dc7b383eb3" dependencies = [ - "darling_core 0.20.10", - "darling_macro 0.20.10", + "proc-macro2", + "quote", + "syn 2.0.89", ] [[package]] -name = "darling_core" -version = "0.13.4" +name = "darling" +version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989" dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", + "darling_core", + "darling_macro", ] [[package]] @@ -1945,28 +1956,17 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim 0.11.1", + "strsim", "syn 2.0.89", ] -[[package]] -name = "darling_macro" -version = "0.13.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" -dependencies = [ - "darling_core 0.13.4", - "quote", - "syn 1.0.109", -] - [[package]] name = "darling_macro" version = "0.20.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806" dependencies = [ - "darling_core 0.20.10", + "darling_core", "quote", "syn 2.0.89", ] @@ -1979,9 +1979,9 @@ checksum = "e8566979429cf69b49a5c740c60791108e86440e8be149bbea4fe54d2c32d6e2" [[package]] name = "data-url" -version = "0.2.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8d7439c3735f405729d52c3fbbe4de140eaf938a1fe47d227c27f8254d4302a5" +checksum = "5c297a1c74b71ae29df00c3e22dd9534821d60eb9af5a0192823fa2acea70c2a" [[package]] name = "deadpool" @@ -2208,11 +2208,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "413301934810f597c1d19ca71c8710e99a3f1ba28a0d2ebc01551a2daeea3c5c" dependencies = [ "der 0.6.1", - "elliptic-curve", - "rfc6979", + "elliptic-curve 0.12.3", + "rfc6979 0.3.1", "signature 1.6.4", ] +[[package]] +name = "ecdsa" +version = "0.16.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca" +dependencies = [ + "der 0.7.9", + "digest", + "elliptic-curve 0.13.8", + "rfc6979 0.4.0", + "signature 2.2.0", + "spki 0.7.3", +] + +[[package]] +name = "ed25519" +version = "2.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "115531babc129696a58c64a4fef0a8bf9e9698629fb97e9e40767d235cfbcd53" +dependencies = [ + "pkcs8 0.10.2", + "signature 2.2.0", +] + +[[package]] +name = "ed25519-dalek" +version = "2.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a3daa8e81a3963a60642bcc1f90a670680bd4a77535faa384e9d1c79d620871" +dependencies = [ + "curve25519-dalek", + "ed25519", + "serde", + "sha2", + "subtle", + "zeroize", +] + [[package]] name = "either" version = "1.13.0" @@ -2240,16 +2278,37 @@ version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e7bb888ab5300a19b8e5bceef25ac745ad065f3c9f7efc6de1b91958110891d3" dependencies = [ - "base16ct", + "base16ct 0.1.1", "crypto-bigint 0.4.9", "der 0.6.1", "digest", - "ff", + "ff 0.12.1", "generic-array", - "group", + "group 0.12.1", "pkcs8 0.9.0", "rand_core 0.6.4", - "sec1", + "sec1 0.3.0", + "subtle", + "zeroize", +] + +[[package]] +name = "elliptic-curve" +version = "0.13.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b5e6043086bf7973472e0c7dff2142ea0b680d30e18d9cc40f267efbf222bd47" +dependencies = [ + "base16ct 0.2.0", + "crypto-bigint 0.5.5", + "digest", + "ff 0.13.0", + "generic-array", + "group 0.13.0", + "hkdf", + "pem-rfc7468", + "pkcs8 0.10.2", + "rand_core 0.6.4", + "sec1 0.7.3", "subtle", "zeroize", ] @@ -2515,6 +2574,22 @@ dependencies = [ "subtle", ] +[[package]] +name = "ff" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ded41244b729663b1e574f1b4fb731469f69f79c17667b5d776b16cda0479449" +dependencies = [ + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "fiat-crypto" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28dea519a9695b9977216879a3ebfddf92f1c08c05d984f8996aecd6ecdc811d" + [[package]] name = "filetime" version = "0.2.25" @@ -2922,7 +2997,18 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5dfbfb3a6cfbd390d5c9564ab283a0349b9b9fcd46a706c1eb10e0db70bfbac7" dependencies = [ - "ff", + "ff 0.12.1", + "rand_core 0.6.4", + "subtle", +] + +[[package]] +name = "group" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0f9ef7462f7c099f518d754361858f86d8a07af53ba9af0fe635bbccb151a63" +dependencies = [ + "ff 0.13.0", "rand_core 0.6.4", "subtle", ] @@ -4887,26 +4973,31 @@ dependencies = [ [[package]] name = "openidconnect" -version = "2.5.1" +version = "3.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98dd5b7049bac4fdd2233b8c9767d42c05da8006fdb79cc903258556d2b18009" +checksum = "f47e80a9cfae4462dd29c41e987edd228971d6565553fbc14b8a11e666d91590" dependencies = [ "base64 0.13.1", "chrono", + "dyn-clone", + "ed25519-dalek", + "hmac", "http 0.2.12", "itertools 0.10.5", "log", - "num-bigint", "oauth2", + "p256 0.13.2", + "p384", "rand 0.8.5", - "ring 0.16.20", + "rsa", "serde", "serde-value", "serde_derive", "serde_json", 
"serde_path_to_error", "serde_plain", - "serde_with 1.14.0", + "serde_with", + "sha2", "subtle", "thiserror", "url", @@ -5158,8 +5249,32 @@ version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594" dependencies = [ - "ecdsa", - "elliptic-curve", + "ecdsa 0.14.8", + "elliptic-curve 0.12.3", + "sha2", +] + +[[package]] +name = "p256" +version = "0.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b" +dependencies = [ + "ecdsa 0.16.9", + "elliptic-curve 0.13.8", + "primeorder", + "sha2", +] + +[[package]] +name = "p384" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70786f51bcc69f6a4c0360e063a4cac5419ef7c5cd5b3c99ad70f3be5ba79209" +dependencies = [ + "ecdsa 0.16.9", + "elliptic-curve 0.13.8", + "primeorder", "sha2", ] @@ -5770,6 +5885,15 @@ dependencies = [ "syn 2.0.89", ] +[[package]] +name = "primeorder" +version = "0.13.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6" +dependencies = [ + "elliptic-curve 0.13.8", +] + [[package]] name = "proc-macro-crate" version = "1.3.1" @@ -6045,9 +6169,9 @@ dependencies = [ [[package]] name = "pulsar" -version = "5.1.1" +version = "6.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20f237570b5665b38c7d5228f9a1d2990e369c00e635704528996bcd5219f540" +checksum = "d7f3541ff84e39da334979ac4bf171e0f277f4f782603aeae65bf5795dc7275a" dependencies = [ "async-trait", "bit-vec", @@ -6065,7 +6189,7 @@ dependencies = [ "nom", "oauth2", "openidconnect", - "pem 1.1.1", + "pem 3.0.4", "prost 0.11.9", "prost-build", "prost-derive 0.11.9", @@ -6079,7 +6203,7 @@ dependencies = [ "tokio-util", "url", "uuid", - "zstd 0.11.2+zstd.1.5.2", + "zstd 0.12.4", ] [[package]] @@ 
-6362,7 +6486,7 @@ dependencies = [ "regex", "serde", "serde_json", - "serde_with 3.11.0", + "serde_with", "serde_yaml", "siphasher", "tokio", @@ -6762,7 +6886,7 @@ dependencies = [ "sea-query-binder", "serde", "serde_json", - "serde_with 3.11.0", + "serde_with", "serial_test", "sqlx", "tempfile", @@ -6853,7 +6977,7 @@ dependencies = [ "quickwit-datetime", "serde", "serde_json", - "serde_with 3.11.0", + "serde_with", "tantivy", "thiserror", "time", @@ -6978,7 +7102,7 @@ dependencies = [ "serde", "serde_json", "serde_qs 0.12.0", - "serde_with 3.11.0", + "serde_with", "tempfile", "thiserror", "time", @@ -7405,6 +7529,16 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rfc6979" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dd2a808d456c4a54e300a23e9f5a67e122c3024119acbfd73e3bf664491cb2" +dependencies = [ + "hmac", + "subtle", +] + [[package]] name = "rgb" version = "0.8.50" @@ -7805,7 +7939,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9834af2c4bd8c5162f00c89f1701fb6886119a88062cf76fe842ea9e232b9839" dependencies = [ - "darling 0.20.10", + "darling", "heck 0.4.1", "proc-macro2", "quote", @@ -7825,7 +7959,7 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3be24c1842290c45df0a7bf069e0c268a747ad05a192f2fd7dcfdbc1cba40928" dependencies = [ - "base16ct", + "base16ct 0.1.1", "der 0.6.1", "generic-array", "pkcs8 0.9.0", @@ -7833,6 +7967,20 @@ dependencies = [ "zeroize", ] +[[package]] +name = "sec1" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc" +dependencies = [ + "base16ct 0.2.0", + "der 0.7.9", + "generic-array", + "pkcs8 0.10.2", + "subtle", + "zeroize", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -7996,16 +8144,6 @@ dependencies = [ "serde", ] -[[package]] -name = 
"serde_with" -version = "1.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" -dependencies = [ - "serde", - "serde_with_macros 1.5.2", -] - [[package]] name = "serde_with" version = "3.11.0" @@ -8020,29 +8158,17 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "serde_with_macros 3.11.0", + "serde_with_macros", "time", ] -[[package]] -name = "serde_with_macros" -version = "1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" -dependencies = [ - "darling 0.13.4", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "serde_with_macros" version = "3.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d846214a9854ef724f3da161b426242d8de7c1fc7de2f89bb1efcb154dca79d" dependencies = [ - "darling 0.20.10", + "darling", "proc-macro2", "quote", "syn 2.0.89", @@ -8594,12 +8720,6 @@ dependencies = [ "vte 0.11.1", ] -[[package]] -name = "strsim" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" - [[package]] name = "strsim" version = "0.11.1" @@ -10549,6 +10669,15 @@ dependencies = [ "zstd-safe 5.0.2+zstd.1.5.2", ] +[[package]] +name = "zstd" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a27595e173641171fc74a1232b7b1c7a7cb6e18222c11e9dfb9888fa424c53c" +dependencies = [ + "zstd-safe 6.0.6", +] + [[package]] name = "zstd" version = "0.13.2" @@ -10568,6 +10697,16 @@ dependencies = [ "zstd-sys", ] +[[package]] +name = "zstd-safe" +version = "6.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee98ffd0b48ee95e6c5168188e44a54550b1564d9d530ee21d5f0eaed1069581" +dependencies = [ + "libc", + "zstd-sys", +] + [[package]] name = 
"zstd-safe" version = "7.2.1" diff --git a/quickwit/Cargo.toml b/quickwit/Cargo.toml index c3e3051470c..9c91d6efd58 100644 --- a/quickwit/Cargo.toml +++ b/quickwit/Cargo.toml @@ -181,7 +181,7 @@ prost = { version = "0.11.6", default-features = false, features = [ ] } prost-build = "0.11.6" prost-types = "0.11.6" -pulsar = { version = "5.1.1", default-features = false, features = [ +pulsar = { version = "6.3", default-features = false, features = [ "auth-oauth2", "compression", "tokio-runtime", diff --git a/quickwit/quickwit-indexing/src/source/pulsar_source.rs b/quickwit/quickwit-indexing/src/source/pulsar_source.rs index e7e1ce6e9b0..528ef6e12ec 100644 --- a/quickwit/quickwit-indexing/src/source/pulsar_source.rs +++ b/quickwit/quickwit-indexing/src/source/pulsar_source.rs @@ -429,9 +429,7 @@ async fn connect_pulsar(params: &PulsarSourceParams) -> anyhow::Result = builder.build().await?; - Ok(pulsar) } From 2121ba1ce3ceff8d867671848d0c212803f10b42 Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Wed, 11 Dec 2024 16:05:42 +0100 Subject: [PATCH 21/27] Fix lenient option with wildcard queries (#5575) * Better error messages in integ tests * Initial fix suggestion from Trinity * Add rest api test and clarify docs about leniency * Add missing field test on wildcard query * Fix add query building unit tests * Forgotten staging file --- docs/reference/es_compatible_api.md | 22 +++++- .../quickwit-doc-mapper/src/query_builder.rs | 72 +++++++++++++++++-- .../src/tests/update_tests/mod.rs | 2 +- .../src/elastic_query_dsl/match_query.rs | 6 +- .../src/elastic_query_dsl/mod.rs | 8 +++ .../src/elastic_query_dsl/multi_match.rs | 6 +- .../elastic_query_dsl/phrase_prefix_query.rs | 1 + .../elastic_query_dsl/query_string_query.rs | 6 +- .../src/query_ast/full_text_query.rs | 1 + .../src/query_ast/phrase_prefix_query.rs | 10 ++- .../src/query_ast/user_input_query.rs | 3 + .../src/query_ast/wildcard_query.rs | 67 ++++++++++------- .../0005-query_string_query.yaml | 16 ++++- 13 files 
changed, 169 insertions(+), 51 deletions(-) diff --git a/docs/reference/es_compatible_api.md b/docs/reference/es_compatible_api.md index 3ead0fa927c..c3c4c94e1c9 100644 --- a/docs/reference/es_compatible_api.md +++ b/docs/reference/es_compatible_api.md @@ -394,6 +394,7 @@ The following query types are supported. | `fields` | `String[]` (Optional) | Default search target fields. | - | | `default_operator` | `"AND"` or `"OR"` | In the absence of boolean operator defines whether terms should be combined as a conjunction (`AND`) or disjunction (`OR`). | `OR` | | `boost` | `Number` | Multiplier boost for score computation. | 1.0 | +| `lenient` | `Boolean` | [See note](#about-the-lenient-argument). | false | ### `bool` @@ -494,7 +495,7 @@ The following query types are supported. | `operator` | `"AND"` or `"OR"` | Defines whether all terms should be present (`AND`) or if at least one term is sufficient to match (`OR`). | OR | | `zero_terms_query` | `all` or `none` | Defines if all (`all`) or no documents (`none`) should be returned if the query does not contain any terms after tokenization. | `none` | | `boost` | `Number` | Multiplier boost for score computation | 1.0 | - +| `lenient` | `Boolean` | [See note](#about-the-lenient-argument). | false | @@ -637,8 +638,17 @@ Contrary to ES/Opensearch, in Quickwit, at most 50 terms will be considered when } ``` -#### Supported Multi-match Queries -| Type | Description | +#### Supported parameters + +| Variable | Type | Description | Default value | +| ------------------ | --------------------- | ---------------------------------------------| ------------- | +| `type` | `String` | See supported types below | `most_fields` | +| `fields` | `String[]` (Optional) | Default search target fields. | - | +| `lenient` | `Boolean` | [See note](#about-the-lenient-argument). 
| false | + +Supported types: + +| `type` value | Description | | --------------- | ------------------------------------------------------------------------------------------- | | `most_fields` | Finds documents matching any field and combines the `_score` from each field (default). | | `phrase` | Runs a `match_phrase` query on each field. | @@ -721,6 +731,12 @@ Query matching only documents containing a non-null value for a given field. | `field` | String | Only documents with a value for field will be returned. | - | +### About the `lenient` argument + +Quickwit and Elasticsearch have different interpretations of the `lenient` setting: +- In Quickwit, lenient mode allows ignoring parts of the query that reference non-existing columns. This is a behavior that Elasticsearch supports by default. +- In Elasticsearch, lenient mode primarily addresses type errors (such as searching for text in an integer field). Quickwit always supports this behavior, regardless of the `lenient` setting. + ## Search multiple indices Search APIs that accept requests path parameter also support multi-target syntax. diff --git a/quickwit/quickwit-doc-mapper/src/query_builder.rs b/quickwit/quickwit-doc-mapper/src/query_builder.rs index dbc663794e5..9dffeef0ad7 100644 --- a/quickwit/quickwit-doc-mapper/src/query_builder.rs +++ b/quickwit/quickwit-doc-mapper/src/query_builder.rs @@ -248,7 +248,9 @@ impl<'a, 'b: 'a> QueryAstVisitor<'a> for ExtractPrefixTermRanges<'b> { ) -> Result<(), Self::Err> { let terms = match phrase_prefix.get_terms(self.schema, self.tokenizer_manager) { Ok((_, terms)) => terms, - Err(InvalidQuery::SchemaError(_)) => return Ok(()), /* the query will be nullified when casting to a tantivy ast */ + Err(InvalidQuery::SchemaError(_)) | Err(InvalidQuery::FieldDoesNotExist { .. 
}) => { + return Ok(()) + } /* the query will be nullified when casting to a tantivy ast */ Err(e) => return Err(e), }; if let Some((_, term)) = terms.last() { @@ -258,7 +260,12 @@ impl<'a, 'b: 'a> QueryAstVisitor<'a> for ExtractPrefixTermRanges<'b> { } fn visit_wildcard(&mut self, wildcard_query: &'a WildcardQuery) -> Result<(), Self::Err> { - let (_, term) = wildcard_query.extract_prefix_term(self.schema, self.tokenizer_manager)?; + let term = match wildcard_query.extract_prefix_term(self.schema, self.tokenizer_manager) { + Ok((_, term)) => term, + /* the query will be nullified when casting to a tantivy ast */ + Err(InvalidQuery::FieldDoesNotExist { .. }) => return Ok(()), + Err(e) => return Err(e), + }; self.add_prefix_term(term, u32::MAX, false); Ok(()) } @@ -280,8 +287,11 @@ mod test { use quickwit_query::query_ast::{ query_ast_from_user_text, FullTextMode, FullTextParams, PhrasePrefixQuery, QueryAstVisitor, + UserInputQuery, + }; + use quickwit_query::{ + create_default_quickwit_tokenizer_manager, BooleanOperand, MatchAllOrNone, }; - use quickwit_query::{create_default_quickwit_tokenizer_manager, MatchAllOrNone}; use tantivy::schema::{DateOptions, DateTimePrecision, Schema, FAST, INDEXED, STORED, TEXT}; use tantivy::Term; @@ -323,7 +333,7 @@ mod test { search_fields: Vec, expected: TestExpectation, ) { - check_build_query(user_query, search_fields, expected, true); + check_build_query(user_query, search_fields, expected, true, false); } #[track_caller] @@ -332,15 +342,31 @@ mod test { search_fields: Vec, expected: TestExpectation, ) { - check_build_query(user_query, search_fields, expected, false); + check_build_query(user_query, search_fields, expected, false, false); + } + + #[track_caller] + fn check_build_query_static_lenient_mode( + user_query: &str, + search_fields: Vec, + expected: TestExpectation, + ) { + check_build_query(user_query, search_fields, expected, false, true); } fn test_build_query( user_query: &str, search_fields: Vec, dynamic_mode: 
bool, + lenient: bool, ) -> Result { - let query_ast = query_ast_from_user_text(user_query, Some(search_fields)) + let user_input_query = UserInputQuery { + user_text: user_query.to_string(), + default_fields: Some(search_fields), + default_operator: BooleanOperand::And, + lenient, + }; + let query_ast = user_input_query .parse_user_query(&[]) .map_err(|err| err.to_string())?; let schema = make_schema(dynamic_mode); @@ -362,8 +388,9 @@ mod test { search_fields: Vec, expected: TestExpectation, dynamic_mode: bool, + lenient: bool, ) { - let query_result = test_build_query(user_query, search_fields, dynamic_mode); + let query_result = test_build_query(user_query, search_fields, dynamic_mode, lenient); match (query_result, expected) { (Err(query_err_msg), TestExpectation::Err(sub_str)) => { assert!( @@ -425,6 +452,11 @@ mod test { Vec::new(), TestExpectation::Err("invalid query: field does not exist: `foo`"), ); + check_build_query_static_lenient_mode( + "foo:bar", + Vec::new(), + TestExpectation::Ok("EmptyQuery"), + ); check_build_query_static_mode( "title:bar", Vec::new(), @@ -435,6 +467,11 @@ mod test { vec!["fieldnotinschema".to_string()], TestExpectation::Err("invalid query: field does not exist: `fieldnotinschema`"), ); + check_build_query_static_lenient_mode( + "bar", + vec!["fieldnotinschema".to_string()], + TestExpectation::Ok("EmptyQuery"), + ); check_build_query_static_mode( "title:[a TO b]", Vec::new(), @@ -503,6 +540,25 @@ mod test { ); } + #[test] + fn test_wildcard_query() { + check_build_query_static_mode( + "title:hello*", + Vec::new(), + TestExpectation::Ok("PhrasePrefixQuery"), + ); + check_build_query_static_mode( + "foo:bar*", + Vec::new(), + TestExpectation::Err("invalid query: field does not exist: `foo`"), + ); + check_build_query_static_mode( + "title:hello*yo", + Vec::new(), + TestExpectation::Err("Wildcard query contains wildcard in non final position"), + ); + } + #[test] fn test_datetime_range_query() { { @@ -695,12 +751,14 @@ mod test { 
phrase: "short".to_string(), max_expansions: 50, params: params.clone(), + lenient: false, }; let long = PhrasePrefixQuery { field: "title".to_string(), phrase: "not so short".to_string(), max_expansions: 50, params: params.clone(), + lenient: false, }; let mut extractor1 = ExtractPrefixTermRanges::with_schema(&schema, &tokenizer_manager); extractor1.visit_phrase_prefix(&short).unwrap(); diff --git a/quickwit/quickwit-integration-tests/src/tests/update_tests/mod.rs b/quickwit/quickwit-integration-tests/src/tests/update_tests/mod.rs index ad6bb67bcc5..835102c89eb 100644 --- a/quickwit/quickwit-integration-tests/src/tests/update_tests/mod.rs +++ b/quickwit/quickwit-integration-tests/src/tests/update_tests/mod.rs @@ -41,7 +41,7 @@ async fn assert_hits_unordered( ) .await; if let Ok(expected_hits) = expected_result { - let resp = search_res.unwrap_or_else(|_| panic!("query: {}", query)); + let resp = search_res.unwrap_or_else(|err| panic!("query: {}, error: {}", query, err)); assert_eq!(resp.errors.len(), 0, "query: {}", query); assert_eq!( resp.num_hits, diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs index 18c565976f7..1547dcaeae9 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/match_query.rs @@ -19,6 +19,7 @@ use serde::Deserialize; +use super::LeniencyBool; use crate::elastic_query_dsl::{ ConvertibleToQueryAst, ElasticQueryDslInner, StringOrStructForSerialization, }; @@ -42,11 +43,8 @@ pub(crate) struct MatchQueryParams { pub(crate) operator: BooleanOperand, #[serde(default)] pub(crate) zero_terms_query: MatchAllOrNone, - // Quickwit and Elastic have different notions of lenient. For us, it means it's okay to - // disregard part of the query where which uses non-existing collumn (which Elastic does by - // default). For Elastic, it covers type errors (searching text in an integer field). 
#[serde(default)] - pub(crate) lenient: bool, + pub(crate) lenient: LeniencyBool, } impl ConvertibleToQueryAst for MatchQuery { diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs index 9e49c866d95..2140b659138 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/mod.rs @@ -50,6 +50,14 @@ use crate::elastic_query_dsl::terms_query::TermsQuery; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::QueryAst; +/// Quickwit and Elasticsearch have different interpretations of leniency: +/// - In Quickwit, lenient mode allows ignoring parts of the query that reference non-existing +/// columns. This is a behavior that Elasticsearch supports by default. +/// - In Elasticsearch, lenient mode primarily addresses type errors (such as searching for text in +/// an integer field). Quickwit always supports this behavior, regardless of the `lenient` +/// setting. +pub type LeniencyBool = bool; + fn default_max_expansions() -> u32 { 50 } diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs index 9b607151a31..8f5f8313a53 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/multi_match.rs @@ -21,6 +21,7 @@ use serde::Deserialize; use serde_with::formats::PreferMany; use serde_with::{serde_as, OneOrMany}; +use super::LeniencyBool; use crate::elastic_query_dsl::bool_query::BoolQuery; use crate::elastic_query_dsl::match_bool_prefix::MatchBoolPrefixQuery; use crate::elastic_query_dsl::match_phrase_query::{MatchPhraseQuery, MatchPhraseQueryParams}; @@ -48,11 +49,8 @@ struct MultiMatchQueryForDeserialization { #[serde_as(deserialize_as = "OneOrMany<_, PreferMany>")] #[serde(default)] fields: Vec, - // Quickwit and Elastic have different notions of lenient. 
For us, it means it's okay to - // disregard part of the query where which uses non-existing collumn (which Elastic does by - // default). For Elastic, it covers type errors (searching text in an integer field). #[serde(default)] - lenient: bool, + lenient: LeniencyBool, } fn deserialize_match_query_for_one_field( diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs index 3955a175c64..4579b6530bf 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/phrase_prefix_query.rs @@ -67,6 +67,7 @@ impl ConvertibleToQueryAst for MatchPhrasePrefixQuery { phrase: query, params: analyzer, max_expansions, + lenient: false, }; Ok(phrase_prefix_query_ast.into()) } diff --git a/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs b/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs index f7192f8928e..9e7e6ce180f 100644 --- a/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs +++ b/quickwit/quickwit-query/src/elastic_query_dsl/query_string_query.rs @@ -19,6 +19,7 @@ use serde::Deserialize; +use super::LeniencyBool; use crate::elastic_query_dsl::ConvertibleToQueryAst; use crate::not_nan_f32::NotNaNf32; use crate::query_ast::UserInputQuery; @@ -40,11 +41,8 @@ pub(crate) struct QueryStringQuery { default_operator: BooleanOperand, #[serde(default)] boost: Option, - // Regardless of this option Quickwit behaves in elasticsearch definition of - // lenient. We include this property here just to accept user queries containing - // this option. 
#[serde(default)] - lenient: bool, + lenient: LeniencyBool, } impl ConvertibleToQueryAst for QueryStringQuery { diff --git a/quickwit/quickwit-query/src/query_ast/full_text_query.rs b/quickwit/quickwit-query/src/query_ast/full_text_query.rs index d77b39e67df..661bb89039f 100644 --- a/quickwit/quickwit-query/src/query_ast/full_text_query.rs +++ b/quickwit/quickwit-query/src/query_ast/full_text_query.rs @@ -227,6 +227,7 @@ pub struct FullTextQuery { pub field: String, pub text: String, pub params: FullTextParams, + /// Support missing fields pub lenient: bool, } diff --git a/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs b/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs index d0107f885f9..1675b22d760 100644 --- a/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs +++ b/quickwit/quickwit-query/src/query_ast/phrase_prefix_query.rs @@ -38,6 +38,8 @@ pub struct PhrasePrefixQuery { pub phrase: String, pub max_expansions: u32, pub params: FullTextParams, + /// Support missing fields + pub lenient: bool, } impl PhrasePrefixQuery { @@ -117,7 +119,13 @@ impl BuildTantivyAst for PhrasePrefixQuery { _search_fields: &[String], _with_validation: bool, ) -> Result { - let (_, terms) = self.get_terms(schema, tokenizer_manager)?; + let (_, terms) = match self.get_terms(schema, tokenizer_manager) { + Ok(res) => res, + Err(InvalidQuery::FieldDoesNotExist { .. 
}) if self.lenient => { + return Ok(TantivyQueryAst::match_none()) + } + Err(e) => return Err(e), + }; if terms.is_empty() { if self.params.zero_terms_query.is_none() { diff --git a/quickwit/quickwit-query/src/query_ast/user_input_query.rs b/quickwit/quickwit-query/src/query_ast/user_input_query.rs index 8a910567982..279f41b4676 100644 --- a/quickwit/quickwit-query/src/query_ast/user_input_query.rs +++ b/quickwit/quickwit-query/src/query_ast/user_input_query.rs @@ -49,6 +49,7 @@ pub struct UserInputQuery { #[serde(default, skip_serializing_if = "Option::is_none")] pub default_fields: Option>, pub default_operator: BooleanOperand, + /// Support missing fields pub lenient: bool, } @@ -273,12 +274,14 @@ fn convert_user_input_literal( phrase: phrase.clone(), params: full_text_params.clone(), max_expansions: DEFAULT_PHRASE_QUERY_MAX_EXPANSION, + lenient, } .into() } else if wildcard { query_ast::WildcardQuery { field: field_name, value: phrase.clone(), + lenient, } .into() } else { diff --git a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs index 86afb68a7d3..145e5a45bd1 100644 --- a/quickwit/quickwit-query/src/query_ast/wildcard_query.rs +++ b/quickwit/quickwit-query/src/query_ast/wildcard_query.rs @@ -34,6 +34,8 @@ use crate::{find_field_or_hit_dynamic, InvalidQuery}; pub struct WildcardQuery { pub field: String, pub value: String, + /// Support missing fields + pub lenient: bool, } impl From for QueryAst { @@ -42,16 +44,6 @@ impl From for QueryAst { } } -impl WildcardQuery { - #[cfg(test)] - pub fn from_field_value(field: impl ToString, value: impl ToString) -> Self { - Self { - field: field.to_string(), - value: value.to_string(), - } - } -} - fn extract_unique_token(mut tokens: Vec) -> anyhow::Result { let term = tokens .pop() @@ -77,7 +69,7 @@ fn unescape_with_final_wildcard(phrase: &str) -> anyhow::Result { .scan(State::Normal, |state, c| { if *saw_wildcard { return Some(Some(Err(anyhow!( - 
"Wildcard iquery contains wildcard in non final position" + "Wildcard query contains wildcard in non final position" )))); } match state { @@ -190,7 +182,13 @@ impl BuildTantivyAst for WildcardQuery { _search_fields: &[String], _with_validation: bool, ) -> Result { - let (_, term) = self.extract_prefix_term(schema, tokenizer_manager)?; + let (_, term) = match self.extract_prefix_term(schema, tokenizer_manager) { + Ok(res) => res, + Err(InvalidQuery::FieldDoesNotExist { .. }) if self.lenient => { + return Ok(TantivyQueryAst::match_none()) + } + Err(e) => return Err(e), + }; let mut phrase_prefix_query = tantivy::query::PhrasePrefixQuery::new_with_offset(vec![(0, term)]); @@ -206,20 +204,24 @@ mod tests { use super::*; use crate::create_default_quickwit_tokenizer_manager; + fn single_text_field_schema(field_name: &str, tokenizer: &str) -> TantivySchema { + let mut schema_builder = TantivySchema::builder(); + let text_options = TextOptions::default() + .set_indexing_options(TextFieldIndexing::default().set_tokenizer(tokenizer)); + schema_builder.add_text_field(field_name, text_options); + schema_builder.build() + } + #[test] fn test_extract_term_for_wildcard() { let query = WildcardQuery { field: "my_field".to_string(), value: "MyString Wh1ch a nOrMal Tokenizer would cut*".to_string(), + lenient: false, }; let tokenizer_manager = create_default_quickwit_tokenizer_manager(); for tokenizer in ["raw", "whitespace"] { - let mut schema_builder = TantivySchema::builder(); - let text_options = TextOptions::default() - .set_indexing_options(TextFieldIndexing::default().set_tokenizer(tokenizer)); - schema_builder.add_text_field("my_field", text_options); - let schema = schema_builder.build(); - + let schema = single_text_field_schema("my_field", tokenizer); let (_field, term) = query .extract_prefix_term(&schema, &tokenizer_manager) .unwrap(); @@ -237,19 +239,34 @@ mod tests { "source_code_default", "source_code_with_hex", ] { - let mut schema_builder = 
TantivySchema::builder(); - let text_options = TextOptions::default() - .set_indexing_options(TextFieldIndexing::default().set_tokenizer(tokenizer)); - schema_builder.add_text_field("my_field", text_options); - let schema = schema_builder.build(); - + let schema = single_text_field_schema("my_field", tokenizer); let (_field, term) = query .extract_prefix_term(&schema, &tokenizer_manager) .unwrap(); - let value = term.value(); let text = value.as_str().unwrap(); assert_eq!(text, &query.value.trim_end_matches('*').to_lowercase()); } } + + #[test] + fn test_extract_term_for_wildcard_missing_field() { + let query = WildcardQuery { + field: "my_missing_field".to_string(), + value: "My query value*".to_string(), + lenient: false, + }; + let tokenizer_manager = create_default_quickwit_tokenizer_manager(); + let schema = single_text_field_schema("my_field", "whitespace"); + let err = query + .extract_prefix_term(&schema, &tokenizer_manager) + .unwrap_err(); + let InvalidQuery::FieldDoesNotExist { + full_path: missing_field_full_path, + } = err + else { + panic!("unexpected error: {:?}", err); + }; + assert_eq!(missing_field_full_path, "my_missing_field"); + } } diff --git a/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml b/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml index 8cb495379c3..668e4877cfc 100644 --- a/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml +++ b/quickwit/rest-api-tests/scenarii/es_compatibility/0005-query_string_query.yaml @@ -226,13 +226,25 @@ json: query: query_string: query: "true" - fields: ["public", "public.inner"] + fields: ["public", "public.notdefined", "notdefined"] lenient: true expected: hits: total: value: 100 --- +# trailing wildcard +json: + query: + query_string: + query: "jour*" + fields: ["payload.description", "payload.notdefined", "notdefined"] + lenient: true +expected: + hits: + total: + value: 3 +--- # elasticsearch accepts this query 
engines: - quickwit @@ -240,5 +252,5 @@ json: query: query_string: query: "true" - fields: ["public", "public.inner"] + fields: ["public", "public.notdefined"] status_code: 400 From ec95419ec72c30619ba0b34c2ab2748b7c67eb96 Mon Sep 17 00:00:00 2001 From: Paul Masurel Date: Wed, 11 Dec 2024 17:04:16 +0100 Subject: [PATCH 22/27] Ensures the affinity function is the same as in Quickwit 0.8 (#5580) * Ensures the affinity function is the same as in Quickwit 0.8 Closes #5576 * Revert "fix failing test after rustc and serde update (#5564)" This reverts commit cbf35fa5da5ca75c8bc2b7c895bbc045be256a14. --- quickwit/quickwit-common/src/lib.rs | 3 + .../quickwit-common/src/rendezvous_hasher.rs | 34 ++++-- .../src/socket_addr_legacy_hash.rs | 100 ++++++++++++++++++ .../quickwit-search/src/cluster_client.rs | 30 +++--- .../quickwit-search/src/search_job_placer.rs | 19 ++-- 5 files changed, 157 insertions(+), 29 deletions(-) create mode 100644 quickwit/quickwit-common/src/socket_addr_legacy_hash.rs diff --git a/quickwit/quickwit-common/src/lib.rs b/quickwit/quickwit-common/src/lib.rs index dff26829584..fbda5acbacb 100644 --- a/quickwit/quickwit-common/src/lib.rs +++ b/quickwit/quickwit-common/src/lib.rs @@ -48,6 +48,8 @@ pub mod tower; pub mod type_map; pub mod uri; +mod socket_addr_legacy_hash; + use std::env; use std::fmt::{Debug, Display}; use std::future::Future; @@ -58,6 +60,7 @@ pub use coolid::new_coolid; pub use kill_switch::KillSwitch; pub use path_hasher::PathHasher; pub use progress::{Progress, ProtectedZoneGuard}; +pub use socket_addr_legacy_hash::SocketAddrLegacyHash; pub use stream_utils::{BoxStream, ServiceStream}; use tracing::{error, info}; diff --git a/quickwit/quickwit-common/src/rendezvous_hasher.rs b/quickwit/quickwit-common/src/rendezvous_hasher.rs index 2d3c24efd3f..aadfd314768 100644 --- a/quickwit/quickwit-common/src/rendezvous_hasher.rs +++ b/quickwit/quickwit-common/src/rendezvous_hasher.rs @@ -43,6 +43,7 @@ mod tests { use std::net::SocketAddr; use 
super::*; + use crate::SocketAddrLegacyHash; fn test_socket_addr(last_byte: u8) -> SocketAddr { ([127, 0, 0, last_byte], 10_000u16).into() @@ -55,17 +56,38 @@ mod tests { let socket3 = test_socket_addr(3); let socket4 = test_socket_addr(4); - let mut socket_set1 = vec![socket4, socket3, socket1, socket2]; + let legacy_socket1 = SocketAddrLegacyHash(&socket1); + let legacy_socket2 = SocketAddrLegacyHash(&socket2); + let legacy_socket3 = SocketAddrLegacyHash(&socket3); + let legacy_socket4 = SocketAddrLegacyHash(&socket4); + + let mut socket_set1 = vec![ + legacy_socket4, + legacy_socket3, + legacy_socket1, + legacy_socket2, + ]; sort_by_rendez_vous_hash(&mut socket_set1, "key"); - let mut socket_set2 = vec![socket1, socket2, socket4]; + let mut socket_set2 = vec![legacy_socket1, legacy_socket2, legacy_socket4]; sort_by_rendez_vous_hash(&mut socket_set2, "key"); - let mut socket_set3 = vec![socket1, socket4]; + let mut socket_set3 = vec![legacy_socket1, legacy_socket4]; sort_by_rendez_vous_hash(&mut socket_set3, "key"); - assert_eq!(socket_set1, &[socket1, socket3, socket2, socket4]); - assert_eq!(socket_set2, &[socket1, socket2, socket4]); - assert_eq!(socket_set3, &[socket1, socket4]); + assert_eq!( + socket_set1, + &[ + legacy_socket1, + legacy_socket2, + legacy_socket3, + legacy_socket4 + ] + ); + assert_eq!( + socket_set2, + &[legacy_socket1, legacy_socket2, legacy_socket4] + ); + assert_eq!(socket_set3, &[legacy_socket1, legacy_socket4]); } } diff --git a/quickwit/quickwit-common/src/socket_addr_legacy_hash.rs b/quickwit/quickwit-common/src/socket_addr_legacy_hash.rs new file mode 100644 index 00000000000..0adadf8f2e4 --- /dev/null +++ b/quickwit/quickwit-common/src/socket_addr_legacy_hash.rs @@ -0,0 +1,100 @@ +// Copyright (C) 2024 Quickwit, Inc. +// +// Quickwit is offered under the AGPL v3.0 and as commercial software. +// For commercial licensing, contact us at hello@quickwit.io. 
+// +// AGPL: +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU Affero General Public License as +// published by the Free Software Foundation, either version 3 of the +// License, or (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Affero General Public License for more details. +// +// You should have received a copy of the GNU Affero General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +use std::hash::Hasher; +use std::net::SocketAddr; + +/// Computes the hash of a socket address the way it was done before Rust 1.81. +/// +/// In Rust 1.81, +/// rustc changed the implementation of `Hash` for IP addresses (v4 and v6). +/// +/// The idea was to not hash an array of bytes but instead interpret it as a register +/// and hash this. +/// +/// This was done for performance reasons, but it changes the result of the hash function +/// used to compute affinity in Quickwit. As a result, the switch would invalidate all +/// existing caches. +/// +/// To avoid this, we introduce the following wrapper, whose `Hash` impl reproduces the old +/// behavior.
+#[repr(transparent)] +#[derive(Debug, Eq, PartialEq, Copy, Clone)] +pub struct SocketAddrLegacyHash<'a>(pub &'a SocketAddr); + +impl<'a> std::hash::Hash for SocketAddrLegacyHash<'a> { + fn hash(&self, state: &mut H) { + std::mem::discriminant(self.0).hash(state); + match self.0 { + SocketAddr::V4(socket_addr_v4) => { + socket_addr_v4.ip().octets().hash(state); + socket_addr_v4.port().hash(state); + } + SocketAddr::V6(socket_addr_v6) => { + socket_addr_v6.ip().octets().hash(state); + socket_addr_v6.port().hash(state); + socket_addr_v6.flowinfo().hash(state); + socket_addr_v6.scope_id().hash(state); + } + } + } +} + +#[cfg(test)] +mod tests { + use std::net::SocketAddrV6; + + use super::*; + + fn sample_socket_addr_v4() -> SocketAddr { + "17.12.15.3:1834".parse().unwrap() + } + + fn sample_socket_addr_v6() -> SocketAddr { + let mut socket_addr_v6: SocketAddrV6 = "[fe80::240:63ff:fede:3c19]:8080".parse().unwrap(); + socket_addr_v6.set_scope_id(4047u32); + socket_addr_v6.set_flowinfo(303u32); + socket_addr_v6.into() + } + + fn compute_hash(hashable: impl std::hash::Hash) -> u64 { + // I wish I could have used the sip hasher but we don't have the deps here and I did + // not want to move that code to quickwit-common. + // + // If test break because rust changed its default hasher, we can just update the tests in + // this file with the new values. 
+ let mut hasher = siphasher::sip::SipHasher::default(); + hashable.hash(&mut hasher); + hasher.finish() + } + + #[test] + fn test_legacy_hash_socket_addr_v4() { + let h = compute_hash(SocketAddrLegacyHash(&sample_socket_addr_v4())); + // This value is coming from using rust 1.80 to hash socket addr + assert_eq!(h, 8725442259486497862); + } + + #[test] + fn test_legacy_hash_socket_addr_v6() { + let h = compute_hash(SocketAddrLegacyHash(&sample_socket_addr_v6())); + // This value is coming from using rust 1.80 to hash socket addr + assert_eq!(h, 14277248675058176752); + } +} diff --git a/quickwit/quickwit-search/src/cluster_client.rs b/quickwit/quickwit-search/src/cluster_client.rs index b8042f03fb7..d32ad92327c 100644 --- a/quickwit/quickwit-search/src/cluster_client.rs +++ b/quickwit/quickwit-search/src/cluster_client.rs @@ -746,30 +746,30 @@ mod tests { #[tokio::test] async fn test_put_kv_happy_path() { // 3 servers 1, 2, 3 - // Targeted key has affinity [3, 2, 1]. + // Targeted key has affinity [2, 3, 1]. // // Put on 2 and 3 is successful - // Get succeeds on 3. + // Get succeeds on 2. let mock_search_service_1 = MockSearchService::new(); let mut mock_search_service_2 = MockSearchService::new(); - // Due to the buffered call it is possible for the - // put request to 2 to be emitted too. 
- mock_search_service_2 - .expect_put_kv() - .returning(|_put_req: quickwit_proto::search::PutKvRequest| {}); - let mut mock_search_service_3 = MockSearchService::new(); - mock_search_service_3.expect_put_kv().once().returning( + mock_search_service_2.expect_put_kv().once().returning( |put_req: quickwit_proto::search::PutKvRequest| { assert_eq!(put_req.key, b"my_key"); assert_eq!(put_req.payload, b"my_payload"); }, ); - mock_search_service_3.expect_get_kv().once().returning( + mock_search_service_2.expect_get_kv().once().returning( |get_req: quickwit_proto::search::GetKvRequest| { assert_eq!(get_req.key, b"my_key"); Some(b"my_payload".to_vec()) }, ); + let mut mock_search_service_3 = MockSearchService::new(); + // Due to the buffered call it is possible for the + // put request to 3 to be emitted too. + mock_search_service_3 + .expect_put_kv() + .returning(|_put_req: quickwit_proto::search::PutKvRequest| {}); let searcher_pool = searcher_pool_for_test([ ("127.0.0.1:1001", mock_search_service_1), ("127.0.0.1:1002", mock_search_service_2), @@ -791,11 +791,11 @@ mod tests { #[tokio::test] async fn test_put_kv_failing_get() { // 3 servers 1, 2, 3 - // Targeted key has affinity [3, 2, 1]. + // Targeted key has affinity [2, 3, 1]. // // Put on 2 and 3 is successful - // Get fails on 3. - // Get succeeds on 2. + // Get fails on 2. + // Get succeeds on 3. 
let mock_search_service_1 = MockSearchService::new(); let mut mock_search_service_2 = MockSearchService::new(); mock_search_service_2.expect_put_kv().once().returning( @@ -807,7 +807,7 @@ mod tests { mock_search_service_2.expect_get_kv().once().returning( |get_req: quickwit_proto::search::GetKvRequest| { assert_eq!(get_req.key, b"my_key"); - Some(b"my_payload".to_vec()) + None }, ); let mut mock_search_service_3 = MockSearchService::new(); @@ -820,7 +820,7 @@ mod tests { mock_search_service_3.expect_get_kv().once().returning( |get_req: quickwit_proto::search::GetKvRequest| { assert_eq!(get_req.key, b"my_key"); - None + Some(b"my_payload".to_vec()) }, ); let searcher_pool = searcher_pool_for_test([ diff --git a/quickwit/quickwit-search/src/search_job_placer.rs b/quickwit/quickwit-search/src/search_job_placer.rs index eb15513a76c..d739a76eed4 100644 --- a/quickwit/quickwit-search/src/search_job_placer.rs +++ b/quickwit/quickwit-search/src/search_job_placer.rs @@ -27,6 +27,7 @@ use anyhow::bail; use async_trait::async_trait; use quickwit_common::pubsub::EventSubscriber; use quickwit_common::rendezvous_hasher::{node_affinity, sort_by_rendez_vous_hash}; +use quickwit_common::SocketAddrLegacyHash; use quickwit_proto::search::{ReportSplit, ReportSplitsRequest}; use tracing::{info, warn}; @@ -77,7 +78,9 @@ impl EventSubscriber for SearchJobPlacer { for report_split in evt.report_splits { let node_addr = nodes .keys() - .max_by_key(|node_addr| node_affinity(*node_addr, &report_split.split_id)) + .max_by_key(|node_addr| { + node_affinity(SocketAddrLegacyHash(node_addr), &report_split.split_id) + }) // This actually never happens thanks to the if-condition at the // top of this function. 
.expect("`nodes` should not be empty"); @@ -115,7 +118,7 @@ struct SocketAddrAndClient { impl Hash for SocketAddrAndClient { fn hash(&self, hasher: &mut H) { - self.socket_addr.hash(hasher); + SocketAddrLegacyHash(&self.socket_addr).hash(hasher); } } @@ -174,7 +177,7 @@ impl SearchJobPlacer { all_nodes.len() ); } - let mut candidate_nodes: Vec<_> = all_nodes + let mut candidate_nodes: Vec = all_nodes .into_iter() .map(|(grpc_addr, client)| CandidateNode { grpc_addr, @@ -259,7 +262,7 @@ struct CandidateNode { impl Hash for CandidateNode { fn hash(&self, state: &mut H) { - self.grpc_addr.hash(state); + SocketAddrLegacyHash(&self.grpc_addr).hash(state); } } @@ -432,17 +435,17 @@ mod tests { ( expected_searcher_addr_1, vec![ + SearchJob::for_test("split5", 5), + SearchJob::for_test("split4", 4), SearchJob::for_test("split3", 3), - SearchJob::for_test("split2", 2), - SearchJob::for_test("split1", 1), ], ), ( expected_searcher_addr_2, vec![ SearchJob::for_test("split6", 6), - SearchJob::for_test("split5", 5), - SearchJob::for_test("split4", 4), + SearchJob::for_test("split2", 2), + SearchJob::for_test("split1", 1), ], ), ]; From 3ec6a07606660a71dd54f077615004e7495cb5bd Mon Sep 17 00:00:00 2001 From: Remi Dettai Date: Thu, 12 Dec 2024 16:07:34 +0100 Subject: [PATCH 23/27] Limit and monitor warmup memory usage (#5568) * Measure and log the amount of memory taken by a split search, and log this. * Limit search memory usage associated with warmup. Due to tantivy limitations, searching a split requires downloading all of the required data, and keep them in memory. We call this phase warmup. Before this PR, the only thing that curbed memory usage was the search permits: only N split search may happen concurrently. Unfortunately, the amount of data required here varies vastly. We need a mechanism to measure and avoid running more split search when memory is tight. Just using a semaphore is however not an option. 
We do not know beforehand how much memory will be required by a split search, so it could easily lead to a deadlock. Instead, this commit builds upon the search permit provider. The search permit provider is in charge of managing a configurable memory budget for this warmup memory. We introduce here a configurable "warmup_single_split_initial_allocation". A new leaf split search cannot be started if this memory is not available. This initial allocation is meant to be greater than what will be actually needed most of the time. The split search then holds this allocation until the end of warmup. After warmup, we can get the actual memory usage by interrogating the warmup cache. We can then update the amount of memory held. (most of the time, this should mean releasing some memory) In addition, in this PR, at this point, we also release the warmup search permit: We still have to perform the actual task of searching, but the thread pool will take care of limiting the number of concurrent tasks. Closes #5355 * Bring some clarifications and remove single permit getter * Make search permit provider into an actor. Also attach the permit to the actual memory cache to ensure memory is freed at the right moment. * Revert weird cargo lock update * Improve separation of concerns by using wrapping instead of nesting Adding an extra generic field into the cache to optionally allow permit tracking is weird. Instead, we make the directory generic on the type of cache and use a wrapped cache when tracking is necessary.
* Fix clippy * Fix undefined incremental resource stat * Add tests to permit provider * Improve and test stats merging utils * Fix minor typos * Add test for permit resizing * Increase default warmup memory * Increase default warmup memory * Add warmup cache metric * Limit permit memory size with split size * Also use num_docs to estimate init cache size * Restore sort on HotCache file list * Minor closure renaming * Add minimum allocation size * Increase default warmup memory to limit its effect --------- Co-authored-by: Paul Masurel --- .../quickwit-config/src/node_config/mod.rs | 14 +- .../src/node_config/serialize.rs | 4 +- .../src/caching_directory.rs | 24 +- .../quickwit-directories/src/hot_directory.rs | 32 +- .../protos/quickwit/search.proto | 10 + .../src/codegen/quickwit/quickwit.search.rs | 17 + .../quickwit-search/src/cluster_client.rs | 7 +- quickwit/quickwit-search/src/collector.rs | 47 +- quickwit/quickwit-search/src/fetch_docs.rs | 4 +- quickwit/quickwit-search/src/leaf.rs | 266 ++++++++-- quickwit/quickwit-search/src/leaf_cache.rs | 5 +- quickwit/quickwit-search/src/lib.rs | 127 ++++- quickwit/quickwit-search/src/list_terms.rs | 35 +- quickwit/quickwit-search/src/metrics.rs | 20 + quickwit/quickwit-search/src/root.rs | 57 ++- .../src/search_permit_provider.rs | 460 ++++++++++++++---- .../quickwit-search/src/search_stream/leaf.rs | 9 +- quickwit/quickwit-search/src/service.rs | 6 +- .../src/cache/byte_range_cache.rs | 52 +- 19 files changed, 972 insertions(+), 224 deletions(-) diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs index 3eef1f10428..822fe86cb91 100644 --- a/quickwit/quickwit-config/src/node_config/mod.rs +++ b/quickwit/quickwit-config/src/node_config/mod.rs @@ -226,6 +226,8 @@ pub struct SearcherConfig { #[serde(default)] #[serde(skip_serializing_if = "Option::is_none")] pub storage_timeout_policy: Option, + pub warmup_memory_budget: ByteSize, + pub 
warmup_single_split_initial_allocation: ByteSize, } /// Configuration controlling how fast a searcher should timeout a `get_slice` @@ -263,7 +265,7 @@ impl StorageTimeoutPolicy { impl Default for SearcherConfig { fn default() -> Self { - Self { + SearcherConfig { fast_field_cache_capacity: ByteSize::gb(1), split_footer_cache_capacity: ByteSize::mb(500), partial_request_cache_capacity: ByteSize::mb(64), @@ -274,6 +276,8 @@ impl Default for SearcherConfig { split_cache: None, request_timeout_secs: Self::default_request_timeout_secs(), storage_timeout_policy: None, + warmup_memory_budget: ByteSize::gb(100), + warmup_single_split_initial_allocation: ByteSize::gb(1), } } } @@ -308,6 +312,14 @@ impl SearcherConfig { split_cache_limits.max_file_descriptors ); } + if self.warmup_single_split_initial_allocation > self.warmup_memory_budget { + anyhow::bail!( + "warmup_single_split_initial_allocation ({}) must be lower or equal to \ + warmup_memory_budget ({})", + self.warmup_single_split_initial_allocation, + self.warmup_memory_budget + ); + } } Ok(()) } diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index 8a1337636cf..b208309af4c 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -616,7 +616,9 @@ mod tests { min_throughtput_bytes_per_secs: 100_000, timeout_millis: 2_000, max_num_retries: 2 - }) + }), + warmup_memory_budget: ByteSize::gb(100), + warmup_single_split_initial_allocation: ByteSize::gb(1), } ); assert_eq!( diff --git a/quickwit/quickwit-directories/src/caching_directory.rs b/quickwit/quickwit-directories/src/caching_directory.rs index b90f444d062..58d5ffd8028 100644 --- a/quickwit/quickwit-directories/src/caching_directory.rs +++ b/quickwit/quickwit-directories/src/caching_directory.rs @@ -33,21 +33,27 @@ use tantivy::{Directory, HasLen}; pub struct CachingDirectory { underlying: Arc, // TODO fixme: that's a 
pretty ugly cache we have here. - cache: Arc, + cache: ByteRangeCache, } impl CachingDirectory { /// Creates a new CachingDirectory. /// - /// Warming: The resulting CacheDirectory will cache all information without ever + /// Warning: The resulting CacheDirectory will cache all information without ever /// removing any item from the cache. pub fn new_unbounded(underlying: Arc) -> CachingDirectory { - CachingDirectory { - underlying, - cache: Arc::new(ByteRangeCache::with_infinite_capacity( - &quickwit_storage::STORAGE_METRICS.shortlived_cache, - )), - } + let byte_range_cache = ByteRangeCache::with_infinite_capacity( + &quickwit_storage::STORAGE_METRICS.shortlived_cache, + ); + CachingDirectory::new(underlying, byte_range_cache) + } + + /// Creates a new CachingDirectory. + /// + /// Warning: The resulting CacheDirectory will cache all information without ever + /// removing any item from the cache. + pub fn new(underlying: Arc, cache: ByteRangeCache) -> CachingDirectory { + CachingDirectory { underlying, cache } } } @@ -59,7 +65,7 @@ impl fmt::Debug for CachingDirectory { struct CachingFileHandle { path: PathBuf, - cache: Arc, + cache: ByteRangeCache, underlying_filehandle: Arc, } diff --git a/quickwit/quickwit-directories/src/hot_directory.rs b/quickwit/quickwit-directories/src/hot_directory.rs index d217ac29851..a388ea75b51 100644 --- a/quickwit/quickwit-directories/src/hot_directory.rs +++ b/quickwit/quickwit-directories/src/hot_directory.rs @@ -205,14 +205,12 @@ impl StaticDirectoryCache { self.file_lengths.get(path).copied() } - /// return the files and their cached lengths - pub fn get_stats(&self) -> Vec<(PathBuf, usize)> { + pub fn get_file_lengths(&self) -> Vec<(PathBuf, u64)> { let mut entries = self - .slices + .file_lengths .iter() - .map(|(path, cache)| (path.to_owned(), cache.len())) + .map(|(path, len)| (path.clone(), *len)) .collect::>(); - entries.sort_by_key(|el| el.0.to_owned()); entries } @@ -265,10 +263,6 @@ impl StaticSliceCache { } None } - 
- pub fn len(&self) -> usize { - self.bytes.len() - } } struct StaticSliceCacheBuilder { @@ -376,12 +370,12 @@ impl HotDirectory { }), }) } - /// Get files and their cached sizes. - pub fn get_stats_per_file( - hot_cache_bytes: OwnedBytes, - ) -> anyhow::Result> { - let static_cache = StaticDirectoryCache::open(hot_cache_bytes)?; - Ok(static_cache.get_stats()) + + /// Get all the files in the directory and their sizes. + /// + /// The actual cached data is a very small fraction of this length. + pub fn get_file_lengths(&self) -> Vec<(PathBuf, u64)> { + self.inner.cache.get_file_lengths() } } @@ -704,10 +698,10 @@ mod tests { assert_eq!(directory_cache.get_file_length(three_path), Some(300)); assert_eq!(directory_cache.get_file_length(four_path), None); - let stats = directory_cache.get_stats(); - assert_eq!(stats[0], (one_path.to_owned(), 8)); - assert_eq!(stats[1], (three_path.to_owned(), 0)); - assert_eq!(stats[2], (two_path.to_owned(), 7)); + let file_lengths = directory_cache.get_file_lengths(); + assert_eq!(file_lengths[0], (one_path.to_owned(), 100)); + assert_eq!(file_lengths[1], (three_path.to_owned(), 300)); + assert_eq!(file_lengths[2], (two_path.to_owned(), 200)); assert_eq!( directory_cache diff --git a/quickwit/quickwit-proto/protos/quickwit/search.proto b/quickwit/quickwit-proto/protos/quickwit/search.proto index 60671239ecc..1213ce2040e 100644 --- a/quickwit/quickwit-proto/protos/quickwit/search.proto +++ b/quickwit/quickwit-proto/protos/quickwit/search.proto @@ -347,6 +347,14 @@ message LeafSearchRequest { repeated string index_uris = 9; } +message ResourceStats { + uint64 short_lived_cache_num_bytes = 1; + uint64 split_num_docs = 2; + uint64 warmup_microsecs = 3; + uint64 cpu_thread_pool_wait_microsecs = 4; + uint64 cpu_microsecs = 5; +} + /// LeafRequestRef references data in LeafSearchRequest to deduplicate data. 
message LeafRequestRef { // The ordinal of the doc_mapper in `LeafSearchRequest.doc_mappers` @@ -479,6 +487,8 @@ message LeafSearchResponse { // postcard serialized intermediate aggregation_result. optional bytes intermediate_aggregation_result = 6; + + ResourceStats resource_stats = 8; } message SnippetRequest { diff --git a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs index 3fc4d5bdcaa..e29cae37fec 100644 --- a/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs +++ b/quickwit/quickwit-proto/src/codegen/quickwit/quickwit.search.rs @@ -286,6 +286,21 @@ pub struct LeafSearchRequest { #[prost(string, repeated, tag = "9")] pub index_uris: ::prost::alloc::vec::Vec<::prost::alloc::string::String>, } +#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] +pub struct ResourceStats { + #[prost(uint64, tag = "1")] + pub short_lived_cache_num_bytes: u64, + #[prost(uint64, tag = "2")] + pub split_num_docs: u64, + #[prost(uint64, tag = "3")] + pub warmup_microsecs: u64, + #[prost(uint64, tag = "4")] + pub cpu_thread_pool_wait_microsecs: u64, + #[prost(uint64, tag = "5")] + pub cpu_microsecs: u64, +} /// / LeafRequestRef references data in LeafSearchRequest to deduplicate data. 
#[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] @@ -457,6 +472,8 @@ pub struct LeafSearchResponse { pub intermediate_aggregation_result: ::core::option::Option< ::prost::alloc::vec::Vec, >, + #[prost(message, optional, tag = "8")] + pub resource_stats: ::core::option::Option, } #[derive(serde::Serialize, serde::Deserialize, utoipa::ToSchema)] #[allow(clippy::derive_partial_eq_without_eq)] diff --git a/quickwit/quickwit-search/src/cluster_client.rs b/quickwit/quickwit-search/src/cluster_client.rs index d32ad92327c..32f375ca06c 100644 --- a/quickwit/quickwit-search/src/cluster_client.rs +++ b/quickwit/quickwit-search/src/cluster_client.rs @@ -36,7 +36,7 @@ use tracing::{debug, error, info, warn}; use crate::retry::search::LeafSearchRetryPolicy; use crate::retry::search_stream::{LeafSearchStreamRetryPolicy, SuccessfulSplitIds}; use crate::retry::{retry_client, DefaultRetryPolicy, RetryPolicy}; -use crate::{SearchError, SearchJobPlacer, SearchServiceClient}; +use crate::{merge_resource_stats_it, SearchError, SearchJobPlacer, SearchServiceClient}; /// Maximum number of put requests emitted to perform a replicated given PUT KV. 
const MAX_PUT_KV_ATTEMPTS: usize = 6; @@ -317,6 +317,10 @@ fn merge_original_with_retry_leaf_search_response( (Some(left), None) => Some(left), (None, None) => None, }; + let resource_stats = merge_resource_stats_it([ + &original_response.resource_stats, + &retry_response.resource_stats, + ]); Ok(LeafSearchResponse { intermediate_aggregation_result, num_hits: original_response.num_hits + retry_response.num_hits, @@ -326,6 +330,7 @@ fn merge_original_with_retry_leaf_search_response( partial_hits: original_response.partial_hits, num_successful_splits: original_response.num_successful_splits + retry_response.num_successful_splits, + resource_stats, }) } diff --git a/quickwit/quickwit-search/src/collector.rs b/quickwit/quickwit-search/src/collector.rs index 4b69348ecde..67beb8090cb 100644 --- a/quickwit/quickwit-search/src/collector.rs +++ b/quickwit/quickwit-search/src/collector.rs @@ -25,8 +25,8 @@ use itertools::Itertools; use quickwit_common::binary_heap::{SortKeyMapper, TopK}; use quickwit_doc_mapper::WarmupInfo; use quickwit_proto::search::{ - LeafSearchResponse, PartialHit, SearchRequest, SortByValue, SortOrder, SortValue, - SplitSearchError, + LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, SortByValue, SortOrder, + SortValue, SplitSearchError, }; use quickwit_proto::types::SplitId; use serde::Deserialize; @@ -40,7 +40,7 @@ use tantivy::{DateTime, DocId, Score, SegmentOrdinal, SegmentReader, TantivyErro use crate::find_trace_ids_collector::{FindTraceIdsCollector, FindTraceIdsSegmentCollector, Span}; use crate::top_k_collector::{specialized_top_k_segment_collector, QuickwitSegmentTopKCollector}; -use crate::GlobalDocAddress; +use crate::{merge_resource_stats, merge_resource_stats_it, GlobalDocAddress}; #[derive(Clone, Debug)] pub(crate) enum SortByComponent { @@ -587,6 +587,7 @@ impl SegmentCollector for QuickwitSegmentCollector { } None => None, }; + Ok(LeafSearchResponse { intermediate_aggregation_result, num_hits: self.num_hits, @@ -594,6 +595,7 
@@ impl SegmentCollector for QuickwitSegmentCollector { failed_splits: Vec::new(), num_attempted_splits: 1, num_successful_splits: 1, + resource_stats: None, }) } } @@ -919,6 +921,11 @@ fn merge_leaf_responses( return Ok(leaf_responses.pop().unwrap()); } + let resource_stats_it = leaf_responses + .iter() + .map(|leaf_response| &leaf_response.resource_stats); + let merged_resource_stats = merge_resource_stats_it(resource_stats_it); + let merged_intermediate_aggregation_result: Option> = merge_intermediate_aggregation_result( aggregations_opt, @@ -960,6 +967,7 @@ fn merge_leaf_responses( failed_splits, num_attempted_splits, num_successful_splits, + resource_stats: merged_resource_stats, }) } @@ -1183,6 +1191,7 @@ pub(crate) struct IncrementalCollector { num_attempted_splits: u64, num_successful_splits: u64, start_offset: usize, + resource_stats: Option, } impl IncrementalCollector { @@ -1203,6 +1212,7 @@ impl IncrementalCollector { failed_splits: Vec::new(), num_attempted_splits: 0, num_successful_splits: 0, + resource_stats: None, } } @@ -1215,8 +1225,11 @@ impl IncrementalCollector { num_attempted_splits, intermediate_aggregation_result, num_successful_splits, + resource_stats, } = leaf_response; + merge_resource_stats(&resource_stats, &mut self.resource_stats); + self.num_hits += num_hits; self.top_k_hits.add_entries(partial_hits.into_iter()); self.failed_splits.extend(failed_splits); @@ -1266,6 +1279,7 @@ impl IncrementalCollector { num_attempted_splits: self.num_attempted_splits, num_successful_splits: self.num_successful_splits, intermediate_aggregation_result, + resource_stats: self.resource_stats, }) } } @@ -1275,8 +1289,8 @@ mod tests { use std::cmp::Ordering; use quickwit_proto::search::{ - LeafSearchResponse, PartialHit, SearchRequest, SortByValue, SortField, SortOrder, - SortValue, SplitSearchError, + LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, SortByValue, SortField, + SortOrder, SortValue, SplitSearchError, }; use 
tantivy::collector::Collector; use tantivy::TantivyDocument; @@ -1772,6 +1786,7 @@ mod tests { num_attempted_splits: 3, num_successful_splits: 3, intermediate_aggregation_result: None, + resource_stats: None, }], ); @@ -1789,7 +1804,8 @@ mod tests { failed_splits: Vec::new(), num_attempted_splits: 3, num_successful_splits: 3, - intermediate_aggregation_result: None + intermediate_aggregation_result: None, + resource_stats: None, } ); @@ -1828,6 +1844,7 @@ mod tests { num_attempted_splits: 3, num_successful_splits: 3, intermediate_aggregation_result: None, + resource_stats: None, }, LeafSearchResponse { num_hits: 10, @@ -1846,6 +1863,7 @@ mod tests { num_attempted_splits: 2, num_successful_splits: 1, intermediate_aggregation_result: None, + resource_stats: None, }, ], ); @@ -1877,7 +1895,8 @@ mod tests { }], num_attempted_splits: 5, num_successful_splits: 4, - intermediate_aggregation_result: None + intermediate_aggregation_result: None, + resource_stats: None, } ); @@ -1917,6 +1936,10 @@ mod tests { num_attempted_splits: 3, num_successful_splits: 3, intermediate_aggregation_result: None, + resource_stats: Some(ResourceStats { + cpu_microsecs: 100, + ..Default::default() + }), }, LeafSearchResponse { num_hits: 10, @@ -1935,6 +1958,10 @@ mod tests { num_attempted_splits: 2, num_successful_splits: 1, intermediate_aggregation_result: None, + resource_stats: Some(ResourceStats { + cpu_microsecs: 50, + ..Default::default() + }), }, ], ); @@ -1966,7 +1993,11 @@ mod tests { }], num_attempted_splits: 5, num_successful_splits: 4, - intermediate_aggregation_result: None + intermediate_aggregation_result: None, + resource_stats: Some(ResourceStats { + cpu_microsecs: 150, + ..Default::default() + }), } ); // TODO would be nice to test aggregation too. 
diff --git a/quickwit/quickwit-search/src/fetch_docs.rs b/quickwit/quickwit-search/src/fetch_docs.rs index 9c326764539..d75f7efff0c 100644 --- a/quickwit/quickwit-search/src/fetch_docs.rs +++ b/quickwit/quickwit-search/src/fetch_docs.rs @@ -174,12 +174,12 @@ async fn fetch_docs_in_split( global_doc_addrs.sort_by_key(|doc| doc.doc_addr); // Opens the index without the ephemeral unbounded cache, this cache is indeed not useful // when fetching docs as we will fetch them only once. - let mut index = open_index_with_caches( + let (mut index, _) = open_index_with_caches( &searcher_context, index_storage, split, Some(doc_mapper.tokenizer_manager()), - false, + None, ) .await .context("open-index-for-split")?; diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs index 5ad92f63aa2..236149ca038 100644 --- a/quickwit/quickwit-search/src/leaf.rs +++ b/quickwit/quickwit-search/src/leaf.rs @@ -22,35 +22,37 @@ use std::ops::Bound; use std::path::PathBuf; use std::str::FromStr; use std::sync::{Arc, Mutex, RwLock}; +use std::time::{Duration, Instant}; use anyhow::Context; +use bytesize::ByteSize; use futures::future::try_join_all; use quickwit_common::pretty::PrettySample; use quickwit_directories::{CachingDirectory, HotDirectory, StorageDirectory}; use quickwit_doc_mapper::{DocMapper, TermRange, WarmupInfo}; use quickwit_proto::search::{ - CountHits, LeafSearchRequest, LeafSearchResponse, PartialHit, SearchRequest, SortOrder, - SortValue, SplitIdAndFooterOffsets, SplitSearchError, + CountHits, LeafSearchRequest, LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, + SortOrder, SortValue, SplitIdAndFooterOffsets, SplitSearchError, }; use quickwit_query::query_ast::{BoolQuery, QueryAst, QueryAstTransformer, RangeQuery, TermQuery}; use quickwit_query::tokenizers::TokenizerManager; use quickwit_storage::{ - wrap_storage_with_cache, BundleStorage, MemorySizedCache, OwnedBytes, SplitCache, Storage, - StorageResolver, TimeoutAndRetryStorage, 
+ wrap_storage_with_cache, BundleStorage, ByteRangeCache, MemorySizedCache, OwnedBytes, + SplitCache, Storage, StorageResolver, TimeoutAndRetryStorage, }; use tantivy::aggregation::agg_req::{AggregationVariants, Aggregations}; use tantivy::aggregation::AggregationLimitsGuard; use tantivy::directory::FileSlice; use tantivy::fastfield::FastFieldReaders; use tantivy::schema::Field; -use tantivy::{DateTime, Index, ReloadPolicy, Searcher, Term}; +use tantivy::{DateTime, Index, ReloadPolicy, Searcher, TantivyError, Term}; use tokio::task::JoinError; use tracing::*; use crate::collector::{make_collector_for_split, make_merge_collector, IncrementalCollector}; use crate::metrics::SEARCH_METRICS; use crate::root::is_metadata_count_request_with_ast; -use crate::search_permit_provider::SearchPermit; +use crate::search_permit_provider::{compute_initial_memory_allocation, SearchPermit}; use crate::service::{deserialize_doc_mapper, SearcherContext}; use crate::{QuickwitAggregations, SearchError}; @@ -124,33 +126,39 @@ pub(crate) async fn open_split_bundle( Ok((hotcache_bytes, bundle_storage)) } +/// Add a storage proxy to retry `get_slice` requests if they are taking too long, +/// if configured in the searcher config. +/// +/// The goal here is too ensure a low latency. +fn configure_storage_retries( + searcher_context: &SearcherContext, + index_storage: Arc, +) -> Arc { + if let Some(storage_timeout_policy) = &searcher_context.searcher_config.storage_timeout_policy { + Arc::new(TimeoutAndRetryStorage::new( + index_storage, + storage_timeout_policy.clone(), + )) + } else { + index_storage + } +} + /// Opens a `tantivy::Index` for the given split with several cache layers: /// - A split footer cache given by `SearcherContext.split_footer_cache`. /// - A fast fields cache given by `SearcherContext.storage_long_term_cache`. -/// - An ephemeral unbounded cache directory whose lifetime is tied to the returned `Index`. 
+/// - An ephemeral unbounded cache directory (whose lifetime is tied to the +/// returned `Index` if no `ByteRangeCache` is provided). #[instrument(skip_all, fields(split_footer_start=split_and_footer_offsets.split_footer_start, split_footer_end=split_and_footer_offsets.split_footer_end))] pub(crate) async fn open_index_with_caches( searcher_context: &SearcherContext, index_storage: Arc, split_and_footer_offsets: &SplitIdAndFooterOffsets, tokenizer_manager: Option<&TokenizerManager>, - ephemeral_unbounded_cache: bool, -) -> anyhow::Result { - // Let's add a storage proxy to retry `get_slice` requests if they are taking too long, - // if configured in the searcher config. - // - // The goal here is too ensure a low latency. - - let index_storage_with_retry_on_timeout = if let Some(storage_timeout_policy) = - &searcher_context.searcher_config.storage_timeout_policy - { - Arc::new(TimeoutAndRetryStorage::new( - index_storage, - storage_timeout_policy.clone(), - )) - } else { - index_storage - }; + ephemeral_unbounded_cache: Option, +) -> anyhow::Result<(Index, HotDirectory)> { + let index_storage_with_retry_on_timeout = + configure_storage_retries(searcher_context, index_storage); let (hotcache_bytes, bundle_storage) = open_split_bundle( searcher_context, @@ -166,14 +174,14 @@ pub(crate) async fn open_index_with_caches( let directory = StorageDirectory::new(bundle_storage_with_cache); - let hot_directory = if ephemeral_unbounded_cache { - let caching_directory = CachingDirectory::new_unbounded(Arc::new(directory)); + let hot_directory = if let Some(cache) = ephemeral_unbounded_cache { + let caching_directory = CachingDirectory::new(Arc::new(directory), cache); HotDirectory::open(caching_directory, hotcache_bytes.read_bytes()?)? } else { HotDirectory::open(directory, hotcache_bytes.read_bytes()?)? 
}; - let mut index = Index::open(hot_directory)?; + let mut index = Index::open(hot_directory.clone())?; if let Some(tokenizer_manager) = tokenizer_manager { index.set_tokenizers(tokenizer_manager.tantivy_manager().clone()); } @@ -182,7 +190,7 @@ pub(crate) async fn open_index_with_caches( .tantivy_manager() .clone(), ); - Ok(index) + Ok((index, hot_directory)) } /// Tantivy search does not make it possible to fetch data asynchronously during @@ -363,10 +371,23 @@ fn get_leaf_resp_from_count(count: u64) -> LeafSearchResponse { num_attempted_splits: 1, num_successful_splits: 1, intermediate_aggregation_result: None, + resource_stats: None, } } +/// Compute the size of the index, store excluded. +fn compute_index_size(hot_directory: &HotDirectory) -> ByteSize { + let size_bytes = hot_directory + .get_file_lengths() + .iter() + .filter(|(path, _)| !path.to_string_lossy().ends_with("store")) + .map(|(_, size)| *size) + .sum(); + ByteSize(size_bytes) +} + /// Apply a leaf search on a single split. 
+#[allow(clippy::too_many_arguments)] async fn leaf_search_single_split( searcher_context: &SearcherContext, mut search_request: SearchRequest, @@ -375,6 +396,7 @@ async fn leaf_search_single_split( doc_mapper: Arc, split_filter: Arc>, aggregations_limits: AggregationLimitsGuard, + search_permit: &mut SearchPermit, ) -> crate::Result { rewrite_request( &mut search_request, @@ -400,15 +422,21 @@ async fn leaf_search_single_split( } let split_id = split.split_id.to_string(); - let index = open_index_with_caches( + let byte_range_cache = + ByteRangeCache::with_infinite_capacity(&quickwit_storage::STORAGE_METRICS.shortlived_cache); + let (index, hot_directory) = open_index_with_caches( searcher_context, storage, &split, Some(doc_mapper.tokenizer_manager()), - true, + Some(byte_range_cache.clone()), ) .await?; - let split_schema = index.schema(); + + let index_size = compute_index_size(&hot_directory); + if index_size < search_permit.memory_allocation() { + search_permit.update_memory_usage(index_size); + } let reader = index .reader_builder() @@ -419,13 +447,33 @@ async fn leaf_search_single_split( let mut collector = make_collector_for_split(split_id.clone(), &search_request, aggregations_limits)?; + let split_schema = index.schema(); let (query, mut warmup_info) = doc_mapper.query(split_schema.clone(), &query_ast, false)?; let collector_warmup_info = collector.warmup_info(); warmup_info.merge(collector_warmup_info); warmup_info.simplify(); + let warmup_start = Instant::now(); warmup(&searcher, &warmup_info).await?; + let warmup_end = Instant::now(); + let warmup_duration: Duration = warmup_end.duration_since(warmup_start); + let warmup_size = ByteSize(byte_range_cache.get_num_bytes()); + if warmup_size > search_permit.memory_allocation() { + warn!( + memory_usage = ?warmup_size, + memory_allocation = ?search_permit.memory_allocation(), + "current leaf search is consuming more memory than the initial allocation" + ); + } + crate::SEARCH_METRICS + 
.leaf_search_single_split_warmup_num_bytes + .observe(warmup_size.as_u64() as f64); + search_permit.update_memory_usage(warmup_size); + search_permit.free_warmup_slot(); + + let split_num_docs = split.num_docs; + let span = info_span!("tantivy_search"); let (search_request, leaf_search_response) = { @@ -433,25 +481,31 @@ async fn leaf_search_single_split( crate::search_thread_pool() .run_cpu_intensive(move || { + let cpu_start = Instant::now(); + let cpu_thread_pool_wait_microsecs = cpu_start.duration_since(warmup_end); let _span_guard = span.enter(); // Our search execution has been scheduled, let's check if we can improve the // request based on the results of the preceding searches check_optimize_search_request(&mut search_request, &split, &split_filter); collector.update_search_param(&search_request); - if is_metadata_count_request_with_ast(&query_ast, &search_request) { - return Ok(( - search_request, - get_leaf_resp_from_count(searcher.num_docs() as u64), - )); - } - if collector.is_count_only() { - let count = query.count(&searcher)? as u64; - Ok((search_request, get_leaf_resp_from_count(count))) - } else { - searcher - .search(&query, &collector) - .map(|resp| (search_request, resp)) - } + let mut leaf_search_response: LeafSearchResponse = + if is_metadata_count_request_with_ast(&query_ast, &search_request) { + get_leaf_resp_from_count(searcher.num_docs()) + } else if collector.is_count_only() { + let count = query.count(&searcher)? as u64; + get_leaf_resp_from_count(count) + } else { + searcher.search(&query, &collector)? 
+ }; + leaf_search_response.resource_stats = Some(ResourceStats { + cpu_microsecs: cpu_start.elapsed().as_micros() as u64, + short_lived_cache_num_bytes: warmup_size.as_u64(), + split_num_docs, + warmup_microsecs: warmup_duration.as_micros() as u64, + cpu_thread_pool_wait_microsecs: cpu_thread_pool_wait_microsecs.as_micros() + as u64, + }); + Result::<_, TantivyError>::Ok((search_request, leaf_search_response)) }) .await .map_err(|_| { @@ -1261,17 +1315,25 @@ pub async fn leaf_search( // We acquire all of the leaf search permits to make sure our single split search tasks // do no interleave with other leaf search requests. + let permit_sizes = split_with_req.iter().map(|(split, _)| { + compute_initial_memory_allocation( + split, + searcher_context + .searcher_config + .warmup_single_split_initial_allocation, + ) + }); let permit_futures = searcher_context .search_permit_provider - .get_permits(split_with_req.len()); + .get_permits(permit_sizes) + .await; for ((split, mut request), permit_fut) in split_with_req.into_iter().zip(permit_futures.into_iter()) { let leaf_split_search_permit = permit_fut .instrument(info_span!("waiting_for_leaf_search_split_semaphore")) - .await - .expect("Failed to acquire permit. This should never happen! 
Please, report on https://github.com/quickwit-oss/quickwit/issues."); + .await; let can_be_better = check_optimize_search_request(&mut request, &split, &split_filter); if !can_be_better && !run_all_splits { @@ -1361,7 +1423,7 @@ async fn leaf_search_single_split_wrapper( split: SplitIdAndFooterOffsets, split_filter: Arc>, incremental_merge_collector: Arc>, - search_permit: SearchPermit, + mut search_permit: SearchPermit, aggregations_limits: AggregationLimitsGuard, ) { crate::SEARCH_METRICS.leaf_searches_splits_total.inc(); @@ -1376,10 +1438,12 @@ async fn leaf_search_single_split_wrapper( doc_mapper, split_filter.clone(), aggregations_limits, + &mut search_permit, ) .await; - // We explicitly drop it, to highlight it to the reader + // Explicitly drop the permit for readability. + // This should always happen after the ephemeral search cache is dropped. std::mem::drop(search_permit); if leaf_search_single_split_res.is_ok() { @@ -1417,6 +1481,15 @@ async fn leaf_search_single_split_wrapper( mod tests { use std::ops::Bound; + use bytes::BufMut; + use quickwit_directories::write_hotcache; + use rand::{thread_rng, Rng}; + use tantivy::directory::RamDirectory; + use tantivy::schema::{ + BytesOptions, FieldEntry, Schema, TextFieldIndexing, TextOptions, Value, + }; + use tantivy::TantivyDocument; + use super::*; fn bool_filter(ast: impl Into) -> QueryAst { @@ -1852,4 +1925,97 @@ mod tests { assert_eq!(rewrote_bounds_agg, no_bounds_agg); } } + + fn create_tantivy_dir_with_hotcache<'a, V>( + field_entry: FieldEntry, + field_value: V, + ) -> (HotDirectory, usize) + where + V: Value<'a>, + { + let field_name = field_entry.name().to_string(); + let mut schema_builder = Schema::builder(); + schema_builder.add_field(field_entry); + let schema = schema_builder.build(); + + let ram_directory = RamDirectory::create(); + let index = Index::open_or_create(ram_directory.clone(), schema.clone()).unwrap(); + + let mut index_writer = index.writer(15_000_000).unwrap(); + let field = 
schema.get_field(&field_name).unwrap(); + let mut new_doc = TantivyDocument::default(); + new_doc.add_field_value(field, field_value); + index_writer.add_document(new_doc).unwrap(); + index_writer.commit().unwrap(); + + let mut hotcache_bytes_writer = Vec::new().writer(); + write_hotcache(ram_directory.clone(), &mut hotcache_bytes_writer).unwrap(); + let hotcache_bytes = OwnedBytes::new(hotcache_bytes_writer.into_inner()); + let hot_directory = HotDirectory::open(ram_directory.clone(), hotcache_bytes).unwrap(); + (hot_directory, ram_directory.total_mem_usage()) + } + + #[test] + fn test_compute_index_size_without_store() { + // We don't want to make assertions on absolute index sizes (it might + // change in future Tantivy versions), but rather verify that the store + // is properly excluded from the computed size. + + // We use random bytes so that the store can't compress them + let mut payload = vec![0u8; 1024]; + thread_rng().fill(&mut payload[..]); + + let (hotcache_directory_stored_payload, directory_size_stored_payload) = + create_tantivy_dir_with_hotcache( + FieldEntry::new_bytes("payload".to_string(), BytesOptions::default().set_stored()), + &payload, + ); + let size_with_stored_payload = + compute_index_size(&hotcache_directory_stored_payload).as_u64(); + + let (hotcache_directory_index_only, directory_size_index_only) = + create_tantivy_dir_with_hotcache( + FieldEntry::new_bytes("payload".to_string(), BytesOptions::default()), + &payload, + ); + let size_index_only = compute_index_size(&hotcache_directory_index_only).as_u64(); + + assert!(directory_size_stored_payload > directory_size_index_only + 1000); + assert!(size_with_stored_payload.abs_diff(size_index_only) < 10); + } + + #[test] + fn test_compute_index_size_varies_with_data() { + // We don't want to make assertions on absolute index sizes (it might + // change in future Tantivy versions), but rather verify that an index + // with more data is indeed bigger. 
+ + let indexing_options = + TextOptions::default().set_indexing_options(TextFieldIndexing::default()); + + let (hotcache_directory_larger, directory_size_larger) = create_tantivy_dir_with_hotcache( + FieldEntry::new_text("text".to_string(), indexing_options.clone()), + "Sed ut perspiciatis unde omnis iste natus error sit voluptatem accusantium \ + doloremque laudantium, totam rem aperiam, eaque ipsa quae ab illo inventore \ + veritatis et quasi architecto beatae vitae dicta sunt explicabo. Nemo enim ipsam \ + voluptatem quia voluptas sit aspernatur aut odit aut fugit, sed quia consequuntur \ + magni dolores eos qui ratione voluptatem sequi nesciunt. Neque porro quisquam est, \ + qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non \ + numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat \ + voluptatem. Ut enim ad minima veniam, quis nostrum exercitationem ullam corporis \ + suscipit laboriosam, nisi ut aliquid ex ea commodi consequatur? 
Quis autem vel eum \ + iure reprehenderit qui in ea voluptate velit esse quam nihil molestiae consequatur, \ + vel illum qui dolorem eum fugiat quo voluptas nulla pariatur?", + ); + let larger_size = compute_index_size(&hotcache_directory_larger).as_u64(); + + let (hotcache_directory_smaller, directory_size_smaller) = create_tantivy_dir_with_hotcache( + FieldEntry::new_text("text".to_string(), indexing_options), + "hi", + ); + let smaller_size = compute_index_size(&hotcache_directory_smaller).as_u64(); + + assert!(directory_size_larger > directory_size_smaller + 100); + assert!(larger_size > smaller_size + 100); + } } diff --git a/quickwit/quickwit-search/src/leaf_cache.rs b/quickwit/quickwit-search/src/leaf_cache.rs index 491f66f3aee..016cdd5b00f 100644 --- a/quickwit/quickwit-search/src/leaf_cache.rs +++ b/quickwit/quickwit-search/src/leaf_cache.rs @@ -192,7 +192,8 @@ impl std::ops::RangeBounds for Range { #[cfg(test)] mod tests { use quickwit_proto::search::{ - LeafSearchResponse, PartialHit, SearchRequest, SortValue, SplitIdAndFooterOffsets, + LeafSearchResponse, PartialHit, ResourceStats, SearchRequest, SortValue, + SplitIdAndFooterOffsets, }; use super::LeafSearchCache; @@ -252,6 +253,7 @@ mod tests { sort_value2: None, split_id: "split_1".to_string(), }], + resource_stats: None, }; assert!(cache.get(split_1.clone(), query_1.clone()).is_none()); @@ -342,6 +344,7 @@ mod tests { sort_value2: None, split_id: "split_1".to_string(), }], + resource_stats: Some(ResourceStats::default()), }; // for split_1, 1 and 1bis cover different timestamp ranges diff --git a/quickwit/quickwit-search/src/lib.rs b/quickwit/quickwit-search/src/lib.rs index a81a974d75d..b7c03a0c5ea 100644 --- a/quickwit/quickwit-search/src/lib.rs +++ b/quickwit/quickwit-search/src/lib.rs @@ -72,7 +72,9 @@ use quickwit_metastore::{ IndexMetadata, ListIndexesMetadataResponseExt, ListSplitsQuery, ListSplitsRequestExt, MetastoreServiceStreamSplitsExt, SplitMetadata, SplitState, }; -use 
quickwit_proto::search::{PartialHit, SearchRequest, SearchResponse, SplitIdAndFooterOffsets}; +use quickwit_proto::search::{ + PartialHit, ResourceStats, SearchRequest, SearchResponse, SplitIdAndFooterOffsets, +}; use quickwit_proto::types::IndexUid; use quickwit_storage::StorageResolver; pub use service::SearcherContext; @@ -340,3 +342,126 @@ pub fn searcher_pool_for_test( }), ) } + +pub(crate) fn merge_resource_stats_it<'a>( + stats_it: impl IntoIterator>, +) -> Option { + let mut acc_stats: Option = None; + for new_stats in stats_it { + merge_resource_stats(new_stats, &mut acc_stats); + } + acc_stats +} + +fn merge_resource_stats( + new_stats_opt: &Option, + stat_accs_opt: &mut Option, +) { + if let Some(new_stats) = new_stats_opt { + if let Some(stat_accs) = stat_accs_opt { + stat_accs.short_lived_cache_num_bytes += new_stats.short_lived_cache_num_bytes; + stat_accs.split_num_docs += new_stats.split_num_docs; + stat_accs.warmup_microsecs += new_stats.warmup_microsecs; + stat_accs.cpu_thread_pool_wait_microsecs += new_stats.cpu_thread_pool_wait_microsecs; + stat_accs.cpu_microsecs += new_stats.cpu_microsecs; + } else { + *stat_accs_opt = Some(new_stats.clone()); + } + } +} +#[cfg(test)] +mod stats_merge_tests { + use super::*; + + #[test] + fn test_merge_resource_stats() { + let mut acc_stats = None; + + merge_resource_stats(&None, &mut acc_stats); + + assert_eq!(acc_stats, None); + + let stats = Some(ResourceStats { + short_lived_cache_num_bytes: 100, + split_num_docs: 200, + warmup_microsecs: 300, + cpu_thread_pool_wait_microsecs: 400, + cpu_microsecs: 500, + }); + + merge_resource_stats(&stats, &mut acc_stats); + + assert_eq!(acc_stats, stats); + + let new_stats = Some(ResourceStats { + short_lived_cache_num_bytes: 50, + split_num_docs: 100, + warmup_microsecs: 150, + cpu_thread_pool_wait_microsecs: 200, + cpu_microsecs: 250, + }); + + merge_resource_stats(&new_stats, &mut acc_stats); + + let stats_plus_new_stats = Some(ResourceStats { + 
short_lived_cache_num_bytes: 150, + split_num_docs: 300, + warmup_microsecs: 450, + cpu_thread_pool_wait_microsecs: 600, + cpu_microsecs: 750, + }); + + assert_eq!(acc_stats, stats_plus_new_stats); + + merge_resource_stats(&None, &mut acc_stats); + + assert_eq!(acc_stats, stats_plus_new_stats); + } + + #[test] + fn test_merge_resource_stats_it() { + let merged_stats = merge_resource_stats_it(Vec::<&Option>::new()); + assert_eq!(merged_stats, None); + + let stats1 = Some(ResourceStats { + short_lived_cache_num_bytes: 100, + split_num_docs: 200, + warmup_microsecs: 300, + cpu_thread_pool_wait_microsecs: 400, + cpu_microsecs: 500, + }); + + let merged_stats = merge_resource_stats_it(vec![&None, &stats1, &None]); + + assert_eq!(merged_stats, stats1); + + let stats2 = Some(ResourceStats { + short_lived_cache_num_bytes: 50, + split_num_docs: 100, + warmup_microsecs: 150, + cpu_thread_pool_wait_microsecs: 200, + cpu_microsecs: 250, + }); + + let stats3 = Some(ResourceStats { + short_lived_cache_num_bytes: 25, + split_num_docs: 50, + warmup_microsecs: 75, + cpu_thread_pool_wait_microsecs: 100, + cpu_microsecs: 125, + }); + + let merged_stats = merge_resource_stats_it(vec![&stats1, &stats2, &stats3]); + + assert_eq!( + merged_stats, + Some(ResourceStats { + short_lived_cache_num_bytes: 175, + split_num_docs: 350, + warmup_microsecs: 525, + cpu_thread_pool_wait_microsecs: 700, + cpu_microsecs: 875, + }) + ); + } +} diff --git a/quickwit/quickwit-search/src/list_terms.rs b/quickwit/quickwit-search/src/list_terms.rs index 765203438d1..f796252c125 100644 --- a/quickwit/quickwit-search/src/list_terms.rs +++ b/quickwit/quickwit-search/src/list_terms.rs @@ -33,13 +33,14 @@ use quickwit_proto::search::{ SplitIdAndFooterOffsets, SplitSearchError, }; use quickwit_proto::types::IndexUid; -use quickwit_storage::Storage; +use quickwit_storage::{ByteRangeCache, Storage}; use tantivy::schema::{Field, FieldType}; use tantivy::{ReloadPolicy, Term}; use tracing::{debug, error, info, 
instrument}; use crate::leaf::open_index_with_caches; use crate::search_job_placer::group_jobs_by_index_id; +use crate::search_permit_provider::compute_initial_memory_allocation; use crate::{resolve_index_patterns, ClusterClient, SearchError, SearchJob, SearcherContext}; /// Performs a distributed list terms. @@ -216,7 +217,10 @@ async fn leaf_list_terms_single_split( storage: Arc, split: SplitIdAndFooterOffsets, ) -> crate::Result { - let index = open_index_with_caches(searcher_context, storage, &split, None, true).await?; + let cache = + ByteRangeCache::with_infinite_capacity(&quickwit_storage::STORAGE_METRICS.shortlived_cache); + let (index, _) = + open_index_with_caches(searcher_context, storage, &split, None, Some(cache)).await?; let split_schema = index.schema(); let reader = index .reader_builder() @@ -325,18 +329,26 @@ pub async fn leaf_list_terms( splits: &[SplitIdAndFooterOffsets], ) -> Result { info!(split_offsets = ?PrettySample::new(splits, 5)); + let permit_sizes = splits.iter().map(|split| { + compute_initial_memory_allocation( + split, + searcher_context + .searcher_config + .warmup_single_split_initial_allocation, + ) + }); + let permits = searcher_context + .search_permit_provider + .get_permits(permit_sizes) + .await; let leaf_search_single_split_futures: Vec<_> = splits .iter() - .map(|split| { + .zip(permits.into_iter()) + .map(|(split, search_permit_recv)| { let index_storage_clone = index_storage.clone(); let searcher_context_clone = searcher_context.clone(); async move { - let _leaf_split_search_permit = searcher_context_clone - .search_permit_provider - .get_permit() - .await - .expect("Failed to acquire permit. This should never happen! Please, report on https://github.com/quickwit-oss/quickwit/issues."); - + let leaf_split_search_permit = search_permit_recv.await; // TODO dedicated counter and timer? 
crate::SEARCH_METRICS.leaf_searches_splits_total.inc(); let timer = crate::SEARCH_METRICS @@ -350,6 +362,11 @@ pub async fn leaf_list_terms( ) .await; timer.observe_duration(); + + // Explicitly drop the permit for readability. + // This should always happen after the ephemeral search cache is dropped. + std::mem::drop(leaf_split_search_permit); + leaf_search_single_split_res.map_err(|err| (split.split_id.clone(), err)) } }) diff --git a/quickwit/quickwit-search/src/metrics.rs b/quickwit/quickwit-search/src/metrics.rs index 35b7d3115c5..55bff88a565 100644 --- a/quickwit/quickwit-search/src/metrics.rs +++ b/quickwit/quickwit-search/src/metrics.rs @@ -19,6 +19,7 @@ // See https://prometheus.io/docs/practices/naming/ +use bytesize::ByteSize; use once_cell::sync::Lazy; use quickwit_common::metrics::{ exponential_buckets, linear_buckets, new_counter, new_counter_vec, new_gauge_vec, @@ -37,6 +38,7 @@ pub struct SearchMetrics { pub job_assigned_total: IntCounterVec<1>, pub leaf_search_single_split_tasks_pending: IntGauge, pub leaf_search_single_split_tasks_ongoing: IntGauge, + pub leaf_search_single_split_warmup_num_bytes: Histogram, } impl Default for SearchMetrics { @@ -52,6 +54,18 @@ impl Default for SearchMetrics { .copied() .collect(); + let pseudo_exponential_bytes_buckets = vec![ + ByteSize::mb(10).as_u64() as f64, + ByteSize::mb(20).as_u64() as f64, + ByteSize::mb(50).as_u64() as f64, + ByteSize::mb(100).as_u64() as f64, + ByteSize::mb(200).as_u64() as f64, + ByteSize::mb(500).as_u64() as f64, + ByteSize::gb(1).as_u64() as f64, + ByteSize::gb(2).as_u64() as f64, + ByteSize::gb(5).as_u64() as f64, + ]; + let leaf_search_single_split_tasks = new_gauge_vec::<1>( "leaf_search_single_split_tasks", "Number of single split search tasks pending or ongoing", @@ -124,6 +138,12 @@ impl Default for SearchMetrics { .with_label_values(["ongoing"]), leaf_search_single_split_tasks_pending: leaf_search_single_split_tasks .with_label_values(["pending"]), + 
leaf_search_single_split_warmup_num_bytes: new_histogram( + "leaf_search_single_split_warmup_num_bytes", + "Size of the short lived cache for a single split once the warmup is done.", + "search", + pseudo_exponential_bytes_buckets, + ), job_assigned_total: new_counter_vec( "job_assigned_total", "Number of job assigned to searchers, per affinity rank.", diff --git a/quickwit/quickwit-search/src/root.rs b/quickwit/quickwit-search/src/root.rs index 608bc87e479..724687148f2 100644 --- a/quickwit/quickwit-search/src/root.rs +++ b/quickwit/quickwit-search/src/root.rs @@ -18,6 +18,7 @@ // along with this program. If not, see . use std::collections::{HashMap, HashSet}; +use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::OnceLock; use std::time::Duration; @@ -49,7 +50,7 @@ use tantivy::aggregation::intermediate_agg_result::IntermediateAggregationResult use tantivy::collector::Collector; use tantivy::schema::{Field, FieldEntry, FieldType, Schema}; use tantivy::TantivyError; -use tracing::{debug, info, info_span, instrument}; +use tracing::{debug, info_span, instrument}; use crate::cluster_client::ClusterClient; use crate::collector::{make_merge_collector, QuickwitAggregations}; @@ -683,10 +684,46 @@ pub fn get_count_from_metadata(split_metadatas: &[SplitMetadata]) -> Vec bool { + // It is not worth considering small splits for this. + if split_num_docs < 100_000 { + return false; + } + // We multiply those figure by 1_000 for accuracy. 
+ const PERCENTILE: u64 = 95; + const PRIOR_NUM_BYTES_PER_DOC: u64 = 3 * 1_000; + static NUM_BYTES_PER_DOC_95_PERCENTILE_ESTIMATOR: AtomicU64 = + AtomicU64::new(PRIOR_NUM_BYTES_PER_DOC); + let num_bits_per_docs = num_bytes * 1_000 / split_num_docs; + let current_estimator = NUM_BYTES_PER_DOC_95_PERCENTILE_ESTIMATOR.load(Ordering::Relaxed); + let is_memory_intensive = num_bits_per_docs > current_estimator; + let new_estimator: u64 = if is_memory_intensive { + current_estimator.saturating_add(PRIOR_NUM_BYTES_PER_DOC * PERCENTILE / 100) + } else { + current_estimator.saturating_sub(PRIOR_NUM_BYTES_PER_DOC * (100 - PERCENTILE) / 100) + }; + // We do not use fetch_add / fetch_sub directly as they wrap around. + // Concurrency could lead to different results here, but really we don't care. + // + // This is just ignoring some gradient updates. + NUM_BYTES_PER_DOC_95_PERCENTILE_ESTIMATOR.store(new_estimator, Ordering::Relaxed); + is_memory_intensive +} + /// If this method fails for some splits, a partial search response is returned, with the list of /// faulty splits in the failed_splits field. #[instrument(level = "debug", skip_all)] @@ -744,9 +781,21 @@ pub(crate) async fn search_partial_hits_phase( has_intermediate_aggregation_result = leaf_search_response.intermediate_aggregation_result.is_some(), "Merged leaf search response." ); + + if let Some(resource_stats) = &leaf_search_response.resource_stats { + if is_top_5pct_memory_intensive( + resource_stats.short_lived_cache_num_bytes, + resource_stats.split_num_docs, + ) { + // We log at most 5 times per minute. 
+ quickwit_common::rate_limited_info!(limit_per_min=5, split_num_docs=resource_stats.split_num_docs, %search_request.query_ast, short_lived_cached_num_bytes=resource_stats.short_lived_cache_num_bytes, query=%search_request.query_ast, "memory intensive query"); + } + } + if !leaf_search_response.failed_splits.is_empty() { quickwit_common::rate_limited_error!(limit_per_min=6, failed_splits = ?leaf_search_response.failed_splits, "leaf search response contains at least one failed split"); } + Ok(leaf_search_response) } @@ -1114,7 +1163,6 @@ pub async fn root_search( mut metastore: MetastoreServiceClient, cluster_client: &ClusterClient, ) -> crate::Result { - info!(searcher_context = ?searcher_context, search_request = ?search_request); let start_instant = tokio::time::Instant::now(); let list_indexes_metadatas_request = ListIndexesMetadataRequest { index_id_patterns: search_request.index_id_patterns.clone(), @@ -1169,9 +1217,12 @@ pub async fn root_search( ) .await; + let elapsed = start_instant.elapsed(); + if let Ok(search_response) = &mut search_response_result { - search_response.elapsed_time_micros = start_instant.elapsed().as_micros() as u64; + search_response.elapsed_time_micros = elapsed.as_micros() as u64; } + let label_values = if search_response_result.is_ok() { ["success"] } else { diff --git a/quickwit/quickwit-search/src/search_permit_provider.rs b/quickwit/quickwit-search/src/search_permit_provider.rs index f6883efb34b..64bc36ff3a6 100644 --- a/quickwit/quickwit-search/src/search_permit_provider.rs +++ b/quickwit/quickwit-search/src/search_permit_provider.rs @@ -18,109 +18,221 @@ // along with this program. If not, see . 
use std::collections::VecDeque; -use std::sync::{Arc, Mutex}; +use std::future::Future; +use std::pin::Pin; +use std::task::{Context, Poll}; +use bytesize::ByteSize; use quickwit_common::metrics::GaugeGuard; -use tokio::sync::oneshot; +use quickwit_proto::search::SplitIdAndFooterOffsets; +#[cfg(test)] +use tokio::sync::watch; +use tokio::sync::{mpsc, oneshot}; -/// `SearchPermitProvider` is a distributor of permits to perform single split -/// search operation. +/// Distributor of permits to perform split search operation. /// -/// Requests are served in order. +/// Requests are served in order. Each permit initially reserves a slot for the +/// warmup (limit concurrent downloads) and a pessimistic amount of memory. Once +/// the warmup is completed, the actual memory usage is set and the warmup slot +/// is released. Once the search is completed and the permit is dropped, the +/// remaining memory is also released. #[derive(Clone)] pub struct SearchPermitProvider { - inner_arc: Arc>, + message_sender: mpsc::UnboundedSender, + #[cfg(test)] + actor_stopped: watch::Receiver, +} + +#[derive(Debug)] +pub enum SearchPermitMessage { + Request { + permit_sender: oneshot::Sender>, + permit_sizes: Vec, + }, + UpdateMemory { + memory_delta: i64, + }, + FreeWarmupSlot, + Drop { + memory_size: u64, + warmup_slot_freed: bool, + }, +} + +/// Makes very pessimistic estimate of the memory allocation required for a split search +/// +/// This is refined later on when more data is available about the split. 
+pub fn compute_initial_memory_allocation( + split: &SplitIdAndFooterOffsets, + warmup_single_split_initial_allocation: ByteSize, +) -> ByteSize { + let split_size = split.split_footer_start; + // we consider the configured initial allocation to be set for a large split with 10M docs + const LARGE_SPLIT_NUM_DOCS: u64 = 10_000_000; + let proportional_allocation = + warmup_single_split_initial_allocation.as_u64() * split.num_docs / LARGE_SPLIT_NUM_DOCS; + let size_bytes = [ + split_size, + proportional_allocation, + warmup_single_split_initial_allocation.as_u64(), + ] + .into_iter() + .min() + .unwrap(); + const MINIMUM_ALLOCATION_BYTES: u64 = 10_000_000; + ByteSize(size_bytes.max(MINIMUM_ALLOCATION_BYTES)) } impl SearchPermitProvider { - pub fn new(num_permits: usize) -> SearchPermitProvider { - SearchPermitProvider { - inner_arc: Arc::new(Mutex::new(InnerSearchPermitProvider { - num_permits_available: num_permits, - permits_requests: VecDeque::new(), - })), + pub fn new(num_download_slots: usize, memory_budget: ByteSize) -> Self { + let (message_sender, message_receiver) = mpsc::unbounded_channel(); + #[cfg(test)] + let (state_sender, state_receiver) = watch::channel(false); + let actor = SearchPermitActor { + msg_receiver: message_receiver, + msg_sender: message_sender.downgrade(), + num_warmup_slots_available: num_download_slots, + total_memory_budget: memory_budget.as_u64(), + permits_requests: VecDeque::new(), + total_memory_allocated: 0u64, + #[cfg(test)] + stopped: state_sender, + }; + tokio::spawn(actor.run()); + Self { + message_sender, + #[cfg(test)] + actor_stopped: state_receiver, } } - /// Returns a future permit in the form of a oneshot Receiver channel. + /// Returns one permit future for each provided split metadata. /// - /// At this point the permit is not acquired yet. 
- #[must_use] - pub fn get_permit(&self) -> oneshot::Receiver { - let mut permits_lock = self.inner_arc.lock().unwrap(); - permits_lock.get_permit(&self.inner_arc) - } - - /// Returns a list of future permits in the form of oneshot Receiver channels. + /// The permits returned are guaranteed to be resolved in order. In + /// addition, the permits are guaranteed to be resolved before permits + /// returned by subsequent calls to this function. /// - /// The permits returned are guaranteed to be resolved in order. - /// In addition, the permits are guaranteed to be resolved before permits returned by - /// subsequent calls to this function (or `get_permit`). - #[must_use] - pub fn get_permits(&self, num_permits: usize) -> Vec> { - let mut permits_lock = self.inner_arc.lock().unwrap(); - permits_lock.get_permits(num_permits, &self.inner_arc) + /// The permit memory size is capped by per_permit_initial_memory_allocation. + pub async fn get_permits( + &self, + splits: impl IntoIterator, + ) -> Vec { + let (permit_sender, permit_receiver) = oneshot::channel(); + let permit_sizes = splits.into_iter().map(|size| size.as_u64()).collect(); + self.message_sender + .send(SearchPermitMessage::Request { + permit_sender, + permit_sizes, + }) + .expect("Receiver lives longer than sender"); + permit_receiver + .await + .expect("Receiver lives longer than sender") } } -struct InnerSearchPermitProvider { - num_permits_available: usize, - permits_requests: VecDeque>, +struct SearchPermitActor { + msg_receiver: mpsc::UnboundedReceiver, + msg_sender: mpsc::WeakUnboundedSender, + num_warmup_slots_available: usize, + /// Note it is possible for memory_allocated to exceed memory_budget temporarily, + /// if and only if a split leaf search task ended up using more than `initial_allocation`. + /// When it happens, new permits will not be assigned until the memory is freed. 
+ total_memory_budget: u64, + total_memory_allocated: u64, + permits_requests: VecDeque<(oneshot::Sender, u64)>, + #[cfg(test)] + stopped: watch::Sender, } -impl InnerSearchPermitProvider { - fn get_permit( - &mut self, - inner_arc: &Arc>, - ) -> oneshot::Receiver { - let (tx, rx) = oneshot::channel(); - self.permits_requests.push_back(tx); - self.assign_available_permits(inner_arc); - rx - } - - fn get_permits( - &mut self, - num_permits: usize, - inner_arc: &Arc>, - ) -> Vec> { - let mut permits = Vec::with_capacity(num_permits); - for _ in 0..num_permits { - let (tx, rx) = oneshot::channel(); - self.permits_requests.push_back(tx); - permits.push(rx); +impl SearchPermitActor { + async fn run(mut self) { + // Stops when the last clone of SearchPermitProvider is dropped. + while let Some(msg) = self.msg_receiver.recv().await { + self.handle_message(msg); + } + #[cfg(test)] + self.stopped.send(true).ok(); + } + + fn handle_message(&mut self, msg: SearchPermitMessage) { + match msg { + SearchPermitMessage::Request { + permit_sizes, + permit_sender, + } => { + let mut permits = Vec::with_capacity(permit_sizes.len()); + for permit_size in permit_sizes { + let (tx, rx) = oneshot::channel(); + self.permits_requests.push_back((tx, permit_size)); + permits.push(SearchPermitFuture(rx)); + } + self.assign_available_permits(); + permit_sender + .send(permits) + // This is a request response pattern, so we can safely ignore the error. 
+ .expect("Receiver lives longer than sender"); + } + SearchPermitMessage::UpdateMemory { memory_delta } => { + if self.total_memory_allocated as i64 + memory_delta < 0 { + panic!("More memory released than allocated, should never happen.") + } + self.total_memory_allocated = + (self.total_memory_allocated as i64 + memory_delta) as u64; + self.assign_available_permits(); + } + SearchPermitMessage::FreeWarmupSlot => { + self.num_warmup_slots_available += 1; + self.assign_available_permits(); + } + SearchPermitMessage::Drop { + memory_size, + warmup_slot_freed, + } => { + if !warmup_slot_freed { + self.num_warmup_slots_available += 1; + } + self.total_memory_allocated = self + .total_memory_allocated + .checked_sub(memory_size) + .expect("More memory released than allocated, should never happen."); + self.assign_available_permits(); + } } - self.assign_available_permits(inner_arc); - permits } - fn recycle_permit(&mut self, inner_arc: &Arc>) { - self.num_permits_available += 1; - self.assign_available_permits(inner_arc); + fn pop_next_request_if_serviceable(&mut self) -> Option<(oneshot::Sender, u64)> { + if self.num_warmup_slots_available == 0 { + return None; + } + if let Some((_, next_permit_size)) = self.permits_requests.front() { + if self.total_memory_allocated + next_permit_size <= self.total_memory_budget { + return self.permits_requests.pop_front(); + } + } + None } - fn assign_available_permits(&mut self, inner_arc: &Arc>) { - while self.num_permits_available > 0 { - let Some(sender) = self.permits_requests.pop_front() else { - break; - }; + fn assign_available_permits(&mut self) { + while let Some((permit_requester_tx, next_permit_size)) = + self.pop_next_request_if_serviceable() + { let mut ongoing_gauge_guard = GaugeGuard::from_gauge( &crate::SEARCH_METRICS.leaf_search_single_split_tasks_ongoing, ); ongoing_gauge_guard.add(1); - let send_res = sender.send(SearchPermit { - _ongoing_gauge_guard: ongoing_gauge_guard, - inner_arc: inner_arc.clone(), - 
recycle_on_drop: true, - }); - match send_res { - Ok(()) => { - self.num_permits_available -= 1; - } - Err(search_permit) => { - search_permit.drop_without_recycling_permit(); - } - } + self.total_memory_allocated += next_permit_size; + self.num_warmup_slots_available -= 1; + permit_requester_tx + .send(SearchPermit { + _ongoing_gauge_guard: ongoing_gauge_guard, + msg_sender: self.msg_sender.clone(), + memory_allocation: next_permit_size, + warmup_slot_freed: false, + }) + // if the requester dropped its receiver, we drop the newly + // created SearchPermit which releases the resources + .ok(); } crate::SEARCH_METRICS .leaf_search_single_split_tasks_pending @@ -128,41 +240,93 @@ impl InnerSearchPermitProvider { } } +#[derive(Debug)] pub struct SearchPermit { _ongoing_gauge_guard: GaugeGuard<'static>, - inner_arc: Arc>, - recycle_on_drop: bool, + msg_sender: mpsc::WeakUnboundedSender, + memory_allocation: u64, + warmup_slot_freed: bool, } impl SearchPermit { - fn drop_without_recycling_permit(mut self) { - self.recycle_on_drop = false; - drop(self); + /// Update the memory usage attached to this permit. + /// + /// This will increase or decrease the available memory in the [`SearchPermitProvider`]. + pub fn update_memory_usage(&mut self, new_memory_usage: ByteSize) { + let new_usage_bytes = new_memory_usage.as_u64(); + let memory_delta = new_usage_bytes as i64 - self.memory_allocation as i64; + self.memory_allocation = new_usage_bytes; + self.send_if_still_running(SearchPermitMessage::UpdateMemory { memory_delta }); + } + + /// Drop the warmup permit, allowing more downloads to be started. Only one + /// slot is attached to each permit so calling this again has no effect. 
+ pub fn free_warmup_slot(&mut self) { + if self.warmup_slot_freed { + return; + } + self.warmup_slot_freed = true; + self.send_if_still_running(SearchPermitMessage::FreeWarmupSlot); + } + + pub fn memory_allocation(&self) -> ByteSize { + ByteSize(self.memory_allocation) + } + + fn send_if_still_running(&self, msg: SearchPermitMessage) { + if let Some(sender) = self.msg_sender.upgrade() { + sender + .send(msg) + // Receiver instance in the event loop is never dropped or + // closed as long as there is a strong sender reference. + .expect("Receiver should live longer than sender"); + } } } impl Drop for SearchPermit { fn drop(&mut self) { - if !self.recycle_on_drop { - return; + self.send_if_still_running(SearchPermitMessage::Drop { + memory_size: self.memory_allocation, + warmup_slot_freed: self.warmup_slot_freed, + }); + } +} + +#[derive(Debug)] +pub struct SearchPermitFuture(oneshot::Receiver); + +impl Future for SearchPermitFuture { + type Output = SearchPermit; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let receiver = Pin::new(&mut self.get_mut().0); + match receiver.poll(cx) { + Poll::Ready(Ok(search_permit)) => Poll::Ready(search_permit), + Poll::Ready(Err(_)) => panic!("Failed to acquire permit. This should never happen! 
Please, report on https://github.com/quickwit-oss/quickwit/issues."), + Poll::Pending => Poll::Pending, } - let mut inner_guard = self.inner_arc.lock().unwrap(); - inner_guard.recycle_permit(&self.inner_arc.clone()); } } #[cfg(test)] mod tests { + use std::iter::repeat; + use std::time::Duration; + + use futures::StreamExt; + use rand::seq::SliceRandom; use tokio::task::JoinSet; use super::*; #[tokio::test] - async fn test_search_permits_get_permits_future() { - // We test here that `get_permits_futures` does not interleave - let search_permits = SearchPermitProvider::new(1); + async fn test_search_permit_order() { + let permit_provider = SearchPermitProvider::new(1, ByteSize::mb(100)); let mut all_futures = Vec::new(); - let first_batch_of_permits = search_permits.get_permits(10); + let first_batch_of_permits = permit_provider + .get_permits(repeat(ByteSize::mb(10)).take(10)) + .await; assert_eq!(first_batch_of_permits.len(), 10); all_futures.extend( first_batch_of_permits @@ -171,7 +335,9 @@ mod tests { .map(move |(i, fut)| ((1, i), fut)), ); - let second_batch_of_permits = search_permits.get_permits(10); + let second_batch_of_permits = permit_provider + .get_permits(repeat(ByteSize::mb(10)).take(10)) + .await; assert_eq!(second_batch_of_permits.len(), 10); all_futures.extend( second_batch_of_permits @@ -180,7 +346,6 @@ mod tests { .map(move |(i, fut)| ((2, i), fut)), ); - use rand::seq::SliceRandom; // not super useful, considering what join set does, but still a tiny bit more sound. all_futures.shuffle(&mut rand::thread_rng()); @@ -206,15 +371,110 @@ mod tests { } #[tokio::test] - async fn test_search_permits_receiver_race_condition() { - // Here we test that we don't have a problem if the Receiver is dropped. - // In particular, we want to check that there is not a race condition where drop attempts to - // lock the mutex. 
- let search_permits = SearchPermitProvider::new(1); - let permit_rx = search_permits.get_permit(); - let permit_rx2 = search_permits.get_permit(); - drop(permit_rx2); - drop(permit_rx); - let _permit_rx = search_permits.get_permit(); + async fn test_search_permit_early_drops() { + let permit_provider = SearchPermitProvider::new(1, ByteSize::mb(100)); + let permit_fut1 = permit_provider + .get_permits(vec![ByteSize::mb(10)]) + .await + .into_iter() + .next() + .unwrap(); + let permit_fut2 = permit_provider + .get_permits([ByteSize::mb(10)]) + .await + .into_iter() + .next() + .unwrap(); + drop(permit_fut1); + let permit = permit_fut2.await; + assert_eq!(permit.memory_allocation, ByteSize::mb(10).as_u64()); + assert_eq!(*permit_provider.actor_stopped.borrow(), false); + + let _permit_fut3 = permit_provider + .get_permits([ByteSize::mb(10)]) + .await + .into_iter() + .next() + .unwrap(); + let mut actor_stopped = permit_provider.actor_stopped.clone(); + drop(permit_provider); + { + actor_stopped.changed().await.unwrap(); + assert!(*actor_stopped.borrow()); + } + } + + /// Tries to wait for a permit + async fn try_get(permit_fut: SearchPermitFuture) -> anyhow::Result { + // using a short timeout is a bit flaky, but it should be enough for these tests + let permit = tokio::time::timeout(Duration::from_millis(20), permit_fut).await?; + Ok(permit) + } + + #[tokio::test] + async fn test_memory_budget() { + let permit_provider = SearchPermitProvider::new(100, ByteSize::mb(100)); + let mut permit_futs = permit_provider + .get_permits(repeat(ByteSize::mb(10)).take(14)) + .await; + let mut remaining_permit_futs = permit_futs.split_off(10).into_iter(); + assert_eq!(remaining_permit_futs.len(), 4); + // we should be able to obtain 10 permits right away (100MB / 10MB) + let mut permits: Vec = futures::stream::iter(permit_futs.into_iter()) + .buffered(1) + .collect() + .await; + // the next permit is blocked by the memory budget + let next_blocked_permit_fut = 
remaining_permit_futs.next().unwrap(); + try_get(next_blocked_permit_fut).await.unwrap_err(); + // if we drop one of the permits, we can get a new one + permits.drain(0..1); + let next_permit_fut = remaining_permit_futs.next().unwrap(); + let _new_permit = try_get(next_permit_fut).await.unwrap(); + // the next permit is blocked again by the memory budget + let next_blocked_permit_fut = remaining_permit_futs.next().unwrap(); + try_get(next_blocked_permit_fut).await.unwrap_err(); + // by setting a more accurate memory usage after a completed warmup, we can get more permits + permits[0].update_memory_usage(ByteSize::mb(4)); + permits[1].update_memory_usage(ByteSize::mb(6)); + let next_permit_fut = remaining_permit_futs.next().unwrap(); + try_get(next_permit_fut).await.unwrap(); + } + + #[tokio::test] + async fn test_warmup_slot() { + let permit_provider = SearchPermitProvider::new(10, ByteSize::mb(100)); + let mut permit_futs = permit_provider + .get_permits(repeat(ByteSize::mb(1)).take(16)) + .await; + let mut remaining_permit_futs = permit_futs.split_off(10).into_iter(); + assert_eq!(remaining_permit_futs.len(), 6); + // we should be able to obtain 10 permits right away + let mut permits: Vec = futures::stream::iter(permit_futs.into_iter()) + .buffered(1) + .collect() + .await; + // the next permit is blocked by the warmup slots + let next_blocked_permit_fut = remaining_permit_futs.next().unwrap(); + try_get(next_blocked_permit_fut).await.unwrap_err(); + // if we drop one of the permits, we can get a new one + permits.drain(0..1); + let next_permit_fut = remaining_permit_futs.next().unwrap(); + permits.push(try_get(next_permit_fut).await.unwrap()); + // the next permit is blocked again by the warmup slots + let next_blocked_permit_fut = remaining_permit_futs.next().unwrap(); + try_get(next_blocked_permit_fut).await.unwrap_err(); + // we can explicitly free the warmup slot on a permit + permits[0].free_warmup_slot(); + let next_permit_fut = 
remaining_permit_futs.next().unwrap(); + permits.push(try_get(next_permit_fut).await.unwrap()); + // dropping that same permit does not free up another slot + permits.drain(0..1); + let next_blocked_permit_fut = remaining_permit_futs.next().unwrap(); + try_get(next_blocked_permit_fut).await.unwrap_err(); + // but dropping a permit for which the slot wasn't explicitly free does free up a slot + permits.drain(0..1); + let next_blocked_permit_fut = remaining_permit_futs.next().unwrap(); + permits.push(try_get(next_blocked_permit_fut).await.unwrap()); } } diff --git a/quickwit/quickwit-search/src/search_stream/leaf.rs b/quickwit/quickwit-search/src/search_stream/leaf.rs index 941e0d12612..0659965b40d 100644 --- a/quickwit/quickwit-search/src/search_stream/leaf.rs +++ b/quickwit/quickwit-search/src/search_stream/leaf.rs @@ -29,7 +29,7 @@ use quickwit_proto::search::{ LeafSearchStreamResponse, OutputFormat, SearchRequest, SearchStreamRequest, SplitIdAndFooterOffsets, }; -use quickwit_storage::Storage; +use quickwit_storage::{ByteRangeCache, Storage}; use tantivy::columnar::{DynamicColumn, HasAssociatedColumnType}; use tantivy::fastfield::Column; use tantivy::query::Query; @@ -116,6 +116,7 @@ async fn leaf_search_stream_single_split( mut stream_request: SearchStreamRequest, storage: Arc, ) -> crate::Result { + // TODO: Should we track the memory here using the SearchPermitProvider? 
let _leaf_split_stream_permit = searcher_context .split_stream_semaphore .acquire() @@ -127,12 +128,14 @@ async fn leaf_search_stream_single_split( &split, ); - let index = open_index_with_caches( + let cache = + ByteRangeCache::with_infinite_capacity(&quickwit_storage::STORAGE_METRICS.shortlived_cache); + let (index, _) = open_index_with_caches( &searcher_context, storage, &split, Some(doc_mapper.tokenizer_manager()), - true, + Some(cache), ) .await?; let split_schema = index.schema(); diff --git a/quickwit/quickwit-search/src/service.rs b/quickwit/quickwit-search/src/service.rs index 0029f4dd3a7..d566463b42e 100644 --- a/quickwit/quickwit-search/src/service.rs +++ b/quickwit/quickwit-search/src/service.rs @@ -488,8 +488,10 @@ impl SearcherContext { capacity_in_bytes, &quickwit_storage::STORAGE_METRICS.split_footer_cache, ); - let leaf_search_split_semaphore = - SearchPermitProvider::new(searcher_config.max_num_concurrent_split_searches); + let leaf_search_split_semaphore = SearchPermitProvider::new( + searcher_config.max_num_concurrent_split_searches, + searcher_config.warmup_memory_budget, + ); let split_stream_semaphore = Semaphore::new(searcher_config.max_num_concurrent_split_streams); let fast_field_cache_capacity = searcher_config.fast_field_cache_capacity.as_u64() as usize; diff --git a/quickwit/quickwit-storage/src/cache/byte_range_cache.rs b/quickwit/quickwit-storage/src/cache/byte_range_cache.rs index 9ef3b7f523f..425e4f9a043 100644 --- a/quickwit/quickwit-storage/src/cache/byte_range_cache.rs +++ b/quickwit/quickwit-storage/src/cache/byte_range_cache.rs @@ -21,7 +21,8 @@ use std::borrow::{Borrow, Cow}; use std::collections::BTreeMap; use std::ops::Range; use std::path::{Path, PathBuf}; -use std::sync::Mutex; +use std::sync::atomic::{AtomicU64, Ordering}; +use std::sync::{Arc, Mutex}; use tantivy::directory::OwnedBytes; @@ -344,31 +345,54 @@ impl Drop for NeedMutByteRangeCache { /// cached data, the changes may or may not get recorded. 
/// /// At the moment this is hardly a cache as it features no eviction policy. +#[derive(Clone)] pub struct ByteRangeCache { - inner: Mutex>, + inner_arc: Arc, +} + +struct Inner { + num_stored_bytes: AtomicU64, + need_mut_byte_range_cache: Mutex>, } impl ByteRangeCache { /// Creates a slice cache that never removes any entry. pub fn with_infinite_capacity(cache_counters: &'static CacheMetrics) -> Self { + let need_mut_byte_range_cache = + NeedMutByteRangeCache::with_infinite_capacity(cache_counters); + let inner = Inner { + num_stored_bytes: AtomicU64::default(), + need_mut_byte_range_cache: Mutex::new(need_mut_byte_range_cache), + }; ByteRangeCache { - inner: Mutex::new(NeedMutByteRangeCache::with_infinite_capacity( - cache_counters, - )), + inner_arc: Arc::new(inner), } } + /// Overall amount of bytes stored in the cache. + pub fn get_num_bytes(&self) -> u64 { + self.inner_arc.num_stored_bytes.load(Ordering::Relaxed) + } + /// If available, returns the cached view of the slice. pub fn get_slice(&self, path: &Path, byte_range: Range) -> Option { - self.inner.lock().unwrap().get_slice(path, byte_range) + self.inner_arc + .need_mut_byte_range_cache + .lock() + .unwrap() + .get_slice(path, byte_range) } /// Put the given amount of data in the cache. pub fn put_slice(&self, path: PathBuf, byte_range: Range, bytes: OwnedBytes) { - self.inner - .lock() - .unwrap() - .put_slice(path, byte_range, bytes) + let mut need_mut_byte_range_cache_locked = + self.inner_arc.need_mut_byte_range_cache.lock().unwrap(); + need_mut_byte_range_cache_locked.put_slice(path, byte_range, bytes); + let num_bytes = need_mut_byte_range_cache_locked.num_bytes; + drop(need_mut_byte_range_cache_locked); + self.inner_arc + .num_stored_bytes + .store(num_bytes, Ordering::Relaxed); } } @@ -446,13 +470,13 @@ mod tests { .sum(); // in some case we have ranges touching each other, count_items count them // as only one, but cache count them as 2. 
- assert!(cache.inner.lock().unwrap().num_items >= expected_item_count as u64); + assert!(cache.inner_arc.need_mut_byte_range_cache.lock().unwrap().num_items >= expected_item_count as u64); let expected_byte_count = state.values() .flatten() .filter(|stored| **stored) .count(); - assert_eq!(cache.inner.lock().unwrap().num_bytes, expected_byte_count as u64); + assert_eq!(cache.inner_arc.need_mut_byte_range_cache.lock().unwrap().num_bytes, expected_byte_count as u64); } Operation::Get { range, @@ -519,7 +543,7 @@ mod tests { ); { - let mutable_cache = cache.inner.lock().unwrap(); + let mutable_cache = cache.inner_arc.need_mut_byte_range_cache.lock().unwrap(); assert_eq!(mutable_cache.cache.len(), 4); assert_eq!(mutable_cache.num_items, 4); assert_eq!(mutable_cache.cache_counters.in_cache_count.get(), 4); @@ -531,7 +555,7 @@ mod tests { { // now they should've been merged, except the last one - let mutable_cache = cache.inner.lock().unwrap(); + let mutable_cache = cache.inner_arc.need_mut_byte_range_cache.lock().unwrap(); assert_eq!(mutable_cache.cache.len(), 2); assert_eq!(mutable_cache.num_items, 2); assert_eq!(mutable_cache.cache_counters.in_cache_count.get(), 2); From 3ce58e702b8853f47391726295031afbbb9c7165 Mon Sep 17 00:00:00 2001 From: Alexander Lyon Date: Tue, 17 Dec 2024 11:46:21 +0100 Subject: [PATCH 24/27] make jaeger span attribute-to-tag conversion exhaustive (#5574) --- quickwit/quickwit-jaeger/src/lib.rs | 178 +++++++++++++++++++--------- 1 file changed, 124 insertions(+), 54 deletions(-) diff --git a/quickwit/quickwit-jaeger/src/lib.rs b/quickwit/quickwit-jaeger/src/lib.rs index 815f5ef7015..2c71213a1ae 100644 --- a/quickwit/quickwit-jaeger/src/lib.rs +++ b/quickwit/quickwit-jaeger/src/lib.rs @@ -24,7 +24,7 @@ use std::sync::Arc; use std::time::Instant; use async_trait::async_trait; -use itertools::Itertools; +use itertools::{Either, Itertools}; use prost::Message; use prost_types::{Duration as WellKnownDuration, Timestamp as WellKnownTimestamp}; use 
quickwit_config::JaegerConfig; @@ -772,7 +772,7 @@ fn qw_span_to_jaeger_span(qw_span_json: &str) -> Result { qw_span.resource_attributes.remove("service.name"); let process = Some(JaegerProcess { service_name: qw_span.service_name, - tags: otlp_attributes_to_jaeger_tags(qw_span.resource_attributes)?, + tags: otlp_attributes_to_jaeger_tags(qw_span.resource_attributes), }); let logs: Vec = qw_span .events @@ -780,7 +780,7 @@ fn qw_span_to_jaeger_span(qw_span_json: &str) -> Result { .map(qw_event_to_jaeger_log) .collect::>()?; - let mut tags = otlp_attributes_to_jaeger_tags(qw_span.span_attributes)?; + let mut tags = otlp_attributes_to_jaeger_tags(qw_span.span_attributes); inject_dropped_count_tags( &mut tags, qw_span.span_dropped_attributes_count, @@ -944,55 +944,90 @@ fn inject_span_status_tags(tags: &mut Vec, span_status: QwSpanSt }; } -/// Converts OpenTelemetry attributes to Jaeger tags. +/// Converts OpenTelemetry attributes to Jaeger tags. Objects are flattened with +/// their keys prefixed with the parent keys delimited by a dot. +/// /// fn otlp_attributes_to_jaeger_tags( - attributes: HashMap, -) -> Result, Status> { - let mut tags = Vec::with_capacity(attributes.len()); - for (key, value) in attributes { - let mut tag = JaegerKeyValue { - key, - v_type: ValueType::String as i32, - v_str: String::new(), - v_bool: false, - v_int64: 0, - v_float64: 0.0, - v_binary: Vec::new(), - }; - match value { - // Array values MUST be serialized to string like a JSON list. 
- JsonValue::Array(values) => { - tag.v_type = ValueType::String as i32; - tag.v_str = serde_json::to_string(&values) - .expect("A vec of `serde_json::Value` values should be JSON serializable."); - } - JsonValue::Bool(value) => { - tag.v_type = ValueType::Bool as i32; - tag.v_bool = value; - } - JsonValue::Number(number) => { - if let Some(value) = number.as_i64() { - tag.v_type = ValueType::Int64 as i32; - tag.v_int64 = value; - } else if let Some(value) = number.as_f64() { - tag.v_type = ValueType::Float64 as i32; - tag.v_float64 = value + attributes: impl IntoIterator, +) -> Vec { + otlp_attributes_to_jaeger_tags_inner(attributes, None) +} + +/// Inner helper for `otpl_attributes_to_jaeger_tags` recursive call +/// +/// PERF: as long as `attributes` IntoIterator implementation correctly sets the +/// lower bound then collect should allocate efficiently. Note that the flat map +/// may cause more allocations as we cannot predict the number of elements in the +/// iterator. +fn otlp_attributes_to_jaeger_tags_inner( + attributes: impl IntoIterator, + parent_key: Option<&str>, +) -> Vec { + attributes + .into_iter() + .map(|(key, value)| { + let key = parent_key + .map(|parent_key| format!("{parent_key}.{key}")) + .unwrap_or(key); + match value { + JsonValue::Array(values) => { + Either::Left(Some(JaegerKeyValue { + key, + v_type: ValueType::String as i32, + // Array values MUST be serialized to string like a JSON list. 
+ v_str: serde_json::to_string(&values).expect( + "A vec of `serde_json::Value` values should be JSON serializable.", + ), + ..Default::default() + })) + } + JsonValue::Bool(v_bool) => Either::Left(Some(JaegerKeyValue { + key, + v_type: ValueType::Bool as i32, + v_bool, + ..Default::default() + })), + JsonValue::Number(number) => { + let value = if let Some(v_int64) = number.as_i64() { + Some(JaegerKeyValue { + key, + v_type: ValueType::Int64 as i32, + v_int64, + ..Default::default() + }) + } else if let Some(v_float64) = number.as_f64() { + Some(JaegerKeyValue { + key, + v_type: ValueType::Float64 as i32, + v_float64, + ..Default::default() + }) + } else { + // Print some error rather than silently ignoring the value. + warn!("ignoring unrepresentable number value: {number:?}"); + None + }; + + Either::Left(value) + } + JsonValue::String(v_str) => Either::Left(Some(JaegerKeyValue { + key, + v_type: ValueType::String as i32, + v_str, + ..Default::default() + })), + JsonValue::Null => { + // No use including null values in the tags, so ignore + Either::Left(None) + } + JsonValue::Object(value) => { + Either::Right(otlp_attributes_to_jaeger_tags_inner(value, Some(&key))) } } - JsonValue::String(value) => { - tag.v_type = ValueType::String as i32; - tag.v_str = value - } - _ => { - return Err(Status::internal(format!( - "Failed to serialize attributes: unexpected type `{value:?}`" - ))) - } - }; - tags.push(tag); - } - Ok(tags) + }) + .flat_map(|e| e.into_iter()) + .collect() } /// Converts OpenTelemetry links to Jaeger span references. 
@@ -1036,7 +1071,7 @@ fn qw_event_to_jaeger_log(event: QwEvent) -> Result { let insert_event_name = !event.event_name.is_empty() && !event.event_attributes.contains_key("event"); - let mut fields = otlp_attributes_to_jaeger_tags(event.event_attributes)?; + let mut fields = otlp_attributes_to_jaeger_tags(event.event_attributes); if insert_event_name { fields.push(JaegerKeyValue { @@ -1960,18 +1995,29 @@ mod tests { #[test] fn test_otlp_attributes_to_jaeger_tags() { - let attributes = HashMap::from_iter([ + let mut tags = otlp_attributes_to_jaeger_tags([ ("array_int".to_string(), json!([1, 2])), ("array_str".to_string(), json!(["foo", "bar"])), ("bool".to_string(), json!(true)), ("float".to_string(), json!(1.0)), ("integer".to_string(), json!(1)), ("string".to_string(), json!("foo")), + ( + "object".to_string(), + json!({ + "array_int": [1,2], + "array_str": ["foo", "bar"], + "bool": true, + "float": 1.0, + "integer": 1, + "string": "foo", + }), + ), ]); - let mut tags = otlp_attributes_to_jaeger_tags(attributes).unwrap(); tags.sort_by(|left, right| left.key.cmp(&right.key)); - assert_eq!(tags.len(), 6); + // a tag for the 6 keys in the root, plus 6 more for the nested keys + assert_eq!(tags.len(), 12); assert_eq!(tags[0].key, "array_int"); assert_eq!(tags[0].v_type(), ValueType::String); @@ -1993,9 +2039,33 @@ mod tests { assert_eq!(tags[4].v_type(), ValueType::Int64); assert_eq!(tags[4].v_int64, 1); - assert_eq!(tags[5].key, "string"); + assert_eq!(tags[5].key, "object.array_int"); assert_eq!(tags[5].v_type(), ValueType::String); - assert_eq!(tags[5].v_str, "foo"); + assert_eq!(tags[5].v_str, "[1,2]"); + + assert_eq!(tags[6].key, "object.array_str"); + assert_eq!(tags[6].v_type(), ValueType::String); + assert_eq!(tags[6].v_str, r#"["foo","bar"]"#); + + assert_eq!(tags[7].key, "object.bool"); + assert_eq!(tags[7].v_type(), ValueType::Bool); + assert!(tags[7].v_bool); + + assert_eq!(tags[8].key, "object.float"); + assert_eq!(tags[8].v_type(), ValueType::Float64); + 
assert_eq!(tags[8].v_float64, 1.0); + + assert_eq!(tags[9].key, "object.integer"); + assert_eq!(tags[9].v_type(), ValueType::Int64); + assert_eq!(tags[9].v_int64, 1); + + assert_eq!(tags[10].key, "object.string"); + assert_eq!(tags[10].v_type(), ValueType::String); + assert_eq!(tags[10].v_str, "foo"); + + assert_eq!(tags[11].key, "string"); + assert_eq!(tags[11].v_type(), ValueType::String); + assert_eq!(tags[11].v_str, "foo"); } #[test] From 81bae66063d5da59db9561ca3131489405c8f9df Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 3 Jan 2025 17:24:10 +0100 Subject: [PATCH 25/27] Remove support for 2-digit years in java datetime parser (#5596) --- .../src/java_date_time_format.rs | 104 ++++++------------ 1 file changed, 31 insertions(+), 73 deletions(-) diff --git a/quickwit/quickwit-datetime/src/java_date_time_format.rs b/quickwit/quickwit-datetime/src/java_date_time_format.rs index 1cc035c90f3..1a13bbdc0dc 100644 --- a/quickwit/quickwit-datetime/src/java_date_time_format.rs +++ b/quickwit/quickwit-datetime/src/java_date_time_format.rs @@ -36,7 +36,6 @@ use crate::date_time_format; const JAVA_DATE_FORMAT_TOKENS: &[&str] = &[ "yyyy", "xxxx", - "xx[xx]", "SSSSSSSSS", // For nanoseconds "SSSSSSS", // For microseconds "SSSSSS", // For fractional seconds up to six digits @@ -45,10 +44,8 @@ const JAVA_DATE_FORMAT_TOKENS: &[&str] = &[ "SSS", "SS", "ZZ", - "xx", "ww", "w[w]", - "yy", "MM", "dd", "HH", @@ -112,29 +109,12 @@ fn build_zone_offset(_: &str) -> Option { )) } -fn build_year_item(ptn: &str) -> Option { - let mut full_year = Year::default(); - full_year.repr = YearRepr::Full; - let full_year_component = OwnedFormatItem::Component(Component::Year(full_year)); - - let mut short_year = Year::default(); - short_year.repr = YearRepr::LastTwo; - let short_year_component = OwnedFormatItem::Component(Component::Year(short_year)); - - if ptn.len() == 4 { - Some(full_year_component) - } else if ptn.len() == 2 { - Some(short_year_component) - } else { - 
Some(OwnedFormatItem::First( - vec![full_year_component, short_year_component].into_boxed_slice(), - )) - } -} - -fn build_week_based_year_item(ptn: &str) -> Option { - // TODO no `Component` for that - build_year_item(ptn) +// There is a `YearRepr::LastTwo` representation in the time crate, but the parser is unreliable, so +// we only support `YearRepr::Full` for now. See also https://github.com/time-rs/time/issues/649. +const fn year_item() -> Option { + let mut year_component = Year::default(); + year_component.repr = YearRepr::Full; + Some(OwnedFormatItem::Component(Component::Year(year_component))) } fn build_month_item(ptn: &str) -> Option { @@ -256,8 +236,7 @@ fn match_java_date_format_token( } let format_item = match *token { - "yyyy" | "yy" => build_year_item(token), - "xxxx" | "xx[xx]" | "xx" => build_week_based_year_item(token), + "yyyy" | "xxxx" => year_item(), "MM" | "M" => build_month_item(token), "dd" | "d" => build_day_item(token), "HH" | "H" => build_hour_item(token), @@ -269,7 +248,7 @@ fn match_java_date_format_token( "Z" => build_zone_offset(token), "ww" | "w[w]" | "w" => build_week_of_year_item(token), "e" => build_day_of_week_item(token), - _ => return Err(format!("Unrecognized token '{}'", token)), + _ => return Err(format!("unrecognized token '{token}'")), }; return Ok(format_item); } @@ -299,16 +278,16 @@ fn resolve_java_datetime_format_alias(java_datetime_format: &str) -> &str { m.insert("basic_date", "yyyyMMdd"); m.insert("strict_basic_week_date", "xxxx'W'wwe"); - m.insert("basic_week_date", "xx[xx]'W'wwe"); + m.insert("basic_week_date", "xxxx'W'wwe"); m.insert("strict_basic_week_date_time", "xxxx'W'wwe'T'HHmmss.SSSZ"); - m.insert("basic_week_date_time", "xx[xx]'W'wwe'T'HHmmss.SSSZ"); + m.insert("basic_week_date_time", "xxxx'W'wwe'T'HHmmss.SSSZ"); m.insert( "strict_basic_week_date_time_no_millis", "xxxx'W'wwe'T'HHmmssZ", ); - m.insert("basic_week_date_time_no_millis", "xx[xx]'W'wwe'T'HHmmssZ"); + m.insert("basic_week_date_time_no_millis", 
"xxxx'W'wwe'T'HHmmssZ"); m.insert("strict_week_date", "xxxx-'W'ww-e"); m.insert("week_date", "xxxx-'W'w[w]-e"); @@ -356,8 +335,8 @@ impl StrptimeParser { .is_empty() { return Err(format!( - "datetime string `{}` does not match strptime format `{}`", - date_time_str, &self.strptime_format + "datetime string `{date_time_str}` does not match strptime format `{}`", + self.strptime_format )); } @@ -590,17 +569,12 @@ mod tests { "2024W313", datetime!(2024-08-01 0:00:00.0 +00:00:00), ); - test_parse_java_datetime_aux( - "basic_week_date", - "24W313", - datetime!(2024-08-01 0:00:00.0 +00:00:00), - ); - // // ❌ 'the 'year' component could not be parsed' - // test_parse_java_datetime_aux( - // "basic_week_date", - // "1W313", - // datetime!(2018-08-02 0:00:00.0 +00:00:00), - // ); + let parser = StrptimeParser::from_java_datetime_format("basic_week_date").unwrap(); + parser.parse_date_time("24W313").unwrap_err(); + + let parser = StrptimeParser::from_java_datetime_format("basic_week_date").unwrap(); + parser.parse_date_time("1W313").unwrap_err(); + test_parse_java_datetime_aux( "basic_week_date_time", "2018W313T121212.1Z", @@ -706,7 +680,7 @@ mod tests { for (date_str, &expected_dt) in dates.iter().zip(expected.iter()) { let parsed_dt = parser .parse_date_time(date_str) - .unwrap_or_else(|e| panic!("Failed to parse {}: {}", date_str, e)); + .unwrap_or_else(|error| panic!("failed to parse {date_str}: {error}")); assert_eq!(parsed_dt, expected_dt); } } @@ -736,18 +710,18 @@ mod tests { for (date_str, &expected_dt) in dates.iter().zip(expected.iter()) { let parsed_dt = parser .parse_date_time(date_str) - .unwrap_or_else(|e| panic!("Failed to parse {}: {}", date_str, e)); + .unwrap_or_else(|error| panic!("failed to parse {date_str}: {error}")); assert_eq!(parsed_dt, expected_dt); } } #[test] fn test_parse_java_datetime_format_items() { - let format_str = "xx[xx]'W'wwe"; + let format_str = "xxxx'W'wwe"; let result = parse_java_datetime_format_items(format_str).unwrap(); // We 
expect the tokens to be parsed as: - // - 'xx[xx]' (week-based year) with optional length + // - 'xxxx' (week-based year) // - 'W' (literal) // - 'ww' (week of year) // - 'e' (day of week) @@ -756,37 +730,22 @@ mod tests { // Verify each token match &result[0] { - OwnedFormatItem::First(boxed_slice) => { - assert_eq!(boxed_slice.len(), 2); - match (&boxed_slice[0], &boxed_slice[1]) { - ( - OwnedFormatItem::Component(Component::Year(_)), - OwnedFormatItem::Component(Component::Year(_)), - ) => {} - unexpected => { - panic!("Expected two Year components, but found: {:?}", unexpected) - } - } + OwnedFormatItem::Component(Component::Year(year)) => { + assert_eq!(year.repr, YearRepr::Full); } - unexpected => panic!( - "Expected First with two Year components, but found: {:?}", - unexpected - ), + unexpected => panic!("expected Year, but found: {unexpected:?}",), } - match &result[1] { OwnedFormatItem::Literal(lit) => assert_eq!(lit.as_ref(), b"W"), - unexpected => panic!("Expected literal 'W', but found: {:?}", unexpected), + unexpected => panic!("expected literal 'W', but found: {unexpected:?}"), } - match &result[2] { OwnedFormatItem::Component(Component::WeekNumber(_)) => {} - unexpected => panic!("Expected WeekNumber component, but found: {:?}", unexpected), + unexpected => panic!("expected WeekNumber component, but found: {unexpected:?}"), } - match &result[3] { OwnedFormatItem::Component(Component::Weekday(_)) => {} - unexpected => panic!("Expected Weekday component, but found: {:?}", unexpected), + unexpected => panic!("expected Weekday component, but found: {unexpected:?}"), } } @@ -803,15 +762,14 @@ mod tests { for (input, expected) in test_cases.iter() { let result = parser.parse_date_time(input).unwrap(); - assert_eq!(result, *expected, "Failed to parse {}", input); + assert_eq!(result, *expected, "failed to parse {input}"); } // Test error case let error_case = "2023-1430"; assert!( parser.parse_date_time(error_case).is_err(), - "Expected error for input: 
{}", - error_case + "expected error for input: {error_case}", ); } } From ccf522813c6b0537118614408a76dccb7b330bf3 Mon Sep 17 00:00:00 2001 From: Adrien Guillo Date: Fri, 3 Jan 2025 17:41:30 +0100 Subject: [PATCH 26/27] Use `unresolvable.wikipedia.org` in `test_peer_socket_addrs` (#5594) Co-authored-by: trinity-1686a --- quickwit/quickwit-config/src/node_config/serialize.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs index b208309af4c..dd3adef6d28 100644 --- a/quickwit/quickwit-config/src/node_config/serialize.rs +++ b/quickwit/quickwit-config/src/node_config/serialize.rs @@ -879,7 +879,7 @@ mod tests { ..Default::default() }, peer_seeds: ConfigValue::for_test(List(vec![ - "unresolvable-host".to_string(), + "unresolvable.example.com".to_string(), "localhost".to_string(), "localhost:1337".to_string(), "127.0.0.1".to_string(), From ec200599d4ef5cf24e85ed09787c2780cb9eb138 Mon Sep 17 00:00:00 2001 From: trinity-1686a Date: Mon, 6 Jan 2025 10:14:05 +0100 Subject: [PATCH 27/27] start adding documentation on updating mapping (#5290) * start adding documentation on updating mapping * improve update-api documentation * document valid conversions * improve section on reversibility * typos and wording Co-authored-by: Adrien Guillo * reword sentence --------- Co-authored-by: Adrien Guillo --- docs/reference/rest-api.md | 8 +- docs/reference/updating-mapper.md | 178 ++++++++++++++++++ .../src/doc_mapper/tantivy_val_to_json.rs | 5 + 3 files changed, 186 insertions(+), 5 deletions(-) create mode 100644 docs/reference/updating-mapper.md diff --git a/docs/reference/rest-api.md b/docs/reference/rest-api.md index b4ad3e853db..604b1083aad 100644 --- a/docs/reference/rest-api.md +++ b/docs/reference/rest-api.md @@ -320,12 +320,10 @@ Updates the configurations of an index. 
This endpoint follows PUT semantics, whi - The retention policy update is automatically picked up by the janitor service on its next state refresh. - The search settings update is automatically picked up by searcher nodes when the next query is executed. -- The indexing settings update is not automatically picked up by the indexer nodes, they need to be manually restarted. -- The doc mapping update is not automatically picked up by the indexer nodes, they have to be manually restarted. +- The indexing settings update is automatically picked up by the indexer nodes once the control plane emits a new indexing plan. +- The doc mapping update is automatically picked up by the indexer nodes once the control plane emits a new indexing plan. -Updating the doc mapping doesn't reindex existing data. Queries and answers are mapped on a best effort basis when querying older splits. -It is also not possible to update the timestamp field, or to modify/remove existing non-default tokenizers (but it is possible to change -which tokenizer is used for a field). +Updating the doc mapping doesn't reindex existing data. Queries and results are mapped on a best-effort basis when querying older splits. For more details, check [the reference](updating-mapper.md) out. #### PUT payload diff --git a/docs/reference/updating-mapper.md b/docs/reference/updating-mapper.md new file mode 100644 index 00000000000..2341215244b --- /dev/null +++ b/docs/reference/updating-mapper.md @@ -0,0 +1,178 @@ +# Updating the doc mapping of an index + +Quickwit allows updating the mapping it uses to add more fields to an existing index or change how they are indexed. In doing so, it does not reindex existing data but still lets you search through older documents where possible. + +## Indexing + +When you update a doc mapping for an index, Quickwit will restart indexing pipelines to take the changes into account.
As both this operation and the document ingestion are asynchronous, there is no strict happens-before relationship between ingestion and update. This means a document ingested just before the update may be indexed according to the newer doc mapper, and a document ingested just after the update may be indexed with the older doc mapper. + +## Querying + +Quickwit always validates queries against the most recent mapping. +If a query was valid under a previous mapping but is not compatible with the newer mapping, that query will be rejected. +For instance, if a field which was indexed no longer is, any query that uses it will become invalid. +On the other hand, if a query was not valid for a previous doc mapping, but is valid under the new doc mapping, Quickwit will process the query. +When querying newer splits, it will behave normally; when querying older splits, it will try to execute the query as correctly as possible. +If you find yourself in a situation where older splits cause a valid request to return an error, please open a bug report. +See examples 1 and 2 below for clarification. + +Changes in tokenizer affect only newer splits; older splits keep using the tokenizers they were created with. + +Documents retrieved are mapped from Quickwit's internal format to JSON based on the latest doc mapping. This means if fields are deleted, +they will stop appearing (see also Reversibility below) unless mapper mode is Dynamic. If the type of some field changed, it will be converted on a best-effort basis: +integers will get turned into text, text will get turned into integers when it is possible, otherwise, the field is omitted. +See example 3 for clarification. + +## Reversibility + +Quickwit does not modify existing data when receiving a new doc mapping. If you realize that you updated the mapping in a wrong way, you can re-update your index using the previous mapping.
Documents indexed while the mapping was wrong will be impacted, but any document that was committed before the change will be queryable as if nothing happened. + +## Type update reference + +Conversion from a type to itself is omitted. Conversions that never succeed and always omit the field are omitted, too. + + +| type before | type after | behavior | +|-------------|------------|----------| +| u64/i64/f64 | text | convert to decimal string | +| date | text | convert to rfc3339 textual representation | +| ip | text | convert to IPv6 representation. For IPv4, convert to IPv4-mapped IPv6 address (`::ffff:1.2.3.4`) | +| bool | text | convert to "true" or "false" | +| u64/i64/f64 | bool | convert 0/0.0 to false and 1/1.0 to true, otherwise omit | +| text | bool | convert if "true" or "false" (lowercase), otherwise omit | +| text | ip | convert if valid IPv4 or IPv6, otherwise omit | +| text | f64 | convert if valid floating point number, otherwise omit | +| u64/i64 | f64 | convert, possibly with loss of precision | +| bool | f64 | convert to 0.0 for false, and 1.0 for true | +| text | u64 | convert if valid integer in range 0..2\*\*64, otherwise omit | +| i64 | u64 | convert if in range 0..2\*\*63, otherwise omit | +| f64 | u64 | convert if in range 0..2\*\*64, possibly with loss of precision, otherwise omit | +| text | i64 | convert if valid integer in range -2\*\*63..2\*\*63, otherwise omit | +| u64 | i64 | convert if in range 0..2\*\*63, otherwise omit | +| f64 | i64 | convert if in range -2\*\*63..2\*\*63, possibly with loss of precision, otherwise omit | +| bool | i64 | convert to 0 for false, and 1 for true | +| text | datetime | parse according to current input\_format, otherwise omit | +| u64 | datetime | parse according to current input\_format, otherwise omit | +| i64 | datetime | parse according to current input\_format, otherwise omit | +| f64 | datetime | parse according to current input\_format, otherwise omit | +| array\<T> | array\<U> | convert individual elements,
skipping over those which can't be converted | +| T | array\<U\> | convert element, emitting array of a single element, or empty array if it can't be converted | +| array\<T\> | U | convert individual elements, keeping the first which can be converted | +| json | object | try to convert individual elements if they exist inside the object, omit individual elements which can't be | +| object | json | convert individual elements. Previous lists of one element are converted to a single element not in an array. | + +## Examples + +In the below examples, fields which are not relevant are removed for conciseness; you will not be able to use these index configs as is. + +### Example 1 + +before: +```yaml +doc_mapping: + field_mappings: + - name: field1 + type: text + tokenizer: raw +``` + +after: +```yaml +doc_mapping: + field_mappings: + - name: field1 + type: text + indexed: false +``` + +A field changed from being indexed to not being indexed. +A query such as `field1:my_value` was valid, but is now rejected. + +### Example 2 + +before: +```yaml +doc_mapping: + field_mappings: + - name: field1 + type: text + indexed: false + - name: field2 + type: text + tokenizer: raw + +``` + +after: +```yaml +doc_mapping: + field_mappings: + - name: field1 + type: text + tokenizer: raw + - name: field2 + type: text + tokenizer: raw +``` + +A field changed from being not indexed to being indexed. +A query such as `field1:my_value` was invalid before, and is now valid. When querying older splits, it won't return a match, but won't return an error either. +A query such as `field1:my_value OR field2:my_value` is now valid too. For old splits, it will return the same results as `field2:my_value` as field1 wasn't indexed before. For newer splits, it will return the expected results. +A query such as `NOT field1:my_value` would return all documents for old splits, and only documents where `field1` is not `my_value` for newer splits. 
+ + +### Example 3 + +# show cast (trivial, valid and invalid) +# show array to single + +before: +```yaml +doc_mapping: + field_mappings: + - name: field1 + type: text + - name: field2 + type: u64 + - name: field3 + type: array<text> +``` +documents present before the update: +```json +{ + "field1": "123", + "field2": 456, + "field3": ["abc", "def"] +} +{ + "field1": "message", + "field2": 987, + "field3": ["ghi"] +} +``` + +after: +```yaml +doc_mapping: + field_mappings: + - name: field1 + type: u64 + - name: field2 + type: text + - name: field3 + type: text +``` + +When querying this index, the documents returned would become: +```json +{ + "field1": 123, + "field2": "456", + "field3": "abc" +} +{ + // field1 is missing because "message" can't be converted to int + "field2": "987", + "field3": "ghi" +} +``` diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper/tantivy_val_to_json.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper/tantivy_val_to_json.rs index 949f205451b..d281f66217f 100644 --- a/quickwit/quickwit-doc-mapper/src/doc_mapper/tantivy_val_to_json.rs +++ b/quickwit/quickwit-doc-mapper/src/doc_mapper/tantivy_val_to_json.rs @@ -90,6 +90,11 @@ fn value_to_bool(value: TantivyValue) -> Result { 1 => Some(true), _ => None, }, + TantivyValue::F64(number) => match number { + 0.0 => Some(false), + 1.0 => Some(true), + _ => None, + }, TantivyValue::Bool(b) => Some(*b), _ => None, }