From ff7c3f31b97c0871122a5b1e7e84f153db59f092 Mon Sep 17 00:00:00 2001
From: Remi Dettai
Date: Mon, 27 Jan 2025 15:01:21 +0100
Subject: [PATCH] Fix default dynamic mapping config

---
 .../src/index_config/serialize.rs             | 39 +++++++++++++++++++
 .../src/doc_mapper/field_mapping_entry.rs     | 13 ++++++-
 .../quickwit-doc-mapper/src/doc_mapper/mod.rs |  2 +-
 3 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/quickwit/quickwit-config/src/index_config/serialize.rs b/quickwit/quickwit-config/src/index_config/serialize.rs
index cfbe53026ed..2cc5fcdd815 100644
--- a/quickwit/quickwit-config/src/index_config/serialize.rs
+++ b/quickwit/quickwit-config/src/index_config/serialize.rs
@@ -348,6 +348,45 @@ mod test {
         }
     }
 
+    #[test]
+    fn test_default_dynamic_mapping_matches_docs() {
+        let minimal_config_yaml = r#"
+            version: 0.8
+            index_id: hdfs-logs
+            doc_mapping:
+                doc_mapping_uid: 00000000000000000000000000
+        "#;
+        let docs_config_yaml = r#"
+            version: 0.8
+            index_id: hdfs-logs
+            doc_mapping:
+                doc_mapping_uid: 00000000000000000000000000
+                mode: dynamic
+                dynamic_mapping:
+                    indexed: true
+                    stored: true
+                    tokenizer: default
+                    record: basic
+                    expand_dots: true
+                    fast: true
+        "#;
+        {
+            let minimal_index_config: IndexConfig = load_index_config_from_user_config(
+                ConfigFormat::Yaml,
+                minimal_config_yaml.as_bytes(),
+                &Uri::for_test("s3://mybucket"),
+            )
+            .unwrap();
+            let docs_index_config: IndexConfig = load_index_config_from_user_config(
+                ConfigFormat::Yaml,
+                docs_config_yaml.as_bytes(),
+                &Uri::for_test("s3://mybucket"),
+            )
+            .unwrap();
+            assert_eq!(minimal_index_config, docs_index_config);
+        }
+    }
+
     #[test]
     fn test_update_index_root_uri() {
         let original_config_yaml = r#"
diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper/field_mapping_entry.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper/field_mapping_entry.rs
index 7ecde0c8e2c..b98f3bde3eb 100644
--- a/quickwit/quickwit-doc-mapper/src/doc_mapper/field_mapping_entry.rs
+++ b/quickwit/quickwit-doc-mapper/src/doc_mapper/field_mapping_entry.rs
@@ -409,6 +409,14 @@ impl TextIndexingOptions {
             fieldnorms: false,
         }
     }
+
+    fn default_dynamic() -> Self {
+        TextIndexingOptions {
+            tokenizer: QuickwitTextTokenizer::default(),
+            record: IndexRecordOption::Basic,
+            fieldnorms: false,
+        }
+    }
 }
 
 impl Default for TextIndexingOptions {
@@ -611,8 +619,11 @@ impl QuickwitJsonOptions {
     /// Build a default QuickwitJsonOptions for dynamic fields.
     pub fn default_dynamic() -> Self {
         QuickwitJsonOptions {
+            description: None,
+            indexing_options: Some(TextIndexingOptions::default_dynamic()),
+            stored: true,
+            expand_dots: true,
             fast: FastFieldOptions::default_enabled(),
-            ..Default::default()
         }
     }
 }
diff --git a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs
index 491a9e9969e..093e041afe3 100644
--- a/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs
+++ b/quickwit/quickwit-doc-mapper/src/doc_mapper/mod.rs
@@ -266,7 +266,7 @@ mod tests {
             tantivy_schema.get_field_entry(dynamic_field).field_type()
         {
             let text_opt = json_options.get_text_indexing_options().unwrap();
-            assert_eq!(text_opt.tokenizer(), "raw");
+            assert_eq!(text_opt.tokenizer(), "default");
         } else {
             panic!("dynamic field should be of JSON type");
         }