Skip to content

Commit

Permalink
add compat tests (#2485)
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz authored Sep 4, 2024
1 parent dc5d31c commit a206c3c
Show file tree
Hide file tree
Showing 14 changed files with 150 additions and 7 deletions.
80 changes: 80 additions & 0 deletions src/compat_tests.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
use std::path::PathBuf;

use schema::*;

use crate::*;

/// Creates a one-document index inside the directory `path`.
///
/// The schema has a text field `label` (`TEXT | STORED`) and a date field
/// `date` (`INDEXED | STORED`). The single committed document carries the
/// label `"dateformat"` and a date built from the nanosecond timestamp
/// `123456`.
///
/// # Panics
///
/// Panics if the directory cannot be created, or if creating the index,
/// creating the writer, adding the document, or committing fails.
fn create_index(path: &str) {
    let mut builder = Schema::builder();
    let label_field = builder.add_text_field("label", TEXT | STORED);
    let date_field = builder.add_date_field("date", INDEXED | STORED);
    let schema = builder.build();

    // The target directory has to exist before the index is created in it.
    std::fs::create_dir_all(path).unwrap();
    let index = Index::create_in_dir(path, schema).unwrap();

    let mut writer = index.writer_with_num_threads(1, 20_000_000).unwrap();
    let document = doc!(
        label_field => "dateformat",
        date_field => DateTime::from_timestamp_nanos(123456)
    );
    writer.add_document(document).unwrap();
    writer.commit().unwrap();
}

/// Writes an index for the current `INDEX_FORMAT_VERSION` to disk.
///
/// Does nothing when the directory for that version already exists, so an
/// existing fixture is never overwritten.
#[test]
fn create_format() {
    let index_dir = path_for_version(&INDEX_FORMAT_VERSION.to_string());
    if !PathBuf::from(&index_dir).exists() {
        create_index(&index_dir);
    }
}

/// Returns the relative directory (with trailing slash) that holds the
/// compatibility-test index for the given format `version`.
fn path_for_version(version: &str) -> String {
    format!("./tests/compat_tests_data/index_v{version}/")
}

/// Opens the stored format-6 index and checks the precision of its dates.
///
/// Not compiled with the `quickwit` feature flag, which uses a different
/// dictionary type.
#[test]
#[cfg(not(feature = "quickwit"))]
fn test_format_6() {
    let index_dir = path_for_version("6");
    let index_v6 = Index::open_in_dir(index_dir).expect("Failed to open index");

    // dates are truncated to Microseconds in v6
    assert_date_time_precision(&index_v6, DateTimePrecision::Microseconds);
}

/// Asserts that the date stored in `index` matches the reference timestamp
/// truncated to `precision`.
///
/// Searches the `label` field for `"dateformat"`, expects exactly one hit,
/// loads that document and compares its first `date` value against
/// `DateTime::from_timestamp_nanos(123456).truncate(precision)`.
///
/// # Panics
///
/// Panics if any lookup, the search or the document retrieval fails, or if
/// the stored date differs from the expected value.
#[cfg(not(feature = "quickwit"))]
fn assert_date_time_precision(index: &Index, precision: DateTimePrecision) {
    use collector::TopDocs;

    let reader = index.reader().expect("Failed to create reader");
    let searcher = reader.searcher();
    let schema = index.schema();

    // Find the single fixture document through its label.
    let label_field = schema.get_field("label").expect("Field 'label' not found");
    let parser = query::QueryParser::for_index(index, vec![label_field]);
    let query = parser
        .parse_query("dateformat")
        .expect("Failed to parse query");
    let hits = searcher
        .search(&query, &TopDocs::with_limit(1))
        .expect("Search failed");
    assert_eq!(hits.len(), 1, "Expected 1 search result");

    let (_score, doc_address) = hits[0];
    let stored_doc: TantivyDocument = searcher
        .doc(doc_address)
        .expect("Failed to retrieve document");

    // Compare the stored date with the reference timestamp at `precision`.
    let date_field = schema.get_field("date").expect("Field 'date' not found");
    let actual = stored_doc
        .get_first(date_field)
        .expect("Date field not found in document")
        .as_datetime()
        .unwrap();
    let expected = DateTime::from_timestamp_nanos(123456).truncate(precision);

    assert_eq!(actual, expected);
}
9 changes: 6 additions & 3 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,12 +202,15 @@ pub mod space_usage;
pub mod store;
pub mod termdict;

mod docset;
mod reader;

#[cfg(test)]
mod compat_tests;

pub use self::reader::{IndexReader, IndexReaderBuilder, ReloadPolicy, Warmer};
pub mod snippet;

mod docset;
use std::fmt;

pub use census::{Inventory, TrackedObject};
Expand All @@ -229,9 +232,9 @@ pub use crate::indexer::{IndexWriter, SingleSegmentIndexWriter};
pub use crate::schema::{Document, TantivyDocument, Term};

/// Index format version.
const INDEX_FORMAT_VERSION: u32 = 6;
pub const INDEX_FORMAT_VERSION: u32 = 6;
/// Oldest index format version this tantivy version can read.
const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;
pub const INDEX_FORMAT_OLDEST_SUPPORTED_VERSION: u32 = 4;

/// Structure version for the index.
#[derive(Clone, PartialEq, Eq, Serialize, Deserialize)]
Expand Down
2 changes: 1 addition & 1 deletion src/query/range_query/range_query_fastfield.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ fn bound_to_value_range<T: MonotonicallyMappableToU64>(
}

#[cfg(test)]
pub mod tests {
mod tests {
use std::ops::{Bound, RangeInclusive};

use common::bounds::BoundsRange;
Expand Down
25 changes: 22 additions & 3 deletions src/termdict/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,26 @@ use self::termdict::{
pub use self::termdict::{TermMerger, TermStreamer};
use crate::postings::TermInfo;

/// Numeric tag naming a term dictionary implementation.
///
/// The discriminants are the `u32` values that `TryFrom<u32>` maps back to
/// the variants.
// NOTE(review): the tag appears to be read from the last 4 bytes of the
// dictionary file, so these values are presumably part of the on-disk
// format and must not be renumbered; confirm against the writer side.
#[derive(Debug, Eq, PartialEq)]
#[repr(u32)]
// NOTE(review): presumably needed because only one variant is referenced
// directly per feature configuration; confirm it is still required.
#[allow(dead_code)]
enum DictionaryType {
    /// Tag value `1`; the `CURRENT_TYPE` when the `quickwit` feature is off.
    Fst = 1,
    /// Tag value `2`.
    SSTable = 2,
}

impl TryFrom<u32> for DictionaryType {
    type Error = &'static str;

    /// Maps a raw `u32` tag to its `DictionaryType` variant.
    ///
    /// # Errors
    ///
    /// Returns a static message when `value` matches no variant.
    fn try_from(value: u32) -> Result<Self, Self::Error> {
        // Tags are taken from the enum discriminants so both stay in sync.
        const FST: u32 = DictionaryType::Fst as u32;
        const SSTABLE: u32 = DictionaryType::SSTable as u32;

        match value {
            FST => Ok(Self::Fst),
            SSTABLE => Ok(Self::SSTable),
            _ => Err("Invalid value for DictionaryType"),
        }
    }
}

#[cfg(not(feature = "quickwit"))]
const CURRENT_TYPE: DictionaryType = DictionaryType::Fst;

Expand All @@ -70,13 +83,19 @@ impl TermDictionary {
let (main_slice, dict_type) = file.split_from_end(4);
let mut dict_type = dict_type.read_bytes()?;
let dict_type = u32::deserialize(&mut dict_type)?;
let dict_type = DictionaryType::try_from(dict_type).map_err(|_| {
io::Error::new(
io::ErrorKind::Other,
format!("Unsuported dictionary type, found {dict_type}"),
)
})?;

if dict_type != CURRENT_TYPE as u32 {
if dict_type != CURRENT_TYPE {
return Err(io::Error::new(
io::ErrorKind::Other,
format!(
"Unsuported dictionary type, expected {}, found {dict_type}",
CURRENT_TYPE as u32,
"Unsuported dictionary type, compiled tantivy with {CURRENT_TYPE:?}, but got \
{dict_type:?}",
),
));
}
Expand Down
1 change: 1 addition & 0 deletions tests/compat_tests_data/index_v6/.managed.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
["00000000000000000000000000000000.store","00000000000000000000000000000000.fast","00000000000000000000000000000000.fieldnorm","00000000000000000000000000000000.term","00000000000000000000000000000000.idx","meta.json","00000000000000000000000000000000.pos"]
Empty file.
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
40 changes: 40 additions & 0 deletions tests/compat_tests_data/index_v6/meta.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"index_settings": {
"docstore_compression": "lz4",
"docstore_blocksize": 16384
},
"segments": [
{
"segment_id": "00000000-0000-0000-0000-000000000000",
"max_doc": 1,
"deletes": null
}
],
"schema": [
{
"name": "label",
"type": "text",
"options": {
"indexing": {
"record": "position",
"fieldnorms": true,
"tokenizer": "default"
},
"stored": true,
"fast": false
}
},
{
"name": "date",
"type": "date",
"options": {
"indexed": true,
"fieldnorms": true,
"fast": false,
"stored": true,
"precision": "seconds"
}
}
],
"opstamp": 2
}

0 comments on commit a206c3c

Please sign in to comment.