Skip to content

Commit

Permalink
V5.0.0 (#75)
Browse files Browse the repository at this point in the history
* Update references and editor config
* V5.0.0 dynamic fields (#70)
* Search results phrases working with dynamic fields
* Added support for child object dynamic field providers
* Handle serialization of dynamic fields
* Throw error on deserialization of earlier index if fields not found
* Updated documentation
  • Loading branch information
mikegoatly authored Jul 5, 2023
1 parent a908e8c commit 0ddbcf7
Show file tree
Hide file tree
Showing 74 changed files with 2,848 additions and 531 deletions.
60 changes: 57 additions & 3 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,10 @@ csharp_prefer_simple_using_statement = true:suggestion
# Expression-level preferences
csharp_prefer_simple_default_expression = true:suggestion
csharp_style_pattern_local_over_anonymous_function = true:suggestion
csharp_style_prefer_index_operator = true:suggestion
csharp_style_prefer_index_operator = false:suggestion
csharp_style_prefer_range_operator = true:suggestion
csharp_style_unused_value_assignment_preference = discard_variable:suggestion
csharp_style_unused_value_expression_statement_preference = discard_variable:silent
csharp_style_unused_value_assignment_preference = discard_variable:none
csharp_style_unused_value_expression_statement_preference = discard_variable:none

# 'using' directive preferences
csharp_using_directive_placement = outside_namespace:silent
Expand Down Expand Up @@ -195,3 +195,57 @@ dotnet_naming_style.begins_with_i.required_prefix = I
dotnet_naming_style.begins_with_i.required_suffix =
dotnet_naming_style.begins_with_i.word_separator =
dotnet_naming_style.begins_with_i.capitalization = pascal_case
csharp_style_namespace_declarations = block_scoped:silent
csharp_style_prefer_method_group_conversion = true:silent
csharp_style_prefer_top_level_statements = true:silent
csharp_style_prefer_null_check_over_type_check = true:suggestion
csharp_style_prefer_local_over_anonymous_function = true:suggestion
csharp_style_implicit_object_creation_when_type_is_apparent = true:suggestion
csharp_style_prefer_tuple_swap = true:suggestion
csharp_style_prefer_utf8_string_literals = true:suggestion
csharp_style_prefer_readonly_struct = true:suggestion
csharp_style_prefer_readonly_struct_member = true:suggestion
csharp_style_allow_embedded_statements_on_same_line_experimental = true:silent
csharp_style_allow_blank_lines_between_consecutive_braces_experimental = true:silent
csharp_style_allow_blank_line_after_colon_in_constructor_initializer_experimental = true:silent
csharp_style_allow_blank_line_after_token_in_conditional_expression_experimental = true:silent
csharp_style_allow_blank_line_after_token_in_arrow_expression_clause_experimental = true:silent
csharp_style_prefer_pattern_matching = true:silent
csharp_style_prefer_not_pattern = true:suggestion
csharp_style_prefer_extended_property_pattern = true:suggestion

[*.{cs,vb}]
dotnet_style_operator_placement_when_wrapping = beginning_of_line
tab_width = 4
indent_size = 4
end_of_line = crlf
dotnet_style_coalesce_expression = true:suggestion
dotnet_style_null_propagation = true:suggestion
dotnet_style_prefer_is_null_check_over_reference_equality_method = true:suggestion
dotnet_style_prefer_auto_properties = true:silent
dotnet_style_object_initializer = true:suggestion
dotnet_style_collection_initializer = true:suggestion
dotnet_style_prefer_simplified_boolean_expressions = true:suggestion
dotnet_style_prefer_conditional_expression_over_assignment = false:silent
dotnet_style_prefer_conditional_expression_over_return = false:silent
dotnet_style_explicit_tuple_names = true:suggestion
dotnet_style_prefer_inferred_tuple_names = true:suggestion
dotnet_style_prefer_inferred_anonymous_type_member_names = true:suggestion
dotnet_style_prefer_compound_assignment = true:suggestion
dotnet_style_prefer_simplified_interpolation = true:suggestion
dotnet_style_namespace_match_folder = true:suggestion
dotnet_style_readonly_field = true:suggestion
dotnet_style_predefined_type_for_locals_parameters_members = true:silent
dotnet_style_predefined_type_for_member_access = true:silent
dotnet_style_require_accessibility_modifiers = for_non_interface_members:silent
dotnet_style_allow_multiple_blank_lines_experimental = true:silent
dotnet_style_allow_statement_immediately_after_block_experimental = true:silent
dotnet_code_quality_unused_parameters = all:suggestion
dotnet_style_parentheses_in_arithmetic_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_other_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_relational_binary_operators = always_for_clarity:silent
dotnet_style_parentheses_in_other_operators = never_if_unnecessary:silent
dotnet_style_qualification_for_field = true:silent
dotnet_style_qualification_for_property = true:silent
dotnet_style_qualification_for_method = true:silent
dotnet_style_qualification_for_event = true:silent
4 changes: 2 additions & 2 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ trigger:
- test

variables:
majorVersion: 4
majorVersion: 5
minorVersion: 0
patchVersion: 1
patchVersion: 0
project: src/Lifti.Core/Lifti.Core.csproj
testProject: test/Lifti.Tests/Lifti.Tests.csproj
buildConfiguration: 'Release'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ var index = new FullTextIndexBuilder<int>()
.WithKey(c => c.Id)
.WithField("Name", c => c.Name)
.WithField("Profile", c => c.ProfileHtml, textExtractor: new XmlTextExtractor())
.WithDynamicFields("Tags", c => c.TagDictionary, "Tag_")
.WithDynamicFields(
"Questions",
c => c.Questions,
q => q.QuestionName,
q => q.QuestionResponse,
"Question_")
)
.Build();

Expand All @@ -33,7 +40,7 @@ Each object configured against the index must have a key of the same type as the

## WithField

An object needs one or more named fields configured from which to read text, each of which is configured using `WithField`.
An object can be configured with one or more *static* fields that are known at compile time. The `WithField` method overloads allow for static fields to be defined.

### `name`

Expand Down Expand Up @@ -66,6 +73,47 @@ Equivalent to [WithTextExtraction](./WithTextExtraction) but for use exclusively

Equivalent to [WithDefaultThesaurus](./WithDefaultThesaurus) but for use exclusively with this field. Left null, the default thesaurus builder for the index will be used.

## WithDynamicFields

In addition to the static fields configured using `WithField`, it is possible to configure dynamic fields that are not known at compile time. The `WithDynamicFields`
overloads allow for dynamic field readers to be defined, each of which will be invoked to retrieve the field names for the object being indexed.

> **Important:** LIFTI only supports a maximum of 255 unique field names per index, whether they are dynamic or defined statically using `WithField`.
Because dynamic fields are not known at compile time, it is possible to exceed this limit if you are not careful. If you do exceed this limit, an exception will be thrown
when you try to index an object.

### `dynamicFieldReaderName`

The unique name of the dynamic field reader. Dynamic fields are registered in the index against this name so that when the index is deserialized, the dynamic fields
can be rehydrated against the correct configuration.

### `dynamicFieldReader`

A function capable of extracting the dynamic field information from the object type `T`.

You can provide a function that returns a dictionary of name/value pairs, where the key becomes the field name
and the value the text being indexed against it:

* `Func<T, IDictionary<string, string>?>`
* `Func<T, IDictionary<string, IEnumerable<string>>>`

Or you can provide a function that returns a collection of *child objects*:

* `Func<T, ICollection<TChild>?>`
* `Func<T, ICollection<TChild>?>`

These last two overloads also require you provide two more delegates via the `getFieldName` and `getFieldText` parameters.
These delegates are used to extract the field name and text from each child object.

### `fieldNamePrefix`

The prefix to use when constructing the field name. This is useful when the dynamic fields can produce the same field name as a static field,
or a dynamic field from another dynamic field reader.

### Other `WithDynamicFields` parameters

The `tokenizationOptions`, `textExtractor` and `thesaurusOptions` parameters are equivalent to their `WithField` counterparts.

## Indexing multiple object types

It is possible to index multiple types against an index, however you need to consider a couple of constraints.
Expand Down
11 changes: 11 additions & 0 deletions docs/content/en/docs/Reference/Serialization format/PreVersion5.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
---
title: "V2-4 Serialization File Format"
linkTitle: "V2-4 Serialization File Format"
date: 2023-07-02
description: >
Documentation for older serialization formats.
---

## Version 4 (v4.0.0)

![LIFTI Serialization Format](../../../../images/v2-serialization.svg)
17 changes: 17 additions & 0 deletions docs/content/en/docs/Reference/Serialization format/_index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
title: "Serialization File Format"
linkTitle: "Serialization File Format"
date: 2023-07-04
description: >
The current serialization format is version 5.
---

## Version 5 (v5.0.0)

![LIFTI Serialization Format](../../../images/v5-serialization.svg)

Notes:

- Versions 2 to 4 are readable as a one-time conversion but always written back as version 5.
- Int32s are written as *positive* values using 7-bit encoding. This means that the maximum value is 2,147,483,647, apart from Int32s written by the `IntFormatterKeySerializer` which can't make the assumption that the value is always positive. For these, values are written using zig-zag encoding.
- New in version 5 is the list of fields in the index. This is used upon deserialization to rehydrate the dynamic fields and ensure that the field names in the index being deserialized into are mapped correctly to fields in the serialized index.
9 changes: 0 additions & 9 deletions docs/content/en/docs/Reference/serialization-format.md

This file was deleted.

52 changes: 52 additions & 0 deletions docs/content/en/docs/Searching/field-information.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
---
title: "Field Information"
linkTitle: "Field Information"
weight: 5
description: >
You can query the index to get information about the fields that have been indexed.
---

The `FullTextIndex` class exposes a `FieldLookup` property of type `IIndexedFieldLookup` that can be used to query the index for
information about the fields that have been indexed.

`IIndexedFieldLookup` exposes the following members:

## `DefaultField`

The id of the default field used when an `IFullTextIndex{T}.AddAsync(T, string, System.Threading.CancellationToken)` overload has been used, as opposed to indexing text read from the properties of an object.

## `GetFieldForId(byte id)`

Gets the configured name for a field id.

## `GetFieldInfo(string fieldName)`

Gets the configuration required for indexing a named field, including the `Tokenization.TextExtraction.ITextExtractor` and `Tokenization.IIndexTokenizer` instances to use when processing the field's text.

### `IndexedFieldDetails`

This abstract class contains information about a field that has been configured for indexing.

#### Properties

- **Id**: Gets the id of the field.
- **Name**: Gets the name of the field.
- **ObjectType**: Gets the type of the object the field is registered for.
- **FieldKind**: Gets the kind of field this instance represents, either `FieldKind.Static` or `FieldKind.Dynamic`.
- **TextExtractor**: Gets the `ITextExtractor` used to extract sections of text from this field.
- **Tokenizer**: Gets the `IIndexTokenizer` that should be used when tokenizing text for the field.
- **Thesaurus**: Gets the `IThesaurus` that should be used to expand tokens when processing text for this field.
- **DynamicFieldReaderName**: Gets the name of the dynamic field reader that generated this field. If this field is not a dynamic field, this will be `null`.

#### Methods

- **ReadAsync(object item, CancellationToken cancellationToken)**: Reads the text for the field from the specified item. The item must be of the type specified by the `ObjectType` property.


## `IsKnownField(Type objectType, string fieldName)`

Returns `true` if the given field name is known to the index and associated to the given object type, whether statically defined at index creation, or dynamically registered during indexing.

## `AllFieldNames`

Gets the names of all fields configured in the index, including any dynamic fields that have been registered during the indexing of objects.
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: Manually Constructing Query Objects
linkTitle: "Manually Constructing Query Objects"
date: 2022-02-12
weight: 6
description:
Instead of using a query parser to interpret your query, you can manually construct a `Query` object and execute it against the index.
---
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
title: "IndexNavigator"
linkTitle: "IndexNavigator"
date: 2020-07-14
weight: 6
description: >
You can use an IndexNavigator to navigate the index character by character.
---
Expand Down
38 changes: 38 additions & 0 deletions docs/content/en/docs/Serialization/_index.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,41 @@ Console.WriteLine($"{matches.Count()} items contain text in the new index");
```

If you want to understand how the binary data is laid out, you can have a look at the [Serialization Format](../reference/serialization-format) reference page.

## Changing index definitions

### Adding fields

**Adding** fields to an index definition is supported.

Consider an index defined as:

``` csharp
var index = new FullTextIndexBuilder<int>()
.WithObjectTokenization<Customer>(o => o
.WithKey(c => c.Id)
.WithField("Name", c => c.Name)
)
.Build();
```

A static field called `Name` is defined with the id `1`. If you serialize this index, and then change the index definition to:

``` csharp
var index = new FullTextIndexBuilder<int>()
.WithObjectTokenization<Customer>(o => o
.WithKey(c => c.Id)
.WithField("Name", c => c.Name)
.WithField("Notes", c => c.Notes)
)
.Build();
```

A new field called `Notes` is defined with the id `2`. Deserializing the output from the first definition will work fine.

Even if the field *order* is changed, such that `Name` becomes field id 2, the deserialization will still work. The deserialization process is
smart enough to map the old field ids to the new field ids.

### Removing and renaming fields

This is *not* supported. If you remove or rename a field, the deserialization process will fail.
3 changes: 3 additions & 0 deletions docs/static/images/v5-serialization.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions samples/Blazor/Blazor.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@
</ItemGroup>

<ItemGroup>
<PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly" Version="7.0.0" />
<PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly.DevServer" Version="7.0.0" PrivateAssets="all" />
<PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly" Version="7.0.5" />
<PackageReference Include="Microsoft.AspNetCore.Components.WebAssembly.DevServer" Version="7.0.5" PrivateAssets="all" />
</ItemGroup>

<ItemGroup>
Expand Down
72 changes: 72 additions & 0 deletions samples/TestConsole/DynamicFieldsSample.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
using Lifti;
using System;
using System.Collections.Generic;
using System.Threading.Tasks;

namespace TestConsole
{
/// <summary>
/// Sample showing an index where only one field is registered statically and the
/// remaining fields are registered dynamically from a dictionary on each indexed object.
/// </summary>
public class DynamicFieldsSample : SampleBase
{
    /// <summary>
    /// The object type indexed by this sample.
    /// </summary>
    public class TestObject
    {
        /// <summary>
        /// Creates a new <see cref="TestObject"/>.
        /// </summary>
        /// <param name="id">The value used as the item's key in the index.</param>
        /// <param name="details">The text indexed against the static "Details" field.</param>
        /// <param name="data">The name/value pairs handed to the dynamic field reader.</param>
        public TestObject(int id, string details, IDictionary<string, string> data)
        {
            this.Id = id;
            this.Details = details;
            this.Data = data;
        }

        /// <summary>Gets or sets the id of the object; used as the index key.</summary>
        public int Id { get; set; }

        /// <summary>Gets the text read by the statically registered "Details" field.</summary>
        public string Details { get; }

        /// <summary>Gets or sets the dictionary read by the "Data" dynamic field reader.</summary>
        public IDictionary<string, string> Data { get; set; }
    }

    /// <summary>
    /// Builds the index, indexes two sample objects, runs a wildcard search and then
    /// lists every field name known to the index together with its field kind.
    /// </summary>
    /// <returns>A task that completes once the sample has finished running.</returns>
    public override async Task RunAsync()
    {
        Console.WriteLine("Creating an index that has dynamically registered fields for an object.");
        Console.WriteLine("Only one field, Details, is statically registered when the index is created.");

        var objects = new Dictionary<int, TestObject>
        {
            {
                1,
                new TestObject(
                    1,
                    "Some details",
                    new Dictionary<string, string> { { "Name", "Joe Bloggs" }, { "Profile", "Just placeholder text here" } })
            },
            {
                2,
                new TestObject(
                    2,
                    "Chillin with orange juice",
                    new Dictionary<string, string> { { "Name", "Just Bob" }, { "FavouriteExercise", "Jumping jacks" } })
            }
        };

        var index = new FullTextIndexBuilder<int>()
            .WithObjectTokenization<TestObject>(o => o
                .WithKey(c => c.Id)
                .WithField("Details", x => x.Details)
                .WithDynamicFields("Data", c => c.Data)
            )
            .Build();

        await index.AddRangeAsync(objects.Values);

        // Await the search so that it has completed before the field list below is
        // printed, and so that any exception it throws is observed here rather than lost.
        // NOTE(review): assumes RunSearchAsync (defined on SampleBase) returns an
        // awaitable, as its name implies - confirm against SampleBase.
        await RunSearchAsync(
            index,
            "ju*",
            i => objects[i],
            @"Words beginning with 'ju' are contained across 4 fields, 3 of which will have been dynamically registered");

        Console.WriteLine("Fields known to the index:");
        foreach (var field in index.FieldLookup.AllFieldNames)
        {
            Console.WriteLine($"{field} - Field kind:{index.FieldLookup.GetFieldInfo(field).FieldKind}");
        }

        WaitForEnterToReturnToMenu();
    }
}
}
Loading

0 comments on commit 0ddbcf7

Please sign in to comment.