From 9fb00c3f6df57973b6a12dae6ef1d5d8ca8027ab Mon Sep 17 00:00:00 2001 From: Mike Goatly <4577868+mikegoatly@users.noreply.github.com> Date: Mon, 25 Jul 2022 19:15:56 +0100 Subject: [PATCH] Update the underlying stream position after deserialization (#47) --- azure-pipelines.yml | 236 +++++------ .../Serialization/Binary/V2IndexReader.cs | 22 +- .../Serialization/BinarySerializerTests.cs | 365 +++++++++++------- test/Lifti.Tests/TestResources.Designer.cs | 176 +++++---- test/Lifti.Tests/TestResources.resx | 255 ++++++------ test/Lifti.Tests/V4.dat | Bin 0 -> 325 bytes .../FullTextIndexTests.cs | 188 +++++---- .../IndexBenchmarkBase.cs | 68 ++-- test/PerformanceProfiling/Program.cs | 2 +- 9 files changed, 715 insertions(+), 597 deletions(-) create mode 100644 test/Lifti.Tests/V4.dat diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f3fbae4d..3f669fa7 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -1,118 +1,118 @@ -trigger: - branches: - include: - - master - paths: - include: - - src - - test - -variables: - majorVersion: 3 - minorVersion: 0 - patchVersion: 1 - project: src/Lifti.Core/Lifti.Core.csproj - testProject: test/Lifti.Tests/Lifti.Tests.csproj - buildConfiguration: 'Release' - -stages: -- stage: Build - displayName: Build nuget packages - - jobs: - - job: Build - - pool: - vmImage: 'windows-latest' - - steps: - - task: UseDotNet@2 - inputs: - packageType: 'sdk' - version: '6.0.x' - - task: DotNetCoreCLI@2 - displayName: "NuGet Restore" - inputs: - command: restore - projects: '**/*.csproj' - verbosityRestore: 'Normal' - - task: DotNetCoreCLI@2 - displayName: Run unit tests - inputs: - command: 'test' - projects: $(testProject) - - task: DotNetCoreCLI@2 - displayName: Pack CI nuget version - inputs: - command: 'pack' - packagesToPack: $(project) - packDirectory: '$(Build.ArtifactStagingDirectory)/packages/ci' - versioningScheme: 'byPrereleaseNumber' - majorVersion: '$(majorVersion)' - minorVersion: '$(minorVersion)' - patchVersion: '$(patchVersion)' - verbosityPack: 'Normal' - arguments: '--configuration $(buildConfiguration)' - - - task: PublishSymbols@2 - inputs: - SearchPattern: '**/bin/**/*.pdb' - SymbolServerType: 'TeamServices' - - - task: DotNetCoreCLI@2 - displayName: Pack release nuget version - inputs: - command: 'pack' - packagesToPack: $(project) - packDirectory: '$(Build.ArtifactStagingDirectory)/packages/release' - versioningScheme: 'off' - buildProperties: 'PackageVersion=$(majorVersion).$(minorVersion).$(patchVersion)' - verbosityPack: 'Normal' - arguments: '--configuration $(buildConfiguration)' - - - publish: '$(Build.ArtifactStagingDirectory)/packages' - artifact: 'packages' - -- stage: PublishCINugetPackage - displayName: Publish to CI feed - dependsOn: Build - condition: succeeded() - - jobs: - - job: PublishCI - pool: - vmImage: 'ubuntu-latest' - - steps: - - checkout: none - - - download: current - artifact: 'packages' - - - task: DotNetCoreCLI@2 - inputs: - command: 'push' - packagesToPush: '$(Pipeline.Workspace)/packages/ci/*.nupkg' - nuGetFeedType: 'internal' - publishVstsFeed: '21c23043-21b0-4e5a-8557-00b88fc52fd4/9f4e269d-a35a-4657-b2a3-b56b01c01f8c' - -- stage: 'PublishReleaseNuGetPackage' - displayName: 'Publish Release NuGet Package' - dependsOn: 'PublishCINugetPackage' - condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master')) - jobs: - - deployment: - pool: - vmImage: 'ubuntu-latest' - environment: 'Nuget' - strategy: - runOnce: - deploy: - steps: - - task: NuGetCommand@2 - displayName: 'Push NuGet Package' - inputs: - command: 'push' - packagesToPush: '$(Pipeline.Workspace)/packages/release/*.nupkg' - nuGetFeedType: 'external' - publishFeedCredentials: 'NuGet' +trigger: + branches: + include: + - master + paths: + include: + - src + - test + +variables: + majorVersion: 3 + minorVersion: 1 + patchVersion: 0 + project: src/Lifti.Core/Lifti.Core.csproj + testProject: test/Lifti.Tests/Lifti.Tests.csproj + buildConfiguration: 'Release' + +stages: +- stage: Build + displayName: Build nuget packages + + jobs: + - job: Build + + pool: + vmImage: 'windows-latest' + + steps: + - task: UseDotNet@2 + inputs: + packageType: 'sdk' + version: '6.0.x' + - task: DotNetCoreCLI@2 + displayName: "NuGet Restore" + inputs: + command: restore + projects: '**/*.csproj' + verbosityRestore: 'Normal' + - task: DotNetCoreCLI@2 + displayName: Run unit tests + inputs: + command: 'test' + projects: $(testProject) + - task: DotNetCoreCLI@2 + displayName: Pack CI nuget version + inputs: + command: 'pack' + packagesToPack: $(project) + packDirectory: '$(Build.ArtifactStagingDirectory)/packages/ci' + versioningScheme: 'byPrereleaseNumber' + majorVersion: '$(majorVersion)' + minorVersion: '$(minorVersion)' + patchVersion: '$(patchVersion)' + verbosityPack: 'Normal' + arguments: '--configuration $(buildConfiguration)' + + - task: PublishSymbols@2 + inputs: + SearchPattern: '**/bin/**/*.pdb' + SymbolServerType: 'TeamServices' + + - task: DotNetCoreCLI@2 + displayName: Pack release nuget version + inputs: + command: 'pack' + packagesToPack: $(project) + packDirectory: '$(Build.ArtifactStagingDirectory)/packages/release' + versioningScheme: 'off' + buildProperties: 'PackageVersion=$(majorVersion).$(minorVersion).$(patchVersion)' + verbosityPack: 'Normal' + arguments: '--configuration $(buildConfiguration)' + + - publish: '$(Build.ArtifactStagingDirectory)/packages' + artifact: 'packages' + +- stage: PublishCINugetPackage + displayName: Publish to CI feed + dependsOn: Build + condition: succeeded() + + jobs: + - job: PublishCI + pool: + vmImage: 'ubuntu-latest' + + steps: + - checkout: none + + - download: current + artifact: 'packages' + + - task: DotNetCoreCLI@2 + inputs: + command: 'push' + packagesToPush: '$(Pipeline.Workspace)/packages/ci/*.nupkg' + nuGetFeedType: 'internal' + publishVstsFeed: '21c23043-21b0-4e5a-8557-00b88fc52fd4/9f4e269d-a35a-4657-b2a3-b56b01c01f8c' + +- stage: 'PublishReleaseNuGetPackage' + displayName: 'Publish Release NuGet Package' + dependsOn: 'PublishCINugetPackage' + condition: and(succeeded(), eq(variables['Build.SourceBranch'], 'refs/heads/master')) + jobs: + - deployment: + pool: + vmImage: 'ubuntu-latest' + environment: 'Nuget' + strategy: + runOnce: + deploy: + steps: + - task: NuGetCommand@2 + displayName: 'Push NuGet Package' + inputs: + command: 'push' + packagesToPush: '$(Pipeline.Workspace)/packages/release/*.nupkg' + nuGetFeedType: 'external' + publishFeedCredentials: 'NuGet' diff --git a/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs b/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs index 2f831031..4e450b2a 100644 --- a/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs +++ b/src/Lifti.Core/Serialization/Binary/V2IndexReader.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Collections.Immutable; using System.IO; using System.Threading.Tasks; @@ -11,7 +12,8 @@ internal class V2IndexReader : IIndexReader private readonly Stream underlyingStream; private readonly bool disposeStream; private readonly IKeySerializer keySerializer; - private readonly MemoryStream buffer; + private readonly MemoryStream buffer; + private long initialUnderlyingStreamOffset = 0L; protected readonly BinaryReader reader; public V2IndexReader(Stream stream, bool disposeStream, IKeySerializer keySerializer) @@ -20,7 +22,7 @@ public V2IndexReader(Stream stream, bool disposeStream, IKeySerializer key this.disposeStream = disposeStream; this.keySerializer = keySerializer; - this.buffer = new MemoryStream((int)this.underlyingStream.Length); + this.buffer = new MemoryStream((int)(this.underlyingStream.Length - this.underlyingStream.Position)); this.reader = new BinaryReader(this.buffer); } @@ -61,14 +63,19 @@ public async Task ReadIntoAsync(FullTextIndex index) new DocumentStatistics(fieldTokenCounts.ToImmutable(), totalTokenCount)); } - index.SetRootWithLock(this.DeserializeNode(index.IndexNodeFactory, 0)); + index.SetRootWithLock(this.DeserializeNode(index.IndexNodeFactory, 0)); if (this.reader.ReadInt32() != -1) { throw new DeserializationException(ExceptionMessages.MissingIndexTerminator); + } + + if (this.underlyingStream.CanSeek) + { + this.underlyingStream.Position = this.buffer.Position + initialUnderlyingStreamOffset; } - } - + } + private IndexNode DeserializeNode(IIndexNodeFactory nodeFactory, int depth) { var textLength = this.reader.ReadInt32(); @@ -198,7 +205,8 @@ private int DeserializeAbbreviatedData(LocationEntrySerializationOptimizations s } private async Task FillBufferAsync() - { + { + this.initialUnderlyingStreamOffset = this.underlyingStream.Position; await this.underlyingStream.CopyToAsync(this.buffer).ConfigureAwait(false); this.buffer.Position = 0; } diff --git a/test/Lifti.Tests/Serialization/BinarySerializerTests.cs b/test/Lifti.Tests/Serialization/BinarySerializerTests.cs index a2d6bdeb..7f069248 100644 --- a/test/Lifti.Tests/Serialization/BinarySerializerTests.cs +++ b/test/Lifti.Tests/Serialization/BinarySerializerTests.cs @@ -1,150 +1,215 @@ -using FluentAssertions; -using Lifti.Serialization.Binary; -using Lifti.Tokenization.TextExtraction; -using PerformanceProfiling; -using System; -using System.Diagnostics; -using System.IO; -using System.Linq; -using System.Threading.Tasks; -using Xunit; -using Xunit.Abstractions; - -namespace Lifti.Tests.Serialization -{ - public class BinarySerializerTests - { - private readonly ITestOutputHelper output; - - public BinarySerializerTests(ITestOutputHelper output) - { - this.output = output; - } - - [Fact] - public async Task ShouldSerializeEmojiWithSurrogatePairs() - { - var index = await SearializeAndDeserializeIndexWithText("🎶 🤷🏾‍♀️"); - index.Search("🤷🏾‍♀️").Should().HaveCount(1); - } - - [Fact] - public async Task ShouldSerializeEmoji() - { - var index = await SearializeAndDeserializeIndexWithText("🎶"); - index.Search("🎶").Should().HaveCount(1); - } - - [Fact] - public async Task ShouldSerializeEmojiSequences() - { - var index = await SearializeAndDeserializeIndexWithText("🎶🤷🏾‍♀️"); - index.Search("🎶🤷🏾‍♀️").Should().HaveCount(1); - } - - [Fact] - public async Task ShouldDeserializeV3Index() - { - var index = new FullTextIndexBuilder().Build(); - var serializer = new BinarySerializer(); - using (var stream = new MemoryStream(TestResources.v3Index)) - { - await serializer.DeserializeAsync(index, stream); - } - - index.Search("serialized").Should().HaveCount(1); - index.Search("亜").Should().HaveCount(1); - } - - [Fact] - public async Task ShouldDeserializeV2Index() - { - var index = new FullTextIndexBuilder().Build(); - var serializer = new BinarySerializer(); - using (var stream = new MemoryStream(TestResources.v2Index)) - { - await serializer.DeserializeAsync(index, stream); - } - - index.Search("serialized").Should().HaveCount(1); - index.Search("亜").Should().HaveCount(1); - } - - [Fact] - public async Task ShouldRoundTripIndexStructure() - { - var serializer = new BinarySerializer(); - - var fileName = Guid.NewGuid().ToString() + ".dat"; - - using (var stream = File.Open(fileName, FileMode.CreateNew)) - { - var stopwatch = Stopwatch.StartNew(); - var index = await CreateWikipediaIndexAsync(); - await serializer.SerializeAsync(index, stream, false); - - this.output.WriteLine($"Serialized in {stopwatch.ElapsedMilliseconds}ms"); - - stream.Length.Should().BeGreaterThan(4); - - var newIndex = new FullTextIndexBuilder().Build(); - - stream.Position = 0; - - stopwatch.Restart(); - await serializer.DeserializeAsync(newIndex, stream, false); - - this.output.WriteLine($"Deserialized in {stopwatch.ElapsedMilliseconds}ms"); - - newIndex.Items.GetIndexedItems().Should().BeEquivalentTo(index.Items.GetIndexedItems()); - newIndex.Count.Should().Be(index.Count); - newIndex.Root.ToString().Should().Be(index.Root.ToString()); - - var oldResults = index.Search("test").ToList(); - var newResults = newIndex.Search("test").ToList(); - - oldResults.Should().NotBeEmpty(); - newResults.Should().BeEquivalentTo(oldResults); - - newIndex.Search("🤷‍♀️").Should().HaveCount(1); - } - - File.Delete(fileName); - } - - private static async Task> SearializeAndDeserializeIndexWithText(string text) - { - var stream = new MemoryStream(); - var serializer = new BinarySerializer(); - var index = new FullTextIndexBuilder().Build(); - await index.AddAsync("A", text); - - await serializer.SerializeAsync(index, stream, false); - - stream.Position = 0; - - var index2 = new FullTextIndexBuilder().Build(); - await serializer.DeserializeAsync(index2, stream); - return index2; - } - - private async Task> CreateWikipediaIndexAsync() - { - var index = new FullTextIndexBuilder() - .WithTextExtractor() - .WithDefaultTokenization(o => o.WithStemming()) - .Build(); - - var wikipediaTests = WikipediaDataLoader.Load(typeof(FullTextIndexTests)); - foreach (var (name, text) in wikipediaTests) - { - await index.AddAsync(name, text); - } - - // For good measure, index some surrogate pairs - await index.AddAsync("Emoji", "Emojis can cause problems 🤷‍♀️ 🤷🏾‍♂️"); - - return index; - } - } -} +using FluentAssertions; +using Lifti.Serialization.Binary; +using Lifti.Tokenization.TextExtraction; +using PerformanceProfiling; +using System; +using System.Diagnostics; +using System.IO; +using System.Linq; +using System.Threading.Tasks; +using Xunit; +using Xunit.Abstractions; + +namespace Lifti.Tests.Serialization +{ + public class BinarySerializerTests + { + private readonly ITestOutputHelper output; + + public BinarySerializerTests(ITestOutputHelper output) + { + this.output = output; + } + + [Fact] + public async Task ShouldSerializeEmojiWithSurrogatePairs() + { + var index = await SearializeAndDeserializeIndexWithTextAsync("🎶 🤷🏾‍♀️"); + index.Search("🤷🏾‍♀️").Should().HaveCount(1); + } + + [Fact] + public async Task ShouldSerializeEmoji() + { + var index = await SearializeAndDeserializeIndexWithTextAsync("🎶"); + index.Search("🎶").Should().HaveCount(1); + } + + [Fact] + public async Task ShouldSerializeEmojiSequences() + { + var index = await SearializeAndDeserializeIndexWithTextAsync("🎶🤷🏾‍♀️"); + index.Search("🎶🤷🏾‍♀️").Should().HaveCount(1); + } + + [Fact] + public async Task ShouldDeserializeV4Index() + { + var index = new FullTextIndexBuilder().Build(); + var serializer = new BinarySerializer(); + using (var stream = new MemoryStream(TestResources.v4Index)) + { + await serializer.DeserializeAsync(index, stream); + } + + index.Search("serialized").Should().HaveCount(1); + index.Search("亜").Should().HaveCount(1); + } + + [Fact] + public async Task ShouldDeserializeV3Index() + { + var index = new FullTextIndexBuilder().Build(); + var serializer = new BinarySerializer(); + using (var stream = new MemoryStream(TestResources.v3Index)) + { + await serializer.DeserializeAsync(index, stream); + } + + index.Search("serialized").Should().HaveCount(1); + index.Search("亜").Should().HaveCount(1); + } + + [Fact] + public async Task ShouldDeserializeV2Index() + { + var index = new FullTextIndexBuilder().Build(); + var serializer = new BinarySerializer(); + using (var stream = new MemoryStream(TestResources.v2Index)) + { + await serializer.DeserializeAsync(index, stream); + } + + index.Search("serialized").Should().HaveCount(1); + index.Search("亜").Should().HaveCount(1); + } + + [Fact] + public async Task ShouldRoundTripIndexStructure() + { + var serializer = new BinarySerializer(); + var fileName = CreateRandomIndexFileName(); + + using (var stream = File.Open(fileName, FileMode.CreateNew)) + { + var stopwatch = Stopwatch.StartNew(); + var index = await CreateWikipediaIndexAsync(); + await serializer.SerializeAsync(index, stream, false); + + this.output.WriteLine($"Serialized in {stopwatch.ElapsedMilliseconds}ms"); + + stream.Length.Should().BeGreaterThan(4); + + var newIndex = new FullTextIndexBuilder().Build(); + + stream.Position = 0; + + stopwatch.Restart(); + await serializer.DeserializeAsync(newIndex, stream, false); + + this.output.WriteLine($"Deserialized in {stopwatch.ElapsedMilliseconds}ms"); + + newIndex.Items.GetIndexedItems().Should().BeEquivalentTo(index.Items.GetIndexedItems()); + newIndex.Count.Should().Be(index.Count); + newIndex.Root.ToString().Should().Be(index.Root.ToString()); + + var oldResults = index.Search("test").ToList(); + var newResults = newIndex.Search("test").ToList(); + + oldResults.Should().NotBeEmpty(); + newResults.Should().BeEquivalentTo(oldResults); + + newIndex.Search("🤷‍♀️").Should().HaveCount(1); + } + + File.Delete(fileName); + } + + [Fact] + public async Task ShouldBeAbleToSerializeAndDeserializeMultipleIndexesToTheSameStream() + { + var index1 = await CreateIndexAsync("Foo"); + var index2 = await CreateIndexAsync("Bar"); + var fileName = CreateRandomIndexFileName(); + + var serializer = new BinarySerializer(); + using (var stream = File.Open(fileName, FileMode.CreateNew)) + { + await serializer.SerializeAsync(index1, stream, false); + await serializer.SerializeAsync(index2, stream, true); + } + + using (var stream = File.Open(fileName, FileMode.Open)) + { + var deserializedIndex1 = new FullTextIndexBuilder().Build(); + var deserializedIndex2 = new FullTextIndexBuilder().Build(); + await serializer.DeserializeAsync(deserializedIndex1, stream, false); + await serializer.DeserializeAsync(deserializedIndex2, stream, true); + + deserializedIndex1.Search("Foo").Should().HaveCount(1); + deserializedIndex2.Search("Bar").Should().HaveCount(1); + } + } + + // Used to create test indexes when defining a new serialization version + //[Fact] + //public async Task CreateTestIndex() + //{ + // var index = new FullTextIndexBuilder().Build(); + // await index.AddAsync("A", "Some serialized data"); + // await index.AddAsync("B", "亜"); + + // var serializer = new BinarySerializer(); + // using (var stream = File.Open("../../../V4.dat", FileMode.CreateNew)) + // { + // await serializer.SerializeAsync(index, stream, true); + // } + //} + + private static string CreateRandomIndexFileName() + { + return Guid.NewGuid().ToString() + ".dat"; + } + + private static async Task> SearializeAndDeserializeIndexWithTextAsync(string text) + { + var stream = new MemoryStream(); + var serializer = new BinarySerializer(); + var index = await CreateIndexAsync(text); + + await serializer.SerializeAsync(index, stream, false); + + stream.Position = 0; + + var index2 = new FullTextIndexBuilder().Build(); + await serializer.DeserializeAsync(index2, stream); + return index2; + } + + private static async Task> CreateIndexAsync(string text) + { + var index = new FullTextIndexBuilder().Build(); + await index.AddAsync("A", text); + return index; + } + + private async Task> CreateWikipediaIndexAsync() + { + var index = new FullTextIndexBuilder() + .WithTextExtractor() + .WithDefaultTokenization(o => o.WithStemming()) + .Build(); + + var wikipediaTests = WikipediaDataLoader.Load(typeof(FullTextIndexTests)); + foreach (var (name, text) in wikipediaTests) + { + await index.AddAsync(name, text); + } + + // For good measure, index some surrogate pairs + await index.AddAsync("Emoji", "Emojis can cause problems 🤷‍♀️ 🤷🏾‍♂️"); + + return index; + } + } +} diff --git a/test/Lifti.Tests/TestResources.Designer.cs b/test/Lifti.Tests/TestResources.Designer.cs index 55dc2c74..8e1c742d 100644 --- a/test/Lifti.Tests/TestResources.Designer.cs +++ b/test/Lifti.Tests/TestResources.Designer.cs @@ -1,83 +1,93 @@ -//------------------------------------------------------------------------------ -// -// This code was generated by a tool. -// Runtime Version:4.0.30319.42000 -// -// Changes to this file may cause incorrect behavior and will be lost if -// the code is regenerated. -// -//------------------------------------------------------------------------------ - -namespace Lifti.Tests { - using System; - - - /// - /// A strongly-typed resource class, for looking up localized strings, etc. - /// - // This class was auto-generated by the StronglyTypedResourceBuilder - // class via a tool like ResGen or Visual Studio. - // To add or remove a member, edit your .ResX file then rerun ResGen - // with the /str option, or rebuild your VS project. - [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "16.0.0.0")] - [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] - [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] - internal class TestResources { - - private static global::System.Resources.ResourceManager resourceMan; - - private static global::System.Globalization.CultureInfo resourceCulture; - - [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] - internal TestResources() { - } - - /// - /// Returns the cached ResourceManager instance used by this class. - /// - [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] - internal static global::System.Resources.ResourceManager ResourceManager { - get { - if (object.ReferenceEquals(resourceMan, null)) { - global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Lifti.Tests.TestResources", typeof(TestResources).Assembly); - resourceMan = temp; - } - return resourceMan; - } - } - - /// - /// Overrides the current thread's CurrentUICulture property for all - /// resource lookups using this strongly typed resource class. - /// - [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] - internal static global::System.Globalization.CultureInfo Culture { - get { - return resourceCulture; - } - set { - resourceCulture = value; - } - } - - /// - /// Looks up a localized resource of type System.Byte[]. - /// - internal static byte[] v2Index { - get { - object obj = ResourceManager.GetObject("v2Index", resourceCulture); - return ((byte[])(obj)); - } - } - - /// - /// Looks up a localized resource of type System.Byte[]. - /// - internal static byte[] v3Index { - get { - object obj = ResourceManager.GetObject("v3Index", resourceCulture); - return ((byte[])(obj)); - } - } - } -} +//------------------------------------------------------------------------------ +// +// This code was generated by a tool. +// Runtime Version:4.0.30319.42000 +// +// Changes to this file may cause incorrect behavior and will be lost if +// the code is regenerated. +// +//------------------------------------------------------------------------------ + +namespace Lifti.Tests { + using System; + + + /// + /// A strongly-typed resource class, for looking up localized strings, etc. + /// + // This class was auto-generated by the StronglyTypedResourceBuilder + // class via a tool like ResGen or Visual Studio. + // To add or remove a member, edit your .ResX file then rerun ResGen + // with the /str option, or rebuild your VS project. + [global::System.CodeDom.Compiler.GeneratedCodeAttribute("System.Resources.Tools.StronglyTypedResourceBuilder", "17.0.0.0")] + [global::System.Diagnostics.DebuggerNonUserCodeAttribute()] + [global::System.Runtime.CompilerServices.CompilerGeneratedAttribute()] + internal class TestResources { + + private static global::System.Resources.ResourceManager resourceMan; + + private static global::System.Globalization.CultureInfo resourceCulture; + + [global::System.Diagnostics.CodeAnalysis.SuppressMessageAttribute("Microsoft.Performance", "CA1811:AvoidUncalledPrivateCode")] + internal TestResources() { + } + + /// + /// Returns the cached ResourceManager instance used by this class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Resources.ResourceManager ResourceManager { + get { + if (object.ReferenceEquals(resourceMan, null)) { + global::System.Resources.ResourceManager temp = new global::System.Resources.ResourceManager("Lifti.Tests.TestResources", typeof(TestResources).Assembly); + resourceMan = temp; + } + return resourceMan; + } + } + + /// + /// Overrides the current thread's CurrentUICulture property for all + /// resource lookups using this strongly typed resource class. + /// + [global::System.ComponentModel.EditorBrowsableAttribute(global::System.ComponentModel.EditorBrowsableState.Advanced)] + internal static global::System.Globalization.CultureInfo Culture { + get { + return resourceCulture; + } + set { + resourceCulture = value; + } + } + + /// + /// Looks up a localized resource of type System.Byte[]. + /// + internal static byte[] v2Index { + get { + object obj = ResourceManager.GetObject("v2Index", resourceCulture); + return ((byte[])(obj)); + } + } + + /// + /// Looks up a localized resource of type System.Byte[]. + /// + internal static byte[] v3Index { + get { + object obj = ResourceManager.GetObject("v3Index", resourceCulture); + return ((byte[])(obj)); + } + } + + /// + /// Looks up a localized resource of type System.Byte[]. + /// + internal static byte[] v4Index { + get { + object obj = ResourceManager.GetObject("v4Index", resourceCulture); + return ((byte[])(obj)); + } + } + } +} diff --git a/test/Lifti.Tests/TestResources.resx b/test/Lifti.Tests/TestResources.resx index 05ae836b..9e180b51 100644 --- a/test/Lifti.Tests/TestResources.resx +++ b/test/Lifti.Tests/TestResources.resx @@ -1,127 +1,130 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - text/microsoft-resx - - - 2.0 - - - System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - - - System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - - - - v2.dat;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - - - v3.dat;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + text/microsoft-resx + + + 2.0 + + + System.Resources.ResXResourceReader, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + System.Resources.ResXResourceWriter, System.Windows.Forms, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + + v2.dat;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + v3.dat;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + + + v4.dat;System.Byte[], mscorlib, Version=4.0.0.0, Culture=neutral, PublicKeyToken=b77a5c561934e089 + \ No newline at end of file diff --git a/test/Lifti.Tests/V4.dat b/test/Lifti.Tests/V4.dat new file mode 100644 index 0000000000000000000000000000000000000000..80d6fc6a564e255bf2748d6f19e25464057a2b37 GIT binary patch literal 325 zcmZWj%MO4b3@eHrJo!r)Igt2B)RW)lzbc)8SzNNwZe7O|EGhtoIj{ index; - - [GlobalSetup] - public async Task SetUp() - { - this.index = CreateNewIndex(4); - await this.PopulateIndexAsync(this.index); - } - - [Params("(confiscation & th*) | \"and they\"")] - public string SearchCriteria { get; set; } - - [Benchmark] - public object Searching() - { - return this.index.Search(this.SearchCriteria); - } +{ + //[RankColumn, MemoryDiagnoser] + //[ShortRunJob(RuntimeMoniker.NetCoreApp31)] + //[ShortRunJob(RuntimeMoniker.Net60)] + //public class IndexSearchingBenchmarks : IndexBenchmarkBase + //{ + // private IFullTextIndex index; + + // [GlobalSetup] + // public async Task SetUp() + // { + // this.index = CreateNewIndex(4); + // await this.PopulateIndexAsync(this.index); + // } + + // [Params("(confiscation & th*) | \"and they\"")] + // public string SearchCriteria { get; set; } + + // [Benchmark] + // public object Searching() + // { + // return this.index.Search(this.SearchCriteria); + // } + //} + + //[SimpleJob(RuntimeMoniker.NetCoreApp22)] + //[SimpleJob(RuntimeMoniker.NetCoreApp31)] + //[RankColumn, MemoryDiagnoser] + //public class WordSplittingBenchmarks : IndexBenchmarkBase + //{ + // [Benchmark()] + // public void XmlWorkSplittingNew() + // { + // var splitter = new XmlTokenizer(); + + // splitter.Process(WikipediaData.SampleData[0].text).ToList(); + // } + //} + + //[MediumRunJob(RuntimeMoniker.NetCoreApp31)] + [MediumRunJob(RuntimeMoniker.Net60)] + [RankColumn, MemoryDiagnoser] + public class SerializationBenchmarks : IndexBenchmarkBase + { + private BinarySerializer serializer; + private string fileName; + + [GlobalSetup] + public async Task Setup() + { + var index = CreateNewIndex(2); + await this.PopulateIndexAsync(index); + + this.serializer = new BinarySerializer(); + this.fileName = $"{Guid.NewGuid()}.dat"; + using var stream = File.OpenWrite(this.fileName); + await this.serializer.SerializeAsync(index, stream, true); + } + + [Benchmark()] + public async Task IndexDeserialization() + { + var index = CreateNewIndex(2); + using var stream = File.OpenRead(this.fileName); + await this.serializer.DeserializeAsync(index, stream, true); + } } - //[SimpleJob(RuntimeMoniker.NetCoreApp22)] - //[SimpleJob(RuntimeMoniker.NetCoreApp31)] + //[MediumRunJob(RuntimeMoniker.NetCoreApp31)] + //[MediumRunJob(RuntimeMoniker.Net60)] //[RankColumn, MemoryDiagnoser] - //public class WordSplittingBenchmarks : IndexBenchmarkBase + //public class FullTextIndexTests : IndexBenchmarkBase //{ - // [Benchmark()] - // public void XmlWorkSplittingNew() - // { - // var splitter = new XmlTokenizer(); - - // splitter.Process(WikipediaData.SampleData[0].text).ToList(); - // } - //} - - [MediumRunJob(RuntimeMoniker.NetCoreApp31)] - [MediumRunJob(RuntimeMoniker.Net60)] - [RankColumn, MemoryDiagnoser] - public class FullTextIndexTests : IndexBenchmarkBase - { //[Benchmark()] //public async Task NewCodeIndexingAlwaysSupportIntraNodeText() //{ @@ -64,46 +96,46 @@ public class FullTextIndexTests : IndexBenchmarkBase // await this.PopulateIndexAsync(index); //} - [Benchmark()] - public async Task NewCodeIndexingIntraNodeTextAt4Characters() - { - var index = CreateNewIndex(4); - await this.PopulateIndexAsync(index); - } - - //[Benchmark()] - //public async Task NewCodeIndexingOneByOneIntraNodeTextAt2Characters() - //{ - // var index = CreateNewIndex(2); - // await this.PopulateIndexOneByOneAsync(index); - //} - - //[Benchmark()] - //public async Task NewCodeIndexingOneByOneAlwaysSupportIntraNodeText() - //{ - // var index = CreateNewIndex(0); - // await this.PopulateIndexOneByOneAsync(index); - //} - - //[Benchmark()] - //public async Task NewCodeIndexingOneByOneAlwaysIndexCharByChar() - //{ - // var index = CreateNewIndex(1000); - // await this.PopulateIndexOneByOneAsync(index); - //} - //[Benchmark()] - //public async Task Task NewCodeIndexingOneByOneIntraNodeTextAt4Characters() + //public async Task NewCodeIndexingIntraNodeTextAt4Characters() //{ // var index = CreateNewIndex(4); - // await this.PopulateIndexOneByOneAsync(index); - //} - - //[Benchmark()] - //public async Task NewCodeIndexingIntraNodeTextAt2Characters() - //{ - // var index = CreateNewIndex(2); // await this.PopulateIndexAsync(index); - //} - } + //} + + //[Benchmark()] + //public async Task NewCodeIndexingOneByOneIntraNodeTextAt2Characters() + //{ + // var index = CreateNewIndex(2); + // await this.PopulateIndexOneByOneAsync(index); + //} + + //[Benchmark()] + //public async Task NewCodeIndexingOneByOneAlwaysSupportIntraNodeText() + //{ + // var index = CreateNewIndex(0); + // await this.PopulateIndexOneByOneAsync(index); + //} + + //[Benchmark()] + //public async Task NewCodeIndexingOneByOneAlwaysIndexCharByChar() + //{ + // var index = CreateNewIndex(1000); + // await this.PopulateIndexOneByOneAsync(index); + //} + + //[Benchmark()] + //public async Task Task NewCodeIndexingOneByOneIntraNodeTextAt4Characters() + //{ + // var index = CreateNewIndex(4); + // await this.PopulateIndexOneByOneAsync(index); + //} + + //[Benchmark()] + //public async Task NewCodeIndexingIntraNodeTextAt2Characters() + //{ + // var index = CreateNewIndex(2); + // await this.PopulateIndexAsync(index); + //} + //} } diff --git a/test/PerformanceProfiling/IndexBenchmarkBase.cs b/test/PerformanceProfiling/IndexBenchmarkBase.cs index e08e4173..f520e31f 100644 --- a/test/PerformanceProfiling/IndexBenchmarkBase.cs +++ b/test/PerformanceProfiling/IndexBenchmarkBase.cs @@ -1,34 +1,34 @@ -using Lifti; -using Lifti.Tokenization.TextExtraction; -using System.Threading.Tasks; - -namespace PerformanceProfiling -{ - public abstract class IndexBenchmarkBase - { - - protected async Task PopulateIndexAsync(IFullTextIndex index) - { - await index.AddRangeAsync(WikipediaData.SampleData); - } - - protected async Task PopulateIndexOneByOne(IFullTextIndex index) - { - foreach (var page in WikipediaData.SampleData) - { - await index.AddAsync(page); - } - } - - protected static IFullTextIndex CreateNewIndex(int supportSplitAtIndex) - { - return new FullTextIndexBuilder() - .WithIntraNodeTextSupportedAfterIndexDepth(supportSplitAtIndex) - .WithObjectTokenization<(string name, string text)>( - o => o - .WithKey(p => p.name) - .WithField("Text", p => p.text, t => t.WithStemming(), new XmlTextExtractor())) - .Build(); - } - } -} +using Lifti; +using Lifti.Tokenization.TextExtraction; +using System.Threading.Tasks; + +namespace PerformanceProfiling +{ + public abstract class IndexBenchmarkBase + { + + protected async Task PopulateIndexAsync(IFullTextIndex index) + { + await index.AddRangeAsync(WikipediaData.SampleData); + } + + protected async Task PopulateIndexOneByOne(IFullTextIndex index) + { + foreach (var page in WikipediaData.SampleData) + { + await index.AddAsync(page); + } + } + + protected static FullTextIndex CreateNewIndex(int supportSplitAtIndex) + { + return new FullTextIndexBuilder() + .WithIntraNodeTextSupportedAfterIndexDepth(supportSplitAtIndex) + .WithObjectTokenization<(string name, string text)>( + o => o + .WithKey(p => p.name) + .WithField("Text", p => p.text, t => t.WithStemming(), new XmlTextExtractor())) + .Build(); + } + } +} diff --git a/test/PerformanceProfiling/Program.cs b/test/PerformanceProfiling/Program.cs index 5b73c3a7..281ddf58 100644 --- a/test/PerformanceProfiling/Program.cs +++ b/test/PerformanceProfiling/Program.cs @@ -6,7 +6,7 @@ internal static class Program { public static void Main(string[] args) { - var summary = BenchmarkRunner.Run(); + var summary = BenchmarkRunner.Run(); } } }