Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Lucene.NET storage strategy - generation #13

Merged
merged 2 commits into from
Aug 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion src/Kentico.Xperience.Lucene/Enums/LuceneTaskType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,10 @@ public enum LuceneTaskType
/// <summary>
/// A task for a page which should be removed from the index.
/// </summary>
DELETE
DELETE,

/// <summary>
/// A task which marks the end of the indexed items; the index is published after this task is processed.
/// </summary>
PUBLISH_INDEX,
}
7 changes: 5 additions & 2 deletions src/Kentico.Xperience.Lucene/LuceneQueueWorker.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using CMS.Base;
using CMS.Base;
using CMS.Core;

using CMS.DocumentEngine;
using Kentico.Xperience.Lucene.Models;
using Kentico.Xperience.Lucene.Services;

Expand Down Expand Up @@ -52,6 +52,9 @@ public static void EnqueueLuceneQueueItem(LuceneQueueItem queueItem)
Current.Enqueue(queueItem, false);
}

public static void EnqueueIndexPublication(string indexName)
=> EnqueueLuceneQueueItem(new LuceneQueueItem(null!, LuceneTaskType.PUBLISH_INDEX, indexName));


/// <inheritdoc />
protected override void Finish() => RunProcess();
Expand Down
12 changes: 8 additions & 4 deletions src/Kentico.Xperience.Lucene/Models/LuceneIndex.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System.Reflection;
using System.Text.RegularExpressions;
using Kentico.Xperience.Lucene.Attributes;
using Kentico.Xperience.Lucene.Services;
using Kentico.Xperience.Lucene.Services.Implementations;
Expand Down Expand Up @@ -42,9 +43,9 @@ public string IndexName
}

/// <summary>
/// The filesystem path of the Lucene index.
/// The storage context of the index, which applies the selected storage strategy.
/// </summary>
public string IndexPath
public IndexStorageContext StorageContext
{
get;
}
Expand Down Expand Up @@ -78,9 +79,10 @@ internal IEnumerable<IncludedPathAttribute> IncludedPaths
/// <param name="indexName">The code name of the Lucene index.</param>
/// <param name="indexPath">The filesystem path of the Lucene index. Defaults to /App_Data/LuceneSearch/[IndexName]</param>
/// <param name="luceneIndexingStrategy">Defaults to <see cref="DefaultLuceneIndexingStrategy"/></param>
/// <param name="storageStrategy">Defines how the index is stored in terms of directory naming. Defaults to <see cref="GenerationStorageStrategy"/></param>
/// <exception cref="ArgumentNullException" />
/// <exception cref="InvalidOperationException" />
public LuceneIndex(Type type, Analyzer analyzer, string indexName, string? indexPath = null, ILuceneIndexingStrategy? luceneIndexingStrategy = null)
public LuceneIndex(Type type, Analyzer analyzer, string indexName, string? indexPath = null, ILuceneIndexingStrategy? luceneIndexingStrategy = null, IIndexStorageStrategy? storageStrategy = null)
seangwright marked this conversation as resolved.
Show resolved Hide resolved
{
if (string.IsNullOrEmpty(indexName))
{
Expand All @@ -100,7 +102,9 @@ public LuceneIndex(Type type, Analyzer analyzer, string indexName, string? index
Analyzer = analyzer ?? throw new ArgumentNullException(nameof(analyzer));
LuceneSearchModelType = type;
IndexName = indexName;
IndexPath = indexPath ?? Path.Combine(Environment.CurrentDirectory, "App_Data", "LuceneSearch", indexName);
string indexStoragePath = indexPath ?? Path.Combine(Environment.CurrentDirectory, "App_Data", "LuceneSearch", indexName);

StorageContext = new IndexStorageContext(storageStrategy ?? new GenerationStorageStrategy(), indexStoragePath);
LuceneIndexingStrategy = luceneIndexingStrategy ?? new DefaultLuceneIndexingStrategy();

var paths = type.GetCustomAttributes<IncludedPathAttribute>(false);
Expand Down
163 changes: 163 additions & 0 deletions src/Kentico.Xperience.Lucene/Services/IIndexStorageStrategy.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
using System.Diagnostics.CodeAnalysis;
using System.Text.RegularExpressions;

namespace Kentico.Xperience.Lucene.Services;

/// <summary>
/// Binds an <see cref="IIndexStorageStrategy"/> to the storage root of a single index and
/// answers which <see cref="IndexStorageModel"/> should be read from, written to or published.
/// </summary>
public class IndexStorageContext
{
    private readonly IIndexStorageStrategy storageStrategy;
    private readonly string indexStoragePathRoot;

    /// <summary>
    /// Initializes a new <see cref="IndexStorageContext"/>.
    /// </summary>
    /// <param name="selectedStorageStrategy">Strategy used to list, name and publish index storages.</param>
    /// <param name="indexStoragePathRoot">Root path handed to the strategy for every operation.</param>
    /// <exception cref="ArgumentNullException">Thrown when any argument is <c>null</c>.</exception>
    public IndexStorageContext(IIndexStorageStrategy selectedStorageStrategy, string indexStoragePathRoot)
    {
        // Fail at construction time instead of with a NullReferenceException on first use.
        storageStrategy = selectedStorageStrategy ?? throw new ArgumentNullException(nameof(selectedStorageStrategy));
        this.indexStoragePathRoot = indexStoragePathRoot ?? throw new ArgumentNullException(nameof(indexStoragePathRoot));
    }

    /// <summary>
    /// Gets the published storage with the highest generation.
    /// </summary>
    /// <returns>
    /// The newest published storage, or a model describing published generation 1
    /// when the strategy reports no published storage.
    /// </returns>
    public IndexStorageModel GetPublishedIndex() =>
        storageStrategy
            .GetExistingIndexes(indexStoragePathRoot)
            .Where(x => x.IsPublished)
            .MaxBy(x => x.Generation)
        ?? CreateStorage(1, true);

    /// <summary>
    /// Gets next generation of index
    /// </summary>
    /// <returns>
    /// An unpublished storage model: generation 1 when no storage exists, the generation following
    /// the latest one when the latest is published, otherwise the latest generation itself.
    /// The path is always produced by the strategy's <see cref="IIndexStorageStrategy.FormatPath"/>.
    /// </returns>
    public IndexStorageModel GetNextGeneration()
    {
        var lastIndex = GetLatestIndex();

        int generation;
        if (lastIndex is null)
        {
            generation = 1;
        }
        else if (lastIndex.IsPublished)
        {
            generation = lastIndex.Generation + 1;
        }
        else
        {
            generation = lastIndex.Generation;
        }

        return CreateStorage(generation, false);
    }

    /// <summary>
    /// Gets the storage with the highest generation, regardless of its publication state.
    /// </summary>
    /// <param name="defaultPublished">Publication flag of the generation 1 model returned when no storage exists.</param>
    /// <returns>The latest existing storage, or a generation 1 model when the strategy reports none.</returns>
    public IndexStorageModel GetLastGeneration(bool defaultPublished) =>
        GetLatestIndex() ?? CreateStorage(1, defaultPublished);

    /// <summary>
    /// method returns last writable index storage model
    /// </summary>
    /// <returns>Storage model with information about writable index</returns>
    public IndexStorageModel GetNextOrOpenNextGeneration()
    {
        var lastIndex = GetLatestIndex();

        if (lastIndex is null)
        {
            // no existing index, lets create new one
            return CreateStorage(1, false);
        }

        // An unpublished latest generation is returned as reported by the strategy;
        // a published one is followed by a new unpublished generation.
        return lastIndex.IsPublished
            ? CreateStorage(lastIndex.Generation + 1, false)
            : lastIndex;
    }

    /// <summary>
    /// Publishes the given storage by delegating to the storage strategy.
    /// </summary>
    /// <param name="storage">Storage to publish.</param>
    public void PublishIndex(IndexStorageModel storage) => storageStrategy.PublishIndex(storage);

    // Storage with the highest generation reported by the strategy, or null when there is none.
    private IndexStorageModel? GetLatestIndex() =>
        storageStrategy
            .GetExistingIndexes(indexStoragePathRoot)
            .MaxBy(x => x.Generation);

    // Builds a model whose path is formatted by the strategy for the given generation and publication flag.
    private IndexStorageModel CreateStorage(int generation, bool isPublished) =>
        new(storageStrategy.FormatPath(indexStoragePathRoot, generation, isPublished), generation, isPublished);
}

/// <summary>
/// Describes a single index storage as reported or created through an <see cref="IIndexStorageStrategy"/>.
/// </summary>
/// <param name="Path">Path of the storage; this is the path the index directory is opened from.</param>
/// <param name="Generation">Generation number of the storage.</param>
/// <param name="IsPublished">Whether the storage is marked as published.</param>
public record IndexStorageModel(string Path, int Generation, bool IsPublished);

/// <summary>
/// Defines how index storages are enumerated, how their paths are formatted and how a storage is published.
/// </summary>
public interface IIndexStorageStrategy
{
    /// <summary>
    /// Lists the index storages the strategy recognizes for the given storage path.
    /// </summary>
    /// <param name="indexStoragePath">Storage root of the index.</param>
    /// <returns>Storage models of the recognized storages.</returns>
    IEnumerable<IndexStorageModel> GetExistingIndexes(string indexStoragePath);

    /// <summary>
    /// Formats the path of a storage with the given generation and publication flag.
    /// </summary>
    /// <param name="indexRoot">Storage root of the index.</param>
    /// <param name="generation">Generation number of the storage.</param>
    /// <param name="isPublished">Whether the path is for a published storage.</param>
    /// <returns>Path of the storage.</returns>
    string FormatPath(string indexRoot, int generation, bool isPublished);

    /// <summary>
    /// Publishes the given storage.
    /// </summary>
    /// <param name="storage">Storage to publish.</param>
    void PublishIndex(IndexStorageModel storage);
}

/// <summary>
/// Storage strategy which keeps each generation of an index in its own sub-directory of the index root.
/// Directory names have the form <c>i-g{generation:0000000}-p_{isPublished}</c>, so the generation number
/// and the publication flag are both encoded in the directory name.
/// </summary>
public class GenerationStorageStrategy : IIndexStorageStrategy
{
    // Anchored so that only directories whose whole name follows the naming scheme are treated as
    // index generations; unrelated directories that merely contain such a fragment are ignored.
    private static readonly Regex IndexDirectoryNamePattern = new(
        @"^i-g(?<generation>[0-9]+)-p_(?<published>true|false)\z",
        RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

    /// <summary>
    /// Enumerates the sub-directories of <paramref name="indexStoragePath"/> whose names follow the generation naming scheme.
    /// </summary>
    /// <param name="indexStoragePath">Storage root of the index.</param>
    /// <returns>One model per recognized directory; empty when the root directory does not exist.</returns>
    public IEnumerable<IndexStorageModel> GetExistingIndexes(string indexStoragePath)
    {
        if (!Directory.Exists(indexStoragePath))
        {
            yield break;
        }

        foreach (string directory in Directory.GetDirectories(indexStoragePath))
        {
            if (TryParseIndexStorageModel(directory, out var storage))
            {
                yield return storage;
            }
        }
    }

    /// <summary>
    /// Builds the path of the generation directory below <paramref name="indexRoot"/>.
    /// </summary>
    /// <param name="indexRoot">Storage root of the index.</param>
    /// <param name="generation">Generation number, zero-padded to seven digits in the directory name.</param>
    /// <param name="isPublished">Publication flag encoded in the directory name.</param>
    /// <returns>Path of the generation directory.</returns>
    public string FormatPath(string indexRoot, int generation, bool isPublished) => Path.Combine(indexRoot, $"i-g{generation:0000000}-p_{isPublished}");

    /// <summary>
    /// Publishes the storage by moving its directory to the published name of the same generation.
    /// </summary>
    /// <param name="storage">Storage to publish. A storage which is already published is left untouched.</param>
    public void PublishIndex(IndexStorageModel storage)
    {
        if (storage.IsPublished)
        {
            // Source and destination would be the same directory, which Directory.Move rejects.
            return;
        }

        // Resolve the parent directory up front so the destination does not contain a "<source>/.." segment.
        string root = Path.GetFullPath(Path.Combine(storage.Path, ".."));
        Directory.Move(storage.Path, FormatPath(root, storage.Generation, true));
    }

    /// <summary>
    /// Tries to interpret a directory as an index generation based on its name.
    /// </summary>
    /// <param name="directoryPath">Path of the candidate directory.</param>
    /// <param name="storage">The parsed storage model when the method returns <c>true</c>; otherwise <c>null</c>.</param>
    /// <returns><c>true</c> when the directory name follows the generation naming scheme.</returns>
    private static bool TryParseIndexStorageModel(string directoryPath, [NotNullWhen(true)] out IndexStorageModel? storage)
    {
        storage = null;

        if (string.IsNullOrWhiteSpace(directoryPath))
        {
            return false;
        }

        string directoryName;
        try
        {
            directoryName = new DirectoryInfo(directoryPath).Name;
        }
        catch (Exception ex) when (ex is ArgumentException or IOException or NotSupportedException or System.Security.SecurityException)
        {
            // low priority, if path cannot be parsed, it is possibly not generated index
            return false;
        }

        var match = IndexDirectoryNamePattern.Match(directoryName);
        if (!match.Success)
        {
            return false;
        }

        // int.TryParse still guards against generation numbers which do not fit into an int.
        if (int.TryParse(match.Groups["generation"].Value, out int generation)
            && bool.TryParse(match.Groups["published"].Value, out bool published))
        {
            storage = new IndexStorageModel(directoryPath, generation, published);
            return true;
        }

        return false;
    }
}

/// <summary>
/// Storage strategy without generations: the index root itself is reported as the only storage,
/// with generation 0 and marked as published.
/// </summary>
public class SimpleStorageStrategy : IIndexStorageStrategy
{
    /// <summary>
    /// Reports the storage path itself as the single existing storage.
    /// </summary>
    /// <param name="indexStoragePath">Storage root of the index.</param>
    /// <returns>A single published model with generation 0 pointing at <paramref name="indexStoragePath"/>.</returns>
    public IEnumerable<IndexStorageModel> GetExistingIndexes(string indexStoragePath)
    {
        var rootStorage = new IndexStorageModel(indexStoragePath, 0, true);
        return new[] { rootStorage };
    }

    /// <summary>
    /// Returns the index root unchanged; generation and publication flag do not affect the path.
    /// </summary>
    public string FormatPath(string indexRoot, int generation, bool isPublished)
    {
        return indexRoot;
    }

    /// <summary>
    /// Does nothing.
    /// </summary>
    public void PublishIndex(IndexStorageModel storage)
    {
        // Method intentionally left empty. In this strategy, publication of index is not needed
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ namespace Kentico.Xperience.Lucene.Services;

public interface ILuceneIndexService
{
T UseWriter<T>(LuceneIndex index, Func<IndexWriter, T> useIndexWriter, OpenMode openMode = OpenMode.CREATE_OR_APPEND);
T UseWriter<T>(LuceneIndex index, Func<IndexWriter, T> useIndexWriter, IndexStorageModel storage, OpenMode openMode = OpenMode.CREATE_OR_APPEND);

void ResetIndex(LuceneIndex index);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
using System.Collections.Concurrent;
using CMS.Core;
using CMS.DocumentEngine;
using CMS.Helpers.Caching.Abstractions;
Expand All @@ -20,7 +21,7 @@
private readonly ILuceneSearchModelToDocumentMapper luceneSearchModelToDocumentMapper;

private readonly ICacheAccessor cacheAccessor;
private readonly IEventLogService eventLogService;

Check warning on line 24 in src/Kentico.Xperience.Lucene/Services/Implementations/DefaultLuceneClient.cs

View workflow job for this annotation

GitHub Actions / build

Remove this unread private field 'eventLogService' or refactor the code to use its value. (https://rules.sonarsource.com/csharp/RSPEC-4487)
private readonly IPageRetriever pageRetriever;

internal const string CACHEKEY_STATISTICS = "Lucene|ListIndices";
Expand Down Expand Up @@ -60,7 +61,7 @@


/// <inheritdoc/>
public async Task<ICollection<LuceneIndexStatisticsViewModel>> GetStatistics(CancellationToken cancellationToken) =>

Check warning on line 64 in src/Kentico.Xperience.Lucene/Services/Implementations/DefaultLuceneClient.cs

View workflow job for this annotation

GitHub Actions / build

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
IndexStore.Instance.GetAllIndexes().Select(i =>
{
var statistics = luceneIndexService.UseSearcher(i, s => new LuceneIndexStatisticsViewModel()
Expand Down Expand Up @@ -103,7 +104,7 @@
return UpsertRecordsInternal(dataObjects, indexName);
}

private async Task<int> DeleteRecordsInternal(IEnumerable<string> objectIds, string indexName)

Check warning on line 107 in src/Kentico.Xperience.Lucene/Services/Implementations/DefaultLuceneClient.cs

View workflow job for this annotation

GitHub Actions / build

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
var index = IndexStore.Instance.GetIndex(indexName);
if (index != null)
Expand All @@ -116,11 +117,10 @@
var termQuery = new TermQuery(new Term(nameof(LuceneSearchModel.ObjectID), objectId));
booleanQuery.Add(termQuery, Occur.SHOULD); // Match any of the object IDs
}
// todo use batches

Check warning on line 120 in src/Kentico.Xperience.Lucene/Services/Implementations/DefaultLuceneClient.cs

View workflow job for this annotation

GitHub Actions / build

Complete the task associated to this 'TODO' comment. (https://rules.sonarsource.com/csharp/RSPEC-1135)
writer.DeleteDocuments(booleanQuery);
return "OK";
});

}, index.StorageContext.GetLastGeneration(true));
}
return 0;
}
Expand All @@ -130,6 +130,7 @@
{
// Clear statistics cache so listing displays updated data after rebuild
cacheAccessor.Remove(CACHEKEY_STATISTICS);

luceneIndexService.ResetIndex(luceneIndex);

var indexedNodes = new List<TreeNode>();
Expand All @@ -152,12 +153,12 @@

indexedNodes.AddRange(nodes);
}



indexedNodes.ForEach(node => LuceneQueueWorker.EnqueueLuceneQueueItem(new LuceneQueueItem(node, LuceneTaskType.CREATE, luceneIndex.IndexName)));
LuceneQueueWorker.EnqueueIndexPublication(luceneIndex.IndexName);
}

private async Task<int> UpsertRecordsInternal(IEnumerable<LuceneSearchModel> dataObjects, string indexName)

Check warning on line 161 in src/Kentico.Xperience.Lucene/Services/Implementations/DefaultLuceneClient.cs

View workflow job for this annotation

GitHub Actions / build

This async method lacks 'await' operators and will run synchronously. Consider using the 'await' operator to await non-blocking API calls, or 'await Task.Run(...)' to do CPU-bound work on a background thread.
{
var index = IndexStore.Instance.GetIndex(indexName);
if (index != null)
Expand All @@ -177,7 +178,7 @@
count++;
}
return count;
});
}, index.StorageContext.GetLastGeneration(true));
}
return 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,37 @@ namespace Kentico.Xperience.Lucene.Services.Implementations;
public class DefaultLuceneIndexService : ILuceneIndexService
{
private const LuceneVersion LUCENE_VERSION = LuceneVersion.LUCENE_48;
public TResult UseWriter<TResult>(LuceneIndex index, Func<IndexWriter, TResult> useIndexWriter, OpenMode openMode = OpenMode.CREATE_OR_APPEND)
{
using LuceneDirectory indexDir = FSDirectory.Open(index.IndexPath);

public TResult UseWriter<TResult>(LuceneIndex index, Func<IndexWriter, TResult> useIndexWriter, IndexStorageModel storage, OpenMode openMode = OpenMode.CREATE_OR_APPEND)
{
using LuceneDirectory indexDir = FSDirectory.Open(storage.Path);

//Create an index writer
var indexConfig = new IndexWriterConfig(LUCENE_VERSION, index.Analyzer)
{
OpenMode = openMode // create/overwrite index
OpenMode = openMode // create/overwrite index
};
using var writer = new IndexWriter(indexDir, indexConfig);

return useIndexWriter(writer);
}

public void ResetIndex(LuceneIndex index) => UseWriter(index, (IndexWriter writer) => true, OpenMode.CREATE);
public void ResetIndex(LuceneIndex index) => UseWriter(index, (IndexWriter writer) => true, index.StorageContext.GetNextGeneration(), OpenMode.CREATE);

public TResult UseSearcher<TResult>(LuceneIndex index, Func<IndexSearcher, TResult> useIndexSearcher)
{
if (!System.IO.Directory.Exists(index.IndexPath))
var storage = index.StorageContext.GetPublishedIndex();
if (!System.IO.Directory.Exists(storage.Path))
{
// ensure index
UseWriter(index, (writer) =>
{
writer.Commit();
return true;
});
}, storage);
}
using LuceneDirectory indexDir = FSDirectory.Open(index.IndexPath);

using LuceneDirectory indexDir = FSDirectory.Open(storage.Path);
using var reader = DirectoryReader.Open(indexDir);
var searcher = new IndexSearcher(reader);
return useIndexSearcher(searcher);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
using CMS.Core;
using CMS.Core;
using CMS.DocumentEngine;
using CMS.WorkflowEngine;

Expand All @@ -11,7 +11,7 @@
private readonly ILuceneClient luceneClient;
private readonly ILuceneModelGenerator luceneObjectGenerator;
private readonly IEventLogService eventLogService;
private readonly IWorkflowStepInfoProvider workflowStepInfoProvider;

Check warning on line 14 in src/Kentico.Xperience.Lucene/Services/Implementations/DefaultLuceneTaskProcessor.cs

View workflow job for this annotation

GitHub Actions / build

Remove this unread private field 'workflowStepInfoProvider' or refactor the code to use its value. (https://rules.sonarsource.com/csharp/RSPEC-4487)
private readonly IVersionHistoryInfoProvider versionHistoryInfoProvider;


Expand Down Expand Up @@ -52,8 +52,21 @@
upsertData.Add(data);
}

successfulOperations += await luceneClient.DeleteRecords(deleteIds, group.Key);
successfulOperations += await luceneClient.UpsertRecords(upsertData, group.Key, cancellationToken);
if (IndexStore.Instance.GetIndex(group.Key) is { } index)
{
successfulOperations += await luceneClient.DeleteRecords(deleteIds, group.Key);
successfulOperations += await luceneClient.UpsertRecords(upsertData, group.Key, cancellationToken);

if (group.Any(t => t.TaskType == LuceneTaskType.PUBLISH_INDEX))
{
var storage = index.StorageContext.GetNextOrOpenNextGeneration();
index.StorageContext.PublishIndex(storage);
}
}
else
{
eventLogService.LogError(nameof(DefaultLuceneTaskProcessor), nameof(ProcessLuceneTasks), "Index instance not exists");
}
}
catch (Exception ex)
{
Expand Down
Loading