Skip to content

Commit

Permalink
Updates various places to reduce the need to acquire a searcher/reade…
Browse files Browse the repository at this point in the history
…r which causes overhead for each search, lots of TODOs and WIPs
  • Loading branch information
Shazwazza committed Aug 20, 2024
1 parent b25fa83 commit 00cb2a4
Show file tree
Hide file tree
Showing 13 changed files with 234 additions and 63 deletions.
3 changes: 2 additions & 1 deletion src/Examine.Lucene/Directories/FileSystemDirectoryFactory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ protected override Directory CreateDirectory(LuceneIndex luceneIndex, bool force
IndexWriter.Unlock(dir);
}

return dir;
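// TODO: Make this configurable via IOptions (NRT settings). Wrapping the directory in an NRTCachingDirectory is expected to perform better, but its limits are hard-coded here (5.0 and 60.0 — presumably maxMergeSizeMB and maxCachedMB; confirm against the Lucene.NET constructor).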
return new NRTCachingDirectory(dir, 5.0, 60.0);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ internal CreateResult TryCreateDirectory(LuceneIndex luceneIndex, bool forceUnlo
// Start replicating back to main
_replicator.StartIndexReplicationOnSchedule(1000);

directory = localLuceneDir;
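// TODO: Make this configurable via IOptions (NRT settings). Wrapping the local directory in an NRTCachingDirectory is expected to perform better, but its limits are hard-coded here (5.0 and 60.0 — presumably maxMergeSizeMB and maxCachedMB; confirm against the Lucene.NET constructor).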
directory = new NRTCachingDirectory(localLuceneDir, 5.0, 60.0);

return mainResult;
}
Expand Down
1 change: 1 addition & 0 deletions src/Examine.Lucene/ExamineReplicator.cs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ protected virtual void Dispose(bool disposing)
{
_sourceIndex.IndexCommitted -= SourceIndex_IndexCommitted;
_localReplicationClient.Dispose();
_destinationDirectory.Dispose();
}

_disposedValue = true;
Expand Down
8 changes: 5 additions & 3 deletions src/Examine.Lucene/LuceneIndexOptions.cs
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
using System;
using System.Collections.Generic;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Analysis.Standard;
using Lucene.Net.Index;

namespace Examine.Lucene
{

public class LuceneIndexOptions : IndexOptions
{
public bool NrtEnabled { get; set; } = true;

Check warning on line 10 in src/Examine.Lucene/LuceneIndexOptions.cs

View workflow job for this annotation

GitHub Actions / build

Check warning on line 10 in src/Examine.Lucene/LuceneIndexOptions.cs

View workflow job for this annotation

GitHub Actions / build


public double NrtTargetMaxStaleSec { get; set; } = 5.0;

public double NrtTargetMinStaleSec { get; set; } = 1.0;

public IndexDeletionPolicy IndexDeletionPolicy { get; set; }

Expand Down
65 changes: 52 additions & 13 deletions src/Examine.Lucene/Providers/LuceneIndex.cs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
using Examine.Lucene.Indexing;
using Examine.Lucene.Directories;
using static Lucene.Net.Queries.Function.ValueSources.MultiFunction;
using Lucene.Net.Search.Join;
using Lucene.Net.Index.Extensions;

namespace Examine.Lucene.Providers
{
Expand Down Expand Up @@ -154,8 +156,7 @@ internal LuceneIndex(
/// </summary>
public Analyzer DefaultAnalyzer { get; }

public PerFieldAnalyzerWrapper FieldAnalyzer => _fieldAnalyzer
?? (_fieldAnalyzer =
public PerFieldAnalyzerWrapper FieldAnalyzer => (PerFieldAnalyzerWrapper)(_fieldAnalyzer ??=
(DefaultAnalyzer is PerFieldAnalyzerWrapper pfa)
? pfa
: _fieldValueTypeCollection.Value.Analyzer);
Expand Down Expand Up @@ -416,6 +417,11 @@ private void CreateNewIndex(Directory dir)
MergeScheduler = new ErrorLoggingConcurrentMergeScheduler(Name,
(s, e) => OnIndexingError(new IndexingErrorEventArgs(this, s, "-1", e)))
};

// TODO: With NRT, we should apparently use this but there is no real implementation of it!?
// https://stackoverflow.com/questions/12271614/lucene-net-indexwriter-setmergedsegmentwarmer
//writerConfig.SetMergedSegmentWarmer(new SimpleMergedSegmentWarmer())

writer = new IndexWriter(dir, writerConfig);

}
Expand Down Expand Up @@ -1035,21 +1041,43 @@ private LuceneSearcher CreateSearcher()
{
//trim the "Indexer" / "Index" suffix if it exists
if (!name.EndsWith(suffix))
{
continue;
}

name = name.Substring(0, name.LastIndexOf(suffix, StringComparison.Ordinal));
}

TrackingIndexWriter writer = IndexWriter;
var searcherManager = new SearcherManager(writer.IndexWriter, true, new SearcherFactory());

// Create an IndexSearcher ReferenceManager to safely share IndexSearcher instances across
// multiple threads
var searcherManager = new SearcherManager(
writer.IndexWriter,
false, // TODO: Apply All Deletes? Will be faster if this is false, https://blog.mikemccandless.com/2011/11/near-real-time-readers-with-lucenes.html
new SearcherFactory());

searcherManager.AddListener(this);

_nrtReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(writer, searcherManager, 5.0, 1.0)
if (_options.NrtEnabled)
{
Name = $"{Name} NRT Reopen Thread",
IsBackground = true
};
// Create the ControlledRealTimeReopenThread that reopens the index periodically, taking into
// account the changes made to the index and tracked by the TrackingIndexWriter instance.
// The index is refreshed every NrtTargetMaxStaleSec seconds when nobody is waiting
// and every NrtTargetMinStaleSec seconds when someone is waiting (see WaitForChanges)
// (see http://lucene.apache.org/core/4_3_0/core/org/apache/lucene/search/NRTManagerReopenThread.html)
_nrtReopenThread = new ControlledRealTimeReopenThread<IndexSearcher>(
writer,
searcherManager,
_options.NrtTargetMaxStaleSec, // when there is nobody waiting
_options.NrtTargetMinStaleSec) // when there is someone waiting
{
Name = $"{Name} NRT Reopen Thread",
IsBackground = true
};

_nrtReopenThread.Start();
_nrtReopenThread.Start();
}

// wait for most recent changes when first creating the searcher
WaitForChanges();
Expand Down Expand Up @@ -1186,10 +1214,17 @@ public void WaitForChanges()
{
if (_latestGen.HasValue && !_disposedValue && !_cancellationToken.IsCancellationRequested)
{
var found = _nrtReopenThread?.WaitForGeneration(_latestGen.Value, 5000);
if (_logger.IsEnabled(LogLevel.Debug))
if (_options.NrtEnabled)
{
_logger.LogDebug("{IndexName} WaitForChanges returned {GenerationFound}", Name, found);
var found = _nrtReopenThread?.WaitForGeneration(_latestGen.Value, 5000);
if (_logger.IsEnabled(LogLevel.Debug))
{
_logger.LogDebug("{IndexName} WaitForChanges returned {GenerationFound}", Name, found);
}
}
else
{
// TODO: MaybeRefresh
}
}
}
Expand Down Expand Up @@ -1307,13 +1342,17 @@ protected virtual void Dispose(bool disposing)
{
OnIndexingError(new IndexingErrorEventArgs(this, "Error closing the index", "-1", e));
}


}

_cancellationTokenSource.Dispose();

_logOutput?.Close();

_fieldAnalyzer?.Dispose();
if (!object.ReferenceEquals(_fieldAnalyzer, DefaultAnalyzer))
{
DefaultAnalyzer?.Dispose();
}
}
_disposedValue = true;
}
Expand Down
14 changes: 13 additions & 1 deletion src/Examine.Lucene/Providers/LuceneSearcher.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@ namespace Examine.Lucene.Providers
///</summary>
public class LuceneSearcher : BaseLuceneSearcher, IDisposable
{
private readonly object _locker = new object();
private readonly SearcherManager _searcherManager;
private readonly FieldValueTypeCollection _fieldValueTypeCollection;
private bool _disposedValue;
private volatile ISearchContext _searchContext;

/// <summary>
/// Constructor allowing for creating a NRT instance based on a given writer
Expand All @@ -31,7 +33,17 @@ public LuceneSearcher(string name, SearcherManager searcherManager, Analyzer ana
}

public override ISearchContext GetSearchContext()
=> new SearchContext(_searcherManager, _fieldValueTypeCollection);
{
var isCurrent = _searcherManager.IsSearcherCurrent();
if (_searchContext is null || !isCurrent)
{
_searchContext = new SearchContext(_searcherManager, _fieldValueTypeCollection);
}

return _searchContext;

//return new SearchContext(_searcherManager, _fieldValueTypeCollection);
}

protected virtual void Dispose(bool disposing)
{
Expand Down
4 changes: 4 additions & 0 deletions src/Examine.Lucene/PublicAPI.Unshipped.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,6 @@
Examine.Lucene.Directories.SyncedFileSystemDirectoryFactory.SyncedFileSystemDirectoryFactory(System.IO.DirectoryInfo localDir, System.IO.DirectoryInfo mainDir, Examine.Lucene.Directories.ILockFactory lockFactory, Microsoft.Extensions.Logging.ILoggerFactory loggerFactory, bool tryFixMainIndexIfCorrupt) -> void
Examine.Lucene.LuceneIndexOptions.NrtTargetMaxStaleSec.get -> double
Examine.Lucene.LuceneIndexOptions.NrtTargetMaxStaleSec.set -> void
Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.get -> double
Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.set -> void
virtual Examine.Lucene.Providers.LuceneIndex.UpdateLuceneDocument(Lucene.Net.Index.Term term, Lucene.Net.Documents.Document doc) -> long?
1 change: 1 addition & 0 deletions src/Examine.Lucene/Search/ISearchContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ public interface ISearchContext
ISearcherReference GetSearcher();

string[] SearchableFields { get; }

IIndexFieldValueType GetFieldValueType(string fieldName);
}
}
18 changes: 10 additions & 8 deletions src/Examine.Lucene/Search/LuceneSearchExecutor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,16 @@ private int MaxDoc
{
get
{
if (_maxDoc == null)
{
using (ISearcherReference searcher = _searchContext.GetSearcher())
{
_maxDoc = searcher.IndexSearcher.IndexReader.MaxDoc;
}
}
return _maxDoc.Value;
return 100;
////if (_maxDoc == null)
////{
//// using (ISearcherReference searcher = _searchContext.GetSearcher())
//// {
//// // TODO: Getting the IndexSearcher here will call .Acquire() on the SearcherManager again
//// _maxDoc = searcher.IndexSearcher.IndexReader.MaxDoc;
//// }
////}
////return _maxDoc.Value;
}
}

Expand Down
23 changes: 21 additions & 2 deletions src/Examine.Lucene/Search/SearchContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,32 @@ public class SearchContext : ISearchContext
{
private readonly SearcherManager _searcherManager;
private readonly FieldValueTypeCollection _fieldValueTypeCollection;
private readonly Lazy<ISearcherReference> _searcherReference;
private string[] _searchableFields;

/// <summary>
/// Creates a search context over the given searcher manager and field value types.
/// </summary>
/// <param name="searcherManager">The manager that searcher references are created from.</param>
/// <param name="fieldValueTypeCollection">The field value type definitions; must not be null.</param>
/// <exception cref="ArgumentNullException">
/// Thrown when <paramref name="fieldValueTypeCollection"/> is null.
/// </exception>
public SearchContext(SearcherManager searcherManager, FieldValueTypeCollection fieldValueTypeCollection)
{
    if (fieldValueTypeCollection is null)
    {
        throw new ArgumentNullException(nameof(fieldValueTypeCollection));
    }

    _searcherManager = searcherManager;
    _fieldValueTypeCollection = fieldValueTypeCollection;

    // The factory only runs when the lazy value is first read, so constructing the
    // context does not create a SearcherReference by itself.
    // TODO: Only if NRT is disabled? Consider calling _searcherManager.MaybeRefresh() in the factory first.
    _searcherReference = new Lazy<ISearcherReference>(
        () => new SearcherReference(_searcherManager));
}

public ISearcherReference GetSearcher() => new SearcherReference(_searcherManager);
// TODO: Do we want to create a new searcher every time? I think so, but we shouldn't allocate so much
public ISearcherReference GetSearcher()
{
//return _searcherReference.Value;

// TODO: Only if NRT is disabled?
//_searcherManager.MaybeRefresh();

return new SearcherReference(_searcherManager);
}

public string[] SearchableFields
{
Expand All @@ -32,8 +49,10 @@ public string[] SearchableFields
// will not release it from the searcher manager. When we are collecting fields, we are essentially
// performing a 'search'. We must ensure that the underlying reader has the correct reference counts.
IndexSearcher searcher = _searcherManager.Acquire();
//var searcher = GetSearcher().IndexSearcher;

try
{
{
var fields = MultiFields.GetMergedFieldInfos(searcher.IndexReader)
.Select(x => x.Name)
.ToList();
Expand Down
44 changes: 19 additions & 25 deletions src/Examine.Lucene/Search/SearcherReference.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,49 +3,43 @@

namespace Examine.Lucene.Search
{
public class SearcherReference : ISearcherReference
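// TODO: Confirm a struct is the right choice here: it is boxed whenever it is returned as ISearcherReference, and a readonly struct cannot track whether it has already been disposed.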
public readonly struct SearcherReference : ISearcherReference

Check warning on line 7 in src/Examine.Lucene/Search/SearcherReference.cs

View workflow job for this annotation

GitHub Actions / build

Symbol 'implicit constructor for 'SearcherReference'' is not part of the declared public API (https://github.com/dotnet/roslyn-analyzers/blob/main/src/PublicApiAnalyzers/PublicApiAnalyzers.Help.md)
{
private bool _disposedValue;
//private bool _disposedValue;
private readonly SearcherManager _searcherManager;
private IndexSearcher _searcher;
private readonly IndexSearcher _searcher;

public SearcherReference(SearcherManager searcherManager)
{
_searcherManager = searcherManager;
_searcher = _searcherManager.Acquire();
}

public IndexSearcher IndexSearcher
{
get
{
if (_disposedValue)
{
throw new ObjectDisposedException($"{nameof(SearcherReference)} is disposed");
}
return _searcher ?? (_searcher = _searcherManager.Acquire());
}
}

protected virtual void Dispose(bool disposing)
{
if (!_disposedValue)
{
if (disposing)
{
if (_searcher != null)
{
_searcherManager.Release(_searcher);
}
}
//if (_disposedValue)
//{
// throw new ObjectDisposedException($"{nameof(SearcherReference)} is disposed");
//}

_disposedValue = true;
//return _searcher ??= _searcherManager.Acquire();
return _searcher;
}
}

public void Dispose()
{
// Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
Dispose(disposing: true);
//if (!_disposedValue)
//{
//if (_searcher != null)
//{
_searcherManager.Release(_searcher);
//}
// _disposedValue = true;
//}
}
}
}
Loading

0 comments on commit 00cb2a4

Please sign in to comment.