Skip to content

Commit

Permalink
Merge pull request #14 from Kentico/TK/lucene_facets
Browse files Browse the repository at this point in the history
lucene facets
  • Loading branch information
seangwright authored Sep 6, 2023
2 parents 5d15ddf + d54c503 commit df6c62f
Show file tree
Hide file tree
Showing 21 changed files with 829 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,11 @@ public CafeCardWidgetViewComponent(

/// <summary>
/// Renders the cafe card widget for the first cafe referenced by the widget properties.
/// </summary>
/// <param name="properties">Widget properties carrying the selected cafes.</param>
/// <returns>
/// The widget view bound to the view model built from the resolved cafe; the view model is
/// built from <c>null</c> when no cafe is selected.
/// </returns>
public ViewViewComponentResult Invoke(CafeCardProperties properties)
{
    // Null-conditional access: SelectedCafes can be null, in which case no cafe is looked up.
    var selectedPage = properties.SelectedCafes?.FirstOrDefault();
    var cafe = (selectedPage != null) ? repository.GetCafeByNodeId(selectedPage.ItemId) : null;
    var cafeModel = CafeCardViewModel.GetViewModel(cafe);

    return View("~/Components/Widgets/CafeCardWidget/_CafeCardWidget.cshtml", cafeModel);
}
}
}
}
10 changes: 9 additions & 1 deletion src/Kentico.Xperience.Lucene.Sample/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
The constraint ensures that broken URLs lead to a "404 page not found" page and are not handled by a controller dedicated to the component or
to a page handled by the content tree-based router (which would lead to an exception).
*/
// Pipe-separated controller names; "CafeSearch" is listed so CafeSearchController is covered by the constraint.
const string CONSTRAINT_FOR_NON_ROUTER_PAGE_CONTROLLERS = "Account|Consent|Subscription|Coffees|Search|CrawlerSearch|CafeSearch";

var builder = WebApplication.CreateBuilder(args);

Expand Down Expand Up @@ -101,9 +101,17 @@ The constraint ensures that broken URLs lead to a "404 page not found" page and
DancingGoatCrawlerSearchModel.IndexName,
indexPath: null,
new DancingGoatCrawlerLuceneIndexingStrategy()),
new LuceneIndex(
typeof(CafeSearchModel),
new StandardAnalyzer(Lucene.Net.Util.LuceneVersion.LUCENE_48),
CafeSearchModel.IndexName,
indexPath: null,
new CafeLuceneIndexingStrategy()
)
});
// Search-related services. The sanitizer and the three search services are singletons;
// WebCrawlerService is registered through AddHttpClient (typed HTTP client registration).
builder.Services.AddSingleton<WebScraperHtmlSanitizer>();
builder.Services.AddSingleton<DancingGoatSearchService>();
// Backs CafeSearchController (faceted cafe search).
builder.Services.AddSingleton<CafeSearchService>();
builder.Services.AddHttpClient<WebCrawlerService>();
builder.Services.AddSingleton<DancingGoatCrawlerSearchService>();

Expand Down
78 changes: 78 additions & 0 deletions src/Kentico.Xperience.Lucene.Sample/Search/CafeSearchController.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
using CMS.Core;
using Kentico.Xperience.Lucene;
using Kentico.Xperience.Lucene.Services;
using Microsoft.AspNetCore.Mvc;

namespace DancingGoat.Search;

/// <summary>
/// MVC controller exposing the cafe search page and an endpoint that triggers a Lucene index rebuild.
/// </summary>
public class CafeSearchController : Controller
{
    public record SearchRequest(string Query = "", int PageSize = 20, int Page = 1);
    public record RebuildSearchIndexRequest(string IndexName, string Secret);

    // replace with real secret loaded from config
    private const string REBUILD_SECRET = "1234567890aaabbbccc";
    private readonly CafeSearchService searchService;
    private readonly ILuceneClient luceneClient;
    private readonly IEventLogService eventLogService;

    public CafeSearchController(CafeSearchService searchService, ILuceneClient luceneClient, IEventLogService eventLogService)
    {
        this.searchService = searchService;
        this.luceneClient = luceneClient;
        this.eventLogService = eventLogService;
    }

    /// <summary>
    /// Runs a cafe search and renders the result view.
    /// </summary>
    /// <param name="query">Search text forwarded to <see cref="CafeSearchService.Search"/>.</param>
    /// <param name="pageSize">Number of hits per page.</param>
    /// <param name="page">1-based page number.</param>
    /// <param name="facet">Optional facet selection forwarded to the search service.</param>
    /// <returns>The view bound to the search result model.</returns>
    [HttpGet]
    public IActionResult Index(string query, int pageSize = 10, int page = 1, string? facet = null)
    {
        var results = searchService.Search(query, pageSize, page, facet);

        return View(results);
    }

    /// <summary>
    /// Starts a rebuild of the index named in the request. Intended to be called by an HTTP POST
    /// with the JSON body
    /// <code>
    /// {
    ///     "indexName": "...",
    ///     "secret": "..."
    /// }
    /// </code>
    /// NOTE(review): the URL is presumably [webroot]/cafesearch/rebuild under a conventional
    /// {controller}/{action} route - confirm against the route configuration.
    /// </summary>
    /// <param name="request">Index name and shared secret; may be null when the body is missing or unreadable.</param>
    /// <returns>
    /// 401 when the secret does not match, 404 when the index name is missing or unknown,
    /// 200 once the rebuild call has completed, or a problem response when it throws.
    /// </returns>
    [HttpPost]
    public async Task<IActionResult> Rebuild([FromBody] RebuildSearchIndexRequest request)
    {
        try
        {
            // A missing body carries no secret, so it is rejected here as well instead of
            // dereferencing a null request.
            if (!IsValidSecret(request?.Secret))
            {
                return Unauthorized("Invalid Secret");
            }

            if (string.IsNullOrWhiteSpace(request.IndexName))
            {
                return NotFound("IndexName is required");
            }

            // Look up the index the caller asked for, so the lookup, the validation above and
            // the "not found" message below all refer to the same name.
            var index = IndexStore.Instance.GetIndex(request.IndexName);
            if (index == null)
            {
                return NotFound($"Index not found: {request.IndexName}");
            }

            await luceneClient.Rebuild(index.IndexName, null);
            return Ok("Index rebuild started");
        }
        catch (Exception ex)
        {
            // request?. keeps the handler itself from throwing if the failure happened before
            // the request was known to be non-null.
            eventLogService.LogException(nameof(CafeSearchController), nameof(Rebuild), ex, 0, $"IndexName: {request?.IndexName}");
            return Problem("Index rebuild failed");
        }
    }

    /// <summary>
    /// Compares the supplied secret with <see cref="REBUILD_SECRET"/> in constant time so the
    /// comparison duration does not reveal how many leading characters matched.
    /// </summary>
    /// <param name="candidate">Secret received from the caller; null never matches.</param>
    /// <returns><c>true</c> when the secret matches exactly.</returns>
    private static bool IsValidSecret(string? candidate)
    {
        if (candidate is null)
        {
            return false;
        }

        byte[] expected = System.Text.Encoding.UTF8.GetBytes(REBUILD_SECRET);
        byte[] actual = System.Text.Encoding.UTF8.GetBytes(candidate);

        return System.Security.Cryptography.CryptographicOperations.FixedTimeEquals(actual, expected);
    }
}

49 changes: 49 additions & 0 deletions src/Kentico.Xperience.Lucene.Sample/Search/CafeSearchModel.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
using CMS.DocumentEngine;
using CMS.DocumentEngine.Types.DancingGoatCore;
using Kentico.Xperience.Lucene.Attributes;
using Kentico.Xperience.Lucene.Models;
using Kentico.Xperience.Lucene.Services.Implementations;
using Lucene.Net.Facet;

namespace DancingGoat.Search;

/// <summary>
/// Search model for the cafe index: cafe pages anywhere in the tree ("/%") of content type
/// <see cref="Cafe.CLASS_NAME"/>, with a hierarchical "Country" facet derived from <see cref="CafeCountry"/>.
/// </summary>
[IncludedPath("/%", ContentTypes = new string[] { Cafe.CLASS_NAME })]
public class CafeSearchModel : LuceneSearchModel
{
    public const string IndexName = "CafeIndex";

    [TextField(true)]
    [Source(new string[] { nameof(TreeNode.DocumentName) })]
    public string Title { get; set; }

    [TextField(true)]
    public string CafeCountry { get; set; }

    [TextField(true)]
    public string CafeCity { get; set; }

    [TextField(true)]
    public string CafeZipCode { get; set; }

    /// <summary>
    /// Produces the "Country" facet for this document from the comma-separated
    /// <see cref="CafeCountry"/> value: the first segment is used as the country and the second
    /// as the state. Missing segments are replaced by the "no country" / "no state" placeholders.
    /// </summary>
    /// <returns>A single <see cref="FacetField"/> in the "Country" dimension.</returns>
    public override IEnumerable<FacetField> OnTaxonomyFieldCreation()
    {
        // TrimEntries together with RemoveEmptyEntries also drops whitespace-only segments
        // (e.g. the trailing part of "USA, "), which would otherwise become empty facet labels.
        string[] countries = CafeCountry?.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
            ?? Array.Empty<string>();

        yield return countries switch
        {
            { Length: >= 2 } => new FacetField("Country", countries[0], countries[1]),
            { Length: 1 } => new FacetField("Country", countries[0], "no state"),
            _ => new FacetField("Country", "no country", "no state")
        };
    }
}


/// <summary>
/// Indexing strategy for the cafe index; differs from the default strategy only in the
/// facet configuration it supplies.
/// </summary>
public class CafeLuceneIndexingStrategy : DefaultLuceneIndexingStrategy
{
    // Facet dimension that is configured as hierarchical.
    private const string CountryDimension = "Country";

    /// <summary>
    /// Creates a facet configuration in which the "Country" dimension is hierarchical.
    /// </summary>
    /// <returns>A new <see cref="FacetsConfig"/> instance on every call.</returns>
    public override FacetsConfig FacetsConfigFactory()
    {
        FacetsConfig configuration = new();
        configuration.SetHierarchical(CountryDimension, true);

        return configuration;
    }
}
103 changes: 103 additions & 0 deletions src/Kentico.Xperience.Lucene.Sample/Search/CafeSearchService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
using Kentico.Xperience.Lucene;
using Kentico.Xperience.Lucene.Models;
using Kentico.Xperience.Lucene.Services;
using Lucene.Net.Documents;
using Lucene.Net.Facet;
using Lucene.Net.Search;
using Lucene.Net.Util;

namespace DancingGoat.Search;

/// <summary>
/// Executes paged, optionally facet-filtered searches against the cafe Lucene index.
/// </summary>
public class CafeSearchService
{
    private const int PHRASE_SLOP = 3;
    private const int MAX_RESULTS = 1000;

    // Facet dimension used for drill-down and for the returned facet counts.
    private const string COUNTRY_DIMENSION = "Country";

    // Separator between the segments of the incoming facet string.
    private const char FACET_SEPARATOR = ';';

    private readonly ILuceneIndexService luceneIndexService;

    public CafeSearchService(ILuceneIndexService luceneIndexService) => this.luceneIndexService = luceneIndexService;

    /// <summary>
    /// Searches cafes by title and returns one page of hits together with "Country" facet values.
    /// </summary>
    /// <param name="searchText">Text to search for; null or whitespace matches all documents.</param>
    /// <param name="pageSize">Hits per page; values below 1 are raised to 1.</param>
    /// <param name="page">1-based page number; values below 1 are raised to 1.</param>
    /// <param name="facet">
    /// Optional ';'-separated facet selection. The first segment is skipped and the remaining
    /// segments are used as the path inside the "Country" dimension. Empty segments are ignored.
    /// </param>
    /// <returns>The result model for the requested page.</returns>
    /// <exception cref="InvalidOperationException">The cafe index is not registered.</exception>
    public LuceneSearchResultModel<CafeSearchModel> Search(string searchText, int pageSize = 20, int page = 1, string facet = null)
    {
        var index = IndexStore.Instance.GetIndex(CafeSearchModel.IndexName)
            ?? throw new InvalidOperationException($"Index {CafeSearchModel.IndexName} was not found!!!");

        pageSize = Math.Max(1, pageSize);
        page = Math.Max(1, page);

        // Computed in 64-bit so large page/pageSize values cannot overflow. At most MAX_RESULTS
        // documents are fetched, so any offset beyond that yields an empty page anyway.
        int offset = (int)Math.Min((long)pageSize * (page - 1), MAX_RESULTS);
        int limit = pageSize;

        var queryBuilder = new QueryBuilder(index.Analyzer);

        Query query = string.IsNullOrWhiteSpace(searchText)
            ? new MatchAllDocsQuery()
            : GetTermQuery(queryBuilder, searchText);

        // Parse the facet once so the drill-down filter and the facet lookup below use the same path.
        string[] facetSegments = facet?.Split(FACET_SEPARATOR, StringSplitOptions.RemoveEmptyEntries) ?? Array.Empty<string>();
        string[] countryPath = facetSegments.Skip(1).ToArray();

        DrillDownQuery drillDownQuery = null;
        if (facet != null)
        {
            var facetsConfig = new CafeLuceneIndexingStrategy().FacetsConfigFactory();
            drillDownQuery = new DrillDownQuery(facetsConfig, query);

            if (countryPath.Length > 0)
            {
                var countryDim = facetsConfig.GetDimConfig(COUNTRY_DIMENSION);
                var boolQuery = new BooleanQuery();
                boolQuery.Add(new TermQuery(DrillDownQuery.Term(countryDim.IndexFieldName, COUNTRY_DIMENSION, countryPath)), Occur.MUST);
                boolQuery.Add(query, Occur.MUST);
                drillDownQuery.Add(COUNTRY_DIMENSION, boolQuery);
            }
        }

        // NOTE(review): the meaning of the literal 20 is defined by UseSearcherWithFacets, which
        // is not visible here - confirm before naming or changing it.
        var result = luceneIndexService.UseSearcherWithFacets(
            index,
            query, 20,
            (searcher, facets) =>
            {
                // NOTE(review): the sort field name comes from DancingGoatSearchModel, not
                // CafeSearchModel - confirm the cafe index actually contains this field.
                var topDocs = searcher.Search(drillDownQuery ?? query, MAX_RESULTS,
                    new Sort(new SortField(
                        nameof(DancingGoatSearchModel.PublishedDateTicks),
                        FieldCache.NUMERIC_UTILS_INT64_PARSER,
                        true)));

                return new LuceneSearchResultModel<CafeSearchModel>
                {
                    Query = searchText ?? "",
                    Page = page,
                    PageSize = pageSize,
                    TotalPages = topDocs.TotalHits <= 0 ? 0 : ((topDocs.TotalHits - 1) / pageSize) + 1,
                    TotalHits = topDocs.TotalHits,
                    Hits = topDocs.ScoreDocs
                        .Skip(offset)
                        .Take(limit)
                        .Select(d => MapToResultItem(searcher.Doc(d.Doc)))
                        .ToList(),
                    Facet = facet,
                    Facets = facets?.GetTopChildren(10, COUNTRY_DIMENSION, countryPath)?.LabelValues
                };
            }
        );

        return result;
    }

    /// <summary>
    /// Builds the text query: a boosted phrase match on the cafe title.
    /// </summary>
    /// <param name="queryBuilder">Builder bound to the index analyzer.</param>
    /// <param name="searchText">Non-blank search text.</param>
    /// <returns>
    /// A boolean query with the title phrase as an optional clause, or an empty boolean query
    /// (matching nothing) when the analyzer produces no terms from <paramref name="searchText"/>.
    /// </returns>
    private static Query GetTermQuery(QueryBuilder queryBuilder, string searchText)
    {
        var combined = new BooleanQuery();

        // CreatePhraseQuery returns null when analysis yields zero terms (for example a query
        // made only of stop words); setting Boost on it would throw.
        var titlePhrase = queryBuilder.CreatePhraseQuery(nameof(CafeSearchModel.Title), searchText, PHRASE_SLOP);
        if (titlePhrase is null)
        {
            return combined;
        }

        titlePhrase.Boost = 5;
        combined.Add(titlePhrase, Occur.SHOULD);

        return combined;
    }

    /// <summary>
    /// Maps a Lucene document to the cafe result model by reading the same-named fields.
    /// </summary>
    private static CafeSearchModel MapToResultItem(Document doc) => new()
    {
        Title = doc.Get(nameof(CafeSearchModel.Title)),
        Url = doc.Get(nameof(CafeSearchModel.Url)),
        CafeCity = doc.Get(nameof(CafeSearchModel.CafeCity)),
        CafeCountry = doc.Get(nameof(CafeSearchModel.CafeCountry)),
        CafeZipCode = doc.Get(nameof(CafeSearchModel.CafeZipCode)),
    };
}
Loading

0 comments on commit df6c62f

Please sign in to comment.