From 52bad48399df2971af03278f33ac550f760cc55d Mon Sep 17 00:00:00 2001 From: Egor Potemkin Date: Mon, 10 Feb 2025 23:00:51 +0000 Subject: [PATCH] Addressed PR comments --- README.md | 12 ++++++++++++ src/main/perf/SearchTask.java | 15 +++++++++++++-- src/main/perf/TestContext.java | 17 +++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 50ddb42b5..f75d04cdc 100644 --- a/README.md +++ b/README.md @@ -179,3 +179,15 @@ To test vector search with [Cohere/wikipedia-22-12-en-embeddings](https://huggin in the format that luceneutil vector search can understand. Instead of `10000000` increase the number of documents to `100000000` if you want to run vector search on 10M documents. 2. In `src/python/knnPerTest.py` uncomment lines that define doc and query vectors for cohere dataset. 3. run `src/python/knnPerTest.py` + +# Running the facets benchmark + +## Compare facet implementations + +There are currently two facets implementations - one that first collects document IDs and then computes facets in a separate phase, and a new implementation that computes facets during collection. 
+ + To compare the performance of the two implementations, run: + +``` +python src/python/localrunFacets.py -source facetsWikimediumAll +``` \ No newline at end of file diff --git a/src/main/perf/SearchTask.java b/src/main/perf/SearchTask.java index 68beef15f..741a65d01 100644 --- a/src/main/perf/SearchTask.java +++ b/src/main/perf/SearchTask.java @@ -19,7 +19,12 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; -import org.apache.lucene.facet.*; +import org.apache.lucene.facet.FacetResult; +import org.apache.lucene.facet.Facets; +import org.apache.lucene.facet.FacetsCollector; +import org.apache.lucene.facet.FacetsCollectorManager; +import org.apache.lucene.facet.FacetsConfig; +import org.apache.lucene.facet.LabelAndValue; import org.apache.lucene.facet.range.LongRange; import org.apache.lucene.facet.range.LongRangeFacetCounts; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetCounts; @@ -73,7 +78,13 @@ import java.io.IOException; import java.io.PrintStream; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; final class SearchTask extends Task { private final String category; diff --git a/src/main/perf/TestContext.java b/src/main/perf/TestContext.java index 8c21ba1e1..92933afb8 100644 --- a/src/main/perf/TestContext.java +++ b/src/main/perf/TestContext.java @@ -1,5 +1,22 @@ package perf; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + import java.util.Objects; /**