Implement Weight#count for vector values in the FieldExistsQuery (#13322)

* implement Weight#count for vector values

* add change log

* apply review comment

* apply review comment

* changelog

* remove null check
bugmakerrrrrr authored and benwtrent committed Jun 5, 2024
1 parent 2ed1f2f commit b19ac01
Showing 3 changed files with 61 additions and 16 deletions.
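
In short, the optimization rests on one observation: per segment, the vector values for a field already know how many documents carry a vector (their size()), and when the segment has no deletions that number is exactly the count FieldExistsQuery needs. Below is a minimal standalone sketch of that reasoning, mirroring what the new getVectorValuesSize helper does inside the query; it is illustrative only, not code from this patch, and the class and method names are made up.

import java.io.IOException;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.LeafReader;

// Illustrative sketch: exact per-segment count of documents that have a vector
// in the given field, valid only when the segment has no deletions.
public final class VectorCountSketch {
  public static int countVectorDocs(LeafReader reader, String field) throws IOException {
    if (reader.hasDeletions()) {
      // size() also counts vectors belonging to deleted documents, so an exact
      // count would require scanning live docs; the query falls back to
      // super.count() in this case.
      return -1;
    }
    FloatVectorValues floats = reader.getFloatVectorValues(field);
    if (floats != null) {
      return floats.size();
    }
    ByteVectorValues bytes = reader.getByteVectorValues(field);
    return bytes == null ? 0 : bytes.size();
  }
}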
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
@@ -25,6 +25,8 @@ Optimizations
* GITHUB#13425: Rewrite SortedNumericDocValuesRangeQuery to MatchNoDocsQuery when the upper bound is smaller than the
lower bound. (Ioana Tagirta)

* GITHUB#13322: Implement Weight#count for vector values in the FieldExistsQuery. (Pan Guixin)

Bug Fixes
---------------------
(No changes)
40 changes: 24 additions & 16 deletions lucene/core/src/java/org/apache/lucene/search/FieldExistsQuery.java
@@ -19,10 +19,12 @@
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FloatVectorValues;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
@@ -35,7 +37,7 @@
* org.apache.lucene.document.KnnByteVectorField} or a field that indexes norms or doc values.
*/
public class FieldExistsQuery extends Query {
private String field;
private final String field;

/** Create a query that will match documents that have a value for the given {@code field}. */
public FieldExistsQuery(String field) {
@@ -128,20 +130,7 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException {
break;
}
} else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
final DocIdSetIterator vectorValues;
switch (fieldInfo.getVectorEncoding()) {
case FLOAT32:
vectorValues = leaf.getFloatVectorValues(field);
break;
case BYTE:
vectorValues = leaf.getByteVectorValues(field);
break;
default:
throw new IllegalArgumentException(
"unknown vector encoding=" + fieldInfo.getVectorEncoding());
}
assert vectorValues != null : "unexpected null vector values";
if (vectorValues != null && vectorValues.cost() != leaf.maxDoc()) {
if (getVectorValuesSize(fieldInfo, leaf) != leaf.maxDoc()) {
allReadersRewritable = false;
break;
}
@@ -253,7 +242,10 @@ public int count(LeafReaderContext context) throws IOException {
}

return super.count(context);
} else if (fieldInfo.getVectorDimension() != 0) { // the field indexes vectors
} else if (fieldInfo.hasVectorValues()) { // the field indexes vectors
if (reader.hasDeletions() == false) {
return getVectorValuesSize(fieldInfo, reader);
}
return super.count(context);
} else if (fieldInfo.getDocValuesType()
!= DocValuesType.NONE) { // the field indexes doc values
@@ -300,4 +292,20 @@ private String buildErrorMsg(FieldInfo fieldInfo) {
+ fieldInfo.name
+ "' exists and indexes neither of these data structures";
}

private int getVectorValuesSize(FieldInfo fi, LeafReader reader) throws IOException {
assert fi.name.equals(field);
switch (fi.getVectorEncoding()) {
case FLOAT32:
FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field);
assert floatVectorValues != null : "unexpected null float vector values";
return floatVectorValues.size();
case BYTE:
ByteVectorValues byteVectorValues = reader.getByteVectorValues(field);
assert byteVectorValues != null : "unexpected null byte vector values";
return byteVectorValues.size();
default:
throw new IllegalArgumentException("unknown vector encoding=" + fi.getVectorEncoding());
}
}
}
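
To see the new count path from the caller's side, here is a hedged usage example. It is self-contained and illustrative; the field name "vector", the in-memory directory, and the expected value of 1 are assumptions of this example, not anything specified by the patch. With no deletions in the index, IndexSearcher#count can now be answered from Weight#count without visiting documents.

import org.apache.lucene.document.Document;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class FieldExistsCountExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig())) {
      Document withVector = new Document();
      withVector.add(new KnnFloatVectorField("vector", new float[] {0.1f, 0.2f, 0.3f}));
      writer.addDocument(withVector);
      writer.addDocument(new Document()); // a document without a vector
      writer.commit();
      try (DirectoryReader reader = DirectoryReader.open(dir)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // With no deletions, the count comes straight from the segment's vector values size.
        System.out.println(searcher.count(new FieldExistsQuery("vector"))); // expected: 1
      }
    }
  }
}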
35 changes: 35 additions & 0 deletions lucene/core/src/test/org/apache/lucene/search/TestFieldExistsQuery.java
@@ -43,7 +43,9 @@
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.VectorUtil;

@@ -649,6 +651,39 @@ public void testKnnVectorAllDocsHaveField() throws IOException {
}
}

public void testDeleteKnnVector() throws IOException {
try (Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
final int numDocs = atLeast(100);

boolean allDocsHaveVector = random().nextBoolean();
BitSet docWithVector = new FixedBitSet(numDocs);
for (int i = 0; i < numDocs; ++i) {
Document doc = new Document();
if (allDocsHaveVector || random().nextBoolean()) {
doc.add(new KnnFloatVectorField("vector", randomVector(5)));
docWithVector.set(i);
}
doc.add(new StringField("id", Integer.toString(i), Store.NO));
iw.addDocument(doc);
}
if (random().nextBoolean()) {
final int numDeleted = random().nextInt(numDocs) + 1;
for (int i = 0; i < numDeleted; ++i) {
iw.deleteDocuments(new Term("id", Integer.toString(i)));
docWithVector.clear(i);
}
}

try (IndexReader reader = iw.getReader()) {
final IndexSearcher searcher = newSearcher(reader);

final int count = searcher.count(new FieldExistsQuery("vector"));
assertEquals(docWithVector.cardinality(), count);
}
}
}

public void testKnnVectorConjunction() throws IOException {
try (Directory dir = newDirectory();
RandomIndexWriter iw = new RandomIndexWriter(random(), dir)) {
