Skip to content

Commit

Permalink
Add BytesRefIterator to TermInSetQuery (#13806)
Browse files Browse the repository at this point in the history
TermInSetQuery used to have an accessor to its terms that was removed in #12173
to protect leaking internal encoding details. This introduces an accessor to the
term data in the query that doesn't expose internals but merely allows iterating
over the decoded BytesRef, making inspection of the query's content possible again.

Closes #13804
  • Loading branch information
cbuescher authored Sep 19, 2024
1 parent 6d987e1 commit e4ac577
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 7 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,8 @@ Build
API Changes
---------------------

* GITHUB#13806: Add TermInSetQuery#getBytesRefIterator to be able to iterate over query terms. (Christoph Büscher)

* GITHUB#13469: Expose FlatVectorsFormat as a first-class format; can be configured using a custom Codec. (Michael Sokolov)

* GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. (Peter Gromov)
Expand Down
18 changes: 11 additions & 7 deletions lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefComparator;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringSorter;
import org.apache.lucene.util.*;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
Expand Down Expand Up @@ -141,6 +135,16 @@ public long getTermsCount() {
return termData.size();
}

/**
 * Returns an iterator over the terms held by this query, intended for inspecting the query's
 * content.
 *
 * <p>The iterator is obtained from the query's term data on every call. Its {@code next()}
 * method yields the terms one after another and returns {@code null} once no terms remain.
 *
 * <p>NOTE(review): the returned {@code BytesRef} instances may be reused between calls to
 * {@code next()} - confirm against the term iterator implementation before retaining them.
 *
 * @return a {@link BytesRefIterator} over the terms of this query
 * @lucene.experimental
 */
public BytesRefIterator getBytesRefIterator() {
  // Hand out only the BytesRefIterator view; the concrete TermIterator stays private to the query.
  final TermIterator terms = termData.iterator();
  return terms::next;
}

@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field) == false) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
import org.apache.lucene.tests.util.RamUsageTester;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.ByteRunAutomaton;

Expand Down Expand Up @@ -527,4 +528,19 @@ public void consumeTermsMatching(
}
});
}

/**
 * Verifies that {@code getBytesRefIterator()} is immediately exhausted for a query without terms
 * and otherwise returns every term in order, followed by {@code null}.
 */
public void testTermsIterator() throws IOException {
  // A query built from an empty collection has nothing to iterate over.
  TermInSetQuery noTermsQuery = new TermInSetQuery("field", Collections.emptyList());
  BytesRefIterator noTermsIterator = noTermsQuery.getBytesRefIterator();
  assertNull(noTermsIterator.next());

  // A query with three terms yields exactly those terms, then signals exhaustion with null.
  List<BytesRef> inputTerms =
      List.of(newBytesRef("term1"), newBytesRef("term2"), newBytesRef("term3"));
  TermInSetQuery threeTermsQuery = new TermInSetQuery("field", inputTerms);
  BytesRefIterator threeTermsIterator = threeTermsQuery.getBytesRefIterator();
  assertEquals(newBytesRef("term1"), threeTermsIterator.next());
  assertEquals(newBytesRef("term2"), threeTermsIterator.next());
  assertEquals(newBytesRef("term3"), threeTermsIterator.next());
  assertNull(threeTermsIterator.next());
}
}

0 comments on commit e4ac577

Please sign in to comment.