Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce TestLucene90DocValuesFormatVariableSkipInterval for testing docvalues skipper index #13550

Merged
merged 7 commits into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SHIFT;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.NUMERIC_BLOCK_SIZE;
import static org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat.SKIP_INDEX_INTERVAL_SIZE;

import java.io.IOException;
import java.util.Arrays;
Expand Down Expand Up @@ -63,10 +62,12 @@ final class Lucene90DocValuesConsumer extends DocValuesConsumer {
IndexOutput data, meta;
final int maxDoc;
private byte[] termsDictBuffer;
private final int skipIndexIntervalSize;

/** expert: Creates a new writer */
public Lucene90DocValuesConsumer(
SegmentWriteState state,
int skipIndexIntervalSize,
String dataCodec,
String dataExtension,
String metaCodec,
Expand Down Expand Up @@ -96,6 +97,7 @@ public Lucene90DocValuesConsumer(
state.segmentInfo.getId(),
state.segmentSuffix);
maxDoc = state.segmentInfo.maxDoc();
this.skipIndexIntervalSize = skipIndexIntervalSize;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we make any assumption anywhere that this needs to be a power of two? If so we should validate it?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No , there is no assumption. On the other hand I added in the codec a check so skipIndexIntervalSize must be 1. In addtion I replace 1 << x by an integer to remove the suggestion that this is required.

success = true;
} finally {
if (!success) {
Expand Down Expand Up @@ -239,7 +241,7 @@ private void writeSkipIndex(FieldInfo field, DocValuesProducer valuesProducer)
for (int i = 0, end = values.docValueCount(); i < end; ++i) {
accumulator.accumulate(values.nextValue());
}
if (++counter == SKIP_INDEX_INTERVAL_SIZE) {
if (++counter == skipIndexIntervalSize) {
globalMaxValue = Math.max(globalMaxValue, accumulator.maxValue);
globalMinValue = Math.min(globalMinValue, accumulator.minValue);
globalDocCount += accumulator.docCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,15 +138,23 @@
*/
public final class Lucene90DocValuesFormat extends DocValuesFormat {

private final int skipIndexIntervalSize;

/** Default constructor. */
public Lucene90DocValuesFormat() {
this(DEFAULT_SKIP_INDEX_INTERVAL_SIZE);
}

/** Default constructor. */
iverase marked this conversation as resolved.
Show resolved Hide resolved
public Lucene90DocValuesFormat(int skipIndexIntervalSize) {
super("Lucene90");
this.skipIndexIntervalSize = skipIndexIntervalSize;
}

@Override
public DocValuesConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
return new Lucene90DocValuesConsumer(
state, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
state, skipIndexIntervalSize, DATA_CODEC, DATA_EXTENSION, META_CODEC, META_EXTENSION);
}

@Override
Expand Down Expand Up @@ -182,6 +190,7 @@ public DocValuesProducer fieldsProducer(SegmentReadState state) throws IOExcepti
static final int TERMS_DICT_REVERSE_INDEX_SIZE = 1 << TERMS_DICT_REVERSE_INDEX_SHIFT;
static final int TERMS_DICT_REVERSE_INDEX_MASK = TERMS_DICT_REVERSE_INDEX_SIZE - 1;

static final int SKIP_INDEX_INTERVAL_SHIFT = 12;
static final int SKIP_INDEX_INTERVAL_SIZE = 1 << SKIP_INDEX_INTERVAL_SHIFT;
private static final int DEFAULT_SKIP_INDEX_INTERVAL_SHIFT = 12;
private static final int DEFAULT_SKIP_INDEX_INTERVAL_SIZE =
1 << DEFAULT_SKIP_INDEX_INTERVAL_SHIFT;
}
4 changes: 3 additions & 1 deletion lucene/test-framework/src/java/module-info.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
exports org.apache.lucene.tests.codecs.cranky;
exports org.apache.lucene.tests.codecs.mockrandom;
exports org.apache.lucene.tests.codecs.ramonly;
exports org.apache.lucene.tests.codecs.skipper;
exports org.apache.lucene.tests.codecs.uniformsplit.sharedterms;
exports org.apache.lucene.tests.codecs.uniformsplit;
exports org.apache.lucene.tests.codecs.vector;
Expand All @@ -61,7 +62,8 @@
org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec,
org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec,
org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec,
org.apache.lucene.tests.codecs.vector.ConfigurableMCodec;
org.apache.lucene.tests.codecs.vector.ConfigurableMCodec,
org.apache.lucene.tests.codecs.skipper.SkipperCodec;
provides org.apache.lucene.codecs.DocValuesFormat with
org.apache.lucene.tests.codecs.asserting.AssertingDocValuesFormat;
provides org.apache.lucene.codecs.KnnVectorsFormat with
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.tests.codecs.skipper;

import java.util.Random;
import org.apache.lucene.codecs.DocValuesFormat;
import org.apache.lucene.codecs.FilterCodec;
import org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat;
import org.apache.lucene.tests.util.TestUtil;

/**
* A codec that uses {@link Lucene90DocValuesFormat} with a random skip index size for its doc
* values and delegates to the default codec for everything else.
*/
public final class SkipperCodec extends FilterCodec {
iverase marked this conversation as resolved.
Show resolved Hide resolved

/** Create a random instance. */
public static SkipperCodec randomInstance(Random random) {
return switch (random.nextInt(10)) {
case 0 -> new SkipperCodec();
default -> new SkipperCodec(random);
};
}

private final DocValuesFormat docValuesFormat;
private final int skipIndexIntervalSize;

/** no arg constructor */
public SkipperCodec() {
super("SkipperCodec", TestUtil.getDefaultCodec());
this.docValuesFormat = new Lucene90DocValuesFormat();
skipIndexIntervalSize = -1; // default
}

/** Creates a DocValuesFormat with random skipIndexIntervalSize */
public SkipperCodec(Random random) {
super("SkipperCodec", TestUtil.getDefaultCodec());
this.skipIndexIntervalSize = random.nextInt(1 << 3, 1 << 10);
this.docValuesFormat = new Lucene90DocValuesFormat(skipIndexIntervalSize);
}

@Override
public DocValuesFormat docValuesFormat() {
return docValuesFormat;
}

@Override
public String toString() {
return getName() + "(skipIndexIntervalSize= " + skipIndexIntervalSize + ")";
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/**
* Support for testing {@link org.apache.lucene.codecs.lucene90.Lucene90DocValuesFormat} skipper
* index.
*/
package org.apache.lucene.tests.codecs.skipper;
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.apache.lucene.index.TermsEnum.SeekStatus;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.codecs.skipper.SkipperCodec;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
Expand Down Expand Up @@ -157,6 +158,7 @@ public void testNumberMergeAwayAllValuesWithSkipper() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
iwconfig.setCodec(SkipperCodec.randomInstance(random()));
iverase marked this conversation as resolved.
Show resolved Hide resolved
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

Document doc = new Document();
Expand Down Expand Up @@ -190,6 +192,7 @@ public void testSortedNumberMergeAwayAllValuesWithSkipper() throws IOException {
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
iwconfig.setCodec(SkipperCodec.randomInstance(random()));
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

Document doc = new Document();
Expand Down Expand Up @@ -224,6 +227,7 @@ public void testSortedMergeAwayAllValuesLargeSegmentWithSkipper() throws IOExcep
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
iwconfig.setCodec(SkipperCodec.randomInstance(random()));
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

Document doc = new Document();
Expand Down Expand Up @@ -264,6 +268,7 @@ public void testSortedSetMergeAwayAllValuesLargeSegmentWithSkipper() throws IOEx
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
iwconfig.setCodec(SkipperCodec.randomInstance(random()));
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

Document doc = new Document();
Expand Down Expand Up @@ -304,6 +309,7 @@ public void testNumericMergeAwayAllValuesLargeSegmentWithSkipper() throws IOExce
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
iwconfig.setCodec(SkipperCodec.randomInstance(random()));
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

Document doc = new Document();
Expand Down Expand Up @@ -340,6 +346,7 @@ public void testSortedNumericMergeAwayAllValuesLargeSegmentWithSkipper() throws
Analyzer analyzer = new MockAnalyzer(random());
IndexWriterConfig iwconfig = newIndexWriterConfig(analyzer);
iwconfig.setMergePolicy(newLogMergePolicy());
iwconfig.setCodec(SkipperCodec.randomInstance(random()));
RandomIndexWriter iwriter = new RandomIndexWriter(random(), directory, iwconfig);

Document doc = new Document();
Expand Down Expand Up @@ -670,7 +677,9 @@ private void assertDocValuesWithSkipper(int totalDocs, TestDocValueSkipper testD
default -> throw new AssertionError();
}
Directory directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), directory);
IndexWriterConfig iwconfig = newIndexWriterConfig();
iwconfig.setCodec(SkipperCodec.randomInstance(random()));
RandomIndexWriter writer = new RandomIndexWriter(random(), directory, iwconfig);
int numDocs = 0;
for (int i = 0; i < totalDocs; i++) {
Document doc = new Document();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ org.apache.lucene.tests.codecs.compressing.HighCompressionCompressingCodec
org.apache.lucene.tests.codecs.compressing.LZ4WithPresetCompressingCodec
org.apache.lucene.tests.codecs.compressing.dummy.DummyCompressingCodec
org.apache.lucene.tests.codecs.vector.ConfigurableMCodec
org.apache.lucene.tests.codecs.skipper.SkipperCodec