Make OneMerge#reorder preserve blocks. (#13128)
This updates `IndexWriter` to only call `OneMerge#reorder` when it has a chance
to preserve the block structure, i.e. either there are no blocks or blocks are
identified by a parent field.

Furthermore, `MockRandomMergePolicy` is updated to preserve the block structure
when a parent field is configured.
jpountz committed Mar 29, 2024
1 parent 65c286e commit 2e503b6
Showing 3 changed files with 136 additions and 19 deletions.
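As context for the change, here is a minimal, hypothetical sketch (not part of this commit; the field name and directory type are chosen for illustration) of the setup the commit message refers to: blocks are indexed with `IndexWriter#addDocuments`, and configuring a parent field on `IndexWriterConfig` is what allows block boundaries to be identified later, so that `OneMerge#reorder` can still be applied without splitting blocks.

// Hypothetical example (not from this commit): index two document blocks with a
// parent field configured, so that reordering during merges can preserve them.
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class ParentFieldBlocksExample {
  public static void main(String[] args) throws Exception {
    try (Directory dir = new ByteBuffersDirectory()) {
      // The parent field marks the last (parent) document of each block.
      IndexWriterConfig config = new IndexWriterConfig().setParentField("_parent");
      try (IndexWriter writer = new IndexWriter(dir, config)) {
        // Each addDocuments call indexes one block: children first, parent last.
        writer.addDocuments(List.of(new Document(), new Document(), new Document()));
        writer.addDocuments(List.of(new Document(), new Document()));
        // Merging may now reorder documents, but each block stays contiguous.
        writer.forceMerge(1);
      }
    }
  }
}

Without a parent field, an index that contains blocks now opts out of reordering entirely, which is what the two `IndexWriter` hunks below implement.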
24 changes: 22 additions & 2 deletions lucene/core/src/java/org/apache/lucene/index/IndexWriter.java
@@ -3430,7 +3430,17 @@ public void addIndexesReaderMerge(MergePolicy.OneMerge merge) throws IOException
       readers.add(reader);
     }

-    if (config.getIndexSort() == null && readers.isEmpty() == false) {
+    // Don't reorder if an explicit sort is configured.
+    final boolean hasIndexSort = config.getIndexSort() != null;
+    // Don't reorder if blocks can't be identified using the parent field.
+    final boolean hasBlocksButNoParentField =
+        readers.stream().map(LeafReader::getMetaData).anyMatch(LeafMetaData::hasBlocks)
+            && readers.stream()
+                .map(CodecReader::getFieldInfos)
+                .map(FieldInfos::getParentField)
+                .anyMatch(Objects::isNull);
+
+    if (hasIndexSort == false && hasBlocksButNoParentField == false && readers.isEmpty() == false) {
       CodecReader mergedReader = SlowCompositeCodecReaderWrapper.wrap(readers);
       DocMap docMap = merge.reorder(mergedReader, directory);
       if (docMap != null) {
@@ -5208,7 +5218,17 @@ public int length() {
       }

       MergeState.DocMap[] reorderDocMaps = null;
-      if (config.getIndexSort() == null) {
+      // Don't reorder if an explicit sort is configured.
+      final boolean hasIndexSort = config.getIndexSort() != null;
+      // Don't reorder if blocks can't be identified using the parent field.
+      final boolean hasBlocksButNoParentField =
+          mergeReaders.stream().map(LeafReader::getMetaData).anyMatch(LeafMetaData::hasBlocks)
+              && mergeReaders.stream()
+                  .map(CodecReader::getFieldInfos)
+                  .map(FieldInfos::getParentField)
+                  .anyMatch(Objects::isNull);
+
+      if (hasIndexSort == false && hasBlocksButNoParentField == false) {
         // Create a merged view of the input segments. This effectively does the merge.
         CodecReader mergedView = SlowCompositeCodecReaderWrapper.wrap(mergeReaders);
         Sorter.DocMap docMap = merge.reorder(mergedView, directory);
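Read literally, the new guard skips reordering when at least one incoming reader contains blocks while at least one reader has no parent field recording block boundaries, because in that case the reorder implementation has no way to keep blocks together. A hypothetical standalone helper (the class and method names are made up for illustration, not part of the commit) that isolates the same predicate:

import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.LeafMetaData;
import org.apache.lucene.index.LeafReader;

// Hypothetical helper mirroring the guard added in both hunks above.
final class ReorderGuard {
  private ReorderGuard() {}

  static boolean hasBlocksButNoParentField(List<CodecReader> readers) {
    // At least one incoming segment was written with document blocks...
    boolean anyBlocks =
        readers.stream().map(LeafReader::getMetaData).anyMatch(LeafMetaData::hasBlocks);
    // ...and at least one segment has no parent field to identify block boundaries.
    boolean anyMissingParentField =
        readers.stream()
            .map(CodecReader::getFieldInfos)
            .map(FieldInfos::getParentField)
            .anyMatch(Objects::isNull);
    return anyBlocks && anyMissingParentField;
  }
}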

60 changes: 43 additions & 17 deletions lucene/test-framework/src/java/org/apache/lucene/tests/index/MockRandomMergePolicy.java
@@ -24,6 +24,7 @@
 import java.util.Random;
 import java.util.Set;
 import org.apache.lucene.index.CodecReader;
+import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.FilterLeafReader;
 import org.apache.lucene.index.MergePolicy;
 import org.apache.lucene.index.MergeTrigger;
@@ -34,6 +35,7 @@
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.tests.util.LuceneTestCase;
 import org.apache.lucene.tests.util.TestUtil;
+import org.apache.lucene.util.BitSet;

 /** MergePolicy that makes random decisions for testing. */
 public class MockRandomMergePolicy extends MergePolicy {
@@ -245,26 +247,50 @@ public Sorter.DocMap reorder(CodecReader reader, Directory dir) throws IOException
           System.out.println("NOTE: MockRandomMergePolicy now reverses reader=" + reader);
         }
         // Reverse the doc ID order
-        final int maxDoc = reader.maxDoc();
-        return new Sorter.DocMap() {
-          @Override
-          public int size() {
-            return maxDoc;
-          }
-
-          @Override
-          public int oldToNew(int docID) {
-            return maxDoc - 1 - docID;
-          }
-
-          @Override
-          public int newToOld(int docID) {
-            return maxDoc - 1 - docID;
-          }
-        };
+        return reverse(reader);
       }
       return null;
     }
   }

+  static Sorter.DocMap reverse(CodecReader reader) throws IOException {
+    final int maxDoc = reader.maxDoc();
+    final BitSet parents;
+    if (reader.getFieldInfos().getParentField() == null) {
+      parents = null;
+    } else {
+      parents =
+          BitSet.of(DocValues.getNumeric(reader, reader.getFieldInfos().getParentField()), maxDoc);
+    }
+    return new Sorter.DocMap() {
+
+      @Override
+      public int size() {
+        return maxDoc;
+      }
+
+      @Override
+      public int oldToNew(int docID) {
+        if (parents == null) {
+          return maxDoc - 1 - docID;
+        } else {
+          final int oldBlockStart = docID == 0 ? 0 : parents.prevSetBit(docID - 1) + 1;
+          final int oldBlockEnd = parents.nextSetBit(docID);
+          final int newBlockEnd = maxDoc - 1 - oldBlockStart;
+          return newBlockEnd - (oldBlockEnd - docID);
+        }
+      }
+
+      @Override
+      public int newToOld(int docID) {
+        if (parents == null) {
+          return maxDoc - 1 - docID;
+        } else {
+          final int oldBlockEnd = parents.nextSetBit(maxDoc - 1 - docID);
+          final int newBlockEnd = oldToNew(oldBlockEnd);
+          return oldBlockEnd - (newBlockEnd - docID);
+        }
+      }
+    };
+  }
 }
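To make the arithmetic in `oldToNew` concrete, here is a small self-contained sketch (a hypothetical re-implementation for illustration, not Lucene code) that applies the same formula to the layout used by the new test below: three blocks of sizes 2, 4 and 3, whose parent (last) documents are 1, 5 and 8.

// Hypothetical illustration of the block-preserving reversal; it mirrors the formula in
// MockRandomMergePolicy#reverse but uses a plain boolean[] in place of a BitSet.
public class ReverseBlocksExample {
  // Parent (last) document of each block; blocks are [0,1], [2..5], [6..8].
  static final boolean[] PARENTS = {false, true, false, false, false, true, false, false, true};
  static final int MAX_DOC = PARENTS.length;

  // Highest parent doc <= i, or -1 if none (stands in for BitSet#prevSetBit).
  static int prevSetBit(int i) {
    for (; i >= 0; i--) {
      if (PARENTS[i]) return i;
    }
    return -1;
  }

  // Lowest parent doc >= i (stands in for BitSet#nextSetBit); the last doc is always a parent.
  static int nextSetBit(int i) {
    for (; i < MAX_DOC; i++) {
      if (PARENTS[i]) return i;
    }
    throw new AssertionError("the last document must be a parent");
  }

  static int oldToNew(int docID) {
    final int oldBlockStart = docID == 0 ? 0 : prevSetBit(docID - 1) + 1;
    final int oldBlockEnd = nextSetBit(docID);
    // A block keeps its length, so the reversed block ends at the mirror of the old block start.
    final int newBlockEnd = MAX_DOC - 1 - oldBlockStart;
    // Preserve the document's offset from the end of its block.
    return newBlockEnd - (oldBlockEnd - docID);
  }

  public static void main(String[] args) {
    // Prints 0->7, 1->8, 2->3, 3->4, 4->5, 5->6, 6->0, 7->1, 8->2:
    // block [0,1] moves to [7,8], [2..5] to [3..6], [6..8] to [0..2],
    // matching the oldToNew assertions in the new TestMockRandomMergePolicy.
    for (int doc = 0; doc < MAX_DOC; doc++) {
      System.out.println(doc + " -> " + oldToNew(doc));
    }
  }
}

Each block moves as a whole to its mirrored position while documents keep their offset from the end of their block, so parent documents stay last within each block.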

71 changes: 71 additions & 0 deletions lucene/test-framework/src/test/org/apache/lucene/tests/index/TestMockRandomMergePolicy.java
@@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.tests.index;

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Sorter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.util.LuceneTestCase;

public class TestMockRandomMergePolicy extends LuceneTestCase {

public void testReverseWithParents() throws Exception {
Directory dir = newDirectory();
IndexWriter w = new IndexWriter(dir, new IndexWriterConfig().setParentField("_parent"));
List<Document> docs = new ArrayList<>();
for (int i = 0; i < 5; ++i) {
docs.add(new Document());
}
w.addDocuments(docs.subList(0, 2));
w.addDocuments(docs.subList(0, 4));
w.addDocuments(docs.subList(0, 3));
w.forceMerge(1);
w.close();
DirectoryReader reader = DirectoryReader.open(dir);
CodecReader codecReader = (CodecReader) getOnlyLeafReader(reader);
Sorter.DocMap docMap = MockRandomMergePolicy.reverse(codecReader);

assertEquals(7, docMap.oldToNew(0));
assertEquals(8, docMap.oldToNew(1));
assertEquals(3, docMap.oldToNew(2));
assertEquals(4, docMap.oldToNew(3));
assertEquals(5, docMap.oldToNew(4));
assertEquals(6, docMap.oldToNew(5));
assertEquals(0, docMap.oldToNew(6));
assertEquals(1, docMap.oldToNew(7));
assertEquals(2, docMap.oldToNew(8));

assertEquals(6, docMap.newToOld(0));
assertEquals(7, docMap.newToOld(1));
assertEquals(8, docMap.newToOld(2));
assertEquals(2, docMap.newToOld(3));
assertEquals(3, docMap.newToOld(4));
assertEquals(4, docMap.newToOld(5));
assertEquals(5, docMap.newToOld(6));
assertEquals(0, docMap.newToOld(7));
assertEquals(1, docMap.newToOld(8));

reader.close();
dir.close();
}
}
