Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete keyword changes for star tree #16233

Merged
merged 17 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
.build();

private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
try {
return jsonBuilder().startObject()
.startObject("composite")
Expand All @@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.field("name", getDim(invalidDim, ipdim))
.endObject()
.startObject()
.field("name", "keyword_dv")
.endObject()
.endArray()
.startArray("metrics")
.startObject()
.field("name", getDim(invalidMetric, false))
.field("name", getMetric(invalidMetric, false))
.endObject()
.endArray()
.endObject()
Expand All @@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "keyword")
.field("doc_values", false)
.endObject()
.startObject("ip")
.field("type", "ip")
.field("doc_values", false)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
Expand Down Expand Up @@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
}

private static String getDim(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return random().nextBoolean() ? "numeric" : "keyword";
} else if (isKeyword) {
return "ip";
}
return "numeric_dv";
}

private static String getMetric(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return "numeric";
} else if (isKeyword) {
return "keyword";
return "ip";
}
return "numeric_dv";
}
Expand Down Expand Up @@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
Expand Down Expand Up @@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
IllegalArgumentException.class,
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
);
assertEquals(
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
ex.getMessage()
);
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
}

public void testMaxDimsCompositeIndex() {
Expand Down Expand Up @@ -734,7 +748,7 @@ public void testUnsupportedDim() {
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
);
assertEquals(
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
ex.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.search.DocIdSetIterator;

/**
* Base wrapper class for DocValuesWriter.
*/
public abstract class DocValuesWriterWrapper<T extends DocIdSetIterator> {
public abstract T getDocValues();
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
*
* @opensearch.experimental
*/
public class SortedNumericDocValuesWriterWrapper {
public class SortedNumericDocValuesWriterWrapper extends DocValuesWriterWrapper<SortedNumericDocValues> {

private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter;

Expand Down Expand Up @@ -47,6 +47,7 @@ public void addValue(int docID, long value) {
*
* @return the {@link SortedNumericDocValues} instance
*/
@Override
public SortedNumericDocValues getDocValues() {
return sortedNumericDocValuesWriter.getDocValues();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;

/**
* A wrapper class for writing sorted set doc values.
* <p>
* This class provides a convenient way to add sorted set doc values to a field
* and retrieve the corresponding {@link SortedSetDocValues} instance.
*
* @opensearch.experimental
*/
public class SortedSetDocValuesWriterWrapper extends DocValuesWriterWrapper<SortedSetDocValues> {

private final SortedSetDocValuesWriter sortedSetDocValuesWriterWrapper;
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved

/**
* Sole constructor. Constructs a new {@link SortedSetDocValuesWriterWrapper} instance.
*
* @param fieldInfo the field information for the field being written
* @param counter a counter for tracking memory usage
* @param byteBlockPool a byte block pool for allocating byte blocks
* @see SortedSetDocValuesWriter
*/
public SortedSetDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter, ByteBlockPool byteBlockPool) {
sortedSetDocValuesWriterWrapper = new SortedSetDocValuesWriter(fieldInfo, counter, byteBlockPool);
}

/**
* Adds a bytes ref value to the sorted set doc values for the specified document.
*
* @param docID the document ID
* @param value the value to add
*/
public void addValue(int docID, BytesRef value) {
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
sortedSetDocValuesWriterWrapper.addValue(docID, value);
}

/**
* Returns the {@link SortedSetDocValues} instance containing the sorted numeric doc values
*
* @return the {@link SortedSetDocValues} instance
*/
@Override
public SortedSetDocValues getDocValues() {
return sortedSetDocValuesWriterWrapper.getDocValues();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
Expand All @@ -40,6 +41,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -111,7 +113,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
readState.segmentInfo.getId(),
readState.segmentSuffix
);

Map<String, DocValuesType> dimensionFieldTypeMap = new HashMap<>();
while (true) {

// validate magic marker
Expand Down Expand Up @@ -155,13 +157,16 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

List<String> dimensionFields = starTreeMetadata.getDimensionFields();

Map<String, DocValuesType> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
// generating star tree unique fields (fully qualified name for dimension and metrics)
for (String dimensions : dimensionFields) {
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
for (Map.Entry<String, DocValuesType> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
String dimName = fullyQualifiedFieldNameForStarTreeDimensionsDocValues(
compositeFieldName,
dimensionEntry.getKey()
);
fields.add(dimName);
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue());
}

// adding metric fields
for (Metric metric : starTreeMetadata.getMetrics()) {
for (MetricStat metricStat : metric.getBaseMetrics()) {
Expand All @@ -184,7 +189,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState

// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields, dimensionFieldTypeMap));
this.readState = new SegmentReadState(readState.directory, readState.segmentInfo, fieldInfos, readState.context);

// initialize star-tree doc values producer
Expand Down Expand Up @@ -298,4 +303,8 @@ public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocV
return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric;
}

public static SortedSetDocValues getSortedSetDocValues(SortedSetDocValues sortedSetDv) {
return sortedSetDv == null ? DocValues.emptySortedSet() : sortedSetDv;
}
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.IndexOutput;
import org.opensearch.common.annotation.ExperimentalApi;
Expand All @@ -34,6 +35,7 @@
import org.opensearch.index.compositeindex.datacube.startree.index.StarTreeValues;
import org.opensearch.index.mapper.CompositeMappedFieldType;
import org.opensearch.index.mapper.DocCountFieldMapper;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MapperService;
import org.opensearch.index.mapper.StarTreeMapper;

Expand Down Expand Up @@ -71,6 +73,7 @@
private final AtomicInteger fieldNumberAcrossCompositeFields;

private final Map<String, DocValuesProducer> fieldProducerMap = new HashMap<>();
private final Map<String, SortedSetDocValues> fieldDocIdSetIteratorMap = new HashMap<>();

public Composite912DocValuesWriter(DocValuesConsumer delegate, SegmentWriteState segmentWriteState, MapperService mapperService)
throws IOException {
Expand All @@ -82,14 +85,7 @@
this.compositeMappedFieldTypes = mapperService.getCompositeFieldTypes();
compositeFieldSet = new HashSet<>();
segmentFieldSet = new HashSet<>();
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);
}
}
addStarTreeSupportedFieldsFromSegment();
for (CompositeMappedFieldType type : compositeMappedFieldTypes) {
compositeFieldSet.addAll(type.fields());
}
Expand Down Expand Up @@ -148,6 +144,19 @@
segmentHasCompositeFields = Collections.disjoint(segmentFieldSet, compositeFieldSet) == false;
}

private void addStarTreeSupportedFieldsFromSegment() {
// TODO : add integ test for this
for (FieldInfo fi : this.state.fieldInfos) {
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
if (DocValuesType.SORTED_NUMERIC.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (DocValuesType.SORTED_SET.equals(fi.getDocValuesType())) {
segmentFieldSet.add(fi.name);
} else if (fi.name.equals(DocCountFieldMapper.NAME)) {
segmentFieldSet.add(fi.name);

Check warning on line 155 in server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesWriter.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/codec/composite/composite912/Composite912DocValuesWriter.java#L155

Added line #L155 was not covered by tests
}
}
}

@Override
public void addNumericField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addNumericField(field, valuesProducer);
Expand Down Expand Up @@ -179,6 +188,15 @@
@Override
public void addSortedSetField(FieldInfo field, DocValuesProducer valuesProducer) throws IOException {
delegate.addSortedSetField(field, valuesProducer);
// Perform this only during flush flow
if (mergeState.get() == null && segmentHasCompositeFields) {
createCompositeIndicesIfPossible(valuesProducer, field);
}
if (mergeState.get() != null) {
if (compositeFieldSet.contains(field.name)) {
fieldDocIdSetIteratorMap.put(field.name, valuesProducer.getSortedSet(field));
}
}
}

@Override
Expand Down Expand Up @@ -235,6 +253,7 @@
* Add empty doc values for fields not present in segment
*/
private void addDocValuesForEmptyField(String compositeField) {
// special case for doc count
if (compositeField.equals(DocCountFieldMapper.NAME)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
Expand All @@ -243,16 +262,29 @@
}
});
} else {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
if (isSortedSetField(compositeField)) {
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedSetDocValues getSortedSet(FieldInfo field) {
return DocValues.emptySortedSet();
}
});
} else {
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
fieldProducerMap.put(compositeField, new EmptyDocValuesProducer() {
@Override
public SortedNumericDocValues getSortedNumeric(FieldInfo field) {
return DocValues.emptySortedNumeric();
}
});
}
}
compositeFieldSet.remove(compositeField);
}

private boolean isSortedSetField(String field) {
return mapperService.fieldType(field) instanceof KeywordFieldMapper.KeywordFieldType;
}

@Override
public void merge(MergeState mergeState) throws IOException {
this.mergeState.compareAndSet(null, mergeState);
Expand Down Expand Up @@ -313,7 +345,14 @@
}
}
try (StarTreesBuilder starTreesBuilder = new StarTreesBuilder(state, mapperService, fieldNumberAcrossCompositeFields)) {
starTreesBuilder.buildDuringMerge(metaOut, dataOut, starTreeSubsPerField, compositeDocValuesConsumer);
starTreesBuilder.buildDuringMerge(
metaOut,
dataOut,
starTreeSubsPerField,
compositeDocValuesConsumer,
mergeState,
fieldDocIdSetIteratorMap
);
}
}

Expand Down
Loading
Loading