Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete keyword changes for star tree #16233

Merged
merged 17 commits into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Increase segrep pressure checkpoint default limit to 30 ([#16577](https://github.com/opensearch-project/OpenSearch/pull/16577/files))
- Add dynamic setting allowing size > 0 requests to be cached in the request cache ([#16483](https://github.com/opensearch-project/OpenSearch/pull/16483))
- Make IndexStoreListener a pluggable interface ([#16583](https://github.com/opensearch-project/OpenSearch/pull/16583))
- Support for keyword fields in star-tree index ([#16233](https://github.com/opensearch-project/OpenSearch/pull/16233))

### Dependencies
- Bump `com.azure:azure-storage-common` from 12.25.1 to 12.27.1 ([#16521](https://github.com/opensearch-project/OpenSearch/pull/16521))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ public class StarTreeMapperIT extends OpenSearchIntegTestCase {
.put(IndexSettings.INDEX_TRANSLOG_FLUSH_THRESHOLD_SIZE_SETTING.getKey(), new ByteSizeValue(512, ByteSizeUnit.MB))
.build();

private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean keywordDim) {
private static XContentBuilder createMinimalTestMapping(boolean invalidDim, boolean invalidMetric, boolean ipdim) {
try {
return jsonBuilder().startObject()
.startObject("composite")
Expand All @@ -68,12 +68,15 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.endObject()
.startArray("ordered_dimensions")
.startObject()
.field("name", getDim(invalidDim, keywordDim))
.field("name", getDim(invalidDim, ipdim))
.endObject()
.startObject()
.field("name", "keyword_dv")
.endObject()
.endArray()
.startArray("metrics")
.startObject()
.field("name", getDim(invalidMetric, false))
.field("name", getMetric(invalidMetric, false))
.endObject()
.endArray()
.endObject()
Expand All @@ -99,6 +102,10 @@ private static XContentBuilder createMinimalTestMapping(boolean invalidDim, bool
.field("type", "keyword")
.field("doc_values", false)
.endObject()
.startObject("ip")
.field("type", "ip")
.field("doc_values", false)
.endObject()
.endObject()
.endObject();
} catch (IOException e) {
Expand Down Expand Up @@ -356,10 +363,19 @@ private XContentBuilder getMappingWithDuplicateFields(boolean isDuplicateDim, bo
}

private static String getDim(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return random().nextBoolean() ? "numeric" : "keyword";
} else if (isKeyword) {
return "ip";
}
return "numeric_dv";
}

private static String getMetric(boolean hasDocValues, boolean isKeyword) {
if (hasDocValues) {
return "numeric";
} else if (isKeyword) {
return "keyword";
return "ip";
}
return "numeric_dv";
}
Expand Down Expand Up @@ -398,6 +414,7 @@ public void testValidCompositeIndex() {
assertEquals(expectedTimeUnits.get(i).shortName(), dateDim.getSortedCalendarIntervals().get(i).shortName());
}
assertEquals("numeric_dv", starTreeFieldType.getDimensions().get(1).getField());
assertEquals("keyword_dv", starTreeFieldType.getDimensions().get(2).getField());
assertEquals("numeric_dv", starTreeFieldType.getMetrics().get(0).getField());
List<MetricStat> expectedMetrics = Arrays.asList(MetricStat.VALUE_COUNT, MetricStat.SUM, MetricStat.AVG);
assertEquals(expectedMetrics, starTreeFieldType.getMetrics().get(0).getMetrics());
Expand Down Expand Up @@ -665,10 +682,7 @@ public void testInvalidDimCompositeIndex() {
IllegalArgumentException.class,
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(true, false, false)).get()
);
assertEquals(
"Aggregations not supported for the dimension field [numeric] with field type [integer] as part of star tree field",
ex.getMessage()
);
assertTrue(ex.getMessage().startsWith("Aggregations not supported for the dimension field "));
}

public void testMaxDimsCompositeIndex() {
Expand Down Expand Up @@ -734,7 +748,7 @@ public void testUnsupportedDim() {
() -> prepareCreate(TEST_INDEX).setSettings(settings).setMapping(createMinimalTestMapping(false, false, true)).get()
);
assertEquals(
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [keyword] as part of star tree field [startree-1]",
"Failed to parse mapping [_doc]: unsupported field type associated with dimension [ip] as part of star tree field [startree-1]",
ex.getMessage()
);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.search.DocIdSetIterator;

/**
* Base wrapper class for DocValuesWriter.
*/
public interface DocValuesWriterWrapper<T extends DocIdSetIterator> {
T getDocValues();
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@
*
* @opensearch.experimental
*/
public class SortedNumericDocValuesWriterWrapper {
public class SortedNumericDocValuesWriterWrapper implements DocValuesWriterWrapper<SortedNumericDocValues> {

private final SortedNumericDocValuesWriter sortedNumericDocValuesWriter;
private final SortedNumericDocValuesWriter sortedNumericDocValuesWriterDelegate;

/**
* Sole constructor. Constructs a new {@link SortedNumericDocValuesWriterWrapper} instance.
Expand All @@ -29,7 +29,7 @@ public class SortedNumericDocValuesWriterWrapper {
* @param counter a counter for tracking memory usage
*/
public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter) {
sortedNumericDocValuesWriter = new SortedNumericDocValuesWriter(fieldInfo, counter);
sortedNumericDocValuesWriterDelegate = new SortedNumericDocValuesWriter(fieldInfo, counter);
}

/**
Expand All @@ -39,15 +39,16 @@ public SortedNumericDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter)
* @param value the value to add
*/
public void addValue(int docID, long value) {
sortedNumericDocValuesWriter.addValue(docID, value);
sortedNumericDocValuesWriterDelegate.addValue(docID, value);
}

/**
* Returns the {@link SortedNumericDocValues} instance containing the sorted numeric doc values
*
* @return the {@link SortedNumericDocValues} instance
*/
@Override
public SortedNumericDocValues getDocValues() {
return sortedNumericDocValuesWriter.getDocValues();
return sortedNumericDocValuesWriterDelegate.getDocValues();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.apache.lucene.index;

import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Counter;

/**
* A wrapper class for writing sorted set doc values.
* <p>
* This class provides a convenient way to add sorted set doc values to a field
* and retrieve the corresponding {@link SortedSetDocValues} instance.
*
* @opensearch.experimental
*/
public class SortedSetDocValuesWriterWrapper implements DocValuesWriterWrapper<SortedSetDocValues> {

private final SortedSetDocValuesWriter sortedSetDocValuesWriterDelegate;

/**
* Sole constructor. Constructs a new {@link SortedSetDocValuesWriterWrapper} instance.
*
* @param fieldInfo the field information for the field being written
* @param counter a counter for tracking memory usage
* @param byteBlockPool a byte block pool for allocating byte blocks
* @see SortedSetDocValuesWriter
*/
public SortedSetDocValuesWriterWrapper(FieldInfo fieldInfo, Counter counter, ByteBlockPool byteBlockPool) {
sortedSetDocValuesWriterDelegate = new SortedSetDocValuesWriter(fieldInfo, counter, byteBlockPool);
}

/**
* Adds a bytes ref value to the sorted set doc values for the specified document.
*
* @param docID the document ID
* @param value the value to add
*/
public void addValue(int docID, BytesRef value) {
bharath-techie marked this conversation as resolved.
Show resolved Hide resolved
sortedSetDocValuesWriterDelegate.addValue(docID, value);
}

/**
* Returns the {@link SortedSetDocValues} instance containing the sorted numeric doc values
*
* @return the {@link SortedSetDocValues} instance
*/
@Override
public SortedSetDocValues getDocValues() {
return sortedSetDocValuesWriterDelegate.getDocValues();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
Expand All @@ -40,6 +40,7 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
Expand Down Expand Up @@ -111,7 +112,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
readState.segmentInfo.getId(),
readState.segmentSuffix
);

Map<String, DocValuesType> dimensionFieldTypeMap = new HashMap<>();
while (true) {

// validate magic marker
Expand Down Expand Up @@ -155,13 +156,16 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState
compositeIndexInputMap.put(compositeFieldName, starTreeIndexInput);
compositeIndexMetadataMap.put(compositeFieldName, starTreeMetadata);

List<String> dimensionFields = starTreeMetadata.getDimensionFields();

Map<String, DocValuesType> dimensionFieldToDocValuesMap = starTreeMetadata.getDimensionFields();
// generating star tree unique fields (fully qualified name for dimension and metrics)
for (String dimensions : dimensionFields) {
fields.add(fullyQualifiedFieldNameForStarTreeDimensionsDocValues(compositeFieldName, dimensions));
for (Map.Entry<String, DocValuesType> dimensionEntry : dimensionFieldToDocValuesMap.entrySet()) {
String dimName = fullyQualifiedFieldNameForStarTreeDimensionsDocValues(
compositeFieldName,
dimensionEntry.getKey()
);
fields.add(dimName);
dimensionFieldTypeMap.put(dimName, dimensionEntry.getValue());
}

// adding metric fields
for (Metric metric : starTreeMetadata.getMetrics()) {
for (MetricStat metricStat : metric.getBaseMetrics()) {
Expand All @@ -184,7 +188,7 @@ public Composite912DocValuesReader(DocValuesProducer producer, SegmentReadState

// populates the dummy list of field infos to fetch doc id set iterators for respective fields.
// the dummy field info is used to fetch the doc id set iterators for respective fields based on field name
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields));
FieldInfos fieldInfos = new FieldInfos(getFieldInfoList(fields, dimensionFieldTypeMap));
this.readState = new SegmentReadState(
readState.directory,
readState.segmentInfo,
Expand Down Expand Up @@ -291,17 +295,4 @@ public CompositeIndexValues getCompositeIndexValues(CompositeIndexFieldInfo comp

}

/**
* Returns the sorted numeric doc values for the given sorted numeric field.
* If the sorted numeric field is null, it returns an empty doc id set iterator.
* <p>
* Sorted numeric field can be null for cases where the segment doesn't hold a particular value.
*
* @param sortedNumeric the sorted numeric doc values for a field
* @return empty sorted numeric values if the field is not present, else sortedNumeric
*/
public static SortedNumericDocValues getSortedNumericDocValues(SortedNumericDocValues sortedNumeric) {
return sortedNumeric == null ? DocValues.emptySortedNumeric() : sortedNumeric;
}

}
Loading
Loading