From 6ee907c2e7d376ac747c57a12346688569ba21c3 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 14 Jan 2025 12:43:09 -0800 Subject: [PATCH 1/4] remove json column v4 format writers --- .../druid/guice/BuiltInTypesModule.java | 19 +- .../segment/DefaultColumnFormatConfig.java | 2 +- .../segment/NestedDataColumnHandlerV4.java | 108 ---- .../segment/NestedDataColumnIndexerV4.java | 501 ------------------ .../segment/NestedDataColumnMergerV4.java | 214 -------- .../druid/segment/NestedDataColumnSchema.java | 12 +- .../incremental/IncrementalIndexAdapter.java | 12 - .../nested/NestedDataColumnSerializerV4.java | 397 -------------- .../nested/NestedDataComplexTypeSerde.java | 6 +- .../druid/guice/BuiltInTypesModuleTest.java | 3 - .../druid/query/NestedDataTestUtils.java | 18 +- .../query/scan/NestedDataScanQueryTest.java | 2 +- .../NestedDataColumnIndexerV4Test.java | 450 ---------------- .../NestedDataColumnSupplierV4Test.java | 218 +------- 14 files changed, 20 insertions(+), 1942 deletions(-) delete mode 100644 processing/src/main/java/org/apache/druid/segment/NestedDataColumnHandlerV4.java delete mode 100644 processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java delete mode 100644 processing/src/main/java/org/apache/druid/segment/NestedDataColumnMergerV4.java delete mode 100644 processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java delete mode 100644 processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java diff --git a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java index dc9701e7d8b3..96866d44a2a0 100644 --- a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java +++ b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java @@ -32,7 +32,6 @@ import org.apache.druid.segment.DimensionHandlerProvider; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.NestedCommonFormatColumnHandler; -import org.apache.druid.segment.NestedDataColumnHandlerV4; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.apache.druid.segment.nested.StructuredData; import org.apache.druid.segment.nested.StructuredDataJsonSerializer; @@ -72,12 +71,7 @@ public void configure(Binder binder) @LazySingleton public SideEffectRegisterer initDimensionHandlerAndMvHandlingMode(DefaultColumnFormatConfig formatsConfig) { - if (formatsConfig.getNestedColumnFormatVersion() != null && formatsConfig.getNestedColumnFormatVersion() == 4) { - DimensionHandlerUtils.registerDimensionHandlerProvider( - NestedDataComplexTypeSerde.TYPE_NAME, - new NestedColumnV4HandlerProvider() - ); - } else { + if (formatsConfig.getNestedColumnFormatVersion() == null || formatsConfig.getNestedColumnFormatVersion() == 5) { DimensionHandlerUtils.registerDimensionHandlerProvider( NestedDataComplexTypeSerde.TYPE_NAME, new NestedCommonFormatHandlerProvider() @@ -144,17 +138,6 @@ public DimensionHandler get(Stri } } - public static class NestedColumnV4HandlerProvider - implements DimensionHandlerProvider - { - - @Override - public DimensionHandler get(String dimensionName) - { - return new NestedDataColumnHandlerV4(dimensionName); - } - } - /** * this is used as a vehicle to register the correct version of the system default nested column handler and multi * value handling mode by side effect with the help of binding to diff --git a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java index 4df37f85b354..61e386db9ae0 100644 --- a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java @@ -33,7 +33,7 @@ public class DefaultColumnFormatConfig public static void validateNestedFormatVersion(@Nullable Integer formatVersion) { if (formatVersion != null) { - if (formatVersion < 4 || formatVersion > 5) { + if (formatVersion != 5) { throw DruidException.forPersona(DruidException.Persona.USER) .ofCategory(DruidException.Category.INVALID_INPUT) .build("Unsupported nested column format version[%s]", formatVersion); diff --git a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnHandlerV4.java b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnHandlerV4.java deleted file mode 100644 index effb74f8c328..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnHandlerV4.java +++ /dev/null @@ -1,108 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment; - -import org.apache.druid.data.input.impl.DimensionSchema; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.query.dimension.DefaultDimensionSpec; -import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.nested.StructuredData; -import org.apache.druid.segment.selector.settable.SettableColumnValueSelector; -import org.apache.druid.segment.selector.settable.SettableObjectColumnValueSelector; -import org.apache.druid.segment.writeout.SegmentWriteOutMedium; - -import java.io.File; -import java.util.Comparator; - -public class NestedDataColumnHandlerV4 implements DimensionHandler -{ - private static Comparator COMPARATOR = (s1, s2) -> - StructuredData.COMPARATOR.compare( - StructuredData.wrap(s1.getObject()), - StructuredData.wrap(s2.getObject()) - ); - - private final String name; - - public NestedDataColumnHandlerV4(String name) - { - this.name = name; - } - - @Override - public String getDimensionName() - { - return name; - } - - @Override - public DimensionSpec getDimensionSpec() - { - return new DefaultDimensionSpec(name, name, ColumnType.NESTED_DATA); - } - - @Override - public DimensionSchema getDimensionSchema(ColumnCapabilities capabilities) - { - return new NestedDataColumnSchema(name, 4); - } - - @Override - public DimensionIndexer makeIndexer(boolean useMaxMemoryEstimates) - { - return new NestedDataColumnIndexerV4(); - } - - @Override - public DimensionMergerV9 makeMerger( - String outputName, - IndexSpec indexSpec, - SegmentWriteOutMedium segmentWriteOutMedium, - ColumnCapabilities capabilities, - ProgressIndicator progress, - File segmentBaseDir, - Closer closer - ) - { - return new NestedDataColumnMergerV4(outputName, indexSpec, segmentWriteOutMedium, closer); - } - - @Override - public int getLengthOfEncodedKeyComponent(StructuredData dimVals) - { - // this is called in one place, OnheapIncrementalIndex, where returning 0 here means the value is null - // so the actual value we return here doesn't matter. we should consider refactoring this to a boolean - return 1; - } - - @Override - public Comparator getEncodedValueSelectorComparator() - { - return COMPARATOR; - } - - @Override - public SettableColumnValueSelector makeNewSettableEncodedValueSelector() - { - return new SettableObjectColumnValueSelector(); - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java deleted file mode 100644 index 366f5d86278d..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnIndexerV4.java +++ /dev/null @@ -1,501 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment; - -import org.apache.druid.collections.bitmap.BitmapFactory; -import org.apache.druid.collections.bitmap.MutableBitmap; -import org.apache.druid.java.util.common.IAE; -import org.apache.druid.java.util.common.UOE; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.math.expr.ExpressionType; -import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; -import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.ColumnCapabilitiesImpl; -import org.apache.druid.segment.column.ColumnFormat; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.data.CloseableIndexed; -import org.apache.druid.segment.incremental.IncrementalIndex; -import org.apache.druid.segment.incremental.IncrementalIndexRowHolder; -import org.apache.druid.segment.nested.FieldTypeInfo; -import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; -import org.apache.druid.segment.nested.NestedPathFinder; -import org.apache.druid.segment.nested.NestedPathPart; -import org.apache.druid.segment.nested.SortedValueDictionary; -import org.apache.druid.segment.nested.StructuredData; -import org.apache.druid.segment.nested.StructuredDataProcessor; -import org.apache.druid.segment.nested.ValueDictionary; - -import javax.annotation.Nullable; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.Objects; -import java.util.SortedMap; -import java.util.TreeMap; - -public class NestedDataColumnIndexerV4 implements DimensionIndexer -{ - private static final ColumnFormat FORMAT = new NestedDataComplexTypeSerde.NestedColumnFormatV4(); - - protected volatile boolean hasNulls = false; - - protected SortedMap fieldIndexers = new TreeMap<>(); - protected final ValueDictionary globalDictionary = new ValueDictionary(); - - int estimatedFieldKeySize = 0; - - protected final StructuredDataProcessor indexerProcessor = new StructuredDataProcessor() - { - @Override - public ProcessedValue processField(ArrayList fieldPath, @Nullable Object fieldValue) - { - // null value is always added to the global dictionary as id 0, so we can ignore them here - if (fieldValue != null) { - final String fieldName = NestedPathFinder.toNormalizedJsonPath(fieldPath); - ExprEval eval = ExprEval.bestEffortOf(fieldValue); - FieldIndexer fieldIndexer = fieldIndexers.get(fieldName); - if (fieldIndexer == null) { - estimatedFieldKeySize += StructuredDataProcessor.estimateStringSize(fieldName); - fieldIndexer = new FieldIndexer(globalDictionary); - fieldIndexers.put(fieldName, fieldIndexer); - } - return fieldIndexer.processValue(eval); - } - return ProcessedValue.NULL_LITERAL; - } - - @Nullable - @Override - public ProcessedValue processArrayField( - ArrayList fieldPath, - @Nullable List array - ) - { - // classic nested data column indexer does not handle arrays - return null; - } - }; - - @Override - public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent( - @Nullable Object dimValues, - boolean reportParseExceptions - ) - { - final long oldDictSizeInBytes = globalDictionary.sizeInBytes(); - final int oldFieldKeySize = estimatedFieldKeySize; - final StructuredData data; - if (dimValues == null) { - hasNulls = true; - data = null; - } else if (dimValues instanceof StructuredData) { - data = (StructuredData) dimValues; - } else { - data = new StructuredData(dimValues); - } - StructuredDataProcessor.ProcessResults info = indexerProcessor.processFields(data == null ? null : data.getValue()); - // 'raw' data is currently preserved 'as-is', and not replaced with object references to the global dictionaries - long effectiveSizeBytes = info.getEstimatedSize(); - // then, we add the delta of size change to the global dictionaries to account for any new space added by the - // 'raw' data - effectiveSizeBytes += (globalDictionary.sizeInBytes() - oldDictSizeInBytes); - effectiveSizeBytes += (estimatedFieldKeySize - oldFieldKeySize); - return new EncodedKeyComponent<>(data, effectiveSizeBytes); - } - - @Override - public void setSparseIndexed() - { - this.hasNulls = true; - } - - @Override - public StructuredData getUnsortedEncodedValueFromSorted(StructuredData sortedIntermediateValue) - { - return sortedIntermediateValue; - } - - @Override - public CloseableIndexed getSortedIndexedValues() - { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public StructuredData getMinValue() - { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public StructuredData getMaxValue() - { - throw new UnsupportedOperationException("Not supported"); - } - - @Override - public int getCardinality() - { - return DimensionDictionarySelector.CARDINALITY_UNKNOWN; - } - - @Override - public DimensionSelector makeDimensionSelector( - DimensionSpec spec, - IncrementalIndexRowHolder currEntry, - IncrementalIndex.DimensionDesc desc - ) - { - final int dimIndex = desc.getIndex(); - final ColumnValueSelector rootLiteralSelector = getRootLiteralValueSelector(currEntry, dimIndex); - if (rootLiteralSelector != null) { - return new BaseSingleValueDimensionSelector() - { - @Nullable - @Override - protected String getValue() - { - final Object o = rootLiteralSelector.getObject(); - if (o == null) { - return null; - } - return o.toString(); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - - } - }; - } - // column has nested data or is of mixed root type, cannot use - throw new UOE( - "makeDimensionSelector is not supported, column [%s] is [%s] typed and should only use makeColumnValueSelector", - spec.getOutputName(), - ColumnType.NESTED_DATA - ); - } - - @Override - public ColumnValueSelector makeColumnValueSelector( - IncrementalIndexRowHolder currEntry, - IncrementalIndex.DimensionDesc desc - ) - { - final int dimIndex = desc.getIndex(); - final ColumnValueSelector rootLiteralSelector = getRootLiteralValueSelector(currEntry, dimIndex); - if (rootLiteralSelector != null) { - return rootLiteralSelector; - } - - return new ObjectColumnSelector() - { - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - - } - - @Nullable - @Override - public StructuredData getObject() - { - final Object[] dims = currEntry.get().getDims(); - if (0 <= dimIndex && dimIndex < dims.length) { - return (StructuredData) dims[dimIndex]; - } else { - return null; - } - } - - @Override - public Class classOfObject() - { - return StructuredData.class; - } - }; - } - private ColumnType getLogicalType() - { - if (fieldIndexers.size() == 1 && fieldIndexers.containsKey(NestedPathFinder.JSON_PATH_ROOT)) { - FieldIndexer rootField = fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT); - ColumnType singleType = rootField.getTypes().getSingleType(); - return singleType == null ? ColumnType.NESTED_DATA : singleType; - } - return ColumnType.NESTED_DATA; - } - - @Override - public ColumnCapabilities getColumnCapabilities() - { - return ColumnCapabilitiesImpl.createDefault() - .setType(getLogicalType()) - .setHasNulls(hasNulls); - } - - @Override - public ColumnFormat getFormat() - { - return FORMAT; - } - - public SortedValueDictionary getSortedValueLookups() - { - return globalDictionary.getSortedCollector(); - } - - public SortedMap getFieldTypeInfo() - { - TreeMap fields = new TreeMap<>(); - for (Map.Entry entry : fieldIndexers.entrySet()) { - // skip adding the field if no types are in the set, meaning only null values have been processed - if (!entry.getValue().getTypes().isEmpty()) { - fields.put(entry.getKey(), entry.getValue().getTypes()); - } - } - return fields; - } - - @Override - public int compareUnsortedEncodedKeyComponents( - @Nullable StructuredData lhs, - @Nullable StructuredData rhs - ) - { - return StructuredData.COMPARATOR.compare(lhs, rhs); - } - - @Override - public boolean checkUnsortedEncodedKeyComponentsEqual( - @Nullable StructuredData lhs, - @Nullable StructuredData rhs - ) - { - return Objects.equals(lhs, rhs); - } - - @Override - public int getUnsortedEncodedKeyComponentHashCode(@Nullable StructuredData key) - { - return Objects.hash(key); - } - - @Override - public Object convertUnsortedEncodedKeyComponentToActualList(StructuredData key) - { - return key; - } - - @Override - public ColumnValueSelector convertUnsortedValuesToSorted(ColumnValueSelector selectorWithUnsortedValues) - { - final FieldIndexer rootIndexer = fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT); - if (fieldIndexers.size() == 1 && rootIndexer != null && rootIndexer.isSingleType()) { - // for root only literals, makeColumnValueSelector and makeDimensionSelector automatically unwrap StructuredData - // we need to do the opposite here, wrapping selector values with a StructuredData so that they are consistently - // typed for the merger - return new ColumnValueSelector() - { - @Override - public boolean isNull() - { - return selectorWithUnsortedValues.isNull(); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - selectorWithUnsortedValues.inspectRuntimeShape(inspector); - } - - @Nullable - @Override - public StructuredData getObject() - { - return StructuredData.wrap(selectorWithUnsortedValues.getObject()); - } - - @Override - public float getFloat() - { - return selectorWithUnsortedValues.getFloat(); - } - - @Override - public double getDouble() - { - return selectorWithUnsortedValues.getDouble(); - } - - @Override - public long getLong() - { - return selectorWithUnsortedValues.getLong(); - } - - @Override - public Class classOfObject() - { - return StructuredData.class; - } - }; - } - return selectorWithUnsortedValues; - } - - @Override - public void fillBitmapsFromUnsortedEncodedKeyComponent( - StructuredData key, - int rowNum, - MutableBitmap[] bitmapIndexes, - BitmapFactory factory - ) - { - throw new UnsupportedOperationException("Not supported"); - } - - @Nullable - private ColumnValueSelector getRootLiteralValueSelector( - IncrementalIndexRowHolder currEntry, - int dimIndex - ) - { - if (fieldIndexers.size() > 1) { - return null; - } - final FieldIndexer root = fieldIndexers.get(NestedPathFinder.JSON_PATH_ROOT); - if (root == null || !root.isSingleType()) { - return null; - } - return new ColumnValueSelector<>() - { - @Override - public boolean isNull() - { - final Object o = getObject(); - return !(o instanceof Number); - } - - @Override - public float getFloat() - { - Object value = getObject(); - if (value == null) { - return 0; - } - return ((Number) value).floatValue(); - } - - @Override - public double getDouble() - { - Object value = getObject(); - if (value == null) { - return 0; - } - return ((Number) value).doubleValue(); - } - - @Override - public long getLong() - { - Object value = getObject(); - if (value == null) { - return 0; - } - return ((Number) value).longValue(); - } - - @Override - public void inspectRuntimeShape(RuntimeShapeInspector inspector) - { - - } - - @Nullable - @Override - public Object getObject() - { - final Object[] dims = currEntry.get().getDims(); - if (0 <= dimIndex && dimIndex < dims.length) { - final StructuredData data = (StructuredData) dims[dimIndex]; - if (data != null) { - return ExprEval.bestEffortOf(data.getValue()).valueOrDefault(); - } - } - - return null; - } - - @Override - public Class classOfObject() - { - return Object.class; - } - }; - } - - static class FieldIndexer - { - private final ValueDictionary valueDictionary; - private final FieldTypeInfo.MutableTypeSet typeSet; - - FieldIndexer(ValueDictionary valueDictionary) - { - this.valueDictionary = valueDictionary; - this.typeSet = new FieldTypeInfo.MutableTypeSet(); - } - - private StructuredDataProcessor.ProcessedValue processValue(ExprEval eval) - { - final ColumnType columnType = ExpressionType.toColumnType(eval.type()); - int sizeEstimate; - switch (columnType.getType()) { - case LONG: - typeSet.add(ColumnType.LONG); - sizeEstimate = valueDictionary.addLongValue(eval.asLong()); - return new StructuredDataProcessor.ProcessedValue<>(eval.asLong(), sizeEstimate); - case DOUBLE: - typeSet.add(ColumnType.DOUBLE); - sizeEstimate = valueDictionary.addDoubleValue(eval.asDouble()); - return new StructuredDataProcessor.ProcessedValue<>(eval.asDouble(), sizeEstimate); - case STRING: - typeSet.add(ColumnType.STRING); - final String asString = eval.asString(); - sizeEstimate = valueDictionary.addStringValue(asString); - return new StructuredDataProcessor.ProcessedValue<>(asString, sizeEstimate); - default: - throw new IAE("Unhandled type: %s", columnType); - } - } - - public FieldTypeInfo.MutableTypeSet getTypes() - { - return typeSet; - } - - public boolean isSingleType() - { - return typeSet.getSingleType() != null; - } - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnMergerV4.java b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnMergerV4.java deleted file mode 100644 index 6006dbdab2e3..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnMergerV4.java +++ /dev/null @@ -1,214 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment; - -import com.google.common.collect.PeekingIterator; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.segment.column.ColumnDescriptor; -import org.apache.druid.segment.column.ValueType; -import org.apache.druid.segment.data.Indexed; -import org.apache.druid.segment.nested.FieldTypeInfo; -import org.apache.druid.segment.nested.NestedDataColumnSerializerV4; -import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; -import org.apache.druid.segment.nested.SortedValueDictionary; -import org.apache.druid.segment.serde.ComplexColumnPartSerde; -import org.apache.druid.segment.writeout.SegmentWriteOutMedium; - -import javax.annotation.Nullable; -import java.io.IOException; -import java.nio.IntBuffer; -import java.util.Comparator; -import java.util.List; -import java.util.SortedMap; -import java.util.TreeMap; - -public class NestedDataColumnMergerV4 implements DimensionMergerV9 -{ - private static final Logger log = new Logger(NestedDataColumnMergerV4.class); - - public static final Comparator> STRING_MERGING_COMPARATOR = - SimpleDictionaryMergingIterator.makePeekingComparator(); - public static final Comparator> LONG_MERGING_COMPARATOR = - SimpleDictionaryMergingIterator.makePeekingComparator(); - public static final Comparator> DOUBLE_MERGING_COMPARATOR = - SimpleDictionaryMergingIterator.makePeekingComparator(); - - private final String name; - private final IndexSpec indexSpec; - private final SegmentWriteOutMedium segmentWriteOutMedium; - private final Closer closer; - - private ColumnDescriptor.Builder descriptorBuilder; - private NestedDataColumnSerializerV4 serializer; - - public NestedDataColumnMergerV4( - String name, - IndexSpec indexSpec, - SegmentWriteOutMedium segmentWriteOutMedium, - Closer closer - ) - { - - this.name = name; - this.indexSpec = indexSpec; - this.segmentWriteOutMedium = segmentWriteOutMedium; - this.closer = closer; - } - - @Override - public void writeMergedValueDictionary(List adapters) throws IOException - { - try { - long dimStartTime = System.currentTimeMillis(); - - int numMergeIndex = 0; - SortedValueDictionary sortedLookup = null; - final Indexed[] sortedLookups = new Indexed[adapters.size()]; - final Indexed[] sortedLongLookups = new Indexed[adapters.size()]; - final Indexed[] sortedDoubleLookups = new Indexed[adapters.size()]; - - final SortedMap mergedFields = new TreeMap<>(); - - for (int i = 0; i < adapters.size(); i++) { - final IndexableAdapter adapter = adapters.get(i); - - final IndexableAdapter.NestedColumnMergable mergable = closer.register( - adapter.getNestedColumnMergeables(name) - ); - if (mergable == null) { - continue; - } - final SortedValueDictionary dimValues = mergable.getValueDictionary(); - - boolean allNulls = dimValues == null || dimValues.allNull(); - if (!allNulls) { - sortedLookup = dimValues; - mergable.mergeFieldsInto(mergedFields); - sortedLookups[i] = dimValues.getSortedStrings(); - sortedLongLookups[i] = dimValues.getSortedLongs(); - sortedDoubleLookups[i] = dimValues.getSortedDoubles(); - numMergeIndex++; - } - } - - descriptorBuilder = new ColumnDescriptor.Builder(); - - serializer = new NestedDataColumnSerializerV4( - name, - indexSpec, - segmentWriteOutMedium, - closer - ); - - final ComplexColumnPartSerde partSerde = ComplexColumnPartSerde.serializerBuilder() - .withTypeName(NestedDataComplexTypeSerde.TYPE_NAME) - .withDelegate(serializer) - .build(); - descriptorBuilder.setValueType(ValueType.COMPLEX) - .setHasMultipleValues(false) - .addSerde(partSerde); - - serializer.open(); - serializer.serializeFields(mergedFields); - - int stringCardinality; - int longCardinality; - int doubleCardinality; - if (numMergeIndex == 1) { - serializer.serializeDictionaries( - sortedLookup.getSortedStrings(), - sortedLookup.getSortedLongs(), - sortedLookup.getSortedDoubles() - ); - stringCardinality = sortedLookup.getStringCardinality(); - longCardinality = sortedLookup.getLongCardinality(); - doubleCardinality = sortedLookup.getDoubleCardinality(); - } else { - final SimpleDictionaryMergingIterator stringIterator = new SimpleDictionaryMergingIterator<>( - sortedLookups, - STRING_MERGING_COMPARATOR - ); - final SimpleDictionaryMergingIterator longIterator = new SimpleDictionaryMergingIterator<>( - sortedLongLookups, - LONG_MERGING_COMPARATOR - ); - final SimpleDictionaryMergingIterator doubleIterator = new SimpleDictionaryMergingIterator<>( - sortedDoubleLookups, - DOUBLE_MERGING_COMPARATOR - ); - serializer.serializeDictionaries( - () -> stringIterator, - () -> longIterator, - () -> doubleIterator - ); - stringCardinality = stringIterator.getCardinality(); - longCardinality = longIterator.getCardinality(); - doubleCardinality = doubleIterator.getCardinality(); - } - - log.debug( - "Completed dim[%s] conversions with string cardinality[%,d], long cardinality[%,d], double cardinality[%,d] in %,d millis.", - name, - stringCardinality, - longCardinality, - doubleCardinality, - System.currentTimeMillis() - dimStartTime - ); - } - catch (IOException ioe) { - log.error(ioe, "Failed to merge dictionary for column [%s]", name); - throw ioe; - } - } - - @Override - public ColumnValueSelector convertSortedSegmentRowValuesToMergedRowValues( - int segmentIndex, - ColumnValueSelector source - ) - { - return source; - } - - @Override - public void processMergedRow(ColumnValueSelector selector) throws IOException - { - serializer.serialize(selector); - } - - @Override - public void writeIndexes(@Nullable List segmentRowNumConversions) - { - // fields write their own indexes - } - - @Override - public boolean hasOnlyNulls() - { - return false; - } - - @Override - public ColumnDescriptor makeColumnDescriptor() - { - return descriptorBuilder.build(); - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnSchema.java b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnSchema.java index bd1e7bf57243..bae8435ecb72 100644 --- a/processing/src/main/java/org/apache/druid/segment/NestedDataColumnSchema.java +++ b/processing/src/main/java/org/apache/druid/segment/NestedDataColumnSchema.java @@ -30,10 +30,8 @@ import java.util.Objects; /** - * Nested column {@link DimensionSchema}. If {@link #formatVersion} is set to 4, or null and - * {@link DefaultColumnFormatConfig#nestedColumnFormatVersion} is set to 4, then {@link NestedDataColumnHandlerV4} is - * used, else {@link NestedCommonFormatColumnHandler} is used instead and this is equivalent to using - * {@link AutoTypeColumnSchema} + * Nested column {@link DimensionSchema}. This uses {@link NestedCommonFormatColumnHandler} and is equivalent to using + * {@link AutoTypeColumnSchema}, but remains for backwards compatibility. */ public class NestedDataColumnSchema extends DimensionSchema { @@ -90,11 +88,7 @@ public ColumnType getColumnType() @Override public DimensionHandler getDimensionHandler() { - if (formatVersion == 4) { - return new NestedDataColumnHandlerV4(getName()); - } else { - return new NestedCommonFormatColumnHandler(getName(), null); - } + return new NestedCommonFormatColumnHandler(getName(), null); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java index 35736cc4975d..dea7ad4fd228 100644 --- a/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java +++ b/processing/src/main/java/org/apache/druid/segment/incremental/IncrementalIndexAdapter.java @@ -28,7 +28,6 @@ import org.apache.druid.segment.IndexableAdapter; import org.apache.druid.segment.IntIteratorUtils; import org.apache.druid.segment.Metadata; -import org.apache.druid.segment.NestedDataColumnIndexerV4; import org.apache.druid.segment.TransformableRowIterator; import org.apache.druid.segment.column.ColumnCapabilities; import org.apache.druid.segment.column.ColumnFormat; @@ -154,17 +153,6 @@ public NestedColumnMergable getNestedColumnMergeables(String column) } final DimensionIndexer indexer = accessor.dimensionDesc.getIndexer(); - if (indexer instanceof NestedDataColumnIndexerV4) { - NestedDataColumnIndexerV4 nestedDataColumnIndexer = (NestedDataColumnIndexerV4) indexer; - - return new NestedColumnMergable( - nestedDataColumnIndexer.getSortedValueLookups(), - nestedDataColumnIndexer.getFieldTypeInfo(), - true, - false, - null - ); - } if (indexer instanceof AutoTypeColumnIndexer) { AutoTypeColumnIndexer autoIndexer = (AutoTypeColumnIndexer) indexer; diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java deleted file mode 100644 index c30559dd43f0..000000000000 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataColumnSerializerV4.java +++ /dev/null @@ -1,397 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment.nested; - -import com.google.common.base.Preconditions; -import com.google.common.collect.Maps; -import org.apache.druid.collections.bitmap.ImmutableBitmap; -import org.apache.druid.collections.bitmap.MutableBitmap; -import org.apache.druid.java.util.common.FileUtils; -import org.apache.druid.java.util.common.ISE; -import org.apache.druid.java.util.common.RE; -import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; -import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; -import org.apache.druid.java.util.common.logger.Logger; -import org.apache.druid.math.expr.ExprEval; -import org.apache.druid.segment.ColumnValueSelector; -import org.apache.druid.segment.GenericColumnSerializer; -import org.apache.druid.segment.IndexMerger; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.StringEncodingStrategies; -import org.apache.druid.segment.column.Types; -import org.apache.druid.segment.column.ValueType; -import org.apache.druid.segment.data.ByteBufferWriter; -import org.apache.druid.segment.data.CompressedVariableSizedBlobColumnSerializer; -import org.apache.druid.segment.data.CompressionStrategy; -import org.apache.druid.segment.data.DictionaryWriter; -import org.apache.druid.segment.data.FixedIndexedWriter; -import org.apache.druid.segment.data.GenericIndexed; -import org.apache.druid.segment.data.GenericIndexedWriter; -import org.apache.druid.segment.serde.ColumnSerializerUtils; -import org.apache.druid.segment.serde.ComplexColumnMetadata; -import org.apache.druid.segment.serde.Serializer; -import org.apache.druid.segment.writeout.SegmentWriteOutMedium; - -import javax.annotation.Nullable; -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.ByteOrder; -import java.nio.channels.WritableByteChannel; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.SortedMap; - -public class NestedDataColumnSerializerV4 implements GenericColumnSerializer -{ - private static final Logger log = new Logger(NestedDataColumnSerializerV4.class); - public static final String STRING_DICTIONARY_FILE_NAME = "__stringDictionary"; - public static final String LONG_DICTIONARY_FILE_NAME = "__longDictionary"; - public static final String DOUBLE_DICTIONARY_FILE_NAME = "__doubleDictionary"; - public static final String RAW_FILE_NAME = "__raw"; - public static final String NULL_BITMAP_FILE_NAME = "__nullIndex"; - public static final String NESTED_FIELD_PREFIX = "__field_"; - - private final String name; - private final SegmentWriteOutMedium segmentWriteOutMedium; - private final IndexSpec indexSpec; - @SuppressWarnings("unused") - private final Closer closer; - - private final StructuredDataProcessor fieldProcessor = new StructuredDataProcessor() - { - @Override - public ProcessedValue processField(ArrayList fieldPath, @Nullable Object fieldValue) - { - final GlobalDictionaryEncodedFieldColumnWriter writer = fieldWriters.get( - NestedPathFinder.toNormalizedJsonPath(fieldPath) - ); - if (writer != null) { - try { - final ExprEval eval = ExprEval.bestEffortOf(fieldValue); - if (eval.type().isPrimitive() || eval.type().isPrimitiveArray()) { - writer.addValue(rowCount, eval.value()); - } else { - // behave consistently with nested column indexer, which defaults to string - writer.addValue(rowCount, eval.asString()); - } - // serializer doesn't use size estimate - return ProcessedValue.NULL_LITERAL; - } - catch (IOException e) { - throw new RE(e, "Failed to write field [%s], unhandled value", fieldPath); - } - } - return ProcessedValue.NULL_LITERAL; - } - - @Nullable - @Override - public ProcessedValue processArrayField( - ArrayList fieldPath, - @Nullable List array - ) - { - // classic nested column ingestion does not support array fields - return null; - } - }; - - private byte[] metadataBytes; - private DictionaryIdLookup globalDictionaryIdLookup; - private SortedMap fields; - private GenericIndexedWriter fieldsWriter; - private FieldTypeInfo.Writer fieldsInfoWriter; - private DictionaryWriter dictionaryWriter; - private FixedIndexedWriter longDictionaryWriter; - private FixedIndexedWriter doubleDictionaryWriter; - private CompressedVariableSizedBlobColumnSerializer rawWriter; - private ByteBufferWriter nullBitmapWriter; - private MutableBitmap nullRowsBitmap; - private Map> fieldWriters; - private int rowCount = 0; - private boolean closedForWrite = false; - - private boolean dictionarySerialized = false; - - public NestedDataColumnSerializerV4( - String name, - IndexSpec indexSpec, - SegmentWriteOutMedium segmentWriteOutMedium, - Closer closer - ) - { - this.name = name; - this.segmentWriteOutMedium = segmentWriteOutMedium; - this.indexSpec = indexSpec; - this.closer = closer; - } - - @Override - public void open() throws IOException - { - fieldsWriter = new GenericIndexedWriter<>(segmentWriteOutMedium, name, GenericIndexed.STRING_STRATEGY); - fieldsWriter.open(); - - fieldsInfoWriter = new FieldTypeInfo.Writer(segmentWriteOutMedium); - fieldsInfoWriter.open(); - - dictionaryWriter = StringEncodingStrategies.getStringDictionaryWriter( - indexSpec.getStringDictionaryEncoding(), - segmentWriteOutMedium, - name - ); - dictionaryWriter.open(); - - longDictionaryWriter = new FixedIndexedWriter<>( - segmentWriteOutMedium, - ColumnType.LONG.getStrategy(), - ByteOrder.nativeOrder(), - Long.BYTES, - true - ); - longDictionaryWriter.open(); - - doubleDictionaryWriter = new FixedIndexedWriter<>( - segmentWriteOutMedium, - ColumnType.DOUBLE.getStrategy(), - ByteOrder.nativeOrder(), - Double.BYTES, - true - ); - doubleDictionaryWriter.open(); - - rawWriter = new CompressedVariableSizedBlobColumnSerializer( - ColumnSerializerUtils.getInternalFileName(name, RAW_FILE_NAME), - segmentWriteOutMedium, - indexSpec.getJsonCompression() != null ? indexSpec.getJsonCompression() : CompressionStrategy.LZ4 - ); - rawWriter.open(); - - nullBitmapWriter = new ByteBufferWriter<>( - segmentWriteOutMedium, - indexSpec.getBitmapSerdeFactory().getObjectStrategy() - ); - nullBitmapWriter.open(); - - nullRowsBitmap = indexSpec.getBitmapSerdeFactory().getBitmapFactory().makeEmptyMutableBitmap(); - - globalDictionaryIdLookup = closer.register( - new DictionaryIdLookup( - name, - FileUtils.getTempDir().toFile(), - dictionaryWriter, - longDictionaryWriter, - doubleDictionaryWriter, - null - ) - ); - } - - public void serializeFields(SortedMap fields) throws IOException - { - this.fields = fields; - this.fieldWriters = Maps.newHashMapWithExpectedSize(fields.size()); - int ctr = 0; - for (Map.Entry field : fields.entrySet()) { - final String fieldName = field.getKey(); - final String fieldFileName = NESTED_FIELD_PREFIX + ctr++; - fieldsWriter.write(fieldName); - fieldsInfoWriter.write(field.getValue()); - final GlobalDictionaryEncodedFieldColumnWriter writer; - final ColumnType type = field.getValue().getSingleType(); - if (type != null) { - if (Types.is(type, ValueType.STRING)) { - writer = new ScalarStringFieldColumnWriter( - name, - fieldFileName, - segmentWriteOutMedium, - indexSpec, - globalDictionaryIdLookup - ); - } else if (Types.is(type, ValueType.LONG)) { - writer = new ScalarLongFieldColumnWriter( - name, - fieldFileName, - segmentWriteOutMedium, - indexSpec, - globalDictionaryIdLookup - ); - } else if (Types.is(type, ValueType.DOUBLE)) { - writer = new ScalarDoubleFieldColumnWriter( - name, - fieldFileName, - segmentWriteOutMedium, - indexSpec, - globalDictionaryIdLookup - ); - } else { - throw new ISE("Invalid field type [%s], how did this happen?", type); - } - } else { - writer = new VariantFieldColumnWriter( - name, - fieldFileName, - segmentWriteOutMedium, - indexSpec, - globalDictionaryIdLookup - ); - } - writer.open(); - fieldWriters.put(fieldName, writer); - } - } - - public void serializeDictionaries( - Iterable strings, - Iterable longs, - Iterable doubles - ) throws IOException - { - if (dictionarySerialized) { - throw new ISE("String dictionary already serialized for column [%s], cannot serialize again", name); - } - - // null is always 0 - dictionaryWriter.write(null); - for (String value : strings) { - if (value == null) { - continue; - } - - dictionaryWriter.write(value); - } - dictionarySerialized = true; - - for (Long value : longs) { - if (value == null) { - continue; - } - longDictionaryWriter.write(value); - } - - for (Double value : doubles) { - if (value == null) { - continue; - } - doubleDictionaryWriter.write(value); - } - dictionarySerialized = true; - } - - @Override - public void serialize(ColumnValueSelector selector) throws IOException - { - final StructuredData data = StructuredData.wrap(selector.getObject()); - if (data == null) { - nullRowsBitmap.add(rowCount); - } - rawWriter.addValue(NestedDataComplexTypeSerde.INSTANCE.toBytes(data)); - if (data != null) { - fieldProcessor.processFields(data.getValue()); - } - rowCount++; - } - - @Override - public long getSerializedSize() throws IOException - { - if (!closedForWrite) { - closedForWrite = true; - ByteArrayOutputStream baos = new ByteArrayOutputStream(); - IndexMerger.SERIALIZER_UTILS.writeString( - baos, - ColumnSerializerUtils.SMILE_MAPPER.writeValueAsString( - new ComplexColumnMetadata( - ByteOrder.nativeOrder(), - indexSpec.getBitmapSerdeFactory(), - name, - !nullRowsBitmap.isEmpty() - ) - ) - ); - this.metadataBytes = baos.toByteArray(); - this.nullBitmapWriter.write(nullRowsBitmap); - } - - long size = 1; - size += metadataBytes.length; - if (fieldsWriter != null) { - size += fieldsWriter.getSerializedSize(); - } - if (fieldsInfoWriter != null) { - size += fieldsInfoWriter.getSerializedSize(); - } - // the value dictionaries, raw column, and null index are all stored in separate files - return size; - } - - @Override - public void writeTo( - WritableByteChannel channel, - FileSmoosher smoosher - ) throws IOException - { - Preconditions.checkState(closedForWrite, "Not closed yet!"); - Preconditions.checkArgument(dictionaryWriter.isSorted(), "Dictionary not sorted?!?"); - // version 5 - channel.write(ByteBuffer.wrap(new byte[]{0x04})); - channel.write(ByteBuffer.wrap(metadataBytes)); - fieldsWriter.writeTo(channel, smoosher); - fieldsInfoWriter.writeTo(channel, smoosher); - - // version 3 stores large components in separate files to prevent exceeding smoosh file limit (int max) - writeInternal(smoosher, dictionaryWriter, STRING_DICTIONARY_FILE_NAME); - writeInternal(smoosher, longDictionaryWriter, LONG_DICTIONARY_FILE_NAME); - writeInternal(smoosher, doubleDictionaryWriter, DOUBLE_DICTIONARY_FILE_NAME); - writeInternal(smoosher, rawWriter, RAW_FILE_NAME); - if (!nullRowsBitmap.isEmpty()) { - writeInternal(smoosher, nullBitmapWriter, NULL_BITMAP_FILE_NAME); - } - - - // close the SmooshedWriter since we are done here, so we don't write to a temporary file per sub-column - // In the future, it would be best if the writeTo() itself didn't take a channel but was expected to actually - // open its own channels on the FileSmoosher object itself. Or some other thing that give this Serializer - // total control over when resources are opened up and when they are closed. Until then, we are stuck - // with a very tight coupling of this code with how the external "driver" is working. - if (channel instanceof SmooshedWriter) { - channel.close(); - } - - for (Map.Entry field : fields.entrySet()) { - // remove writer so that it can be collected when we are done with it - GlobalDictionaryEncodedFieldColumnWriter writer = fieldWriters.remove(field.getKey()); - writer.writeTo(rowCount, smoosher); - } - log.info("Column [%s] serialized successfully with [%d] nested columns.", name, fields.size()); - } - - private void writeInternal(FileSmoosher smoosher, Serializer serializer, String fileName) throws IOException - { - final String internalName = ColumnSerializerUtils.getInternalFileName(name, fileName); - try (SmooshedWriter smooshChannel = smoosher.addWithSmooshedWriter(internalName, serializer.getSerializedSize())) { - serializer.writeTo(smooshChannel, smoosher); - } - } -} diff --git a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java index 931dec70a9f0..c18a23037565 100644 --- a/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java +++ b/processing/src/main/java/org/apache/druid/segment/nested/NestedDataComplexTypeSerde.java @@ -25,7 +25,7 @@ import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.guava.Comparators; import org.apache.druid.segment.DimensionHandler; -import org.apache.druid.segment.NestedDataColumnHandlerV4; +import org.apache.druid.segment.NestedCommonFormatColumnHandler; import org.apache.druid.segment.NestedDataColumnSchema; import org.apache.druid.segment.column.ColumnBuilder; import org.apache.druid.segment.column.ColumnCapabilities; @@ -234,13 +234,13 @@ public ColumnType getLogicalType() @Override public DimensionHandler getColumnHandler(String columnName) { - return new NestedDataColumnHandlerV4(columnName); + return new NestedCommonFormatColumnHandler(columnName, null); } @Override public DimensionSchema getColumnSchema(String columnName) { - return new NestedDataColumnSchema(columnName, 4); + return new NestedDataColumnSchema(columnName, 5); } @Override diff --git a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java index 23a2d18f39e4..373f8a458c2e 100644 --- a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java +++ b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java @@ -26,7 +26,6 @@ import org.apache.druid.segment.DimensionHandlerProvider; import org.apache.druid.segment.DimensionHandlerUtils; import org.apache.druid.segment.NestedCommonFormatColumnHandler; -import org.apache.druid.segment.NestedDataColumnHandlerV4; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; import org.junit.AfterClass; import org.junit.Assert; @@ -88,7 +87,6 @@ public void testOverride() { DimensionHandlerUtils.DIMENSION_HANDLER_PROVIDERS.remove(NestedDataComplexTypeSerde.TYPE_NAME); Properties props = new Properties(); - props.setProperty("druid.indexing.formats.nestedColumnFormatVersion", "4"); props.setProperty("druid.indexing.formats.stringMultiValueHandlingMode", "sorted_array"); Injector gadget = makeInjector(props); @@ -98,7 +96,6 @@ public void testOverride() DimensionHandlerProvider provider = DimensionHandlerUtils.DIMENSION_HANDLER_PROVIDERS.get( NestedDataComplexTypeSerde.TYPE_NAME ); - Assert.assertTrue(provider.get("test") instanceof NestedDataColumnHandlerV4); Assert.assertEquals( DimensionSchema.MultiValueHandling.SORTED_ARRAY, diff --git a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java index d270d53c015e..0aaaf77c66c9 100644 --- a/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java +++ b/processing/src/test/java/org/apache/druid/query/NestedDataTestUtils.java @@ -106,16 +106,16 @@ public class NestedDataTestUtils ) .build(); - public static final DimensionsSpec TSV_V4_SCHEMA = + public static final DimensionsSpec TSV_NESTED_SCHEMA = DimensionsSpec.builder() .setDimensions( Arrays.asList( - new NestedDataColumnSchema("dim", 4), - new NestedDataColumnSchema("nest_json", 4), - new NestedDataColumnSchema("nester_json", 4), - new NestedDataColumnSchema("variant_json", 4), - new NestedDataColumnSchema("list_json", 4), - new NestedDataColumnSchema("nonexistent", 4) + new NestedDataColumnSchema("dim", 5), + new NestedDataColumnSchema("nest_json", 5), + new NestedDataColumnSchema("nester_json", 5), + new NestedDataColumnSchema("variant_json", 5), + new NestedDataColumnSchema("list_json", 5), + new NestedDataColumnSchema("nonexistent", 5) ) ) .build(); @@ -176,7 +176,7 @@ public static List createSimpleSegmentsTsv( ); } - public static List createSimpleSegmentsTsvV4( + public static List createSimpleSegmentsTsvNested( TemporaryFolder tempFolder, Closer closer ) @@ -186,7 +186,7 @@ public static List createSimpleSegmentsTsvV4( tempFolder, closer, Granularities.NONE, - TSV_V4_SCHEMA, + TSV_NESTED_SCHEMA, true ); } diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 81a747c745ca..baee61a86fab 100644 --- a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -285,7 +285,7 @@ public void testIngestAndScanSegmentsTsvV4() throws Exception .limit(100) .context(ImmutableMap.of()) .build(); - List segs = NestedDataTestUtils.createSimpleSegmentsTsvV4(tempFolder, closer); + List segs = NestedDataTestUtils.createSimpleSegmentsTsvNested(tempFolder, closer); final Sequence seq = helper.runQueryOnSegmentsObjs(segs, scanQuery); diff --git a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java deleted file mode 100644 index be41958f0066..000000000000 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnIndexerV4Test.java +++ /dev/null @@ -1,450 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.druid.segment; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableMap; -import org.apache.druid.data.input.MapBasedInputRow; -import org.apache.druid.data.input.impl.DimensionsSpec; -import org.apache.druid.data.input.impl.TimestampSpec; -import org.apache.druid.guice.BuiltInTypesModule; -import org.apache.druid.query.dimension.DefaultDimensionSpec; -import org.apache.druid.query.dimension.DimensionSpec; -import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.incremental.IncrementalIndex; -import org.apache.druid.segment.incremental.IncrementalIndexCursorFactory; -import org.apache.druid.segment.incremental.IncrementalIndexSchema; -import org.apache.druid.segment.incremental.IndexSizeExceededException; -import org.apache.druid.segment.incremental.OnheapIncrementalIndex; -import org.apache.druid.segment.nested.StructuredData; -import org.apache.druid.testing.InitializedNullHandlingTest; -import org.junit.Assert; -import org.junit.BeforeClass; -import org.junit.Test; - -import javax.annotation.Nonnull; -import java.util.Map; - -public class NestedDataColumnIndexerV4Test extends InitializedNullHandlingTest -{ - private static final String TIME_COL = "time"; - private static final String STRING_COL = "string"; - private static final String STRING_ARRAY_COL = "string_array"; - private static final String LONG_COL = "long"; - private static final String DOUBLE_COL = "double"; - private static final String VARIANT_COL = "variant"; - private static final String NESTED_COL = "nested"; - - @BeforeClass - public static void setup() - { - BuiltInTypesModule.registerHandlersAndSerde(); - } - - @Test - public void testKeySizeEstimation() - { - NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4(); - Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality()); - int baseCardinality = 0; - Assert.assertEquals(baseCardinality, indexer.globalDictionary.getCardinality()); - - EncodedKeyComponent key; - // new raw value, new field, new dictionary entry - key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); - Assert.assertEquals(228, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality()); - // adding same value only adds estimated size of value itself - key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableMap.of("x", "foo"), false); - Assert.assertEquals(112, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 1, indexer.globalDictionary.getCardinality()); - // new raw value, new field, new dictionary entry - key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); - Assert.assertEquals(94, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality()); - // adding same value only adds estimated size of value itself - key = indexer.processRowValsToUnsortedEncodedKeyComponent(10L, false); - Assert.assertEquals(16, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 2, indexer.globalDictionary.getCardinality()); - // new raw value, new dictionary entry - key = indexer.processRowValsToUnsortedEncodedKeyComponent(11L, false); - Assert.assertEquals(48, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 3, indexer.globalDictionary.getCardinality()); - - // new raw value, new fields - key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); - Assert.assertEquals(276, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality()); - // new raw value, re-use fields and dictionary - key = indexer.processRowValsToUnsortedEncodedKeyComponent(ImmutableList.of(1L, 2L, 10L), false); - Assert.assertEquals(56, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality()); - // new raw value, new fields - key = indexer.processRowValsToUnsortedEncodedKeyComponent( - ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), - false - ); - Assert.assertEquals(286, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality()); - // new raw value - key = indexer.processRowValsToUnsortedEncodedKeyComponent( - ImmutableMap.of("x", ImmutableList.of(1L, 2L, 10L)), - false - ); - Assert.assertEquals(118, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 5, indexer.globalDictionary.getCardinality()); - - key = indexer.processRowValsToUnsortedEncodedKeyComponent("", false); - Assert.assertEquals(104, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 6, indexer.globalDictionary.getCardinality()); - - key = indexer.processRowValsToUnsortedEncodedKeyComponent(0, false); - Assert.assertEquals(48, key.getEffectiveSizeBytes()); - Assert.assertEquals(baseCardinality + 7, indexer.globalDictionary.getCardinality()); - Assert.assertEquals(DimensionDictionarySelector.CARDINALITY_UNKNOWN, indexer.getCardinality()); - } - - @Test - public void testNestedColumnIndexerSchemaDiscoveryRootString() throws IndexSizeExceededException - { - long minTimestamp = System.currentTimeMillis(); - IncrementalIndex index = makeIncrementalIndex(minTimestamp); - - index.add(makeInputRow(minTimestamp + 1, true, STRING_COL, "a")); - index.add(makeInputRow(minTimestamp + 2, true, STRING_COL, "b")); - index.add(makeInputRow(minTimestamp + 3, true, STRING_COL, "c")); - index.add(makeInputRow(minTimestamp + 4, true, STRING_COL, null)); - index.add(makeInputRow(minTimestamp + 5, false, STRING_COL, null)); - - IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(index); - - try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) { - Cursor cursor = cursorHolder.asCursor(); - final DimensionSpec dimensionSpec = new DefaultDimensionSpec(STRING_COL, STRING_COL, ColumnType.STRING); - ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - - ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(STRING_COL); - DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertEquals("a", valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("a", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("a", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals("b", valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("b", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("b", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals("c", valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("c", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("c", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); - } - } - - @Test - public void testNestedColumnIndexerSchemaDiscoveryRootLong() throws IndexSizeExceededException - { - long minTimestamp = System.currentTimeMillis(); - IncrementalIndex index = makeIncrementalIndex(minTimestamp); - - index.add(makeInputRow(minTimestamp + 1, true, LONG_COL, 1L)); - index.add(makeInputRow(minTimestamp + 2, true, LONG_COL, 2L)); - index.add(makeInputRow(minTimestamp + 3, true, LONG_COL, 3L)); - index.add(makeInputRow(minTimestamp + 4, true, LONG_COL, null)); - index.add(makeInputRow(minTimestamp + 5, false, LONG_COL, null)); - - IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(index); - try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) { - Cursor cursor = cursorHolder.asCursor(); - final DimensionSpec dimensionSpec = new DefaultDimensionSpec(LONG_COL, LONG_COL, ColumnType.LONG); - ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - - ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(LONG_COL); - DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertEquals(1L, valueSelector.getObject()); - Assert.assertEquals(1L, valueSelector.getLong()); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("1", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("1", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(2L, valueSelector.getObject()); - Assert.assertEquals(2L, valueSelector.getLong()); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("2", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("2", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(3L, valueSelector.getObject()); - Assert.assertEquals(3L, valueSelector.getLong()); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("3", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("3", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); - } - } - - @Test - public void testNestedColumnIndexerSchemaDiscoveryRootDouble() throws IndexSizeExceededException - { - long minTimestamp = System.currentTimeMillis(); - IncrementalIndex index = makeIncrementalIndex(minTimestamp); - - index.add(makeInputRow(minTimestamp + 1, true, DOUBLE_COL, 1.1)); - index.add(makeInputRow(minTimestamp + 2, true, DOUBLE_COL, 2.2)); - index.add(makeInputRow(minTimestamp + 3, true, DOUBLE_COL, 3.3)); - index.add(makeInputRow(minTimestamp + 4, true, DOUBLE_COL, null)); - index.add(makeInputRow(minTimestamp + 5, false, DOUBLE_COL, null)); - - IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(index); - try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) { - Cursor cursor = cursorHolder.asCursor(); - final DimensionSpec dimensionSpec = new DefaultDimensionSpec(DOUBLE_COL, DOUBLE_COL, ColumnType.DOUBLE); - ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - - ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(DOUBLE_COL); - DimensionSelector dimensionSelector = columnSelectorFactory.makeDimensionSelector(dimensionSpec); - Assert.assertEquals(1.1, valueSelector.getObject()); - Assert.assertEquals(1.1, valueSelector.getDouble(), 0.0); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("1.1", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("1.1", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(2.2, valueSelector.getObject()); - Assert.assertEquals(2.2, valueSelector.getDouble(), 0.0); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("2.2", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("2.2", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(3.3, valueSelector.getObject()); - Assert.assertEquals(3.3, valueSelector.getDouble(), 0.0); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertEquals("3.3", dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertEquals("3.3", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertTrue(valueSelector.isNull()); - Assert.assertEquals(1, dimensionSelector.getRow().size()); - Assert.assertNull(dimensionSelector.lookupName(dimensionSelector.getRow().get(0))); - Assert.assertNull(dimensionSelector.getObject()); - } - } - - @Test - public void testNestedColumnIndexerSchemaDiscoveryRootStringArray() throws IndexSizeExceededException - { - long minTimestamp = System.currentTimeMillis(); - IncrementalIndex index = makeIncrementalIndex(minTimestamp); - - index.add(makeInputRow(minTimestamp + 1, true, STRING_ARRAY_COL, new String[]{"a"})); - index.add(makeInputRow(minTimestamp + 2, true, STRING_ARRAY_COL, new Object[]{"b", "c"})); - index.add(makeInputRow(minTimestamp + 3, true, STRING_ARRAY_COL, ImmutableList.of("d", "e"))); - index.add(makeInputRow(minTimestamp + 4, true, STRING_ARRAY_COL, null)); - index.add(makeInputRow(minTimestamp + 5, false, STRING_ARRAY_COL, null)); - - IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(index); - try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) { - Cursor cursor = cursorHolder.asCursor(); - final DimensionSpec dimensionSpec = new DefaultDimensionSpec( - STRING_ARRAY_COL, - STRING_ARRAY_COL, - ColumnType.STRING - ); - ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - - ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(STRING_ARRAY_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursor.getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - Assert.assertArrayEquals(new Object[]{"a"}, (Object[]) valueSelector.getObject()); - - cursor.advance(); - Assert.assertArrayEquals(new Object[]{"b", "c"}, (Object[]) valueSelector.getObject()); - - cursor.advance(); - Assert.assertArrayEquals(new Object[]{"d", "e"}, (Object[]) valueSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - } - } - - @Test - public void testNestedColumnIndexerSchemaDiscoveryRootVariant() throws IndexSizeExceededException - { - long minTimestamp = System.currentTimeMillis(); - IncrementalIndex index = makeIncrementalIndex(minTimestamp); - - index.add(makeInputRow(minTimestamp + 1, true, VARIANT_COL, "a")); - index.add(makeInputRow(minTimestamp + 2, true, VARIANT_COL, 2L)); - index.add(makeInputRow(minTimestamp + 3, true, VARIANT_COL, 3.3)); - index.add(makeInputRow(minTimestamp + 4, true, VARIANT_COL, null)); - index.add(makeInputRow(minTimestamp + 5, false, VARIANT_COL, null)); - - IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(index); - try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) { - Cursor cursor = cursorHolder.asCursor(); - final DimensionSpec dimensionSpec = new DefaultDimensionSpec(VARIANT_COL, VARIANT_COL, ColumnType.STRING); - ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - - ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(VARIANT_COL); - DimensionSelector dimensionSelector = cursor.getColumnSelectorFactory().makeDimensionSelector(dimensionSpec); - Assert.assertEquals("a", valueSelector.getObject()); - Assert.assertEquals("a", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(2L, valueSelector.getObject()); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals("2", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(3.3, valueSelector.getObject()); - Assert.assertFalse(valueSelector.isNull()); - Assert.assertEquals("3.3", dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertNull(dimensionSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - Assert.assertNull(dimensionSelector.getObject()); - } - } - - @Test - public void testNestedColumnIndexerSchemaDiscoveryNested() throws IndexSizeExceededException - { - long minTimestamp = System.currentTimeMillis(); - IncrementalIndex index = makeIncrementalIndex(minTimestamp); - - index.add(makeInputRow(minTimestamp + 1, true, NESTED_COL, "a")); - index.add(makeInputRow(minTimestamp + 2, true, NESTED_COL, 2L)); - index.add(makeInputRow(minTimestamp + 3, true, NESTED_COL, ImmutableMap.of("x", 1.1, "y", 2L))); - index.add(makeInputRow(minTimestamp + 4, true, NESTED_COL, null)); - index.add(makeInputRow(minTimestamp + 5, false, NESTED_COL, null)); - - IncrementalIndexCursorFactory cursorFactory = new IncrementalIndexCursorFactory(index); - try (final CursorHolder cursorHolder = cursorFactory.makeCursorHolder(CursorBuildSpec.FULL_SCAN)) { - Cursor cursor = cursorHolder.asCursor(); - final DimensionSpec dimensionSpec = new DefaultDimensionSpec(NESTED_COL, NESTED_COL, ColumnType.STRING); - ColumnSelectorFactory columnSelectorFactory = cursor.getColumnSelectorFactory(); - - ColumnValueSelector valueSelector = columnSelectorFactory.makeColumnValueSelector(NESTED_COL); - Assert.assertThrows( - UnsupportedOperationException.class, - () -> cursor.getColumnSelectorFactory().makeDimensionSelector(dimensionSpec) - ); - Assert.assertEquals(StructuredData.wrap("a"), valueSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(StructuredData.wrap(2L), valueSelector.getObject()); - - cursor.advance(); - Assert.assertEquals(StructuredData.wrap(ImmutableMap.of("x", 1.1, "y", 2L)), valueSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - - cursor.advance(); - Assert.assertNull(valueSelector.getObject()); - } - } - - @Nonnull - private static IncrementalIndex makeIncrementalIndex(long minTimestamp) - { - IncrementalIndex index = new OnheapIncrementalIndex.Builder() - .setIndexSchema( - IncrementalIndexSchema.builder() - .withMinTimestamp(minTimestamp) - .withTimestampSpec(new TimestampSpec(TIME_COL, "millis", null)) - .withDimensionsSpec( - DimensionsSpec.builder() - .useSchemaDiscovery(true) - .build() - ) - .withRollup(false) - .build() - ) - .setMaxRowCount(1000) - .build(); - return index; - } - - private MapBasedInputRow makeInputRow( - long timestamp, - boolean explicitNull, - Object... kv - ) - { - final Map event = TestHelper.makeMap(explicitNull, kv); - event.put("time", timestamp); - return new MapBasedInputRow(timestamp, ImmutableList.copyOf(event.keySet()), event); - } -} diff --git a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java index 77568f0355ff..6e27bf49fbf0 100644 --- a/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java +++ b/processing/src/test/java/org/apache/druid/segment/nested/NestedDataColumnSupplierV4Test.java @@ -22,53 +22,31 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; -import com.google.common.util.concurrent.Futures; -import com.google.common.util.concurrent.ListenableFuture; -import com.google.common.util.concurrent.ListeningExecutorService; -import com.google.common.util.concurrent.MoreExecutors; import org.apache.druid.collections.bitmap.RoaringBitmapFactory; import org.apache.druid.guice.BuiltInTypesModule; -import org.apache.druid.java.util.common.concurrent.Execs; import org.apache.druid.java.util.common.io.Closer; -import org.apache.druid.java.util.common.io.smoosh.FileSmoosher; import org.apache.druid.java.util.common.io.smoosh.SmooshedFileMapper; -import org.apache.druid.java.util.common.io.smoosh.SmooshedWriter; import org.apache.druid.query.DefaultBitmapResultFactory; import org.apache.druid.query.filter.SelectorPredicateFactory; import org.apache.druid.query.filter.StringPredicateDruidPredicateFactory; import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector; import org.apache.druid.segment.ColumnValueSelector; import org.apache.druid.segment.DimensionSelector; -import org.apache.druid.segment.IndexSpec; -import org.apache.druid.segment.IndexableAdapter; -import org.apache.druid.segment.NestedDataColumnHandlerV4; -import org.apache.druid.segment.NestedDataColumnIndexerV4; -import org.apache.druid.segment.NestedDataColumnSchema; import org.apache.druid.segment.ObjectColumnSelector; import org.apache.druid.segment.QueryableIndex; import org.apache.druid.segment.SimpleAscendingOffset; import org.apache.druid.segment.TestHelper; -import org.apache.druid.segment.column.ColumnBuilder; -import org.apache.druid.segment.column.ColumnCapabilities; -import org.apache.druid.segment.column.ColumnConfig; import org.apache.druid.segment.column.ColumnHolder; import org.apache.druid.segment.column.ColumnIndexSupplier; import org.apache.druid.segment.column.ColumnType; -import org.apache.druid.segment.column.TypeStrategy; import org.apache.druid.segment.index.BitmapColumnIndex; import org.apache.druid.segment.index.semantic.DruidPredicateIndexes; import org.apache.druid.segment.index.semantic.NullValueIndex; import org.apache.druid.segment.index.semantic.StringValueSetIndexes; -import org.apache.druid.segment.serde.ColumnPartSerde; -import org.apache.druid.segment.serde.ColumnSerializerUtils; -import org.apache.druid.segment.serde.ComplexColumnPartSerde; -import org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory; -import org.apache.druid.segment.writeout.TmpFileSegmentWriteOutMediumFactory; import org.apache.druid.testing.InitializedNullHandlingTest; import org.apache.druid.utils.CompressionUtils; import org.junit.After; import org.junit.Assert; -import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; @@ -78,16 +56,9 @@ import java.io.File; import java.io.IOException; import java.nio.ByteBuffer; -import java.util.ArrayList; -import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.SortedMap; -import java.util.TreeMap; import java.util.TreeSet; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.atomic.AtomicReference; public class NestedDataColumnSupplierV4Test extends InitializedNullHandlingTest { @@ -144,147 +115,12 @@ public static void staticSetup() BuiltInTypesModule.registerHandlersAndSerde(); } - @Before - public void setup() throws IOException - { - final String fileNameBase = "test"; - final String arrayFileNameBase = "array"; - fileMapper = smooshify(fileNameBase, tempFolder.newFolder(), data); - baseBuffer = fileMapper.mapFile(fileNameBase); - arrayFileMapper = smooshify(arrayFileNameBase, tempFolder.newFolder(), arrayTestData); - arrayBaseBuffer = arrayFileMapper.mapFile(arrayFileNameBase); - } - - private SmooshedFileMapper smooshify( - String fileNameBase, - File tmpFile, - List> data - ) - throws IOException - { - SegmentWriteOutMediumFactory writeOutMediumFactory = TmpFileSegmentWriteOutMediumFactory.instance(); - try (final FileSmoosher smoosher = new FileSmoosher(tmpFile)) { - NestedDataColumnSerializerV4 serializer = new NestedDataColumnSerializerV4( - fileNameBase, - IndexSpec.DEFAULT, - writeOutMediumFactory.makeSegmentWriteOutMedium(tempFolder.newFolder()), - closer - ); - - NestedDataColumnIndexerV4 indexer = new NestedDataColumnIndexerV4(); - for (Object o : data) { - indexer.processRowValsToUnsortedEncodedKeyComponent(o, false); - } - SortedMap sortedFields = new TreeMap<>(); - - IndexableAdapter.NestedColumnMergable mergable = closer.register( - new IndexableAdapter.NestedColumnMergable( - indexer.getSortedValueLookups(), - indexer.getFieldTypeInfo(), - true, - false, - null - ) - ); - SortedValueDictionary globalDictionarySortedCollector = mergable.getValueDictionary(); - mergable.mergeFieldsInto(sortedFields); - - serializer.open(); - serializer.serializeFields(sortedFields); - serializer.serializeDictionaries( - globalDictionarySortedCollector.getSortedStrings(), - globalDictionarySortedCollector.getSortedLongs(), - globalDictionarySortedCollector.getSortedDoubles() - ); - - SettableSelector valueSelector = new SettableSelector(); - for (Object o : data) { - valueSelector.setObject(StructuredData.wrap(o)); - serializer.serialize(valueSelector); - } - - try (SmooshedWriter writer = smoosher.addWithSmooshedWriter(fileNameBase, serializer.getSerializedSize())) { - serializer.writeTo(writer, smoosher); - } - smoosher.close(); - return closer.register(SmooshedFileMapper.load(tmpFile)); - } - } - @After public void teardown() throws IOException { closer.close(); } - @Test - public void testBasicFunctionality() throws IOException - { - ColumnBuilder bob = new ColumnBuilder(); - bob.setFileMapper(fileMapper); - ComplexColumnPartSerde partSerde = ComplexColumnPartSerde.createDeserializer(NestedDataComplexTypeSerde.TYPE_NAME); - ColumnPartSerde.Deserializer deserializer = partSerde.getDeserializer(); - deserializer.read(baseBuffer, bob, ColumnConfig.SELECTION_SIZE, null); - final ColumnHolder holder = bob.build(); - final ColumnCapabilities capabilities = holder.getCapabilities(); - Assert.assertEquals(ColumnType.NESTED_DATA, capabilities.toColumnType()); - Assert.assertTrue(holder.getColumnFormat() instanceof NestedDataComplexTypeSerde.NestedColumnFormatV4); - Assert.assertTrue(holder.getColumnFormat().getColumnHandler("test") instanceof NestedDataColumnHandlerV4); - NestedDataColumnSchema schema = (NestedDataColumnSchema) holder.getColumnFormat().getColumnSchema("test"); - Assert.assertEquals(4, schema.getFormatVersion()); - try (NestedDataComplexColumn column = (NestedDataComplexColumn) holder.getColumn()) { - smokeTest(column); - } - } - - @Test - public void testConcurrency() throws ExecutionException, InterruptedException - { - // if this test ever starts being to be a flake, there might be thread safety issues - ColumnBuilder bob = new ColumnBuilder(); - bob.setFileMapper(fileMapper); - NestedDataColumnSupplierV4 supplier = NestedDataColumnSupplierV4.read( - baseBuffer, - bob, - ColumnConfig.SELECTION_SIZE, - ColumnSerializerUtils.SMILE_MAPPER - ); - final String expectedReason = "none"; - final AtomicReference failureReason = new AtomicReference<>(expectedReason); - - final int threads = 10; - ListeningExecutorService executorService = MoreExecutors.listeningDecorator( - Execs.multiThreaded(threads, "NestedDataColumnSupplierV4Test-%d") - ); - try { - Collection> futures = new ArrayList<>(threads); - final CountDownLatch threadsStartLatch = new CountDownLatch(1); - for (int i = 0; i < threads; ++i) { - futures.add( - executorService.submit(() -> { - try { - threadsStartLatch.await(); - for (int iter = 0; iter < 5000; iter++) { - try (NestedDataComplexColumn column = (NestedDataComplexColumn) supplier.get()) { - smokeTest(column); - } - } - } - catch (Throwable ex) { - failureReason.set(ex.getMessage()); - } - }) - ); - } - threadsStartLatch.countDown(); - Futures.allAsList(futures).get(); - Assert.assertEquals(expectedReason, failureReason.get()); - } - finally { - executorService.shutdownNow(); - } - } - @Test public void testLegacyV3ReaderFormat() throws IOException { @@ -358,6 +194,7 @@ public void testLegacyV4ReaderFormat() throws IOException Assert.assertTrue(indexForValue.computeBitmapResult(resultFactory, false).get(0)); } } + private void smokeTest(NestedDataComplexColumn column) throws IOException { SimpleAscendingOffset offset = new SimpleAscendingOffset(data.size()); @@ -440,6 +277,7 @@ private void smokeTest(NestedDataComplexColumn column) throws IOException offset.increment(); } } + private void testPath( Map row, int rowNumber, @@ -537,56 +375,4 @@ public void inspectRuntimeShape(RuntimeShapeInspector inspector) } } - - private static class OnlyPositionalReadsTypeStrategy implements TypeStrategy - { - private final TypeStrategy delegate; - - private OnlyPositionalReadsTypeStrategy(TypeStrategy delegate) - { - this.delegate = delegate; - } - - @Override - public int estimateSizeBytes(T value) - { - return delegate.estimateSizeBytes(value); - } - - @Override - public T read(ByteBuffer buffer) - { - throw new IllegalStateException("non-positional read"); - } - - @Override - public boolean readRetainsBufferReference() - { - return delegate.readRetainsBufferReference(); - } - - @Override - public int write(ByteBuffer buffer, T value, int maxSizeBytes) - { - return delegate.write(buffer, value, maxSizeBytes); - } - - @Override - public T read(ByteBuffer buffer, int offset) - { - return delegate.read(buffer, offset); - } - - @Override - public int write(ByteBuffer buffer, int offset, T value, int maxSizeBytes) - { - return delegate.write(buffer, offset, value, maxSizeBytes); - } - - @Override - public int compare(Object o1, Object o2) - { - return delegate.compare(o1, o2); - } - } } From 511d747ad05f242000dee9bdded4f47cb670d5dd Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 14 Jan 2025 14:35:47 -0800 Subject: [PATCH 2/4] fixes --- .../msq/indexing/MSQCompactionRunnerTest.java | 2 +- .../segment/NestedDataColumnSchemaTest.java | 25 ++----------------- 2 files changed, 3 insertions(+), 24 deletions(-) diff --git a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQCompactionRunnerTest.java b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQCompactionRunnerTest.java index 31c578407aed..6df649403c2d 100644 --- a/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQCompactionRunnerTest.java +++ b/extensions-core/multi-stage-query/src/test/java/org/apache/druid/msq/indexing/MSQCompactionRunnerTest.java @@ -102,7 +102,7 @@ public class MSQCompactionRunnerTest private static final StringDimensionSchema STRING_DIMENSION = new StringDimensionSchema("string_dim", null, false); private static final StringDimensionSchema MV_STRING_DIMENSION = new StringDimensionSchema("mv_string_dim", null, null); private static final LongDimensionSchema LONG_DIMENSION = new LongDimensionSchema("long_dim"); - private static final NestedDataColumnSchema NESTED_DIMENSION = new NestedDataColumnSchema("nested_dim", 4); + private static final NestedDataColumnSchema NESTED_DIMENSION = new NestedDataColumnSchema("nested_dim", 5); private static final AutoTypeColumnSchema AUTO_DIMENSION = new AutoTypeColumnSchema("auto_dim", null); private static final List DIMENSIONS = ImmutableList.of( STRING_DIMENSION, diff --git a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java index 847f8a9d1965..2ca05f60d46e 100644 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java @@ -30,9 +30,7 @@ public class NestedDataColumnSchemaTest { private static final DefaultColumnFormatConfig DEFAULT_CONFIG = new DefaultColumnFormatConfig(null, null); - private static final DefaultColumnFormatConfig DEFAULT_CONFIG_V4 = new DefaultColumnFormatConfig(4, null); private static final ObjectMapper MAPPER; - private static final ObjectMapper MAPPER_V4; static { MAPPER = new DefaultObjectMapper(); @@ -42,22 +40,12 @@ public class NestedDataColumnSchemaTest DEFAULT_CONFIG ) ); - - MAPPER_V4 = new DefaultObjectMapper(); - MAPPER_V4.setInjectableValues( - new InjectableValues.Std().addValue( - DefaultColumnFormatConfig.class, - DEFAULT_CONFIG_V4 - ) - ); } @Test public void testSerdeRoundTrip() throws JsonProcessingException { - final NestedDataColumnSchema v4 = new NestedDataColumnSchema("test", 4); final NestedDataColumnSchema v5 = new NestedDataColumnSchema("test", 5); - Assert.assertEquals(v4, MAPPER.readValue(MAPPER.writeValueAsString(v4), NestedDataColumnSchema.class)); Assert.assertEquals(v5, MAPPER.readValue(MAPPER.writeValueAsString(v5), NestedDataColumnSchema.class)); } @@ -69,20 +57,11 @@ public void testSerdeDefault() throws JsonProcessingException Assert.assertEquals(new NestedDataColumnSchema("test", 5), andBack); } - @Test - public void testSerdeSystemDefault() throws JsonProcessingException - { - final String there = "{\"type\":\"json\", \"name\":\"test\"}"; - NestedDataColumnSchema andBack = MAPPER_V4.readValue(there, NestedDataColumnSchema.class); - Assert.assertEquals(new NestedDataColumnSchema("test", 4), andBack); - } - @Test public void testSerdeOverride() throws JsonProcessingException { - final String there = "{\"type\":\"json\", \"name\":\"test\",\"formatVersion\":4}"; - NestedDataColumnSchema andBack = MAPPER.readValue(there, NestedDataColumnSchema.class); - Assert.assertEquals(new NestedDataColumnSchema("test", 4), andBack); + Throwable t = Assert.assertThrows(DruidException.class, () -> new NestedDataColumnSchema("test", 4)); + Assert.assertEquals("Unsupported nested column format version[4]", t.getMessage()); } @Test From 8bc2859491cb1f654946fa40b97a06839f7b7d86 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Tue, 14 Jan 2025 16:38:03 -0800 Subject: [PATCH 3/4] fix test --- .../apache/druid/segment/DefaultColumnFormatsConfigTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java b/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java index c4de7e41f2b1..42a819037f14 100644 --- a/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java +++ b/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java @@ -45,11 +45,11 @@ public void testDefaultsSerde() throws JsonProcessingException @Test public void testDefaultsSerdeOverride() throws JsonProcessingException { - DefaultColumnFormatConfig defaultColumnFormatConfig = new DefaultColumnFormatConfig(4, "ARRAY"); + DefaultColumnFormatConfig defaultColumnFormatConfig = new DefaultColumnFormatConfig(5, "ARRAY"); String there = MAPPER.writeValueAsString(defaultColumnFormatConfig); DefaultColumnFormatConfig andBack = MAPPER.readValue(there, DefaultColumnFormatConfig.class); Assert.assertEquals(defaultColumnFormatConfig, andBack); - Assert.assertEquals(4, (int) andBack.getNestedColumnFormatVersion()); + Assert.assertEquals(5, (int) andBack.getNestedColumnFormatVersion()); Assert.assertEquals(DimensionSchema.MultiValueHandling.ARRAY.toString(), andBack.getStringMultiValueHandlingMode()); } From 0a1bab46b709b221f6a5e5bc7681003c1c263410 Mon Sep 17 00:00:00 2001 From: Clint Wylie Date: Thu, 23 Jan 2025 04:01:47 -0800 Subject: [PATCH 4/4] log.warn on invalid nested column format version instead of failure --- .../segment/DefaultColumnFormatConfig.java | 7 +++--- .../segment/NestedDataColumnSchemaTest.java | 22 ------------------- 2 files changed, 4 insertions(+), 25 deletions(-) diff --git a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java index 61e386db9ae0..0149deba1538 100644 --- a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java @@ -23,6 +23,7 @@ import com.fasterxml.jackson.annotation.JsonProperty; import org.apache.druid.data.input.impl.DimensionSchema; import org.apache.druid.error.DruidException; +import org.apache.druid.java.util.common.logger.Logger; import javax.annotation.Nullable; import java.util.Arrays; @@ -30,13 +31,13 @@ public class DefaultColumnFormatConfig { + private static final Logger LOG = new Logger(DefaultColumnFormatConfig.class); + public static void validateNestedFormatVersion(@Nullable Integer formatVersion) { if (formatVersion != null) { if (formatVersion != 5) { - throw DruidException.forPersona(DruidException.Persona.USER) - .ofCategory(DruidException.Category.INVALID_INPUT) - .build("Unsupported nested column format version[%s]", formatVersion); + LOG.warn("Unsupported nested column format version[%s], using default version instead", formatVersion); } } } diff --git a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java index 2ca05f60d46e..2dd5455ceb5a 100644 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java @@ -22,7 +22,6 @@ import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.InjectableValues; import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.druid.error.DruidException; import org.apache.druid.jackson.DefaultObjectMapper; import org.junit.Assert; import org.junit.Test; @@ -56,25 +55,4 @@ public void testSerdeDefault() throws JsonProcessingException NestedDataColumnSchema andBack = MAPPER.readValue(there, NestedDataColumnSchema.class); Assert.assertEquals(new NestedDataColumnSchema("test", 5), andBack); } - - @Test - public void testSerdeOverride() throws JsonProcessingException - { - Throwable t = Assert.assertThrows(DruidException.class, () -> new NestedDataColumnSchema("test", 4)); - Assert.assertEquals("Unsupported nested column format version[4]", t.getMessage()); - } - - @Test - public void testVersionTooSmall() - { - Throwable t = Assert.assertThrows(DruidException.class, () -> new NestedDataColumnSchema("test", 3)); - Assert.assertEquals("Unsupported nested column format version[3]", t.getMessage()); - } - - @Test - public void testVersionTooBig() - { - Throwable t = Assert.assertThrows(DruidException.class, () -> new NestedDataColumnSchema("test", 6)); - Assert.assertEquals("Unsupported nested column format version[6]", t.getMessage()); - } }