Skip to content

Commit

Permalink
[Star tree] Add date field rounding support in star tree (#15249)
Browse files Browse the repository at this point in the history
---------

Signed-off-by: Bharathwaj G <[email protected]>
(cherry picked from commit 1e49aa8)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
github-actions[bot] committed Oct 14, 2024
1 parent 9950cba commit 33404fe
Show file tree
Hide file tree
Showing 27 changed files with 1,710 additions and 259 deletions.

Large diffs are not rendered by default.

18 changes: 9 additions & 9 deletions server/src/main/java/org/opensearch/common/Rounding.java
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public enum DateTimeUnit {
WEEK_OF_WEEKYEAR((byte) 1, "week", IsoFields.WEEK_OF_WEEK_BASED_YEAR, true, TimeUnit.DAYS.toMillis(7)) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(7);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundWeekOfWeekYear(utcMillis);
}

Expand All @@ -108,7 +108,7 @@ long extraLocalOffsetLookup() {
YEAR_OF_CENTURY((byte) 2, "year", ChronoField.YEAR_OF_ERA, false, 12) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(366);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundYear(utcMillis);
}

Expand All @@ -119,7 +119,7 @@ long extraLocalOffsetLookup() {
QUARTER_OF_YEAR((byte) 3, "quarter", IsoFields.QUARTER_OF_YEAR, false, 3) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(92);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundQuarterOfYear(utcMillis);
}

Expand All @@ -130,7 +130,7 @@ long extraLocalOffsetLookup() {
MONTH_OF_YEAR((byte) 4, "month", ChronoField.MONTH_OF_YEAR, false, 1) {
private final long extraLocalOffsetLookup = TimeUnit.DAYS.toMillis(31);

long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundMonthOfYear(utcMillis);
}

Expand All @@ -139,7 +139,7 @@ long extraLocalOffsetLookup() {
}
},
DAY_OF_MONTH((byte) 5, "day", ChronoField.DAY_OF_MONTH, true, ChronoField.DAY_OF_MONTH.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, this.ratio);
}

Expand All @@ -148,7 +148,7 @@ long extraLocalOffsetLookup() {
}
},
HOUR_OF_DAY((byte) 6, "hour", ChronoField.HOUR_OF_DAY, true, ChronoField.HOUR_OF_DAY.getBaseUnit().getDuration().toMillis()) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -163,7 +163,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.MINUTE_OF_HOUR.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand All @@ -178,7 +178,7 @@ long extraLocalOffsetLookup() {
true,
ChronoField.SECOND_OF_MINUTE.getBaseUnit().getDuration().toMillis()
) {
long roundFloor(long utcMillis) {
public long roundFloor(long utcMillis) {
return DateUtils.roundFloor(utcMillis, ratio);
}

Expand Down Expand Up @@ -211,7 +211,7 @@ long extraLocalOffsetLookup() {
* @param utcMillis the milliseconds since the epoch
* @return the rounded down milliseconds since the epoch
*/
abstract long roundFloor(long utcMillis);
public abstract long roundFloor(long utcMillis);

/**
* When looking up {@link LocalTimeOffset} go this many milliseconds
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.compositeindex.datacube;

import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import static java.util.Collections.unmodifiableMap;

/**
 * Enum representing the extended date time units supported for star tree index as part of index mapping.
 * The enum values are:
 * <ul>
 *     <li>HALF_HOUR_OF_DAY: Represents half hour of day rounding</li>
 *     <li>QUARTER_HOUR_OF_DAY: Represents quarter hour of day rounding</li>
 * </ul>
 * <p>
 * The enum also provides a static map of date field units to their corresponding DataCubeDateTimeUnit instances.
 *
 * @see org.opensearch.common.Rounding.DateTimeUnit for more information on the dateTimeUnit enum and rounding logic.
 *
 * @opensearch.experimental
 */
public enum DataCubeDateTimeUnit implements DateTimeUnitRounding {
    HALF_HOUR_OF_DAY("half-hour", TimeUnit.MINUTES.toMillis(30)),
    QUARTER_HOUR_OF_DAY("quarter-hour", TimeUnit.MINUTES.toMillis(15));

    /**
     * Unmodifiable lookup from the accepted unit spellings ("30m", "half-hour", "15m", "quarter-hour")
     * to the matching enum constant.
     */
    public static final Map<String, DataCubeDateTimeUnit> DATE_FIELD_UNITS;
    static {
        Map<String, DataCubeDateTimeUnit> dateFieldUnits = new HashMap<>();
        dateFieldUnits.put("30m", DataCubeDateTimeUnit.HALF_HOUR_OF_DAY);
        dateFieldUnits.put("half-hour", DataCubeDateTimeUnit.HALF_HOUR_OF_DAY);
        dateFieldUnits.put("15m", DataCubeDateTimeUnit.QUARTER_HOUR_OF_DAY);
        dateFieldUnits.put("quarter-hour", DataCubeDateTimeUnit.QUARTER_HOUR_OF_DAY);
        DATE_FIELD_UNITS = unmodifiableMap(dateFieldUnits);
    }

    private final String shortName;

    /** Length of one unit in milliseconds; computed once per constant instead of on every rounding call. */
    private final long unitMillis;

    DataCubeDateTimeUnit(String shortName, long unitMillis) {
        this.shortName = shortName;
        this.unitMillis = unitMillis;
    }

    /**
     * This rounds down the supplied milliseconds since the epoch down to the next unit. In order to retain performance this method
     * should be as fast as possible and not try to convert dates to java-time objects if possible.
     * <p>
     * The remainder is taken with {@link Math#floorMod(long, long)} so that timestamps before the epoch (negative values)
     * are also rounded down; the plain {@code %} operator truncates toward zero and would round such values up.
     *
     * @param utcMillis the milliseconds since the epoch
     * @return the rounded down milliseconds since the epoch
     */
    @Override
    public long roundFloor(long utcMillis) {
        return utcMillis - Math.floorMod(utcMillis, unitMillis);
    }

    /**
     * @return the short name this unit was constructed with ("half-hour" or "quarter-hour")
     */
    @Override
    public String shortName() {
        return shortName;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,21 @@

import org.opensearch.common.Rounding;
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.common.time.DateUtils;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.index.compositeindex.datacube.startree.utils.date.DateTimeUnitRounding;
import org.opensearch.index.mapper.CompositeDataCubeFieldType;
import org.opensearch.index.mapper.DateFieldMapper;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.function.Consumer;
import java.util.stream.Collectors;

/**
* Date dimension class
Expand All @@ -24,27 +33,78 @@
*/
@ExperimentalApi
public class DateDimension implements Dimension {
private final List<Rounding.DateTimeUnit> calendarIntervals;
private final List<DateTimeUnitRounding> calendarIntervals;
public static final String CALENDAR_INTERVALS = "calendar_intervals";
public static final String DATE = "date";
private final String field;
private final List<DateTimeUnitRounding> sortedCalendarIntervals;
private final DateFieldMapper.Resolution resolution;

public DateDimension(String field, List<Rounding.DateTimeUnit> calendarIntervals) {
/**
 * Creates a date dimension for the given field.
 *
 * @param field             name of the date field
 * @param calendarIntervals intervals configured for this dimension, kept in the order supplied
 * @param resolution        resolution of the date field; {@code null} falls back to {@code MILLISECONDS}
 */
public DateDimension(String field, List<DateTimeUnitRounding> calendarIntervals, DateFieldMapper.Resolution resolution) {
    this.field = field;
    this.calendarIntervals = calendarIntervals;
    // A second copy of the intervals, ordered from the lowest unit to the highest unit
    this.sortedCalendarIntervals = getSortedDateTimeUnits(calendarIntervals);
    this.resolution = resolution != null ? resolution : DateFieldMapper.Resolution.MILLISECONDS;
}

public List<Rounding.DateTimeUnit> getIntervals() {
/**
 * @return the calendar intervals exactly as they were passed to the constructor (not re-ordered)
 */
public List<DateTimeUnitRounding> getIntervals() {
    return this.calendarIntervals;
}

/**
 * @return the calendar intervals ordered from the lowest unit to the highest unit
 */
public List<DateTimeUnitRounding> getSortedCalendarIntervals() {
    return this.sortedCalendarIntervals;
}

/**
 * Rounds the given value down to each configured calendar interval and passes every rounded value to
 * {@code dimSetter}, visiting the intervals in the order of {@code sortedCalendarIntervals}.
 * When {@code val} is {@code null}, {@code null} is passed to the consumer once per interval instead.
 *
 * @param val       The value to be set; it is converted with {@code storedDurationSinceEpoch} before rounding
 * @param dimSetter Consumer which sets the dimensions
 */
@Override
public void setDimensionValues(final Long val, final Consumer<Long> dimSetter) {
    if (sortedCalendarIntervals.isEmpty()) {
        // Nothing to emit; also avoids converting the value when it would never be used
        return;
    }
    if (val == null) {
        for (int i = 0; i < sortedCalendarIntervals.size(); i++) {
            dimSetter.accept(null);
        }
        return;
    }
    // The converted timestamp is the same for every interval, so compute it once outside the loop
    final long epochValue = storedDurationSinceEpoch(val);
    for (DateTimeUnitRounding dateTimeUnit : sortedCalendarIntervals) {
        dimSetter.accept(dateTimeUnit.roundFloor(epochValue));
    }
}

/**
 * Normalizes a stored timestamp before rounding: when the field resolution is {@code NANOSECONDS}
 * the value is passed through {@code DateUtils.toMilliSeconds}; for any other resolution it is
 * returned unchanged.
 */
private long storedDurationSinceEpoch(long nanoSecondsSinceEpoch) {
    final boolean storedAsNanos = resolution.equals(DateFieldMapper.Resolution.NANOSECONDS);
    return storedAsNanos ? DateUtils.toMilliSeconds(nanoSecondsSinceEpoch) : nanoSecondsSinceEpoch;
}

/**
 * Builds one name per calendar interval, in sorted interval order, of the form
 * {@code <field>_<interval short name>}.
 *
 * @return a new list holding the generated sub-dimension names
 */
@Override
public List<String> getSubDimensionNames() {
    return sortedCalendarIntervals.stream()
        .map(unit -> field + "_" + unit.shortName())
        .collect(Collectors.toCollection(ArrayList::new));
}

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.startObject("date_dimension");
builder.field(CompositeDataCubeFieldType.NAME, this.getField());
builder.field(CompositeDataCubeFieldType.TYPE, DATE);
builder.startArray(CALENDAR_INTERVALS);
for (Rounding.DateTimeUnit interval : calendarIntervals) {
for (DateTimeUnitRounding interval : calendarIntervals) {
builder.value(interval.shortName());
}
builder.endArray();
Expand All @@ -69,4 +129,44 @@ public int hashCode() {
public String getField() {
return field;
}

/**
 * @return the number of configured calendar intervals
 */
@Override
public int getNumSubDimensions() {
    final int intervalCount = calendarIntervals.size();
    return intervalCount;
}

/**
 * DateTimeUnit Comparator which tracks dateTimeUnits in sorted order from second unit to year unit.
 * Units are ranked by their short name; a unit whose short name has no entry in
 * {@link #ORDERED_DATE_TIME_UNIT} is ranked as {@code Integer.MAX_VALUE} and therefore sorts last.
 */
public static class DateTimeUnitComparator implements Comparator<DateTimeUnitRounding> {
    /**
     * Rank of every known unit keyed by its short name (lower rank = finer unit).
     * Published as an unmodifiable view so callers cannot alter the ordering used by every comparator instance.
     */
    public static final Map<String, Integer> ORDERED_DATE_TIME_UNIT;

    static {
        final Map<String, Integer> ranks = new HashMap<>();
        ranks.put(Rounding.DateTimeUnit.SECOND_OF_MINUTE.shortName(), 1);
        ranks.put(Rounding.DateTimeUnit.MINUTES_OF_HOUR.shortName(), 2);
        ranks.put(DataCubeDateTimeUnit.QUARTER_HOUR_OF_DAY.shortName(), 3);
        ranks.put(DataCubeDateTimeUnit.HALF_HOUR_OF_DAY.shortName(), 4);
        ranks.put(Rounding.DateTimeUnit.HOUR_OF_DAY.shortName(), 5);
        ranks.put(Rounding.DateTimeUnit.DAY_OF_MONTH.shortName(), 6);
        ranks.put(Rounding.DateTimeUnit.WEEK_OF_WEEKYEAR.shortName(), 7);
        ranks.put(Rounding.DateTimeUnit.MONTH_OF_YEAR.shortName(), 8);
        ranks.put(Rounding.DateTimeUnit.QUARTER_OF_YEAR.shortName(), 9);
        ranks.put(Rounding.DateTimeUnit.YEAR_OF_CENTURY.shortName(), 10);
        // Fully qualified because java.util.Collections is not among this file's imports
        ORDERED_DATE_TIME_UNIT = java.util.Collections.unmodifiableMap(ranks);
    }

    /**
     * Compares two units by their rank in {@link #ORDERED_DATE_TIME_UNIT}.
     *
     * @param unit1 first unit
     * @param unit2 second unit
     * @return negative, zero or positive when {@code unit1} ranks below, equal to or above {@code unit2}
     */
    @Override
    public int compare(DateTimeUnitRounding unit1, DateTimeUnitRounding unit2) {
        return Integer.compare(
            ORDERED_DATE_TIME_UNIT.getOrDefault(unit1.shortName(), Integer.MAX_VALUE),
            ORDERED_DATE_TIME_UNIT.getOrDefault(unit2.shortName(), Integer.MAX_VALUE)
        );
    }
}

/**
 * Produces a new list containing the given units ordered by {@code DateTimeUnitComparator};
 * the list passed in is left untouched.
 *
 * @param dateTimeUnits units to order
 * @return a sorted copy of {@code dateTimeUnits}
 */
public static List<DateTimeUnitRounding> getSortedDateTimeUnits(List<DateTimeUnitRounding> dateTimeUnits) {
    final List<DateTimeUnitRounding> ordered = new ArrayList<>(dateTimeUnits);
    ordered.sort(new DateTimeUnitComparator());
    return ordered;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,35 @@
import org.opensearch.common.annotation.ExperimentalApi;
import org.opensearch.core.xcontent.ToXContent;

import java.util.List;
import java.util.function.Consumer;

/**
* Base interface for data-cube dimensions.
* A dimension is defined on one field and may contribute one or more values
* ("sub-dimensions") to a star tree document.
*
* @opensearch.experimental
*/
@ExperimentalApi
public interface Dimension extends ToXContent {

/**
* Returns the name of the field this dimension is defined on
*/
String getField();

/**
* Returns the number of dimension values that gets added to star tree document
* as part of this dimension
*/
int getNumSubDimensions();

/**
* Sets the dimension values with the consumer. An implementation may invoke the
* consumer more than once for a single value (for example, a date dimension
* invokes it once per configured calendar interval).
*
* @param value The value to be set
* @param dimSetter Consumer which sets the dimensions
*/
void setDimensionValues(final Long value, final Consumer<Long> dimSetter);

/**
* Returns the list of dimension fields that represent the dimension
*/
List<String> getSubDimensionNames();
}
Loading

0 comments on commit 33404fe

Please sign in to comment.