Skip to content

Commit

Permalink
fix issue with OR filter vector value matcher for proper 3VL behavior…
Browse files Browse the repository at this point in the history
… when inverted (apache#17655)


Fixes a bug with the OR filters vectorized value matcher that causes vector engines processing filter an OR filter under a NOT filter (ex. of the form NOT(x OR y)) to produce incorrect results for null values matched.

This bug is due to incorrectly hard coding the includeUnknown parameter as false for OR filter child vector matchers after the initial filter clause instead of passing it through the function parameter to the underlying matchers.
  • Loading branch information
clintropolis authored and 317brian committed Jan 28, 2025
1 parent 5da657b commit 6a9a1ad
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ public ReadableVectorMatch match(final ReadableVectorMatch mask, boolean include
}

currentMask.removeAll(currentMatch);
currentMatch = baseMatchers[i].match(currentMask, false);
currentMatch = baseMatchers[i].match(currentMask, includeUnknown);
retVal.addAll(currentMatch, scratch);

if (currentMatch == currentMask) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.impl.DimensionsSpec;
Expand All @@ -34,8 +33,11 @@
import org.apache.druid.query.filter.AndDimFilter;
import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.filter.SelectorDimFilter;
import org.apache.druid.query.filter.TrueDimFilter;
import org.apache.druid.segment.CursorFactory;
import org.apache.druid.segment.IndexBuilder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.junit.AfterClass;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand All @@ -57,13 +59,19 @@ public class AndFilterTest extends BaseFilterTest
)
);

private static final RowSignature ROW_SIGNATURE = RowSignature.builder()
.add("dim0", ColumnType.STRING)
.add("dim1", ColumnType.STRING)
.add("dim2", ColumnType.STRING)
.build();

private static final List<InputRow> ROWS = ImmutableList.of(
PARSER.parseBatch(ImmutableMap.of("dim0", "0", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "1", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "2", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "3", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "4", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "5", "dim1", "0")).get(0)
makeSchemaRow(PARSER, ROW_SIGNATURE, "0", "0", "a"),
makeSchemaRow(PARSER, ROW_SIGNATURE, "1", "0", null),
makeSchemaRow(PARSER, ROW_SIGNATURE, "2", "0", "b"),
makeSchemaRow(PARSER, ROW_SIGNATURE, "3", "0", null),
makeSchemaRow(PARSER, ROW_SIGNATURE, "4", "0", "c"),
makeSchemaRow(PARSER, ROW_SIGNATURE, "5", "0", null)
);

public AndFilterTest(
Expand Down Expand Up @@ -177,6 +185,30 @@ public void testNotAnd()
);
}

@Test
public void testNotAndWithNulls()
{
assertFilterMatches(
new AndDimFilter(
ImmutableList.of(
TrueDimFilter.instance(),
new SelectorDimFilter("dim2", "c", null)
)
),
ImmutableList.of("4")
);
assertFilterMatches(
new NotDimFilter(
new AndDimFilter(ImmutableList.of(
TrueDimFilter.instance(),
new SelectorDimFilter("dim2", "c", null)
)
)
),
ImmutableList.of("0", "2")
);
}

@Test
public void test_equals()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import com.google.common.base.Function;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import nl.jqno.equalsverifier.EqualsVerifier;
import org.apache.druid.data.input.InputRow;
Expand All @@ -40,6 +39,8 @@
import org.apache.druid.query.filter.TrueDimFilter;
import org.apache.druid.segment.CursorFactory;
import org.apache.druid.segment.IndexBuilder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.junit.AfterClass;
import org.junit.Test;
import org.junit.runner.RunWith;
Expand All @@ -60,14 +61,19 @@ public class OrFilterTest extends BaseFilterTest
DimensionsSpec.EMPTY
)
);
private static final RowSignature ROW_SIGNATURE = RowSignature.builder()
.add("dim0", ColumnType.STRING)
.add("dim1", ColumnType.STRING)
.add("dim2", ColumnType.STRING)
.build();

private static final List<InputRow> ROWS = ImmutableList.of(
PARSER.parseBatch(ImmutableMap.of("dim0", "0", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "1", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "2", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "3", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "4", "dim1", "0")).get(0),
PARSER.parseBatch(ImmutableMap.of("dim0", "5", "dim1", "0")).get(0)
makeSchemaRow(PARSER, ROW_SIGNATURE, "0", "0", "a"),
makeSchemaRow(PARSER, ROW_SIGNATURE, "1", "0", null),
makeSchemaRow(PARSER, ROW_SIGNATURE, "2", "0", "b"),
makeSchemaRow(PARSER, ROW_SIGNATURE, "3", "0", null),
makeSchemaRow(PARSER, ROW_SIGNATURE, "4", "0", "c"),
makeSchemaRow(PARSER, ROW_SIGNATURE, "5", "0", null)
);

public OrFilterTest(
Expand Down Expand Up @@ -152,6 +158,17 @@ public void testTwoFilterFirstMatchesNoneSecondMatchesAll()
),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
assertFilterMatches(
NotDimFilter.of(
new OrDimFilter(
ImmutableList.of(
new SelectorDimFilter("dim0", "7", null),
new SelectorDimFilter("dim1", "0", null)
)
)
),
ImmutableList.of()
);
}

@Test
Expand Down Expand Up @@ -208,6 +225,17 @@ public void testTwoFilterFirstMatchesSomeSecondMatchesNone()
),
ImmutableList.of("3")
);
assertFilterMatches(
NotDimFilter.of(
new OrDimFilter(
ImmutableList.of(
new SelectorDimFilter("dim0", "3", null),
new SelectorDimFilter("dim1", "7", null)
)
)
),
ImmutableList.of("0", "1", "2", "4", "5")
);
}

@Test
Expand Down Expand Up @@ -236,6 +264,18 @@ public void testTwoFilterFirstMatchesNoneSecondMatchesNone()
),
ImmutableList.of()
);

assertFilterMatches(
NotDimFilter.of(
new OrDimFilter(
ImmutableList.of(
new SelectorDimFilter("dim1", "7", null),
new SelectorDimFilter("dim0", "7", null)
)
)
),
ImmutableList.of("0", "1", "2", "3", "4", "5")
);
}

@Test
Expand All @@ -254,6 +294,48 @@ public void testThreeFilterFirstMatchesSomeSecondLiterallyTrueThirdMatchesNone()
),
ImmutableList.of("0", "1", "2", "4", "5")
);
assertFilterMatches(
NotDimFilter.of(
new AndDimFilter(
new InDimFilter("dim0", ImmutableSet.of("0", "1", "2", "4", "5")),
new OrDimFilter(
ImmutableList.of(
new SelectorDimFilter("dim0", "4", null),
TrueDimFilter.instance(),
new SelectorDimFilter("dim0", "7", null)
)
)
)
),
ImmutableList.of("3")
);
}

@Test
public void testNotOrWithNulls()
{
assertFilterMatches(
new OrDimFilter(
ImmutableList.of(
new SelectorDimFilter("dim0", "3", null),
new SelectorDimFilter("dim2", "c", null)
)
),
ImmutableList.of("3", "4")
);

assertFilterMatches(
NotDimFilter.of(
new OrDimFilter(
ImmutableList.of(
new SelectorDimFilter("dim0", "3", null),
new SelectorDimFilter("dim2", "c", null)
)
)
),
// dim2 null rows don't match when inverted because unknown
ImmutableList.of("0", "2")
);
}

@Test
Expand Down

0 comments on commit 6a9a1ad

Please sign in to comment.