Skip to content

Commit

Permalink
Revert "feat(mapping-optimizer): Support in operator for mapping opti…
Browse files Browse the repository at this point in the history
…mizer (#5691)"

This reverts commit 87f601a.

Co-authored-by: Zylphrex <[email protected]>
  • Loading branch information
getsentry-bot and Zylphrex committed Mar 26, 2024
1 parent 87f601a commit 7ed7441
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 228 deletions.
1 change: 0 additions & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,6 @@ jobs:
tests/sentry/search/events \
tests/sentry/event_manager \
tests/sentry/api/endpoints/test_organization_profiling_functions.py \
tests/snuba/api/endpoints/test_organization_events_stats_mep.py \
tests/snuba/test_snql_snuba.py \
tests/snuba/test_metrics_layer.py \
-vv --cov . --cov-report="xml:.artifacts/snuba.coverage.xml"
Expand Down
160 changes: 35 additions & 125 deletions snuba/query/processors/physical/mapping_optimizer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
from enum import Enum
from typing import Optional, Tuple, cast
from typing import Optional, Tuple

from snuba import environment
from snuba.clickhouse.query import Query
Expand All @@ -13,7 +13,6 @@
)
from snuba.query.conditions import (
BooleanFunctions,
ConditionFunctions,
combine_and_conditions,
combine_or_conditions,
get_first_level_and_conditions,
Expand Down Expand Up @@ -91,8 +90,9 @@ def __init__(
self.__killswitch = killswitch
self.__value_subcolumn_name = value_subcolumn_name

self.__equals_condition_pattern = FunctionCall(
function_name=String(ConditionFunctions.EQ),
# TODO: Add the support for IN conditions.
self.__optimizable_pattern = FunctionCall(
function_name=String("equals"),
parameters=(
Or(
[
Expand All @@ -106,19 +106,6 @@ def __init__(
Param("right_hand_side", Literal(Any(str))),
),
)
self.__in_condition_pattern = FunctionCall(
function_name=String(ConditionFunctions.IN),
parameters=(
mapping_pattern,
Param(
"right_hand_side",
FunctionCall(
Or([String("array"), String("tuple")]),
all_parameters=Literal(),
),
),
),
)
self.__tag_exists_patterns = [
FunctionCall(
function_name=String("notEquals"),
Expand Down Expand Up @@ -168,43 +155,20 @@ def __classify_combined_conditions(self, condition: Expression) -> ConditionClas

def __classify_condition(self, condition: Expression) -> ConditionClass:
# Expects this to be an individual condition
equals_condition_match = self.__equals_condition_pattern.match(condition)
in_condition_match = (
self.__in_condition_pattern.match(condition)
if equals_condition_match is None
else None
)
match = self.__optimizable_pattern.match(condition)
if (
equals_condition_match is not None
and equals_condition_match.string(KEY_COL_MAPPING_PARAM)
== f"{self.__column_name}.key"
match is not None
and match.string(KEY_COL_MAPPING_PARAM) == f"{self.__column_name}.key"
):
rhs = equals_condition_match.expression("right_hand_side")
rhs = match.expression("right_hand_side")
assert isinstance(rhs, LiteralExpr)
return (
ConditionClass.NOT_OPTIMIZABLE
# ifNull(tags[asd], '') = '' is not optimizable.
if rhs.value == ""
else ConditionClass.OPTIMIZABLE
)
if (
in_condition_match is not None
and in_condition_match.string(KEY_COL_MAPPING_PARAM)
== f"{self.__column_name}.key"
):
rhs = in_condition_match.expression("right_hand_side")
assert isinstance(rhs, FunctionExpr)

params = rhs.parameters
for param in params:
assert isinstance(param, LiteralExpr)

# tags[foo] IN array('') is not optimizable
if param.value == "":
return ConditionClass.NOT_OPTIMIZABLE

return ConditionClass.OPTIMIZABLE
elif equals_condition_match is None and in_condition_match is None:
elif match is None:
# If this condition is not matching an optimizable condition,
# check that it does not reference the optimizable column.
# If it does, it means we should not optimize this query.
Expand All @@ -219,89 +183,35 @@ def __classify_condition(self, condition: Expression) -> ConditionClass:
return ConditionClass.IRRELEVANT

def __replace_with_hash(self, condition: Expression) -> Expression:
equals_condition_match = self.__equals_condition_pattern.match(condition)
in_condition_match = (
self.__in_condition_pattern.match(condition)
if equals_condition_match is None
else None
)
match = self.__optimizable_pattern.match(condition)
if (
equals_condition_match is not None
and equals_condition_match.string(KEY_COL_MAPPING_PARAM)
== f"{self.__column_name}.key"
match is None
or match.string(KEY_COL_MAPPING_PARAM) != f"{self.__column_name}.key"
):
rhs = equals_condition_match.expression("right_hand_side")
assert isinstance(rhs, LiteralExpr)
key = equals_condition_match.scalar(KEY_MAPPING_PARAM)
assert isinstance(key, (str, int))
if isinstance(key, str):
key = key.translate(ESCAPE_TRANSLATION)

return FunctionExpr(
alias=condition.alias,
function_name="has",
parameters=(
Column(
alias=None,
table_name=equals_condition_match.optional_string(
TABLE_MAPPING_PARAM
),
column_name=self.__hash_map_name,
),
FunctionExpr(
alias=None,
function_name="cityHash64",
parameters=(LiteralExpr(None, f"{key}={rhs.value}"),),
),
return condition
rhs = match.expression("right_hand_side")
assert isinstance(rhs, LiteralExpr)
key = match.scalar(KEY_MAPPING_PARAM)
assert isinstance(key, (str, int))
if isinstance(key, str):
key = key.translate(ESCAPE_TRANSLATION)

return FunctionExpr(
alias=condition.alias,
function_name="has",
parameters=(
Column(
alias=None,
table_name=match.optional_string(TABLE_MAPPING_PARAM),
column_name=self.__hash_map_name,
),
)
elif (
in_condition_match is not None
and in_condition_match.string(KEY_COL_MAPPING_PARAM)
== f"{self.__column_name}.key"
):
key = in_condition_match.scalar(KEY_MAPPING_PARAM)
assert isinstance(key, (str, int))
if isinstance(key, str):
key = key.translate(ESCAPE_TRANSLATION)

rhs = in_condition_match.expression("right_hand_side")
assert isinstance(rhs, FunctionExpr)
for param in rhs.parameters:
assert isinstance(param, LiteralExpr)
params = cast(list[LiteralExpr], rhs.parameters)

return FunctionExpr(
alias=condition.alias,
function_name="hasAny",
parameters=(
Column(
alias=None,
table_name=in_condition_match.optional_string(
TABLE_MAPPING_PARAM
),
column_name=self.__hash_map_name,
),
FunctionExpr(
alias=rhs.alias,
function_name="array",
parameters=tuple(
[
FunctionExpr(
alias=None,
function_name="cityHash64",
parameters=(
LiteralExpr(None, f"{key}={lit.value}"),
),
)
for lit in params
]
),
),
FunctionExpr(
alias=None,
function_name="cityHash64",
parameters=(LiteralExpr(None, f"{key}={rhs.value}"),),
),
)

return condition
),
)

def _get_condition_without_redundant_checks(
self, condition: Expression, query: Query
Expand Down Expand Up @@ -355,7 +265,7 @@ def _get_condition_without_redundant_checks(
if tag_exist_match:
matched_tag_exists_conditions[condition_id] = tag_exist_match
if not tag_exist_match:
eq_match = self.__equals_condition_pattern.match(cond)
eq_match = self.__optimizable_pattern.match(cond)
if eq_match:
tag_eq_match_keys.add(eq_match.scalar(KEY_MAPPING_PARAM))
useful_conditions = []
Expand Down
102 changes: 0 additions & 102 deletions tests/query/processors/test_mapping_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,108 +278,6 @@
nested_condition("tags", "my_tag", ConditionFunctions.EQ, "a"),
id="Non optimizable having",
),
pytest.param(
build_query(
selected_columns=[column("event_id")],
condition=binary_condition(
ConditionFunctions.IN,
nested_expression("tags", "my_tag"),
FunctionCall(
None,
"tuple",
(
Literal(None, "a"),
Literal(None, "b"),
Literal(None, "c"),
),
),
),
),
FunctionCall(
None,
"hasAny",
(
column("_tags_hash_map", True),
FunctionCall(
None,
"array",
(
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=a"),)),
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=b"),)),
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=c"),)),
),
),
),
),
id="Optimizable IN condition using tuple",
),
pytest.param(
build_query(
selected_columns=[column("event_id")],
condition=binary_condition(
ConditionFunctions.IN,
nested_expression("tags", "my_tag"),
FunctionCall(
None,
"array",
(
Literal(None, "a"),
Literal(None, "b"),
Literal(None, "c"),
),
),
),
),
FunctionCall(
None,
"hasAny",
(
column("_tags_hash_map", True),
FunctionCall(
None,
"array",
(
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=a"),)),
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=b"),)),
FunctionCall(None, "cityHash64", (Literal(None, "my_tag=c"),)),
),
),
),
),
id="Optimizable IN condition using array",
),
pytest.param(
build_query(
selected_columns=[column("event_id")],
condition=binary_condition(
ConditionFunctions.IN,
nested_expression("tags", "my_tag"),
FunctionCall(
None,
"array",
(
Literal(None, "a"),
Literal(None, "b"),
Literal(None, ""),
),
),
),
),
binary_condition(
ConditionFunctions.IN,
nested_expression("tags", "my_tag"),
FunctionCall(
None,
"array",
(
Literal(None, "a"),
Literal(None, "b"),
Literal(None, ""),
),
),
),
id="Non optimizable IN condition with empty value",
),
]


Expand Down

0 comments on commit 7ed7441

Please sign in to comment.