Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip(eap): http response rate function #85662

Open
wants to merge 10 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 67 additions & 21 deletions src/sentry/search/eap/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any

from dateutil.tz import tz
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import Column
from sentry_protos.snuba.v1.request_common_pb2 import TraceItemType
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
AttributeAggregation,
Expand Down Expand Up @@ -81,27 +82,6 @@ class ArgumentDefinition:
ignored: bool = False


@dataclass
class FunctionDefinition:
internal_function: Function.ValueType
# The list of arguments for this function
arguments: list[ArgumentDefinition]
# The search_type the argument should be the default type for this column
default_search_type: constants.SearchType
# Try to infer the search type from the function arguments
infer_search_type_from_arguments: bool = True
# The internal rpc type for this function, optional as it can mostly be inferred from search_type
internal_type: AttributeKey.Type.ValueType | None = None
# Processor is the function run in the post process step to transform a row into the final result
processor: Callable[[Any], Any] | None = None
# Whether to request extrapolation or not, should be true for all functions except for _sample functions for debugging
extrapolation: bool = True

@property
def required_arguments(self) -> list[ArgumentDefinition]:
return [arg for arg in self.arguments if arg.default_arg is None and not arg.ignored]


@dataclass
class VirtualColumnDefinition:
constructor: Callable[[SnubaParams], VirtualColumnContext]
Expand All @@ -117,6 +97,24 @@ class VirtualColumnDefinition:
default_value: str | None = None


@dataclass(frozen=True, kw_only=True)
class ResolvedFormula(ResolvedAttribute):
formula: Column.BinaryFormula

@property
def proto_definition(self) -> Column.BinaryFormula:
"""The definition of this function as needed by the RPC"""
return self.formula

@property
def proto_type(self) -> AttributeKey.Type.ValueType:
"""The rpc always returns functions as floats, especially count() even though it should be an integer

see: https://www.notion.so/sentry/Should-count-return-an-int-in-the-v1-RPC-API-1348b10e4b5d80498bfdead194cc304e
"""
return constants.DOUBLE


@dataclass(frozen=True, kw_only=True)
class ResolvedFunction(ResolvedAttribute):
# The internal rpc alias for this column
Expand Down Expand Up @@ -147,6 +145,54 @@ def proto_type(self) -> AttributeKey.Type.ValueType:
return constants.DOUBLE


@dataclass
class FunctionDefinition:
internal_function: Function.ValueType
# The list of arguments for this function
arguments: list[ArgumentDefinition]
# The search_type the argument should be the default type for this column
default_search_type: constants.SearchType
# Try to infer the search type from the function arguments
infer_search_type_from_arguments: bool = True
# The internal rpc type for this function, optional as it can mostly be inferred from search_type
internal_type: AttributeKey.Type.ValueType | None = None
# Processor is the function run in the post process step to transform a row into the final result
processor: Callable[[Any], Any] | None = None
# Whether to request extrapolation or not, should be true for all functions except for _sample functions for debugging
extrapolation: bool = True
# For more complex functions, a formula can be specified
formula_resolver: Callable[[Any], Any] | None = None

@property
def required_arguments(self) -> list[ArgumentDefinition]:
return [arg for arg in self.arguments if arg.default_arg is None and not arg.ignored]

def resolve(
self, alias: str, search_type: constants.SearchType, resolved_argument: AttributeKey | None
) -> ResolvedFormula | ResolvedFunction:
if self.formula_resolver:
return ResolvedFormula(
public_alias=alias,
search_type=search_type,
formula=self.formula_resolver(
resolved_argument.name if resolved_argument else None
),
argument=resolved_argument,
internal_type=self.internal_type,
processor=self.processor,
)
else:
return ResolvedFunction(
public_alias=alias,
internal_name=self.internal_function,
search_type=search_type,
internal_type=self.internal_type,
processor=self.processor,
extrapolation=self.extrapolation,
argument=resolved_argument,
)


def simple_sentry_field(field) -> ResolvedColumn:
"""For a good number of fields, the public alias matches the internal alias
without the `sentry.` suffix. This helper functions makes defining them easier"""
Expand Down
37 changes: 16 additions & 21 deletions src/sentry/search/eap/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
from sentry.search.eap.columns import (
ColumnDefinitions,
ResolvedColumn,
ResolvedFormula,
ResolvedFunction,
VirtualColumnDefinition,
)
Expand All @@ -60,9 +61,9 @@ class SearchResolver:
_resolved_attribute_cache: dict[str, tuple[ResolvedColumn, VirtualColumnDefinition | None]] = (
field(default_factory=dict)
)
_resolved_function_cache: dict[str, tuple[ResolvedFunction, VirtualColumnDefinition | None]] = (
field(default_factory=dict)
)
_resolved_function_cache: dict[
str, tuple[ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]
] = field(default_factory=dict)

@sentry_sdk.trace
def resolve_meta(self, referrer: str) -> RequestMeta:
Expand Down Expand Up @@ -324,7 +325,7 @@ def resolve_virtual_context_term(
self,
term: str,
raw_value: str | list[str],
resolved_column: ResolvedColumn,
resolved_column: ResolvedColumn | ResolvedFormula | ResolvedFormula,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this intentional? VCCs should only apply to Resolved Columns

context: VirtualColumnDefinition,
) -> list[str] | str:
# Convert the term to the expected values
Expand Down Expand Up @@ -468,7 +469,7 @@ def resolve_aggregate_term(

def _resolve_search_value(
self,
column: ResolvedColumn,
column: ResolvedColumn | ResolvedFormula,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another one, is this intentional? We don't resolve search values for formulas do we?

operator: str,
value: str | float | datetime | Sequence[float] | Sequence[str],
) -> AttributeValue:
Expand Down Expand Up @@ -547,9 +548,10 @@ def resolve_contexts(
return final_contexts

@sentry_sdk.trace
def resolve_columns(
self, selected_columns: list[str]
) -> tuple[list[ResolvedColumn | ResolvedFunction], list[VirtualColumnDefinition | None]]:
def resolve_columns(self, selected_columns: list[str]) -> tuple[
list[ResolvedColumn | ResolvedFunction | ResolvedFormula],
list[VirtualColumnDefinition | None],
]:
"""Given a list of columns resolve them and get their context if applicable

This function will also dedupe the virtual column contexts if necessary
Expand Down Expand Up @@ -584,7 +586,7 @@ def resolve_columns(

def resolve_column(
self, column: str, match: Match | None = None
) -> tuple[ResolvedColumn | ResolvedFunction, VirtualColumnDefinition | None]:
) -> tuple[ResolvedColumn | ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]:
"""Column is either an attribute or an aggregate, this function will determine which it is and call the relevant
resolve function"""
match = fields.is_function(column)
Expand Down Expand Up @@ -666,7 +668,7 @@ def resolve_attribute(
@sentry_sdk.trace
def resolve_aggregates(
self, columns: list[str]
) -> tuple[list[ResolvedFunction], list[VirtualColumnDefinition | None]]:
) -> tuple[list[ResolvedFunction | ResolvedFormula], list[VirtualColumnDefinition | None]]:
"""Helper function to resolve a list of aggregates instead of 1 attribute at a time"""
resolved_aggregates, resolved_contexts = [], []
for column in columns:
Expand All @@ -677,10 +679,10 @@ def resolve_aggregates(

def resolve_aggregate(
self, column: str, match: Match | None = None
) -> tuple[ResolvedFunction, VirtualColumnDefinition | None]:
) -> tuple[ResolvedFunction | ResolvedFormula, VirtualColumnDefinition | None]:
if column in self._resolved_function_cache:
return self._resolved_function_cache[column]
# Check if this is a valid function, parse the function name and args out
# Check if the column looks like a function (matches a pattern), parse the function name and args out
if match is None:
match = fields.is_function(column)
if match is None:
Expand Down Expand Up @@ -743,15 +745,8 @@ def resolve_aggregate(
resolved_argument = None
search_type = function_definition.default_search_type

resolved_function = ResolvedFunction(
public_alias=alias,
internal_name=function_definition.internal_function,
search_type=search_type,
internal_type=function_definition.internal_type,
processor=function_definition.processor,
extrapolation=function_definition.extrapolation,
argument=resolved_argument,
)
resolved_function = function_definition.resolve(alias, search_type, resolved_argument)

resolved_context = None
self._resolved_function_cache[column] = (resolved_function, resolved_context)
return self._resolved_function_cache[column]
14 changes: 14 additions & 0 deletions src/sentry/search/eap/span_columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
simple_sentry_field,
)
from sentry.search.eap.common_columns import COMMON_COLUMNS
from sentry.search.eap.span_formulas import CUSTOM_FUNCTION_RESOLVER
from sentry.search.events.constants import (
PRECISE_FINISH_TS,
PRECISE_START_TS,
Expand Down Expand Up @@ -619,6 +620,19 @@ def count_processor(count_value: int | None) -> int:
)
],
),
"http_response_rate": FunctionDefinition(
internal_function=Function.FUNCTION_UNSPECIFIED,
default_search_type="percentage",
arguments=[
ArgumentDefinition(
argument_types={
"string"
}, # TODO - this should be an integer, but `resolve_attribute` returns a string
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think instead of leaving this as a todo we need to add a way to define arguments that aren't just attributes, so that SearchResolver.resolve_aggregate knows to cast a user input, which probably would clean up the definition of http_response_rate for you too

default_arg=None, # TODO - this should only accept 2,3,4,5
)
],
formula_resolver=CUSTOM_FUNCTION_RESOLVER["http_response_rate"],
),
}

SPAN_DEFINITIONS = ColumnDefinitions(
Expand Down
109 changes: 109 additions & 0 deletions src/sentry/search/eap/span_formulas.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
from collections.abc import Callable
from typing import Any

from sentry_protos.snuba.v1.attribute_conditional_aggregation_pb2 import (
AttributeConditionalAggregation,
)
from sentry_protos.snuba.v1.endpoint_trace_item_table_pb2 import Column
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import (
AttributeKey,
AttributeValue,
ExtrapolationMode,
Function,
StrArray,
)
from sentry_protos.snuba.v1.trace_item_filter_pb2 import ComparisonFilter, TraceItemFilter


def http_response_rate(arg: str) -> Column.BinaryFormula:
code = int(
arg # TODO - converting this arg is a bit of a hack, we should pass in the int directly if the arg type is int
)

# TODO - handling valid parameters should be handled in the function_definitions (span_columns.py)
if code not in [1, 2, 3, 4, 5]:
raise Exception("http_response_rate takes in a single digit (1,2,3,4,5)")

response_code_map = {
1: ["100", "101", "102"],
2: ["200", "201", "202", "203", "204", "205", "206", "207", "208", "226"],
3: ["300", "301", "302", "303", "304", "305", "306", "307", "308"],
4: [
"400",
"401",
"402",
"403",
"404",
"405",
"406",
"407",
"408",
"409",
"410",
"411",
"412",
"413",
"414",
"415",
"416",
"417",
"418",
"421",
"422",
"423",
"424",
"425",
"426",
"428",
"429",
"431",
"451",
],
5: ["500", "501", "502", "503", "504", "505", "506", "507", "508", "509", "510", "511"],
}

response_codes = response_code_map[code]
return Column.BinaryFormula(
left=Column(
conditional_aggregation=AttributeConditionalAggregation(
aggregate=Function.FUNCTION_COUNT,
key=AttributeKey(
name="sentry.status_code",
type=AttributeKey.TYPE_STRING,
),
filter=TraceItemFilter(
comparison_filter=ComparisonFilter(
key=AttributeKey(
name="sentry.status_code",
type=AttributeKey.TYPE_STRING,
),
op=ComparisonFilter.OP_IN,
value=AttributeValue(
val_str_array=StrArray(
values=response_codes,
),
),
)
),
label="error_request_count",
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
),
),
op=Column.BinaryFormula.OP_DIVIDE,
right=Column(
conditional_aggregation=AttributeConditionalAggregation(
aggregate=Function.FUNCTION_COUNT,
key=AttributeKey(
name="sentry.status_code",
type=AttributeKey.TYPE_STRING,
),
label="total_request_count",
extrapolation_mode=ExtrapolationMode.EXTRAPOLATION_MODE_NONE,
),
),
)


CUSTOM_FUNCTION_RESOLVER: dict[str, Callable[[Any], Column.BinaryFormula]] = {
"http_response_rate": http_response_rate
}
6 changes: 4 additions & 2 deletions src/sentry/snuba/rpc_dataset_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from sentry_protos.snuba.v1.request_common_pb2 import PageToken
from sentry_protos.snuba.v1.trace_item_attribute_pb2 import AttributeAggregation, AttributeKey

from sentry.search.eap.columns import ResolvedColumn, ResolvedFunction
from sentry.search.eap.columns import ResolvedColumn, ResolvedFormula, ResolvedFunction
from sentry.search.eap.resolver import SearchResolver
from sentry.search.eap.types import CONFIDENCES, ConfidenceData, EAPResponse
from sentry.search.events.fields import get_function_alias
Expand All @@ -16,7 +16,9 @@
logger = logging.getLogger("sentry.snuba.spans_rpc")


def categorize_column(column: ResolvedColumn | ResolvedFunction) -> Column:
def categorize_column(column: ResolvedColumn | ResolvedFunction | ResolvedFormula) -> Column:
if isinstance(column, ResolvedFormula):
return Column(formula=column.proto_definition, label=column.public_alias)
if isinstance(column, ResolvedFunction):
return Column(aggregation=column.proto_definition, label=column.public_alias)
else:
Expand Down
Loading
Loading