From b2ee17860905e64f2ee8350f1d0dcdf0272cdba9 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Tue, 19 Mar 2024 11:32:29 -0400 Subject: [PATCH 1/6] feat(metrics): Add a meta table to counters This creates a table and a materialized view to populate it for storing meta information about metrics. This table is meant to satisfy queries that are trying to find metric_ids, tag keys and tag values, but are not interested in the values associated with the metrics. In theory this will eventually be done for all the metric types, but for now this is being used just for counters to test how well this solution actually solves the problems. --- .../0030_counters_meta_table.py | 68 ++++++++++++++++++ .../0031_counters_meta_table_mv.py | 71 +++++++++++++++++++ 2 files changed, 139 insertions(+) create mode 100644 snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py create mode 100644 snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py diff --git a/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py b/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py new file mode 100644 index 0000000000..5d8e0cef1e --- /dev/null +++ b/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py @@ -0,0 +1,68 @@ +from typing import Sequence + +from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations, table_engines +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import OperationTarget +from snuba.utils.schemas import Float + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + granularity = "2048" + local_table_name = "generic_metric_counters_meta_aggregated_local" + dist_table_name = "generic_metric_counters_meta_aggregated_dist" + storage_set_key = StorageSetKey.GENERIC_METRICS_COUNTERS + columns: Sequence[Column[Modifiers]] = [ + Column("org_id", UInt(64)), + Column("project_id", UInt(64)), + Column("use_case_id", String(Modifiers(low_cardinality=True))), + Column("metric_id", UInt(64)), + Column("tag_key", String()), + Column("tag_value", String()), + Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), + Column("retention_days", UInt(16)), + Column("count", AggregateFunction("sum", [Float(64)])), + ] + + def forwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.local_table_name, + engine=table_engines.AggregatingMergeTree( + storage_set=self.storage_set_key, + order_by="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp, tag_value)", + primary_key="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp)", + partition_by="(retention_days, toMonday(timestamp))", + settings={"index_granularity": self.granularity}, + ttl="timestamp + toIntervalDay(retention_days)", + ), + columns=self.columns, + target=OperationTarget.LOCAL, + ), + operations.CreateTable( + storage_set=self.storage_set_key, + table_name=self.dist_table_name, + engine=table_engines.Distributed( + local_table_name=self.local_table_name, sharding_key=None + ), + columns=self.columns, + target=OperationTarget.DISTRIBUTED, + ), + ] + + def backwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.dist_table_name, + target=OperationTarget.DISTRIBUTED, + ), + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.local_table_name, + target=OperationTarget.LOCAL, + ), + ] diff --git a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py new file mode 100644 index 0000000000..f687decfc8 --- /dev/null +++ b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py @@ -0,0 +1,71 @@ +from typing import Sequence + +from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations +from snuba.migrations.columns import MigrationModifiers as Modifiers +from snuba.migrations.operations import OperationTarget +from snuba.utils.schemas import Float + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + view_name = "generic_metric_counters_meta_aggregation_mv" + dest_table_name = "generic_metric_counters_meta_aggregated_local" + dest_table_columns: Sequence[Column[Modifiers]] = [ + Column("org_id", UInt(64)), + Column("project_id", UInt(64)), + Column("use_case_id", String(Modifiers(low_cardinality=True))), + Column("metric_id", UInt(64)), + Column("tag_key", String()), + Column("tag_value", String()), + Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), + Column("retention_days", UInt(16)), + Column("value", AggregateFunction("sum", [Float(64)])), + ] + storage_set_key = StorageSetKey.GENERIC_METRICS_COUNTERS + + def forwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.CreateMaterializedView( + storage_set=self.storage_set_key, + view_name=self.view_name, + columns=self.dest_table_columns, + destination_table_name=self.dest_table_name, + target=OperationTarget.LOCAL, + query=""" + SELECT + org_id, + project_id, + use_case_id, + metric_id, + tag_key, + tag_value, + toStartOfWeek(timestamp) as timestamp, + retention_days, + sumState(count_value) as count + FROM generic_metric_counters_raw_local + ARRAY JOIN + tags.key AS tag_key, + tags.raw_value AS tag_value + GROUP BY + org_id, + project_id, + use_case_id, + metric_id, + tag_key, + tag_value, + timestamp, + retention_days + """, + ), + ] + + def backwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.DropTable( + storage_set=self.storage_set_key, + table_name=self.view_name, + target=OperationTarget.LOCAL, + ) + ] From 89ed598cf88d590c78eb9ac660edd143e9fe477d Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Tue, 19 Mar 2024 13:18:05 -0400 Subject: [PATCH 2/6] add to groups --- snuba/migrations/group_loader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index ef464378eb..5b1da45ec5 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -318,6 +318,8 @@ def get_migrations(self) -> Sequence[str]: "0027_sets_add_raw_tags_column", "0028_distributions_add_indexed_tags_column", "0029_add_use_case_id_index", + "0030_counters_meta_table", + "0031_counters_meta_table_mv", ] From bf4c7624b7edcab4d2225452f307cff497b8f58a Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Wed, 20 Mar 2024 11:52:02 -0400 Subject: [PATCH 3/6] use groupUniqArray --- .../0030_counters_meta_table.py | 13 ++++++++++--- .../0031_counters_meta_table_mv.py | 18 ++++++++++++------ 2 files changed, 22 insertions(+), 9 deletions(-) diff --git a/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py b/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py index 5d8e0cef1e..da7837fa59 100644 --- a/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py +++ b/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py @@ -1,6 +1,13 @@ from typing import Sequence -from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt +from snuba.clickhouse.columns import ( + AggregateFunction, + Array, + Column, + DateTime, + String, + UInt, +) from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines from snuba.migrations.columns import MigrationModifiers as Modifiers @@ -20,9 +27,9 @@ class Migration(migration.ClickhouseNodeMigration): Column("use_case_id", String(Modifiers(low_cardinality=True))), Column("metric_id", UInt(64)), Column("tag_key", String()), - Column("tag_value", String()), Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), Column("retention_days", UInt(16)), + Column("tags.value", AggregateFunction("groupUniqArray", [Array(String())])), Column("count", AggregateFunction("sum", [Float(64)])), ] @@ -33,7 +40,7 @@ def forwards_ops(self) -> Sequence[operations.SqlOperation]: table_name=self.local_table_name, engine=table_engines.AggregatingMergeTree( storage_set=self.storage_set_key, - order_by="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp, tag_value)", + order_by="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp)", primary_key="(org_id, project_id, use_case_id, metric_id, tag_key, timestamp)", partition_by="(retention_days, toMonday(timestamp))", settings={"index_granularity": self.granularity}, diff --git a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py index f687decfc8..a289b3d423 100644 --- a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py +++ b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py @@ -1,6 +1,13 @@ from typing import Sequence -from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt +from snuba.clickhouse.columns import ( + AggregateFunction, + Array, + Column, + DateTime, + String, + UInt, +) from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations from snuba.migrations.columns import MigrationModifiers as Modifiers @@ -18,9 +25,9 @@ class Migration(migration.ClickhouseNodeMigration): Column("use_case_id", String(Modifiers(low_cardinality=True))), Column("metric_id", UInt(64)), Column("tag_key", String()), - Column("tag_value", String()), Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), Column("retention_days", UInt(16)), + Column("tags.value", AggregateFunction("groupUniqArray", [Array(String())])), Column("value", AggregateFunction("sum", [Float(64)])), ] storage_set_key = StorageSetKey.GENERIC_METRICS_COUNTERS @@ -40,21 +47,20 @@ def forwards_ops(self) -> Sequence[operations.SqlOperation]: use_case_id, metric_id, tag_key, - tag_value, toStartOfWeek(timestamp) as timestamp, retention_days, + groupUniqArrayState(tag_value) as `tags.value`, sumState(count_value) as count FROM generic_metric_counters_raw_local ARRAY JOIN - tags.key AS tag_key, - tags.raw_value AS tag_value + tags.key AS tag_key, tags.raw_value AS tag_value + WHERE use_case_id NOT IN ('escalating_issues', 'bundle_analysis', 'metric_stats') GROUP BY org_id, project_id, use_case_id, metric_id, tag_key, - tag_value, timestamp, retention_days """, From 1b5b1453fc1a73ca311de9e01970f7814f848075 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Thu, 21 Mar 2024 11:59:14 -0400 Subject: [PATCH 4/6] fix column name error --- .../generic_metrics/0030_counters_meta_table.py | 2 +- .../generic_metrics/0031_counters_meta_table_mv.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py b/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py index da7837fa59..b41ac6ccfd 100644 --- a/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py +++ b/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py @@ -29,7 +29,7 @@ class Migration(migration.ClickhouseNodeMigration): Column("tag_key", String()), Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), Column("retention_days", UInt(16)), - Column("tags.value", AggregateFunction("groupUniqArray", [Array(String())])), + Column("tag_values", AggregateFunction("groupUniqArray", [Array(String())])), Column("count", AggregateFunction("sum", [Float(64)])), ] diff --git a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py index a289b3d423..81e7ca9d9a 100644 --- a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py +++ b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py @@ -27,7 +27,7 @@ class Migration(migration.ClickhouseNodeMigration): Column("tag_key", String()), Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), Column("retention_days", UInt(16)), - Column("tags.value", AggregateFunction("groupUniqArray", [Array(String())])), + Column("tag_values", AggregateFunction("groupUniqArray", [Array(String())])), Column("value", AggregateFunction("sum", [Float(64)])), ] storage_set_key = StorageSetKey.GENERIC_METRICS_COUNTERS @@ -49,7 +49,7 @@ def forwards_ops(self) -> Sequence[operations.SqlOperation]: tag_key, toStartOfWeek(timestamp) as timestamp, retention_days, - groupUniqArrayState(tag_value) as `tags.value`, + groupUniqArrayState(tag_value) as `tag_values`, sumState(count_value) as count FROM generic_metric_counters_raw_local ARRAY JOIN From 7b9f5d971e674d70bc0725fb7473ff200f8998b2 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Thu, 21 Mar 2024 12:15:14 -0400 Subject: [PATCH 5/6] Use a record_meta column that determines what we store meta for --- snuba/migrations/group_loader.py | 5 +-- .../0030_add_record_meta_column.py | 33 +++++++++++++++++++ ...a_table.py => 0031_counters_meta_table.py} | 0 ...e_mv.py => 0032_counters_meta_table_mv.py} | 2 +- 4 files changed, 37 insertions(+), 3 deletions(-) create mode 100644 snuba/snuba_migrations/generic_metrics/0030_add_record_meta_column.py rename snuba/snuba_migrations/generic_metrics/{0030_counters_meta_table.py => 0031_counters_meta_table.py} (100%) rename snuba/snuba_migrations/generic_metrics/{0031_counters_meta_table_mv.py => 0032_counters_meta_table_mv.py} (96%) diff --git a/snuba/migrations/group_loader.py b/snuba/migrations/group_loader.py index 5b1da45ec5..dd184b683a 100644 --- a/snuba/migrations/group_loader.py +++ b/snuba/migrations/group_loader.py @@ -318,8 +318,9 @@ def get_migrations(self) -> Sequence[str]: "0027_sets_add_raw_tags_column", "0028_distributions_add_indexed_tags_column", "0029_add_use_case_id_index", - "0030_counters_meta_table", - "0031_counters_meta_table_mv", + "0030_add_record_meta_column", + "0031_counters_meta_table", + "0032_counters_meta_table_mv", ] diff --git a/snuba/snuba_migrations/generic_metrics/0030_add_record_meta_column.py b/snuba/snuba_migrations/generic_metrics/0030_add_record_meta_column.py new file mode 100644 index 0000000000..53fd2120ea --- /dev/null +++ b/snuba/snuba_migrations/generic_metrics/0030_add_record_meta_column.py @@ -0,0 +1,33 @@ +from typing import Sequence + +from snuba.clickhouse.columns import Column, UInt +from snuba.clusters.storage_sets import StorageSetKey +from snuba.migrations import migration, operations +from snuba.migrations.columns import MigrationModifiers as Modifiers + + +class Migration(migration.ClickhouseNodeMigration): + blocking = False + local_table_name = "generic_metric_counters_raw_local" + storage_set_key = StorageSetKey.GENERIC_METRICS_COUNTERS + + def forwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.AddColumn( + storage_set=self.storage_set_key, + table_name=self.local_table_name, + column=Column("record_meta", UInt(8, Modifiers(default=str("0")))), + target=operations.OperationTarget.LOCAL, + after="materialization_version", + ), + ] + + def backwards_ops(self) -> Sequence[operations.SqlOperation]: + return [ + operations.DropColumn( + column_name="record_meta", + storage_set=self.storage_set_key, + table_name=self.local_table_name, + target=operations.OperationTarget.LOCAL, + ), + ] diff --git a/snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table.py similarity index 100% rename from snuba/snuba_migrations/generic_metrics/0030_counters_meta_table.py rename to snuba/snuba_migrations/generic_metrics/0031_counters_meta_table.py diff --git a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py b/snuba/snuba_migrations/generic_metrics/0032_counters_meta_table_mv.py similarity index 96% rename from snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py rename to snuba/snuba_migrations/generic_metrics/0032_counters_meta_table_mv.py index 81e7ca9d9a..74a5c70ddb 100644 --- a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table_mv.py +++ b/snuba/snuba_migrations/generic_metrics/0032_counters_meta_table_mv.py @@ -54,7 +54,7 @@ def forwards_ops(self) -> Sequence[operations.SqlOperation]: FROM generic_metric_counters_raw_local ARRAY JOIN tags.key AS tag_key, tags.raw_value AS tag_value - WHERE use_case_id NOT IN ('escalating_issues', 'bundle_analysis', 'metric_stats') + WHERE record_meta = 1 GROUP BY org_id, project_id, From f019f71f5c051ceb5a9c6f7c1676b89e82398fc8 Mon Sep 17 00:00:00 2001 From: Evan Hicks Date: Thu, 21 Mar 2024 15:02:37 -0400 Subject: [PATCH 6/6] align types properly --- .../generic_metrics/0031_counters_meta_table.py | 11 ++--------- .../generic_metrics/0032_counters_meta_table_mv.py | 11 ++--------- 2 files changed, 4 insertions(+), 18 deletions(-) diff --git a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table.py b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table.py index b41ac6ccfd..397066fc78 100644 --- a/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table.py +++ b/snuba/snuba_migrations/generic_metrics/0031_counters_meta_table.py @@ -1,13 +1,6 @@ from typing import Sequence -from snuba.clickhouse.columns import ( - AggregateFunction, - Array, - Column, - DateTime, - String, - UInt, -) +from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations, table_engines from snuba.migrations.columns import MigrationModifiers as Modifiers @@ -29,7 +22,7 @@ class Migration(migration.ClickhouseNodeMigration): Column("tag_key", String()), Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), Column("retention_days", UInt(16)), - Column("tag_values", AggregateFunction("groupUniqArray", [Array(String())])), + Column("tag_values", AggregateFunction("groupUniqArray", [String()])), Column("count", AggregateFunction("sum", [Float(64)])), ] diff --git a/snuba/snuba_migrations/generic_metrics/0032_counters_meta_table_mv.py b/snuba/snuba_migrations/generic_metrics/0032_counters_meta_table_mv.py index 74a5c70ddb..a49291bdb5 100644 --- a/snuba/snuba_migrations/generic_metrics/0032_counters_meta_table_mv.py +++ b/snuba/snuba_migrations/generic_metrics/0032_counters_meta_table_mv.py @@ -1,13 +1,6 @@ from typing import Sequence -from snuba.clickhouse.columns import ( - AggregateFunction, - Array, - Column, - DateTime, - String, - UInt, -) +from snuba.clickhouse.columns import AggregateFunction, Column, DateTime, String, UInt from snuba.clusters.storage_sets import StorageSetKey from snuba.migrations import migration, operations from snuba.migrations.columns import MigrationModifiers as Modifiers @@ -27,7 +20,7 @@ class Migration(migration.ClickhouseNodeMigration): Column("tag_key", String()), Column("timestamp", DateTime(modifiers=Modifiers(codecs=["DoubleDelta"]))), Column("retention_days", UInt(16)), - Column("tag_values", AggregateFunction("groupUniqArray", [Array(String())])), + Column("tag_values", AggregateFunction("groupUniqArray", [String()])), Column("value", AggregateFunction("sum", [Float(64)])), ] storage_set_key = StorageSetKey.GENERIC_METRICS_COUNTERS