Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into feature/or-filters
Browse files — browse the repository at this point in the history
  • Loading branch information
tlokvenec committed Sep 20, 2024
2 parents 2c9e546 + 45d7373 commit 213de25
Show file tree
Hide file tree
Showing 10 changed files with 108 additions and 31 deletions.
2 changes: 1 addition & 1 deletion metrics_layer/core/model/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -1317,7 +1317,7 @@ def apply_dimension_group_time_sql(self, sql: str, query_type: str):
"fiscal_quarter_of_year": lambda s, qt: (
f"EXTRACT(QUARTER FROM {self._fiscal_offset_to_timestamp(s, qt)})"
),
"hour_of_day": lambda s, qt: f"CAST({s} AS STRING FORMAT 'HH24')",
"hour_of_day": lambda s, qt: f"CAST(CAST({s} AS STRING FORMAT 'HH24') AS INT64)",
"day_of_week": lambda s, qt: f"CAST({s} AS STRING FORMAT 'DAY')",
"day_of_month": lambda s, qt: f"EXTRACT(DAY FROM {s})",
"day_of_year": lambda s, qt: f"EXTRACT(DAYOFYEAR FROM {s})",
Expand Down
2 changes: 1 addition & 1 deletion metrics_layer/core/model/view.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ def is_affected_by_access_filters(self):

@property
def primary_key(self):
return next((f for f in self.fields() if f.primary_key), None)
return next((f for f in self.fields(expand_dimension_groups=True) if f.primary_key), None)

def _error(self, element, error, extra: dict = {}):
line, column = self.line_col(element)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metrics_layer"
version = "0.12.36"
version = "0.12.37"
description = "The open source metrics layer."
authors = ["Paul Blankley <[email protected]>"]
keywords = ["Metrics Layer", "Business Intelligence", "Analytics"]
Expand Down
26 changes: 26 additions & 0 deletions tests/config/metrics_layer_config/views/monthly_aggregates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
version: 1
type: view
name: monthly_aggregates

sql_table_name: analytics.monthly_rollup
default_date: record
model_name: test_model

fields:
- name: record
field_type: 'dimension_group'
type: time
timeframes: [raw, time, date, week, month, quarter, year]
sql: '${TABLE}.record_date'
primary_key: yes

- name: division
field_type: 'dimension'
type: string
sql: '${TABLE}.division'
searchable: true

- name: count_new_employees
field_type: measure
type: count
sql: ${TABLE}.n_new_employees
4 changes: 4 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
sales_dashboard_v2_path = os.path.join(
BASE_PATH, "config/metrics_layer_config/dashboards/sales_dashboard_v2.yml"
)
monthly_aggregates_view_path = os.path.join(
BASE_PATH, "config/metrics_layer_config/views/monthly_aggregates.yml"
)
order_lines_view_path = os.path.join(BASE_PATH, "config/metrics_layer_config/views/test_order_lines.yml")
orders_view_path = os.path.join(BASE_PATH, "config/metrics_layer_config/views/test_orders.yml")
customers_view_path = os.path.join(BASE_PATH, "config/metrics_layer_config/views/test_customers.yml")
Expand Down Expand Up @@ -69,6 +72,7 @@
other_db_view_path,
created_workspaces_view_path,
mrr_view_path,
monthly_aggregates_view_path,
]
dashboard_paths = [sales_dashboard_path, sales_dashboard_v2_path]

Expand Down
33 changes: 20 additions & 13 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@ def test_cli_validate(connection, fresh_project, mocker):

# assert result.exit_code == 0
assert (
result.output == "Found 7 errors in the project:\n\n"
result.output
== "Found 7 errors in the project:\n\n"
"\nCould not locate reference revenue_dimension in field total_item_costs in view order_lines\n\n"
"\nField total_item_costs in view order_lines contains invalid field reference revenue_dimension.\n\n"
"\nCould not locate reference revenue_dimension in field revenue_in_cents in view orders\n\n"
Expand Down Expand Up @@ -499,7 +500,8 @@ def test_cli_validate_broken_canon_date(connection, fresh_project, mocker):

assert result.exit_code == 0
assert (
result.output == "Found 1 error in the project:\n\n"
result.output
== "Found 1 error in the project:\n\n"
"\nCanon date customers.does_not_exist is unreachable in field total_sessions.\n\n"
)

Expand Down Expand Up @@ -756,7 +758,8 @@ def test_cli_validate_model_name_in_view(connection, fresh_project, mocker):

assert result.exit_code == 0
assert (
result.output == "Found 1 error in the project:\n\n"
result.output
== "Found 1 error in the project:\n\n"
"\nCould not find a model in the view orders. Use the model_name property to specify the model.\n\n"
)

Expand Down Expand Up @@ -799,7 +802,8 @@ def test_cli_dashboard_model_does_not_exist(connection, fresh_project, mocker):

assert result.exit_code == 0
assert (
result.output == "Found 1 error in the project:\n\n"
result.output
== "Found 1 error in the project:\n\n"
"\nCould not find or you do not have access to model missing_model in dashboard sales_dashboard\n\n"
)

Expand All @@ -820,7 +824,8 @@ def test_cli_canon_date_inaccessible(connection, fresh_project, mocker):

assert result.exit_code == 0
assert (
result.output == "Found 1 error in the project:\n\n"
result.output
== "Found 1 error in the project:\n\n"
"\nCanon date orders.missing_field is unreachable in field total_revenue.\n\n"
)

Expand Down Expand Up @@ -905,7 +910,8 @@ def test_cli_duplicate_field_names(connection, fresh_project, mocker):

assert result.exit_code == 0
assert (
result.output == "Found 1 error in the project:\n\n"
result.output
== "Found 1 error in the project:\n\n"
"\nDuplicate field names in view customers: number_of_customers\n\n"
)

Expand Down Expand Up @@ -972,7 +978,7 @@ def test_cli_validate_required_access_filters(connection, fresh_project, mocker)
assert result.exit_code == 0
assert (
result.output
== "Found 19 errors in the project:\n\n\nView order_lines does not have any access filters, but an"
== "Found 20 errors in the project:\n\n\nView order_lines does not have any access filters, but an"
" access filter with user attribute products is required.\n\n\nView orders does not have an access"
" filter with the required user attribute products\n\n\nView customers does not have any access"
" filters, but an access filter with user attribute products is required.\n\n\nView discounts does"
Expand All @@ -993,10 +999,11 @@ def test_cli_validate_required_access_filters(connection, fresh_project, mocker)
" other_db_traffic does not have any access filters, but an access filter with user attribute"
" products is required.\n\n\nView created_workspace does not have any access filters, but an"
" access filter with user attribute products is required.\n\n\nView mrr does not have any access"
" filters, but an access filter with user attribute products is required.\n\n\nView child_account"
" does not have any access filters, but an access filter with user attribute products is"
" required.\n\n\nView parent_account does not have any access filters, but an access filter with"
" user attribute products is required.\n\n"
" filters, but an access filter with user attribute products is required.\n\n\nView"
" monthly_aggregates does not have any access filters, but an access filter with user attribute"
" products is required.\n\n\nView child_account does not have any access filters, but an access"
" filter with user attribute products is required.\n\n\nView parent_account does not have any"
" access filters, but an access filter with user attribute products is required.\n\n"
)


Expand Down Expand Up @@ -1074,8 +1081,8 @@ def test_cli_list(connection, mocker, object_type: str, extra_args: list):
"models": "Found 2 models:\n\ntest_model\nnew_model\n",
"connections": "Found 3 connections:\n\ntesting_snowflake\ntesting_bigquery\ntesting_databricks\n",
"views": ( # noqa
"Found 20"
" views:\n\norder_lines\norders\ncustomers\ndiscounts\ndiscount_detail\ncountry_detail\nsessions\nevents\nlogin_events\ntraffic\nclicked_on_page\nsubmitted_form\naccounts\naa_acquired_accounts\nz_customer_accounts\nother_db_traffic\ncreated_workspace\nmrr\nchild_account\nparent_account\n" # noqa
"Found 21"
" views:\n\norder_lines\norders\ncustomers\ndiscounts\ndiscount_detail\ncountry_detail\nsessions\nevents\nlogin_events\ntraffic\nclicked_on_page\nsubmitted_form\naccounts\naa_acquired_accounts\nz_customer_accounts\nother_db_traffic\ncreated_workspace\nmrr\nmonthly_aggregates\nchild_account\nparent_account\n" # noqa
),
"fields": "Found 2 fields:\n\ndiscount_promo_name\ndiscount_usd\n",
"dimensions": "Found 3 dimensions:\n\ncountry\norder\ndiscount_code\n",
Expand Down
6 changes: 3 additions & 3 deletions tests/test_listing_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
@pytest.mark.project
def test_list_metrics(connection):
metrics = connection.list_metrics()
assert len(metrics) == 60
assert len(metrics) == 61

metrics = connection.list_metrics(view_name="order_lines", names_only=True)
assert len(metrics) == 11
Expand All @@ -26,10 +26,10 @@ def test_list_metrics(connection):
@pytest.mark.project
def test_list_dimensions(connection):
dimensions = connection.list_dimensions(show_hidden=True)
assert len(dimensions) == 98
assert len(dimensions) == 100

dimensions = connection.list_dimensions()
assert len(dimensions) == 64
assert len(dimensions) == 66

dimensions = connection.list_dimensions(view_name="order_lines", names_only=True, show_hidden=True)
dimensions_present = {
Expand Down
27 changes: 16 additions & 11 deletions tests/test_merged_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,9 @@ def _blow_out_by_time_frame(join_graph: str, tf: list):
sub_q_0_10 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_10", core_tf)
sub_q_0_11 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_11", core_tf)
sub_q_0_12 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_12", core_tf)
sub_q_0_14 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_14", core_tf)
sub_q_0_14 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_13", core_tf)
sub_q_0_15 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_15", core_tf)
sub_q_0_16 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_16", core_tf)
sub_q_0_1 = _blow_out_by_time_frame("merged_result_subquery_0_subquery_1", core_tf)
revenue_set = [
*sub_q_cr,
Expand All @@ -176,6 +177,7 @@ def _blow_out_by_time_frame(join_graph: str, tf: list):
*sub_q_0_12,
*sub_q_0_14,
*sub_q_0_15,
*sub_q_0_16,
*sub_q_0_1,
]
field = connection.get_field("revenue_per_session")
Expand All @@ -191,8 +193,9 @@ def _blow_out_by_time_frame(join_graph: str, tf: list):
"merged_result_subquery_0_subquery_10_date",
"merged_result_subquery_0_subquery_11_date",
"merged_result_subquery_0_subquery_12_date",
"merged_result_subquery_0_subquery_14_date",
"merged_result_subquery_0_subquery_13_date",
"merged_result_subquery_0_subquery_15_date",
"merged_result_subquery_0_subquery_16_date",
"merged_result_subquery_0_subquery_1_date",
"merged_result_subquery_0_subquery_4_date",
"merged_result_subquery_0_subquery_7_date",
Expand All @@ -210,8 +213,9 @@ def _blow_out_by_time_frame(join_graph: str, tf: list):
"merged_result_subquery_0_subquery_10_date",
"merged_result_subquery_0_subquery_11_date",
"merged_result_subquery_0_subquery_12_date",
"merged_result_subquery_0_subquery_14_date",
"merged_result_subquery_0_subquery_13_date",
"merged_result_subquery_0_subquery_15_date",
"merged_result_subquery_0_subquery_16_date",
"merged_result_subquery_0_subquery_1_date",
"merged_result_subquery_0_subquery_4_date",
"merged_result_subquery_0_subquery_7_date",
Expand Down Expand Up @@ -241,7 +245,7 @@ def _blow_out_by_time_frame(join_graph: str, tf: list):
"subquery_4",
"subquery_11",
"subquery_12",
"subquery_10",
"subquery_13",
*_blow_out_by_time_frame("merged_result_subquery_0_subquery_1", tf),
*_blow_out_by_time_frame("merged_result_subquery_1_subquery_3", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_1_subquery_4", core_tf),
Expand All @@ -256,12 +260,12 @@ def _blow_out_by_time_frame(join_graph: str, tf: list):
*_blow_out_by_time_frame("merged_result_subquery_3_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_0_subquery_4", tf),
*_blow_out_by_time_frame("merged_result_subquery_0_subquery_11", tf),
*_blow_out_by_time_frame("merged_result_subquery_10_subquery_3", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_10_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_10_subquery_11", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_10_subquery_12", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_1_subquery_10", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_0_subquery_10", tf),
*_blow_out_by_time_frame("merged_result_subquery_13_subquery_3", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_13_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_11_subquery_13", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_12_subquery_13", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_1_subquery_13", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_0_subquery_13", tf),
*_blow_out_by_time_frame("merged_result_subquery_11_subquery_4", core_tf),
]
assert field.join_graphs() == list(sorted(gender_graphs))
Expand All @@ -278,8 +282,9 @@ def _blow_out_by_time_frame(join_graph: str, tf: list):
*_blow_out_by_time_frame("merged_result_subquery_3_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_4_subquery_7", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_4_subquery_5", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_14_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_13_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_15_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_16_subquery_4", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_4_subquery_8", core_tf),
*_blow_out_by_time_frame("merged_result_subquery_4_subquery_9", core_tf),
]
Expand Down
2 changes: 1 addition & 1 deletion tests/test_simple_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ def test_simple_query_dimension_group(connections, group: str, query_type: str):
"month_of_year_index": f"EXTRACT(MONTH FROM simple.order_date)",
"month_of_year": "FORMAT_DATETIME('%B', CAST(simple.order_date as DATETIME))",
"quarter_of_year": "EXTRACT(QUARTER FROM simple.order_date)",
"hour_of_day": f"CAST(simple.order_date AS STRING FORMAT 'HH24')",
"hour_of_day": f"CAST(CAST(simple.order_date AS STRING FORMAT 'HH24') AS INT64)",
"day_of_week": f"CAST(simple.order_date AS STRING FORMAT 'DAY')",
"day_of_month": "EXTRACT(DAY FROM simple.order_date)",
"day_of_year": "EXTRACT(DAYOFYEAR FROM simple.order_date)",
Expand Down
35 changes: 35 additions & 0 deletions tests/test_symmetric_aggregates.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import datetime

import pytest

from metrics_layer.core.model.definitions import Definitions
Expand Down Expand Up @@ -185,3 +187,36 @@ def test_query_number_with_sql(connection, query_type):
f"{order_by};"
)
assert query == correct


@pytest.mark.query
def test_query_with_corrected_no_symm_agg_triggered(connection):
query = connection.get_sql_query(
metrics=["monthly_aggregates.count_new_employees"],
dimensions=["monthly_aggregates.division"],
where=[
{
"field": "date",
"expression": "greater_or_equal_than",
"value": datetime.datetime(2024, 1, 5, 0, 0),
},
{
"field": "date",
"expression": "less_or_equal_than",
"value": datetime.datetime(2024, 10, 5, 0, 0),
},
],
order_by=[{"field": "monthly_aggregates.division", "sort": "asc"}],
limit=25,
)

# This, correctly, does not apply a symmetric aggregate
correct = (
"SELECT monthly_aggregates.division as"
" monthly_aggregates_division,COUNT(monthly_aggregates.n_new_employees) as"
" monthly_aggregates_count_new_employees FROM analytics.monthly_rollup monthly_aggregates WHERE"
" DATE_TRUNC('DAY', monthly_aggregates.record_date)>='2024-01-05T00:00:00' AND DATE_TRUNC('DAY',"
" monthly_aggregates.record_date)<='2024-10-05T00:00:00' GROUP BY monthly_aggregates.division ORDER"
" BY monthly_aggregates_division ASC NULLS LAST LIMIT 25;"
)
assert query == correct

0 comments on commit 213de25

Please sign in to comment.