From e31f7d81f02376579dcbd99989d52a88322adbdd Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Fri, 26 Jul 2024 20:31:22 +0530 Subject: [PATCH 1/6] fix for condition tag --- .../source/looker/looker_liquid_tag.py | 17 +- .../source/looker/looker_template_language.py | 31 +- .../source/looker/lookml_concept_context.py | 41 +- .../ingestion/source/looker/lookml_config.py | 6 +- .../ingestion/source/looker/lookml_source.py | 3 +- .../ingestion/source/looker/view_upstream.py | 10 +- .../integration/lookml/expected_output.json | 2 +- .../lookml/lookml_mces_api_bigquery.json | 2 +- .../lookml/lookml_mces_api_hive2.json | 2 +- .../lookml/lookml_mces_badsql_parser.json | 90 ++++ .../lookml/lookml_mces_offline.json | 90 ++++ ...lookml_mces_offline_platform_instance.json | 90 ++++ .../lookml_mces_with_external_urls.json | 90 ++++ .../lookml/refinements_ingestion_golden.json | 2 +- .../data.model.lkml | 4 + .../finance_notes.view.lkml | 109 +++++ .../vv_lineage_liquid_template_golden.json | 445 +++++++++++++++++- 17 files changed, 984 insertions(+), 50 deletions(-) create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py index 35231d273fbba..e8268e3eb427d 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py @@ -1,5 +1,5 @@ from functools import lru_cache -from typing import ClassVar, Optional, TextIO, cast +from typing import ClassVar, Optional, TextIO from liquid import Environment from liquid.ast import Node @@ -25,18 +25,9 @@ def __init__(self, tok: Token, sql_or_lookml_reference: str, filter_name: str): self.filter_name = filter_name def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]: - filter_value: Optional[str] = cast( - str, context.globals.get(self.filter_name) - ) # to silent lint - - if filter_value is None: - raise CustomTagException( - f'filter {self.filter_name} value is not provided for "condition" tag' - ) - - filter_value = filter_value.strip() - - buffer.write(f"{self.sql_or_lookml_reference}='{filter_value}'") + # This implementation will make sure that sql parse work correctly if looker condition tag + # is used in lookml sql field + buffer.write("1=1") return True diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 919d9232a18c5..2989f4b9830c0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -9,6 +9,7 @@ CustomTagException, create_template, ) +from datahub.ingestion.source.looker.lookml_config import DERIVED_VIEW_PATTERN from datahub.ingestion.source.looker.str_functions import ( remove_extra_spaces_and_newlines, ) @@ -94,6 +95,24 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: return text +def _complete_in_complete_sql(raw_view: dict, sql: str) -> str: + + # Looker supports sql fragments that omit the SELECT and FROM parts of the query + # Add those in if we detect that it is missing + sql_query: str = sql + + if not re.search(r"SELECT\s", sql_query, flags=re.I): + # add a SELECT clause at the beginning + sql_query = f"SELECT {sql}" + + if not re.search(r"FROM\s", sql_query, flags=re.I): + # add a FROM clause at the end + sql_query = f"{sql_query} FROM {raw_view['name']}" + + # Drop ${ and } + return re.sub(DERIVED_VIEW_PATTERN, r"\1", sql_query) + + def resolve_liquid_variable_in_view_dict( raw_view: dict, liquid_variable: Dict[Any, Any] ) -> None: @@ -102,14 +121,20 @@ def resolve_liquid_variable_in_view_dict( for view in raw_view["views"]: if "sql_table_name" in view: - view["sql_table_name"] = resolve_liquid_variable( + view["datahub_transformed_sql_table_name"] = resolve_liquid_variable( text=remove_extra_spaces_and_newlines(view["sql_table_name"]), liquid_variable=liquid_variable, - ) + ) # keeping original sql_table_name as is to avoid any visualization issue later if "derived_table" in view and "sql" in view["derived_table"]: # In sql we don't need to remove the extra spaces as sql parser takes care of extra spaces and \n # while generating URN from sql - view["derived_table"]["sql"] = resolve_liquid_variable( + view["derived_table"]["datahub_transformed_sql"] = resolve_liquid_variable( text=view["derived_table"]["sql"], liquid_variable=liquid_variable + ) # keeping original sql as is, so that on UI sql will be shown same is it is visible on looker portal + + view["derived_table"][ + "datahub_transformed_sql" + ] = _complete_in_complete_sql( + raw_view=view, sql=view["derived_table"]["datahub_transformed_sql"] ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py index e528e578dcf9f..a83aa2638ec96 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_concept_context.py @@ -266,15 +266,25 @@ def sql_table_name(self) -> str: sql_table_name: Optional[str] = self._get_sql_table_name_field() # if sql_table_name field is not set then the table name is equal to view-name if sql_table_name is None: - return self.raw_view[NAME].lower() + sql_table_name = self.raw_view[NAME].lower() + + return sql_table_name + + def datahub_transformed_sql_table_name(self) -> str: + table_name: Optional[str] = self.raw_view.get( + "datahub_transformed_sql_table_name" + ) + + if not table_name: + table_name = self.sql_table_name() # sql_table_name is in the format "${view-name}.SQL_TABLE_NAME" # remove extra characters if self._is_dot_sql_table_name_present(): - sql_table_name = re.sub(DERIVED_VIEW_PATTERN, r"\1", sql_table_name) + table_name = re.sub(DERIVED_VIEW_PATTERN, r"\1", table_name) # Some sql_table_name fields contain quotes like: optimizely."group", just remove the quotes - return sql_table_name.replace('"', "").replace("`", "").lower() + return table_name.replace('"', "").replace("`", "").lower() def derived_table(self) -> Dict[Any, Any]: """ @@ -296,30 +306,21 @@ def explore_source(self) -> Dict[Any, Any]: return derived_table["explore_source"] - def sql(self, transformed: bool = True) -> str: + def sql(self) -> str: """ This function should only be called if is_sql_based_derived_case return true """ derived_table = self.derived_table() - # Looker supports sql fragments that omit the SELECT and FROM parts of the query - # Add those in if we detect that it is missing - sql_query: str = derived_table["sql"] - - if transformed: # update the original sql attribute only if transformed is true - if not re.search(r"SELECT\s", sql_query, flags=re.I): - # add a SELECT clause at the beginning - sql_query = f"SELECT {sql_query}" + return derived_table["sql"] - if not re.search(r"FROM\s", sql_query, flags=re.I): - # add a FROM clause at the end - sql_query = f"{sql_query} FROM {self.name()}" - # Get the list of tables in the query - - # Drop ${ and } - sql_query = re.sub(DERIVED_VIEW_PATTERN, r"\1", sql_query) + def datahub_transformed_sql(self) -> str: + """ + This function should only be called if is_sql_based_derived_case return true + """ + derived_table = self.derived_table() - return sql_query + return derived_table["datahub_transformed_sql"] def name(self) -> str: return self.raw_view[NAME] diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py index aa5719547c03e..f4fb1316b16a2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_config.py @@ -11,8 +11,10 @@ from datahub.configuration.git import GitInfo from datahub.configuration.source_common import EnvConfigMixin from datahub.configuration.validate_field_rename import pydantic_renamed_field -from datahub.ingestion.source.looker.looker_config import LookerCommonConfig -from datahub.ingestion.source.looker.looker_connection import LookerConnectionDefinition +from datahub.ingestion.source.looker.looker_config import ( + LookerCommonConfig, + LookerConnectionDefinition, +) from datahub.ingestion.source.looker.looker_lib_wrapper import ( LookerAPI, LookerAPIConfig, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index 223d168dbe033..d77e65ac73323 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -201,8 +201,7 @@ def from_looker_dict( view_logic = view_context.view_file.raw_file_content[:max_file_snippet_length] if view_context.is_sql_based_derived_case(): - view_logic = view_context.sql(transformed=False) - # Parse SQL to extract dependencies. + view_logic = view_context.sql() view_details = ViewProperties( materialized=False, viewLogic=view_logic, diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py index 390e71ef9d4bd..6c37806605935 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py @@ -259,7 +259,7 @@ def __get_spr(self) -> Optional[SqlParsingResult]: return None spr = create_lineage_sql_parsed_result( - query=self.view_context.sql(), + query=self.view_context.datahub_transformed_sql(), default_schema=self.view_context.view_connection.default_schema, default_db=self.view_context.view_connection.default_db, platform=self.view_context.view_connection.platform, @@ -478,9 +478,9 @@ def __init__( def __get_upstream_dataset_urn(self) -> Urn: # In regular case view's upstream dataset is either same as view-name or mentioned in "sql_table_name" field - # view_context.sql_table_name() handle this condition to return dataset name + # view_context.datahub_transformed_sql_table_name() handle this condition to return dataset name qualified_table_name: str = _generate_fully_qualified_name( - sql_table_name=self.view_context.sql_table_name(), + sql_table_name=self.view_context.datahub_transformed_sql_table_name(), connection_def=self.view_context.view_connection, reporter=self.view_context.reporter, ) @@ -532,10 +532,10 @@ def __init__( ) def __get_upstream_dataset_urn(self) -> List[Urn]: - # In this case view_context.sql_table_name() refers to derived view name + # In this case view_context.datahub_transformed_sql_table_name() refers to derived view name looker_view_id = get_derived_looker_view_id( qualified_table_name=_generate_fully_qualified_name( - self.view_context.sql_table_name(), + self.view_context.datahub_transformed_sql_table_name(), self.view_context.view_connection, self.view_context.reporter, ), diff --git a/metadata-ingestion/tests/integration/lookml/expected_output.json b/metadata-ingestion/tests/integration/lookml/expected_output.json index d870c6dee4065..f42c600281ccb 100644 --- a/metadata-ingestion/tests/integration/lookml/expected_output.json +++ b/metadata-ingestion/tests/integration/lookml/expected_output.json @@ -1632,7 +1632,7 @@ "aspect": { "json": { "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n order.region='ap-south-1'\n GROUP BY 1", + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", "viewLanguage": "sql" } }, diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json index 8813ea532fa2b..5f9b99ebe3062 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_bigquery.json @@ -1632,7 +1632,7 @@ "aspect": { "json": { "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n order.region='ap-south-1'\n GROUP BY 1", + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", "viewLanguage": "sql" } }, diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json index 4bc1a0f2f7da5..1b95959f0ba1d 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_api_hive2.json @@ -1632,7 +1632,7 @@ "aspect": { "json": { "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n order.region='ap-south-1'\n GROUP BY 1", + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", "viewLanguage": "sql" } }, diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json index 3fd37c4722185..fd479a2baa722 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_badsql_parser.json @@ -1675,6 +1675,96 @@ "removed": false } }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),sale_price)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),lifetime_spend)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "customer_facts", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "lifetime_spend", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json index 3fd37c4722185..fd479a2baa722 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline.json @@ -1675,6 +1675,96 @@ "removed": false } }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),sale_price)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),lifetime_spend)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "customer_facts", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "lifetime_spend", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json index bb8a379fdde22..053e90d473c1b 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_offline_platform_instance.json @@ -1675,6 +1675,96 @@ "removed": false } }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.order,DEV)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.order,DEV),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,warehouse.default_db.default_schema.order,DEV),sale_price)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),lifetime_spend)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "customer_facts", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "lifetime_spend", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json index b8a2bcc020c34..44dd72e8fc41b 100644 --- a/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json +++ b/metadata-ingestion/tests/integration/lookml/lookml_mces_with_external_urls.json @@ -1683,6 +1683,96 @@ "removed": false } }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),customer_id)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),customer_id)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:snowflake,default_db.default_schema.order,PROD),sale_price)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.customer_facts,PROD),lifetime_spend)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "customer_facts", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "customer_id", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "lifetime_spend", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NullType": {} + } + }, + "nativeDataType": "unknown", + "recursive": false, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { diff --git a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json index 7265ee3c6c62b..7c2f92ac1e028 100644 --- a/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json +++ b/metadata-ingestion/tests/integration/lookml/refinements_ingestion_golden.json @@ -1656,7 +1656,7 @@ "aspect": { "json": { "materialized": false, - "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n order.region='ap-south-1'\n GROUP BY 1", + "viewLogic": "SELECT\n customer_id,\n SUM(sale_price) AS lifetime_spend\n FROM\n order\n WHERE\n {% condition order_region %} order.region {% endcondition %}\n GROUP BY 1", "viewLanguage": "sql" } }, diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml index ea55512c5ca06..d90a87f8d109e 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml @@ -5,6 +5,7 @@ include: "employee_income_source.view.lkml" include: "employee_total_income.view.lkml" include: "top_10_employee_income_source.view.lkml" include: "employee_tax_report.view.lkml" +include: "finance_notes.view.lkml" explore: activity_logs { } @@ -19,4 +20,7 @@ explore: top_10_employee_income_source { } explore: employee_tax_report { +} + +explore: latest_account_holder_notes { } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml new file mode 100644 index 0000000000000..89fe7e7eebe1b --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml @@ -0,0 +1,109 @@ +view: latest_account_holder_notes_base { + filter: note_date_window { + description: "Date window in which to look for notes" + default_value: "90 days ago for 90 days" + datatype: date + type: date + } + + dimension: account_number { + hidden: yes + } + + dimension: account_in_engage { + hidden: yes + } + + dimension_group: latest_am_note { + type: time + timeframes: [date, week, month, year] + convert_tz: no + datatype: timestamp + sql: ${TABLE}.latest_time_created ;; + } + + dimension: interaction_type { + type: string + sql: ${TABLE}.interaction_type ;; + } + + dimension: did { + hidden: yes + type: string + sql: ${TABLE}.did ;; + } +} + +view: latest_account_holder_notes { + extends: [latest_account_holder_notes_base] + derived_table: { + sql: WITH notes AS ( + SELECT account_id, + time_created, + operator_id + FROM `at-meta-platform-dev`.rds_performance.fct_notes + WHERE DATE(time_created) < CURRENT_DATE() + {% if note_date_window._is_filtered %} + AND {% condition note_date_window %} DATE (time_created) {% endcondition %} + {% elsif customer_performance_retailer.snapshot_date._is_filtered %} + AND {% condition customer_performance_retailer.snapshot_date %} DATE(time_created) {% endcondition %} + {% else %} + AND DATE(time_created) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) + {% endif %} + AND medium NOT IN ('PORTAL', 'Trade Marketing') + AND medium IS NOT NULL + AND account_id IS NOT NULL + ), + + squad AS ( + SELECT DISTINCT squad, + LOWER(primary_sales_person.am_username) AS am_username, + account_number, + snapshot_date, + did + FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer` + WHERE snapshot_date < CURRENT_DATE() + {% if note_date_window._is_filtered %} + AND {% condition note_date_window %} DATE (snapshot_date) {% endcondition %} + {% elsif customer_performance_retailer.snapshot_date._is_filtered %} + AND {% condition customer_performance_retailer.snapshot_date %} DATE(snapshot_date) {% endcondition %} + {% else %} + AND snapshot_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) + {% endif %} + ), + + notes_with_squad AS ( + SELECT *, + CASE + WHEN operator_id = am_username THEN 'AM' + WHEN regexp_contains(am_username, 'vacant') THEN 'Vacant' + WHEN regexp_contains(am_username, 'squad') THEN am_username + ELSE 'Other' + END AS interaction_type + FROM notes n + LEFT JOIN squad s ON n.account_id = s.account_number + AND DATE(n.time_created) = s.snapshot_date + ), + + notes_with_acc_numbered AS ( + SELECT account_id, + time_created, + operator_id, + am_username, + squad, + interaction_type, + did, + ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY time_created DESC) AS rn + FROM notes_with_squad + WHERE interaction_type NOT IN ('Other') + ) + + SELECT + account_id, + time_created AS latest_time_created, + interaction_type, + did + FROM notes_with_acc_numbered + WHERE rn = 1 ;; + } +} diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json index 75cd50c5c6059..54785a3d9833a 100644 --- a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json @@ -302,7 +302,7 @@ "aspect": { "json": { "materialized": false, - "viewLogic": "SELECT\n employee_id,\n employee_name,\n \n prod_core.data.r_metric_summary_v2\n ,\n employee_income\n FROM source_table\n WHERE\n source_table.region='ap-south-1'", + "viewLogic": "SELECT\n employee_id,\n employee_name,\n {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %}\n prod_core.data.r_metric_summary_v2\n {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %}\n prod_core.data.r_metric_summary_v3\n {% else %}\n 'default_table' as source\n {% endif %},\n employee_income\n FROM source_table\n WHERE\n {% condition source_region %} source_table.region {% endcondition %}", "viewLanguage": "sql" } }, @@ -1300,6 +1300,433 @@ "lastRunId": "no-run-id-provided" } }, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "view: latest_account_holder_notes_base {\n filter: note_date_window {\n description: \"Date window in which to look for notes\"\n default_value: \"90 days ago for 90 days\"\n datatype: date\n type: date\n }\n\n dimension: account_number {\n hidden: yes\n }\n\n dimension: account_in_engage {\n hidden: yes\n }\n\n dimension_group: latest_am_note {\n type: time\n timeframes: [date, week, month, year]\n convert_tz: no\n datatype: timestamp\n sql: ${TABLE}.latest_time_created ;;\n }\n\n dimension: interaction_type {\n type: string\n sql: ${TABLE}.interaction_type ;;\n }\n\n dimension: did {\n hidden: yes\n type: string\n sql: ${TABLE}.did ;;\n }\n}\n\nview: latest_account_holder_notes {\n extends: [latest_account_holder_notes_base]\n derived_table: {\n sql: WITH notes AS (\n SELECT account_id,\n time_created,\n operator_id\n FROM `at-meta-platform-dev`.rds_performance.fct_notes\n WHERE DATE(time_created) < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (time_created) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(time_created) {% endcondition %}\n {% else %}\n AND DATE(time_created) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('PORTAL', 'Trade Marketing')\n AND medium IS NOT NULL\n AND account_id IS NOT NULL\n ),\n\n squad AS (\n SELECT DISTINCT squad,\n LOWER(primary_sales_person.am_username) AS am_username,\n account_number,\n snapshot_date,\n did\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_date < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (snapshot_date) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(snapshot_date) {% endcondition %}\n {% else %}\n AND snapshot_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n notes_with_squad AS (\n SELECT *,\n CASE\n WHEN operator_id = am_username THEN 'AM'\n WHEN regexp_contains(am_username, 'vacant') THEN 'Vacant'\n WHEN regexp_contains(am_username, 'squad') THEN am_username\n ELSE 'Other'\n END AS interaction_type\n FROM notes n\n LEFT JOIN squad s ON n.account_id = s.account_number\n AND DATE(n.time_created) = s.snapshot_date\n ),\n\n notes_with_acc_numbered AS (\n SELECT account_id,\n time_created,\n operator_id,\n am_username,\n squad,\n interaction_type,\n did,\n ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY time_created DESC) AS rn\n FROM notes_with_squad\n WHERE interaction_type NOT IN ('Other')\n )\n\n SELECT\n account_id,\n time_created AS latest_time_created,\n interaction_type,\n did\n FROM notes_with_acc_numbered\n WHERE rn = 1 ;;\n }\n}\n", + "viewLanguage": "lookml" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.UpstreamLineage": { + "upstreams": [ + { + "auditStamp": { + "time": 1586847600000, + "actor": "urn:li:corpuser:datahub" + }, + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD)", + "type": "VIEW" + } + ], + "fineGrainedLineages": [ + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),account_number)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),account_number)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),account_in_engage)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),account_in_engage)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),interaction_type)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),interaction_type)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),did)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),did)" + ], + "confidenceScore": 1.0 + }, + { + "upstreamType": "FIELD_SET", + "upstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),latest_time_created)" + ], + "downstreamType": "FIELD", + "downstreams": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),latest_am_note)" + ], + "confidenceScore": 1.0 + } + ] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "latest_account_holder_notes_base", + "platform": "urn:li:dataPlatform:looker", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.OtherSchema": { + "rawSchema": "" + } + }, + "fields": [ + { + "fieldPath": "account_number", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "account_in_engage", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "interaction_type", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "did", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "string", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + } + ] + }, + "isPartOfKey": false + }, + { + "fieldPath": "latest_am_note", + "nullable": false, + "description": "", + "label": "", + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.TimeType": {} + } + }, + "nativeDataType": "time", + "recursive": false, + "globalTags": { + "tags": [ + { + "tag": "urn:li:tag:Dimension" + }, + { + "tag": "urn:li:tag:Temporal" + } + ] + }, + "isPartOfKey": false + } + ], + "primaryKeys": [] + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "finance_notes.view.lkml", + "looker.model": "data" + }, + "name": "latest_account_holder_notes_base", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "View" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "changeType": "UPSERT", + "aspectName": "viewProperties", + "aspect": { + "json": { + "materialized": false, + "viewLogic": "WITH notes AS (\n SELECT account_id,\n time_created,\n operator_id\n FROM `at-meta-platform-dev`.rds_performance.fct_notes\n WHERE DATE(time_created) < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (time_created) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(time_created) {% endcondition %}\n {% else %}\n AND DATE(time_created) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('PORTAL', 'Trade Marketing')\n AND medium IS NOT NULL\n AND account_id IS NOT NULL\n ),\n\n squad AS (\n SELECT DISTINCT squad,\n LOWER(primary_sales_person.am_username) AS am_username,\n account_number,\n snapshot_date,\n did\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_date < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (snapshot_date) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(snapshot_date) {% endcondition %}\n {% else %}\n AND snapshot_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n notes_with_squad AS (\n SELECT *,\n CASE\n WHEN operator_id = am_username THEN 'AM'\n WHEN regexp_contains(am_username, 'vacant') THEN 'Vacant'\n WHEN regexp_contains(am_username, 'squad') THEN am_username\n ELSE 'Other'\n END AS interaction_type\n FROM notes n\n LEFT JOIN squad s ON n.account_id = s.account_number\n AND DATE(n.time_created) = s.snapshot_date\n ),\n\n notes_with_acc_numbered AS (\n SELECT account_id,\n time_created,\n operator_id,\n am_username,\n squad,\n interaction_type,\n did,\n ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY time_created DESC) AS rn\n FROM notes_with_squad\n WHERE interaction_type NOT IN ('Other')\n )\n\n SELECT\n account_id,\n time_created AS latest_time_created,\n interaction_type,\n did\n FROM notes_with_acc_numbered\n WHERE rn = 1", + "viewLanguage": "sql" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.BrowsePaths": { + "paths": [ + "/Develop/lkml_samples/" + ] + } + }, + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": { + "looker.file.path": "finance_notes.view.lkml", + "looker.model": "data" + }, + "name": "latest_account_holder_notes", + "tags": [] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "Develop" + }, + { + "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", + "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } +}, { "entityType": "tag", "entityUrn": "urn:li:tag:Dimension", @@ -1331,5 +1758,21 @@ "runId": "lookml-test", "lastRunId": "no-run-id-provided" } +}, +{ + "entityType": "tag", + "entityUrn": "urn:li:tag:Temporal", + "changeType": "UPSERT", + "aspectName": "tagKey", + "aspect": { + "json": { + "name": "Temporal" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "lookml-test", + "lastRunId": "no-run-id-provided" + } } ] \ No newline at end of file From 99b89d2b8697f86342a356363046298a10a902c9 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Sat, 27 Jul 2024 16:30:31 +0530 Subject: [PATCH 2/6] address review comments --- .../source/looker/looker_liquid_tag.py | 2 +- .../finance_notes.view.lkml | 125 +++++++++--------- .../vv_lineage_liquid_template_golden.json | 62 ++++----- 3 files changed, 94 insertions(+), 95 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py index e8268e3eb427d..7d4ebf00cc06e 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_liquid_tag.py @@ -27,7 +27,7 @@ def __init__(self, tok: Token, sql_or_lookml_reference: str, filter_name: str): def render_to_output(self, context: Context, buffer: TextIO) -> Optional[bool]: # This implementation will make sure that sql parse work correctly if looker condition tag # is used in lookml sql field - buffer.write("1=1") + buffer.write(f"{self.sql_or_lookml_reference}='dummy_value'") return True diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml index 89fe7e7eebe1b..77efffdacc840 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml @@ -1,109 +1,108 @@ -view: latest_account_holder_notes_base { - filter: note_date_window { - description: "Date window in which to look for notes" - default_value: "90 days ago for 90 days" +view: last_ptr_holder_fake_notes_base { + filter: finance_date_range { + default_value: "60 days ago for 60 days" datatype: date type: date } - dimension: account_number { + dimension: acc_number { hidden: yes } - dimension: account_in_engage { + dimension: fnc_in_engage { hidden: yes } - dimension_group: latest_am_note { + dimension_group: last_finance_fake_notes { type: time timeframes: [date, week, month, year] convert_tz: no datatype: timestamp - sql: ${TABLE}.latest_time_created ;; + sql: ${TABLE}.last_timestamp_created ;; } - dimension: interaction_type { + dimension: fake_type { type: string - sql: ${TABLE}.interaction_type ;; + sql: ${TABLE}.fake_type ;; } - dimension: did { + dimension: fid { hidden: yes type: string - sql: ${TABLE}.did ;; + sql: ${TABLE}.fid ;; } } -view: latest_account_holder_notes { - extends: [latest_account_holder_notes_base] +view: last_ptr_holder_fake_notes { + extends: [last_ptr_holder_fake_notes_base] derived_table: { - sql: WITH notes AS ( - SELECT account_id, - time_created, - operator_id - FROM `at-meta-platform-dev`.rds_performance.fct_notes - WHERE DATE(time_created) < CURRENT_DATE() - {% if note_date_window._is_filtered %} - AND {% condition note_date_window %} DATE (time_created) {% endcondition %} - {% elsif customer_performance_retailer.snapshot_date._is_filtered %} - AND {% condition customer_performance_retailer.snapshot_date %} DATE(time_created) {% endcondition %} + sql: WITH fake_notes AS ( + SELECT foo_id, + created_on_date, + opt_id + FROM `at-meta-platform-dev`.db_testing.finance_quotes + WHERE DATE(created_on_date) < CURRENT_DATE() + {% if finance_date_range._is_filtered %} + AND {% condition finance_date_range %} DATE (created_on_date) {% endcondition %} + {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %} + AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(created_on_date) {% endcondition %} {% else %} - AND DATE(time_created) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) + AND DATE(created_on_date) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) {% endif %} - AND medium NOT IN ('PORTAL', 'Trade Marketing') + AND medium NOT IN ('WINDOW', 'Foo Sale') AND medium IS NOT NULL - AND account_id IS NOT NULL + AND foo_id IS NOT NULL ), - squad AS ( - SELECT DISTINCT squad, - LOWER(primary_sales_person.am_username) AS am_username, - account_number, - snapshot_date, - did + abc AS ( + SELECT DISTINCT abc, + LOWER(primary_sales_person.cus_name) AS cus_name, + acc_number, + snapshot_timestamp, + fid FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer` - WHERE snapshot_date < CURRENT_DATE() - {% if note_date_window._is_filtered %} - AND {% condition note_date_window %} DATE (snapshot_date) {% endcondition %} - {% elsif customer_performance_retailer.snapshot_date._is_filtered %} - AND {% condition customer_performance_retailer.snapshot_date %} DATE(snapshot_date) {% endcondition %} + WHERE snapshot_timestamp < CURRENT_DATE() + {% if finance_date_range._is_filtered %} + AND {% condition finance_date_range %} DATE (snapshot_timestamp) {% endcondition %} + {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %} + AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(snapshot_timestamp) {% endcondition %} {% else %} - AND snapshot_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) + AND snapshot_timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) {% endif %} ), - notes_with_squad AS ( + fns_with_abc AS ( SELECT *, CASE - WHEN operator_id = am_username THEN 'AM' - WHEN regexp_contains(am_username, 'vacant') THEN 'Vacant' - WHEN regexp_contains(am_username, 'squad') THEN am_username + WHEN opt_id = cus_name THEN 'AM' + WHEN regexp_contains(cus_name, 'foo') THEN 'foo' + WHEN regexp_contains(cus_name, 'abc') THEN cus_name ELSE 'Other' - END AS interaction_type - FROM notes n - LEFT JOIN squad s ON n.account_id = s.account_number - AND DATE(n.time_created) = s.snapshot_date + END AS fake_type + FROM fake_notes n + LEFT JOIN abc s ON n.foo_id = s.acc_number + AND DATE(n.created_on_date) = s.snapshot_timestamp ), - notes_with_acc_numbered AS ( - SELECT account_id, - time_created, - operator_id, - am_username, - squad, - interaction_type, - did, - ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY time_created DESC) AS rn - FROM notes_with_squad - WHERE interaction_type NOT IN ('Other') + fake_notes_with_acc_numbered AS ( + SELECT foo_id, + created_on_date, + opt_id, + cus_name, + abc, + fake_type, + fid, + ROW_NUMBER() OVER(PARTITION BY foo_id ORDER BY created_on_date DESC) AS rn + FROM fns_with_abc + WHERE fake_type NOT IN ('Other') ) SELECT - account_id, - time_created AS latest_time_created, - interaction_type, - did - FROM notes_with_acc_numbered + foo_id, + created_on_date AS last_timestamp_created, + fake_type, + fid + FROM fake_notes_with_acc_numbered WHERE rn = 1 ;; } } diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json index 54785a3d9833a..0d291f11b8207 100644 --- a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json @@ -1302,7 +1302,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1320,13 +1320,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "view: latest_account_holder_notes_base {\n filter: note_date_window {\n description: \"Date window in which to look for notes\"\n default_value: \"90 days ago for 90 days\"\n datatype: date\n type: date\n }\n\n dimension: account_number {\n hidden: yes\n }\n\n dimension: account_in_engage {\n hidden: yes\n }\n\n dimension_group: latest_am_note {\n type: time\n timeframes: [date, week, month, year]\n convert_tz: no\n datatype: timestamp\n sql: ${TABLE}.latest_time_created ;;\n }\n\n dimension: interaction_type {\n type: string\n sql: ${TABLE}.interaction_type ;;\n }\n\n dimension: did {\n hidden: yes\n type: string\n sql: ${TABLE}.did ;;\n }\n}\n\nview: latest_account_holder_notes {\n extends: [latest_account_holder_notes_base]\n derived_table: {\n sql: WITH notes AS (\n SELECT account_id,\n time_created,\n operator_id\n FROM `at-meta-platform-dev`.rds_performance.fct_notes\n WHERE DATE(time_created) < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (time_created) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(time_created) {% endcondition %}\n {% else %}\n AND DATE(time_created) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('PORTAL', 'Trade Marketing')\n AND medium IS NOT NULL\n AND account_id IS NOT NULL\n ),\n\n squad AS (\n SELECT DISTINCT squad,\n LOWER(primary_sales_person.am_username) AS am_username,\n account_number,\n snapshot_date,\n did\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_date < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (snapshot_date) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(snapshot_date) {% endcondition %}\n {% else %}\n AND snapshot_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n notes_with_squad AS (\n SELECT *,\n CASE\n WHEN operator_id = am_username THEN 'AM'\n WHEN regexp_contains(am_username, 'vacant') THEN 'Vacant'\n WHEN regexp_contains(am_username, 'squad') THEN am_username\n ELSE 'Other'\n END AS interaction_type\n FROM notes n\n LEFT JOIN squad s ON n.account_id = s.account_number\n AND DATE(n.time_created) = s.snapshot_date\n ),\n\n notes_with_acc_numbered AS (\n SELECT account_id,\n time_created,\n operator_id,\n am_username,\n squad,\n interaction_type,\n did,\n ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY time_created DESC) AS rn\n FROM notes_with_squad\n WHERE interaction_type NOT IN ('Other')\n )\n\n SELECT\n account_id,\n time_created AS latest_time_created,\n interaction_type,\n did\n FROM notes_with_acc_numbered\n WHERE rn = 1 ;;\n }\n}\n", + "viewLogic": "view: last_ptr_holder_fake_notes_base {\n filter: finance_date_range {\n default_value: \"60 days ago for 60 days\"\n datatype: date\n type: date\n }\n\n dimension: acc_number {\n hidden: yes\n }\n\n dimension: fnc_in_engage {\n hidden: yes\n }\n\n dimension_group: last_finance_fake_notes {\n type: time\n timeframes: [date, week, month, year]\n convert_tz: no\n datatype: timestamp\n sql: ${TABLE}.last_timestamp_created ;;\n }\n\n dimension: fake_type {\n type: string\n sql: ${TABLE}.fake_type ;;\n }\n\n dimension: fid {\n hidden: yes\n type: string\n sql: ${TABLE}.fid ;;\n }\n}\n\nview: last_ptr_holder_fake_notes {\n extends: [last_ptr_holder_fake_notes_base]\n derived_table: {\n sql: WITH fake_notes AS (\n SELECT foo_id,\n created_on_date,\n opt_id\n FROM `at-meta-platform-dev`.db_testing.finance_quotes\n WHERE DATE(created_on_date) < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (created_on_date) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(created_on_date) {% endcondition %}\n {% else %}\n AND DATE(created_on_date) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('WINDOW', 'Foo Sale')\n AND medium IS NOT NULL\n AND foo_id IS NOT NULL\n ),\n\n abc AS (\n SELECT DISTINCT abc,\n LOWER(primary_sales_person.cus_name) AS cus_name,\n acc_number,\n snapshot_timestamp,\n fid\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_timestamp < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (snapshot_timestamp) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(snapshot_timestamp) {% endcondition %}\n {% else %}\n AND snapshot_timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n fns_with_abc AS (\n SELECT *,\n CASE\n WHEN opt_id = cus_name THEN 'AM'\n WHEN regexp_contains(cus_name, 'foo') THEN 'foo'\n WHEN regexp_contains(cus_name, 'abc') THEN cus_name\n ELSE 'Other'\n END AS fake_type\n FROM fake_notes n\n LEFT JOIN abc s ON n.foo_id = s.acc_number\n AND DATE(n.created_on_date) = s.snapshot_timestamp\n ),\n\n fake_notes_with_acc_numbered AS (\n SELECT foo_id,\n created_on_date,\n opt_id,\n cus_name,\n abc,\n fake_type,\n fid,\n ROW_NUMBER() OVER(PARTITION BY foo_id ORDER BY created_on_date DESC) AS rn\n FROM fns_with_abc\n WHERE fake_type NOT IN ('Other')\n )\n\n SELECT\n foo_id,\n created_on_date AS last_timestamp_created,\n fake_type,\n fid\n FROM fake_notes_with_acc_numbered\n WHERE rn = 1 ;;\n }\n}\n", "viewLanguage": "lookml" } }, @@ -1338,7 +1338,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1355,7 +1355,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -1377,7 +1377,7 @@ "time": 1586847600000, "actor": "urn:li:corpuser:datahub" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD)", "type": "VIEW" } ], @@ -1385,55 +1385,55 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),account_number)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),acc_number)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),account_number)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),acc_number)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),account_in_engage)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),fnc_in_engage)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),account_in_engage)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),fnc_in_engage)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),interaction_type)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),fake_type)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),interaction_type)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),fake_type)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),did)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),fid)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),did)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),fid)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..latest_account_holder_notes_base,PROD),latest_time_created)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),last_timestamp_created)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD),latest_am_note)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),last_finance_fake_notes)" ], "confidenceScore": 1.0 } @@ -1442,7 +1442,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "latest_account_holder_notes_base", + "schemaName": "last_ptr_holder_fake_notes_base", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1461,7 +1461,7 @@ }, "fields": [ { - "fieldPath": "account_number", + "fieldPath": "acc_number", "nullable": false, "description": "", "label": "", @@ -1482,7 +1482,7 @@ "isPartOfKey": false }, { - "fieldPath": "account_in_engage", + "fieldPath": "fnc_in_engage", "nullable": false, "description": "", "label": "", @@ -1503,7 +1503,7 @@ "isPartOfKey": false }, { - "fieldPath": "interaction_type", + "fieldPath": "fake_type", "nullable": false, "description": "", "label": "", @@ -1524,7 +1524,7 @@ "isPartOfKey": false }, { - "fieldPath": "did", + "fieldPath": "fid", "nullable": false, "description": "", "label": "", @@ -1545,7 +1545,7 @@ "isPartOfKey": false }, { - "fieldPath": "latest_am_note", + "fieldPath": "last_finance_fake_notes", "nullable": false, "description": "", "label": "", @@ -1578,7 +1578,7 @@ "looker.file.path": "finance_notes.view.lkml", "looker.model": "data" }, - "name": "latest_account_holder_notes_base", + "name": "last_ptr_holder_fake_notes_base", "tags": [] } } @@ -1593,7 +1593,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes_base,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1617,7 +1617,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1635,13 +1635,13 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "WITH notes AS (\n SELECT account_id,\n time_created,\n operator_id\n FROM `at-meta-platform-dev`.rds_performance.fct_notes\n WHERE DATE(time_created) < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (time_created) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(time_created) {% endcondition %}\n {% else %}\n AND DATE(time_created) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('PORTAL', 'Trade Marketing')\n AND medium IS NOT NULL\n AND account_id IS NOT NULL\n ),\n\n squad AS (\n SELECT DISTINCT squad,\n LOWER(primary_sales_person.am_username) AS am_username,\n account_number,\n snapshot_date,\n did\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_date < CURRENT_DATE()\n {% if note_date_window._is_filtered %}\n AND {% condition note_date_window %} DATE (snapshot_date) {% endcondition %}\n {% elsif customer_performance_retailer.snapshot_date._is_filtered %}\n AND {% condition customer_performance_retailer.snapshot_date %} DATE(snapshot_date) {% endcondition %}\n {% else %}\n AND snapshot_date >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n notes_with_squad AS (\n SELECT *,\n CASE\n WHEN operator_id = am_username THEN 'AM'\n WHEN regexp_contains(am_username, 'vacant') THEN 'Vacant'\n WHEN regexp_contains(am_username, 'squad') THEN am_username\n ELSE 'Other'\n END AS interaction_type\n FROM notes n\n LEFT JOIN squad s ON n.account_id = s.account_number\n AND DATE(n.time_created) = s.snapshot_date\n ),\n\n notes_with_acc_numbered AS (\n SELECT account_id,\n time_created,\n operator_id,\n am_username,\n squad,\n interaction_type,\n did,\n ROW_NUMBER() OVER(PARTITION BY account_id ORDER BY time_created DESC) AS rn\n FROM notes_with_squad\n WHERE interaction_type NOT IN ('Other')\n )\n\n SELECT\n account_id,\n time_created AS latest_time_created,\n interaction_type,\n did\n FROM notes_with_acc_numbered\n WHERE rn = 1", + "viewLogic": "WITH fake_notes AS (\n SELECT foo_id,\n created_on_date,\n opt_id\n FROM `at-meta-platform-dev`.db_testing.finance_quotes\n WHERE DATE(created_on_date) < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (created_on_date) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(created_on_date) {% endcondition %}\n {% else %}\n AND DATE(created_on_date) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('WINDOW', 'Foo Sale')\n AND medium IS NOT NULL\n AND foo_id IS NOT NULL\n ),\n\n abc AS (\n SELECT DISTINCT abc,\n LOWER(primary_sales_person.cus_name) AS cus_name,\n acc_number,\n snapshot_timestamp,\n fid\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_timestamp < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (snapshot_timestamp) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(snapshot_timestamp) {% endcondition %}\n {% else %}\n AND snapshot_timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n fns_with_abc AS (\n SELECT *,\n CASE\n WHEN opt_id = cus_name THEN 'AM'\n WHEN regexp_contains(cus_name, 'foo') THEN 'foo'\n WHEN regexp_contains(cus_name, 'abc') THEN cus_name\n ELSE 'Other'\n END AS fake_type\n FROM fake_notes n\n LEFT JOIN abc s ON n.foo_id = s.acc_number\n AND DATE(n.created_on_date) = s.snapshot_timestamp\n ),\n\n fake_notes_with_acc_numbered AS (\n SELECT foo_id,\n created_on_date,\n opt_id,\n cus_name,\n abc,\n fake_type,\n fid,\n ROW_NUMBER() OVER(PARTITION BY foo_id ORDER BY created_on_date DESC) AS rn\n FROM fns_with_abc\n WHERE fake_type NOT IN ('Other')\n )\n\n SELECT\n foo_id,\n created_on_date AS last_timestamp_created,\n fake_type,\n fid\n FROM fake_notes_with_acc_numbered\n WHERE rn = 1", "viewLanguage": "sql" } }, @@ -1653,7 +1653,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1670,7 +1670,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -1690,7 +1690,7 @@ "looker.file.path": "finance_notes.view.lkml", "looker.model": "data" }, - "name": "latest_account_holder_notes", + "name": "last_ptr_holder_fake_notes", "tags": [] } } @@ -1705,7 +1705,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.latest_account_holder_notes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { From 13c07f2f230f8a8fdc4d33db5c6fa946786a687a Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Mon, 29 Jul 2024 22:03:33 +0530 Subject: [PATCH 3/6] TLL should be generated even if CLL fails --- .../ingestion/source/looker/view_upstream.py | 61 +++-- .../data.model.lkml | 4 +- .../employee_salary_rating.view.lkml | 50 ++++ .../finance_notes.view.lkml | 108 --------- .../vv_lineage_liquid_template_golden.json | 217 +++--------------- 5 files changed, 124 insertions(+), 316 deletions(-) create mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_salary_rating.view.lkml delete mode 100644 metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py index 6c37806605935..22f078a1386ae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py @@ -206,6 +206,7 @@ class AbstractViewUpstream(ABC): view_context: LookerViewContext looker_view_id_cache: LookerViewIdCache config: LookMLSourceConfig + reporter: LookMLSourceReport ctx: PipelineContext def __init__( @@ -213,11 +214,13 @@ def __init__( view_context: LookerViewContext, looker_view_id_cache: LookerViewIdCache, config: LookMLSourceConfig, + reporter: LookMLSourceReport, ctx: PipelineContext, ): self.view_context = view_context self.looker_view_id_cache = looker_view_id_cache self.config = config + self.reporter = reporter self.ctx = ctx @abstractmethod @@ -244,9 +247,10 @@ def __init__( view_context: LookerViewContext, looker_view_id_cache: LookerViewIdCache, config: LookMLSourceConfig, + reporter: LookMLSourceReport, ctx: PipelineContext, ): - super().__init__(view_context, looker_view_id_cache, config, ctx) + super().__init__(view_context, looker_view_id_cache, config, reporter, ctx) # These are the function where we need to catch the response once calculated self._get_spr = lru_cache(maxsize=1)(self.__get_spr) self._get_upstream_dataset_urn = lru_cache(maxsize=1)( @@ -267,17 +271,6 @@ def __get_spr(self) -> Optional[SqlParsingResult]: env=self.view_context.view_connection.platform_env or self.config.env, graph=self.ctx.graph, ) - - if ( - spr.debug_info.table_error is not None - or spr.debug_info.column_error is not None - ): - logging.debug( - f"Failed to parsed the sql query. table_error={spr.debug_info.table_error} and " - f"column_error={spr.debug_info.column_error}" - ) - return None - return spr def __get_upstream_dataset_urn(self) -> List[Urn]: @@ -286,6 +279,15 @@ def __get_upstream_dataset_urn(self) -> List[Urn]: if sql_parsing_result is None: return [] + if sql_parsing_result.debug_info.table_error is not None: + self.reporter.report_warning( + title="Table Level Lineage Missing", + message="Error in parsing derived sql", + context=f"View-name: {self.view_context.name()}. " + f"Error: {sql_parsing_result.debug_info.table_error}", + ) + return [] + upstream_dataset_urns: List[str] = [ _drop_hive_dot(urn) for urn in sql_parsing_result.in_tables ] @@ -306,6 +308,15 @@ def create_fields(self) -> List[ViewField]: if spr is None: return [] + if spr.debug_info.column_error is not None: + self.reporter.report_warning( + title="Column Level Lineage Missing", + message="Error in parsing derived sql for CLL", + context=f"View-name: {self.view_context.name()}. " + f"Error: {spr.debug_info.column_error}", + ) + return [] + fields: List[ViewField] = [] column_lineages: List[ColumnLineageInfo] = ( @@ -336,6 +347,15 @@ def get_upstream_column_ref( if sql_parsing_result is None: return [] + if sql_parsing_result.debug_info.column_error is not None: + self.reporter.report_warning( + title="Column Level Lineage Missing", + message="Error in parsing derived sql for CLL", + context=f"View-name: {self.view_context.name()}. " + f"Error: {sql_parsing_result.debug_info.column_error}", + ) + return [] + upstreams_column_refs: List[ColumnRef] = [] if sql_parsing_result.column_lineage: for cll in sql_parsing_result.column_lineage: @@ -384,9 +404,11 @@ def __init__( view_context: LookerViewContext, looker_view_id_cache: LookerViewIdCache, config: LookMLSourceConfig, + reporter: LookMLSourceReport, ctx: PipelineContext, ): - super().__init__(view_context, looker_view_id_cache, config, ctx) + super().__init__(view_context, looker_view_id_cache, config, reporter, ctx) + self._get_upstream_dataset_urn = lru_cache(maxsize=1)( self.__get_upstream_dataset_urn ) @@ -402,7 +424,7 @@ def __get_upstream_dataset_urn(self) -> List[str]: base_folder_path=self.view_context.base_folder_path, ) - # Current view will always be present in cache. The assert will silence the lint + # Current view will always be present in cache. assert will silence the lint assert current_view_id # We're creating a "LookerExplore" just to use the urn generator. @@ -467,9 +489,10 @@ def __init__( view_context: LookerViewContext, looker_view_id_cache: LookerViewIdCache, config: LookMLSourceConfig, + reporter: LookMLSourceReport, ctx: PipelineContext, ): - super().__init__(view_context, looker_view_id_cache, config, ctx) + super().__init__(view_context, looker_view_id_cache, config, reporter, ctx) self.upstream_dataset_urn = None self._get_upstream_dataset_urn = lru_cache(maxsize=1)( @@ -522,9 +545,10 @@ def __init__( view_context: LookerViewContext, looker_view_id_cache: LookerViewIdCache, config: LookMLSourceConfig, + reporter: LookMLSourceReport, ctx: PipelineContext, ): - super().__init__(view_context, looker_view_id_cache, config, ctx) + super().__init__(view_context, looker_view_id_cache, config, reporter, ctx) self.upstream_dataset_urn = [] self._get_upstream_dataset_urn = lru_cache(maxsize=1)( @@ -591,6 +615,7 @@ def create_view_upstream( return RegularViewUpstream( view_context=view_context, config=config, + reporter=reporter, ctx=ctx, looker_view_id_cache=looker_view_id_cache, ) @@ -599,6 +624,7 @@ def create_view_upstream( return DotSqlTableNameViewUpstream( view_context=view_context, config=config, + reporter=reporter, ctx=ctx, looker_view_id_cache=looker_view_id_cache, ) @@ -610,6 +636,7 @@ def create_view_upstream( return SqlBasedDerivedViewUpstream( view_context=view_context, config=config, + reporter=reporter, ctx=ctx, looker_view_id_cache=looker_view_id_cache, ) @@ -618,6 +645,7 @@ def create_view_upstream( return NativeDerivedViewUpstream( view_context=view_context, config=config, + reporter=reporter, ctx=ctx, looker_view_id_cache=looker_view_id_cache, ) @@ -631,6 +659,7 @@ def create_view_upstream( return EmptyImplementation( view_context=view_context, config=config, + reporter=reporter, ctx=ctx, looker_view_id_cache=looker_view_id_cache, ) diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml index d90a87f8d109e..6eb92d749c9f7 100644 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/data.model.lkml @@ -5,7 +5,7 @@ include: "employee_income_source.view.lkml" include: "employee_total_income.view.lkml" include: "top_10_employee_income_source.view.lkml" include: "employee_tax_report.view.lkml" -include: "finance_notes.view.lkml" +include: "employee_salary_rating.view.lkml" explore: activity_logs { } @@ -22,5 +22,5 @@ explore: top_10_employee_income_source { explore: employee_tax_report { } -explore: latest_account_holder_notes { +explore: employee_salary_rating { } \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_salary_rating.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_salary_rating.view.lkml new file mode 100644 index 0000000000000..3a00099e7998e --- /dev/null +++ b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/employee_salary_rating.view.lkml @@ -0,0 +1,50 @@ +view: employee_salary_rating { + derived_table: { + sql: SELECT + employee_id, + employee_name, + {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %} + prod_core.data.r_metric_summary_v2 + {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %} + prod_core.data.r_metric_summary_v3 + {% else %} + 'default_table' as source + {% endif %}, + employee_income + FROM source_table + WHERE + {% condition source_region %} source_table.region {% endcondition %} AND + {% if rating_window._is_filtered %} + {% condition rating_window %} DATE (rating_created) {% endcondition %} + {% endif %} + ;; + } + + filter: rating_window { + description: "Date window in which to look for rating" + default_value: "90 days ago for 90 days" + datatype: date + type: date + } + + dimension: id { + type: number + sql: ${TABLE}.employee_id;; + } + + dimension: name { + type: string + sql: ${TABLE}.employee_name;; + } + + dimension: source { + type: string + sql: ${TABLE}.source ;; + } + + dimension: income { + type: number + sql: ${TABLE}.employee_income ;; + } + +} \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml b/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml deleted file mode 100644 index 77efffdacc840..0000000000000 --- a/metadata-ingestion/tests/integration/lookml/vv-lineage-and-liquid-templates/finance_notes.view.lkml +++ /dev/null @@ -1,108 +0,0 @@ -view: last_ptr_holder_fake_notes_base { - filter: finance_date_range { - default_value: "60 days ago for 60 days" - datatype: date - type: date - } - - dimension: acc_number { - hidden: yes - } - - dimension: fnc_in_engage { - hidden: yes - } - - dimension_group: last_finance_fake_notes { - type: time - timeframes: [date, week, month, year] - convert_tz: no - datatype: timestamp - sql: ${TABLE}.last_timestamp_created ;; - } - - dimension: fake_type { - type: string - sql: ${TABLE}.fake_type ;; - } - - dimension: fid { - hidden: yes - type: string - sql: ${TABLE}.fid ;; - } -} - -view: last_ptr_holder_fake_notes { - extends: [last_ptr_holder_fake_notes_base] - derived_table: { - sql: WITH fake_notes AS ( - SELECT foo_id, - created_on_date, - opt_id - FROM `at-meta-platform-dev`.db_testing.finance_quotes - WHERE DATE(created_on_date) < CURRENT_DATE() - {% if finance_date_range._is_filtered %} - AND {% condition finance_date_range %} DATE (created_on_date) {% endcondition %} - {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %} - AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(created_on_date) {% endcondition %} - {% else %} - AND DATE(created_on_date) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) - {% endif %} - AND medium NOT IN ('WINDOW', 'Foo Sale') - AND medium IS NOT NULL - AND foo_id IS NOT NULL - ), - - abc AS ( - SELECT DISTINCT abc, - LOWER(primary_sales_person.cus_name) AS cus_name, - acc_number, - snapshot_timestamp, - fid - FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer` - WHERE snapshot_timestamp < CURRENT_DATE() - {% if finance_date_range._is_filtered %} - AND {% condition finance_date_range %} DATE (snapshot_timestamp) {% endcondition %} - {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %} - AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(snapshot_timestamp) {% endcondition %} - {% else %} - AND snapshot_timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY) - {% endif %} - ), - - fns_with_abc AS ( - SELECT *, - CASE - WHEN opt_id = cus_name THEN 'AM' - WHEN regexp_contains(cus_name, 'foo') THEN 'foo' - WHEN regexp_contains(cus_name, 'abc') THEN cus_name - ELSE 'Other' - END AS fake_type - FROM fake_notes n - LEFT JOIN abc s ON n.foo_id = s.acc_number - AND DATE(n.created_on_date) = s.snapshot_timestamp - ), - - fake_notes_with_acc_numbered AS ( - SELECT foo_id, - created_on_date, - opt_id, - cus_name, - abc, - fake_type, - fid, - ROW_NUMBER() OVER(PARTITION BY foo_id ORDER BY created_on_date DESC) AS rn - FROM fns_with_abc - WHERE fake_type NOT IN ('Other') - ) - - SELECT - foo_id, - created_on_date AS last_timestamp_created, - fake_type, - fid - FROM fake_notes_with_acc_numbered - WHERE rn = 1 ;; - } -} diff --git a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json index 0d291f11b8207..d12ced5e42506 100644 --- a/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json +++ b/metadata-ingestion/tests/integration/lookml/vv_lineage_liquid_template_golden.json @@ -1302,7 +1302,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD)", "changeType": "UPSERT", "aspectName": "subTypes", "aspect": { @@ -1320,14 +1320,14 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD)", "changeType": "UPSERT", "aspectName": "viewProperties", "aspect": { "json": { "materialized": false, - "viewLogic": "view: last_ptr_holder_fake_notes_base {\n filter: finance_date_range {\n default_value: \"60 days ago for 60 days\"\n datatype: date\n type: date\n }\n\n dimension: acc_number {\n hidden: yes\n }\n\n dimension: fnc_in_engage {\n hidden: yes\n }\n\n dimension_group: last_finance_fake_notes {\n type: time\n timeframes: [date, week, month, year]\n convert_tz: no\n datatype: timestamp\n sql: ${TABLE}.last_timestamp_created ;;\n }\n\n dimension: fake_type {\n type: string\n sql: ${TABLE}.fake_type ;;\n }\n\n dimension: fid {\n hidden: yes\n type: string\n sql: ${TABLE}.fid ;;\n }\n}\n\nview: last_ptr_holder_fake_notes {\n extends: [last_ptr_holder_fake_notes_base]\n derived_table: {\n sql: WITH fake_notes AS (\n SELECT foo_id,\n created_on_date,\n opt_id\n FROM `at-meta-platform-dev`.db_testing.finance_quotes\n WHERE DATE(created_on_date) < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (created_on_date) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(created_on_date) {% endcondition %}\n {% else %}\n AND DATE(created_on_date) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('WINDOW', 'Foo Sale')\n AND medium IS NOT NULL\n AND foo_id IS NOT NULL\n ),\n\n abc AS (\n SELECT DISTINCT abc,\n LOWER(primary_sales_person.cus_name) AS cus_name,\n acc_number,\n snapshot_timestamp,\n fid\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_timestamp < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (snapshot_timestamp) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(snapshot_timestamp) {% endcondition %}\n {% else %}\n AND snapshot_timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n fns_with_abc AS (\n SELECT *,\n CASE\n WHEN opt_id = cus_name THEN 'AM'\n WHEN regexp_contains(cus_name, 'foo') THEN 'foo'\n WHEN regexp_contains(cus_name, 'abc') THEN cus_name\n ELSE 'Other'\n END AS fake_type\n FROM fake_notes n\n LEFT JOIN abc s ON n.foo_id = s.acc_number\n AND DATE(n.created_on_date) = s.snapshot_timestamp\n ),\n\n fake_notes_with_acc_numbered AS (\n SELECT foo_id,\n created_on_date,\n opt_id,\n cus_name,\n abc,\n fake_type,\n fid,\n ROW_NUMBER() OVER(PARTITION BY foo_id ORDER BY created_on_date DESC) AS rn\n FROM fns_with_abc\n WHERE fake_type NOT IN ('Other')\n )\n\n SELECT\n foo_id,\n created_on_date AS last_timestamp_created,\n fake_type,\n fid\n FROM fake_notes_with_acc_numbered\n WHERE rn = 1 ;;\n }\n}\n", - "viewLanguage": "lookml" + "viewLogic": "SELECT\n employee_id,\n employee_name,\n {% if dw_eff_dt_date._is_selected or finance_dw_eff_dt_date._is_selected %}\n prod_core.data.r_metric_summary_v2\n {% elsif dw_eff_dt_week._is_selected or finance_dw_eff_dt_week._is_selected %}\n prod_core.data.r_metric_summary_v3\n {% else %}\n 'default_table' as source\n {% endif %},\n employee_income\n FROM source_table\n WHERE\n {% condition source_region %} source_table.region {% endcondition %} AND\n {% if rating_window._is_filtered %}\n {% condition rating_window %} DATE (rating_created) {% endcondition %}\n {% endif %}", + "viewLanguage": "sql" } }, "systemMetadata": { @@ -1338,7 +1338,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD)", "changeType": "UPSERT", "aspectName": "container", "aspect": { @@ -1355,7 +1355,7 @@ { "proposedSnapshot": { "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", + "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD)", "aspects": [ { "com.linkedin.pegasus2avro.common.BrowsePaths": { @@ -1377,7 +1377,7 @@ "time": 1586847600000, "actor": "urn:li:corpuser:datahub" }, - "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD)", + "dataset": "urn:li:dataset:(urn:li:dataPlatform:postgres,source_table,PROD)", "type": "VIEW" } ], @@ -1385,55 +1385,44 @@ { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),acc_number)" - ], - "downstreamType": "FIELD", - "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),acc_number)" - ], - "confidenceScore": 1.0 - }, - { - "upstreamType": "FIELD_SET", - "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),fnc_in_engage)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,source_table,PROD),employee_id)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),fnc_in_engage)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD),id)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),fake_type)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,source_table,PROD),employee_name)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),fake_type)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD),name)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),fid)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,source_table,PROD),source)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),fid)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD),source)" ], "confidenceScore": 1.0 }, { "upstreamType": "FIELD_SET", "upstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,..last_ptr_holder_fake_notes_base,PROD),last_timestamp_created)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:postgres,source_table,PROD),employee_income)" ], "downstreamType": "FIELD", "downstreams": [ - "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD),last_finance_fake_notes)" + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD),income)" ], "confidenceScore": 1.0 } @@ -1442,7 +1431,7 @@ }, { "com.linkedin.pegasus2avro.schema.SchemaMetadata": { - "schemaName": "last_ptr_holder_fake_notes_base", + "schemaName": "employee_salary_rating", "platform": "urn:li:dataPlatform:looker", "version": 0, "created": { @@ -1461,37 +1450,16 @@ }, "fields": [ { - "fieldPath": "acc_number", - "nullable": false, - "description": "", - "label": "", - "type": { - "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} - } - }, - "nativeDataType": "string", - "recursive": false, - "globalTags": { - "tags": [ - { - "tag": "urn:li:tag:Dimension" - } - ] - }, - "isPartOfKey": false - }, - { - "fieldPath": "fnc_in_engage", + "fieldPath": "id", "nullable": false, "description": "", "label": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.StringType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "string", + "nativeDataType": "number", "recursive": false, "globalTags": { "tags": [ @@ -1503,7 +1471,7 @@ "isPartOfKey": false }, { - "fieldPath": "fake_type", + "fieldPath": "name", "nullable": false, "description": "", "label": "", @@ -1524,7 +1492,7 @@ "isPartOfKey": false }, { - "fieldPath": "fid", + "fieldPath": "source", "nullable": false, "description": "", "label": "", @@ -1545,24 +1513,21 @@ "isPartOfKey": false }, { - "fieldPath": "last_finance_fake_notes", + "fieldPath": "income", "nullable": false, "description": "", "label": "", "type": { "type": { - "com.linkedin.pegasus2avro.schema.TimeType": {} + "com.linkedin.pegasus2avro.schema.NumberType": {} } }, - "nativeDataType": "time", + "nativeDataType": "number", "recursive": false, "globalTags": { "tags": [ { "tag": "urn:li:tag:Dimension" - }, - { - "tag": "urn:li:tag:Temporal" } ] }, @@ -1575,10 +1540,10 @@ { "com.linkedin.pegasus2avro.dataset.DatasetProperties": { "customProperties": { - "looker.file.path": "finance_notes.view.lkml", + "looker.file.path": "employee_salary_rating.view.lkml", "looker.model": "data" }, - "name": "last_ptr_holder_fake_notes_base", + "name": "employee_salary_rating", "tags": [] } } @@ -1593,119 +1558,7 @@ }, { "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes_base,PROD)", - "changeType": "UPSERT", - "aspectName": "browsePathsV2", - "aspect": { - "json": { - "path": [ - { - "id": "Develop" - }, - { - "id": "urn:li:container:78f22c19304954b15e8adb1d9809975e", - "urn": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", - "changeType": "UPSERT", - "aspectName": "subTypes", - "aspect": { - "json": { - "typeNames": [ - "View" - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", - "changeType": "UPSERT", - "aspectName": "viewProperties", - "aspect": { - "json": { - "materialized": false, - "viewLogic": "WITH fake_notes AS (\n SELECT foo_id,\n created_on_date,\n opt_id\n FROM `at-meta-platform-dev`.db_testing.finance_quotes\n WHERE DATE(created_on_date) < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (created_on_date) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(created_on_date) {% endcondition %}\n {% else %}\n AND DATE(created_on_date) >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n AND medium NOT IN ('WINDOW', 'Foo Sale')\n AND medium IS NOT NULL\n AND foo_id IS NOT NULL\n ),\n\n abc AS (\n SELECT DISTINCT abc,\n LOWER(primary_sales_person.cus_name) AS cus_name,\n acc_number,\n snapshot_timestamp,\n fid\n FROM `at-yp-fin-emp-product-dev.finance_customer_and_product.odp_dim_dealer`\n WHERE snapshot_timestamp < CURRENT_DATE()\n {% if finance_date_range._is_filtered %}\n AND {% condition finance_date_range %} DATE (snapshot_timestamp) {% endcondition %}\n {% elsif employee_testing_rpt.snapshot_timestamp._is_filtered %}\n AND {% condition employee_testing_rpt.snapshot_timestamp %} DATE(snapshot_timestamp) {% endcondition %}\n {% else %}\n AND snapshot_timestamp >= DATE_SUB(CURRENT_DATE(), INTERVAL 90 DAY)\n {% endif %}\n ),\n\n fns_with_abc AS (\n SELECT *,\n CASE\n WHEN opt_id = cus_name THEN 'AM'\n WHEN regexp_contains(cus_name, 'foo') THEN 'foo'\n WHEN regexp_contains(cus_name, 'abc') THEN cus_name\n ELSE 'Other'\n END AS fake_type\n FROM fake_notes n\n LEFT JOIN abc s ON n.foo_id = s.acc_number\n AND DATE(n.created_on_date) = s.snapshot_timestamp\n ),\n\n fake_notes_with_acc_numbered AS (\n SELECT foo_id,\n created_on_date,\n opt_id,\n cus_name,\n abc,\n fake_type,\n fid,\n ROW_NUMBER() OVER(PARTITION BY foo_id ORDER BY created_on_date DESC) AS rn\n FROM fns_with_abc\n WHERE fake_type NOT IN ('Other')\n )\n\n SELECT\n foo_id,\n created_on_date AS last_timestamp_created,\n fake_type,\n fid\n FROM fake_notes_with_acc_numbered\n WHERE rn = 1", - "viewLanguage": "sql" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", - "changeType": "UPSERT", - "aspectName": "container", - "aspect": { - "json": { - "container": "urn:li:container:78f22c19304954b15e8adb1d9809975e" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "proposedSnapshot": { - "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { - "urn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", - "aspects": [ - { - "com.linkedin.pegasus2avro.common.BrowsePaths": { - "paths": [ - "/Develop/lkml_samples/" - ] - } - }, - { - "com.linkedin.pegasus2avro.common.Status": { - "removed": false - } - }, - { - "com.linkedin.pegasus2avro.dataset.DatasetProperties": { - "customProperties": { - "looker.file.path": "finance_notes.view.lkml", - "looker.model": "data" - }, - "name": "last_ptr_holder_fake_notes", - "tags": [] - } - } - ] - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } -}, -{ - "entityType": "dataset", - "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.last_ptr_holder_fake_notes,PROD)", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:looker,lkml_samples.view.employee_salary_rating,PROD)", "changeType": "UPSERT", "aspectName": "browsePathsV2", "aspect": { @@ -1758,21 +1611,5 @@ "runId": "lookml-test", "lastRunId": "no-run-id-provided" } -}, -{ - "entityType": "tag", - "entityUrn": "urn:li:tag:Temporal", - "changeType": "UPSERT", - "aspectName": "tagKey", - "aspect": { - "json": { - "name": "Temporal" - } - }, - "systemMetadata": { - "lastObserved": 1586847600000, - "runId": "lookml-test", - "lastRunId": "no-run-id-provided" - } } ] \ No newline at end of file From b2266a668594637dfd91d7fcd47e38026690a1a0 Mon Sep 17 00:00:00 2001 From: sid-acryl <155424659+sid-acryl@users.noreply.github.com> Date: Tue, 30 Jul 2024 11:29:39 +0530 Subject: [PATCH 4/6] Update metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py Co-authored-by: Harshal Sheth --- .../datahub/ingestion/source/looker/looker_template_language.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 2989f4b9830c0..982b514e6c5bc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -95,7 +95,7 @@ def resolve_liquid_variable(text: str, liquid_variable: Dict[Any, Any]) -> str: return text -def _complete_in_complete_sql(raw_view: dict, sql: str) -> str: +def _complete_incomplete_sql(raw_view: dict, sql: str) -> str: # Looker supports sql fragments that omit the SELECT and FROM parts of the query # Add those in if we detect that it is missing From 55cbe749a9555142f1efceffeea29e6740d927e9 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Tue, 30 Jul 2024 11:43:17 +0530 Subject: [PATCH 5/6] fix function name --- .../ingestion/source/looker/looker_template_language.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py index 982b514e6c5bc..2c523fcd98d08 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/looker_template_language.py @@ -133,8 +133,6 @@ def resolve_liquid_variable_in_view_dict( text=view["derived_table"]["sql"], liquid_variable=liquid_variable ) # keeping original sql as is, so that on UI sql will be shown same is it is visible on looker portal - view["derived_table"][ - "datahub_transformed_sql" - ] = _complete_in_complete_sql( + view["derived_table"]["datahub_transformed_sql"] = _complete_incomplete_sql( raw_view=view, sql=view["derived_table"]["datahub_transformed_sql"] ) From 0edf84450fefa5cd8602b58ec786869d0c35f6b9 Mon Sep 17 00:00:00 2001 From: Siddique Bagwan Date: Wed, 31 Jul 2024 16:59:51 +0530 Subject: [PATCH 6/6] address review comments --- .../src/datahub/ingestion/source/looker/view_upstream.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py index 22f078a1386ae..98646e19a7014 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py +++ b/metadata-ingestion/src/datahub/ingestion/source/looker/view_upstream.py @@ -283,8 +283,8 @@ def __get_upstream_dataset_urn(self) -> List[Urn]: self.reporter.report_warning( title="Table Level Lineage Missing", message="Error in parsing derived sql", - context=f"View-name: {self.view_context.name()}. " - f"Error: {sql_parsing_result.debug_info.table_error}", + context=f"View-name: {self.view_context.name()}", + exc=sql_parsing_result.debug_info.table_error, ) return [] @@ -312,8 +312,8 @@ def create_fields(self) -> List[ViewField]: self.reporter.report_warning( title="Column Level Lineage Missing", message="Error in parsing derived sql for CLL", - context=f"View-name: {self.view_context.name()}. " - f"Error: {spr.debug_info.column_error}", + context=f"View-name: {self.view_context.name()}", + exc=spr.debug_info.column_error, ) return []