diff --git a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 38e965f7f65878..31200b2c369e2d 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -7,7 +7,7 @@ from dataclasses import dataclass from datetime import datetime from json.decoder import JSONDecodeError -from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tuple, Type +from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Type from avro.schema import RecordSchema from deprecated import deprecated @@ -1148,14 +1148,13 @@ def _make_schema_resolver( def initialize_schema_resolver_from_datahub( self, platform: str, platform_instance: Optional[str], env: str - ) -> Tuple["SchemaResolver", Set[str]]: + ) -> "SchemaResolver": logger.info("Initializing schema resolver") schema_resolver = self._make_schema_resolver( platform, platform_instance, env, include_graph=False ) logger.info(f"Fetching schemas for platform {platform}, env {env}") - urns = [] count = 0 with PerfTimer() as timer: for urn, schema_info in self._bulk_fetch_schema_info_by_filter( @@ -1164,7 +1163,6 @@ def initialize_schema_resolver_from_datahub( env=env, ): try: - urns.append(urn) schema_resolver.add_graphql_schema_metadata(urn, schema_info) count += 1 except Exception: @@ -1179,7 +1177,7 @@ def initialize_schema_resolver_from_datahub( ) logger.info("Finished initializing schema resolver") - return schema_resolver, set(urns) + return schema_resolver def parse_sql_lineage( self, diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 8a16b1a4a5f6ba..f6adbcf033bcc5 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -458,7 +458,7 @@ def _init_schema_resolver(self) -> SchemaResolver: platform=self.platform, platform_instance=self.config.platform_instance, env=self.config.env, - )[0] + ) else: logger.warning( "Failed to load schema info from DataHub as DataHubGraph is missing.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py index 8d57bb83b37588..215116b4c33fb0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py +++ b/metadata-ingestion/src/datahub/ingestion/source/snowflake/snowflake_v2.py @@ -497,7 +497,7 @@ def _init_schema_resolver(self) -> SchemaResolver: platform=self.platform, platform_instance=self.config.platform_instance, env=self.config.env, - )[0] + ) else: logger.warning( "Failed to load schema info from DataHub as DataHubGraph is missing.", diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py index 2fcc93292c2efe..cd7c6476eb7492 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql_queries.py @@ -103,13 +103,13 @@ def __init__(self, ctx: PipelineContext, config: SqlQueriesSourceConfig): self.builder = SqlParsingBuilder(usage_config=self.config.usage) if self.config.use_schema_resolver: - schema_resolver, urns = self.graph.initialize_schema_resolver_from_datahub( + schema_resolver = self.graph.initialize_schema_resolver_from_datahub( platform=self.config.platform, platform_instance=self.config.platform_instance, env=self.config.env, ) self.schema_resolver = schema_resolver - self.urns = urns + self.urns = self.schema_resolver.get_urns() else: self.schema_resolver = self.graph._make_schema_resolver( platform=self.config.platform, diff --git a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py index f18235af3d1fd5..144aae04885d67 100644 --- a/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py +++ b/metadata-ingestion/src/datahub/utilities/sqlglot_lineage.py @@ -276,6 +276,9 @@ def __init__( shared_connection=shared_conn, ) + def get_urns(self) -> Set[str]: + return set(self._schema_cache.keys()) + def get_urn_for_table(self, table: _TableName, lower: bool = False) -> str: # TODO: Validate that this is the correct 2/3 layer hierarchy for the platform. @@ -390,8 +393,6 @@ def convert_graphql_schema_metadata_to_info( ) } - # TODO add a method to load all from graphql - def close(self) -> None: self._schema_cache.close()