From 2a3f6c42d4d363d548f21115bc41a3e85ceca416 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Mon, 17 Jul 2023 15:26:43 -0700 Subject: [PATCH 01/20] ci: workarounds for pyyaml installation (#8435) --- docker/kafka-setup/Dockerfile | 8 +++++--- docker/quickstart/generate_and_compare.sh | 1 + docker/quickstart/requirements.txt | 2 +- metadata-ingestion-modules/airflow-plugin/build.gradle | 4 ++++ 4 files changed, 11 insertions(+), 4 deletions(-) diff --git a/docker/kafka-setup/Dockerfile b/docker/kafka-setup/Dockerfile index e379ae4385246..8cf9d0869dc9b 100644 --- a/docker/kafka-setup/Dockerfile +++ b/docker/kafka-setup/Dockerfile @@ -5,7 +5,7 @@ ARG MAVEN_REPO="https://repo1.maven.org/maven2" ARG SNAKEYAML_VERSION="2.0" RUN rm /usr/share/java/cp-base-new/snakeyaml-*.jar \ - && wget -P /usr/share/java/cp-base-new $MAVEN_REPO/org/yaml/snakeyaml/$SNAKEYAML_VERSION/snakeyaml-$SNAKEYAML_VERSION.jar + && wget -P /usr/share/java/cp-base-new $MAVEN_REPO/org/yaml/snakeyaml/$SNAKEYAML_VERSION/snakeyaml-$SNAKEYAML_VERSION.jar # Based on https://github.com/blacktop's alpine kafka build FROM python:3-alpine @@ -17,7 +17,7 @@ ENV SCALA_VERSION 2.13 ENV CUB_CLASSPATH='"/usr/share/java/cp-base-new/*"' # Confluent Docker Utils Version (Namely the tag or branch to grab from git to install) -ARG PYTHON_CONFLUENT_DOCKER_UTILS_VERSION="v0.0.58" +ARG PYTHON_CONFLUENT_DOCKER_UTILS_VERSION="v0.0.60" # This can be overriden for an offline/air-gapped builds ARG PYTHON_CONFLUENT_DOCKER_UTILS_INSTALL_SPEC="git+https://github.com/confluentinc/confluent-docker-utils@${PYTHON_CONFLUENT_DOCKER_UTILS_VERSION}" @@ -36,7 +36,9 @@ RUN mkdir -p /opt \ && adduser -DH -s /sbin/nologin kafka \ && chown -R kafka: /opt/kafka \ && echo "===> Installing python packages ..." 
\ - && pip install --no-cache-dir jinja2 requests \ + && pip install --no-cache-dir --upgrade pip wheel setuptools \ + && pip install jinja2 requests \ + && pip install "Cython<3.0" "PyYAML<6" --no-build-isolation \ && pip install --prefer-binary --prefix=/usr/local --upgrade "${PYTHON_CONFLUENT_DOCKER_UTILS_INSTALL_SPEC}" \ && rm -rf /tmp/* \ && apk del --purge .build-deps diff --git a/docker/quickstart/generate_and_compare.sh b/docker/quickstart/generate_and_compare.sh index e34abeb9820c6..d568eb3a4c246 100755 --- a/docker/quickstart/generate_and_compare.sh +++ b/docker/quickstart/generate_and_compare.sh @@ -8,5 +8,6 @@ set -euxo pipefail python3 -m venv venv source venv/bin/activate +pip install --upgrade pip wheel setuptools pip install -r requirements.txt python generate_docker_quickstart.py check-all diff --git a/docker/quickstart/requirements.txt b/docker/quickstart/requirements.txt index a20e96afc8582..539241331120c 100644 --- a/docker/quickstart/requirements.txt +++ b/docker/quickstart/requirements.txt @@ -1,3 +1,3 @@ -PyYAML==5.4.1 +PyYAML==6.0 python-dotenv==0.17.0 click diff --git a/metadata-ingestion-modules/airflow-plugin/build.gradle b/metadata-ingestion-modules/airflow-plugin/build.gradle index 9ab590b1560b3..d895e29229c41 100644 --- a/metadata-ingestion-modules/airflow-plugin/build.gradle +++ b/metadata-ingestion-modules/airflow-plugin/build.gradle @@ -22,6 +22,10 @@ task environmentSetup(type: Exec, dependsOn: checkPythonVersion) { task installPackage(type: Exec, dependsOn: environmentSetup) { inputs.file file('setup.py') outputs.dir("${venv_name}") + // Workaround for https://github.com/yaml/pyyaml/issues/601. + // See https://github.com/yaml/pyyaml/issues/601#issuecomment-1638509577. + // and https://github.com/datahub-project/datahub/pull/8435. + commandLine 'bash', '-x', '-c', "${pip_install_command} install 'Cython<3.0' 'PyYAML<6' --no-build-isolation" commandLine 'bash', '-x', '-c', "${pip_install_command} -e ." 
} From 48c1dc820ec472fb39859d30cb8e886f99808d70 Mon Sep 17 00:00:00 2001 From: Andrew Sikowitz Date: Tue, 18 Jul 2023 17:29:50 -0400 Subject: [PATCH 02/20] build(ingest/boto3): Update boto3-stubs to fix CI (#8452) --- metadata-ingestion/setup.py | 2 +- .../sagemaker_processors/feature_groups.py | 12 +++---- .../aws/sagemaker_processors/lineage.py | 22 +++++------- .../source/aws/sagemaker_processors/models.py | 34 +++++++++---------- 4 files changed, 33 insertions(+), 37 deletions(-) diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 2c7571ffca0d5..029527ea959d5 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -413,7 +413,7 @@ def get_long_description(): "types-cachetools", # versions 0.1.13 and 0.1.14 seem to have issues "types-click==0.1.12", - "boto3-stubs[s3,glue,sagemaker,sts]>=1.28.3", + "boto3-stubs[s3,glue,sagemaker,sts]>=1.28.4", "types-tabulate", # avrogen package requires this "types-pytz", diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py index 207b73d4cbd46..75fae47c966fd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/feature_groups.py @@ -23,9 +23,9 @@ if TYPE_CHECKING: from mypy_boto3_sagemaker import SageMakerClient from mypy_boto3_sagemaker.type_defs import ( - DescribeFeatureGroupResponseOutputTypeDef, + DescribeFeatureGroupResponseTypeDef, FeatureDefinitionOutputTypeDef, - FeatureGroupSummaryOutputTypeDef, + FeatureGroupSummaryTypeDef, ) @@ -35,7 +35,7 @@ class FeatureGroupProcessor: env: str report: SagemakerSourceReport - def get_all_feature_groups(self) -> List["FeatureGroupSummaryOutputTypeDef"]: + def get_all_feature_groups(self) -> List["FeatureGroupSummaryTypeDef"]: """ List all feature groups in SageMaker. 
""" @@ -51,7 +51,7 @@ def get_all_feature_groups(self) -> List["FeatureGroupSummaryOutputTypeDef"]: def get_feature_group_details( self, feature_group_name: str - ) -> "DescribeFeatureGroupResponseOutputTypeDef": + ) -> "DescribeFeatureGroupResponseTypeDef": """ Get details of a feature group (including list of component features). """ @@ -75,7 +75,7 @@ def get_feature_group_details( return feature_group def get_feature_group_wu( - self, feature_group_details: "DescribeFeatureGroupResponseOutputTypeDef" + self, feature_group_details: "DescribeFeatureGroupResponseTypeDef" ) -> MetadataWorkUnit: """ Generate an MLFeatureTable workunit for a SageMaker feature group. @@ -146,7 +146,7 @@ def get_feature_type(self, aws_type: str, feature_name: str) -> str: def get_feature_wu( self, - feature_group_details: "DescribeFeatureGroupResponseOutputTypeDef", + feature_group_details: "DescribeFeatureGroupResponseTypeDef", feature: "FeatureDefinitionOutputTypeDef", ) -> MetadataWorkUnit: """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py index 7ba615d80346f..b677dccad24ac 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/lineage.py @@ -9,10 +9,10 @@ if TYPE_CHECKING: from mypy_boto3_sagemaker import SageMakerClient from mypy_boto3_sagemaker.type_defs import ( - ActionSummaryOutputTypeDef, - ArtifactSummaryOutputTypeDef, - AssociationSummaryOutputTypeDef, - ContextSummaryOutputTypeDef, + ActionSummaryTypeDef, + ArtifactSummaryTypeDef, + AssociationSummaryTypeDef, + ContextSummaryTypeDef, ) @@ -49,7 +49,7 @@ class LineageProcessor: nodes: Dict[str, Dict[str, Any]] = field(default_factory=dict) lineage_info: LineageInfo = field(default_factory=LineageInfo) - def get_all_actions(self) -> List["ActionSummaryOutputTypeDef"]: + 
def get_all_actions(self) -> List["ActionSummaryTypeDef"]: """ List all actions in SageMaker. """ @@ -63,7 +63,7 @@ def get_all_actions(self) -> List["ActionSummaryOutputTypeDef"]: return actions - def get_all_artifacts(self) -> List["ArtifactSummaryOutputTypeDef"]: + def get_all_artifacts(self) -> List["ArtifactSummaryTypeDef"]: """ List all artifacts in SageMaker. """ @@ -77,7 +77,7 @@ def get_all_artifacts(self) -> List["ArtifactSummaryOutputTypeDef"]: return artifacts - def get_all_contexts(self) -> List["ContextSummaryOutputTypeDef"]: + def get_all_contexts(self) -> List["ContextSummaryTypeDef"]: """ List all contexts in SageMaker. """ @@ -91,9 +91,7 @@ def get_all_contexts(self) -> List["ContextSummaryOutputTypeDef"]: return contexts - def get_incoming_edges( - self, node_arn: str - ) -> List["AssociationSummaryOutputTypeDef"]: + def get_incoming_edges(self, node_arn: str) -> List["AssociationSummaryTypeDef"]: """ Get all incoming edges for a node in the lineage graph. """ @@ -107,9 +105,7 @@ def get_incoming_edges( return edges - def get_outgoing_edges( - self, node_arn: str - ) -> List["AssociationSummaryOutputTypeDef"]: + def get_outgoing_edges(self, node_arn: str) -> List["AssociationSummaryTypeDef"]: """ Get all outgoing edges for a node in the lineage graph. 
""" diff --git a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py index 12770b0831693..e82cfc58f75a7 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py +++ b/metadata-ingestion/src/datahub/ingestion/source/aws/sagemaker_processors/models.py @@ -45,12 +45,12 @@ if TYPE_CHECKING: from mypy_boto3_sagemaker import SageMakerClient from mypy_boto3_sagemaker.type_defs import ( - DescribeEndpointOutputOutputTypeDef, - DescribeModelOutputOutputTypeDef, - DescribeModelPackageGroupOutputOutputTypeDef, - EndpointSummaryOutputTypeDef, - ModelPackageGroupSummaryOutputTypeDef, - ModelSummaryOutputTypeDef, + DescribeEndpointOutputTypeDef, + DescribeModelOutputTypeDef, + DescribeModelPackageGroupOutputTypeDef, + EndpointSummaryTypeDef, + ModelPackageGroupSummaryTypeDef, + ModelSummaryTypeDef, ) ENDPOINT_STATUS_MAP: Dict[str, str] = { @@ -91,7 +91,7 @@ class ModelProcessor: group_arn_to_name: Dict[str, str] = field(default_factory=dict) - def get_all_models(self) -> List["ModelSummaryOutputTypeDef"]: + def get_all_models(self) -> List["ModelSummaryTypeDef"]: """ List all models in SageMaker. """ @@ -105,7 +105,7 @@ def get_all_models(self) -> List["ModelSummaryOutputTypeDef"]: return models - def get_model_details(self, model_name: str) -> "DescribeModelOutputOutputTypeDef": + def get_model_details(self, model_name: str) -> "DescribeModelOutputTypeDef": """ Get details of a model. 
""" @@ -113,7 +113,7 @@ def get_model_details(self, model_name: str) -> "DescribeModelOutputOutputTypeDe # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.describe_model return self.sagemaker_client.describe_model(ModelName=model_name) - def get_all_groups(self) -> List["ModelPackageGroupSummaryOutputTypeDef"]: + def get_all_groups(self) -> List["ModelPackageGroupSummaryTypeDef"]: """ List all model groups in SageMaker. """ @@ -128,7 +128,7 @@ def get_all_groups(self) -> List["ModelPackageGroupSummaryOutputTypeDef"]: def get_group_details( self, group_name: str - ) -> "DescribeModelPackageGroupOutputOutputTypeDef": + ) -> "DescribeModelPackageGroupOutputTypeDef": """ Get details of a model group. """ @@ -138,7 +138,7 @@ def get_group_details( ModelPackageGroupName=group_name ) - def get_all_endpoints(self) -> List["EndpointSummaryOutputTypeDef"]: + def get_all_endpoints(self) -> List["EndpointSummaryTypeDef"]: endpoints = [] # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.list_endpoints @@ -151,7 +151,7 @@ def get_all_endpoints(self) -> List["EndpointSummaryOutputTypeDef"]: def get_endpoint_details( self, endpoint_name: str - ) -> "DescribeEndpointOutputOutputTypeDef": + ) -> "DescribeEndpointOutputTypeDef": # see https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/sagemaker.html#SageMaker.Client.describe_endpoint return self.sagemaker_client.describe_endpoint(EndpointName=endpoint_name) @@ -171,7 +171,7 @@ def get_endpoint_status( return endpoint_status def get_endpoint_wu( - self, endpoint_details: "DescribeEndpointOutputOutputTypeDef" + self, endpoint_details: "DescribeEndpointOutputTypeDef" ) -> MetadataWorkUnit: """a Get a workunit for an endpoint. 
@@ -215,7 +215,7 @@ def get_endpoint_wu( def get_model_endpoints( self, - model_details: "DescribeModelOutputOutputTypeDef", + model_details: "DescribeModelOutputTypeDef", endpoint_arn_to_name: Dict[str, str], model_image: Optional[str], model_uri: Optional[str], @@ -244,7 +244,7 @@ def get_model_endpoints( return model_endpoints_sorted def get_group_wu( - self, group_details: "DescribeModelPackageGroupOutputOutputTypeDef" + self, group_details: "DescribeModelPackageGroupOutputTypeDef" ) -> MetadataWorkUnit: """ Get a workunit for a model group. @@ -294,7 +294,7 @@ def get_group_wu( return MetadataWorkUnit(id=group_name, mce=mce) def match_model_jobs( - self, model_details: "DescribeModelOutputOutputTypeDef" + self, model_details: "DescribeModelOutputTypeDef" ) -> Tuple[Set[str], Set[str], List[MLHyperParamClass], List[MLMetricClass]]: model_training_jobs: Set[str] = set() model_downstream_jobs: Set[str] = set() @@ -387,7 +387,7 @@ def strip_quotes(string: str) -> str: def get_model_wu( self, - model_details: "DescribeModelOutputOutputTypeDef", + model_details: "DescribeModelOutputTypeDef", endpoint_arn_to_name: Dict[str, str], ) -> MetadataWorkUnit: """ From cc2dc342c6164b0a1d332d45acdc68d31ba843a6 Mon Sep 17 00:00:00 2001 From: Arun Vasudevan <12974850+arunvasudevan@users.noreply.github.com> Date: Tue, 18 Jul 2023 21:24:54 -0500 Subject: [PATCH 03/20] fix(ingestion-redshift): Fix Redshift ingestion logs (#8454) --- .../src/datahub/ingestion/source/redshift/redshift.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py index 099e982691c67..c302497a48101 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py +++ b/metadata-ingestion/src/datahub/ingestion/source/redshift/redshift.py @@ -813,7 +813,7 @@ def cache_tables_and_views(self, connection, database): ) else: logger.debug( - f"View 
{database}.{schema}.{table.name} is filtered by view_pattern" + f"View {database}.{schema}.{view.name} is filtered by view_pattern" ) self.report.view_filtered[f"{database}.{schema}"] = ( self.report.view_filtered.get(f"{database}.{schema}", 0) + 1 From addf76c8494fe08559ff000358b993a797526e9b Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 19 Jul 2023 03:36:42 -0700 Subject: [PATCH 04/20] fix(ingest/bigquery): make sql parsing more robust (#8450) --- .../ingestion/source/bigquery_v2/bigquery.py | 6 ++++++ .../source/bigquery_v2/bigquery_report.py | 5 +++-- .../ingestion/source/bigquery_v2/lineage.py | 15 ++++++++++----- .../src/datahub/utilities/stats_collections.py | 9 +++++++-- 4 files changed, 26 insertions(+), 9 deletions(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 1e4a293239942..919c803222066 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -689,9 +689,15 @@ def generate_lineage(self, project_id: str) -> Iterable[MetadataWorkUnit]: f"Failed to parse lineage for view {view}: {raw_view_lineage.debug_info.table_error}" ) self.report.num_view_definitions_failed_parsing += 1 + self.report.view_definitions_parsing_failures.append( + f"Table-level sql parsing error for view {view}: {raw_view_lineage.debug_info.table_error}" + ) continue elif raw_view_lineage.debug_info.column_error: self.report.num_view_definitions_failed_column_parsing += 1 + self.report.view_definitions_parsing_failures.append( + f"Column-level sql parsing error for view {view}: {raw_view_lineage.debug_info.column_error}" + ) else: self.report.num_view_definitions_parsed += 1 diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py index 
b86f5cabc6b14..b57e691411f75 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery_report.py @@ -79,6 +79,7 @@ class BigQueryV2Report(ProfilingSqlReport): num_view_definitions_parsed: int = 0 num_view_definitions_failed_parsing: int = 0 num_view_definitions_failed_column_parsing: int = 0 + view_definitions_parsing_failures: LossyList[str] = field(default_factory=LossyList) read_reasons_stat: Counter[str] = dataclasses.field( default_factory=collections.Counter @@ -88,7 +89,7 @@ class BigQueryV2Report(ProfilingSqlReport): ) usage_state_size: Optional[str] = None ingestion_stage: Optional[str] = None - ingestion_stage_durations: Dict[str, str] = field(default_factory=TopKDict) + ingestion_stage_durations: TopKDict[str, float] = field(default_factory=TopKDict) _timer: Optional[PerfTimer] = field( default=None, init=False, repr=False, compare=False @@ -96,7 +97,7 @@ class BigQueryV2Report(ProfilingSqlReport): def set_ingestion_stage(self, project: str, stage: str) -> None: if self._timer: - elapsed = f"{self._timer.elapsed_seconds():.2f}" + elapsed = round(self._timer.elapsed_seconds(), 2) logger.info( f"Time spent in stage <{self.ingestion_stage}>: {elapsed} seconds" ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py index b5c8c74acc089..255a673026252 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/lineage.py @@ -153,13 +153,18 @@ def make_lineage_edges_from_parsing_result( if upstream_column_info.table == table_urn ) - table_name = str( - BigQueryTableRef.from_bigquery_table( - BigqueryTableIdentifier.from_string_name( - DatasetUrn.create_from_string(table_urn).get_dataset_name() + try: + table_name = str( + BigQueryTableRef.from_bigquery_table( + 
BigqueryTableIdentifier.from_string_name( + DatasetUrn.create_from_string(table_urn).get_dataset_name() + ) ) ) - ) + except IndexError as e: + logger.debug(f"Unable to parse table urn {table_urn}: {e}") + continue + table_edges[table_name] = LineageEdge( table=table_name, column_mapping=frozenset( diff --git a/metadata-ingestion/src/datahub/utilities/stats_collections.py b/metadata-ingestion/src/datahub/utilities/stats_collections.py index a41139c501175..09a9490abc0fb 100644 --- a/metadata-ingestion/src/datahub/utilities/stats_collections.py +++ b/metadata-ingestion/src/datahub/utilities/stats_collections.py @@ -1,4 +1,4 @@ -from typing import Any, Callable, DefaultDict, Dict, Optional, TypeVar +from typing import Any, Callable, DefaultDict, Dict, Optional, TypeVar, Union from typing_extensions import Protocol @@ -43,7 +43,12 @@ def as_obj(self) -> Dict[_KT, _VT]: ) except TypeError: trimmed_dict = dict(list(self.items())[: self.top_k]) - trimmed_dict[f"... top {self.top_k} of total {len(self)} entries"] = "" # type: ignore + + try: + total_value: Union[_VT, str] = sum(trimmed_dict.values()) # type: ignore + except Exception: + total_value = "" + trimmed_dict[f"... 
top {self.top_k} of total {len(self)} entries"] = total_value # type: ignore return trimmed_dict From 0def0e5a2e2f7c4a0cd6a674f51339fee2311b62 Mon Sep 17 00:00:00 2001 From: JifeiMei <33741213+JifeiMei@users.noreply.github.com> Date: Wed, 19 Jul 2023 18:38:00 +0800 Subject: [PATCH 05/20] =?UTF-8?q?fix(GreatExpections):=20AssertionRunEvent?= =?UTF-8?q?Class=20does=20not=20match=20the=20examp=E2=80=A6=20(#8243)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../integrations/great_expectations/action.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index 36b85c306a86d..eabf62a4cda2b 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -317,9 +317,11 @@ def get_assertions_with_results( type=AssertionResultType.SUCCESS if success else AssertionResultType.FAILURE, - rowCount=result.get("element_count"), - missingCount=result.get("missing_count"), - unexpectedCount=result.get("unexpected_count"), + rowCount=parse_int_or_default(result.get("element_count")), + missingCount=parse_int_or_default(result.get("missing_count")), + unexpectedCount=parse_int_or_default( + result.get("unexpected_count") + ), actualAggValue=actualAggValue, externalUrl=docs_link, nativeResults=nativeResults, @@ -699,6 +701,13 @@ def get_platform_instance(self, datasource_name): return None +def parse_int_or_default(value, default_value=None): + if value is None: + return default_value + else: + return int(value) + + def make_dataset_urn_from_sqlalchemy_uri( sqlalchemy_uri, schema_name, From 695de42a8c302fdfeee7fd9d19ef666f5c880819 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Wed, 19 Jul 2023 04:16:32 -0700 Subject: [PATCH 06/20] chore(ingest): hide 
ignore old/new state options (#8438) --- .../datahub/ingestion/source/state/stateful_ingestion_base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py index 373b10bb9547c..9dd6d27d56ea9 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py +++ b/metadata-ingestion/src/datahub/ingestion/source/state/stateful_ingestion_base.py @@ -68,10 +68,12 @@ class StatefulIngestionConfig(ConfigModel): ignore_old_state: bool = Field( default=False, description="If set to True, ignores the previous checkpoint state.", + hidden_from_docs=True, ) ignore_new_state: bool = Field( default=False, description="If set to True, ignores the current checkpoint state.", + hidden_from_docs=True, ) @pydantic.root_validator() From ee84464406996c75137894ef2b854b2020c0c2a8 Mon Sep 17 00:00:00 2001 From: david-leifker <114954101+david-leifker@users.noreply.github.com> Date: Wed, 19 Jul 2023 06:19:36 -0500 Subject: [PATCH 07/20] docs(env): add env vars authentication (#8436) --- docs/deploy/environment-vars.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/docs/deploy/environment-vars.md b/docs/deploy/environment-vars.md index 0721aaa7964b7..af4ae09c009fd 100644 --- a/docs/deploy/environment-vars.md +++ b/docs/deploy/environment-vars.md @@ -76,3 +76,11 @@ Simply replace the dot, `.`, with an underscore, `_`, and convert to uppercase. | `AWS_GLUE_SCHEMA_REGISTRY_REGION` | `us-east-1` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry implementation. | | `AWS_GLUE_SCHEMA_REGISTRY_NAME` | `` | string | [`GMS`, `MCE Consumer`, `MAE Consumer`] | If using `AWS_GLUE` in the `SCHEMA_REGISTRY_TYPE` variable for the schema registry. 
| | `USE_CONFLUENT_SCHEMA_REGISTRY` | `true` | boolean | [`kafka-setup`] | Enable Confluent schema registry configuration. | + +## Frontend + +| Variable | Default | Unit/Type | Components | Description | +|------------------------------------|----------|-----------|--------------|-------------------------------------------------------------------------------------------------------------------------------------| +| `AUTH_VERBOSE_LOGGING` | `false` | boolean | [`Frontend`] | Enable verbose authentication logging. Enabling this will leak sensisitve information in the logs. Disable when finished debugging. | +| `AUTH_OIDC_GROUPS_CLAIM` | `groups` | string | [`Frontend`] | Claim to use as the user's group. | +| `AUTH_OIDC_EXTRACT_GROUPS_ENABLED` | `false` | boolean | [`Frontend`] | Auto-provision the group from the user's group claim. | From 58125d35ce04738ec94672d6a0f8ba9fdd0b7a24 Mon Sep 17 00:00:00 2001 From: Shirshanka Das Date: Wed, 19 Jul 2023 13:39:58 -0700 Subject: [PATCH 08/20] =?UTF-8?q?feat(graphql-plugins):=20add=20ability=20?= =?UTF-8?q?for=20plugins=20to=20call=20back=20to=20core=20e=E2=80=A6=20(#8?= =?UTF-8?q?449)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java | 4 +++- .../java/com/linkedin/datahub/graphql/GmsGraphQLPlugin.java | 5 +++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 1dd07c1873b2f..99d50cdcd6b97 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -340,6 +340,7 @@ import java.util.function.Function; import java.util.function.Supplier; import java.util.stream.Collectors; +import lombok.Getter; import 
lombok.extern.slf4j.Slf4j; import org.apache.commons.io.IOUtils; import org.dataloader.BatchLoaderContextProvider; @@ -355,6 +356,7 @@ * A {@link GraphQLEngine} configured to provide access to the entities and aspects on the the GMS graph. */ @Slf4j +@Getter public class GmsGraphQLEngine { private final EntityClient entityClient; @@ -613,7 +615,7 @@ public GmsGraphQLEngine(final GmsGraphQLEngineArgs args) { * @param builder */ private void configurePluginResolvers(final RuntimeWiring.Builder builder) { - this.graphQLPlugins.forEach(plugin -> plugin.configureExtraResolvers(builder)); + this.graphQLPlugins.forEach(plugin -> plugin.configureExtraResolvers(builder, this)); } diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLPlugin.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLPlugin.java index ae1e26b610a61..e7ef0c402a1de 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLPlugin.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLPlugin.java @@ -35,9 +35,10 @@ public interface GmsGraphQLPlugin { /** * Optional callback that a plugin can implement to configure any Query, Mutation or Type specific resolvers. 
- * @param wiringBuilder + * @param wiringBuilder : the builder being used to configure the runtime wiring + * @param baseEngine : a reference to the core engine and its graphql types */ - default void configureExtraResolvers(final RuntimeWiring.Builder wiringBuilder) { + default void configureExtraResolvers(final RuntimeWiring.Builder wiringBuilder, final GmsGraphQLEngine baseEngine) { } From 93fde6bf936dac2a4305354d98735338d2573439 Mon Sep 17 00:00:00 2001 From: RyanHolstien Date: Wed, 19 Jul 2023 20:09:14 -0500 Subject: [PATCH 09/20] feat(io): refactor metadata-io module (#8306) --- .github/scripts/check_event_type.py | 2 +- build.gradle | 1 + datahub-graphql-core/build.gradle | 2 + .../datahub/graphql/GmsGraphQLEngine.java | 10 +- .../datahub/graphql/GmsGraphQLEngineArgs.java | 2 +- .../resolvers/config/AppConfigResolver.java | 2 +- .../CreateTestConnectionRequestResolver.java | 2 +- .../resolvers/mutate/util/LabelUtils.java | 12 +- .../resolvers/mutate/util/OwnerUtils.java | 3 +- .../search/GetQuickFiltersResolver.java | 1 - ...IngestionExecutionRequestResolverTest.java | 2 +- ...eateTestConnectionRequestResolverTest.java | 2 +- datahub-upgrade/build.gradle | 1 + .../upgrade/nocode/DataMigrationStep.java | 2 +- .../datahub/upgrade/nocode/NoCodeUpgrade.java | 3 +- .../elasticsearch/steps/CleanIndicesStep.java | 2 +- ...pgradeCliApplicationTestConfiguration.java | 2 +- docs-website/sidebars.js | 1 + docs/deploy/aws.md | 2 +- docs/dev-guides/timeline.md | 4 +- docs/how/updating-datahub.md | 2 +- docs/plugins.md | 4 +- ingestion-scheduler/build.gradle | 1 + .../ingestion/IngestionScheduler.java | 2 +- .../ingestion/IngestionSchedulerTest.java | 2 +- .../java/com/linkedin/metadata/Constants.java | 3 + metadata-io/build.gradle | 6 +- .../metadata/client/JavaEntityClient.java | 6 +- .../search/custom/QueryConfiguration.java | 108 ------ .../linkedin/metadata/entity/AspectDao.java | 4 +- .../metadata/entity/EntityAspect.java | 2 +- ...ityService.java => 
EntityServiceImpl.java} | 99 +++--- .../linkedin/metadata/entity/EntityUtils.java | 2 +- .../metadata/graph/SiblingGraphService.java | 2 +- .../graph/elastic/ESGraphQueryDAO.java | 2 +- .../metadata/search/LineageSearchService.java | 2 +- .../AllEntitiesSearchAggregator.java | 2 +- .../search/cache/EntityDocCountCache.java | 2 +- .../indexbuilder/ESIndexBuilder.java | 2 +- .../elasticsearch/query/ESSearchDAO.java | 4 +- .../query/request/CustomizedQueryHandler.java | 2 +- .../query/request/SearchQueryBuilder.java | 77 ++++- .../query/request/SearchRequestHandler.java | 4 +- .../linkedin/metadata/AspectUtilsTest.java | 8 +- .../metadata/ESSampleDataFixture.java | 14 +- .../metadata/ESSearchLineageFixture.java | 10 +- .../entity/AspectMigrationsDaoTest.java | 10 +- .../CassandraAspectMigrationsDaoTest.java | 8 +- .../entity/CassandraEntityServiceTest.java | 18 +- .../entity/DeleteEntityServiceTest.java | 8 +- .../entity/EbeanAspectMigrationsDaoTest.java | 8 +- .../entity/EbeanEntityServiceTest.java | 30 +- .../metadata/entity/EntityServiceTest.java | 232 ++++++------- .../ElasticSearchGraphServiceTest.java | 2 +- .../search/LineageSearchServiceTest.java | 6 +- .../metadata/search/SearchServiceTest.java | 4 +- .../ElasticSearchServiceTest.java | 2 +- .../indexbuilder/ESIndexBuilderTest.java | 2 +- .../elasticsearch/query/ESSearchDAOTest.java | 2 +- .../request/AggregationQueryBuilderTest.java | 2 +- .../request/CustomizedQueryHandlerTest.java | 16 +- .../query/request/SearchQueryBuilderTest.java | 10 +- .../request/SearchRequestHandlerTest.java | 6 +- .../CassandraTimelineServiceTest.java | 6 +- .../timeline/EbeanTimelineServiceTest.java | 6 +- .../timeline/TimelineServiceTest.java | 8 +- .../kafka/MaeConsumerApplicationTest.java | 4 +- ...eConsumerApplicationTestConfiguration.java | 4 +- .../kafka/hook/UpdateIndicesHookTest.java | 4 +- .../spring/MCLSpringTestConfiguration.java | 2 +- .../kafka/MceConsumerApplicationTest.java | 4 +- 
...eConsumerApplicationTestConfiguration.java | 2 +- metadata-service/configuration/build.gradle | 14 + .../metadata/config/AssetsConfiguration.java | 0 .../config/AuthPluginConfiguration.java | 0 .../metadata/config/DataHubConfiguration.java | 0 .../metadata/config/EntityProfileConfig.java | 0 .../EntityRegistryPluginConfiguration.java | 0 .../config/IngestionConfiguration.java | 0 .../metadata/config/PluginConfiguration.java | 0 .../metadata/config/PreProcessHooks.java | 0 .../metadata/config/QueriesTabConfig.java | 0 .../config/RetentionPluginConfiguration.java | 0 .../config/SystemUpdateConfiguration.java | 0 .../metadata/config/TestsConfiguration.java | 0 .../metadata/config/ViewsConfiguration.java | 0 .../metadata/config/VisualConfiguration.java | 0 .../config/cache/CacheConfiguration.java | 0 .../EntityDocCountCacheConfiguration.java | 0 .../cache/HomepageCacheConfiguration.java | 0 .../cache/PrimaryCacheConfiguration.java | 0 .../cache/SearchCacheConfiguration.java | 0 .../SearchLineageCacheConfiguration.java | 0 .../config/kafka/KafkaConfiguration.java | 0 .../config/kafka/ListenerConfiguration.java | 0 .../config/kafka/ProducerConfiguration.java | 0 .../kafka/SchemaRegistryConfiguration.java | 0 .../search/BuildIndicesConfiguration.java | 0 .../config/search/CustomConfiguration.java | 2 +- .../search/ElasticSearchConfiguration.java | 2 - .../search/ExactMatchConfiguration.java | 0 .../search/GraphQueryConfiguration.java | 0 .../config/search/PartialConfiguration.java | 0 .../config/search/SearchConfiguration.java | 0 .../search/custom/BoolQueryConfiguration.java | 0 .../custom/CustomSearchConfiguration.java | 0 .../search/custom/QueryConfiguration.java | 35 ++ .../telemetry/TelemetryConfiguration.java | 2 +- .../BatchWriteOperationsOptions.java | 0 .../src/main/resources/application.yml | 0 metadata-service/factories/build.gradle | 1 + .../auth/DataHubTokenServiceFactory.java | 4 +- .../factory/config/ConfigurationProvider.java | 10 +- 
.../factory/entity/EntityServiceFactory.java | 3 +- .../kafka/DataHubKafkaProducerFactory.java | 2 +- .../kafka/KafkaEventConsumerFactory.java | 2 +- .../kafka/SimpleKafkaConsumerFactory.java | 2 +- .../DUHESchemaRegistryFactory.java | 2 +- .../InternalSchemaRegistryFactory.java | 6 +- .../RecentlyEditedCandidateSourceFactory.java | 4 +- .../search/ElasticSearchServiceFactory.java | 6 +- .../boot/steps/BackfillBrowsePathsV2Step.java | 5 +- .../steps/UpgradeDefaultBrowsePathsStep.java | 4 +- .../IngestDataPlatformInstancesStepTest.java | 6 +- .../openapi/util/MappingUtil.java | 3 +- .../java/entities/EntitiesControllerTest.java | 2 +- .../src/test/java/mock/MockEntityService.java | 7 +- .../resources/entity/AspectResource.java | 3 +- .../entity/BatchIngestionRunResource.java | 2 +- .../resources/entity/EntityResource.java | 3 +- .../resources/entity/AspectResourceTest.java | 6 +- .../registry/SchemaRegistryController.java | 2 +- metadata-service/services/README.md | 5 + metadata-service/services/build.gradle | 73 ++++ .../DataHubUsageEventConstants.java | 0 .../datahubusage/DataHubUsageEventType.java | 0 .../linkedin/metadata/entity/AspectUtils.java | 0 .../metadata/entity/DeleteEntityService.java | 2 +- .../metadata/entity/DeleteEntityUtils.java | 0 .../metadata/entity/EntityService.java | 311 ++++++++++++++++++ .../metadata/entity/IngestProposalResult.java | 12 + .../linkedin/metadata/entity/ListResult.java | 0 .../metadata/entity/RetentionService.java | 2 +- .../metadata/entity/RollbackResult.java | 0 .../metadata/entity/RollbackRunResult.java | 0 .../metadata/entity/UpdateAspectResult.java | 21 ++ .../restoreindices/RestoreIndicesArgs.java | 0 .../restoreindices/RestoreIndicesResult.java | 0 .../retention/BulkApplyRetentionArgs.java | 0 .../retention/BulkApplyRetentionResult.java | 0 .../com/linkedin/metadata/graph/Edge.java | 0 .../linkedin/metadata/graph/GraphClient.java | 0 .../linkedin/metadata/graph/GraphFilters.java | 0 
.../metadata/graph/GraphIndexUtils.java | 0 .../linkedin/metadata/graph/GraphService.java | 15 +- .../metadata/graph/RelatedEntitiesResult.java | 0 .../metadata/graph/RelatedEntity.java | 0 .../RecommendationsService.java | 0 .../DomainsCandidateSource.java | 0 .../EntitySearchAggregationSource.java | 0 .../RecentlySearchedSource.java | 0 .../candidatesource/RecommendationSource.java | 0 .../candidatesource/RecommendationUtils.java | 0 .../candidatesource/TopPlatformsSource.java | 0 .../candidatesource/TopTagsSource.java | 0 .../candidatesource/TopTermsSource.java | 0 .../ranker/RecommendationModuleRanker.java | 0 .../ranker/SimpleRecommendationRanker.java | 0 .../registry/SchemaRegistryService.java | 2 +- .../registry/SchemaRegistryServiceImpl.java | 2 +- .../metadata/resource/ResourceReference.java | 0 .../metadata/resource/SubResourceType.java | 0 .../metadata/search/EntitySearchService.java | 4 +- .../metadata/search/utils/QueryUtils.java | 5 +- .../metadata/secret/SecretService.java | 0 .../metadata/service/BaseService.java | 0 .../metadata/service/DataProductService.java | 0 .../metadata/service/DomainService.java | 0 .../metadata/service/GlossaryTermService.java | 0 .../metadata/service/LineageService.java | 0 .../metadata/service/OwnerService.java | 0 .../service/OwnershipTypeService.java | 0 .../metadata/service/QueryService.java | 0 .../metadata/service/SettingsService.java | 0 .../linkedin/metadata/service/TagService.java | 0 .../metadata/service/ViewService.java | 0 .../metadata/shared/ValidationUtils.java | 0 .../systemmetadata/SystemMetadataService.java | 0 .../metadata/timeline/SemanticVersion.java | 0 .../metadata/timeline/TimelineService.java | 0 .../timeline/data/ChangeCategory.java | 0 .../metadata/timeline/data/ChangeEvent.java | 0 .../timeline/data/ChangeOperation.java | 0 .../timeline/data/ChangeTransaction.java | 0 .../timeline/data/PatchOperation.java | 0 .../timeline/data/SemanticChangeType.java | 0 .../timeline/data/SemanticDifference.java 
| 0 .../timeseries/TimeseriesAspectService.java | 0 .../metadata/service/DomainServiceTest.java | 8 +- .../service/GlossaryTermServiceTest.java | 4 +- .../metadata/service/LineageServiceTest.java | 15 +- .../metadata/service/OwnerServiceTest.java | 4 +- .../service/OwnershipTypeServiceTest.java | 2 +- .../metadata/service/QueryServiceTest.java | 0 .../metadata/service/SettingsServiceTest.java | 0 .../metadata/service/TagServiceTest.java | 4 +- .../metadata/service/ViewServiceTest.java | 4 +- .../gms/servlet/ConfigSearchExport.java | 2 +- settings.gradle | 3 +- 209 files changed, 981 insertions(+), 495 deletions(-) delete mode 100644 metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java rename metadata-io/src/main/java/com/linkedin/metadata/entity/{EntityService.java => EntityServiceImpl.java} (98%) create mode 100644 metadata-service/configuration/build.gradle rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/AssetsConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/EntityProfileConfig.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/IngestionConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/PreProcessHooks.java (100%) rename {metadata-io => 
metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/QueriesTabConfig.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/SystemUpdateConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/TestsConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/ViewsConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/VisualConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/cache/EntityDocCountCacheConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/cache/HomepageCacheConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/cache/PrimaryCacheConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/cache/SearchCacheConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/cache/SearchLineageCacheConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/kafka/ListenerConfiguration.java (100%) rename {metadata-io => 
metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/kafka/SchemaRegistryConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/BuildIndicesConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java (75%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/ExactMatchConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/PartialConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java (100%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java (100%) create mode 100644 metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java rename {metadata-io/src/main/java/com/linkedin/metadata => metadata-service/configuration/src/main/java/com/linkedin/metadata/config}/telemetry/TelemetryConfiguration.java (92%) rename {metadata-io => metadata-service/configuration}/src/main/java/com/linkedin/metadata/timeseries/BatchWriteOperationsOptions.java (100%) rename 
metadata-service/{factories => configuration}/src/main/resources/application.yml (100%) create mode 100644 metadata-service/services/README.md create mode 100644 metadata-service/services/build.gradle rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventConstants.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/AspectUtils.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java (99%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java (100%) create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/ListResult.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/RetentionService.java (99%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/RollbackResult.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java (100%) create mode 100644 metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java (100%) rename {metadata-io => 
metadata-service/services}/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/graph/Edge.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/graph/GraphClient.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/graph/GraphFilters.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/graph/GraphIndexUtils.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/graph/GraphService.java (93%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/graph/RelatedEntitiesResult.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/graph/RelatedEntity.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationUtils.java (100%) rename {metadata-io => 
metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/ranker/RecommendationModuleRanker.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/recommendation/ranker/SimpleRecommendationRanker.java (100%) rename {metadata-io/src/main/java/com/linkedin/metadata/schema => metadata-service/services/src/main/java/com/linkedin/metadata}/registry/SchemaRegistryService.java (89%) rename {metadata-io/src/main/java/com/linkedin/metadata/schema => metadata-service/services/src/main/java/com/linkedin/metadata}/registry/SchemaRegistryServiceImpl.java (98%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/resource/ResourceReference.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/resource/SubResourceType.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/search/EntitySearchService.java (97%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java (98%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/secret/SecretService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/BaseService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/DataProductService.java (100%) rename {metadata-io => 
metadata-service/services}/src/main/java/com/linkedin/metadata/service/DomainService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/GlossaryTermService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/LineageService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/OwnerService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/OwnershipTypeService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/QueryService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/SettingsService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/TagService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/service/ViewService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/shared/ValidationUtils.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/SemanticVersion.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/TimelineService.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/data/ChangeCategory.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/data/ChangeEvent.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/data/ChangeOperation.java (100%) rename {metadata-io => 
metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/data/ChangeTransaction.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/data/PatchOperation.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/data/SemanticChangeType.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeline/data/SemanticDifference.java (100%) rename {metadata-io => metadata-service/services}/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java (100%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/DomainServiceTest.java (98%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/GlossaryTermServiceTest.java (99%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/LineageServiceTest.java (99%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/OwnerServiceTest.java (98%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/OwnershipTypeServiceTest.java (99%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/QueryServiceTest.java (100%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/SettingsServiceTest.java (100%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/TagServiceTest.java (99%) rename {metadata-io => metadata-service/services}/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java (99%) diff --git a/.github/scripts/check_event_type.py b/.github/scripts/check_event_type.py index f575164a07fc1..c936497a2d307 100644 --- a/.github/scripts/check_event_type.py +++ b/.github/scripts/check_event_type.py @@ -1,7 +1,7 @@ import sys java_events 
= set() -with open("./metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java") as java_file: +with open("./metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java") as java_file: for line in java_file: if '''Event"''' not in line: continue diff --git a/build.gradle b/build.gradle index a13965e057a7c..605b4fcc050e7 100644 --- a/build.gradle +++ b/build.gradle @@ -129,6 +129,7 @@ project.ext.externalDependency = [ 'jsonSimple': 'com.googlecode.json-simple:json-simple:1.1.1', 'jsonSmart': 'net.minidev:json-smart:2.4.9', 'json': 'org.json:json:20230227', + 'junit': 'junit:junit:4.13.2', 'junitJupiterApi': "org.junit.jupiter:junit-jupiter-api:$junitJupiterVersion", 'junitJupiterParams': "org.junit.jupiter:junit-jupiter-params:$junitJupiterVersion", 'junitJupiterEngine': "org.junit.jupiter:junit-jupiter-engine:$junitJupiterVersion", diff --git a/datahub-graphql-core/build.gradle b/datahub-graphql-core/build.gradle index 12ce7c090c869..8fd45033373dc 100644 --- a/datahub-graphql-core/build.gradle +++ b/datahub-graphql-core/build.gradle @@ -7,6 +7,8 @@ dependencies { compile project(':metadata-service:restli-client') compile project(':metadata-service:auth-impl') compile project(':metadata-service:auth-config') + compile project(':metadata-service:configuration') + compile project(':metadata-service:services') compile project(':metadata-io') compile project(':metadata-utils') diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java index 99d50cdcd6b97..f22568602d6b4 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngine.java @@ -37,7 +37,6 @@ import com.linkedin.datahub.graphql.generated.CorpUser; import 
com.linkedin.datahub.graphql.generated.CorpUserInfo; import com.linkedin.datahub.graphql.generated.CorpUserViewsSettings; -import com.linkedin.datahub.graphql.generated.OwnershipTypeEntity; import com.linkedin.datahub.graphql.generated.Dashboard; import com.linkedin.datahub.graphql.generated.DashboardInfo; import com.linkedin.datahub.graphql.generated.DashboardStatsSummary; @@ -63,9 +62,9 @@ import com.linkedin.datahub.graphql.generated.InstitutionalMemoryMetadata; import com.linkedin.datahub.graphql.generated.LineageRelationship; import com.linkedin.datahub.graphql.generated.ListAccessTokenResult; -import com.linkedin.datahub.graphql.generated.ListOwnershipTypesResult; import com.linkedin.datahub.graphql.generated.ListDomainsResult; import com.linkedin.datahub.graphql.generated.ListGroupsResult; +import com.linkedin.datahub.graphql.generated.ListOwnershipTypesResult; import com.linkedin.datahub.graphql.generated.ListQueriesResult; import com.linkedin.datahub.graphql.generated.ListTestsResult; import com.linkedin.datahub.graphql.generated.ListViewsResult; @@ -80,6 +79,7 @@ import com.linkedin.datahub.graphql.generated.MLPrimaryKeyProperties; import com.linkedin.datahub.graphql.generated.Notebook; import com.linkedin.datahub.graphql.generated.Owner; +import com.linkedin.datahub.graphql.generated.OwnershipTypeEntity; import com.linkedin.datahub.graphql.generated.PolicyMatchCriterionValue; import com.linkedin.datahub.graphql.generated.QueryEntity; import com.linkedin.datahub.graphql.generated.QuerySubject; @@ -196,9 +196,9 @@ import com.linkedin.datahub.graphql.resolvers.mutate.UpdateParentNodeResolver; import com.linkedin.datahub.graphql.resolvers.mutate.UpdateUserSettingResolver; import com.linkedin.datahub.graphql.resolvers.operation.ReportOperationResolver; +import com.linkedin.datahub.graphql.resolvers.ownership.CreateOwnershipTypeResolver; import com.linkedin.datahub.graphql.resolvers.ownership.DeleteOwnershipTypeResolver; import 
com.linkedin.datahub.graphql.resolvers.ownership.ListOwnershipTypesResolver; -import com.linkedin.datahub.graphql.resolvers.ownership.CreateOwnershipTypeResolver; import com.linkedin.datahub.graphql.resolvers.ownership.UpdateOwnershipTypeResolver; import com.linkedin.datahub.graphql.resolvers.policy.DeletePolicyResolver; import com.linkedin.datahub.graphql.resolvers.policy.GetGrantedPrivilegesResolver; @@ -303,6 +303,7 @@ import com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.config.ViewsConfiguration; import com.linkedin.metadata.config.VisualConfiguration; +import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; @@ -312,12 +313,11 @@ import com.linkedin.metadata.recommendation.RecommendationsService; import com.linkedin.metadata.secret.SecretService; import com.linkedin.metadata.service.DataProductService; +import com.linkedin.metadata.service.LineageService; import com.linkedin.metadata.service.OwnershipTypeService; import com.linkedin.metadata.service.QueryService; import com.linkedin.metadata.service.SettingsService; import com.linkedin.metadata.service.ViewService; -import com.linkedin.metadata.service.LineageService; -import com.linkedin.metadata.telemetry.TelemetryConfiguration; import com.linkedin.metadata.timeline.TimelineService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.version.GitVersion; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java index 1c4cd09b329d6..cbcf42c4f93d9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java +++ 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/GmsGraphQLEngineArgs.java @@ -16,6 +16,7 @@ import com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.config.ViewsConfiguration; import com.linkedin.metadata.config.VisualConfiguration; +import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.graph.GraphClient; import com.linkedin.metadata.graph.SiblingGraphService; @@ -28,7 +29,6 @@ import com.linkedin.metadata.service.QueryService; import com.linkedin.metadata.service.SettingsService; import com.linkedin.metadata.service.ViewService; -import com.linkedin.metadata.telemetry.TelemetryConfiguration; import com.linkedin.metadata.timeline.TimelineService; import com.linkedin.metadata.timeseries.TimeseriesAspectService; import com.linkedin.metadata.version.GitVersion; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java index 876f3f08c8e25..2c55bc79fe501 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/config/AppConfigResolver.java @@ -26,8 +26,8 @@ import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.config.ViewsConfiguration; -import com.linkedin.metadata.telemetry.TelemetryConfiguration; import com.linkedin.metadata.config.VisualConfiguration; +import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import com.linkedin.metadata.version.GitVersion; import graphql.schema.DataFetcher; import graphql.schema.DataFetchingEnvironment; diff --git 
a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java index 8f4d538ca67ec..1886db62ae450 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolver.java @@ -1,5 +1,6 @@ package com.linkedin.datahub.graphql.resolvers.ingest.execution; +import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringMap; import com.linkedin.datahub.graphql.QueryContext; @@ -9,7 +10,6 @@ import com.linkedin.entity.client.EntityClient; import com.linkedin.execution.ExecutionRequestInput; import com.linkedin.execution.ExecutionRequestSource; -import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.key.ExecutionRequestKey; import com.linkedin.metadata.utils.EntityKeyUtils; import com.linkedin.metadata.utils.IngestionUtils; diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java index 1922a02fc1ca0..e2dbf1d3f9c99 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/LabelUtils.java @@ -135,7 +135,8 @@ public static void addTermsToResource( ) throws URISyntaxException { if (subResource == null || subResource.equals("")) { com.linkedin.common.GlossaryTerms terms = - (com.linkedin.common.GlossaryTerms) getAspectFromEntity(resourceUrn.toString(), 
GLOSSARY_TERM_ASPECT_NAME, entityService, new GlossaryTerms()); + (com.linkedin.common.GlossaryTerms) getAspectFromEntity(resourceUrn.toString(), GLOSSARY_TERM_ASPECT_NAME, + entityService, new GlossaryTerms()); terms.setAuditStamp(getAuditStamp(actor)); if (!terms.hasTerms()) { @@ -320,7 +321,8 @@ private static MetadataChangeProposal buildRemoveTagsToEntityProposal( EntityService entityService ) { com.linkedin.common.GlobalTags tags = - (com.linkedin.common.GlobalTags) getAspectFromEntity(resource.getResourceUrn(), TAGS_ASPECT_NAME, entityService, new GlobalTags()); + (com.linkedin.common.GlobalTags) getAspectFromEntity(resource.getResourceUrn(), TAGS_ASPECT_NAME, + entityService, new GlobalTags()); if (!tags.hasTags()) { tags.setTags(new TagAssociationArray()); @@ -357,7 +359,8 @@ private static MetadataChangeProposal buildAddTagsToEntityProposal( EntityService entityService ) throws URISyntaxException { com.linkedin.common.GlobalTags tags = - (com.linkedin.common.GlobalTags) getAspectFromEntity(resource.getResourceUrn(), TAGS_ASPECT_NAME, entityService, new GlobalTags()); + (com.linkedin.common.GlobalTags) getAspectFromEntity(resource.getResourceUrn(), TAGS_ASPECT_NAME, + entityService, new GlobalTags()); if (!tags.hasTags()) { tags.setTags(new TagAssociationArray()); @@ -449,7 +452,8 @@ private static MetadataChangeProposal buildAddTermsToEntityProposal( EntityService entityService ) throws URISyntaxException { com.linkedin.common.GlossaryTerms terms = - (com.linkedin.common.GlossaryTerms) getAspectFromEntity(resource.getResourceUrn(), GLOSSARY_TERM_ASPECT_NAME, entityService, new GlossaryTerms()); + (com.linkedin.common.GlossaryTerms) getAspectFromEntity(resource.getResourceUrn(), GLOSSARY_TERM_ASPECT_NAME, + entityService, new GlossaryTerms()); terms.setAuditStamp(getAuditStamp(actor)); if (!terms.hasTerms()) { diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java 
b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java index a08419b5226b4..d8a92fb3f6607 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/mutate/util/OwnerUtils.java @@ -72,8 +72,7 @@ public static void removeOwnersFromResources( private static MetadataChangeProposal buildAddOwnersProposal(List owners, Urn resourceUrn, Urn actor, EntityService entityService) { Ownership ownershipAspect = (Ownership) getAspectFromEntity( resourceUrn.toString(), - Constants.OWNERSHIP_ASPECT_NAME, - entityService, + Constants.OWNERSHIP_ASPECT_NAME, entityService, new Ownership()); for (OwnerInput input : owners) { addOwner(ownershipAspect, UrnUtils.getUrn(input.getOwnerUrn()), input.getType(), UrnUtils.getUrn(input.getOwnershipTypeUrn())); diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java index 782760aca744b..5f4f8dd974328 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/search/GetQuickFiltersResolver.java @@ -33,7 +33,6 @@ import static com.linkedin.datahub.graphql.resolvers.ResolverUtils.bindArgument; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.SEARCHABLE_ENTITY_TYPES; import static com.linkedin.datahub.graphql.resolvers.search.SearchUtils.resolveView; -import static com.linkedin.datahub.graphql.types.mappers.MapperUtils.*; @Slf4j diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolverTest.java 
b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolverTest.java index 35043fa0879f3..7973e49c6efdf 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateIngestionExecutionRequestResolverTest.java @@ -1,6 +1,7 @@ package com.linkedin.datahub.graphql.resolvers.ingest.execution; import com.datahub.authentication.Authentication; +import com.linkedin.metadata.config.IngestionConfiguration; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.datahub.graphql.QueryContext; @@ -11,7 +12,6 @@ import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.entity.client.EntityClient; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.mxe.MetadataChangeProposal; import com.linkedin.r2.RemoteInvocationException; import graphql.schema.DataFetchingEnvironment; diff --git a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolverTest.java b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolverTest.java index 0eb9366f0493b..75df240441965 100644 --- a/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolverTest.java +++ b/datahub-graphql-core/src/test/java/com/linkedin/datahub/graphql/resolvers/ingest/execution/CreateTestConnectionRequestResolverTest.java @@ -1,10 +1,10 @@ package com.linkedin.datahub.graphql.resolvers.ingest.execution; import com.datahub.authentication.Authentication; +import com.linkedin.metadata.config.IngestionConfiguration; import 
com.linkedin.datahub.graphql.QueryContext; import com.linkedin.datahub.graphql.generated.CreateTestConnectionRequestInput; import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.mxe.MetadataChangeProposal; import graphql.schema.DataFetchingEnvironment; import org.mockito.Mockito; diff --git a/datahub-upgrade/build.gradle b/datahub-upgrade/build.gradle index 679e54871cbc8..ad2bf02bfdcc7 100644 --- a/datahub-upgrade/build.gradle +++ b/datahub-upgrade/build.gradle @@ -15,6 +15,7 @@ dependencies { compile project(':metadata-io') compile project(':metadata-service:factories') compile project(':metadata-service:restli-client') + compile project(':metadata-service:configuration') implementation externalDependency.charle compile externalDependency.javaxInject diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java index 689b1fb997f38..6553bb80bb1fa 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/DataMigrationStep.java @@ -9,11 +9,11 @@ import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; import com.linkedin.metadata.Constants; +import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.utils.PegasusUtils; import com.datahub.util.RecordUtils; -import com.linkedin.metadata.entity.EntityService; import com.linkedin.metadata.entity.ebean.EbeanAspectV1; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.models.EntitySpec; diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java 
b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java index d1715ce7d66a6..c12ff201faf22 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/nocode/NoCodeUpgrade.java @@ -33,8 +33,7 @@ public NoCodeUpgrade( final Authentication systemAuthentication, final RestliEntityClient entityClient) { _steps = buildUpgradeSteps( - server, - entityService, + server, entityService, entityRegistry, systemAuthentication, entityClient); diff --git a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java index 99c5e7444d16b..f60aa283c0140 100644 --- a/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java +++ b/datahub-upgrade/src/main/java/com/linkedin/datahub/upgrade/system/elasticsearch/steps/CleanIndicesStep.java @@ -1,11 +1,11 @@ package com.linkedin.datahub.upgrade.system.elasticsearch.steps; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.datahub.upgrade.UpgradeContext; import com.linkedin.datahub.upgrade.UpgradeStep; import com.linkedin.datahub.upgrade.UpgradeStepResult; import com.linkedin.datahub.upgrade.impl.DefaultUpgradeStepResult; import com.linkedin.datahub.upgrade.system.elasticsearch.util.IndexUtils; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.search.elasticsearch.indexbuilder.ESIndexBuilder; import com.linkedin.metadata.shared.ElasticSearchIndexed; import lombok.extern.slf4j.Slf4j; diff --git a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java index 
8ae3a832d0aaf..fefc853be8c0b 100644 --- a/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java +++ b/datahub-upgrade/src/test/java/com/linkedin/datahub/upgrade/UpgradeCliApplicationTestConfiguration.java @@ -22,7 +22,7 @@ public class UpgradeCliApplicationTestConfiguration { private EbeanServer ebeanServer; @MockBean - private EntityService entityService; + private EntityService _entityService; @MockBean private SearchService searchService; diff --git a/docs-website/sidebars.js b/docs-website/sidebars.js index 85fa61b88ab7e..b5ffd1964d7c1 100644 --- a/docs-website/sidebars.js +++ b/docs-website/sidebars.js @@ -640,6 +640,7 @@ module.exports = { // "metadata-jobs/README", // "docs/how/add-user-data", // "docs/_feature-guide-template" + // - "metadata-service/services/README" // ], ], }; diff --git a/docs/deploy/aws.md b/docs/deploy/aws.md index a8da85c16362d..7b01ffa02a744 100644 --- a/docs/deploy/aws.md +++ b/docs/deploy/aws.md @@ -322,7 +322,7 @@ and [here](../../metadata-service/factories/src/main/java/com/linkedin/gms/facto . A mapping between the property name used in the above two files and the name used in docker/env file can be -found [here](../../metadata-service/factories/src/main/resources/application.yml). +found [here](../../metadata-service/configuration/src/main/resources/application.yml). 
### Managed Streaming for Apache Kafka (MSK) diff --git a/docs/dev-guides/timeline.md b/docs/dev-guides/timeline.md index 7f90d17f322ca..966e659b90991 100644 --- a/docs/dev-guides/timeline.md +++ b/docs/dev-guides/timeline.md @@ -18,8 +18,8 @@ For the visually inclined, here is a conceptual diagram that illustrates how to ## Change Event Each modification is modeled as a -[ChangeEvent](../../metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeEvent.java) -which are grouped under [ChangeTransactions](../../metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeTransaction.java) +[ChangeEvent](../../metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeEvent.java) +which are grouped under [ChangeTransactions](../../metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeTransaction.java) based on timestamp. A `ChangeEvent` consists of: - `changeType`: An operational type for the change, either `ADD`, `MODIFY`, or `REMOVE` diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 2123b07d166d4..b8ecd689ce381 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -107,7 +107,7 @@ Helm with `--atomic`: In general, it is recommended to not use the `--atomic` se - #6243 apache-ranger authorizer is no longer the core part of DataHub GMS, and it is shifted as plugin. Please refer updated documentation [Configuring Authorization with Apache Ranger](./configuring-authorization-with-apache-ranger.md#configuring-your-datahub-deployment) for configuring `apache-ranger-plugin` in DataHub GMS. - #6243 apache-ranger authorizer as plugin is not supported in DataHub Kubernetes deployment. -- #6243 Authentication and Authorization plugins configuration are removed from [application.yml](../../metadata-service/factories/src/main/resources/application.yml). 
Refer documentation [Migration Of Plugins From application.yml](../plugins.md#migration-of-plugins-from-applicationyml) for migrating any existing custom plugins. +- #6243 Authentication and Authorization plugins configuration are removed from [application.yml](../../metadata-service/configuration/src/main/resources/application.yml). Refer documentation [Migration Of Plugins From application.yml](../plugins.md#migration-of-plugins-from-applicationyml) for migrating any existing custom plugins. - `datahub check graph-consistency` command has been removed. It was a beta API that we had considered but decided there are better solutions for this. So removing this. - `graphql_url` option of `powerbi-report-server` source deprecated as the options is not used. - #6789 BigQuery ingestion: If `enable_legacy_sharded_table_support` is set to False, sharded table names will be suffixed with \_yyyymmdd to make sure they don't clash with non-sharded tables. This means if stateful ingestion is enabled then old sharded tables will be recreated with a new id and attached tags/glossary terms/etc will need to be added again. _This behavior is not enabled by default yet, but will be enabled by default in a future release._ diff --git a/docs/plugins.md b/docs/plugins.md index 772c877cff646..1d49d99ffc328 100644 --- a/docs/plugins.md +++ b/docs/plugins.md @@ -253,10 +253,10 @@ All other access are forbidden for the plugin. > Disclaimer: In BETA version your plugin can access any port and can read/write to any location on file system, however you should implement the plugin as per above access permission to keep your plugin compatible with upcoming release of DataHub. ## Migration Of Plugins From application.yml -If you have any custom Authentication or Authorization plugin define in `authorization` or `authentication` section of [application.yml](../metadata-service/factories/src/main/resources/application.yml) then migrate them as per below steps. 
+If you have any custom Authentication or Authorization plugin define in `authorization` or `authentication` section of [application.yml](../metadata-service/configuration/src/main/resources/application.yml) then migrate them as per below steps. 1. Implement Plugin: For Authentication Plugin follow steps of [Implementing an Authentication Plugin](#implementing-an-authentication-plugin) and for Authorization Plugin follow steps of [Implementing an Authorization Plugin](#implementing-an-authorization-plugin) -2. Install Plugin: Install the plugins as per steps mentioned in [Plugin Installation](#plugin-installation). Here you need to map the configuration from [application.yml](../metadata-service/factories/src/main/resources/application.yml) to configuration in `config.yml`. This mapping from `application.yml` to `config.yml` is described below +2. Install Plugin: Install the plugins as per steps mentioned in [Plugin Installation](#plugin-installation). Here you need to map the configuration from [application.yml](../metadata-service/configuration/src/main/resources/application.yml) to configuration in `config.yml`. 
This mapping from `application.yml` to `config.yml` is described below **Mapping for Authenticators** diff --git a/ingestion-scheduler/build.gradle b/ingestion-scheduler/build.gradle index 3dec8ee400150..b15b5b8c52673 100644 --- a/ingestion-scheduler/build.gradle +++ b/ingestion-scheduler/build.gradle @@ -4,6 +4,7 @@ dependencies { compile project(path: ':metadata-models') compile project(path: ':metadata-io') compile project(path: ':metadata-service:restli-client') + compile project(':metadata-service:configuration') implementation externalDependency.slf4jApi compileOnly externalDependency.lombok annotationProcessor externalDependency.lombok diff --git a/ingestion-scheduler/src/main/java/com/datahub/metadata/ingestion/IngestionScheduler.java b/ingestion-scheduler/src/main/java/com/datahub/metadata/ingestion/IngestionScheduler.java index 5d50c0a326054..e71fe6266b955 100644 --- a/ingestion-scheduler/src/main/java/com/datahub/metadata/ingestion/IngestionScheduler.java +++ b/ingestion-scheduler/src/main/java/com/datahub/metadata/ingestion/IngestionScheduler.java @@ -1,6 +1,7 @@ package com.datahub.metadata.ingestion; import com.datahub.authentication.Authentication; +import com.linkedin.metadata.config.IngestionConfiguration; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableSet; import com.linkedin.common.urn.Urn; @@ -17,7 +18,6 @@ import com.linkedin.ingestion.DataHubIngestionSourceInfo; import com.linkedin.ingestion.DataHubIngestionSourceSchedule; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.key.ExecutionRequestKey; import com.linkedin.metadata.query.ListResult; import com.linkedin.metadata.utils.GenericRecordUtils; diff --git a/ingestion-scheduler/src/test/java/com/datahub/metadata/ingestion/IngestionSchedulerTest.java b/ingestion-scheduler/src/test/java/com/datahub/metadata/ingestion/IngestionSchedulerTest.java index 
3397d07d69ceb..51b7fe85f4922 100644 --- a/ingestion-scheduler/src/test/java/com/datahub/metadata/ingestion/IngestionSchedulerTest.java +++ b/ingestion-scheduler/src/test/java/com/datahub/metadata/ingestion/IngestionSchedulerTest.java @@ -1,6 +1,7 @@ package com.datahub.metadata.ingestion; import com.datahub.authentication.Authentication; +import com.linkedin.metadata.config.IngestionConfiguration; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.UrnArray; @@ -10,7 +11,6 @@ import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; import com.linkedin.metadata.client.JavaEntityClient; -import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.ingestion.DataHubIngestionSourceConfig; import com.linkedin.ingestion.DataHubIngestionSourceInfo; import com.linkedin.ingestion.DataHubIngestionSourceSchedule; diff --git a/li-utils/src/main/java/com/linkedin/metadata/Constants.java b/li-utils/src/main/java/com/linkedin/metadata/Constants.java index d37ced007133c..972f52b8824ce 100644 --- a/li-utils/src/main/java/com/linkedin/metadata/Constants.java +++ b/li-utils/src/main/java/com/linkedin/metadata/Constants.java @@ -354,6 +354,9 @@ public class Constants { public static final String ELASTICSEARCH_IMPLEMENTATION_OPENSEARCH = "opensearch"; public static final String ELASTICSEARCH_IMPLEMENTATION_ELASTICSEARCH = "elasticsearch"; + // DAO + public static final long LATEST_VERSION = 0; + private Constants() { } } diff --git a/metadata-io/build.gradle b/metadata-io/build.gradle index 88c11c664fe9e..98b741b6f51a6 100644 --- a/metadata-io/build.gradle +++ b/metadata-io/build.gradle @@ -12,15 +12,17 @@ dependencies { compile project(':metadata-events:mxe-avro-1.7') compile project(':metadata-events:mxe-registration') compile project(':metadata-events:mxe-utils-avro-1.7') - compile project(path: ':metadata-models') + compile project(':metadata-models') compile 
project(':metadata-service:restli-client') + compile project(':metadata-service:configuration') + compile project(':metadata-service:services') compile spec.product.pegasus.data compile spec.product.pegasus.generator compile externalDependency.dgraph4j exclude group: 'com.google.guava', module: 'guava' implementation externalDependency.slf4jApi - testImplementation project(path: ':metadata-integration:java:datahub-client') + testImplementation project(':metadata-integration:java:datahub-client') runtime externalDependency.logbackClassic compileOnly externalDependency.lombok implementation externalDependency.commonsCollections diff --git a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java index d541127d74093..911ab993e5789 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/client/JavaEntityClient.java @@ -368,8 +368,7 @@ public SearchResult searchAcrossEntities( @Nullable List facets) throws RemoteInvocationException { final SearchFlags finalFlags = searchFlags != null ? 
searchFlags : new SearchFlags().setFulltext(true); return ValidationUtils.validateSearchResult( - _searchService.searchAcrossEntities(entities, input, filter, null, start, count, finalFlags, facets), - _entityService); + _searchService.searchAcrossEntities(entities, input, filter, null, start, count, finalFlags, facets), _entityService); } @Nonnull @@ -406,8 +405,7 @@ public LineageSearchResult searchAcrossLineage(@Nonnull Urn sourceUrn, @Nonnull throws RemoteInvocationException { return ValidationUtils.validateLineageSearchResult( _lineageSearchService.searchAcrossLineage(sourceUrn, direction, entities, input, maxHops, filter, - sortCriterion, start, count, startTimeMillis, endTimeMillis, searchFlags), - _entityService); + sortCriterion, start, count, startTimeMillis, endTimeMillis, searchFlags), _entityService); } @Nonnull diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java b/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java deleted file mode 100644 index cc252aa3954d3..0000000000000 --- a/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java +++ /dev/null @@ -1,108 +0,0 @@ -package com.linkedin.metadata.config.search.custom; - -import com.fasterxml.jackson.annotation.JsonInclude; -import com.fasterxml.jackson.core.StreamReadConstraints; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.databind.annotation.JsonDeserialize; -import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; -import lombok.Builder; -import lombok.EqualsAndHashCode; -import lombok.Getter; -import lombok.ToString; -import lombok.extern.slf4j.Slf4j; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; -import org.elasticsearch.common.xcontent.NamedXContentRegistry; -import org.elasticsearch.common.xcontent.XContentParser; -import 
org.elasticsearch.common.xcontent.XContentType; -import org.elasticsearch.index.query.BoolQueryBuilder; -import org.elasticsearch.index.query.QueryBuilder; -import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; -import org.elasticsearch.search.SearchModule; - -import java.io.IOException; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Optional; - -import static com.linkedin.metadata.Constants.*; - - -@Slf4j -@Builder(toBuilder = true) -@Getter -@ToString -@EqualsAndHashCode -@JsonDeserialize(builder = QueryConfiguration.QueryConfigurationBuilder.class) -public class QueryConfiguration { - private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); - static { - OBJECT_MAPPER.setSerializationInclusion(JsonInclude.Include.NON_NULL); - int maxSize = Integer.parseInt(System.getenv().getOrDefault(INGESTION_MAX_SERIALIZED_STRING_LENGTH, MAX_JACKSON_STRING_SIZE)); - OBJECT_MAPPER.getFactory().setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); - } - private static final NamedXContentRegistry X_CONTENT_REGISTRY; - static { - SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); - X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); - } - - private String queryRegex; - @Builder.Default - private boolean simpleQuery = true; - @Builder.Default - private boolean exactMatchQuery = true; - @Builder.Default - private boolean prefixMatchQuery = true; - private BoolQueryConfiguration boolQuery; - private Map functionScore; - - public FunctionScoreQueryBuilder functionScoreQueryBuilder(QueryBuilder queryBuilder) { - return toFunctionScoreQueryBuilder(queryBuilder, functionScore); - } - - public Optional boolQueryBuilder(String query) { - if (boolQuery != null) { - log.debug("Using custom query configuration queryRegex: {}", queryRegex); - } - return Optional.ofNullable(boolQuery).map(bq -> 
toBoolQueryBuilder(query, bq)); - } - - @JsonPOJOBuilder(withPrefix = "") - public static class QueryConfigurationBuilder { - } - - private static BoolQueryBuilder toBoolQueryBuilder(String query, BoolQueryConfiguration boolQuery) { - try { - String jsonFragment = OBJECT_MAPPER.writeValueAsString(boolQuery) - .replace("\"{{query_string}}\"", OBJECT_MAPPER.writeValueAsString(query)); - XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, - LoggingDeprecationHandler.INSTANCE, jsonFragment); - return BoolQueryBuilder.fromXContent(parser); - } catch (IOException e) { - throw new RuntimeException(e); - } - } - - private static FunctionScoreQueryBuilder toFunctionScoreQueryBuilder(QueryBuilder queryBuilder, - Map params) { - try { - HashMap body = new HashMap<>(params); - if (!body.isEmpty()) { - log.debug("Using custom scoring functions: {}", body); - } - - body.put("query", OBJECT_MAPPER.readValue(queryBuilder.toString(), Map.class)); - - String jsonFragment = OBJECT_MAPPER.writeValueAsString(Map.of( - "function_score", body - )); - XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, - LoggingDeprecationHandler.INSTANCE, jsonFragment); - return (FunctionScoreQueryBuilder) FunctionScoreQueryBuilder.parseInnerQueryBuilder(parser); - } catch (IOException e) { - throw new RuntimeException(e); - } - } -} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java index ebb5139d35cae..bf74b1025267f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectDao.java @@ -21,9 +21,9 @@ * Requirements for any implementation: * 1. Being able to map its internal storage representation to {@link EntityAspect}; * 2. Honor the internal versioning semantics. The latest version of any aspect is set to 0 for efficient retrieval. 
- * In most cases only the latest state of an aspect will be fetched. See {@link EntityService} for more details. + * In most cases only the latest state of an aspect will be fetched. See {@link EntityServiceImpl} for more details. * - * TODO: This interface exposes {@link #runInTransactionWithRetry(Supplier, int)} because {@link EntityService} concerns + * TODO: This interface exposes {@link #runInTransactionWithRetry(Supplier, int)} because {@link EntityServiceImpl} concerns * itself with batching multiple commands into a single transaction. It exposes storage concerns somewhat and it'd be * worth looking into ways to move this responsibility inside {@link AspectDao} implementations. */ diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java index ed95e8564d5e9..8296edd615aad 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityAspect.java @@ -10,7 +10,7 @@ import java.sql.Timestamp; /** - * This is an internal representation of an entity aspect record {@link EntityService} and {@link AspectDao} + * This is an internal representation of an entity aspect record {@link EntityServiceImpl} and {@link AspectDao} * implementations are using. While {@link AspectDao} implementations have their own aspect record implementations, * they cary implementation details that should not leak outside. Therefore, this is the type to use in public * {@link AspectDao} methods. 
diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java similarity index 98% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java rename to metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java index e5d549b95754d..32c77b66679f9 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityServiceImpl.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.entity; import com.codahale.metrics.Timer; +import com.linkedin.metadata.config.PreProcessHooks; import com.datahub.util.RecordUtils; import com.datahub.util.exception.ModelConversionException; import com.fasterxml.jackson.core.JsonProcessingException; @@ -43,7 +44,6 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.aspect.Aspect; import com.linkedin.metadata.aspect.VersionedAspect; -import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectV2; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; @@ -97,7 +97,6 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.persistence.EntityNotFoundException; -import lombok.Value; import lombok.extern.slf4j.Slf4j; import static com.linkedin.metadata.Constants.*; @@ -134,7 +133,7 @@ * TODO: Consider whether we can abstract away virtual versioning semantics to subclasses of this class. 
*/ @Slf4j -public class EntityService { +public class EntityServiceImpl implements EntityService { /** * As described above, the latest version of an aspect should always take the value 0, with @@ -147,25 +146,6 @@ public class EntityService { OBJECT_MAPPER.getFactory().setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); } - @Value - public static class UpdateAspectResult { - Urn urn; - RecordTemplate oldValue; - RecordTemplate newValue; - SystemMetadata oldSystemMetadata; - SystemMetadata newSystemMetadata; - MetadataAuditOperation operation; - AuditStamp auditStamp; - long maxVersion; - } - - @Value - public static class IngestProposalResult { - Urn urn; - boolean didUpdate; - boolean queued; - } - private static final int DEFAULT_MAX_TRANSACTION_RETRY = 3; protected final AspectDao _aspectDao; @@ -176,9 +156,6 @@ public static class IngestProposalResult { private final Boolean _alwaysEmitChangeLog; private final UpdateIndicesService _updateIndicesService; private final PreProcessHooks _preProcessHooks; - public static final String DEFAULT_RUN_ID = "no-run-id-provided"; - public static final String BROWSE_PATHS = "browsePaths"; - public static final String DATA_PLATFORM_INSTANCE = "dataPlatformInstance"; protected static final int MAX_KEYS_PER_QUERY = 500; private static final int URN_NUM_BYTES_LIMIT = 512; @@ -186,7 +163,7 @@ public static class IngestProposalResult { // TODO(iprentic): Move this to a common utils location once used in other places private static final String DELIMITER_SEPARATOR = "␟"; - public EntityService( + public EntityServiceImpl( @Nonnull final AspectDao aspectDao, @Nonnull final EventProducer producer, @Nonnull final EntityRegistry entityRegistry, @@ -249,6 +226,7 @@ public Map> getLatestAspects( } @Nonnull + @Override public Map getLatestAspectsForUrn(@Nonnull final Urn urn, @Nonnull final Set aspectNames) { Map batchGetResults = getLatestAspect(new HashSet<>(Arrays.asList(urn)), aspectNames); @@ 
-273,6 +251,7 @@ public Map getLatestAspectsForUrn(@Nonnull final Urn urn * @return the {@link RecordTemplate} representation of the requested aspect object, or null if one cannot be found */ @Nullable + @Override public RecordTemplate getAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull long version) { log.debug("Invoked getAspect with urn: {}, aspectName: {}, version: {}", urn, aspectName, version); @@ -294,6 +273,7 @@ public RecordTemplate getAspect(@Nonnull final Urn urn, @Nonnull final String as * @return a map of {@link Urn} to {@link Entity} object */ @Nullable + @Override public EntityResponse getEntityV2( @Nonnull final String entityName, @Nonnull final Urn urn, @@ -310,6 +290,7 @@ public EntityResponse getEntityV2( * @param aspectNames set of aspects to fetch * @return a map of {@link Urn} to {@link Entity} object */ + @Override public Map getEntitiesV2( @Nonnull final String entityName, @Nonnull final Set urns, @@ -328,6 +309,7 @@ public Map getEntitiesV2( * @param aspectNames set of aspects to fetch * @return a map of {@link Urn} to {@link Entity} object */ + @Override public Map getEntitiesVersionedV2( @Nonnull final Set versionedUrns, @Nonnull final Set aspectNames) throws URISyntaxException { @@ -345,6 +327,7 @@ public Map getEntitiesVersionedV2( * @param aspectNames set of aspects to fetch * @return a map of {@link Urn} to {@link EnvelopedAspect} object */ + @Override public Map> getLatestEnvelopedAspects( // TODO: entityName is unused, can we remove this as a param? 
@Nonnull String entityName, @@ -368,6 +351,7 @@ public Map> getLatestEnvelopedAspects( * @param aspectNames set of aspects to fetch * @return a map of {@link Urn} to {@link EnvelopedAspect} object */ + @Override public Map> getVersionedEnvelopedAspects( @Nonnull Set versionedUrns, @Nonnull Set aspectNames) throws URISyntaxException { @@ -434,6 +418,7 @@ private Map> getCorrespondingAspects(Set listLatestAspects( @Nonnull final String entityName, @Nonnull final String aspectName, @@ -719,6 +707,7 @@ void validateUrn(@Nonnull final Urn urn) { } } + @Override public void ingestAspects(@Nonnull final Urn urn, @Nonnull List> aspectRecordsToIngest, @Nonnull final AuditStamp auditStamp, @Nullable SystemMetadata systemMetadata) { @@ -746,6 +735,7 @@ public void ingestAspects(@Nonnull final Urn urn, @Nonnull List logger) { RestoreIndicesResult result = new RestoreIndicesResult(); int ignored = 0; @@ -1217,6 +1212,7 @@ public RestoreIndicesResult restoreIndices(@Nonnull RestoreIndicesArgs args, @No * successful update * @return the {@link RecordTemplate} representation of the requested aspect object */ + @Override public RecordTemplate updateAspect( @Nonnull final Urn urn, @Nonnull final String entityName, @@ -1240,6 +1236,7 @@ public RecordTemplate updateAspect( * @param start the start offset * @param count the count */ + @Override public ListUrnsResult listUrns(@Nonnull final String entityName, final int start, final int count) { log.debug("Invoked listUrns with entityName: {}, start: {}, count: {}", entityName, start, count); @@ -1269,6 +1266,7 @@ public ListUrnsResult listUrns(@Nonnull final String entityName, final int start /** * Default implementations. Subclasses should feel free to override if it's more efficient to do so. 
*/ + @Override public Entity getEntity(@Nonnull final Urn urn, @Nonnull final Set aspectNames) { return getEntities(Collections.singleton(urn), aspectNames).values().stream().findFirst().orElse(null); } @@ -1283,6 +1281,7 @@ public Entity getEntity(@Nonnull final Urn urn, @Nonnull final Set aspec * @return a map of {@link Urn} to {@link Entity} object */ @Deprecated + @Override public Map getEntities(@Nonnull final Set urns, @Nonnull Set aspectNames) { log.debug("Invoked getEntities with urns {}, aspects {}", urns, aspectNames); if (urns.isEmpty()) { @@ -1293,6 +1292,7 @@ public Map getEntities(@Nonnull final Set urns, @Nonnull Set toEntity(entry.getValue()))); } + @Override public void produceMetadataAuditEvent(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nullable final RecordTemplate oldAspectValue, @Nullable final RecordTemplate newAspectValue, @Nullable final SystemMetadata oldSystemMetadata, @Nullable final SystemMetadata newSystemMetadata, @@ -1316,6 +1316,7 @@ protected Snapshot buildKeySnapshot(@Nonnull final Urn urn) { return toSnapshotUnion(toSnapshotRecord(urn, ImmutableList.of(toAspectUnion(urn, keyAspectValue)))); } + @Override public void produceMetadataAuditEventForKey(@Nonnull final Urn urn, @Nullable final SystemMetadata newSystemMetadata) { @@ -1332,11 +1333,13 @@ public void produceMetadataAuditEventForKey(@Nonnull final Urn urn, * @param aspectSpec AspectSpec of the aspect being updated * @param metadataChangeLog metadata change log to push into MCL kafka topic */ + @Override public void produceMetadataChangeLog(@Nonnull final Urn urn, AspectSpec aspectSpec, @Nonnull final MetadataChangeLog metadataChangeLog) { _producer.produceMetadataChangeLog(urn, aspectSpec, metadataChangeLog); } + @Override public void produceMetadataChangeLog(@Nonnull final Urn urn, @Nonnull String entityName, @Nonnull String aspectName, @Nonnull final AspectSpec aspectSpec, @Nullable final RecordTemplate oldAspectValue, @Nullable final RecordTemplate 
newAspectValue, @Nullable final SystemMetadata oldSystemMetadata, @@ -1346,11 +1349,13 @@ public void produceMetadataChangeLog(@Nonnull final Urn urn, @Nonnull String ent produceMetadataChangeLog(urn, aspectSpec, metadataChangeLog); } + @Override public RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName) { log.debug("Invoked getLatestAspect with urn {}, aspect {}", urn, aspectName); return getAspect(urn, aspectName, ASPECT_LATEST_VERSION); } + @Override public void ingestEntities(@Nonnull final List entities, @Nonnull final AuditStamp auditStamp, @Nonnull final List systemMetadata) { log.debug("Invoked ingestEntities with entities {}, audit stamp {}", entities, auditStamp); @@ -1358,6 +1363,7 @@ public void ingestEntities(@Nonnull final List entities, @Nonnull final .forEach(pair -> ingestEntity(pair.getFirst(), auditStamp, pair.getSecond())); } + @Override public void ingestEntity(Entity entity, AuditStamp auditStamp) { SystemMetadata generatedSystemMetadata = new SystemMetadata(); generatedSystemMetadata.setRunId(DEFAULT_RUN_ID); @@ -1366,6 +1372,7 @@ public void ingestEntity(Entity entity, AuditStamp auditStamp) { ingestEntity(entity, auditStamp, generatedSystemMetadata); } + @Override public void ingestEntity(@Nonnull Entity entity, @Nonnull AuditStamp auditStamp, @Nonnull SystemMetadata systemMetadata) { log.debug("Invoked ingestEntity with entity {}, audit stamp {} systemMetadata {}", entity, auditStamp, systemMetadata.toString()); @@ -1408,15 +1415,16 @@ private boolean isAspectMissing(String entityType, String aspectName, Set> generateDefaultAspectsIfMissing(@Nonnull final Urn urn, Set includedAspects) { Set aspectsToGet = new HashSet<>(); String entityType = urnToEntityName(urn); - boolean shouldCheckBrowsePath = isAspectMissing(entityType, BROWSE_PATHS, includedAspects); + boolean shouldCheckBrowsePath = isAspectMissing(entityType, BROWSE_PATHS_ASPECT_NAME, includedAspects); if (shouldCheckBrowsePath) { - 
aspectsToGet.add(BROWSE_PATHS); + aspectsToGet.add(BROWSE_PATHS_ASPECT_NAME); } boolean shouldCheckBrowsePathV2 = isAspectMissing(entityType, BROWSE_PATHS_V2_ASPECT_NAME, includedAspects); @@ -1424,9 +1432,9 @@ public List> generateDefaultAspectsIfMissing(@Nonnu aspectsToGet.add(BROWSE_PATHS_V2_ASPECT_NAME); } - boolean shouldCheckDataPlatform = isAspectMissing(entityType, DATA_PLATFORM_INSTANCE, includedAspects); + boolean shouldCheckDataPlatform = isAspectMissing(entityType, DATA_PLATFORM_INSTANCE_ASPECT_NAME, includedAspects); if (shouldCheckDataPlatform) { - aspectsToGet.add(DATA_PLATFORM_INSTANCE); + aspectsToGet.add(DATA_PLATFORM_INSTANCE_ASPECT_NAME); } List> aspects = new ArrayList<>(); @@ -1441,10 +1449,10 @@ public List> generateDefaultAspectsIfMissing(@Nonnu aspects.add(Pair.of(keyAspectName, keyAspect)); } - if (shouldCheckBrowsePath && latestAspects.get(BROWSE_PATHS) == null) { + if (shouldCheckBrowsePath && latestAspects.get(BROWSE_PATHS_ASPECT_NAME) == null) { try { BrowsePaths generatedBrowsePath = buildDefaultBrowsePath(urn); - aspects.add(Pair.of(BROWSE_PATHS, generatedBrowsePath)); + aspects.add(Pair.of(BROWSE_PATHS_ASPECT_NAME, generatedBrowsePath)); } catch (URISyntaxException e) { log.error("Failed to parse urn: {}", urn); } @@ -1459,9 +1467,9 @@ public List> generateDefaultAspectsIfMissing(@Nonnu } } - if (shouldCheckDataPlatform && latestAspects.get(DATA_PLATFORM_INSTANCE) == null) { + if (shouldCheckDataPlatform && latestAspects.get(DATA_PLATFORM_INSTANCE_ASPECT_NAME) == null) { DataPlatformInstanceUtils.buildDataPlatformInstance(entityType, keyAspect) - .ifPresent(aspect -> aspects.add(Pair.of(DATA_PLATFORM_INSTANCE, aspect))); + .ifPresent(aspect -> aspects.add(Pair.of(DATA_PLATFORM_INSTANCE_ASPECT_NAME, aspect))); } return aspects; @@ -1481,6 +1489,7 @@ private void ingestSnapshotUnion(@Nonnull final Snapshot snapshotUnion, @Nonnull ingestAspects(urn, aspectRecordsToIngest, auditStamp, systemMetadata); } + @Override public Snapshot 
buildSnapshot(@Nonnull final Urn urn, @Nonnull final RecordTemplate aspectValue) { // if the aspect value is the key, we do not need to include the key a second time if (PegasusUtils.getAspectNameFromSchema(aspectValue.schema()).equals(getKeyAspectName(urn))) { @@ -1499,20 +1508,24 @@ protected RecordTemplate buildKeyAspect(@Nonnull final Urn urn) { return EntityKeyUtils.convertUrnToEntityKey(urn, keySpec); } + @Override public AspectSpec getKeyAspectSpec(@Nonnull final Urn urn) { return getKeyAspectSpec(urnToEntityName(urn)); } + @Override public AspectSpec getKeyAspectSpec(@Nonnull final String entityName) { final EntitySpec spec = _entityRegistry.getEntitySpec(entityName); return spec.getKeyAspectSpec(); } + @Override public Optional getAspectSpec(@Nonnull final String entityName, @Nonnull final String aspectName) { final EntitySpec entitySpec = _entityRegistry.getEntitySpec(entityName); return Optional.ofNullable(entitySpec.getAspectSpec(aspectName)); } + @Override public String getKeyAspectName(@Nonnull final Urn urn) { final EntitySpec spec = _entityRegistry.getEntitySpec(urnToEntityName(urn)); final AspectSpec keySpec = spec.getKeyAspectSpec(); @@ -1577,10 +1590,12 @@ private Map> buildEntityToValidAspects(final EntityRegistry entry -> entry.getAspectSpecs().stream().map(AspectSpec::getName).collect(Collectors.toSet()))); } + @Override public EntityRegistry getEntityRegistry() { return _entityRegistry; } + @Override public void setRetentionService(RetentionService retentionService) { _retentionService = retentionService; } @@ -1589,19 +1604,23 @@ protected Set getEntityAspectNames(final Urn entityUrn) { return getEntityAspectNames(urnToEntityName(entityUrn)); } + @Override public Set getEntityAspectNames(final String entityName) { return _entityToValidAspects.get(entityName); } + @Override public void setWritable(boolean canWrite) { log.debug("Setting writable to {}", canWrite); _aspectDao.setWritable(canWrite); } + @Override public RollbackRunResult 
rollbackRun(List aspectRows, String runId, boolean hardDelete) { return rollbackWithConditions(aspectRows, Collections.singletonMap("runId", runId), hardDelete); } + @Override public RollbackRunResult rollbackWithConditions(List aspectRows, Map conditions, boolean hardDelete) { List removedAspects = new ArrayList<>(); AtomicInteger rowsDeletedFromEntityDeletion = new AtomicInteger(0); @@ -1630,6 +1649,7 @@ public RollbackRunResult rollbackWithConditions(List aspectRow return new RollbackRunResult(removedAspects, rowsDeletedFromEntityDeletion.get()); } + @Override public RollbackRunResult deleteUrn(Urn urn) { List removedAspects = new ArrayList<>(); Integer rowsDeletedFromEntityDeletion = 0; @@ -1677,6 +1697,7 @@ public RollbackRunResult deleteUrn(Urn urn) { * @param urn the urn of the entity to check * @return true if the entity exists, false otherwise */ + @Override public Boolean exists(Urn urn) { final Set aspectsToFetch = getEntityAspectNames(urn); final List dbKeys = aspectsToFetch.stream() @@ -1693,6 +1714,7 @@ public Boolean exists(Urn urn) { * @param urn the urn to check * @return true is the entity is soft deleted, false otherwise. */ + @Override public Boolean isSoftDeleted(@Nonnull final Urn urn) { Objects.requireNonNull(urn, "urn is required"); final RecordTemplate statusAspect = getLatestAspect(urn, STATUS_ASPECT_NAME); @@ -1700,6 +1722,7 @@ public Boolean isSoftDeleted(@Nonnull final Urn urn) { } @Nullable + @Override public RollbackResult deleteAspect(String urn, String aspectName, @Nonnull Map conditions, boolean hardDelete) { // Validate pre-conditions before running queries Urn entityUrn; @@ -2062,6 +2085,7 @@ private RecordTemplate updateAspect( * This method currently supports datasets, charts, dashboards, data flows, data jobs, and glossary terms. 
*/ @Nonnull + @Override public BrowsePaths buildDefaultBrowsePath(final @Nonnull Urn urn) throws URISyntaxException { Character dataPlatformDelimiter = getDataPlatformDelimiter(urn); String defaultBrowsePath = getDefaultBrowsePath(urn, this.getEntityRegistry(), dataPlatformDelimiter); @@ -2079,6 +2103,7 @@ public BrowsePaths buildDefaultBrowsePath(final @Nonnull Urn urn) throws URISynt * will have a basic "Default" folder added to their browsePathV2. */ @Nonnull + @Override public BrowsePathsV2 buildDefaultBrowsePathV2(final @Nonnull Urn urn, boolean useContainerPaths) throws URISyntaxException { Character dataPlatformDelimiter = getDataPlatformDelimiter(urn); return BrowsePathV2Utils.getDefaultBrowsePathV2(urn, this.getEntityRegistry(), dataPlatformDelimiter, this, useContainerPaths); diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java index 53099e900ee4d..abc444faa04a2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/entity/EntityUtils.java @@ -16,7 +16,7 @@ import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; -import static com.linkedin.metadata.entity.EntityService.*; +import static com.linkedin.metadata.Constants.*; @Slf4j diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java index f3ba33a5f4081..7a2f0825b31cc 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/SiblingGraphService.java @@ -43,7 +43,7 @@ public EntityLineageResult getLineage(@Nonnull Urn entityUrn, @Nonnull LineageDi new HashSet<>(), null, null), - _entityService); + _entityService); } /** diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java index d5812f15eccac..8df7a9600ca94 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/graph/elastic/ESGraphQueryDAO.java @@ -1,6 +1,7 @@ package com.linkedin.metadata.graph.elastic; import com.codahale.metrics.Timer; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.datahub.util.exception.ESQueryException; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; @@ -9,7 +10,6 @@ import com.linkedin.common.UrnArrayArray; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; -import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.GraphFilters; import com.linkedin.metadata.graph.LineageDirection; import com.linkedin.metadata.graph.LineageRelationship; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java index c7cdd604968a5..5fb1ab0889e71 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/LineageSearchService.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.search; +import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; @@ -9,7 +10,6 @@ import com.linkedin.data.template.LongMap; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; import 
com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.graph.LineageDirection; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java b/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java index 8b9c712d8d7a3..ee93edaf2480c 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/aggregator/AllEntitiesSearchAggregator.java @@ -1,9 +1,9 @@ package com.linkedin.metadata.search.aggregator; import com.codahale.metrics.Timer; +import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.data.template.GetMode; import com.linkedin.data.template.LongMap; -import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; import com.linkedin.metadata.query.filter.Filter; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java index d76b137131156..95f208e185df1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/cache/EntityDocCountCache.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.search.cache; -import com.google.common.base.Suppliers; import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; +import com.google.common.base.Suppliers; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.EntitySearchService; import com.linkedin.metadata.utils.ConcurrencyUtils; diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java index 9affa4c05a566..14f67ddcbf337 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilder.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.search.elasticsearch.indexbuilder; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.google.common.collect.ImmutableMap; import com.linkedin.metadata.search.utils.ESUtils; @@ -21,7 +22,6 @@ import java.util.stream.Collectors; import javax.annotation.Nonnull; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.util.Pair; import io.github.resilience4j.retry.Retry; import io.github.resilience4j.retry.RetryConfig; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java index 89874b08eef9d..51e08763cd7c8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAO.java @@ -1,12 +1,12 @@ package com.linkedin.metadata.search.elasticsearch.query; import com.codahale.metrics.Timer; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.datahub.util.exception.ESQueryException; import com.fasterxml.jackson.core.type.TypeReference; import com.google.common.annotations.VisibleForTesting; import com.linkedin.data.template.LongMap; -import com.linkedin.metadata.config.search.SearchConfiguration; -import 
com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.AutoCompleteResult; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java index 6186a90114880..55a3474fd9f35 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandler.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import com.linkedin.metadata.config.search.custom.QueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.config.search.custom.QueryConfiguration; import lombok.Builder; import lombok.Getter; import lombok.extern.slf4j.Slf4j; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java index 1f54d3bbbf2ad..397352358d05f 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilder.java @@ -3,14 +3,22 @@ import com.linkedin.metadata.config.search.ExactMatchConfiguration; import com.linkedin.metadata.config.search.PartialConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import 
com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.fasterxml.jackson.annotation.JsonInclude; +import com.fasterxml.jackson.core.StreamReadConstraints; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.linkedin.metadata.Constants; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchScoreAnnotation; import com.linkedin.metadata.models.annotation.SearchableAnnotation; +import java.io.IOException; import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -22,9 +30,15 @@ import javax.annotation.Nullable; import com.linkedin.metadata.search.utils.ESUtils; +import lombok.extern.slf4j.Slf4j; import org.elasticsearch.common.lucene.search.function.CombineFunction; import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; +import org.elasticsearch.common.xcontent.NamedXContentRegistry; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.QueryBuilder; @@ -34,11 +48,23 @@ import org.elasticsearch.index.query.functionscore.FieldValueFactorFunctionBuilder; import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; +import org.elasticsearch.search.SearchModule; import static com.linkedin.metadata.models.SearchableFieldSpecExtractor.PRIMARY_URN_SEARCH_PROPERTIES; - +@Slf4j public class SearchQueryBuilder { + private static final 
ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + static { + OBJECT_MAPPER.setSerializationInclusion(JsonInclude.Include.NON_NULL); + int maxSize = Integer.parseInt(System.getenv().getOrDefault(Constants.INGESTION_MAX_SERIALIZED_STRING_LENGTH, Constants.MAX_JACKSON_STRING_SIZE)); + OBJECT_MAPPER.getFactory().setStreamReadConstraints(StreamReadConstraints.builder().maxStringLength(maxSize).build()); + } + private static final NamedXContentRegistry X_CONTENT_REGISTRY; + static { + SearchModule searchModule = new SearchModule(Settings.EMPTY, false, Collections.emptyList()); + X_CONTENT_REGISTRY = new NamedXContentRegistry(searchModule.getNamedXContents()); + } public static final String STRUCTURED_QUERY_PREFIX = "\\\\/q "; private final ExactMatchConfiguration exactMatchConfiguration; @@ -72,7 +98,7 @@ private QueryBuilder buildInternalQuery(@Nullable QueryConfiguration customQuery @Nonnull String query, boolean fulltext) { final String sanitizedQuery = query.replaceFirst("^:+", ""); final BoolQueryBuilder finalQuery = Optional.ofNullable(customQueryConfig) - .flatMap(cqc -> cqc.boolQueryBuilder(sanitizedQuery)) + .flatMap(cqc -> boolQueryBuilder(cqc, sanitizedQuery)) .orElse(QueryBuilders.boolQuery()); if (fulltext && !query.startsWith(STRUCTURED_QUERY_PREFIX)) { @@ -229,7 +255,7 @@ private FunctionScoreQueryBuilder buildScoreFunctions(@Nullable QueryConfigurati if (customQueryConfig != null) { // Prefer configuration function scoring over annotation scoring - return customQueryConfig.functionScoreQueryBuilder(queryBuilder); + return functionScoreQueryBuilder(customQueryConfig, queryBuilder); } else { return QueryBuilders.functionScoreQuery(queryBuilder, buildAnnotationScoreFunctions(entitySpecs)) .scoreMode(FunctionScoreQuery.ScoreMode.AVG) // Average score functions @@ -297,4 +323,49 @@ private static FieldValueFactorFunction.Modifier mapModifier(SearchScoreAnnotati return FieldValueFactorFunction.Modifier.NONE; } } + + public FunctionScoreQueryBuilder 
functionScoreQueryBuilder(QueryConfiguration customQueryConfiguration, + QueryBuilder queryBuilder) { + return toFunctionScoreQueryBuilder(queryBuilder, customQueryConfiguration.getFunctionScore()); + } + + public Optional boolQueryBuilder(QueryConfiguration customQueryConfiguration, String query) { + if (customQueryConfiguration.getBoolQuery() != null) { + log.debug("Using custom query configuration queryRegex: {}", customQueryConfiguration.getQueryRegex()); + } + return Optional.ofNullable(customQueryConfiguration.getBoolQuery()).map(bq -> toBoolQueryBuilder(query, bq)); + } + + private BoolQueryBuilder toBoolQueryBuilder(String query, BoolQueryConfiguration boolQuery) { + try { + String jsonFragment = OBJECT_MAPPER.writeValueAsString(boolQuery) + .replace("\"{{query_string}}\"", OBJECT_MAPPER.writeValueAsString(query)); + XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, + LoggingDeprecationHandler.INSTANCE, jsonFragment); + return BoolQueryBuilder.fromXContent(parser); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + private FunctionScoreQueryBuilder toFunctionScoreQueryBuilder(QueryBuilder queryBuilder, + Map params) { + try { + HashMap body = new HashMap<>(params); + if (!body.isEmpty()) { + log.debug("Using custom scoring functions: {}", body); + } + + body.put("query", OBJECT_MAPPER.readValue(queryBuilder.toString(), Map.class)); + + String jsonFragment = OBJECT_MAPPER.writeValueAsString(Map.of( + "function_score", body + )); + XContentParser parser = XContentType.JSON.xContent().createParser(X_CONTENT_REGISTRY, + LoggingDeprecationHandler.INSTANCE, jsonFragment); + return (FunctionScoreQueryBuilder) FunctionScoreQueryBuilder.parseInnerQueryBuilder(parser); + } catch (IOException e) { + throw new RuntimeException(e); + } + } } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java 
b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java index 21cec705d57f3..58d855b6f73ad 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandler.java @@ -1,13 +1,13 @@ package com.linkedin.metadata.search.elasticsearch.query.request; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.DoubleMap; import com.linkedin.data.template.LongMap; -import com.linkedin.metadata.config.search.SearchConfiguration; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.SearchableFieldSpec; import com.linkedin.metadata.models.annotation.SearchableAnnotation; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java b/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java index 69579352b8acf..36ebec5a42849 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/AspectUtilsTest.java @@ -1,13 +1,13 @@ package com.linkedin.metadata; +import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.common.FabricType; import com.linkedin.common.urn.DataPlatformUrn; import com.linkedin.common.urn.DatasetUrn; import com.linkedin.dataset.DatasetProperties; import com.linkedin.events.metadata.ChangeType; -import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectUtils; -import 
com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.TestEntityRegistry; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.event.EventProducer; @@ -45,7 +45,7 @@ public void testAdditionalChanges() { EventProducer mockProducer = mock(EventProducer.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - EntityService entityService = new EntityService(aspectDao, mockProducer, _testEntityRegistry, true, + EntityServiceImpl entityServiceImpl = new EntityServiceImpl(aspectDao, mockProducer, _testEntityRegistry, true, null, preProcessHooks); MetadataChangeProposal proposal1 = new MetadataChangeProposal(); @@ -56,7 +56,7 @@ public void testAdditionalChanges() { proposal1.setEntityType("dataset"); proposal1.setChangeType(ChangeType.PATCH); - List proposalList = AspectUtils.getAdditionalChanges(proposal1, entityService); + List proposalList = AspectUtils.getAdditionalChanges(proposal1, entityServiceImpl); // proposals for key aspect, browsePath, browsePathV2, dataPlatformInstance Assert.assertEquals(proposalList.size(), 4); Assert.assertEquals(proposalList.get(0).getChangeType(), ChangeType.UPSERT); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java index a1393bbd65a47..64237ff30d2a4 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESSampleDataFixture.java @@ -1,18 +1,18 @@ package com.linkedin.metadata; -import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.client.JavaEntityClient; -import com.linkedin.metadata.config.search.CustomConfiguration; import com.linkedin.metadata.config.PreProcessHooks; +import 
com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; +import com.linkedin.metadata.config.search.CustomConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityAspect; import com.linkedin.metadata.entity.EntityAspectIdentifier; -import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.SearchService; import com.linkedin.metadata.search.aggregator.AllEntitiesSearchAggregator; @@ -191,7 +191,7 @@ protected EntityClient entityClient( PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); return new JavaEntityClient( - new EntityService(mockAspectDao, null, entityRegistry, true, null, + new EntityServiceImpl(mockAspectDao, null, entityRegistry, true, null, preProcessHooks), null, entitySearchService, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java b/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java index 89ae579a9db01..9cedda9bfb035 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/ESSearchLineageFixture.java @@ -1,15 +1,15 @@ package com.linkedin.metadata; -import com.linkedin.entity.client.EntityClient; -import com.linkedin.metadata.client.JavaEntityClient; import com.linkedin.metadata.config.PreProcessHooks; +import 
com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; import com.linkedin.metadata.config.search.ElasticSearchConfiguration; -import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.entity.client.EntityClient; +import com.linkedin.metadata.client.JavaEntityClient; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.graph.elastic.ESGraphQueryDAO; import com.linkedin.metadata.graph.elastic.ESGraphWriteDAO; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; @@ -226,7 +226,7 @@ protected EntityClient entityClient( PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); return new JavaEntityClient( - new EntityService(null, null, entityRegistry, true, null, + new EntityServiceImpl(null, null, entityRegistry, true, null, preProcessHooks), null, entitySearchService, diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/AspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/AspectMigrationsDaoTest.java index 9304080460fce..6a331647583d2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/AspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/AspectMigrationsDaoTest.java @@ -31,7 +31,7 @@ abstract public class AspectMigrationsDaoTest { protected final EntityRegistry _testEntityRegistry; protected EventProducer _mockProducer; - protected EntityService _entityService; + protected EntityServiceImpl _entityServiceImpl; protected RetentionService _retentionService; protected UpdateIndicesService 
_mockUpdateIndicesService; @@ -46,7 +46,7 @@ public void testListAllUrns() throws AssertionError { final int totalAspects = 30; final int pageSize = 25; final int lastPageSize = 5; - Map ingestedAspects = AspectIngestionUtils.ingestCorpUserKeyAspects(_entityService, totalAspects); + Map ingestedAspects = AspectIngestionUtils.ingestCorpUserKeyAspects(_entityServiceImpl, totalAspects); List ingestedUrns = ingestedAspects.keySet().stream().map(Urn::toString).collect(Collectors.toList()); List seenUrns = new ArrayList<>(); @@ -78,8 +78,8 @@ public void testListAllUrns() throws AssertionError { @Test public void testCountEntities() throws AssertionError { - AspectIngestionUtils.ingestCorpUserInfoAspects(_entityService, 11); - AspectIngestionUtils.ingestChartInfoAspects(_entityService, 22); + AspectIngestionUtils.ingestCorpUserInfoAspects(_entityServiceImpl, 11); + AspectIngestionUtils.ingestChartInfoAspects(_entityServiceImpl, 22); final int expected = 33; long actual = _migrationsDao.countEntities(); @@ -92,7 +92,7 @@ public void testCheckIfAspectExists() throws AssertionError { boolean actual = _migrationsDao.checkIfAspectExists(CORP_USER_INFO_ASPECT_NAME); assertFalse(actual); - AspectIngestionUtils.ingestCorpUserInfoAspects(_entityService, 1); + AspectIngestionUtils.ingestCorpUserInfoAspects(_entityServiceImpl, 1); actual = _migrationsDao.checkIfAspectExists(CORP_USER_INFO_ASPECT_NAME); assertTrue(actual); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java index 7e80a67e304c3..70161fe640707 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraAspectMigrationsDaoTest.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.entity; +import com.linkedin.metadata.config.PreProcessHooks; import 
com.datastax.oss.driver.api.core.CqlSession; import com.linkedin.metadata.CassandraTestUtils; -import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.cassandra.CassandraAspectDao; import com.linkedin.metadata.entity.cassandra.CassandraRetentionService; import com.linkedin.metadata.event.EventProducer; @@ -49,10 +49,10 @@ private void configureComponents() { _mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - _entityService = new EntityService(dao, _mockProducer, _testEntityRegistry, true, _mockUpdateIndicesService, + _entityServiceImpl = new EntityServiceImpl(dao, _mockProducer, _testEntityRegistry, true, _mockUpdateIndicesService, preProcessHooks); - _retentionService = new CassandraRetentionService(_entityService, session, 1000); - _entityService.setRetentionService(_retentionService); + _retentionService = new CassandraRetentionService(_entityServiceImpl, session, 1000); + _entityServiceImpl.setRetentionService(_retentionService); _migrationsDao = dao; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java index 6e062fd14fc21..50e562b76c4e6 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/CassandraEntityServiceTest.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.entity; +import com.linkedin.metadata.config.PreProcessHooks; import com.datastax.oss.driver.api.core.CqlSession; import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; @@ -7,7 +8,6 @@ import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.AspectIngestionUtils; import com.linkedin.metadata.CassandraTestUtils; -import com.linkedin.metadata.config.PreProcessHooks; 
import com.linkedin.metadata.entity.cassandra.CassandraAspectDao; import com.linkedin.metadata.entity.cassandra.CassandraRetentionService; import com.linkedin.metadata.event.EventProducer; @@ -35,7 +35,7 @@ * A class that knows how to configure {@link EntityServiceTest} to run integration tests against a Cassandra database. * * This class also contains all the test methods where realities of an underlying storage leak into the - * {@link EntityService} in the form of subtle behavior differences. Ideally that should never happen, and it'd be + * {@link EntityServiceImpl} in the form of subtle behavior differences. Ideally that should never happen, and it'd be * great to address captured differences. */ public class CassandraEntityServiceTest extends EntityServiceTest { @@ -69,10 +69,10 @@ private void configureComponents() { _mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - _entityService = new EntityService(_aspectDao, _mockProducer, _testEntityRegistry, true, + _entityServiceImpl = new EntityServiceImpl(_aspectDao, _mockProducer, _testEntityRegistry, true, _mockUpdateIndicesService, preProcessHooks); - _retentionService = new CassandraRetentionService(_entityService, session, 1000); - _entityService.setRetentionService(_retentionService); + _retentionService = new CassandraRetentionService(_entityServiceImpl, session, 1000); + _entityServiceImpl.setRetentionService(_retentionService); } /** @@ -99,7 +99,7 @@ public void testIngestListLatestAspects() throws AssertionError { final int expectedTotalPages = 4; final int expectedEntitiesInLastPage = 10; - Map writtenAspects = AspectIngestionUtils.ingestCorpUserInfoAspects(_entityService, totalEntities); + Map writtenAspects = AspectIngestionUtils.ingestCorpUserInfoAspects(_entityServiceImpl, totalEntities); Set writtenUrns = writtenAspects.keySet(); String entity = 
writtenUrns.stream().findFirst().get().getEntityType(); String aspect = AspectGenerationUtils.getAspectName(new CorpUserInfo()); @@ -111,7 +111,7 @@ public void testIngestListLatestAspects() throws AssertionError { int expectedEntityCount = isLastPage ? expectedEntitiesInLastPage : pageSize; int expectedNextStart = isLastPage ? -1 : pageStart + pageSize; - ListResult page = _entityService.listLatestAspects(entity, aspect, pageStart, pageSize); + ListResult page = _entityServiceImpl.listLatestAspects(entity, aspect, pageStart, pageSize); // Check paging metadata works as expected assertEquals(page.getNextStart(), expectedNextStart); @@ -147,7 +147,7 @@ public void testIngestListUrns() throws AssertionError { final int expectedTotalPages = 4; final int expectedEntitiesInLastPage = 10; - Map writtenAspects = AspectIngestionUtils.ingestCorpUserKeyAspects(_entityService, totalEntities); + Map writtenAspects = AspectIngestionUtils.ingestCorpUserKeyAspects(_entityServiceImpl, totalEntities); Set writtenUrns = writtenAspects.keySet(); String entity = writtenUrns.stream().findFirst().get().getEntityType(); @@ -157,7 +157,7 @@ public void testIngestListUrns() throws AssertionError { int pageStart = pageNo * pageSize; int expectedEntityCount = isLastPage ? 
expectedEntitiesInLastPage : pageSize; - ListUrnsResult page = _entityService.listUrns(entity, pageStart, pageSize); + ListUrnsResult page = _entityServiceImpl.listUrns(entity, pageStart, pageSize); // Check paging metadata works as expected assertEquals(page.getStart().intValue(), pageStart); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java index 759680abe7b4d..98f9ce241b850 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/DeleteEntityServiceTest.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.entity; +import com.linkedin.metadata.config.PreProcessHooks; import com.datahub.util.RecordUtils; import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; @@ -9,7 +10,6 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.graph.GraphService; @@ -37,7 +37,7 @@ public class DeleteEntityServiceTest { protected EbeanAspectDao _aspectDao; - protected EntityService _entityService; + protected EntityServiceImpl _entityServiceImpl; protected GraphService _graphService = Mockito.mock(GraphService.class); protected DeleteEntityService _deleteEntityService; @@ -52,9 +52,9 @@ public DeleteEntityServiceTest() { _mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - _entityService = new EntityService(_aspectDao, mock(EventProducer.class), _entityRegistry, true, + _entityServiceImpl = new EntityServiceImpl(_aspectDao, mock(EventProducer.class), 
_entityRegistry, true, _mockUpdateIndicesService, preProcessHooks); - _deleteEntityService = new DeleteEntityService(_entityService, _graphService); + _deleteEntityService = new DeleteEntityService(_entityServiceImpl, _graphService); } /** diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java index 2aa66ae7d9d3b..62f8827b574b8 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanAspectMigrationsDaoTest.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.entity; -import com.linkedin.metadata.EbeanTestUtils; import com.linkedin.metadata.config.PreProcessHooks; +import com.linkedin.metadata.EbeanTestUtils; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; import com.linkedin.metadata.event.EventProducer; @@ -29,10 +29,10 @@ public void setupTest() { _mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - _entityService = new EntityService(dao, _mockProducer, _testEntityRegistry, true, + _entityServiceImpl = new EntityServiceImpl(dao, _mockProducer, _testEntityRegistry, true, _mockUpdateIndicesService, preProcessHooks); - _retentionService = new EbeanRetentionService(_entityService, server, 1000); - _entityService.setRetentionService(_retentionService); + _retentionService = new EbeanRetentionService(_entityServiceImpl, server, 1000); + _entityServiceImpl.setRetentionService(_retentionService); _migrationsDao = dao; } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java index 3d5b8c6cfaa2b..9126aad62895d 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EbeanEntityServiceTest.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.entity; +import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.common.urn.Urn; import com.linkedin.common.urn.UrnUtils; import com.linkedin.data.template.DataTemplateUtil; @@ -7,7 +8,6 @@ import com.linkedin.identity.CorpUserInfo; import com.linkedin.metadata.AspectGenerationUtils; import com.linkedin.metadata.EbeanTestUtils; -import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.entity.ebean.EbeanRetentionService; import com.linkedin.metadata.event.EventProducer; @@ -34,7 +34,7 @@ * A class that knows how to configure {@link EntityServiceTest} to run integration tests against a relational database. * * This class also contains all the test methods where realities of an underlying storage leak into the - * {@link EntityService} in the form of subtle behavior differences. Ideally that should never happen, and it'd be + * {@link EntityServiceImpl} in the form of subtle behavior differences. Ideally that should never happen, and it'd be * great to address captured differences. 
*/ public class EbeanEntityServiceTest extends EntityServiceTest { @@ -51,10 +51,10 @@ public void setupTest() { _mockUpdateIndicesService = mock(UpdateIndicesService.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - _entityService = new EntityService(_aspectDao, _mockProducer, _testEntityRegistry, true, + _entityServiceImpl = new EntityServiceImpl(_aspectDao, _mockProducer, _testEntityRegistry, true, _mockUpdateIndicesService, preProcessHooks); - _retentionService = new EbeanRetentionService(_entityService, server, 1000); - _entityService.setRetentionService(_retentionService); + _retentionService = new EbeanRetentionService(_entityServiceImpl, server, 1000); + _entityServiceImpl.setRetentionService(_retentionService); } /** @@ -86,18 +86,18 @@ public void testIngestListLatestAspects() throws AssertionError { // Ingest CorpUserInfo Aspect #1 CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #2 CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email2@test.com"); - _entityService.ingestAspect(entityUrn2, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn2, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #3 CorpUserInfo writeAspect3 = AspectGenerationUtils.createCorpUserInfo("email3@test.com"); - _entityService.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); // List aspects - ListResult batch1 = _entityService.listLatestAspects(entityUrn1.getEntityType(), aspectName, 0, 2); + ListResult batch1 = 
_entityServiceImpl.listLatestAspects(entityUrn1.getEntityType(), aspectName, 0, 2); assertEquals(batch1.getNextStart(), 2); assertEquals(batch1.getPageSize(), 2); @@ -107,7 +107,7 @@ public void testIngestListLatestAspects() throws AssertionError { assertTrue(DataTemplateUtil.areEqual(writeAspect1, batch1.getValues().get(0))); assertTrue(DataTemplateUtil.areEqual(writeAspect2, batch1.getValues().get(1))); - ListResult batch2 = _entityService.listLatestAspects(entityUrn1.getEntityType(), aspectName, 2, 2); + ListResult batch2 = _entityServiceImpl.listLatestAspects(entityUrn1.getEntityType(), aspectName, 2, 2); assertEquals(batch2.getValues().size(), 1); assertTrue(DataTemplateUtil.areEqual(writeAspect3, batch2.getValues().get(0))); } @@ -131,18 +131,18 @@ public void testIngestListUrns() throws AssertionError { // Ingest CorpUserInfo Aspect #1 RecordTemplate writeAspect1 = AspectGenerationUtils.createCorpUserKey(entityUrn1); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #2 RecordTemplate writeAspect2 = AspectGenerationUtils.createCorpUserKey(entityUrn2); - _entityService.ingestAspect(entityUrn2, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn2, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #3 RecordTemplate writeAspect3 = AspectGenerationUtils.createCorpUserKey(entityUrn3); - _entityService.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); // List aspects urns - ListUrnsResult batch1 = _entityService.listUrns(entityUrn1.getEntityType(), 0, 2); + ListUrnsResult batch1 = _entityServiceImpl.listUrns(entityUrn1.getEntityType(), 0, 2); 
assertEquals(batch1.getStart().intValue(), 0); assertEquals(batch1.getCount().intValue(), 2); @@ -151,7 +151,7 @@ public void testIngestListUrns() throws AssertionError { assertEquals(entityUrn1.toString(), batch1.getEntities().get(0).toString()); assertEquals(entityUrn2.toString(), batch1.getEntities().get(1).toString()); - ListUrnsResult batch2 = _entityService.listUrns(entityUrn1.getEntityType(), 2, 2); + ListUrnsResult batch2 = _entityServiceImpl.listUrns(entityUrn1.getEntityType(), 2, 2); assertEquals(batch2.getStart().intValue(), 2); assertEquals(batch2.getCount().intValue(), 1); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java index 1d11bd786c4cf..d485981f32a07 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/entity/EntityServiceTest.java @@ -72,7 +72,7 @@ import static org.testng.Assert.*; /** - * A class to test {@link EntityService} + * A class to test {@link EntityServiceImpl} * * This class is generic to allow same integration tests to be reused to test all supported storage backends. * If you're adding another storage backend - you should create a new test class that extends this one providing @@ -87,7 +87,7 @@ */ abstract public class EntityServiceTest { - protected EntityService _entityService; + protected EntityServiceImpl _entityServiceImpl; protected T_AD _aspectDao; protected T_RS _retentionService; @@ -124,10 +124,10 @@ public void testIngestGetEntity() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); // 1. Ingest Entity - _entityService.ingestEntity(writeEntity, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestEntity(writeEntity, TEST_AUDIT_STAMP, metadata1); // 2. 
Retrieve Entity - com.linkedin.entity.Entity readEntity = _entityService.getEntity(entityUrn, Collections.emptySet()); + com.linkedin.entity.Entity readEntity = _entityServiceImpl.getEntity(entityUrn, Collections.emptySet()); // 3. Compare Entity Objects assertEquals(readEntity.getValue().getCorpUserSnapshot().getAspects().size(), 2); // Key + Info aspect. @@ -161,10 +161,10 @@ public void testAddKey() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); // 1. Ingest Entity - _entityService.ingestEntity(writeEntity, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestEntity(writeEntity, TEST_AUDIT_STAMP, metadata1); // 2. Retrieve Entity - com.linkedin.entity.Entity readEntity = _entityService.getEntity(entityUrn, Collections.emptySet()); + com.linkedin.entity.Entity readEntity = _entityServiceImpl.getEntity(entityUrn, Collections.emptySet()); // 3. Compare Entity Objects assertEquals(readEntity.getValue().getCorpUserSnapshot().getAspects().size(), 2); // Key + Info aspect. @@ -202,12 +202,12 @@ public void testIngestGetEntities() throws Exception { SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1625792690, "run-123"); // 1. Ingest Entities - _entityService.ingestEntities(ImmutableList.of(writeEntity1, writeEntity2), TEST_AUDIT_STAMP, + _entityServiceImpl.ingestEntities(ImmutableList.of(writeEntity1, writeEntity2), TEST_AUDIT_STAMP, ImmutableList.of(metadata1, metadata2)); // 2. Retrieve Entities Map readEntities = - _entityService.getEntities(ImmutableSet.of(entityUrn1, entityUrn2), Collections.emptySet()); + _entityServiceImpl.getEntities(ImmutableSet.of(entityUrn1, entityUrn2), Collections.emptySet()); // 3. Compare Entity Objects @@ -279,12 +279,12 @@ public void testIngestGetEntitiesV2() throws Exception { String keyName = "corpUserKey"; // 1. 
Ingest Entities - _entityService.ingestEntities(ImmutableList.of(writeEntity1, writeEntity2), TEST_AUDIT_STAMP, + _entityServiceImpl.ingestEntities(ImmutableList.of(writeEntity1, writeEntity2), TEST_AUDIT_STAMP, ImmutableList.of(metadata1, metadata2)); // 2. Retrieve Entities Map readEntities = - _entityService.getEntitiesV2("corpuser", ImmutableSet.of(entityUrn1, entityUrn2), ImmutableSet.of(aspectName)); + _entityServiceImpl.getEntitiesV2("corpuser", ImmutableSet.of(entityUrn1, entityUrn2), ImmutableSet.of(aspectName)); // 3. Compare Entity Objects @@ -347,12 +347,12 @@ public void testIngestGetEntitiesVersionedV2() throws Exception { String keyName = "corpUserKey"; // 1. Ingest Entities - _entityService.ingestEntities(ImmutableList.of(writeEntity1, writeEntity2), TEST_AUDIT_STAMP, + _entityServiceImpl.ingestEntities(ImmutableList.of(writeEntity1, writeEntity2), TEST_AUDIT_STAMP, ImmutableList.of(metadata1, metadata2)); // 2. Retrieve Entities Map readEntities = - _entityService.getEntitiesVersionedV2(ImmutableSet.of(versionedUrn1, versionedUrn2), ImmutableSet.of(aspectName)); + _entityServiceImpl.getEntitiesVersionedV2(ImmutableSet.of(versionedUrn1, versionedUrn2), ImmutableSet.of(aspectName)); // 3. 
Compare Entity Objects @@ -414,9 +414,9 @@ public void testIngestAspectsGetLatestAspects() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); - _entityService.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); - Map latestAspects = _entityService.getLatestAspectsForUrn( + Map latestAspects = _entityServiceImpl.getLatestAspectsForUrn( entityUrn, new HashSet<>(Arrays.asList(aspectName1, aspectName2)) ); @@ -445,7 +445,7 @@ public void testReingestAspectsGetLatestAspects() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); - _entityService.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); final MetadataChangeLog initialChangeLog = new MetadataChangeLog(); initialChangeLog.setEntityType(entityUrn.getEntityType()); @@ -470,7 +470,7 @@ public void testReingestAspectsGetLatestAspects() throws Exception { restateChangeLog.setPreviousAspectValue(aspect); restateChangeLog.setPreviousSystemMetadata(simulatePullFromDB(metadata1, SystemMetadata.class)); - Map latestAspects = _entityService.getLatestAspectsForUrn( + Map latestAspects = _entityServiceImpl.getLatestAspectsForUrn( entityUrn, new HashSet<>(List.of(aspectName1)) ); @@ -484,7 +484,7 @@ public void testReingestAspectsGetLatestAspects() throws Exception { // Mockito detects the previous invocation and throws an error in verifying the second call unless invocations are cleared clearInvocations(_mockProducer); - _entityService.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); verify(_mockProducer, times(1)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(restateChangeLog)); @@ -508,7 +508,7 @@ 
public void testReingestLineageAspect() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); - _entityService.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); final MetadataChangeLog initialChangeLog = new MetadataChangeLog(); initialChangeLog.setEntityType(entityUrn.getEntityType()); @@ -533,7 +533,7 @@ public void testReingestLineageAspect() throws Exception { restateChangeLog.setPreviousAspectValue(aspect); restateChangeLog.setPreviousSystemMetadata(simulatePullFromDB(metadata1, SystemMetadata.class)); - Map latestAspects = _entityService.getLatestAspectsForUrn( + Map latestAspects = _entityServiceImpl.getLatestAspectsForUrn( entityUrn, new HashSet<>(List.of(aspectName1)) ); @@ -547,7 +547,7 @@ public void testReingestLineageAspect() throws Exception { // Mockito detects the previous invocation and throws an error in verifying the second call unless invocations are cleared clearInvocations(_mockProducer); - _entityService.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); verify(_mockProducer, times(1)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(restateChangeLog)); @@ -578,7 +578,7 @@ public void testReingestLineageProposal() throws Exception { mcp1.setSystemMetadata(metadata1); mcp1.setAspectName(UPSTREAM_LINEAGE_ASPECT_NAME); - _entityService.ingestProposal(mcp1, TEST_AUDIT_STAMP, false); + _entityServiceImpl.ingestProposal(mcp1, TEST_AUDIT_STAMP, false); final MetadataChangeLog initialChangeLog = new MetadataChangeLog(); initialChangeLog.setEntityType(entityUrn.getEntityType()); @@ -601,7 +601,7 @@ public void testReingestLineageProposal() throws Exception { restateChangeLog.setPreviousAspectValue(genericAspect); 
restateChangeLog.setPreviousSystemMetadata(simulatePullFromDB(metadata1, SystemMetadata.class)); - Map latestAspects = _entityService.getLatestAspectsForUrn( + Map latestAspects = _entityServiceImpl.getLatestAspectsForUrn( entityUrn, new HashSet<>(List.of(aspectName1)) ); @@ -613,7 +613,7 @@ public void testReingestLineageProposal() throws Exception { // Mockito detects the previous invocation and throws an error in verifying the second call unless invocations are cleared clearInvocations(_mockProducer); - _entityService.ingestProposal(mcp1, TEST_AUDIT_STAMP, false); + _entityServiceImpl.ingestProposal(mcp1, TEST_AUDIT_STAMP, false); verify(_mockProducer, times(1)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.eq(restateChangeLog)); @@ -640,7 +640,7 @@ public void testIngestTimeseriesAspect() throws Exception { genericAspect.setValue(ByteString.unsafeWrap(datasetProfileSerialized)); genericAspect.setContentType("application/json"); gmce.setAspect(genericAspect); - _entityService.ingestProposal(gmce, TEST_AUDIT_STAMP, false); + _entityServiceImpl.ingestProposal(gmce, TEST_AUDIT_STAMP, false); } @Test @@ -659,7 +659,7 @@ public void testAsyncProposalVersioned() throws Exception { genericAspect.setValue(ByteString.unsafeWrap(datasetPropertiesSerialized)); genericAspect.setContentType("application/json"); gmce.setAspect(genericAspect); - _entityService.ingestProposal(gmce, TEST_AUDIT_STAMP, true); + _entityServiceImpl.ingestProposal(gmce, TEST_AUDIT_STAMP, true); verify(_mockProducer, times(0)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); verify(_mockProducer, times(1)).produceMetadataChangeProposal(Mockito.eq(entityUrn), @@ -685,7 +685,7 @@ public void testAsyncProposalTimeseries() throws Exception { genericAspect.setValue(ByteString.unsafeWrap(datasetProfileSerialized)); genericAspect.setContentType("application/json"); gmce.setAspect(genericAspect); - _entityService.ingestProposal(gmce, TEST_AUDIT_STAMP, 
true); + _entityServiceImpl.ingestProposal(gmce, TEST_AUDIT_STAMP, true); verify(_mockProducer, times(1)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), Mockito.any()); verify(_mockProducer, times(0)).produceMetadataChangeProposal(Mockito.eq(entityUrn), @@ -704,9 +704,9 @@ public void testUpdateGetAspect() throws AssertionError { CorpUserInfo writeAspect = AspectGenerationUtils.createCorpUserInfo("email@test.com"); // Validate retrieval of CorpUserInfo Aspect #1 - _entityService.updateAspect(entityUrn, "corpuser", aspectName, corpUserInfoSpec, writeAspect, TEST_AUDIT_STAMP, 1, + _entityServiceImpl.updateAspect(entityUrn, "corpuser", aspectName, corpUserInfoSpec, writeAspect, TEST_AUDIT_STAMP, 1, true); - RecordTemplate readAspect1 = _entityService.getAspect(entityUrn, aspectName, 1); + RecordTemplate readAspect1 = _entityServiceImpl.getAspect(entityUrn, aspectName, 1); assertTrue(DataTemplateUtil.areEqual(writeAspect, readAspect1)); verify(_mockProducer, times(1)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any()); @@ -715,9 +715,9 @@ public void testUpdateGetAspect() throws AssertionError { writeAspect.setEmail("newemail@test.com"); // Validate retrieval of CorpUserInfo Aspect #2 - _entityService.updateAspect(entityUrn, "corpuser", aspectName, corpUserInfoSpec, writeAspect, TEST_AUDIT_STAMP, 1, + _entityServiceImpl.updateAspect(entityUrn, "corpuser", aspectName, corpUserInfoSpec, writeAspect, TEST_AUDIT_STAMP, 1, false); - RecordTemplate readAspect2 = _entityService.getAspect(entityUrn, aspectName, 1); + RecordTemplate readAspect2 = _entityServiceImpl.getAspect(entityUrn, aspectName, 1); assertTrue(DataTemplateUtil.areEqual(writeAspect, readAspect2)); verifyNoMoreInteractions(_mockProducer); } @@ -734,22 +734,22 @@ public void testGetAspectAtVersion() throws AssertionError { CorpUserInfo writeAspect = AspectGenerationUtils.createCorpUserInfo("email@test.com"); // Validate retrieval of CorpUserInfo Aspect #1 
- _entityService.updateAspect(entityUrn, "corpuser", aspectName, corpUserInfoSpec, writeAspect, TEST_AUDIT_STAMP, 1, + _entityServiceImpl.updateAspect(entityUrn, "corpuser", aspectName, corpUserInfoSpec, writeAspect, TEST_AUDIT_STAMP, 1, true); VersionedAspect writtenVersionedAspect = new VersionedAspect(); writtenVersionedAspect.setAspect(Aspect.create(writeAspect)); writtenVersionedAspect.setVersion(1); - VersionedAspect readAspect1 = _entityService.getVersionedAspect(entityUrn, aspectName, 1); + VersionedAspect readAspect1 = _entityServiceImpl.getVersionedAspect(entityUrn, aspectName, 1); assertTrue(DataTemplateUtil.areEqual(writtenVersionedAspect, readAspect1)); verify(_mockProducer, times(1)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.eq(corpUserInfoSpec), Mockito.any()); - VersionedAspect readAspect2 = _entityService.getVersionedAspect(entityUrn, aspectName, -1); + VersionedAspect readAspect2 = _entityServiceImpl.getVersionedAspect(entityUrn, aspectName, -1); assertTrue(DataTemplateUtil.areEqual(writtenVersionedAspect, readAspect2)); - VersionedAspect readAspectVersion0 = _entityService.getVersionedAspect(entityUrn, aspectName, 0); + VersionedAspect readAspectVersion0 = _entityServiceImpl.getVersionedAspect(entityUrn, aspectName, 0); assertFalse(DataTemplateUtil.areEqual(writtenVersionedAspect, readAspectVersion0)); verifyNoMoreInteractions(_mockProducer); @@ -768,19 +768,19 @@ public void testRollbackAspect() throws AssertionError { // Ingest CorpUserInfo Aspect #1 CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #2 CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email2@test.com"); - _entityService.ingestAspect(entityUrn2, aspectName, writeAspect2, 
TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn2, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #3 CorpUserInfo writeAspect3 = AspectGenerationUtils.createCorpUserInfo("email3@test.com"); - _entityService.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #1 Overwrite CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1Overwrite, TEST_AUDIT_STAMP, metadata2); + _entityServiceImpl.ingestAspect(entityUrn1, aspectName, writeAspect1Overwrite, TEST_AUDIT_STAMP, metadata2); // this should no-op since this run has been overwritten AspectRowSummary rollbackOverwrittenAspect = new AspectRowSummary(); @@ -788,13 +788,13 @@ public void testRollbackAspect() throws AssertionError { rollbackOverwrittenAspect.setAspectName(aspectName); rollbackOverwrittenAspect.setUrn(entityUrn1.toString()); - _entityService.rollbackRun(ImmutableList.of(rollbackOverwrittenAspect), "run-123", true); + _entityServiceImpl.rollbackRun(ImmutableList.of(rollbackOverwrittenAspect), "run-123", true); // assert nothing was deleted - RecordTemplate readAspectOriginal = _entityService.getAspect(entityUrn1, aspectName, 1); + RecordTemplate readAspectOriginal = _entityServiceImpl.getAspect(entityUrn1, aspectName, 1); assertTrue(DataTemplateUtil.areEqual(writeAspect1, readAspectOriginal)); - RecordTemplate readAspectOverwrite = _entityService.getAspect(entityUrn1, aspectName, 0); + RecordTemplate readAspectOverwrite = _entityServiceImpl.getAspect(entityUrn1, aspectName, 0); assertTrue(DataTemplateUtil.areEqual(writeAspect1Overwrite, readAspectOverwrite)); // this should delete the most recent aspect @@ -803,10 +803,10 @@ public void testRollbackAspect() throws 
AssertionError { rollbackRecentAspect.setAspectName(aspectName); rollbackRecentAspect.setUrn(entityUrn1.toString()); - _entityService.rollbackRun(ImmutableList.of(rollbackOverwrittenAspect), "run-456", true); + _entityServiceImpl.rollbackRun(ImmutableList.of(rollbackOverwrittenAspect), "run-456", true); // assert the new most recent aspect is the original one - RecordTemplate readNewRecentAspect = _entityService.getAspect(entityUrn1, aspectName, 0); + RecordTemplate readNewRecentAspect = _entityServiceImpl.getAspect(entityUrn1, aspectName, 0); assertTrue(DataTemplateUtil.areEqual(writeAspect1, readNewRecentAspect)); } @@ -818,18 +818,18 @@ public void testRollbackKey() throws AssertionError { SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); - String keyAspectName = _entityService.getKeyAspectName(entityUrn1); + String keyAspectName = _entityServiceImpl.getKeyAspectName(entityUrn1); // Ingest CorpUserInfo Aspect #1 CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); - RecordTemplate writeKey1 = _entityService.buildKeyAspect(entityUrn1); - _entityService.ingestAspect(entityUrn1, keyAspectName, writeKey1, TEST_AUDIT_STAMP, metadata1); + RecordTemplate writeKey1 = _entityServiceImpl.buildKeyAspect(entityUrn1); + _entityServiceImpl.ingestAspect(entityUrn1, keyAspectName, writeKey1, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #1 Overwrite CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1Overwrite, TEST_AUDIT_STAMP, metadata2); + _entityServiceImpl.ingestAspect(entityUrn1, 
aspectName, writeAspect1Overwrite, TEST_AUDIT_STAMP, metadata2); // this should no-op since the key should have been written in the furst run AspectRowSummary rollbackKeyWithWrongRunId = new AspectRowSummary(); @@ -837,13 +837,13 @@ public void testRollbackKey() throws AssertionError { rollbackKeyWithWrongRunId.setAspectName("corpUserKey"); rollbackKeyWithWrongRunId.setUrn(entityUrn1.toString()); - _entityService.rollbackRun(ImmutableList.of(rollbackKeyWithWrongRunId), "run-456", true); + _entityServiceImpl.rollbackRun(ImmutableList.of(rollbackKeyWithWrongRunId), "run-456", true); // assert nothing was deleted - RecordTemplate readAspectOriginal = _entityService.getAspect(entityUrn1, aspectName, 1); + RecordTemplate readAspectOriginal = _entityServiceImpl.getAspect(entityUrn1, aspectName, 1); assertTrue(DataTemplateUtil.areEqual(writeAspect1, readAspectOriginal)); - RecordTemplate readAspectOverwrite = _entityService.getAspect(entityUrn1, aspectName, 0); + RecordTemplate readAspectOverwrite = _entityServiceImpl.getAspect(entityUrn1, aspectName, 0); assertTrue(DataTemplateUtil.areEqual(writeAspect1Overwrite, readAspectOverwrite)); // this should delete the most recent aspect @@ -852,10 +852,10 @@ public void testRollbackKey() throws AssertionError { rollbackKeyWithCorrectRunId.setAspectName("corpUserKey"); rollbackKeyWithCorrectRunId.setUrn(entityUrn1.toString()); - _entityService.rollbackRun(ImmutableList.of(rollbackKeyWithCorrectRunId), "run-123", true); + _entityServiceImpl.rollbackRun(ImmutableList.of(rollbackKeyWithCorrectRunId), "run-123", true); // assert the new most recent aspect is null - RecordTemplate readNewRecentAspect = _entityService.getAspect(entityUrn1, aspectName, 0); + RecordTemplate readNewRecentAspect = _entityServiceImpl.getAspect(entityUrn1, aspectName, 0); assertTrue(DataTemplateUtil.areEqual(null, readNewRecentAspect)); } @@ -869,26 +869,26 @@ public void testRollbackUrn() throws AssertionError { SystemMetadata metadata2 = 
AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); String aspectName = AspectGenerationUtils.getAspectName(new CorpUserInfo()); - String keyAspectName = _entityService.getKeyAspectName(entityUrn1); + String keyAspectName = _entityServiceImpl.getKeyAspectName(entityUrn1); // Ingest CorpUserInfo Aspect #1 CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn1, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); - RecordTemplate writeKey1 = _entityService.buildKeyAspect(entityUrn1); - _entityService.ingestAspect(entityUrn1, keyAspectName, writeKey1, TEST_AUDIT_STAMP, metadata1); + RecordTemplate writeKey1 = _entityServiceImpl.buildKeyAspect(entityUrn1); + _entityServiceImpl.ingestAspect(entityUrn1, keyAspectName, writeKey1, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #2 CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email2@test.com"); - _entityService.ingestAspect(entityUrn2, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn2, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #3 CorpUserInfo writeAspect3 = AspectGenerationUtils.createCorpUserInfo("email3@test.com"); - _entityService.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn3, aspectName, writeAspect3, TEST_AUDIT_STAMP, metadata1); // Ingest CorpUserInfo Aspect #1 Overwrite CorpUserInfo writeAspect1Overwrite = AspectGenerationUtils.createCorpUserInfo("email1.overwrite@test.com"); - _entityService.ingestAspect(entityUrn1, aspectName, writeAspect1Overwrite, TEST_AUDIT_STAMP, metadata2); + _entityServiceImpl.ingestAspect(entityUrn1, aspectName, writeAspect1Overwrite, TEST_AUDIT_STAMP, metadata2); // this should no-op since 
the key should have been written in the furst run AspectRowSummary rollbackKeyWithWrongRunId = new AspectRowSummary(); @@ -897,13 +897,13 @@ public void testRollbackUrn() throws AssertionError { rollbackKeyWithWrongRunId.setUrn(entityUrn1.toString()); // this should delete all related aspects - _entityService.deleteUrn(UrnUtils.getUrn("urn:li:corpuser:test1")); + _entityServiceImpl.deleteUrn(UrnUtils.getUrn("urn:li:corpuser:test1")); // assert the new most recent aspect is null - RecordTemplate readNewRecentAspect = _entityService.getAspect(entityUrn1, aspectName, 0); + RecordTemplate readNewRecentAspect = _entityServiceImpl.getAspect(entityUrn1, aspectName, 0); assertTrue(DataTemplateUtil.areEqual(null, readNewRecentAspect)); - RecordTemplate deletedKeyAspect = _entityService.getAspect(entityUrn1, "corpUserKey", 0); + RecordTemplate deletedKeyAspect = _entityServiceImpl.getAspect(entityUrn1, "corpUserKey", 0); assertTrue(DataTemplateUtil.areEqual(null, deletedKeyAspect)); } @@ -919,8 +919,8 @@ public void testIngestGetLatestAspect() throws AssertionError { SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); // Validate retrieval of CorpUserInfo Aspect #1 - _entityService.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); - RecordTemplate readAspect1 = _entityService.getLatestAspect(entityUrn, aspectName); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); assertTrue(DataTemplateUtil.areEqual(writeAspect1, readAspect1)); ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); @@ -942,8 +942,8 @@ public void testIngestGetLatestAspect() throws AssertionError { CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email2@test.com"); // Validate retrieval of CorpUserInfo Aspect #2 - _entityService.ingestAspect(entityUrn, 
aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata2); - RecordTemplate readAspect2 = _entityService.getLatestAspect(entityUrn, aspectName); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata2); + RecordTemplate readAspect2 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); EntityAspect readAspectDao1 = _aspectDao.getAspect(entityUrn.toString(), aspectName, 1); EntityAspect readAspectDao2 = _aspectDao.getAspect(entityUrn.toString(), aspectName, 0); @@ -976,16 +976,16 @@ public void testIngestGetLatestEnvelopedAspect() throws Exception { SystemMetadata metadata2 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-456"); // Validate retrieval of CorpUserInfo Aspect #1 - _entityService.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); - EnvelopedAspect readAspect1 = _entityService.getLatestEnvelopedAspect("corpuser", entityUrn, aspectName); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + EnvelopedAspect readAspect1 = _entityServiceImpl.getLatestEnvelopedAspect("corpuser", entityUrn, aspectName); assertTrue(DataTemplateUtil.areEqual(writeAspect1, new CorpUserInfo(readAspect1.getValue().data()))); // Ingest CorpUserInfo Aspect #2 CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email2@test.com"); // Validate retrieval of CorpUserInfo Aspect #2 - _entityService.ingestAspect(entityUrn, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata2); - EnvelopedAspect readAspect2 = _entityService.getLatestEnvelopedAspect("corpuser", entityUrn, aspectName); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata2); + EnvelopedAspect readAspect2 = _entityServiceImpl.getLatestEnvelopedAspect("corpuser", entityUrn, aspectName); EntityAspect readAspectDao1 = _aspectDao.getAspect(entityUrn.toString(), aspectName, 1); EntityAspect readAspectDao2 = 
_aspectDao.getAspect(entityUrn.toString(), aspectName, 0); @@ -1018,8 +1018,8 @@ public void testIngestSameAspect() throws AssertionError { SystemMetadata metadata3 = AspectGenerationUtils.createSystemMetadata(1635792689, "run-123"); // Validate retrieval of CorpUserInfo Aspect #1 - _entityService.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); - RecordTemplate readAspect1 = _entityService.getLatestAspect(entityUrn, aspectName); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + RecordTemplate readAspect1 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); assertTrue(DataTemplateUtil.areEqual(writeAspect1, readAspect1)); ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); @@ -1041,8 +1041,8 @@ public void testIngestSameAspect() throws AssertionError { CorpUserInfo writeAspect2 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); // Validate retrieval of CorpUserInfo Aspect #2 - _entityService.ingestAspect(entityUrn, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata2); - RecordTemplate readAspect2 = _entityService.getLatestAspect(entityUrn, aspectName); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect2, TEST_AUDIT_STAMP, metadata2); + RecordTemplate readAspect2 = _entityServiceImpl.getLatestAspect(entityUrn, aspectName); EntityAspect readAspectDao2 = _aspectDao.getAspect(entityUrn.toString(), aspectName, ASPECT_LATEST_VERSION); assertTrue(DataTemplateUtil.areEqual(writeAspect2, readAspect2)); @@ -1069,23 +1069,23 @@ public void testRetention() throws AssertionError { // Ingest CorpUserInfo Aspect CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); - _entityService.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); CorpUserInfo writeAspect1a = 
AspectGenerationUtils.createCorpUserInfo("email_a@test.com"); - _entityService.ingestAspect(entityUrn, aspectName, writeAspect1a, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect1a, TEST_AUDIT_STAMP, metadata1); CorpUserInfo writeAspect1b = AspectGenerationUtils.createCorpUserInfo("email_b@test.com"); - _entityService.ingestAspect(entityUrn, aspectName, writeAspect1b, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect1b, TEST_AUDIT_STAMP, metadata1); String aspectName2 = AspectGenerationUtils.getAspectName(new Status()); // Ingest Status Aspect Status writeAspect2 = new Status().setRemoved(true); - _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName2, writeAspect2, TEST_AUDIT_STAMP, metadata1); Status writeAspect2a = new Status().setRemoved(false); - _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2a, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName2, writeAspect2a, TEST_AUDIT_STAMP, metadata1); Status writeAspect2b = new Status().setRemoved(true); - _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2b, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName2, writeAspect2b, TEST_AUDIT_STAMP, metadata1); - assertEquals(_entityService.getAspect(entityUrn, aspectName, 1), writeAspect1); - assertEquals(_entityService.getAspect(entityUrn, aspectName2, 1), writeAspect2); + assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName, 1), writeAspect1); + assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName2, 1), writeAspect2); _retentionService.setRetention(null, null, new DataHubRetentionConfig().setRetention( new Retention().setVersion(new VersionBasedRetention().setMaxVersions(2)))); @@ -1094,13 +1094,13 @@ public void testRetention() throws AssertionError { // 
Ingest CorpUserInfo Aspect again CorpUserInfo writeAspect1c = AspectGenerationUtils.createCorpUserInfo("email_c@test.com"); - _entityService.ingestAspect(entityUrn, aspectName, writeAspect1c, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName, writeAspect1c, TEST_AUDIT_STAMP, metadata1); // Ingest Status Aspect again Status writeAspect2c = new Status().setRemoved(false); - _entityService.ingestAspect(entityUrn, aspectName2, writeAspect2c, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspect(entityUrn, aspectName2, writeAspect2c, TEST_AUDIT_STAMP, metadata1); - assertNull(_entityService.getAspect(entityUrn, aspectName, 1)); - assertEquals(_entityService.getAspect(entityUrn, aspectName2, 1), writeAspect2); + assertNull(_entityServiceImpl.getAspect(entityUrn, aspectName, 1)); + assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName2, 1), writeAspect2); // Reset retention policies _retentionService.setRetention(null, null, new DataHubRetentionConfig().setRetention( @@ -1108,8 +1108,8 @@ public void testRetention() throws AssertionError { _retentionService.deleteRetention("corpuser", "status"); // Invoke batch apply _retentionService.batchApplyRetention(null, null); - assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName, 0, 10).getTotalCount(), 1); - assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName2, 0, 10).getTotalCount(), 1); + assertEquals(_entityServiceImpl.listLatestAspects(entityUrn.getEntityType(), aspectName, 0, 10).getTotalCount(), 1); + assertEquals(_entityServiceImpl.listLatestAspects(entityUrn.getEntityType(), aspectName2, 0, 10).getTotalCount(), 1); } @Test @@ -1122,29 +1122,29 @@ public void testIngestAspectIfNotPresent() throws AssertionError { // Ingest CorpUserInfo Aspect CorpUserInfo writeAspect1 = AspectGenerationUtils.createCorpUserInfo("email@test.com"); - _entityService.ingestAspectIfNotPresent(entityUrn, aspectName, 
writeAspect1, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspectIfNotPresent(entityUrn, aspectName, writeAspect1, TEST_AUDIT_STAMP, metadata1); CorpUserInfo writeAspect1a = AspectGenerationUtils.createCorpUserInfo("email_a@test.com"); - _entityService.ingestAspectIfNotPresent(entityUrn, aspectName, writeAspect1a, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspectIfNotPresent(entityUrn, aspectName, writeAspect1a, TEST_AUDIT_STAMP, metadata1); CorpUserInfo writeAspect1b = AspectGenerationUtils.createCorpUserInfo("email_b@test.com"); - _entityService.ingestAspectIfNotPresent(entityUrn, aspectName, writeAspect1b, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspectIfNotPresent(entityUrn, aspectName, writeAspect1b, TEST_AUDIT_STAMP, metadata1); String aspectName2 = AspectGenerationUtils.getAspectName(new Status()); // Ingest Status Aspect Status writeAspect2 = new Status().setRemoved(true); - _entityService.ingestAspectIfNotPresent(entityUrn, aspectName2, writeAspect2, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspectIfNotPresent(entityUrn, aspectName2, writeAspect2, TEST_AUDIT_STAMP, metadata1); Status writeAspect2a = new Status().setRemoved(false); - _entityService.ingestAspectIfNotPresent(entityUrn, aspectName2, writeAspect2a, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspectIfNotPresent(entityUrn, aspectName2, writeAspect2a, TEST_AUDIT_STAMP, metadata1); Status writeAspect2b = new Status().setRemoved(true); - _entityService.ingestAspectIfNotPresent(entityUrn, aspectName2, writeAspect2b, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspectIfNotPresent(entityUrn, aspectName2, writeAspect2b, TEST_AUDIT_STAMP, metadata1); - assertEquals(_entityService.getAspect(entityUrn, aspectName, 0), writeAspect1); - assertEquals(_entityService.getAspect(entityUrn, aspectName2, 0), writeAspect2); + assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName, 0), writeAspect1); + 
assertEquals(_entityServiceImpl.getAspect(entityUrn, aspectName2, 0), writeAspect2); - assertNull(_entityService.getAspect(entityUrn, aspectName, 1)); - assertNull(_entityService.getAspect(entityUrn, aspectName2, 1)); + assertNull(_entityServiceImpl.getAspect(entityUrn, aspectName, 1)); + assertNull(_entityServiceImpl.getAspect(entityUrn, aspectName2, 1)); - assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName, 0, 10).getTotalCount(), 1); - assertEquals(_entityService.listLatestAspects(entityUrn.getEntityType(), aspectName2, 0, 10).getTotalCount(), 1); + assertEquals(_entityServiceImpl.listLatestAspects(entityUrn.getEntityType(), aspectName, 0, 10).getTotalCount(), 1); + assertEquals(_entityServiceImpl.listLatestAspects(entityUrn.getEntityType(), aspectName2, 0, 10).getTotalCount(), 1); } /** @@ -1173,7 +1173,7 @@ public void testRestoreIndices() throws Exception { SystemMetadata metadata1 = AspectGenerationUtils.createSystemMetadata(); - _entityService.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); + _entityServiceImpl.ingestAspects(entityUrn, pairToIngest, TEST_AUDIT_STAMP, metadata1); clearInvocations(_mockProducer); @@ -1184,7 +1184,7 @@ public void testRestoreIndices() throws Exception { args.setBatchDelayMs(1L); args.setNumThreads(1); args.setUrn(urnStr); - _entityService.restoreIndices(args, obj -> { + _entityServiceImpl.restoreIndices(args, obj -> { }); ArgumentCaptor mclCaptor = ArgumentCaptor.forClass(MetadataChangeLog.class); @@ -1203,12 +1203,12 @@ public void testRestoreIndices() throws Exception { public void testValidateUrn() throws Exception { // Valid URN Urn validTestUrn = new Urn("li", "corpuser", new TupleKey("testKey")); - _entityService.validateUrn(validTestUrn); + _entityServiceImpl.validateUrn(validTestUrn); // URN with trailing whitespace Urn testUrnWithTrailingWhitespace = new Urn("li", "corpuser", new TupleKey("testKey ")); try { - 
_entityService.validateUrn(testUrnWithTrailingWhitespace); + _entityServiceImpl.validateUrn(testUrnWithTrailingWhitespace); Assert.fail("Should have raised IllegalArgumentException for URN with trailing whitespace"); } catch (IllegalArgumentException e) { assertEquals(e.getMessage(), "Error: cannot provide an URN with leading or trailing whitespace"); @@ -1219,7 +1219,7 @@ public void testValidateUrn() throws Exception { Urn testUrnTooLong = new Urn("li", "corpuser", new TupleKey(stringTooLong)); try { - _entityService.validateUrn(testUrnTooLong); + _entityServiceImpl.validateUrn(testUrnTooLong); Assert.fail("Should have raised IllegalArgumentException for URN too long"); } catch (IllegalArgumentException e) { assertEquals(e.getMessage(), "Error: cannot provide an URN longer than 512 bytes (when URL encoded)"); @@ -1235,9 +1235,9 @@ public void testValidateUrn() throws Exception { Urn testUrnTooLongWhenEncoded = new Urn("li", "corpUser", new TupleKey(buildStringTooLongWhenEncoded.toString())); Urn testUrnSameLengthWhenEncoded = new Urn("li", "corpUser", new TupleKey(buildStringSameLengthWhenEncoded.toString())); // Same length when encoded should be allowed, the encoded one should not be - _entityService.validateUrn(testUrnSameLengthWhenEncoded); + _entityServiceImpl.validateUrn(testUrnSameLengthWhenEncoded); try { - _entityService.validateUrn(testUrnTooLongWhenEncoded); + _entityServiceImpl.validateUrn(testUrnTooLongWhenEncoded); Assert.fail("Should have raised IllegalArgumentException for URN too long"); } catch (IllegalArgumentException e) { assertEquals(e.getMessage(), "Error: cannot provide an URN longer than 512 bytes (when URL encoded)"); @@ -1246,9 +1246,9 @@ public void testValidateUrn() throws Exception { // Urn containing disallowed character Urn testUrnSpecialCharValid = new Urn("li", "corpUser", new TupleKey("bob␇")); Urn testUrnSpecialCharInvalid = new Urn("li", "corpUser", new TupleKey("bob␟")); - _entityService.validateUrn(testUrnSpecialCharValid); 
+ _entityServiceImpl.validateUrn(testUrnSpecialCharValid); try { - _entityService.validateUrn(testUrnSpecialCharInvalid); + _entityServiceImpl.validateUrn(testUrnSpecialCharInvalid); Assert.fail("Should have raised IllegalArgumentException for URN containing the illegal char"); } catch (IllegalArgumentException e) { assertEquals(e.getMessage(), "Error: URN cannot contain ␟ character"); @@ -1256,7 +1256,7 @@ public void testValidateUrn() throws Exception { Urn urnWithMismatchedParens = new Urn("li", "corpuser", new TupleKey("test(Key")); try { - _entityService.validateUrn(urnWithMismatchedParens); + _entityServiceImpl.validateUrn(urnWithMismatchedParens); Assert.fail("Should have raised IllegalArgumentException for URN with mismatched parens"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("mismatched paren nesting")); @@ -1264,18 +1264,18 @@ public void testValidateUrn() throws Exception { Urn invalidType = new Urn("li", "fakeMadeUpType", new TupleKey("testKey")); try { - _entityService.validateUrn(invalidType); + _entityServiceImpl.validateUrn(invalidType); Assert.fail("Should have raised IllegalArgumentException for URN with non-existent entity type"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains("Failed to find entity with name fakeMadeUpType")); } Urn validFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "PROD")); - _entityService.validateUrn(validFabricType); + _entityServiceImpl.validateUrn(validFabricType); Urn invalidFabricType = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "prod")); try { - _entityService.validateUrn(invalidFabricType); + _entityServiceImpl.validateUrn(invalidFabricType); Assert.fail("Should have raised IllegalArgumentException for URN with invalid fabric type"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(invalidFabricType.toString())); @@ -1283,7 +1283,7 @@ public void testValidateUrn() 
throws Exception { Urn urnEndingInComma = new Urn("li", "dataset", new TupleKey("urn:li:dataPlatform:foo", "bar", "PROD", "")); try { - _entityService.validateUrn(urnEndingInComma); + _entityServiceImpl.validateUrn(urnEndingInComma); Assert.fail("Should have raised IllegalArgumentException for URN ending in comma"); } catch (IllegalArgumentException e) { assertTrue(e.getMessage().contains(urnEndingInComma.toString())); @@ -1312,7 +1312,7 @@ public void testUIPreProcessedProposal() throws Exception { genericAspect.setValue(ByteString.unsafeWrap(datasetPropertiesSerialized)); genericAspect.setContentType("application/json"); gmce.setAspect(genericAspect); - _entityService.ingestProposal(gmce, TEST_AUDIT_STAMP, false); + _entityServiceImpl.ingestProposal(gmce, TEST_AUDIT_STAMP, false); ArgumentCaptor captor = ArgumentCaptor.forClass(MetadataChangeLog.class); verify(_mockProducer, times(1)).produceMetadataChangeLog(Mockito.eq(entityUrn), Mockito.any(), captor.capture()); diff --git a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java index 91626cef9d409..1717e466359d3 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/graph/elastic/ElasticSearchGraphServiceTest.java @@ -1,12 +1,12 @@ package com.linkedin.metadata.graph.elastic; +import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.common.FabricType; import com.linkedin.common.urn.DataPlatformUrn; import com.linkedin.common.urn.DatasetUrn; import com.linkedin.common.urn.TagUrn; import com.linkedin.common.urn.Urn; import com.linkedin.metadata.ESTestConfiguration; -import com.linkedin.metadata.config.search.GraphQueryConfiguration; import com.linkedin.metadata.graph.Edge; import com.linkedin.metadata.graph.EntityLineageResult; import 
com.linkedin.metadata.graph.GraphService; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java index 8498004fc4a93..e8872691772de 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/LineageSearchServiceTest.java @@ -1,5 +1,8 @@ package com.linkedin.metadata.search; +import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; +import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.test.Snapshot; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -15,9 +18,6 @@ import com.linkedin.data.template.LongMap; import com.linkedin.metadata.ESTestConfiguration; import com.linkedin.metadata.TestEntityUtil; -import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; -import com.linkedin.metadata.config.cache.SearchLineageCacheConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.graph.EntityLineageResult; import com.linkedin.metadata.graph.GraphService; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java index 8bae3c9e91af5..2405b3b3f6e27 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/SearchServiceTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.search; +import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; +import 
com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.test.Snapshot; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -8,8 +10,6 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.ESTestConfiguration; -import com.linkedin.metadata.config.cache.EntityDocCountCacheConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java index c21c5fc92e960..0e8d881b70791 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/ElasticSearchServiceTest.java @@ -1,5 +1,6 @@ package com.linkedin.metadata.search.elasticsearch; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.test.Snapshot; import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -8,7 +9,6 @@ import com.linkedin.metadata.ESTestConfiguration; import com.linkedin.data.schema.annotation.PathSpecBasedSchemaAnnotationVisitor; import com.linkedin.metadata.browse.BrowseResult; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilderTest.java index f1f910e93376b..2416280cb8f93 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/indexbuilder/ESIndexBuilderTest.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.search.elasticsearch.indexbuilder; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.google.common.collect.ImmutableMap; import com.linkedin.metadata.ESTestConfiguration; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.systemmetadata.SystemMetadataMappingsBuilder; import com.linkedin.metadata.version.GitVersion; import java.util.Optional; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAOTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAOTest.java index eb3da419651e6..b506051e9bb5d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAOTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/ESSearchDAOTest.java @@ -1,11 +1,11 @@ package com.linkedin.metadata.search.elasticsearch.query; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.test.Snapshot; import com.google.common.collect.ImmutableList; import com.linkedin.data.template.LongMap; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.ESSampleDataFixture; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.models.registry.SnapshotEntityRegistry; import 
com.linkedin.metadata.query.filter.Condition; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilderTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilderTest.java index 914911677c2c9..76160eb29af4e 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/AggregationQueryBuilderTest.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import com.google.common.collect.ImmutableList; import com.linkedin.metadata.config.search.SearchConfiguration; +import com.google.common.collect.ImmutableList; import com.linkedin.metadata.models.annotation.SearchableAnnotation; import java.util.Collections; import java.util.List; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java index 3f63e6b43a871..3dad9c59c6b53 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/CustomizedQueryHandlerTest.java @@ -1,11 +1,12 @@ package com.linkedin.metadata.search.elasticsearch.query.request; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.metadata.config.search.CustomConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.config.search.custom.BoolQueryConfiguration; -import com.linkedin.metadata.config.search.custom.QueryConfiguration; import 
com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.linkedin.metadata.config.search.custom.QueryConfiguration; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import org.elasticsearch.common.lucene.search.function.CombineFunction; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; import org.elasticsearch.index.query.MatchAllQueryBuilder; @@ -35,6 +36,10 @@ public class CustomizedQueryHandlerTest { throw new RuntimeException(e); } } + public static final SearchQueryBuilder SEARCH_QUERY_BUILDER; + static { + SEARCH_QUERY_BUILDER = new SearchQueryBuilder(new SearchConfiguration(), TEST_CONFIG); + } private static final List EXPECTED_CONFIGURATION = List.of( QueryConfiguration.builder() .queryRegex("[*]|") @@ -132,7 +137,8 @@ public void functionScoreQueryBuilderTest() { /* * Test select star */ - FunctionScoreQueryBuilder selectStarTest = test.lookupQueryConfig("*").get().functionScoreQueryBuilder(inputQuery); + FunctionScoreQueryBuilder selectStarTest = SEARCH_QUERY_BUILDER.functionScoreQueryBuilder(test.lookupQueryConfig("*").get(), + inputQuery); FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedSelectStarScoreFunctions = { new FunctionScoreQueryBuilder.FilterFunctionBuilder( @@ -156,7 +162,7 @@ public void functionScoreQueryBuilderTest() { /* * Test default (non-select start) */ - FunctionScoreQueryBuilder defaultTest = test.lookupQueryConfig("foobar").get().functionScoreQueryBuilder(inputQuery); + FunctionScoreQueryBuilder defaultTest = SEARCH_QUERY_BUILDER.functionScoreQueryBuilder(test.lookupQueryConfig("foobar").get(), inputQuery); FunctionScoreQueryBuilder.FilterFunctionBuilder[] expectedDefaultScoreFunctions = { new FunctionScoreQueryBuilder.FilterFunctionBuilder( diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java 
b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java index e35fc4528ca60..a2ec396c34b2d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchQueryBuilderTest.java @@ -1,5 +1,10 @@ package com.linkedin.metadata.search.elasticsearch.query.request; +import com.linkedin.metadata.config.search.CustomConfiguration; +import com.linkedin.metadata.config.search.ExactMatchConfiguration; +import com.linkedin.metadata.config.search.PartialConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.google.common.collect.ImmutableList; import com.linkedin.metadata.TestEntitySpecBuilder; @@ -9,11 +14,6 @@ import java.util.Map; import java.util.stream.Collectors; -import com.linkedin.metadata.config.search.CustomConfiguration; -import com.linkedin.metadata.config.search.ExactMatchConfiguration; -import com.linkedin.metadata.config.search.PartialConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.util.Pair; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchAllQueryBuilder; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java index 095c99370e8d1..f85739d20fc65 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java +++ 
b/metadata-io/src/test/java/com/linkedin/metadata/search/elasticsearch/query/request/SearchRequestHandlerTest.java @@ -1,5 +1,8 @@ package com.linkedin.metadata.search.elasticsearch.query.request; +import com.linkedin.metadata.config.search.ExactMatchConfiguration; +import com.linkedin.metadata.config.search.PartialConfiguration; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.google.common.collect.ImmutableList; import com.linkedin.data.template.StringArray; import com.linkedin.metadata.ESTestConfiguration; @@ -15,9 +18,6 @@ import java.util.Set; import java.util.stream.Collectors; -import com.linkedin.metadata.config.search.ExactMatchConfiguration; -import com.linkedin.metadata.config.search.PartialConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java index fd8cdec73a191..407d2ae684ede 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/CassandraTimelineServiceTest.java @@ -1,9 +1,9 @@ package com.linkedin.metadata.timeline; +import com.linkedin.metadata.config.PreProcessHooks; import com.datastax.oss.driver.api.core.CqlSession; import com.linkedin.metadata.CassandraTestUtils; -import com.linkedin.metadata.config.PreProcessHooks; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.cassandra.CassandraAspectDao; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.registry.EntityRegistryException; @@ -54,7 +54,7 @@ private void 
configureComponents() { _mockProducer = mock(EventProducer.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - _entityService = new EntityService(_aspectDao, _mockProducer, _testEntityRegistry, true, + _entityServiceImpl = new EntityServiceImpl(_aspectDao, _mockProducer, _testEntityRegistry, true, _mockUpdateIndicesService, preProcessHooks); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java index 0ea2f6bb124d9..b431f786cd50a 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/EbeanTimelineServiceTest.java @@ -1,8 +1,8 @@ package com.linkedin.metadata.timeline; -import com.linkedin.metadata.EbeanTestUtils; import com.linkedin.metadata.config.PreProcessHooks; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.EbeanTestUtils; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.ebean.EbeanAspectDao; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.registry.EntityRegistryException; @@ -34,7 +34,7 @@ public void setupTest() { _mockProducer = mock(EventProducer.class); PreProcessHooks preProcessHooks = new PreProcessHooks(); preProcessHooks.setUiEnabled(true); - _entityService = new EntityService(_aspectDao, _mockProducer, _testEntityRegistry, true, + _entityServiceImpl = new EntityServiceImpl(_aspectDao, _mockProducer, _testEntityRegistry, true, _mockUpdateIndicesService, preProcessHooks); } diff --git a/metadata-io/src/test/java/com/linkedin/metadata/timeline/TimelineServiceTest.java b/metadata-io/src/test/java/com/linkedin/metadata/timeline/TimelineServiceTest.java index abd611c5a9336..b3e4b84a4962d 100644 --- 
a/metadata-io/src/test/java/com/linkedin/metadata/timeline/TimelineServiceTest.java +++ b/metadata-io/src/test/java/com/linkedin/metadata/timeline/TimelineServiceTest.java @@ -7,7 +7,7 @@ import com.linkedin.common.urn.Urn; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.entity.AspectDao; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.TestEntityRegistry; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; @@ -61,7 +61,7 @@ abstract public class TimelineServiceTest { protected final EntityRegistry _testEntityRegistry = new MergedEntityRegistry(_snapshotEntityRegistry).apply(_configEntityRegistry); protected TimelineServiceImpl _entityTimelineService; - protected EntityService _entityService; + protected EntityServiceImpl _entityServiceImpl; protected EventProducer _mockProducer; protected UpdateIndicesService _mockUpdateIndicesService = mock(UpdateIndicesService.class); @@ -82,12 +82,12 @@ public void testGetTimeline() throws Exception { SchemaMetadata schemaMetadata = getSchemaMetadata("This is the new description for day " + i); AuditStamp daysAgo = createTestAuditStamp(i); timestamps.add(daysAgo); - _entityService.ingestAspects(entityUrn, Collections.singletonList(new Pair<>(aspectName, schemaMetadata)), + _entityServiceImpl.ingestAspects(entityUrn, Collections.singletonList(new Pair<>(aspectName, schemaMetadata)), daysAgo, getSystemMetadata(daysAgo, "run-" + i)); } Map latestAspects = - _entityService.getLatestAspectsForUrn(entityUrn, new HashSet<>(Arrays.asList(aspectName))); + _entityServiceImpl.getLatestAspectsForUrn(entityUrn, new HashSet<>(Arrays.asList(aspectName))); Set elements = new HashSet<>(); elements.add(ChangeCategory.TECHNICAL_SCHEMA); diff --git a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTest.java 
b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTest.java index 9dc813b149a31..d0190279930fe 100644 --- a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTest.java +++ b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTest.java @@ -15,10 +15,10 @@ public class MaeConsumerApplicationTest extends AbstractTestNGSpringContextTests { @Autowired - private EntityService mockEntityService; + private EntityService _mockEntityService; @Test public void testMaeConsumerAutoWiring() { - assertNotNull(mockEntityService); + assertNotNull(_mockEntityService); } } diff --git a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java index 4aaee6f085201..72665ffa0b76e 100644 --- a/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java +++ b/metadata-jobs/mae-consumer-job/src/test/java/com/linkedin/metadata/kafka/MaeConsumerApplicationTestConfiguration.java @@ -3,7 +3,7 @@ import com.linkedin.entity.client.RestliEntityClient; import com.linkedin.gms.factory.auth.SystemAuthenticationFactory; import com.linkedin.metadata.dao.producer.KafkaHealthChecker; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.models.registry.ConfigEntityRegistry; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -21,7 +21,7 @@ public class MaeConsumerApplicationTestConfiguration { private KafkaHealthChecker kafkaHealthChecker; @MockBean - private EntityService entityService; + private EntityServiceImpl _entityServiceImpl; @MockBean private RestliEntityClient restliEntityClient; diff 
--git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java index cb8b299ec75b6..030ca83131433 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/UpdateIndicesHookTest.java @@ -1,5 +1,7 @@ package com.linkedin.metadata.kafka.hook; +import com.linkedin.metadata.config.SystemUpdateConfiguration; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.common.AuditStamp; import com.linkedin.common.InputField; import com.linkedin.common.InputFieldArray; @@ -20,8 +22,6 @@ import com.linkedin.events.metadata.ChangeType; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.Constants; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; -import com.linkedin.metadata.config.SystemUpdateConfiguration; import com.linkedin.metadata.graph.Edge; import com.linkedin.metadata.graph.GraphService; import com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener; diff --git a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java index 206b15ff61cca..ef80c49ec4520 100644 --- a/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java +++ b/metadata-jobs/mae-consumer/src/test/java/com/linkedin/metadata/kafka/hook/spring/MCLSpringTestConfiguration.java @@ -7,7 +7,7 @@ import com.linkedin.metadata.boot.kafka.DataHubUpgradeKafkaListener; import com.linkedin.metadata.graph.elastic.ElasticSearchGraphService; import com.linkedin.metadata.models.registry.EntityRegistry; -import 
com.linkedin.metadata.schema.registry.SchemaRegistryService; +import com.linkedin.metadata.registry.SchemaRegistryService; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.transformer.SearchDocumentTransformer; import com.linkedin.metadata.systemmetadata.SystemMetadataService; diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java index 059c4ac263064..c23cf1ea3d165 100644 --- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java +++ b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTest.java @@ -23,13 +23,13 @@ public class MceConsumerApplicationTest extends AbstractTestNGSpringContextTests private TestRestTemplate restTemplate; @Autowired - private EntityService mockEntityService; + private EntityService _mockEntityService; @Test public void testRestliServletConfig() { RestoreIndicesResult mockResult = new RestoreIndicesResult(); mockResult.setRowsMigrated(100); - when(mockEntityService.restoreIndices(any(), any())).thenReturn(mockResult); + when(_mockEntityService.restoreIndices(any(), any())).thenReturn(mockResult); String response = this.restTemplate .postForObject("/gms/aspects?action=restoreIndices", "{\"urn\":\"\"}", String.class); diff --git a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java index 747a4ab312c7a..2d09cf2043575 100644 --- a/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java +++ 
b/metadata-jobs/mce-consumer-job/src/test/java/com/linkedin/metadata/kafka/MceConsumerApplicationTestConfiguration.java @@ -33,7 +33,7 @@ public class MceConsumerApplicationTestConfiguration { public KafkaHealthChecker kafkaHealthChecker; @MockBean - public EntityService entityService; + public EntityService _entityService; @Bean("restliEntityClient") @Primary diff --git a/metadata-service/configuration/build.gradle b/metadata-service/configuration/build.gradle new file mode 100644 index 0000000000000..8623e53d2554a --- /dev/null +++ b/metadata-service/configuration/build.gradle @@ -0,0 +1,14 @@ +plugins { + id 'java' +} + +dependencies { + compile externalDependency.jacksonDataBind + + implementation externalDependency.slf4jApi + implementation externalDependency.springCore + + compileOnly externalDependency.lombok + + annotationProcessor externalDependency.lombok +} \ No newline at end of file diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/AssetsConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/AssetsConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/AssetsConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/AssetsConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/AuthPluginConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java similarity index 100% rename from 
metadata-io/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/DataHubConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/EntityProfileConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EntityProfileConfig.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/EntityProfileConfig.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EntityProfileConfig.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/EntityRegistryPluginConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/IngestionConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/IngestionConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/IngestionConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/IngestionConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/PluginConfiguration.java diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/config/PreProcessHooks.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/PreProcessHooks.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/PreProcessHooks.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/PreProcessHooks.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/QueriesTabConfig.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/QueriesTabConfig.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/QueriesTabConfig.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/QueriesTabConfig.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/RetentionPluginConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/SystemUpdateConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/SystemUpdateConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/SystemUpdateConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/SystemUpdateConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/TestsConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/TestsConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/TestsConfiguration.java 
rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/TestsConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/ViewsConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/ViewsConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/ViewsConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/ViewsConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/VisualConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/VisualConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/VisualConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/VisualConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/CacheConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/cache/EntityDocCountCacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/EntityDocCountCacheConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/cache/EntityDocCountCacheConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/EntityDocCountCacheConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/cache/HomepageCacheConfiguration.java 
b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/HomepageCacheConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/cache/HomepageCacheConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/HomepageCacheConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/cache/PrimaryCacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/PrimaryCacheConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/cache/PrimaryCacheConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/PrimaryCacheConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/cache/SearchCacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/SearchCacheConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/cache/SearchCacheConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/SearchCacheConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/cache/SearchLineageCacheConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/SearchLineageCacheConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/cache/SearchLineageCacheConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/cache/SearchLineageCacheConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java similarity index 
100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/KafkaConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/kafka/ListenerConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ListenerConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/kafka/ListenerConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ListenerConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/ProducerConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/kafka/SchemaRegistryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/SchemaRegistryConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/kafka/SchemaRegistryConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/kafka/SchemaRegistryConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/BuildIndicesConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/BuildIndicesConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/BuildIndicesConfiguration.java rename to 
metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/BuildIndicesConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java index d1a1ddc5d4226..7a0292c2adec1 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/CustomConfiguration.java @@ -1,7 +1,7 @@ package com.linkedin.metadata.config.search; -import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; +import com.fasterxml.jackson.databind.ObjectMapper; import lombok.Data; import lombok.extern.slf4j.Slf4j; import org.springframework.core.io.ClassPathResource; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java similarity index 75% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java index 7525ea7202a99..30679bbaab9ce 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ElasticSearchConfiguration.java @@ -1,11 +1,9 @@ package com.linkedin.metadata.config.search; import lombok.Data; -import 
org.springframework.context.annotation.Configuration; @Data -@Configuration public class ElasticSearchConfiguration { private BuildIndicesConfiguration buildIndices; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/ExactMatchConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ExactMatchConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/ExactMatchConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/ExactMatchConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/GraphQueryConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/PartialConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/PartialConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/PartialConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/PartialConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/SearchConfiguration.java diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/BoolQueryConfiguration.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/CustomSearchConfiguration.java diff --git a/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java new file mode 100644 index 0000000000000..cd4364a64a0c5 --- /dev/null +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/search/custom/QueryConfiguration.java @@ -0,0 +1,35 @@ +package com.linkedin.metadata.config.search.custom; + +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.annotation.JsonPOJOBuilder; +import lombok.Builder; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; + +import java.util.Map; + + +@Slf4j +@Builder(toBuilder = true) +@Getter +@ToString +@EqualsAndHashCode +@JsonDeserialize(builder = QueryConfiguration.QueryConfigurationBuilder.class) +public class QueryConfiguration { + + private String queryRegex; + 
@Builder.Default + private boolean simpleQuery = true; + @Builder.Default + private boolean exactMatchQuery = true; + @Builder.Default + private boolean prefixMatchQuery = true; + private BoolQueryConfiguration boolQuery; + private Map functionScore; + + @JsonPOJOBuilder(withPrefix = "") + public static class QueryConfigurationBuilder { + } +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/telemetry/TelemetryConfiguration.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/telemetry/TelemetryConfiguration.java similarity index 92% rename from metadata-io/src/main/java/com/linkedin/metadata/telemetry/TelemetryConfiguration.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/config/telemetry/TelemetryConfiguration.java index 16fdcdb6f3fbb..3821cbbed83e8 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/telemetry/TelemetryConfiguration.java +++ b/metadata-service/configuration/src/main/java/com/linkedin/metadata/config/telemetry/TelemetryConfiguration.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.telemetry; +package com.linkedin.metadata.config.telemetry; import lombok.Data; /** diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/BatchWriteOperationsOptions.java b/metadata-service/configuration/src/main/java/com/linkedin/metadata/timeseries/BatchWriteOperationsOptions.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeseries/BatchWriteOperationsOptions.java rename to metadata-service/configuration/src/main/java/com/linkedin/metadata/timeseries/BatchWriteOperationsOptions.java diff --git a/metadata-service/factories/src/main/resources/application.yml b/metadata-service/configuration/src/main/resources/application.yml similarity index 100% rename from metadata-service/factories/src/main/resources/application.yml rename to metadata-service/configuration/src/main/resources/application.yml diff --git 
a/metadata-service/factories/build.gradle b/metadata-service/factories/build.gradle index e416580053120..796b6ee436b78 100644 --- a/metadata-service/factories/build.gradle +++ b/metadata-service/factories/build.gradle @@ -7,6 +7,7 @@ dependencies { compile project(':metadata-service:auth-impl') compile project(':metadata-service:auth-config') compile project(':metadata-service:plugin') + compile project(':metadata-service:configuration') compile project(':datahub-graphql-core') compile project(':metadata-service:restli-servlet-impl') compile project(':metadata-dao-impl:kafka-producer') diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java index 20f2e3829f8c1..fc010a1aa2cae 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/auth/DataHubTokenServiceFactory.java @@ -36,7 +36,7 @@ public class DataHubTokenServiceFactory { */ @Autowired @Qualifier("entityService") - private EntityService entityService; + private EntityService _entityService; @Bean(name = "dataHubTokenService") @Scope("singleton") @@ -46,7 +46,7 @@ protected StatefulTokenService getInstance() { this.signingKey, this.signingAlgorithm, this.issuer, - this.entityService, + this._entityService, this.saltingKey ); } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java index ef4af64ea0493..e07630111a567 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/config/ConfigurationProvider.java @@ -2,18 +2,18 @@ import 
com.datahub.authentication.AuthenticationConfiguration; import com.datahub.authorization.AuthorizationConfiguration; -import com.linkedin.datahub.graphql.featureflags.FeatureFlags; -import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; -import com.linkedin.metadata.config.cache.CacheConfiguration; import com.linkedin.metadata.config.DataHubConfiguration; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; import com.linkedin.metadata.config.IngestionConfiguration; import com.linkedin.metadata.config.SystemUpdateConfiguration; import com.linkedin.metadata.config.TestsConfiguration; import com.linkedin.metadata.config.ViewsConfiguration; import com.linkedin.metadata.config.VisualConfiguration; +import com.linkedin.metadata.config.cache.CacheConfiguration; import com.linkedin.metadata.config.kafka.KafkaConfiguration; -import com.linkedin.metadata.telemetry.TelemetryConfiguration; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; +import com.linkedin.datahub.graphql.featureflags.FeatureFlags; +import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; +import com.linkedin.metadata.config.telemetry.TelemetryConfiguration; import lombok.Data; import org.springframework.boot.context.properties.ConfigurationProperties; import org.springframework.context.annotation.Configuration; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java index 6bb86b01604c1..b8edd6918dc6c 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/entity/EntityServiceFactory.java @@ -7,6 +7,7 @@ import com.linkedin.metadata.dao.producer.KafkaHealthChecker; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; 
+import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.service.UpdateIndicesService; import com.linkedin.mxe.TopicConvention; @@ -38,7 +39,7 @@ protected EntityService createInstance( final KafkaEventProducer eventProducer = new KafkaEventProducer(producer, convention, kafkaHealthChecker); FeatureFlags featureFlags = configurationProvider.getFeatureFlags(); - return new EntityService(aspectDao, eventProducer, entityRegistry, + return new EntityServiceImpl(aspectDao, eventProducer, entityRegistry, featureFlags.isAlwaysEmitChangeLog(), updateIndicesService, featureFlags.getPreProcessHooks()); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java index 251d674909d9f..e58661b357e6a 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/DataHubKafkaProducerFactory.java @@ -1,11 +1,11 @@ package com.linkedin.gms.factory.kafka; +import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.kafka.schemaregistry.AwsGlueSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.KafkaSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; -import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; import java.util.Arrays; import java.util.Map; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java index 5362ad0b11648..ba18be6834d14 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/KafkaEventConsumerFactory.java @@ -1,11 +1,11 @@ package com.linkedin.gms.factory.kafka; +import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.kafka.schemaregistry.AwsGlueSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.KafkaSchemaRegistryFactory; import com.linkedin.gms.factory.kafka.schemaregistry.SchemaRegistryConfig; -import com.linkedin.metadata.config.kafka.KafkaConfiguration; import java.time.Duration; import java.util.Arrays; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java index 5c402548b7cbd..05ebfdddf8b80 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/SimpleKafkaConsumerFactory.java @@ -1,7 +1,7 @@ package com.linkedin.gms.factory.kafka; -import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.config.kafka.KafkaConfiguration; +import com.linkedin.gms.factory.config.ConfigurationProvider; import java.time.Duration; import java.util.Arrays; import lombok.extern.slf4j.Slf4j; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java 
b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java index e282cc3861b41..aeef166a077c7 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/DUHESchemaRegistryFactory.java @@ -1,9 +1,9 @@ package com.linkedin.gms.factory.kafka.schemaregistry; +import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.metadata.boot.kafka.MockDUHEDeserializer; import com.linkedin.metadata.boot.kafka.MockDUHESerializer; -import com.linkedin.metadata.config.kafka.KafkaConfiguration; import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; import lombok.extern.slf4j.Slf4j; import org.springframework.beans.factory.annotation.Value; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java index b103b84307611..217dc15bbc3e8 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/kafka/schemaregistry/InternalSchemaRegistryFactory.java @@ -1,10 +1,10 @@ package com.linkedin.gms.factory.kafka.schemaregistry; +import com.linkedin.metadata.config.kafka.KafkaConfiguration; import com.linkedin.gms.factory.common.TopicConventionFactory; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.metadata.config.kafka.KafkaConfiguration; -import com.linkedin.metadata.schema.registry.SchemaRegistryService; -import com.linkedin.metadata.schema.registry.SchemaRegistryServiceImpl; +import 
com.linkedin.metadata.registry.SchemaRegistryService; +import com.linkedin.metadata.registry.SchemaRegistryServiceImpl; import com.linkedin.mxe.TopicConvention; import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; import io.confluent.kafka.serializers.KafkaAvroDeserializer; diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java index 836824391bcf7..58584a4d957de 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/recommendation/candidatesource/RecentlyEditedCandidateSourceFactory.java @@ -28,11 +28,11 @@ public class RecentlyEditedCandidateSourceFactory { @Autowired @Qualifier("entityService") - private EntityService entityService; + private EntityService _entityService; @Bean(name = "recentlyEditedCandidateSource") @Nonnull protected RecentlyEditedSource getInstance() { - return new RecentlyEditedSource(searchClient, indexConvention, entityService); + return new RecentlyEditedSource(searchClient, indexConvention, _entityService); } } diff --git a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java index 0aa1c1a9cfd23..03dd2d072b4a0 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java +++ b/metadata-service/factories/src/main/java/com/linkedin/gms/factory/search/ElasticSearchServiceFactory.java @@ -1,13 +1,13 @@ package com.linkedin.gms.factory.search; +import com.linkedin.metadata.config.search.ElasticSearchConfiguration; 
+import com.linkedin.metadata.config.search.SearchConfiguration; +import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLMapper; import com.linkedin.gms.factory.config.ConfigurationProvider; import com.linkedin.gms.factory.entityregistry.EntityRegistryFactory; import com.linkedin.gms.factory.spring.YamlPropertySourceFactory; -import com.linkedin.metadata.config.search.ElasticSearchConfiguration; -import com.linkedin.metadata.config.search.SearchConfiguration; -import com.linkedin.metadata.config.search.custom.CustomSearchConfiguration; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.search.elasticsearch.ElasticSearchService; import com.linkedin.metadata.search.elasticsearch.indexbuilder.EntityIndexBuilders; diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java index 627c5934140a8..ea9ac57778550 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/BackfillBrowsePathsV2Step.java @@ -26,6 +26,9 @@ import javax.annotation.Nonnull; import java.util.Set; +import static com.linkedin.metadata.Constants.*; + + @Slf4j public class BackfillBrowsePathsV2Step extends UpgradeStep { @@ -133,7 +136,7 @@ private void ingestBrowsePathsV2(Urn urn, AuditStamp auditStamp) throws Exceptio proposal.setEntityType(urn.getEntityType()); proposal.setAspectName(Constants.BROWSE_PATHS_V2_ASPECT_NAME); proposal.setChangeType(ChangeType.UPSERT); - proposal.setSystemMetadata(new SystemMetadata().setRunId(EntityService.DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); + proposal.setSystemMetadata(new 
SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); proposal.setAspect(GenericRecordUtils.serializeAspect(browsePathsV2)); _entityService.ingestProposal( proposal, diff --git a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java index b990400b38491..7fcafa24d7b45 100644 --- a/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java +++ b/metadata-service/factories/src/main/java/com/linkedin/metadata/boot/steps/UpgradeDefaultBrowsePathsStep.java @@ -19,6 +19,8 @@ import javax.annotation.Nonnull; import lombok.extern.slf4j.Slf4j; +import static com.linkedin.metadata.Constants.*; + /** * This is an opt-in optional upgrade step to migrate your browse paths to the new truncated form. @@ -124,7 +126,7 @@ private void migrateBrowsePath(Urn urn, AuditStamp auditStamp) throws Exception proposal.setEntityType(urn.getEntityType()); proposal.setAspectName(Constants.BROWSE_PATHS_ASPECT_NAME); proposal.setChangeType(ChangeType.UPSERT); - proposal.setSystemMetadata(new SystemMetadata().setRunId(EntityService.DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); + proposal.setSystemMetadata(new SystemMetadata().setRunId(DEFAULT_RUN_ID).setLastObserved(System.currentTimeMillis())); proposal.setAspect(GenericRecordUtils.serializeAspect(newPaths)); _entityService.ingestProposal( proposal, diff --git a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java index fa351d7539e02..82f3a82c135ce 100644 --- a/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java +++ 
b/metadata-service/factories/src/test/java/com/linkedin/metadata/boot/steps/IngestDataPlatformInstancesStepTest.java @@ -127,8 +127,10 @@ private void mockDBWithWorkToDo( AspectMigrationsDao migrationsDao, int countOfCorpUserEntities, int countOfChartEntities) { - List corpUserUrns = insertMockEntities(countOfCorpUserEntities, "corpuser", "urn:li:corpuser:test%d", entityRegistry, entityService); - List charUrns = insertMockEntities(countOfChartEntities, "chart", "urn:li:chart:(looker,test%d)", entityRegistry, entityService); + List corpUserUrns = insertMockEntities(countOfCorpUserEntities, "corpuser", "urn:li:corpuser:test%d", entityRegistry, + entityService); + List charUrns = insertMockEntities(countOfChartEntities, "chart", "urn:li:chart:(looker,test%d)", entityRegistry, + entityService); List allUrnsInDB = Stream.concat(corpUserUrns.stream(), charUrns.stream()).map(Urn::toString).collect(Collectors.toList()); when(migrationsDao.checkIfAspectExists(DATA_PLATFORM_INSTANCE_ASPECT_NAME)).thenReturn(false); when(migrationsDao.countEntities()).thenReturn((long) allUrnsInDB.size()); diff --git a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java index 7ce41924fe92d..52842d2f32b2a 100644 --- a/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java +++ b/metadata-service/openapi-servlet/src/main/java/io/datahubproject/openapi/util/MappingUtil.java @@ -19,6 +19,7 @@ import com.linkedin.entity.Aspect; import com.linkedin.events.metadata.ChangeType; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.IngestProposalResult; import com.linkedin.metadata.entity.RollbackRunResult; import com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.models.EntitySpec; @@ -265,7 +266,7 @@ public static Pair 
ingestProposal(com.linkedin.mxe.MetadataChan log.info("Proposal: {}", serviceProposal); Throwable exceptionally = null; try { - EntityService.IngestProposalResult proposalResult = entityService.ingestProposal(serviceProposal, auditStamp, false); + IngestProposalResult proposalResult = entityService.ingestProposal(serviceProposal, auditStamp, false); Urn urn = proposalResult.getUrn(); additionalChanges.forEach(proposal -> entityService.ingestProposal(proposal, auditStamp, false)); return new Pair<>(urn.toString(), proposalResult.isDidUpdate()); diff --git a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java index 2dff636de2f27..80cc80067fba2 100644 --- a/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java +++ b/metadata-service/openapi-servlet/src/test/java/entities/EntitiesControllerTest.java @@ -6,8 +6,8 @@ import com.datahub.authentication.AuthenticationContext; import com.datahub.authorization.AuthorizationResult; import com.datahub.authorization.AuthorizerChain; -import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.metadata.config.PreProcessHooks; +import com.fasterxml.jackson.databind.ObjectMapper; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.registry.EntityRegistry; diff --git a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java index 9838b00abd191..cf7985a7c26a9 100644 --- a/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java +++ b/metadata-service/openapi-servlet/src/test/java/mock/MockEntityService.java @@ -1,5 +1,6 @@ package mock; +import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.common.AuditStamp; import com.linkedin.common.GlobalTags; import 
com.linkedin.common.GlossaryTermAssociation; @@ -19,11 +20,11 @@ import com.linkedin.entity.AspectType; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.metadata.aspect.VersionedAspect; -import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectDao; -import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; import com.linkedin.metadata.entity.ListResult; import com.linkedin.metadata.entity.RollbackRunResult; +import com.linkedin.metadata.entity.UpdateAspectResult; import com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -56,7 +57,7 @@ import static entities.EntitiesControllerTest.*; -public class MockEntityService extends EntityService { +public class MockEntityService extends EntityServiceImpl { public MockEntityService(@Nonnull AspectDao aspectDao, @Nonnull EventProducer producer, @Nonnull EntityRegistry entityRegistry, @Nonnull UpdateIndicesService updateIndicesService, PreProcessHooks preProcessHooks) { super(aspectDao, producer, entityRegistry, true, updateIndicesService, preProcessHooks); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java index 0004c82b43a6f..5c06dbfc8b25a 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/AspectResource.java @@ -16,6 +16,7 @@ import com.linkedin.metadata.authorization.PoliciesConfig; import com.linkedin.metadata.entity.AspectUtils; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.IngestProposalResult; import 
com.linkedin.metadata.entity.validation.ValidationException; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.query.filter.Filter; @@ -199,7 +200,7 @@ public Task ingestProposal( return RestliUtil.toTask(() -> { log.debug("Proposal: {}", metadataChangeProposal); try { - EntityService.IngestProposalResult result = _entityService.ingestProposal(metadataChangeProposal, auditStamp, asyncBool); + IngestProposalResult result = _entityService.ingestProposal(metadataChangeProposal, auditStamp, asyncBool); Urn responseUrn = result.getUrn(); if (!asyncBool) { diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java index 674609c7f67df..3ff22fb767676 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/BatchIngestionRunResource.java @@ -110,7 +110,7 @@ public Task rollback(@ActionParam("runId") @Nonnull String run } try { return RestliUtil.toTask(() -> { - if (runId.equals(EntityService.DEFAULT_RUN_ID)) { + if (runId.equals(DEFAULT_RUN_ID)) { throw new IllegalArgumentException(String.format( "%s is a default run-id provided for non labeled ingestion runs. 
You cannot delete using this reserved run-id", runId)); diff --git a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java index 3e90f4dff2ccf..b8fd785eaad0f 100644 --- a/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java +++ b/metadata-service/restli-servlet-impl/src/main/java/com/linkedin/metadata/resources/entity/EntityResource.java @@ -428,7 +428,8 @@ public Task scrollAcrossLineage(@ActionParam(PARAM_URN) @No final SearchFlags finalFlags = searchFlags != null ? searchFlags : new SearchFlags().setSkipCache(true); return RestliUtil.toTask(() -> validateLineageScrollResult( _lineageSearchService.scrollAcrossLineage(urn, LineageDirection.valueOf(direction), entityList, input, maxHops, - filter, sortCriterion, scrollId, keepAlive, count, startTimeMillis, endTimeMillis, finalFlags), _entityService), + filter, sortCriterion, scrollId, keepAlive, count, startTimeMillis, endTimeMillis, finalFlags), + _entityService), "scrollAcrossLineage"); } diff --git a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java index 0d3642b91758a..c6d36a6e29f10 100644 --- a/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java +++ b/metadata-service/restli-servlet-impl/src/test/java/com/linkedin/metadata/resources/entity/AspectResourceTest.java @@ -14,6 +14,8 @@ import com.linkedin.metadata.config.PreProcessHooks; import com.linkedin.metadata.entity.AspectDao; import com.linkedin.metadata.entity.EntityService; +import com.linkedin.metadata.entity.EntityServiceImpl; +import com.linkedin.metadata.entity.UpdateAspectResult; import 
com.linkedin.metadata.event.EventProducer; import com.linkedin.metadata.models.AspectSpec; import com.linkedin.metadata.models.registry.EntityRegistry; @@ -48,7 +50,7 @@ public void setup() { _entityRegistry = new MockEntityRegistry(); _updateIndicesService = mock(UpdateIndicesService.class); _preProcessHooks = mock(PreProcessHooks.class); - _entityService = new EntityService(_aspectDao, _producer, _entityRegistry, false, _updateIndicesService, _preProcessHooks); + _entityService = new EntityServiceImpl(_aspectDao, _producer, _entityRegistry, false, _updateIndicesService, _preProcessHooks); _authorizer = mock(Authorizer.class); _aspectResource.setAuthorizer(_authorizer); _aspectResource.setEntityService(_entityService); @@ -77,7 +79,7 @@ public void testAsyncDefaultAspects() throws URISyntaxException { reset(_producer, _aspectDao); when(_aspectDao.runInTransactionWithRetry(any(), anyInt())) - .thenReturn(new EntityService.UpdateAspectResult(urn, null, properties, null, null, null, null, 0)); + .thenReturn(new UpdateAspectResult(urn, null, properties, null, null, null, null, 0)); _aspectResource.ingestProposal(mcp, "false"); verify(_producer, times(5)).produceMetadataChangeLog(eq(urn), any(AspectSpec.class), any(MetadataChangeLog.class)); verifyNoMoreInteractions(_producer); diff --git a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java index 3c90336a142ee..0cf57361e58f8 100644 --- a/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java +++ b/metadata-service/schema-registry-servlet/src/main/java/io/datahubproject/openapi/schema/registry/SchemaRegistryController.java @@ -2,7 +2,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import 
com.linkedin.gms.factory.kafka.schemaregistry.InternalSchemaRegistryFactory; -import com.linkedin.metadata.schema.registry.SchemaRegistryService; +import com.linkedin.metadata.registry.SchemaRegistryService; import io.datahubproject.schema_registry.openapi.generated.CompatibilityCheckResponse; import io.datahubproject.schema_registry.openapi.generated.Config; import io.datahubproject.schema_registry.openapi.generated.ConfigUpdateRequest; diff --git a/metadata-service/services/README.md b/metadata-service/services/README.md new file mode 100644 index 0000000000000..510f8756a7f5e --- /dev/null +++ b/metadata-service/services/README.md @@ -0,0 +1,5 @@ +# Service Layer + +Module to abstract away business logic from implementation-specific libraries to make them lighter weight from a +dependency perspective. Service classes should be here unless they require direct usage of implementation-specific libraries +(e.g., ElasticSearch, Ebean, Neo4J). \ No newline at end of file diff --git a/metadata-service/services/build.gradle b/metadata-service/services/build.gradle new file mode 100644 index 0000000000000..adc7b7bf09d99 --- /dev/null +++ b/metadata-service/services/build.gradle @@ -0,0 +1,73 @@ +apply plugin: 'java' +apply plugin: 'org.hidetake.swagger.generator' + +configurations { + enhance +} + +dependencies { + implementation externalDependency.jsonPatch + compile project(':entity-registry') + compile project(':metadata-utils') + compile project(':metadata-events:mxe-avro-1.7') + compile project(':metadata-events:mxe-registration') + compile project(':metadata-events:mxe-utils-avro-1.7') + compile project(':metadata-models') + compile project(':metadata-service:restli-client') + compile project(':metadata-service:configuration') + + implementation externalDependency.slf4jApi + implementation externalDependency.swaggerAnnotations + runtime externalDependency.logbackClassic + compileOnly externalDependency.lombok + implementation 
externalDependency.commonsCollections + compile externalDependency.javatuples + compile externalDependency.javaxValidation + compile externalDependency.opentelemetryAnnotations + + annotationProcessor externalDependency.lombok + + testCompile externalDependency.testng + testCompile externalDependency.junit + testCompile externalDependency.mockito + testCompile externalDependency.mockitoInline + testCompileOnly externalDependency.lombok + testCompile project(':test-models') + testImplementation project(':datahub-graphql-core') + // logback >=1.3 required due to `testcontainers` only + testImplementation 'ch.qos.logback:logback-classic:1.4.7' + + testAnnotationProcessor externalDependency.lombok + + constraints { + implementation(externalDependency.log4jCore) { + because("previous versions are vulnerable to CVE-2021-45105") + } + implementation(externalDependency.log4jApi) { + because("previous versions are vulnerable to CVE-2021-45105") + } + implementation(externalDependency.commonsText) { + because("previous versions are vulnerable to CVE-2022-42889") + } + implementation(externalDependency.snakeYaml) { + because("previous versions are vulnerable to CVE-2022-25857") + } + implementation(externalDependency.woodstoxCore) { + because("previous versions are vulnerable to CVE-2022-40151-2") + } + implementation(externalDependency.jettison) { + because("previous versions are vulnerable") + } + } +} + +test { + // https://docs.gradle.org/current/userguide/performance.html + maxParallelForks = Runtime.runtime.availableProcessors().intdiv(2) ?: 1 + testLogging.showStandardStreams = true + testLogging.exceptionFormat = 'full' +} + +tasks.withType(Test) { + enableAssertions = false +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventConstants.java b/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventConstants.java similarity index 100% rename from 
metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventConstants.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventConstants.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java b/metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/datahubusage/DataHubUsageEventType.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/AspectUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/AspectUtils.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/AspectUtils.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java similarity index 99% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java index bf077dfe0eb21..35b7bc4589b32 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityService.java @@ -270,7 +270,7 @@ private void updateAspect(Urn urn, String aspectName, RecordTemplate prevAspect, proposal.setAspect(GenericRecordUtils.serializeAspect(newAspect)); final AuditStamp auditStamp = new AuditStamp().setActor(UrnUtils.getUrn(Constants.SYSTEM_ACTOR)).setTime(System.currentTimeMillis()); - final 
EntityService.IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp, false); + final IngestProposalResult ingestProposalResult = _entityService.ingestProposal(proposal, auditStamp, false); if (!ingestProposalResult.isDidUpdate()) { log.error("Failed to ingest aspect with references removed. Before {}, after: {}, please check MCP processor" diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/DeleteEntityUtils.java diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java new file mode 100644 index 0000000000000..25edff740037e --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/EntityService.java @@ -0,0 +1,311 @@ +package com.linkedin.metadata.entity; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.BrowsePaths; +import com.linkedin.common.BrowsePathsV2; +import com.linkedin.common.VersionedUrn; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.entity.Entity; +import com.linkedin.entity.EntityResponse; +import com.linkedin.entity.EnvelopedAspect; +import com.linkedin.events.metadata.ChangeType; +import com.linkedin.metadata.aspect.VersionedAspect; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesArgs; +import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult; +import com.linkedin.metadata.models.AspectSpec; +import com.linkedin.metadata.models.registry.EntityRegistry; +import com.linkedin.metadata.query.ListUrnsResult; +import
com.linkedin.metadata.run.AspectRowSummary; +import com.linkedin.metadata.snapshot.Snapshot; +import com.linkedin.mxe.MetadataAuditOperation; +import com.linkedin.mxe.MetadataChangeLog; +import com.linkedin.mxe.MetadataChangeProposal; +import com.linkedin.mxe.SystemMetadata; +import com.linkedin.util.Pair; +import java.net.URISyntaxException; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.function.Consumer; +import javax.annotation.Nonnull; +import javax.annotation.Nullable; + + +public interface EntityService { + + /** + * Retrieves the latest aspects corresponding to a batch of {@link Urn}s based on a provided + * set of aspect names. + * + * @param urns set of urns to fetch aspects for + * @param aspectNames aspects to fetch for each urn in urns set + * @return a map of provided {@link Urn} to a List containing the requested aspects. + */ + Map> getLatestAspects( + @Nonnull final Set urns, + @Nonnull final Set aspectNames); + + Map getLatestAspectsForUrn(@Nonnull final Urn urn, @Nonnull final Set aspectNames); + + /** + * Retrieves an aspect having a specific {@link Urn}, name, & version. + * + * Note that once we drop support for legacy aspect-specific resources, + * we should make this a protected method. Only visible for backwards compatibility.
+ * + * @param urn an urn associated with the requested aspect + * @param aspectName name of the aspect requested + * @param version specific version of the aspect being requested + * @return the {@link RecordTemplate} representation of the requested aspect object, or null if one cannot be found + */ + RecordTemplate getAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, @Nonnull long version); + + /** + * Retrieves the latest aspects for the given urn as dynamic aspect objects + * (Without having to define union objects) + * + * @param entityName name of the entity to fetch + * @param urn urn of entity to fetch + * @param aspectNames set of aspects to fetch + * @return the {@link EntityResponse} for the requested urn + */ + EntityResponse getEntityV2( + @Nonnull final String entityName, + @Nonnull final Urn urn, + @Nonnull final Set aspectNames) throws URISyntaxException; + + /** + * Retrieves the latest aspects for the given set of urns as dynamic aspect objects + * (Without having to define union objects) + * + * @param entityName name of the entity to fetch + * @param urns set of urns to fetch + * @param aspectNames set of aspects to fetch + * @return a map of {@link Urn} to {@link Entity} object + */ + Map getEntitiesV2( + @Nonnull final String entityName, + @Nonnull final Set urns, + @Nonnull final Set aspectNames) throws URISyntaxException; + + /** + * Retrieves the aspects for the given set of urns and versions as dynamic aspect objects + * (Without having to define union objects) + * + * @param versionedUrns set of urns to fetch with versions of aspects specified in a specialized string + * @param aspectNames set of aspects to fetch + * @return a map of {@link Urn} to {@link Entity} object + */ + Map getEntitiesVersionedV2( + @Nonnull final Set versionedUrns, + @Nonnull final Set aspectNames) throws URISyntaxException; + + /** + * Retrieves the latest aspects for the given set of urns as a list of enveloped aspects + * + * @param entityName name
of the entity to fetch + * @param urns set of urns to fetch + * @param aspectNames set of aspects to fetch + * @return a map of {@link Urn} to {@link EnvelopedAspect} object + */ + Map> getLatestEnvelopedAspects( + // TODO: entityName is unused, can we remove this as a param? + @Nonnull String entityName, + @Nonnull Set urns, + @Nonnull Set aspectNames) throws URISyntaxException; + + /** + * Retrieves the aspects for the given set of versioned urns as a list of enveloped aspects + * + * @param versionedUrns set of urns to fetch with versions of aspects specified in a specialized string + * @param aspectNames set of aspects to fetch + * @return a map of {@link Urn} to {@link EnvelopedAspect} object + */ + Map> getVersionedEnvelopedAspects( + @Nonnull Set versionedUrns, + @Nonnull Set aspectNames) throws URISyntaxException; + + /** + * Retrieves the latest aspect for the given urn as an enveloped aspect + * + * @param entityName name of the entity to fetch + * @param urn urn to fetch + * @param aspectName name of the aspect to fetch + * @return {@link EnvelopedAspect} object, or null if one cannot be found + */ + EnvelopedAspect getLatestEnvelopedAspect( + @Nonnull final String entityName, + @Nonnull final Urn urn, + @Nonnull final String aspectName) throws Exception; + + /** + * Retrieves the specific version of the aspect for the given urn + * + * @param entityName name of the entity to fetch + * @param urn urn to fetch + * @param aspectName name of the aspect to fetch + * @param version version to fetch + * @return {@link EnvelopedAspect} object, or null if one cannot be found + */ + EnvelopedAspect getEnvelopedAspect( + // TODO: entityName is only used for a debug statement, can we remove this as a param?
+ String entityName, + @Nonnull Urn urn, + @Nonnull String aspectName, + long version) throws Exception; + + @Deprecated + VersionedAspect getVersionedAspect(@Nonnull Urn urn, @Nonnull String aspectName, long version); + + ListResult listLatestAspects( + @Nonnull final String entityName, + @Nonnull final String aspectName, + final int start, + final int count); + + void ingestAspects(@Nonnull final Urn urn, @Nonnull List> aspectRecordsToIngest, + @Nonnull final AuditStamp auditStamp, @Nullable SystemMetadata systemMetadata); + + /** + * Ingests (inserts) a new version of an entity aspect & emits a {@link com.linkedin.mxe.MetadataAuditEvent}. + * + * Note that in general, this should not be used externally. It is currently serving upgrade scripts and + * is as such public. + * + * @param urn an urn associated with the new aspect + * @param aspectName name of the aspect being inserted + * @param newValue value of the aspect being inserted + * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time + * @param systemMetadata the {@link SystemMetadata} supplied with the aspect, may be null + * @return the {@link RecordTemplate} representation of the written aspect object + */ + RecordTemplate ingestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName, + @Nonnull final RecordTemplate newValue, @Nonnull final AuditStamp auditStamp, @Nullable SystemMetadata systemMetadata); + + /** + * Ingests (inserts) a new version of an entity aspect & emits a {@link com.linkedin.mxe.MetadataAuditEvent}. + * + * This method runs a read -> write atomically in a single transaction, this is to prevent multiple IDs from being created. + * + * Note that in general, this should not be used externally. It is currently serving upgrade scripts and + * is as such public.
+ * + * @param urn an urn associated with the new aspect + * @param aspectName name of the aspect being inserted + * @param newValue value of the aspect being inserted + * @param auditStamp an {@link AuditStamp} containing metadata about the writer & current time + * @param systemMetadata the {@link SystemMetadata} supplied with the aspect, may be null + * @return the {@link RecordTemplate} representation of the written aspect object + */ + RecordTemplate ingestAspectIfNotPresent(@Nonnull Urn urn, @Nonnull String aspectName, + @Nonnull RecordTemplate newValue, @Nonnull AuditStamp auditStamp, @Nullable SystemMetadata systemMetadata); + + // TODO: Why not in RetentionService? + String batchApplyRetention(Integer start, Integer count, Integer attemptWithVersion, String aspectName, + String urn); + + Integer getCountAspect(@Nonnull String aspectName, @Nullable String urnLike); + + // TODO: Extract this to a different service, doesn't need to be here + RestoreIndicesResult restoreIndices(@Nonnull RestoreIndicesArgs args, @Nonnull Consumer logger); + + @Deprecated + RecordTemplate updateAspect( + @Nonnull final Urn urn, + @Nonnull final String entityName, + @Nonnull final String aspectName, + @Nonnull final AspectSpec aspectSpec, + @Nonnull final RecordTemplate newValue, + @Nonnull final AuditStamp auditStamp, + @Nonnull final long version, + @Nonnull final boolean emitMae); + + ListUrnsResult listUrns(@Nonnull final String entityName, final int start, final int count); + + @Deprecated + Entity getEntity(@Nonnull final Urn urn, @Nonnull final Set aspectNames); + + @Deprecated + Map getEntities(@Nonnull final Set urns, @Nonnull Set aspectNames); + + @Deprecated + void produceMetadataAuditEvent(@Nonnull final Urn urn, @Nonnull final String aspectName, + @Nullable final RecordTemplate oldAspectValue, @Nullable final RecordTemplate newAspectValue, + @Nullable final SystemMetadata oldSystemMetadata, @Nullable final SystemMetadata newSystemMetadata, + @Nullable final MetadataAuditOperation operation); + + @Deprecated + void
produceMetadataAuditEventForKey(@Nonnull final Urn urn, + @Nullable final SystemMetadata newSystemMetadata); + + void produceMetadataChangeLog(@Nonnull final Urn urn, AspectSpec aspectSpec, + @Nonnull final MetadataChangeLog metadataChangeLog); + + void produceMetadataChangeLog(@Nonnull final Urn urn, @Nonnull String entityName, @Nonnull String aspectName, + @Nonnull final AspectSpec aspectSpec, @Nullable final RecordTemplate oldAspectValue, + @Nullable final RecordTemplate newAspectValue, @Nullable final SystemMetadata oldSystemMetadata, + @Nullable final SystemMetadata newSystemMetadata, @Nonnull AuditStamp auditStamp, @Nonnull final ChangeType changeType); + + RecordTemplate getLatestAspect(@Nonnull final Urn urn, @Nonnull final String aspectName); + + @Deprecated + void ingestEntities(@Nonnull final List entities, @Nonnull final AuditStamp auditStamp, + @Nonnull final List systemMetadata); + + @Deprecated + void ingestEntity(Entity entity, AuditStamp auditStamp); + + @Deprecated + void ingestEntity(@Nonnull Entity entity, @Nonnull AuditStamp auditStamp, + @Nonnull SystemMetadata systemMetadata); + + @Deprecated + Snapshot buildSnapshot(@Nonnull final Urn urn, @Nonnull final RecordTemplate aspectValue); + + void setRetentionService(RetentionService retentionService); + + AspectSpec getKeyAspectSpec(@Nonnull final Urn urn); + + Optional getAspectSpec(@Nonnull final String entityName, @Nonnull final String aspectName); + + String getKeyAspectName(@Nonnull final Urn urn); + + List> generateDefaultAspectsIfMissing(@Nonnull final Urn urn, + Set includedAspects); + + AspectSpec getKeyAspectSpec(@Nonnull final String entityName); + + Set getEntityAspectNames(final String entityName); + + EntityRegistry getEntityRegistry(); + + RollbackResult deleteAspect(String urn, String aspectName, @Nonnull Map conditions, boolean hardDelete); + + RollbackRunResult deleteUrn(Urn urn); + + RollbackRunResult rollbackRun(List aspectRows, String runId, boolean hardDelete); + +
RollbackRunResult rollbackWithConditions(List aspectRows, Map conditions, boolean hardDelete); + + IngestProposalResult ingestProposal(@Nonnull MetadataChangeProposal mcp, + AuditStamp auditStamp, final boolean async); + + Boolean exists(Urn urn); + + Boolean isSoftDeleted(@Nonnull final Urn urn); + + void setWritable(boolean canWrite); + + BrowsePaths buildDefaultBrowsePath(final @Nonnull Urn urn) throws URISyntaxException; + + /** + * Builds the default browse path V2 aspects for all entities. + * + * This method currently supports datasets, charts, dashboards, and data jobs best. Everything else + * will have a basic "Default" folder added to their browsePathV2. + */ + @Nonnull + BrowsePathsV2 buildDefaultBrowsePathV2(final @Nonnull Urn urn, boolean useContainerPaths) throws URISyntaxException; +} diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java new file mode 100644 index 0000000000000..27c51e050deff --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/IngestProposalResult.java @@ -0,0 +1,12 @@ +package com.linkedin.metadata.entity; + +import com.linkedin.common.urn.Urn; +import lombok.Value; + + +@Value +public class IngestProposalResult { + Urn urn; + boolean didUpdate; + boolean queued; +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/ListResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/ListResult.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/ListResult.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/ListResult.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java similarity index 99% rename from
metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java index e4cdb5f531b93..a27cb8076721a 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/entity/RetentionService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RetentionService.java @@ -30,7 +30,7 @@ /** - * Service coupled with an {@link EntityService} to handle aspect record retention. + * Service coupled with an {@link EntityServiceImpl} to handle aspect record retention. * * TODO: This class is abstract with storage-specific implementations. It'd be nice to pull storage and retention * concerns apart, let (into {@link AspectDao}) deal with storage, and merge all retention concerns into a single diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/RollbackResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/RollbackResult.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackResult.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/RollbackRunResult.java diff --git a/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java new file mode 100644 index 0000000000000..68ecdbd87dd16 --- /dev/null +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/UpdateAspectResult.java @@ -0,0 +1,21 @@ 
+package com.linkedin.metadata.entity; + +import com.linkedin.common.AuditStamp; +import com.linkedin.common.urn.Urn; +import com.linkedin.data.template.RecordTemplate; +import com.linkedin.mxe.MetadataAuditOperation; +import com.linkedin.mxe.SystemMetadata; +import lombok.Value; + + +@Value +public class UpdateAspectResult { + Urn urn; + RecordTemplate oldValue; + RecordTemplate newValue; + SystemMetadata oldSystemMetadata; + SystemMetadata newSystemMetadata; + MetadataAuditOperation operation; + AuditStamp auditStamp; + long maxVersion; +} diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesArgs.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/restoreindices/RestoreIndicesResult.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionArgs.java diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/entity/retention/BulkApplyRetentionResult.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/Edge.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/Edge.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/Edge.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/graph/Edge.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/GraphClient.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphClient.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/GraphClient.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphClient.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/GraphFilters.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphFilters.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/GraphFilters.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphFilters.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/GraphIndexUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphIndexUtils.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/GraphIndexUtils.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphIndexUtils.java diff --git 
a/metadata-io/src/main/java/com/linkedin/metadata/graph/GraphService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java similarity index 93% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/GraphService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java index a374fb480154b..6f0ac4bc2f904 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/graph/GraphService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/GraphService.java @@ -17,9 +17,6 @@ import javax.annotation.Nullable; import org.apache.commons.collections.CollectionUtils; -import static com.linkedin.metadata.search.utils.QueryUtils.*; - - public interface GraphService { /** * Return lineage registry to construct graph index @@ -80,7 +77,7 @@ public interface GraphService { * findRelatedEntities(null, EMPTY_FILTER, null, EMPTY_FILTER, ["HasOwner"], RelationshipFilter.setDirection(RelationshipDirection.INCOMING), 0, 100) * - RelatedEntity("HasOwner", "dataset one") * - * Calling this method with {@link com.linkedin.metadata.query.RelationshipDirection} `UNDIRECTED` in `relationshipFilter` + * Calling this method with {@link RelationshipDirection} `UNDIRECTED` in `relationshipFilter` * is equivalent to the union of `OUTGOING` and `INCOMING` (without duplicates). 
* * Example III: @@ -180,9 +177,9 @@ default EntityLineageResult getLineage(@Nonnull Urn entityUrn, @Nonnull LineageD edgesByDirection.get(true).stream().map(LineageRegistry.EdgeInfo::getType).collect(Collectors.toSet())); // Fetch outgoing edges RelatedEntitiesResult outgoingEdges = - findRelatedEntities(null, newFilter("urn", entityUrn.toString()), graphFilters.getAllowedEntityTypes(), + findRelatedEntities(null, QueryUtils.newFilter("urn", entityUrn.toString()), graphFilters.getAllowedEntityTypes(), QueryUtils.EMPTY_FILTER, - relationshipTypes, newRelationshipFilter(QueryUtils.EMPTY_FILTER, RelationshipDirection.OUTGOING), offset, + relationshipTypes, QueryUtils.newRelationshipFilter(QueryUtils.EMPTY_FILTER, RelationshipDirection.OUTGOING), offset, count); // Update offset and count to fetch the correct number of incoming edges below @@ -206,9 +203,9 @@ relationshipTypes, newRelationshipFilter(QueryUtils.EMPTY_FILTER, RelationshipDi List relationshipTypes = edgesByDirection.get(false).stream().map(LineageRegistry.EdgeInfo::getType).collect(Collectors.toList()); RelatedEntitiesResult incomingEdges = - findRelatedEntities(null, newFilter("urn", entityUrn.toString()), graphFilters.getAllowedEntityTypes(), + findRelatedEntities(null, QueryUtils.newFilter("urn", entityUrn.toString()), graphFilters.getAllowedEntityTypes(), QueryUtils.EMPTY_FILTER, - relationshipTypes, newRelationshipFilter(QueryUtils.EMPTY_FILTER, RelationshipDirection.INCOMING), offset, + relationshipTypes, QueryUtils.newRelationshipFilter(QueryUtils.EMPTY_FILTER, RelationshipDirection.INCOMING), offset, count); result.setTotal(result.getTotal() + incomingEdges.getTotal()); incomingEdges.getEntities().forEach(entity -> { @@ -238,7 +235,7 @@ relationshipTypes, newRelationshipFilter(QueryUtils.EMPTY_FILTER, RelationshipDi * * An empty list of relationship types removes nothing from the node. 
* - * Calling this method with a {@link com.linkedin.metadata.query.RelationshipDirection} `UNDIRECTED` in `relationshipFilter` + * Calling this method with a {@link RelationshipDirection} `UNDIRECTED` in `relationshipFilter` * is equivalent to the union of `OUTGOING` and `INCOMING` (without duplicates). */ void removeEdgesFromNode(@Nonnull final Urn urn, @Nonnull final List relationshipTypes, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/RelatedEntitiesResult.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/RelatedEntitiesResult.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/RelatedEntitiesResult.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/graph/RelatedEntitiesResult.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/graph/RelatedEntity.java b/metadata-service/services/src/main/java/com/linkedin/metadata/graph/RelatedEntity.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/graph/RelatedEntity.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/graph/RelatedEntity.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/RecommendationsService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java similarity index 100% rename from 
metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/DomainsCandidateSource.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/EntitySearchAggregationSource.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecentlySearchedSource.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationSource.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationUtils.java 
b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationUtils.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationUtils.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/RecommendationUtils.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopPlatformsSource.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTagsSource.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/candidatesource/TopTermsSource.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/ranker/RecommendationModuleRanker.java 
b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/ranker/RecommendationModuleRanker.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/ranker/RecommendationModuleRanker.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/ranker/RecommendationModuleRanker.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/recommendation/ranker/SimpleRecommendationRanker.java b/metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/ranker/SimpleRecommendationRanker.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/recommendation/ranker/SimpleRecommendationRanker.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/recommendation/ranker/SimpleRecommendationRanker.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/schema/registry/SchemaRegistryService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/registry/SchemaRegistryService.java similarity index 89% rename from metadata-io/src/main/java/com/linkedin/metadata/schema/registry/SchemaRegistryService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/registry/SchemaRegistryService.java index e82b30cc7abb8..0a0be60969486 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/schema/registry/SchemaRegistryService.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/registry/SchemaRegistryService.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.schema.registry; +package com.linkedin.metadata.registry; import java.util.Optional; import org.apache.avro.Schema; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/schema/registry/SchemaRegistryServiceImpl.java b/metadata-service/services/src/main/java/com/linkedin/metadata/registry/SchemaRegistryServiceImpl.java similarity index 98% rename from 
metadata-io/src/main/java/com/linkedin/metadata/schema/registry/SchemaRegistryServiceImpl.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/registry/SchemaRegistryServiceImpl.java index 1f0f07c40fb07..8f7403c6aa428 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/schema/registry/SchemaRegistryServiceImpl.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/registry/SchemaRegistryServiceImpl.java @@ -1,4 +1,4 @@ -package com.linkedin.metadata.schema.registry; +package com.linkedin.metadata.registry; import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; diff --git a/metadata-io/src/main/java/com/linkedin/metadata/resource/ResourceReference.java b/metadata-service/services/src/main/java/com/linkedin/metadata/resource/ResourceReference.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/resource/ResourceReference.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/resource/ResourceReference.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/resource/SubResourceType.java b/metadata-service/services/src/main/java/com/linkedin/metadata/resource/SubResourceType.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/resource/SubResourceType.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/resource/SubResourceType.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/EntitySearchService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java similarity index 97% rename from metadata-io/src/main/java/com/linkedin/metadata/search/EntitySearchService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java index 4e1b8b9f1d447..de5bdb62f201b 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/EntitySearchService.java +++ 
b/metadata-service/services/src/main/java/com/linkedin/metadata/search/EntitySearchService.java @@ -71,7 +71,7 @@ public interface EntitySearchService { * @param from index to start the search from * @param size the number of search hits to return * @param searchFlags flags controlling search options - * @return a {@link com.linkedin.metadata.dao.SearchResult} that contains a list of matched documents and related search result metadata + * @return a {@link SearchResult} that contains a list of matched documents and related search result metadata */ @Nonnull SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, @@ -92,7 +92,7 @@ SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable * @param size the number of search hits to return * @param searchFlags flags controlling search options * @param facets list of facets we want aggregations for - * @return a {@link com.linkedin.metadata.dao.SearchResult} that contains a list of matched documents and related search result metadata + * @return a {@link SearchResult} that contains a list of matched documents and related search result metadata */ @Nonnull SearchResult search(@Nonnull String entityName, @Nonnull String input, @Nullable Filter postFilters, diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java similarity index 98% rename from metadata-io/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java index 0e5a285c28dbd..31b94425d6815 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java +++ b/metadata-service/services/src/main/java/com/linkedin/metadata/search/utils/QueryUtils.java @@ -5,7 +5,6 @@ import com.linkedin.data.template.RecordTemplate; import 
com.linkedin.data.template.StringArray; import com.linkedin.metadata.aspect.AspectVersion; -import com.linkedin.metadata.dao.BaseReadDAO; import com.linkedin.metadata.query.filter.Condition; import com.linkedin.metadata.query.filter.ConjunctiveCriterion; import com.linkedin.metadata.query.filter.ConjunctiveCriterionArray; @@ -23,6 +22,8 @@ import javax.annotation.Nonnull; import javax.annotation.Nullable; +import static com.linkedin.metadata.Constants.*; + public class QueryUtils { @@ -83,7 +84,7 @@ public static Filter filterOrDefaultEmptyFilter(@Nullable Filter filter) { public static Set latestAspectVersions(@Nonnull Set> aspectClasses) { return aspectClasses.stream() .map(aspectClass -> new AspectVersion().setAspect(ModelUtils.getAspectName(aspectClass)) - .setVersion(BaseReadDAO.LATEST_VERSION)) + .setVersion(LATEST_VERSION)) .collect(Collectors.toSet()); } diff --git a/metadata-io/src/main/java/com/linkedin/metadata/secret/SecretService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/secret/SecretService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/secret/SecretService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/secret/SecretService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/BaseService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/BaseService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/BaseService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/DataProductService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/DataProductService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/DataProductService.java rename to 
metadata-service/services/src/main/java/com/linkedin/metadata/service/DataProductService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/DomainService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/DomainService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/DomainService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/DomainService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/GlossaryTermService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/GlossaryTermService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/GlossaryTermService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/GlossaryTermService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/LineageService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/LineageService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/LineageService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/LineageService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/OwnerService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/OwnerService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/OwnerService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/OwnerService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/OwnershipTypeService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/OwnershipTypeService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/OwnershipTypeService.java rename to 
metadata-service/services/src/main/java/com/linkedin/metadata/service/OwnershipTypeService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/QueryService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/QueryService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/QueryService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/QueryService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/SettingsService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/SettingsService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/SettingsService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/SettingsService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/TagService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/TagService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/TagService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/TagService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/service/ViewService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/service/ViewService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/service/ViewService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/service/ViewService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/shared/ValidationUtils.java b/metadata-service/services/src/main/java/com/linkedin/metadata/shared/ValidationUtils.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/shared/ValidationUtils.java rename to 
metadata-service/services/src/main/java/com/linkedin/metadata/shared/ValidationUtils.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/systemmetadata/SystemMetadataService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/SemanticVersion.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/SemanticVersion.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/SemanticVersion.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/SemanticVersion.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/TimelineService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/TimelineService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/TimelineService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/TimelineService.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeCategory.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeCategory.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeCategory.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeCategory.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeEvent.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeEvent.java similarity index 100% rename from 
metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeEvent.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeEvent.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeOperation.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeOperation.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeOperation.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeOperation.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeTransaction.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeTransaction.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/data/ChangeTransaction.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/ChangeTransaction.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/PatchOperation.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/PatchOperation.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/data/PatchOperation.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/PatchOperation.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/SemanticChangeType.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/SemanticChangeType.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/data/SemanticChangeType.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/SemanticChangeType.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeline/data/SemanticDifference.java 
b/metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/SemanticDifference.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeline/data/SemanticDifference.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeline/data/SemanticDifference.java diff --git a/metadata-io/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java b/metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java similarity index 100% rename from metadata-io/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java rename to metadata-service/services/src/main/java/com/linkedin/metadata/timeseries/TimeseriesAspectService.java diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/DomainServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/DomainServiceTest.java similarity index 98% rename from metadata-io/src/test/java/com/linkedin/metadata/service/DomainServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/DomainServiceTest.java index a600ec610ba99..4338d883ece1d 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/DomainServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/DomainServiceTest.java @@ -2,6 +2,9 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; +import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.UrnArray; import com.linkedin.common.urn.Urn; @@ -12,6 +15,7 @@ import com.linkedin.entity.EntityResponse; import com.linkedin.entity.EnvelopedAspect; import com.linkedin.entity.EnvelopedAspectMap; +import com.linkedin.entity.client.EntityClient; import 
com.linkedin.metadata.Constants; import com.linkedin.metadata.resource.ResourceReference; import com.linkedin.metadata.utils.GenericRecordUtils; @@ -20,12 +24,8 @@ import java.util.List; import javax.annotation.Nullable; import org.mockito.Mockito; -import org.testcontainers.shaded.com.google.common.collect.ImmutableList; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.Assert; import org.testng.annotations.Test; -import com.linkedin.entity.client.EntityClient; -import com.datahub.authentication.Authentication; public class DomainServiceTest { diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/GlossaryTermServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/GlossaryTermServiceTest.java similarity index 99% rename from metadata-io/src/test/java/com/linkedin/metadata/service/GlossaryTermServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/GlossaryTermServiceTest.java index f2e25792fe8bc..567a457efcf93 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/GlossaryTermServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/GlossaryTermServiceTest.java @@ -3,6 +3,8 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.GlossaryTermAssociation; import com.linkedin.common.GlossaryTermAssociationArray; @@ -28,8 +30,6 @@ import java.util.List; import javax.annotation.Nullable; import org.mockito.Mockito; -import org.testcontainers.shaded.com.google.common.collect.ImmutableList; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.Assert; import org.testng.annotations.Test; diff --git 
a/metadata-io/src/test/java/com/linkedin/metadata/service/LineageServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/LineageServiceTest.java similarity index 99% rename from metadata-io/src/test/java/com/linkedin/metadata/service/LineageServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/LineageServiceTest.java index db5e0a88bf25e..9df8b9ecf46e8 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/LineageServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/LineageServiceTest.java @@ -3,6 +3,7 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.chart.ChartDataSourceType; import com.linkedin.chart.ChartDataSourceTypeArray; @@ -35,19 +36,17 @@ import com.linkedin.metadata.Constants; import com.linkedin.metadata.utils.GenericRecordUtils; import com.linkedin.mxe.MetadataChangeProposal; -import org.joda.time.DateTimeUtils; -import org.mockito.Mockito; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; -import org.testng.annotations.BeforeMethod; -import org.testng.annotations.Test; - -import javax.annotation.Nonnull; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.List; +import javax.annotation.Nonnull; +import org.joda.time.DateTimeUtils; +import org.mockito.Mockito; +import org.testng.annotations.BeforeMethod; +import org.testng.annotations.Test; -import static org.testng.Assert.assertThrows; +import static org.testng.Assert.*; public class LineageServiceTest { private static AuditStamp _auditStamp; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/OwnerServiceTest.java 
b/metadata-service/services/src/test/java/com/linkedin/metadata/service/OwnerServiceTest.java similarity index 98% rename from metadata-io/src/test/java/com/linkedin/metadata/service/OwnerServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/OwnerServiceTest.java index 9247837fe0b32..c23a151e52734 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/OwnerServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/OwnerServiceTest.java @@ -3,6 +3,8 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.Owner; import com.linkedin.common.OwnerArray; @@ -24,8 +26,6 @@ import java.util.List; import javax.annotation.Nullable; import org.mockito.Mockito; -import org.testcontainers.shaded.com.google.common.collect.ImmutableList; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/OwnershipTypeServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/OwnershipTypeServiceTest.java similarity index 99% rename from metadata-io/src/test/java/com/linkedin/metadata/service/OwnershipTypeServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/OwnershipTypeServiceTest.java index fd79ab7b99ccd..dcb4a745732b2 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/OwnershipTypeServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/OwnershipTypeServiceTest.java @@ -3,6 +3,7 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import 
com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -19,7 +20,6 @@ import com.linkedin.ownership.OwnershipTypeInfo; import com.linkedin.r2.RemoteInvocationException; import org.mockito.Mockito; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/QueryServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/QueryServiceTest.java similarity index 100% rename from metadata-io/src/test/java/com/linkedin/metadata/service/QueryServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/QueryServiceTest.java diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/SettingsServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/SettingsServiceTest.java similarity index 100% rename from metadata-io/src/test/java/com/linkedin/metadata/service/SettingsServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/SettingsServiceTest.java diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/TagServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/TagServiceTest.java similarity index 99% rename from metadata-io/src/test/java/com/linkedin/metadata/service/TagServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/TagServiceTest.java index cc1e4831b30b0..125265540dc77 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/TagServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/TagServiceTest.java @@ -3,6 +3,8 @@ import com.datahub.authentication.Actor; import 
com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.GlobalTags; import com.linkedin.common.TagAssociation; @@ -28,8 +30,6 @@ import java.util.List; import javax.annotation.Nullable; import org.mockito.Mockito; -import org.testcontainers.shaded.com.google.common.collect.ImmutableList; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.Assert; import org.testng.annotations.Test; diff --git a/metadata-io/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java b/metadata-service/services/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java similarity index 99% rename from metadata-io/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java rename to metadata-service/services/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java index 79449c1ff62c1..5841717e7db93 100644 --- a/metadata-io/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java +++ b/metadata-service/services/src/test/java/com/linkedin/metadata/service/ViewServiceTest.java @@ -3,6 +3,8 @@ import com.datahub.authentication.Actor; import com.datahub.authentication.ActorType; import com.datahub.authentication.Authentication; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; @@ -28,8 +30,6 @@ import com.linkedin.view.DataHubViewType; import java.util.Collections; import org.mockito.Mockito; -import org.testcontainers.shaded.com.google.common.collect.ImmutableList; -import org.testcontainers.shaded.com.google.common.collect.ImmutableMap; import org.testng.Assert; import org.testng.annotations.Test; diff --git 
a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java index c14d7cbf1ab2f..2c26c00e9c4d6 100644 --- a/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java +++ b/metadata-service/servlet/src/main/java/com/datahub/gms/servlet/ConfigSearchExport.java @@ -1,9 +1,9 @@ package com.datahub.gms.servlet; +import com.linkedin.metadata.config.search.SearchConfiguration; import com.datahub.gms.util.CSVWriter; import com.linkedin.datahub.graphql.resolvers.EntityTypeMapper; import com.linkedin.gms.factory.config.ConfigurationProvider; -import com.linkedin.metadata.config.search.SearchConfiguration; import com.linkedin.metadata.models.EntitySpec; import com.linkedin.metadata.models.registry.EntityRegistry; import com.linkedin.metadata.query.SearchFlags; diff --git a/settings.gradle b/settings.gradle index e4d1702829bc8..f326cbf10b96b 100644 --- a/settings.gradle +++ b/settings.gradle @@ -60,4 +60,5 @@ include 'metadata-service:schema-registry-api' include 'metadata-service:schema-registry-servlet' include 'metadata-integration:java:examples' include 'mock-entity-registry' - +include 'metadata-service:services' +include 'metadata-service:configuration' From d734b2849e514b1c539d195123602b11cefdfc20 Mon Sep 17 00:00:00 2001 From: Ellie O'Neil <110510035+eboneil@users.noreply.github.com> Date: Wed, 19 Jul 2023 22:36:26 -0400 Subject: [PATCH 10/20] feat(ingest/mysql): Add estimate row count for mysql (#8420) --- .../ingestion/source/ge_data_profiler.py | 29 +- .../ingestion/source/ge_profiling_config.py | 2 +- ..._profile_table_row_count_estimate_only.yml | 14 + .../mysql_table_row_count_estimate_only.json | 404 ++++++++++++++++++ .../tests/integration/mysql/test_mysql.py | 4 + 5 files changed, 443 insertions(+), 10 deletions(-) create mode 100644 
metadata-ingestion/tests/integration/mysql/mysql_profile_table_row_count_estimate_only.yml create mode 100644 metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py index ab259b9b16dd2..4ea721f6fd0cc 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_data_profiler.py @@ -359,21 +359,32 @@ def _get_column_cardinality( @_run_with_query_combiner def _get_dataset_rows(self, dataset_profile: DatasetProfileClass) -> None: - if ( - self.config.profile_table_row_count_estimate_only - and self.dataset.engine.dialect.name.lower() == "postgresql" - ): + if self.config.profile_table_row_count_estimate_only: schema_name = self.dataset_name.split(".")[1] table_name = self.dataset_name.split(".")[2] logger.debug( f"Getting estimated rowcounts for table:{self.dataset_name}, schema:{schema_name}, table:{table_name}" ) + + dialect_name = self.dataset.engine.dialect.name.lower() + if dialect_name == "postgresql": + get_estimate_script = sa.text( + f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'" + ) + elif dialect_name == "mysql": + get_estimate_script = sa.text( + f"SELECT table_rows AS estimate FROM information_schema.tables WHERE table_schema = '{schema_name}' AND table_name = '{table_name}'" + ) + else: + logger.debug( + f"Dialect {dialect_name} not supported for feature " + f"profile_table_row_count_estimate_only. Proceeding with full row count." 
+ ) + dataset_profile.rowCount = self.dataset.get_row_count() + return + dataset_profile.rowCount = int( - self.dataset.engine.execute( - sa.text( - f"SELECT c.reltuples AS estimate FROM pg_class c JOIN pg_namespace n ON n.oid = c.relnamespace WHERE c.relname = '{table_name}' AND n.nspname = '{schema_name}'" - ) - ).scalar() + self.dataset.engine.execute(get_estimate_script).scalar() ) else: dataset_profile.rowCount = self.dataset.get_row_count() diff --git a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py index 3f61a99bfd585..8c5f1646c1d67 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py +++ b/metadata-ingestion/src/datahub/ingestion/source/ge_profiling_config.py @@ -118,7 +118,7 @@ class GEProfilingConfig(ConfigModel): profile_table_row_count_estimate_only: bool = Field( default=False, description="Use an approximate query for row count. This will be much faster but slightly " - "less accurate. Only supported for Postgres. ", + "less accurate. Only supported for Postgres and MySQL. 
", ) # The default of (5 * cpu_count) is adopted from the default max_workers diff --git a/metadata-ingestion/tests/integration/mysql/mysql_profile_table_row_count_estimate_only.yml b/metadata-ingestion/tests/integration/mysql/mysql_profile_table_row_count_estimate_only.yml new file mode 100644 index 0000000000000..d6a9ed85a8fce --- /dev/null +++ b/metadata-ingestion/tests/integration/mysql/mysql_profile_table_row_count_estimate_only.yml @@ -0,0 +1,14 @@ +source: + type: mysql + config: + username: root + password: example + host_port: localhost:53307 + database: northwind + profiling: + enabled: True + profile_table_row_count_estimate_only: true +sink: + type: file + config: + filename: "./mysql_mces.json" diff --git a/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json b/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json new file mode 100644 index 0000000000000..e668525b930af --- /dev/null +++ b/metadata-ingestion/tests/integration/mysql/mysql_table_row_count_estimate_only.json @@ -0,0 +1,404 @@ +[ +{ + "entityType": "container", + "entityUrn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", + "changeType": "UPSERT", + "aspectName": "containerProperties", + "aspect": { + "json": { + "customProperties": { + "platform": "mysql", + "env": "PROD", + "database": "northwind" + }, + "name": "northwind" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", + "changeType": "UPSERT", + "aspectName": "dataPlatformInstance", + "aspect": { + "json": { 
+ "platform": "urn:li:dataPlatform:mysql" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Database" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "container", + "entityUrn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "customers", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "northwind.customers", + "platform": "urn:li:dataPlatform:mysql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + 
"fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "company", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=50)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "last_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=50)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "first_name", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=50)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "email_address", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=50)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "priority", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "FLOAT()", + "recursive": false, + "isPartOfKey": false + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + 
"json": { + "path": [ + { + "id": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", + "urn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", + "changeType": "UPSERT", + "aspectName": "container", + "aspect": { + "json": { + "container": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f" + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "proposedSnapshot": { + "com.linkedin.pegasus2avro.metadata.snapshot.DatasetSnapshot": { + "urn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", + "aspects": [ + { + "com.linkedin.pegasus2avro.common.Status": { + "removed": false + } + }, + { + "com.linkedin.pegasus2avro.dataset.DatasetProperties": { + "customProperties": {}, + "name": "orders", + "tags": [] + } + }, + { + "com.linkedin.pegasus2avro.schema.SchemaMetadata": { + "schemaName": "northwind.orders", + "platform": "urn:li:dataPlatform:mysql", + "version": 0, + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "hash": "", + "platformSchema": { + "com.linkedin.pegasus2avro.schema.MySqlDDL": { + "tableSchema": "" + } + }, + "fields": [ + { + "fieldPath": "id", + "nullable": false, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": true + }, + { + "fieldPath": "description", + "nullable": true, + "type": { + "type": { + "com.linkedin.pegasus2avro.schema.StringType": {} + } + }, + "nativeDataType": "VARCHAR(length=50)", + "recursive": false, + "isPartOfKey": false + }, + { + "fieldPath": "customer_id", + "nullable": false, + "type": { + "type": { + 
"com.linkedin.pegasus2avro.schema.NumberType": {} + } + }, + "nativeDataType": "INTEGER()", + "recursive": false, + "isPartOfKey": false + } + ], + "foreignKeys": [ + { + "name": "fk_order_customer", + "foreignFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD),id)" + ], + "sourceFields": [ + "urn:li:schemaField:(urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD),customer_id)" + ], + "foreignDataset": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.customers,PROD)" + } + ] + } + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", + "changeType": "UPSERT", + "aspectName": "subTypes", + "aspect": { + "json": { + "typeNames": [ + "Table" + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +}, +{ + "entityType": "dataset", + "entityUrn": "urn:li:dataset:(urn:li:dataPlatform:mysql,northwind.orders,PROD)", + "changeType": "UPSERT", + "aspectName": "browsePathsV2", + "aspect": { + "json": { + "path": [ + { + "id": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f", + "urn": "urn:li:container:dc2ae101b66746b9c2b6df8ee89ca88f" + } + ] + } + }, + "systemMetadata": { + "lastObserved": 1586847600000, + "runId": "mysql-2020_04_14-07_00_00" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/mysql/test_mysql.py b/metadata-ingestion/tests/integration/mysql/test_mysql.py index bf5806bc57b13..8c8626a2d2297 100644 --- a/metadata-ingestion/tests/integration/mysql/test_mysql.py +++ b/metadata-ingestion/tests/integration/mysql/test_mysql.py @@ -48,6 +48,10 @@ def mysql_runner(docker_compose_runner, pytestconfig, test_resources_dir): ("mysql_to_file_with_db.yml", "mysql_mces_with_db_golden.json"), ("mysql_to_file_no_db.yml", 
"mysql_mces_no_db_golden.json"), ("mysql_profile_table_level_only.yml", "mysql_table_level_only.json"), + ( + "mysql_profile_table_row_count_estimate_only.yml", + "mysql_table_row_count_estimate_only.json", + ), ], ) @freeze_time(FROZEN_TIME) From 9df70d7355e2c66a29665d8cae082f237f5d8470 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Thu, 20 Jul 2023 08:25:30 +0530 Subject: [PATCH 11/20] ingest(elasticsearch): add basic profiling (#8351) --- .../docs/transformer/dataset_transformer.md | 28 +++++ metadata-ingestion/setup.py | 1 + .../ingestion/source/elastic_search.py | 101 +++++++++++++++++- .../ingestion/transformer/add_dataset_tags.py | 42 ++------ .../transformer/dataset_transformer.py | 58 +++++++++- .../transformer/extract_dataset_tags.py | 62 +++++++++++ .../tests/unit/test_elasticsearch_source.py | 28 +++++ .../tests/unit/test_transform_dataset.py | 51 +++++++++ 8 files changed, 332 insertions(+), 39 deletions(-) create mode 100644 metadata-ingestion/src/datahub/ingestion/transformer/extract_dataset_tags.py diff --git a/metadata-ingestion/docs/transformer/dataset_transformer.md b/metadata-ingestion/docs/transformer/dataset_transformer.md index 753e9c87300d9..cb06656940918 100644 --- a/metadata-ingestion/docs/transformer/dataset_transformer.md +++ b/metadata-ingestion/docs/transformer/dataset_transformer.md @@ -175,6 +175,34 @@ transformers: The main use case of `simple_remove_dataset_ownership` is to remove incorrect owners present in the source. You can use it along with the [Simple Add Dataset ownership](#simple-add-dataset-ownership) to remove wrong owners and add the correct ones. Note that whatever owners you send via `simple_remove_dataset_ownership` will overwrite the owners present in the UI. 
+## Extract Dataset globalTags +### Config Details +| Field | Required | Type | Default | Description | +|-----------------------------|----------|--------------|---------------|------------------------------------------------------------------| +| `extract_tags_from` | ✅ | string | `urn` | Which field to extract the tag from. Currently only `urn` is supported. | +| `extract_tags_regex` | ✅ | string | `.*` | Regex used to extract the tag. It is matched against the dataset name in the URN, and its first capture group becomes the tag name. | +| `replace_existing` | | boolean | `false` | Whether to remove tags from entity sent by ingestion source. | +| `semantics` | | enum | `OVERWRITE` | Whether to OVERWRITE or PATCH the entity present on DataHub GMS. | + +Let’s suppose we’d like to add dataset tags based on part of the URN. To do so, we can use the `extract_dataset_tags` transformer that’s included in the ingestion framework. + +The config, which we’d append to our ingestion recipe YAML, would look like this: + + ```yaml + transformers: + - type: "extract_dataset_tags" + config: + extract_tags_from: "urn" + extract_tags_regex: ".([^._]*)_" + ``` + +So if we have input URNs like +- `urn:li:dataset:(urn:li:dataPlatform:kafka,clusterid.USA-ops-team_table1,PROD)` +- `urn:li:dataset:(urn:li:dataPlatform:kafka,clusterid.Canada-marketing_table1,PROD)` + +the tags `USA-ops-team` and `Canada-marketing` will be added to them, respectively. This is helpful in case you are using prefixes in your datasets to segregate different things. Now you can turn that segregation into a tag on your dataset in DataHub for further use. 
+ + ## Simple Add Dataset globalTags ### Config Details | Field | Required | Type | Default | Description | diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 029527ea959d5..36f4151df0359 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -613,6 +613,7 @@ def get_long_description(): "add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:AddDatasetTags", "simple_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:SimpleAddDatasetTags", "pattern_add_dataset_tags = datahub.ingestion.transformer.add_dataset_tags:PatternAddDatasetTags", + "extract_dataset_tags = datahub.ingestion.transformer.extract_dataset_tags:ExtractDatasetTags", "add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:AddDatasetTerms", "simple_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:SimpleAddDatasetTerms", "pattern_add_dataset_terms = datahub.ingestion.transformer.add_dataset_terms:PatternAddDatasetTerms", diff --git a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py index ef0f4ed21f83c..ab6aa18dac4d2 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py +++ b/metadata-ingestion/src/datahub/ingestion/source/elastic_search.py @@ -1,5 +1,7 @@ import json import logging +import re +import time from collections import defaultdict from dataclasses import dataclass, field from hashlib import md5 @@ -9,7 +11,7 @@ from pydantic import validator from pydantic.fields import Field -from datahub.configuration.common import AllowDenyPattern +from datahub.configuration.common import AllowDenyPattern, ConfigModel from datahub.configuration.source_common import ( EnvConfigMixin, PlatformInstanceConfigMixin, @@ -45,6 +47,7 @@ BooleanTypeClass, BytesTypeClass, DataPlatformInstanceClass, + DatasetProfileClass, DatasetPropertiesClass, DateTypeClass, NullTypeClass, @@ -56,6 +59,7 @@ 
SubTypesClass, ) from datahub.utilities.config_clean import remove_protocol +from datahub.utilities.urns.dataset_urn import DatasetUrn logger = logging.getLogger(__name__) @@ -191,6 +195,43 @@ def report_dropped(self, index: str) -> None: self.filtered.append(index) +class ElasticProfiling(ConfigModel): + enabled: bool = Field( + default=False, + description="Whether to enable profiling for the elastic search source.", + ) + + +class CollapseUrns(ConfigModel): + urns_suffix_regex: List[str] = Field( + default_factory=list, + description="""List of regex patterns to remove from the name of the URN. All of the indices before removal of URNs are considered as the same dataset. These are applied in order for each URN. + The main case where you would want to have multiple of these if the name where you are trying to remove suffix from have different formats. + e.g. ending with -YYYY-MM-DD as well as ending -epochtime would require you to have 2 regex patterns to remove the suffixes across all URNs.""", + ) + + +def collapse_name(name: str, collapse_urns: CollapseUrns) -> str: + for suffix in collapse_urns.urns_suffix_regex: + name = re.sub(suffix, "", name) + return name + + +def collapse_urn(urn: str, collapse_urns: CollapseUrns) -> str: + if len(collapse_urns.urns_suffix_regex) == 0: + return urn + urn_obj = DatasetUrn.create_from_string(urn) + name = collapse_name(name=urn_obj.get_dataset_name(), collapse_urns=collapse_urns) + data_platform_urn = urn_obj.get_data_platform_urn() + return str( + DatasetUrn.create_from_ids( + platform_id=data_platform_urn.get_entity_id_as_string(), + table_name=name, + env=urn_obj.get_env(), + ) + ) + + class ElasticsearchSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin): host: str = Field( default="localhost:9200", description="The elastic search host URI." 
@@ -249,6 +290,13 @@ class ElasticsearchSourceConfig(PlatformInstanceConfigMixin, EnvConfigMixin): description="The regex patterns for filtering index templates to ingest.", ) + profiling: ElasticProfiling = Field( + default_factory=ElasticProfiling, + ) + collapse_urns: CollapseUrns = Field( + default_factory=CollapseUrns, + ) + @validator("host") def host_colon_port_comma(cls, host_val: str) -> str: for entry in host_val.split(","): @@ -295,6 +343,7 @@ def __init__(self, config: ElasticsearchSourceConfig, ctx: PipelineContext): self.report = ElasticsearchSourceReport() self.data_stream_partition_count: Dict[str, int] = defaultdict(int) self.platform: str = "elasticsearch" + self.profiling_info: Dict[str, DatasetProfileClass] = {} @classmethod def create( @@ -317,6 +366,12 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield mcp.as_workunit() else: self.report.report_dropped(index) + for urn, profiling_info in self.profiling_info.items(): + yield MetadataChangeProposalWrapper( + entityUrn=urn, + aspect=profiling_info, + ).as_workunit() + self.profiling_info = {} for mcp in self._get_data_stream_index_count_mcps(): yield mcp.as_workunit() @@ -337,6 +392,9 @@ def _get_data_stream_index_count_mcps( env=self.source_config.env, platform_instance=self.source_config.platform_instance, ) + dataset_urn = collapse_urn( + urn=dataset_urn, collapse_urns=self.source_config.collapse_urns + ) yield MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=DatasetPropertiesClass( @@ -364,6 +422,9 @@ def _extract_mcps( else: raw_index = self.client.indices.get_template(name=index) raw_index_metadata = raw_index[index] + collapsed_index_name = collapse_name( + name=index, collapse_urns=self.source_config.collapse_urns + ) # 1. Construct and emit the schemaMetadata aspect # 1.1 Generate the schema fields from ES mappings. 
@@ -378,7 +439,7 @@ def _extract_mcps( # 1.2 Generate the SchemaMetadata aspect schema_metadata = SchemaMetadata( - schemaName=index, + schemaName=collapsed_index_name, platform=make_data_platform_urn(self.platform), version=0, hash=md5_hash, @@ -393,6 +454,9 @@ def _extract_mcps( platform_instance=self.source_config.platform_instance, env=self.source_config.env, ) + dataset_urn = collapse_urn( + urn=dataset_urn, collapse_urns=self.source_config.collapse_urns + ) yield MetadataChangeProposalWrapper( entityUrn=dataset_urn, aspect=schema_metadata, @@ -458,6 +522,39 @@ def _extract_mcps( ), ) + if self.source_config.profiling.enabled: + cat_response = self.client.cat.indices( + index=index, params={"format": "json", "bytes": "b"} + ) + if len(cat_response) == 1: + index_res = cat_response[0] + docs_count = int(index_res["docs.count"]) + size = int(index_res["store.size"]) + if len(self.source_config.collapse_urns.urns_suffix_regex) > 0: + if dataset_urn not in self.profiling_info: + self.profiling_info[dataset_urn] = DatasetProfileClass( + timestampMillis=int(time.time() * 1000), + rowCount=docs_count, + columnCount=len(schema_fields), + sizeInBytes=size, + ) + else: + existing_profile = self.profiling_info[dataset_urn] + if existing_profile.rowCount is not None: + docs_count = docs_count + existing_profile.rowCount + if existing_profile.sizeInBytes is not None: + size = size + existing_profile.sizeInBytes + self.profiling_info[dataset_urn] = DatasetProfileClass( + timestampMillis=int(time.time() * 1000), + rowCount=docs_count, + columnCount=len(schema_fields), + sizeInBytes=size, + ) + else: + logger.warning( + "Unexpected response from cat response with multiple rows" + ) + def get_report(self): return self.report diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_tags.py index 02df9e77a6a6d..5a276ad899c48 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_tags.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/add_dataset_tags.py @@ -2,13 +2,11 @@ from datahub.configuration.common import ( KeyValuePattern, - TransformerSemantics, TransformerSemanticsConfigModel, ) from datahub.configuration.import_resolver import pydantic_resolve_key from datahub.emitter.mce_builder import Aspect from datahub.ingestion.api.common import PipelineContext -from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass @@ -35,50 +33,22 @@ def create(cls, config_dict: dict, ctx: PipelineContext) -> "AddDatasetTags": config = AddDatasetTagsConfig.parse_obj(config_dict) return cls(config, ctx) - @staticmethod - def _merge_with_server_global_tags( - graph: DataHubGraph, urn: str, global_tags_aspect: Optional[GlobalTagsClass] - ) -> Optional[GlobalTagsClass]: - if not global_tags_aspect or not global_tags_aspect.tags: - # nothing to add, no need to consult server - return None - - # Merge the transformed tags with existing server tags. - # The transformed tags takes precedence, which may change the tag context. 
- server_global_tags_aspect = graph.get_tags(entity_urn=urn) - if server_global_tags_aspect: - global_tags_aspect.tags = list( - { - **{tag.tag: tag for tag in server_global_tags_aspect.tags}, - **{tag.tag: tag for tag in global_tags_aspect.tags}, - }.values() - ) - - return global_tags_aspect - def transform_aspect( self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] ) -> Optional[Aspect]: in_global_tags_aspect: GlobalTagsClass = cast(GlobalTagsClass, aspect) out_global_tags_aspect: GlobalTagsClass = GlobalTagsClass(tags=[]) - # Check if user want to keep existing tags - if in_global_tags_aspect is not None and self.config.replace_existing is False: - out_global_tags_aspect.tags.extend(in_global_tags_aspect.tags) + self.update_if_keep_existing( + self.config, in_global_tags_aspect, out_global_tags_aspect + ) tags_to_add = self.config.get_tags_to_add(entity_urn) if tags_to_add is not None: out_global_tags_aspect.tags.extend(tags_to_add) - if self.config.semantics == TransformerSemantics.PATCH: - assert self.ctx.graph - return cast( - Optional[Aspect], - AddDatasetTags._merge_with_server_global_tags( - self.ctx.graph, entity_urn, out_global_tags_aspect - ), - ) - - return cast(Aspect, out_global_tags_aspect) + return self.get_result_semantics( + self.config, self.ctx.graph, entity_urn, out_global_tags_aspect + ) class SimpleDatasetTagConfig(TransformerSemanticsConfigModel): diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py index 22f8a11037137..0753d6c3fd830 100644 --- a/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py +++ b/metadata-ingestion/src/datahub/ingestion/transformer/dataset_transformer.py @@ -1,11 +1,18 @@ import logging from abc import ABCMeta -from typing import List +from typing import List, Optional, cast +from datahub.configuration.common import ( + TransformerSemantics, + 
TransformerSemanticsConfigModel, +) +from datahub.emitter.mce_builder import Aspect +from datahub.ingestion.graph.client import DataHubGraph from datahub.ingestion.transformer.base_transformer import ( BaseTransformer, SingleAspectTransformer, ) +from datahub.metadata.schema_classes import GlobalTagsClass log = logging.getLogger(__name__) @@ -39,6 +46,55 @@ class DatasetTagsTransformer(DatasetTransformer, metaclass=ABCMeta): def aspect_name(self) -> str: return "globalTags" + @staticmethod + def merge_with_server_global_tags( + graph: DataHubGraph, urn: str, global_tags_aspect: Optional[GlobalTagsClass] + ) -> Optional[GlobalTagsClass]: + if not global_tags_aspect or not global_tags_aspect.tags: + # nothing to add, no need to consult server + return None + + # Merge the transformed tags with existing server tags. + # The transformed tags takes precedence, which may change the tag context. + server_global_tags_aspect = graph.get_tags(entity_urn=urn) + if server_global_tags_aspect: + global_tags_aspect.tags = list( + { + **{tag.tag: tag for tag in server_global_tags_aspect.tags}, + **{tag.tag: tag for tag in global_tags_aspect.tags}, + }.values() + ) + + return global_tags_aspect + + @staticmethod + def update_if_keep_existing( + config: TransformerSemanticsConfigModel, + in_global_tags_aspect: GlobalTagsClass, + out_global_tags_aspect: GlobalTagsClass, + ) -> None: + """Check if user want to keep existing tags""" + if in_global_tags_aspect is not None and config.replace_existing is False: + out_global_tags_aspect.tags.extend(in_global_tags_aspect.tags) + + @staticmethod + def get_result_semantics( + config: TransformerSemanticsConfigModel, + graph: Optional[DataHubGraph], + urn: str, + out_global_tags_aspect: Optional[GlobalTagsClass], + ) -> Optional[Aspect]: + if config.semantics == TransformerSemantics.PATCH: + assert graph + return cast( + Optional[Aspect], + DatasetTagsTransformer.merge_with_server_global_tags( + graph, urn, out_global_tags_aspect + ), + ) + + 
return cast(Aspect, out_global_tags_aspect) + class DatasetTermsTransformer(DatasetTransformer, metaclass=ABCMeta): def aspect_name(self) -> str: diff --git a/metadata-ingestion/src/datahub/ingestion/transformer/extract_dataset_tags.py b/metadata-ingestion/src/datahub/ingestion/transformer/extract_dataset_tags.py new file mode 100644 index 0000000000000..25b18f0806fd6 --- /dev/null +++ b/metadata-ingestion/src/datahub/ingestion/transformer/extract_dataset_tags.py @@ -0,0 +1,62 @@ +import re +from enum import Enum +from typing import List, Optional, cast + +from datahub.configuration.common import TransformerSemanticsConfigModel +from datahub.emitter.mce_builder import Aspect +from datahub.ingestion.api.common import PipelineContext +from datahub.ingestion.transformer.dataset_transformer import DatasetTagsTransformer +from datahub.metadata.schema_classes import GlobalTagsClass, TagAssociationClass +from datahub.utilities.urns.dataset_urn import DatasetUrn + + +class ExtractTagsOption(Enum): + URN = "urn" + + +class ExtractDatasetTagsConfig(TransformerSemanticsConfigModel): + extract_tags_from: ExtractTagsOption = ExtractTagsOption.URN + extract_tags_regex: str + + +class ExtractDatasetTags(DatasetTagsTransformer): + """Transformer that add tags to datasets according to configuration by extracting from metadata. 
Currently only extracts from name.""" + + def __init__(self, config: ExtractDatasetTagsConfig, ctx: PipelineContext): + super().__init__() + self.ctx: PipelineContext = ctx + self.config: ExtractDatasetTagsConfig = config + + @classmethod + def create(cls, config_dict: dict, ctx: PipelineContext) -> "ExtractDatasetTags": + config = ExtractDatasetTagsConfig.parse_obj(config_dict) + return cls(config, ctx) + + def _get_tags_to_add(self, entity_urn: str) -> List[TagAssociationClass]: + if self.config.extract_tags_from == ExtractTagsOption.URN: + urn = DatasetUrn.create_from_string(entity_urn) + match = re.search(self.config.extract_tags_regex, urn.get_dataset_name()) + if match: + captured_group = match.group(1) + tag = f"urn:li:tag:{captured_group}" + return [TagAssociationClass(tag=tag)] + return [] + else: + raise NotImplementedError() + + def transform_aspect( + self, entity_urn: str, aspect_name: str, aspect: Optional[Aspect] + ) -> Optional[Aspect]: + in_global_tags_aspect: GlobalTagsClass = cast(GlobalTagsClass, aspect) + out_global_tags_aspect: GlobalTagsClass = GlobalTagsClass(tags=[]) + self.update_if_keep_existing( + self.config, in_global_tags_aspect, out_global_tags_aspect + ) + + tags_to_add = self._get_tags_to_add(entity_urn) + if tags_to_add is not None: + out_global_tags_aspect.tags.extend(tags_to_add) + + return self.get_result_semantics( + self.config, self.ctx.graph, entity_urn, out_global_tags_aspect + ) diff --git a/metadata-ingestion/tests/unit/test_elasticsearch_source.py b/metadata-ingestion/tests/unit/test_elasticsearch_source.py index 7a5fc1b58b6b1..cd158b855cdfc 100644 --- a/metadata-ingestion/tests/unit/test_elasticsearch_source.py +++ b/metadata-ingestion/tests/unit/test_elasticsearch_source.py @@ -7,8 +7,10 @@ import pytest from datahub.ingestion.source.elastic_search import ( + CollapseUrns, ElasticsearchSourceConfig, ElasticToSchemaFieldConverter, + collapse_urn, ) from datahub.metadata.com.linkedin.pegasus2avro.schema import 
SchemaField @@ -2470,3 +2472,29 @@ def test_host_port_parsing() -> None: with pytest.raises(pydantic.ValidationError): ElasticsearchSourceConfig.parse_obj(config_dict) + + +def test_collapse_urns() -> None: + assert ( + collapse_urn( + urn="urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event-000059,PROD)", + collapse_urns=CollapseUrns( + urns_suffix_regex=[ + "-\\d+$", + ] + ), + ) + == "urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event,PROD)" + ) + + assert ( + collapse_urn( + urn="urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event-2023.01.11,PROD)", + collapse_urns=CollapseUrns( + urns_suffix_regex=[ + "-\\d{4}\\.\\d{2}\\.\\d{2}", + ] + ), + ) + == "urn:li:dataset:(urn:li:dataPlatform:elasticsearch,platform1.prefix_datahub_usage_event,PROD)" + ) diff --git a/metadata-ingestion/tests/unit/test_transform_dataset.py b/metadata-ingestion/tests/unit/test_transform_dataset.py index 893431dda6b34..8b2535eea1fe9 100644 --- a/metadata-ingestion/tests/unit/test_transform_dataset.py +++ b/metadata-ingestion/tests/unit/test_transform_dataset.py @@ -61,6 +61,7 @@ SimpleAddDatasetDomain, ) from datahub.ingestion.transformer.dataset_transformer import DatasetTransformer +from datahub.ingestion.transformer.extract_dataset_tags import ExtractDatasetTags from datahub.ingestion.transformer.mark_dataset_status import MarkDatasetStatus from datahub.ingestion.transformer.remove_dataset_ownership import ( SimpleRemoveDatasetOwnership, @@ -288,6 +289,56 @@ def test_simple_dataset_ownership_with_type_transformation(mock_time): assert ownership_aspect.owners[0].type == models.OwnershipTypeClass.PRODUCER +def _test_extract_tags(in_urn: str, regex_str: str, out_tag: str) -> None: + input = make_generic_dataset(entity_urn=in_urn) + transformer = ExtractDatasetTags.create( + { + "extract_tags_from": "urn", + "extract_tags_regex": regex_str, + "semantics": "overwrite", + }, + 
PipelineContext(run_id="test"), + ) + output = list( + transformer.transform( + [ + RecordEnvelope(input, metadata={}), + RecordEnvelope(EndOfStream(), metadata={}), + ] + ) + ) + + assert len(output) == 3 + assert output[0].record == input + tags_aspect = output[1].record.aspect + assert isinstance(tags_aspect, GlobalTagsClass) + assert len(tags_aspect.tags) == 1 + assert tags_aspect.tags[0].tag == out_tag + + +def test_extract_dataset_tags(mock_time): + _test_extract_tags( + in_urn="urn:li:dataset:(urn:li:dataPlatform:kafka,clusterid.part1-part2-part3_part4,PROD)", + regex_str="(.*)", + out_tag="urn:li:tag:clusterid.part1-part2-part3_part4", + ) + _test_extract_tags( + in_urn="urn:li:dataset:(urn:li:dataPlatform:kafka,clusterid.USA-ops-team_table1,PROD)", + regex_str=".([^._]*)_", + out_tag="urn:li:tag:USA-ops-team", + ) + _test_extract_tags( + in_urn="urn:li:dataset:(urn:li:dataPlatform:kafka,clusterid.Canada-marketing_table1,PROD)", + regex_str=".([^._]*)_", + out_tag="urn:li:tag:Canada-marketing", + ) + _test_extract_tags( + in_urn="urn:li:dataset:(urn:li:dataPlatform:elasticsearch,abcdef-prefix_datahub_usage_event-000027,PROD)", + regex_str="([^._]*)_", + out_tag="urn:li:tag:abcdef-prefix", + ) + + def test_simple_dataset_ownership_with_invalid_type_transformation(mock_time): with pytest.raises(ValueError): SimpleAddDatasetOwnership.create( From 47616ff093b34960474949e6db0e62645330e846 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 20 Jul 2023 11:04:30 -0700 Subject: [PATCH 12/20] feat(ingest/lookml): fail when nothing was produced (#8464) --- .../src/datahub/ingestion/source/looker/lookml_source.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py index dedadb9bbecae..362b4e5530638 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py +++ 
b/metadata-ingestion/src/datahub/ingestion/source/looker/lookml_source.py @@ -1853,6 +1853,13 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: yield from self.get_internal_workunits() + if not self.report.events_produced and not self.report.failures: + # Don't pass if we didn't produce any events. + self.report.report_failure( + "
", + "No metadata was produced. Check the logs for more details.", + ) + def _recursively_check_manifests( self, tmp_dir: str, project_name: str, project_visited: Set[str] ) -> None: @@ -1976,7 +1983,7 @@ def get_internal_workunits(self) -> Iterable[MetadataWorkUnit]: # noqa: C901 if connectionDefinition is None: self.reporter.report_warning( f"model-{model_name}", - f"Failed to load connection {model.connection}. Check your API key permissions.", + f"Failed to load connection {model.connection}. Check your API key permissions and/or connection_to_platform_map configuration.", ) self.reporter.report_models_dropped(model_name) continue From d733363bedf6d3558751e2ec28102f2468333620 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Thu, 20 Jul 2023 11:05:25 -0700 Subject: [PATCH 13/20] chore(ingest): drop bigquery-beta and snowflake-beta aliases (#8451) --- docs/how/updating-datahub.md | 1 + metadata-ingestion/setup.py | 11 ----------- .../ingestion/source/source_registry.py | 19 ------------------- 3 files changed, 1 insertion(+), 30 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index b8ecd689ce381..03b3d763ed247 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -14,6 +14,7 @@ This file documents any backwards-incompatible changes in DataHub and assists pe `profile_table_level_only` together with `include_field_xyz` config options to ingest certain column-level metrics. Instead, set `profile_table_level_only` to `false` and individually enable / disable desired field metrics. +- #8451: The `bigquery-beta` and `snowflake-beta` source aliases have been dropped. Use `bigquery` and `snowflake` as the source type instead. 
### Potential Downtime diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index 36f4151df0359..04bc03a236f07 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -292,14 +292,6 @@ def get_long_description(): "sqlalchemy-bigquery>=1.4.1", "google-cloud-datacatalog-lineage==0.2.2", }, - "bigquery-beta": sql_common - | bigquery_common - | { - *sqllineage_lib, - *sqlglot_lib, - "sql_metadata", - "sqlalchemy-bigquery>=1.4.1", - }, # deprecated, but keeping the extra for backwards compatibility "clickhouse": sql_common | clickhouse_common, "clickhouse-usage": sql_common | usage_common | clickhouse_common, "datahub-lineage-file": set(), @@ -370,9 +362,6 @@ def get_long_description(): "sagemaker": aws_common, "salesforce": {"simple-salesforce"}, "snowflake": snowflake_common | usage_common | sqlglot_lib, - "snowflake-beta": ( - snowflake_common | usage_common | sqlglot_lib - ), # deprecated, but keeping the extra for backwards compatibility "sqlalchemy": sql_common, "superset": { "requests", diff --git a/metadata-ingestion/src/datahub/ingestion/source/source_registry.py b/metadata-ingestion/src/datahub/ingestion/source/source_registry.py index 23fe1bd3baf54..37f088bcd7b50 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/source_registry.py +++ b/metadata-ingestion/src/datahub/ingestion/source/source_registry.py @@ -8,25 +8,6 @@ source_registry.register_from_entrypoint("datahub.ingestion.source.plugins") # Deprecations. 
-source_registry.register_alias( - "snowflake-beta", - "snowflake", - lambda: warnings.warn( - "source type snowflake-beta is deprecated, use snowflake instead", - ConfigurationWarning, - stacklevel=3, - ), -) -source_registry.register_alias( - "bigquery-beta", - "bigquery", - lambda: warnings.warn( - "source type bigquery-beta is deprecated, use bigquery instead", - ConfigurationWarning, - stacklevel=3, - ), -) - source_registry.register_alias( "redshift-usage", "redshift-usage-legacy", From f4fde211686d86c2219a51ee8d856878cc16d992 Mon Sep 17 00:00:00 2001 From: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> Date: Thu, 20 Jul 2023 23:42:33 +0530 Subject: [PATCH 14/20] feat(ingest/nifi): add support for basic auth in nifi (#8457) --- metadata-ingestion/docs/sources/nifi/nifi.md | 3 + .../src/datahub/ingestion/source/nifi.py | 116 +++++++++++------- .../tests/integration/nifi/test_nifi.py | 66 +++++----- .../tests/unit/test_nifi_source.py | 18 ++- 4 files changed, 119 insertions(+), 84 deletions(-) diff --git a/metadata-ingestion/docs/sources/nifi/nifi.md b/metadata-ingestion/docs/sources/nifi/nifi.md index 5ea6a53186c0d..7da9430ee8ed5 100644 --- a/metadata-ingestion/docs/sources/nifi/nifi.md +++ b/metadata-ingestion/docs/sources/nifi/nifi.md @@ -15,6 +15,9 @@ If nifi has been configured to use [Kerberos SPNEGO](https://nifi.apache.org/doc sudo apt install krb5-user kinit user@REALM ``` +#### Basic Authentication (`auth: BASIC_AUTH`) +Connector will use [HTTPBasicAuth](https://requests.readthedocs.io/en/latest/user/authentication/#basic-authentication) with `username` and `password`. + #### No Authentication (`auth: NO_AUTH`) This is useful for testing purposes. 
diff --git a/metadata-ingestion/src/datahub/ingestion/source/nifi.py b/metadata-ingestion/src/datahub/ingestion/source/nifi.py index 27cd5aeb3c68d..559d103aa6e5a 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/nifi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/nifi.py @@ -16,6 +16,7 @@ from pydantic.fields import Field from requests import Response from requests.adapters import HTTPAdapter +from requests.models import HTTPBasicAuth from requests_gssapi import HTTPSPNEGOAuth import datahub.emitter.mce_builder as builder @@ -66,6 +67,7 @@ class NifiAuthType(Enum): SINGLE_USER = "SINGLE_USER" CLIENT_CERT = "CLIENT_CERT" KERBEROS = "KERBEROS" + BASIC_AUTH = "BASIC_AUTH" class NifiSourceConfig(EnvConfigMixin): @@ -132,17 +134,17 @@ def validate_auth_params(cla, values): raise ValueError( "Config `client_cert_file` is required for CLIENT_CERT auth" ) - elif values.get("auth") is NifiAuthType.SINGLE_USER and ( - not values.get("username") or not values.get("password") - ): + elif values.get("auth") in ( + NifiAuthType.SINGLE_USER, + NifiAuthType.BASIC_AUTH, + ) and (not values.get("username") or not values.get("password")): raise ValueError( - "Config `username` and `password` is required for SINGLE_USER auth" + f"Config `username` and `password` is required for {values.get('auth').value} auth" ) return values @root_validator(pre=False) def validator_site_url_to_site_name(cls, values): - site_url_to_site_name = values.get("site_url_to_site_name") site_url = values.get("site_url") site_name = values.get("site_name") @@ -667,41 +669,8 @@ def fetch_provenance_events( of processor type {processor.type}, Start date: {startDate}, End date: {endDate}" ) - older_version: bool = self.nifi_flow.version is not None and version.parse( - self.nifi_flow.version - ) < version.parse("1.13.0") - - if older_version: - searchTerms = { - "ProcessorID": processor.id, - "EventType": eventType, - } - else: - searchTerms = { - "ProcessorID": {"value": processor.id}, # 
type: ignore - "EventType": {"value": eventType}, # type: ignore - } - - payload = json.dumps( - { - "provenance": { - "request": { - "maxResults": 1000, - "summarize": False, - "searchTerms": searchTerms, - "startDate": startDate.strftime("%m/%d/%Y %H:%M:%S %Z"), - "endDate": ( - endDate.strftime("%m/%d/%Y %H:%M:%S %Z") - if endDate - else None - ), - } - } - } - ) - logger.debug(payload) - provenance_response = self.session.post( - url=urljoin(self.rest_api_base_url, PROVENANCE_ENDPOINT), data=payload + provenance_response = self.submit_provenance_query( + processor, eventType, startDate, endDate ) if provenance_response.ok: @@ -759,6 +728,58 @@ def fetch_provenance_events( logger.warning(provenance_response.text) return + def submit_provenance_query(self, processor, eventType, startDate, endDate): + older_version: bool = self.nifi_flow.version is not None and version.parse( + self.nifi_flow.version + ) < version.parse("1.13.0") + + if older_version: + searchTerms = { + "ProcessorID": processor.id, + "EventType": eventType, + } + else: + searchTerms = { + "ProcessorID": {"value": processor.id}, # type: ignore + "EventType": {"value": eventType}, # type: ignore + } + + payload = json.dumps( + { + "provenance": { + "request": { + "maxResults": 1000, + "summarize": False, + "searchTerms": searchTerms, + "startDate": startDate.strftime("%m/%d/%Y %H:%M:%S %Z"), + "endDate": ( + endDate.strftime("%m/%d/%Y %H:%M:%S %Z") + if endDate + else None + ), + } + } + } + ) + logger.debug(payload) + self.session.headers.update({}) + + self.session.headers.update({"Content-Type": "application/json"}) + provenance_response = self.session.post( + url=urljoin(self.rest_api_base_url, PROVENANCE_ENDPOINT), + data=payload, + ) + + # Revert to default content-type if basic-auth + if self.config.auth is NifiAuthType.BASIC_AUTH: + self.session.headers.update( + { + "Content-Type": "application/x-www-form-urlencoded", + } + ) + + return provenance_response + def report_warning(self, key: 
str, reason: str) -> None: logger.warning(f"{key}: {reason}") self.report.report_warning(key, reason) @@ -958,6 +979,19 @@ def authenticate(self): # Token not required return + if self.config.auth is NifiAuthType.BASIC_AUTH: + assert self.config.username is not None + assert self.config.password is not None + self.session.auth = HTTPBasicAuth( + self.config.username, self.config.password + ) + self.session.headers.update( + { + "Content-Type": "application/x-www-form-urlencoded", + } + ) + return + if self.config.auth is NifiAuthType.CLIENT_CERT: self.session.mount( self.rest_api_base_url, @@ -1001,8 +1035,6 @@ def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]: self.report.report_failure(self.config.site_url, "Failed to authenticate") return - self.session.headers.update({"Content-Type": "application/json"}) - # Creates nifi_flow by invoking /flow rest api and saves as self.nifi_flow try: self.create_nifi_flow() diff --git a/metadata-ingestion/tests/integration/nifi/test_nifi.py b/metadata-ingestion/tests/integration/nifi/test_nifi.py index 3ba194e9b5e12..58efd32c6deb3 100644 --- a/metadata-ingestion/tests/integration/nifi/test_nifi.py +++ b/metadata-ingestion/tests/integration/nifi/test_nifi.py @@ -107,7 +107,7 @@ def test_nifi_ingest_standalone( @freeze_time(FROZEN_TIME) @pytest.mark.slow_integration -def test_nifi_ingest_cluster(loaded_nifi, pytestconfig, test_resources_dir): +def test_nifi_ingest_cluster(loaded_nifi, pytestconfig, tmp_path, test_resources_dir): # Wait for nifi cluster to execute all lineage processors, max wait time 120 seconds url = "http://localhost:9080/nifi-api/flow/process-groups/root" for i in range(23): @@ -124,37 +124,39 @@ def test_nifi_ingest_cluster(loaded_nifi, pytestconfig, test_resources_dir): logging.info(f"Waited for time {i*5} seconds") break test_resources_dir = pytestconfig.rootpath / "tests/integration/nifi" - # Run nifi ingestion run. 
- pipeline = Pipeline.create( - { - "run_id": "nifi-test-cluster", - "source": { - "type": "nifi", - "config": { - "site_url": "http://localhost:9080/nifi/", - "auth": "NO_AUTH", - "site_url_to_site_name": { - "http://nifi01:9080/nifi/": "default", - "http://nifi02:9081/nifi/": "default", - "http://nifi03:9082/nifi/": "default", + # Run the metadata ingestion pipeline. + with fs_helpers.isolated_filesystem(tmp_path): + # Run nifi ingestion run. + pipeline = Pipeline.create( + { + "run_id": "nifi-test-cluster", + "source": { + "type": "nifi", + "config": { + "site_url": "http://localhost:9080/nifi/", + "auth": "NO_AUTH", + "site_url_to_site_name": { + "http://nifi01:9080/nifi/": "default", + "http://nifi02:9081/nifi/": "default", + "http://nifi03:9082/nifi/": "default", + }, }, }, - }, - "sink": { - "type": "file", - "config": {"filename": "./nifi_mces_cluster.json"}, - }, - } - ) - pipeline.run() - pipeline.raise_from_status() + "sink": { + "type": "file", + "config": {"filename": "./nifi_mces_cluster.json"}, + }, + } + ) + pipeline.run() + pipeline.raise_from_status() - # Verify the output. - mce_helpers.check_golden_file( - pytestconfig, - output_path="nifi_mces_cluster.json", - golden_path=test_resources_dir / "nifi_mces_golden_cluster.json", - ignore_paths=[ - r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['last_event_time'\]", - ], - ) + # Verify the output. 
+ mce_helpers.check_golden_file( + pytestconfig, + output_path="nifi_mces_cluster.json", + golden_path=test_resources_dir / "nifi_mces_golden_cluster.json", + ignore_paths=[ + r"root\[\d+\]\['aspect'\]\['json'\]\['customProperties'\]\['last_event_time'\]", + ], + ) diff --git a/metadata-ingestion/tests/unit/test_nifi_source.py b/metadata-ingestion/tests/unit/test_nifi_source.py index a90f03bea3e42..d9e2b6e35e157 100644 --- a/metadata-ingestion/tests/unit/test_nifi_source.py +++ b/metadata-ingestion/tests/unit/test_nifi_source.py @@ -278,27 +278,29 @@ def mocked_functions(mock_provenance_events, mock_delete_provenance, provenance_ mock_provenance_events.return_value = puts3_provenance_response -def test_single_user_auth_without_password(): +@pytest.mark.parametrize("auth", ["SINGLE_USER", "BASIC_AUTH"]) +def test_auth_without_password(auth): with pytest.raises( - ValueError, match="`username` and `password` is required for SINGLE_USER auth" + ValueError, match=f"`username` and `password` is required for {auth} auth" ): NifiSourceConfig.parse_obj( { "site_url": "https://localhost:8443", - "auth": "SINGLE_USER", + "auth": auth, "username": "someuser", } ) -def test_single_user_auth_without_username_and_password(): +@pytest.mark.parametrize("auth", ["SINGLE_USER", "BASIC_AUTH"]) +def test_auth_without_username_and_password(auth): with pytest.raises( - ValueError, match="`username` and `password` is required for SINGLE_USER auth" + ValueError, match=f"`username` and `password` is required for {auth} auth" ): NifiSourceConfig.parse_obj( { "site_url": "https://localhost:8443", - "auth": "SINGLE_USER", + "auth": auth, } ) @@ -316,7 +318,6 @@ def test_client_cert_auth_without_client_cert_file(): def test_single_user_auth_failed_to_get_token(): - config = NifiSourceConfig( site_url="https://localhost:12345", # will never work username="username", @@ -338,7 +339,6 @@ def test_single_user_auth_failed_to_get_token(): def test_kerberos_auth_failed_to_get_token(): - config = 
NifiSourceConfig( site_url="https://localhost:12345", # will never work auth="KERBEROS", @@ -358,7 +358,6 @@ def test_kerberos_auth_failed_to_get_token(): def test_client_cert_auth_failed(): - config = NifiSourceConfig( site_url="https://localhost:12345", # will never work auth="CLIENT_CERT", @@ -379,7 +378,6 @@ def test_client_cert_auth_failed(): def test_failure_to_create_nifi_flow(): - with patch("datahub.ingestion.source.nifi.NifiSource.authenticate"): config = NifiSourceConfig( site_url="https://localhost:12345", # will never work From b5e039ff4e083382134295b9fd1dcbb88d79d83c Mon Sep 17 00:00:00 2001 From: Kos Korchak <97058061+kkorchak@users.noreply.github.com> Date: Thu, 20 Jul 2023 15:54:33 -0400 Subject: [PATCH 15/20] fix(tests): Fix query_tab test that was failing on CI run (#8463) --- .../cypress/cypress/e2e/query/query_tab.js | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js b/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js index 0d8bcf7b4a655..4d01cac15724e 100644 --- a/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js +++ b/smoke-test/tests/cypress/cypress/e2e/query/query_tab.js @@ -11,7 +11,7 @@ describe("manage queries", () => { cy.openEntityTab("Queries") }) - it("go to querys tab on dataset page then, create, edit, make default, delete a view", () => { + it("go to queries tab on dataset page then, create, edit, make default, delete a view", () => { const runId = Date.now() // Headers @@ -27,6 +27,7 @@ describe("manage queries", () => { cy.get('.ProseMirror').click(); cy.get('.ProseMirror').type(`Test Description-${runId}`); cy.get('[data-testid="query-builder-save-button"]').click(); + cy.waitTextVisible("Created Query!"); // Verify the card cy.waitTextVisible(`+ Test Query-${runId}`); @@ -36,10 +37,10 @@ describe("manage queries", () => { // View the Query cy.get('[data-testid="query-content-0"]').click(); + 
cy.get('.ant-modal-content').waitTextVisible(`+ Test Query-${runId}`); + cy.get('.ant-modal-content').waitTextVisible(`Test Table-${runId}`); + cy.get('.ant-modal-content').waitTextVisible(`Test Description-${runId}`); cy.get('[data-testid="query-modal-close-button"]').click(); - cy.waitTextVisible(`+ Test Query-${runId}`); - cy.waitTextVisible(`Test Table-${runId}`); - cy.waitTextVisible(`Test Description-${runId}`); // Edit the Query cy.get('[data-testid="query-edit-button-0"]').click() @@ -52,20 +53,23 @@ describe("manage queries", () => { cy.get('.ProseMirror').clear(); cy.get('.ProseMirror').type(`Edited Description-${runId}`); cy.get('[data-testid="query-builder-save-button"]').click(); + cy.waitTextVisible("Edited Query!"); - // Verify the card + // Verify edited Query card + cy.get('[data-testid="query-content-0"]').scrollIntoView().should('be.visible'); cy.waitTextVisible(`+ Test Query-${runId} + Edited Query-${runId}`); - cy.waitTextVisible(`Edited Description-${runId}`); + cy.waitTextVisible(`Edited Table-${runId}`); cy.waitTextVisible(`Edited Description-${runId}`); // Delete the Query cy.get('[data-testid="query-more-button-0"]').click(); cy.get('[data-testid="query-delete-button-0"]').click(); cy.contains('Yes').click(); + cy.waitTextVisible("Deleted Query!"); // Query should be gone cy.ensureTextNotPresent(`+ Test Query-${runId} + Edited Query-${runId}`); - cy.ensureTextNotPresent(`Edited Description-${runId}`); + cy.ensureTextNotPresent(`Edited Table-${runId}`); cy.ensureTextNotPresent(`Edited Description-${runId}`); }); }); \ No newline at end of file From f4c0ed3aab445b05ecaccd5f3c68c3861f30444c Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Fri, 21 Jul 2023 03:01:06 +0530 Subject: [PATCH 16/20] ingest(mysql): add storage bytes information (#8294) Co-authored-by: Andrew Sikowitz --- .../src/datahub/ingestion/source/sql/mysql.py | 12 +++++++ .../ingestion/source/sql/sql_common.py | 36 +++++++++++++++++++ 2 files changed, 48 insertions(+) diff 
--git a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py index 3be9a5df4f0b9..3e89dd53c1eec 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/mysql.py @@ -5,6 +5,7 @@ from sqlalchemy import util from sqlalchemy.dialects.mysql import base from sqlalchemy.dialects.mysql.enumerated import SET +from sqlalchemy.engine.reflection import Inspector from datahub.ingestion.api.decorators import ( SourceCapability, @@ -83,3 +84,14 @@ def get_platform(self): def create(cls, config_dict, ctx): config = MySQLConfig.parse_obj(config_dict) return cls(config, ctx) + + def add_profile_metadata(self, inspector: Inspector) -> None: + if not self.config.profiling.enabled: + return + with inspector.engine.connect() as conn: + for row in conn.execute( + "SELECT table_schema, table_name, data_length from information_schema.tables" + ): + self.profile_metadata_info.dataset_name_to_storage_bytes[ + f"{row.table_schema}.{row.table_name}" + ] = row.data_length diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py index fb659d9548540..42ea7aed9b620 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_common.py @@ -309,6 +309,15 @@ def get_schema_metadata( ] +@dataclass +class ProfileMetadata: + """ + A class to hold information about the table for profile enrichment + """ + + dataset_name_to_storage_bytes: Dict[str, int] = field(default_factory=dict) + + class SQLAlchemySource(StatefulIngestionSourceBase): """A Base class for all SQL Sources that use SQLAlchemy to extend""" @@ -317,6 +326,7 @@ def __init__(self, config: SQLAlchemyConfig, ctx: PipelineContext, platform: str self.config = config self.platform = platform self.report: SQLSourceReport = 
SQLSourceReport() + self.profile_metadata_info: ProfileMetadata = ProfileMetadata() config_report = { config_option: config.dict().get(config_option) @@ -484,6 +494,16 @@ def get_workunits_internal(self) -> Iterable[Union[MetadataWorkUnit, SqlWorkUnit profile_requests: List["GEProfilerRequest"] = [] if sql_config.profiling.enabled: profiler = self.get_profiler_instance(inspector) + try: + self.add_profile_metadata(inspector) + except Exception as e: + logger.warning( + "Failed to get enrichment data for profiler", exc_info=True + ) + self.report.report_warning( + "profile_metadata", + f"Failed to get enrichment data for profile {e}", + ) db_name = self.get_db_name(inspector) yield from self.gen_database_containers( @@ -1098,6 +1118,13 @@ def loop_profiler_requests( ), ) + def add_profile_metadata(self, inspector: Inspector) -> None: + """ + Method to add profile metadata in a sub-class that can be used to enrich profile metadata. + This is meant to change self.profile_metadata_info in the sub-class. 
+ """ + pass + def loop_profiler( self, profile_requests: List["GEProfilerRequest"], @@ -1113,6 +1140,15 @@ def loop_profiler( if profile is None: continue dataset_name = request.pretty_name + if ( + dataset_name in self.profile_metadata_info.dataset_name_to_storage_bytes + and profile.sizeInBytes is None + ): + profile.sizeInBytes = ( + self.profile_metadata_info.dataset_name_to_storage_bytes[ + dataset_name + ] + ) dataset_urn = make_dataset_urn_with_platform_instance( self.platform, dataset_name, From a4f4ff386c4dbea1ec51798a972f5eb4f3ef80b7 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Fri, 21 Jul 2023 10:44:57 -0400 Subject: [PATCH 17/20] fix(cache) Fix caching bug with new search filters (#8434) Co-authored-by: Aseem Bansal --- .../metadata/search/client/CachingEntitySearchService.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java index 804ab3303f6b0..56f6fed3ad9d2 100644 --- a/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java +++ b/metadata-io/src/main/java/com/linkedin/metadata/search/client/CachingEntitySearchService.java @@ -154,8 +154,8 @@ public SearchResult getCachedSearchResults( batchSize, querySize -> getRawSearchResults(entityName, query, filters, sortCriterion, querySize.getFrom(), querySize.getSize(), flags, facets), - querySize -> Quintet.with(entityName, query, filters != null ? toJsonString(filters) : null, - sortCriterion != null ? toJsonString(sortCriterion) : null, querySize), flags, enableCache).getSearchResults(from, size); + querySize -> Sextet.with(entityName, query, filters != null ? toJsonString(filters) : null, + sortCriterion != null ? 
toJsonString(sortCriterion) : null, facets, querySize), flags, enableCache).getSearchResults(from, size); } From 392f9f1bc018f1f9f767273a74458e986885a843 Mon Sep 17 00:00:00 2001 From: Chris Collins Date: Fri, 21 Jul 2023 10:48:46 -0400 Subject: [PATCH 18/20] fix(browseV2) Escape forward slashes in browse v2 query (#8446) --- .../resolvers/chart/BrowseV2Resolver.java | 4 +++- .../src/app/home/AcrylDemoBanner.tsx | 21 +++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java index 41a1d22485ea4..76abddc9a99a9 100644 --- a/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java +++ b/datahub-graphql-core/src/main/java/com/linkedin/datahub/graphql/resolvers/chart/BrowseV2Resolver.java @@ -49,6 +49,8 @@ public CompletableFuture get(DataFetchingEnvironment environmen final int start = input.getStart() != null ? input.getStart() : DEFAULT_START; final int count = input.getCount() != null ? input.getCount() : DEFAULT_COUNT; final String query = input.getQuery() != null ? input.getQuery() : "*"; + // escape forward slash since it is a reserved character in Elasticsearch + final String sanitizedQuery = ResolverUtils.escapeForwardSlash(query); return CompletableFuture.supplyAsync(() -> { try { @@ -64,7 +66,7 @@ public CompletableFuture get(DataFetchingEnvironment environmen maybeResolvedView != null ? 
SearchUtils.combineFilters(filter, maybeResolvedView.getDefinition().getFilter()) : filter, - query, + sanitizedQuery, start, count, context.getAuthentication() diff --git a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx index 87efae03e0a7c..0a6316a71db16 100644 --- a/datahub-web-react/src/app/home/AcrylDemoBanner.tsx +++ b/datahub-web-react/src/app/home/AcrylDemoBanner.tsx @@ -33,13 +33,17 @@ const StyledLink = styled(Link)` font-weight: 700; `; +const TextContent = styled.div` + max-width: 1025px; +`; + export default function AcrylDemoBanner() { return ( - Schedule a Demo of Managed Datahub - + Schedule a Demo of Managed DataHub + DataHub is already the industry's #1 Open Source Data Catalog.{' '} Schedule a demo {' '} - of Acryl Cloud to see the advanced features that take it to the next level! - + of Acryl DataHub to see the advanced features that take it to the next level or purchase Acryl Cloud + on{' '} + + AWS Marketplace + + ! 
+ ); From bec018257c8c7c9af7dc79d91e8db34414188ff0 Mon Sep 17 00:00:00 2001 From: mohdsiddique Date: Sat, 22 Jul 2023 02:31:57 +0530 Subject: [PATCH 19/20] fix(ingestion/powerbi-report-srever): handle requests.exceptions.JSONDecodeError (#8442) Co-authored-by: MohdSiddiqueBagwan --- .../source/powerbi_report_server/constants.py | 2 - .../powerbi_report_server/report_server.py | 49 ++- .../report_server_domain.py | 5 - .../golden_test_fail_api_ingest.json | 326 +++++++++++++++++ .../golden_test_ingest.json | 334 +++++++++++------- .../test_powerbi_report_server.py | 123 ++++--- 6 files changed, 641 insertions(+), 198 deletions(-) create mode 100644 metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py index 92560a11b90eb..9f409793272dd 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/constants.py @@ -82,8 +82,6 @@ class Constant: Constant.LINKED_REPORTS: "{PBIRS_BASE_URL}/LinkedReports", Constant.LINKED_REPORT: "{PBIRS_BASE_URL}/LinkedReports({LINKED_REPORT_ID})", Constant.ME: "{PBIRS_BASE_URLL}/Me", - Constant.MOBILE_REPORTS: "{PBIRS_BASE_URL}/MobileReports", - Constant.MOBILE_REPORT: "{PBIRS_BASE_URL}/MobileReports({MOBILE_REPORT_ID})", Constant.POWERBI_REPORTS: "{PBIRS_BASE_URL}/PowerBiReports", Constant.POWERBI_REPORT: "{PBIRS_BASE_URL}/PowerBiReports({POWERBI_REPORT_ID})", Constant.POWERBI_REPORT_DATASOURCES: "{PBIRS_BASE_URL}/PowerBiReports({ID})/DataSources", diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py index 7419241511eaf..e66119f6e8d76 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server.py @@ -35,7 +35,6 @@ from datahub.ingestion.source.powerbi_report_server.report_server_domain import ( CorpUser, LinkedReport, - MobileReport, Owner, OwnershipData, PowerBiReport, @@ -116,6 +115,29 @@ class PowerBiReportServerDashboardSourceConfig(PowerBiReportServerAPIConfig): chart_pattern: AllowDenyPattern = AllowDenyPattern.allow_all() +def log_http_error(e: BaseException, message: str) -> Any: + LOGGER.warning(message) + + if isinstance(e, requests.exceptions.HTTPError): + LOGGER.warning(f"HTTP status-code = {e.response.status_code}") + + LOGGER.debug(msg=message, exc_info=e) + + return e + + +def get_response_dict(response: requests.Response, error_message: str) -> dict: + + result_dict: dict = {} + try: + response.raise_for_status() + result_dict = response.json() + except BaseException as e: + log_http_error(e=e, message=error_message) + + return result_dict + + class PowerBiReportServerAPI: # API endpoints of PowerBI Report Server to fetch reports, datasets @@ -144,14 +166,15 @@ def requests_get(self, url_http: str, url_https: str, content_type: str) -> Any: url=url_http, auth=self.get_auth_credentials, ) - # Check if we got response from PowerBi Report Server - if response.status_code != 200: - message: str = "Failed to fetch Report from powerbi-report-server for" - LOGGER.warning(message) - LOGGER.warning("{}={}".format(Constant.ReportId, content_type)) - raise ValueError(message) - return response.json() + error_message: str = ( + f"Failed to fetch {content_type} Report from powerbi-report-server" + ) + + return get_response_dict( + response=response, + error_message=error_message, + ) def get_all_reports(self) -> List[Any]: """ @@ -159,7 +182,6 @@ def get_all_reports(self) -> List[Any]: """ report_types_mapping: Dict[str, Any] = { Constant.REPORTS: Report, - 
Constant.MOBILE_REPORTS: MobileReport, Constant.LINKED_REPORTS: LinkedReport, Constant.POWERBI_REPORTS: PowerBiReport, } @@ -174,15 +196,17 @@ def get_all_reports(self) -> List[Any]: report_get_endpoint_https = report_get_endpoint.format( PBIRS_BASE_URL=self.__config.get_base_api_https_url, ) + response_dict = self.requests_get( url_http=report_get_endpoint_http, url_https=report_get_endpoint_https, content_type=report_type, - )["value"] - if response_dict: + ) + + if response_dict.get("value"): reports.extend( report_types_mapping[report_type].parse_obj(report) - for report in response_dict + for report in response_dict.get("value") ) return reports @@ -487,7 +511,6 @@ class PowerBiReportServerDashboardSource(Source): Next types of report can be ingested: - PowerBI report(.pbix) - Paginated report(.rdl) - - Mobile report - Linked report """ diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py index adcbcaaed96e6..60426fc5bd660 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi_report_server/report_server_domain.py @@ -239,11 +239,6 @@ class Manifest(BaseModel): resources: List[Dict[str, List]] = Field(alias="Resources") -class MobileReport(CatalogItem): - allow_caching: bool = Field(alias="AllowCaching") - manifest: Manifest = Field(alias="Manifest") - - class PowerBIReport(CatalogItem): has_data_sources: bool = Field(alias="HasDataSources") diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json new file mode 100644 index 0000000000000..69a567654cac1 --- /dev/null +++ b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_fail_api_ingest.json 
@@ -0,0 +1,326 @@ +[ +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "TEST_USER" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testa" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testa", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + 
"lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testa" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938a)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserInfo", + "aspect": { + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": 
"urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "status", + "aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "corpuser", + "entityUrn": "urn:li:corpuser:TEST_USER", + "changeType": "UPSERT", + "aspectName": "corpUserKey", + "aspect": { + "json": { + "username": "TEST_USER" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "browsePaths", + "aspect": { + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testd" + ] + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardInfo", + "aspect": { + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testd", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testd" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "status", + 
"aspect": { + "json": { + "removed": false + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "dashboardKey", + "aspect": { + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d" + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +}, +{ + "entityType": "dashboard", + "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938d)", + "changeType": "UPSERT", + "aspectName": "ownership", + "aspect": { + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } + }, + "systemMetadata": { + "lastObserved": 1643871600000, + "runId": "powerbi-report-server-test" + } +} +] \ No newline at end of file diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json index 9b202baa947d5..f4277e41d58c7 100644 --- a/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json +++ b/metadata-ingestion/tests/integration/powerbi_report_server/golden_test_ingest.json @@ -5,8 +5,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -19,8 +23,9 @@ "changeType": "UPSERT", "aspectName": "status", 
"aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -33,8 +38,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -47,8 +53,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testa\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testa" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -61,8 +70,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testa\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testa\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testa", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": 
"urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testa" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -75,8 +108,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -89,8 +123,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938a" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -103,8 +139,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -117,8 +167,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -131,8 +185,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - 
"contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -145,78 +200,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "browsePaths", - "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testb\"]}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "dashboardInfo", - "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testb\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testb\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "status", - "aspect": { - "value": "{\"removed\": false}", - 
"contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "dashboardKey", - "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938b\"}", - "contentType": "application/json" - }, - "systemMetadata": { - "lastObserved": 1643871600000, - "runId": "powerbi-report-server-test" - } -}, -{ - "entityType": "dashboard", - "entityUrn": "urn:li:dashboard:(powerbi,reports.ee56dc21-248a-4138-a446-ee5ab1fc938b)", - "changeType": "UPSERT", - "aspectName": "ownership", - "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -229,8 +215,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -243,8 +233,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -257,8 +248,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, 
"systemMetadata": { "lastObserved": 1643871600000, @@ -271,8 +263,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testc\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testc" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -285,8 +280,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testc\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testc\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testc", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testc" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -299,8 +318,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { 
"lastObserved": 1643871600000, @@ -313,8 +333,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938c\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938c" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -327,8 +349,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -341,8 +377,12 @@ "changeType": "UPSERT", "aspectName": "corpUserInfo", "aspect": { - "value": "{\"active\": true, \"displayName\": \"TEST_USER\", \"email\": \"\"}", - "contentType": "application/json" + "json": { + "customProperties": {}, + "active": true, + "displayName": "TEST_USER", + "email": "" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -355,8 +395,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -369,8 +410,9 @@ "changeType": "UPSERT", "aspectName": "corpUserKey", "aspect": { - "value": "{\"username\": \"TEST_USER\"}", - "contentType": "application/json" + "json": { + "username": "TEST_USER" + } }, "systemMetadata": { "lastObserved": 
1643871600000, @@ -383,8 +425,11 @@ "changeType": "UPSERT", "aspectName": "browsePaths", "aspect": { - "value": "{\"paths\": [\"/powerbi_report_server/dev/server_alias/Reports/path/to/Testd\"]}", - "contentType": "application/json" + "json": { + "paths": [ + "/powerbi_report_server/dev/server_alias/Reports/path/to/Testd" + ] + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -397,8 +442,32 @@ "changeType": "UPSERT", "aspectName": "dashboardInfo", "aspect": { - "value": "{\"customProperties\": {\"workspaceName\": \"PowerBI Report Server\", \"workspaceId\": \"host_port\", \"createdBy\": \"TEST_USER\", \"createdDate\": \"2022-02-03 07:00:00\", \"modifiedBy\": \"TEST_USER\", \"modifiedDate\": \"2022-02-03 07:00:00\", \"dataSource\": \"\"}, \"title\": \"Testd\", \"description\": \"\", \"charts\": [], \"datasets\": [], \"lastModified\": {\"created\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}, \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}, \"dashboardUrl\": \"http://host_port/Reports/powerbi/path/to/Testd\"}", - "contentType": "application/json" + "json": { + "customProperties": { + "workspaceName": "PowerBI Report Server", + "workspaceId": "host_port", + "createdBy": "TEST_USER", + "createdDate": "2022-02-03 07:00:00", + "modifiedBy": "TEST_USER", + "modifiedDate": "2022-02-03 07:00:00", + "dataSource": "" + }, + "title": "Testd", + "description": "", + "charts": [], + "datasets": [], + "lastModified": { + "created": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + }, + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + }, + "dashboardUrl": "http://host_port/Reports/powerbi/path/to/Testd" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -411,8 +480,9 @@ "changeType": "UPSERT", "aspectName": "status", "aspect": { - "value": "{\"removed\": false}", - "contentType": "application/json" + "json": { + "removed": false + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -425,8 
+495,10 @@ "changeType": "UPSERT", "aspectName": "dashboardKey", "aspect": { - "value": "{\"dashboardTool\": \"powerbi\", \"dashboardId\": \"powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d\"}", - "contentType": "application/json" + "json": { + "dashboardTool": "powerbi", + "dashboardId": "powerbi.linkedin.com/dashboards/ee56dc21-248a-4138-a446-ee5ab1fc938d" + } }, "systemMetadata": { "lastObserved": 1643871600000, @@ -439,8 +511,22 @@ "changeType": "UPSERT", "aspectName": "ownership", "aspect": { - "value": "{\"owners\": [{\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"TECHNICAL_OWNER\"}, {\"owner\": \"urn:li:corpuser:TEST_USER\", \"type\": \"NONE\"}], \"lastModified\": {\"time\": 0, \"actor\": \"urn:li:corpuser:unknown\"}}", - "contentType": "application/json" + "json": { + "owners": [ + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "TECHNICAL_OWNER" + }, + { + "owner": "urn:li:corpuser:TEST_USER", + "type": "NONE" + } + ], + "lastModified": { + "time": 0, + "actor": "urn:li:corpuser:unknown" + } + } }, "systemMetadata": { "lastObserved": 1643871600000, diff --git a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py index c03190be66964..826c2b77bce36 100644 --- a/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py +++ b/metadata-ingestion/tests/integration/powerbi_report_server/test_powerbi_report_server.py @@ -21,7 +21,7 @@ def mock_user_to_add(*args, **kwargs): return None -def register_mock_api(request_mock): +def register_mock_api(request_mock, override_mock_data={}): api_vs_response = { "https://host_port/Reports/api/v2.0/Reports": { "method": "GET", @@ -52,37 +52,6 @@ def register_mock_api(request_mock): ] }, }, - "https://host_port/Reports/api/v2.0/MobileReports": { - "method": "GET", - "status_code": 200, - "json": { - "value": [ - { - "Id": 
"ee56dc21-248a-4138-a446-ee5ab1fc938b", - "Name": "Testb", - "Description": None, - "Path": "/path/to/Testb", - "Type": "MobileReport", - "Hidden": False, - "Size": 1010101, - "ModifiedBy": "TEST_USER", - "ModifiedDate": str(datetime.now()), - "CreatedBy": "TEST_USER", - "CreatedDate": str(datetime.now()), - "ParentFolderId": "47495172-89ab-455f-a446-fffd3cf239cb", - "IsFavorite": False, - "ContentType": None, - "Content": "", - "HasDataSources": True, - "Roles": [], - "HasSharedDataSets": True, - "HasParameters": True, - "AllowCaching": True, - "Manifest": {"Resources": []}, - }, - ] - }, - }, "https://host_port/Reports/api/v2.0/LinkedReports": { "method": "GET", "status_code": 200, @@ -141,6 +110,8 @@ def register_mock_api(request_mock): }, } + api_vs_response.update(override_mock_data) + for url in api_vs_response.keys(): request_mock.register_uri( api_vs_response[url]["method"], @@ -164,6 +135,30 @@ def default_source_config(): } +def get_default_recipe(output_path: str) -> dict: + return { + "run_id": "powerbi-report-server-test", + "source": { + "type": "powerbi-report-server", + "config": { + **default_source_config(), + }, + }, + "sink": { + "type": "file", + "config": {"filename": output_path}, # , + }, + } + + +def add_mock_method_in_pipeline(pipeline: Pipeline) -> None: + pipeline.ctx.graph = mock.MagicMock() + pipeline.ctx.graph.get_ownership = mock.MagicMock() + pipeline.ctx.graph.get_ownership.side_effect = mock_existing_users + pipeline.ctx.graph.get_aspect_v2 = mock.MagicMock() + pipeline.ctx.graph.get_aspect_v2.side_effect = mock_user_to_add + + @freeze_time(FROZEN_TIME) @mock.patch("requests_ntlm.HttpNtlmAuth") def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_mock): @@ -174,34 +169,54 @@ def test_powerbi_ingest(mock_msal, pytestconfig, tmp_path, mock_time, requests_m register_mock_api(request_mock=requests_mock) pipeline = Pipeline.create( - { - "run_id": "powerbi-report-server-test", - "source": { - "type": 
"powerbi-report-server", - "config": { - **default_source_config(), - }, - }, - "sink": { - "type": "file", - "config": { - "filename": f"{tmp_path}/powerbi_report_server_mces.json", - }, - }, - } + get_default_recipe(output_path=f"{tmp_path}/powerbi_report_server_mces.json") ) - pipeline.ctx.graph = mock.MagicMock() - pipeline.ctx.graph.get_ownership = mock.MagicMock() - pipeline.ctx.graph.get_ownership.side_effect = mock_existing_users - pipeline.ctx.graph.get_aspect_v2 = mock.MagicMock() - pipeline.ctx.graph.get_aspect_v2.side_effect = mock_user_to_add + + add_mock_method_in_pipeline(pipeline=pipeline) + + pipeline.run() + pipeline.raise_from_status() + + golden_file = "golden_test_ingest.json" + mce_helpers.check_golden_file( + pytestconfig, + output_path=tmp_path / "powerbi_report_server_mces.json", + golden_path=f"{test_resources_dir}/{golden_file}", + ) + + +@freeze_time(FROZEN_TIME) +@mock.patch("requests_ntlm.HttpNtlmAuth") +def test_powerbi_ingest_with_failure( + mock_msal, pytestconfig, tmp_path, mock_time, requests_mock +): + test_resources_dir = ( + pytestconfig.rootpath / "tests/integration/powerbi_report_server" + ) + + register_mock_api( + request_mock=requests_mock, + override_mock_data={ + "https://host_port/Reports/api/v2.0/LinkedReports": { + "method": "GET", + "status_code": 404, + "json": {"error": "Request Failed"}, + } + }, + ) + + pipeline = Pipeline.create( + get_default_recipe(output_path=f"{tmp_path}/powerbi_report_server_mces.json") + ) + + add_mock_method_in_pipeline(pipeline=pipeline) pipeline.run() pipeline.raise_from_status() - mce_out_file = "golden_test_ingest.json" + golden_file = "golden_test_fail_api_ingest.json" mce_helpers.check_golden_file( pytestconfig, output_path=tmp_path / "powerbi_report_server_mces.json", - golden_path=f"{test_resources_dir}/{mce_out_file}", + golden_path=f"{test_resources_dir}/{golden_file}", ) From 056d3619f0923757c1e4afe9f0e5a56de1a64dde Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 21 Jul 
2023 14:14:06 -0700 Subject: [PATCH 20/20] feat(sdk): easily generate container urns (#8198) Co-authored-by: Aseem Bansal --- docs/how/updating-datahub.md | 2 ++ .../src/datahub/emitter/mce_builder.py | 21 ++++++++++++-- .../src/datahub/emitter/mcp_builder.py | 29 ++++++++++++------- .../src/datahub/ingestion/graph/client.py | 13 +++++---- .../ingestion/source/bigquery_v2/bigquery.py | 6 ++-- .../data_lake_common/data_lake_utils.py | 4 +-- .../ingestion/source/powerbi/powerbi.py | 5 ++-- .../powerbi/rest_api_wrapper/data_classes.py | 10 +++---- .../datahub/ingestion/source/sql/athena.py | 6 ++-- .../datahub/ingestion/source/sql/sql_utils.py | 14 ++++----- .../source/sql/two_tier_sql_source.py | 8 ++--- .../src/datahub/ingestion/source/tableau.py | 8 ++--- .../datahub/ingestion/source/unity/source.py | 4 +-- 13 files changed, 78 insertions(+), 52 deletions(-) diff --git a/docs/how/updating-datahub.md b/docs/how/updating-datahub.md index 03b3d763ed247..b705c973cdbb5 100644 --- a/docs/how/updating-datahub.md +++ b/docs/how/updating-datahub.md @@ -20,6 +20,8 @@ individually enable / disable desired field metrics. ### Deprecations +- #8198: In the Python SDK, the `PlatformKey` class has been renamed to `ContainerKey`. 
+ ### Other notable Changes ## 0.10.4 diff --git a/metadata-ingestion/src/datahub/emitter/mce_builder.py b/metadata-ingestion/src/datahub/emitter/mce_builder.py index 9c44949741297..47727d5784a19 100644 --- a/metadata-ingestion/src/datahub/emitter/mce_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mce_builder.py @@ -6,7 +6,17 @@ import time from enum import Enum from hashlib import md5 -from typing import Any, List, Optional, Type, TypeVar, Union, cast, get_type_hints +from typing import ( + TYPE_CHECKING, + Any, + List, + Optional, + Type, + TypeVar, + Union, + cast, + get_type_hints, +) import typing_inspect @@ -50,6 +60,9 @@ os.getenv("DATAHUB_DATASET_URN_TO_LOWER", "false") == "true" ) +if TYPE_CHECKING: + from datahub.emitter.mcp_builder import DatahubKey + # TODO: Delete this once lower-casing is the standard. def set_dataset_urn_to_lower(value: bool) -> None: @@ -132,7 +145,11 @@ def dataset_key_to_urn(key: DatasetKeyClass) -> str: ) -def make_container_urn(guid: str) -> str: +def make_container_urn(guid: Union[str, "DatahubKey"]) -> str: + from datahub.emitter.mcp_builder import DatahubKey + + if isinstance(guid, DatahubKey): + guid = guid.guid() return f"urn:li:container:{guid}" diff --git a/metadata-ingestion/src/datahub/emitter/mcp_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_builder.py index 9051f2e82fa1f..40df214f49433 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_builder.py @@ -54,7 +54,9 @@ def guid(self) -> str: return _stable_guid_from_dict(bag) -class PlatformKey(DatahubKey): +class ContainerKey(DatahubKey): + """Base class for container guid keys. 
Most users should use one of the subclasses instead.""" + platform: str instance: Optional[str] = None @@ -81,8 +83,15 @@ def guid_dict(self) -> Dict[str, str]: def property_dict(self) -> Dict[str, str]: return self.dict(by_alias=True, exclude_none=True) + def as_urn(self) -> str: + return make_container_urn(guid=self.guid()) + + +# DEPRECATION: Keeping the `PlatformKey` name around for backwards compatibility. +PlatformKey = ContainerKey + -class DatabaseKey(PlatformKey): +class DatabaseKey(ContainerKey): database: str @@ -90,11 +99,11 @@ class SchemaKey(DatabaseKey): db_schema: str = Field(alias="schema") -class ProjectIdKey(PlatformKey): +class ProjectIdKey(ContainerKey): project_id: str -class MetastoreKey(PlatformKey): +class MetastoreKey(ContainerKey): metastore: str @@ -110,11 +119,11 @@ class BigQueryDatasetKey(ProjectIdKey): dataset_id: str -class FolderKey(PlatformKey): +class FolderKey(ContainerKey): folder_abs_path: str -class BucketKey(PlatformKey): +class BucketKey(ContainerKey): bucket_name: str @@ -127,7 +136,7 @@ def default(self, obj: Any) -> Any: return json.JSONEncoder.default(self, obj) -KeyType = TypeVar("KeyType", bound=PlatformKey) +KeyType = TypeVar("KeyType", bound=ContainerKey) def add_domain_to_entity_wu( @@ -188,7 +197,7 @@ def gen_containers( container_key: KeyType, name: str, sub_types: List[str], - parent_container_key: Optional[PlatformKey] = None, + parent_container_key: Optional[ContainerKey] = None, extra_properties: Optional[Dict[str, str]] = None, domain_urn: Optional[str] = None, description: Optional[str] = None, @@ -199,9 +208,7 @@ def gen_containers( created: Optional[int] = None, last_modified: Optional[int] = None, ) -> Iterable[MetadataWorkUnit]: - container_urn = make_container_urn( - guid=container_key.guid(), - ) + container_urn = container_key.as_urn() yield MetadataChangeProposalWrapper( entityUrn=f"{container_urn}", # entityKeyAspect=ContainerKeyClass(guid=parent_container_key.guid()), diff --git 
a/metadata-ingestion/src/datahub/ingestion/graph/client.py b/metadata-ingestion/src/datahub/ingestion/graph/client.py index 2f817ee69a637..cac53c350f2ea 100644 --- a/metadata-ingestion/src/datahub/ingestion/graph/client.py +++ b/metadata-ingestion/src/datahub/ingestion/graph/client.py @@ -88,6 +88,12 @@ class RemovedStatusFilter(enum.Enum): """Search only soft-deleted entities.""" +@dataclass +class RelatedEntity: + urn: str + relationship_type: str + + def _graphql_entity_type(entity_type: str) -> str: """Convert the entity types into GraphQL "EntityType" enum values.""" @@ -769,11 +775,6 @@ class RelationshipDirection(str, enum.Enum): INCOMING = "INCOMING" OUTGOING = "OUTGOING" - @dataclass - class RelatedEntity: - urn: str - relationship_type: str - def get_related_entities( self, entity_urn: str, @@ -794,7 +795,7 @@ def get_related_entities( }, ) for related_entity in response.get("entities", []): - yield DataHubGraph.RelatedEntity( + yield RelatedEntity( urn=related_entity["urn"], relationship_type=related_entity["relationshipType"], ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py index 919c803222066..ccda00ba293ef 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py +++ b/metadata-ingestion/src/datahub/ingestion/source/bigquery_v2/bigquery.py @@ -20,7 +20,7 @@ set_dataset_urn_to_lower, ) from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.mcp_builder import BigQueryDatasetKey, PlatformKey, ProjectIdKey +from datahub.emitter.mcp_builder import BigQueryDatasetKey, ContainerKey, ProjectIdKey from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SupportStatus, @@ -434,7 +434,7 @@ def get_dataplatform_instance_aspect( entityUrn=dataset_urn, aspect=aspect ).as_workunit() - def gen_dataset_key(self, db_name: str, schema: str) -> PlatformKey: + 
def gen_dataset_key(self, db_name: str, schema: str) -> ContainerKey: return BigQueryDatasetKey( project_id=db_name, dataset_id=schema, @@ -443,7 +443,7 @@ def gen_dataset_key(self, db_name: str, schema: str) -> PlatformKey: backcompat_env_as_instance=True, ) - def gen_project_id_key(self, database: str) -> PlatformKey: + def gen_project_id_key(self, database: str) -> ContainerKey: return ProjectIdKey( project_id=database, platform=self.platform, diff --git a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py index 0a65537772390..b04718a9eabba 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/data_lake_common/data_lake_utils.py @@ -3,9 +3,9 @@ from datahub.emitter.mcp_builder import ( BucketKey, + ContainerKey, FolderKey, KeyType, - PlatformKey, add_dataset_to_container, gen_containers, ) @@ -45,7 +45,7 @@ def create_emit_containers( container_key: KeyType, name: str, sub_types: List[str], - parent_container_key: Optional[PlatformKey] = None, + parent_container_key: Optional[ContainerKey] = None, domain_urn: Optional[str] = None, ) -> Iterable[MetadataWorkUnit]: if container_key.guid() not in self.processed_containers: diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py index 33596091e420d..919cb83e4d832 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/powerbi.py @@ -9,7 +9,7 @@ import datahub.emitter.mce_builder as builder import datahub.ingestion.source.powerbi.rest_api_wrapper.data_classes as powerbi_data_classes from datahub.emitter.mcp import MetadataChangeProposalWrapper -from datahub.emitter.mcp_builder import PlatformKey, gen_containers +from 
datahub.emitter.mcp_builder import ContainerKey, gen_containers from datahub.ingestion.api.common import PipelineContext from datahub.ingestion.api.decorators import ( SourceCapability, @@ -104,7 +104,7 @@ def __init__( self.__reporter = reporter self.__dataplatform_instance_resolver = dataplatform_instance_resolver self.processed_datasets: Set[powerbi_data_classes.PowerBIDataset] = set() - self.workspace_key: PlatformKey + self.workspace_key: ContainerKey @staticmethod def urn_to_lowercase(value: str, flag: bool) -> str: @@ -256,7 +256,6 @@ def to_datahub_schema( self, table: powerbi_data_classes.Table, ) -> SchemaMetadataClass: - fields = [] table_fields = ( [self.to_datahub_schema_field(column) for column in table.columns] diff --git a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py index 28a5fac8b127b..2d2d9f527788f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py +++ b/metadata-ingestion/src/datahub/ingestion/source/powerbi/rest_api_wrapper/data_classes.py @@ -2,7 +2,7 @@ from enum import Enum from typing import Any, Dict, List, Optional, Union -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.metadata.schema_classes import ( BooleanTypeClass, DateTypeClass, @@ -28,11 +28,11 @@ } -class WorkspaceKey(PlatformKey): +class WorkspaceKey(ContainerKey): workspace: str -class DatasetKey(PlatformKey): +class DatasetKey(ContainerKey): dataset: str @@ -57,7 +57,7 @@ def get_workspace_key( platform_name: str, platform_instance: Optional[str] = None, workspace_id_as_urn_part: Optional[bool] = False, - ) -> PlatformKey: + ) -> ContainerKey: return WorkspaceKey( workspace=self.get_urn_part(workspace_id_as_urn_part), platform=platform_name, @@ -150,7 +150,7 @@ def __eq__(self, instance): def __hash__(self): return 
hash(self.__members()) - def get_dataset_key(self, platform_name: str) -> PlatformKey: + def get_dataset_key(self, platform_name: str) -> ContainerKey: return DatasetKey( dataset=self.id, platform=platform_name, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py index 46f9fd240db04..8b2eed36ac6b3 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/athena.py @@ -9,7 +9,7 @@ from sqlalchemy.engine.reflection import Inspector from datahub.configuration.validate_field_rename import pydantic_renamed_field -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.ingestion.api.decorators import ( SourceCapability, SupportStatus, @@ -211,7 +211,7 @@ def gen_schema_containers( extra_properties=extra_properties, ) - def get_database_container_key(self, db_name: str, schema: str) -> PlatformKey: + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. # Based on community feedback, db_name only available if it is explicitly specified in the connection string. 
@@ -232,7 +232,7 @@ def add_table_to_schema_container( dataset_urn: str, db_name: str, schema: str, - schema_container_key: Optional[PlatformKey] = None, + schema_container_key: Optional[ContainerKey] = None, ) -> Iterable[MetadataWorkUnit]: yield from add_table_to_schema_container( dataset_urn=dataset_urn, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py index 7554dd5af3103..a5f5034d175c6 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/sql_utils.py @@ -8,8 +8,8 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( + ContainerKey, DatabaseKey, - PlatformKey, SchemaKey, add_dataset_to_container, add_domain_to_entity_wu, @@ -28,7 +28,7 @@ def gen_schema_key( platform: str, platform_instance: Optional[str], env: Optional[str], -) -> PlatformKey: +) -> ContainerKey: return SchemaKey( database=db_name, schema=schema, @@ -41,7 +41,7 @@ def gen_schema_key( def gen_database_key( database: str, platform: str, platform_instance: Optional[str], env: Optional[str] -) -> PlatformKey: +) -> ContainerKey: return DatabaseKey( database=database, platform=platform, @@ -55,8 +55,8 @@ def gen_schema_container( schema: str, database: str, sub_types: List[str], - database_container_key: PlatformKey, - schema_container_key: PlatformKey, + database_container_key: ContainerKey, + schema_container_key: ContainerKey, domain_registry: Optional[DomainRegistry] = None, domain_config: Optional[Dict[str, AllowDenyPattern]] = None, name: Optional[str] = None, @@ -113,7 +113,7 @@ def gen_domain_urn( def gen_database_container( database: str, - database_container_key: PlatformKey, + database_container_key: ContainerKey, sub_types: List[str], domain_config: Optional[Dict[str, AllowDenyPattern]] = None, domain_registry: Optional[DomainRegistry] = None, @@ -152,7 
+152,7 @@ def gen_database_container( def add_table_to_schema_container( dataset_urn: str, - parent_container_key: PlatformKey, + parent_container_key: ContainerKey, ) -> Iterable[MetadataWorkUnit]: yield from add_dataset_to_container( container_key=parent_container_key, diff --git a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py index f105829d874de..d9062cef06eae 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/sql/two_tier_sql_source.py @@ -7,7 +7,7 @@ from datahub.configuration.common import AllowDenyPattern from datahub.configuration.validate_field_rename import pydantic_renamed_field -from datahub.emitter.mcp_builder import PlatformKey +from datahub.emitter.mcp_builder import ContainerKey from datahub.ingestion.api.workunit import MetadataWorkUnit from datahub.ingestion.source.sql.sql_common import SQLAlchemySource, logger from datahub.ingestion.source.sql.sql_config import ( @@ -56,7 +56,7 @@ def __init__(self, config, ctx, platform): super().__init__(config, ctx, platform) self.config: TwoTierSQLAlchemyConfig = config - def get_database_container_key(self, db_name: str, schema: str) -> PlatformKey: + def get_database_container_key(self, db_name: str, schema: str) -> ContainerKey: # Because our overridden get_allowed_schemas method returns db_name as the schema name, # the db_name and schema here will be the same. Hence, we just ignore the schema parameter. 
assert db_name == schema @@ -72,7 +72,7 @@ def add_table_to_schema_container( dataset_urn: str, db_name: str, schema: str, - schema_container_key: Optional[PlatformKey] = None, + schema_container_key: Optional[ContainerKey] = None, ) -> Iterable[MetadataWorkUnit]: yield from add_table_to_schema_container( dataset_urn=dataset_urn, @@ -86,7 +86,7 @@ def get_allowed_schemas( # dbName itself as an allowed schema yield db_name - def gen_schema_key(self, db_name: str, schema: str) -> PlatformKey: + def gen_schema_key(self, db_name: str, schema: str) -> ContainerKey: # Sanity check that we don't try to generate schema containers for 2 tier databases. raise NotImplementedError diff --git a/metadata-ingestion/src/datahub/ingestion/source/tableau.py b/metadata-ingestion/src/datahub/ingestion/source/tableau.py index 5ad39425c3f73..67bd1af6c2d7f 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/tableau.py +++ b/metadata-ingestion/src/datahub/ingestion/source/tableau.py @@ -33,7 +33,7 @@ ) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( - PlatformKey, + ContainerKey, add_entity_to_container, gen_containers, ) @@ -358,11 +358,11 @@ def projects_backward_compatibility(cls, values: Dict) -> Dict: return values -class WorkbookKey(PlatformKey): +class WorkbookKey(ContainerKey): workbook_id: str -class ProjectKey(PlatformKey): +class ProjectKey(ContainerKey): project_id: str @@ -1682,7 +1682,7 @@ def emit_datasource( ) def _get_datasource_container_key(self, datasource, workbook, is_embedded_ds): - container_key: Optional[PlatformKey] = None + container_key: Optional[ContainerKey] = None if is_embedded_ds: # It is embedded then parent is container is workbook if workbook is not None: container_key = self.gen_workbook_key(workbook) diff --git a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py index 9d82a9e247a00..ec7d00c7bcc63 100644 --- 
a/metadata-ingestion/src/datahub/ingestion/source/unity/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/unity/source.py @@ -15,8 +15,8 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_builder import ( CatalogKey, + ContainerKey, MetastoreKey, - PlatformKey, UnitySchemaKey, add_dataset_to_container, gen_containers, @@ -432,7 +432,7 @@ def gen_catalog_containers(self, catalog: Catalog) -> Iterable[MetadataWorkUnit] external_url=f"{self.external_url_base}/{catalog.name}", ) - def gen_schema_key(self, schema: Schema) -> PlatformKey: + def gen_schema_key(self, schema: Schema) -> ContainerKey: return UnitySchemaKey( unity_schema=schema.name, platform=self.platform,