From 3f9490d9018d4505eac413f92a9301cfca5c0ae7 Mon Sep 17 00:00:00 2001 From: Aseem Bansal Date: Wed, 31 Jan 2024 14:42:40 +0530 Subject: [PATCH] feat(cli): option to init via username password, add lint for smoke-test (#9675) Co-authored-by: Harshal Sheth --- .github/workflows/docker-unified.yml | 12 + docs/cli.md | 2 +- .../src/datahub/cli/cli_utils.py | 207 ++++++------ .../src/datahub/cli/config_utils.py | 102 ++++++ .../src/datahub/cli/docker_cli.py | 2 +- .../src/datahub/cli/env_utils.py | 11 + .../src/datahub/cli/ingest_cli.py | 19 +- .../src/datahub/cli/lite_cli.py | 2 +- .../src/datahub/emitter/mcp_patch_builder.py | 5 +- metadata-ingestion/src/datahub/entrypoints.py | 36 +- .../integrations/great_expectations/action.py | 2 +- .../src/datahub/specific/chart.py | 2 +- .../src/datahub/specific/dashboard.py | 2 +- .../src/datahub/specific/datajob.py | 2 +- .../src/datahub/specific/dataproduct.py | 1 - .../src/datahub/specific/dataset.py | 2 +- .../src/datahub/telemetry/telemetry.py | 3 +- .../tests/unit/test_cli_utils.py | 21 ++ smoke-test/.gitignore | 1 + smoke-test/build.gradle | 35 +- smoke-test/pyproject.toml | 46 +++ smoke-test/requirements.txt | 10 +- smoke-test/run-quickstart.sh | 4 +- .../timeseries/dataset_profile_gen.py | 8 +- .../tests/assertions/assertions_test.py | 40 ++- smoke-test/tests/browse/browse_test.py | 7 +- .../cli/{datahub-cli.py => datahub_cli.py} | 7 +- smoke-test/tests/cli/datahub_graph_test.py | 11 +- .../cli/delete_cmd/test_timeseries_delete.py | 8 +- .../ingest_cmd/test_timeseries_rollback.py | 2 - .../user_groups_cmd}/__init__.py | 0 .../cli/user_groups_cmd/test_group_cmd.py | 9 +- smoke-test/tests/consistency_utils.py | 2 +- .../tests/containers/containers_test.py | 9 +- smoke-test/tests/cypress/integration_test.py | 24 +- .../tests/dataproduct/test_dataproduct.py | 26 +- smoke-test/tests/delete/delete_test.py | 16 +- .../tests/deprecation/deprecation_test.py | 8 +- smoke-test/tests/domains/domains_test.py | 12 +- smoke-test/tests/lineage/test_lineage.py | 7 +- .../__init__.py | 0 .../managed_ingestion_test.py | 13 +- smoke-test/tests/patch/common_patch_tests.py | 86 ++--- .../tests/patch/test_datajob_patches.py | 23 +- .../tests/patch/test_dataset_patches.py | 42 +-- smoke-test/tests/policies/test_policies.py | 10 +- smoke-test/tests/privileges/__init__.py | 0 .../tests/privileges/test_privileges.py | 275 ++++++++------- smoke-test/tests/privileges/utils.py | 101 +++--- .../tests/read_only/test_services_up.py | 8 +- .../setup/lineage/ingest_data_job_change.py | 35 +- .../lineage/ingest_dataset_join_change.py | 27 +- .../lineage/ingest_input_datasets_change.py | 34 +- .../setup/lineage/ingest_time_lineage.py | 12 +- smoke-test/tests/setup/lineage/utils.py | 60 ++-- .../test_structured_properties.py | 315 +++++++++--------- smoke-test/tests/tags_and_terms/__init__.py | 0 .../data.json | 0 .../tags_and_terms_test.py | 12 +- smoke-test/tests/telemetry/telemetry_test.py | 4 +- smoke-test/tests/test_stateful_ingestion.py | 16 +- smoke-test/tests/tests/tests_test.py | 12 +- smoke-test/tests/timeline/timeline_test.py | 9 +- .../tokens/revokable_access_token_test.py | 60 ++-- smoke-test/tests/utilities/file_emitter.py | 11 +- smoke-test/tests/utils.py | 31 +- smoke-test/tests/views/views_test.py | 12 +- 67 files changed, 1161 insertions(+), 772 deletions(-) create mode 100644 metadata-ingestion/src/datahub/cli/config_utils.py create mode 100644 metadata-ingestion/src/datahub/cli/env_utils.py create mode 100644 smoke-test/pyproject.toml rename 
smoke-test/tests/cli/{datahub-cli.py => datahub_cli.py} (93%) rename smoke-test/tests/{managed-ingestion => cli/user_groups_cmd}/__init__.py (100%) rename smoke-test/tests/{tags-and-terms => managed_ingestion}/__init__.py (100%) rename smoke-test/tests/{managed-ingestion => managed_ingestion}/managed_ingestion_test.py (98%) create mode 100644 smoke-test/tests/privileges/__init__.py create mode 100644 smoke-test/tests/tags_and_terms/__init__.py rename smoke-test/tests/{tags-and-terms => tags_and_terms}/data.json (100%) rename smoke-test/tests/{tags-and-terms => tags_and_terms}/tags_and_terms_test.py (98%) diff --git a/.github/workflows/docker-unified.yml b/.github/workflows/docker-unified.yml index 5e9112726b010..24490ef83ae5d 100644 --- a/.github/workflows/docker-unified.yml +++ b/.github/workflows/docker-unified.yml @@ -64,6 +64,18 @@ jobs: steps: - name: Check out the repo uses: hsheth2/sane-checkout-action@v1 + - uses: actions/setup-python@v4 + with: + python-version: "3.10" + cache: "pip" + - name: Set up JDK 17 + uses: actions/setup-java@v3 + with: + distribution: "zulu" + java-version: 17 + - name: Run lint on smoke test + run: | + ./gradlew :smoke-test:lint - name: Compute Tag id: tag run: | diff --git a/docs/cli.md b/docs/cli.md index 927270b42259d..3f67f1de6204d 100644 --- a/docs/cli.md +++ b/docs/cli.md @@ -180,7 +180,7 @@ failure_log: ### init The init command is used to tell `datahub` about where your DataHub instance is located. The CLI will point to localhost DataHub by default. -Running `datahub init` will allow you to customize the datahub instance you are communicating with. +Running `datahub init` will allow you to customize the datahub instance you are communicating with. It has an optional `--use-password` option which allows you to initialise the config using a username and password. We foresee this mainly being used by admins, as the majority of organisations will be using SSO and there won't be any passwords to use. **_Note_**: Provide your GMS instance's host when the prompt asks you for the DataHub host. diff --git a/metadata-ingestion/src/datahub/cli/cli_utils.py b/metadata-ingestion/src/datahub/cli/cli_utils.py index 8ac9a101121be..1bb3b01e078dd 100644 --- a/metadata-ingestion/src/datahub/cli/cli_utils.py +++ b/metadata-ingestion/src/datahub/cli/cli_utils.py @@ -9,12 +9,11 @@ import click import requests -import yaml from deprecated import deprecated -from pydantic import BaseModel, ValidationError from requests.models import Response from requests.sessions import Session +from datahub.cli import config_utils from datahub.emitter.aspect import ASPECT_MAP, TIMESERIES_ASPECT_MAP from datahub.emitter.request_helper import make_curl_command from datahub.emitter.serialization_helper import post_json_transform @@ -23,13 +22,6 @@ log = logging.getLogger(__name__) -DEFAULT_GMS_HOST = "http://localhost:8080" -CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv" -DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH) - -DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub") - -ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG" ENV_METADATA_HOST_URL = "DATAHUB_GMS_URL" ENV_METADATA_HOST = "DATAHUB_GMS_HOST" ENV_METADATA_PORT = "DATAHUB_GMS_PORT" @@ -45,25 +37,6 @@ # For the methods that aren't duplicates, that logic should be moved to the client.
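# A minimal usage sketch of the username/password helpers this diff adds near
# the bottom of cli_utils.py. Only generate_access_token and its signature come
# from the patch itself; the "datahub"/"datahub" credentials are an assumption
# (the usual quickstart defaults), and the frontend URL is guessed from the GMS
# URL by swapping port 8080 for 9002:
#
#   from datahub.cli.cli_utils import generate_access_token
#
#   token_name, token = generate_access_token(
#       username="datahub",
#       password="datahub",  # assumed quickstart default login
#       gms_url="http://localhost:8080",  # frontend guessed as :9002
#   )
#   print(f"created access token {token_name}")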
-class GmsConfig(BaseModel): - server: str - token: Optional[str] = None - - -class DatahubConfig(BaseModel): - gms: GmsConfig - - -def get_boolean_env_variable(key: str, default: bool = False) -> bool: - value = os.environ.get(key) - if value is None: - return default - elif value.lower() in ("true", "1"): - return True - else: - return False - - def set_env_variables_override_config(url: str, token: Optional[str]) -> None: """Should be used to override the config when using rest emitter""" config_override[ENV_METADATA_HOST_URL] = url @@ -71,78 +44,6 @@ def set_env_variables_override_config(url: str, token: Optional[str]) -> None: config_override[ENV_METADATA_TOKEN] = token -def persist_datahub_config(config: dict) -> None: - with open(DATAHUB_CONFIG_PATH, "w+") as outfile: - yaml.dump(config, outfile, default_flow_style=False) - return None - - -def write_gms_config( - host: str, token: Optional[str], merge_with_previous: bool = True -) -> None: - config = DatahubConfig(gms=GmsConfig(server=host, token=token)) - if merge_with_previous: - try: - previous_config = get_client_config(as_dict=True) - assert isinstance(previous_config, dict) - except Exception as e: - # ok to fail on this - previous_config = {} - log.debug( - f"Failed to retrieve config from file {DATAHUB_CONFIG_PATH}: {e}. This isn't fatal." - ) - config_dict = {**previous_config, **config.dict()} - else: - config_dict = config.dict() - persist_datahub_config(config_dict) - - -def should_skip_config() -> bool: - return get_boolean_env_variable(ENV_SKIP_CONFIG, False) - - -def ensure_datahub_config() -> None: - if not os.path.isfile(DATAHUB_CONFIG_PATH): - click.secho( - f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...", - bold=True, - ) - write_gms_config(DEFAULT_GMS_HOST, None) - - -def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]: - with open(DATAHUB_CONFIG_PATH, "r") as stream: - try: - config_json = yaml.safe_load(stream) - if as_dict: - return config_json - try: - datahub_config = DatahubConfig.parse_obj(config_json) - return datahub_config - except ValidationError as e: - click.echo( - f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}" - ) - click.echo(e, err=True) - sys.exit(1) - except yaml.YAMLError as exc: - click.secho(f"{DATAHUB_CONFIG_PATH} malformed, error: {exc}", bold=True) - return None - - -def get_details_from_config(): - datahub_config = get_client_config(as_dict=False) - assert isinstance(datahub_config, DatahubConfig) - if datahub_config is not None: - gms_config = datahub_config.gms - - gms_host = gms_config.server - gms_token = gms_config.token - return gms_host, gms_token - else: - return None, None - - def get_details_from_env() -> Tuple[Optional[str], Optional[str]]: host = os.environ.get(ENV_METADATA_HOST) port = os.environ.get(ENV_METADATA_PORT) @@ -178,12 +79,12 @@ def get_url_and_token(): if len(config_override.keys()) > 0: gms_host = config_override.get(ENV_METADATA_HOST_URL) gms_token = config_override.get(ENV_METADATA_TOKEN) - elif should_skip_config(): + elif config_utils.should_skip_config(): gms_host = gms_host_env gms_token = gms_token_env else: - ensure_datahub_config() - gms_host_conf, gms_token_conf = get_details_from_config() + config_utils.ensure_datahub_config() + gms_host_conf, gms_token_conf = config_utils.get_details_from_config() gms_host = first_non_null([gms_host_env, gms_host_conf]) gms_token = first_non_null([gms_token_env, gms_token_conf]) return gms_host, gms_token @@ -253,14 +154,18 @@ def 
parse_run_restli_response(response: requests.Response) -> dict: exit() if not isinstance(response_json, dict): - click.echo(f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}") + click.echo( + f"Received error, please check your {config_utils.CONDENSED_DATAHUB_CONFIG_PATH}" + ) click.echo() click.echo(response_json) exit() summary = response_json.get("value") if not isinstance(summary, dict): - click.echo(f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}") + click.echo( + f"Received error, please check your {config_utils.CONDENSED_DATAHUB_CONFIG_PATH}" + ) click.echo() click.echo(response_json) exit() @@ -686,3 +591,95 @@ def command(ctx: click.Context) -> None: ctx.exit(1) return command + + +def get_session_login_as( + username: str, password: str, frontend_url: str +) -> requests.Session: + session = requests.Session() + headers = { + "Content-Type": "application/json", + } + system_auth = get_system_auth() + if system_auth is not None: + session.headers.update({"Authorization": system_auth}) + else: + data = '{"username":"' + username + '", "password":"' + password + '"}' + response = session.post(f"{frontend_url}/logIn", headers=headers, data=data) + response.raise_for_status() + return session + + +def _ensure_valid_gms_url_acryl_cloud(url: str) -> str: + if "acryl.io" not in url: + return url + if url.startswith("http://"): + url = url.replace("http://", "https://") + if url.endswith("acryl.io"): + url = f"{url}/gms" + return url + + +def fixup_gms_url(url: str) -> str: + if url is None: + return "" + if url.endswith("/"): + url = url.rstrip("/") + url = _ensure_valid_gms_url_acryl_cloud(url) + return url + + +def guess_frontend_url_from_gms_url(gms_url: str) -> str: + gms_url = fixup_gms_url(gms_url) + url = gms_url + if url.endswith("/gms"): + url = gms_url[: -len("/gms")] + if url.endswith("8080"): + url = url[:-4] + "9002" + return url + + +def generate_access_token( + username: str, + password: str, + gms_url: str, + token_name: Optional[str] = None, + validity: str = "ONE_HOUR", +) -> Tuple[str, str]: + frontend_url = guess_frontend_url_from_gms_url(gms_url) + session = get_session_login_as( + username=username, + password=password, + frontend_url=frontend_url, + ) + now = datetime.now() + timestamp = now.astimezone().isoformat() + if token_name is None: + token_name = f"cli token {timestamp}" + json = { + "query": """mutation createAccessToken($input: CreateAccessTokenInput!) { + createAccessToken(input: $input) { + accessToken + metadata { + id + actorUrn + ownerUrn + name + description + } + } + }""", + "variables": { + "input": { + "type": "PERSONAL", + "actorUrn": f"urn:li:corpuser:{username}", + "duration": validity, + "name": token_name, + } + }, + } + response = session.post(f"{frontend_url}/api/v2/graphql", json=json) + response.raise_for_status() + return token_name, response.json().get("data", {}).get("createAccessToken", {}).get( + "accessToken", None + ) diff --git a/metadata-ingestion/src/datahub/cli/config_utils.py b/metadata-ingestion/src/datahub/cli/config_utils.py new file mode 100644 index 0000000000000..4b69aec8081ab --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/config_utils.py @@ -0,0 +1,102 @@ +import logging +import os +import sys +from typing import Optional, Union + +import click +import yaml +from pydantic import BaseModel, ValidationError + +from datahub.cli.env_utils import get_boolean_env_variable + +__help__ = ( + "Helper methods for manipulating the config file on the local system."
+) +log = logging.getLogger(__name__) + +DEFAULT_GMS_HOST = "http://localhost:8080" +CONDENSED_DATAHUB_CONFIG_PATH = "~/.datahubenv" +DATAHUB_CONFIG_PATH = os.path.expanduser(CONDENSED_DATAHUB_CONFIG_PATH) +DATAHUB_ROOT_FOLDER = os.path.expanduser("~/.datahub") +ENV_SKIP_CONFIG = "DATAHUB_SKIP_CONFIG" + + +class GmsConfig(BaseModel): + server: str + token: Optional[str] = None + + +class DatahubConfig(BaseModel): + gms: GmsConfig + + +def persist_datahub_config(config: dict) -> None: + with open(DATAHUB_CONFIG_PATH, "w+") as outfile: + yaml.dump(config, outfile, default_flow_style=False) + return None + + +def write_gms_config( + host: str, token: Optional[str], merge_with_previous: bool = True +) -> None: + config = DatahubConfig(gms=GmsConfig(server=host, token=token)) + if merge_with_previous: + try: + previous_config = get_client_config(as_dict=True) + assert isinstance(previous_config, dict) + except Exception as e: + # ok to fail on this + previous_config = {} + log.debug( + f"Failed to retrieve config from file {DATAHUB_CONFIG_PATH}: {e}. This isn't fatal." + ) + config_dict = {**previous_config, **config.dict()} + else: + config_dict = config.dict() + persist_datahub_config(config_dict) + + +def get_details_from_config(): + datahub_config = get_client_config(as_dict=False) + assert isinstance(datahub_config, DatahubConfig) + if datahub_config is not None: + gms_config = datahub_config.gms + + gms_host = gms_config.server + gms_token = gms_config.token + return gms_host, gms_token + else: + return None, None + + +def should_skip_config() -> bool: + return get_boolean_env_variable(ENV_SKIP_CONFIG, False) + + +def ensure_datahub_config() -> None: + if not os.path.isfile(DATAHUB_CONFIG_PATH): + click.secho( + f"No {CONDENSED_DATAHUB_CONFIG_PATH} file found, generating one for you...", + bold=True, + ) + write_gms_config(DEFAULT_GMS_HOST, None) + + +def get_client_config(as_dict: bool = False) -> Union[Optional[DatahubConfig], dict]: + with open(DATAHUB_CONFIG_PATH, "r") as stream: + try: + config_json = yaml.safe_load(stream) + if as_dict: + return config_json + try: + datahub_config = DatahubConfig.parse_obj(config_json) + return datahub_config + except ValidationError as e: + click.echo( + f"Received error, please check your {CONDENSED_DATAHUB_CONFIG_PATH}" + ) + click.echo(e, err=True) + sys.exit(1) + except yaml.YAMLError as exc: + click.secho(f"{DATAHUB_CONFIG_PATH} malformed, error: {exc}", bold=True) + return None diff --git a/metadata-ingestion/src/datahub/cli/docker_cli.py b/metadata-ingestion/src/datahub/cli/docker_cli.py index 0e0bc37c61573..099f57d975bbb 100644 --- a/metadata-ingestion/src/datahub/cli/docker_cli.py +++ b/metadata-ingestion/src/datahub/cli/docker_cli.py @@ -21,7 +21,7 @@ from expandvars import expandvars from requests_file import FileAdapter -from datahub.cli.cli_utils import DATAHUB_ROOT_FOLDER +from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER from datahub.cli.docker_check import ( DATAHUB_COMPOSE_LEGACY_VOLUME_FILTERS, DATAHUB_COMPOSE_PROJECT_FILTER, diff --git a/metadata-ingestion/src/datahub/cli/env_utils.py b/metadata-ingestion/src/datahub/cli/env_utils.py new file mode 100644 index 0000000000000..8909036eab6cd --- /dev/null +++ b/metadata-ingestion/src/datahub/cli/env_utils.py @@ -0,0 +1,11 @@ +import os + + +def get_boolean_env_variable(key: str, default: bool = False) -> bool: + value = os.environ.get(key) + if value is None: + return default + elif value.lower() in ("true", "1"): + return True + else: + return False diff --git 
a/metadata-ingestion/src/datahub/cli/ingest_cli.py b/metadata-ingestion/src/datahub/cli/ingest_cli.py index 9c55f52497c0e..2e66b18e48145 100644 --- a/metadata-ingestion/src/datahub/cli/ingest_cli.py +++ b/metadata-ingestion/src/datahub/cli/ingest_cli.py @@ -15,12 +15,7 @@ import datahub as datahub_package from datahub.cli import cli_utils -from datahub.cli.cli_utils import ( - CONDENSED_DATAHUB_CONFIG_PATH, - format_aspect_summaries, - get_session_and_host, - post_rollback_endpoint, -) +from datahub.cli.config_utils import CONDENSED_DATAHUB_CONFIG_PATH from datahub.configuration.config_loader import load_config_file from datahub.ingestion.graph.client import get_default_graph from datahub.ingestion.run.connection import ConnectionManager @@ -436,7 +431,7 @@ def mcps(path: str) -> None: def list_runs(page_offset: int, page_size: int, include_soft_deletes: bool) -> None: """List recent ingestion runs to datahub""" - session, gms_host = get_session_and_host() + session, gms_host = cli_utils.get_session_and_host() url = f"{gms_host}/runs?action=list" @@ -485,7 +480,7 @@ def show( run_id: str, start: int, count: int, include_soft_deletes: bool, show_aspect: bool ) -> None: """Describe a provided ingestion run to datahub""" - session, gms_host = get_session_and_host() + session, gms_host = cli_utils.get_session_and_host() url = f"{gms_host}/runs?action=describe" @@ -504,7 +499,11 @@ def show( rows = parse_restli_response(response) if not show_aspect: click.echo( - tabulate(format_aspect_summaries(rows), RUN_TABLE_COLUMNS, tablefmt="grid") + tabulate( + cli_utils.format_aspect_summaries(rows), + RUN_TABLE_COLUMNS, + tablefmt="grid", + ) ) else: for row in rows: @@ -546,7 +545,7 @@ def rollback( aspects_affected, unsafe_entity_count, unsafe_entities, - ) = post_rollback_endpoint(payload_obj, "/runs?action=rollback") + ) = cli_utils.post_rollback_endpoint(payload_obj, "/runs?action=rollback") click.echo( "Rolling back deletes the entities created by a run and reverts the updated aspects" diff --git a/metadata-ingestion/src/datahub/cli/lite_cli.py b/metadata-ingestion/src/datahub/cli/lite_cli.py index 8636187a51d09..7e2ad23a7753f 100644 --- a/metadata-ingestion/src/datahub/cli/lite_cli.py +++ b/metadata-ingestion/src/datahub/cli/lite_cli.py @@ -9,7 +9,7 @@ from click.shell_completion import CompletionItem from click_default_group import DefaultGroup -from datahub.cli.cli_utils import ( +from datahub.cli.config_utils import ( DATAHUB_ROOT_FOLDER, DatahubConfig, get_client_config, diff --git a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py index be68d46472a55..5a9eb074f1b09 100644 --- a/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py +++ b/metadata-ingestion/src/datahub/emitter/mcp_patch_builder.py @@ -12,6 +12,7 @@ MetadataChangeProposalClass, SystemMetadataClass, ) +from datahub.utilities.urns.urn import guess_entity_type def _recursive_to_obj(obj: Any) -> Any: @@ -47,13 +48,11 @@ class MetadataPatchProposal: def __init__( self, urn: str, - entity_type: str, system_metadata: Optional[SystemMetadataClass] = None, audit_header: Optional[KafkaAuditHeaderClass] = None, ) -> None: self.urn = urn - # TODO: Remove the entity_type parameter, as MCPW can infer it from the URN. 
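# A short sketch of the inference that replaces the removed parameter below.
# guess_entity_type comes from datahub.utilities.urns.urn (imported above in
# this diff); assuming it reads the entity type out of the
# "urn:li:<entityType>:..." prefix, the expected behavior is:
#
#   from datahub.utilities.urns.urn import guess_entity_type
#
#   assert guess_entity_type(
#       "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)"
#   ) == "dataset"
#   assert guess_entity_type("urn:li:chart:(looker,my-chart)") == "chart"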
- self.entity_type = entity_type + self.entity_type = guess_entity_type(urn) self.system_metadata = system_metadata self.audit_header = audit_header self.patches = defaultdict(list) diff --git a/metadata-ingestion/src/datahub/entrypoints.py b/metadata-ingestion/src/datahub/entrypoints.py index 1bf090a2e514e..4f6c596b7bf20 100644 --- a/metadata-ingestion/src/datahub/entrypoints.py +++ b/metadata-ingestion/src/datahub/entrypoints.py @@ -9,9 +9,13 @@ import datahub as datahub_package from datahub.cli.check_cli import check from datahub.cli.cli_utils import ( + fixup_gms_url, + generate_access_token, + make_shim_command, +) +from datahub.cli.config_utils import ( DATAHUB_CONFIG_PATH, get_boolean_env_variable, - make_shim_command, write_gms_config, ) from datahub.cli.delete_cli import delete @@ -99,8 +103,15 @@ def version() -> None: @datahub.command() +@click.option( + "--use-password", + type=bool, + is_flag=True, + default=False, + help="If passed, prompts for a username and password and uses them to generate an access token.", +) @telemetry.with_telemetry() -def init() -> None: +def init(use_password: bool = False) -> None: """Configure which datahub instance to connect to""" if os.path.isfile(DATAHUB_CONFIG_PATH): @@ -110,11 +121,22 @@ def init() -> None: host = click.prompt( "Enter your DataHub host", type=str, default="http://localhost:8080" ) - token = click.prompt( - "Enter your DataHub access token (Supports env vars via `{VAR_NAME}` syntax)", - type=str, - default="", - ) + host = fixup_gms_url(host) + if use_password: + username = click.prompt("Enter your DataHub username", type=str) + password = click.prompt( + "Enter your DataHub password", + type=str, + ) + _, token = generate_access_token( + username=username, password=password, gms_url=host + ) + else: + token = click.prompt( + "Enter your DataHub access token (Supports env vars via `{VAR_NAME}` syntax)", + type=str, + default="", + ) write_gms_config(host, token) click.echo(f"Written to {DATAHUB_CONFIG_PATH}") diff --git a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py index 8b393a8f6f1c6..9890fea990c4e 100644 --- a/metadata-ingestion/src/datahub/integrations/great_expectations/action.py +++ b/metadata-ingestion/src/datahub/integrations/great_expectations/action.py @@ -32,7 +32,7 @@ from sqlalchemy.engine.url import make_url import datahub.emitter.mce_builder as builder -from datahub.cli.cli_utils import get_boolean_env_variable +from datahub.cli.env_utils import get_boolean_env_variable from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.emitter.serialization_helper import pre_json_transform diff --git a/metadata-ingestion/src/datahub/specific/chart.py b/metadata-ingestion/src/datahub/specific/chart.py index 5dc394e8ebe0f..47ea539491359 100644 --- a/metadata-ingestion/src/datahub/specific/chart.py +++ b/metadata-ingestion/src/datahub/specific/chart.py @@ -40,7 +40,7 @@ def __init__( audit_header: The Kafka audit header of the chart (optional).
""" super().__init__( - urn, "chart", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, ChartInfo.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/specific/dashboard.py b/metadata-ingestion/src/datahub/specific/dashboard.py index 855dcc5685cea..64f28a57cb180 100644 --- a/metadata-ingestion/src/datahub/specific/dashboard.py +++ b/metadata-ingestion/src/datahub/specific/dashboard.py @@ -40,7 +40,7 @@ def __init__( audit_header: The Kafka audit header of the dashboard (optional). """ super().__init__( - urn, "dashboard", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, DashboardInfo.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/specific/datajob.py b/metadata-ingestion/src/datahub/specific/datajob.py index 0338a1320c15b..174749b3268bf 100644 --- a/metadata-ingestion/src/datahub/specific/datajob.py +++ b/metadata-ingestion/src/datahub/specific/datajob.py @@ -41,7 +41,7 @@ def __init__( audit_header: The Kafka audit header of the data job (optional). """ super().__init__( - urn, "datajob", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, DataJobInfo.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/specific/dataproduct.py b/metadata-ingestion/src/datahub/specific/dataproduct.py index 2c174e0c9a6cb..c698c511fd9b5 100644 --- a/metadata-ingestion/src/datahub/specific/dataproduct.py +++ b/metadata-ingestion/src/datahub/specific/dataproduct.py @@ -30,7 +30,6 @@ def __init__( ) -> None: super().__init__( urn, - "dataProduct", system_metadata=system_metadata, audit_header=audit_header, ) diff --git a/metadata-ingestion/src/datahub/specific/dataset.py b/metadata-ingestion/src/datahub/specific/dataset.py index d3c3de36198e3..c59cdb8ddfa38 100644 --- a/metadata-ingestion/src/datahub/specific/dataset.py +++ b/metadata-ingestion/src/datahub/specific/dataset.py @@ -98,7 +98,7 @@ def __init__( audit_header: Optional[KafkaAuditHeaderClass] = None, ) -> None: super().__init__( - urn, "dataset", system_metadata=system_metadata, audit_header=audit_header + urn, system_metadata=system_metadata, audit_header=audit_header ) self.custom_properties_patch_helper = CustomPropertiesPatchHelper( self, DatasetProperties.ASPECT_NAME diff --git a/metadata-ingestion/src/datahub/telemetry/telemetry.py b/metadata-ingestion/src/datahub/telemetry/telemetry.py index c399f2e1a27e5..a802125e76b4e 100644 --- a/metadata-ingestion/src/datahub/telemetry/telemetry.py +++ b/metadata-ingestion/src/datahub/telemetry/telemetry.py @@ -13,7 +13,8 @@ from typing_extensions import ParamSpec import datahub as datahub_package -from datahub.cli.cli_utils import DATAHUB_ROOT_FOLDER, get_boolean_env_variable +from datahub.cli.config_utils import DATAHUB_ROOT_FOLDER +from datahub.cli.env_utils import get_boolean_env_variable from datahub.configuration.common import ExceptionWithProps from datahub.ingestion.graph.client import DataHubGraph from datahub.metadata.schema_classes import _custom_package_path diff --git a/metadata-ingestion/tests/unit/test_cli_utils.py b/metadata-ingestion/tests/unit/test_cli_utils.py index cb0b7c734ee0a..bc1826d422e38 100644 --- 
a/metadata-ingestion/tests/unit/test_cli_utils.py +++ b/metadata-ingestion/tests/unit/test_cli_utils.py @@ -59,3 +59,24 @@ def test_correct_url_when_gms_host_port_url_protocol_set(): ) def test_correct_url_when_url_set(): assert cli_utils.get_details_from_env() == ("https://example.com", None) + + +def test_fixup_gms_url(): + assert cli_utils.fixup_gms_url("http://localhost:8080") == "http://localhost:8080" + assert cli_utils.fixup_gms_url("http://localhost:8080/") == "http://localhost:8080" + assert cli_utils.fixup_gms_url("http://abc.acryl.io") == "https://abc.acryl.io/gms" + + +def test_guess_frontend_url_from_gms_url(): + assert ( + cli_utils.guess_frontend_url_from_gms_url("http://localhost:8080") + == "http://localhost:9002" + ) + assert ( + cli_utils.guess_frontend_url_from_gms_url("http://localhost:8080/") + == "http://localhost:9002" + ) + assert ( + cli_utils.guess_frontend_url_from_gms_url("https://abc.acryl.io/gms") + == "https://abc.acryl.io" + ) diff --git a/smoke-test/.gitignore b/smoke-test/.gitignore index 44d3f620a1937..b8af2eef535a0 100644 --- a/smoke-test/.gitignore +++ b/smoke-test/.gitignore @@ -127,6 +127,7 @@ venv.bak/ .mypy_cache/ .dmypy.json dmypy.json +.ruff_cache/ # Pyre type checker .pyre/ diff --git a/smoke-test/build.gradle b/smoke-test/build.gradle index 1614a4b8527dc..a6f3cd793ddd6 100644 --- a/smoke-test/build.gradle +++ b/smoke-test/build.gradle @@ -1,5 +1,10 @@ apply plugin: 'com.github.node-gradle.node' +ext { + python_executable = 'python3' + venv_name = 'venv' +} + node { // If true, it will download node using above parameters. @@ -38,4 +43,32 @@ task yarnInstall(type: YarnTask) { println "Root directory: ${project.rootDir}"; environment = ['NODE_OPTIONS': '--openssl-legacy-provider'] args = ['install', '--cwd', "${project.rootDir}/smoke-test/tests/cypress"] -} \ No newline at end of file +} + +task installDev(type: Exec) { + inputs.file file('pyproject.toml') + inputs.file file('requirements.txt') + outputs.file("${venv_name}/.build_install_dev_sentinel") + commandLine 'bash', '-x', '-c', + "${python_executable} -m venv ${venv_name} && " + + "${venv_name}/bin/pip install --upgrade pip wheel setuptools && " + + "${venv_name}/bin/pip install -r requirements.txt && " + + "touch ${venv_name}/.build_install_dev_sentinel" +} + +task lint(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "black --check --diff tests/ && " + + "isort --check --diff tests/ && " + + "ruff --statistics tests/ && " + + "mypy tests/" +} +task lintFix(type: Exec, dependsOn: installDev) { + commandLine 'bash', '-c', + "source ${venv_name}/bin/activate && set -x && " + + "black tests/ && " + + "isort tests/ && " + + "ruff --fix tests/ && " + + "mypy tests/" +} diff --git a/smoke-test/pyproject.toml b/smoke-test/pyproject.toml new file mode 100644 index 0000000000000..c7745d0e9a364 --- /dev/null +++ b/smoke-test/pyproject.toml @@ -0,0 +1,46 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" + +[project] +name = "smoke-test" +version = "0.0.0" +description = "" +authors = [ + { name="Acryl Data", email="eng@acryl.io" }, +] +requires-python = ">=3.9" + + +[tool.black] +extend-exclude = ''' +# A regex preceded with ^/ will apply only to files and directories +# in the root of the project. +tmp +venv +''' +include = '\.pyi?$' +target-version = ['py310'] + +[tool.isort] +profile = 'black' + +[tool.ruff] +ignore = [ + 'E501', # Ignore line length, since black handles that. 
+ 'D203', # Ignore 1 blank line required before class docstring. +] + +[tool.mypy] +exclude = "^(venv/|build/|dist/)" +ignore_missing_imports = true +namespace_packages = false +check_untyped_defs = true +disallow_untyped_decorators = true +warn_unused_configs = true +# eventually we'd like to enable these +disallow_incomplete_defs = false +disallow_untyped_defs = false + +[tool.pyright] +extraPaths = ['tests'] diff --git a/smoke-test/requirements.txt b/smoke-test/requirements.txt index e37de9caddc69..c5d43163dff5d 100644 --- a/smoke-test/requirements.txt +++ b/smoke-test/requirements.txt @@ -7,4 +7,12 @@ slack-sdk==3.18.1 aiohttp joblib pytest-xdist -networkx \ No newline at end of file +networkx +# libraries for linting below this +black==23.7.0 +isort==5.12.0 +mypy==1.5.1 +ruff==0.0.287 +# stub versions are copied from metadata-ingestion/setup.py and that should be the source of truth +types-requests>=2.28.11.6,<=2.31.0.3 +types-PyYAML diff --git a/smoke-test/run-quickstart.sh b/smoke-test/run-quickstart.sh index cd747321ad602..05c321566d54a 100755 --- a/smoke-test/run-quickstart.sh +++ b/smoke-test/run-quickstart.sh @@ -4,10 +4,8 @@ set -euxo pipefail DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" cd "$DIR" -python3 -m venv venv +../gradlew :smoke-test:installDev source venv/bin/activate -pip install --upgrade pip wheel setuptools -pip install -r requirements.txt mkdir -p ~/.datahub/plugins/frontend/auth/ echo "test_user:test_pass" >> ~/.datahub/plugins/frontend/auth/user.props diff --git a/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py b/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py index bc22b74ed185c..f808e7a58a329 100644 --- a/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py +++ b/smoke-test/tests/aspect_generators/timeseries/dataset_profile_gen.py @@ -1,8 +1,10 @@ from typing import Iterable -from datahub.metadata.schema_classes import (DatasetFieldProfileClass, - DatasetProfileClass, - TimeWindowSizeClass) +from datahub.metadata.schema_classes import ( + DatasetFieldProfileClass, + DatasetProfileClass, + TimeWindowSizeClass, +) from tests.utils import get_timestampmillis_at_start_of_day diff --git a/smoke-test/tests/assertions/assertions_test.py b/smoke-test/tests/assertions/assertions_test.py index 48f3564e6cd97..78ba68a840f0d 100644 --- a/smoke-test/tests/assertions/assertions_test.py +++ b/smoke-test/tests/assertions/assertions_test.py @@ -7,24 +7,30 @@ from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.api.common import PipelineContext, RecordEnvelope from datahub.ingestion.api.sink import NoopWriteCallback -from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.com.linkedin.pegasus2avro.assertion import \ AssertionStdAggregation -from datahub.metadata.schema_classes import (AssertionInfoClass, - AssertionResultClass, - AssertionResultTypeClass, - AssertionRunEventClass, - AssertionRunStatusClass, - AssertionStdOperatorClass, - AssertionTypeClass, - DatasetAssertionInfoClass, - DatasetAssertionScopeClass, - PartitionSpecClass, - PartitionTypeClass) +from datahub.ingestion.sink.file import FileSink +from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionStdAggregation +from datahub.metadata.schema_classes import ( + AssertionInfoClass, + AssertionResultClass, + AssertionResultTypeClass, + AssertionRunEventClass, + AssertionRunStatusClass, + AssertionStdOperatorClass, + AssertionTypeClass, +
DatasetAssertionInfoClass, + DatasetAssertionScopeClass, + PartitionSpecClass, + PartitionTypeClass, +) import requests_wrapper as requests -from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, - ingest_file_via_rest, wait_for_healthcheck_util) +from tests.utils import ( + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, + wait_for_healthcheck_util, +) restli_default_headers = { "X-RestLi-Protocol-Version": "2.0.0", @@ -210,7 +216,7 @@ def create_test_data(test_file): ) fileSink: FileSink = FileSink.create( - FileSinkConfig(filename=test_file), ctx=PipelineContext(run_id="test-file") + {"filename": test_file}, ctx=PipelineContext(run_id="test-file") ) for mcp in [mcp1, mcp2, mcp3, mcp4, mcp5, mcp6, mcp7]: fileSink.write_record_async( diff --git a/smoke-test/tests/browse/browse_test.py b/smoke-test/tests/browse/browse_test.py index 550f0062d5a39..adeb6775a150d 100644 --- a/smoke-test/tests/browse/browse_test.py +++ b/smoke-test/tests/browse/browse_test.py @@ -1,10 +1,6 @@ -import time - import pytest -import requests_wrapper as requests -from tests.utils import (delete_urns_from_file, get_frontend_url, - ingest_file_via_rest) +from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)" TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)" @@ -29,7 +25,6 @@ def test_healthchecks(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_get_browse_paths(frontend_session, ingest_cleanup_data): - # Iterate through each browse path, starting with the root get_browse_paths_query = """query browse($input: BrowseInput!) {\n diff --git a/smoke-test/tests/cli/datahub-cli.py b/smoke-test/tests/cli/datahub_cli.py similarity index 93% rename from smoke-test/tests/cli/datahub-cli.py rename to smoke-test/tests/cli/datahub_cli.py index c3db6028efceb..81ae6a34264ad 100644 --- a/smoke-test/tests/cli/datahub-cli.py +++ b/smoke-test/tests/cli/datahub_cli.py @@ -1,10 +1,7 @@ import json -from time import sleep import pytest -from datahub.cli.cli_utils import (get_aspects_for_entity, guess_entity_type, - post_entity) -from datahub.cli.ingest_cli import get_session_and_host, rollback +from datahub.cli.cli_utils import get_aspects_for_entity, get_session_and_host from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync @@ -115,5 +112,5 @@ def test_rollback_editable(): ) # But first ingestion aspects should not be present assert "browsePaths" not in get_aspects_for_entity( - entity_urn=dataset_urn, typed=False + entity_urn=dataset_urn, aspects=["browsePaths"], typed=False ) diff --git a/smoke-test/tests/cli/datahub_graph_test.py b/smoke-test/tests/cli/datahub_graph_test.py index 17c8924fb0998..1e324477adb6b 100644 --- a/smoke-test/tests/cli/datahub_graph_test.py +++ b/smoke-test/tests/cli/datahub_graph_test.py @@ -1,11 +1,14 @@ import pytest import tenacity from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph -from datahub.metadata.schema_classes import (KafkaSchemaClass, - SchemaMetadataClass) +from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass -from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, - ingest_file_via_rest) +from tests.utils import ( + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, +) sleep_sec, sleep_times = get_sleep_info() diff --git 
a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py index 106da7cd8d71e..cfbbacea1ed79 100644 --- a/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py +++ b/smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py @@ -2,7 +2,6 @@ import logging import sys import tempfile -import time from json import JSONDecodeError from typing import Any, Dict, List, Optional @@ -12,11 +11,8 @@ from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass -import requests_wrapper as requests -from tests.aspect_generators.timeseries.dataset_profile_gen import \ - gen_dataset_profiles -from tests.utils import (get_strftime_from_timestamp_millis, - wait_for_writes_to_sync) +from tests.aspect_generators.timeseries.dataset_profile_gen import gen_dataset_profiles +from tests.utils import get_strftime_from_timestamp_millis, wait_for_writes_to_sync logger = logging.getLogger(__name__) diff --git a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py index e962b1a5cafd6..aa7c90cc6f988 100644 --- a/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py +++ b/smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py @@ -1,5 +1,4 @@ import json -import time from typing import Any, Dict, List, Optional import datahub.emitter.mce_builder as builder @@ -8,7 +7,6 @@ from datahub.entrypoints import datahub from datahub.metadata.schema_classes import DatasetProfileClass -import requests_wrapper as requests from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync runner = CliRunner(mix_stderr=False) diff --git a/smoke-test/tests/managed-ingestion/__init__.py b/smoke-test/tests/cli/user_groups_cmd/__init__.py similarity index 100% rename from smoke-test/tests/managed-ingestion/__init__.py rename to smoke-test/tests/cli/user_groups_cmd/__init__.py diff --git a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py index 7b986d3be0444..555687c98ed3e 100644 --- a/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py +++ b/smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py @@ -1,16 +1,15 @@ import json import sys import tempfile -import time from typing import Any, Dict, Iterable, List +import pytest import yaml from click.testing import CliRunner, Result from datahub.api.entities.corpgroup.corpgroup import CorpGroup from datahub.entrypoints import datahub from datahub.ingestion.graph.client import DataHubGraph, get_default_graph -import requests_wrapper as requests from tests.utils import wait_for_writes_to_sync runner = CliRunner(mix_stderr=False) @@ -43,7 +42,6 @@ def gen_datahub_groups(num_groups: int) -> Iterable[CorpGroup]: description=f"The Group {i}", picture_link=f"https://images.google.com/group{i}.jpg", slack=f"@group{i}", - admins=["user1"], members=["user2"], ) yield group @@ -65,7 +63,7 @@ def get_group_ownership(user_urn: str) -> List[str]: graph = get_default_graph() entities = graph.get_related_entities( entity_urn=user_urn, - relationship_types="OwnedBy", + relationship_types=["OwnedBy"], direction=DataHubGraph.RelationshipDirection.INCOMING, ) return [entity.urn for entity in entities] @@ -75,12 +73,13 @@ def get_group_membership(user_urn: str) -> List[str]: graph = get_default_graph() entities = graph.get_related_entities( entity_urn=user_urn, - relationship_types="IsMemberOfGroup", + relationship_types=["IsMemberOfGroup"], 
direction=DataHubGraph.RelationshipDirection.OUTGOING, ) return [entity.urn for entity in entities] +@pytest.mark.skip(reason="Functionality and test needs to be validated for correctness") def test_group_upsert(wait_for_healthchecks: Any) -> None: num_groups: int = 10 for i, datahub_group in enumerate(gen_datahub_groups(num_groups)): diff --git a/smoke-test/tests/consistency_utils.py b/smoke-test/tests/consistency_utils.py index 607835bf3649c..1af9399c2dc9a 100644 --- a/smoke-test/tests/consistency_utils.py +++ b/smoke-test/tests/consistency_utils.py @@ -30,7 +30,7 @@ def wait_for_writes_to_sync(max_timeout_in_sec: int = 120) -> None: result = str(completed_process.stdout) lines = result.splitlines() - lag_values = [int(l) for l in lines if l != ""] + lag_values = [int(line) for line in lines if line != ""] maximum_lag = max(lag_values) if maximum_lag == 0: lag_zero = True diff --git a/smoke-test/tests/containers/containers_test.py b/smoke-test/tests/containers/containers_test.py index 227645a87d30a..4997102702e57 100644 --- a/smoke-test/tests/containers/containers_test.py +++ b/smoke-test/tests/containers/containers_test.py @@ -1,7 +1,6 @@ import pytest -from tests.utils import (delete_urns_from_file, get_frontend_url, - ingest_file_via_rest) +from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest @pytest.fixture(scope="module", autouse=False) @@ -21,12 +20,10 @@ def test_healthchecks(wait_for_healthchecks): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_get_full_container(frontend_session, ingest_cleanup_data): - container_urn = "urn:li:container:SCHEMA" container_name = "datahub_schema" container_description = "The DataHub schema" editable_container_description = "custom description" - dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" # Get a full container get_container_json = { @@ -129,7 +126,6 @@ def test_get_full_container(frontend_session, ingest_cleanup_data): @pytest.mark.dependency(depends=["test_healthchecks", "test_get_full_container"]) def test_get_parent_container(frontend_session, ingest_cleanup_data): - dataset_urn = "urn:li:dataset:(urn:li:dataPlatform:hive,SampleHiveDataset,PROD)" # Get count of existing secrets @@ -165,7 +161,6 @@ def test_get_parent_container(frontend_session, ingest_cleanup_data): @pytest.mark.dependency(depends=["test_healthchecks", "test_get_full_container"]) def test_update_container(frontend_session, ingest_cleanup_data): - container_urn = "urn:li:container:SCHEMA" new_tag = "urn:li:tag:Test" @@ -227,7 +222,7 @@ def test_update_container(frontend_session, ingest_cleanup_data): "ownerUrn": new_owner, "resourceUrn": container_urn, "ownerEntityType": "CORP_USER", - "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner" + "ownershipTypeUrn": "urn:li:ownershipType:__system__technical_owner", } }, } diff --git a/smoke-test/tests/cypress/integration_test.py b/smoke-test/tests/cypress/integration_test.py index 4ad2bc53fa87d..4124ced999446 100644 --- a/smoke-test/tests/cypress/integration_test.py +++ b/smoke-test/tests/cypress/integration_test.py @@ -5,11 +5,17 @@ import pytest -from tests.setup.lineage.ingest_time_lineage import (get_time_lineage_urns, - ingest_time_lineage) -from tests.utils import (create_datahub_step_state_aspects, delete_urns, - delete_urns_from_file, get_admin_username, - ingest_file_via_rest) +from tests.setup.lineage.ingest_time_lineage import ( + get_time_lineage_urns, + ingest_time_lineage, +) +from tests.utils import ( + 
create_datahub_step_state_aspects, + delete_urns, + delete_urns_from_file, + get_admin_username, + ingest_file_via_rest, +) CYPRESS_TEST_DATA_DIR = "tests/cypress" @@ -178,8 +184,10 @@ def test_run_cypress(frontend_session, wait_for_healthchecks): print(f"test strategy is {test_strategy}") test_spec_arg = "" if test_strategy is not None: - specs = _get_spec_map(strategy_spec_map.get(test_strategy)) - test_spec_arg = f" --spec '{specs}' " + specs = strategy_spec_map.get(test_strategy) + assert specs is not None + specs_str = _get_spec_map(specs) + test_spec_arg = f" --spec '{specs_str}' " print("Running Cypress tests with command") command = f"NO_COLOR=1 npx cypress run {record_arg} {test_spec_arg} {tag_arg}" @@ -194,6 +202,8 @@ def test_run_cypress(frontend_session, wait_for_healthchecks): stderr=subprocess.PIPE, cwd=f"{CYPRESS_TEST_DATA_DIR}", ) + assert proc.stdout is not None + assert proc.stderr is not None stdout = proc.stdout.read() stderr = proc.stderr.read() return_code = proc.wait() diff --git a/smoke-test/tests/dataproduct/test_dataproduct.py b/smoke-test/tests/dataproduct/test_dataproduct.py index baef1cb1cb3ba..0d0141e9111c0 100644 --- a/smoke-test/tests/dataproduct/test_dataproduct.py +++ b/smoke-test/tests/dataproduct/test_dataproduct.py @@ -1,8 +1,6 @@ import logging import os -import subprocess import tempfile -import time from random import randint from typing import List @@ -14,17 +12,24 @@ from datahub.ingestion.api.sink import NoopWriteCallback from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.ingestion.sink.file import FileSink, FileSinkConfig -from datahub.metadata.schema_classes import (DataProductPropertiesClass, - DatasetPropertiesClass, - DomainPropertiesClass, - DomainsClass) +from datahub.metadata.schema_classes import ( + DataProductPropertiesClass, + DatasetPropertiesClass, + DomainPropertiesClass, + DomainsClass, +) from datahub.utilities.urns.urn import Urn +from tests.utils import ( + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, + wait_for_writes_to_sync, +) + logger = logging.getLogger(__name__) -import requests_wrapper as requests -from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info, - ingest_file_via_rest, wait_for_writes_to_sync) start_index = randint(10, 10000) dataset_urns = [ @@ -82,7 +87,6 @@ def create_test_data(filename: str): @pytest.fixture(scope="module", autouse=False) def ingest_cleanup_data(request): - new_file, filename = tempfile.mkstemp() try: create_test_data(filename) @@ -160,7 +164,6 @@ def validate_relationships( ) @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_data_product(ingest_cleanup_data): - domain_urn = Urn("domain", [datahub_guid({"name": "Marketing"})]) graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) result = graph.execute_graphql( @@ -191,6 +194,7 @@ def test_create_data_product(ingest_cleanup_data): assert result["batchSetDataProduct"] is True data_product_props = graph.get_aspect(data_product_urn, DataProductPropertiesClass) assert data_product_props is not None + assert data_product_props.assets is not None assert data_product_props.description == "Test Description" assert data_product_props.name == "Test Data Product" assert len(data_product_props.assets) == len(dataset_urns) diff --git a/smoke-test/tests/delete/delete_test.py b/smoke-test/tests/delete/delete_test.py index d920faaf3a89a..21833d0bd30a1 100644 --- a/smoke-test/tests/delete/delete_test.py +++ 
b/smoke-test/tests/delete/delete_test.py @@ -1,14 +1,16 @@ import json import os -from time import sleep import pytest -from datahub.cli.cli_utils import get_aspects_for_entity -from datahub.cli.ingest_cli import get_session_and_host - -from tests.utils import (delete_urns_from_file, get_datahub_graph, - ingest_file_via_rest, wait_for_healthcheck_util, - wait_for_writes_to_sync) +from datahub.cli.cli_utils import get_aspects_for_entity, get_session_and_host + +from tests.utils import ( + delete_urns_from_file, + get_datahub_graph, + ingest_file_via_rest, + wait_for_healthcheck_util, + wait_for_writes_to_sync, +) # Disable telemetry os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false" diff --git a/smoke-test/tests/deprecation/deprecation_test.py b/smoke-test/tests/deprecation/deprecation_test.py index a8969804d03d7..ae3890aeda956 100644 --- a/smoke-test/tests/deprecation/deprecation_test.py +++ b/smoke-test/tests/deprecation/deprecation_test.py @@ -1,7 +1,11 @@ import pytest -from tests.utils import (delete_urns_from_file, get_frontend_url, get_root_urn, - ingest_file_via_rest) +from tests.utils import ( + delete_urns_from_file, + get_frontend_url, + get_root_urn, + ingest_file_via_rest, +) @pytest.fixture(scope="module", autouse=True) diff --git a/smoke-test/tests/domains/domains_test.py b/smoke-test/tests/domains/domains_test.py index fa8c918e3cbe1..1d83b032d7a8f 100644 --- a/smoke-test/tests/domains/domains_test.py +++ b/smoke-test/tests/domains/domains_test.py @@ -1,8 +1,13 @@ import pytest import tenacity -from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url, - get_sleep_info, ingest_file_via_rest) +from tests.utils import ( + delete_urns_from_file, + get_frontend_url, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, +) sleep_sec, sleep_times = get_sleep_info() @@ -26,7 +31,6 @@ def test_healthchecks(wait_for_healthchecks): stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_more_domains(frontend_session, list_domains_json, before_count): - # Get new count of Domains response = frontend_session.post( f"{get_frontend_url()}/api/v2/graphql", json=list_domains_json @@ -47,7 +51,6 @@ def _ensure_more_domains(frontend_session, list_domains_json, before_count): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_domain(frontend_session): - # Setup: Delete the domain (if exists) response = frontend_session.post( f"{get_gms_url()}/entities?action=delete", json={"urn": "urn:li:domain:test id"} @@ -167,7 +170,6 @@ def test_create_list_get_domain(frontend_session): @pytest.mark.dependency(depends=["test_healthchecks", "test_create_list_get_domain"]) def test_set_unset_domain(frontend_session, ingest_cleanup_data): - # Set and Unset a Domain for a dataset. Note that this doesn't test for adding domains to charts, dashboards, charts, & jobs. 
dataset_urn = ( "urn:li:dataset:(urn:li:dataPlatform:kafka,test-tags-terms-sample-kafka,PROD)" diff --git a/smoke-test/tests/lineage/test_lineage.py b/smoke-test/tests/lineage/test_lineage.py index 52d61d666c7d9..9cd98d1245bbb 100644 --- a/smoke-test/tests/lineage/test_lineage.py +++ b/smoke-test/tests/lineage/test_lineage.py @@ -49,6 +49,7 @@ from datahub.utilities.urns.dataset_urn import DatasetUrn from datahub.utilities.urns.urn import Urn from pydantic import BaseModel, validator + from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync logger = logging.getLogger(__name__) @@ -85,7 +86,6 @@ def ingest_tableau_cll_via_rest(wait_for_healthchecks) -> None: ingest_file_via_rest( "tests/lineage/tableau_cll_mcps.json", ) - yield def search_across_lineage( @@ -499,6 +499,7 @@ def get_lineage_mcps(self) -> Iterable[MetadataChangeProposalWrapper]: def get_lineage_mcps_for_hop( self, hop_index: int ) -> Iterable[MetadataChangeProposalWrapper]: + assert self.expectations is not None if self.lineage_style == Scenario.LineageStyle.DATASET_JOB_DATASET: fine_grained_lineage = FineGrainedLineage( upstreamType=FineGrainedLineageUpstreamType.FIELD_SET, @@ -722,11 +723,9 @@ def cleanup(self, delete_agent: DeleteAgent) -> None: for urn in self._generated_urns: delete_agent.delete_entity(urn) - def generate_expectation(self, query: ImpactQuery) -> LineageExpectation: - return self.expectations.generate_query_expectation_pairs(query) - def test_expectation(self, graph: DataHubGraph) -> bool: print("Testing expectation...") + assert self.expectations is not None try: for hop_index in range(self.num_hops): for dataset_urn in self.get_upstream_dataset_urns(hop_index): diff --git a/smoke-test/tests/tags-and-terms/__init__.py b/smoke-test/tests/managed_ingestion/__init__.py similarity index 100% rename from smoke-test/tests/tags-and-terms/__init__.py rename to smoke-test/tests/managed_ingestion/__init__.py diff --git a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py similarity index 98% rename from smoke-test/tests/managed-ingestion/managed_ingestion_test.py rename to smoke-test/tests/managed_ingestion/managed_ingestion_test.py index 6d95f731f32b1..4a4bdca2e4592 100644 --- a/smoke-test/tests/managed-ingestion/managed_ingestion_test.py +++ b/smoke-test/tests/managed_ingestion/managed_ingestion_test.py @@ -3,8 +3,7 @@ import pytest import tenacity -from tests.utils import (get_frontend_url, get_sleep_info, - wait_for_healthcheck_util) +from tests.utils import get_frontend_url, get_sleep_info, wait_for_healthcheck_util sleep_sec, sleep_times = get_sleep_info() @@ -206,7 +205,6 @@ def _ensure_execution_request_present(frontend_session, execution_request_urn): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_remove_secret(frontend_session): - # Get count of existing secrets json_q = { "query": """query listSecrets($input: ListSecretsInput!) {\n @@ -265,7 +263,13 @@ def test_create_list_get_remove_secret(frontend_session): "query": """mutation updateSecret($input: UpdateSecretInput!) 
{\n updateSecret(input: $input) }""", - "variables": {"input": {"urn": secret_urn, "name": "SMOKE_TEST", "value": "mytestvalue.updated"}}, + "variables": { + "input": { + "urn": secret_urn, + "name": "SMOKE_TEST", + "value": "mytestvalue.updated", + } + }, } response = frontend_session.post( @@ -333,7 +337,6 @@ def test_create_list_get_remove_secret(frontend_session): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_create_list_get_remove_ingestion_source(frontend_session): - # Get count of existing ingestion sources res_data = _get_ingestionSources(frontend_session) diff --git a/smoke-test/tests/patch/common_patch_tests.py b/smoke-test/tests/patch/common_patch_tests.py index f1d6abf5da794..9530edb760c13 100644 --- a/smoke-test/tests/patch/common_patch_tests.py +++ b/smoke-test/tests/patch/common_patch_tests.py @@ -2,21 +2,26 @@ import uuid from typing import Dict, Optional, Type -from datahub.emitter.mce_builder import (make_tag_urn, make_term_urn, - make_user_urn) +from datahub.emitter.mce_builder import make_tag_urn, make_term_urn, make_user_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.mcp_patch_builder import MetadataPatchProposal from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import (AuditStampClass, GlobalTagsClass, - GlossaryTermAssociationClass, - GlossaryTermsClass, OwnerClass, - OwnershipClass, - OwnershipTypeClass, - TagAssociationClass, _Aspect) +from datahub.metadata.schema_classes import ( + AuditStampClass, + GlobalTagsClass, + GlossaryTermAssociationClass, + GlossaryTermsClass, + OwnerClass, + OwnershipClass, + OwnershipTypeClass, + TagAssociationClass, + _Aspect, +) def helper_test_entity_terms_patch( - test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal] + test_entity_urn: str, + patch_builder_class: Type[MetadataPatchProposal], ): def get_terms(graph, entity_urn): return graph.get_aspect( @@ -57,9 +62,9 @@ def get_terms(graph, entity_urn): assert terms_read.terms[1].urn == new_term.urn assert terms_read.terms[1].context is None - for patch_mcp in ( - patch_builder_class(test_entity_urn).remove_term(term_urn).build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_term") + for patch_mcp in patch_builder.remove_term(term_urn).build(): graph.emit_mcp(patch_mcp) pass @@ -69,9 +74,9 @@ def get_terms(graph, entity_urn): def helper_test_dataset_tags_patch( - test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal] + test_entity_urn: str, + patch_builder_class: Type[MetadataPatchProposal], ): - tag_urn = make_tag_urn(tag=f"testTag-{uuid.uuid4()}") tag_association = TagAssociationClass(tag=tag_urn, context="test") @@ -80,10 +85,11 @@ def helper_test_dataset_tags_patch( with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) - tags_read: GlobalTagsClass = graph.get_aspect( + tags_read = graph.get_aspect( entity_urn=test_entity_urn, aspect_type=GlobalTagsClass, ) + assert tags_read is not None assert tags_read.tags[0].tag == tag_urn assert tags_read.tags[0].context == "test" @@ -98,14 +104,15 @@ def helper_test_dataset_tags_patch( entity_urn=test_entity_urn, aspect_type=GlobalTagsClass, ) + assert tags_read is not None assert tags_read.tags[0].tag == tag_urn assert tags_read.tags[0].context == "test" assert tags_read.tags[1].tag == new_tag.tag assert tags_read.tags[1].context is None - for patch_mcp in ( - 
patch_builder_class(test_entity_urn).remove_tag(tag_urn).build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_tag") + for patch_mcp in patch_builder.remove_tag(tag_urn).build(): graph.emit_mcp(patch_mcp) pass @@ -113,12 +120,14 @@ def helper_test_dataset_tags_patch( entity_urn=test_entity_urn, aspect_type=GlobalTagsClass, ) + assert tags_read is not None assert len(tags_read.tags) == 1 assert tags_read.tags[0].tag == new_tag.tag def helper_test_ownership_patch( - test_entity_urn: str, patch_builder_class: Type[MetadataPatchProposal] + test_entity_urn: str, + patch_builder_class: Type[MetadataPatchProposal], ): owner_to_set = OwnerClass( owner=make_user_urn("jdoe"), type=OwnershipTypeClass.DATAOWNER @@ -133,27 +142,26 @@ def helper_test_ownership_patch( ) with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) - owner: OwnershipClass = graph.get_aspect( - entity_urn=test_entity_urn, aspect_type=OwnershipClass - ) + owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) + assert owner is not None assert owner.owners[0].owner == make_user_urn("jdoe") - for patch_mcp in ( - patch_builder_class(test_entity_urn).add_owner(owner_to_add).build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "add_owner") + for patch_mcp in patch_builder.add_owner(owner_to_add).build(): graph.emit_mcp(patch_mcp) owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) + assert owner is not None assert len(owner.owners) == 2 - for patch_mcp in ( - patch_builder_class(test_entity_urn) - .remove_owner(make_user_urn("gdoe")) - .build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_owner") + for patch_mcp in patch_builder.remove_owner(make_user_urn("gdoe")).build(): graph.emit_mcp(patch_mcp) owner = graph.get_aspect(entity_urn=test_entity_urn, aspect_type=OwnershipClass) + assert owner is not None assert len(owner.owners) == 1 assert owner.owners[0].owner == make_user_urn("jdoe") @@ -172,6 +180,7 @@ def get_custom_properties( aspect_type=custom_properties_aspect_class, ) assert custom_properties_aspect + assert hasattr(custom_properties_aspect, "customProperties") return custom_properties_aspect.customProperties base_property_map = {"base_property": "base_property_value"} @@ -195,6 +204,7 @@ def get_custom_properties( } entity_patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(entity_patch_builder, "add_custom_property") for k, v in new_properties.items(): entity_patch_builder.add_custom_property(k, v) @@ -212,11 +222,9 @@ def get_custom_properties( assert custom_properties[k] == v # Remove property - for patch_mcp in ( - patch_builder_class(test_entity_urn) - .remove_custom_property("test_property") - .build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "remove_custom_property") + for patch_mcp in patch_builder.remove_custom_property("test_property").build(): graph.emit_mcp(patch_mcp) custom_properties = get_custom_properties(graph, test_entity_urn) @@ -230,11 +238,9 @@ def get_custom_properties( assert custom_properties[k] == v # Replace custom properties - for patch_mcp in ( - patch_builder_class(test_entity_urn) - .set_custom_properties(new_properties) - .build() - ): + patch_builder = patch_builder_class(test_entity_urn) + assert hasattr(patch_builder, "set_custom_properties") + for patch_mcp in 
patch_builder.set_custom_properties(new_properties).build(): graph.emit_mcp(patch_mcp) custom_properties = get_custom_properties(graph, test_entity_urn) diff --git a/smoke-test/tests/patch/test_datajob_patches.py b/smoke-test/tests/patch/test_datajob_patches.py index 342d5d683228a..ce63d4571d6c8 100644 --- a/smoke-test/tests/patch/test_datajob_patches.py +++ b/smoke-test/tests/patch/test_datajob_patches.py @@ -3,14 +3,19 @@ from datahub.emitter.mce_builder import make_data_job_urn, make_dataset_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import (DataJobInfoClass, - DataJobInputOutputClass, - EdgeClass) +from datahub.metadata.schema_classes import ( + DataJobInfoClass, + DataJobInputOutputClass, + EdgeClass, +) from datahub.specific.datajob import DataJobPatchBuilder from tests.patch.common_patch_tests import ( - helper_test_custom_properties_patch, helper_test_dataset_tags_patch, - helper_test_entity_terms_patch, helper_test_ownership_patch) + helper_test_custom_properties_patch, + helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, + helper_test_ownership_patch, +) def _make_test_datajob_urn( @@ -76,10 +81,12 @@ def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): with DataHubGraph(DataHubGraphConfig()) as graph: graph.emit_mcp(mcpw) - inputoutput_lineage_read: DataJobInputOutputClass = graph.get_aspect( + inputoutput_lineage_read = graph.get_aspect( entity_urn=datajob_urn, aspect_type=DataJobInputOutputClass, ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None assert ( inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn == other_dataset_urn @@ -97,6 +104,8 @@ def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): entity_urn=datajob_urn, aspect_type=DataJobInputOutputClass, ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None assert len(inputoutput_lineage_read.inputDatasetEdges) == 2 assert ( inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn @@ -119,6 +128,8 @@ def test_datajob_inputoutput_dataset_patch(wait_for_healthchecks): entity_urn=datajob_urn, aspect_type=DataJobInputOutputClass, ) + assert inputoutput_lineage_read is not None + assert inputoutput_lineage_read.inputDatasetEdges is not None assert len(inputoutput_lineage_read.inputDatasetEdges) == 1 assert ( inputoutput_lineage_read.inputDatasetEdges[0].destinationUrn diff --git a/smoke-test/tests/patch/test_dataset_patches.py b/smoke-test/tests/patch/test_dataset_patches.py index 6704d19760fb9..ec6b4a91fa6be 100644 --- a/smoke-test/tests/patch/test_dataset_patches.py +++ b/smoke-test/tests/patch/test_dataset_patches.py @@ -1,29 +1,27 @@ -import time import uuid from typing import Dict, Optional -from datahub.emitter.mce_builder import (make_dataset_urn, make_tag_urn, - make_term_urn, make_user_urn) +from datahub.emitter.mce_builder import make_dataset_urn, make_tag_urn, make_term_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DataHubGraph, DataHubGraphConfig -from datahub.metadata.schema_classes import (AuditStampClass, - DatasetLineageTypeClass, - DatasetPropertiesClass, - EditableSchemaFieldInfoClass, - EditableSchemaMetadataClass, - GlobalTagsClass, - GlossaryTermAssociationClass, - GlossaryTermsClass, OwnerClass, - OwnershipClass, - OwnershipTypeClass, - 
TagAssociationClass, - UpstreamClass, - UpstreamLineageClass) +from datahub.metadata.schema_classes import ( + DatasetLineageTypeClass, + DatasetPropertiesClass, + EditableSchemaFieldInfoClass, + EditableSchemaMetadataClass, + GlossaryTermAssociationClass, + TagAssociationClass, + UpstreamClass, + UpstreamLineageClass, +) from datahub.specific.dataset import DatasetPatchBuilder from tests.patch.common_patch_tests import ( - helper_test_custom_properties_patch, helper_test_dataset_tags_patch, - helper_test_entity_terms_patch, helper_test_ownership_patch) + helper_test_custom_properties_patch, + helper_test_dataset_tags_patch, + helper_test_entity_terms_patch, + helper_test_ownership_patch, +) # Common Aspect Patch Tests @@ -135,7 +133,6 @@ def get_field_info( def test_field_terms_patch(wait_for_healthchecks): - dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -174,6 +171,7 @@ def test_field_terms_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.glossaryTerms is not None assert len(field_info.glossaryTerms.terms) == 1 assert field_info.glossaryTerms.terms[0].urn == new_term.urn @@ -191,11 +189,11 @@ def test_field_terms_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.glossaryTerms is not None assert len(field_info.glossaryTerms.terms) == 0 def test_field_tags_patch(wait_for_healthchecks): - dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) @@ -235,6 +233,7 @@ def test_field_tags_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.globalTags is not None assert len(field_info.globalTags.tags) == 1 assert field_info.globalTags.tags[0].tag == new_tag.tag @@ -253,6 +252,7 @@ def test_field_tags_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.globalTags is not None assert len(field_info.globalTags.tags) == 1 assert field_info.globalTags.tags[0].tag == new_tag.tag @@ -270,6 +270,7 @@ def test_field_tags_patch(wait_for_healthchecks): assert field_info assert field_info.description == "This is a test field" + assert field_info.globalTags is not None assert len(field_info.globalTags.tags) == 0 @@ -285,7 +286,6 @@ def get_custom_properties( def test_custom_properties_patch(wait_for_healthchecks): - dataset_urn = make_dataset_urn( platform="hive", name=f"SampleHiveDataset-{uuid.uuid4()}", env="PROD" ) diff --git a/smoke-test/tests/policies/test_policies.py b/smoke-test/tests/policies/test_policies.py index 67142181d2b96..186550482190c 100644 --- a/smoke-test/tests/policies/test_policies.py +++ b/smoke-test/tests/policies/test_policies.py @@ -1,8 +1,13 @@ import pytest import tenacity -from tests.utils import (get_frontend_session, get_frontend_url, get_root_urn, - get_sleep_info, wait_for_healthcheck_util) +from tests.utils import ( + get_frontend_session, + get_frontend_url, + get_root_urn, + get_sleep_info, + wait_for_healthcheck_util, +) TEST_POLICY_NAME = "Updated Platform Policy" @@ -90,7 +95,6 @@ def _ensure_policy_present(frontend_session, new_urn): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_frontend_policy_operations(frontend_session): - json = { "query": """mutation createPolicy($input: PolicyUpdateInput!) 
{\n createPolicy(input: $input) }""", diff --git a/smoke-test/tests/privileges/__init__.py b/smoke-test/tests/privileges/__init__.py new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/smoke-test/tests/privileges/test_privileges.py b/smoke-test/tests/privileges/test_privileges.py index e1cb848cccf8e..a4c607dac89f2 100644 --- a/smoke-test/tests/privileges/test_privileges.py +++ b/smoke-test/tests/privileges/test_privileges.py @@ -1,14 +1,34 @@ import pytest import tenacity -from tests.utils import (get_frontend_session, wait_for_writes_to_sync, wait_for_healthcheck_util, - get_frontend_url, get_admin_credentials,get_sleep_info) -from tests.privileges.utils import * +from tests.privileges.utils import ( + assign_role, + assign_user_to_group, + create_group, + create_user, + create_user_policy, + remove_group, + remove_policy, + remove_user, + set_base_platform_privileges_policy_status, + set_view_dataset_sensitive_info_policy_status, + set_view_entity_profile_privileges_policy_status, +) +from tests.utils import ( + get_admin_credentials, + get_frontend_session, + get_frontend_url, + get_sleep_info, + login_as, + wait_for_healthcheck_util, + wait_for_writes_to_sync, +) pytestmark = pytest.mark.no_cypress_suite1 sleep_sec, sleep_times = get_sleep_info() + @pytest.fixture(scope="session") def wait_for_healthchecks(): wait_for_healthcheck_util() @@ -37,7 +57,7 @@ def privileges_and_test_user_setup(admin_session): # Sleep for eventual consistency wait_for_writes_to_sync() - # Create a new user + # Create a new user admin_session = create_user(admin_session, "user", "user") yield @@ -57,15 +77,16 @@ def privileges_and_test_user_setup(admin_session): @tenacity.retry( stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec) ) -def _ensure_cant_perform_action(session, json,assertion_key): - action_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) +def _ensure_cant_perform_action(session, json, assertion_key): + action_response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=json) action_response.raise_for_status() action_data = action_response.json() - assert action_data["errors"][0]["extensions"]["code"] == 403, action_data["errors"][0] + assert action_data["errors"][0]["extensions"]["code"] == 403, action_data["errors"][ + 0 + ] assert action_data["errors"][0]["extensions"]["type"] == "UNAUTHORIZED" - assert action_data["data"][assertion_key] == None + assert action_data["data"][assertion_key] is None @tenacity.retry( @@ -73,7 +94,8 @@ def _ensure_cant_perform_action(session, json,assertion_key): ) def _ensure_can_create_secret(session, json, urn): create_secret_success = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) + f"{get_frontend_url()}/api/v2/graphql", json=json + ) create_secret_success.raise_for_status() secret_data = create_secret_success.json() @@ -81,14 +103,15 @@ def _ensure_can_create_secret(session, json, urn): assert secret_data["data"] assert secret_data["data"]["createSecret"] assert secret_data["data"]["createSecret"] == urn - + @tenacity.retry( stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_can_create_ingestion_source(session, json): create_ingestion_success = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) + f"{get_frontend_url()}/api/v2/graphql", json=json + ) create_ingestion_success.raise_for_status() ingestion_data = create_ingestion_success.json() @@ -98,14 +121,15 @@ def 
_ensure_can_create_ingestion_source(session, json): assert ingestion_data["data"]["createIngestionSource"] is not None return ingestion_data["data"]["createIngestionSource"] - + @tenacity.retry( stop=tenacity.stop_after_attempt(10), wait=tenacity.wait_fixed(sleep_sec) ) def _ensure_can_create_access_token(session, json): create_access_token_success = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=json) + f"{get_frontend_url()}/api/v2/graphql", json=json + ) create_access_token_success.raise_for_status() ingestion_data = create_access_token_success.json() @@ -128,50 +152,49 @@ def _ensure_can_create_user_policy(session, json): assert res_data["data"] assert res_data["data"]["createPolicy"] is not None - return res_data["data"]["createPolicy"] + return res_data["data"]["createPolicy"] @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_secrets(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") secret_urn = "urn:li:dataHubSecret:TestSecretName" # Verify new user can't create secrets - create_secret = { + create_secret = { "query": """mutation createSecret($input: CreateSecretInput!) {\n createSecret(input: $input)\n}""", "variables": { - "input":{ - "name":"TestSecretName", - "value":"Test Secret Value", - "description":"Test Secret Description" - } + "input": { + "name": "TestSecretName", + "value": "Test Secret Value", + "description": "Test Secret Description", + } }, } - _ensure_cant_perform_action(user_session, create_secret,"createSecret") - + _ensure_cant_perform_action(user_session, create_secret, "createSecret") # Assign privileges to the new user to manage secrets - policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_SECRETS"], admin_session) + policy_urn = create_user_policy( + "urn:li:corpuser:user", ["MANAGE_SECRETS"], admin_session + ) # Verify new user can create and manage secrets # Create a secret _ensure_can_create_secret(user_session, create_secret, secret_urn) - # Remove a secret - remove_secret = { + remove_secret = { "query": """mutation deleteSecret($urn: String!) {\n deleteSecret(urn: $urn)\n}""", - "variables": { - "urn": secret_urn - }, + "variables": {"urn": secret_urn}, } - remove_secret_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret) + remove_secret_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_secret + ) remove_secret_response.raise_for_status() secret_data = remove_secret_response.json() @@ -180,28 +203,29 @@ def test_privilege_to_create_and_manage_secrets(): assert secret_data["data"]["deleteSecret"] assert secret_data["data"]["deleteSecret"] == secret_urn - # Remove the policy remove_policy(policy_urn, admin_session) # Ensure user can't create secret after policy is removed - _ensure_cant_perform_action(user_session, create_secret,"createSecret") + _ensure_cant_perform_action(user_session, create_secret, "createSecret") @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_ingestion_source(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") # Verify new user can't create ingestion source - create_ingestion_source = { + create_ingestion_source = { "query": """mutation createIngestionSource($input: UpdateIngestionSourceInput!) 
{\n createIngestionSource(input: $input)\n}""", - "variables": {"input":{"type":"snowflake","name":"test","config": - {"recipe": - """{\"source\":{\"type\":\"snowflake\",\"config\":{ + "variables": { + "input": { + "type": "snowflake", + "name": "test", + "config": { + "recipe": """{\"source\":{\"type\":\"snowflake\",\"config\":{ \"account_id\":null, \"include_table_lineage\":true, \"include_view_lineage\":true, @@ -209,25 +233,39 @@ def test_privilege_to_create_and_manage_ingestion_source(): \"include_views\":true, \"profiling\":{\"enabled\":true,\"profile_table_level_only\":true}, \"stateful_ingestion\":{\"enabled\":true}}}}""", - "executorId":"default","debugMode":False,"extraArgs":[]}}}, + "executorId": "default", + "debugMode": False, + "extraArgs": [], + }, + } + }, } - _ensure_cant_perform_action(user_session, create_ingestion_source, "createIngestionSource") + _ensure_cant_perform_action( + user_session, create_ingestion_source, "createIngestionSource" + ) + # Assign privileges to the new user to manage ingestion source + policy_urn = create_user_policy( + "urn:li:corpuser:user", ["MANAGE_INGESTION"], admin_session + ) - # Assign privileges to the new user to manage ingestion source - policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_INGESTION"], admin_session) - # Verify new user can create and manage ingestion source(edit, delete) - ingestion_source_urn = _ensure_can_create_ingestion_source(user_session, create_ingestion_source) + ingestion_source_urn = _ensure_can_create_ingestion_source( + user_session, create_ingestion_source + ) # Edit ingestion source - update_ingestion_source = { + update_ingestion_source = { "query": """mutation updateIngestionSource($urn: String!, $input: UpdateIngestionSourceInput!) {\n updateIngestionSource(urn: $urn, input: $input)\n}""", - "variables": {"urn":ingestion_source_urn, - "input":{"type":"snowflake","name":"test updated", - "config":{"recipe":"""{\"source\":{\"type\":\"snowflake\",\"config\":{ + "variables": { + "urn": ingestion_source_urn, + "input": { + "type": "snowflake", + "name": "test updated", + "config": { + "recipe": """{\"source\":{\"type\":\"snowflake\",\"config\":{ \"account_id\":null, \"include_table_lineage\":true, \"include_view_lineage\":true, @@ -235,11 +273,17 @@ def test_privilege_to_create_and_manage_ingestion_source(): \"include_views\":true, \"profiling\":{\"enabled\":true,\"profile_table_level_only\":true}, \"stateful_ingestion\":{\"enabled\":true}}}}""", - "executorId":"default","debugMode":False,"extraArgs":[]}}} + "executorId": "default", + "debugMode": False, + "extraArgs": [], + }, + }, + }, } update_ingestion_success = user_session.post( - f"{get_frontend_url()}/api/v2/graphql", json=update_ingestion_source) + f"{get_frontend_url()}/api/v2/graphql", json=update_ingestion_source + ) update_ingestion_success.raise_for_status() ingestion_data = update_ingestion_success.json() @@ -248,17 +292,16 @@ def test_privilege_to_create_and_manage_ingestion_source(): assert ingestion_data["data"]["updateIngestionSource"] assert ingestion_data["data"]["updateIngestionSource"] == ingestion_source_urn - # Delete ingestion source - remove_ingestion_source = { + remove_ingestion_source = { "query": """mutation deleteIngestionSource($urn: String!) 
{\n          deleteIngestionSource(urn: $urn)\n}""",
-        "variables": {
-            "urn": ingestion_source_urn
-        },
+        "variables": {"urn": ingestion_source_urn},
     }
 
-    remove_ingestion_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_ingestion_source)
+    remove_ingestion_response = user_session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=remove_ingestion_source
+    )
     remove_ingestion_response.raise_for_status()
     ingestion_data = remove_ingestion_response.json()
@@ -271,75 +314,81 @@ def test_privilege_to_create_and_manage_ingestion_source():
     remove_policy(policy_urn, admin_session)
 
     # Ensure that user can't create ingestion source after policy is removed
-    _ensure_cant_perform_action(user_session, create_ingestion_source, "createIngestionSource")
+    _ensure_cant_perform_action(
+        user_session, create_ingestion_source, "createIngestionSource"
+    )
 
 
+@pytest.mark.skip(reason="Functionality and test need to be validated for correctness")
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_privilege_to_create_and_manage_access_tokens():
-
     (admin_user, admin_pass) = get_admin_credentials()
     admin_session = login_as(admin_user, admin_pass)
     user_session = login_as("user", "user")
 
-
     # Verify new user can't create access token
-    create_access_token = { 
+    create_access_token = {
         "query": """mutation createAccessToken($input: CreateAccessTokenInput!) {\n
            createAccessToken(input: $input) {\n      accessToken\n     __typename\n   }\n}\n""",
-        "variables": {"input":{"actorUrn":"urn:li:corpuser:user",
-            "type":"PERSONAL",
-            "duration":"ONE_MONTH",
-            "name":"test",
-            "description":"test"}}
+        "variables": {
+            "input": {
+                "actorUrn": "urn:li:corpuser:user",
+                "type": "PERSONAL",
+                "duration": "ONE_MONTH",
+                "name": "test",
+                "description": "test",
+            }
+        },
     }
 
-    _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken")
-    
+    _ensure_cant_perform_action(user_session, create_access_token, "createAccessToken")
 
     # Assign privileges to the new user to create and manage access tokens
-    policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_ACCESS_TOKENS"], admin_session)
-    
+    policy_urn = create_user_policy(
+        "urn:li:corpuser:user", ["MANAGE_ACCESS_TOKENS"], admin_session
+    )
 
     # Verify new user can create and manage access token(create, revoke)
     # Create an access token
    _ensure_can_create_access_token(user_session, create_access_token)
 
-
     # List access tokens first to get token id
-    list_access_tokens = { 
+    list_access_tokens = {
        "query": """query listAccessTokens($input: ListAccessTokenInput!) 
{\n listAccessTokens(input: $input) {\n start\n count\n total\n tokens {\n urn\n type\n id\n name\n description\n actorUrn\n ownerUrn\n createdAt\n expiresAt\n __typename\n }\n __typename\n }\n}\n""", "variables": { - "input":{ - "start":0,"count":10,"filters":[{ - "field":"ownerUrn", - "values":["urn:li:corpuser:user"]}]} - } + "input": { + "start": 0, + "count": 10, + "filters": [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}], + } + }, } - list_tokens_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=list_access_tokens) + list_tokens_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=list_access_tokens + ) list_tokens_response.raise_for_status() list_tokens_data = list_tokens_response.json() assert list_tokens_data assert list_tokens_data["data"] assert list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"] is not None - - access_token_id = list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"] + access_token_id = list_tokens_data["data"]["listAccessTokens"]["tokens"][0]["id"] # Revoke access token - revoke_access_token = { + revoke_access_token = { "query": "mutation revokeAccessToken($tokenId: String!) {\n revokeAccessToken(tokenId: $tokenId)\n}\n", - "variables": { - "tokenId": access_token_id - }, + "variables": {"tokenId": access_token_id}, } - revoke_token_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=revoke_access_token) + revoke_token_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=revoke_access_token + ) revoke_token_response.raise_for_status() revoke_token_data = revoke_token_response.json() @@ -348,22 +397,18 @@ def test_privilege_to_create_and_manage_access_tokens(): assert revoke_token_data["data"]["revokeAccessToken"] assert revoke_token_data["data"]["revokeAccessToken"] is True - # Remove the policy remove_policy(policy_urn, admin_session) - # Ensure that user can't create access token after policy is removed - _ensure_cant_perform_action(user_session, create_access_token,"createAccessToken") + _ensure_cant_perform_action(user_session, create_access_token, "createAccessToken") @pytest.mark.dependency(depends=["test_healthchecks"]) def test_privilege_to_create_and_manage_policies(): - (admin_user, admin_pass) = get_admin_credentials() admin_session = login_as(admin_user, admin_pass) user_session = login_as("user", "user") - # Verify new user can't create a policy create_policy = { @@ -376,7 +421,7 @@ def test_privilege_to_create_and_manage_policies(): "name": "Policy Name", "description": "Policy Description", "state": "ACTIVE", - "resources": {"filter":{"criteria":[]}}, + "resources": {"filter": {"criteria": []}}, "privileges": ["MANAGE_POLICIES"], "actors": { "users": [], @@ -388,19 +433,19 @@ def test_privilege_to_create_and_manage_policies(): }, } - _ensure_cant_perform_action(user_session, create_policy,"createPolicy") - + _ensure_cant_perform_action(user_session, create_policy, "createPolicy") # Assign privileges to the new user to create and manage policies - admin_policy_urn = create_user_policy("urn:li:corpuser:user", ["MANAGE_POLICIES"], admin_session) - + admin_policy_urn = create_user_policy( + "urn:li:corpuser:user", ["MANAGE_POLICIES"], admin_session + ) # Verify new user can create and manage policy(create, edit, delete) # Create a policy user_policy_urn = _ensure_can_create_user_policy(user_session, create_policy) # Edit a policy - edit_policy = { + edit_policy = { "query": """mutation updatePolicy($urn: String!, $input: 
PolicyUpdateInput!) {\n          updatePolicy(urn: $urn, input: $input) }""",
         "variables": {
@@ -422,7 +467,9 @@ def test_privilege_to_create_and_manage_policies():
             },
         },
     }
-    edit_policy_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=edit_policy)
+    edit_policy_response = user_session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=edit_policy
+    )
     edit_policy_response.raise_for_status()
     res_data = edit_policy_response.json()
@@ -431,12 +478,14 @@ def test_privilege_to_create_and_manage_policies():
     assert res_data["data"]["updatePolicy"] == user_policy_urn
 
     # Delete a policy
-    remove_user_policy = { 
+    remove_user_policy = {
        "query": "mutation deletePolicy($urn: String!) {\n    deletePolicy(urn: $urn)\n}\n",
-       "variables":{"urn":user_policy_urn}
+        "variables": {"urn": user_policy_urn},
    }
 
-    remove_policy_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_user_policy)
+    remove_policy_response = user_session.post(
+        f"{get_frontend_url()}/api/v2/graphql", json=remove_user_policy
+    )
    remove_policy_response.raise_for_status()
    res_data = remove_policy_response.json()
 
@@ -444,18 +493,16 @@ def test_privilege_to_create_and_manage_policies():
     assert res_data["data"]
     assert res_data["data"]["deletePolicy"] == user_policy_urn
 
-
     # Remove the user privilege by admin
     remove_policy(admin_policy_urn, admin_session)
 
-
     # Ensure that user can't create a policy after privilege is removed by admin
-    _ensure_cant_perform_action(user_session, create_policy,"createPolicy")
+    _ensure_cant_perform_action(user_session, create_policy, "createPolicy")
 
 
+@pytest.mark.skip(reason="Functionality and test need to be validated for correctness")
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_privilege_from_group_role_can_create_and_manage_secret():
-
     (admin_user, admin_pass) = get_admin_credentials()
     admin_session = login_as(admin_user, admin_pass)
     user_session = login_as("user", "user")
@@ -466,20 +513,20 @@ def test_privilege_from_group_role_can_create_and_manage_secret():
         "query": """mutation createSecret($input: CreateSecretInput!) {\n
             createSecret(input: $input)\n}""",
         "variables": {
-            "input":{
-                "name":"TestSecretName",
-                "value":"Test Secret Value",
-                "description":"Test Secret Description"
+            "input": {
+                "name": "TestSecretName",
+                "value": "Test Secret Value",
+                "description": "Test Secret Description",
             }
         },
     }
-    _ensure_cant_perform_action(user_session, create_secret,"createSecret")
+    _ensure_cant_perform_action(user_session, create_secret, "createSecret")
 
     # Create group and grant it the admin role.
     group_urn = create_group(admin_session, "Test Group")
 
     # Assign admin role to group
-    assign_role(admin_session,"urn:li:dataHubRole:Admin", [group_urn])
+    assign_role(admin_session, "urn:li:dataHubRole:Admin", [group_urn])
 
     # Assign user to group
     assign_user_to_group(admin_session, group_urn, ["urn:li:corpuser:user"])
@@ -492,12 +539,12 @@ def test_privilege_from_group_role_can_create_and_manage_secret():
     remove_secret = {
         "query": """mutation deleteSecret($urn: String!) 
{\n deleteSecret(urn: $urn)\n}""", - "variables": { - "urn": secret_urn - }, + "variables": {"urn": secret_urn}, } - remove_secret_response = user_session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_secret) + remove_secret_response = user_session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_secret + ) remove_secret_response.raise_for_status() secret_data = remove_secret_response.json() @@ -510,4 +557,4 @@ def test_privilege_from_group_role_can_create_and_manage_secret(): remove_group(admin_session, group_urn) # Ensure user can't create secret after policy is removed - _ensure_cant_perform_action(user_session, create_secret,"createSecret") + _ensure_cant_perform_action(user_session, create_secret, "createSecret") diff --git a/smoke-test/tests/privileges/utils.py b/smoke-test/tests/privileges/utils.py index eeb385a243a90..1e58ec4085b70 100644 --- a/smoke-test/tests/privileges/utils.py +++ b/smoke-test/tests/privileges/utils.py @@ -1,10 +1,9 @@ -import requests_wrapper as requests from tests.consistency_utils import wait_for_writes_to_sync -from tests.utils import (get_frontend_url, wait_for_writes_to_sync, get_admin_credentials) +from tests.utils import get_admin_credentials, get_frontend_url, login_as def set_base_platform_privileges_policy_status(status, session): - base_platform_privileges = { + base_platform_privileges = { "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n updatePolicy(urn: $urn, input: $input) }""", "variables": { @@ -14,18 +13,20 @@ def set_base_platform_privileges_policy_status(status, session): "state": status, "name": "All Users - Base Platform Privileges", "description": "Grants base platform privileges to ALL users of DataHub. Change this policy to alter that behavior.", - "privileges": ["MANAGE_INGESTION", - "MANAGE_SECRETS", - "MANAGE_USERS_AND_GROUPS", - "VIEW_ANALYTICS", - "GENERATE_PERSONAL_ACCESS_TOKENS", - "MANAGE_DOMAINS", - "MANAGE_GLOBAL_ANNOUNCEMENTS", - "MANAGE_TESTS", - "MANAGE_GLOSSARIES", - "MANAGE_TAGS", - "MANAGE_GLOBAL_VIEWS", - "MANAGE_GLOBAL_OWNERSHIP_TYPES"], + "privileges": [ + "MANAGE_INGESTION", + "MANAGE_SECRETS", + "MANAGE_USERS_AND_GROUPS", + "VIEW_ANALYTICS", + "GENERATE_PERSONAL_ACCESS_TOKENS", + "MANAGE_DOMAINS", + "MANAGE_GLOBAL_ANNOUNCEMENTS", + "MANAGE_TESTS", + "MANAGE_GLOSSARIES", + "MANAGE_TAGS", + "MANAGE_GLOBAL_VIEWS", + "MANAGE_GLOBAL_OWNERSHIP_TYPES", + ], "actors": { "users": [], "groups": None, @@ -38,13 +39,15 @@ def set_base_platform_privileges_policy_status(status, session): }, } base_privileges_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=base_platform_privileges) + f"{get_frontend_url()}/api/v2/graphql", json=base_platform_privileges + ) base_privileges_response.raise_for_status() base_res_data = base_privileges_response.json() assert base_res_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:7" + def set_view_dataset_sensitive_info_policy_status(status, session): - dataset_sensitive_information = { + dataset_sensitive_information = { "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) 
{\n updatePolicy(urn: $urn, input: $input) }""", "variables": { @@ -54,7 +57,7 @@ def set_view_dataset_sensitive_info_policy_status(status, session): "state": status, "name": "All Users - View Dataset Sensitive Information", "description": "Grants viewing privileges of usage and profile information of all datasets for all users", - "privileges": ["VIEW_DATASET_USAGE","VIEW_DATASET_PROFILE"], + "privileges": ["VIEW_DATASET_USAGE", "VIEW_DATASET_PROFILE"], "actors": { "users": [], "groups": None, @@ -67,13 +70,18 @@ def set_view_dataset_sensitive_info_policy_status(status, session): }, } sensitive_info_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=dataset_sensitive_information) + f"{get_frontend_url()}/api/v2/graphql", json=dataset_sensitive_information + ) sensitive_info_response.raise_for_status() sens_info_data = sensitive_info_response.json() - assert sens_info_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:view-dataset-sensitive" + assert ( + sens_info_data["data"]["updatePolicy"] + == "urn:li:dataHubPolicy:view-dataset-sensitive" + ) + def set_view_entity_profile_privileges_policy_status(status, session): - view_entity_page = { + view_entity_page = { "query": """mutation updatePolicy($urn: String!, $input: PolicyUpdateInput!) {\n updatePolicy(urn: $urn, input: $input) }""", "variables": { @@ -83,12 +91,14 @@ def set_view_entity_profile_privileges_policy_status(status, session): "state": status, "name": "All Users - View Entity Page", "description": "Grants entity view to all users", - "privileges": ["VIEW_ENTITY_PAGE", - "SEARCH_PRIVILEGE", - "GET_COUNTS_PRIVILEGE", - "GET_TIMESERIES_ASPECT_PRIVILEGE", - "GET_ENTITY_PRIVILEGE", - "GET_TIMELINE_PRIVILEGE"], + "privileges": [ + "VIEW_ENTITY_PAGE", + "SEARCH_PRIVILEGE", + "GET_COUNTS_PRIVILEGE", + "GET_TIMESERIES_ASPECT_PRIVILEGE", + "GET_ENTITY_PRIVILEGE", + "GET_TIMELINE_PRIVILEGE", + ], "actors": { "users": [], "groups": None, @@ -101,10 +111,15 @@ def set_view_entity_profile_privileges_policy_status(status, session): }, } view_entity_response = session.post( - f"{get_frontend_url()}/api/v2/graphql", json=view_entity_page) + f"{get_frontend_url()}/api/v2/graphql", json=view_entity_page + ) view_entity_response.raise_for_status() view_entity_data = view_entity_response.json() - assert view_entity_data["data"]["updatePolicy"] == "urn:li:dataHubPolicy:view-entity-page-all" + assert ( + view_entity_data["data"]["updatePolicy"] + == "urn:li:dataHubPolicy:view-entity-page-all" + ) + def create_user(session, email, password): # Remove user if exists @@ -136,9 +151,7 @@ def create_user(session, email, password): "title": "Data Engineer", "inviteToken": invite_token, } - sign_up_response = session.post( - f"{get_frontend_url()}/signUp", json=sign_up_json - ) + sign_up_response = session.post(f"{get_frontend_url()}/signUp", json=sign_up_json) sign_up_response.raise_for_status() assert sign_up_response assert "error" not in sign_up_response @@ -149,16 +162,6 @@ def create_user(session, email, password): return admin_session -def login_as(username, password): - session = requests.Session() - headers = { - "Content-Type": "application/json", - } - data = '{"username":"' + username + '", "password":"' + password + '"}' - response = session.post(f"{get_frontend_url()}/logIn", headers=headers, data=data) - response.raise_for_status() - return session - def remove_user(session, urn): json = { "query": """mutation removeUser($urn: String!) 
{\n @@ -170,6 +173,7 @@ def remove_user(session, urn): response.raise_for_status() return response.json() + def create_group(session, name): json = { "query": """mutation createGroup($input: CreateGroupInput!) {\n @@ -185,6 +189,7 @@ def create_group(session, name): assert res_data["data"]["createGroup"] return res_data["data"]["createGroup"] + def remove_group(session, urn): json = { "query": """mutation removeGroup($urn: String!) {\n @@ -200,6 +205,7 @@ def remove_group(session, urn): assert res_data["data"]["removeGroup"] return res_data["data"]["removeGroup"] + def assign_user_to_group(session, group_urn, user_urns): json = { "query": """mutation addGroupMembers($groupUrn: String!, $userUrns: [String!]!) {\n @@ -215,6 +221,7 @@ def assign_user_to_group(session, group_urn, user_urns): assert res_data["data"]["addGroupMembers"] return res_data["data"]["addGroupMembers"] + def assign_role(session, role_urn, actor_urns): json = { "query": """mutation batchAssignRole($input: BatchAssignRoleInput!) {\n @@ -231,6 +238,7 @@ def assign_role(session, role_urn, actor_urns): assert res_data["data"]["batchAssignRole"] return res_data["data"]["batchAssignRole"] + def create_user_policy(user_urn, privileges, session): policy = { "query": """mutation createPolicy($input: PolicyUpdateInput!) {\n @@ -241,7 +249,7 @@ def create_user_policy(user_urn, privileges, session): "name": "Policy Name", "description": "Policy Description", "state": "ACTIVE", - "resources": {"filter":{"criteria":[]}}, + "resources": {"filter": {"criteria": []}}, "privileges": privileges, "actors": { "users": [user_urn], @@ -262,6 +270,7 @@ def create_user_policy(user_urn, privileges, session): assert res_data["data"]["createPolicy"] return res_data["data"]["createPolicy"] + def remove_policy(urn, session): remove_policy_json = { "query": """mutation deletePolicy($urn: String!) {\n @@ -269,11 +278,13 @@ def remove_policy(urn, session): "variables": {"urn": urn}, } - response = session.post(f"{get_frontend_url()}/api/v2/graphql", json=remove_policy_json) + response = session.post( + f"{get_frontend_url()}/api/v2/graphql", json=remove_policy_json + ) response.raise_for_status() res_data = response.json() assert res_data assert res_data["data"] assert res_data["data"]["deletePolicy"] - assert res_data["data"]["deletePolicy"] == urn \ No newline at end of file + assert res_data["data"]["deletePolicy"] == urn diff --git a/smoke-test/tests/read_only/test_services_up.py b/smoke-test/tests/read_only/test_services_up.py index 792a5063d3f8b..4e00f910ceb73 100644 --- a/smoke-test/tests/read_only/test_services_up.py +++ b/smoke-test/tests/read_only/test_services_up.py @@ -1,8 +1,8 @@ import os +import re import pytest import requests -import re from tests.utils import get_gms_url, wait_for_healthcheck_util @@ -14,9 +14,11 @@ def test_services_up(): wait_for_healthcheck_util() + def looks_like_a_short_sha(sha: str) -> bool: return len(sha) == 7 and re.match(r"[0-9a-f]{7}", sha) is not None + @pytest.mark.read_only def test_gms_config_accessible() -> None: gms_config = requests.get(f"{get_gms_url()}/config").json() @@ -33,4 +35,6 @@ def test_gms_config_accessible() -> None: default_cli_version: str = gms_config["managedIngestion"]["defaultCliVersion"] print(f"Default CLI version: {default_cli_version}") assert not default_cli_version.startswith("@") - assert "." in default_cli_version or looks_like_a_short_sha(default_cli_version), "Default CLI version does not look like a version string" + assert "." 
in default_cli_version or looks_like_a_short_sha( + default_cli_version + ), "Default CLI version does not look like a version string" diff --git a/smoke-test/tests/setup/lineage/ingest_data_job_change.py b/smoke-test/tests/setup/lineage/ingest_data_job_change.py index 588a1625419bc..2746baf89600e 100644 --- a/smoke-test/tests/setup/lineage/ingest_data_job_change.py +++ b/smoke-test/tests/setup/lineage/ingest_data_job_change.py @@ -1,20 +1,31 @@ from typing import List -from datahub.emitter.mce_builder import (make_data_flow_urn, - make_data_job_urn_with_flow, - make_dataset_urn) +from datahub.emitter.mce_builder import ( + make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn, +) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import (DateTypeClass, NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass) +from datahub.metadata.schema_classes import ( + DateTypeClass, + NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, +) -from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.constants import ( + AIRFLOW_DATA_PLATFORM, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO, +) from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task -from tests.setup.lineage.utils import (create_edge, create_node, - create_nodes_and_edges, emit_mcps) +from tests.setup.lineage.utils import ( + create_edge, + create_node, + create_nodes_and_edges, + emit_mcps, +) # Constants for Case 2 DAILY_TEMPERATURE_DATASET_ID = "climate.daily_temperature" diff --git a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py index bb9f51b6b5e9b..4a8da1fcf0588 100644 --- a/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py +++ b/smoke-test/tests/setup/lineage/ingest_dataset_join_change.py @@ -2,17 +2,26 @@ from datahub.emitter.mce_builder import make_dataset_urn from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import (NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass, UpstreamClass) +from datahub.metadata.schema_classes import ( + NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, + UpstreamClass, +) -from tests.setup.lineage.constants import (DATASET_ENTITY_TYPE, - SNOWFLAKE_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.constants import ( + DATASET_ENTITY_TYPE, + SNOWFLAKE_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO, +) from tests.setup.lineage.helper_classes import Dataset, Field -from tests.setup.lineage.utils import (create_node, create_upstream_edge, - create_upstream_mcp, emit_mcps) +from tests.setup.lineage.utils import ( + create_node, + create_upstream_edge, + create_upstream_mcp, + emit_mcps, +) # Constants for Case 3 GDP_DATASET_ID = "economic_data.gdp" diff --git a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py index 6079d7a3d2b63..143c65c082656 100644 --- a/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py +++ b/smoke-test/tests/setup/lineage/ingest_input_datasets_change.py @@ -1,20 +1,30 @@ from typing import List -from datahub.emitter.mce_builder import (make_data_flow_urn, - 
make_data_job_urn_with_flow, - make_dataset_urn) +from datahub.emitter.mce_builder import ( + make_data_flow_urn, + make_data_job_urn_with_flow, + make_dataset_urn, +) from datahub.emitter.rest_emitter import DatahubRestEmitter -from datahub.metadata.schema_classes import (NumberTypeClass, - SchemaFieldDataTypeClass, - StringTypeClass) +from datahub.metadata.schema_classes import ( + NumberTypeClass, + SchemaFieldDataTypeClass, + StringTypeClass, +) -from tests.setup.lineage.constants import (AIRFLOW_DATA_PLATFORM, - BQ_DATA_PLATFORM, - TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, - TIMESTAMP_MILLIS_ONE_DAY_AGO) +from tests.setup.lineage.constants import ( + AIRFLOW_DATA_PLATFORM, + BQ_DATA_PLATFORM, + TIMESTAMP_MILLIS_EIGHT_DAYS_AGO, + TIMESTAMP_MILLIS_ONE_DAY_AGO, +) from tests.setup.lineage.helper_classes import Dataset, Field, Pipeline, Task -from tests.setup.lineage.utils import (create_edge, create_node, - create_nodes_and_edges, emit_mcps) +from tests.setup.lineage.utils import ( + create_edge, + create_node, + create_nodes_and_edges, + emit_mcps, +) # Constants for Case 1 TRANSACTIONS_DATASET_ID = "transactions.transactions" diff --git a/smoke-test/tests/setup/lineage/ingest_time_lineage.py b/smoke-test/tests/setup/lineage/ingest_time_lineage.py index 3aec979707290..116e6cd63dd9f 100644 --- a/smoke-test/tests/setup/lineage/ingest_time_lineage.py +++ b/smoke-test/tests/setup/lineage/ingest_time_lineage.py @@ -4,11 +4,17 @@ from datahub.emitter.rest_emitter import DatahubRestEmitter from tests.setup.lineage.ingest_data_job_change import ( - get_data_job_change_urns, ingest_data_job_change) + get_data_job_change_urns, + ingest_data_job_change, +) from tests.setup.lineage.ingest_dataset_join_change import ( - get_dataset_join_change_urns, ingest_dataset_join_change) + get_dataset_join_change_urns, + ingest_dataset_join_change, +) from tests.setup.lineage.ingest_input_datasets_change import ( - get_input_datasets_change_urns, ingest_input_datasets_change) + get_input_datasets_change_urns, + ingest_input_datasets_change, +) SERVER = os.getenv("DATAHUB_SERVER") or "http://localhost:8080" TOKEN = os.getenv("DATAHUB_TOKEN") or "" diff --git a/smoke-test/tests/setup/lineage/utils.py b/smoke-test/tests/setup/lineage/utils.py index c72f6ccb89b7a..d4c16ed3b7a21 100644 --- a/smoke-test/tests/setup/lineage/utils.py +++ b/smoke-test/tests/setup/lineage/utils.py @@ -1,29 +1,38 @@ -import datetime from typing import List -from datahub.emitter.mce_builder import (make_data_flow_urn, - make_data_job_urn_with_flow, - make_data_platform_urn, - make_dataset_urn) +from datahub.emitter.mce_builder import ( + make_data_flow_urn, + make_data_job_urn_with_flow, + make_data_platform_urn, + make_dataset_urn, +) from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.emitter.rest_emitter import DatahubRestEmitter from datahub.metadata.com.linkedin.pegasus2avro.dataset import UpstreamLineage -from datahub.metadata.schema_classes import (AuditStampClass, ChangeTypeClass, - DataFlowInfoClass, - DataJobInfoClass, - DataJobInputOutputClass, - DatasetLineageTypeClass, - DatasetPropertiesClass, EdgeClass, - MySqlDDLClass, SchemaFieldClass, - SchemaMetadataClass, - UpstreamClass) - -from tests.setup.lineage.constants import (DATA_FLOW_ENTITY_TYPE, - DATA_FLOW_INFO_ASPECT_NAME, - DATA_JOB_ENTITY_TYPE, - DATA_JOB_INFO_ASPECT_NAME, - DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, - DATASET_ENTITY_TYPE) +from datahub.metadata.com.linkedin.pegasus2avro.mxe import SystemMetadata +from datahub.metadata.schema_classes import ( + 
AuditStampClass, + ChangeTypeClass, + DataFlowInfoClass, + DataJobInfoClass, + DataJobInputOutputClass, + DatasetLineageTypeClass, + DatasetPropertiesClass, + EdgeClass, + MySqlDDLClass, + SchemaFieldClass, + SchemaMetadataClass, + UpstreamClass, +) + +from tests.setup.lineage.constants import ( + DATA_FLOW_ENTITY_TYPE, + DATA_FLOW_INFO_ASPECT_NAME, + DATA_JOB_ENTITY_TYPE, + DATA_JOB_INFO_ASPECT_NAME, + DATA_JOB_INPUT_OUTPUT_ASPECT_NAME, + DATASET_ENTITY_TYPE, +) from tests.setup.lineage.helper_classes import Dataset, Pipeline @@ -176,17 +185,16 @@ def create_upstream_mcp( run_id: str = "", ) -> MetadataChangeProposalWrapper: print(f"Creating upstreamLineage aspect for {entity_urn}") - timestamp_millis: int = int(datetime.datetime.now().timestamp() * 1000) mcp = MetadataChangeProposalWrapper( entityType=entity_type, entityUrn=entity_urn, changeType=ChangeTypeClass.UPSERT, aspectName="upstreamLineage", aspect=UpstreamLineage(upstreams=upstreams), - systemMetadata={ - "lastObserved": timestamp_millis, - "runId": run_id, - }, + systemMetadata=SystemMetadata( + lastObserved=timestamp_millis, + runId=run_id, + ), ) return mcp diff --git a/smoke-test/tests/structured_properties/test_structured_properties.py b/smoke-test/tests/structured_properties/test_structured_properties.py index 83994776076b0..de85d2af95e03 100644 --- a/smoke-test/tests/structured_properties/test_structured_properties.py +++ b/smoke-test/tests/structured_properties/test_structured_properties.py @@ -1,31 +1,39 @@ import logging import os -from datahub.ingestion.graph.filters import SearchFilterRule -from tests.consistency_utils import wait_for_writes_to_sync import tempfile from random import randint -from tests.utilities.file_emitter import FileEmitter from typing import Iterable, List, Optional, Union import pytest + # import tenacity from datahub.api.entities.dataset.dataset import Dataset -from datahub.api.entities.structuredproperties.structuredproperties import \ - StructuredProperties +from datahub.api.entities.structuredproperties.structuredproperties import ( + StructuredProperties, +) from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn from datahub.emitter.mcp import MetadataChangeProposalWrapper from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph from datahub.metadata.schema_classes import ( - EntityTypeInfoClass, PropertyValueClass, StructuredPropertiesClass, - StructuredPropertyDefinitionClass, StructuredPropertyValueAssignmentClass) + EntityTypeInfoClass, + PropertyValueClass, + StructuredPropertiesClass, + StructuredPropertyDefinitionClass, + StructuredPropertyValueAssignmentClass, +) from datahub.specific.dataset import DatasetPatchBuilder -from datahub.utilities.urns.structured_properties_urn import \ - StructuredPropertyUrn +from datahub.utilities.urns.structured_properties_urn import StructuredPropertyUrn from datahub.utilities.urns.urn import Urn -from tests.utils import (delete_urns, delete_urns_from_file, get_gms_url, - get_sleep_info, ingest_file_via_rest, - wait_for_writes_to_sync) +from tests.consistency_utils import wait_for_writes_to_sync +from tests.utilities.file_emitter import FileEmitter +from tests.utils import ( + delete_urns, + delete_urns_from_file, + get_gms_url, + get_sleep_info, + ingest_file_via_rest, +) logger = logging.getLogger(__name__) @@ -36,8 +44,7 @@ ] schema_field_urns = [ - make_schema_field_urn(dataset_urn, "column_1") - for dataset_urn in dataset_urns + make_schema_field_urn(dataset_urn, "column_1") for dataset_urn in 
dataset_urns ] generated_urns = [d for d in dataset_urns] + [f for f in schema_field_urns] @@ -45,6 +52,7 @@ default_namespace = "io.acryl.privacy" + def create_logical_entity( entity_name: str, ) -> Iterable[MetadataChangeProposalWrapper]: @@ -66,14 +74,13 @@ def create_test_data(filename: str): file_emitter.close() wait_for_writes_to_sync() + sleep_sec, sleep_times = get_sleep_info() @pytest.fixture(scope="module", autouse=False) def graph() -> DataHubGraph: - graph: DataHubGraph = DataHubGraph( - config=DatahubClientConfig(server=get_gms_url()) - ) + graph: DataHubGraph = DataHubGraph(config=DatahubClientConfig(server=get_gms_url())) return graph @@ -132,7 +139,7 @@ def attach_property_to_entity( property_name: str, property_value: Union[str, float, List[str | float]], graph: DataHubGraph, - namespace: str = default_namespace + namespace: str = default_namespace, ): if isinstance(property_value, list): property_values: List[Union[str, float]] = property_value @@ -159,15 +166,12 @@ def get_property_from_entity( property_name: str, graph: DataHubGraph, ): - structured_properties: Optional[ - StructuredPropertiesClass - ] = graph.get_aspect(urn, StructuredPropertiesClass) + structured_properties: Optional[StructuredPropertiesClass] = graph.get_aspect( + urn, StructuredPropertiesClass + ) assert structured_properties is not None for property in structured_properties.properties: - if ( - property.propertyUrn - == f"urn:li:structuredProperty:{property_name}" - ): + if property.propertyUrn == f"urn:li:structuredProperty:{property_name}": return property.values return None @@ -181,16 +185,14 @@ def test_structured_property_string(ingest_cleanup_data, graph): property_name = "retentionPolicy" create_property_definition(property_name, graph) - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.retentionPolicy") - - attach_property_to_entity( - dataset_urns[0], property_name, ["30d"], graph=graph + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.retentionPolicy" ) + attach_property_to_entity(dataset_urns[0], property_name, ["30d"], graph=graph) + try: - attach_property_to_entity( - dataset_urns[0], property_name, 200030, graph=graph - ) + attach_property_to_entity(dataset_urns[0], property_name, 200030, graph=graph) raise AssertionError( "Should not be able to attach a number to a string property" ) @@ -208,12 +210,12 @@ def test_structured_property_string(ingest_cleanup_data, graph): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_double(ingest_cleanup_data, graph): property_name = "expiryTime" - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) create_property_definition(property_name, graph, value_type="number") - attach_property_to_entity( - dataset_urns[0], property_name, 2000034, graph=graph - ) + attach_property_to_entity(dataset_urns[0], property_name, 2000034, graph=graph) try: attach_property_to_entity( @@ -232,9 +234,7 @@ def test_structured_property_double(ingest_cleanup_data, graph): attach_property_to_entity( dataset_urns[0], property_name, [2000034, 2000035], graph=graph ) - raise AssertionError( - "Should not be able to attach a list to a number property" - ) + raise AssertionError("Should not be able to attach a list to a number property") except Exception as e: if not isinstance(e, AssertionError): pass @@ -249,15 +249,15 @@ def 
test_structured_property_double(ingest_cleanup_data, graph): @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_double_multiple(ingest_cleanup_data, graph): property_name = "versions" - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) create_property_definition( property_name, graph, value_type="number", cardinality="MULTIPLE" ) - attach_property_to_entity( - dataset_urns[0], property_name, [1.0, 2.0], graph=graph - ) + attach_property_to_entity(dataset_urns[0], property_name, [1.0, 2.0], graph=graph) # @tenacity.retry( @@ -265,11 +265,11 @@ def test_structured_property_double_multiple(ingest_cleanup_data, graph): # wait=tenacity.wait_fixed(sleep_sec), # ) @pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_string_allowed_values( - ingest_cleanup_data, graph -): +def test_structured_property_string_allowed_values(ingest_cleanup_data, graph): property_name = "enumProperty" - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) create_property_definition( property_name, @@ -301,9 +301,7 @@ def test_structured_property_string_allowed_values( @pytest.mark.dependency(depends=["test_healthchecks"]) -def test_structured_property_definition_evolution( - ingest_cleanup_data, graph -): +def test_structured_property_definition_evolution(ingest_cleanup_data, graph): property_name = "enumProperty1234" create_property_definition( @@ -316,7 +314,9 @@ def test_structured_property_definition_evolution( PropertyValueClass(value="bar"), ], ) - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.{property_name}" + ) try: create_property_definition( @@ -345,9 +345,7 @@ def test_structured_property_definition_evolution( # ) @pytest.mark.dependency(depends=["test_healthchecks"]) def test_structured_property_schema_field(ingest_cleanup_data, graph): - property_name = ( - f"deprecationDate{randint(10, 10000)}" - ) + property_name = f"deprecationDate{randint(10, 10000)}" create_property_definition( property_name, @@ -356,26 +354,31 @@ def test_structured_property_schema_field(ingest_cleanup_data, graph): value_type="date", entity_types=["schemaField"], ) - generated_urns.append(f"urn:li:structuredProperty:io.datahubproject.test.{property_name}") + generated_urns.append( + f"urn:li:structuredProperty:io.datahubproject.test.{property_name}" + ) attach_property_to_entity( - schema_field_urns[0], property_name, "2020-10-01", graph=graph, namespace="io.datahubproject.test" + schema_field_urns[0], + property_name, + "2020-10-01", + graph=graph, + namespace="io.datahubproject.test", ) - assert ( - get_property_from_entity( - schema_field_urns[0], f"io.datahubproject.test.{property_name}", graph=graph - ) - == ["2020-10-01"] - ) + assert get_property_from_entity( + schema_field_urns[0], f"io.datahubproject.test.{property_name}", graph=graph + ) == ["2020-10-01"] try: attach_property_to_entity( - schema_field_urns[0], property_name, 200030, graph=graph, namespace="io.datahubproject.test" - ) - raise AssertionError( - "Should not be able to attach a number to a DATE property" + schema_field_urns[0], + property_name, + 200030, + graph=graph, + namespace="io.datahubproject.test", ) + raise 
AssertionError("Should not be able to attach a number to a DATE property") except Exception as e: if not isinstance(e, AssertionError): pass @@ -388,49 +391,38 @@ def test_dataset_yaml_loader(ingest_cleanup_data, graph): "tests/structured_properties/test_structured_properties.yaml" ) - for dataset in Dataset.from_yaml( - "tests/structured_properties/test_dataset.yaml" - ): + for dataset in Dataset.from_yaml("tests/structured_properties/test_dataset.yaml"): for mcp in dataset.generate_mcp(): graph.emit(mcp) wait_for_writes_to_sync() property_name = "io.acryl.dataManagement.deprecationDate" - assert ( - get_property_from_entity( - make_schema_field_urn( - make_dataset_urn("hive", "user.clicks"), "ip" - ), - property_name, - graph=graph, - ) - == ["2023-01-01"] - ) + assert get_property_from_entity( + make_schema_field_urn(make_dataset_urn("hive", "user.clicks"), "ip"), + property_name, + graph=graph, + ) == ["2023-01-01"] dataset = Dataset.from_datahub( graph=graph, urn="urn:li:dataset:(urn:li:dataPlatform:hive,user.clicks,PROD)", ) field_name = "ip" + assert dataset.schema_metadata is not None + assert dataset.schema_metadata.fields is not None matching_fields = [ f for f in dataset.schema_metadata.fields - if Dataset._simplify_field_path(f.id) == field_name + if f.id is not None and Dataset._simplify_field_path(f.id) == field_name ] assert len(matching_fields) == 1 - assert ( - matching_fields[0].structured_properties[ - Urn.make_structured_property_urn( - "io.acryl.dataManagement.deprecationDate" - ) - ] - == ["2023-01-01"] - ) + assert matching_fields[0].structured_properties is not None + assert matching_fields[0].structured_properties[ + Urn.make_structured_property_urn("io.acryl.dataManagement.deprecationDate") + ] == ["2023-01-01"] -def test_dataset_structured_property_validation( - ingest_cleanup_data, graph, caplog -): +def test_dataset_structured_property_validation(ingest_cleanup_data, graph, caplog): from datahub.api.entities.dataset.dataset import Dataset property_name = "replicationSLA" @@ -440,7 +432,9 @@ def test_dataset_structured_property_validation( create_property_definition( property_name=property_name, graph=graph, value_type=value_type ) - generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.replicationSLA") + generated_urns.append( + f"urn:li:structuredProperty:{default_namespace}.replicationSLA" + ) attach_property_to_entity( dataset_urns[0], property_name, [property_value], graph=graph @@ -453,21 +447,15 @@ def test_dataset_structured_property_validation( float(property_value), ) - assert ( - Dataset.validate_structured_property("testName", "testValue") is None - ) + assert Dataset.validate_structured_property("testName", "testValue") is None bad_property_value = "2023-09-20" assert ( - Dataset.validate_structured_property( - property_name, bad_property_value - ) - is None + Dataset.validate_structured_property(property_name, bad_property_value) is None ) - -def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, caplog): +def test_structured_property_search(ingest_cleanup_data, graph: DataHubGraph, caplog): def to_es_name(property_name, namespace=default_namespace): namespace_field = namespace.replace(".", "_") return f"structuredProperties.{namespace_field}_{property_name}" @@ -478,88 +466,116 @@ def to_es_name(property_name, namespace=default_namespace): create_property_definition( namespace="io.datahubproject.test", property_name=field_property_name, - graph=graph, value_type="date", entity_types=["schemaField"] + 
+        graph=graph,
+        value_type="date",
+        entity_types=["schemaField"],
+    )
+    generated_urns.append(
+        f"urn:li:structuredProperty:io.datahubproject.test.{field_property_name}"
     )
-    generated_urns.append(f"urn:li:structuredProperty:io.datahubproject.test.{field_property_name}")
     attach_property_to_entity(
-        schema_field_urns[0], field_property_name, "2020-10-01", graph=graph, namespace="io.datahubproject.test"
+        schema_field_urns[0],
+        field_property_name,
+        "2020-10-01",
+        graph=graph,
+        namespace="io.datahubproject.test",
     )
     dataset_property_name = "replicationSLA"
     property_value = 30
     value_type = "number"
-    create_property_definition(property_name=dataset_property_name, graph=graph, value_type=value_type)
-    generated_urns.append(f"urn:li:structuredProperty:{default_namespace}.{dataset_property_name}")
+    create_property_definition(
+        property_name=dataset_property_name, graph=graph, value_type=value_type
+    )
+    generated_urns.append(
+        f"urn:li:structuredProperty:{default_namespace}.{dataset_property_name}"
+    )
-    attach_property_to_entity(dataset_urns[0], dataset_property_name, [property_value], graph=graph)
+    attach_property_to_entity(
+        dataset_urns[0], dataset_property_name, [property_value], graph=graph
+    )
     # [] = default entities which includes datasets, does not include fields
-    entity_urns = list(graph.get_urns_by_filter(extraFilters=[
-        {
-            "field": to_es_name(dataset_property_name),
-            "negated": "false",
-            "condition": "EXISTS",
-        }
-    ]))
+    entity_urns = list(
+        graph.get_urns_by_filter(
+            extraFilters=[
+                {
+                    "field": to_es_name(dataset_property_name),
+                    "negated": "false",
+                    "condition": "EXISTS",
+                }
+            ]
+        )
+    )
     assert len(entity_urns) == 1
     assert entity_urns[0] == dataset_urns[0]
     # Search over schema field specifically
-    field_structured_prop = graph.get_aspect(entity_urn=schema_field_urns[0], aspect_type=StructuredPropertiesClass)
+    field_structured_prop = graph.get_aspect(
+        entity_urn=schema_field_urns[0], aspect_type=StructuredPropertiesClass
+    )
     assert field_structured_prop == StructuredPropertiesClass(
         properties=[
             StructuredPropertyValueAssignmentClass(
                 propertyUrn=f"urn:li:structuredProperty:io.datahubproject.test.{field_property_name}",
-                values=["2020-10-01"]
+                values=["2020-10-01"],
             )
         ]
     )
     # Search over entities that do not include the field
-    field_urns = list(graph.get_urns_by_filter(entity_types=["tag"],
-                                               extraFilters=[
-        {
-            "field": to_es_name(field_property_name,
-                                namespace="io.datahubproject.test"),
-            "negated": "false",
-            "condition": "EXISTS",
-        }
-    ]))
+    field_urns = list(
+        graph.get_urns_by_filter(
+            entity_types=["tag"],
+            extraFilters=[
+                {
+                    "field": to_es_name(
+                        field_property_name, namespace="io.datahubproject.test"
+                    ),
+                    "negated": "false",
+                    "condition": "EXISTS",
+                }
+            ],
+        )
+    )
     assert len(field_urns) == 0
     # OR the two properties together to return both results
-    field_urns = list(graph.get_urns_by_filter(entity_types=["dataset", "tag"],
-                                               extraFilters=[
-        {
-            "field": to_es_name(dataset_property_name),
-            "negated": "false",
-            "condition": "EXISTS",
-        }
-    ]))
+    field_urns = list(
+        graph.get_urns_by_filter(
+            entity_types=["dataset", "tag"],
+            extraFilters=[
+                {
+                    "field": to_es_name(dataset_property_name),
+                    "negated": "false",
+                    "condition": "EXISTS",
+                }
+            ],
+        )
+    )
     assert len(field_urns) == 1
     assert dataset_urns[0] in field_urns
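# Illustrative sketch (not from this commit): the search assertions above rely
# on how a structured property is flattened into an Elasticsearch field name —
# dots in the namespace become underscores and the result is prefixed with
# "structuredProperties.". Given that name, entities carrying the property can
# be found with an EXISTS filter, mirroring to_es_name and get_urns_by_filter
# as used in the test; `graph` is a connected DataHubGraph.
def _find_entities_with_property(graph, property_name, namespace="io.acryl.privacy"):
    es_field = f"structuredProperties.{namespace.replace('.', '_')}_{property_name}"
    return list(
        graph.get_urns_by_filter(
            extraFilters=[
                {"field": es_field, "negated": "false", "condition": "EXISTS"}
            ]
        )
    )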
-def test_dataset_structured_property_patch(
-    ingest_cleanup_data, graph, caplog
-):
+@pytest.mark.skip(reason="Functionality and test needs to be validated for correctness")
+def test_dataset_structured_property_patch(ingest_cleanup_data, graph, caplog):
     property_name = "replicationSLA"
     property_value = 30
     value_type = "number"
     create_property_definition(
-        property_name=property_name,
-        graph=graph,
-        value_type=value_type
+        property_name=property_name, graph=graph, value_type=value_type
     )
-    dataset_patcher: DatasetPatchBuilder = DatasetPatchBuilder(
-        urn=dataset_urns[0]
-    )
+    dataset_patcher: DatasetPatchBuilder = DatasetPatchBuilder(urn=dataset_urns[0])
-    dataset_patcher.set_structured_property(StructuredPropertyUrn.make_structured_property_urn(
-        f"{default_namespace}.{property_name}"), property_value)
+    dataset_patcher.set_structured_property(
+        StructuredPropertyUrn.make_structured_property_urn(
+            f"{default_namespace}.{property_name}"
+        ),
+        property_value,
+    )
     for mcp in dataset_patcher.build():
         graph.emit(mcp)
@@ -567,11 +583,12 @@ def test_dataset_structured_property_patch(
     dataset = Dataset.from_datahub(graph=graph, urn=dataset_urns[0])
     assert dataset.structured_properties is not None
-    assert (
-        [int(float(k)) for k in dataset.structured_properties[
+    assert isinstance(dataset.structured_properties, list)
+    assert [
+        int(float(k))
+        for k in dataset.structured_properties[
             StructuredPropertyUrn.make_structured_property_urn(
                 f"{default_namespace}.{property_name}"
             )
-        ]]
-        == [property_value]
-    )
+        ]
+    ] == [property_value]
diff --git a/smoke-test/tests/tags_and_terms/__init__.py b/smoke-test/tests/tags_and_terms/__init__.py
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/smoke-test/tests/tags-and-terms/data.json b/smoke-test/tests/tags_and_terms/data.json
similarity index 100%
rename from smoke-test/tests/tags-and-terms/data.json
rename to smoke-test/tests/tags_and_terms/data.json
diff --git a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py
similarity index 98%
rename from smoke-test/tests/tags-and-terms/tags_and_terms_test.py
rename to smoke-test/tests/tags_and_terms/tags_and_terms_test.py
index 6ac75765286f0..34404a1ddff59 100644
--- a/smoke-test/tests/tags-and-terms/tags_and_terms_test.py
+++ b/smoke-test/tests/tags_and_terms/tags_and_terms_test.py
@@ -1,16 +1,20 @@
 import pytest
-from tests.utils import (delete_urns_from_file, get_frontend_url,
-                         ingest_file_via_rest, wait_for_healthcheck_util)
+from tests.utils import (
+    delete_urns_from_file,
+    get_frontend_url,
+    ingest_file_via_rest,
+    wait_for_healthcheck_util,
+)
 @pytest.fixture(scope="module", autouse=True)
 def ingest_cleanup_data(request):
     print("ingesting test data")
-    ingest_file_via_rest("tests/tags-and-terms/data.json")
+    ingest_file_via_rest("tests/tags_and_terms/data.json")
     yield
     print("removing test data")
-    delete_urns_from_file("tests/tags-and-terms/data.json")
+    delete_urns_from_file("tests/tags_and_terms/data.json")
 @pytest.fixture(scope="session")
diff --git a/smoke-test/tests/telemetry/telemetry_test.py b/smoke-test/tests/telemetry/telemetry_test.py
index b7cd6fa0517df..963d85baef3bb 100644
--- a/smoke-test/tests/telemetry/telemetry_test.py
+++ b/smoke-test/tests/telemetry/telemetry_test.py
@@ -5,7 +5,9 @@ def test_no_client_id():
     client_id_urn = "urn:li:telemetry:clientId"
-    aspect = ["clientId"]  # this is checking for the removal of the invalid aspect RemoveClientIdAspectStep.java
+    aspect = [
+        "clientId"
+    ]  # this is checking for the removal of the invalid aspect RemoveClientIdAspectStep.java
     res_data = json.dumps(
         get_aspects_for_entity(entity_urn=client_id_urn, aspects=aspect, typed=False)
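# Illustrative sketch (not from this commit): the renamed tags_and_terms module
# above keeps the standard smoke-test fixture shape — ingest a JSON payload once
# per module, yield to the tests, then delete the same URNs on teardown. A new
# test package under smoke-test/tests/ would follow the same pattern (the
# data-file path here is a placeholder):
import pytest

from tests.utils import delete_urns_from_file, ingest_file_via_rest


@pytest.fixture(scope="module", autouse=True)
def ingest_cleanup_data():
    ingest_file_via_rest("tests/example_module/data.json")  # placeholder path
    yield
    delete_urns_from_file("tests/example_module/data.json")  # placeholder path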
diff --git a/smoke-test/tests/test_stateful_ingestion.py b/smoke-test/tests/test_stateful_ingestion.py
index c6adb402e5d51..5eac25059ec62 100644
--- a/smoke-test/tests/test_stateful_ingestion.py
+++ b/smoke-test/tests/test_stateful_ingestion.py
@@ -4,15 +4,19 @@
 from datahub.ingestion.run.pipeline import Pipeline
 from datahub.ingestion.source.sql.mysql import MySQLConfig, MySQLSource
 from datahub.ingestion.source.state.checkpoint import Checkpoint
-from datahub.ingestion.source.state.entity_removal_state import \
-    GenericCheckpointState
-from datahub.ingestion.source.state.stale_entity_removal_handler import \
-    StaleEntityRemovalHandler
+from datahub.ingestion.source.state.entity_removal_state import GenericCheckpointState
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+)
 from sqlalchemy import create_engine
 from sqlalchemy.sql import text
-from tests.utils import (get_gms_url, get_mysql_password, get_mysql_url,
-                         get_mysql_username)
+from tests.utils import (
+    get_gms_url,
+    get_mysql_password,
+    get_mysql_url,
+    get_mysql_username,
+)
 def test_stateful_ingestion(wait_for_healthchecks):
diff --git a/smoke-test/tests/tests/tests_test.py b/smoke-test/tests/tests/tests_test.py
index 213a2ea087b7a..28005c8397d0d 100644
--- a/smoke-test/tests/tests/tests_test.py
+++ b/smoke-test/tests/tests/tests_test.py
@@ -1,9 +1,13 @@
 import pytest
 import tenacity
-from tests.utils import (delete_urns_from_file, get_frontend_url,
-                         get_sleep_info, ingest_file_via_rest,
-                         wait_for_healthcheck_util)
+from tests.utils import (
+    delete_urns_from_file,
+    get_frontend_url,
+    get_sleep_info,
+    ingest_file_via_rest,
+    wait_for_healthcheck_util,
+)
 sleep_sec, sleep_times = get_sleep_info()
@@ -37,7 +41,6 @@ def test_healthchecks(wait_for_healthchecks):
 def create_test(frontend_session):
-
     # Create new Test
     create_test_json = {
         "query": """mutation createTest($input: CreateTestInput!) {\n
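# Illustrative sketch (not from this commit): test_stateful_ingestion, whose
# imports were reformatted above, drives a MySQL pipeline twice to verify
# stale-entity removal. A recipe for such a run looks roughly like this —
# treat the exact config keys as an assumption based on the usual DataHub
# recipe layout, not on this patch:
from datahub.ingestion.run.pipeline import Pipeline


def _example_stateful_pipeline(mysql_host_port, username, password, gms_url):
    return Pipeline.create(
        {
            "run_id": "mysql-smoke-test",
            "pipeline_name": "smoke_test_stateful",  # stable name keys the state
            "source": {
                "type": "mysql",
                "config": {
                    "host_port": mysql_host_port,
                    "username": username,
                    "password": password,
                    "stateful_ingestion": {"enabled": True},
                },
            },
            "sink": {"type": "datahub-rest", "config": {"server": gms_url}},
        }
    )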
@@ -84,7 +87,6 @@ def delete_test(frontend_session, test_urn):
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_create_test(frontend_session, wait_for_healthchecks):
-
     test_urn = create_test(frontend_session)
     # Get the test
diff --git a/smoke-test/tests/timeline/timeline_test.py b/smoke-test/tests/timeline/timeline_test.py
index c075d981487db..f8a0e425c3781 100644
--- a/smoke-test/tests/timeline/timeline_test.py
+++ b/smoke-test/tests/timeline/timeline_test.py
@@ -1,15 +1,14 @@
 import json
-import pytest
-from time import sleep
+import pytest
 from datahub.cli import timeline_cli
 from datahub.cli.cli_utils import guess_entity_type, post_entity
-from tests.utils import (get_datahub_graph, ingest_file_via_rest,
-                         wait_for_writes_to_sync)
+from tests.utils import get_datahub_graph, ingest_file_via_rest, wait_for_writes_to_sync
 pytestmark = pytest.mark.no_cypress_suite1
+
 def test_all():
     platform = "urn:li:dataPlatform:kafka"
     dataset_name = "test-timeline-sample-kafka"
@@ -184,7 +183,7 @@ def put(urn: str, aspect: str, aspect_data: str) -> None:
     entity_type = guess_entity_type(urn)
     with open(aspect_data) as fp:
         aspect_obj = json.load(fp)
-    status = post_entity(
+    post_entity(
         urn=urn,
         aspect_name=aspect,
         entity_type=entity_type,
diff --git a/smoke-test/tests/tokens/revokable_access_token_test.py b/smoke-test/tests/tokens/revokable_access_token_test.py
index 55f3de594af4e..10332b32b9caf 100644
--- a/smoke-test/tests/tokens/revokable_access_token_test.py
+++ b/smoke-test/tests/tokens/revokable_access_token_test.py
@@ -1,11 +1,14 @@
 import os
-from time import sleep
 import pytest
-import requests
-from tests.utils import (get_admin_credentials, get_frontend_url,
-                         wait_for_healthcheck_util, wait_for_writes_to_sync)
+from tests.utils import (
+    get_admin_credentials,
+    get_frontend_url,
+    login_as,
+    wait_for_healthcheck_util,
+    wait_for_writes_to_sync,
+)
 # Disable telemetry
 os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false"
@@ -29,7 +32,7 @@ def test_healthchecks(wait_for_healthchecks):
 @pytest.fixture(scope="class", autouse=True)
 def custom_user_setup():
     """Fixture to execute setup before and tear down after all tests are run"""
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     res_data = removeUser(admin_session, "urn:li:corpuser:user")
     assert res_data
@@ -77,7 +80,7 @@ def custom_user_setup():
     # signUp will override the session cookie to the new user to be signed up.
     admin_session.cookies.clear()
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     # Make sure the created user is there.
     res_data = listUsers(admin_session)
@@ -91,7 +94,7 @@ def custom_user_setup():
     res_data = removeUser(admin_session, "urn:li:corpuser:user")
     assert res_data
     assert res_data["data"]
-    assert res_data["data"]["removeUser"] == True
+    assert res_data["data"]["removeUser"] is True
     # Sleep for eventual consistency
     wait_for_writes_to_sync()
@@ -106,7 +109,7 @@ def custom_user_setup():
 @pytest.fixture(autouse=True)
 def access_token_setup():
     """Fixture to execute asserts before and after a test is run"""
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     res_data = listAccessTokens(admin_session)
     assert res_data
@@ -127,7 +130,7 @@ def access_token_setup():
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks):
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     # Using a super account, there should be no tokens
     res_data = listAccessTokens(admin_session)
@@ -170,7 +173,7 @@ def test_admin_can_create_list_and_revoke_tokens(wait_for_healthchecks):
     assert res_data
     assert res_data["data"]
     assert res_data["data"]["revokeAccessToken"]
-    assert res_data["data"]["revokeAccessToken"] == True
+    assert res_data["data"]["revokeAccessToken"] is True
     # Sleep for eventual consistency
     wait_for_writes_to_sync()
@@ -184,7 +187,7 @@
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks):
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     # Using a super account, there should be no tokens
     res_data = listAccessTokens(admin_session)
@@ -227,7 +230,7 @@ def test_admin_can_create_and_revoke_tokens_for_other_user(wait_for_healthchecks
     assert res_data
     assert res_data["data"]
     assert res_data["data"]["revokeAccessToken"]
-    assert res_data["data"]["revokeAccessToken"] == True
+    assert res_data["data"]["revokeAccessToken"] is True
     # Sleep for eventual consistency
     wait_for_writes_to_sync()
@@ -241,7 +244,7 @@
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks):
-    user_session = loginAs("user", "user")
+    user_session = login_as("user", "user")
     # Normal user should be able to generate token for himself.
     res_data = generateAccessToken_v2(user_session, "urn:li:corpuser:user")
@@ -280,7 +283,7 @@ def test_non_admin_can_create_list_revoke_tokens(wait_for_healthchecks):
     assert res_data
     assert res_data["data"]
     assert res_data["data"]["revokeAccessToken"]
-    assert res_data["data"]["revokeAccessToken"] == True
+    assert res_data["data"]["revokeAccessToken"] is True
     # Sleep for eventual consistency
     wait_for_writes_to_sync()
@@ -296,7 +299,7 @@
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks):
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     # Using a super account, there should be no tokens
     res_data = listAccessTokens(admin_session)
@@ -306,7 +309,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks):
     assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0
     admin_session.cookies.clear()
-    user_session = loginAs("user", "user")
+    user_session = login_as("user", "user")
     res_data = generateAccessToken_v2(user_session, "urn:li:corpuser:user")
     assert res_data
     assert res_data["data"]
@@ -326,7 +329,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks):
     # Admin should be able to list other tokens
     user_session.cookies.clear()
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     res_data = listAccessTokens(
         admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}]
     )
@@ -346,18 +349,18 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks):
     # Admin can delete token created by someone else.
     admin_session.cookies.clear()
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     res_data = revokeAccessToken(admin_session, user_tokenId)
     assert res_data
     assert res_data["data"]
     assert res_data["data"]["revokeAccessToken"]
-    assert res_data["data"]["revokeAccessToken"] == True
+    assert res_data["data"]["revokeAccessToken"] is True
     # Sleep for eventual consistency
     wait_for_writes_to_sync()
     # Using a normal account, check that all its tokens were removed.
     user_session.cookies.clear()
-    user_session = loginAs("user", "user")
+    user_session = login_as("user", "user")
     res_data = listAccessTokens(
         user_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}]
     )
@@ -367,7 +370,7 @@ def test_admin_can_manage_tokens_generated_by_other_user(wait_for_healthchecks):
     assert len(res_data["data"]["listAccessTokens"]["tokens"]) == 0
     # Using the super account, check that all tokens were removed.
-    admin_session = loginAs(admin_user, admin_pass)
+    admin_session = login_as(admin_user, admin_pass)
     res_data = listAccessTokens(
         admin_session, [{"field": "ownerUrn", "values": ["urn:li:corpuser:user"]}]
     )
@@ -379,7 +382,7 @@
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_non_admin_can_not_generate_tokens_for_others(wait_for_healthchecks):
-    user_session = loginAs("user", "user")
+    user_session = login_as("user", "user")
     # Normal user should not be able to generate token for another user
     res_data = generateAccessToken_v2(user_session, f"urn:li:corpuser:{admin_user}")
     assert res_data
@@ -467,19 +470,6 @@ def revokeAccessToken(session, tokenId):
     return response.json()
-def loginAs(username, password):
-    session = requests.Session()
-
-    headers = {
-        "Content-Type": "application/json",
-    }
-    data = '{"username":"' + username + '", "password":"' + password + '"}'
-    response = session.post(f"{get_frontend_url()}/logIn", headers=headers, data=data)
-    response.raise_for_status()
-
-    return session
-
-
 def removeUser(session, urn):
     # Remove user
     json = {
diff --git a/smoke-test/tests/utilities/file_emitter.py b/smoke-test/tests/utilities/file_emitter.py
index 27a91c360af8a..ddbcff8db31d8 100644
--- a/smoke-test/tests/utilities/file_emitter.py
+++ b/smoke-test/tests/utilities/file_emitter.py
@@ -1,11 +1,14 @@
-from datahub.ingestion.sink.file import FileSink, FileSinkConfig
+import time
+
 from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
 from datahub.ingestion.api.sink import NoopWriteCallback
-import time
+from datahub.ingestion.sink.file import FileSink, FileSinkConfig
 class FileEmitter:
-    def __init__(self, filename: str, run_id: str = f"test_{int(time.time()*1000.0)}") -> None:
+    def __init__(
+        self, filename: str, run_id: str = f"test_{int(time.time()*1000.0)}"
+    ) -> None:
         self.sink: FileSink = FileSink(
             ctx=PipelineContext(run_id=run_id),
             config=FileSinkConfig(filename=filename),
@@ -18,4 +21,4 @@ def emit(self, event):
         )
     def close(self):
-        self.sink.close()
\ No newline at end of file
+        self.sink.close()
diff --git a/smoke-test/tests/utils.py b/smoke-test/tests/utils.py
index bd75b13d1910f..29b956bde9ab8 100644
--- a/smoke-test/tests/utils.py
+++ b/smoke-test/tests/utils.py
@@ -2,14 +2,10 @@
 import json
 import logging
 import os
-import subprocess
-import time
 from datetime import datetime, timedelta, timezone
-from time import sleep
 from typing import Any, Dict, List, Tuple
-from datahub.cli import cli_utils
-from datahub.cli.cli_utils import get_system_auth
+from datahub.cli import cli_utils, env_utils
 from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
 from datahub.ingestion.run.pipeline import Pipeline
 from joblib import Parallel, delayed
@@ -22,23 +18,14 @@
 def get_frontend_session():
-    session = requests.Session()
+    username, password = get_admin_credentials()
+    return login_as(username, password)
-    headers = {
-        "Content-Type": "application/json",
-    }
-    system_auth = get_system_auth()
-    if system_auth is not None:
-        session.headers.update({"Authorization": system_auth})
-    else:
-        username, password = get_admin_credentials()
-        data = '{"username":"' + username + '", "password":"' + password + '"}'
-        response = session.post(
-            f"{get_frontend_url()}/logIn", headers=headers, data=data
-        )
-        response.raise_for_status()
-    return session
+def login_as(username: str, password: str):
+    return cli_utils.get_session_login_as(
+        username=username, password=password, frontend_url=get_frontend_url()
+    )
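# Illustrative sketch (not from this commit): with login_as in place, every
# test that previously rolled its own loginAs() now goes through one helper,
# and the HTTP details live in datahub.cli.cli_utils.get_session_login_as.
# Typical call sites look like this; the assumption is that the helper raises
# on bad credentials, so tests fail fast instead of carrying an
# unauthenticated session.
def _example_sessions():
    username, password = get_admin_credentials()
    admin_session = login_as(username, password)  # privileged session
    user_session = login_as("user", "user")  # unprivileged test account
    return admin_session, user_session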
 def get_admin_username() -> str:
@@ -146,7 +133,7 @@ def delete_urns(urns: List[str]) -> None:
 def delete_urns_from_file(filename: str, shared_data: bool = False) -> None:
-    if not cli_utils.get_boolean_env_variable("CLEANUP_DATA", True):
+    if not env_utils.get_boolean_env_variable("CLEANUP_DATA", True):
         print("Not cleaning data to save time")
         return
     session = requests.Session()
@@ -223,7 +210,7 @@ def create_datahub_step_state_aspect(
 def create_datahub_step_state_aspects(
-    username: str, onboarding_ids: str, onboarding_filename
+    username: str, onboarding_ids: List[str], onboarding_filename: str
 ) -> None:
     """
     For a specific user, creates dataHubStepState aspects for each onboarding id in the list
diff --git a/smoke-test/tests/views/views_test.py b/smoke-test/tests/views/views_test.py
index 685c3bd80b04d..a99f1f0dbb245 100644
--- a/smoke-test/tests/views/views_test.py
+++ b/smoke-test/tests/views/views_test.py
@@ -1,10 +1,7 @@
-import time
-
 import pytest
 import tenacity
-from tests.utils import (delete_urns_from_file, get_frontend_url, get_gms_url,
-                         get_sleep_info, ingest_file_via_rest)
+from tests.utils import get_frontend_url, get_sleep_info
 sleep_sec, sleep_times = get_sleep_info()
@@ -19,7 +16,6 @@ def test_healthchecks(wait_for_healthchecks):
     stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
 )
 def _ensure_more_views(frontend_session, list_views_json, query_name, before_count):
-
     # Get new count of Views
     response = frontend_session.post(
         f"{get_frontend_url()}/api/v2/graphql", json=list_views_json
     )
@@ -43,7 +39,6 @@ def _ensure_more_views(frontend_session, list_views_json, query_name, before_cou
     stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
 )
 def _ensure_less_views(frontend_session, list_views_json, query_name, before_count):
-
     # Get new count of Views
     response = frontend_session.post(
         f"{get_frontend_url()}/api/v2/graphql", json=list_views_json
     )
@@ -64,7 +59,6 @@ def _ensure_less_views(frontend_session, list_views_json, query_name, before_cou
 @pytest.mark.dependency(depends=["test_healthchecks"])
 def test_create_list_delete_global_view(frontend_session):
-
     # Get count of existing views
     list_global_views_json = {
         "query": """query listGlobalViews($input: ListGlobalViewsInput!) {\n
@@ -161,8 +155,6 @@ def test_create_list_delete_global_view(frontend_session):
         before_count=before_count,
     )
-    delete_json = {"urn": view_urn}
-
     # Delete the View
     delete_view_json = {
         "query": """mutation deleteView($urn: String!) {\n
@@ -190,7 +182,6 @@ def test_create_list_delete_global_view(frontend_session):
     depends=["test_healthchecks", "test_create_list_delete_global_view"]
 )
 def test_create_list_delete_personal_view(frontend_session):
-
     # Get count of existing views
     list_my_views_json = {
         "query": """query listMyViews($input: ListMyViewsInput!) {\n
@@ -314,7 +305,6 @@ def test_create_list_delete_personal_view(frontend_session):
     depends=["test_healthchecks", "test_create_list_delete_personal_view"]
 )
 def test_update_global_view(frontend_session):
-
     # First create a view
     new_view_name = "Test View"
     new_view_description = "Test Description"
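# Illustrative sketch (not from this commit): _ensure_more_views and
# _ensure_less_views above encode the suite's standard answer to eventual
# consistency — retry a read-side assertion a fixed number of times with a
# fixed wait, both taken from get_sleep_info(). The skeleton generalizes to
# any GraphQL count check; the "total" field name is a placeholder.
import tenacity

from tests.utils import get_frontend_url, get_sleep_info

sleep_sec, sleep_times = get_sleep_info()


@tenacity.retry(
    stop=tenacity.stop_after_attempt(sleep_times), wait=tenacity.wait_fixed(sleep_sec)
)
def _ensure_count(frontend_session, query_json, query_name, expected_count):
    response = frontend_session.post(
        f"{get_frontend_url()}/api/v2/graphql", json=query_json
    )
    response.raise_for_status()
    res_data = response.json()
    # tenacity re-runs this function until the assertion stops raising.
    assert res_data["data"][query_name]["total"] == expected_count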