fix(lint): run black, isort
anshbansal committed Oct 10, 2023
1 parent 57f855e commit 3871fc9
Showing 35 changed files with 457 additions and 453 deletions.
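
The commit message says the files were reformatted by running black and then isort over the smoke-test tree. The exact command line and configuration are not recorded in this commit; the sketch below is one way to reproduce the same effect through the two tools' public Python APIs (black.format_str and isort.code), with default settings assumed:

import black
import isort

# Unsorted imports of the kind this commit reformats.
messy = (
    "import pytest\n"
    "import requests_wrapper as requests\n"
    "from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest\n"
)

# "run black, isort": black normalizes layout first, then isort
# groups the imports and wraps any line over its length limit.
tidy = isort.code(black.format_str(messy, mode=black.Mode()))
print(tidy)

The wrapped imports in the diff use isort's default grid style (continuation lines aligned under the opening parenthesis), which suggests isort ran last and without black's profile.
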
33 changes: 17 additions & 16 deletions smoke-test/tests/assertions/assertions_test.py
@@ -2,28 +2,29 @@
import urllib

import pytest
import requests_wrapper as requests
import tenacity
from datahub.emitter.mce_builder import make_dataset_urn, make_schema_field_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.api.common import PipelineContext, RecordEnvelope
from datahub.ingestion.api.sink import NoopWriteCallback
from datahub.ingestion.sink.file import FileSink, FileSinkConfig
from datahub.metadata.com.linkedin.pegasus2avro.assertion import AssertionStdAggregation
from datahub.metadata.schema_classes import (
AssertionInfoClass,
AssertionResultClass,
AssertionResultTypeClass,
AssertionRunEventClass,
AssertionRunStatusClass,
AssertionStdOperatorClass,
AssertionTypeClass,
DatasetAssertionInfoClass,
DatasetAssertionScopeClass,
PartitionSpecClass,
PartitionTypeClass,
)
from tests.utils import delete_urns_from_file, get_gms_url, ingest_file_via_rest, wait_for_healthcheck_util, get_sleep_info
from datahub.metadata.com.linkedin.pegasus2avro.assertion import \
AssertionStdAggregation
from datahub.metadata.schema_classes import (AssertionInfoClass,
AssertionResultClass,
AssertionResultTypeClass,
AssertionRunEventClass,
AssertionRunStatusClass,
AssertionStdOperatorClass,
AssertionTypeClass,
DatasetAssertionInfoClass,
DatasetAssertionScopeClass,
PartitionSpecClass,
PartitionTypeClass)

import requests_wrapper as requests
from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info,
ingest_file_via_rest, wait_for_healthcheck_util)

restli_default_headers = {
"X-RestLi-Protocol-Version": "2.0.0",
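Throughout the diff, isort moves "import requests_wrapper as requests" and the tests.utils imports out of the third-party block and into their own group below the datahub imports. That grouping implies the local test helpers are classified as first-party. The repository's actual isort settings are not visible here, so the known_first_party value below is an assumption chosen to reproduce the layout:

import isort

source = (
    "import pytest\n"
    "import requests_wrapper as requests\n"
    "from datahub.emitter.mcp import MetadataChangeProposalWrapper\n"
    "from tests.utils import get_gms_url\n"
)

# Classifying the local test modules as first-party pushes them into
# a separate group after the third-party (pytest, datahub) imports.
print(isort.code(source, known_first_party=["tests", "requests_wrapper"]))
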
51 changes: 35 additions & 16 deletions smoke-test/tests/browse/browse_test.py
@@ -1,9 +1,10 @@
import time

import pytest
import requests_wrapper as requests
from tests.utils import delete_urns_from_file, get_frontend_url, ingest_file_via_rest

import requests_wrapper as requests
from tests.utils import (delete_urns_from_file, get_frontend_url,
ingest_file_via_rest)

TEST_DATASET_1_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-1,PROD)"
TEST_DATASET_2_URN = "urn:li:dataset:(urn:li:dataPlatform:kafka,test-browse-2,PROD)"
@@ -51,7 +52,9 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data):
# /prod -- There should be one entity
get_browse_paths_json = {
"query": get_browse_paths_query,
"variables": {"input": { "type": "DATASET", "path": ["prod"], "start": 0, "count": 100 } },
"variables": {
"input": {"type": "DATASET", "path": ["prod"], "start": 0, "count": 100}
},
}

response = frontend_session.post(
@@ -67,12 +70,19 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data):

browse = res_data["data"]["browse"]
print(browse)
assert browse["entities"] == [{ "urn": TEST_DATASET_3_URN }]
assert browse["entities"] == [{"urn": TEST_DATASET_3_URN}]

# /prod/kafka1
get_browse_paths_json = {
"query": get_browse_paths_query,
"variables": {"input": { "type": "DATASET", "path": ["prod", "kafka1"], "start": 0, "count": 10 } },
"variables": {
"input": {
"type": "DATASET",
"path": ["prod", "kafka1"],
"start": 0,
"count": 10,
}
},
}

response = frontend_session.post(
@@ -88,16 +98,27 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data):

browse = res_data["data"]["browse"]
assert browse == {
"total": 3,
"entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }, { "urn": TEST_DATASET_3_URN }],
"groups": [],
"metadata": { "path": ["prod", "kafka1"], "totalNumEntities": 0 }
"total": 3,
"entities": [
{"urn": TEST_DATASET_1_URN},
{"urn": TEST_DATASET_2_URN},
{"urn": TEST_DATASET_3_URN},
],
"groups": [],
"metadata": {"path": ["prod", "kafka1"], "totalNumEntities": 0},
}

# /prod/kafka2
get_browse_paths_json = {
"query": get_browse_paths_query,
"variables": {"input": { "type": "DATASET", "path": ["prod", "kafka2"], "start": 0, "count": 10 } },
"variables": {
"input": {
"type": "DATASET",
"path": ["prod", "kafka2"],
"start": 0,
"count": 10,
}
},
}

response = frontend_session.post(
@@ -113,10 +134,8 @@ def test_get_browse_paths(frontend_session, ingest_cleanup_data):

browse = res_data["data"]["browse"]
assert browse == {
"total": 2,
"entities": [{ "urn": TEST_DATASET_1_URN }, { "urn": TEST_DATASET_2_URN }],
"groups": [],
"metadata": { "path": ["prod", "kafka2"], "totalNumEntities": 0 }
"total": 2,
"entities": [{"urn": TEST_DATASET_1_URN}, {"urn": TEST_DATASET_2_URN}],
"groups": [],
"metadata": {"path": ["prod", "kafka2"], "totalNumEntities": 0},
}
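
The browse_test.py changes are pure black reformatting: request payloads written as long single-line dicts are exploded into one key per line once they exceed the line length. A minimal reproduction (black's default 88-column limit assumed):

import black

payload = (
    'get_browse_paths_json = {"query": get_browse_paths_query, "variables": '
    '{"input": {"type": "DATASET", "path": ["prod"], "start": 0, "count": 100}}}\n'
)
# The line exceeds 88 columns, so black explodes the nested dict,
# adding trailing commas as it goes.
print(black.format_str(payload, mode=black.Mode(line_length=88)))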


76 changes: 57 additions & 19 deletions smoke-test/tests/cli/datahub-cli.py
@@ -1,8 +1,11 @@
import json
import pytest
from time import sleep
from datahub.cli.cli_utils import guess_entity_type, post_entity, get_aspects_for_entity

import pytest
from datahub.cli.cli_utils import (get_aspects_for_entity, guess_entity_type,
post_entity)
from datahub.cli.ingest_cli import get_session_and_host, rollback

from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync

ingested_dataset_run_id = ""
@@ -24,58 +27,93 @@ def test_setup():

session, gms_host = get_session_and_host()

assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False)
assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False)
assert "browsePaths" not in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
)
assert "editableDatasetProperties" not in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False
)

ingested_dataset_run_id = ingest_file_via_rest("tests/cli/cli_test_data.json").config.run_id
ingested_dataset_run_id = ingest_file_via_rest(
"tests/cli/cli_test_data.json"
).config.run_id
print("Setup ingestion id: " + ingested_dataset_run_id)

assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False)
assert "browsePaths" in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
)

yield

# Clean up
rollback_url = f"{gms_host}/runs?action=rollback"

session.post(rollback_url, data=json.dumps({"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True}))
session.post(rollback_url, data=json.dumps({"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True}))
session.post(
rollback_url,
data=json.dumps(
{"runId": ingested_editable_run_id, "dryRun": False, "hardDelete": True}
),
)
session.post(
rollback_url,
data=json.dumps(
{"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": True}
),
)

assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False)
assert "editableDatasetProperties" not in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False)
assert "browsePaths" not in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
)
assert "editableDatasetProperties" not in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False
)


@pytest.mark.dependency()
def test_rollback_editable():
global ingested_dataset_run_id
global ingested_editable_run_id
platform = "urn:li:dataPlatform:kafka"
dataset_name = (
"test-rollback"
)
dataset_name = "test-rollback"
env = "PROD"
dataset_urn = f"urn:li:dataset:({platform},{dataset_name},{env})"

session, gms_host = get_session_and_host()

print("Ingested dataset id:", ingested_dataset_run_id)
# Assert that second data ingestion worked
assert "browsePaths" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["browsePaths"], typed=False)
assert "browsePaths" in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["browsePaths"], typed=False
)

# Make editable change
ingested_editable_run_id = ingest_file_via_rest("tests/cli/cli_editable_test_data.json").config.run_id
ingested_editable_run_id = ingest_file_via_rest(
"tests/cli/cli_editable_test_data.json"
).config.run_id
print("ingested editable id:", ingested_editable_run_id)
# Assert that second data ingestion worked
assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False)
assert "editableDatasetProperties" in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False
)

# rollback ingestion 1
rollback_url = f"{gms_host}/runs?action=rollback"

session.post(rollback_url, data=json.dumps({"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False}))
session.post(
rollback_url,
data=json.dumps(
{"runId": ingested_dataset_run_id, "dryRun": False, "hardDelete": False}
),
)

# Allow async MCP processor to handle ingestions & rollbacks
wait_for_writes_to_sync()

# EditableDatasetProperties should still be part of the entity that was soft deleted.
assert "editableDatasetProperties" in get_aspects_for_entity(entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False)
assert "editableDatasetProperties" in get_aspects_for_entity(
entity_urn=dataset_urn, aspects=["editableDatasetProperties"], typed=False
)
# But first ingestion aspects should not be present
assert "browsePaths" not in get_aspects_for_entity(entity_urn=dataset_urn, typed=False)
assert "browsePaths" not in get_aspects_for_entity(
entity_urn=dataset_urn, typed=False
)
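
The datahub-cli.py hunks show black's treatment of long call expressions: when an assert plus its call does not fit on one line, the argument list is wrapped in parentheses with one level of indentation. A small reproduction (the names are stand-ins taken from the diff, not importable here):

import black

long_call = (
    'assert "browsePaths" in get_aspects_for_entity('
    'entity_urn=dataset_urn, aspects=["browsePaths"], typed=False)\n'
)
# Over the 88-column limit, so black breaks after the opening parenthesis.
print(black.format_str(long_call, mode=black.Mode()))
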
12 changes: 5 additions & 7 deletions smoke-test/tests/cli/datahub_graph_test.py
@@ -1,13 +1,11 @@
import pytest
import tenacity
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph
from datahub.metadata.schema_classes import KafkaSchemaClass, SchemaMetadataClass
from tests.utils import (
delete_urns_from_file,
get_gms_url,
get_sleep_info,
ingest_file_via_rest,
)
from datahub.metadata.schema_classes import (KafkaSchemaClass,
SchemaMetadataClass)

from tests.utils import (delete_urns_from_file, get_gms_url, get_sleep_info,
ingest_file_via_rest)

sleep_sec, sleep_times = get_sleep_info()

12 changes: 7 additions & 5 deletions smoke-test/tests/cli/delete_cmd/test_timeseries_delete.py
@@ -1,21 +1,22 @@
import json
import logging
import sys
import tempfile
import time
import sys
from json import JSONDecodeError
from typing import Any, Dict, List, Optional

from click.testing import CliRunner, Result

import datahub.emitter.mce_builder as builder
from click.testing import CliRunner, Result
from datahub.emitter.serialization_helper import pre_json_transform
from datahub.entrypoints import datahub
from datahub.metadata.schema_classes import DatasetProfileClass

import requests_wrapper as requests
from tests.aspect_generators.timeseries.dataset_profile_gen import \
gen_dataset_profiles
from tests.utils import get_strftime_from_timestamp_millis, wait_for_writes_to_sync
import requests_wrapper as requests
from tests.utils import (get_strftime_from_timestamp_millis,
wait_for_writes_to_sync)

logger = logging.getLogger(__name__)

@@ -33,6 +34,7 @@
def sync_elastic() -> None:
wait_for_writes_to_sync()


def datahub_put_profile(dataset_profile: DatasetProfileClass) -> None:
with tempfile.NamedTemporaryFile("w+t", suffix=".json") as aspect_file:
aspect_text: str = json.dumps(pre_json_transform(dataset_profile.to_obj()))
6 changes: 3 additions & 3 deletions smoke-test/tests/cli/ingest_cmd/test_timeseries_rollback.py
@@ -2,14 +2,14 @@
import time
from typing import Any, Dict, List, Optional

from click.testing import CliRunner, Result

import datahub.emitter.mce_builder as builder
from click.testing import CliRunner, Result
from datahub.emitter.serialization_helper import post_json_transform
from datahub.entrypoints import datahub
from datahub.metadata.schema_classes import DatasetProfileClass
from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync

import requests_wrapper as requests
from tests.utils import ingest_file_via_rest, wait_for_writes_to_sync

runner = CliRunner(mix_stderr=False)

3 changes: 2 additions & 1 deletion smoke-test/tests/cli/user_groups_cmd/test_group_cmd.py
@@ -1,14 +1,15 @@
import json
import sys
import tempfile
import time
from typing import Any, Dict, Iterable, List

import yaml
from click.testing import CliRunner, Result
from datahub.api.entities.corpgroup.corpgroup import CorpGroup
from datahub.entrypoints import datahub
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
import time

import requests_wrapper as requests
from tests.utils import wait_for_writes_to_sync

4 changes: 2 additions & 2 deletions smoke-test/tests/conftest.py
@@ -2,8 +2,8 @@

import pytest

from tests.utils import wait_for_healthcheck_util, get_frontend_session
from tests.test_result_msg import send_message
from tests.utils import get_frontend_session, wait_for_healthcheck_util

# Disable telemetry
os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false"
@@ -28,5 +28,5 @@ def test_healthchecks(wait_for_healthchecks):


def pytest_sessionfinish(session, exitstatus):
""" whole test run finishes. """
"""whole test run finishes."""
send_message(exitstatus)
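
The conftest.py change is black's docstring normalization: the padding spaces inside the docstring quotes are stripped. Reproducible with:

import black

src = (
    "def pytest_sessionfinish(session, exitstatus):\n"
    '    """ whole test run finishes. """\n'
    "    send_message(exitstatus)\n"
)
# black removes the leading and trailing spaces inside the quotes.
print(black.format_str(src, mode=black.Mode()))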