Skip to content

Commit

Permalink
Remove Provider, add Enum for Audience
Browse files Browse the repository at this point in the history
  • Loading branch information
murdo-moj committed Nov 14, 2024
1 parent 46eaa28 commit f9c4c00
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 34 deletions.
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
from collections import defaultdict
from datetime import datetime, timezone
from datetime import datetime
from importlib.resources import files
import logging
from typing import Any, Tuple

from data_platform_catalogue.entities import (
Audience,
AccessInformation,
Column,
ColumnRef,
Expand All @@ -20,6 +22,8 @@
UsageRestrictions,
)

logger = logging.getLogger(__name__)

PROPERTIES_EMPTY_STRING_FIELDS = ("description", "externalUrl")

# Note: Data owner is missing as an ownershipType entity in Datahub, but it still seems to be
Expand Down Expand Up @@ -173,6 +177,28 @@ def parse_tags(entity: dict[str, Any]) -> list[TagRef]:
return tags


def get_refresh_period_from_cadet_tags(
    tags: list[TagRef],
    refresh_schedules: list[str] = ["daily", "weekly", "monthly"],
) -> str:
    """
    Derive a refresh period from an entity's tags.

    A tag counts as a refresh period tag when its display name contains one of
    `refresh_schedules` as a substring, eg "daily_opg" matches "daily".

    Tags are checked in order and, for each tag, schedules are checked in
    `refresh_schedules` order. The first match is returned. If more than one
    match is found a warning is logged and the first match is still used.

    Returns an empty string when no tag matches any schedule.
    """
    # NOTE: the default list above is only ever read, never mutated, so
    # sharing it between calls is safe. Keep it that way.
    refresh_period_tags = [
        schedule
        for tag_ref in tags
        for schedule in refresh_schedules
        if schedule in tag_ref.display_name
    ]

    if not refresh_period_tags:
        return ""

    if len(refresh_period_tags) > 1:
        # logger.warn is a deprecated alias of logger.warning. %r renders the
        # same text the previous f-string "{tags=}" produced.
        logger.warning("More than one refresh period tag found: tags=%r", tags)

    return refresh_period_tags[0]


def parse_glossary_terms(entity: dict[str, Any]) -> list[GlossaryTermRef]:
"""
Parse glossary_term information into a list of TagRef for displaying
Expand Down Expand Up @@ -223,15 +249,8 @@ def parse_properties(
usage_restrictions = UsageRestrictions.model_validate(custom_properties_dict)
data_summary = DataSummary.model_validate(custom_properties_dict)
tags = parse_tags(entity)
expected_refresh_periods = ["daily", "weekly", "monthly"]
refresh_period_tags = [
tag_ref.display_name
for tag_ref in tags
if tag_ref.display_name in expected_refresh_periods
]
data_summary.refresh_period = " ".join(refresh_period_tags).capitalize()
audience = custom_properties_dict.get("Audience", "")
provider = custom_properties_dict.get("Provider", "")
data_summary.refresh_period = get_refresh_period_from_cadet_tags(tags)
audience = custom_properties_dict.get("audience", "Internal")

further_information = FurtherInformation.model_validate(custom_properties_dict)

Expand All @@ -240,8 +259,7 @@ def parse_properties(
usage_restrictions=usage_restrictions,
data_summary=data_summary,
further_information=further_information,
audience=audience,
provider=provider,
audience=audience
)

return properties, custom_properties
Expand Down
13 changes: 7 additions & 6 deletions lib/datahub-client/data_platform_catalogue/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ class RelationshipType(Enum):
CHILD = "CHILD"


class Audience(Enum):
    """
    Allowed values for the `audience` custom property of an entity,
    which records whether the data is published or not.
    """

    # Used as the default for CustomEntityProperties.audience.
    INTERNAL = "Internal"
    # NOTE(review): presumably data made available outside the organisation;
    # confirm the exact meaning with the metadata owners.
    PUBLISHED = "Published"


class EntityRef(BaseModel):
"""
A reference to another entity in the metadata graph.
Expand Down Expand Up @@ -348,13 +353,9 @@ class CustomEntityProperties(BaseModel):
description="Routes to further information about the data",
default_factory=FurtherInformation,
)
audience: str = Field(
audience: Audience = Field(
description="If the data is published or not",
default="",
)
provider: str = Field(
decription="Source of this metadata",
default=""
default=Audience.INTERNAL,
)


Expand Down
15 changes: 15 additions & 0 deletions lib/datahub-client/tests/client/datahub/test_graphql_helpers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from datetime import datetime, timezone

import pytest
import pytest

from data_platform_catalogue.client.graphql_helpers import (
DATA_CUSTODIAN,
_make_user_email_from_urn,
_parse_owners_by_type,
get_refresh_period_from_cadet_tags,
parse_columns,
parse_created_and_modified,
parse_data_owner,
Expand Down Expand Up @@ -651,3 +653,16 @@ def test_parse_updated():

assert parse_updated(example_with_updated) == expected_timestamp
assert parse_updated(example_no_updated) is None


@pytest.mark.parametrize(
    "tags, expected_refresh_period",
    [
        ([TagRef(display_name="daily_opg", urn="urn:li:tag:daily_opg")], "daily"),
        ([TagRef(display_name="monthly", urn="urn:li:tag:monthly")], "monthly"),
        ([TagRef(display_name="dc_cadet", urn="urn:li:tag:dc_cadet")], ""),
        # Edge cases appended after the original ones so that the existing
        # parametrised test ids do not change.
        # No tags at all.
        ([], ""),
        # The remaining default schedule.
        ([TagRef(display_name="weekly_opg", urn="urn:li:tag:weekly_opg")], "weekly"),
        # A refresh period tag mixed in with unrelated tags.
        (
            [
                TagRef(display_name="dc_cadet", urn="urn:li:tag:dc_cadet"),
                TagRef(display_name="weekly_opg", urn="urn:li:tag:weekly_opg"),
            ],
            "weekly",
        ),
    ],
)
def test_get_refresh_period_from_cadet_tags(tags, expected_refresh_period):
    """
    The refresh period is the schedule name contained in a tag's display
    name, or an empty string when no tag contains one.
    """
    refresh_period = get_refresh_period_from_cadet_tags(tags)
    assert refresh_period == expected_refresh_period
4 changes: 2 additions & 2 deletions lib/datahub-client/tests/client/datahub/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from data_platform_catalogue.client.search import SearchClient
from data_platform_catalogue.entities import (
Audience,
AccessInformation,
DataSummary,
EntityRef,
Expand Down Expand Up @@ -1178,10 +1179,9 @@ def test_search_for_container(mock_graph, searcher):
"name": "test_db",
},
metadata={
"audience": "",
"audience": Audience.INTERNAL,
"owner": "Shannon Lovett",
"owner_email": "[email protected]",
"provider": "",
"domain_name": "testdom",
"domain_id": "urn:li:domain:testdom",
"entity_types": {
Expand Down
3 changes: 1 addition & 2 deletions lib/datahub-client/tests/snapshots/test_upsert_table.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,7 @@
"dc_teams_channel_name": "",
"dc_teams_channel_url": "",
"dc_team_email": "",
"audience": "",
"provider": ""
"audience": "Internal"
},
"name": "Dataset",
"qualifiedName": "database.Dataset",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
"dc_teams_channel_name": "",
"dc_teams_channel_url": "",
"dc_team_email": "",
"audience": "",
"provider": ""
"audience": "Internal"
},
"name": "my_database",
"description": "little test db"
Expand Down Expand Up @@ -121,8 +120,7 @@
"dc_teams_channel_name": "",
"dc_teams_channel_url": "",
"dc_team_email": "",
"audience": "",
"provider": ""
"audience": "Internal"
},
"name": "Dataset",
"qualifiedName": "database.Dataset",
Expand Down
10 changes: 2 additions & 8 deletions templates/details_base.html
Original file line number Diff line number Diff line change
Expand Up @@ -81,18 +81,12 @@ <h2 class="govuk-heading-s govuk-!-margin-top-3">
{{entity.custom_properties.audience}}
</li>
{% endif %}
{% if entity.custom_properties.provider %}
<li>
<span class="govuk-!-font-weight-bold">Metadata provider:</span>
{{entity.custom_properties.provider}}
</li>
{% endif %}
{% if entity.last_updated %}
<!-- {% if entity.last_updated %}
<li>
<span class="govuk-!-font-weight-bold">Data last updated:</span>
{{entity.last_updated|date:"d M Y"}}
</li>
{% endif %}
{% endif %} -->
{% if entity.last_modified %}
<li>
<span class="govuk-!-font-weight-bold">Metadata last updated:</span>
Expand Down

0 comments on commit f9c4c00

Please sign in to comment.