diff --git a/contracting_process/field_level/range/date_time.py b/contracting_process/field_level/range/date_time.py index c066418a..85ba1108 100644 --- a/contracting_process/field_level/range/date_time.py +++ b/contracting_process/field_level/range/date_time.py @@ -1,7 +1,7 @@ from datetime import date from tools.checks import field_quality_check -from tools.helpers import parse_date +from tools.getter import parse_date name = "date_time" lower_bound = date(1990, 1, 1) diff --git a/contracting_process/resource_level/coherent/amendments_dates.py b/contracting_process/resource_level/coherent/amendments_dates.py index 3c702520..371e45bd 100644 --- a/contracting_process/resource_level/coherent/amendments_dates.py +++ b/contracting_process/resource_level/coherent/amendments_dates.py @@ -1,6 +1,5 @@ from tools.checks import get_empty_result_resource -from tools.getter import get_values -from tools.helpers import parse_date +from tools.getter import get_values, parse_date version = 1.0 diff --git a/contracting_process/resource_level/coherent/dates.py b/contracting_process/resource_level/coherent/dates.py index 4e2a789c..a0e054e5 100644 --- a/contracting_process/resource_level/coherent/dates.py +++ b/contracting_process/resource_level/coherent/dates.py @@ -1,6 +1,5 @@ from tools.checks import get_empty_result_resource -from tools.getter import get_values -from tools.helpers import parse_date +from tools.getter import get_values, parse_date version = 1.0 diff --git a/contracting_process/resource_level/coherent/documents_dates.py b/contracting_process/resource_level/coherent/documents_dates.py index 8ebfafd0..e03c6bc0 100644 --- a/contracting_process/resource_level/coherent/documents_dates.py +++ b/contracting_process/resource_level/coherent/documents_dates.py @@ -1,6 +1,5 @@ from tools.checks import get_empty_result_resource -from tools.getter import get_values -from tools.helpers import parse_date +from tools.getter import get_values, parse_date version = 1.0 diff --git 
a/contracting_process/resource_level/coherent/milestones_dates.py b/contracting_process/resource_level/coherent/milestones_dates.py index f4b0013c..afa7745f 100644 --- a/contracting_process/resource_level/coherent/milestones_dates.py +++ b/contracting_process/resource_level/coherent/milestones_dates.py @@ -1,6 +1,5 @@ from tools.checks import get_empty_result_resource -from tools.getter import get_values -from tools.helpers import parse_date +from tools.getter import get_values, parse_date version = 1.0 diff --git a/contracting_process/resource_level/coherent/period.py b/contracting_process/resource_level/coherent/period.py index e6666a33..178b8bdf 100644 --- a/contracting_process/resource_level/coherent/period.py +++ b/contracting_process/resource_level/coherent/period.py @@ -1,6 +1,5 @@ from tools.checks import get_empty_result_resource -from tools.getter import get_values -from tools.helpers import parse_datetime +from tools.getter import get_values, parse_datetime version = 1.0 diff --git a/contracting_process/resource_level/coherent/value_realistic.py b/contracting_process/resource_level/coherent/value_realistic.py index 5a156de9..01e6fb25 100644 --- a/contracting_process/resource_level/coherent/value_realistic.py +++ b/contracting_process/resource_level/coherent/value_realistic.py @@ -1,7 +1,6 @@ from tools.checks import get_empty_result_resource from tools.currency_converter import convert -from tools.getter import get_values -from tools.helpers import parse_datetime +from tools.getter import get_values, parse_datetime version = 1.0 diff --git a/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py b/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py index cbaeb507..83619b79 100644 --- a/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py +++ b/contracting_process/resource_level/consistent/contracts_implementation_transactions_value.py @@ 
-1,7 +1,6 @@ from tools.checks import get_empty_result_resource from tools.currency_converter import convert -from tools.getter import get_values -from tools.helpers import parse_datetime +from tools.getter import get_values, parse_datetime version = 1.0 diff --git a/contracting_process/resource_level/consistent/contracts_value.py b/contracting_process/resource_level/consistent/contracts_value.py index 4f1881c7..e1e1b1ef 100644 --- a/contracting_process/resource_level/consistent/contracts_value.py +++ b/contracting_process/resource_level/consistent/contracts_value.py @@ -1,6 +1,6 @@ from tools.checks import get_empty_result_resource from tools.currency_converter import convert -from tools.helpers import parse_datetime +from tools.getter import parse_datetime version = 1.0 diff --git a/contracting_process/resource_level/consistent/period_duration_in_days.py b/contracting_process/resource_level/consistent/period_duration_in_days.py index df52e494..d8fb6a21 100644 --- a/contracting_process/resource_level/consistent/period_duration_in_days.py +++ b/contracting_process/resource_level/consistent/period_duration_in_days.py @@ -1,8 +1,7 @@ import math from tools.checks import get_empty_result_resource -from tools.getter import deep_get, get_values -from tools.helpers import parse_datetime +from tools.getter import deep_get, get_values, parse_datetime version = 1.0 diff --git a/contracting_process/resource_level/consistent/tender_value.py b/contracting_process/resource_level/consistent/tender_value.py index d976dcb2..414da043 100644 --- a/contracting_process/resource_level/consistent/tender_value.py +++ b/contracting_process/resource_level/consistent/tender_value.py @@ -1,7 +1,6 @@ from tools.checks import get_empty_result_resource from tools.currency_converter import convert -from tools.getter import get_values -from tools.helpers import parse_date +from tools.getter import get_values, parse_date version = 1.0 diff --git a/dataset/distribution/value.py 
# Falsy values of assorted types. The parsers must return None for each of them without raising.
EMPTY = [None, "", 0, 0.0, False, set(), (), [], {}]
# None plus truthy non-string values: one scalar of each kind, then a set, a tuple, a list and a dict, mirroring the
# containers in EMPTY. (The last entry used to be a second `{1}` set, so non-empty dicts were never exercised.)
NON_STR = [None, 1, 1.0, True, {1}, (1,), [1], {1: 1}]


@pytest.mark.parametrize("value", EMPTY)
def test_parse_datetime_empty(value):
    """Falsy input of any type is parsed to None."""
    assert parse_datetime(value) is None


@pytest.mark.parametrize("value", NON_STR)
def test_parse_datetime_type(value):
    """Input that is not a string is parsed to None."""
    assert parse_datetime(value) is None


@pytest.mark.parametrize("value", ["x", "200101"])
def test_parse_datetime_invalid(value):
    """Strings in no accepted format (including YYYYMM without a separator) are parsed to None."""
    assert parse_datetime(value) is None


@pytest.mark.parametrize(
    "value,components",
    [
        ("2001", (2001, 1, 1, 0, 0)),
        ("2001-02", (2001, 2, 1, 0, 0)),
        ("2001-02-03", (2001, 2, 3, 0, 0)),
        ("20010203", (2001, 2, 3, 0, 0)),
    ],
)
def test_parse_datetime_date(value, components):
    """Date-only strings are parsed to naive datetimes at midnight; missing month and day default to 1."""
    assert parse_datetime(value) == datetime(*components)


# The tests serve to document the formats that are accepted. We don't test week formats.
#
# Dateutil can parse truncated times like "2001-02-03T00:5" and "2001-02-03T00:00:6", but these formats don't support
# time zones. This is undocumented behavior.
#
# https://dateutil.readthedocs.io/en/stable/parser.html#dateutil.parser.isoparse
@pytest.mark.parametrize(
    "suffix,tz",
    [
        ("", None),
        ("Z", timezone.utc),
        # UTC.
        ("+00", timezone.utc),
        ("-00", timezone.utc),
        ("+0000", timezone.utc),
        ("-0000", timezone.utc),
        ("+00:00", timezone.utc),
        ("-00:00", timezone.utc),
        # Non-UTC.
        ("+07", timezone(timedelta(seconds=25200))),
        ("-07", timezone(timedelta(seconds=-25200))),
        ("+0708", timezone(timedelta(seconds=25680))),
        ("-0708", timezone(timedelta(seconds=-25680))),
        ("+07:08", timezone(timedelta(seconds=25680))),
        ("-07:08", timezone(timedelta(seconds=-25680))),
    ],
)
@pytest.mark.parametrize(
    "value,components",
    [
        # With separators.
        ("2001-02-03T04", (2001, 2, 3, 4, 0)),
        ("2001-02-03T04:05", (2001, 2, 3, 4, 5)),
        ("2001-02-03T04:05:06", (2001, 2, 3, 4, 5, 6)),
        ("2001-02-03T04:05:06.0", (2001, 2, 3, 4, 5, 6)),
        ("2001-02-03T04:05:06.123456789", (2001, 2, 3, 4, 5, 6, 123456)),
        # Without separators. (Note: If the "separator" is a number, it is discarded!)
        ("20010203.04", (2001, 2, 3, 4, 0)),
        ("20010203.0405", (2001, 2, 3, 4, 5)),
        ("20010203.040506", (2001, 2, 3, 4, 5, 6)),
        ("20010203.040506,0", (2001, 2, 3, 4, 5, 6)),
        ("20010203.040506,123456789", (2001, 2, 3, 4, 5, 6, 123456)),
        # 24-hour clock.
        ("2001-02-03T00", (2001, 2, 3, 0, 0)),
        ("2001-02-03T24", (2001, 2, 4, 0, 0)),
    ],
)
def test_parse_datetime_dateutil(value, components, suffix, tz):
    """Every date-time form is combined with every offset form; the offset becomes the result's tzinfo."""
    assert parse_datetime(value + suffix) == datetime(*components, tzinfo=tz)


# The datetime library can handle short components and long timezones.
@pytest.mark.parametrize(
    ("suffix", "tz"),
    [
        ("Z", timezone.utc),
        # Zero offsets: basic, extended, and extended with seconds.
        ("+0000", timezone.utc),
        ("-0000", timezone.utc),
        ("+00:00", timezone.utc),
        ("-00:00", timezone.utc),
        ("+00:00:00", timezone.utc),
        ("-00:00:00", timezone.utc),
        # Non-zero offsets, in the same three spellings.
        ("+0708", timezone(timedelta(hours=7, minutes=8))),
        ("-0708", timezone(-timedelta(hours=7, minutes=8))),
        ("+07:08", timezone(timedelta(hours=7, minutes=8))),
        ("-07:08", timezone(-timedelta(hours=7, minutes=8))),
        ("+07:08:09", timezone(timedelta(hours=7, minutes=8, seconds=9))),
        ("-07:08:09", timezone(-timedelta(hours=7, minutes=8, seconds=9))),
    ],
)
def test_parse_datetime_library(suffix, tz):
    """Unpadded date and time components are accepted together with each offset spelling."""
    expected = datetime(1000, 2, 3, 4, 5, 6, tzinfo=tz)
    assert parse_datetime("1000-2-3T4:5:6" + suffix) == expected


@pytest.mark.parametrize("value", EMPTY)
def test_parse_date_empty(value):
    """Falsy input of any type yields no date."""
    parsed = parse_date(value)
    assert parsed is None


@pytest.mark.parametrize("value", NON_STR)
def test_parse_date_type(value):
    """Input that is not a string yields no date."""
    parsed = parse_date(value)
    assert parsed is None


@pytest.mark.parametrize("value", ["10000-01-01", "x"])
def test_parse_date_invalid(value):
    """A five-digit year and a non-date string both yield no date."""
    parsed = parse_date(value)
    assert parsed is None


@pytest.mark.parametrize(
    ("value", "components"),
    [
        # Year, year-month and full dates, with and without separators.
        ("2001", (2001, 1, 1)),
        ("2001-02", (2001, 2, 1)),
        ("2001-02-03", (2001, 2, 3)),
        ("20010203", (2001, 2, 3)),
        # Month and day without zero padding.
        ("1000-2-3", (1000, 2, 3)),
        # Anything after the date part is ignored.
        ("2001-02-03xxx", (2001, 2, 3)),
        ("2001-02-03T04:05:06Z", (2001, 2, 3)),
    ],
)
def test_parse_date(value, components):
    """Each accepted spelling is parsed to the calendar date given by ``components``."""
    expected = datetime(*components).date()
    assert parse_date(value) == expected
- ("+00", timezone.utc), - ("-00", timezone.utc), - ("+0000", timezone.utc), - ("-0000", timezone.utc), - ("+00:00", timezone.utc), - ("-00:00", timezone.utc), - # Non-UTC. - ("+07", timezone(timedelta(seconds=25200))), - ("-07", timezone(timedelta(seconds=-25200))), - ("+0708", timezone(timedelta(seconds=25680))), - ("-0708", timezone(timedelta(seconds=-25680))), - ("+07:08", timezone(timedelta(seconds=25680))), - ("-07:08", timezone(timedelta(seconds=-25680))), - ], -) -@pytest.mark.parametrize( - "value,components", - [ - # With separators. - ("2001-02-03T04", (2001, 2, 3, 4, 0)), - ("2001-02-03T04:05", (2001, 2, 3, 4, 5)), - ("2001-02-03T04:05:06", (2001, 2, 3, 4, 5, 6)), - ("2001-02-03T04:05:06.0", (2001, 2, 3, 4, 5, 6)), - ("2001-02-03T04:05:06.123456789", (2001, 2, 3, 4, 5, 6, 123456)), - # Without separators. (Note: If the "separator" is a number, it is discarded!) - ("20010203.04", (2001, 2, 3, 4, 0)), - ("20010203.0405", (2001, 2, 3, 4, 5)), - ("20010203.040506", (2001, 2, 3, 4, 5, 6)), - ("20010203.040506,0", (2001, 2, 3, 4, 5, 6)), - ("20010203.040506,123456789", (2001, 2, 3, 4, 5, 6, 123456)), - # 24-hour clock. - ("2001-02-03T00", (2001, 2, 3, 0, 0)), - ("2001-02-03T24", (2001, 2, 4, 0, 0)), - ], -) -def test_parse_datetime_dateutil(value, components, suffix, tz): - assert parse_datetime(value + suffix) == datetime(*components, tzinfo=tz) - - -# The datetime library can handle short components and long timezones. -@pytest.mark.parametrize( - "suffix,tz", - [ - ("Z", timezone.utc), - # UTC. - ("+0000", timezone.utc), - ("-0000", timezone.utc), - ("+00:00", timezone.utc), - ("-00:00", timezone.utc), - ("+00:00:00", timezone.utc), - ("-00:00:00", timezone.utc), - # Non-UTC. 
# https://datatracker.ietf.org/doc/html/rfc3339#section-5.6
def parse_datetime(string: Optional[str]) -> Optional[datetime]:
    """
    Parse a string to a datetime.

    The string is first parsed with ``dateutil.parser.isoparse``. If that fails, it is parsed with
    ``datetime.strptime`` using the format ``"%Y-%m-%dT%H:%M:%S%z"``.

    :param string: the value to parse
    :returns: the parsed datetime, or ``None`` if the value is not a string or matches neither parser
    """
    if not isinstance(string, str):
        return None
    try:
        return isoparse(string)
    # NOTE(review): isoparse rolls an hour of 24 over to the next day by adding a timedelta, which is expected to
    # raise OverflowError (not ValueError) for "9999-12-31T24:00". Treat it like any other unparsable value.
    except (ValueError, OverflowError):
        pass
    try:
        return datetime.strptime(string, "%Y-%m-%dT%H:%M:%S%z")
    except ValueError:
        return None


def parse_date(string: Optional[str]) -> Optional[date]:
    """
    Parse a string to a date.

    Only the first 10 characters are considered, so anything that follows a ``YYYY-MM-DD`` date is ignored. The
    prefix is first parsed with ``dateutil.parser.isoparse``. If that fails, it is parsed with ``datetime.strptime``
    using the format ``"%Y-%m-%d"``.

    :param string: the value to parse
    :returns: the parsed date, or ``None`` if the value is empty, is not a string or matches neither parser
    """
    # Check the type first, so that truthiness is only ever evaluated on a string.
    if not isinstance(string, str) or not string:
        return None
    prefix = string[:10]
    try:
        return isoparse(prefix).date()
    except ValueError:
        pass
    try:
        return datetime.strptime(prefix, "%Y-%m-%d").date()
    except ValueError:
        return None
- """ - if not string or not isinstance(string, str): - return None - try: - return isoparse(string[:10]).date() - except ValueError: - pass - try: - return datetime.strptime(string[:10], "%Y-%m-%d").date() - except ValueError: - pass - - class ReservoirSampler: def __init__(self, samples_cap: int): if samples_cap < 1: @@ -70,6 +35,7 @@ def is_step_required(*steps: str) -> bool: return any(step in settings.STEPS for step in steps) +# Has affinity with services.py, but would result in circular dependency due to `set_dataset_state()`. def finish_callback( client_state, channel, method, dataset_id: int, phase: Optional[str] = None, routing_key: Optional[str] = None ) -> None: