From 6e87e2167ba018af9148ef595f7df113b3444158 Mon Sep 17 00:00:00 2001 From: Johan Schreurs Date: Wed, 9 Aug 2023 17:59:50 +0200 Subject: [PATCH 1/4] Issue #421 Add: Specify period with single year or month --- openeo/util.py | 59 ++++++++++++++++++++++++++++++++++++++++++++++ tests/test_util.py | 49 +++++++++++++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/openeo/util.py b/openeo/util.py index 1a4794536..780cd5ae5 100644 --- a/openeo/util.py +++ b/openeo/util.py @@ -275,9 +275,68 @@ def get_temporal_extent(*args, elif extent: assert start_date is None and end_date is None start_date, end_date = extent + if start_date and not end_date and isinstance(start_date, str): + start_date, end_date = string_to_temporal_extent(start_date) return convertor(start_date) if start_date else None, convertor(end_date) if end_date else None +def string_to_temporal_extent(start_date: str) -> Tuple[dt.date, dt.date]: + """Convert a string that represents a year or a month, into a date range. + + For example: + "2021" : means all data for 2021 + "2022-08": means all data for the month of august in 2022. + + If the day is included in the string then we leave it alone. + Note that `get_temporal_extent` already handles having one date string, + where the day is present in the string. + """ + + # Skip if it represents a day or if it is not even a string + # If it is a day, we leave it alone and let the upstream function handle + # that case because a day could be a start date or an end date. 
+ if not isinstance(start_date, str): + return start_date, None + + # day also matches a datetime, no $ at the end + regex_day = re.compile(r"^(\d{4})[:/_-](\d{2})[:/_-](\d{2})") + regex_month = re.compile(r"^(\d{4})[:/_-](\d{2})$") + regex_year = re.compile(r"^\d{4}$") + + match_day = regex_day.match(start_date) + match_month = regex_month.match(start_date) + match_year = regex_year.match(start_date) + + if match_day: + return start_date, None + + if not (match_year or match_month): + raise ValueError( + "Value does not represent a year or a year + month: format should " + f"be 'yyyy' or 'yyyy-dd', start_date={start_date}" + ) + + if match_month: + year_start = int(match_month.group(1)) + month_start = int(match_month.group(2)) + if month_start == 12: + year_end = year_start + 1 + month_end = 1 + else: + month_end = month_start + 1 + year_end = year_start + else: + year_start = int(start_date) + year_end = year_start + 1 + month_start = 1 + month_end = 1 + + date_start = dt.date(year_start, month_start, 1) + date_end = dt.date(year_end, month_end, 1) + + return date_start, date_end + + class ContextTimer: """ Context manager to measure the "wall clock" time (in seconds) inside/for a block of code. 
diff --git a/tests/test_util.py b/tests/test_util.py index 311c3b4c5..97ce10a99 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -28,6 +28,7 @@ ensure_list, first_not_none, get_temporal_extent, + string_to_temporal_extent, guess_format, repr_truncate, rfc3339, @@ -389,7 +390,10 @@ def test_get_temporal_extent(): assert get_temporal_extent(start_date="2019-03-15", end_date="2019-10-11") == ("2019-03-15", "2019-10-11") assert get_temporal_extent(start_date="2019-03-15") == ("2019-03-15", None) assert get_temporal_extent(end_date="2019-10-11") == (None, "2019-10-11") - + assert get_temporal_extent(start_date="2019") == ("2019-01-01", "2020-01-01") + assert get_temporal_extent(start_date="2019-01") == ("2019-01-01", "2019-02-01") + assert get_temporal_extent(start_date="2019-11") == ("2019-11-01", "2019-12-01") + assert get_temporal_extent(start_date="2019-12") == ("2019-12-01", "2020-01-01") def test_context_timer_basic(): with mock.patch.object(ContextTimer, "_clock", new=_fake_clock([3, 5, 8, 13])): @@ -986,3 +990,46 @@ def test_crs_to_epsg_code_raises_valueerror(epsg_input): """EPSG codes can not be 0 or negative.""" with pytest.raises(ValueError): crs_to_epsg_code(epsg_input) + + +@pytest.mark.parametrize( + ["date_input", "expected_start", "expected_end"], + [ + ("2023", dt.date(2023, 1, 1), dt.date(2024, 1, 1)), + ("1999", dt.date(1999, 1, 1), dt.date(2000, 1, 1)), + ("2023-03", dt.date(2023, 3, 1), dt.date(2023, 4, 1)), + ("2023/03", dt.date(2023, 3, 1), dt.date(2023, 4, 1)), + ("2023-01", dt.date(2023, 1, 1), dt.date(2023, 2, 1)), + ("2023/01", dt.date(2023, 1, 1), dt.date(2023, 2, 1)), + ("2022-12", dt.date(2022, 12, 1), dt.date(2023, 1, 1)), + ("2022/12", dt.date(2022, 12, 1), dt.date(2023, 1, 1)), + ("2022-11", dt.date(2022, 11, 1), dt.date(2022, 12, 1)), + ("2022/11", dt.date(2022, 11, 1), dt.date(2022, 12, 1)), + ("2022-12-31", "2022-12-31", None), + ("2022/12/31", "2022/12/31", None), + ("2022-11-30", "2022-11-30", None), + ("2022/11/30", 
"2022/11/30", None), + ("2022-12-31T12:33:05", "2022-12-31T12:33:05", None), + (dt.date(2022, 11, 1), dt.date(2022, 11, 1), None), + (dt.datetime(2022, 11, 1, 15, 30, 00), dt.datetime(2022, 11, 1, 15, 30, 00), None), + ], +) +def test_string_to_temporal_extent(date_input: str, expected_start: dt.date, expected_end: dt.date): + actual_start, actual_end = string_to_temporal_extent(date_input) + assert actual_start == expected_start + assert actual_end == expected_end + + +@pytest.mark.parametrize( + "date_input", + [ + "20-22-12-31", + "2022/12/31/aa1/bb/cc", + "20-2--12", + "20-1-1-", + "20-2-", + ], +) +def test_string_to_temporal_extent_raises_valueerror(date_input: Union[str, dt.date, dt.datetime]): + with pytest.raises(ValueError): + string_to_temporal_extent(date_input) From 54d72ac2c1032e7a1f6f839bc634c63cecf1f13b Mon Sep 17 00:00:00 2001 From: Johan Schreurs Date: Fri, 11 Aug 2023 17:21:47 +0200 Subject: [PATCH 2/4] fixup! Issue #421 Add: Specify period with single year or month --- openeo/util.py | 26 ++++++++++++++++++-------- tests/test_util.py | 9 ++++++--- 2 files changed, 24 insertions(+), 11 deletions(-) diff --git a/openeo/util.py b/openeo/util.py index 780cd5ae5..f30ceec24 100644 --- a/openeo/util.py +++ b/openeo/util.py @@ -292,28 +292,38 @@ def string_to_temporal_extent(start_date: str) -> Tuple[dt.date, dt.date]: where the day is present in the string. """ - # Skip if it represents a day or if it is not even a string - # If it is a day, we leave it alone and let the upstream function handle - # that case because a day could be a start date or an end date. + # Skip it if the string represents a day or if it is not even a string + # If it is a day, we want to let the upstream function handle that case + # because a day could be either a start date or an end date. 
if not isinstance(start_date, str): return start_date, None - # day also matches a datetime, no $ at the end - regex_day = re.compile(r"^(\d{4})[:/_-](\d{2})[:/_-](\d{2})") + # Using a separate and stricter regular expressions to detect day, month, + # or year. Having a regex that only matches one type of period makes it + # easier to check it is effectively only a year, or only a month, + # but not a day. Datetime strings are more complex so we use rfc3339 to + # check whether or not it represents a datetime. + regex_day = re.compile(r"^(\d{4})[:/_-](\d{2})[:/_-](\d{2})$") regex_month = re.compile(r"^(\d{4})[:/_-](\d{2})$") regex_year = re.compile(r"^\d{4}$") + try: + rfc3339.parse_datetime(start_date) + is_date_time = True + except ValueError as exc: + is_date_time = False + match_day = regex_day.match(start_date) match_month = regex_month.match(start_date) match_year = regex_year.match(start_date) - if match_day: + if is_date_time or match_day: return start_date, None if not (match_year or match_month): raise ValueError( - "Value does not represent a year or a year + month: format should " - f"be 'yyyy' or 'yyyy-dd', start_date={start_date}" + f"The value of start_date='{start_date}' does not represent any of: " + + "a year ('yyyy'), a year + month ('yyyy-dd'), a date, or a datetime." 
) if match_month: diff --git a/tests/test_util.py b/tests/test_util.py index 97ce10a99..1fe84b2f3 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1009,7 +1009,7 @@ def test_crs_to_epsg_code_raises_valueerror(epsg_input): ("2022/12/31", "2022/12/31", None), ("2022-11-30", "2022-11-30", None), ("2022/11/30", "2022/11/30", None), - ("2022-12-31T12:33:05", "2022-12-31T12:33:05", None), + ("2022-12-31T12:33:05Z", "2022-12-31T12:33:05Z", None), (dt.date(2022, 11, 1), dt.date(2022, 11, 1), None), (dt.datetime(2022, 11, 1, 15, 30, 00), dt.datetime(2022, 11, 1, 15, 30, 00), None), ], @@ -1023,11 +1023,14 @@ def test_string_to_temporal_extent(date_input: str, expected_start: dt.date, exp @pytest.mark.parametrize( "date_input", [ + "foobar", "20-22-12-31", "2022/12/31/aa1/bb/cc", "20-2--12", - "20-1-1-", - "20-2-", + "2021-2--12", + "2021-1-1-", + "2021-2-", + "-2021-2", ], ) def test_string_to_temporal_extent_raises_valueerror(date_input: Union[str, dt.date, dt.datetime]): From 56de5ce5cbda412cf3c8cc9ab07fcb6c74f46f0a Mon Sep 17 00:00:00 2001 From: Johan Schreurs Date: Wed, 16 Aug 2023 16:36:29 +0200 Subject: [PATCH 3/4] Issue #421 Docstring: document param, return type, add doctest examples --- docs/api.rst | 2 +- openeo/util.py | 97 ++++++++++++++++++++++++++++++++++++++++++---- tests/test_util.py | 13 +++++++ 3 files changed, 103 insertions(+), 9 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 097633709..ae9771049 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -126,7 +126,7 @@ openeo.util ------------- .. 
automodule:: openeo.util - :members: to_bbox_dict, BBoxDict, load_json_resource + :members: to_bbox_dict, BBoxDict, load_json_resource, string_to_temporal_extent openeo.processes diff --git a/openeo/util.py b/openeo/util.py index 7b50f174e..137c026c1 100644 --- a/openeo/util.py +++ b/openeo/util.py @@ -286,17 +286,98 @@ def get_temporal_extent(*args, return convertor(start_date) if start_date else None, convertor(end_date) if end_date else None -def string_to_temporal_extent(start_date: str) -> Tuple[dt.date, dt.date]: - """Convert a string that represents a year or a month, into a date range. +def string_to_temporal_extent( + start_date: Union[str, dt.datetime, dt.date] +) -> Tuple[Union[dt.date, dt.datetime, str], Union[dt.date, dt.datetime, None]]: + """Convert a string into a date range when it is an abbreviation for an entire year or month. + + The result is a 2-tuple ``(start, end)`` that represents the period as a + half-open interval, where the end date is not included in the period. + + The intent of this function is to only convert values into periods + when they are clearly abbreviations, and in all other cases leave the original + start_date as it was, because there can be too many complications otherwise. + + The reason is that calling functions, e.g. ``get_temporal_extent``, + can allow you to specify both a start date **and** end date, but either date + can be ``None``. What such an open-ended interval means depends very much on + what the calling function/method is meant to do, so the caller should really + handle that themselves. + + When we don't convert, the return value is the tuple ``(start_date, None)`` + using the original parameter value start_date, unprocessed. + + :param start_date: + + - Typically a string that represents either a year, a year + month, a day, + or a datetime, and it always indicates the *beginning* of that period. 
+ - Other data types allowed are a ``datetime.date`` and ``datetime.datetime``, + and in that case we return the tuple ``(start_date, None)`` where + ``start_date`` is our original input parameter ``start_date`` as-is. + Similarly, strings that represent a date or datetime are not processed + any further and the return value is also ``(start_date, None)``. + - Any other type raises a TypeError. + + - Allowed string formats are: + - For year: "yyyy" + - For year + month: "yyyy-mm" + Some other separators than "-" technically work but they are discouraged. + - For date and datetime you must follow the RFC 3339 format. See also: class ``Rfc3339`` - For example: - "2021" : means all data for 2021 - "2022-08": means all data for the month of august in 2022. + :return: + The result is a 2-tuple of the form ``(start, end)`` that represents + the period as a half-open interval, where the end date is not included, + i.e. end is the first day that is no longer part of the time slot. + + When start_date was indeed an abbreviation and thus was converted to + a period, then the element types will be ``(datetime.date, datetime.date)`` + + If no conversion happened we return the original start_date wrapped in a + 2-tuple: ``(start_date, None)`` so the type is the same as the input's type. + + :raises TypeError: + when start_date is neither of the following types: + str, datetime.date, datetime.datetime + + :raises ValueError: + when start_date was a string but not recognized as either a year, + a month, a date, or datetime. + + Examples + -------- - If the day is included in the string then we leave it alone. - Note that `get_temporal_extent` already handles having one date string, - where the day is present in the string. + >>> import datetime + + 1. Year: use all data from the start of 2021 to the end of 2021. + >>> string_to_temporal_extent("2021") + (datetime.date(2021, 1, 1), datetime.date(2022, 1, 1)) + + 2. 
Year + month: all data from the start of August 2022 to the end of August 2022. + >>> string_to_temporal_extent("2022-08") + (datetime.date(2022, 8, 1), datetime.date(2022, 9, 1)) + + 3. We received a full date 2022-08-15: + In this case we should not process start_date. The calling function/method must + handle end date, depending on what an interval with an open end means for the caller. + See for example how ``get_temporal_extent`` handles this. + >>> string_to_temporal_extent("2022-08-15") + ('2022-08-15', None) + + 4. Similar to 3), but with a datetime.date instead of a string containing a date. + >>> string_to_temporal_extent(datetime.date(2022, 8, 15)) + (datetime.date(2022, 8, 15), None) + + 5. Similar to 3) & 4), but with a datetime.datetime instance. + >>> string_to_temporal_extent(datetime.datetime(2022, 8, 15, 0, 0)) + (datetime.datetime(2022, 8, 15, 0, 0), None) """ + supported_types = (str, dt.date, dt.datetime) + if not isinstance(start_date, supported_types): + raise TypeError( + "Value of start_date must be one of the following types:" + + "str, datetime.date, datetime.datetime" + + f"but it is {type(start_date)}, value={start_date}" + ) # Skip it if the string represents a day or if it is not even a string # If it is a day, we want to let the upstream function handle that case diff --git a/tests/test_util.py b/tests/test_util.py index 80eb282a9..eb59a563c 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1108,3 +1108,16 @@ def test_string_to_temporal_extent(date_input: str, expected_start: dt.date, exp def test_string_to_temporal_extent_raises_valueerror(date_input: Union[str, dt.date, dt.datetime]): with pytest.raises(ValueError): string_to_temporal_extent(date_input) + + +@pytest.mark.parametrize( + "date_input", + [ + 2000, + {}, + (), + ], +) +def test_string_to_temporal_extent_raises_typeerror(date_input: any): + with pytest.raises(TypeError): + string_to_temporal_extent(date_input) From ca1d97aa95954bd5db878053d1e390d658929c4c Mon 
Sep 17 00:00:00 2001 From: Johan Schreurs Date: Wed, 16 Aug 2023 17:11:42 +0200 Subject: [PATCH 4/4] Issue #421 Docstring: fix formatting of example/doctest --- openeo/util.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/openeo/util.py b/openeo/util.py index 137c026c1..b8b45823f 100644 --- a/openeo/util.py +++ b/openeo/util.py @@ -347,27 +347,27 @@ def string_to_temporal_extent( -------- >>> import datetime - - 1. Year: use all data from the start of 2021 to the end of 2021. + >>> + >>> # 1. Year: use all data from the start of 2021 to the end of 2021. >>> string_to_temporal_extent("2021") (datetime.date(2021, 1, 1), datetime.date(2022, 1, 1)) - - 2. Year + month: all data from the start of August 2022 to the end of August 2022. + >>> + >>> # 2. Year + month: all data from the start of August 2022 to the end of August 2022. >>> string_to_temporal_extent("2022-08") (datetime.date(2022, 8, 1), datetime.date(2022, 9, 1)) - - 3. We received a full date 2022-08-15: - In this case we should not process start_date. The calling function/method must - handle end date, depending on what an interval with an open end means for the caller. - See for example how ``get_temporal_extent`` handles this. + >>> + >>> # 3. We received a full date 2022-08-15: + >>> # In this case we should not process start_date. The calling function/method must + >>> # handle end date, depending on what an interval with an open end means for the caller. + >>> # See for example how ``get_temporal_extent`` handles this. >>> string_to_temporal_extent("2022-08-15") ('2022-08-15', None) - - 4. Similar to 3), but with a datetime.date instead of a string containing a date. + >>> + >>> # 4. Similar to 3), but with a datetime.date instead of a string containing a date. >>> string_to_temporal_extent(datetime.date(2022, 8, 15)) (datetime.date(2022, 8, 15), None) - - 5. Similar to 3) & 4), but with a datetime.datetime instance. + >>> + >>> # 5. 
Similar to 3) & 4), but with a datetime.datetime instance. >>> string_to_temporal_extent(datetime.datetime(2022, 8, 15, 0, 0)) (datetime.datetime(2022, 8, 15, 0, 0), None) """