Skip to content

Commit

Permalink
Merge pull request #461 from Open-EO/issue421_specifying-temporal-ext…
Browse files Browse the repository at this point in the history
…ent-with-single-string

Issue421 specifying temporal extent with single string
  • Loading branch information
jdries authored Aug 21, 2023
2 parents 5bb0da8 + ca1d97a commit cbd4053
Show file tree
Hide file tree
Showing 3 changed files with 215 additions and 2 deletions.
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ openeo.util
-------------

.. automodule:: openeo.util
:members: to_bbox_dict, BBoxDict, load_json_resource
:members: to_bbox_dict, BBoxDict, load_json_resource, string_to_temporal_extent


openeo.processes
Expand Down
150 changes: 150 additions & 0 deletions openeo/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,159 @@ def get_temporal_extent(*args,
elif extent:
assert start_date is None and end_date is None
start_date, end_date = extent
if start_date and not end_date and isinstance(start_date, str):
start_date, end_date = string_to_temporal_extent(start_date)
return convertor(start_date) if start_date else None, convertor(end_date) if end_date else None


def string_to_temporal_extent(
start_date: Union[str, dt.datetime, dt.date]
) -> Tuple[Union[dt.date, dt.datetime, str], Union[dt.date, dt.datetime, None]]:
"""Convert a string into a date range when it is an abbreviation for an entire year or month.
The result is a 2-tuple ``(start, end)`` that represents the period as a
half-open interval, where the end date is not included in the period.
The intent of this function is to only convert values into a periods
when they are clearly abbreviations, and in all other cases leave the original
start_date as it was, because there can be too many complications otherwise.
The reason being that calling functions, e.g. ``get_temporal_extent``,
can allow you to specifying both a start date **and** end date, but either date
can be ``None``. What such an open-ended interval means depends very much on
what the calling function/method is meant to do, so the caller should really
handle that themselves.
When we don't convert, the return value is the tuple ``(start_date, None)``
using the original parameter value start_date, unprocessed.
:param start_date:
- Typically a string that represents either a year, a year + month, a day,
or a datetime, and it always indicates the *beginning* of that period.
- Other data types allowed are a ``datetime.date`` and ``datetime.datetime``,
and in that case we return the tuple ``(start_date, None)`` where
``start_date`` is our original input parameter ``start_date`` as-is.
Similarly, strings that represent a date or datetime are not processed
any further and the return value is also ``(start_date, None)``.
- Any other type raises a TypeError.
- Allowed string formats are:
- For year: "yyyy"
- For year + month: "yyyy-mm"
Some other separators than "-" technically work but they are discouraged.
- For date and datetime you must follow the RFC 3339 format. See also: class ``Rfc3339``
:return:
The result is a 2-tuple of the form ``(start, end)`` that represents
the period as a half-open interval, where the end date is not included,
i.e. end is the first day that is no longer part of the time slot.
When start_date was indeed an abbreviation and thus was converted to
a period, then the element types will be ``(datetime.date, datetime.date)``
If no conversion happened we return the original start_date wrapped in a
2-tuple: ``(start_date, None)`` so the type is the same as the input's type.
:raises TypeError:
when start_date is neither of the following types:
str, datetime.date, datetime.datetime
:raises ValueError:
when start_date was a string but not recognized as either a year,
a month, a date, or datetime.
Examples
--------
>>> import datetime
>>>
>>> # 1. Year: use all data from the start of 2021 to the end of 2021.
>>> string_to_temporal_extent("2021")
(datetime.date(2021, 1, 1), datetime.date(2022, 1, 1))
>>>
>>> # 2. Year + month: all data from the start of August 2022 to the end of August 2022.
>>> string_to_temporal_extent("2022-08")
(datetime.date(2022, 8, 1), datetime.date(2022, 9, 1))
>>>
>>> # 3. We received a full date 2022-08-15:
>>> # In this case we should not process start_date. The calling function/method must
>>> # handle end date, depending on what an interval with an open end means for the caller.
>>> # See for example how ``get_temporal_extent`` handles this.
>>> string_to_temporal_extent("2022-08-15")
('2022-08-15', None)
>>>
>>> # 4. Similar to 3), but with a datetime.date instead of a string containing a date.
>>> string_to_temporal_extent(datetime.date(2022, 8, 15))
(datetime.date(2022, 8, 15), None)
>>>
>>> # 5. Similar to 3) & 4), but with a datetime.datetime instance.
>>> string_to_temporal_extent(datetime.datetime(2022, 8, 15, 0, 0))
(datetime.datetime(2022, 8, 15, 0, 0), None)
"""
supported_types = (str, dt.date, dt.datetime)
if not isinstance(start_date, supported_types):
raise TypeError(
"Value of start_date must be one of the following types:"
+ "str, datetime.date, datetime.datetime"
+ f"but it is {type(start_date)}, value={start_date}"
)

# Skip it if the string represents a day or if it is not even a string
# If it is a day, we want to let the upstream function handle that case
# because a day could be either a start date or an end date.
if not isinstance(start_date, str):
return start_date, None

# Using a separate and stricter regular expressions to detect day, month,
# or year. Having a regex that only matches one type of period makes it
# easier to check it is effectively only a year, or only a month,
# but not a day. Datetime strings are more complex so we use rfc3339 to
# check whether or not it represents a datetime.
regex_day = re.compile(r"^(\d{4})[:/_-](\d{2})[:/_-](\d{2})$")
regex_month = re.compile(r"^(\d{4})[:/_-](\d{2})$")
regex_year = re.compile(r"^\d{4}$")

try:
rfc3339.parse_datetime(start_date)
is_date_time = True
except ValueError as exc:
is_date_time = False

match_day = regex_day.match(start_date)
match_month = regex_month.match(start_date)
match_year = regex_year.match(start_date)

if is_date_time or match_day:
return start_date, None

if not (match_year or match_month):
raise ValueError(
f"The value of start_date='{start_date}' does not represent any of: "
+ "a year ('yyyy'), a year + month ('yyyy-dd'), a date, or a datetime."
)

if match_month:
year_start = int(match_month.group(1))
month_start = int(match_month.group(2))
if month_start == 12:
year_end = year_start + 1
month_end = 1
else:
month_end = month_start + 1
year_end = year_start
else:
year_start = int(start_date)
year_end = year_start + 1
month_start = 1
month_end = 1

date_start = dt.date(year_start, month_start, 1)
date_end = dt.date(year_end, month_end, 1)

return date_start, date_end


class ContextTimer:
"""
Context manager to measure the "wall clock" time (in seconds) inside/for a block of code.
Expand Down
65 changes: 64 additions & 1 deletion tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
ensure_list,
first_not_none,
get_temporal_extent,
string_to_temporal_extent,
guess_format,
repr_truncate,
rfc3339,
Expand Down Expand Up @@ -392,7 +393,10 @@ def test_get_temporal_extent():
assert get_temporal_extent(start_date="2019-03-15", end_date="2019-10-11") == ("2019-03-15", "2019-10-11")
assert get_temporal_extent(start_date="2019-03-15") == ("2019-03-15", None)
assert get_temporal_extent(end_date="2019-10-11") == (None, "2019-10-11")

assert get_temporal_extent(start_date="2019") == ("2019-01-01", "2020-01-01")
assert get_temporal_extent(start_date="2019-01") == ("2019-01-01", "2019-02-01")
assert get_temporal_extent(start_date="2019-11") == ("2019-11-01", "2019-12-01")
assert get_temporal_extent(start_date="2019-12") == ("2019-12-01", "2020-01-01")

def test_context_timer_basic():
with mock.patch.object(ContextTimer, "_clock", new=_fake_clock([3, 5, 8, 13])):
Expand Down Expand Up @@ -1058,3 +1062,62 @@ def test_normalize_crs_succeeds_with_correct_projstring(self, epsg_input, expect
def test_normalize_crs_handles_incorrect_crs(self, epsg_input, use_pyproj):
with pytest.raises(ValueError):
normalize_crs(epsg_input, use_pyproj=use_pyproj)


@pytest.mark.parametrize(
["date_input", "expected_start", "expected_end"],
[
("2023", dt.date(2023, 1, 1), dt.date(2024, 1, 1)),
("1999", dt.date(1999, 1, 1), dt.date(2000, 1, 1)),
("2023-03", dt.date(2023, 3, 1), dt.date(2023, 4, 1)),
("2023/03", dt.date(2023, 3, 1), dt.date(2023, 4, 1)),
("2023-01", dt.date(2023, 1, 1), dt.date(2023, 2, 1)),
("2023/01", dt.date(2023, 1, 1), dt.date(2023, 2, 1)),
("2022-12", dt.date(2022, 12, 1), dt.date(2023, 1, 1)),
("2022/12", dt.date(2022, 12, 1), dt.date(2023, 1, 1)),
("2022-11", dt.date(2022, 11, 1), dt.date(2022, 12, 1)),
("2022/11", dt.date(2022, 11, 1), dt.date(2022, 12, 1)),
("2022-12-31", "2022-12-31", None),
("2022/12/31", "2022/12/31", None),
("2022-11-30", "2022-11-30", None),
("2022/11/30", "2022/11/30", None),
("2022-12-31T12:33:05Z", "2022-12-31T12:33:05Z", None),
(dt.date(2022, 11, 1), dt.date(2022, 11, 1), None),
(dt.datetime(2022, 11, 1, 15, 30, 00), dt.datetime(2022, 11, 1, 15, 30, 00), None),
],
)
def test_string_to_temporal_extent(date_input: str, expected_start: dt.date, expected_end: dt.date):
actual_start, actual_end = string_to_temporal_extent(date_input)
assert actual_start == expected_start
assert actual_end == expected_end


@pytest.mark.parametrize(
"date_input",
[
"foobar",
"20-22-12-31",
"2022/12/31/aa1/bb/cc",
"20-2--12",
"2021-2--12",
"2021-1-1-",
"2021-2-",
"-2021-2",
],
)
def test_string_to_temporal_extent_raises_valueerror(date_input: Union[str, dt.date, dt.datetime]):
with pytest.raises(ValueError):
string_to_temporal_extent(date_input)


@pytest.mark.parametrize(
"date_input",
[
2000,
{},
(),
],
)
def test_string_to_temporal_extent_raises_typeerror(date_input: any):
with pytest.raises(TypeError):
string_to_temporal_extent(date_input)

0 comments on commit cbd4053

Please sign in to comment.