Skip to content

Commit

Permalink
Merge pull request #34 from reichlab/bsweger/update-cladetime-date-va…
Browse files Browse the repository at this point in the history
…lidations/33

Update CladeTime tree_as_of and sequence_as_of date handling
  • Loading branch information
bsweger authored Oct 10, 2024
2 parents 64e6ad0 + 424dda2 commit 92f9e46
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 18 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ build-backend = "setuptools.build_meta"
[tool.pytest.ini_options]
tmp_path_retention_policy = "none"
filterwarnings = [
"ignore::cladetime.exceptions.CladeTimeFutureDateWarning",
"ignore::DeprecationWarning",
'ignore:polars found a filename',
]
Expand Down
72 changes: 57 additions & 15 deletions src/cladetime/cladetime.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
"""Class for clade time traveling."""

import warnings
from datetime import datetime, timezone

import polars as pl
import structlog

from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.util.config import Config
from cladetime.util.reference import _get_s3_object_url
from cladetime.util.sequence import _get_ncov_metadata, get_covid_genome_metadata
Expand Down Expand Up @@ -51,15 +52,17 @@ def __init__(self, sequence_as_of=None, tree_as_of=None):
Use the NextStrain sequences and sequence metadata that were available
as of this date. Can be a datetime object, a string in the format
"YYYY-MM-DD", or None (which defaults to the current date and time).
CladeTime treats all dates and times as UTC.
tree_as_of : datetime | str | None, default = now()
Use the NextStrain reference tree that was available as of this date.
Can be a datetime object, a string in the format
"YYYY-MM-DD", or None (which defaults to the current date and time).
"YYYY-MM-DD", or None (which defaults to the sequence_as_of date).
CladeTime treats all dates and times as UTC.
"""

self._config = self._get_config()
self.sequence_as_of = self._validate_as_of_date(sequence_as_of)
self.tree_as_of = self._validate_as_of_date(tree_as_of)
self.sequence_as_of = sequence_as_of
self.tree_as_of = tree_as_of
self._ncov_metadata = {}
self._sequence_metadata = pl.LazyFrame()

Expand All @@ -78,13 +81,52 @@ def __init__(self, sequence_as_of=None, tree_as_of=None):
else:
self.url_ncov_metadata = None

@property
def sequence_as_of(self) -> datetime:
return self._sequence_as_of

@sequence_as_of.setter
def sequence_as_of(self, date) -> None:
"""Set the sequence_as_of attribute."""
sequence_as_of = self._validate_as_of_date(date)
utc_now = datetime.now(timezone.utc)
if sequence_as_of > utc_now:
warnings.warn(
f"specified sequence_as_of is in the future, defaulting to current time: {utc_now}",
category=CladeTimeFutureDateWarning,
)
sequence_as_of = utc_now

self._sequence_as_of = sequence_as_of

@property
def tree_as_of(self) -> datetime:
return self._tree_as_of

@tree_as_of.setter
def tree_as_of(self, date) -> None:
"""Set the tree_as_of attribute."""
if date is None:
tree_as_of = self.sequence_as_of
else:
tree_as_of = self._validate_as_of_date(date)
utc_now = datetime.now(timezone.utc)
if tree_as_of > utc_now:
warnings.warn(
f"specified tree_as_of is in the future, defaulting to sequence_as_of: {self.sequence_as_of}",
category=CladeTimeFutureDateWarning,
)
tree_as_of = self.sequence_as_of

self._tree_as_of = tree_as_of

@property
def ncov_metadata(self):
return self._ncov_metadata

@ncov_metadata.getter
def ncov_metadata(self) -> dict:
"""Set the ncov_metadata attribute."""
"""Get the ncov_metadata attribute."""
if self.url_ncov_metadata:
metadata = _get_ncov_metadata(self.url_ncov_metadata)
return metadata
Expand All @@ -98,13 +140,12 @@ def sequence_metadata(self):

@sequence_metadata.getter
def sequence_metadata(self) -> pl.LazyFrame:
"""Set the sequence_metadata attribute."""
"""Get the sequence_metadata attribute."""
if self.url_sequence_metadata:
sequence_metadata = get_covid_genome_metadata(metadata_url=self.url_sequence_metadata)
return sequence_metadata
else:
raise CladeTimeInvalidURLError("CladeTime is missing url_sequence_metadata")
return sequence_metadata

def __repr__(self):
return f"CladeTime(sequence_as_of={self.sequence_as_of}, tree_as_of={self.tree_as_of})"
Expand All @@ -121,22 +162,23 @@ def _get_config(self) -> Config:
return config

def _validate_as_of_date(self, as_of: str) -> datetime:
"""Validate date the as_of dates used to instantiate CladeTime."""
"""Validate an as_of date used to instantiate CladeTime.
All dates used to instantiate CladeTime are assigned
a datetime tzinfo of UTC.
"""
if as_of is None:
as_of_date = datetime.now()
as_of_date = datetime.now(timezone.utc)
elif isinstance(as_of, datetime):
as_of_date = as_of
as_of_date = as_of.replace(tzinfo=timezone.utc)
elif isinstance(as_of, str):
try:
as_of_date = datetime.strptime(as_of, "%Y-%m-%d")
as_of_date = datetime.strptime(as_of, "%Y-%m-%d").replace(tzinfo=timezone.utc)
except ValueError as e:
raise CladeTimeInvalidDateError(f"Invalid date string: {as_of} (should be in YYYY-MM-DD format)") from e

as_of_date = as_of_date.replace(microsecond=0, tzinfo=timezone.utc)
as_of_date = as_of_date.replace(microsecond=0)
if as_of_date < self._config.nextstrain_min_seq_date:
raise CladeTimeInvalidDateError(f"Date must be after May 1, 2023: {as_of_date}")

if as_of_date > datetime.now().replace(tzinfo=timezone.utc):
raise CladeTimeInvalidDateError(f"Date cannot be in the future: {as_of_date}")

return as_of_date
4 changes: 4 additions & 0 deletions src/cladetime/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,7 @@ class CladeTimeInvalidDateError(Error):

class CladeTimeInvalidURLError(Error):
"""Raised when CladeTime encounters an invalid URL."""


class CladeTimeFutureDateWarning(Warning):
"""Raised when CladeTime as_of date is in the future."""
33 changes: 30 additions & 3 deletions tests/unit/test_cladetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import dateutil.tz
import pytest
from cladetime.cladetime import CladeTime
from cladetime.exceptions import CladeTimeInvalidDateError, CladeTimeInvalidURLError
from cladetime.exceptions import CladeTimeFutureDateWarning, CladeTimeInvalidDateError, CladeTimeInvalidURLError
from freezegun import freeze_time


Expand Down Expand Up @@ -36,14 +36,32 @@ def test_cladetime_no_args():
datetime(2024, 9, 30, 18, 24, 59, 655398),
None,
datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2024, 9, 30, 18, 24, 59, tzinfo=timezone.utc),
),
(
datetime(2024, 2, 22, 22, 22, 22, 222222, tzinfo=dateutil.tz.gettz("US/Eastern")),
datetime(2024, 2, 22, tzinfo=dateutil.tz.gettz("US/Eastern")),
datetime(2024, 2, 22, 22, 22, 22, tzinfo=timezone.utc),
datetime(2024, 2, 22, tzinfo=timezone.utc),
),
(
"2023-12-21",
None,
datetime(2023, 12, 21, tzinfo=timezone.utc),
datetime(2023, 12, 21, tzinfo=timezone.utc),
),
(
"2063-12-21",
None,
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
(
"2063-12-21",
"2074-07-13",
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
datetime(2025, 7, 13, 16, 21, 34, tzinfo=timezone.utc),
),
],
)
def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_of, expected_tree_as_of):
Expand All @@ -54,12 +72,21 @@ def test_cladetime_as_of_dates(sequence_as_of, tree_as_of, expected_sequence_as_
assert ct.tree_as_of == expected_tree_as_of


@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32", "2063-04-05"])
@pytest.mark.parametrize("bad_date", ["2020-07-13", "2022-12-32"])
def test_cladetime_invalid_date(bad_date):
with pytest.raises(CladeTimeInvalidDateError):
CladeTime(sequence_as_of=bad_date, tree_as_of=bad_date)


def test_cladetime_future_date():
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(sequence_as_of="2063-07-13")
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(tree_as_of="2063-07-13")
with pytest.warns(CladeTimeFutureDateWarning):
CladeTime(sequence_as_of="2023-12-31", tree_as_of="2063-07-13")


@pytest.mark.parametrize(
"sequence_as_of, expected_content",
[
Expand Down

0 comments on commit 92f9e46

Please sign in to comment.