Skip to content

Commit

Permalink
Parse format string directly
Browse files Browse the repository at this point in the history
  • Loading branch information
adamjstewart committed Sep 13, 2021
1 parent 45369f8 commit cb8381c
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 16 deletions.
22 changes: 6 additions & 16 deletions torchgeo/datasets/geo.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import math
import os
import re
import sys
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, cast

Expand All @@ -27,7 +26,7 @@
from torch import Tensor
from torch.utils.data import Dataset

from .utils import BoundingBox
from .utils import BoundingBox, disambiguate_timestamp

# https://github.com/pytorch/pytorch/issues/60979
# https://github.com/pytorch/pytorch/pull/61045
Expand Down Expand Up @@ -226,20 +225,11 @@ def __init__(
# Skip files that rasterio is unable to read
continue
else:
mint: float = 0
maxt: float = sys.maxsize
mint: float = datetime.min.timestamp()
maxt: float = datetime.max.timestamp()
if "date" in match.groupdict():
date = match.group("date")
time = datetime.strptime(date, self.date_format)
mint = time.timestamp()

# If filename only contains the year (e.g. CDL),
# assume that this data point spans the entire year
if self.date_format in ["%Y", "%y"]:
time = datetime(time.year, 12, 31, 23, 59, 59)
maxt = time.timestamp()
else:
maxt = mint
mint, maxt = disambiguate_timestamp(date, self.date_format)

coords = (minx, maxx, miny, maxy, mint, maxt)
self.index.insert(i, coords, filepath)
Expand Down Expand Up @@ -459,8 +449,8 @@ def __init__(
# Skip files that fiona is unable to read
continue
else:
mint = 0
maxt = sys.maxsize
mint = datetime.min.timestamp()
maxt = datetime.max.timestamp()
coords = (minx, maxx, miny, maxy, mint, maxt)
self.index.insert(i, coords, filepath)
i += 1
Expand Down
54 changes: 54 additions & 0 deletions torchgeo/datasets/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import os
import tarfile
import zipfile
from datetime import datetime
from typing import Any, Dict, Iterator, List, Optional, Tuple, Union

import torch
Expand All @@ -22,6 +23,7 @@
"download_and_extract_archive",
"extract_archive",
"BoundingBox",
"disambiguate_timestamp",
"working_dir",
"collate_dict",
)
Expand Down Expand Up @@ -255,6 +257,58 @@ def intersects(self, other: "BoundingBox") -> bool:
)


def disambiguate_timestamp(date_str: str, format: str) -> Tuple[float, float]:
    """Disambiguate partial timestamps.

    A date string only pins down time to the resolution of the finest
    directive in its format (e.g. ``"%Y"`` only identifies a year). This
    function parses ``date_str`` and returns the earliest and latest POSIX
    timestamps consistent with it.

    Args:
        date_str: string representing date and time of a data point
        format: format codes accepted by :meth:`datetime.datetime.strptime`

    Returns:
        (mint, maxt) tuple for indexing

    Raises:
        ValueError: if ``date_str`` does not match ``format``
            (propagated from :meth:`datetime.datetime.strptime`)
    """
    mint = datetime.strptime(date_str, format)

    # TODO: I don't know how to handle weeks, as the min and max values
    # for weeks can't go into `datetime` directly

    # Drop literal percent signs (`%%`) so that e.g. "%Y%%m" is not mistaken
    # for a format containing the `%m` directive. Every `%` left in `spec`
    # is the start of a real directive.
    spec = format.replace("%%", "")

    def has_any(codes: str) -> bool:
        """Return True if ``spec`` uses at least one directive in ``codes``."""
        return any(f"%{code}" in spec for code in codes)

    if not has_any("yYcxG"):
        # No temporal info
        # NOTE(review): `timestamp()` on datetime.min/max appears to be
        # platform- and time-zone-dependent and may raise — confirm on the
        # platforms this needs to support.
        mint = datetime.min
        maxt = datetime.max
    elif not has_any("bBmjUWcxV"):
        # Year resolution
        maxt = datetime(mint.year, 12, 31, 23, 59, 59, 999999)
    elif not has_any("djcx"):
        # Month resolution: months differ in length, so find the real last
        # day by stepping back one tick from the first of the next month.
        # December is special-cased to avoid constructing year + 1.
        if mint.month == 12:
            last_day = 31
        else:
            first_of_next = datetime(mint.year, mint.month + 1, 1)
            last_day = (first_of_next - datetime.resolution).day
        maxt = datetime(mint.year, mint.month, last_day, 23, 59, 59, 999999)
    elif not has_any("HIcX"):
        # Day resolution
        maxt = mint.replace(hour=23, minute=59, second=59, microsecond=999999)
    elif not has_any("McX"):
        # Hour resolution
        maxt = mint.replace(minute=59, second=59, microsecond=999999)
    elif not has_any("ScX"):
        # Minute resolution
        maxt = mint.replace(second=59, microsecond=999999)
    elif not has_any("f"):
        # Second resolution
        maxt = mint.replace(microsecond=999999)
    else:
        # Microsecond resolution
        maxt = mint

    return mint.timestamp(), maxt.timestamp()


@contextlib.contextmanager
def working_dir(dirname: str, create: bool = False) -> Iterator[None]:
"""Context manager for changing directories.
Expand Down

0 comments on commit cb8381c

Please sign in to comment.