Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into toniof-ke51-normali…
Browse files Browse the repository at this point in the history
…sation

# Conflicts:
#	CHANGES.md
  • Loading branch information
TonioF committed May 26, 2021
2 parents 781fec3 + cd2b164 commit 7d4341a
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 29 deletions.
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
## Changes in 0.8.2 (in development)

* Fixed the issue that xcube gen2 would not print tracebacks to stderr when raising
CubeGeneratorErrors (#448).
* Enhanced `xcube.core.normalize.normalize_dataset()` function to also normalize datasets with latitudes given as
`latitude_centers` and to invert decreasing latitude coordinate values.
* Introduced `xcube.core.normalize.cubify_dataset()` function to normalize a dataset
Expand Down
40 changes: 40 additions & 0 deletions test/core/test_timecoord.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,24 @@ def test_get_time_range_from_data(self):
self.assertEqual('2010-01-01T00:00:00', pd.Timestamp(time_range[0]).isoformat())
self.assertEqual('2010-01-06T00:00:00', pd.Timestamp(time_range[1]).isoformat())

def test_get_time_range_from_data_with_irregular_data(self):
# Checks get_time_range_from_data() on a cube built with time_freq='M' and
# drop_bounds=True, leaving the cube's attributes untouched.
# NOTE(review): presumably drop_bounds=True means the cube carries no time bounds
# variable, and monthly steps are what makes the data "irregular" - confirm
# against new_cube().
cube = new_cube(drop_bounds=True,
time_freq='M')
time_range = get_time_range_from_data(cube)
self.assertIsNotNone(time_range)
# NOTE(review): these values differ from the ones expected by the
# "..._and_no_metadata" variant of this test, so they presumably stem from the
# time_coverage_start / time_coverage_end attributes - confirm.
self.assertEqual('2010-01-31T00:00:00', pd.Timestamp(time_range[0]).isoformat())
self.assertEqual('2010-06-30T00:00:00', pd.Timestamp(time_range[1]).isoformat())

def test_get_time_range_from_data_with_irregular_data_and_no_metadata(self):
# Same setup as the "irregular data" test, but the time coverage attributes are
# removed before the time range is determined.
cube = new_cube(drop_bounds=True,
time_freq='M')
# Remove both coverage attributes so they cannot contribute to the result.
cube.attrs.pop('time_coverage_start')
cube.attrs.pop('time_coverage_end')
time_range = get_time_range_from_data(cube)
self.assertIsNotNone(time_range)
# NOTE(review): presumably these are the first and last values of the cube's
# time coordinate - confirm against new_cube().
self.assertEqual('2010-02-14T00:00:00', pd.Timestamp(time_range[0]).isoformat())
self.assertEqual('2010-06-14T00:00:00', pd.Timestamp(time_range[1]).isoformat())

def test_get_time_range_from_data_cftime(self):
cube = new_cube(drop_bounds=True,
use_cftime=True,
Expand All @@ -85,6 +103,28 @@ def test_get_time_range_from_data_cftime(self):
self.assertEqual('2010-01-01T00:00:00', pd.Timestamp(time_range[0]).isoformat())
self.assertEqual('2010-01-06T00:00:00', pd.Timestamp(time_range[1]).isoformat())

def test_get_time_range_from_data_with_irregular_cftime_data(self):
# Checks get_time_range_from_data() on a cube built with time_freq='M',
# use_cftime=True and time_dtype=None, leaving the cube's attributes untouched.
# NOTE(review): presumably use_cftime=True makes new_cube() emit cftime objects
# as time values - confirm against new_cube().
cube = new_cube(drop_bounds=True,
time_freq='M',
use_cftime=True,
time_dtype=None)
time_range = get_time_range_from_data(cube)
self.assertIsNotNone(time_range)
# The expected range is the same as in the non-cftime "irregular data" test.
self.assertEqual('2010-01-31T00:00:00', pd.Timestamp(time_range[0]).isoformat())
self.assertEqual('2010-06-30T00:00:00', pd.Timestamp(time_range[1]).isoformat())

def test_get_time_range_from_data_with_irregular_cftime_data_and_no_metadata(self):
# cftime variant of the "no metadata" test: the cube is built with
# use_cftime=True and time_dtype=None, and the time coverage attributes are
# removed before the time range is determined.
cube = new_cube(drop_bounds=True,
time_freq='M',
use_cftime=True,
time_dtype=None)
# Remove both coverage attributes so they cannot contribute to the result.
cube.attrs.pop('time_coverage_start')
cube.attrs.pop('time_coverage_end')
time_range = get_time_range_from_data(cube)
self.assertIsNotNone(time_range)
# The expected range is the same as in the non-cftime "no metadata" test.
self.assertEqual('2010-02-14T00:00:00', pd.Timestamp(time_range[0]).isoformat())
self.assertEqual('2010-06-14T00:00:00', pd.Timestamp(time_range[1]).isoformat())

def test_get_time_range_from_data_time_named_t(self):
cube = new_cube(drop_bounds=True, time_name='t')
time_range = get_time_range_from_data(cube)
Expand Down
1 change: 1 addition & 0 deletions xcube/cli/gen2.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ def dump_cube_info(cube_info: CubeInfo):
print(**print_kwargs)
print('Remote traceback:', **print_kwargs)
print('=================', **print_kwargs)
print(e.remote_traceback, **print_kwargs)
raise click.ClickException(f'{e}') from e

except DataStoreError as e:
Expand Down
57 changes: 28 additions & 29 deletions xcube/core/timecoord.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import cftime
import datetime
from typing import Optional, Sequence, Tuple, Union

Expand Down Expand Up @@ -63,31 +64,33 @@ def add_time_coords(dataset: xr.Dataset, time_range: Tuple[float, float]) -> xr.
time_var.encoding['calendar'] = DATETIME_CALENDAR
if t1 != t2:
time_var.attrs['bounds'] = 'time_bnds'
dataset = dataset.assign_coords(time_bnds=(['time', 'bnds'],
from_time_in_days_since_1970([t1, t2]).reshape(1, 2)))
dataset = dataset.assign_coords(
time_bnds=(['time', 'bnds'], from_time_in_days_since_1970([t1, t2]).reshape(1, 2))
)
time_bnds_var = dataset.coords['time_bnds']
time_bnds_var.attrs['long_name'] = 'time'
time_bnds_var.attrs['standard_name'] = 'time'
# Avoiding xarray error:
# ValueError: failed to prevent overwriting existing key units in attrs on variable 'time'.
# This is probably an encoding field used by xarray to describe how a variable is serialized.
# To proceed, remove this key from the variable's attributes manually.
# ValueError: failed to prevent overwriting existing key units in attrs on variable
# 'time'. This is probably an encoding field used by xarray to describe how a variable
# is serialized.
# To proceed, remove this key from the variable's attributes manually.
# time_bnds_var.attrs['units'] = DATETIME_UNITS
# time_bnds_var.attrs['calendar'] = DATETIME_CALENDAR
time_bnds_var.encoding['units'] = DATETIME_UNITS
time_bnds_var.encoding['calendar'] = DATETIME_CALENDAR
return dataset


def get_time_range_from_data(dataset: xr.Dataset, maybe_consider_metadata: bool=True) \
def get_time_range_from_data(dataset: xr.Dataset, maybe_consider_metadata: bool = True) \
-> Tuple[Optional[float], Optional[float]]:
"""
Determines a time range from a dataset by inspecting its time_bounds or time data arrays.
In cases where no time bounds are given and no time periodicity can be determined,
metadata may be considered.
:param dataset: The dataset of which the time range shall be determined
"param maybe_consider_metadata": Whether metadata shall be considered.
:param maybe_consider_metadata: Whether metadata shall be considered.
Only used when the dataset has no time bounds array and no time periodicity.
The values will only be set when they do not contradict the values from the data arrays.
:return: A tuple with two float values: The first one represents the start time,
Expand All @@ -112,30 +115,24 @@ def get_time_range_from_data(dataset: xr.Dataset, maybe_consider_metadata: bool=
time_bnds_name = time.attrs.get("bounds", "time_bnds")
if time_bnds_name in dataset:
return _get_time_range_from_time_bounds(dataset, time_bnds_name)
if time.size == 1:
is_cf_time = isinstance(time[0].values.item(), cftime.datetime)
data_start = pd.to_datetime(time[0].values.item().isoformat()) \
if is_cf_time else time[0].values
data_end = pd.to_datetime(time[-1].values.item().isoformat()) \
if is_cf_time else time[-1].values
if time.size < 3:
return _maybe_return_time_range_from_metadata(dataset,
time.values[0],
time.values[0],
maybe_consider_metadata)
if time.size == 2:
return _maybe_return_time_range_from_metadata(dataset,
time.values[0],
time.values[1],
data_start,
data_end,
maybe_consider_metadata)
time_diff = time.diff(dim=time.dims[0]).values
time_res = time_diff[0]
time_regular = all([time_res - diff == np.timedelta64(0) for diff in time_diff[1:]])
if time_regular:
try:
return time.values[0] - time_res / 2, time.values[-1] + time_res / 2
except TypeError:
# Time is probably given as cftime.DatetimeJulian or cftime.DatetimeGregorian
# To convert it to datetime, we must derive its isoformat first.
return (pd.to_datetime(time.values[0].isoformat()) - time_res / 2).to_datetime64(), \
(pd.to_datetime(time.values[-1].isoformat()) + time_res / 2).to_datetime64()
return data_start - time_res / 2, data_end + time_res / 2
return _maybe_return_time_range_from_metadata(dataset,
time.values[0],
time.values[-1],
data_start,
data_end,
maybe_consider_metadata)


Expand Down Expand Up @@ -196,15 +193,16 @@ def remove_time_part_from_isoformat(datetime_str: str) -> str:


def to_time_in_days_since_1970(time_str: str, pattern=None) -> float:
    """
    Convert a date/time string into a (fractional) number of days relative to
    the module-level ``REF_DATETIME``.

    :param time_str: The date/time string to be parsed.
    :param pattern: Optional format string passed on to ``pandas.to_datetime()``
        as its ``format`` argument. If None, pandas determines the format itself.
    :return: The difference between the parsed time (interpreted as UTC) and
        ``REF_DATETIME``, expressed in days. Only the days, seconds and microseconds
        components of the difference are used.
    """
    # NOTE(review): REF_DATETIME is defined outside this function; going by the
    # function name it is presumably 1970-01-01 in UTC - confirm.
    # "infer_datetime_format" is deliberately not passed: False was its default and
    # the argument is deprecated since pandas 2.0.
    # The local is called "date_time" so that it does not shadow the "datetime" module.
    date_time = pd.to_datetime(time_str, format=pattern, utc=True)
    timedelta = date_time - REF_DATETIME
    return timedelta.days + timedelta.seconds / SECONDS_PER_DAY + \
        timedelta.microseconds / MICROSECONDS_PER_DAY


def from_time_in_days_since_1970(time_value: Union[float, Sequence[float]]) -> np.ndarray:
if isinstance(time_value, int) or isinstance(time_value, float):
return pd.to_datetime(time_value, utc=True, unit='d', origin='unix').round(freq='ms').to_datetime64()
return pd.to_datetime(time_value, utc=True, unit='d', origin='unix').round(freq='ms') \
.to_datetime64()
else:
return np.array(list(map(from_time_in_days_since_1970, time_value)))

Expand All @@ -214,8 +212,9 @@ def timestamp_to_iso_string(time: Union[np.datetime64, datetime.datetime], freq=
Convert a UTC timestamp given as nanos, millis, seconds, etc. since 1970-01-01 00:00:00
to an ISO-format string.
:param time: UTC timestamp given as time delta since since 1970-01-01 00:00:00 in the units given by
the numpy datetime64 type, so it can be as nanos, millis, seconds since 1970-01-01 00:00:00.
:param time: UTC timestamp given as time delta since 1970-01-01 00:00:00 in the units
given by the numpy datetime64 type, so it can be given as nanos, millis,
or seconds since 1970-01-01 00:00:00.
:param freq: time rounding resolution. See pandas.Timestamp.round().
:return: ISO-format string.
"""
Expand Down

0 comments on commit 7d4341a

Please sign in to comment.