Skip to content

Commit

Permalink
Merge branch 'main' into pathlib
Browse files Browse the repository at this point in the history
  • Loading branch information
ecomodeller authored Oct 11, 2023
2 parents d64b9cf + 47882c2 commit b051de5
Show file tree
Hide file tree
Showing 18 changed files with 473 additions and 137 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/build_docs.yml
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
name: Build documentation (don't publish)

on:
push:
pull_request:
pull_request:
branches: [ main ]

jobs:
Expand All @@ -11,9 +10,9 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
- name: Set up Python 3.10
uses: actions/setup-python@v2
uses: actions/setup-python@v4
with:
python-version: "3.10"

Expand All @@ -30,4 +29,4 @@ jobs:
uses: actions/upload-artifact@v2
with:
name: html
path: docs/_build/html
path: docs/_build/html
24 changes: 16 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,25 +1,33 @@

build: test
#python setup.py sdist bdist_wheel
LIB = mikeio

check: lint typecheck test

build: typecheck test
python -m build

lint:
ruff .

test:
pytest --disable-warnings

typecheck:
mypy $(LIB)/ --config-file pyproject.toml

coverage:
pytest --cov-report html --cov=$(LIB) tests/

doctest:
pytest mikeio/dfs/*.py mikeio/dfsu/*.py mikeio/eum/*.py mikeio/pfs/*.py mikeio/spatial/_grid_geometry.py --doctest-modules
rm -f *.dfs* # remove temporary files, created from doctests

perftest:
pytest tests/performance/ --durations=0

typecheck:
mypy mikeio/

coverage:
pytest --cov-report html --cov=mikeio tests/

docs: FORCE
cd docs; make html ;cd -

FORCE:


29 changes: 29 additions & 0 deletions docs/dfs2.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,35 @@ items:
0: Elevation <Total Water Depth> (meter)
```

## Subset in space

The most convenient way to subset in space is to use the `sel` method. It returns a new (smaller) dataset that can be further processed or written to disk using the `to_dfs` method.

```python
>>> ds.geometry
<mikeio.Grid2D>
x: [12.2, 12.21, ..., 13.1] (nx=216, dx=0.004167)
y: [55.2, 55.21, ..., 56.3] (ny=264, dy=0.004167)
projection: LONG/LAT
>>> ds_aoi = ds.sel(x=slice(12.5, 13.0), y=slice(55.5, 56.0))
>>> ds_aoi.geometry
<mikeio.Grid2D>
x: [12.5, 12.5, ..., 12.99] (nx=120, dx=0.004167)
y: [55.5, 55.5, ..., 55.99] (ny=120, dy=0.004167)
projection: LONG/LAT
```

In order to specify an open-ended subset (i.e. where the end of the subset is the end of the domain), use `None` as the end of the slice.

```python
>>> ds.sel(x=slice(None,13.0))
<mikeio.Dataset>
dims: (time:1, y:264, x:191)
time: 2020-05-15 11:04:52 (time-invariant)
geometry: Grid2D (ny=264, nx=191)
items:
0: Elevation <Total Water Depth> (meter)
```

## Grid2D

Expand Down
69 changes: 69 additions & 0 deletions mikeio/_time.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from __future__ import annotations
from datetime import datetime
from dataclasses import dataclass
from typing import List, Iterable, Optional

import pandas as pd


@dataclass
class DateTimeSelector:
    """Helper class for selecting time steps from a pandas DatetimeIndex"""

    index: pd.DatetimeIndex

    def isel(
        self,
        x: Optional[
            int | Iterable[int] | str | datetime | pd.DatetimeIndex | slice
        ] = None,
    ) -> List[int]:
        """Select time steps from a pandas DatetimeIndex

        Parameters
        ----------
        x : int, Iterable[int], str, datetime, pd.DatetimeIndex, slice
            Time steps to select, negative indices are supported.
            ``None`` (the default) selects all time steps.

        Returns
        -------
        List[int]
            List of indices in the range [0, len(index))

        Raises
        ------
        IndexError
            If an integer position is outside the index.
        KeyError
            If a str/datetime label is not found by ``index.get_loc``.

        Examples
        --------
        >>> idx = pd.date_range("2000-01-01", periods=4, freq="D")
        >>> dts = DateTimeSelector(idx)
        >>> dts.isel(None)
        [0, 1, 2, 3]
        >>> dts.isel(0)
        [0]
        >>> dts.isel(-1)
        [3]
        """

        indices = list(range(len(self.index)))

        if x is None:
            return indices

        if self._is_int(x):
            return [indices[x]]

        if isinstance(x, (datetime, str)):
            # get_loc may return an int, a slice, a boolean mask or an array
            # of positions; all of them are normalised to a list of ints.
            return self._positions(self.index.get_loc(x))

        if isinstance(x, slice):
            if self._is_int(x.start) or self._is_int(x.stop):
                # positional slice
                return indices[x]
            # label based slice, the step (if any) is honoured
            return self._positions(
                self.index.slice_indexer(x.start, x.stop, x.step)
            )

        if isinstance(x, Iterable):
            # one position per element (the first match of each element)
            return [self.isel(t)[0] for t in x]

        # unsupported selector types select everything
        return indices

    @staticmethod
    def _is_int(value) -> bool:
        """True for Python ints (incl. bool, as before) and NumPy integers"""
        return isinstance(value, int) or pd.api.types.is_integer(value)

    def _positions(self, indexer) -> List[int]:
        """Convert a positional indexer returned by pandas to a list of ints

        Indexing a RangeIndex of all positions accepts every form pandas
        hands back (int, slice, boolean mask, integer array).
        """
        picked = pd.RangeIndex(len(self.index))[indexer]
        if isinstance(picked, pd.Index):
            return picked.tolist()
        return [int(picked)]
47 changes: 11 additions & 36 deletions mikeio/dataset/_data_utils.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from __future__ import annotations
import re
from datetime import datetime
from typing import Iterable, Sequence, Sized, Tuple
from typing import Iterable, Sequence, Sized, Tuple, Union, List

import numpy as np
import pandas as pd

from .._time import DateTimeSelector


def _to_safe_name(name: str) -> str:
tmp = re.sub("[^0-9a-zA-Z]", "_", name)
Expand All @@ -18,45 +19,19 @@ def _n_selected_timesteps(x: Sized, k: slice | Sized) -> int:
return len(k)


def _get_time_idx_list(time: pd.DatetimeIndex, steps):
def _get_time_idx_list(time: pd.DatetimeIndex, steps) -> Union [List[int], slice]:
"""Find list of idx in DatetimeIndex"""

if isinstance(steps, str):
parts = steps.split(",")
if len(parts) == 1:
parts.append(parts[0]) # end=start

if parts[0] == "":
steps = slice(parts[1]) # stop only
elif parts[1] == "":
steps = slice(parts[0], None) # start only
else:
steps = slice(parts[0], parts[1])
# indexing with a slice needs to be handled differently, since slicing returns a view

if isinstance(steps, (list, tuple)) and isinstance(
steps[0], (str, datetime, np.datetime64, pd.Timestamp)
):
steps = pd.DatetimeIndex(steps)
if isinstance(steps, pd.DatetimeIndex):
return time.get_indexer(steps)
if isinstance(steps, (str, datetime, np.datetime64, pd.Timestamp)):
steps = slice(steps, steps)
if isinstance(steps, slice):
try:
s = time.slice_indexer(
steps.start,
steps.stop,
)
steps = list(range(s.start, s.stop))
except TypeError:
pass # TODO this seems fishy!
# steps = list(range(*steps.indices(len(time))))
elif isinstance(steps, int):
steps = [steps]
# TODO what is the return type of this function
return steps
if isinstance(steps.start, int) and isinstance(steps.stop, int):
return steps

dts = DateTimeSelector(time)
return dts.isel(steps)

# TODO this only used by DataArray, so consider to move it there
class DataUtilsMixin:
"""DataArray Utils"""

Expand Down Expand Up @@ -107,7 +82,7 @@ def _set_by_boolean_mask(data: np.ndarray, mask: np.ndarray, value) -> None:
def _parse_time(time) -> pd.DatetimeIndex:
"""Allow anything that we can create a DatetimeIndex from"""
if time is None:
time = [pd.Timestamp(2018, 1, 1)]
time = [pd.Timestamp(2018, 1, 1)] # TODO is this the correct epoch?
if isinstance(time, str) or (not isinstance(time, Iterable)):
time = [time]

Expand Down
27 changes: 26 additions & 1 deletion mikeio/dataset/_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from copy import deepcopy
from datetime import datetime
from functools import cached_property
from typing import Iterable, Optional, Sequence, Tuple
from typing import Iterable, Optional, Sequence, Tuple, Mapping


import numpy as np
Expand Down Expand Up @@ -840,10 +840,14 @@ def sel(
time: 1997-09-15 21:00:00 - 1997-09-16 03:00:00 (3 records)
geometry: Dfsu2D (3700 elements, 2090 nodes)
"""
if any([isinstance(v, slice) for v in kwargs.values()]):
return self._sel_with_slice(kwargs)

da = self

# select in space
if len(kwargs) > 0:

idx = self.geometry.find_index(**kwargs)
if isinstance(idx, tuple):
# TODO: support for dfs3
Expand All @@ -867,6 +871,27 @@ def sel(
da = da[time] # __getitem__ is 🚀

return da

def _sel_with_slice(self, kwargs: Mapping[str, slice]) -> "DataArray":
    """Subset along named axes using coordinate slices

    For every keyword whose value is a ``slice``, the start and stop of the
    slice are each looked up with ``geometry.find_index`` and the resulting
    positions are applied with ``isel`` on the axis named by the keyword.
    Values that are not slices are skipped, and the slice step is not used.

    Parameters
    ----------
    kwargs : Mapping[str, slice]
        Axis name -> slice of coordinates, e.g. ``{"x": slice(12.5, 13.0)}``.
        A start/stop for which ``find_index`` returns ``None`` leaves that
        end of the slice open.

    Returns
    -------
    DataArray
        Result of the successive ``isel`` calls (``self`` if no slice given)
    """
    da = self
    for axis_name, bounds in kwargs.items():
        if not isinstance(bounds, slice):
            continue

        idx_start = da.geometry.find_index(**{axis_name: bounds.start})
        idx_stop = da.geometry.find_index(**{axis_name: bounds.stop})

        # When a lookup yields a tuple, "y" is read from position 1 and any
        # other axis from position 0. The decision is based on either bound,
        # so that an open-ended start still reads the stop from the correct
        # position.
        # NOTE(review): assumes find_index returns (x-indices, y-indices)
        # for 2D grids - confirm against the geometry implementation.
        is_tuple = isinstance(idx_start, tuple) or isinstance(idx_stop, tuple)
        pos = 1 if is_tuple and axis_name == "y" else 0

        start = idx_start[pos][0] if idx_start is not None else None
        stop = idx_stop[pos][0] if idx_stop is not None else None

        da = da.isel(slice(start, stop), axis=axis_name)

    return da

def interp(
# TODO find out optimal syntax to allow interpolation to single point, new time, grid, mesh...
Expand Down
2 changes: 1 addition & 1 deletion mikeio/dataset/_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ def __set_or_insert_item(self, key, value, insert=False):
self._data_vars[key] = value
self._set_name_attr(key, value)

def insert(self, key: int, value: DataArray):
def insert(self, key, value: DataArray):
"""Insert DataArray in a specific position
Parameters
Expand Down
Loading

0 comments on commit b051de5

Please sign in to comment.