diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json new file mode 100644 index 00000000..bf2eaf07 --- /dev/null +++ b/.devcontainer/devcontainer.json @@ -0,0 +1,15 @@ +{ "name": "Hydropandas", + // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile + "image": "mcr.microsoft.com/devcontainers/python:0-3.11", + "updateContentCommand": "chmod u+x .devcontainer/setup.sh && .devcontainer/setup.sh && chmod 666 .devcontainer/setup.sh", + "postAttachCommand": "code examples/01_groundwater_observations.ipynb", //alternative for customizations openFile -> https://github.com/orgs/community/discussions/58399#discussioncomment-6222762 + "customizations": { + "vscode": { + "extensions": [ + "github.codespaces", + "ms-python.python", + "ms-toolsai.jupyter" + ] + } + } + } \ No newline at end of file diff --git a/.devcontainer/setup.sh b/.devcontainer/setup.sh new file mode 100644 index 00000000..55b00bca --- /dev/null +++ b/.devcontainer/setup.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +# Create model environments +pip install -e . 
\ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 27f8fad9..4fbd5034 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,34 +11,25 @@ on: jobs: test: - runs-on: ${{ matrix.os }} + runs-on: ubuntu-latest strategy: fail-fast: true matrix: include: - name: Test suite with py312-ubuntu python: "3.12" - os: ubuntu-latest toxenv: py312 - name: Test suite with py311-ubuntu python: "3.11" - os: ubuntu-latest toxenv: py311 - name: Test suite with py310-ubuntu python: "3.10" - os: ubuntu-latest toxenv: py310 - - name: Formatting with black + isort - python: "3.9" - os: ubuntu-latest - toxenv: format - - name: Linting with flake8 + ruff - python: "3.9" - os: ubuntu-latest - toxenv: lint + - name: Formatting and linting with ruff + python: "3.11" + toxenv: ruff - name: Test suite for notebooks and coverage python: "3.9" - os: ubuntu-latest toxenv: notebooks name: ${{ matrix.name }} @@ -68,11 +59,4 @@ jobs: run: tox -e ${{ matrix.toxenv }} --notest - name: Test - run: tox -e ${{ matrix.toxenv }} --skip-pkg-install - - - name: Run codacy-coverage-reporter - if: ${{ matrix.toxenv == 'notebooks' && github.repository == 'ArtesiaWater/hydropandas' && success() }} - uses: codacy/codacy-coverage-reporter-action@master - with: - project-token: ${{ secrets.CODACY_PROJECT_TOKEN }} - coverage-reports: coverage.xml + run: tox -e ${{ matrix.toxenv }} --skip-pkg-install \ No newline at end of file diff --git a/hydropandas/__init__.py b/hydropandas/__init__.py index ac33cdf7..05b4f3f3 100644 --- a/hydropandas/__init__.py +++ b/hydropandas/__init__.py @@ -33,6 +33,7 @@ WaterlvlObs, WaterQualityObs, ) +from .rcparams import rcParams from .version import __version__, show_versions logging.getLogger("hydropandas").addHandler(logging.NullHandler()) diff --git a/hydropandas/extensions/plots.py b/hydropandas/extensions/plots.py index d25bdc0e..b33cb830 100644 --- a/hydropandas/extensions/plots.py +++ 
b/hydropandas/extensions/plots.py @@ -735,9 +735,9 @@ def series_per_group( if savefig: if isinstance(by, list): by_name = "-".join(by) + groupname = "-".join(groupname) else: by_name = by - groupname = "-".join(groupname) if naming_method is None: filename = f"series_by_{by_name}_group_{groupname}.png" elif naming_method == "infer_name_monitoring_well": diff --git a/hydropandas/io/bro.py b/hydropandas/io/bro.py index 92322def..4f6499dd 100644 --- a/hydropandas/io/bro.py +++ b/hydropandas/io/bro.py @@ -5,6 +5,7 @@ import json import logging import xml.etree.ElementTree +from functools import lru_cache import numpy as np import pandas as pd @@ -12,6 +13,7 @@ from pyproj import Proj, Transformer from tqdm import tqdm +from ..rcparams import rcParams from ..util import EPSG_28992 logger = logging.getLogger(__name__) @@ -422,8 +424,9 @@ def get_full_metadata_from_gmw(bro_id, tube_nr): return meta -def get_tube_nrs_from_gmw(bro_id): - """returns all tube numbers from a groundwater monitoring well (gmw) +@lru_cache() +def _get_gmw_from_bro_id(bro_id, retries=0): + """get a gmw object from a bro_id Parameters ---------- @@ -432,31 +435,71 @@ def get_tube_nrs_from_gmw(bro_id): Returns ------- - list of int - tube numbers + Element + xml reference to gmw object + + Raises + ------ + ValueError + if bro_id is invalid. 
""" + if not bro_id.startswith("GMW"): + raise ValueError("can only get metadata if bro id starts with GMW") + ns = { "dsgmw": "http://www.broservices.nl/xsd/dsgmw/1.1", "gmwcommon": "http://www.broservices.nl/xsd/gmwcommon/1.1", "gml": "http://www.opengis.net/gml/3.2", } - if not bro_id.startswith("GMW"): - raise ValueError("can only get metadata if bro id starts with GMW") - url = f"https://publiek.broservices.nl/gm/gmw/v1/objects/{bro_id}" req = requests.get(url) # read results tree = xml.etree.ElementTree.fromstring(req.text) - # get gmw gmws = tree.findall(".//dsgmw:GMW_PO", ns) if len(gmws) != 1: + max_retries = rcParams["bro"]["max_retries"] + val_ind = req.text.find("valid") + valid = req.text[(val_ind + 9) : (val_ind + 14)] + if valid == "false" and retries < max_retries: + logger.debug( + f"got invalid response for {bro_id}, trying again {retries+1}/{max_retries}" + ) + return _get_gmw_from_bro_id(bro_id, retries=retries + 1) + elif valid == "false": + raise Exception( + f"got invalid response for {bro_id} after trying {retries} times" + ) raise (Exception("Only one gmw supported")) gmw = gmws[0] + return gmw + + +def get_tube_nrs_from_gmw(bro_id): + """returns all tube numbers from a groundwater monitoring well (gmw) + + Parameters + ---------- + bro_id : str + bro id of groundwater monitoring well e.g. 'GMW000000036287'. + + Returns + ------- + list of int + tube numbers + """ + ns = { + "dsgmw": "http://www.broservices.nl/xsd/dsgmw/1.1", + "gmwcommon": "http://www.broservices.nl/xsd/gmwcommon/1.1", + "gml": "http://www.opengis.net/gml/3.2", + } + + gmw = _get_gmw_from_bro_id(bro_id) + # get tube nrs tube_numbers = [ int(tube.text) @@ -490,28 +533,16 @@ def get_metadata_from_gmw(bro_id, tube_nr): dictionary with metadata. 
""" + if not isinstance(tube_nr, int): + raise TypeError(f"expected integer got {type(tube_nr)}") + ns = { "dsgmw": "http://www.broservices.nl/xsd/dsgmw/1.1", "gmwcommon": "http://www.broservices.nl/xsd/gmwcommon/1.1", "gml": "http://www.opengis.net/gml/3.2", } - if not bro_id.startswith("GMW"): - raise ValueError("can only get metadata if bro id starts with GMW") - - if not isinstance(tube_nr, int): - raise TypeError(f"expected integer got {type(tube_nr)}") - - url = f"https://publiek.broservices.nl/gm/gmw/v1/objects/{bro_id}" - req = requests.get(url) - - # read results - tree = xml.etree.ElementTree.fromstring(req.text) - - gmws = tree.findall(".//dsgmw:GMW_PO", ns) - if len(gmws) != 1: - raise (Exception("Only one gmw supported")) - gmw = gmws[0] + gmw = _get_gmw_from_bro_id(bro_id) meta = {"monitoring_well": bro_id, "tube_nr": tube_nr, "source": "BRO"} diff --git a/hydropandas/io/knmi.py b/hydropandas/io/knmi.py index e8568ffe..e78cf995 100644 --- a/hydropandas/io/knmi.py +++ b/hydropandas/io/knmi.py @@ -1023,7 +1023,7 @@ def get_knmi_daily_meteo_url(stn: int) -> Tuple[pd.DataFrame, Dict[str, Any]]: def read_knmi_file( - path: Union[str, Path, StringIO] + path: Union[str, Path, StringIO], ) -> Tuple[pd.DataFrame, Dict[str, Any]]: """read knmi daily meteo data from a file diff --git a/hydropandas/io/pastas.py b/hydropandas/io/pastas.py index 0ecba10a..f3af54cf 100644 --- a/hydropandas/io/pastas.py +++ b/hydropandas/io/pastas.py @@ -3,6 +3,7 @@ @author: Artesia """ + import logging import numbers diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 3e837dea..b7a5e1a8 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -812,6 +812,19 @@ def read_pastastore( ) +def _obscollection_constructor_with_fallback(*args, **kwargs): + """ + A flexible constructor for ObsCollection._constructor, which falls back + to returning a DataFrame (if a certain operation does not preserve the + obs column). 
Copied from geopandas. + """ + oc = ObsCollection(*args, **kwargs) + if "obs" not in oc.columns: + oc = pd.DataFrame(oc) + + return oc + + class ObsCollection(pd.DataFrame): """Class for a collection of point observations. @@ -871,7 +884,7 @@ def __init__(self, *args, **kwargs): @property def _constructor(self): - return ObsCollection + return _obscollection_constructor_with_fallback def _infer_otype(self): """Infer observation type from the obs column. diff --git a/hydropandas/observation.py b/hydropandas/observation.py index 9731dc38..71fcb0be 100644 --- a/hydropandas/observation.py +++ b/hydropandas/observation.py @@ -17,11 +17,11 @@ import logging import os import warnings +from _io import StringIO from typing import List, Optional import numpy as np import pandas as pd -from _io import StringIO from pandas._config import get_option from pandas.api.types import is_numeric_dtype from pandas.io.formats import console diff --git a/hydropandas/rcparams.py b/hydropandas/rcparams.py new file mode 100644 index 00000000..8650953d --- /dev/null +++ b/hydropandas/rcparams.py @@ -0,0 +1 @@ +rcParams = {"bro": {"max_retries": 5}} diff --git a/hydropandas/version.py b/hydropandas/version.py index b449fac5..4cc307b9 100644 --- a/hydropandas/version.py +++ b/hydropandas/version.py @@ -1,7 +1,7 @@ from importlib import metadata from sys import version as os_version -__version__ = "0.12.1" +__version__ = "0.12.2" def show_versions(): diff --git a/pyproject.toml b/pyproject.toml index 53aed96e..7892ed99 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,11 +65,10 @@ rtd = [ "nbsphinx", "nbsphinx_link", ] -linting = ["flake8", "ruff"] -formatting = ["black[jupyter]", "isort"] +ruffing = ["ruff"] pytesting = ["hydropandas[full,rtd]", "pytest>=7", "pytest-cov", "pytest-sugar"] coveraging = ["coverage"] -dev = ["hydropandas[linting,formatting,pytesting]", "tox"] +dev = ["hydropandas[ruffing,pytesting]", "tox"] [tool.setuptools] include-package-data = true @@ -99,7 +98,7 @@ 
markers = ["slow: mark test as slow."] legacy_tox_ini = """ [tox] requires = tox>=4 - env_list = format, lint, notebooks, py{39, 310, 311, 312} + env_list = ruff, notebooks, py{39, 310, 311, 312} [testenv] description = run unit tests @@ -114,19 +113,18 @@ legacy_tox_ini = """ coverage run -m pytest tests coverage xml - [testenv:format] - description = run formatters - basepython = python3.9 - extras = formatting + [testenv:ruff] + description = run ruff checks + basepython = python3.11 + extras = ruffing commands = - black . --check --diff - isort . --check-only --diff + ruff check --extend-select I --preview + ruff format --check - [testenv:lint] - description = run linters - basepython = python3.9 - extras = linting + [testenv:ruff_fix] + description = run ruff locally and fix issues + extras = ruffing commands = - flake8 . --max-line-length=88 --ignore=E741,W503 - ruff check . + ruff check --extend-select I --fix + ruff format """ diff --git a/readme.md b/readme.md index ac518779..41363b6e 100644 --- a/readme.md +++ b/readme.md @@ -2,7 +2,8 @@ [![PyPi](https://img.shields.io/pypi/v/hydropandas.svg)](https://pypi.python.org/pypi/hydropandas) [![PyPi Supported Python Versions](https://img.shields.io/pypi/pyversions/hydropandas)](https://pypi.python.org/pypi/hydropandas) -[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/ArtesiaWater/hydropandas/master) +[![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/ArtesiaWater/hydropandas?quickstart=1) + [![hydropandas](https://github.com/ArtesiaWater/hydropandas/workflows/hydropandas/badge.svg)](https://github.com/ArtesiaWater/hydropandas/actions?query=workflow%3Ahydropandas) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/c1b99f474bdc49b0a47e00e4e9f66c2f)](https://www.codacy.com/gh/ArtesiaWater/hydropandas/dashboard?utm_source=github.com&utm_medium=referral&utm_content=ArtesiaWater/hydropandas&utm_campaign=Badge_Grade) diff --git a/tests/test_001_to_from.py b/tests/test_001_to_from.py index b8511f70..bbcddde4
100644 --- a/tests/test_001_to_from.py +++ b/tests/test_001_to_from.py @@ -18,7 +18,7 @@ def test_bro_gld(): def test_bro_gmn(): # single observation - bro_id = "GMN000000000163" + bro_id = "GMN000000001084" # 34 objects as per 2024-7-11 hpd.read_bro(bro_id=bro_id, only_metadata=True) @@ -31,7 +31,7 @@ def test_bro_extent(): def test_bro_extent_too_big(): - extent = (102395, 213550, 334331, 473920) # to many observations in extent + extent = (102395, 213550, 334331, 473920) # too many observations in extent with pytest.raises(HTTPError): hpd.read_bro(extent=extent, only_metadata=True) diff --git a/tests/test_002_obs_objects.py b/tests/test_002_obs_objects.py index 244c2729..0bc037a1 100644 --- a/tests/test_002_obs_objects.py +++ b/tests/test_002_obs_objects.py @@ -100,6 +100,17 @@ def test_copy_obs(): assert "answer" in o3.meta.keys(), "copy method failed" +def test_returns(): + # check if a DataFrame is returned when an ObsCollection is sliced without the + # 'obs' column + oc = _obscollection_from_list() + + assert isinstance(oc.loc[:, ["x", "y"]], pd.DataFrame) + assert not isinstance(oc.loc[:, ["x", "y"]], hpd.ObsCollection) + + assert isinstance(oc.loc[:, ["x", "y", "obs"]], hpd.ObsCollection) + + def test_convert_waterlvl_groundwater_obs(): # create WaterlvlObs df = pd.DataFrame( diff --git a/tests/test_005_dino.py b/tests/test_005_dino.py index 1ded9a79..3e03ecb3 100644 --- a/tests/test_005_dino.py +++ b/tests/test_005_dino.py @@ -18,12 +18,12 @@ def test_dino_csv_new_style(): def test_dino_csv_duplicate_index(): # contains 1 duplicate index 2019-11-19 fname = "./tests/data/2019-Dino-test/Grondwaterstanden_Put/B22D0155001_1.csv" - measurements, meta = dino.read_dino_groundwater_csv(fname) + measurements, _ = dino.read_dino_groundwater_csv(fname) # check if measurements contains duplicate indices assert measurements.index.duplicated().any() - measurements, meta = dino.read_dino_groundwater_csv( + measurements, _ = dino.read_dino_groundwater_csv( fname, 
remove_duplicates=True, keep_dup="last" ) diff --git a/tests/test_006_knmi.py b/tests/test_006_knmi.py index 79da5f56..70ef4628 100644 --- a/tests/test_006_knmi.py +++ b/tests/test_006_knmi.py @@ -147,8 +147,8 @@ def test_calculate_evaporation(): def test_download_knmi_xy(): - df1, meta1 = knmi.get_knmi_obs(meteo_var="RH", stn=344) - df2, meta2 = knmi.get_knmi_obs(meteo_var="RH", xy=(90600, 442800)) + df1, _ = knmi.get_knmi_obs(meteo_var="RH", stn=344) + df2, _ = knmi.get_knmi_obs(meteo_var="RH", xy=(90600, 442800)) assert df1.equals(df2), "Dataframes should be identical" diff --git a/tests/test_013_lizard.py b/tests/test_013_lizard.py index d7d65e2e..a24f6046 100644 --- a/tests/test_013_lizard.py +++ b/tests/test_013_lizard.py @@ -21,17 +21,14 @@ def test_codes(): def test_many_tubed_well(): - oc = hpd.read_lizard(codes="EEWP004", tube_nr="all") assert not oc.empty def test_complex_well(): - oc = hpd.read_lizard(codes="BUWP014", tube_nr="all") assert not oc.empty def test_combine(): - hpd.GroundwaterObs.from_lizard("39F-0736", tube_nr=1, type_timeseries="combine")