diff --git a/LICENSE b/LICENSE index 21f1959a..ff3c8765 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2020 O.E. Ebbens, D.A. Brakenhoff, R. Calje +Copyright (c) 2020 O.N. Ebbens, D.A. Brakenhoff, R. Calje Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/hydropandas/__init__.py b/hydropandas/__init__.py index 3097634a..4c02fbe3 100644 --- a/hydropandas/__init__.py +++ b/hydropandas/__init__.py @@ -15,6 +15,7 @@ read_fews, read_imod, read_knmi, + read_lizard, read_menyanthes, read_modflow, read_pastastore, diff --git a/hydropandas/extensions/gwobs.py b/hydropandas/extensions/gwobs.py index d88fb69e..3391ee3e 100644 --- a/hydropandas/extensions/gwobs.py +++ b/hydropandas/extensions/gwobs.py @@ -298,19 +298,21 @@ def get_zvec(x, y, gwf=None, ds=None): cid = nlmod.dims.xy_to_icell2d((x, y), ds) sel = ds.sel(icell2d=cid) - zvec = np.concatenate(([sel["top"].data], sel["botm"].data)) + zvec = np.concatenate(([sel["top"].values], sel["botm"].values)) mask = np.isnan(zvec) idx = np.where(~mask, np.arange(mask.size), 0) np.maximum.accumulate(idx, axis=0, out=idx) zvec[mask] = zvec[idx[mask]] else: sel = ds.sel(x=x, y=y, method="nearest") - first_notna = np.nonzero(np.isfinite(np.atleast_1d(sel["top"].data)))[0][0] - if sel["top"].data.shape == tuple(): - top = np.atleast_1d(sel["top"].data) + first_notna = np.nonzero(np.isfinite(np.atleast_1d(sel["top"].values)))[0][ + 0 + ] + if sel["top"].values.shape == tuple(): + top = np.atleast_1d(sel["top"].values) else: - top = np.atleast_1d(sel["top"].data[[first_notna]]) - zvec = np.concatenate([top, sel["botm"].data]) + top = np.atleast_1d(sel["top"].values[[first_notna]]) + zvec = np.concatenate([top, sel["botm"].values]) mask = np.isnan(zvec) idx = np.where(~mask, np.arange(mask.size), 0) np.maximum.accumulate(idx, axis=0, out=idx) diff --git a/hydropandas/extensions/plots.py b/hydropandas/extensions/plots.py index 491a552b..f53d099b 100644 --- a/hydropandas/extensions/plots.py +++ b/hydropandas/extensions/plots.py @@ -4,7 +4,9 @@ import matplotlib.pyplot as plt import numpy as np from matplotlib.gridspec import GridSpec +from tqdm.auto import tqdm +from ..observation import GroundwaterObs from . import accessor logger = logging.getLogger(__name__) @@ -644,6 +646,59 @@ def section_plot( return fig, axes + def series_per_group(self, plot_column, by=None, savefig=True, outputdir="."): + """Plot time series per group. + + The default groupby is based on identical x, y coordinates, so plots unique + time series per location. + + Parameters + ---------- + plot_column : str + name of column containing time series data + by : (list of) str or (list of) array-like + groupby parameters, default is None which sets groupby to + columns ["x", "y"]. 
+ savefig : bool, optional + save figures, by default True + outputdir : str, optional + path to output directory, by default the current directory (".") + """ + if by is None: + by = ["x", "y"] + gr = self._obj.groupby(by=by) + for _, group in tqdm(gr, desc="Plotting series per group", total=len(gr)): + f, ax = plt.subplots(1, 1, figsize=(10, 3)) + for name, row in group.iterrows(): + if isinstance(row.obs, GroundwaterObs): + lbl = ( + f"{name} (NAP{row['screen_top']:+.1f}" + f"-{row['screen_bottom']:+.1f}m)" + ) + else: + lbl = f"{name}" + ax.plot( + row.obs.index, + row.obs[plot_column], + label=lbl, + ) + ax.legend( + loc=(0, 1), + frameon=False, + ncol=min(group.index.size, 3), + fontsize="x-small", + ) + ax.set_ylabel(row["unit"]) + ax.grid(True) + f.tight_layout() + if savefig: + if isinstance(row.obs, GroundwaterObs): + name = name.split("-")[0] + f.savefig( + os.path.join(outputdir, f"{name}.png"), bbox_inches="tight", dpi=150 + ) + plt.close(f) + @accessor.register_obs_accessor("plots") class ObsPlots: diff --git a/hydropandas/io/fews.py b/hydropandas/io/fews.py index 736e786b..240d7970 100644 --- a/hydropandas/io/fews.py +++ b/hydropandas/io/fews.py @@ -395,11 +395,13 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs [d + " " + t for d, t in zip(date, time)], errors="coerce" ) ts = pd.DataFrame(events, index=index) - ts.loc[:, "value"] = ts.loc[:, "value"].astype(float) - if remove_nan and (not ts.empty): - ts.dropna(subset=["value"], inplace=True) - header["unit"] = "m NAP" + if not ts.empty: + ts["value"] = ts["value"].astype(float) + + if remove_nan: + ts.dropna(subset=["value"], inplace=True) + header["unit"] = "m NAP" o, header = _obs_from_meta(ts, header, translate_dic, ObsClass) if locationIds is not None: diff --git a/hydropandas/io/lizard.py b/hydropandas/io/lizard.py new file mode 100644 index 00000000..d93e6358 --- /dev/null +++ b/hydropandas/io/lizard.py @@ -0,0 +1,672 @@ +import logging +import math +import pathlib +from concurrent.futures import ThreadPoolExecutor + +import geopandas +import pandas as pd +import requests +from pyproj import Transformer +from shapely.geometry import Polygon +from tqdm import tqdm + +logger = logging.getLogger(__name__) + +# TODO: +# - check transformation from EPSG:28992 to WGS84 (elsewhere in hydropandas we use +# another definition for EPSG:28992 that is provided in util.py) + +# NOTE: currently only the vitens API is officially supported. If/when new endpoints +# are added we should check whether we want to add the URL as argument or add supported +# sources to this dictionary: +LIZARD_APIS = {"vitens": "https://vitens.lizard.net/api/v4/"} + + +def check_status_obs(metadata, timeseries): + """Checks if a monitoring tube is still active. + + If there are no measurements in the last 180 days, the monitoring + tube is considered inactive. 
+ + Parameters + ---------- + metadata : dict + metadata of the monitoring tube + timeseries : pandas DataFrame + timeseries of the monitoring well + + Returns + ------- + metadata DataFrame including the status of the monitoring well + """ + if timeseries.empty: + metadata["status"] = "no timeseries available" + return metadata + + last_measurement_date = timeseries.last_valid_index() + today = pd.to_datetime("today").normalize() + + if today - last_measurement_date < pd.Timedelta(days=180): + metadata["status"] = "active" + + else: + metadata["status"] = "inactive" + + return metadata + + +def extent_to_wgs84_polygon(extent): + """Translates an extent (xmin, xmax, ymin, ymax) to a polygon with coordinate system + WGS84. + + Parameters + ---------- + extent : list or tuple + extent in epsg 28992 within which the observations are collected. + + Returns + ------- + polygon of the extent with coordinate system WGS84 + """ + transformer = Transformer.from_crs("EPSG:28992", "WGS84") + + lon_min, lat_min = transformer.transform(extent[0], extent[2]) + lon_max, lat_max = transformer.transform(extent[1], extent[3]) + + poly_T = Polygon( + [(lat_min, lon_min), (lat_max, lon_min), (lat_max, lon_max), (lat_min, lon_max)] + ) + + return poly_T + + +def translate_flag(timeseries): + """Translates Vitens Lizard flags from integer to text. + + Parameters + ---------- + timeseries : pandas.DataFrame + timeseries of a monitoring well with flags + + Returns + ------- + timeseries : pandas.DataFrame + timeseries with translated quality flags + """ + translate_dic = { + 0: "betrouwbaar", + 1: "betrouwbaar", + 3: "onbeslist", + 4: "onbeslist", + 6: "onbetrouwbaar", + 7: "onbetrouwbaar", + 99: "onongevalideerd", + -99: "verwijderd", + } + timeseries["flag"] = timeseries["flag"].replace(translate_dic) + + return timeseries + + +def get_metadata_mw_from_code(code, source="vitens"): + """Extracts the Groundwater Station parameters from a monitoring well based on the + code of the monitoring well. + + Parameters + ---------- + code : str + code of the monitoring well + source : str + source indicating URL endpoint, currently only "vitens" is officially supported. + + Raises + ------ + ValueError + if code of the monitoring well is not known + + Returns + ------- + groundwaterstation_metadata : dict + dictionary with all available metadata of the monitoring well and its filters + """ + lizard_GWS_endpoint = f"{LIZARD_APIS[source]}groundwaterstations/" + url_groundwaterstation_code = f"{lizard_GWS_endpoint}?code={code}" + + try: + groundwaterstation_metadata = requests.get(url_groundwaterstation_code).json()[ + "results" + ][0] + + except IndexError: + raise ValueError(f"Code {code} is invalid") + + return groundwaterstation_metadata + + +def _prepare_API_input(nr_pages, url_groundwater): + """Get API data pages within the defined extent. + + Parameters + ---------- + nr_pages : int + number of the pages on which the information is stored + url_groundwater : str + location of the used API to extract the data + + Returns + ------- + urls : list + list of the page number and the corresponding url + """ + urls = [] + for page in range(nr_pages): + true_page = page + 1 # The real page number is attached to the import thread + urls = [url_groundwater + "&page={}".format(true_page)] + return urls + + +def _download(url, timeout=1800): + """Function to download the data from the API using the ThreadPoolExecutor. 
+ + Parameters + ---------- + url : str + url of an API page + timeout : int, optional + number of seconds to wait before terminating request + + Returns + ------- + dictionary with timeseries data + """ + data = requests.get(url=url, timeout=timeout) + data = data.json()["results"] + + return data + + +def get_metadata_tube(metadata_mw, tube_nr): + """Extract the metadata for a specific tube from the monitoring well metadata. + + Parameters + ---------- + metadata_mw : dict + dictionary with all available metadata of the monitoring well and all its + filters + tube_nr : int or None + select metadata from a specific tube number + + Raises + ------ + ValueError + if code of the monitoring well is invalid. + + Returns + ------- + dictionary with metadata of a specific tube + """ + + if tube_nr is None: + tube_nr = 1 + + metadata = { + "monitoring_well": metadata_mw["name"], + "ground_level": metadata_mw["surface_level"], + "source": "lizard", + "unit": "m NAP", + "metadata_available": True, + "status": None, + } + + for metadata_tube in metadata_mw["filters"]: + if metadata_tube["code"].endswith(str(tube_nr)): + break + else: + raise ValueError(f"{metadata_mw['name']} doesn't have a tube number {tube_nr}") + + metadata.update( + { + "tube_nr": tube_nr, + "name": metadata_tube["code"].replace("-", ""), + "tube_top": metadata_tube["top_level"], + "screen_top": metadata_tube["filter_top_level"], + "screen_bottom": metadata_tube["filter_bottom_level"], + } + ) + + lon, lat, _ = metadata_mw["geometry"]["coordinates"] + transformer = Transformer.from_crs("WGS84", "EPSG:28992") + metadata["x"], metadata["y"] = transformer.transform(lat, lon) + + if not metadata_tube["timeseries"]: + metadata["timeseries_type"] = None + else: + for series in metadata_tube["timeseries"]: + series_info = requests.get(series).json() + if series_info["name"] == "WNS9040.hand": + metadata["uuid_hand"] = series_info["uuid"] + metadata["start_hand"] = series_info["start"] + elif series_info["name"] == "WNS9040": + metadata["uuid_diver"] = series_info["uuid"] + metadata["start_diver"] = series_info["start"] + + if (metadata.get("start_hand") is None) and ( + metadata.get("start_diver") is None + ): + metadata["timeseries_type"] = None + elif (metadata.get("start_hand") is not None) and ( + metadata.get("start_diver") is not None + ): + metadata["timeseries_type"] = "diver + hand" + elif metadata.get("start_hand") is None: + metadata["timeseries_type"] = "diver" + elif metadata.get("start_diver") is None: + metadata["timeseries_type"] = "hand" + + return metadata + + +def get_timeseries_uuid(uuid, tmin, tmax, page_size=100000, source="vitens"): + """ + Get the time series (hand or diver) using the uuid. + + ---------- + uuid : str + Universally Unique Identifier of the tube and type of time series. + tmin : str YYYY-m-d + start of the observations, by default the entire serie is returned + tmax : int YYYY-m-d + end of the observations, by default the entire serie is returned + page_size : int, optional + Query parameter which can extend the response size. The default is 100000. 
+ source : str, optional + source indicating URL endpoint, currently only "vitens" is officially supported + + Returns + ------- + pd.DataFrame + pandas DataFrame with the timeseries of the monitoring well + """ + + url_timeseries = LIZARD_APIS[source] + "timeseries/{}".format(uuid) + + if tmin is not None: + tmin = pd.to_datetime(tmin).isoformat("T") + + if tmax is not None: + tmax = pd.to_datetime(tmax).isoformat("T") + + params = {"start": tmin, "end": tmax, "page_size": page_size} + url = url_timeseries + "/events/" + + time_series_events = requests.get(url=url, params=params).json()["results"] + time_series_df = pd.DataFrame(time_series_events) + + if time_series_df.empty: + return pd.DataFrame() + + else: + time_series_df = translate_flag(time_series_df) + + timeseries_sel = time_series_df.loc[:, ["time", "value", "flag", "comment"]] + timeseries_sel["time"] = pd.to_datetime( + timeseries_sel["time"], format="%Y-%m-%dT%H:%M:%SZ", errors="coerce" + ) + pd.DateOffset(hours=1) + + timeseries_sel = timeseries_sel[~timeseries_sel["time"].isnull()] + + timeseries_sel.set_index("time", inplace=True) + timeseries_sel.index.rename("peil_datum_tijd", inplace=True) + # timeseries_sel.dropna(inplace=True) + + return timeseries_sel + + +def _merge_timeseries(hand_measurements, diver_measurements): + """Merges the timeseries of the hand and diver measurements into one timeserie. + + Parameters + ---------- + hand_measurements : DataFrame + DataFrame containing the hand measurements of the monitoring well + diver_measurements : DataFrame + DataFrame containing the Diver measurements of the monitoring well + + Returns + ------- + DataFrame where hand and diver measurements are merged in one timeseries + """ + if hand_measurements.empty and diver_measurements.empty: + measurements = pd.DataFrame() + + elif diver_measurements.first_valid_index() is None: + measurements = hand_measurements + print( + "no diver measuremets available for {}".format( + hand_measurements.iloc[0]["name"] + ) + ) + + else: + hand_measurements_sel = hand_measurements.loc[ + hand_measurements.index < diver_measurements.first_valid_index() + ] + measurements = pd.concat([hand_measurements_sel, diver_measurements], axis=0) + + return measurements + + +def _combine_timeseries(hand_measurements, diver_measurements): + """Combines the timeseries of the hand and diver measurements into one DataFrame. + + Parameters + ---------- + hand_measurements : DataFrame + DataFrame containing the hand measurements of the monitoring well + diver_measurements : DataFrame + DataFrame containing the Diver measurements of the monitoring well + + Returns + ------- + a combined DataFrame with both hand, and diver measurements + DESCRIPTION. + """ + hand_measurements.rename( + columns={"value": "value_hand", "flag": "flag_hand"}, inplace=True + ) + diver_measurements.rename( + columns={"value": "value_diver", "flag": "flag_diver"}, inplace=True + ) + + measurements = pd.concat([hand_measurements, diver_measurements], axis=1) + measurements = measurements.loc[ + :, ["value_hand", "value_diver", "flag_hand", "flag_diver"] + ] + measurements.loc[:, "name"] = hand_measurements.loc[:, "name"][0] + measurements.loc[:, "filter_nr"] = hand_measurements.loc[:, "filter_nr"][0] + + return measurements + + +def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries): + """Extracts multiple timeseries (hand and/or diver measurements) for a specific tube + using the Lizard API. 
+ + Parameters + ---------- + tube_metadata : dict + metadata of a tube + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (default) + combine: keeps hand and diver measurements separeted + + Returns + ------- + measurements : pandas DataFrame + timeseries of the monitoring well + metadata_df : dict + metadata of the monitoring well + """ + if tube_metadata["timeseries_type"] is None: + return pd.DataFrame(), tube_metadata + + if type_timeseries in ["hand", "merge", "combine"]: + if "hand" in tube_metadata["timeseries_type"]: + hand_measurements = get_timeseries_uuid( + tube_metadata.pop("uuid_hand"), + tmin, + tmax, + ) + else: + hand_measurements = None + + if type_timeseries in ["diver", "merge", "combine"]: + if "diver" in tube_metadata["timeseries_type"]: + diver_measurements = get_timeseries_uuid( + tube_metadata.pop("uuid_diver"), + tmin, + tmax, + ) + else: + diver_measurements = None + + if type_timeseries == "hand" and hand_measurements is not None: + measurements = hand_measurements + elif type_timeseries == "diver" and diver_measurements is not None: + measurements = diver_measurements + elif type_timeseries in ["merge", "combine"]: + if (hand_measurements is not None) and (diver_measurements is not None): + if type_timeseries == "merge": + measurements = _merge_timeseries(hand_measurements, diver_measurements) + elif type_timeseries == "combine": + measurements = _combine_timeseries( + hand_measurements, diver_measurements + ) + elif hand_measurements is not None: + measurements = hand_measurements + elif diver_measurements is not None: + measurements = diver_measurements + + return measurements, tube_metadata + + +def get_lizard_groundwater( + code, + tube_nr=None, + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, +): + """Extracts the metadata and timeseries of an observation well from a LIZARD-API + based on the code of a monitoring well. + + Parameters + ---------- + code : str + code of the measuring well, e.g. '27B-0444' + tube_nr : int, optional + select specific tube top + Default selects tube_nr = 1 + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (default) + combine: keeps hand and diver measurements separated + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. 
+ + Returns + ------- + measurements : pd.DataFrame + returns a DataFrame with metadata and timeseries + tube_metadata : dict + dictionary containing metadata + """ + + groundwaterstation_metadata = get_metadata_mw_from_code(code) + + tube_metadata = get_metadata_tube(groundwaterstation_metadata, tube_nr) + + if only_metadata: + return pd.DataFrame(), tube_metadata + + measurements, tube_metadata = get_timeseries_tube( + tube_metadata, tmin, tmax, type_timeseries + ) + tube_metadata = check_status_obs(tube_metadata, measurements) + + return measurements, tube_metadata + + +def get_obs_list_from_codes( + codes, + ObsClass, + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, +): + """Get all observations from a list of codes of the monitoring wells and a list of + tube numbers. + + Parameters + ---------- + codes : list of str or str + codes of the monitoring wells + ObsClass : type + class of the observations, e.g. GroundwaterObs + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (default) + combine: keeps hand and diver measurements separeted + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + + Returns + ------- + obs_list + list of observations + """ + + if isinstance(codes, str): + codes = [codes] + + if not hasattr(codes, "__iter__"): + raise TypeError("argument 'codes' should be an iterable") + + obs_list = [] + for code in codes: + groundwaterstation_metadata = get_metadata_mw_from_code(code) + if tube_nr == "all": + for metadata_tube in groundwaterstation_metadata["filters"]: + tube_nr = int(metadata_tube["code"][-3:]) + o = ObsClass.from_lizard( + code, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + obs_list.append(o) + else: + o = ObsClass.from_lizard( + code, tube_nr, tmin, tmax, type_timeseries, only_metadata=only_metadata + ) + obs_list.append(o) + + return obs_list + + +def get_obs_list_from_extent( + extent, + ObsClass, + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, + page_size=100, + nr_threads=10, + source="vitens", +): + """Get all observations within a specified extent. + + Parameters + ---------- + extent : list or shapefile + get groundwater monitoring wells wihtin this extent [xmin, xmax, ymin, ymax] + or within a predefined Polygon from a shapefile + ObsClass : type + class of the observations, e.g. GroundwaterObs + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. + tmin : str, optional + start of the observations (format YYYY-m-d), by default the entire series + is returned + tmax : str, optional + end of the observations (format YYYY-m-d), by default the entire series + is returned + type_timeseries : str, optional + merge: the hand and diver measurements into one time series (merge; default) or + combine: keeps hand and diver measurements separeted + The default is merge. 
+ only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + source : str + source indicating URL endpoint, currently only "vitens" is officially supported. + + + Returns + ------- + obs_col : TYPE + ObsCollection DataFrame with the 'obs' column + """ + + if isinstance(extent, (list, tuple)): + polygon_T = extent_to_wgs84_polygon(extent) + + elif isinstance(extent, str) or isinstance(extent, pathlib.PurePath): + polygon = geopandas.read_file(extent) + # TODO: check this transformation + polygon_T = polygon.to_crs("WGS84", "EPSG:28992").loc[0, "geometry"] + else: + raise TypeError("Extent should be a shapefile or a list of coordinates") + + lizard_GWS_endpoint = f"{LIZARD_APIS[source]}groundwaterstations/" + url_groundwaterstation_extent = ( + f"{lizard_GWS_endpoint}?geometry__within={polygon_T}&page_size={page_size}" + ) + + groundwaterstation_data = requests.get(url_groundwaterstation_extent).json() + nr_results = groundwaterstation_data["count"] + nr_pages = math.ceil(nr_results / page_size) + + print("Number of monitoring wells: {}".format(nr_results)) + print("Number of pages: {}".format(nr_pages)) + + if nr_threads > nr_pages: + nr_threads = nr_pages + + urls = _prepare_API_input(nr_pages, url_groundwaterstation_extent) + + arg_tuple = (ObsClass, tube_nr, tmin, tmax, type_timeseries, only_metadata) + codes = [] + with ThreadPoolExecutor(max_workers=nr_threads) as executor: + for result in tqdm(executor.map(_download, urls), total=nr_pages, desc="Page"): + codes += [(d["code"],) + arg_tuple for d in result] + + obs_list = [] + with ThreadPoolExecutor() as executor: + for obs_list_mw in tqdm( + executor.map(lambda args: get_obs_list_from_codes(*args), codes), + total=len(codes), + desc="monitoring well", + ): + obs_list += obs_list_mw + + return obs_list diff --git a/hydropandas/obs_collection.py b/hydropandas/obs_collection.py index 11d5173e..70a2a6da 100644 --- a/hydropandas/obs_collection.py +++ b/hydropandas/obs_collection.py @@ -1,4 +1,4 @@ -"""module with ObsCollection class for a collection of observations. +"""Module with ObsCollection class for a collection of observations. The ObsCollection class is a subclass of a pandas DataFrame with additional attributes and methods. @@ -20,6 +20,60 @@ logger = logging.getLogger(__name__) +def read_lizard( + extent=None, + codes=None, + name="", + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, +): + """Get all observations from a list of codes of the monitoring wells and a list of + tube numbers. + + Parameters + ---------- + extent : list, shapefile path or None + get groundwater monitoring wells within this extent [xmin, ymin, xmax, ymax] + or within a predefined Polygon from a shapefile + codes : lst of str or None + codes of the monitoring wells + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. + tmin : str YYYY-m-d, optional + start of the observations, by default the entire time series is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire time series is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (default) + combine: keeps hand and diver measurements separeted + only_metadata : bool, optional + if True only metadata is returned and no time series data. 
The + default is False. + + Returns + ------- + ObsCollection + ObsCollection DataFrame with the 'obs' column + """ + oc = ObsCollection.from_lizard( + extent=extent, + codes=codes, + name=name, + tube_nr=tube_nr, + tmin=tmin, + tmax=tmax, + type_timeseries=type_timeseries, + only_metadata=only_metadata, + ) + return oc + + def read_bro( extent=None, bro_id=None, @@ -31,9 +85,7 @@ def read_bro( epsg=28992, ignore_max_obs=False, ): - """get all the observations within an extent or within a - groundwatermonitoring net. - + """Get all the observations within an extent or within a groundwatermonitoring net. Parameters ---------- @@ -65,7 +117,6 @@ def read_bro( ------- ObsCollection ObsCollection DataFrame with the 'obs' column - """ oc = ObsCollection.from_bro( @@ -125,8 +176,8 @@ def read_dino( name=None, **kwargs, ): - """Read dino observations within an extent from the server or from a - directory with downloaded files. + """Read dino observations within an extent from the server or from a directory with + downloaded files. Parameters ---------- @@ -167,9 +218,9 @@ class of the observations, so far only GroundwaterObs is supported def read_excel(path, meta_sheet_name="metadata"): - """Create an observation collection from an excel file. The excel file - should have the same format as excel files created with the `to_excel` - method of an ObsCollection. + """Create an observation collection from an excel file. The excel file should have + the same format as excel files created with the `to_excel` method of an + ObsCollection. Parameters ---------- @@ -340,8 +391,7 @@ def read_knmi( use_api=True, raise_exceptions=True, ): - """Get knmi observations from a list of locations or a list of - stations. + """Get knmi observations from a list of locations or a list of stations. Parameters ---------- @@ -506,7 +556,7 @@ class of the observations, can be PrecipitationObs, EvaporationObs def read_menyanthes( path, name="", ObsClass=obs.Obs, load_oseries=True, load_stresses=True ): - """read a Menyanthes file + """Read a Menyanthes file. Parameters ---------- @@ -596,7 +646,7 @@ def read_pickle( compression="infer", storage_options=None, ): - """wrapper around pd.read_pickle + """Wrapper around pd.read_pickle. Parameters ---------- @@ -759,7 +809,7 @@ def read_pastastore( class ObsCollection(pd.DataFrame): - """class for a collection of point observations. + """Class for a collection of point observations. An ObsCollection object is a subclass of a pandas.DataFrame and allows for additional attributes and methods. Additional attributes are @@ -773,7 +823,6 @@ class ObsCollection(pd.DataFrame): name of the observation collection meta : dic metadata of the observation collection - """ # temporary properties @@ -965,8 +1014,8 @@ def _is_consistent(self, check_individual_obs=True): return True def add_observation(self, o, check_consistency=True, **kwargs): - """add an observation to an existing observation collection. If the - observation exists the two observations are merged. + """Add an observation to an existing observation collection. If the observation + exists the two observations are merged. Parameters ---------- @@ -1002,7 +1051,6 @@ def add_observation(self, o, check_consistency=True, **kwargs): Returns ------- None. 
- """ if check_consistency: if not self._is_consistent(): @@ -1029,8 +1077,8 @@ def add_observation(self, o, check_consistency=True, **kwargs): def add_obs_collection( self, obs_collection, check_consistency=True, inplace=False, **kwargs ): - """add one observation collection to another observation - collection. See add_observation method for more details + """Add one observation collection to another observation collection. See + add_observation method for more details. Parameters ---------- @@ -1068,7 +1116,6 @@ def add_obs_collection( ------- ObsCollection or None merged ObsCollection if ``inplace=True``. - """ if check_consistency: if not self._is_consistent(): @@ -1127,9 +1174,8 @@ def from_bro( epsg=28992, ignore_max_obs=False, ): - """get all the observations within an extent or within a - groundwatermonitoring net. - + """Get all the observations within an extent or within a groundwatermonitoring + net. Parameters ---------- @@ -1161,7 +1207,6 @@ def from_bro( ------- ObsCollection ObsCollection DataFrame with the 'obs' column - """ from .io.bro import get_obs_list_from_extent, get_obs_list_from_gmn @@ -1193,14 +1238,84 @@ def from_bro( return cls(obs_df, name=name, meta=meta) + @classmethod + def from_lizard( + cls, + extent=None, + codes=None, + name="", + tube_nr="all", + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, + ): + """Get all observations within a specified extent. + + Parameters + ---------- + extent : list, shapefile path or None + get groundwater monitoring wells wihtin this extent [xmin, ymin, xmax, ymax] + or within a predefined Polygon from a shapefile + codes : lst of str or None + codes of the monitoring wells + tube_nr : lst of str + list of tube numbers of the monitoring wells that should be selected. + By default 'all' available tubes are selected. + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (default) + combine: keeps hand and diver measurements separeted + The default is merge. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + + Returns + ------- + ObsCollection + ObsCollection DataFrame with the 'obs' column + """ + + from .io.lizard import get_obs_list_from_codes, get_obs_list_from_extent + + if extent is not None: + obs_list = get_obs_list_from_extent( + extent, + obs.GroundwaterObs, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + elif codes is not None: + obs_list = get_obs_list_from_codes( + codes, + obs.GroundwaterObs, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + else: + raise ValueError("specify codes or extent") + + return cls(obs_list, name=name) + @classmethod def from_bronhouderportaal_bro( cls, dirname, full_meta=False, ): - """get all the metadata from dirname. - + """Get all the metadata from dirname. 
Parameters ---------- @@ -1213,7 +1328,6 @@ def from_bronhouderportaal_bro( ------- ObsCollection ObsCollection DataFrame without the 'obs' column - """ from .io.bronhouderportaal_bro import get_obs_list_from_dir @@ -1230,8 +1344,8 @@ def from_bronhouderportaal_bro( @classmethod def from_dataframe(cls, df, obs_list=None, ObsClass=obs.GroundwaterObs): - """Create an observation collection from a DataFrame by adding a column - with empty observations. + """Create an observation collection from a DataFrame by adding a column with + empty observations. Parameters ---------- @@ -1261,9 +1375,9 @@ def from_dataframe(cls, df, obs_list=None, ObsClass=obs.GroundwaterObs): @classmethod def from_excel(cls, path, meta_sheet_name="metadata"): - """Create an observation collection from an excel file. The excel file - should have the same format as excel files created with the `to_excel` - method of an ObsCollection. + """Create an observation collection from an excel file. The excel file should + have the same format as excel files created with the `to_excel` method of an + ObsCollection. Parameters ---------- @@ -1315,8 +1429,8 @@ def from_dino( name=None, **kwargs, ): - """Read dino data within an extent from the server or from a directory - with downloaded files. + """Read dino data within an extent from the server or from a directory with + downloaded files. Parameters ---------- @@ -1619,8 +1733,7 @@ def from_knmi( use_api=True, raise_exceptions=True, ): - """Get knmi observations from a list of locations or a list of - stations. + """Get knmi observations from a list of locations or a list of stations. Parameters ---------- @@ -1732,7 +1845,7 @@ class of the observations, can be PrecipitationObs, EvaporationObs @classmethod def from_list(cls, obs_list, name=""): - """read observations from a list of obs objects. + """Read observations from a list of obs objects. Parameters ---------- @@ -1919,9 +2032,8 @@ def from_pastastore( return cls(obs_df, name=pstore.name, meta=meta) def to_excel(self, path, meta_sheet_name="metadata"): - """Write an ObsCollection to an excel, the first sheet in the - excel contains the metadata, the other tabs are the timeseries of each - observation. + """Write an ObsCollection to an excel, the first sheet in the excel contains the + metadata, the other tabs are the timeseries of each observation. The excel can be read using the read_excel function of hydropandas. @@ -1976,7 +2088,7 @@ def to_pi_xml(self, fname, timezone="", version="1.24"): fews.write_pi_xml(self, fname, timezone=timezone, version=version) def to_gdf(self, xcol="x", ycol="y", crs=28992, drop_obs=True): - """convert ObsCollection to GeoDataFrame. + """Convert ObsCollection to GeoDataFrame. Parameters ---------- @@ -2012,7 +2124,7 @@ def to_pastastore( conn=None, overwrite=False, ): - """add observations to a new or existing pastastore. + """Add observations to a new or existing pastastore. Parameters ---------- @@ -2055,7 +2167,7 @@ def to_pastastore( return pstore def to_shapefile(self, path, xcol="x", ycol="y"): - """save ObsCollection as shapefile. + """Save ObsCollection as shapefile. Parameters ---------- @@ -2090,8 +2202,8 @@ def to_shapefile(self, path, xcol="x", ycol="y"): gdf.to_file(path) def add_meta_to_df(self, key="all"): - """Get the values from the meta dictionary of each observation object - and add these to the ObsCollection as a column. + """Get the values from the meta dictionary of each observation object and add + these to the ObsCollection as a column. 
to the ObsCollection @@ -2154,8 +2266,8 @@ def interpolate( epsilon: Optional[int] = None, col: Optional[str] = None, ): - """Interpolation method for ObsCollections using the Scipy radial basis - function (RBF) + """Interpolation method for ObsCollections using the Scipy radial basis function + (RBF) Parameters ---------- diff --git a/hydropandas/observation.py b/hydropandas/observation.py index 730e3882..deb57d20 100644 --- a/hydropandas/observation.py +++ b/hydropandas/observation.py @@ -30,7 +30,7 @@ class Obs(pd.DataFrame): - """generic class for a time series with measurements at a certain location. + """Generic class for a time series with measurements at a certain location. Unless specified explicitly the first numeric column in the observation is used for analysis and plotting. @@ -61,17 +61,16 @@ class Obs(pd.DataFrame): _metadata = ["name", "x", "y", "meta", "filename", "source", "unit"] def __init__(self, *args, **kwargs): - """constructor of Obs class. + """Constructor of Obs class. - *args must be input for the pandas.DataFrame constructor, - **kwargs can be one of the attributes listed in _metadata or - keyword arguments for the constructor of a pandas.DataFrame. + *args must be input for the pandas.DataFrame constructor, **kwargs can be one of + the attributes listed in _metadata or keyword arguments for the constructor of a + pandas.DataFrame. """ - if len(args) > 0: - if isinstance(args[0], Obs): - for key in args[0]._metadata: - if (key in Obs._metadata) and (key not in kwargs.keys()): - kwargs[key] = getattr(args[0], key) + if (len(args) > 0) and isinstance(args[0], Obs): + for key in args[0]._metadata: + if (key in Obs._metadata) and (key not in kwargs): + kwargs[key] = getattr(args[0], key) self.x = kwargs.pop("x", np.nan) self.y = kwargs.pop("y", np.nan) @@ -122,11 +121,8 @@ def __repr__(self) -> str: return buf.getvalue() - def _repr_html_(self, collapse=True): - """ - Uses the pandas DataFrame html representation with the metadata - prepended. - """ + def _repr_html_(self, collapse=False): + """Uses the pandas DataFrame html representation with the metadata prepended.""" obs_type = f'
<p>hydropandas.{type(self).__name__}</p>
\n' metadata_dic = {key: getattr(self, key) for key in self._metadata} @@ -185,13 +181,12 @@ def _constructor(self): return Obs def _get_first_numeric_col_name(self): - """get the first numeric column name of the observations + """Get the first numeric column name of the observations. Returns ------- col : str or int column name. - """ if self.empty: return None @@ -203,7 +198,7 @@ def _get_first_numeric_col_name(self): return col def copy(self, deep=True): - """create a copy of the observation. + """Create a copy of the observation. When ``deep=True`` (default), a new object will be created with a copy of the calling object's data and indices. Modifications to @@ -241,8 +236,7 @@ def copy(self, deep=True): return o def to_collection_dict(self, include_meta=False): - """get dictionary with registered attributes and their values of an Obs - object. + """Get dictionary with registered attributes and their values of an Obs object. This method can be used to create a dataframe from a collection of Obs objects. @@ -272,8 +266,7 @@ def to_collection_dict(self, include_meta=False): return d def merge_metadata(self, right, overlap="error"): - """Merge the metadata of an Obs object with metadata from another Obs - object. + """Merge the metadata of an Obs object with metadata from another Obs object. Parameters ---------- @@ -347,7 +340,7 @@ def merge_metadata(self, right, overlap="error"): return new_metadata def _merge_timeseries(self, right, overlap="error"): - """merge two timeseries. + """Merge two timeseries. Parameters ---------- @@ -525,10 +518,13 @@ class GroundwaterObs(Obs): ] def __init__(self, *args, **kwargs): - """ - *args must be input for the pandas.DataFrame constructor, - **kwargs can be one of the attributes listed in _metadata or - keyword arguments for the constructor of a pandas.DataFrame. + """Constructor for ObsCollection. + + Parameters + ---------- + *args must be input for the pandas.DataFrame constructor + **kwargs can be one of the attributes listed in _metadata or keyword arguments + for the constructor of a pandas.DataFrame. """ if len(args) > 0: if isinstance(args[0], Obs): @@ -561,8 +557,7 @@ def from_bro( drop_duplicate_times=True, only_metadata=False, ): - """download BRO groundwater observations from the server. - + """Download BRO groundwater observations from the server. Parameters ---------- @@ -581,12 +576,14 @@ def from_bro( drop_duplicate_times : bool, optional if True rows with a duplicate time stamp are removed keeping only the first row. The default is True. + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False Returns ------- TYPE DESCRIPTION. - """ from .io import bro @@ -618,6 +615,72 @@ def from_bro( tube_top=meta.pop("tube_top"), ) + @classmethod + def from_lizard( + cls, + code, + tube_nr=None, + tmin=None, + tmax=None, + type_timeseries="merge", + only_metadata=False, + ): + """Extracts the metadata and timeseries of a observation well from a LIZARD-API + based on the code of a monitoring well. 
+ + Parameters + ---------- + code : str + code of the measuring well + tube_nr : int, optional + select specific tube top + Default selects tube_nr = 1 + tmin : str YYYY-m-d, optional + start of the observations, by default the entire serie is returned + tmax : Ttr YYYY-m-d, optional + end of the observations, by default the entire serie is returned + type_timeseries : str, optional + hand: returns only hand measurements + diver: returns only diver measurements + merge: the hand and diver measurements into one time series (default) + combine: keeps hand and diver measurements separated + only_metadata : bool, optional + if True only metadata is returned and no time series data. The + default is False. + + Returns + ------- + ObsCollection + Returns a DataFrame with metadata and timeseries + """ + + from .io import lizard + + measurements, meta = lizard.get_lizard_groundwater( + code, + tube_nr, + tmin, + tmax, + type_timeseries, + only_metadata=only_metadata, + ) + return cls( + measurements, + name=meta.pop("name"), + x=meta.pop("x"), + y=meta.pop("y"), + source=meta.pop("source"), + unit=meta.pop("unit"), + screen_bottom=meta.pop("screen_bottom"), + screen_top=meta.pop("screen_top"), + ground_level=meta.pop("ground_level"), + metadata_available=meta.pop("metadata_available"), + monitoring_well=meta.pop("monitoring_well"), + tube_nr=meta.pop("tube_nr"), + tube_top=meta.pop("tube_top"), + meta=meta, + ) + @classmethod def from_bronhouderportaal_bro( cls, @@ -625,9 +688,8 @@ def from_bronhouderportaal_bro( tube_nr, full_meta=False, ): - """load BRO groundwater metadata from XML file. Mind that - bro_id is applicable, because file is not yet imported in BRO - + """Load BRO groundwater metadata from XML file. Mind that bro_id is applicable, + because file is not yet imported in BRO. Parameters ---------- @@ -640,9 +702,8 @@ def from_bronhouderportaal_bro( Returns ------- - TYPE - DESCRIPTION. - + ObsCollection + ObsCollection containing observations from XML file. """ from .io import bronhouderportaal_bro @@ -677,7 +738,7 @@ def from_dino( path=None, **kwargs, ): - """download dino data from the server. + """Download dino data from the server. Parameters ---------- @@ -696,7 +757,7 @@ def from_dino( @classmethod def from_artdino_file(cls, path=None, **kwargs): - """read a dino csv file (artdiver style). + """Read a dino csv file (artdiver style). Parameters ---------- @@ -714,8 +775,7 @@ def from_artdino_file(cls, path=None, **kwargs): @classmethod def from_wiski(cls, path, **kwargs): - """ - Read data from a WISKI file. + """Read data from a WISKI file. Parameters: ----------- @@ -779,8 +839,7 @@ def from_pastastore(cls, pstore, libname, name, metadata_mapping=None): class WaterQualityObs(Obs): - """class for water quality ((grond)watersamenstelling) point - observations. + """Class for water quality ((grond)watersamenstelling) point observations. Subclass of the Obs class """ @@ -814,7 +873,7 @@ def _constructor(self): @classmethod def from_dino(cls, path, **kwargs): - """read dino file with groundwater quality data. + """Read dino file with groundwater quality data. Parameters ---------- @@ -832,7 +891,7 @@ def from_dino(cls, path, **kwargs): class WaterlvlObs(Obs): - """class for water level point observations. + """Class for water level point observations. Subclass of the Obs class """ @@ -857,7 +916,7 @@ def _constructor(self): @classmethod def from_dino(cls, path, **kwargs): - """read a dino file with waterlvl data. + """Read a dino file with waterlvl data. 
Parameters ---------- @@ -900,7 +959,7 @@ def from_waterinfo(cls, path, **kwargs): class ModelObs(Obs): - """class for model point results. + """Class for model point results. Subclass of the Obs class """ @@ -924,7 +983,7 @@ def _constructor(self): class MeteoObs(Obs): - """class for meteorological timeseries. + """Class for meteorological timeseries. Subclass of the Obs class """ @@ -1045,7 +1104,7 @@ def from_wow( start: Optional[pd.Timestamp] = None, end: Optional[pd.Timestamp] = None, ): - """Get a MeteoObs timeseries from a wow.knmi.nl station + """Get a MeteoObs timeseries from a wow.knmi.nl station. Parameters ---------- @@ -1084,7 +1143,7 @@ def from_wow( class EvaporationObs(MeteoObs): - """class for evaporation timeseries. + """Class for evaporation timeseries. Subclass of the MeteoObs class """ @@ -1175,7 +1234,7 @@ def from_knmi( class PrecipitationObs(MeteoObs): - """class for precipitation timeseries. + """Class for precipitation timeseries. Subclass of the MeteoObs class """ @@ -1297,7 +1356,7 @@ def from_wow( start: Optional[pd.Timestamp] = None, end: Optional[pd.Timestamp] = None, ): - """Get a PrecipitationObs timeseries from a wow.knmi.nl station + """Get a PrecipitationObs timeseries from a wow.knmi.nl station. Parameters ---------- diff --git a/hydropandas/util.py b/hydropandas/util.py index bd4de076..35e7da51 100644 --- a/hydropandas/util.py +++ b/hydropandas/util.py @@ -27,7 +27,7 @@ def _obslist_to_frame(obs_list): - """convert a list of observations to a pandas DataFrame. + """Convert a list of observations to a pandas DataFrame. Parameters ---------- @@ -96,20 +96,21 @@ def unzip_file(src, dst, force=False, preserve_datetime=False): def get_files( file_or_dir, ext, unpackdir=None, force_unpack=False, preserve_datetime=False ): - """internal method to get list of files with specific extension from - dirname. + """Internal method to get list of files with specific extension from dirname. Parameters ---------- file_or_dir : str - file or path to data + file or path to data. ext : str - extension of filenames to store in list + extension of filenames to store in list. + unpackdir : str + directory to story unpacked zip file, only used in case of a zipfile. force_unpack : bool, optional - force unzip, by default False + force unzip, by default False. preserve_datetime : bool, optional - preserve datetime of unzipped files, by default False - (useful for checking whether data has changed) + preserve datetime of unzipped files, by default False. Used for + checking whether data has changed. 
""" # check if unpackdir is same as file_or_dir, if same, this can cause # problems when the unpackdir still contains zips that will be unpacked @@ -117,6 +118,7 @@ def get_files( if unpackdir is not None: if os.path.normcase(unpackdir) == os.path.normcase(file_or_dir): raise ValueError("Please specify a different folder to unpack files!") + # identify whether file_or_dir started as zip if file_or_dir.endswith(".zip"): iszip = True @@ -260,17 +262,18 @@ def get_color_logger(level="INFO"): handler = logging.StreamHandler(sys.stdout) handler.setFormatter(formatter) - logger = logging.getLogger() - logger.handlers[:] = [] - logger.addHandler(handler) - logger.setLevel(getattr(logging, level)) + clogger = logging.getLogger() + clogger.handlers[:] = [] + clogger.addHandler(handler) + clogger.setLevel(getattr(logging, level)) logging.captureWarnings(True) - return logger + return clogger def oc_to_df(oc, col: Optional[str] = None) -> pd.DataFrame: - """convert an observation collection to + """Convert an observation collection to a DataFrame where every column has one + observation. Parameters ---------- @@ -305,7 +308,6 @@ def interpolate( ) -> pd.DataFrame: """Interpolation method using the Scipy radial basis function (RBF) - Parameters ---------- xy : List[List[float]] diff --git a/hydropandas/version.py b/hydropandas/version.py index c5981731..61fb31ca 100644 --- a/hydropandas/version.py +++ b/hydropandas/version.py @@ -1 +1 @@ -__version__ = "0.9.3" +__version__ = "0.10.0" diff --git a/tests/test_011_bro.py b/tests/test_011_bro.py index c7ee5ba7..7c9ebf1d 100644 --- a/tests/test_011_bro.py +++ b/tests/test_011_bro.py @@ -19,7 +19,7 @@ def test_metadata_full(): def test_groundwater_monitoring_net_metadata(): - bro_id = "GMN000000000163" + bro_id = "GMN000000000001" bro.get_obs_list_from_gmn(bro_id, hpd.GroundwaterObs, only_metadata=True) @@ -53,7 +53,7 @@ def test_get_gld_ids_from_gmw(): def test_obs_list_from_extent(): - extent = (102395, 103121, 434331, 434750) + # extent = (102395, 103121, 434331, 434750) extent = [116500, 120000, 439000, 442000] bro.get_obs_list_from_extent( extent, hpd.GroundwaterObs, tmin=None, tmax=None, epsg=28992, only_metadata=True diff --git a/tests/test_013_lizard.py b/tests/test_013_lizard.py new file mode 100644 index 00000000..299666b8 --- /dev/null +++ b/tests/test_013_lizard.py @@ -0,0 +1,18 @@ +import hydropandas as hpd + + +def test_single_observation(): + code = "27BP0003" + o = hpd.GroundwaterObs.from_lizard(code) + assert o.tube_nr == 1 + + +def test_extent(): + extent = [201500, 202000, 502000, 502200] + oc = hpd.read_lizard(extent) + assert not oc.empty + + +def test_codes(): + oc = hpd.read_lizard(codes="27BP0003") + assert not oc.empty