Merge pull request #118 from ArtesiaWater/dev

bug fixes
ArtesiaWater · Mar 31, 2023 · 0039a76 · 0039a76
2 parents c31a5cb + 9f48d49
commit 0039a76
Show file tree

Hide file tree

Showing 11 changed files with 280 additions and 148 deletions.
diff --git a/hydropandas/extensions/plots.py b/hydropandas/extensions/plots.py
@@ -378,6 +378,7 @@ def section_plot(
             - limit y-axis of section plot to observations only
             - remove the checking (if obs are near bottom) from this function
             - moving the legend outside the plot
+            - set a tighter xlim on the observation plot when tmin is not specified
         """
 
         # prepare column for x location in section plot
@@ -555,6 +556,8 @@ def section_plot(
         else:
             ax_section.set_xlabel(section_label_x)
 
+        ax_obs.set_xlim(left=tmin, right=tmax)
+
         # rotate labels on observation axis
         ax_obs.set_xticks(
             ax_obs.get_xticks(),

diff --git a/hydropandas/io/bro.py b/hydropandas/io/bro.py
@@ -16,7 +16,7 @@
 
 
 def get_obs_list_from_gmn(bro_id, ObsClass, only_metadata=False, keep_all_obs=True):
-    """
+    """Get a list of observations from a groundwater monitoring network.
 
     Parameters
     ----------
@@ -60,7 +60,8 @@ class of the observations, so far only GroundwaterObs is supported
     gmn = tree.find(".//xmlns:GMN_PO", ns)
     gmws = gmn.findall("xmlns:measuringPoint", ns)
 
-    logger.info(f"{len(gmws)} groundwater monitoring wells within groundwater meetnet")
+    logger.info(
+        f"{len(gmws)} groundwater monitoring wells within groundwater meetnet")
 
     obs_list = []
     for gmw in tqdm(gmws):
@@ -130,26 +131,37 @@ def get_bro_groundwater(bro_id, tube_nr=None, only_metadata=False, **kwargs):
 
     elif bro_id.startswith("GMW"):
         if tube_nr is None:
-            raise ValueError("if bro_id is GMW a filternumber should be specified")
+            raise ValueError(
+                "if bro_id is GMW a filternumber should be specified")
 
         meta = get_metadata_from_gmw(bro_id, tube_nr)
-        gld_id = get_gld_id_from_gmw(bro_id, tube_nr)
+        gld_ids = get_gld_ids_from_gmw(bro_id, tube_nr)
 
-        if gld_id is None:
+        if gld_ids is None:
             meta["name"] = f"{bro_id}_{tube_nr}"
             only_metadata = True  # cannot get time series without gld id
         else:
-            meta["name"] = gld_id
+            meta["name"] = f"{bro_id}_{tube_nr}"
+            meta["gld_ids"] = gld_ids
 
         if only_metadata:
             empty_df = pd.DataFrame()
             return empty_df, meta
 
-        return measurements_from_gld(gld_id, **kwargs)
+        for i, gld_id in enumerate(gld_ids):
+            if i == 0:
+                df, meta_new = measurements_from_gld(gld_id, **kwargs)
+                meta.update(meta_new)
+            else:
+                df_new, meta_new = measurements_from_gld(gld_id, **kwargs)
+                df = pd.concat([df, df_new], axis=1)
+                meta.update(meta_new)
+
+        return df, meta
 
 
-def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):
-    """get bro_id of a grondwterstandendossier (gld) from a bro_id of a
+def get_gld_ids_from_gmw(bro_id, tube_nr):
+    """get bro_ids of multiple grondwaterstandendossier (gld) from a bro_id of a
     grondwatermonitoringsput (gmw).
 
     Parameters
@@ -158,8 +170,6 @@ def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):
         starts with 'GLD' or 'GMW' e.g. 'GMW000000036287'.
     tube_nr : int
         tube number.
-    quality_regime : str
-        either choose 'IMBRO/A' or 'IMBRO'.
 
     Raises
     ------
@@ -170,8 +180,8 @@ def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):
 
     Returns
     -------
-    str
-        bro_id of a grondwaterstandonderzoek (gld).
+    list of str or None
+        bro_ids of the grondwaterstandendossiers (gld) of the tube, or None when
+        the tube has no gld references.
 
     """
     if not bro_id.startswith("GMW"):
@@ -193,37 +203,14 @@ def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):
 
     for tube in d["monitoringTubeReferences"]:
         if tube["tubeNumber"] == tube_nr:
-            if len(tube["gldReferences"]) == 1:
-                return tube["gldReferences"][0]["broId"]
-            elif len(tube["gldReferences"]) == 0:
+            if len(tube["gldReferences"]) == 0:
                 logger.info(
                     f"no groundwater level dossier for {bro_id} and tube number"
                     f"{tube_nr}"
                 )
                 return None
-            elif len(tube["gldReferences"]) == 2:
-                logger.info(
-                    f"two gld references found for GMW {bro_id} and tube nr"
-                    f"{tube_nr}, using {quality_regime} quality regime"
-                )
-                for gldref in tube["gldReferences"]:
-                    url2 = gldref["url"]
-                    req2 = requests.get(url2)
-                    ns = {
-                        "ns11": "http://www.broservices.nl/xsd/dsgld/1.0",
-                        "brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
-                    }
-                    tree = xml.etree.ElementTree.fromstring(req2.text)
-                    gld = tree.findall(".//ns11:GLD_O", ns)[0]
-                    qualityRegime = gld.find("brocom:qualityRegime", ns).text
-                    if qualityRegime == quality_regime:
-                        return gldref["broId"]
-                logger.info(
-                    f"no gld reference with quality regime {quality_regime} was found"
-                )
-                return None
             else:
-                raise RuntimeError("unexpected number of gld references")
+                return [gldref["broId"] for gldref in tube["gldReferences"]]
 
 
 def measurements_from_gld(
@@ -295,11 +282,13 @@ def measurements_from_gld(
         raise (Exception("Only one gld supported"))
     gld = glds[0]
 
-    meta = {"name": bro_id, "source": "BRO"}
-    meta["monitoring_well"] = gld.find("ns11:monitoringPoint//gldcommon:broId", ns).text
+    meta = {"source": "BRO"}
+    meta["monitoring_well"] = gld.find(
+        "ns11:monitoringPoint//gldcommon:broId", ns).text
     meta["tube_nr"] = int(
         gld.find("ns11:monitoringPoint//gldcommon:tubeNumber", ns).text
     )
+    meta["name"] = f"{meta['monitoring_well']}_{meta['tube_nr']}"
     gmn = gld.find("ns11:groundwaterMonitoringNet//gldcommon:broId", ns)
     if gmn is None:
         meta["monitoringsnet"] = None
@@ -313,7 +302,8 @@ def measurements_from_gld(
         np.nan if value.text is None else float(value.text)
         for value in gld.findall(f"{msts}//waterml:value", ns)
     ]
-    qualifiers = [q.text for q in gld.findall(f"{msts}//swe:Category//swe:value", ns)]
+    qualifiers = [q.text for q in gld.findall(
+        f"{msts}//swe:Category//swe:value", ns)]
 
     # to dataframe
     df = pd.DataFrame(
@@ -574,6 +564,13 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
         DESCRIPTION.
 
     """
+
+    if only_metadata and not keep_all_obs:
+        logger.error(
+            "you will get an empty ObsCollection with only_metadata is True and"
+            "keep_all_obs is False"
+        )
+
     url = "https://publiek.broservices.nl/gm/gmw/v1/characteristics/searches?"
 
     data = {}
@@ -615,7 +612,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
         [gmw.text for gmw in tree.findall(".//dsgmw:GMW_C//brocom:broId", ns)]
     )
 
-    if len(gmws_ids) > 1000:
+    if len(gmws_ids) > 1000 and not ignore_max_obs:
         ans = input(
             f"You requested to download {len(gmws_ids)} observations, this can"
             "take a while. Are you sure you want to continue [Y/n]? "

diff --git a/hydropandas/io/fews.py b/hydropandas/io/fews.py
@@ -184,8 +184,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
                     loc = h_attr.text
                     if loc not in locationIds:
                         element.clear()
-                        logger.info(
-                            f" ... skipping '{loc}', not in locationIds")
+                        logger.info(f" ... skipping '{loc}', not in locationIds")
                         continue
 
                 if filterdict is not None:
@@ -508,8 +507,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
             source="FEWS",
         )
     else:
-        o = ObsClass(ts, x=x, y=y, unit=unit, meta=header,
-                     name=name, source="FEWS")
+        o = ObsClass(ts, x=x, y=y, unit=unit, meta=header, name=name, source="FEWS")
 
     return o, header
 
@@ -581,8 +579,7 @@ def write_pi_xml(obs_coll, fname, timezone=1.0, version="1.24"):
                         tag=htag, date=hdate, time=htime
                     )
                 elif htag.endswith("timeStep"):
-                    hline = '<{tag} unit="{unit}"/>\n'.format(
-                        tag=htag, unit=hval)
+                    hline = '<{tag} unit="{unit}"/>\n'.format(tag=htag, unit=hval)
                 else:
                     hline = paramline.format(tag=htag, param=hval)
                 hlines.append(3 * "\t" + hline)

diff --git a/hydropandas/io/menyanthes.py b/hydropandas/io/menyanthes.py
@@ -1,24 +1,70 @@
-# -*- coding: utf-8 -*-
-"""Created on Thu Oct 10 11:01:22 2019.
-
-@author: oebbe
-"""
-
 import logging
 import os
 
 import numpy as np
-from pandas import DataFrame, Series
+from pandas import DataFrame, Series, Timedelta, Timestamp
 from scipy.io import loadmat
 
 from ..observation import GroundwaterObs, WaterlvlObs
-from ..util import matlab2datetime
 
 logger = logging.getLogger(__name__)
 
 
+def matlab2datetime(tindex):
+    """
+    Transform a MATLAB serial date number to a pandas Timestamp, keeping the
+    fractional part of the day as the time of day.
+
+    Parameters
+    ----------
+    tindex : float
+        The MATLAB serial date number to convert.
+
+    Returns
+    -------
+    datetime : pandas.Timestamp
+        The timestamp equivalent to the MATLAB serial date number.
+
+    Notes
+    -----
+    MATLAB serial date numbers count the days elapsed since January 0, 0000
+    (proleptic ISO calendar), so that January 1, 0000 is day 1. Fractions of a
+    day are represented by the decimal part.
+
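+    The result is not rounded: the fractional day is converted with
+    ``Timedelta(days=...)``. The 366 days subtracted in the code account for the
+    offset between MATLAB day numbers and Python proleptic Gregorian ordinals
+    (in Python, day 1 is January 1 of year 1).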
+
+    Examples
+    --------
+    >>> matlab2datetime(719529.5)
+    Timestamp('1970-01-01 12:00:00')
+
+    """
+    day = Timestamp.fromordinal(int(tindex))
+    dayfrac = Timedelta(days=float(tindex) % 1) - Timedelta(days=366)
+    return day + dayfrac
+
+
 def read_file(fname, ObsClass, load_oseries=True, load_stresses=True):
-    """This method is used to read the file."""
+    """
+    Read data from a Menyanthes file and create observation objects.
+
+    Parameters
+    ----------
+    fname : str
+        Name of the Menyanthes file to read.
+    ObsClass : GroundwaterObs or WaterlvlObs
+        Class of observation object to create.
+    load_oseries : bool, optional
+        Flag indicating whether to load observation series or not, by default
+        True.
+    load_stresses : bool, optional
+        Flag indicating whether to load stresses or not, by default True.
+
+    Returns
+    -------
+    obs_list : list
+        List of observation objects created from the Menyanthes file.
+    """
 
     logger.info(f"reading menyanthes file {fname}")
 
@@ -117,7 +163,38 @@ def read_file(fname, ObsClass, load_oseries=True, load_stresses=True):
 
 
 def read_oseries(mat):
-    """Read the oseries from a mat file from menyanthes."""
+    """Read the oseries from a mat file from menyanthes.
+
+    Parameters
+    ----------
+    mat : dict
+        A dictionary object containing the Menyanthes file data.
+
+    Returns
+    -------
+    dict
+        A dictionary containing oseries data, with oseries names as keys and
+        their corresponding metadata and values as values.
+
+    Notes
+    -----
+    This function reads the oseries data from a Menyanthes file in .mat format
+    and returns it in a dictionary format. The oseries data contains the
+    following metadata:
+        - name: The name of the oseries.
+        - x: The x-coordinate of the oseries location.
+        - y: The y-coordinate of the oseries location.
+        - source: The data source.
+        - unit: The unit of measurement.
+
+    In addition to the metadata, the oseries data also contains a pandas Series
+    object named 'values', which contains the time series data for the oseries.
+
+    Examples
+    --------
+    >>> mat = loadmat('menyanthes_file.mat')
+    >>> d_h = read_oseries(mat)
+    """
     d_h = {}
 
     # Check if more than one time series model is present
@@ -158,12 +235,20 @@ def read_oseries(mat):
 
 
 def read_stresses(mat):
+    """Reads the stresses from a mat file from menyanthes.
+
+    Parameters
+    ----------
+    mat : dict
+        A dictionary object containing the mat file.
+
+    Returns
+    -------
+    dict
+        A dictionary object containing the stresses data.
+    """
     d_in = {}
 
-    # Check if more then one time series is present
-    # if not isinstance(mat["IN"], np.ndarray):
-    #     mat["IN"] = [mat["IN"]]
-
     # Read all the time series
     for i, IN in enumerate(mat["IN"]):
         if not hasattr(IN, "Name") and not hasattr(IN, "name"):