Skip to content

Commit

Permalink
Merge pull request #191 from ArtesiaWater/dev
Browse files Browse the repository at this point in the history
Update master for new release (0.11.0)
  • Loading branch information
OnnoEbbens authored Mar 11, 2024
2 parents 0950abe + 18ab689 commit b60fb6c
Show file tree
Hide file tree
Showing 16 changed files with 4,626 additions and 86 deletions.
9 changes: 4 additions & 5 deletions examples/02_knmi_observations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1477,11 +1477,10 @@
"oc = hpd.ObsCollection([precip1, precip2])\n",
"gdf = oc.to_gdf()\n",
"gdf = gdf.set_crs(28992)\n",
"gdf = gdf.to_crs(3857)\n",
"gdf[\"name\"] = gdf.index\n",
"ax = gdf.buffer(2000).plot(alpha=0, figsize=(8, 8))\n",
"gdf.plot(\"name\", ax=ax, cmap=\"jet\", legend=True, markersize=100)\n",
"cx.add_basemap(ax)"
"cx.add_basemap(ax, crs=28992)"
]
},
{
Expand Down Expand Up @@ -3331,9 +3330,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "hpd_env",
"display_name": "dev",
"language": "python",
"name": "python3"
"name": "dev"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -3345,7 +3344,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
16 changes: 7 additions & 9 deletions hydropandas/io/bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,12 @@ def get_bro_groundwater(bro_id, tube_nr=None, only_metadata=False, **kwargs):
empty_df = pd.DataFrame()
return empty_df, meta

dfl = []
for i, gld_id in enumerate(gld_ids):
if i == 0:
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
else:
df_new, meta_new = measurements_from_gld(gld_id, **kwargs)
df = pd.concat([df, df_new], axis=1)
meta.update(meta_new)
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
dfl.append(df)
df = pd.concat(dfl, axis=0)

return df, meta

Expand Down Expand Up @@ -305,15 +303,15 @@ def measurements_from_gld(

# to dataframe
df = pd.DataFrame(
index=pd.to_datetime(times),
index=pd.to_datetime(times, utc=True).tz_convert("CET"),
data={"values": values, "qualifier": qualifiers},
)

# wintertime
if to_wintertime:
# remove time zone information by transforming to dutch winter time
df.index = pd.to_datetime(df.index, utc=True).tz_localize(None) + pd.Timedelta(
1, unit="H"
1, unit="h"
)

# duplicates
Expand Down
4 changes: 3 additions & 1 deletion hydropandas/io/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ def get_knmi_obs(
ts, meta = get_knmi_timeseries_stn(stn, meteo_var, settings, start, end)
elif fname is not None:
logger.info(f"get KNMI data from file {fname} and meteo variable {meteo_var}")
ts, meta = get_knmi_timeseries_fname(fname, meteo_var, settings, start, end)
ts, meta = get_knmi_timeseries_fname(
str(fname), meteo_var, settings, start, end
)
elif xy is not None:
logger.info(
f"get KNMI data from station nearest to coordinates {xy} and meteo"
Expand Down
109 changes: 87 additions & 22 deletions hydropandas/io/lizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def _prepare_API_input(nr_pages, url_groundwater):
urls = []
for page in range(nr_pages):
true_page = page + 1 # The real page number is attached to the import thread
urls = [url_groundwater + "&page={}".format(true_page)]
urls += [url_groundwater + "&page={}".format(true_page)]
return urls


Expand All @@ -185,6 +185,32 @@ def _download(url, timeout=1800):
return data


def _split_mw_tube_nr(code):
"""get the tube number from a code that consists of the name and the tube number.
Parameters
----------
code : str
name + tube_nr. e.g. 'BUWP014-11' or 'BUWP014012'
Returns
-------
monitoring well, tube_number (str, int)
Notes
-----
The format of the name + tube_nr is not very consistent and this function may need
further finetuning.
"""

if code[-3:].isdigit():
return code[:-3], int(code[-3:])
else:
# assume there is a '-' to split name and filter number
tube_nr = code.split("-")[-1]
return code.strip(f"-{tube_nr}"), int(tube_nr)


def get_metadata_tube(metadata_mw, tube_nr):
"""Extract the metadata for a specific tube from the monitoring well metadata.
Expand Down Expand Up @@ -218,30 +244,65 @@ def get_metadata_tube(metadata_mw, tube_nr):
"status": None,
}

metadata_tube_list = []
for metadata_tube in metadata_mw["filters"]:
if metadata_tube["code"].endswith(str(tube_nr)):
break
else:
# check if name+filternr ends with three digits
code, tbnr = _split_mw_tube_nr(metadata_tube["code"])
if tbnr == tube_nr:
metadata_tube_list.append(metadata_tube)

if len(metadata_tube_list) == 0:
raise ValueError(f"{metadata_mw['name']} doesn't have a tube number {tube_nr}")
elif len(metadata_tube_list) == 1:
mtd_tube = metadata_tube_list[0]
elif len(metadata_tube_list) > 1:
# tube has probably been replaced, multiple tubes with the same code and tube nr
# merge metadata from all tubes
logger.info(
f"there are {len(metadata_tube_list)} instances of {code} and tube "
f"{tube_nr}, trying to merge all in one observation object"
)
mtd_tube = metadata_tube_list[0].copy()
relevant_keys = {
"top_level",
"filter_top_level",
"filter_bottom_level",
"timeseries",
}
for metadata_tube in metadata_tube_list:
for key in set(metadata_tube.keys()) & relevant_keys:
# check if properties are always the same for a tube number
val = metadata_tube[key]
if key in ["top_level", "filter_top_level", "filter_bottom_level"]:
if val != mtd_tube[key]:
logger.warning(
f"multiple {key} values found ({val} & {mtd_tube[key]})"
f" for {code} and tube {tube_nr}, using {mtd_tube[key]}"
)
# merge time series from all tubes with the same code and tube number
elif key == "timeseries":
mtd_tube[key] += val

mtd_tube["code"] = f"{code}{tube_nr}"

metadata.update(
{
"tube_nr": tube_nr,
"name": metadata_tube["code"].replace("-", ""),
"tube_top": metadata_tube["top_level"],
"screen_top": metadata_tube["filter_top_level"],
"screen_bottom": metadata_tube["filter_bottom_level"],
"name": mtd_tube["code"].replace("-", ""),
"tube_top": mtd_tube["top_level"],
"screen_top": mtd_tube["filter_top_level"],
"screen_bottom": mtd_tube["filter_bottom_level"],
}
)

lon, lat, _ = metadata_mw["geometry"]["coordinates"]
transformer = Transformer.from_crs("WGS84", "EPSG:28992")
metadata["x"], metadata["y"] = transformer.transform(lat, lon)

if not metadata_tube["timeseries"]:
if not mtd_tube["timeseries"]:
metadata["timeseries_type"] = None
else:
for series in metadata_tube["timeseries"]:
for series in mtd_tube["timeseries"]:
series_info = requests.get(series).json()
if series_info["name"] == "WNS9040.hand":
metadata["uuid_hand"] = series_info["uuid"]
Expand Down Expand Up @@ -382,8 +443,6 @@ def _combine_timeseries(hand_measurements, diver_measurements):
measurements = measurements.loc[
:, ["value_hand", "value_diver", "flag_hand", "flag_diver"]
]
measurements.loc[:, "name"] = hand_measurements.loc[:, "name"][0]
measurements.loc[:, "filter_nr"] = hand_measurements.loc[:, "filter_nr"][0]

return measurements

Expand Down Expand Up @@ -413,6 +472,7 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries):
metadata_df : dict
metadata of the monitoring well
"""

if tube_metadata["timeseries_type"] is None:
return pd.DataFrame(), tube_metadata

Expand Down Expand Up @@ -559,18 +619,23 @@ class of the observations, e.g. GroundwaterObs
obs_list = []
for code in codes:
groundwaterstation_metadata = get_metadata_mw_from_code(code)
tubes = []
if tube_nr == "all":
for metadata_tube in groundwaterstation_metadata["filters"]:
tube_nr = int(metadata_tube["code"][-3:])
o = ObsClass.from_lizard(
code,
tube_nr,
tmin,
tmax,
type_timeseries,
only_metadata=only_metadata,
)
obs_list.append(o)
tnr = _split_mw_tube_nr(metadata_tube["code"])[-1]
if tnr not in tubes:
logger.info(f"get {code}{tnr}")
o = ObsClass.from_lizard(
code,
tnr,
tmin,
tmax,
type_timeseries,
only_metadata=only_metadata,
)
obs_list.append(o)
tubes.append(tnr)

else:
o = ObsClass.from_lizard(
code, tube_nr, tmin, tmax, type_timeseries, only_metadata=only_metadata
Expand Down
138 changes: 138 additions & 0 deletions hydropandas/io/solinst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import logging
import os
import zipfile

import numpy as np
import pandas as pd
from pyproj import Transformer

logger = logging.getLogger(__name__)


def read_solinst_file(
    path,
    transform_coords=True,
):
    """Read a Solinst logger file (XLE) into a DataFrame plus metadata.

    Parameters
    ----------
    path : str or path-like
        path to a Solinst file (.xle), or to a .zip archive containing an
        .xle file with the same base name as the archive.
    transform_coords : bool, default True
        if True, convert the logger's WGS84 latitude/longitude to
        RD (EPSG:28992) x/y; if False, the raw latitude/longitude values
        are stored in ``meta["x"]`` / ``meta["y"]`` unchanged.

    Returns
    -------
    df : pandas.DataFrame
        measurements indexed by timestamp; channel columns are named
        '<identification>_<unit>' taken from the XLE channel headers.
    meta : dict
        metadata merged from the File_info, Instrument_info and
        Instrument_info_data_header sections of the XLE file, plus
        derived keys (x, y, filename, source, name, monitoring_well,
        unit, metadata_available).

    Raises
    ------
    NotImplementedError
        if the file extension is neither .xle nor .zip.
    """

    # open file
    path = str(path)
    # base name without extension; for .zip input this is also the expected
    # name of the .xle member inside the archive
    name = os.path.splitext(os.path.basename(path))[0]
    if path.endswith(".xle"):
        f = path
    elif path.endswith(".zip"):
        # NOTE(review): zf is never closed — consider a context manager.
        # `f` becomes a file object here (not a path) and ends up in
        # meta["filename"] below; the actual parsing uses `path`, relying on
        # pandas' compression inference to read inside the zip — TODO confirm.
        zf = zipfile.ZipFile(path)
        f = zf.open("{}.xle".format(name))
    else:
        raise NotImplementedError(
            "File type '{}' not supported!".format(os.path.splitext(path)[-1])
        )

    logger.info("reading -> {}".format(f))

    # read channel 1 data header; transpose so the single record becomes a
    # Series with Identification/Unit attributes
    df_ch1_data_header = pd.read_xml(path, xpath="/Body_xle/Ch1_data_header")
    series_ch1_data_header = df_ch1_data_header.T.iloc[:, 0]
    # column name for channel 1, e.g. 'level_m'
    colname_ch1 = (
        series_ch1_data_header.Identification.lower()
        + "_"
        + series_ch1_data_header.Unit.lower()
    )

    # read channel 2 data header (same structure as channel 1)
    df_ch2_data_header = pd.read_xml(path, xpath="/Body_xle/Ch2_data_header")
    series_ch2_data_header = df_ch2_data_header.T.iloc[:, 0]
    colname_ch2 = (
        series_ch2_data_header.Identification.lower()
        + "_"
        + series_ch2_data_header.Unit.lower()
    )

    # read observations
    df = pd.read_xml(
        path,
        xpath="/Body_xle/Data/Log",
    )
    df.rename(columns={"ch1": colname_ch1, "ch2": colname_ch2}, inplace=True)
    # build the timestamp index; some loggers record milliseconds ('ms')
    if "ms" in df.columns:
        df["date_time"] = pd.to_datetime(
            df["Date"] + " " + df["Time"]
        ) + pd.to_timedelta(df["ms"], unit="ms")
        drop_cols = ["id", "Date", "Time", "ms"]
    else:
        df["date_time"] = pd.to_datetime(df["Date"] + " " + df["Time"])
        drop_cols = ["id", "Date", "Time"]
    df.set_index("date_time", inplace=True)

    # drop the raw columns now folded into the index
    df.drop(columns=drop_cols, inplace=True)

    # parse meta into dict, per group in XLE file
    meta = {}
    # read file info
    df_file_info = pd.read_xml(path, xpath="/Body_xle/File_info")
    dict_file_info = df_file_info.T.iloc[:, 0].to_dict()

    # read instrument info
    df_instrument_info = pd.read_xml(path, xpath="/Body_xle/Instrument_info")
    dict_instrument_info = df_instrument_info.T.iloc[:, 0].to_dict()

    # read instrument info data header
    df_instrument_info_data_header = pd.read_xml(
        path, xpath="/Body_xle/Instrument_info_data_header"
    )
    dict_instrument_info_data_header = df_instrument_info_data_header.T.iloc[
        :, 0
    ].to_dict()

    # merge all metadata sections; later sections win on duplicate keys
    meta = {
        **dict_file_info,
        **dict_instrument_info,
        **dict_instrument_info_data_header,
    }

    # NOTE: 'Longtitude' (sic) is the key as it appears in the XLE file —
    # do not "fix" the spelling here.
    if transform_coords:
        # lat and lon has 0,000 when location is not supplied
        # replace comma with point first (decimal-comma locale in XLE files)
        if isinstance(meta["Latitude"], str):
            meta["Latitude"] = float(meta["Latitude"].replace(",", "."))
        if isinstance(meta["Longtitude"], str):
            meta["Longtitude"] = float(meta["Longtitude"].replace(",", "."))
        if (meta["Latitude"] != 0) & (meta["Longtitude"] != 0):
            # NOTE: check EPSG:28992 definition and whether location is showing up in
            # the right spot.
            transformer = Transformer.from_crs("epsg:4326", "epsg:28992")
            x, y = transformer.transform(meta["Latitude"], meta["Longtitude"])
            x = np.round(x, 2)
            y = np.round(y, 2)
        else:
            # (0, 0) is the logger's placeholder for "no location set"
            logger.warning("file has no location included")
            x = None
            y = None
    else:
        # keep raw coordinates (possibly still decimal-comma strings)
        x = meta["Latitude"]
        y = meta["Longtitude"]
    meta["x"] = x
    meta["y"] = y
    # NOTE(review): for zip input this stores the open file object, for
    # .xle input the path string — inconsistent, TODO confirm intent
    meta["filename"] = f
    meta["source"] = meta["Created_by"]
    meta["name"] = name
    meta["monitoring_well"] = name
    # unit of the primary (channel 1) measurement
    meta["unit"] = series_ch1_data_header.Unit.lower()
    meta["metadata_available"] = True

    return df, meta
Loading

0 comments on commit b60fb6c

Please sign in to comment.