Skip to content

Commit

Permalink
Merge pull request #191 from ArtesiaWater/dev
Browse files Browse the repository at this point in the history
Update master for new release (0.11.0)
  • Loading branch information
OnnoEbbens authored Mar 11, 2024
2 parents 0950abe + 18ab689 commit b60fb6c
Show file tree
Hide file tree
Showing 16 changed files with 4,626 additions and 86 deletions.
9 changes: 4 additions & 5 deletions examples/02_knmi_observations.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1477,11 +1477,10 @@
"oc = hpd.ObsCollection([precip1, precip2])\n",
"gdf = oc.to_gdf()\n",
"gdf = gdf.set_crs(28992)\n",
"gdf = gdf.to_crs(3857)\n",
"gdf[\"name\"] = gdf.index\n",
"ax = gdf.buffer(2000).plot(alpha=0, figsize=(8, 8))\n",
"gdf.plot(\"name\", ax=ax, cmap=\"jet\", legend=True, markersize=100)\n",
"cx.add_basemap(ax)"
"cx.add_basemap(ax, crs=28992)"
]
},
{
Expand Down Expand Up @@ -3331,9 +3330,9 @@
],
"metadata": {
"kernelspec": {
"display_name": "hpd_env",
"display_name": "dev",
"language": "python",
"name": "python3"
"name": "dev"
},
"language_info": {
"codemirror_mode": {
Expand All @@ -3345,7 +3344,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
"version": "3.10.13"
}
},
"nbformat": 4,
Expand Down
16 changes: 7 additions & 9 deletions hydropandas/io/bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,14 +148,12 @@ def get_bro_groundwater(bro_id, tube_nr=None, only_metadata=False, **kwargs):
empty_df = pd.DataFrame()
return empty_df, meta

dfl = []
for i, gld_id in enumerate(gld_ids):
if i == 0:
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
else:
df_new, meta_new = measurements_from_gld(gld_id, **kwargs)
df = pd.concat([df, df_new], axis=1)
meta.update(meta_new)
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
dfl.append(df)
df = pd.concat(dfl, axis=0)

return df, meta

Expand Down Expand Up @@ -305,15 +303,15 @@ def measurements_from_gld(

# to dataframe
df = pd.DataFrame(
index=pd.to_datetime(times),
index=pd.to_datetime(times, utc=True).tz_convert("CET"),
data={"values": values, "qualifier": qualifiers},
)

# wintertime
if to_wintertime:
# remove time zone information by transforming to dutch winter time
df.index = pd.to_datetime(df.index, utc=True).tz_localize(None) + pd.Timedelta(
1, unit="H"
1, unit="h"
)

# duplicates
Expand Down
4 changes: 3 additions & 1 deletion hydropandas/io/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,9 @@ def get_knmi_obs(
ts, meta = get_knmi_timeseries_stn(stn, meteo_var, settings, start, end)
elif fname is not None:
logger.info(f"get KNMI data from file {fname} and meteo variable {meteo_var}")
ts, meta = get_knmi_timeseries_fname(fname, meteo_var, settings, start, end)
ts, meta = get_knmi_timeseries_fname(
str(fname), meteo_var, settings, start, end
)
elif xy is not None:
logger.info(
f"get KNMI data from station nearest to coordinates {xy} and meteo"
Expand Down
109 changes: 87 additions & 22 deletions hydropandas/io/lizard.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def _prepare_API_input(nr_pages, url_groundwater):
urls = []
for page in range(nr_pages):
true_page = page + 1 # The real page number is attached to the import thread
urls = [url_groundwater + "&page={}".format(true_page)]
urls += [url_groundwater + "&page={}".format(true_page)]
return urls


Expand All @@ -185,6 +185,32 @@ def _download(url, timeout=1800):
return data


def _split_mw_tube_nr(code):
"""get the tube number from a code that consists of the name and the tube number.
Parameters
----------
code : str
name + tube_nr. e.g. 'BUWP014-11' or 'BUWP014012'
Returns
-------
monitoring well, tube_number (str, int)
Notes
-----
The format of the name + tube_nr is not very consistent and this function may need
further finetuning.
"""

if code[-3:].isdigit():
return code[:-3], int(code[-3:])
else:
# assume there is a '-' to split name and filter number
tube_nr = code.split("-")[-1]
return code.strip(f"-{tube_nr}"), int(tube_nr)


def get_metadata_tube(metadata_mw, tube_nr):
"""Extract the metadata for a specific tube from the monitoring well metadata.
Expand Down Expand Up @@ -218,30 +244,65 @@ def get_metadata_tube(metadata_mw, tube_nr):
"status": None,
}

metadata_tube_list = []
for metadata_tube in metadata_mw["filters"]:
if metadata_tube["code"].endswith(str(tube_nr)):
break
else:
# check if name+filternr ends with three digits
code, tbnr = _split_mw_tube_nr(metadata_tube["code"])
if tbnr == tube_nr:
metadata_tube_list.append(metadata_tube)

if len(metadata_tube_list) == 0:
raise ValueError(f"{metadata_mw['name']} doesn't have a tube number {tube_nr}")
elif len(metadata_tube_list) == 1:
mtd_tube = metadata_tube_list[0]
elif len(metadata_tube_list) > 1:
# tube has probably been replaced, multiple tubes with the same code and tube nr
# merge metadata from all tubes
logger.info(
f"there are {len(metadata_tube_list)} instances of {code} and tube "
f"{tube_nr}, trying to merge all in one observation object"
)
mtd_tube = metadata_tube_list[0].copy()
relevant_keys = {
"top_level",
"filter_top_level",
"filter_bottom_level",
"timeseries",
}
for metadata_tube in metadata_tube_list:
for key in set(metadata_tube.keys()) & relevant_keys:
# check if properties are always the same for a tube number
val = metadata_tube[key]
if key in ["top_level", "filter_top_level", "filter_bottom_level"]:
if val != mtd_tube[key]:
logger.warning(
f"multiple {key} values found ({val} & {mtd_tube[key]})"
f" for {code} and tube {tube_nr}, using {mtd_tube[key]}"
)
# merge time series from all tubes with the same code and tube number
elif key == "timeseries":
mtd_tube[key] += val

mtd_tube["code"] = f"{code}{tube_nr}"

metadata.update(
{
"tube_nr": tube_nr,
"name": metadata_tube["code"].replace("-", ""),
"tube_top": metadata_tube["top_level"],
"screen_top": metadata_tube["filter_top_level"],
"screen_bottom": metadata_tube["filter_bottom_level"],
"name": mtd_tube["code"].replace("-", ""),
"tube_top": mtd_tube["top_level"],
"screen_top": mtd_tube["filter_top_level"],
"screen_bottom": mtd_tube["filter_bottom_level"],
}
)

lon, lat, _ = metadata_mw["geometry"]["coordinates"]
transformer = Transformer.from_crs("WGS84", "EPSG:28992")
metadata["x"], metadata["y"] = transformer.transform(lat, lon)

if not metadata_tube["timeseries"]:
if not mtd_tube["timeseries"]:
metadata["timeseries_type"] = None
else:
for series in metadata_tube["timeseries"]:
for series in mtd_tube["timeseries"]:
series_info = requests.get(series).json()
if series_info["name"] == "WNS9040.hand":
metadata["uuid_hand"] = series_info["uuid"]
Expand Down Expand Up @@ -382,8 +443,6 @@ def _combine_timeseries(hand_measurements, diver_measurements):
measurements = measurements.loc[
:, ["value_hand", "value_diver", "flag_hand", "flag_diver"]
]
measurements.loc[:, "name"] = hand_measurements.loc[:, "name"][0]
measurements.loc[:, "filter_nr"] = hand_measurements.loc[:, "filter_nr"][0]

return measurements

Expand Down Expand Up @@ -413,6 +472,7 @@ def get_timeseries_tube(tube_metadata, tmin, tmax, type_timeseries):
metadata_df : dict
metadata of the monitoring well
"""

if tube_metadata["timeseries_type"] is None:
return pd.DataFrame(), tube_metadata

Expand Down Expand Up @@ -559,18 +619,23 @@ class of the observations, e.g. GroundwaterObs
obs_list = []
for code in codes:
groundwaterstation_metadata = get_metadata_mw_from_code(code)
tubes = []
if tube_nr == "all":
for metadata_tube in groundwaterstation_metadata["filters"]:
tube_nr = int(metadata_tube["code"][-3:])
o = ObsClass.from_lizard(
code,
tube_nr,
tmin,
tmax,
type_timeseries,
only_metadata=only_metadata,
)
obs_list.append(o)
tnr = _split_mw_tube_nr(metadata_tube["code"])[-1]
if tnr not in tubes:
logger.info(f"get {code}{tnr}")
o = ObsClass.from_lizard(
code,
tnr,
tmin,
tmax,
type_timeseries,
only_metadata=only_metadata,
)
obs_list.append(o)
tubes.append(tnr)

else:
o = ObsClass.from_lizard(
code, tube_nr, tmin, tmax, type_timeseries, only_metadata=only_metadata
Expand Down
138 changes: 138 additions & 0 deletions hydropandas/io/solinst.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
import logging
import os
import zipfile

import numpy as np
import pandas as pd
from pyproj import Transformer

logger = logging.getLogger(__name__)


def read_solinst_file(
    path,
    transform_coords=True,
):
    """Read a Solinst logger file (XLE) into a DataFrame plus metadata.

    Parameters
    ----------
    path : str or path-like
        path to a Solinst file (.xle), or to a .zip archive containing an
        .xle file with the same base name as the archive.
    transform_coords : bool, default True
        if True, convert the logger's WGS84 latitude/longitude to
        RD (EPSG:28992) x/y; if False, the raw latitude/longitude values
        are stored in ``meta["x"]`` / ``meta["y"]`` unchanged.

    Returns
    -------
    df : pandas.DataFrame
        measurements indexed by timestamp; channel columns are named
        '<identification>_<unit>' taken from the XLE channel headers.
    meta : dict
        metadata merged from the File_info, Instrument_info and
        Instrument_info_data_header sections of the XLE file, plus
        derived keys (x, y, filename, source, name, monitoring_well,
        unit, metadata_available).

    Raises
    ------
    NotImplementedError
        if the file extension is neither .xle nor .zip.
    """

    # open file
    path = str(path)
    # base name without extension; for .zip input this is also the expected
    # name of the .xle member inside the archive
    name = os.path.splitext(os.path.basename(path))[0]
    if path.endswith(".xle"):
        f = path
    elif path.endswith(".zip"):
        # NOTE(review): zf is never closed — consider a context manager.
        # `f` becomes a file object here (not a path) and ends up in
        # meta["filename"] below; the actual parsing uses `path`, relying on
        # pandas' compression inference to read inside the zip — TODO confirm.
        zf = zipfile.ZipFile(path)
        f = zf.open("{}.xle".format(name))
    else:
        raise NotImplementedError(
            "File type '{}' not supported!".format(os.path.splitext(path)[-1])
        )

    logger.info("reading -> {}".format(f))

    # read channel 1 data header; transpose so the single record becomes a
    # Series with Identification/Unit attributes
    df_ch1_data_header = pd.read_xml(path, xpath="/Body_xle/Ch1_data_header")
    series_ch1_data_header = df_ch1_data_header.T.iloc[:, 0]
    # column name for channel 1, e.g. 'level_m'
    colname_ch1 = (
        series_ch1_data_header.Identification.lower()
        + "_"
        + series_ch1_data_header.Unit.lower()
    )

    # read channel 2 data header (same structure as channel 1)
    df_ch2_data_header = pd.read_xml(path, xpath="/Body_xle/Ch2_data_header")
    series_ch2_data_header = df_ch2_data_header.T.iloc[:, 0]
    colname_ch2 = (
        series_ch2_data_header.Identification.lower()
        + "_"
        + series_ch2_data_header.Unit.lower()
    )

    # read observations
    df = pd.read_xml(
        path,
        xpath="/Body_xle/Data/Log",
    )
    df.rename(columns={"ch1": colname_ch1, "ch2": colname_ch2}, inplace=True)
    # build the timestamp index; some loggers record milliseconds ('ms')
    if "ms" in df.columns:
        df["date_time"] = pd.to_datetime(
            df["Date"] + " " + df["Time"]
        ) + pd.to_timedelta(df["ms"], unit="ms")
        drop_cols = ["id", "Date", "Time", "ms"]
    else:
        df["date_time"] = pd.to_datetime(df["Date"] + " " + df["Time"])
        drop_cols = ["id", "Date", "Time"]
    df.set_index("date_time", inplace=True)

    # drop the raw columns now folded into the index
    df.drop(columns=drop_cols, inplace=True)

    # parse meta into dict, per group in XLE file
    meta = {}
    # read file info
    df_file_info = pd.read_xml(path, xpath="/Body_xle/File_info")
    dict_file_info = df_file_info.T.iloc[:, 0].to_dict()

    # read instrument info
    df_instrument_info = pd.read_xml(path, xpath="/Body_xle/Instrument_info")
    dict_instrument_info = df_instrument_info.T.iloc[:, 0].to_dict()

    # read instrument info data header
    df_instrument_info_data_header = pd.read_xml(
        path, xpath="/Body_xle/Instrument_info_data_header"
    )
    dict_instrument_info_data_header = df_instrument_info_data_header.T.iloc[
        :, 0
    ].to_dict()

    # merge all metadata sections; later sections win on duplicate keys
    meta = {
        **dict_file_info,
        **dict_instrument_info,
        **dict_instrument_info_data_header,
    }

    # NOTE: 'Longtitude' (sic) is the key as it appears in the XLE file —
    # do not "fix" the spelling here.
    if transform_coords:
        # lat and lon has 0,000 when location is not supplied
        # replace comma with point first (decimal-comma locale in XLE files)
        if isinstance(meta["Latitude"], str):
            meta["Latitude"] = float(meta["Latitude"].replace(",", "."))
        if isinstance(meta["Longtitude"], str):
            meta["Longtitude"] = float(meta["Longtitude"].replace(",", "."))
        if (meta["Latitude"] != 0) & (meta["Longtitude"] != 0):
            # NOTE: check EPSG:28992 definition and whether location is showing up in
            # the right spot.
            transformer = Transformer.from_crs("epsg:4326", "epsg:28992")
            x, y = transformer.transform(meta["Latitude"], meta["Longtitude"])
            x = np.round(x, 2)
            y = np.round(y, 2)
        else:
            # (0, 0) is the logger's placeholder for "no location set"
            logger.warning("file has no location included")
            x = None
            y = None
    else:
        # keep raw coordinates (possibly still decimal-comma strings)
        x = meta["Latitude"]
        y = meta["Longtitude"]
    meta["x"] = x
    meta["y"] = y
    # NOTE(review): for zip input this stores the open file object, for
    # .xle input the path string — inconsistent, TODO confirm intent
    meta["filename"] = f
    meta["source"] = meta["Created_by"]
    meta["name"] = name
    meta["monitoring_well"] = name
    # unit of the primary (channel 1) measurement
    meta["unit"] = series_ch1_data_header.Unit.lower()
    meta["metadata_available"] = True

    return df, meta
Loading

0 comments on commit b60fb6c

Please sign in to comment.