Skip to content

Commit

Permalink
Merge pull request #118 from ArtesiaWater/dev
Browse files Browse the repository at this point in the history
bug fixes
  • Loading branch information
ArtesiaWater authored Mar 31, 2023
2 parents c31a5cb + 9f48d49 commit 0039a76
Show file tree
Hide file tree
Showing 11 changed files with 280 additions and 148 deletions.
3 changes: 3 additions & 0 deletions hydropandas/extensions/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,6 +378,7 @@ def section_plot(
- limit y-axis of section plot to observations only
- remove the checking (if obs are near bottom) from this function
- moving the legend outside the plot
- set xlim of observation plot more tight when tmin is not specified
"""

# prepare column for x location in section plot
Expand Down Expand Up @@ -555,6 +556,8 @@ def section_plot(
else:
ax_section.set_xlabel(section_label_x)

ax_obs.set_xlim(left=tmin, right=tmax)

# rotate labels on observation axis
ax_obs.set_xticks(
ax_obs.get_xticks(),
Expand Down
81 changes: 39 additions & 42 deletions hydropandas/io/bro.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@


def get_obs_list_from_gmn(bro_id, ObsClass, only_metadata=False, keep_all_obs=True):
"""
"""get a list of observation from a groundwater monitoring network.
Parameters
----------
Expand Down Expand Up @@ -60,7 +60,8 @@ class of the observations, so far only GroundwaterObs is supported
gmn = tree.find(".//xmlns:GMN_PO", ns)
gmws = gmn.findall("xmlns:measuringPoint", ns)

logger.info(f"{len(gmws)} groundwater monitoring wells within groundwater meetnet")
logger.info(
f"{len(gmws)} groundwater monitoring wells within groundwater meetnet")

obs_list = []
for gmw in tqdm(gmws):
Expand Down Expand Up @@ -130,26 +131,37 @@ def get_bro_groundwater(bro_id, tube_nr=None, only_metadata=False, **kwargs):

elif bro_id.startswith("GMW"):
if tube_nr is None:
raise ValueError("if bro_id is GMW a filternumber should be specified")
raise ValueError(
"if bro_id is GMW a filternumber should be specified")

meta = get_metadata_from_gmw(bro_id, tube_nr)
gld_id = get_gld_id_from_gmw(bro_id, tube_nr)
gld_ids = get_gld_ids_from_gmw(bro_id, tube_nr)

if gld_id is None:
if gld_ids is None:
meta["name"] = f"{bro_id}_{tube_nr}"
only_metadata = True # cannot get time series without gld id
else:
meta["name"] = gld_id
meta["name"] = f"{bro_id}_{tube_nr}"
meta["gld_ids"] = gld_ids

if only_metadata:
empty_df = pd.DataFrame()
return empty_df, meta

return measurements_from_gld(gld_id, **kwargs)
for i, gld_id in enumerate(gld_ids):
if i == 0:
df, meta_new = measurements_from_gld(gld_id, **kwargs)
meta.update(meta_new)
else:
df_new, meta_new = measurements_from_gld(gld_id, **kwargs)
df = pd.concat([df, df_new], axis=1)
meta.update(meta_new)

return df, meta


def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):
"""get bro_id of a grondwterstandendossier (gld) from a bro_id of a
def get_gld_ids_from_gmw(bro_id, tube_nr):
"""get bro_ids of multiple grondwaterstandendossier (gld) from a bro_id of a
grondwatermonitoringsput (gmw).
Parameters
Expand All @@ -158,8 +170,6 @@ def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):
starts with 'GLD' or 'GMW' e.g. 'GMW000000036287'.
tube_nr : int
tube number.
quality_regime : str
either choose 'IMBRO/A' or 'IMBRO'.
Raises
------
Expand All @@ -170,8 +180,8 @@ def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):
Returns
-------
str
bro_id of a grondwaterstandonderzoek (gld).
list of str or None
bro_ids of a grondwaterstandonderzoek (gld).
"""
if not bro_id.startswith("GMW"):
Expand All @@ -193,37 +203,14 @@ def get_gld_id_from_gmw(bro_id, tube_nr, quality_regime="IMBRO/A"):

for tube in d["monitoringTubeReferences"]:
if tube["tubeNumber"] == tube_nr:
if len(tube["gldReferences"]) == 1:
return tube["gldReferences"][0]["broId"]
elif len(tube["gldReferences"]) == 0:
if len(tube["gldReferences"]) == 0:
logger.info(
f"no groundwater level dossier for {bro_id} and tube number"
f"{tube_nr}"
)
return None
elif len(tube["gldReferences"]) == 2:
logger.info(
f"two gld references found for GMW {bro_id} and tube nr"
f"{tube_nr}, using {quality_regime} quality regime"
)
for gldref in tube["gldReferences"]:
url2 = gldref["url"]
req2 = requests.get(url2)
ns = {
"ns11": "http://www.broservices.nl/xsd/dsgld/1.0",
"brocom": "http://www.broservices.nl/xsd/brocommon/3.0",
}
tree = xml.etree.ElementTree.fromstring(req2.text)
gld = tree.findall(".//ns11:GLD_O", ns)[0]
qualityRegime = gld.find("brocom:qualityRegime", ns).text
if qualityRegime == quality_regime:
return gldref["broId"]
logger.info(
f"no gld reference with quality regime {quality_regime} was found"
)
return None
else:
raise RuntimeError("unexpected number of gld references")
return [gldref["broId"] for gldref in tube["gldReferences"]]


def measurements_from_gld(
Expand Down Expand Up @@ -295,11 +282,13 @@ def measurements_from_gld(
raise (Exception("Only one gld supported"))
gld = glds[0]

meta = {"name": bro_id, "source": "BRO"}
meta["monitoring_well"] = gld.find("ns11:monitoringPoint//gldcommon:broId", ns).text
meta = {"source": "BRO"}
meta["monitoring_well"] = gld.find(
"ns11:monitoringPoint//gldcommon:broId", ns).text
meta["tube_nr"] = int(
gld.find("ns11:monitoringPoint//gldcommon:tubeNumber", ns).text
)
meta["name"] = f"{meta['monitoring_well']}_{meta['tube_nr']}"
gmn = gld.find("ns11:groundwaterMonitoringNet//gldcommon:broId", ns)
if gmn is None:
meta["monitoringsnet"] = None
Expand All @@ -313,7 +302,8 @@ def measurements_from_gld(
np.nan if value.text is None else float(value.text)
for value in gld.findall(f"{msts}//waterml:value", ns)
]
qualifiers = [q.text for q in gld.findall(f"{msts}//swe:Category//swe:value", ns)]
qualifiers = [q.text for q in gld.findall(
f"{msts}//swe:Category//swe:value", ns)]

# to dataframe
df = pd.DataFrame(
Expand Down Expand Up @@ -574,6 +564,13 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
DESCRIPTION.
"""

if only_metadata and not keep_all_obs:
logger.error(
"you will get an empty ObsCollection with only_metadata is True and"
"keep_all_obs is False"
)

url = "https://publiek.broservices.nl/gm/gmw/v1/characteristics/searches?"

data = {}
Expand Down Expand Up @@ -615,7 +612,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
[gmw.text for gmw in tree.findall(".//dsgmw:GMW_C//brocom:broId", ns)]
)

if len(gmws_ids) > 1000:
if len(gmws_ids) > 1000 and not ignore_max_obs:
ans = input(
f"You requested to download {len(gmws_ids)} observations, this can"
"take a while. Are you sure you want to continue [Y/n]? "
Expand Down
9 changes: 3 additions & 6 deletions hydropandas/io/fews.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
loc = h_attr.text
if loc not in locationIds:
element.clear()
logger.info(
f" ... skipping '{loc}', not in locationIds")
logger.info(f" ... skipping '{loc}', not in locationIds")
continue

if filterdict is not None:
Expand Down Expand Up @@ -508,8 +507,7 @@ class of the observations, e.g. GroundwaterObs or WaterlvlObs
source="FEWS",
)
else:
o = ObsClass(ts, x=x, y=y, unit=unit, meta=header,
name=name, source="FEWS")
o = ObsClass(ts, x=x, y=y, unit=unit, meta=header, name=name, source="FEWS")

return o, header

Expand Down Expand Up @@ -581,8 +579,7 @@ def write_pi_xml(obs_coll, fname, timezone=1.0, version="1.24"):
tag=htag, date=hdate, time=htime
)
elif htag.endswith("timeStep"):
hline = '<{tag} unit="{unit}"/>\n'.format(
tag=htag, unit=hval)
hline = '<{tag} unit="{unit}"/>\n'.format(tag=htag, unit=hval)
else:
hline = paramline.format(tag=htag, param=hval)
hlines.append(3 * "\t" + hline)
Expand Down
113 changes: 99 additions & 14 deletions hydropandas/io/menyanthes.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,70 @@
# -*- coding: utf-8 -*-
"""Created on Thu Oct 10 11:01:22 2019.
@author: oebbe
"""

import logging
import os

import numpy as np
from pandas import DataFrame, Series
from pandas import DataFrame, Series, Timedelta, Timestamp
from scipy.io import loadmat

from ..observation import GroundwaterObs, WaterlvlObs
from ..util import matlab2datetime

logger = logging.getLogger(__name__)


def matlab2datetime(tindex):
    """Transform a MATLAB serial date number to a timestamp, rounded to seconds.

    Parameters
    ----------
    tindex : float
        The MATLAB serial date number to convert.

    Returns
    -------
    pandas.Timestamp
        The equivalent timestamp (a subclass of ``datetime.datetime``),
        rounded to the nearest second.

    Notes
    -----
    MATLAB serial date numbers represent the number of days elapsed since
    year 0000 of the proleptic Gregorian calendar, with January 1, 0000 as
    day 1. Fractions of a day are represented as a decimal.

    Python ordinals use January 1, 0001 as day 1, so a MATLAB serial date
    number is 366 days larger than the Python ordinal of the same date.

    Examples
    --------
    >>> matlab2datetime(719529.496527778)
    Timestamp('1970-01-01 11:55:00')
    """
    # whole days: interpret the integer part as a Python ordinal (366 days
    # too late, corrected below)
    day = Timestamp.fromordinal(int(tindex))
    # fraction of the day, shifted by the 366-day MATLAB/Python ordinal offset
    dayfrac = Timedelta(days=float(tindex) % 1) - Timedelta(days=366)
    # the day fraction carries floating point noise (sub-second offsets);
    # round to the nearest second as documented
    return (day + dayfrac).round("s")


def read_file(fname, ObsClass, load_oseries=True, load_stresses=True):
"""This method is used to read the file."""
"""
Read data from a Menyanthes file and create observation objects.
Parameters
----------
fname : str
Name of the Menyanthes file to read.
ObsClass : GroundwaterObs or WaterlvlObs
Class of observation object to create.
load_oseries : bool, optional
Flag indicating whether to load observation series or not, by default
True.
load_stresses : bool, optional
Flag indicating whether to load stresses or not, by default True.
Returns
-------
obs_list : list
List of observation objects created from the Menyanthes file.
"""

logger.info(f"reading menyanthes file {fname}")

Expand Down Expand Up @@ -117,7 +163,38 @@ def read_file(fname, ObsClass, load_oseries=True, load_stresses=True):


def read_oseries(mat):
"""Read the oseries from a mat file from menyanthes."""
"""Read the oseries from a mat file from menyanthes.
Parameters
----------
mat : dict
A dictionary object containing the Menyanthes file data.
Returns
-------
dict
A dictionary containing oseries data, with oseries names as keys and
their corresponding metadata and values as values.
Notes
-----
This function reads the oseries data from a Menyanthes file in .mat format
and returns it in a dictionary format. The oseries data contains the
following metadata:
- name: The name of the oseries.
- x: The x-coordinate of the oseries location.
- y: The y-coordinate of the oseries location.
- source: The data source.
- unit: The unit of measurement.
In addition to the metadata, the oseries data also contains a pandas Series
object named 'values', which contains the time series data for the oseries.
Examples
--------
>>> mat = loadmat('menyanthes_file.mat')
>>> d_h = read_oseries(mat)
"""
d_h = {}

# Check if more then one time series model is present
Expand Down Expand Up @@ -158,12 +235,20 @@ def read_oseries(mat):


def read_stresses(mat):
"""Reads the stresses from a mat file from menyanthes.
Parameters
----------
mat : dict
A dictionary object containing the mat file.
Returns
-------
dict
A dictionary object containing the stresses data.
"""
d_in = {}

# Check if more then one time series is present
# if not isinstance(mat["IN"], np.ndarray):
# mat["IN"] = [mat["IN"]]

# Read all the time series
for i, IN in enumerate(mat["IN"]):
if not hasattr(IN, "Name") and not hasattr(IN, "name"):
Expand Down
Loading

0 comments on commit 0039a76

Please sign in to comment.