Release v0.12.4 (#244)

* allow get_stations with meteo_var == slice(None)
* only filter stations if start/end is provided
* ensure current station is in dataframe so nearest station can be determined
* fix for knmi precipitation data station attribute is sometimes a series with duplicate entries #241 (#242)
* reshuffle a bit
* ruff
* up version for minor release

---------

Co-authored-by: OnnoEbbens <[email protected]>
ArtesiaWater · Sep 27, 2024 · 034b1b5
1 parent 9850536
commit 034b1b5
Show file tree

Hide file tree

Showing 3 changed files with 41 additions and 16 deletions.
diff --git a/hydropandas/io/knmi.py b/hydropandas/io/knmi.py
@@ -54,7 +54,10 @@ def get_knmi_obs(
     **kwargs:
         fill_missing_obs : bool, optional
             if True nan values in time series are filled with nearby time series.
-            The default is False.
+            The default is False. Note: if the given stn has no data between start and
+            end the data from nearby stations is used. In this case the metadata of the
+            Observation is the metadata from the nearest station that has any
+            measurement in the given period.
         interval : str, optional
             desired time interval for observations. Options are 'daily' and
             'hourly'. The default is 'daily'.
@@ -399,13 +402,17 @@ def get_stations(
         meteo_var = "EV24"
 
     # select only stations with meteo_var
+    if meteo_var == slice(None):
+        meteo_mask = stations.loc[:, meteo_var].any(axis=1)
+    else:
+        meteo_mask = stations.loc[:, meteo_var]
     stations = stations.loc[
-        stations.loc[:, meteo_var],
-        ["lon", "lat", "name", "x", "y", "altitude", "tmin", "tmax"],
+        meteo_mask, ["lon", "lat", "name", "x", "y", "altitude", "tmin", "tmax"]
     ]
 
     # select only stations with measurement
-    stations = _get_stations_tmin_tmax(stations, start, end)
+    if start is not None or end is not None:
+        stations = _get_stations_tmin_tmax(stations, start, end)
 
     return stations
 
@@ -479,6 +486,11 @@ def get_station_name(stn: int, stations: Union[pd.DataFrame, None] = None) -> st
         return None
 
     stn_name = stations.at[stn, "name"]
+    if isinstance(stn_name, pd.Series):
+        raise ValueError(
+            f'station {stn} is a meteo- and a precipitation station, please indicate which one you want to use using a "meteo_var"'
+        )
+
     stn_name = stn_name.upper().replace(" ", "-").replace("(", "").replace(")", "")
     return stn_name
 
@@ -529,13 +541,20 @@ def fill_missing_measurements(
 
     # get the location of the stations
     stations = get_stations(meteo_var=meteo_var, start=start, end=end)
-    if stn_name is None:
-        stn_name = get_station_name(stn=stn, stations=stations)
+    if stn not in stations.index:
+        # no measurements in given period, continue with empty dataframe
+        knmi_df = pd.DataFrame()
 
-    # download data from station
-    knmi_df, variables, station_meta = download_knmi_data(
-        stn, meteo_var, start, end, settings, stn_name
-    )
+        # add location of station without data to dataframe
+        stations = pd.concat([stations, get_stations(meteo_var=meteo_var).loc[[stn]]])
+    else:
+        if stn_name is None:
+            stn_name = get_station_name(stn=stn, stations=stations)
+
+        # download data from station if it has data between start-end
+        knmi_df, variables, station_meta = download_knmi_data(
+            stn, meteo_var, start, end, settings, stn_name
+        )
 
     # if the first station cannot be read, read another station as the first
     ignore = [stn]
@@ -548,6 +567,7 @@ def fill_missing_measurements(
             meteo_var=meteo_var,
             start=start,
             end=end,
+            stations=stations,
             ignore=ignore,
         )
         if stn_lst is None:
@@ -1214,8 +1234,7 @@ def interpret_knmi_file(
             raise ValueError(
                 f"Cannot handle multiple stations {unique_stn} in single file"
             )
-        else:
-            stn = df.at[df.index[0], "STN"]
+        stn = unique_stn[0]
 
         if add_day or add_hour:
             if add_day and add_hour:
@@ -1659,7 +1678,10 @@ class of the observations, can be PrecipitationObs or
     **kwargs:
         fill_missing_obs : bool, optional
             if True nan values in time series are filled with nearby time series.
-            The default is False.
+            The default is False. Note: if the given stn has no data between start and
+            end the data from nearby stations is used. In this case the metadata of the
+            Observation is the metadata from the nearest station that has any
+            measurement in the given period.
         interval : str, optional
             desired time interval for observations. Options are 'daily' and
             'hourly'. The default is 'daily'.
@@ -1674,7 +1696,7 @@ class of the observations, can be PrecipitationObs or
 
     Returns
     -------
-    obs_list : list of obsevation objects
+    obs_list : list of observation objects
         collection of multiple point observations
     """
 

diff --git a/hydropandas/observation.py b/hydropandas/observation.py
@@ -1083,7 +1083,10 @@ def from_knmi(
             end date of observations. The default is None.
         fill_missing_obs : bool, optional
             if True nan values in time series are filled with nearby time series.
-            The default is False.
+            The default is False. Note: if the given stn has no data between start and
+            end the data from nearby stations is used. In this case the metadata of the
+            Observation is the metadata from the nearest station that has any
+            measurement in the given period.
         interval : str, optional
             desired time interval for observations. Options are 'daily' and
             'hourly'. The default is 'daily'.

diff --git a/hydropandas/version.py b/hydropandas/version.py
@@ -1,7 +1,7 @@
 from importlib import metadata
 from sys import version as os_version
 
-__version__ = "0.12.3"
+__version__ = "0.12.4"
 
 
 def show_versions():