diff --git a/python_cs_functions/attributes.py b/python_cs_functions/attributes.py
index 885f2f8..0c27760 100644
--- a/python_cs_functions/attributes.py
+++ b/python_cs_functions/attributes.py
@@ -6,6 +6,7 @@
 import pandas as pd
 import rasterio
 import xarray as xr
+import warnings
 from dateutil.relativedelta import relativedelta
 from osgeo import gdal, osr
 from rasterstats import zonal_stats
@@ -93,7 +94,9 @@ def attributes_from_pelletier(geo_folder, dataset, shp_str, l_values, l_index):
 
     for file,att,unit in zip(files,attrs,units):
         tif = str( geo_folder / dataset / 'raw' / file )
+        check_and_set_nodata_value_on_pelletier_file(tif)
         zonal_out = zonal_stats(shp_str, tif, stats=stats)
+        zonal_out = check_zonal_stats_outcomes(zonal_out, new_val=0) # certain tifs have no data because the variable doesn't occur in the basin, so we set these stats to 0
         scale,offset = read_scale_and_offset(tif)
         l_values = update_values_list(l_values, stats, zonal_out, scale, offset)
         l_index += [('Soil', f'{att}_min', f'{unit}', 'Pelletier'),
@@ -103,6 +106,19 @@
     return l_values,l_index
 
+def check_and_set_nodata_value_on_pelletier_file(tif, nodata=255):
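+    '''Checks whether a Pelletier GeoTIFF has a no-data value set; if not, and the assumed no-data value (default 255) occurs in the data, writes it to the file as the no-data value'''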
+    if get_geotif_nodata_value(tif) is None: # no no-data value set yet
+        data = get_geotif_data_as_array(tif)
+        if data.max() == nodata: # but we do need to set a new no-data value here
+            data = None
+            ds = gdal.Open(tif, gdal.GA_Update) # open in update mode so the new no-data value is actually written to file
+            band = ds.GetRasterBand(1)
+            band.SetNoDataValue(nodata)
+            ds.FlushCache()
+            ds = None
+    return
+
 def attributes_from_streamflow(hyd_folder, dataset, basin_id, pre, row, l_values, l_index):
 
     '''Calculates various streamflow signatures'''
 
@@ -139,6 +154,10 @@
     assert hyd['time'][0].values == pre['time'][0].values, 'attributes_from_streamflow(): mismatch between precipitation and streamflow start timestamp'
     assert hyd['time'][-1].values == pre['time'][-1].values, 'attributes_from_streamflow(): mismatch between precipitation and streamflow final timestamp'
 
+    # Gap-fill the streamflow series if needed, so that we have identical-length time series to work with
+    hyd = hyd.resample(time='D').asfreq()
+    assert len(hyd['time']) == len(pre['time']), 'attributes_from_streamflow(): different number of timesteps in precipitation and streamflow series'
+
     # Create a water-year time variable
     hyd['water_year'] = hyd['time'].dt.year.where(hyd['time'].dt.month < 10, hyd['time'].dt.year + 1)
     pre['water_year'] = pre['time'].dt.year.where(pre['time'].dt.month < 10, pre['time'].dt.year + 1)
@@ -157,14 +176,21 @@ def attributes_from_hydrolakes(geo_folder, dataset, l_values, l_index):
 
     '''Calculates open water attributes from HydroLAKES'''
 
-    # Load the file
+    # Define the standard file name
     lake_str = str(geo_folder / dataset / 'raw' / 'HydroLAKES_polys_v10_NorthAmerica.shp')
-    lakes = gpd.read_file(lake_str)
 
-    # General stats
-    res_mask = lakes['Lake_type'] == 2 # Lake Type 2 == reservoir; see docs (https://data.hydrosheds.org/file/technical-documentation/HydroLAKES_TechDoc_v10.pdf)
-    num_lakes = len(lakes)
-    num_resvr = res_mask.sum()
+    # Check if the file exists (some basins won't have lakes)
+    lakes = None # if we have a lake file this will get overwritten - we use this in get_open_water_stats()
+    if os.path.exists(lake_str):
+        lakes = gpd.read_file(lake_str)
+
+        # General stats
+        res_mask = lakes['Lake_type'] == 2 # Lake Type 2 == reservoir; see docs (https://data.hydrosheds.org/file/technical-documentation/HydroLAKES_TechDoc_v10.pdf)
+        num_lakes = len(lakes)
+        num_resvr = res_mask.sum()
+    else:
+        num_lakes = 0
+        num_resvr = 0
     l_values.append(num_lakes)
     l_index.append(('Open water', 'open_water_number', '-', 'HydroLAKES'))
     l_values.append(num_resvr)
@@ -595,6 +621,14 @@ def attributes_from_worldclim(geo_folder, dataset, shp_str, l_values, l_index):
     return l_values, l_index
 
 ## ------- Component functions
+def check_zonal_stats_outcomes(zonal_out, new_val=np.nan):
+    '''Checks for None values in zonal_out and sets these to np.nan or a user-defined value'''
+    for ix in range(0,len(zonal_out)):
+        for key,val in zonal_out[ix].items():
+            if val is None:
+                zonal_out[ix][key] = new_val
+    return zonal_out
+
 # Sine functions for P and T as per Woods (2009), Eq. 2 and 3. Period = 1 year
 def sine_function_temp(x, mean, delta, phase, period=1):
     result = mean + delta * np.sin(2*np.pi*(x-phase)/period)
@@ -806,6 +840,9 @@
     return l_values
 
 def read_scale_and_offset(geotiff_path):
+    # Enforce data type
+    geotiff_path = str(geotiff_path)
+
     # Open the GeoTIFF file
     dataset = gdal.Open(geotiff_path)
@@ -822,6 +859,7 @@
     return scale, offset
 
 def get_geotif_data_as_array(file, band=1):
+    file = str(file) # enforce data type
     ds = gdal.Open(file) # open the file
     band = ds.GetRasterBand(band) # get the data band
     data = band.ReadAsArray() # convert to numpy array for further manipulation
@@ -841,6 +879,8 @@
     return data
 
 def write_geotif_sameDomain(src_file,des_file,des_data, nodata_value=None):
+    # Enforce data type
+    src_file = str(src_file)
 
     # load the source file to get the appropriate attributes
     src_ds = gdal.Open(src_file)
@@ -1058,7 +1098,12 @@ def calculate_signatures(hyd, pre, source, l_values, l_index):
     # Mean monthly flows
     monthly_q = hyd['q_obs'].resample(time='1ME').mean().groupby('time.month')
     monthly_m = monthly_q.mean()
-    monthly_s = monthly_q.std()
+    with warnings.catch_warnings():
+        # This mutes a "RuntimeWarning: Degrees of freedom <= 0 for slice.
+        #   var = nanvar(a, axis=axis, dtype=dtype, out=out, ddof=ddof,"
+        # warning which we'll get when calculating std() on months that contain only NaNs
+        warnings.simplefilter("ignore", category=RuntimeWarning)
+        monthly_s = monthly_q.std()
     l_values, l_index = process_monthly_means_to_lists(monthly_m, 'mean', l_values, l_index, 'daily_streamflow', 'mm day^-1', 'Hydrology', source)
     l_values, l_index = process_monthly_means_to_lists(monthly_s, 'std', l_values, l_index, 'daily_streamflow', 'mm day^-1', 'Hydrology', source)
 
@@ -1073,7 +1118,7 @@ def calculate_signatures(hyd, pre, source, l_values, l_index):
     l_index.append( ('Hydrology', 'runoff_ratio_std', 'mm d^-1 month-1', f'{source}, ERA5') )
 
     # Streamflow elasticity
-    q_elas = np.median(((daily_mean_q - daily_mean_q.mean())/(daily_mean_p - daily_mean_p.mean()))*(daily_mean_p.mean()/daily_mean_q.mean()))
+    q_elas = np.nanmedian(((daily_mean_q - daily_mean_q.mean())/(daily_mean_p - daily_mean_p.mean()))*(daily_mean_p.mean()/daily_mean_q.mean()))
     l_values.append(q_elas)
     l_index.append( ('Hydrology', 'streamflow_elasticity', '-', f'{source}, ERA5') )
 
@@ -1082,13 +1127,21 @@ def calculate_signatures(hyd, pre, source, l_values, l_index):
     slopes = []
     for year,group in groups:
         flows = group.values.copy()
-        flows.sort()
-        flows = np.log(flows)
-        slope = (np.percentile(flows,66) - np.percentile(flows,33)) / (.66*len(flows) - .33*len(flows))
-        slopes.append(slope)
+        if (flows == 0).all() or np.isnan(flows).all():
+            slopes.append(np.nan) # so we can ignore these years
+        else:
+            # Account for NaNs - we don't want these to influence the calculations
+            flows = flows[~np.isnan(flows)]
+            # Account for zero flows that mess with log calculations
+            flows[flows == 0] = flows.mean()/1000 # set zero flows to 0.1% of mean flow
+            # Do the actual calculation
+            flows.sort()
+            flows = np.log(flows)
+            slope = (np.percentile(flows,66) - np.percentile(flows,33)) / (.66*len(flows) - .33*len(flows))
+            slopes.append(slope)
     slopes = np.array(slopes)
-    slope_m = slopes.mean()
-    slope_s = slopes.std()
+    slope_m = np.nanmean(slopes)
+    slope_s = np.nanstd(slopes)
 
     l_values.append(slope_m)
     l_index.append( ('Hydrology', 'fdc_slope_mean', '-', f'{source}') )
@@ -1096,8 +1149,12 @@ def calculate_signatures(hyd, pre, source, l_values, l_index):
     l_values.append(slope_s)
     l_index.append( ('Hydrology', 'fdc_slope_std', '-', f'{source}') )
     # Baseflow index
-    rec,_ = baseflow.separation(hyd['q_obs'].values, method='Eckhardt') # Find baseflow with Eckhardt filter
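+    # Interpolate across missing days before running the filter; the original NaN gaps are restored immediately after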
+    nan_mask = np.isnan(hyd['q_obs'])
+    q_filled = hyd['q_obs'].interpolate_na(dim='time', method='linear')
+    rec,_ = baseflow.separation(q_filled.values, method='Eckhardt') # Find baseflow with Eckhardt filter
     tmp = xr.DataArray(rec['Eckhardt'], dims='time', coords={'time': hyd['time']}) # Prepare a new variable to put in 'hyd'
+    tmp[nan_mask] = np.nan
     hyd['q_bas'] = tmp
     daily_mean_qbase = hyd['q_bas'].groupby(hyd['water_year']).mean()
     bfi_m = (daily_mean_qbase / daily_mean_q).mean()
@@ -1114,9 +1170,13 @@ def calculate_signatures(hyd, pre, source, l_values, l_index):
     groups = tmp_frc_flow.groupby(hyd['water_year'])
     dates = []
     for year,group in groups:
-        hdf = group[group > 0.5][0]['time'].values
-        dates.append(pd.Timestamp(hdf).dayofyear)
+        if np.isnan(group).all(): # happens in a few basins with year-long+ periods of zero flow
+            dates.append(np.nan)
+        else:
+            hdf = group[group > 0.5][0]['time'].values
+            dates.append(pd.Timestamp(hdf).dayofyear)
     dates = np.array(dates)
+    dates = dates[~np.isnan(dates)] # Remove NaNs so we can use the circular stats
     hdf_m = circmean(dates, high=366)
     hdf_s = circstd(dates, high=366)
     l_values.append(hdf_m)
@@ -1126,9 +1186,14 @@ def calculate_signatures(hyd, pre, source, l_values, l_index):
     # Quantiles
     groups = hyd['q_obs'].groupby(hyd['water_year'])
-    for quantile in [0.01, 0.05, 0.10, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]:
-        q_m = groups.quantile(quantile).mean()
-        q_s = groups.quantile(quantile).std()
-        l_values.append(float(q_m.values))
-        l_index.append(('Hydrology', f'q{int(quantile*100)}_mean', 'mm day^-1', f'{source}'))
-        l_values.append(float(q_s.values))
-        l_index.append(('Hydrology', f'q{int(quantile*100)}_std', 'mm day^-1', f'{source}'))
+    with warnings.catch_warnings():
+        # This mutes a "RuntimeWarning: All-NaN slice encountered
+        #   return _nanquantile_unchecked("
+        # warning which we'll get when calculating quantiles on all-NaN years
+        warnings.simplefilter("ignore", category=RuntimeWarning)
+        for quantile in [0.01, 0.05, 0.10, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99]:
+            q_m = groups.quantile(quantile, skipna=True).mean()
+            q_s = groups.quantile(quantile, skipna=True).std()
+            l_values.append(float(q_m.values))
+            l_index.append(('Hydrology', f'q{int(quantile*100)}_mean', 'mm day^-1', f'{source}'))
+            l_values.append(float(q_s.values))
+            l_index.append(('Hydrology', f'q{int(quantile*100)}_std', 'mm day^-1', f'{source}'))
@@ -1137,7 +1202,7 @@ def calculate_signatures(hyd, pre, source, l_values, l_index):
     # Durations
     variable = 'q_obs'
     no_flow_threshold = 0 # mm d-1
-    no_flow_condition = hyd[variable] < no_flow_threshold
+    no_flow_condition = hyd[variable] <= no_flow_threshold
     l_values,l_index = calculate_flow_period_stats('flow',no_flow_condition,'no',l_values,l_index,dataset='source',units='days',category='Hydrology')
 
     low_flow_threshold = 0.2 * hyd['q_obs'].mean() # mm d-1
@@ -1154,13 +1219,13 @@ def get_open_water_stats(gdf, att, mask, l_values, l_index):
 
     '''Calculates min, mean, max, std and total att ('Lake_area', 'Vol_total'), optionally using a reservoir mask'''
 
-    # Setup
+    # Initialization - this must happen before we check if we got a GDF,
+    # because otherwise these values won't be available for l_index
     if mask == 'all':
-        mask = gdf.index >= 0 # Selects everything if no mask is provided
-        type = 'open_water' # no reservoir mask, hence we're working with lakes
+        water = 'open_water' # no reservoir mask, hence we're working with lakes
     elif mask == 'reservoir':
-        mask = gdf['Lake_type'] == 2
-        type = 'reservoir'
+        water = 'reservoir'
+
     if att == 'Lake_area':
         units = 'km^2' # https://data.hydrosheds.org/file/technical-documentation/HydroLAKES_TechDoc_v10.pdf
         att_d = 'area'
@@ -1168,17 +1233,39 @@ def get_open_water_stats(gdf, att, mask, l_values, l_index):
         units = 'million~m^3' # https://data.hydrosheds.org/file/technical-documentation/HydroLAKES_TechDoc_v10.pdf
         att_d = 'volume'
 
+    # Check if we were handed a lake polygon
+    if gdf is not None:
+        # Setup
+        if mask == 'all':
+            mask = gdf.index >= 0 # Selects everything if no mask is provided
+        elif mask == 'reservoir':
+            mask = gdf['Lake_type'] == 2
+
+        # Get the values
+        min_val = gdf[mask][att].min()
+        mean_val = gdf[mask][att].mean()
+        max_val = gdf[mask][att].max()
+        std_val = gdf[mask][att].std()
+        tot_val = gdf[mask][att].sum() # total
+    else:
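+        # No lake polygon for this basin, so we report zeros for all open water stats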
+        min_val = 0
+        mean_val = 0
+        max_val = 0
+        std_val = 0
+        tot_val = 0
+
     # Stats
-    l_values.append(gdf[mask][att].min())
-    l_index.append(('Open water', f'{type}_{att_d}_min', f'{units}', 'HydroLAKES'))
-    l_values.append(gdf[mask][att].mean())
-    l_index.append(('Open water', f'{type}_{att_d}_mean', f'{units}', 'HydroLAKES'))
-    l_values.append(gdf[mask][att].max())
-    l_index.append(('Open water', f'{type}_{att_d}_max', f'{units}', 'HydroLAKES'))
-    l_values.append(gdf[mask][att].std())
-    l_index.append(('Open water', f'{type}_{att_d}_std', f'{units}', 'HydroLAKES'))
-    l_values.append(gdf[mask][att].sum()) # total
-    l_index.append(('Open water', f'{type}_{att_d}_total', f'{units}', 'HydroLAKES'))
+    l_values.append(min_val)
+    l_index.append(('Open water', f'{water}_{att_d}_min', f'{units}', 'HydroLAKES'))
+    l_values.append(mean_val)
+    l_index.append(('Open water', f'{water}_{att_d}_mean', f'{units}', 'HydroLAKES'))
+    l_values.append(max_val)
+    l_index.append(('Open water', f'{water}_{att_d}_max', f'{units}', 'HydroLAKES'))
+    l_values.append(std_val)
+    l_index.append(('Open water', f'{water}_{att_d}_std', f'{units}', 'HydroLAKES'))
+    l_values.append(tot_val)
+    l_index.append(('Open water', f'{water}_{att_d}_total', f'{units}', 'HydroLAKES'))
 
     return l_values, l_index
 
@@ -1190,18 +1276,19 @@ def get_aspect_attributes(tif,l_values,l_index):
     aspect = get_geotif_data_as_array(tif)
     no_data = get_geotif_nodata_value(tif)
     masked_aspect = np.ma.masked_array(aspect, aspect == no_data)
+    values = masked_aspect.compressed() # scipy < 1.12.0 circmean/std don't work well with masked arrays
 
     ## Calculate the statistics
-    l_values.append(masked_aspect.min())
+    l_values.append(values.min())
     l_index.append(('Topography', 'aspect_min', 'degrees', 'MERIT Hydro'))
 
-    l_values.append(circmean(masked_aspect,high=360))
+    l_values.append(circmean(values,high=360))
     l_index.append(('Topography', 'aspect_mean', 'degrees', 'MERIT Hydro'))
 
-    l_values.append(circstd(masked_aspect,high=360))
+    l_values.append(circstd(values,high=360))
     l_index.append(('Topography', 'aspect_std', 'degrees', 'MERIT Hydro'))
 
-    l_values.append(masked_aspect.max())
+    l_values.append(values.max())
     l_index.append(('Topography', 'aspect_max', 'degrees', 'MERIT Hydro'))
 
     return l_values,l_index
 
@@ -1210,41 +1297,63 @@ def get_river_attributes(riv_str, l_values, l_index, area):
 
     '''Calculates topographic attributes from a MERIT Hydro Basins river polygon'''
 
-    # Load shapefiles
-    river = gpd.read_file(riv_str)
-    river = river.set_index('COMID')
-
-    # Raw data
-    stream_lengths = []
-    headwaters = river[river['maxup'] == 0] # identify reaches with no upstream
-    for COMID in headwaters.index:
-        stream_length = 0
-        while COMID in river.index:
-            stream_length += river.loc[COMID]['lengthkm'] # Add the length of the current segment
-            COMID = river.loc[COMID]['NextDownID'] # Get the downstream reach
-        stream_lengths.append(stream_length) # If we get here we ran out of downstream IDs
-
-    # Stats
-    stream_total = river['lengthkm'].sum()
-    stream_lengths = np.array(stream_lengths)
-    l_values.append(stream_lengths.min())
+    # Initialize zeros for cases where we have no shapefile or an empty one
+    stream_total = 0
+    min_length = 0
+    mean_length = 0
+    max_length = 0
+    std_length = 0
+    riv_order = 0
+    density = 0
+    elongation = 0
+
+    # Check if the file exists (for headwaters we won't have a river polygon)
+    if os.path.exists(riv_str):
+        # Load shapefiles
+        river = gpd.read_file(riv_str)
+        river = river.set_index('COMID')
+
+        # Check if we actually have a river segment (empty shapefile is not the same as no shapefile)
+        if len(river) > 0:
+            # Raw data
+            stream_lengths = []
+            headwaters = river[river['maxup'] == 0] # identify reaches with no upstream
+            for COMID in headwaters.index:
+                stream_length = 0
+                while COMID in river.index:
+                    stream_length += river.loc[COMID]['lengthkm'] # Add the length of the current segment
+                    COMID = river.loc[COMID]['NextDownID'] # Get the downstream reach
+                stream_lengths.append(stream_length) # If we get here we ran out of downstream IDs
+
+            # Stats
+            stream_total = river['lengthkm'].sum()
+            stream_lengths = np.array(stream_lengths)
+            min_length = stream_lengths.min()
+            mean_length = stream_lengths.mean()
+            max_length = stream_lengths.max()
+            std_length = stream_lengths.std()
+            riv_order = river['order'].max()
+            density = stream_total/area
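+            # Elongation ratio: diameter of a circle with the same area as the basin, divided by the longest stream length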
+            elongation = 2*np.sqrt(area/np.pi)/max_length
+
+    # Update lists
+    l_values.append(min_length)
     l_index.append(('Topography', 'stream_length_min', 'km', 'MERIT Hydro Basins'))
-    l_values.append(stream_lengths.mean())
+    l_values.append(mean_length)
     l_index.append(('Topography', 'stream_length_mean', 'km', 'MERIT Hydro Basins'))
-    l_values.append(stream_lengths.max())
+    l_values.append(max_length)
     l_index.append(('Topography', 'stream_length_max', 'km', 'MERIT Hydro Basins'))
-    l_values.append(stream_lengths.std())
+    l_values.append(std_length)
     l_index.append(('Topography', 'stream_length_std', 'km', 'MERIT Hydro Basins'))
     l_values.append(stream_total)
     l_index.append(('Topography', 'stream_length_total', 'km', 'MERIT Hydro Basins'))
 
     # Order
-    l_values.append(river['order'].max())
+    l_values.append(riv_order)
     l_index.append(('Topography', 'steam_order_max', '-', 'MERIT Hydro Basins'))
 
     # Derived
-    density = stream_total/area
-    elongation = 2*np.sqrt(area/np.pi)/stream_lengths.max()
     l_values.append(density)
     l_index.append(('Topography', 'stream_density', 'km^-1', 'Derived'))
     l_values.append(elongation)
@@ -1480,9 +1588,9 @@ def aridity_and_fraction_snow_from_worldclim(geo_folder, dataset):
         month_ix = int(month)-1 # -1 to account for zero-based indexing: Jan value is at index 0, not 1
 
         # Load data
-        prc_path = clim_folder / 'prec' / prc_file
-        pet_path = clim_folder / 'pet' / pet_file
-        tmp_path = clim_folder / 'tavg' / tmp_file
+        prc_path = str(clim_folder / 'prec' / prc_file)
+        pet_path = str(clim_folder / 'pet' / pet_file)
+        tmp_path = str(clim_folder / 'tavg' / tmp_file)
         prc = get_geotif_data_as_array(prc_path) # [mm]
         pet = get_geotif_data_as_array(pet_path) # [mm]
         tmp = get_geotif_data_as_array(tmp_path) # [C]
@@ -1536,8 +1644,8 @@ def oudin_pet_from_worldclim(geo_folder, dataset, debug=False):
         month_ix = int(month)-1 # -1 to account for zero-based indexing: Jan value is at index 0, not 1
 
         # Load data
-        srad_path = clim_folder / 'srad' / srad_file
-        tavg_path = clim_folder / 'tavg' / tavg_file
+        srad_path = str(clim_folder / 'srad' / srad_file)
+        tavg_path = str(clim_folder / 'tavg' / tavg_file)
         srad = get_geotif_data_as_array(srad_path) / 1000 # [kJ m-2 day-1] / 1000 = [MJ m-2 day-1]
         tavg = get_geotif_data_as_array(tavg_path)