From 6963ededd10297e1931f70342ed70c0a06b2b917 Mon Sep 17 00:00:00 2001 From: Vincent LHEUREUX Date: Fri, 14 Jun 2024 17:09:55 +0200 Subject: [PATCH 1/3] added era5 raster --- src/xsar/base_dataset.py | 146 +++++++++++++++++++++++----------- src/xsar/raster_readers.py | 155 +++++++++++++++++++++++++++++-------- 2 files changed, 224 insertions(+), 77 deletions(-) diff --git a/src/xsar/base_dataset.py b/src/xsar/base_dataset.py index 049277ec..b774aa60 100644 --- a/src/xsar/base_dataset.py +++ b/src/xsar/base_dataset.py @@ -28,7 +28,8 @@ logger.addHandler(logging.NullHandler()) # we know tiff as no geotransform : ignore warning -warnings.filterwarnings("ignore", category=rasterio.errors.NotGeoreferencedWarning) +warnings.filterwarnings( + "ignore", category=rasterio.errors.NotGeoreferencedWarning) # allow nan without warnings # some dask warnings are still non filtered: https://github.com/dask/dask/issues/3245 @@ -108,7 +109,8 @@ def _bbox_coords(self): """ Dataset bounding box, in line/sample coordinates """ - bbox_ext = bbox_coords(self.dataset.line.values, self.dataset.sample.values) + bbox_ext = bbox_coords(self.dataset.line.values, + self.dataset.sample.values) return bbox_ext @property @@ -229,12 +231,15 @@ def ll2coords(self, *args): else: scalar = True - tolerance = np.max([np.percentile(np.diff(self.dataset[c].values), 90) / 2 for c in ['line', 'sample']]) + 1 + tolerance = np.max([np.percentile( + np.diff(self.dataset[c].values), 90) / 2 for c in ['line', 'sample']]) + 1 try: # select the nearest valid pixel in ds - ds_nearest = self.dataset.sel(line=line, sample=sample, method='nearest', tolerance=tolerance) + ds_nearest = self.dataset.sel( + line=line, sample=sample, method='nearest', tolerance=tolerance) if scalar: - (line, sample) = (ds_nearest.line.values.item(), ds_nearest.sample.values.item()) + (line, sample) = (ds_nearest.line.values.item(), + ds_nearest.sample.values.item()) else: (line, sample) = (ds_nearest.line.values, ds_nearest.sample.values) except KeyError: @@ -268,7 +273,8 @@ def _set_rio(self, ds): # remove/reset some incorrect attrs set by rio # (long_name is 'latitude', but it's incorrect for line axis ...) 
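# A minimal runnable sketch (an editor's illustration, not part of the patch) of the
# nearest-pixel lookup used by ll2coords() above: xarray's .sel() accepts
# method='nearest' together with a tolerance, and raises KeyError when the
# requested point is farther away than that tolerance, which is the case
# handled by the `except KeyError` branch.
import numpy as np
import xarray as xr

ds = xr.Dataset(coords={'line': np.arange(0, 100, 10),
                        'sample': np.arange(0, 100, 10)})
# same heuristic as ll2coords: half the 90th-percentile coordinate spacing, plus one
tolerance = np.max([np.percentile(np.diff(ds[c].values), 90) / 2
                    for c in ['line', 'sample']]) + 1  # -> 6.0
nearest = ds.sel(line=42, sample=17, method='nearest', tolerance=tolerance)
print(nearest.line.item(), nearest.sample.item())  # -> 40 20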
for ax in ['line', 'sample']: - [ds[v][ax].attrs.pop(k, None) for k in ['long_name', 'standard_name']] + [ds[v][ax].attrs.pop(k, None) + for k in ['long_name', 'standard_name']] ds[v][ax].attrs['units'] = '1' if not want_dataset: @@ -282,8 +288,10 @@ def _set_rio(self, ds): def _local_gcps(self): # get local gcps, for rioxarray.reproject (row and col are *index*, not coordinates) local_gcps = [] - line_decimated = self.dataset.line.values[::int(self.dataset.line.size / 20) + 1] - sample_decimated = self.dataset.sample.values[::int(self.dataset.sample.size / 20) + 1] + line_decimated = self.dataset.line.values[::int( + self.dataset.line.size / 20) + 1] + sample_decimated = self.dataset.sample.values[::int( + self.dataset.sample.size / 20) + 1] XX, YY = np.meshgrid(line_decimated, sample_decimated) if self.sar_meta.product == 'SLC': logger.debug( @@ -339,7 +347,8 @@ def get_burst_valid_location(self): ibur * line_per_burst + valind.max(), lvs[valind].max()] valid_locations[ibur, :] = valloc tmpda = xr.DataArray(dims=['burst', 'limits'], - coords={'burst': self.datatree['bursts'].ds['burst'].values, 'limits': np.arange(4)}, + coords={ + 'burst': self.datatree['bursts'].ds['burst'].values, 'limits': np.arange(4)}, data=valid_locations, name='valid_location', attrs={ @@ -366,8 +375,11 @@ def get_bursts_polygons(self, only_valid_location=True): """ if self.resolution is not None: - factor_range = self.dataset['sampleSpacing'].values / self.resolution # eg 2.3/100 - factor_azimuth = self.dataset['lineSpacing'].values / self.resolution + # eg 2.3/100 + factor_range = self.dataset['sampleSpacing'].values / \ + self.resolution + factor_azimuth = self.dataset['lineSpacing'].values / \ + self.resolution else: factor_range = 1 factor_azimuth = 1 @@ -378,14 +390,16 @@ def get_bursts_polygons(self, only_valid_location=True): for submeta in self.sar_meta._submeta: block = submeta.bursts(only_valid_location=only_valid_location) block['subswath'] = submeta.dsid - block = block.set_index('subswath', append=True).reorder_levels(['subswath', 'burst']) + block = block.set_index('subswath', append=True).reorder_levels( + ['subswath', 'burst']) blocks_list.append(block) blocks = pd.concat(blocks_list) else: # burst_list = self._bursts self.get_burst_valid_location() burst_list = self.datatree['bursts'].ds - nb_samples = int(self.datatree['image'].ds['numberOfSamples'] * factor_range) + nb_samples = int( + self.datatree['image'].ds['numberOfSamples'] * factor_range) if burst_list['burst'].size == 0: blocks = gpd.GeoDataFrame() else: @@ -394,20 +408,23 @@ def get_bursts_polygons(self, only_valid_location=True): inds_burst, geoloc_azitime, geoloc_iburst, geoloc_line = self._get_indices_bursts() for burst_ind, uu in enumerate(np.unique(inds_burst)): if only_valid_location: - extent = np.copy(burst_list['valid_location'].values[burst_ind, :]) + extent = np.copy( + burst_list['valid_location'].values[burst_ind, :]) area = box(int(extent[0] * factor_azimuth), int(extent[1] * factor_range), int(extent[2] * factor_azimuth), int(extent[3] * factor_range)) else: inds_one_val = np.where(inds_burst == uu)[0] bursts_az_inds[uu] = inds_one_val - area = box(bursts_az_inds[burst_ind][0], 0, bursts_az_inds[burst_ind][-1], nb_samples) + area = box( + bursts_az_inds[burst_ind][0], 0, bursts_az_inds[burst_ind][-1], nb_samples) burst = pd.Series(dict([ ('geometry_image', area)])) bursts.append(burst) # to geopandas blocks = pd.concat(bursts, axis=1).T blocks = gpd.GeoDataFrame(blocks) - blocks['geometry'] = 
blocks['geometry_image'].apply(self.coords2ll) + blocks['geometry'] = blocks['geometry_image'].apply( + self.coords2ll) blocks.index.name = 'burst' return blocks @@ -435,7 +452,8 @@ def _get_indices_bursts(self): geoloc_line = self.sar_meta.geoloc['line'].values # geoloc_line = self.datatree['geolocation_annotation'].ds['line'].values # find the indice of the bursts in the geolocation grid - geoloc_iburst = np.floor(geoloc_line / float(burst_nlines)).astype('int32') + geoloc_iburst = np.floor( + geoloc_line / float(burst_nlines)).astype('int32') # find the indices of the bursts in the high resolution grid line = np.arange(0, self.sar_meta.image['numberOfLines']) # line = np.arange(0, self.datatree.attrs['numberOfLines']) @@ -464,10 +482,12 @@ def _rasterize_mask_by_chunks(line, sample, mask='land'): # chunk footprint polygon, in dataset coordinates (with buffer, to enlarge a little the footprint) chunk_footprint_coords = Polygon(chunk_coords).buffer(10) # chunk footprint polygon, in lon/lat - chunk_footprint_ll = self.sar_meta.coords2ll(chunk_footprint_coords) + chunk_footprint_ll = self.sar_meta.coords2ll( + chunk_footprint_coords) # get vector mask over chunk, in lon/lat - vector_mask_ll = self.sar_meta.get_mask(mask).intersection(chunk_footprint_ll) + vector_mask_ll = self.sar_meta.get_mask( + mask).intersection(chunk_footprint_ll) if vector_mask_ll.is_empty: # no intersection with mask, return zeros @@ -502,7 +522,8 @@ def _rasterize_mask_by_chunks(line, sample, mask='land'): func_kwargs={'mask': mask} ) name = '%s_mask' % mask - da_mask.attrs['history'] = yaml.safe_dump({name: self.sar_meta.get_mask(mask, describe=True)}) + da_mask.attrs['history'] = yaml.safe_dump( + {name: self.sar_meta.get_mask(mask, describe=True)}) da_mask.attrs['meaning'] = '0: ocean , 1: land' da_list.append(da_mask.to_dataset(name=name)) @@ -544,7 +565,8 @@ def coords2ll_SLC(self, *args): if isinstance(lines, list) or isinstance(lines, np.ndarray): pass else: # in case of a single point, - lines = [lines] # to avoid error when declaring the da_line and da_sample below + # to avoid error when declaring the da_line and da_sample below + lines = [lines] samples = [samples] da_line = xr.DataArray(lines, dims='points') da_sample = xr.DataArray(samples, dims='points') @@ -581,19 +603,25 @@ def _rasterize_mask_by_chunks(line, sample, mask='land'): """ chunk_coords = bbox_coords(line, sample, pad=None) # chunk footprint polygon, in dataset coordinates (with buffer, to enlarge a little the footprint) - chunk_footprint_coords = Polygon(chunk_coords) # .buffer(1) #no buffer-> corruption of the polygon + # .buffer(1) #no buffer-> corruption of the polygon + chunk_footprint_coords = Polygon(chunk_coords) assert chunk_footprint_coords.is_valid lines, samples = chunk_footprint_coords.exterior.xy lines = np.array([hh for hh in lines]).astype(int) - lines = np.clip(lines, a_min=0, a_max=self.dataset['line'].max().values) + lines = np.clip( + lines, a_min=0, a_max=self.dataset['line'].max().values) samples = np.array([hh for hh in samples]).astype(int) - samples = np.clip(samples, a_min=0, a_max=self.dataset['sample'].max().values) - chunk_footprint_lon, chunk_footprint_lat = self.coords2ll_SLC(lines, samples) - chunk_footprint_ll = Polygon(np.vstack([chunk_footprint_lon, chunk_footprint_lat]).T) + samples = np.clip(samples, a_min=0, + a_max=self.dataset['sample'].max().values) + chunk_footprint_lon, chunk_footprint_lat = self.coords2ll_SLC( + lines, samples) + chunk_footprint_ll = Polygon( + np.vstack([chunk_footprint_lon, 
chunk_footprint_lat]).T) if chunk_footprint_ll.is_valid is False: chunk_footprint_ll = make_valid(chunk_footprint_ll) # get vector mask over chunk, in lon/lat - vector_mask_ll = self.sar_meta.get_mask(mask).intersection(chunk_footprint_ll) + vector_mask_ll = self.sar_meta.get_mask( + mask).intersection(chunk_footprint_ll) if vector_mask_ll.is_empty: # no intersection with mask, return zeros return np.zeros((line.size, sample.size)) @@ -603,15 +631,18 @@ def _rasterize_mask_by_chunks(line, sample, mask='land'): lons_ma, lats_ma = vector_mask_ll.exterior.xy lons = np.array([hh for hh in lons_ma]) lats = np.array([hh for hh in lats_ma]) - vector_mask_coords_lines, vector_mask_coords_samples = self.ll2coords_SLC(lons, lats) - vector_mask_coords = [Polygon(np.vstack([vector_mask_coords_lines, vector_mask_coords_samples]).T)] + vector_mask_coords_lines, vector_mask_coords_samples = self.ll2coords_SLC( + lons, lats) + vector_mask_coords = [ + Polygon(np.vstack([vector_mask_coords_lines, vector_mask_coords_samples]).T)] else: # multipolygon vector_mask_coords = [] # to store polygons in image coordinates for iio, onepoly in enumerate(vector_mask_ll.geoms): lons_ma, lats_ma = onepoly.exterior.xy lons = np.array([hh for hh in lons_ma]) lats = np.array([hh for hh in lats_ma]) - vector_mask_coords_lines, vector_mask_coords_samples = self.ll2coords_SLC(lons, lats) + vector_mask_coords_lines, vector_mask_coords_samples = self.ll2coords_SLC( + lons, lats) vector_mask_coords.append( Polygon(np.vstack([vector_mask_coords_lines, vector_mask_coords_samples]).T)) @@ -639,17 +670,22 @@ def _rasterize_mask_by_chunks(line, sample, mask='land'): da_dict = {} # dictionnary to store the DataArray of each mask and each burst for burst_id in range(len(all_bursts['geometry_image'])): a_burst_bbox = all_bursts['geometry_image'].iloc[burst_id] - line_index = np.array([int(jj) for jj in a_burst_bbox.exterior.xy[0]]) - sample_index = np.array([int(jj) for jj in a_burst_bbox.exterior.xy[1]]) - logger.debug('line_index : %s %s %s', line_index, line_index.min(), line_index.max()) + line_index = np.array([int(jj) + for jj in a_burst_bbox.exterior.xy[0]]) + sample_index = np.array([int(jj) + for jj in a_burst_bbox.exterior.xy[1]]) + logger.debug('line_index : %s %s %s', line_index, + line_index.min(), line_index.max()) logger.debug('dataset shape %s', self.dataset.digital_number.shape) a_burst_subset = self.dataset.isel({'line': slice(line_index.min(), line_index.max()), 'sample': slice(sample_index.min(), sample_index.max()), 'pol': 0}) - logger.debug('a_burst_subset %s', a_burst_subset.digital_number.shape) + logger.debug('a_burst_subset %s', + a_burst_subset.digital_number.shape) # logging.info('burst : %s lines: %s samples: %s',burst_id,a_burst_subset.digital_number.line,a_burst_subset.digital_number.sample) da_tmpl = xr.DataArray( dask.array.empty_like( - np.empty((len(a_burst_subset.digital_number.line), len(a_burst_subset.digital_number.sample))), + np.empty((len(a_burst_subset.digital_number.line), + len(a_burst_subset.digital_number.sample))), dtype=np.int8, name="empty_var_tmpl-%s" % dask.base.tokenize(self.sar_meta.name)), dims=('line', 'sample'), coords={ @@ -674,7 +710,8 @@ def _rasterize_mask_by_chunks(line, sample, mask='land'): 'line': a_burst_subset.digital_number.line, 'sample': a_burst_subset.digital_number.sample, }) name = '%s_maskv2' % mask - da_mask.attrs['history'] = yaml.safe_dump({name: self.sar_meta.get_mask(mask, describe=True)}) + da_mask.attrs['history'] = yaml.safe_dump( + {name: 
self.sar_meta.get_mask(mask, describe=True)}) da_mask.attrs['meaning'] = '0: ocean , 1: land' da_mask = da_mask.fillna(0) # zero -> ocean da_mask = da_mask.astype(np.int8) @@ -699,7 +736,8 @@ def _rasterize_mask_by_chunks(line, sample, mask='land'): tmpmerged = tmpmerged.rename({'land_maskv2': 'land_mask'}) tmpmerged.attrs['land_mask_computed_by_burst'] = True self.dataset = tmpmerged - self.datatree['measurement'] = self.datatree['measurement'].assign(tmpmerged) + self.datatree['measurement'] = self.datatree['measurement'].assign( + tmpmerged) self.datatree['measurement'].attrs = tmpmerged.attrs @property @@ -744,6 +782,7 @@ def map_raster(self, raster_ds): raster_ds = raster_ds.reindex({coord: raster_ds[coord][::-1]}) # from lon/lat box in xsar dataset, get the corresponding box in raster_ds (by index) + """ ilon_range = [ np.searchsorted(raster_ds.x.values, lon_range[0]), np.searchsorted(raster_ds.x.values, lon_range[1]) @@ -752,15 +791,29 @@ def map_raster(self, raster_ds): np.searchsorted(raster_ds.y.values, lat_range[0]), np.searchsorted(raster_ds.y.values, lat_range[1]) ] + """ # for incomplete raster (not global like hwrf) + ilon_range = [ + np.max([1, np.searchsorted(raster_ds.x.values, lon_range[0])]), + np.min([np.searchsorted(raster_ds.x.values, + lon_range[1]), raster_ds.x.size]) + ] + ilat_range = [ + np.max([1, np.searchsorted(raster_ds.y.values, lat_range[0])]), + np.min([np.searchsorted(raster_ds.y.values, + lat_range[1]), raster_ds.y.size]) + ] + # enlarge the raster selection range, for correct interpolation - ilon_range, ilat_range = [[rg[0] - 1, rg[1] + 1] for rg in (ilon_range, ilat_range)] + ilon_range, ilat_range = [[rg[0] - 1, rg[1] + 1] + for rg in (ilon_range, ilat_range)] # select the xsar box in the raster raster_ds = raster_ds.isel(x=slice(*ilon_range), y=slice(*ilat_range)) # upscale coordinates, in original projection # 1D array of lons/lats, trying to have same spacing as dataset (if not to high) - num = min((self._dataset.sample.size + self._dataset.line.size) // 2, 1000) + num = min((self._dataset.sample.size + + self._dataset.line.size) // 2, 1000) lons = np.linspace(*lon_range, num=num) lats = np.linspace(*lat_range, num=num) @@ -779,7 +832,8 @@ def map_raster(self, raster_ds): upscaled_da = raster_da.interp(x=lons, y=lats) else: upscaled_da = map_blocks_coords( - xr.DataArray(dims=['y', 'x'], coords={'x': lons, 'y': lats}).chunk(1000), + xr.DataArray(dims=['y', 'x'], coords={ + 'x': lons, 'y': lats}).chunk(1000), RectBivariateSpline( raster_da.y.values, raster_da.x.values, @@ -829,7 +883,8 @@ def _load_rasters_vars(self): 'footprint': self.sar_meta.footprint } - logger.debug('adding raster "%s" from resource "%s"' % (name, str(resource))) + logger.debug('adding raster "%s" from resource "%s"' % + (name, str(resource))) if get_function is not None: try: resource_dec = get_function(resource, **kwargs_get) @@ -851,10 +906,12 @@ def _load_rasters_vars(self): if get_function is not None: hist_res.update({'resource_decoded': resource_dec[1]}) - reprojected_ds = self.map_raster(raster_ds).rename({v: '%s_%s' % (name, v) for v in raster_ds}) + reprojected_ds = self.map_raster(raster_ds).rename( + {v: '%s_%s' % (name, v) for v in raster_ds}) for v in reprojected_ds: - reprojected_ds[v].attrs['history'] = yaml.safe_dump({v: hist_res}) + reprojected_ds[v].attrs['history'] = yaml.safe_dump({ + v: hist_res}) da_var_list.append(reprojected_ds) return xr.merge(da_var_list) @@ -879,5 +936,6 @@ def _get_lut(self, var_name): try: lut = self._luts[lut_name] except 
KeyError:
-        raise ValueError("can't find lut from name '%s' for variable '%s' " % (lut_name, var_name))
+        raise ValueError(
+            "can't find lut from name '%s' for variable '%s' " % (lut_name, var_name))
         return lut
diff --git a/src/xsar/raster_readers.py b/src/xsar/raster_readers.py
index 7bf86e07..b1f26a36 100644
--- a/src/xsar/raster_readers.py
+++ b/src/xsar/raster_readers.py
@@ -6,13 +6,13 @@
 import pandas as pd
 
 
-def resource_strftime(resource, **kwargs):
+def resource_strftime_hh(resource, **kwargs):
     """
     From a resource string like '%Y/%j/myfile_%Y%m%d%H%M.nc' and a date like 'Timestamp('2018-10-13 06:23:22.317102')',
     returns a tuple composed of the closer available date and string like '/2018/286/myfile_201810130600.nc'
 
     If ressource string is an url (ie 'ftp://ecmwf/%Y/%j/myfile_%Y%m%d%H%M.nc'), fsspec will be used to retreive the file locally.
-    
+
     Parameters
     ----------
     resource: str
@@ -47,9 +47,48 @@
         second=0,
         microsecond=0
     )
+
     return date, url_get(date.strftime(resource))
 
 
+def resource_strftime_dd(resource, **kwargs):
+    """
+    From a resource string like '%Y/%m/myfile_%Y%m%d.nc' and a date like 'Timestamp('2021-09-09 13:06:50.00000')',
+    returns a tuple composed of the closer available date and string like '/2021/09/myfile_20210909.nc'
+
+    If resource string is a URL (e.g. 'ftp://era5/%Y/%m/myfile_%Y%m%d.nc'), fsspec will be used to retrieve the file locally.
+
+    Parameters
+    ----------
+    resource: str
+
+        resource string, with strftime template
+
+    date: datetime
+
+        date to be used
+
+    Returns
+    -------
+    tuple : (datetime,str)
+
+    """
+    date = kwargs['date']
+    step = kwargs['step']
+
+    delta = datetime.timedelta(hours=step) / 2
+    date = date.replace(
+        year=(date+delta).year,
+        month=(date+delta).month,
+        day=(date+delta).day,
+        hour=(date+delta).hour // step * step,
+        minute=0,
+        second=0,
+        microsecond=0
+    )
+
+    return date, url_get(date.strftime(resource))
+
 
 def _to_lon180(ds):
     # roll [0, 360] to [-180, 180] on dim x
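# A minimal sketch (not part of the patch) of the rounding shared by both
# get_functions above: adding half a step before truncating snaps the date to
# the closest multiple of `step` hours, and a day-resolution template then
# simply ignores the hour field.
import datetime

date = datetime.datetime(2021, 9, 9, 13, 6, 50)
step = 3
delta = datetime.timedelta(hours=step) / 2
rounded = date.replace(year=(date + delta).year, month=(date + delta).month,
                       day=(date + delta).day,
                       hour=(date + delta).hour // step * step,
                       minute=0, second=0, microsecond=0)
print(rounded)  # 2021-09-09 12:00:00, the closest 3-hourly slot to 13:06:50
print(rounded.strftime('%Y/%m/myfile_%Y%m%d.nc'))  # 2021/09/myfile_20210909.nc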
@@ -61,22 +100,24 @@ def ecmwf_0100_1h(fname, **kwargs):
     """
     ecmwf 0.1 deg 1h reader (ECMWF_FORECAST_0100_202109091300_10U_10V.nc)
-    
+
     Parameters
     ----------
     fname: str
-        
+
         hwrf filename
 
     Returns
     -------
     xarray.Dataset
     """
-    ecmwf_ds = xr.open_dataset(fname, chunks={'Longitude': 1000, 'Latitude': 1000}).isel(time=0)
-    ecmwf_ds.attrs['time'] = datetime.datetime.fromtimestamp(ecmwf_ds.time.item() // 1000000000)
+    ecmwf_ds = xr.open_dataset(
+        fname, chunks={'Longitude': 1000, 'Latitude': 1000}).isel(time=0)
+    ecmwf_ds.attrs['time'] = datetime.datetime.fromtimestamp(
+        ecmwf_ds.time.item() // 1000000000)
     if 'time' in ecmwf_ds:
         ecmwf_ds = ecmwf_ds.drop("time")
-    ecmwf_ds = ecmwf_ds[["Longitude","Latitude","10U","10V"]].rename(
+    ecmwf_ds = ecmwf_ds[["Longitude", "Latitude", "10U", "10V"]].rename(
         {
             'Longitude': 'x',
             'Latitude': 'y',
@@ -84,7 +125,8 @@ def ecmwf_0100_1h(fname, **kwargs):
             '10V': 'V10'
         }
     )
-    ecmwf_ds.attrs = {k: ecmwf_ds.attrs[k] for k in ['title', 'institution', 'time']}
+    ecmwf_ds.attrs = {k: ecmwf_ds.attrs[k]
+                      for k in ['title', 'institution', 'time']}
 
     # dataset is lon [0, 360], make it [-180,180]
     ecmwf_ds = _to_lon180(ecmwf_ds)
@@ -97,18 +139,19 @@ def ecmwf_0125_1h(fname, **kwargs):
     """
     ecmwf 0.125 deg 1h reader (ecmwf_201709071100.nc)
-    
+
     Parameters
     ----------
     fname: str
-        
+
         hwrf filename
 
     Returns
     -------
     xarray.Dataset
     """
-    ecmwf_ds = xr.open_dataset(fname, chunks={'longitude': 1000, 'latitude': 1000})
+    ecmwf_ds = xr.open_dataset(
+        fname, chunks={'longitude': 1000, 'latitude': 1000})
 
     ecmwf_ds = ecmwf_ds.rename(
         {'longitude': 'x', 'latitude': 'y'}
@@ -122,22 +165,23 @@ def ecmwf_0125_1h(fname, **kwargs):
     # dataset is lon [0, 360], make it [-180,180]
     ecmwf_ds = _to_lon180(ecmwf_ds)
 
-    ecmwf_ds.attrs['time'] = datetime.datetime.fromisoformat(ecmwf_ds.attrs['date'])
+    ecmwf_ds.attrs['time'] = datetime.datetime.fromisoformat(
+        ecmwf_ds.attrs['date'])
 
     ecmwf_ds.rio.write_crs("EPSG:4326", inplace=True)
 
     return ecmwf_ds
 
 
-def hwrf_0015_3h(fname,**kwargs):
+def hwrf_0015_3h(fname, **kwargs):
     """
     hwrf 0.015 deg 3h reader ()
-    
-    
+
+
     Parameters
     ----------
     fname: str
-        
+
         hwrf filename
 
     Returns
@@ -145,29 +189,67 @@
     xarray.Dataset
     """
     hwrf_ds = xr.open_dataset(fname)
-    try :
-        hwrf_ds = hwrf_ds[['U','V','LON','LAT']]
+    try:
+        hwrf_ds = hwrf_ds[['U', 'V', 'LON', 'LAT']]
         hwrf_ds = hwrf_ds.squeeze('t', drop=True)
-    except Exception as e:
-        raise ValueError("date '%s' can't be find in %s " % (kwargs['date'], fname))
-
-    hwrf_ds.attrs['time'] = datetime.datetime.strftime(kwargs['date'],'%Y-%m-%d %H:%M:%S')
+    except Exception as e:
+        raise ValueError("date '%s' can't be found in %s " %
+                         (kwargs['date'], fname))
 
-    hwrf_ds = hwrf_ds.assign_coords({"x":hwrf_ds.LON.values[0,:],"y":hwrf_ds.LAT.values[:,0]}).drop_vars(['LON','LAT']).rename(
-        {
-        'U': 'U10',
-        'V': 'V10'
-        }
-    )
+    hwrf_ds.attrs['time'] = datetime.datetime.strftime(
+        kwargs['date'], '%Y-%m-%d %H:%M:%S')
 
-    #hwrf_ds.attrs = {k: hwrf_ds.attrs[k] for k in ['institution', 'time']}
+    hwrf_ds = hwrf_ds.assign_coords({"x": hwrf_ds.LON.values[0, :], "y": hwrf_ds.LAT.values[:, 0]}).drop_vars(['LON', 'LAT']).rename(
+        {
+            'U': 'U10',
+            'V': 'V10'
+        }
+    )
+
+    # hwrf_ds.attrs = {k: hwrf_ds.attrs[k] for k in ['institution', 'time']}
     hwrf_ds = _to_lon180(hwrf_ds)
 
     hwrf_ds.rio.write_crs("EPSG:4326", inplace=True)
 
     return hwrf_ds
 
 
+def era5_0250_1h(fname, **kwargs):
+    """
+    era5 0.250 deg 1h reader ()
+
+
+    Parameters
+    ----------
+    fname: str
+
+        era5 filename
+
+    Returns
+    -------
+    xarray.Dataset
+    """
+
+    ds_era5 = xr.open_dataset(fname)
+    ds_era5 = ds_era5[['u10', 'v10', 'latitude025', 'longitude025']]
+    ds_era5 = ds_era5.sel(time=str(kwargs['date']))
+    ds_era5 = ds_era5.drop_vars('time')
+
+    ds_era5 = ds_era5.rename(
+        {
+            'longitude025': 'x',
+            'latitude025': 'y',
+            'u10': 'U10',
+            'v10': 'V10'
+        }
+    )
+
+    ds_era5.attrs['time'] = kwargs['date']
+    ds_era5 = _to_lon180(ds_era5)
+    ds_era5.rio.write_crs("EPSG:4326", inplace=True)
+    return ds_era5
+
+
 def gebco(gebco_files):
     """gebco file reader (geotiff from https://www.gebco.net/data_and_products/gridded_bathymetry_data)"""
     return xr.combine_by_coords(
@@ -178,9 +260,16 @@ def gebco(gebco_files):
     ]
     )
 
+
 # list available rasters as a pandas dataframe
-available_rasters = pd.DataFrame(columns=['resource', 'read_function', 'get_function'])
+available_rasters = pd.DataFrame(
+    columns=['resource', 'read_function', 'get_function'])
 available_rasters.loc['gebco'] = [None, gebco, glob.glob]
-available_rasters.loc['ecmwf_0100_1h'] = [None, ecmwf_0100_1h, bind(resource_strftime, ..., step=1)]
-available_rasters.loc['ecmwf_0125_1h'] = [None, ecmwf_0125_1h, bind(resource_strftime, ..., step=1)]
-available_rasters.loc['hwrf_0015_3h'] = [None, hwrf_0015_3h, bind(resource_strftime, ..., step=3)]
+available_rasters.loc['ecmwf_0100_1h'] = [
+    None, ecmwf_0100_1h, bind(resource_strftime_hh, ..., step=1)]
+available_rasters.loc['ecmwf_0125_1h'] = [
+    None, ecmwf_0125_1h, bind(resource_strftime_hh, ..., step=1)]
+available_rasters.loc['hwrf_0015_3h'] = [
+    None, hwrf_0015_3h, bind(resource_strftime_hh, ..., step=3)]
+available_rasters.loc['era5_0250_1h'] = [
+    None, era5_0250_1h, bind(resource_strftime_dd, ..., step=1)]
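Once registered, the era5 reader goes through the same pipeline as the other rasters: _load_rasters_vars() resolves the dated file via the get_function, opens it with the read_function, and map_raster() interpolates the U10/V10 fields onto the SAR grid. A minimal sketch of pointing the new entry at a local archive (the path template below is hypothetical):

    from xsar.raster_readers import available_rasters

    # strftime template resolved at load time by the registered get_function
    available_rasters.loc['era5_0250_1h', 'resource'] = '/data/era5/%Y/%m/era5_%Y%m%d.nc'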
From 60064e8d5311c00d56d3464689821affbea75881 Mon Sep 17 00:00:00 2001
From: Vincent LHEUREUX
Date: Fri, 14 Jun 2024 17:18:18 +0200
Subject: [PATCH 2/3] resource_strftime is enough

---
 src/xsar/raster_readers.py | 49 ++++----------------------------------
 1 file changed, 5 insertions(+), 44 deletions(-)

diff --git a/src/xsar/raster_readers.py b/src/xsar/raster_readers.py
index b1f26a36..647b31f5 100644
--- a/src/xsar/raster_readers.py
+++ b/src/xsar/raster_readers.py
@@ -6,7 +6,7 @@
 import pandas as pd
 
 
-def resource_strftime_hh(resource, **kwargs):
+def resource_strftime(resource, **kwargs):
     """
     From a resource string like '%Y/%j/myfile_%Y%m%d%H%M.nc' and a date like 'Timestamp('2018-10-13 06:23:22.317102')',
     returns a tuple composed of the closer available date and string like '/2018/286/myfile_201810130600.nc'
@@ -51,45 +51,6 @@
     return date, url_get(date.strftime(resource))
 
 
-def resource_strftime_dd(resource, **kwargs):
-    """
-    From a resource string like '%Y/%m/myfile_%Y%m%d.nc' and a date like 'Timestamp('2021-09-09 13:06:50.00000')',
-    returns a tuple composed of the closer available date and string like '/2021/09/myfile_20210909.nc'
-
-    If resource string is a URL (e.g. 'ftp://era5/%Y/%m/myfile_%Y%m%d.nc'), fsspec will be used to retrieve the file locally.
-
-    Parameters
-    ----------
-    resource: str
-
-        resource string, with strftime template
-
-    date: datetime
-
-        date to be used
-
-    Returns
-    -------
-    tuple : (datetime,str)
-
-    """
-    date = kwargs['date']
-    step = kwargs['step']
-
-    delta = datetime.timedelta(hours=step) / 2
-    date = date.replace(
-        year=(date+delta).year,
-        month=(date+delta).month,
-        day=(date+delta).day,
-        hour=(date+delta).hour // step * step,
-        minute=0,
-        second=0,
-        microsecond=0
-    )
-
-    return date, url_get(date.strftime(resource))
-
-
 def _to_lon180(ds):
     # roll [0, 360] to [-180, 180] on dim x
     ds = ds.roll(x=-np.searchsorted(ds.x, 180), roll_coords=True)
@@ -266,10 +227,10 @@
     columns=['resource', 'read_function', 'get_function'])
 available_rasters.loc['gebco'] = [None, gebco, glob.glob]
 available_rasters.loc['ecmwf_0100_1h'] = [
-    None, ecmwf_0100_1h, bind(resource_strftime_hh, ..., step=1)]
+    None, ecmwf_0100_1h, bind(resource_strftime, ..., step=1)]
 available_rasters.loc['ecmwf_0125_1h'] = [
-    None, ecmwf_0125_1h, bind(resource_strftime_hh, ..., step=1)]
+    None, ecmwf_0125_1h, bind(resource_strftime, ..., step=1)]
 available_rasters.loc['hwrf_0015_3h'] = [
-    None, hwrf_0015_3h, bind(resource_strftime_hh, ..., step=3)]
+    None, hwrf_0015_3h, bind(resource_strftime, ..., step=3)]
 available_rasters.loc['era5_0250_1h'] = [
-    None, era5_0250_1h, bind(resource_strftime_dd, ..., step=1)]
+    None, era5_0250_1h, bind(resource_strftime, ..., step=1)]
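The consolidation in patch 2 works because resource_strftime already shifts year, month and day along with the hour when rounding, so a daily template simply leaves the hour fields unused. A quick sanity check (a sketch, assuming a plain local path, which url_get should pass through unchanged):

    import pandas as pd
    from xsar.raster_readers import resource_strftime

    date, path = resource_strftime('/era5/%Y/%m/era5_%Y%m%d.nc',
                                   date=pd.Timestamp('2021-09-09 13:06:50'),
                                   step=1)
    print(date, path)  # 2021-09-09 13:00:00 /era5/2021/09/era5_20210909.nc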
From 86e99985bf77052849fa307b94a9788ce8a80a28 Mon Sep 17 00:00:00 2001
From: Vincent LHEUREUX
Date: Fri, 14 Jun 2024 17:22:56 +0200
Subject: [PATCH 3/3] add chunk

---
 src/xsar/raster_readers.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/xsar/raster_readers.py b/src/xsar/raster_readers.py
index 647b31f5..d04c54c6 100644
--- a/src/xsar/raster_readers.py
+++ b/src/xsar/raster_readers.py
@@ -191,7 +191,7 @@ def era5_0250_1h(fname, **kwargs):
     xarray.Dataset
     """
 
-    ds_era5 = xr.open_dataset(fname)
+    ds_era5 = xr.open_dataset(fname, chunks={'time': 1})
     ds_era5 = ds_era5[['u10', 'v10', 'latitude025', 'longitude025']]
     ds_era5 = ds_era5.sel(time=str(kwargs['date']))
     ds_era5 = ds_era5.drop_vars('time')
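With patch 3, the ERA5 file is opened through dask with one chunk per time step, so the subsequent .sel(time=...) only ever reads the single step being mapped instead of loading the whole file eagerly. A sketch of the effect (filename and grid sizes hypothetical):

    import xarray as xr

    ds = xr.open_dataset('era5_202109.nc', chunks={'time': 1})
    print(ds['u10'].chunks)  # e.g. ((1, 1, ..., 1), (721,), (1440,)): one chunk per time step
    step = ds.sel(time='2021-09-09 13:00')  # still lazy
    step = step.compute()  # only this single time step is read from disk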