pytroll · mraspaud · Oct 5, 2023 · Mar 11, 2022 · Mar 11, 2022 · Mar 11, 2022
@@ -5,15 +5,6 @@ reader:
   reader: !!python/name:satpy.readers.yaml_reader.FileYAMLReader
   sensors: [modis]
 
-navigations:
-  hdf_eos_geo:
-      description: MODIS navigation
-      file_type: hdf_eos_geo
-      latitude_key: Latitude
-      longitude_key: Longitude
-      nadir_resolution: [1000]
-      rows_per_scan: 10
-
 datasets:
   '1':
     name: '1'

@@ -25,12 +25,13 @@
 from contextlib import suppress
 from datetime import datetime
 
+import dask.array.core
 import numpy as np
 import xarray as xr
 from pyhdf.error import HDF4Error
 from pyhdf.SD import SD
 
-from satpy import CHUNK_SIZE, DataID
+from satpy import DataID
 from satpy.readers.file_handlers import BaseFileHandler
 
 logger = logging.getLogger(__name__)
@@ -216,14 +217,48 @@ def load_dataset(self, dataset_name, is_category=False):
         from satpy.readers.hdf4_utils import from_sds
 
         dataset = self._read_dataset_in_file(dataset_name)
-        dask_arr = from_sds(dataset, chunks=CHUNK_SIZE)
+        chunks = self._chunks_for_variable(dataset)
+        dask_arr = from_sds(dataset, chunks=chunks)
         dims = ('y', 'x') if dask_arr.ndim == 2 else None
         data = xr.DataArray(dask_arr, dims=dims,
                             attrs=dataset.attributes())
         data = self._scale_and_mask_data_array(data, is_category=is_category)
 
         return data
 
+    def _chunks_for_variable(self, hdf_dataset):
+        """Choose scan-aligned dask chunks for a variable of the HDF4 file.
+
+        The row chunk size is computed for the variable's shape scaled up to
+        250m resolution, rounded to a whole number of 250m scans, and then
+        scaled back down to the variable's own resolution.
+
+        Args:
+            hdf_dataset: Dataset object whose ``info()[2]`` gives the shape
+                of the variable.
+
+        Returns:
+            tuple: Chunk specification for the variable. 2D variables get an
+            integer row chunk size and ``-1`` (one chunk) for the columns. 3D
+            variables, assumed to be ``(band, y, x)``, additionally get one
+            chunk per element of the first dimension. Any other rank falls
+            back to dask's "auto" chunking.
+
+        """
+        scan_length_250m = 40
+        # pyhdf may report the shape of a rank-1 dataset as a bare integer
+        # rather than a sequence; normalise it so that len() and indexing
+        # below work either way.
+        var_shape = tuple(int(dim_size) for dim_size in np.atleast_1d(hdf_dataset.info()[2]))
+        res_multiplier = self._get_res_multiplier(var_shape)
+        non_yx_chunks = tuple()
+        if len(var_shape) == 3:
+            # assume (band, y, x)
+            non_yx_chunks = ((1,) * var_shape[0],)
+            var_shape = var_shape[-2:]
+        elif len(var_shape) != 2:
+            # don't guess
+            return dask.array.core.normalize_chunks("auto", shape=var_shape, dtype=np.float32)
+        shape_for_250m = tuple(dim_size * res_multiplier for dim_size in var_shape)
+        chunks_for_250m = dask.array.core.normalize_chunks(("auto", -1), shape=shape_for_250m, dtype=np.float32)
+        row_chunks_for_250m = chunks_for_250m[0][0]
+        # Work in whole scans and keep at least one, so the chunk size can
+        # never become 0.
+        scans_per_chunk = max(int(np.round(row_chunks_for_250m / scan_length_250m)), 1)
+        # Chunk sizes are element counts, so keep them as plain integers
+        # instead of the float produced by true division.
+        var_row_chunks = (scans_per_chunk * scan_length_250m) // res_multiplier
+        return non_yx_chunks + (var_row_chunks, -1)
+
+    @staticmethod
+    def _get_res_multiplier(var_shape):
+        """Get the factor between a variable's resolution and 250m.
+
+        The resolution is inferred from the number of columns, i.e. the last
+        element of ``var_shape``. The first threshold that the column count
+        does not exceed decides the multiplier; wider arrays get ``1``.
+
+        """
+        num_columns = var_shape[-1]
+        # (maximum number of columns, multiplier), from coarsest to finest
+        column_limits = (
+            (271, 20),  # 5km
+            (1354, 4),  # 1km
+            (2708, 2),  # 500m
+            (5416, 1),  # 250m
+        )
+        return next(
+            (multiplier for limit, multiplier in column_limits if num_columns <= limit),
+            1,
+        )
+
     def _scale_and_mask_data_array(self, data, is_category=False):
         good_mask, new_fill = self._get_good_data_mask(data, is_category=is_category)
         scale_factor = data.attrs.pop('scale_factor', None)

@@ -48,7 +48,6 @@
 import numpy as np
 import xarray as xr
 
-from satpy import CHUNK_SIZE
 from satpy.readers.hdf4_utils import from_sds
 from satpy.readers.hdfeos_base import HDFEOSBaseFileReader, HDFEOSGeoReader
 
@@ -95,8 +94,8 @@ def get_dataset(self, key, info):
                 index = band_names.index(key['name'])
             except ValueError:
                 continue
-            uncertainty = self.sd.select(dataset + "_Uncert_Indexes")
-            array = xr.DataArray(from_sds(subdata, chunks=CHUNK_SIZE)[index, :, :],
+            chunks = self._chunks_for_variable(subdata)
+            array = xr.DataArray(from_sds(subdata, chunks=chunks)[index, :, :],
                                  dims=['y', 'x']).astype(np.float32)
             valid_range = var_attrs['valid_range']
 
@@ -122,7 +121,9 @@ def get_dataset(self, key, info):
 
             array = array.where(array >= np.float32(valid_range[0]))
             array = array.where(array <= np.float32(valid_range[1]))
-            array = array.where(from_sds(uncertainty, chunks=CHUNK_SIZE)[index, :, :] < 15)
+            uncertainty = self.sd.select(dataset + "_Uncert_Indexes")
+            uncertainty_chunks = self._chunks_for_variable(uncertainty)
+            array = array.where(from_sds(uncertainty, chunks=uncertainty_chunks)[index, :, :] < 15)
 
             if key['calibration'] == 'brightness_temperature':
                 projectable = calibrate_bt(array, var_attrs, index, key['name'])

diff --git a/satpy/tests/reader_tests/test_modis_l1b.py b/satpy/tests/reader_tests/test_modis_l1b.py
@@ -41,6 +41,18 @@ def _check_shared_metadata(data_arr):
     assert "rows_per_scan" in data_arr.attrs
     assert isinstance(data_arr.attrs["rows_per_scan"], int)
     assert data_arr.attrs['reader'] == 'modis_l1b'
+    assert "resolution" in data_arr.attrs
+    res = data_arr.attrs["resolution"]
+    if res == 5000:
+        assert data_arr.chunks == ((2, 2, 2), (data_arr.shape[1],))
+    elif res == 1000:
+        assert data_arr.chunks == ((10, 10, 10), (data_arr.shape[1],))
+    elif res == 500:
+        assert data_arr.chunks == ((20, 20, 20), (data_arr.shape[1],))
+    elif res == 250:
+        assert data_arr.chunks == ((40, 40, 40), (data_arr.shape[1],))
+    else:
+        raise ValueError(f"Unexpected resolution: {res}")
 
 
 def _load_and_check_geolocation(scene, resolution, exp_res, exp_shape, has_res,
@@ -137,7 +149,8 @@ def test_load_longitude_latitude(self, input_files, has_5km, has_500, has_250, d
         shape_500m = _shape_for_resolution(500)
         shape_250m = _shape_for_resolution(250)
         default_shape = _shape_for_resolution(default_res)
-        with dask.config.set(scheduler=CustomScheduler(max_computes=1 + has_5km + has_500 + has_250)):
+        scheduler = CustomScheduler(max_computes=1 + has_5km + has_500 + has_250)
+        with dask.config.set({'scheduler': scheduler, 'array.chunk-size': '1 MiB'}):
             _load_and_check_geolocation(scene, "*", default_res, default_shape, True)
             _load_and_check_geolocation(scene, 5000, 5000, shape_5km, has_5km)
             _load_and_check_geolocation(scene, 500, 500, shape_500m, has_500)
@@ -147,7 +160,8 @@ def test_load_sat_zenith_angle(self, modis_l1b_nasa_mod021km_file):
         """Test loading satellite zenith angle band."""
         scene = Scene(reader='modis_l1b', filenames=modis_l1b_nasa_mod021km_file)
         dataset_name = 'satellite_zenith_angle'
-        scene.load([dataset_name])
+        with dask.config.set({'array.chunk-size': '1 MiB'}):
+            scene.load([dataset_name])
         dataset = scene[dataset_name]
         assert dataset.shape == _shape_for_resolution(1000)
         assert dataset.attrs['resolution'] == 1000
@@ -157,7 +171,8 @@ def test_load_vis(self, modis_l1b_nasa_mod021km_file):
         """Test loading visible band."""
         scene = Scene(reader='modis_l1b', filenames=modis_l1b_nasa_mod021km_file)
         dataset_name = '1'
-        scene.load([dataset_name])
+        with dask.config.set({'array.chunk-size': '1 MiB'}):
+            scene.load([dataset_name])
         dataset = scene[dataset_name]
         assert dataset.shape == _shape_for_resolution(1000)
         assert dataset.attrs['resolution'] == 1000