Add resolution dependent chunk sizing to 'modis_l1b' reader #2052

Merged · 17 commits · Oct 5, 2023
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
@@ -28,7 +28,7 @@ repos:
          - types-pkg-resources
          - types-PyYAML
          - types-requests
-        args: ["--python-version", "3.8", "--ignore-missing-imports"]
+        args: ["--python-version", "3.9", "--ignore-missing-imports"]
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
71 changes: 1 addition & 70 deletions satpy/_compat.py
@@ -17,80 +17,11 @@
# satpy. If not, see <http://www.gnu.org/licenses/>.
"""Backports and compatibility fixes for satpy."""

-from threading import RLock
-
-_NOT_FOUND = object()
-
-
-class CachedPropertyBackport:
-    """Backport of cached_property from Python-3.8.
-
-    Source: https://github.com/python/cpython/blob/v3.8.0/Lib/functools.py#L930
-    """
-
-    def __init__(self, func):  # noqa
-        self.func = func
-        self.attrname = None
-        self.__doc__ = func.__doc__
-        self.lock = RLock()
-
-    def __set_name__(self, owner, name):  # noqa
-        if self.attrname is None:
-            self.attrname = name
-        elif name != self.attrname:
-            raise TypeError(
-                "Cannot assign the same cached_property to two different names "
-                f"({self.attrname!r} and {name!r})."
-            )
-
-    def __get__(self, instance, owner=None):  # noqa
-        if instance is None:
-            return self
-        if self.attrname is None:
-            raise TypeError(
-                "Cannot use cached_property instance without calling __set_name__ on it.")
-        try:
-            cache = instance.__dict__  # noqa
-        except AttributeError:  # not all objects have __dict__ (e.g. class defines slots)
-            msg = (
-                f"No '__dict__' attribute on {type(instance).__name__!r} "
-                f"instance to cache {self.attrname!r} property."
-            )
-            raise TypeError(msg) from None
-        val = cache.get(self.attrname, _NOT_FOUND)
-        if val is _NOT_FOUND:
-            with self.lock:
-                # check if another thread filled cache while we awaited lock
-                val = cache.get(self.attrname, _NOT_FOUND)
-                if val is _NOT_FOUND:
-                    val = self.func(instance)
-                    try:
-                        cache[self.attrname] = val
-                    except TypeError:
-                        msg = (
-                            f"The '__dict__' attribute on {type(instance).__name__!r} instance "
-                            f"does not support item assignment for caching {self.attrname!r} property."
-                        )
-                        raise TypeError(msg) from None
-        return val
-
-
-try:
-    from functools import cached_property  # type: ignore
-except ImportError:
-    # for python < 3.8
-    cached_property = CachedPropertyBackport  # type: ignore
-
+from functools import cache, cached_property  # noqa

[CodeScene: CachedPropertyBackport.__get__ is no longer above the cyclomatic-complexity threshold.]

try:
    from numpy.typing import ArrayLike, DTypeLike  # noqa
except ImportError:
    # numpy <1.20
    from numpy import dtype as DTypeLike  # noqa
    from numpy import ndarray as ArrayLike  # noqa
-
-
-try:
-    from functools import cache  # type: ignore
-except ImportError:
-    from functools import lru_cache as cache  # noqa
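With the minimum supported Python now 3.9, both utilities import straight from the standard library. A minimal sketch (illustrative only, not code from this PR) of the two `functools` helpers the deleted backport used to provide:

```python
from functools import cache, cached_property


class Channel:
    def __init__(self, counts):
        self._counts = counts

    @cached_property
    def calibrated(self):
        # Computed on first access, then stored on the instance __dict__.
        return [c * 0.01 for c in self._counts]


@cache
def scale_factor(resolution_m):
    # Memoized by argument value, like lru_cache(maxsize=None).
    return 1000 // resolution_m


ch = Channel([100, 200])
assert ch.calibrated is ch.calibrated  # same cached object on repeat access
assert scale_factor(250) == 4
```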
13 changes: 1 addition & 12 deletions satpy/_config.py
@@ -26,20 +26,9 @@
import tempfile
from collections import OrderedDict
from importlib.metadata import EntryPoint, entry_points
-from pathlib import Path
+from importlib.resources import files as impr_files
from typing import Iterable

-try:
-    from importlib.resources import files as impr_files  # type: ignore
-except ImportError:
-    # Python 3.8
-    def impr_files(module_name: str) -> Path:
-        """Get path to module as a backport for Python 3.8."""
-        from importlib.resources import path as impr_path
-
-        with impr_path(module_name, "__init__.py") as pkg_init_path:
-            return pkg_init_path.parent
-
import appdirs
from donfig import Config

@@ -48,7 +37,7 @@
LOG = logging.getLogger(__name__)

BASE_PATH = os.path.dirname(os.path.realpath(__file__))
# FIXME: Use package_resources?

PACKAGE_CONFIG_PATH = os.path.join(BASE_PATH, 'etc')

_satpy_dirs = appdirs.AppDirs(appname='satpy', appauthor='pytroll')
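For reference, `importlib.resources.files` (stdlib since Python 3.9) returns a `Traversable` rooted at the package directory, which is what the deleted `path()`-based backport emulated. A hedged sketch; pointing it at satpy's bundled `etc/` directory mirrors `PACKAGE_CONFIG_PATH` above but assumes satpy is importable:

```python
from importlib.resources import files

# Traversable anchored at the installed package directory.
pkg_root = files("satpy")
config_dir = pkg_root / "etc"
print(config_dir.is_dir())
```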
9 changes: 0 additions & 9 deletions satpy/etc/readers/modis_l1b.yaml
@@ -9,15 +9,6 @@ reader:
  sensors: [modis]
  default_datasets: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]

-navigations:
-  hdf_eos_geo:
-    description: MODIS navigation
-    file_type: hdf_eos_geo
-    latitude_key: Latitude
-    longitude_key: Longitude
-    nadir_resolution: [1000]
-    rows_per_scan: 10
-
datasets:
  '1':
    name: '1'
32 changes: 29 additions & 3 deletions satpy/readers/hdfeos_base.py
@@ -32,10 +32,9 @@

from satpy import DataID
from satpy.readers.file_handlers import BaseFileHandler
-from satpy.utils import get_legacy_chunk_size
+from satpy.utils import normalize_low_res_chunks

logger = logging.getLogger(__name__)
-CHUNK_SIZE = get_legacy_chunk_size()


def interpolate(clons, clats, csatz, src_resolution, dst_resolution):
@@ -215,14 +214,41 @@
        from satpy.readers.hdf4_utils import from_sds

        dataset = self._read_dataset_in_file(dataset_name)
-        dask_arr = from_sds(dataset, chunks=CHUNK_SIZE)
+        chunks = self._chunks_for_variable(dataset)
+        dask_arr = from_sds(dataset, chunks=chunks)
        dims = ('y', 'x') if dask_arr.ndim == 2 else None
        data = xr.DataArray(dask_arr, dims=dims,
                            attrs=dataset.attributes())
        data = self._scale_and_mask_data_array(data, is_category=is_category)

        return data

+    def _chunks_for_variable(self, hdf_dataset):
+        scan_length_250m = 40
+        var_shape = hdf_dataset.info()[2]
+        res_multiplier = self._get_res_multiplier(var_shape)
+        num_nonyx_dims = len(var_shape) - 2
+        return normalize_low_res_chunks(
+            (1,) * num_nonyx_dims + ("auto", -1),
+            var_shape,
+            (1,) * num_nonyx_dims + (scan_length_250m, -1),
+            (1,) * num_nonyx_dims + (res_multiplier, res_multiplier),
+            np.float32,
+        )
+
+    @staticmethod
+    def _get_res_multiplier(var_shape):
+        num_columns_to_multiplier = {
+            271: 20,  # 5km
+            1354: 4,  # 1km
+            2708: 2,  # 500m
+            5416: 1,  # 250m
+        }
+        for max_columns, res_multiplier in num_columns_to_multiplier.items():
+            if var_shape[-1] <= max_columns:
+                return res_multiplier
+        return 1

[Codecov: added line satpy/readers/hdfeos_base.py#L250 (`return 1`) was not covered by tests.]

    def _scale_and_mask_data_array(self, data, is_category=False):
        """Unscale byte data and mask invalid/fill values.

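The mapping keys off the trailing (column) dimension because each MODIS resolution has a fixed swath width in pixels, and the multiplier says how many 250 m pixels span one pixel at that resolution. Since one scan is 40 rows on the 250 m grid, the scan length at a coarser resolution is 40 divided by the multiplier. A standalone sketch of that arithmetic (it deliberately does not call satpy's `normalize_low_res_chunks`, whose signature is shown only as used above):

```python
SCAN_LENGTH_250M = 40  # rows per MODIS scan on the 250 m grid

# Column count -> how many 250 m pixels per pixel at this resolution.
NUM_COLUMNS_TO_MULTIPLIER = {
    271: 20,   # 5 km
    1354: 4,   # 1 km
    2708: 2,   # 500 m
    5416: 1,   # 250 m
}


def res_multiplier(num_columns):
    for max_columns, multiplier in NUM_COLUMNS_TO_MULTIPLIER.items():
        if num_columns <= max_columns:
            return multiplier
    return 1


def rows_per_scan(num_columns):
    # Scan-aligned row chunking wants multiples of this value.
    return SCAN_LENGTH_250M // res_multiplier(num_columns)


assert rows_per_scan(271) == 2    # 5 km
assert rows_per_scan(1354) == 10  # 1 km
assert rows_per_scan(5416) == 40  # 250 m
```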
8 changes: 4 additions & 4 deletions satpy/readers/modis_l1b.py
@@ -78,10 +78,8 @@

from satpy.readers.hdf4_utils import from_sds
from satpy.readers.hdfeos_base import HDFEOSBaseFileReader, HDFEOSGeoReader
-from satpy.utils import get_legacy_chunk_size

logger = logging.getLogger(__name__)
-CHUNK_SIZE = get_legacy_chunk_size()


class HDFEOSBandReader(HDFEOSBaseFileReader):
@@ -118,7 +116,8 @@ def get_dataset(self, key, info):
        subdata = self.sd.select(var_name)
        var_attrs = subdata.attributes()
        uncertainty = self.sd.select(var_name + "_Uncert_Indexes")
-        array = xr.DataArray(from_sds(subdata, chunks=CHUNK_SIZE)[band_index, :, :],
+        chunks = self._chunks_for_variable(subdata)
+        array = xr.DataArray(from_sds(subdata, chunks=chunks)[band_index, :, :],
                             dims=['y', 'x']).astype(np.float32)
        valid_range = var_attrs['valid_range']
        valid_min = np.float32(valid_range[0])
@@ -214,7 +213,8 @@ def _mask_invalid(self, array, valid_min, valid_max):
    def _mask_uncertain_pixels(self, array, uncertainty, band_index):
        if not self._mask_saturated:
            return array
-        band_uncertainty = from_sds(uncertainty, chunks=CHUNK_SIZE)[band_index, :, :]
+        uncertainty_chunks = self._chunks_for_variable(uncertainty)
+        band_uncertainty = from_sds(uncertainty, chunks=uncertainty_chunks)[band_index, :, :]
        array = array.where(band_uncertainty < 15)
        return array

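The net effect is that each variable is chunked on scan boundaries at its own resolution instead of with one global `CHUNK_SIZE`. A hedged usage sketch (the granule filename is hypothetical; any MOD021KM file would do):

```python
import dask
from satpy import Scene

filenames = ["MOD021KM.A2021305.1950.061.hdf"]  # hypothetical granule

with dask.config.set({"array.chunk-size": "64 MiB"}):
    scene = Scene(reader="modis_l1b", filenames=filenames)
    scene.load(["1"])

# Row chunks should now be multiples of the 10-row 1 km scan.
print(scene["1"].data.chunks)
```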
9 changes: 1 addition & 8 deletions satpy/resample.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python

[CodeScene: lines of code in file decreases from 838 to 834; mean cyclomatic complexity increases from 4.07 to 4.13 (threshold 4).]
# -*- coding: utf-8 -*-
# Copyright (c) 2015-2018 Satpy developers
#
@@ -143,6 +143,7 @@
import os
import warnings
from logging import getLogger
+from math import lcm  # type: ignore
from weakref import WeakValueDictionary

import dask
@@ -157,14 +158,6 @@

from satpy.utils import PerformanceWarning, get_legacy_chunk_size

-try:
-    from math import lcm  # type: ignore
-except ImportError:
-    def lcm(a, b):
-        """Get 'Least Common Multiple' with Python 3.8 compatibility."""
-        from math import gcd
-        return abs(a * b) // gcd(a, b)

try:
    from pyresample.resampler import BaseResampler as PRBaseResampler
except ImportError:
@@ -510,7 +503,7 @@
epsilon=epsilon)

if self.resampler is None:
# FIXME: We need to move all of this caching logic to pyresample

self.resampler = XArrayResamplerNN(**kwargs)

try:
@@ -715,7 +708,7 @@
new_src = SwathDefinition(lons, lats)

swath_points_in_grid, cols, rows = ll2cr(new_src, target_geo_def)
# FIXME: How do we check swath usage/coverage if we only do this

# per-block
# # Determine if enough of the input swath was used
# grid_name = getattr(self.target_geo_def, "name", "N/A")
@@ -1350,7 +1343,7 @@
return result


# TODO: move this to pyresample.resampler

RESAMPLERS = {"kd_tree": KDTreeResampler,
"nearest": KDTreeResampler,
"bilinear": BilinearResampler,
@@ -1373,7 +1366,7 @@
PRBaseResampler = BaseResampler


# TODO: move this to pyresample

def prepare_resampler(source_area, destination_area, resampler=None, **resample_kwargs):
    """Instantiate and return a resampler."""
    if resampler is None:
@@ -1406,7 +1399,7 @@
return key, resampler_instance


# TODO: move this to pyresample

def resample(source_area, data, destination_area,
             resampler=None, **kwargs):
    """Do the resampling."""
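`math.lcm` is in the standard library from Python 3.9, so the `gcd`-based fallback can go. For reference (plain stdlib behavior, nothing PR-specific), it also accepts more than two arguments, unlike the removed two-argument backport:

```python
from math import lcm

assert lcm(10, 4) == 20
assert lcm(2, 3, 4) == 12  # variadic since Python 3.9
```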
2 changes: 1 addition & 1 deletion satpy/tests/reader_tests/modis_tests/_modis_fixtures.py
@@ -62,7 +62,7 @@ def _shape_for_resolution(resolution: int) -> tuple[int, int]:
    return factor * shape_1km[0], factor * shape_1km[1]


-def _generate_lonlat_data(resolution: int) -> np.ndarray:
+def _generate_lonlat_data(resolution: int) -> tuple[np.ndarray, np.ndarray]:
    shape = _shape_for_resolution(resolution)
    lat = np.repeat(np.linspace(35., 45., shape[0])[:, None], shape[1], 1)
    lat *= np.linspace(0.9, 1.1, shape[1])
24 changes: 20 additions & 4 deletions satpy/tests/reader_tests/modis_tests/test_modis_l1b.py
@@ -51,6 +51,18 @@
assert "rows_per_scan" in data_arr.attrs
assert isinstance(data_arr.attrs["rows_per_scan"], int)
assert data_arr.attrs['reader'] == 'modis_l1b'
assert "resolution" in data_arr.attrs
res = data_arr.attrs["resolution"]
if res == 5000:
assert data_arr.chunks == ((2, 2, 2), (data_arr.shape[1],))
elif res == 1000:
assert data_arr.chunks == ((10, 10, 10), (data_arr.shape[1],))
elif res == 500:
assert data_arr.chunks == ((20, 20, 20), (data_arr.shape[1],))
elif res == 250:
assert data_arr.chunks == ((40, 40, 40), (data_arr.shape[1],))
else:
raise ValueError(f"Unexpected resolution: {res}")

[Codecov: added line satpy/tests/reader_tests/modis_tests/test_modis_l1b.py#L65 was not covered by tests.]


def _load_and_check_geolocation(scene, resolution, exp_res, exp_shape, has_res,
@@ -147,7 +159,8 @@
    shape_500m = _shape_for_resolution(500)
    shape_250m = _shape_for_resolution(250)
    default_shape = _shape_for_resolution(default_res)
-    with dask.config.set(scheduler=CustomScheduler(max_computes=1 + has_5km + has_500 + has_250)):
+    scheduler = CustomScheduler(max_computes=1 + has_5km + has_500 + has_250)
+    with dask.config.set({'scheduler': scheduler, 'array.chunk-size': '1 MiB'}):
        _load_and_check_geolocation(scene, "*", default_res, default_shape, True)
        _load_and_check_geolocation(scene, 5000, 5000, shape_5km, has_5km)
        _load_and_check_geolocation(scene, 500, 500, shape_500m, has_500)
@@ -157,7 +170,8 @@
"""Test loading satellite zenith angle band."""
scene = Scene(reader='modis_l1b', filenames=modis_l1b_nasa_mod021km_file)
dataset_name = 'satellite_zenith_angle'
scene.load([dataset_name])
with dask.config.set({'array.chunk-size': '1 MiB'}):
scene.load([dataset_name])
dataset = scene[dataset_name]
assert dataset.shape == _shape_for_resolution(1000)
assert dataset.attrs['resolution'] == 1000
@@ -167,7 +181,8 @@
"""Test loading visible band."""
scene = Scene(reader='modis_l1b', filenames=modis_l1b_nasa_mod021km_file)
dataset_name = '1'
scene.load([dataset_name])
with dask.config.set({'array.chunk-size': '1 MiB'}):
scene.load([dataset_name])
dataset = scene[dataset_name]
assert dataset[0, 0] == 300.0
assert dataset.shape == _shape_for_resolution(1000)
@@ -180,7 +195,8 @@
    scene = Scene(reader='modis_l1b', filenames=modis_l1b_nasa_mod021km_file,
                  reader_kwargs={"mask_saturated": mask_saturated})
    dataset_name = '2'
-    scene.load([dataset_name])
+    with dask.config.set({'array.chunk-size': '1 MiB'}):
+        scene.load([dataset_name])
    dataset = scene[dataset_name]
    assert dataset.shape == _shape_for_resolution(1000)
    assert dataset.attrs['resolution'] == 1000
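Pinning dask's `array.chunk-size` to 1 MiB makes the "auto" chunks above deterministic. The expected row chunks follow from one 40-row scan at 250 m: 40/20 = 2 rows at 5 km, 40/4 = 10 at 1 km, 40/2 = 20 at 500 m, and 40 at 250 m, matching the asserted chunk tuples. A small sketch of how that setting steers dask's "auto" chunking (the array shape is chosen arbitrarily):

```python
import dask
import dask.array as da
import numpy as np

shape = (400, 1354)  # ~2.2 MB of float32

with dask.config.set({"array.chunk-size": "1 MiB"}):
    small = da.zeros(shape, dtype=np.float32, chunks="auto")

with dask.config.set({"array.chunk-size": "128 MiB"}):
    large = da.zeros(shape, dtype=np.float32, chunks="auto")

print(small.chunksize)  # split to fit the 1 MiB budget
print(large.chunksize)  # whole array fits in one chunk
```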
48 changes: 0 additions & 48 deletions satpy/tests/test_compat.py

This file was deleted.
