Skip to content

Commit

Permalink
Fix NaN nodata values in band statistics
Browse files Browse the repository at this point in the history
  • Loading branch information
mplough-kobold authored and Kirill888 committed Apr 12, 2024
1 parent a0ba90a commit 3b7c8d8
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 14 deletions.
17 changes: 3 additions & 14 deletions odc/geo/cog/_tifffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,20 +87,8 @@ def _stats_from_layer(

axis = (yaxis, yaxis + 1)
npix = pix.shape[yaxis] * pix.shape[yaxis + 1]
if nodata is not None:
dd = da.ma.masked_equal(pix, nodata)
return unwrap(
{
"minimum": dd.min(axis=axis),
"maximum": dd.max(axis=axis),
"mean": dd.mean(axis=axis),
"stddev": dd.std(axis=axis),
"valid_percent": da.isfinite(dd).sum(axis=axis) * (100 / npix),
},
pix.ndim,
)

if pix.dtype.kind == "f":
if nodata is None or np.isnan(nodata):
dd = pix
return unwrap(
{
Expand All @@ -113,7 +101,8 @@ def _stats_from_layer(
pix.ndim,
)

dd = pix
# Exclude both nodata and invalid (e.g. NaN) values from statistics computation
dd = da.ma.masked_where((pix == nodata) | ~(np.isfinite(pix)), pix)
return unwrap(
{
"minimum": dd.min(axis=axis),
Expand Down
57 changes: 57 additions & 0 deletions tests/test_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
from pathlib import Path
from typing import Optional, Tuple

import numpy as np
import pytest
from dask import array as da

from odc.geo.cog import CogMeta, cog_gbox, save_cog_with_dask
from odc.geo.cog._shared import compute_cog_spec, num_overviews
Expand All @@ -14,6 +16,7 @@
_gdal_sample_descriptions,
_make_empty_cog,
_norm_compression_tifffile,
_stats_from_layer,
geotiff_metadata,
)
from odc.geo.geobox import GeoBox
Expand Down Expand Up @@ -450,3 +453,57 @@ def test_cog_with_dask_smoke_test(gbox: GeoBox, tmp_path: Path, dtype):
fname = str(tmp_path / "cog-bandnames-incorrect.tif")
with pytest.raises(ValueError):
save_cog_with_dask(img, fname, compression="deflate", level=2)


@pytest.mark.parametrize(
    ("array", "nodata", "minimum", "maximum", "mean", "stddev", "valid_percent"),
    [
        pytest.param([[1, 1], [1, 1]], None, 1, 1, 1, 0, 100, id="basic int"),
        pytest.param(
            [[1.0, 1.0], [1.0, 1.0]], None, 1.0, 1.0, 1.0, 0.0, 100, id="basic float"
        ),
        pytest.param([[1, 0], [0, 1]], 0, 1, 1, 1, 0, 50, id="int with numeric nodata"),
        pytest.param(
            [[1.0, 0.0], [0.0, 1.0]], 0, 1, 1, 1, 0, 50, id="float with numeric nodata"
        ),
        pytest.param(
            [[1.0, np.nan], [np.nan, 1.0]],
            None,
            1,
            1,
            1,
            0,
            50,
            id="float with nan, None nodata",
        ),
        pytest.param(
            [[1.0, np.nan], [np.nan, 1.0]],
            np.nan,
            1,
            1,
            1,
            0,
            50,
            id="float with nan, nan nodata",
        ),
        pytest.param(
            [[1.0, np.nan], [0, 1.0]],
            0,
            1,
            1,
            1,
            0,
            50,
            id="float with nan, numeric nodata",
        ),
    ],
)
def test_stats_from_layer(array, nodata, minimum, maximum, mean, stddev, valid_percent):
    """Check every statistic reported by ``_stats_from_layer``.

    Each case supplies a small 2x2 array and a ``nodata`` setting (``None``,
    NaN, or a number) together with the expected minimum, maximum, mean,
    standard deviation and valid-pixel percentage.
    """
    x = da.from_array(array)
    stats = _stats_from_layer(x, nodata).compute()[0]

    assert stats["minimum"] == minimum
    # Compare against the ``maximum`` parameter; comparing against ``minimum``
    # would leave the expected maximum unchecked.
    assert stats["maximum"] == maximum
    assert stats["mean"] == mean
    assert stats["stddev"] == stddev
    assert stats["valid_percent"] == valid_percent

0 comments on commit 3b7c8d8

Please sign in to comment.