From b54a72f33805df366d530f9b0d228a0325029cee Mon Sep 17 00:00:00 2001
From: Eric Koch
Date: Fri, 18 Oct 2024 11:27:13 -0400
Subject: [PATCH 1/6] Remove _warn_slow for daskSC

---
 spectral_cube/dask_spectral_cube.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/spectral_cube/dask_spectral_cube.py b/spectral_cube/dask_spectral_cube.py
index 148e336c7..cb629e4cb 100644
--- a/spectral_cube/dask_spectral_cube.py
+++ b/spectral_cube/dask_spectral_cube.py
@@ -636,7 +636,6 @@ def median(self, axis=None, **kwargs):
         if axis is None:
             # da.nanmedian raises NotImplementedError since it is not possible
             # to do efficiently, so we use Numpy instead.
-            self._warn_slow('median')
             return np.nanmedian(self._compute(data), **kwargs)
         else:
             return self._compute(da.nanmedian(self._get_filled_data(fill=np.nan), axis=axis, **kwargs))
@@ -660,7 +659,6 @@ def percentile(self, q, axis=None, **kwargs):
         if axis is None:
             # There is no way to compute the percentile of the whole array in
             # chunks.
-            self._warn_slow('percentile')
             return np.nanpercentile(data, q, **kwargs)
         else:
             # Rechunk so that there is only one chunk along the desired axis
@@ -694,7 +692,6 @@ def mad_std(self, axis=None, ignore_nan=True, **kwargs):
         if axis is None:
            # In this case we have to load the full data - even dask's
             # nanmedian doesn't work efficiently over the whole array.
-            self._warn_slow('mad_std')
             return stats.mad_std(data, ignore_nan=ignore_nan, **kwargs)
         else:
             # Rechunk so that there is only one chunk along the desired axis

From 20421baa86eeb5f6cca8b4509b971ec4f3dba46f Mon Sep 17 00:00:00 2001
From: Eric Koch
Date: Fri, 18 Oct 2024 11:30:08 -0400
Subject: [PATCH 2/6] Skip warning check with DaskSC

---
 spectral_cube/tests/test_spectral_cube.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/spectral_cube/tests/test_spectral_cube.py b/spectral_cube/tests/test_spectral_cube.py
index 870248e36..212d949e1 100644
--- a/spectral_cube/tests/test_spectral_cube.py
+++ b/spectral_cube/tests/test_spectral_cube.py
@@ -109,9 +109,10 @@ def test_arithmetic_warning(data_vda_jybeam_lower, recwarn, use_dask):
 
     assert not cube._is_huge
 
-    # make sure the small cube raises a warning about loading into memory
-    with pytest.warns(UserWarning, match='requires loading the entire'):
-        cube + 5*cube.unit
+    if not use_dask:
+        # make sure the small cube raises a warning about loading into memory
+        with pytest.warns(UserWarning, match='requires loading the entire'):
+            cube + 5*cube.unit
 
 
 def test_huge_disallowed(data_vda_jybeam_lower, use_dask):

From d170bcdd52f837cc472e9ba12d0bc7ccceee55cf Mon Sep 17 00:00:00 2001
From: Eric Koch
Date: Fri, 18 Oct 2024 11:52:36 -0400
Subject: [PATCH 3/6] Fix DaskSC stats with nans check

---
 spectral_cube/tests/test_dask.py          | 8 ++++++--
 spectral_cube/tests/test_spectral_cube.py | 5 +----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/spectral_cube/tests/test_dask.py b/spectral_cube/tests/test_dask.py
index 12175f5c4..f3ae681c4 100644
--- a/spectral_cube/tests/test_dask.py
+++ b/spectral_cube/tests/test_dask.py
@@ -106,13 +106,17 @@ def test_statistics(data_adv):
 
 def test_statistics_withnans(data_adv):
     cube = DaskSpectralCube.read(data_adv).rechunk(chunks=(1, 2, 3))
+
     # shape is 2, 3, 4
-    cube._data[:,:,:2] = np.nan
+    cube._data[:,:,:1] = np.nan
     # ensure some chunks are all nan
     cube.rechunk((1,2,2))
     stats = cube.statistics()
+
     for key in ('min', 'max', 'sum'):
-        assert stats[key] == getattr(cube, key)()
+        np.testing.assert_allclose(stats[key],
+                                   getattr(cube, key)(),
+                                   rtol=1e-10)
 
 
 @pytest.mark.skipif(not CASA_INSTALLED, reason='Requires CASA to be installed')
diff --git a/spectral_cube/tests/test_spectral_cube.py b/spectral_cube/tests/test_spectral_cube.py
index 212d949e1..fd92fb74b 100644
--- a/spectral_cube/tests/test_spectral_cube.py
+++ b/spectral_cube/tests/test_spectral_cube.py
@@ -134,10 +134,7 @@ def test_huge_disallowed(data_vda_jybeam_lower, use_dask):
     with pytest.raises(ValueError, match='entire cube into memory'):
         cube + 5*cube.unit
 
-    if use_dask:
-        with pytest.raises(ValueError, match='entire cube into memory'):
-            cube.mad_std()
-    else:
+    if not use_dask:
         with pytest.raises(ValueError, match='entire cube into memory'):
             cube.max(how='cube')
 

From c97b5879391359d69310eccf6edf51d5271cb45b Mon Sep 17 00:00:00 2001
From: Eric Koch
Date: Fri, 18 Oct 2024 12:10:04 -0400
Subject: [PATCH 4/6] Add all nans stats test

---
 spectral_cube/tests/test_dask.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/spectral_cube/tests/test_dask.py b/spectral_cube/tests/test_dask.py
index f3ae681c4..39573f323 100644
--- a/spectral_cube/tests/test_dask.py
+++ b/spectral_cube/tests/test_dask.py
@@ -119,6 +119,23 @@ def test_statistics_withnans(data_adv):
                                    rtol=1e-10)
 
 
+def test_statistics_allnans(data_adv):
+    cube = DaskSpectralCube.read(data_adv).rechunk(chunks=(1, 2, 3))
+
+    # shape is 2, 3, 4
+    cube._data[:,:,:2] = np.nan
+    # ensure some chunks are all nan
+    cube.rechunk((1,2,2))
+    stats = cube.statistics()
+
+    for key in ('min', 'max', 'mean', 'sigma', 'rms'):
+        assert np.isnan(stats[key])
+
+    # Sum of NaNs is 0
+    assert stats['sum'] == 0 * cube.unit
+    assert stats['sumsq'] == (0 * cube.unit)**2
+
+
 @pytest.mark.skipif(not CASA_INSTALLED, reason='Requires CASA to be installed')
 def test_statistics_consistency_casa(data_adv, tmp_path):
 

From ff4282f24a6b1e89b1f0dbbc3ea2648b4d1ba934 Mon Sep 17 00:00:00 2001
From: Eric Koch
Date: Fri, 18 Oct 2024 14:24:26 -0400
Subject: [PATCH 5/6] Add MJD-OBS to test header

---
 spectral_cube/tests/data/header_jybeam.hdr | 85 +++++++++++-----------
 1 file changed, 43 insertions(+), 42 deletions(-)

diff --git a/spectral_cube/tests/data/header_jybeam.hdr b/spectral_cube/tests/data/header_jybeam.hdr
index 1ab5d0d76..b5e425d09 100644
--- a/spectral_cube/tests/data/header_jybeam.hdr
+++ b/spectral_cube/tests/data/header_jybeam.hdr
@@ -1,50 +1,51 @@
-SIMPLE  = T / Written by IDL: Fri Feb 20 13:46:36 2009
-BITPIX  = -32 /
-NAXIS   = 4 /
-NAXIS1  = 1884 /
-NAXIS2  = 2606 /
-NAXIS3  = 200 //
-NAXIS4  = 1 /
-EXTEND  = T /
-BSCALE  = 1.00000000000E+00 /
-BZERO   = 0.00000000000E+00 /
-TELESCOP= 'VLA ' /
-CDELT1  = -5.55555561268E-04 /
-CRPIX1  = 1.37300000000E+03 /
-CRVAL1  = 2.31837500515E+01 /
+SIMPLE  = T / Written by IDL: Fri Feb 20 13:46:36 2009
+BITPIX  = -32 /
+NAXIS   = 4 /
+NAXIS1  = 1884 /
+NAXIS2  = 2606 /
+NAXIS3  = 200 //
+NAXIS4  = 1 /
+EXTEND  = T /
+BSCALE  = 1.00000000000E+00 /
+BZERO   = 0.00000000000E+00 /
+TELESCOP= 'VLA ' /
+CDELT1  = -5.55555561268E-04 /
+CRPIX1  = 1.37300000000E+03 /
+CRVAL1  = 2.31837500515E+01 /
 CUNIT1  = 'deg'
-CTYPE1  = 'RA---SIN' /
-CDELT2  = 5.55555561268E-04 /
-CRPIX2  = 1.15200000000E+03 /
-CRVAL2  = 3.05765277962E+01 /
+CTYPE1  = 'RA---SIN' /
+CDELT2  = 5.55555561268E-04 /
+CRPIX2  = 1.15200000000E+03 /
+CRVAL2  = 3.05765277962E+01 /
 CUNIT2  = 'deg'
-CTYPE2  = 'DEC--SIN' /
-CDELT3  = 1.28821496879E+00 /
-CRPIX3  = 1.00000000000E+00 /
-CRVAL3  = -3.21214698632E+02 /
-CTYPE3  = 'VOPT' /
+CTYPE2  = 'DEC--SIN' /
+CDELT3  = 1.28821496879E+00 /
+CRPIX3  = 1.00000000000E+00 /
+CRVAL3  = -3.21214698632E+02 /
+CTYPE3  = 'VOPT' /
 CUNIT3  = 'km/s'
-CDELT4  = 1.00000000000E+00 /
-CRPIX4  = 1.00000000000E+00 /
-CRVAL4  = 1.00000000000E+00 /
-CTYPE4  = 'STOKES ' /
+CDELT4  = 1.00000000000E+00 /
+CRPIX4  = 1.00000000000E+00 /
+CRVAL4  = 1.00000000000E+00 /
+CTYPE4  = 'STOKES ' /
 CUNIT4  = ''
 SPECSYS = 'BARYCENT'
-DATE-OBS= '1998-06-18T16:30:25.4' /
-RESTFREQ= 1.42040571841E+09 /
-CELLSCAL= 'CONSTANT' /
-BUNIT   = 'JY/BEAM ' /
-EPOCH   = 2.00000000000E+03 /
-OBJECT  = 'M33 ' /
-OBSERVER= 'AT206 ' /
-VOBS    = -2.57256763070E+01 /
-LTYPE   = 'channel ' /
-LSTART  = 2.15000000000E+02 /
-LWIDTH  = 1.00000000000E+00 /
-LSTEP   = 1.00000000000E+00 /
-BTYPE   = 'intensity' /
-DATAMIN = -6.57081836835E-03 /
-DATAMAX = 1.52362231165E-02 /
+DATE-OBS= '1998-06-18T16:30:25.4' /
+MJD-OBS = 50982.687794 /
+RESTFREQ= 1.42040571841E+09 /
+CELLSCAL= 'CONSTANT' /
+BUNIT   = 'JY/BEAM ' /
+EPOCH   = 2.00000000000E+03 /
+OBJECT  = 'M33 ' /
+OBSERVER= 'AT206 ' /
+VOBS    = -2.57256763070E+01 /
+LTYPE   = 'channel ' /
+LSTART  = 2.15000000000E+02 /
+LWIDTH  = 1.00000000000E+00 /
+LSTEP   = 1.00000000000E+00 /
+BTYPE   = 'intensity' /
+DATAMIN = -6.57081836835E-03 /
+DATAMAX = 1.52362231165E-02 /
 BMAJ    = 0.0002777777777777778
 BMIN    = 0.0002777777777777778
 BPA     = 0.0

From d14947bd67c2a3303ebf95bf98a9e8d1a4cdaa7b Mon Sep 17 00:00:00 2001
From: Eric Koch
Date: Fri, 18 Oct 2024 17:18:06 -0400
Subject: [PATCH 6/6] Restore slow warning for _apply_everywhere and
 _cube_on_cube_operation

---
 spectral_cube/tests/test_spectral_cube.py | 10 ++++++----
 spectral_cube/utils.py                    |  9 +++++++--
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/spectral_cube/tests/test_spectral_cube.py b/spectral_cube/tests/test_spectral_cube.py
index fd92fb74b..1c912928e 100644
--- a/spectral_cube/tests/test_spectral_cube.py
+++ b/spectral_cube/tests/test_spectral_cube.py
@@ -109,10 +109,12 @@ def test_arithmetic_warning(data_vda_jybeam_lower, recwarn, use_dask):
 
     assert not cube._is_huge
 
-    if not use_dask:
-        # make sure the small cube raises a warning about loading into memory
-        with pytest.warns(UserWarning, match='requires loading the entire'):
-            cube + 5*cube.unit
+    # make sure the small cube raises a warning about loading into memory
+    with pytest.warns(UserWarning, match='requires loading the entire'):
+        cube + 5*cube.unit
+
+    with pytest.warns(UserWarning, match='requires loading the entire'):
+        cube + cube
 
 
 def test_huge_disallowed(data_vda_jybeam_lower, use_dask):
diff --git a/spectral_cube/utils.py b/spectral_cube/utils.py
index 58b031766..ddd6915e5 100644
--- a/spectral_cube/utils.py
+++ b/spectral_cube/utils.py
@@ -38,7 +38,12 @@ def wrapper(self, *args, **kwargs):
         accepts_how_keyword = 'how' in argspec.args or argspec.varkw == 'how'
 
         warn_how = accepts_how_keyword and ((kwargs.get('how') == 'cube') or 'how' not in kwargs)
-
+
+        # This restores showing the "loading the entire cube into memory" warning for
+        # _apply_everywhere and _cube_on_cube_operation
+        if function.__name__ in ['_apply_everywhere', '_cube_on_cube_operation']:
+            warn_how = True
+
         if self._is_huge and not self.allow_huge_operations:
             warn_message = ("This function ({0}) requires loading the entire "
                             "cube into memory, and the cube is large ({1} "
@@ -50,7 +55,7 @@ def wrapper(self, *args, **kwargs):
                 warn_message += ("Alternatively, you may want to consider using an "
                                  "approach that does not load the whole cube into "
                                  "memory by specifying how='slice' or how='ray'. ")
-
+
             warn_message += ("See {bigdataurl} for details.".format(bigdataurl=bigdataurl))
 
             raise ValueError(warn_message)