diff --git a/nowcast/workers/get_onc_ferry.py b/nowcast/workers/get_onc_ferry.py index 305ae717..0cca9996 100644 --- a/nowcast/workers/get_onc_ferry.py +++ b/nowcast/workers/get_onc_ferry.py @@ -293,11 +293,13 @@ def _get_water_data(ferry_platform, device_category, ymd, devices_config): return device_data -def _empty_device_data(ferry_platform, device_category, ymd, sensors): +def _empty_device_data( + ferry_platform, device_category, ymd, sensors, time_coord="sampleTime" +): # Response from ONC contains no sensor data, so return an # empty DataArray logger.warning( - f"No ONC {ferry_platform} {device_category} data for {ymd}; " + f"No ONC {ferry_platform} {device_category} {sensors} data for {ymd}; " f"substituting empty dataset" ) onc_units = { @@ -321,15 +323,21 @@ def _empty_device_data(ferry_platform, device_category, ymd, sensors): sensor: xarray.DataArray( name=sensor, data=numpy.array([], dtype=float), - coords={"sampleTime": numpy.array([], dtype="datetime64[ns]")}, - dims="sampleTime", + coords={time_coord: numpy.array([], dtype="datetime64[ns]")}, + dims=time_coord, attrs={ + "device_category": device_category, "qaqcFlag": numpy.array([], dtype=numpy.int64), "unitOfMeasure": onc_units[sensor], + "units": "degrees_Celcius" + if sensor in {"temperature", "air_temperature"} + else onc_units[sensor], }, ) for sensor in sensors.split(",") } + if len(data_arrays) == 1: + return data_arrays[sensors] return xarray.Dataset(data_arrays) @@ -341,9 +349,14 @@ def _qaqc_filter(ferry_platform, device, device_data, ymd, devices_config): f"filtering ONC {ferry_platform} {device} {onc_sensor} data " f"for {ymd} to exclude 1= 7 ) @@ -351,17 +364,19 @@ def _qaqc_filter(ferry_platform, device, device_data, ymd, devices_config): cf_units = cf_units_mapping[onc_data.unitOfMeasure] except KeyError: cf_units = onc_data.unitOfMeasure - sensor_data_arrays.append( - xarray.DataArray( + if not sensor_qaqc_mask.any(): + data_array = _empty_device_data( + ferry_platform, device, ymd, onc_sensor, time_coord="time" + ) + else: + data_array = xarray.DataArray( name=sensor, - data=onc_data[not_nan_mask][sensor_qaqc_mask].values, - coords={ - "time": onc_data.sampleTime[not_nan_mask][sensor_qaqc_mask].values - }, + data=onc_data[sensor_qaqc_mask].values, + coords={"time": onc_data.sampleTime[sensor_qaqc_mask].values}, dims="time", attrs={"device_category": device, "units": cf_units}, ) - ) + sensor_data_arrays.append(data_array) return sensor_data_arrays @@ -387,7 +402,7 @@ def count(values, axis): else: try: data_array = array.resample(time="1Min").mean() - except IndexError: + except (IndexError, ValueError): # array is empty, meaning there are no observations with # qaqcFlag<=1 or qaqcFlac>=7, so substitute a DataArray full of NaNs logger.warning( @@ -418,7 +433,11 @@ def count(values, axis): sample_count_var = f"{var}_sample_count" sample_count_array = array.resample(time="1Min").count() sample_count_array.attrs = array.attrs - del sample_count_array.attrs["units"] + try: + del sample_count_array.attrs["units"] + except KeyError: + # empty data arrays lack units attributes + pass data_vars[sample_count_var] = _create_dataarray( sample_count_var, sample_count_array, ferry_platform, location_config ) diff --git a/tests/workers/test_get_onc_ferry.py b/tests/workers/test_get_onc_ferry.py index b5f8ffdc..32f1a3f6 100644 --- a/tests/workers/test_get_onc_ferry.py +++ b/tests/workers/test_get_onc_ferry.py @@ -291,16 +291,30 @@ class TestGetWaterData: pass -@pytest.mark.parametrize( - "ferry_platform, device, sensors", - [("TWDP", "TSG", "temperature,conductivity,salinity")], -) class TestEmptyDeviceData: """Unit tests for _empty_device_data() function.""" - def test_empty_device_data(self, ferry_platform, device, sensors, caplog): + def test_msg(self, caplog): + caplog.set_level(logging.DEBUG) + + get_onc_ferry._empty_device_data( + "TWDP", "TSG", "2024-02-08", "temperature,conductivity,salinity" + ) + + expected = ( + f"No ONC TWDP TSG temperature,conductivity,salinity data for 2024-02-08; " + f"substituting empty dataset" + ) + assert caplog.records[0].levelname == "WARNING" + assert caplog.messages[0] == expected + + @pytest.mark.parametrize( + "ferry_platform, device_category, sensors", + [("TWDP", "TSG", "temperature,conductivity,salinity")], + ) + def test_empty_device_data(self, ferry_platform, device_category, sensors, caplog): dataset = get_onc_ferry._empty_device_data( - ferry_platform, device, "2017-12-01", sensors + ferry_platform, device_category, "2017-12-01", sensors ) for sensor in sensors.split(","): assert sensor in dataset.data_vars @@ -311,6 +325,35 @@ def test_empty_device_data(self, ferry_platform, device, sensors, caplog): assert dataset.sampleTime.dtype == "datetime64[ns]" assert "sampleTime" in dataset.dims + @pytest.mark.parametrize( + "ferry_platform, device_category, sensors, uom, units", + [ + ("TWDP", "TSG", "temperature", "C", "degrees_Celcius"), + ("TWDP", "TSG", "conductivity", "S/m", "S/m"), + ("TWDP", "TSG", "salinity", "g/kg", "g/kg"), + ("TWDP", "OXYSENSOR", "oxygen_saturation", "percent", "percent"), + ("TWDP", "OXYSENSOR", "oxygen_corrected", "ml/l", "ml/l"), + ("TWDP", "OXYSENSOR", "temperature", "C", "degrees_Celcius"), + ("TWDP", "TURBCHLFL", "cdom_fluorescence", "ppb", "ppb"), + ("TWDP", "TURBCHLFL", "chlorophyll", "ug/l", "ug/l"), + ("TWDP", "TURBCHLFL", "turbidity", "NTU", "NTU"), + ("TWDP", "CO2SENSOR", "partial_pressure", "pCO2 uatm", "pCO2 uatm"), + ("TWDP", "CO2SENSOR", "co2", "umol/mol", "umol/mol"), + ("TWDP", "TEMPHUMID", "air_temperature", "C", "degrees_Celcius"), + ("TWDP", "TEMPHUMID", "rel_humidity", "%", "%"), + ("TWDP", "BARPRESS", "barometric_pressure", "hPa", "hPa"), + ("TWDP", "PYRANOMETER", "solar_radiation", "W/m^2", "W/m^2"), + ("TWDP", "PYRGEOMETER", "downward_radiation", "W/m^2", "W/m^2"), + ], + ) + def test_attrs(self, ferry_platform, device_category, sensors, uom, units, caplog): + dataset = get_onc_ferry._empty_device_data( + ferry_platform, device_category, "2024-02-08", sensors + ) + assert dataset.attrs["device_category"] == device_category + assert dataset.attrs["unitOfMeasure"] == uom + assert dataset.attrs["units"] == units + @pytest.mark.parametrize("ferry_platform", ["TWDP"]) class TestQaqcFilter: