From 7bbda057915980d46701dc74c80a8c1384aa60cc Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 2 Feb 2024 15:49:04 -0800 Subject: [PATCH 01/13] Modernize test_get_onc_ctd Replace unittest.mock.patch decorator with pytest.fixture for mock worker. Add unit tests for production YAML config file elements related to worker; re: issue #117. Replace unittest.mock.patch decorator with pytest caplog fixture for tests of logging; re: issue #82. --- tests/workers/test_get_onc_ctd.py | 114 ++++++++++++++++++------------ 1 file changed, 70 insertions(+), 44 deletions(-) diff --git a/tests/workers/test_get_onc_ctd.py b/tests/workers/test_get_onc_ctd.py index 3dd81ecb..b446583b 100644 --- a/tests/workers/test_get_onc_ctd.py +++ b/tests/workers/test_get_onc_ctd.py @@ -18,10 +18,11 @@ """Unit tests for SalishSeaCast get_onc_ctd worker. """ +import logging from types import SimpleNamespace -from unittest.mock import Mock, patch import arrow +import nemo_nowcast import pytest from nowcast.workers import get_onc_ctd @@ -33,69 +34,94 @@ def config(base_config): return base_config -@patch("nowcast.workers.get_onc_ctd.NowcastWorker", spec=True) +@pytest.fixture +def mock_worker(mock_nowcast_worker, monkeypatch): + monkeypatch.setattr(get_onc_ctd, "NowcastWorker", mock_nowcast_worker) + + class TestMain: """Unit tests for main() function.""" - def test_instantiate_worker(self, m_worker): - m_worker().cli = Mock(name="cli") - get_onc_ctd.main() - args, kwargs = m_worker.call_args - assert args == ("get_onc_ctd",) - assert "description" in kwargs - - def test_init_cli(self, m_worker): - m_worker().cli = Mock(name="cli") - get_onc_ctd.main() - m_worker().init_cli.assert_called_once_with() - - def test_add_onc_station_arg(self, m_worker): - m_worker().cli = Mock(name="cli") - get_onc_ctd.main() - args, kwargs = m_worker().cli.add_argument.call_args_list[0] - assert args == ("onc_station",) - assert kwargs["choices"] == {"SCVIP", "SEVIP", "USDDL"} - assert "help" in kwargs - - def test_add_data_date_arg(self, m_worker): - m_worker().cli = Mock(name="cli") - get_onc_ctd.main() - args, kwargs = m_worker().cli.add_date_option.call_args_list[0] - assert args == ("--data-date",) - assert kwargs["default"] == arrow.utcnow().floor("day").shift(days=-1) - assert "help" in kwargs - - def test_run_worker(self, m_worker): - m_worker().cli = Mock(name="cli") - get_onc_ctd.main() - args, kwargs = m_worker().run.call_args - expected = (get_onc_ctd.get_onc_ctd, get_onc_ctd.success, get_onc_ctd.failure) - assert args == expected + def test_instantiate_worker(self, mock_worker): + worker = get_onc_ctd.main() + assert worker.name == "get_onc_ctd" + assert worker.description.startswith( + "Salish Sea nowcast worker that downloads CTD temperature and salinity data" + ) + + def test_add_onc_station_arg(self, mock_worker): + worker = get_onc_ctd.main() + assert worker.cli.parser._actions[3].dest == "onc_station" + assert worker.cli.parser._actions[3].choices == {"SCVIP", "SEVIP", "USDDL"} + assert worker.cli.parser._actions[3].help + + def test_add_run_date_option(self, mock_worker): + worker = get_onc_ctd.main() + assert worker.cli.parser._actions[4].dest == "data_date" + expected = nemo_nowcast.cli.CommandLineInterface.arrow_date + assert worker.cli.parser._actions[4].type == expected + expected = arrow.utcnow().floor("day").shift(days=-1) + assert worker.cli.parser._actions[4].default == expected + assert worker.cli.parser._actions[4].help + + +class TestConfig: + """Unit tests for production YAML config file elements related to worker.""" + + def test_message_registry(self, prod_config): + assert "get_onc_ctd" in prod_config["message registry"]["workers"] + msg_registry = prod_config["message registry"]["workers"]["get_onc_ctd"] + assert msg_registry["checklist key"] == "ONC CTD data" + + def test_message_registry_keys(self, prod_config): + msg_registry = prod_config["message registry"]["workers"]["get_onc_ctd"] + assert list(msg_registry.keys()) == [ + "checklist key", + "success SCVIP", + "success SEVIP", + "success USDDL", + "failure", + "crash", + ] + + def test_obs_ctd_data_section(self, prod_config): + ctd_data = prod_config["observations"]["ctd data"] + assert ctd_data["dest dir"] == "/results/observations/ONC/CTD/" + expected = "{station}/{station}_CTD_15m_{yyyymmdd}.nc" + assert ctd_data["filepath template"] == expected @pytest.mark.parametrize("onc_station", ["SCVIP", "SEVIP", "USDDL"]) -@patch("nowcast.workers.get_onc_ctd.logger", autospec=True) class TestSuccess: """Unit tests for success() function.""" - def test_success(self, m_logger, onc_station): + def test_success(self, onc_station, caplog): parsed_args = SimpleNamespace( onc_station=onc_station, data_date=arrow.get("2016-09-09") ) + caplog.set_level(logging.DEBUG) + msg_type = get_onc_ctd.success(parsed_args) - assert m_logger.info.called - assert msg_type == "success {}".format(onc_station) + + assert caplog.records[0].levelname == "INFO" + expected = f"2016-09-09 ONC {onc_station} CTD T&S file created" + assert caplog.messages[0] == expected + assert msg_type == f"success {onc_station}" @pytest.mark.parametrize("onc_station", ["SCVIP", "SEVIP", "USDDL"]) -@patch("nowcast.workers.get_onc_ctd.logger", autospec=True) class TestFailure: """Unit tests for failure() function.""" - def test_failure(self, m_logger, onc_station): + def test_failure(self, onc_station, caplog): parsed_args = SimpleNamespace( onc_station=onc_station, data_date=arrow.get("2016-09-09") ) + caplog.set_level(logging.DEBUG) + msg_type = get_onc_ctd.failure(parsed_args) - assert m_logger.critical.called - assert msg_type == "failure" + + assert caplog.records[0].levelname == "CRITICAL" + expected = f"2016-09-09 ONC {onc_station} CTD T&S file creation failed" + assert caplog.messages[0] == expected + assert msg_type == f"failure" From 8a1e7ac668b0cf89333efe99d38732d38a6e1d1a Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 2 Feb 2024 15:58:29 -0800 Subject: [PATCH 02/13] Add unit test for YAML config file elements Added unit test for production YAML configuration file ctd data observations elements used in after_* () functions. --- tests/test_next_workers.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_next_workers.py b/tests/test_next_workers.py index 1fecc832..0cb1413b 100644 --- a/tests/test_next_workers.py +++ b/tests/test_next_workers.py @@ -138,6 +138,14 @@ def worker_modules(): assert f"after_{worker_module.stem}" in after_funcs +class TestConfig: + """Unit tests for production YAML config file elements used in after_*() functions""" + + def test_obs_ctd_data(self, prod_config): + cdt_data = prod_config["observations"]["ctd data"] + assert cdt_data["stations"] == ["SCVIP", "SEVIP"] + + class TestAfterDownloadWeather: """Unit tests for the after_download_weather function.""" From 509c2abf394686fceb8d62f170a812c296a37d83 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 2 Feb 2024 16:15:23 -0800 Subject: [PATCH 03/13] Improve get_onc_ctd.main() function Update docstring re: issue #121. Return worker so that modernized unit tests for main() work; re issue #81. --- nowcast/workers/get_onc_ctd.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nowcast/workers/get_onc_ctd.py b/nowcast/workers/get_onc_ctd.py index ef91bc2b..290185b6 100644 --- a/nowcast/workers/get_onc_ctd.py +++ b/nowcast/workers/get_onc_ctd.py @@ -42,9 +42,7 @@ def main(): - """Set up and run the worker. - - For command-line usage see: + """For command-line usage see: :command:`python -m nowcast.workers.get_onc_ctd -h` """ @@ -61,6 +59,7 @@ def main(): help="UTC date to get ONC node CTD data for.", ) worker.run(get_onc_ctd, success, failure) + return worker def success(parsed_args): From ea4820dece879c030ca9e2d7ce5a91de6edd9d74 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Wed, 7 Feb 2024 15:53:51 -0800 Subject: [PATCH 04/13] Update get_onc_ctd worker to use ONC API v3 Modified the get_onc_ctd worker to use ONC API v3 'getByLocation' method. The new method uses a location code instead of a station name. Also, these changes include adding a 'dateTo' parameter to limit the data to a specific time range for the day. Additionally, small changes to variable and parameter names were made to match the new method requirements. --- config/nowcast.yaml | 6 +++--- nowcast/workers/get_onc_ctd.py | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/config/nowcast.yaml b/config/nowcast.yaml index d3436fcc..e6b7ffad 100644 --- a/config/nowcast.yaml +++ b/config/nowcast.yaml @@ -245,7 +245,7 @@ temperature salinity: observations: - # Dataset that maps high resolution lon/lat grid on to NEMO j/i indices + # Dataset that maps high-resolution lon/lat grid on to NEMO j/i indices lon/lat to NEMO ji map: /SalishSeaCast/grid/grid_from_lat_lon_mask999.nc # ONC Strait of Georgia nodes real-time CTD data ctd data: @@ -259,7 +259,7 @@ observations: dest dir: /results/observations/ONC/CTD/ # Template for ONC CTD T&S data file path # **Must be quoted to project {} characters** - filepath template: '{station}/{station}_CTD_15m_{yyyymmdd}.nc' + filepath template: "{station}/{station}_CTD_15m_{yyyymmdd}.nc" # ONC Strait of Georgia ferry platforms real-time data ferry data: @@ -274,7 +274,7 @@ observations: csv dir: /opp/observations/AISDATA/ # Destination directory for VFPA HADCP data netCDF files dest dir: /opp/observations/AISDATA/netcdf/ - # Template for VFPA HADCP data file path + # Template for the VFPA HADCP data file path # **Must be quoted to project {} characters** filepath template: 'VFPA_2ND_NARROWS_HADCP_2s_{yyyymm}.nc' diff --git a/nowcast/workers/get_onc_ctd.py b/nowcast/workers/get_onc_ctd.py index 290185b6..fe5e141b 100644 --- a/nowcast/workers/get_onc_ctd.py +++ b/nowcast/workers/get_onc_ctd.py @@ -79,15 +79,15 @@ def failure(parsed_args): def get_onc_ctd(parsed_args, config, *args): ymd = parsed_args.data_date.format("YYYY-MM-DD") logger.info(f"requesting ONC {parsed_args.onc_station} CTD T&S data for {ymd}") - TOKEN = os.environ["ONC_USER_TOKEN"] onc_data = data_tools.get_onc_data( "scalardata", - "getByStation", - TOKEN, - station=parsed_args.onc_station, - deviceCategory="CTD", - sensors="salinity,temperature", + "getByLocation", + os.environ["ONC_USER_TOKEN"], + locationCode=parsed_args.onc_station, + deviceCategoryCode="CTD", + sensorCategoryCodes="salinity,temperature", dateFrom=data_tools.onc_datetime(f"{ymd} 00:00", "utc"), + dateTo=data_tools.onc_datetime(f"{ymd} 23:59", "utc"), ) try: ctd_data = data_tools.onc_json_to_dataset(onc_data) From 39a130f2979641828518e1eabc330f8727a548d5 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Thu, 8 Feb 2024 11:06:19 -0800 Subject: [PATCH 05/13] Remove test skipping due to resolved issue #174 The pytest skip marker for "_resample_nav_coord()" function in "tests/workers/test_get_onc_ferry.py" file has been removed. This has happened following the successful resolution of issue number #174, making it unnecessary to skip these specific unit tests anymore. Fixes issue #174 --- tests/workers/test_get_onc_ferry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/workers/test_get_onc_ferry.py b/tests/workers/test_get_onc_ferry.py index ad2ef7c4..7aa494a3 100644 --- a/tests/workers/test_get_onc_ferry.py +++ b/tests/workers/test_get_onc_ferry.py @@ -156,8 +156,6 @@ class TestCalcLocationArrays: class TestResampleNavCoord: """Unit test for _resample_nav_coord() function.""" - # TODO: remove this skip when issue #174 is resolved - @pytest.mark.skip(reason="fails on GHA with pandas=2.0.0; see issue #174") def test_resample_nav_coord(self, ferry_platform): nav_data = xarray.Dataset( data_vars={ From 4f51798d45adb5608ea06f2f5ac821583ccde03c Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Thu, 8 Feb 2024 11:24:53 -0800 Subject: [PATCH 06/13] Add unit tests for YAML config file elements Add unit tests for production YAML config file elements related to worker; re: issue #117. --- tests/workers/test_get_onc_ferry.py | 84 ++++++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 2 deletions(-) diff --git a/tests/workers/test_get_onc_ferry.py b/tests/workers/test_get_onc_ferry.py index 7aa494a3..b5f8ffdc 100644 --- a/tests/workers/test_get_onc_ferry.py +++ b/tests/workers/test_get_onc_ferry.py @@ -86,10 +86,90 @@ def test_message_registry_keys(self, prod_config): "crash", ] + def test_TWDP_ferry_platform(self, prod_config): + ferry_config = prod_config["observations"]["ferry data"]["ferries"]["TWDP"] + assert ferry_config["route name"] == "Tsawwassen - Duke Point" + expected = "Mobile Platforms, British Columbia Ferries, Tsawwassen - Duke Point" + assert ferry_config["ONC station description"] == expected + + def test_TWDP_location(self, prod_config): + location_config = prod_config["observations"]["ferry data"]["ferries"]["TWDP"][ + "location" + ] + assert location_config["stations"] == ["TWDP.N1", "TWDP.N2"] + assert location_config["device category"] == "NAV" + assert location_config["sensors"] == ["longitude", "latitude"] + assert location_config["terminals"] == ["Tsawwassen", "Duke Pt."] + + def test_TWDP_devices(self, prod_config): + devices_config = prod_config["observations"]["ferry data"]["ferries"]["TWDP"][ + "devices" + ] + expected = { + "TSG": { + "sensors": { + "temperature": "temperature", + "conductivity": "conductivity", + "salinity": "salinity", + }, + }, + "OXYSENSOR": { + "sensors": { + "o2_saturation": "oxygen_saturation", + "o2_concentration_corrected": "oxygen_corrected", + "o2_temperature": "temperature", + }, + }, + "TURBCHLFL": { + "sensors": { + "cdom_fluorescence": "cdom_fluorescence", + "chlorophyll": "chlorophyll", + "turbidity": "turbidity", + }, + }, + "CO2SENSOR": { + "sensors": { + "co2_partial_pressure": "partial_pressure", + "co2_concentration_linearized": "co2", + }, + }, + "TEMPHUMID": { + "sensors": { + "air_temperature": "air_temperature", + "relative_humidity": "rel_humidity", + }, + }, + "BARPRESS": { + "sensors": { + "barometric_pressure": "barometric_pressure", + }, + }, + "PYRANOMETER": { + "sensors": { + "solar_radiation": "solar_radiation", + }, + }, + "PYRGEOMETER": { + "sensors": { + "longwave_radiation": "downward_radiation", + }, + }, + } + assert devices_config == expected + def test_lon_lat_ji_map_path(self, prod_config): nemo_ji_map = prod_config["observations"]["lon/lat to NEMO ji map"] assert nemo_ji_map == "/SalishSeaCast/grid/grid_from_lat_lon_mask999.nc" + def test_TWDP_file_path_template(self, prod_config): + file_path_tmpl = prod_config["observations"]["ferry data"]["ferries"]["TWDP"][ + "filepath template" + ] + assert ( + file_path_tmpl + == "{ferry_platform}/{ferry_platform}_TSG_O2_TURBCHLFL_CO2_METEO_1m_{yyyymmdd}.nc" + ) + def test_dest_dir(self, prod_config): ferry_data_config = prod_config["observations"]["ferry data"] assert ferry_data_config["dest dir"] == "/results/observations/ONC/ferries/" @@ -103,7 +183,7 @@ def test_success(self, ferry_platform, caplog): parsed_args = SimpleNamespace( ferry_platform=ferry_platform, data_date=arrow.get("2016-09-09") ) - caplog.set_level(logging.INFO) + caplog.set_level(logging.DEBUG) msg_type = get_onc_ferry.success(parsed_args) @@ -121,7 +201,7 @@ def test_failure(self, ferry_platform, caplog): parsed_args = SimpleNamespace( ferry_platform=ferry_platform, data_date=arrow.get("2016-09-09") ) - caplog.set_level(logging.CRITICAL) + caplog.set_level(logging.DEBUG) msg_type = get_onc_ferry.failure(parsed_args) From 91b227ae8db00fa96bde2a731a91dec91dc36a75 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Thu, 8 Feb 2024 11:53:17 -0800 Subject: [PATCH 07/13] Update YAML config with Tsawwassen - Duke Point ferry route The 'nowcast.yaml' configuration file has been updated to include the Tsawwassen - Duke Point ferry route details including devices, sensors, and other relevant data. --- config/nowcast.yaml | 61 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 59 insertions(+), 2 deletions(-) diff --git a/config/nowcast.yaml b/config/nowcast.yaml index e6b7ffad..401437de 100644 --- a/config/nowcast.yaml +++ b/config/nowcast.yaml @@ -263,8 +263,65 @@ observations: # ONC Strait of Georgia ferry platforms real-time data ferry data: - ferries: {} - # see PR#109 for TWDP ferry example of this section and corresponding config tests + ferries: + # Tsawwassen - Duke Point route + TWDP: + route name: Tsawwassen - Duke Point + ONC station description: Mobile Platforms, British Columbia Ferries, Tsawwassen - Duke Point + location: + # ONC scalardata.getByStation API query parameters: + # + # ONC station identifiers to use for nav data, in priority order + stations: + - TWDP.N1 + - TWDP.N2 + device category: NAV + sensors: + - longitude + - latitude + terminals: + # Terminal names from salishsea_tools.places.PLACES + - Tsawwassen + - Duke Pt. + devices: + # ONC device category + TSG: + # device sensor names + sensors: + # ERDDAP sensor name: ONC sensor name + temperature: temperature + conductivity: conductivity + salinity: salinity + OXYSENSOR: + sensors: + o2_saturation: oxygen_saturation + o2_concentration_corrected: oxygen_corrected + o2_temperature: temperature + TURBCHLFL: + sensors: + cdom_fluorescence: cdom_fluorescence + chlorophyll: chlorophyll + turbidity: turbidity + CO2SENSOR: + sensors: + co2_partial_pressure: partial_pressure + co2_concentration_linearized: co2 + TEMPHUMID: + sensors: + air_temperature: air_temperature + relative_humidity: rel_humidity + BARPRESS: + sensors: + barometric_pressure: barometric_pressure + PYRANOMETER: + sensors: + solar_radiation: solar_radiation + PYRGEOMETER: + sensors: + longwave_radiation: downward_radiation + # Template for ONC ferry data file path + # **Must be quoted to project {} characters** + filepath template: "{ferry_platform}/{ferry_platform}_TSG_O2_TURBCHLFL_CO2_METEO_1m_{yyyymmdd}.nc" # Destination directory for ONC ferry data netCDF files dest dir: /results/observations/ONC/ferries/ From 89659ef8312ce9c2828231ce8c65e341768d2485 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Thu, 8 Feb 2024 11:54:39 -0800 Subject: [PATCH 08/13] Update dataset ID for TWDP-ferry The dataset ID for TWDP-ferry was updated in the 'nowcast.yaml' configuration file and in respective test. The change is reflected accurately to match the new details for the Tsawwassen - Duke Point ferry route. --- config/nowcast.yaml | 2 +- tests/workers/test_ping_erddap.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/config/nowcast.yaml b/config/nowcast.yaml index 401437de..6f94ee6c 100644 --- a/config/nowcast.yaml +++ b/config/nowcast.yaml @@ -486,7 +486,7 @@ erddap: # out of service since 22-Dec-2019; repair ETA unknown # - ubcONCUSDDLCTD15mV1 TWDP-ferry: - - ubcONCTWDP1mV1 + - ubcONCTWDP1mV18-01 nowcast-green: - ubcSSg3DBiologyFields1hV21-11 - ubcSSg3DLightFields1hV21-11 diff --git a/tests/workers/test_ping_erddap.py b/tests/workers/test_ping_erddap.py index e8a79e7b..3def163e 100644 --- a/tests/workers/test_ping_erddap.py +++ b/tests/workers/test_ping_erddap.py @@ -153,7 +153,7 @@ def test_erddap_section(self, prod_config): assert erddap["datasetIDs"]["SEVIP-CTD"] == ["ubcONCSEVIPCTD15mV1"] # USDDL-CTD went out of service since 22-Dec-2019; repair ETA unknown # assert erddap["datasetIDs"]["USDDL-CTD"] == ["ubcONCUSDDLCTD15mV1"] - assert erddap["datasetIDs"]["TWDP-ferry"] == ["ubcONCTWDP1mV1"] + assert erddap["datasetIDs"]["TWDP-ferry"] == ["ubcONCTWDP1mV18-01"] assert erddap["datasetIDs"]["nowcast-green"] == [ "ubcSSg3DBiologyFields1hV21-11", "ubcSSg3DLightFields1hV21-11", From b70601cc3085c54c9949d8cb4fa82becc5f45130 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Thu, 8 Feb 2024 15:30:56 -0800 Subject: [PATCH 09/13] Update get_onc_ferry worker to use ONC API v3 Modified the get_onc_ferry worker to use ONC API v3 'getByLocation' method. The new method uses a location code instead of a station name. Also, these changes include adding a 'dateTo' parameter to limit the data to a specific time range for the day. Server-side averaging into 1-second bins is used to ensure that an entire day's observations from each sensor can be obtained by a single API request; there is a limit of 100,000 "rows" per sensor. Additionally, small changes to variable and parameter names were made to match the new method requirements. --- nowcast/workers/get_onc_ferry.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/nowcast/workers/get_onc_ferry.py b/nowcast/workers/get_onc_ferry.py index 9c638392..b161eec2 100644 --- a/nowcast/workers/get_onc_ferry.py +++ b/nowcast/workers/get_onc_ferry.py @@ -15,9 +15,9 @@ """SalishSeaCast worker that downloads data for a specified UTC day from an ONC BC Ferries measurement platform. -The data are filtered to include only values for which qaqcFlag == 1 +The data are filtered to include only values for which qaqcFlag <= 1 or qaqcFlag >= 7 (meaning that all of ONC's automated QA/QC tests were passed). -After filtering the data are aggregated into 1 minute bins. +After filtering, the data are aggregated into 1-minute bins. The aggregation functions are mean, standard deviation, and sample count. The data are stored as a netCDF-4/HDF5 file that is accessible via @@ -138,6 +138,7 @@ def get_onc_ferry(parsed_args, config, *args): os.fspath(nc_filepath), encoding=encoding, unlimited_dims=("time",) ) checklist = {ferry_platform: os.fspath(nc_filepath)} + return checklist def _get_nav_data(ferry_platform, ymd, location_config): @@ -148,12 +149,15 @@ def _get_nav_data(ferry_platform, ymd, location_config): try: onc_data = data_tools.get_onc_data( "scalardata", - "getByStation", + "getByLocation", os.environ["ONC_USER_TOKEN"], - station=station, - deviceCategory=device_category, - sensors=sensors, + locationCode=station, + deviceCategoryCode=device_category, + sensorCategoryCodes=sensors, dateFrom=(data_tools.onc_datetime(f"{ymd} 00:00", "utc")), + dateTo=data_tools.onc_datetime(f"{ymd} 23:59", "utc"), + resampleType="avg", + resamplePeriod=1, ) except requests.HTTPError as e: msg = ( @@ -260,12 +264,15 @@ def _get_water_data(ferry_platform, device_category, ymd, devices_config): try: onc_data = data_tools.get_onc_data( "scalardata", - "getByStation", + "getByLocation", os.environ["ONC_USER_TOKEN"], - station=ferry_platform, - deviceCategory=device_category, - sensors=sensors, - dateFrom=(data_tools.onc_datetime(f"{ymd} 00:00", "utc")), + locationCode=ferry_platform, + deviceCategoryCode=device_category, + sensorCategoryCodes=sensors, + dateFrom=data_tools.onc_datetime(f"{ymd} 00:00", "utc"), + dateTo=data_tools.onc_datetime(f"{ymd} 23:59", "utc"), + resampleType="avg", + resamplePeriod=1, ) except requests.HTTPError as e: if e.response.status_code == 504: From c865c19b4f64576b3cd3103b707e5a084d4e40dd Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Mon, 26 Feb 2024 15:49:29 -0800 Subject: [PATCH 10/13] Update ONC_data_product_url attr in ferry datasets Updated the ONC_data_product_url attribute in the datasets produced by the get_onc_ferry worker to reflect changes in the data source's domain name and API query parameter names. Code changes include updating the ONC data API domain name, adding the 'locationCode' query parameter, and changing the 'deviceCategory' query parameter name to 'deviceCategoryCode'. All of those changes are for compatibility with the ONC data API v3. --- nowcast/workers/get_onc_ferry.py | 34 +++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/nowcast/workers/get_onc_ferry.py b/nowcast/workers/get_onc_ferry.py index b161eec2..153c293f 100644 --- a/nowcast/workers/get_onc_ferry.py +++ b/nowcast/workers/get_onc_ferry.py @@ -445,28 +445,40 @@ def _create_dataarray(var, array, ferry_platform, location_config): "ioos category": "location", "standard name": "longitude", "long name": "Longitude", - "ONC_data_product_url": f"http://dmas.uvic.ca/DataSearch?deviceCategory={location_config['device category']}", + "ONC_data_product_url": ( + f"http://data.oceannetworks.ca/DataSearch?&locationCode={ferry_platform}" + f"&deviceCategoryCode={location_config['device category']}" + ), }, "latitude": { "name": "latitude", "ioos category": "location", "standard name": "latitude", "long name": "Latitude", - "ONC_data_product_url": f"http://dmas.uvic.ca/DataSearch?deviceCategory={location_config['device category']}", + "ONC_data_product_url": ( + f"http://data.oceannetworks.ca/DataSearch?locationCode={ferry_platform}" + f"&deviceCategoryCode={location_config['device category']}" + ), }, "nemo_grid_j": { "name": "nemo_grid_j", "ioos category": "location", "standard name": "nemo_grid_j", "long name": "NEMO grid j index", - "ONC_data_product_url": f"http://dmas.uvic.ca/DataSearch?deviceCategory={location_config['device category']}", + "ONC_data_product_url": ( + f"http://data.oceannetworks.ca/DataSearch?locationCode={ferry_platform}" + f"&deviceCategoryCode={location_config['device category']}" + ), }, "nemo_grid_i": { "name": "nemo_grid_i", "ioos category": "location", "standard name": "nemo_grid_i", "long name": "NEMO grid i index", - "ONC_data_product_url": f"http://dmas.uvic.ca/DataSearch?deviceCategory={location_config['device category']}", + "ONC_data_product_url": ( + f"http://data.oceannetworks.ca/DataSearch?locationCode={ferry_platform}" + f"&deviceCategoryCode={location_config['device category']}" + ), }, "on_crossing_mask": { "name": "on_crossing_mask", @@ -475,7 +487,9 @@ def _create_dataarray(var, array, ferry_platform, location_config): "long name": "On Crossing", "flag_values": "0, 1", "flag_meanings": "in berth, on crossing", - "ONC_data_product_url": f"http://dmas.uvic.ca/DataSearch?location={ferry_platform}", + "ONC_data_product_url": ( + f"http://data.oceannetworks.ca/DataSearch?locationCode={ferry_platform}" + ), }, "crossing_number": { "name": "crossing_number", @@ -493,7 +507,9 @@ def _create_dataarray(var, array, ferry_platform, location_config): "crossing_number==n observation from the previous day, " "where n is max(crossing_number). " "The number of crossings per day varies throughout the year.", - "ONC_data_product_url": f"http://dmas.uvic.ca/DataSearch?location={ferry_platform}", + "ONC_data_product_url": ( + f"http://data.oceannetworks.ca/DataSearch?locationCode={ferry_platform}" + ), }, "temperature": { "name": "temperature", @@ -814,11 +830,11 @@ def _create_dataarray(var, array, ferry_platform, location_config): dataset_array.attrs["ONC_stationCode"] = array.attrs["station"] dataset_array.attrs[ "ONC_data_product_url" - ] += f'&location={array.attrs["station"]}' + ] += f'&locationCode={array.attrs["station"]}' except KeyError: dataset_array.attrs["ONC_data_product_url"] = ( - f"http://dmas.uvic.ca/DataSearch?location={ferry_platform}" - f"&deviceCategory={array.device_category}" + f"http://data.oceannetworks.ca/DataSearch?locationCode={ferry_platform}" + f"&deviceCategoryCode={array.device_category}" ) return dataset_array From 3807386e65d62fa8689cc36a1f4d63328bcef82a Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Thu, 29 Feb 2024 15:29:31 -0800 Subject: [PATCH 11/13] Correct representation of relative humidity attribute Changed the naming representation of the relative humidity attribute in the 'get_onc_ferry' worker. The attribute was initially named "REL_HUMIDITY" and it was renamed to "rel_humidity", matching the common low-caps format of the other attributes for consistency of naming convention. --- nowcast/workers/get_onc_ferry.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nowcast/workers/get_onc_ferry.py b/nowcast/workers/get_onc_ferry.py index 153c293f..0be195e5 100644 --- a/nowcast/workers/get_onc_ferry.py +++ b/nowcast/workers/get_onc_ferry.py @@ -312,7 +312,7 @@ def _empty_device_data(ferry_platform, device_category, ymd, sensors): "partial_pressure": "pCO2 uatm", "co2": "umol/mol", "air_temperature": "C", - "REL_HUMIDITY": "%", + "rel_humidity": "%", "barometric_pressure": "hPa", "solar_radiation": "W/m^2", "downward_radiation": "W/m^2", From cfaed7ce9e7e1fe024527bfbfea458779a00827a Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Thu, 29 Feb 2024 15:38:26 -0800 Subject: [PATCH 12/13] Update QA/QC filter criteria in get_onc_ferry worker Adjust the QA/QC filter settings in the 'get_onc_ferry' worker to now include data where the 'qaqcFlag' attribute is less than or equal to 1 or greater than or equal to 7. This update is necessary because of the change to server-side averaging in the request. --- nowcast/workers/get_onc_ferry.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nowcast/workers/get_onc_ferry.py b/nowcast/workers/get_onc_ferry.py index 0be195e5..305ae717 100644 --- a/nowcast/workers/get_onc_ferry.py +++ b/nowcast/workers/get_onc_ferry.py @@ -339,11 +339,14 @@ def _qaqc_filter(ferry_platform, device, device_data, ymd, devices_config): for sensor, onc_sensor in devices_config[device]["sensors"].items(): logger.debug( f"filtering ONC {ferry_platform} {device} {onc_sensor} data " - f"for {ymd} to exlude qaqcFlag!=1" + f"for {ymd} to exclude 1= 7 + ) try: cf_units = cf_units_mapping[onc_data.unitOfMeasure] except KeyError: @@ -386,10 +389,10 @@ def count(values, axis): data_array = array.resample(time="1Min").mean() except IndexError: # array is empty, meaning there are no observations with - # qaqcFlag!=1, so substitute a DataArray full of NaNs + # qaqcFlag<=1 or qaqcFlac>=7, so substitute a DataArray full of NaNs logger.warning( f"ONC {ferry_platform} {array.device_category} " - f"{array.name} data for {ymd} contains no qaqcFlag==1 " + f"{array.name} data for {ymd} contains no qaqcFlag<=1 or qaqcFlac>=7 " f"values; substituting NaNs" ) nan_values = numpy.empty_like(data_vars["longitude"].values) @@ -425,7 +428,7 @@ def count(values, axis): coords={"time": data_arrays.longitude.time.values}, attrs={ "history": f"""{now} Download raw data from ONC scalardata API. -{now} Filter to exclude data with qaqcFlag != 1. +{now} Filter to exclude data with 1 Date: Thu, 29 Feb 2024 15:42:52 -0800 Subject: [PATCH 13/13] Improve empty data array handling ONC API v3 has more/different ways of returning empty responses to data requests that we have to handle. It's not as clean as I would like, but it is good enough for the purpose. --- nowcast/workers/get_onc_ferry.py | 51 +++++++++++++++++--------- tests/workers/test_get_onc_ferry.py | 55 +++++++++++++++++++++++++---- 2 files changed, 84 insertions(+), 22 deletions(-) diff --git a/nowcast/workers/get_onc_ferry.py b/nowcast/workers/get_onc_ferry.py index 305ae717..0cca9996 100644 --- a/nowcast/workers/get_onc_ferry.py +++ b/nowcast/workers/get_onc_ferry.py @@ -293,11 +293,13 @@ def _get_water_data(ferry_platform, device_category, ymd, devices_config): return device_data -def _empty_device_data(ferry_platform, device_category, ymd, sensors): +def _empty_device_data( + ferry_platform, device_category, ymd, sensors, time_coord="sampleTime" +): # Response from ONC contains no sensor data, so return an # empty DataArray logger.warning( - f"No ONC {ferry_platform} {device_category} data for {ymd}; " + f"No ONC {ferry_platform} {device_category} {sensors} data for {ymd}; " f"substituting empty dataset" ) onc_units = { @@ -321,15 +323,21 @@ def _empty_device_data(ferry_platform, device_category, ymd, sensors): sensor: xarray.DataArray( name=sensor, data=numpy.array([], dtype=float), - coords={"sampleTime": numpy.array([], dtype="datetime64[ns]")}, - dims="sampleTime", + coords={time_coord: numpy.array([], dtype="datetime64[ns]")}, + dims=time_coord, attrs={ + "device_category": device_category, "qaqcFlag": numpy.array([], dtype=numpy.int64), "unitOfMeasure": onc_units[sensor], + "units": "degrees_Celcius" + if sensor in {"temperature", "air_temperature"} + else onc_units[sensor], }, ) for sensor in sensors.split(",") } + if len(data_arrays) == 1: + return data_arrays[sensors] return xarray.Dataset(data_arrays) @@ -341,9 +349,14 @@ def _qaqc_filter(ferry_platform, device, device_data, ymd, devices_config): f"filtering ONC {ferry_platform} {device} {onc_sensor} data " f"for {ymd} to exclude 1= 7 ) @@ -351,17 +364,19 @@ def _qaqc_filter(ferry_platform, device, device_data, ymd, devices_config): cf_units = cf_units_mapping[onc_data.unitOfMeasure] except KeyError: cf_units = onc_data.unitOfMeasure - sensor_data_arrays.append( - xarray.DataArray( + if not sensor_qaqc_mask.any(): + data_array = _empty_device_data( + ferry_platform, device, ymd, onc_sensor, time_coord="time" + ) + else: + data_array = xarray.DataArray( name=sensor, - data=onc_data[not_nan_mask][sensor_qaqc_mask].values, - coords={ - "time": onc_data.sampleTime[not_nan_mask][sensor_qaqc_mask].values - }, + data=onc_data[sensor_qaqc_mask].values, + coords={"time": onc_data.sampleTime[sensor_qaqc_mask].values}, dims="time", attrs={"device_category": device, "units": cf_units}, ) - ) + sensor_data_arrays.append(data_array) return sensor_data_arrays @@ -387,7 +402,7 @@ def count(values, axis): else: try: data_array = array.resample(time="1Min").mean() - except IndexError: + except (IndexError, ValueError): # array is empty, meaning there are no observations with # qaqcFlag<=1 or qaqcFlac>=7, so substitute a DataArray full of NaNs logger.warning( @@ -418,7 +433,11 @@ def count(values, axis): sample_count_var = f"{var}_sample_count" sample_count_array = array.resample(time="1Min").count() sample_count_array.attrs = array.attrs - del sample_count_array.attrs["units"] + try: + del sample_count_array.attrs["units"] + except KeyError: + # empty data arrays lack units attributes + pass data_vars[sample_count_var] = _create_dataarray( sample_count_var, sample_count_array, ferry_platform, location_config ) diff --git a/tests/workers/test_get_onc_ferry.py b/tests/workers/test_get_onc_ferry.py index b5f8ffdc..32f1a3f6 100644 --- a/tests/workers/test_get_onc_ferry.py +++ b/tests/workers/test_get_onc_ferry.py @@ -291,16 +291,30 @@ class TestGetWaterData: pass -@pytest.mark.parametrize( - "ferry_platform, device, sensors", - [("TWDP", "TSG", "temperature,conductivity,salinity")], -) class TestEmptyDeviceData: """Unit tests for _empty_device_data() function.""" - def test_empty_device_data(self, ferry_platform, device, sensors, caplog): + def test_msg(self, caplog): + caplog.set_level(logging.DEBUG) + + get_onc_ferry._empty_device_data( + "TWDP", "TSG", "2024-02-08", "temperature,conductivity,salinity" + ) + + expected = ( + f"No ONC TWDP TSG temperature,conductivity,salinity data for 2024-02-08; " + f"substituting empty dataset" + ) + assert caplog.records[0].levelname == "WARNING" + assert caplog.messages[0] == expected + + @pytest.mark.parametrize( + "ferry_platform, device_category, sensors", + [("TWDP", "TSG", "temperature,conductivity,salinity")], + ) + def test_empty_device_data(self, ferry_platform, device_category, sensors, caplog): dataset = get_onc_ferry._empty_device_data( - ferry_platform, device, "2017-12-01", sensors + ferry_platform, device_category, "2017-12-01", sensors ) for sensor in sensors.split(","): assert sensor in dataset.data_vars @@ -311,6 +325,35 @@ def test_empty_device_data(self, ferry_platform, device, sensors, caplog): assert dataset.sampleTime.dtype == "datetime64[ns]" assert "sampleTime" in dataset.dims + @pytest.mark.parametrize( + "ferry_platform, device_category, sensors, uom, units", + [ + ("TWDP", "TSG", "temperature", "C", "degrees_Celcius"), + ("TWDP", "TSG", "conductivity", "S/m", "S/m"), + ("TWDP", "TSG", "salinity", "g/kg", "g/kg"), + ("TWDP", "OXYSENSOR", "oxygen_saturation", "percent", "percent"), + ("TWDP", "OXYSENSOR", "oxygen_corrected", "ml/l", "ml/l"), + ("TWDP", "OXYSENSOR", "temperature", "C", "degrees_Celcius"), + ("TWDP", "TURBCHLFL", "cdom_fluorescence", "ppb", "ppb"), + ("TWDP", "TURBCHLFL", "chlorophyll", "ug/l", "ug/l"), + ("TWDP", "TURBCHLFL", "turbidity", "NTU", "NTU"), + ("TWDP", "CO2SENSOR", "partial_pressure", "pCO2 uatm", "pCO2 uatm"), + ("TWDP", "CO2SENSOR", "co2", "umol/mol", "umol/mol"), + ("TWDP", "TEMPHUMID", "air_temperature", "C", "degrees_Celcius"), + ("TWDP", "TEMPHUMID", "rel_humidity", "%", "%"), + ("TWDP", "BARPRESS", "barometric_pressure", "hPa", "hPa"), + ("TWDP", "PYRANOMETER", "solar_radiation", "W/m^2", "W/m^2"), + ("TWDP", "PYRGEOMETER", "downward_radiation", "W/m^2", "W/m^2"), + ], + ) + def test_attrs(self, ferry_platform, device_category, sensors, uom, units, caplog): + dataset = get_onc_ferry._empty_device_data( + ferry_platform, device_category, "2024-02-08", sensors + ) + assert dataset.attrs["device_category"] == device_category + assert dataset.attrs["unitOfMeasure"] == uom + assert dataset.attrs["units"] == units + @pytest.mark.parametrize("ferry_platform", ["TWDP"]) class TestQaqcFilter: