From 81e4d2526e583c92307fbfbfd12af29b402b66a0 Mon Sep 17 00:00:00 2001 From: Doug Latornell Date: Fri, 14 Jun 2024 15:12:55 -0700 Subject: [PATCH] Fix various warnings revealed by pytest (#273) * Update time frequency in test_get_onc_ferry test Change to lowercase "s" for seconds frequency in the sample time pandas date range generation in the test_get_onc_ferry unit test. This change resolves: FutureWarning: 'S' is deprecated and will be removed in a future version, please use 's' instead. * Update delimiter in daily_river_flows CSV read The delimiter for the pandas.read_csv() function has been changed from `delim_whitespace=True` to `sep="\s+"`. This change resolves: FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated and will be removed in a future version. Use ``sep='\s+'`` instead. * Update dimension assertions in several tests Changed the assertions to use 'sizes' instead of 'dims'. This change resolves: FutureWarning: The return type of `Dataset.dims` will be changed to return a set of dimension names in future, in order to be more consistent with `DataArray.dims`. To access a mapping from dimension names to lengths, please use `Dataset.sizes`. * Add assertions for expected CSV read ParserWarning Updated the test_daily_river_flows.TestReadRiverCSV.test_one_long_line() test to assert the expected ParserWarning. This warning is raised when there are differences in the lengths of the lines being parsed in the CSV data. The test now also checks the warning's message to confirm that it originates from the mismatched line lengths. * Update pandas Series value assignment re: pandas 3 Modified how Theodosia "Secondary River Flow" data is filled in case of null values in the 'daily_river_flows' module and the 'make_v202111_runoff_file' worker. Instead of calling fillna() with inplace=True on the selected column, the result of fillna() is now explicitly assigned back to that column to ensure compatibility with pandas 3.0. 
This change resolves: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method. The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy. For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object. --- nowcast/daily_river_flows.py | 6 +++--- nowcast/workers/make_v202111_runoff_file.py | 4 ++-- tests/test_daily_river_flows.py | 14 +++++++++----- tests/workers/test_get_onc_ferry.py | 2 +- tests/workers/test_make_v202111_runoff_file.py | 8 ++++---- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/nowcast/daily_river_flows.py b/nowcast/daily_river_flows.py index e08e4390..86f264b6 100644 --- a/nowcast/daily_river_flows.py +++ b/nowcast/daily_river_flows.py @@ -121,7 +121,7 @@ def _parse_long_csv_line(line): # .csv files pd.read_csv, header=None, - delim_whitespace=True, + sep="\\s+", index_col=False, names=["year", "month", "day", "flow"], engine="python", @@ -193,8 +193,8 @@ def _read_river_Theodosia(config): # Used for dates before Scotty part was gauged, or in the event of missing obs parts[2]["FlowFromDiversion"] = parts[2].Diversion * theodosia_from_diversion_only theodosia = theodosia.merge(parts[2], how="outer", on="date", sort=True) - theodosia["Secondary River Flow"].fillna( - theodosia["FlowFromDiversion"], inplace=True + theodosia["Secondary River Flow"] = theodosia["Secondary River Flow"].fillna( + theodosia["FlowFromDiversion"] ) theodosia.drop( diff --git a/nowcast/workers/make_v202111_runoff_file.py b/nowcast/workers/make_v202111_runoff_file.py index d09c0ca1..e7746620 100644 --- a/nowcast/workers/make_v202111_runoff_file.py +++ b/nowcast/workers/make_v202111_runoff_file.py @@ -365,8 +365,8 @@ def 
_read_river_Theodosia(config): # Used for dates before Scotty part was gauged, or in the event of missing obs parts[2]["FlowFromDiversion"] = parts[2].Diversion * theodosia_from_diversion_only theodosia = theodosia.merge(parts[2], how="outer", on="date", sort=True) - theodosia["Secondary River Flow"].fillna( - theodosia["FlowFromDiversion"], inplace=True + theodosia["Secondary River Flow"] = theodosia["Secondary River Flow"].fillna( + theodosia["FlowFromDiversion"] ) theodosia.drop( diff --git a/tests/test_daily_river_flows.py b/tests/test_daily_river_flows.py index 7611c870..e3b49acc 100644 --- a/tests/test_daily_river_flows.py +++ b/tests/test_daily_river_flows.py @@ -108,7 +108,9 @@ def test_one_long_line(self): """ ) - river_flow = daily_river_flows._read_river_csv(io.StringIO(csv_lines)) + with pytest.warns(pandas.errors.ParserWarning) as warning_record: + # We expect a ParserWarning due to the difference in length of the lines we're parsing + river_flow = daily_river_flows._read_river_csv(io.StringIO(csv_lines)) expected = pandas.DataFrame( { @@ -119,6 +121,8 @@ def test_one_long_line(self): } ) pandas.testing.assert_frame_equal(river_flow, expected) + expected = "Length of header or names does not match length of data. This leads to a loss of data with index_col=False." 
+ assert str(warning_record[0].message) == expected class TestSetDateAsIndex: @@ -1445,10 +1449,10 @@ def test_dims(self, runoff_array, config): obs_date, runoff_array, config ) - assert len(runoff_ds.dims) == 3 - assert runoff_ds.dims["time_counter"] == 1 - assert runoff_ds.dims["y"] == runoff_array.shape[0] - assert runoff_ds.dims["x"] == runoff_array.shape[1] + assert len(runoff_ds.sizes) == 3 + assert runoff_ds.sizes["time_counter"] == 1 + assert runoff_ds.sizes["y"] == runoff_array.shape[0] + assert runoff_ds.sizes["x"] == runoff_array.shape[1] def test_dataset_attrs(self, runoff_array, config, monkeypatch): def mock_now(tz): diff --git a/tests/workers/test_get_onc_ferry.py b/tests/workers/test_get_onc_ferry.py index 32f1a3f6..72ca2bcb 100644 --- a/tests/workers/test_get_onc_ferry.py +++ b/tests/workers/test_get_onc_ferry.py @@ -246,7 +246,7 @@ def test_resample_nav_coord(self, ferry_platform): }, coords={ "sampleTime": pandas.date_range( - start="2021-03-08T10:14:43.082000000", periods=59, freq="1S" + start="2021-03-08T10:14:43.082000000", periods=59, freq="1s" ) }, attrs={"station": "TWDP.N1"}, diff --git a/tests/workers/test_make_v202111_runoff_file.py b/tests/workers/test_make_v202111_runoff_file.py index e0b2c005..7f2a85fd 100644 --- a/tests/workers/test_make_v202111_runoff_file.py +++ b/tests/workers/test_make_v202111_runoff_file.py @@ -1873,10 +1873,10 @@ def test_dims(self, runoff_array, config): obs_date, runoff_array, config ) - assert len(runoff_ds.dims) == 3 - assert runoff_ds.dims["time_counter"] == 1 - assert runoff_ds.dims["y"] == runoff_array.shape[0] - assert runoff_ds.dims["x"] == runoff_array.shape[1] + assert len(runoff_ds.sizes) == 3 + assert runoff_ds.sizes["time_counter"] == 1 + assert runoff_ds.sizes["y"] == runoff_array.shape[0] + assert runoff_ds.sizes["x"] == runoff_array.shape[1] def test_dataset_attrs(self, runoff_array, config, monkeypatch): def mock_now(tz):