From 81e4d2526e583c92307fbfbfd12af29b402b66a0 Mon Sep 17 00:00:00 2001
From: Doug Latornell <dlatornell@eoas.ubc.ca>
Date: Fri, 14 Jun 2024 15:12:55 -0700
Subject: [PATCH] Fix various warnings revealed by pytest (#273)

* Update time frequency in test_get_onc_ferry test

Change the seconds frequency alias from "1S" to lowercase "1s" in the pandas
date range that generates the sample times in the test_get_onc_ferry unit
test. This change resolves:

    FutureWarning: 'S' is deprecated and will be removed in a future version,
    please use 's' instead.

* Update delimiter in daily_river_flows CSV read

The whitespace delimiter argument passed to pandas.read_csv() has been changed
from `delim_whitespace=True` to the equivalent `sep="\s+"`. This change
resolves:

    FutureWarning: The 'delim_whitespace' keyword in pd.read_csv is deprecated
    and will be removed in a future version. Use ``sep='\s+'`` instead.

* Update dimension assertions in several tests

Changed the assertions to use 'sizes' instead of 'dims'. This change resolves:

    FutureWarning: The return type of `Dataset.dims` will be changed to return
    a set of dimension names in future, in order to be more consistent with
    `DataArray.dims`. To access a mapping from dimension names to lengths,
    please use `Dataset.sizes`.

* Add assertions for expected CSV read ParserWarning

Updated the test_daily_river_flows.TestReadRiverCSV.test_one_long_line() test
to assert the expected ParserWarning. This warning is raised when the lines
being parsed in the CSV data differ in length. The test now also checks the
warning's message to confirm that it was caused by the mismatched line lengths.

* Update pandas Series value assignment re: pandas 3

Modified how null values in the Theodosia "Secondary River Flow" data are
filled in the 'daily_river_flows' module and the 'make_v202111_runoff_file'
worker. Instead of calling fillna() with inplace=True on the selected column,
the result of fillna() is now assigned back to the column explicitly to ensure
compatibility with pandas 3.0. This change resolves:

    FutureWarning: A value is trying to be set on a copy of a DataFrame or
    Series through chained assignment using an inplace method. The behavior
    will change in pandas 3.0. This inplace method will never work because the
    intermediate object on which we are setting values always behaves as a copy.

     For example, when doing 'df[col].method(value, inplace=True)', try using
     'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value)
     instead, to perform the operation inplace on the original object.
---
 nowcast/daily_river_flows.py                   |  6 +++---
 nowcast/workers/make_v202111_runoff_file.py    |  4 ++--
 tests/test_daily_river_flows.py                | 14 +++++++++-----
 tests/workers/test_get_onc_ferry.py            |  2 +-
 tests/workers/test_make_v202111_runoff_file.py |  8 ++++----
 5 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/nowcast/daily_river_flows.py b/nowcast/daily_river_flows.py
index e08e4390..86f264b6 100644
--- a/nowcast/daily_river_flows.py
+++ b/nowcast/daily_river_flows.py
@@ -121,7 +121,7 @@ def _parse_long_csv_line(line):
     # .csv files
     pd.read_csv,
     header=None,
-    delim_whitespace=True,
+    sep="\\s+",
     index_col=False,
     names=["year", "month", "day", "flow"],
     engine="python",
@@ -193,8 +193,8 @@ def _read_river_Theodosia(config):
     # Used for dates before Scotty part was gauged, or in the event of missing obs
     parts[2]["FlowFromDiversion"] = parts[2].Diversion * theodosia_from_diversion_only
     theodosia = theodosia.merge(parts[2], how="outer", on="date", sort=True)
-    theodosia["Secondary River Flow"].fillna(
-        theodosia["FlowFromDiversion"], inplace=True
+    theodosia["Secondary River Flow"] = theodosia["Secondary River Flow"].fillna(
+        theodosia["FlowFromDiversion"]
     )
 
     theodosia.drop(
diff --git a/nowcast/workers/make_v202111_runoff_file.py b/nowcast/workers/make_v202111_runoff_file.py
index d09c0ca1..e7746620 100644
--- a/nowcast/workers/make_v202111_runoff_file.py
+++ b/nowcast/workers/make_v202111_runoff_file.py
@@ -365,8 +365,8 @@ def _read_river_Theodosia(config):
     # Used for dates before Scotty part was gauged, or in the event of missing obs
     parts[2]["FlowFromDiversion"] = parts[2].Diversion * theodosia_from_diversion_only
     theodosia = theodosia.merge(parts[2], how="outer", on="date", sort=True)
-    theodosia["Secondary River Flow"].fillna(
-        theodosia["FlowFromDiversion"], inplace=True
+    theodosia["Secondary River Flow"] = theodosia["Secondary River Flow"].fillna(
+        theodosia["FlowFromDiversion"]
     )
 
     theodosia.drop(
diff --git a/tests/test_daily_river_flows.py b/tests/test_daily_river_flows.py
index 7611c870..e3b49acc 100644
--- a/tests/test_daily_river_flows.py
+++ b/tests/test_daily_river_flows.py
@@ -108,7 +108,9 @@ def test_one_long_line(self):
             """
         )
 
-        river_flow = daily_river_flows._read_river_csv(io.StringIO(csv_lines))
+        with pytest.warns(pandas.errors.ParserWarning) as warning_record:
+            # We expect a ParserWarning due to the difference in length of the lines we're parsing
+            river_flow = daily_river_flows._read_river_csv(io.StringIO(csv_lines))
 
         expected = pandas.DataFrame(
             {
@@ -119,6 +121,8 @@ def test_one_long_line(self):
             }
         )
         pandas.testing.assert_frame_equal(river_flow, expected)
+        expected = "Length of header or names does not match length of data. This leads to a loss of data with index_col=False."
+        assert str(warning_record[0].message) == expected
 
 
 class TestSetDateAsIndex:
@@ -1445,10 +1449,10 @@ def test_dims(self, runoff_array, config):
             obs_date, runoff_array, config
         )
 
-        assert len(runoff_ds.dims) == 3
-        assert runoff_ds.dims["time_counter"] == 1
-        assert runoff_ds.dims["y"] == runoff_array.shape[0]
-        assert runoff_ds.dims["x"] == runoff_array.shape[1]
+        assert len(runoff_ds.sizes) == 3
+        assert runoff_ds.sizes["time_counter"] == 1
+        assert runoff_ds.sizes["y"] == runoff_array.shape[0]
+        assert runoff_ds.sizes["x"] == runoff_array.shape[1]
 
     def test_dataset_attrs(self, runoff_array, config, monkeypatch):
         def mock_now(tz):
diff --git a/tests/workers/test_get_onc_ferry.py b/tests/workers/test_get_onc_ferry.py
index 32f1a3f6..72ca2bcb 100644
--- a/tests/workers/test_get_onc_ferry.py
+++ b/tests/workers/test_get_onc_ferry.py
@@ -246,7 +246,7 @@ def test_resample_nav_coord(self, ferry_platform):
             },
             coords={
                 "sampleTime": pandas.date_range(
-                    start="2021-03-08T10:14:43.082000000", periods=59, freq="1S"
+                    start="2021-03-08T10:14:43.082000000", periods=59, freq="1s"
                 )
             },
             attrs={"station": "TWDP.N1"},
diff --git a/tests/workers/test_make_v202111_runoff_file.py b/tests/workers/test_make_v202111_runoff_file.py
index e0b2c005..7f2a85fd 100644
--- a/tests/workers/test_make_v202111_runoff_file.py
+++ b/tests/workers/test_make_v202111_runoff_file.py
@@ -1873,10 +1873,10 @@ def test_dims(self, runoff_array, config):
             obs_date, runoff_array, config
         )
 
-        assert len(runoff_ds.dims) == 3
-        assert runoff_ds.dims["time_counter"] == 1
-        assert runoff_ds.dims["y"] == runoff_array.shape[0]
-        assert runoff_ds.dims["x"] == runoff_array.shape[1]
+        assert len(runoff_ds.sizes) == 3
+        assert runoff_ds.sizes["time_counter"] == 1
+        assert runoff_ds.sizes["y"] == runoff_array.shape[0]
+        assert runoff_ds.sizes["x"] == runoff_array.shape[1]
 
     def test_dataset_attrs(self, runoff_array, config, monkeypatch):
         def mock_now(tz):