From 1529df6ab5cb129de578e69b7de1bb309a75b153 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Fri, 17 Nov 2023 17:50:08 +0100 Subject: [PATCH] depr(python,rust!): Rename DataFrame column index methods (#12542) --- crates/polars-core/src/frame/mod.rs | 58 ++++++------ crates/polars-io/src/csv/read_impl/mod.rs | 4 +- crates/polars-io/src/parquet/read_impl.rs | 2 +- .../src/executors/sinks/joins/inner_left.rs | 2 +- .../src/logical_plan/functions/rename.rs | 2 +- .../reference/dataframe/modify_select.rst | 3 + py-polars/polars/dataframe/frame.py | 94 +++++++++++++++---- .../polars/io/spreadsheet/_write_utils.py | 4 +- py-polars/src/dataframe.rs | 12 +-- py-polars/tests/unit/dataframe/test_df.py | 14 +-- .../tests/unit/functions/test_functions.py | 2 +- py-polars/tests/unit/test_constructors.py | 2 +- 12 files changed, 126 insertions(+), 73 deletions(-) diff --git a/crates/polars-core/src/frame/mod.rs b/crates/polars-core/src/frame/mod.rs index cea3f6b3df55..32f8aee919ae 100644 --- a/crates/polars-core/src/frame/mod.rs +++ b/crates/polars-core/src/frame/mod.rs @@ -186,7 +186,7 @@ impl DataFrame { /// Get the index of the column. fn check_name_to_idx(&self, name: &str) -> PolarsResult { - self.find_idx_by_name(name) + self.get_column_index(name) .ok_or_else(|| polars_err!(ColumnNotFound: "{}", name)) } @@ -1091,7 +1091,7 @@ impl DataFrame { /// Insert a new column at a given index without checking for duplicates. /// This can leave the [`DataFrame`] at an invalid state - fn insert_at_idx_no_name_check( + fn insert_column_no_name_check( &mut self, index: usize, series: Series, @@ -1106,19 +1106,19 @@ impl DataFrame { } /// Insert a new column at a given index. - pub fn insert_at_idx( + pub fn insert_column( &mut self, index: usize, column: S, ) -> PolarsResult<&mut Self> { let series = column.into_series(); self.check_already_present(series.name())?; - self.insert_at_idx_no_name_check(index, series) + self.insert_column_no_name_check(index, series) } fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> { - if let Some(idx) = self.find_idx_by_name(series.name()) { - self.replace_at_idx(idx, series)?; + if let Some(idx) = self.get_column_index(series.name()) { + self.replace_column(idx, series)?; } else { self.columns.push(series); } @@ -1170,7 +1170,7 @@ impl DataFrame { if self.columns.get(idx).map(|s| s.name()) != Some(name) { self.add_column_by_search(s)?; } else { - self.replace_at_idx(idx, s)?; + self.replace_column(idx, s)?; } } else { self.columns.push(s); @@ -1345,20 +1345,20 @@ impl DataFrame { /// "Mana" => &[250, 100, 0], /// "Strength" => &[30, 150, 300])?; /// - /// assert_eq!(df.find_idx_by_name("Name"), Some(0)); - /// assert_eq!(df.find_idx_by_name("Health"), Some(1)); - /// assert_eq!(df.find_idx_by_name("Mana"), Some(2)); - /// assert_eq!(df.find_idx_by_name("Strength"), Some(3)); - /// assert_eq!(df.find_idx_by_name("Haste"), None); + /// assert_eq!(df.get_column_index("Name"), Some(0)); + /// assert_eq!(df.get_column_index("Health"), Some(1)); + /// assert_eq!(df.get_column_index("Mana"), Some(2)); + /// assert_eq!(df.get_column_index("Strength"), Some(3)); + /// assert_eq!(df.get_column_index("Haste"), None); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn find_idx_by_name(&self, name: &str) -> Option { + pub fn get_column_index(&self, name: &str) -> Option { self.columns.iter().position(|s| s.name() == name) } /// Get column index of a [`Series`] by name. - pub fn try_find_idx_by_name(&self, name: &str) -> PolarsResult { - self.find_idx_by_name(name) + pub fn try_get_column_index(&self, name: &str) -> PolarsResult { + self.get_column_index(name) .ok_or_else(|| polars_err!(ColumnNotFound: "{}", name)) } @@ -1376,9 +1376,7 @@ impl DataFrame { /// # Ok::<(), PolarsError>(()) /// ``` pub fn column(&self, name: &str) -> PolarsResult<&Series> { - let idx = self - .find_idx_by_name(name) - .ok_or_else(|| polars_err!(ColumnNotFound: "{}", name))?; + let idx = self.try_get_column_index(name)?; Ok(self.select_at_idx(idx).unwrap()) } @@ -1605,7 +1603,7 @@ impl DataFrame { /// *Note: the length of the Series should remain the same otherwise the DataFrame is invalid.* /// For this reason the method is not public fn select_mut(&mut self, name: &str) -> Option<&mut Series> { - let opt_idx = self.find_idx_by_name(name); + let opt_idx = self.get_column_index(name); opt_idx.and_then(|idx| self.select_at_idx_mut(idx)) } @@ -1974,28 +1972,28 @@ impl DataFrame { /// let mut df = DataFrame::new(vec![s0, s1])?; /// /// // Add 32 to get lowercase ascii values - /// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32); + /// df.replace_column(1, df.select_at_idx(1).unwrap() + 32); /// # Ok::<(), PolarsError>(()) /// ``` - pub fn replace_at_idx( + pub fn replace_column( &mut self, - idx: usize, - new_col: S, + index: usize, + new_column: S, ) -> PolarsResult<&mut Self> { polars_ensure!( - idx < self.width(), + index < self.width(), ShapeMismatch: "unable to replace at index {}, the DataFrame has only {} columns", - idx, self.width(), + index, self.width(), ); - let mut new_column = new_col.into_series(); + let mut new_column = new_column.into_series(); polars_ensure!( new_column.len() == self.height(), ShapeMismatch: "unable to replace a column, series length {} doesn't match the DataFrame height {}", new_column.len(), self.height(), ); - let old_col = &mut self.columns[idx]; + let old_col = &mut self.columns[index]; mem::swap(old_col, &mut new_column); Ok(self) } @@ -2228,9 +2226,7 @@ impl DataFrame { F: FnOnce(&Series) -> PolarsResult, S: IntoSeries, { - let idx = self - .find_idx_by_name(column) - .ok_or_else(|| polars_err!(ColumnNotFound: "{}", column))?; + let idx = self.try_get_column_index(column)?; self.try_apply_at_idx(idx, f) } @@ -2547,7 +2543,7 @@ impl DataFrame { let mut summary = concat_df_unchecked(&tmp); - summary.insert_at_idx(0, Series::new("describe", headers))?; + summary.insert_column(0, Series::new("describe", headers))?; Ok(summary) } diff --git a/crates/polars-io/src/csv/read_impl/mod.rs b/crates/polars-io/src/csv/read_impl/mod.rs index aa077a9e61a2..d61b979bc6ee 100644 --- a/crates/polars-io/src/csv/read_impl/mod.rs +++ b/crates/polars-io/src/csv/read_impl/mod.rs @@ -80,7 +80,7 @@ pub(crate) fn cast_columns( // cast to the original dtypes in the schema for fld in to_cast { // field may not be projected - if let Some(idx) = df.find_idx_by_name(fld.name()) { + if let Some(idx) = df.get_column_index(fld.name()) { df.try_apply_at_idx(idx, |s| cast_fn(s, fld))?; } } @@ -554,7 +554,7 @@ impl<'a> CoreReader<'a> { if bytes.is_empty() { let mut df = DataFrame::from(self.schema.as_ref()); if let Some(ref row_count) = self.row_count { - df.insert_at_idx(0, Series::new_empty(&row_count.name, &IDX_DTYPE))?; + df.insert_column(0, Series::new_empty(&row_count.name, &IDX_DTYPE))?; } return Ok(df); } diff --git a/crates/polars-io/src/parquet/read_impl.rs b/crates/polars-io/src/parquet/read_impl.rs index 61bcf91c7099..54421abc70e8 100644 --- a/crates/polars-io/src/parquet/read_impl.rs +++ b/crates/polars-io/src/parquet/read_impl.rs @@ -342,7 +342,7 @@ pub(super) fn materialize_empty_df( let mut df = DataFrame::from(schema.as_ref()); if let Some(row_count) = row_count { - df.insert_at_idx(0, Series::new_empty(&row_count.name, &IDX_DTYPE)) + df.insert_column(0, Series::new_empty(&row_count.name, &IDX_DTYPE)) .unwrap(); } diff --git a/crates/polars-pipe/src/executors/sinks/joins/inner_left.rs b/crates/polars-pipe/src/executors/sinks/joins/inner_left.rs index 7b2f7f223c24..e527255fb1ea 100644 --- a/crates/polars-pipe/src/executors/sinks/joins/inner_left.rs +++ b/crates/polars-pipe/src/executors/sinks/joins/inner_left.rs @@ -141,7 +141,7 @@ impl GenericJoinProbe { if !self.swapped_or_left && self.join_column_idx.is_none() { let mut idx = names .iter() - .filter_map(|name| chunk.data.find_idx_by_name(name)) + .filter_map(|name| chunk.data.get_column_index(name)) .collect::>(); // ensure that it is sorted so that we can later remove columns in // a predictable order diff --git a/crates/polars-plan/src/logical_plan/functions/rename.rs b/crates/polars-plan/src/logical_plan/functions/rename.rs index e6b0c99655fd..34715d5bcce7 100644 --- a/crates/polars-plan/src/logical_plan/functions/rename.rs +++ b/crates/polars-plan/src/logical_plan/functions/rename.rs @@ -7,7 +7,7 @@ pub(super) fn rename_impl( ) -> PolarsResult { let positions = existing .iter() - .map(|old| df.find_idx_by_name(old)) + .map(|old| df.get_column_index(old)) .collect::>(); for (pos, name) in positions.iter().zip(new.iter()) { diff --git a/py-polars/docs/source/reference/dataframe/modify_select.rst b/py-polars/docs/source/reference/dataframe/modify_select.rst index e672e67508cb..bf9b07e1df86 100644 --- a/py-polars/docs/source/reference/dataframe/modify_select.rst +++ b/py-polars/docs/source/reference/dataframe/modify_select.rst @@ -21,6 +21,7 @@ Manipulation/selection DataFrame.find_idx_by_name DataFrame.gather_every DataFrame.get_column + DataFrame.get_column_index DataFrame.get_columns DataFrame.group_by DataFrame.group_by_dynamic @@ -31,6 +32,7 @@ Manipulation/selection DataFrame.head DataFrame.hstack DataFrame.insert_at_idx + DataFrame.insert_column DataFrame.interpolate DataFrame.item DataFrame.iter_rows @@ -47,6 +49,7 @@ Manipulation/selection DataFrame.rename DataFrame.replace DataFrame.replace_at_idx + DataFrame.replace_column DataFrame.reverse DataFrame.rolling DataFrame.row diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index b839b055b4b5..4d09eb8ce93f 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -1606,9 +1606,9 @@ def __getitem__( start = col_selection.start stop = col_selection.stop if isinstance(col_selection.start, str): - start = self.find_idx_by_name(col_selection.start) + start = self.get_column_index(col_selection.start) if isinstance(col_selection.stop, str): - stop = self.find_idx_by_name(col_selection.stop) + 1 + stop = self.get_column_index(col_selection.stop) + 1 col_selection = slice(start, stop, col_selection.step) @@ -1775,7 +1775,7 @@ def __setitem__( # now find the location to place series # df[idx] if isinstance(col_selection, int): - self.replace_at_idx(col_selection, s) + self.replace_column(col_selection, s) # df["foo"] elif isinstance(col_selection, str): self._replace(col_selection, s) @@ -3113,7 +3113,7 @@ def write_excel( column_widths = _unpack_multi_column_dict(column_widths or {}) # type: ignore[assignment] for column in df.columns: - col_idx, options = table_start[1] + df.find_idx_by_name(column), {} + col_idx, options = table_start[1] + df.get_column_index(column), {} if column in hidden_columns: options = {"hidden": True} if column in column_widths: # type: ignore[operator] @@ -3880,22 +3880,24 @@ def rename(self, mapping: dict[str, str]) -> DataFrame: """ return self.lazy().rename(mapping).collect(_eager=True) - def insert_at_idx(self, index: int, series: Series) -> Self: + def insert_column(self, index: int, column: Series) -> Self: """ - Insert a Series at a certain column index. This operation is in place. + Insert a Series at a certain column index. + + This operation is in place. Parameters ---------- index - Column to insert the new `Series` column. - series + Index at which to insert the new `Series` column. + column `Series` to insert. Examples -------- >>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [4, 5, 6]}) >>> s = pl.Series("baz", [97, 98, 99]) - >>> df.insert_at_idx(1, s) + >>> df.insert_column(1, s) shape: (3, 3) ┌─────┬─────┬─────┐ │ foo ┆ baz ┆ bar │ @@ -3915,7 +3917,7 @@ def insert_at_idx(self, index: int, series: Series) -> Self: ... } ... ) >>> s = pl.Series("d", [-2.5, 15, 20.5, 0]) - >>> df.insert_at_idx(3, s) + >>> df.insert_column(3, s) shape: (4, 4) ┌─────┬──────┬───────┬──────┐ │ a ┆ b ┆ c ┆ d │ @@ -3931,7 +3933,7 @@ def insert_at_idx(self, index: int, series: Series) -> Self: """ if index < 0: index = len(self.columns) + index - self._df.insert_at_idx(index, series._s) + self._df.insert_column(index, column._s) return self def filter( @@ -4234,10 +4236,10 @@ def describe( # return results as a frame df_summary = self.__class__(summary) - df_summary.insert_at_idx(0, pl.Series("describe", metrics)) + df_summary.insert_column(0, pl.Series("describe", metrics)) return df_summary - def find_idx_by_name(self, name: str) -> int: + def get_column_index(self, name: str) -> int: """ Find the index of a column by name. @@ -4251,21 +4253,23 @@ def find_idx_by_name(self, name: str) -> int: >>> df = pl.DataFrame( ... {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} ... ) - >>> df.find_idx_by_name("ham") + >>> df.get_column_index("ham") 2 """ - return self._df.find_idx_by_name(name) + return self._df.get_column_index(name) - def replace_at_idx(self, index: int, series: Series) -> Self: + def replace_column(self, index: int, column: Series) -> Self: """ Replace a column at an index location. + This operation is in place. + Parameters ---------- index Column index. - series + column Series that will replace the column. Examples @@ -4278,7 +4282,7 @@ def replace_at_idx(self, index: int, series: Series) -> Self: ... } ... ) >>> s = pl.Series("apple", [10, 20, 30]) - >>> df.replace_at_idx(0, s) + >>> df.replace_column(0, s) shape: (3, 3) ┌───────┬─────┬─────┐ │ apple ┆ bar ┆ ham │ @@ -4289,11 +4293,10 @@ def replace_at_idx(self, index: int, series: Series) -> Self: │ 20 ┆ 7 ┆ b │ │ 30 ┆ 8 ┆ c │ └───────┴─────┴─────┘ - """ if index < 0: index = len(self.columns) + index - self._df.replace_at_idx(index, series._s) + self._df.replace_column(index, column._s) return self def sort( @@ -10423,6 +10426,57 @@ def take_every(self, n: int) -> DataFrame: """ return self.gather_every(n) + @deprecate_renamed_function("get_column_index", version="0.19.14") + def find_idx_by_name(self, name: str) -> int: + """ + Find the index of a column by name. + + .. deprecated:: 0.19.14 + This method has been renamed to :func:`get_column_index`. + + Parameters + ---------- + name + Name of the column to find. + """ + return self.get_column_index(name) + + @deprecate_renamed_function("insert_column", version="0.19.14") + @deprecate_renamed_parameter("series", "column", version="0.19.14") + def insert_at_idx(self, index: int, column: Series) -> Self: + """ + Insert a Series at a certain column index. This operation is in place. + + .. deprecated:: 0.19.14 + This method has been renamed to :func:`insert_column`. + + Parameters + ---------- + index + Column to insert the new `Series` column. + column + `Series` to insert. + """ + return self.insert_column(index, column) + + @deprecate_renamed_function("replace_column", version="0.19.14") + @deprecate_renamed_parameter("series", "new_column", version="0.19.14") + def replace_at_idx(self, index: int, new_column: Series) -> Self: + """ + Replace a column at an index location. + + .. deprecated:: 0.19.14 + This method has been renamed to :func:`replace_column`. + + Parameters + ---------- + index + Column index. + new_column + Series that will replace the column. + """ + return self.replace_column(index, new_column) + def _prepare_other_arg(other: Any, length: int | None = None) -> Series: # if not a series create singleton series such that it will broadcast diff --git a/py-polars/polars/io/spreadsheet/_write_utils.py b/py-polars/polars/io/spreadsheet/_write_utils.py index 406b9a4eabac..5bcd2c209a9b 100644 --- a/py-polars/polars/io/spreadsheet/_write_utils.py +++ b/py-polars/polars/io/spreadsheet/_write_utils.py @@ -78,7 +78,7 @@ def get(self, fmt: dict[str, Any] | Format) -> Format: def _adjacent_cols(df: DataFrame, cols: Iterable[str], min_max: dict[str, Any]) -> bool: """Indicate if the given columns are all adjacent to one another.""" - idxs = sorted(df.find_idx_by_name(col) for col in cols) + idxs = sorted(df.get_column_index(col) for col in cols) if idxs != sorted(range(min(idxs), max(idxs) + 1)): return False else: @@ -188,7 +188,7 @@ def _xl_column_range( """Return the excel sheet range of a named column, accounting for all offsets.""" col_start = ( table_start[0] + int(include_header), - table_start[1] + df.find_idx_by_name(col) if isinstance(col, str) else col[0], + table_start[1] + df.get_column_index(col) if isinstance(col, str) else col[0], ) col_finish = ( col_start[0] + len(df) - 1, diff --git a/py-polars/src/dataframe.rs b/py-polars/src/dataframe.rs index de4912b1808a..da5d7a44d534 100644 --- a/py-polars/src/dataframe.rs +++ b/py-polars/src/dataframe.rs @@ -1047,8 +1047,8 @@ impl PyDataFrame { self.df.select_at_idx(idx).map(|s| PySeries::new(s.clone())) } - pub fn find_idx_by_name(&self, name: &str) -> Option { - self.df.find_idx_by_name(name) + pub fn get_column_index(&self, name: &str) -> Option { + self.df.get_column_index(name) } pub fn get_column(&self, name: &str) -> PyResult { @@ -1085,16 +1085,16 @@ impl PyDataFrame { Ok(()) } - pub fn replace_at_idx(&mut self, index: usize, new_col: PySeries) -> PyResult<()> { + pub fn replace_column(&mut self, index: usize, new_column: PySeries) -> PyResult<()> { self.df - .replace_at_idx(index, new_col.series) + .replace_column(index, new_column.series) .map_err(PyPolarsErr::from)?; Ok(()) } - pub fn insert_at_idx(&mut self, index: usize, new_col: PySeries) -> PyResult<()> { + pub fn insert_column(&mut self, index: usize, column: PySeries) -> PyResult<()> { self.df - .insert_at_idx(index, new_col.series) + .insert_column(index, column.series) .map_err(PyPolarsErr::from)?; Ok(()) } diff --git a/py-polars/tests/unit/dataframe/test_df.py b/py-polars/tests/unit/dataframe/test_df.py index 0f33acb5acc9..07245b547a2b 100644 --- a/py-polars/tests/unit/dataframe/test_df.py +++ b/py-polars/tests/unit/dataframe/test_df.py @@ -348,22 +348,22 @@ def test_assignment() -> None: assert df["foo"].to_list() == [1, 9, 9] -def test_insert_at_idx() -> None: +def test_insert_column() -> None: df = ( pl.DataFrame({"z": [3, 4, 5]}) - .insert_at_idx(0, pl.Series("x", [1, 2, 3])) - .insert_at_idx(-1, pl.Series("y", [2, 3, 4])) + .insert_column(0, pl.Series("x", [1, 2, 3])) + .insert_column(-1, pl.Series("y", [2, 3, 4])) ) expected_df = pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]}) assert_frame_equal(expected_df, df) -def test_replace_at_idx() -> None: +def test_replace_column() -> None: df = ( pl.DataFrame({"x": [1, 2, 3], "y": [2, 3, 4], "z": [3, 4, 5]}) - .replace_at_idx(0, pl.Series("a", [4, 5, 6])) - .replace_at_idx(-2, pl.Series("b", [5, 6, 7])) - .replace_at_idx(-1, pl.Series("c", [6, 7, 8])) + .replace_column(0, pl.Series("a", [4, 5, 6])) + .replace_column(-2, pl.Series("b", [5, 6, 7])) + .replace_column(-1, pl.Series("c", [6, 7, 8])) ) expected_df = pl.DataFrame({"a": [4, 5, 6], "b": [5, 6, 7], "c": [6, 7, 8]}) assert_frame_equal(expected_df, df) diff --git a/py-polars/tests/unit/functions/test_functions.py b/py-polars/tests/unit/functions/test_functions.py index d5bd6ff8568d..b157a8978733 100644 --- a/py-polars/tests/unit/functions/test_functions.py +++ b/py-polars/tests/unit/functions/test_functions.py @@ -167,7 +167,7 @@ def test_align_frames() -> None: (pf1[["a", "b"]] * pf2[["a", "b"]]) .fill_null(0) .select(pl.sum_horizontal("*").alias("dot")) - .insert_at_idx(0, pf1["date"]) + .insert_column(0, pf1["date"]) ) # confirm we match the same operation in pandas assert_frame_equal(pl_dot, pl.from_pandas(pd_dot)) diff --git a/py-polars/tests/unit/test_constructors.py b/py-polars/tests/unit/test_constructors.py index c67a9366d3ad..b5107edd1c41 100644 --- a/py-polars/tests/unit/test_constructors.py +++ b/py-polars/tests/unit/test_constructors.py @@ -968,7 +968,7 @@ def test_init_only_columns() -> None: pl.col("c").cast(pl.Int8), ] ) - expected.insert_at_idx(3, pl.Series("d", [], pl.List(pl.UInt8))) + expected.insert_column(3, pl.Series("d", [], pl.List(pl.UInt8))) assert df.shape == (0, 4) assert_frame_equal(df, expected)