Skip to content

Commit

Permalink
depr(python,rust!): Rename DataFrame column index methods (#12542)
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego authored Nov 17, 2023
1 parent 60843a0 commit 1529df6
Show file tree
Hide file tree
Showing 12 changed files with 126 additions and 73 deletions.
58 changes: 27 additions & 31 deletions crates/polars-core/src/frame/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ impl DataFrame {

/// Get the index of the column.
fn check_name_to_idx(&self, name: &str) -> PolarsResult<usize> {
self.find_idx_by_name(name)
self.get_column_index(name)
.ok_or_else(|| polars_err!(ColumnNotFound: "{}", name))
}

Expand Down Expand Up @@ -1091,7 +1091,7 @@ impl DataFrame {

/// Insert a new column at a given index without checking for duplicates.
/// This can leave the [`DataFrame`] in an invalid state.
fn insert_at_idx_no_name_check(
fn insert_column_no_name_check(
&mut self,
index: usize,
series: Series,
Expand All @@ -1106,19 +1106,19 @@ impl DataFrame {
}

/// Insert a new column at a given index.
pub fn insert_at_idx<S: IntoSeries>(
pub fn insert_column<S: IntoSeries>(
&mut self,
index: usize,
column: S,
) -> PolarsResult<&mut Self> {
let series = column.into_series();
self.check_already_present(series.name())?;
self.insert_at_idx_no_name_check(index, series)
self.insert_column_no_name_check(index, series)
}

fn add_column_by_search(&mut self, series: Series) -> PolarsResult<()> {
if let Some(idx) = self.find_idx_by_name(series.name()) {
self.replace_at_idx(idx, series)?;
if let Some(idx) = self.get_column_index(series.name()) {
self.replace_column(idx, series)?;
} else {
self.columns.push(series);
}
Expand Down Expand Up @@ -1170,7 +1170,7 @@ impl DataFrame {
if self.columns.get(idx).map(|s| s.name()) != Some(name) {
self.add_column_by_search(s)?;
} else {
self.replace_at_idx(idx, s)?;
self.replace_column(idx, s)?;
}
} else {
self.columns.push(s);
Expand Down Expand Up @@ -1345,20 +1345,20 @@ impl DataFrame {
/// "Mana" => &[250, 100, 0],
/// "Strength" => &[30, 150, 300])?;
///
/// assert_eq!(df.find_idx_by_name("Name"), Some(0));
/// assert_eq!(df.find_idx_by_name("Health"), Some(1));
/// assert_eq!(df.find_idx_by_name("Mana"), Some(2));
/// assert_eq!(df.find_idx_by_name("Strength"), Some(3));
/// assert_eq!(df.find_idx_by_name("Haste"), None);
/// assert_eq!(df.get_column_index("Name"), Some(0));
/// assert_eq!(df.get_column_index("Health"), Some(1));
/// assert_eq!(df.get_column_index("Mana"), Some(2));
/// assert_eq!(df.get_column_index("Strength"), Some(3));
/// assert_eq!(df.get_column_index("Haste"), None);
/// # Ok::<(), PolarsError>(())
/// ```
pub fn find_idx_by_name(&self, name: &str) -> Option<usize> {
pub fn get_column_index(&self, name: &str) -> Option<usize> {
self.columns.iter().position(|s| s.name() == name)
}

/// Get the column index of a [`Series`] by name, or a `ColumnNotFound` error if no column has that name.
pub fn try_find_idx_by_name(&self, name: &str) -> PolarsResult<usize> {
self.find_idx_by_name(name)
pub fn try_get_column_index(&self, name: &str) -> PolarsResult<usize> {
self.get_column_index(name)
.ok_or_else(|| polars_err!(ColumnNotFound: "{}", name))
}

Expand All @@ -1376,9 +1376,7 @@ impl DataFrame {
/// # Ok::<(), PolarsError>(())
/// ```
pub fn column(&self, name: &str) -> PolarsResult<&Series> {
let idx = self
.find_idx_by_name(name)
.ok_or_else(|| polars_err!(ColumnNotFound: "{}", name))?;
let idx = self.try_get_column_index(name)?;
Ok(self.select_at_idx(idx).unwrap())
}

Expand Down Expand Up @@ -1605,7 +1603,7 @@ impl DataFrame {
/// *Note: the length of the Series should remain the same; otherwise, the DataFrame is invalid.*
/// For this reason, the method is not public.
fn select_mut(&mut self, name: &str) -> Option<&mut Series> {
let opt_idx = self.find_idx_by_name(name);
let opt_idx = self.get_column_index(name);

opt_idx.and_then(|idx| self.select_at_idx_mut(idx))
}
Expand Down Expand Up @@ -1974,28 +1972,28 @@ impl DataFrame {
/// let mut df = DataFrame::new(vec![s0, s1])?;
///
/// // Add 32 to get lowercase ascii values
/// df.replace_at_idx(1, df.select_at_idx(1).unwrap() + 32);
/// df.replace_column(1, df.select_at_idx(1).unwrap() + 32);
/// # Ok::<(), PolarsError>(())
/// ```
pub fn replace_at_idx<S: IntoSeries>(
pub fn replace_column<S: IntoSeries>(
&mut self,
idx: usize,
new_col: S,
index: usize,
new_column: S,
) -> PolarsResult<&mut Self> {
polars_ensure!(
idx < self.width(),
index < self.width(),
ShapeMismatch:
"unable to replace at index {}, the DataFrame has only {} columns",
idx, self.width(),
index, self.width(),
);
let mut new_column = new_col.into_series();
let mut new_column = new_column.into_series();
polars_ensure!(
new_column.len() == self.height(),
ShapeMismatch:
"unable to replace a column, series length {} doesn't match the DataFrame height {}",
new_column.len(), self.height(),
);
let old_col = &mut self.columns[idx];
let old_col = &mut self.columns[index];
mem::swap(old_col, &mut new_column);
Ok(self)
}
Expand Down Expand Up @@ -2228,9 +2226,7 @@ impl DataFrame {
F: FnOnce(&Series) -> PolarsResult<S>,
S: IntoSeries,
{
let idx = self
.find_idx_by_name(column)
.ok_or_else(|| polars_err!(ColumnNotFound: "{}", column))?;
let idx = self.try_get_column_index(column)?;
self.try_apply_at_idx(idx, f)
}

Expand Down Expand Up @@ -2547,7 +2543,7 @@ impl DataFrame {

let mut summary = concat_df_unchecked(&tmp);

summary.insert_at_idx(0, Series::new("describe", headers))?;
summary.insert_column(0, Series::new("describe", headers))?;

Ok(summary)
}
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-io/src/csv/read_impl/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ pub(crate) fn cast_columns(
// cast to the original dtypes in the schema
for fld in to_cast {
// field may not be projected
if let Some(idx) = df.find_idx_by_name(fld.name()) {
if let Some(idx) = df.get_column_index(fld.name()) {
df.try_apply_at_idx(idx, |s| cast_fn(s, fld))?;
}
}
Expand Down Expand Up @@ -554,7 +554,7 @@ impl<'a> CoreReader<'a> {
if bytes.is_empty() {
let mut df = DataFrame::from(self.schema.as_ref());
if let Some(ref row_count) = self.row_count {
df.insert_at_idx(0, Series::new_empty(&row_count.name, &IDX_DTYPE))?;
df.insert_column(0, Series::new_empty(&row_count.name, &IDX_DTYPE))?;
}
return Ok(df);
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/parquet/read_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ pub(super) fn materialize_empty_df(
let mut df = DataFrame::from(schema.as_ref());

if let Some(row_count) = row_count {
df.insert_at_idx(0, Series::new_empty(&row_count.name, &IDX_DTYPE))
df.insert_column(0, Series::new_empty(&row_count.name, &IDX_DTYPE))
.unwrap();
}

Expand Down
2 changes: 1 addition & 1 deletion crates/polars-pipe/src/executors/sinks/joins/inner_left.rs
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ impl GenericJoinProbe {
if !self.swapped_or_left && self.join_column_idx.is_none() {
let mut idx = names
.iter()
.filter_map(|name| chunk.data.find_idx_by_name(name))
.filter_map(|name| chunk.data.get_column_index(name))
.collect::<Vec<_>>();
// ensure that it is sorted so that we can later remove columns in
// a predictable order
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-plan/src/logical_plan/functions/rename.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pub(super) fn rename_impl(
) -> PolarsResult<DataFrame> {
let positions = existing
.iter()
.map(|old| df.find_idx_by_name(old))
.map(|old| df.get_column_index(old))
.collect::<Vec<_>>();

for (pos, name) in positions.iter().zip(new.iter()) {
Expand Down
3 changes: 3 additions & 0 deletions py-polars/docs/source/reference/dataframe/modify_select.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ Manipulation/selection
DataFrame.find_idx_by_name
DataFrame.gather_every
DataFrame.get_column
DataFrame.get_column_index
DataFrame.get_columns
DataFrame.group_by
DataFrame.group_by_dynamic
Expand All @@ -31,6 +32,7 @@ Manipulation/selection
DataFrame.head
DataFrame.hstack
DataFrame.insert_at_idx
DataFrame.insert_column
DataFrame.interpolate
DataFrame.item
DataFrame.iter_rows
Expand All @@ -47,6 +49,7 @@ Manipulation/selection
DataFrame.rename
DataFrame.replace
DataFrame.replace_at_idx
DataFrame.replace_column
DataFrame.reverse
DataFrame.rolling
DataFrame.row
Expand Down
Loading

0 comments on commit 1529df6

Please sign in to comment.