From 7eb8fe0bbbaca95bcb9023e6af35084bcff4091e Mon Sep 17 00:00:00 2001 From: Marco Gorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Thu, 20 Jun 2024 14:15:00 +0100 Subject: [PATCH] feat: rename DataFrame.melt to DataFrame.unpivot, and rename its args: id_vars -> index, value_vars -> on --- crates/polars-core/src/frame/explode.rs | 103 +++++++++--------- crates/polars-core/src/prelude.rs | 2 +- crates/polars-lazy/src/frame/mod.rs | 8 +- crates/polars-lazy/src/tests/queries.rs | 10 +- .../src/pipeline/dispatcher/mod.rs | 2 +- crates/polars-plan/src/plans/builder_dsl.rs | 4 +- crates/polars-plan/src/plans/builder_ir.rs | 4 +- crates/polars-plan/src/plans/functions/dsl.rs | 8 +- crates/polars-plan/src/plans/functions/mod.rs | 22 ++-- .../polars-plan/src/plans/functions/schema.rs | 16 +-- .../plans/optimizer/predicate_pushdown/mod.rs | 4 +- .../projection_pushdown/functions/mod.rs | 8 +- .../functions/{melt.rs => unpivot.rs} | 14 +-- .../src/plans/optimizer/slice_pushdown_lp.rs | 2 +- crates/polars/src/docs/eager.rs | 8 +- docs/_build/API_REFERENCE_LINKS.yml | 4 +- .../transformations/{melt.py => unpivot.py} | 6 +- docs/src/rust/Cargo.toml | 4 +- .../transformations/{melt.rs => unpivot.rs} | 6 +- docs/user-guide/concepts/streaming.md | 2 +- docs/user-guide/transformations/index.md | 2 +- docs/user-guide/transformations/melt.md | 21 ---- docs/user-guide/transformations/unpivot.md | 21 ++++ mkdocs.yml | 2 +- .../reference/dataframe/modify_select.rst | 2 +- .../reference/lazyframe/modify_select.rst | 2 +- py-polars/polars/dataframe/frame.py | 38 ++++--- py-polars/polars/lazyframe/frame.py | 34 +++--- py-polars/src/dataframe/general.rs | 14 +-- py-polars/src/lazyframe/mod.rs | 10 +- py-polars/src/lazyframe/visitor/nodes.rs | 11 +- .../tests/unit/operations/test_filter.py | 4 +- .../{test_melt.py => test_unpivot.py} | 42 +++---- py-polars/tests/unit/test_projections.py | 6 +- 34 files changed, 228 insertions(+), 218 deletions(-) rename crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/{melt.rs => unpivot.rs} (85%) rename docs/src/python/user-guide/transformations/{melt.py => unpivot.py} (68%) rename docs/src/rust/user-guide/transformations/{melt.rs => unpivot.rs} (78%) delete mode 100644 docs/user-guide/transformations/melt.md create mode 100644 docs/user-guide/transformations/unpivot.md rename py-polars/tests/unit/operations/{test_melt.py => test_unpivot.py} (57%) diff --git a/crates/polars-core/src/frame/explode.rs b/crates/polars-core/src/frame/explode.rs index 7b1055eccd22..047497aa8a11 100644 --- a/crates/polars-core/src/frame/explode.rs +++ b/crates/polars-core/src/frame/explode.rs @@ -20,15 +20,15 @@ fn get_exploded(series: &Series) -> PolarsResult<(Series, OffsetsBuffer)> { } } -/// Arguments for `[DataFrame::melt]` function +/// Arguments for `[DataFrame::unpivot]` function #[derive(Clone, Default, Debug, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde-lazy", derive(Serialize, Deserialize))] -pub struct MeltArgs { - pub id_vars: Vec, - pub value_vars: Vec, +pub struct UnpivotArgs { + pub index: Vec, + pub on: Vec, pub variable_name: Option, pub value_name: Option, - /// Whether the melt may be done + /// Whether the unpivot may be done /// in the streaming engine /// This will not have a stable ordering pub streamable: bool, @@ -189,10 +189,10 @@ impl DataFrame { /// /// # Arguments /// - /// * `id_vars` - String slice that represent the columns to use as id variables. 
- /// * `value_vars` - String slice that represent the columns to use as value variables. + /// * `index` - String slice that represent the columns to use as id variables. + /// * `on` - String slice that represent the columns to use as value variables. /// - /// If `value_vars` is empty all columns that are not in `id_vars` will be used. + /// If `on` is empty all columns that are not in `index` will be used. /// /// ```ignore /// # use polars_core::prelude::*; @@ -202,9 +202,9 @@ impl DataFrame { /// "D" => &[2, 4, 6] /// )?; /// - /// let melted = df.melt(&["A", "B"], &["C", "D"])?; + /// let unpivoted = df.unpivot(&["A", "B"], &["C", "D"])?; /// println!("{:?}", df); - /// println!("{:?}", melted); + /// println!("{:?}", unpivoted); /// # Ok::<(), PolarsError>(()) /// ``` /// Outputs: @@ -239,25 +239,25 @@ impl DataFrame { /// | "a" | 5 | "D" | 6 | /// +-----+-----+----------+-------+ /// ``` - pub fn melt(&self, id_vars: I, value_vars: J) -> PolarsResult + pub fn unpivot(&self, index: I, on: J) -> PolarsResult where I: IntoVec, J: IntoVec, { - let id_vars = id_vars.into_vec(); - let value_vars = value_vars.into_vec(); - self.melt2(MeltArgs { - id_vars, - value_vars, + let id_vars = index.into_vec(); + let value_vars = on.into_vec(); + self.unpivot2(UnpivotArgs { + index: id_vars, + on: value_vars, ..Default::default() }) } - /// Similar to melt, but without generics. This may be easier if you want to pass - /// an empty `id_vars` or empty `value_vars`. - pub fn melt2(&self, args: MeltArgs) -> PolarsResult { - let id_vars = args.id_vars; - let mut value_vars = args.value_vars; + /// Similar to unpivot, but without generics. This may be easier if you want to pass + /// an empty `index` or empty `on`. + pub fn unpivot2(&self, args: UnpivotArgs) -> PolarsResult { + let index = args.index; + let mut on = args.on; let variable_name = args.variable_name.as_deref().unwrap_or("variable"); let value_name = args.value_name.as_deref().unwrap_or("value"); @@ -265,25 +265,25 @@ impl DataFrame { let len = self.height(); // if value vars is empty we take all columns that are not in id_vars. 
- if value_vars.is_empty() { + if on.is_empty() { // return empty frame if there are no columns available to use as value vars - if id_vars.len() == self.width() { + if index.len() == self.width() { let variable_col = Series::new_empty(variable_name, &DataType::String); let value_col = Series::new_empty(variable_name, &DataType::Null); - let mut out = self.select(id_vars).unwrap().clear().columns; + let mut out = self.select(index).unwrap().clear().columns; out.push(variable_col); out.push(value_col); return Ok(unsafe { DataFrame::new_no_checks(out) }); } - let id_vars_set = PlHashSet::from_iter(id_vars.iter().map(|s| s.as_str())); - value_vars = self + let index_set = PlHashSet::from_iter(index.iter().map(|s| s.as_str())); + on = self .get_columns() .iter() .filter_map(|s| { - if id_vars_set.contains(s.name()) { + if index_set.contains(s.name()) { None } else { Some(s.name().into()) @@ -294,7 +294,7 @@ impl DataFrame { // values will all be placed in single column, so we must find their supertype let schema = self.schema(); - let mut iter = value_vars.iter().map(|v| { + let mut iter = on.iter().map(|v| { schema .get(v) .ok_or_else(|| polars_err!(ColumnNotFound: "{}", v)) @@ -304,31 +304,30 @@ impl DataFrame { st = try_get_supertype(&st, dt?)?; } - // The column name of the variable that is melted - let mut variable_col = - MutableBinaryViewArray::::with_capacity(len * value_vars.len() + 1); + // The column name of the variable that is unpivoted + let mut variable_col = MutableBinaryViewArray::::with_capacity(len * on.len() + 1); // prepare ids - let ids_ = self.select_with_schema_unchecked(id_vars, &schema)?; + let ids_ = self.select_with_schema_unchecked(index, &schema)?; let mut ids = ids_.clone(); if ids.width() > 0 { - for _ in 0..value_vars.len() - 1 { + for _ in 0..on.len() - 1 { ids.vstack_mut_unchecked(&ids_) } } ids.as_single_chunk_par(); drop(ids_); - let mut values = Vec::with_capacity(value_vars.len()); + let mut values = Vec::with_capacity(on.len()); - for value_column_name in &value_vars { + for value_column_name in &on { variable_col.extend_constant(len, Some(value_column_name.as_str())); // ensure we go via the schema so we are O(1) // self.column() is linear - // together with this loop that would make it O^2 over value_vars + // together with this loop that would make it O^2 over `on` let (pos, _name, _dtype) = schema.try_get_full(value_column_name)?; let col = &self.columns[pos]; let value_col = col.cast(&st).map_err( - |_| polars_err!(InvalidOperation: "'melt/unpivot' not supported for dtype: {}", col.dtype()), + |_| polars_err!(InvalidOperation: "'unpivot' not supported for dtype: {}", col.dtype()), )?; values.extend_from_slice(value_col.chunks()) } @@ -434,7 +433,7 @@ mod test { #[test] #[cfg_attr(miri, ignore)] - fn test_melt() -> PolarsResult<()> { + fn test_unpivot() -> PolarsResult<()> { let df = df!("A" => &["a", "b", "a"], "B" => &[1, 3, 5], "C" => &[10, 11, 12], @@ -442,20 +441,20 @@ mod test { ) .unwrap(); - let melted = df.melt(["A", "B"], ["C", "D"])?; + let unpivoted = df.unpivot(["A", "B"], ["C", "D"])?; assert_eq!( - Vec::from(melted.column("value")?.i32()?), + Vec::from(unpivoted.column("value")?.i32()?), &[Some(10), Some(11), Some(12), Some(2), Some(4), Some(6)] ); - let args = MeltArgs { - id_vars: vec![], - value_vars: vec![], + let args = UnpivotArgs { + index: vec![], + on: vec![], ..Default::default() }; - let melted = df.melt2(args).unwrap(); - let value = melted.column("value")?; + let unpivoted = df.unpivot2(args).unwrap(); + let value = 
unpivoted.column("value")?; // String because of supertype let value = value.str()?; let value = value.into_no_null_iter().collect::>(); @@ -464,22 +463,22 @@ mod test { &["a", "b", "a", "1", "3", "5", "10", "11", "12", "2", "4", "6"] ); - let args = MeltArgs { - id_vars: vec!["A".into()], - value_vars: vec![], + let args = UnpivotArgs { + index: vec!["A".into()], + on: vec![], ..Default::default() }; - let melted = df.melt2(args).unwrap(); - let value = melted.column("value")?; + let unpivoted = df.unpivot2(args).unwrap(); + let value = unpivoted.column("value")?; let value = value.i32()?; let value = value.into_no_null_iter().collect::>(); assert_eq!(value, &[1, 3, 5, 10, 11, 12, 2, 4, 6]); - let variable = melted.column("variable")?; + let variable = unpivoted.column("variable")?; let variable = variable.str()?; let variable = variable.into_no_null_iter().collect::>(); assert_eq!(variable, &["B", "B", "B", "C", "C", "C", "D", "D", "D"]); - assert!(melted.column("A").is_ok()); + assert!(unpivoted.column("A").is_ok()); Ok(()) } } diff --git a/crates/polars-core/src/prelude.rs b/crates/polars-core/src/prelude.rs index eda6704d5c39..934e5ddbd69d 100644 --- a/crates/polars-core/src/prelude.rs +++ b/crates/polars-core/src/prelude.rs @@ -37,7 +37,7 @@ pub use crate::datatypes::{ArrayCollectIterExt, *}; pub use crate::error::{ polars_bail, polars_ensure, polars_err, polars_warn, PolarsError, PolarsResult, }; -pub use crate::frame::explode::MeltArgs; +pub use crate::frame::explode::UnpivotArgs; #[cfg(feature = "algorithm_group_by")] pub(crate) use crate::frame::group_by::aggregations::*; #[cfg(feature = "algorithm_group_by")] diff --git a/crates/polars-lazy/src/frame/mod.rs b/crates/polars-lazy/src/frame/mod.rs index e12b7e5566f1..0e617c53d0f4 100644 --- a/crates/polars-lazy/src/frame/mod.rs +++ b/crates/polars-lazy/src/frame/mod.rs @@ -1600,12 +1600,12 @@ impl LazyFrame { self.slice(neg_tail, n) } - /// Melt the DataFrame from wide to long format. + /// Unpivot the DataFrame from wide to long format. /// - /// See [`MeltArgs`] for information on how to melt a DataFrame. - pub fn melt(self, args: MeltArgs) -> LazyFrame { + /// See [`UnpivotArgs`] for information on how to unpivot a DataFrame. 
+ pub fn unpivot(self, args: UnpivotArgs) -> LazyFrame { let opt_state = self.get_opt_state(); - let lp = self.get_plan_builder().melt(args).build(); + let lp = self.get_plan_builder().unpivot(args).build(); Self::from_logical_plan(lp, opt_state) } diff --git a/crates/polars-lazy/src/tests/queries.rs b/crates/polars-lazy/src/tests/queries.rs index cc49a2c2f32d..690dc6f57ea9 100644 --- a/crates/polars-lazy/src/tests/queries.rs +++ b/crates/polars-lazy/src/tests/queries.rs @@ -46,18 +46,18 @@ fn test_lazy_alias() { } #[test] -fn test_lazy_melt() { +fn test_lazy_unpivot() { let df = get_df(); - let args = MeltArgs { - id_vars: vec!["petal_width".into(), "petal_length".into()], - value_vars: vec!["sepal_length".into(), "sepal_width".into()], + let args = UnpivotArgs { + index: vec!["petal_width".into(), "petal_length".into()], + on: vec!["sepal_length".into(), "sepal_width".into()], ..Default::default() }; let out = df .lazy() - .melt(args) + .unpivot(args) .filter(col("variable").eq(lit("sepal_length"))) .select([col("variable"), col("petal_width"), col("value")]) .collect() diff --git a/crates/polars-pipe/src/pipeline/dispatcher/mod.rs b/crates/polars-pipe/src/pipeline/dispatcher/mod.rs index 901f1fd771cb..216dec376c05 100644 --- a/crates/polars-pipe/src/pipeline/dispatcher/mod.rs +++ b/crates/polars-pipe/src/pipeline/dispatcher/mod.rs @@ -62,7 +62,7 @@ impl ThreadedSink { /// succeed. /// Think for example on multiply a few columns, or applying a predicate. /// Operators can shrink the batches: filter -/// Grow the batches: explode/ melt +/// Grow the batches: explode/ unpivot /// Keep them the same size: element-wise operations /// The probe side of join operations is also an operator. /// diff --git a/crates/polars-plan/src/plans/builder_dsl.rs b/crates/polars-plan/src/plans/builder_dsl.rs index 2e90b494344f..ccebeed92992 100644 --- a/crates/polars-plan/src/plans/builder_dsl.rs +++ b/crates/polars-plan/src/plans/builder_dsl.rs @@ -339,10 +339,10 @@ impl DslBuilder { .into() } - pub fn melt(self, args: MeltArgs) -> Self { + pub fn unpivot(self, args: UnpivotArgs) -> Self { DslPlan::MapFunction { input: Arc::new(self.0), - function: DslFunction::Melt { args }, + function: DslFunction::Unpivot { args }, } .into() } diff --git a/crates/polars-plan/src/plans/builder_ir.rs b/crates/polars-plan/src/plans/builder_ir.rs index bd7f42ab5596..1bab177f41b1 100644 --- a/crates/polars-plan/src/plans/builder_ir.rs +++ b/crates/polars-plan/src/plans/builder_ir.rs @@ -297,10 +297,10 @@ impl<'a> IRBuilder<'a> { self.add_alp(lp) } - pub fn melt(self, args: Arc) -> Self { + pub fn unpivot(self, args: Arc) -> Self { let lp = IR::MapFunction { input: self.root, - function: FunctionNode::Melt { + function: FunctionNode::Unpivot { args, schema: Default::default(), }, diff --git a/crates/polars-plan/src/plans/functions/dsl.rs b/crates/polars-plan/src/plans/functions/dsl.rs index 9ee12e10a35c..6c53e8b676f0 100644 --- a/crates/polars-plan/src/plans/functions/dsl.rs +++ b/crates/polars-plan/src/plans/functions/dsl.rs @@ -9,8 +9,8 @@ pub enum DslFunction { Explode { columns: Vec, }, - Melt { - args: MeltArgs, + Unpivot { + args: UnpivotArgs, }, RowIndex { name: Arc, @@ -79,7 +79,7 @@ impl DslFunction { schema: Default::default(), } }, - DslFunction::Melt { args } => FunctionNode::Melt { + DslFunction::Unpivot { args } => FunctionNode::Unpivot { args: Arc::new(args), schema: Default::default(), }, @@ -125,7 +125,7 @@ impl Display for DslFunction { match self { FunctionNode(inner) => write!(f, "{inner}"), Explode { .. 
} => write!(f, "EXPLODE"), - Melt { .. } => write!(f, "MELT"), + Unpivot { .. } => write!(f, "UNPIVOT"), RowIndex { .. } => write!(f, "WITH ROW INDEX"), Stats(_) => write!(f, "STATS"), FillNan(_) => write!(f, "FILL NAN"), diff --git a/crates/polars-plan/src/plans/functions/mod.rs b/crates/polars-plan/src/plans/functions/mod.rs index 1a24a5c4e852..bdb778541e0a 100644 --- a/crates/polars-plan/src/plans/functions/mod.rs +++ b/crates/polars-plan/src/plans/functions/mod.rs @@ -90,8 +90,8 @@ pub enum FunctionNode { #[cfg_attr(feature = "serde", serde(skip))] schema: CachedSchema, }, - Melt { - args: Arc, + Unpivot { + args: Arc, #[cfg_attr(feature = "serde", serde(skip))] schema: CachedSchema, }, @@ -125,7 +125,7 @@ impl PartialEq for FunctionNode { }, ) => existing_l == existing_r && new_l == new_r, (Explode { columns: l, .. }, Explode { columns: r, .. }) => l == r, - (Melt { args: l, .. }, Melt { args: r, .. }) => l == r, + (Unpivot { args: l, .. }, Unpivot { args: r, .. }) => l == r, (RowIndex { name: l, .. }, RowIndex { name: r, .. }) => l == r, #[cfg(feature = "merge_sorted")] (MergeSorted { column: l }, MergeSorted { column: r }) => l == r, @@ -165,7 +165,7 @@ impl Hash for FunctionNode { new.hash(state); }, FunctionNode::Explode { columns, schema: _ } => columns.hash(state), - FunctionNode::Melt { args, schema: _ } => args.hash(state), + FunctionNode::Unpivot { args, schema: _ } => args.hash(state), FunctionNode::RowIndex { name, schema: _, @@ -187,7 +187,7 @@ impl FunctionNode { #[cfg(feature = "merge_sorted")] MergeSorted { .. } => false, Count { .. } | Unnest { .. } | Rename { .. } | Explode { .. } => true, - Melt { args, .. } => args.streamable, + Unpivot { args, .. } => args.streamable, Opaque { streamable, .. } => *streamable, #[cfg(feature = "python")] OpaquePython { streamable, .. } => *streamable, @@ -201,7 +201,7 @@ impl FunctionNode { match self { #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, - Explode { .. } | Melt { .. } => true, + Explode { .. } | Unpivot { .. } => true, _ => false, } } @@ -212,7 +212,7 @@ impl FunctionNode { Opaque { predicate_pd, .. } => *predicate_pd, #[cfg(feature = "python")] OpaquePython { predicate_pd, .. } => *predicate_pd, - Rechunk | Unnest { .. } | Rename { .. } | Explode { .. } | Melt { .. } => true, + Rechunk | Unnest { .. } | Rename { .. } | Explode { .. } | Unpivot { .. } => true, #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, RowIndex { .. } | Count { .. } => false, @@ -231,7 +231,7 @@ impl FunctionNode { | Unnest { .. } | Rename { .. } | Explode { .. } - | Melt { .. } => true, + | Unpivot { .. } => true, #[cfg(feature = "merge_sorted")] MergeSorted { .. } => true, RowIndex { .. } => true, @@ -295,9 +295,9 @@ impl FunctionNode { }, Rename { existing, new, .. } => rename::rename_impl(df, existing, new), Explode { columns, .. } => df.explode(columns.as_ref()), - Melt { args, .. } => { + Unpivot { args, .. } => { let args = (**args).clone(); - df.melt2(args) + df.unpivot2(args) }, RowIndex { name, offset, .. } => df.with_row_index(name.as_ref(), *offset), } @@ -353,7 +353,7 @@ impl Display for FunctionNode { }, Rename { .. } => write!(f, "RENAME"), Explode { .. } => write!(f, "EXPLODE"), - Melt { .. } => write!(f, "MELT"), + Unpivot { .. } => write!(f, "UNPIVOT"), RowIndex { .. 
} => write!(f, "WITH ROW INDEX"), } } diff --git a/crates/polars-plan/src/plans/functions/schema.rs b/crates/polars-plan/src/plans/functions/schema.rs index 532cdd9f4168..fbb83c137733 100644 --- a/crates/polars-plan/src/plans/functions/schema.rs +++ b/crates/polars-plan/src/plans/functions/schema.rs @@ -11,7 +11,7 @@ impl FunctionNode { RowIndex { schema, .. } | Explode { schema, .. } | Rename { schema, .. } - | Melt { schema, .. } => { + | Unpivot { schema, .. } => { let mut guard = schema.lock().unwrap(); *guard = None; }, @@ -98,7 +98,7 @@ impl FunctionNode { Ok(Cow::Owned(row_index_schema(schema, input_schema, name))) }, Explode { schema, columns } => explode_schema(schema, input_schema, columns), - Melt { schema, args } => melt_schema(args, schema, input_schema), + Unpivot { schema, args } => unpivot_schema(args, schema, input_schema), } } } @@ -143,8 +143,8 @@ fn explode_schema<'a>( Ok(Cow::Owned(schema)) } -fn melt_schema<'a>( - args: &MeltArgs, +fn unpivot_schema<'a>( + args: &UnpivotArgs, cached_schema: &CachedSchema, input_schema: &'a Schema, ) -> PolarsResult> { @@ -154,7 +154,7 @@ fn melt_schema<'a>( } let mut new_schema = args - .id_vars + .index .iter() .map(|id| Field::new(id, input_schema.get(id).unwrap().clone())) .collect::(); @@ -175,15 +175,15 @@ fn melt_schema<'a>( let mut supertype = DataType::Null; // take all columns that are not in `id_vars` as `value_var` - if args.value_vars.is_empty() { - let id_vars = PlHashSet::from_iter(&args.id_vars); + if args.on.is_empty() { + let id_vars = PlHashSet::from_iter(&args.index); for (name, dtype) in input_schema.iter() { if !id_vars.contains(name) { supertype = try_get_supertype(&supertype, dtype).unwrap(); } } } else { - for name in &args.value_vars { + for name in &args.on { let dtype = input_schema.get(name).unwrap(); supertype = try_get_supertype(&supertype, dtype).unwrap(); } diff --git a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs index c1c29d3ec310..6d07ff86ddf7 100644 --- a/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/predicate_pushdown/mod.rs @@ -524,7 +524,7 @@ impl<'a> PredicatePushDown<'a> { expr_arena, )) }, - FunctionNode::Melt { args, .. } => { + FunctionNode::Unpivot { args, .. } => { let variable_name = args.variable_name.as_deref().unwrap_or("variable"); let value_name = args.value_name.as_deref().unwrap_or("value"); @@ -533,7 +533,7 @@ impl<'a> PredicatePushDown<'a> { let name = &*name; name == variable_name || name == value_name - || args.value_vars.iter().any(|s| s.as_str() == name) + || args.on.iter().any(|s| s.as_str() == name) }; let local_predicates = transfer_to_local_by_name( expr_arena, diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs index 12d236635a4b..0a9c80827b9e 100644 --- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs +++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/mod.rs @@ -1,6 +1,6 @@ -mod melt; +mod unpivot; -use melt::process_melt; +use unpivot::process_unpivot; use super::*; @@ -64,13 +64,13 @@ pub(super) fn process_functions( .explode(columns.clone()) .build()) }, - Melt { ref args, .. } => { + Unpivot { ref args, .. 
} => { let lp = IR::MapFunction { input, function: function.clone(), }; - process_melt( + process_unpivot( proj_pd, lp, args, diff --git a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/melt.rs b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/unpivot.rs similarity index 85% rename from crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/melt.rs rename to crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/unpivot.rs index 8c63331ae704..70704f76fa9b 100644 --- a/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/melt.rs +++ b/crates/polars-plan/src/plans/optimizer/projection_pushdown/functions/unpivot.rs @@ -1,17 +1,17 @@ use super::*; #[allow(clippy::too_many_arguments)] -pub(super) fn process_melt( +pub(super) fn process_unpivot( proj_pd: &mut ProjectionPushDown, lp: IR, - args: &Arc, + args: &Arc, input: Node, acc_projections: Vec, projections_seen: usize, lp_arena: &mut Arena, expr_arena: &mut Arena, ) -> PolarsResult { - if args.value_vars.is_empty() { + if args.on.is_empty() { // restart projection pushdown proj_pd.no_pushdown_restart_opt(lp, acc_projections, projections_seen, lp_arena, expr_arena) } else { @@ -28,10 +28,10 @@ pub(super) fn process_melt( } // make sure that the requested columns are projected - args.id_vars.iter().for_each(|name| { + args.index.iter().for_each(|name| { add_str_to_accumulated(name, &mut acc_projections, &mut projected_names, expr_arena) }); - args.value_vars.iter().for_each(|name| { + args.on.iter().for_each(|name| { add_str_to_accumulated(name, &mut acc_projections, &mut projected_names, expr_arena) }); @@ -44,9 +44,9 @@ pub(super) fn process_melt( expr_arena, )?; - // re-make melt node so that the schema is updated + // re-make unpivot node so that the schema is updated let lp = IRBuilder::new(input, expr_arena, lp_arena) - .melt(args.clone()) + .unpivot(args.clone()) .build(); if local_projections.is_empty() { diff --git a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs index c88302703d05..b804e50692ea 100644 --- a/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs +++ b/crates/polars-plan/src/plans/optimizer/slice_pushdown_lp.rs @@ -349,7 +349,7 @@ impl SlicePushDown { | m @ (DataFrameScan {..}, _) | m @ (Sort {..}, _) | m @ (MapFunction {function: FunctionNode::Explode {..}, ..}, _) - | m @ (MapFunction {function: FunctionNode::Melt {..}, ..}, _) + | m @ (MapFunction {function: FunctionNode::Unpivot {..}, ..}, _) | m @ (Cache {..}, _) | m @ (Distinct {..}, _) | m @ (GroupBy{..},_) diff --git a/crates/polars/src/docs/eager.rs b/crates/polars/src/docs/eager.rs index a62872e8059d..28e12057bde8 100644 --- a/crates/polars/src/docs/eager.rs +++ b/crates/polars/src/docs/eager.rs @@ -24,7 +24,7 @@ //! * [Joins](#joins) //! * [GroupBy](#group_by) //! - [pivot](#pivot) -//! * [Melt](#melt) +//! * [Unpivot](#unpivot) //! * [Explode](#explode) //! * [IO](#io) //! - [Read CSV](#read-csv) @@ -460,7 +460,7 @@ //! # } //! ``` //! -//! ## Melt +//! ## Unpivot //! //! ``` //! use polars::prelude::*; @@ -473,8 +473,8 @@ //! "D" => &[2, 4, 6] //! ]?; //! -//! let melted = df.melt(&["A", "B"], &["C", "D"]).unwrap(); -//! // melted: +//! let unpivoted = df.unpivot(&["A", "B"], &["C", "D"]).unwrap(); +//! // unpivoted: //! //! // +-----+-----+----------+-------+ //! 
// | A | B | variable | value | diff --git a/docs/_build/API_REFERENCE_LINKS.yml b/docs/_build/API_REFERENCE_LINKS.yml index 02d8fd143ff3..aa3e28f68d31 100644 --- a/docs/_build/API_REFERENCE_LINKS.yml +++ b/docs/_build/API_REFERENCE_LINKS.yml @@ -98,7 +98,7 @@ python: join_asof: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.join_asof.html concat: https://docs.pola.rs/api/python/stable/reference/api/polars.concat.html pivot: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.pivot.html - melt: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.melt.html + unpivot: https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.unpivot.html is_between: https://docs.pola.rs/api/python/stable/reference/expressions/api/polars.Expr.is_between.html date_range: https://docs.pola.rs/api/python/stable/reference/api/polars.date_range.html @@ -177,7 +177,7 @@ rust: DataFrame.explode: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.explode pivot: https://docs.pola.rs/api/rust/dev/polars_lazy/frame/pivot/fn.pivot.html - melt: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.melt + unpivot: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.unpivot upsample: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.upsample join_asof: https://docs.pola.rs/api/rust/dev/polars/prelude/trait.AsofJoin.html#method.join_asof unnest: https://docs.pola.rs/api/rust/dev/polars/frame/struct.DataFrame.html#method.unnest diff --git a/docs/src/python/user-guide/transformations/melt.py b/docs/src/python/user-guide/transformations/unpivot.py similarity index 68% rename from docs/src/python/user-guide/transformations/melt.py rename to docs/src/python/user-guide/transformations/unpivot.py index e9bf53a96ec7..03efe884baa0 100644 --- a/docs/src/python/user-guide/transformations/melt.py +++ b/docs/src/python/user-guide/transformations/unpivot.py @@ -12,7 +12,7 @@ print(df) # --8<-- [end:df] -# --8<-- [start:melt] -out = df.melt(id_vars=["A", "B"], value_vars=["C", "D"]) +# --8<-- [start:unpivot] +out = df.unpivot(index=["A", "B"], on=["C", "D"]) print(out) -# --8<-- [end:melt] +# --8<-- [end:unpivot] diff --git a/docs/src/rust/Cargo.toml b/docs/src/rust/Cargo.toml index fa41c7eed87a..3f40edd72d42 100644 --- a/docs/src/rust/Cargo.toml +++ b/docs/src/rust/Cargo.toml @@ -132,8 +132,8 @@ name = "user-guide-transformations-joins" path = "user-guide/transformations/joins.rs" required-features = ["polars/lazy"] [[bin]] -name = "user-guide-transformations-melt" -path = "user-guide/transformations/melt.rs" +name = "user-guide-transformations-unpivot" +path = "user-guide/transformations/unpivot.rs" [[bin]] name = "user-guide-transformations-pivot" path = "user-guide/transformations/pivot.rs" diff --git a/docs/src/rust/user-guide/transformations/melt.rs b/docs/src/rust/user-guide/transformations/unpivot.rs similarity index 78% rename from docs/src/rust/user-guide/transformations/melt.rs rename to docs/src/rust/user-guide/transformations/unpivot.rs index ff797423d293..a094d7364e7d 100644 --- a/docs/src/rust/user-guide/transformations/melt.rs +++ b/docs/src/rust/user-guide/transformations/unpivot.rs @@ -13,9 +13,9 @@ fn main() -> Result<(), Box> { println!("{}", &df); // --8<-- [end:df] - // --8<-- [start:melt] - let out = df.melt(["A", "B"], ["C", "D"])?; + // --8<-- [start:unpivot] + let out = df.unpivot(["A", "B"], ["C", "D"])?; 
println!("{}", &out); - // --8<-- [end:melt] + // --8<-- [end:unpivot] Ok(()) } diff --git a/docs/user-guide/concepts/streaming.md b/docs/user-guide/concepts/streaming.md index 0365e944f47e..0dbafec6ec7b 100644 --- a/docs/user-guide/concepts/streaming.md +++ b/docs/user-guide/concepts/streaming.md @@ -18,7 +18,7 @@ Streaming is supported for many operations including: - `join` - `unique` - `sort` -- `explode`,`melt` +- `explode`,`unpivot` - `scan_csv`,`scan_parquet`,`scan_ipc` This list is not exhaustive. Polars is in active development, and more operations can be added without explicit notice. diff --git a/docs/user-guide/transformations/index.md b/docs/user-guide/transformations/index.md index cd673786643c..3092c5be3c37 100644 --- a/docs/user-guide/transformations/index.md +++ b/docs/user-guide/transformations/index.md @@ -5,4 +5,4 @@ The focus of this section is to describe different types of data transformations - [Joins](joins.md) - [Concatenation](concatenation.md) - [Pivot](pivot.md) -- [Melt](melt.md) +- [Unpivot](unpivot.md) diff --git a/docs/user-guide/transformations/melt.md b/docs/user-guide/transformations/melt.md deleted file mode 100644 index 3e6efe35723e..000000000000 --- a/docs/user-guide/transformations/melt.md +++ /dev/null @@ -1,21 +0,0 @@ -# Melts - -Melt operations unpivot a DataFrame from wide format to long format - -## Dataset - -{{code_block('user-guide/transformations/melt','df',['DataFrame'])}} - -```python exec="on" result="text" session="user-guide/transformations/melt" ---8<-- "python/user-guide/transformations/melt.py:df" -``` - -## Eager + lazy - -`Eager` and `lazy` have the same API. - -{{code_block('user-guide/transformations/melt','melt',['melt'])}} - -```python exec="on" result="text" session="user-guide/transformations/melt" ---8<-- "python/user-guide/transformations/melt.py:melt" -``` diff --git a/docs/user-guide/transformations/unpivot.md b/docs/user-guide/transformations/unpivot.md new file mode 100644 index 000000000000..83715a001dc8 --- /dev/null +++ b/docs/user-guide/transformations/unpivot.md @@ -0,0 +1,21 @@ +# Unpivots + +Unpivot unpivots a DataFrame from wide format to long format + +## Dataset + +{{code_block('user-guide/transformations/unpivot','df',['DataFrame'])}} + +```python exec="on" result="text" session="user-guide/transformations/unpivot" +--8<-- "python/user-guide/transformations/unpivot.py:df" +``` + +## Eager + lazy + +`Eager` and `lazy` have the same API. 
+ +{{code_block('user-guide/transformations/unpivot','unpivot',['unpivot'])}} + +```python exec="on" result="text" session="user-guide/transformations/unpivot" +--8<-- "python/user-guide/transformations/unpivot.py:unpivot" +``` diff --git a/mkdocs.yml b/mkdocs.yml index 47fa8d9a216c..d2e3c1e637fa 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,7 +43,7 @@ nav: - user-guide/transformations/joins.md - user-guide/transformations/concatenation.md - user-guide/transformations/pivot.md - - user-guide/transformations/melt.md + - user-guide/transformations/unpivot.md - Time series: - user-guide/transformations/time-series/parsing.md - user-guide/transformations/time-series/filter.md diff --git a/py-polars/docs/source/reference/dataframe/modify_select.rst b/py-polars/docs/source/reference/dataframe/modify_select.rst index 26cb474f7c63..da497de12daf 100644 --- a/py-polars/docs/source/reference/dataframe/modify_select.rst +++ b/py-polars/docs/source/reference/dataframe/modify_select.rst @@ -35,7 +35,6 @@ Manipulation/selection DataFrame.join DataFrame.join_asof DataFrame.limit - DataFrame.melt DataFrame.merge_sorted DataFrame.partition_by DataFrame.pipe @@ -64,6 +63,7 @@ Manipulation/selection DataFrame.transpose DataFrame.unique DataFrame.unnest + DataFrame.unpivot DataFrame.unstack DataFrame.update DataFrame.upsample diff --git a/py-polars/docs/source/reference/lazyframe/modify_select.rst b/py-polars/docs/source/reference/lazyframe/modify_select.rst index 41c83b2c7a20..dfe772923c95 100644 --- a/py-polars/docs/source/reference/lazyframe/modify_select.rst +++ b/py-polars/docs/source/reference/lazyframe/modify_select.rst @@ -28,7 +28,6 @@ Manipulation/selection LazyFrame.join_asof LazyFrame.last LazyFrame.limit - LazyFrame.melt LazyFrame.merge_sorted LazyFrame.rename LazyFrame.reverse @@ -44,6 +43,7 @@ Manipulation/selection LazyFrame.top_k LazyFrame.unique LazyFrame.unnest + LazyFrame.unpivot LazyFrame.update LazyFrame.with_columns LazyFrame.with_columns_seq diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index ec0ce54249ba..17e5c9403822 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -7630,6 +7630,10 @@ def pivot( ------- DataFrame + Notes + ----- + In some other frameworks, you might know this operation as `pivot_wider`. + Examples -------- >>> df = pl.DataFrame( @@ -7798,10 +7802,10 @@ def pivot( ) ) - def melt( + def unpivot( self, - id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, - value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, ) -> Self: @@ -7811,22 +7815,28 @@ def melt( Optionally leaves identifiers set. This function is useful to massage a DataFrame into a format where one or more - columns are identifier variables (id_vars) while all other columns, considered - measured variables (value_vars), are "unpivoted" to the row axis leaving just + columns are identifier variables (index) while all other columns, considered + measured variables (on), are "unpivoted" to the row axis leaving just two non-identifier columns, 'variable' and 'value'. Parameters ---------- - id_vars + index Column(s) or selector(s) to use as identifier variables. 
- value_vars - Column(s) or selector(s) to use as values variables; if `value_vars` - is empty all columns that are not in `id_vars` will be used. + on + Column(s) or selector(s) to use as values variables; if `on` + is empty all columns that are not in `index` will be used. variable_name Name to give to the `variable` column. Defaults to "variable" value_name Name to give to the `value` column. Defaults to "value" + Notes + ----- + If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, + but with `index` replacing `id_vars` and `on` replacing `value_vars`. + In other frameworks, you might know this operation as `pivot_longer`. + Examples -------- >>> df = pl.DataFrame( @@ -7837,7 +7847,7 @@ def melt( ... } ... ) >>> import polars.selectors as cs - >>> df.melt(id_vars="a", value_vars=cs.numeric()) + >>> df.unpivot(index="a", on=cs.numeric()) shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │ @@ -7852,12 +7862,10 @@ def melt( │ z ┆ c ┆ 6 │ └─────┴──────────┴───────┘ """ - value_vars = [] if value_vars is None else _expand_selectors(self, value_vars) - id_vars = [] if id_vars is None else _expand_selectors(self, id_vars) + on = [] if on is None else _expand_selectors(self, on) + index = [] if index is None else _expand_selectors(self, index) - return self._from_pydf( - self._df.melt(id_vars, value_vars, value_name, variable_name) - ) + return self._from_pydf(self._df.unpivot(index, on, value_name, variable_name)) @unstable() def unstack( diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 698884360910..69d58afeedb1 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -5703,10 +5703,10 @@ def drop_nulls( subset = _expand_selectors(self, subset) return self._from_pyldf(self._ldf.drop_nulls(subset)) - def melt( + def unpivot( self, - id_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, - value_vars: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + index: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, + on: ColumnNameOrSelector | Sequence[ColumnNameOrSelector] | None = None, variable_name: str | None = None, value_name: str | None = None, *, @@ -5718,26 +5718,32 @@ def melt( Optionally leaves identifiers set. This function is useful to massage a DataFrame into a format where one or more - columns are identifier variables (id_vars) while all other columns, considered - measured variables (value_vars), are "unpivoted" to the row axis leaving just + columns are identifier variables (index) while all other columns, considered + measured variables (on), are "unpivoted" to the row axis leaving just two non-identifier columns, 'variable' and 'value'. Parameters ---------- - id_vars + index Column(s) or selector(s) to use as identifier variables. - value_vars - Column(s) or selector(s) to use as values variables; if `value_vars` - is empty all columns that are not in `id_vars` will be used. + on + Column(s) or selector(s) to use as values variables; if `on` + is empty all columns that are not in `index` will be used. variable_name Name to give to the `variable` column. Defaults to "variable" value_name Name to give to the `value` column. Defaults to "value" streamable Allow this node to run in the streaming engine. - If this runs in streaming, the output of the melt operation + If this runs in streaming, the output of the unpivot operation will not have a stable ordering. 
+ Notes + ----- + If you're coming from pandas, this is similar to `pandas.DataFrame.melt`, + but with `index` replacing `id_vars` and `on` replacing `value_vars`. + In other frameworks, you might know this operation as `pivot_longer`. + Examples -------- >>> lf = pl.LazyFrame( @@ -5748,7 +5754,7 @@ def melt( ... } ... ) >>> import polars.selectors as cs - >>> lf.melt(id_vars="a", value_vars=cs.numeric()).collect() + >>> lf.unpivot(index="a", on=cs.numeric()).collect() shape: (6, 3) ┌─────┬──────────┬───────┐ │ a ┆ variable ┆ value │ @@ -5763,11 +5769,11 @@ def melt( │ z ┆ c ┆ 6 │ └─────┴──────────┴───────┘ """ - value_vars = [] if value_vars is None else _expand_selectors(self, value_vars) - id_vars = [] if id_vars is None else _expand_selectors(self, id_vars) + on = [] if on is None else _expand_selectors(self, on) + index = [] if index is None else _expand_selectors(self, index) return self._from_pyldf( - self._ldf.melt(id_vars, value_vars, value_name, variable_name, streamable) + self._ldf.unpivot(index, on, value_name, variable_name, streamable) ) def map_batches( diff --git a/py-polars/src/dataframe/general.rs b/py-polars/src/dataframe/general.rs index fef05d9350ff..340f5d1837ba 100644 --- a/py-polars/src/dataframe/general.rs +++ b/py-polars/src/dataframe/general.rs @@ -397,22 +397,22 @@ impl PyDataFrame { PyDataFrame::new(self.df.clone()) } - pub fn melt( + pub fn unpivot( &self, - id_vars: Vec, - value_vars: Vec, + index: Vec, + on: Vec, value_name: Option<&str>, variable_name: Option<&str>, ) -> PyResult { - let args = MeltArgs { - id_vars: strings_to_smartstrings(id_vars), - value_vars: strings_to_smartstrings(value_vars), + let args = UnpivotArgs { + index: strings_to_smartstrings(index), + on: strings_to_smartstrings(on), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), streamable: false, }; - let df = self.df.melt2(args).map_err(PyPolarsErr::from)?; + let df = self.df.unpivot2(args).map_err(PyPolarsErr::from)?; Ok(PyDataFrame::new(df)) } diff --git a/py-polars/src/lazyframe/mod.rs b/py-polars/src/lazyframe/mod.rs index 9de61cd2b022..022114db4e42 100644 --- a/py-polars/src/lazyframe/mod.rs +++ b/py-polars/src/lazyframe/mod.rs @@ -1112,7 +1112,7 @@ impl PyLazyFrame { } #[pyo3(signature = (id_vars, value_vars, value_name, variable_name, streamable))] - fn melt( + fn unpivot( &self, id_vars: Vec, value_vars: Vec, @@ -1120,16 +1120,16 @@ impl PyLazyFrame { variable_name: Option, streamable: bool, ) -> Self { - let args = MeltArgs { - id_vars: strings_to_smartstrings(id_vars), - value_vars: strings_to_smartstrings(value_vars), + let args = UnpivotArgs { + index: strings_to_smartstrings(id_vars), + on: strings_to_smartstrings(value_vars), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), streamable, }; let ldf = self.ldf.clone(); - ldf.melt(args).into() + ldf.unpivot(args).into() } fn with_row_index(&self, name: &str, offset: Option) -> Self { diff --git a/py-polars/src/lazyframe/visitor/nodes.rs b/py-polars/src/lazyframe/visitor/nodes.rs index 5c0832a07d0f..c206e59254d3 100644 --- a/py-polars/src/lazyframe/visitor/nodes.rs +++ b/py-polars/src/lazyframe/visitor/nodes.rs @@ -520,13 +520,10 @@ pub(crate) fn into_py(py: Python<'_>, plan: &IR) -> PyResult { columns.iter().map(|s| s.to_string()).collect::>(), ) .to_object(py), - FunctionNode::Melt { args, schema: _ } => ( - "melt", - args.id_vars.iter().map(|s| s.as_str()).collect::>(), - args.value_vars - .iter() - .map(|s| s.as_str()) - .collect::>(), + 
FunctionNode::Unpivot { args, schema: _ } => ( + "unpivot", + args.index.iter().map(|s| s.as_str()).collect::>(), + args.on.iter().map(|s| s.as_str()).collect::>(), args.variable_name .as_ref() .map_or_else(|| py.None(), |s| s.as_str().to_object(py)), diff --git a/py-polars/tests/unit/operations/test_filter.py b/py-polars/tests/unit/operations/test_filter.py index 61bb23fd231f..85db19b228aa 100644 --- a/py-polars/tests/unit/operations/test_filter.py +++ b/py-polars/tests/unit/operations/test_filter.py @@ -29,7 +29,7 @@ def test_filter_contains_nth_11205() -> None: assert df.filter(pl.first()).is_empty() -def test_melt_values_predicate_pushdown() -> None: +def test_unpivot_values_predicate_pushdown() -> None: lf = pl.DataFrame( { "id": [1], @@ -40,7 +40,7 @@ def test_melt_values_predicate_pushdown() -> None: ).lazy() assert ( - lf.melt("id", ["asset_key_1", "asset_key_2", "asset_key_3"]) + lf.unpivot("id", ["asset_key_1", "asset_key_2", "asset_key_3"]) .filter(pl.col("value") == pl.lit("123")) .collect() ).to_dict(as_series=False) == { diff --git a/py-polars/tests/unit/operations/test_melt.py b/py-polars/tests/unit/operations/test_unpivot.py similarity index 57% rename from py-polars/tests/unit/operations/test_melt.py rename to py-polars/tests/unit/operations/test_unpivot.py index 4c36581e11b7..174928b2e9fc 100644 --- a/py-polars/tests/unit/operations/test_melt.py +++ b/py-polars/tests/unit/operations/test_unpivot.py @@ -5,22 +5,22 @@ from polars.testing import assert_frame_equal -def test_melt() -> None: +def test_unpivot() -> None: df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) for _idv, _vv in (("A", ("B", "C")), (cs.string(), cs.integer())): - melted_eager = df.melt(id_vars="A", value_vars=["B", "C"]) - assert all(melted_eager["value"] == [1, 3, 5, 2, 4, 6]) + unpivoted_eager = df.unpivot(index="A", on=["B", "C"]) + assert all(unpivoted_eager["value"] == [1, 3, 5, 2, 4, 6]) - melted_lazy = df.lazy().melt(id_vars="A", value_vars=["B", "C"]) - assert all(melted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6]) + unpivoted_lazy = df.lazy().unpivot(index="A", on=["B", "C"]) + assert all(unpivoted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6]) - melted = df.melt(id_vars="A", value_vars="B") - assert all(melted["value"] == [1, 3, 5]) + unpivoted = df.unpivot(index="A", on="B") + assert all(unpivoted["value"] == [1, 3, 5]) n = 3 - for melted in [df.melt(), df.lazy().melt().collect()]: - assert melted["variable"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n - assert melted["value"].to_list() == [ + for unpivoted in [df.unpivot(), df.lazy().unpivot().collect()]: + assert unpivoted["variable"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n + assert unpivoted["value"].to_list() == [ "a", "b", "c", @@ -32,12 +32,12 @@ def test_melt() -> None: "6", ] - for melted in [ - df.melt(value_name="foo", variable_name="bar"), - df.lazy().melt(value_name="foo", variable_name="bar").collect(), + for unpivoted in [ + df.unpivot(value_name="foo", variable_name="bar"), + df.lazy().unpivot(value_name="foo", variable_name="bar").collect(), ]: - assert melted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n - assert melted["foo"].to_list() == [ + assert unpivoted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n + assert unpivoted["foo"].to_list() == [ "a", "b", "c", @@ -50,7 +50,7 @@ def test_melt() -> None: ] -def test_melt_projection_pd_7747() -> None: +def test_unpivot_projection_pd_7747() -> None: df = pl.LazyFrame( { "number": [1, 2, 1, 2, 1], @@ -60,7 +60,7 @@ def 
test_melt_projection_pd_7747() -> None: ) result = ( df.with_columns(pl.col("age").alias("wgt")) - .melt(id_vars="number", value_vars="wgt") + .unpivot(index="number", on="wgt") .select("number", "value") .collect() ) @@ -74,10 +74,10 @@ def test_melt_projection_pd_7747() -> None: # https://github.com/pola-rs/polars/issues/10075 -def test_melt_no_value_vars() -> None: +def test_unpivot_no_value_vars() -> None: lf = pl.LazyFrame({"a": [1, 2, 3]}) - result = lf.melt("a") + result = lf.unpivot("a") expected = pl.LazyFrame( schema={"a": pl.Int64, "variable": pl.String, "value": pl.Null} @@ -85,8 +85,8 @@ def test_melt_no_value_vars() -> None: assert_frame_equal(result, expected) -def test_melt_raise_list() -> None: +def test_unpivot_raise_list() -> None: with pytest.raises(pl.exceptions.InvalidOperationError): pl.LazyFrame( {"a": ["x", "y"], "b": [["test", "test2"], ["test3", "test4"]]} - ).melt().collect() + ).unpivot().collect() diff --git a/py-polars/tests/unit/test_projections.py b/py-polars/tests/unit/test_projections.py index 96c3b5a33dad..802061265baa 100644 --- a/py-polars/tests/unit/test_projections.py +++ b/py-polars/tests/unit/test_projections.py @@ -23,12 +23,12 @@ def test_projection_on_semi_join_4789() -> None: assert q.collect().to_dict(as_series=False) == {"a": [1], "p": [1], "seq": [[1]]} -def test_melt_projection_pd_block_4997() -> None: +def test_unpivot_projection_pd_block_4997() -> None: assert ( pl.DataFrame({"col1": ["a"], "col2": ["b"]}) .with_row_index() .lazy() - .melt(id_vars="index") + .unpivot(index="index") .group_by("index") .agg(pl.col("variable").alias("result")) .collect() @@ -69,7 +69,7 @@ def test_unnest_projection_pushdown() -> None: lf = pl.DataFrame({"x|y|z": [1, 2], "a|b|c": [2, 3]}).lazy() mlf = ( - lf.melt() + lf.unpivot() .with_columns(pl.col("variable").str.split_exact("|", 2)) .unnest("variable") )
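
For reference, the renamed eager and lazy entry points are used the same way. The snippet below is a minimal usage sketch, not part of the patch itself: it assumes a Polars build that already includes this rename and reuses sample data similar to the docstring examples above.

```python
# Minimal sketch of the renamed API (assumes a Polars version containing this patch).
import polars as pl

df = pl.DataFrame(
    {
        "A": ["a", "b", "a"],
        "B": [1, 3, 5],
        "C": [10, 11, 12],
        "D": [2, 4, 6],
    }
)

# Previously: df.melt(id_vars=["A", "B"], value_vars=["C", "D"])
out = df.unpivot(index=["A", "B"], on=["C", "D"])
print(out)

# The lazy API mirrors the eager one.
lazy_out = df.lazy().unpivot(index=["A", "B"], on=["C", "D"]).collect()
print(lazy_out)
```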