From 869fa1d06744760955a5943d613520cb473cd336 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Tue, 15 Aug 2023 17:54:54 -0400 Subject: [PATCH 01/10] feat(rust): utf8 to temporal casting --- crates/polars-core/src/chunked_array/cast.rs | 31 ++++++++++++++++++++ py-polars/tests/unit/test_lazy.py | 2 +- py-polars/tests/unit/test_queries.py | 27 +++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 6a93bc7f0364..0acdf51739f3 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -7,6 +7,7 @@ use arrow::compute::cast::CastOptions; use crate::chunked_array::categorical::CategoricalChunkedBuilder; #[cfg(feature = "timezones")] use crate::chunked_array::temporal::validate_time_zone; +use crate::prelude::DataType::Datetime; use crate::prelude::*; pub(crate) fn cast_chunks( @@ -203,6 +204,36 @@ impl ChunkCast for Utf8Chunked { polars_bail!(ComputeError: "expected 'precision' or 'scale' when casting to Decimal") }, }, + #[cfg(feature = "dtype-date")] + DataType::Date => { + let result = cast_chunks(&self.chunks, &data_type, true)?; + let out = Series::try_from((self.name(), result))?; + Ok(out) + }, + #[cfg(feature = "dtype-datetime")] + DataType::Datetime(tu, tz) => { + let out = match tz { + #[cfg(feature = "timezones")] + Some(tz) => { + validate_time_zone(tz)?; + let result = cast_chunks( + &self.chunks, + &Datetime(TimeUnit::Nanoseconds, Some(tz.clone())), + true, + )?; + Series::try_from((self.name(), result)) + }, + _ => { + let result = cast_chunks( + &self.chunks, + &Datetime(TimeUnit::Nanoseconds, None), + true, + )?; + Series::try_from((self.name(), result)) + }, + }; + out + }, _ => cast_impl(self.name(), &self.chunks, data_type), } } diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index f3fc2cdcab14..47a108580083 100644 --- 
a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -1313,7 +1313,7 @@ def test_quadratic_behavior_4736() -> None: ldf.select(reduce(add, (pl.col(fld) for fld in ldf.columns))) -@pytest.mark.parametrize("input_dtype", [pl.Utf8, pl.Int64, pl.Float64]) +@pytest.mark.parametrize("input_dtype", [pl.Int64, pl.Float64]) def test_from_epoch(input_dtype: pl.PolarsDataType) -> None: ldf = pl.LazyFrame( [ diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index 946c0f42e6f6..18dc04b589a9 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -364,3 +364,30 @@ def test_datetime_supertype_5236() -> None: ) assert out.shape == (0, 2) assert out.dtypes == [pl.Datetime("ns", "UTC")] * 2 + + +def test_utf8_date() -> None: + df = pl.DataFrame({"x1": ["2021-01-01"]}).with_columns( + **{"x1-date": pl.col("x1").cast(pl.Date)} + ) + out = df.select(pl.col("x1-date")) + assert out.shape == (1, 1) + assert out.dtypes == [pl.Date] + + +def test_utf8_datetime() -> None: + df = pl.DataFrame( + {"x1": ["2021-12-19T16:39:57-02:00", "2022-12-19T16:39:57"]} + ).with_columns( + **{ + "x1-datetime-ns": pl.col("x1").cast(pl.Datetime(time_unit="ns")), + "x1-datetime-ms": pl.col("x1").cast(pl.Datetime(time_unit="ms")), + "x1-datetime-us": pl.col("x1").cast(pl.Datetime(time_unit="us")), + } + ) + + out = df.select( + pl.col("x1-datetime-ns"), pl.col("x1-datetime-ms"), pl.col("x1-datetime-us") + ) + assert out.shape == (2, 3) + assert out.dtypes == [pl.Datetime, pl.Datetime, pl.Datetime] From bb731535c90a82da6d1df2420048a64363aab3a7 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Tue, 15 Aug 2023 17:54:54 -0400 Subject: [PATCH 02/10] feat(rust): utf8 to temporal casting --- crates/polars-core/src/chunked_array/cast.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 
0acdf51739f3..27ecd5bb6e7b 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -206,12 +206,12 @@ impl ChunkCast for Utf8Chunked { }, #[cfg(feature = "dtype-date")] DataType::Date => { - let result = cast_chunks(&self.chunks, &data_type, true)?; + let result = cast_chunks(&self.chunks, data_type, true)?; let out = Series::try_from((self.name(), result))?; Ok(out) }, #[cfg(feature = "dtype-datetime")] - DataType::Datetime(tu, tz) => { + DataType::Datetime(_tu, tz) => { let out = match tz { #[cfg(feature = "timezones")] Some(tz) => { From 34ba75a35fc9d7cf7610bdd4d0ba186c2168fbb6 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Mon, 9 Oct 2023 23:37:07 -0300 Subject: [PATCH 03/10] feat: utf8 to timestamp/date casting support Support for different timeunits added in nano-arrow --- crates/nano-arrow/src/compute/cast/mod.rs | 36 +++++++++++++-- crates/nano-arrow/src/compute/cast/utf8_to.rs | 32 +++++++------ crates/nano-arrow/src/temporal_conversions.rs | 46 +++++++------------ crates/polars-core/src/chunked_array/cast.rs | 11 ++--- py-polars/tests/unit/test_queries.py | 21 ++++++++- 5 files changed, 91 insertions(+), 55 deletions(-) diff --git a/crates/nano-arrow/src/compute/cast/mod.rs b/crates/nano-arrow/src/compute/cast/mod.rs index f13a638a9c0d..0ac8e7b8085e 100644 --- a/crates/nano-arrow/src/compute/cast/mod.rs +++ b/crates/nano-arrow/src/compute/cast/mod.rs @@ -578,9 +578,23 @@ pub fn cast(array: &dyn Array, to_type: &DataType, options: CastOptions) -> Resu LargeUtf8 => Ok(Box::new(utf8_to_large_utf8( array.as_any().downcast_ref().unwrap(), ))), - Timestamp(TimeUnit::Nanosecond, None) => utf8_to_naive_timestamp_ns_dyn::(array), + Timestamp(TimeUnit::Nanosecond, None) => { + utf8_to_naive_timestamp_dyn::(array, TimeUnit::Nanosecond) + }, + Timestamp(TimeUnit::Millisecond, None) => { + utf8_to_naive_timestamp_dyn::(array, TimeUnit::Millisecond) + }, + Timestamp(TimeUnit::Microsecond, None) => { + 
utf8_to_naive_timestamp_dyn::(array, TimeUnit::Microsecond) + }, Timestamp(TimeUnit::Nanosecond, Some(tz)) => { - utf8_to_timestamp_ns_dyn::(array, tz.clone()) + utf8_to_timestamp_dyn::(array, tz.clone(), TimeUnit::Nanosecond) + }, + Timestamp(TimeUnit::Millisecond, Some(tz)) => { + utf8_to_timestamp_dyn::(array, tz.clone(), TimeUnit::Millisecond) + }, + Timestamp(TimeUnit::Microsecond, Some(tz)) => { + utf8_to_timestamp_dyn::(array, tz.clone(), TimeUnit::Microsecond) }, _ => Err(Error::NotYetImplemented(format!( "Casting from {from_type:?} to {to_type:?} not supported", @@ -605,9 +619,23 @@ pub fn cast(array: &dyn Array, to_type: &DataType, options: CastOptions) -> Resu to_type.clone(), ) .boxed()), - Timestamp(TimeUnit::Nanosecond, None) => utf8_to_naive_timestamp_ns_dyn::(array), + Timestamp(TimeUnit::Nanosecond, None) => { + utf8_to_naive_timestamp_dyn::(array, TimeUnit::Nanosecond) + }, + Timestamp(TimeUnit::Millisecond, None) => { + utf8_to_naive_timestamp_dyn::(array, TimeUnit::Millisecond) + }, + Timestamp(TimeUnit::Microsecond, None) => { + utf8_to_naive_timestamp_dyn::(array, TimeUnit::Microsecond) + }, Timestamp(TimeUnit::Nanosecond, Some(tz)) => { - utf8_to_timestamp_ns_dyn::(array, tz.clone()) + utf8_to_timestamp_dyn::(array, tz.clone(), TimeUnit::Nanosecond) + }, + Timestamp(TimeUnit::Millisecond, Some(tz)) => { + utf8_to_timestamp_dyn::(array, tz.clone(), TimeUnit::Millisecond) + }, + Timestamp(TimeUnit::Microsecond, Some(tz)) => { + utf8_to_timestamp_dyn::(array, tz.clone(), TimeUnit::Microsecond) }, _ => Err(Error::NotYetImplemented(format!( "Casting from {from_type:?} to {to_type:?} not supported", diff --git a/crates/nano-arrow/src/compute/cast/utf8_to.rs b/crates/nano-arrow/src/compute/cast/utf8_to.rs index 9c86ff85da54..c1d2cfa73414 100644 --- a/crates/nano-arrow/src/compute/cast/utf8_to.rs +++ b/crates/nano-arrow/src/compute/cast/utf8_to.rs @@ -2,12 +2,12 @@ use chrono::Datelike; use super::CastOptions; use crate::array::*; -use 
crate::datatypes::DataType; +use crate::datatypes::{DataType, TimeUnit}; use crate::error::Result; use crate::offset::Offset; use crate::temporal_conversions::{ - utf8_to_naive_timestamp_ns as utf8_to_naive_timestamp_ns_, - utf8_to_timestamp_ns as utf8_to_timestamp_ns_, EPOCH_DAYS_FROM_CE, + utf8_to_naive_timestamp as utf8_to_naive_timestamp_, utf8_to_timestamp as utf8_to_timestamp_, + EPOCH_DAYS_FROM_CE, }; use crate::types::NativeType; @@ -110,34 +110,40 @@ pub fn utf8_to_dictionary( Ok(array.into()) } -pub(super) fn utf8_to_naive_timestamp_ns_dyn( +pub(super) fn utf8_to_naive_timestamp_dyn( from: &dyn Array, + tu: TimeUnit, ) -> Result> { let from = from.as_any().downcast_ref().unwrap(); - Ok(Box::new(utf8_to_naive_timestamp_ns::(from))) + Ok(Box::new(utf8_to_naive_timestamp::(from, tu))) } -/// [`crate::temporal_conversions::utf8_to_timestamp_ns`] applied for RFC3339 formatting -pub fn utf8_to_naive_timestamp_ns(from: &Utf8Array) -> PrimitiveArray { - utf8_to_naive_timestamp_ns_(from, RFC3339) +/// [`crate::temporal_conversions::utf8_to_timestamp`] applied for RFC3339 formatting +pub fn utf8_to_naive_timestamp( + from: &Utf8Array, + tu: TimeUnit, +) -> PrimitiveArray { + utf8_to_naive_timestamp_(from, RFC3339, tu) } -pub(super) fn utf8_to_timestamp_ns_dyn( +pub(super) fn utf8_to_timestamp_dyn( from: &dyn Array, timezone: String, + tu: TimeUnit, ) -> Result> { let from = from.as_any().downcast_ref().unwrap(); - utf8_to_timestamp_ns::(from, timezone) + utf8_to_timestamp::(from, timezone, tu) .map(Box::new) .map(|x| x as Box) } -/// [`crate::temporal_conversions::utf8_to_timestamp_ns`] applied for RFC3339 formatting -pub fn utf8_to_timestamp_ns( +/// [`crate::temporal_conversions::utf8_to_timestamp`] applied for RFC3339 formatting +pub fn utf8_to_timestamp( from: &Utf8Array, timezone: String, + tu: TimeUnit, ) -> Result> { - utf8_to_timestamp_ns_(from, RFC3339, timezone) + utf8_to_timestamp_(from, RFC3339, timezone, tu) } /// Conversion of utf8 diff --git 
a/crates/nano-arrow/src/temporal_conversions.rs b/crates/nano-arrow/src/temporal_conversions.rs index 5058d1d887bd..8ba3d2523678 100644 --- a/crates/nano-arrow/src/temporal_conversions.rs +++ b/crates/nano-arrow/src/temporal_conversions.rs @@ -323,17 +323,6 @@ pub fn parse_offset(offset: &str) -> Result { .expect("FixedOffset::east out of bounds")) } -/// Parses `value` to `Option` consistent with the Arrow's definition of timestamp with timezone. -/// `tz` must be built from `timezone` (either via [`parse_offset`] or `chrono-tz`). -#[inline] -pub fn utf8_to_timestamp_ns_scalar( - value: &str, - fmt: &str, - tz: &T, -) -> Option { - utf8_to_timestamp_scalar(value, fmt, tz, &TimeUnit::Nanosecond) -} - /// Parses `value` to `Option` consistent with the Arrow's definition of timestamp with timezone. /// `tz` must be built from `timezone` (either via [`parse_offset`] or `chrono-tz`). /// Returns in scale `tz` of `TimeUnit`. @@ -364,12 +353,6 @@ pub fn utf8_to_timestamp_scalar( } } -/// Parses `value` to `Option` consistent with the Arrow's definition of timestamp without timezone. -#[inline] -pub fn utf8_to_naive_timestamp_ns_scalar(value: &str, fmt: &str) -> Option { - utf8_to_naive_timestamp_scalar(value, fmt, &TimeUnit::Nanosecond) -} - /// Parses `value` to `Option` consistent with the Arrow's definition of timestamp without timezone. /// Returns in scale `tz` of `TimeUnit`. 
#[inline] @@ -388,18 +371,18 @@ pub fn utf8_to_naive_timestamp_scalar(value: &str, fmt: &str, tu: &TimeUnit) -> .ok() } -fn utf8_to_timestamp_ns_impl( +fn utf8_to_timestamp_impl( array: &Utf8Array, fmt: &str, timezone: String, tz: T, + tu: TimeUnit, ) -> PrimitiveArray { let iter = array .iter() - .map(|x| x.and_then(|x| utf8_to_timestamp_ns_scalar(x, fmt, &tz))); + .map(|x| x.and_then(|x| utf8_to_timestamp_scalar(x, fmt, &tz, &tu))); - PrimitiveArray::from_trusted_len_iter(iter) - .to(DataType::Timestamp(TimeUnit::Nanosecond, Some(timezone))) + PrimitiveArray::from_trusted_len_iter(iter).to(DataType::Timestamp(tu, Some(timezone))) } /// Parses `value` to a [`chrono_tz::Tz`] with the Arrow's definition of timestamp with a timezone. @@ -413,13 +396,14 @@ pub fn parse_offset_tz(timezone: &str) -> Result { #[cfg(feature = "chrono-tz")] #[cfg_attr(docsrs, doc(cfg(feature = "chrono-tz")))] -fn chrono_tz_utf_to_timestamp_ns( +fn chrono_tz_utf_to_timestamp( array: &Utf8Array, fmt: &str, timezone: String, + tu: TimeUnit, ) -> Result> { let tz = parse_offset_tz(&timezone)?; - Ok(utf8_to_timestamp_ns_impl(array, fmt, timezone, tz)) + Ok(utf8_to_timestamp_impl(array, fmt, timezone, tz, tu)) } #[cfg(not(feature = "chrono-tz"))] @@ -436,22 +420,23 @@ fn chrono_tz_utf_to_timestamp_ns( /// Parses a [`Utf8Array`] to a timeozone-aware timestamp, i.e. [`PrimitiveArray`] with type `Timestamp(Nanosecond, Some(timezone))`. /// # Implementation /// * parsed values with timezone other than `timezone` are converted to `timezone`. -/// * parsed values without timezone are null. Use [`utf8_to_naive_timestamp_ns`] to parse naive timezones. +/// * parsed values without timezone are null. Use [`utf8_to_naive_timestamp`] to parse naive timezones. /// * Null elements remain null; non-parsable elements are null. /// The feature `"chrono-tz"` enables IANA and zoneinfo formats for `timezone`. /// # Error /// This function errors iff `timezone` is not parsable to an offset. 
-pub fn utf8_to_timestamp_ns( +pub fn utf8_to_timestamp( array: &Utf8Array, fmt: &str, timezone: String, + tu: TimeUnit, ) -> Result> { let tz = parse_offset(timezone.as_str()); if let Ok(tz) = tz { - Ok(utf8_to_timestamp_ns_impl(array, fmt, timezone, tz)) + Ok(utf8_to_timestamp_impl(array, fmt, timezone, tz, tu)) } else { - chrono_tz_utf_to_timestamp_ns(array, fmt, timezone) + chrono_tz_utf_to_timestamp(array, fmt, timezone, tu) } } @@ -459,15 +444,16 @@ pub fn utf8_to_timestamp_ns( /// [`PrimitiveArray`] with type `Timestamp(Nanosecond, None)`. /// Timezones are ignored. /// Null elements remain null; non-parsable elements are set to null. -pub fn utf8_to_naive_timestamp_ns( +pub fn utf8_to_naive_timestamp( array: &Utf8Array, fmt: &str, + tu: TimeUnit, ) -> PrimitiveArray { let iter = array .iter() - .map(|x| x.and_then(|x| utf8_to_naive_timestamp_ns_scalar(x, fmt))); + .map(|x| x.and_then(|x| utf8_to_naive_timestamp_scalar(x, fmt, &tu))); - PrimitiveArray::from_trusted_len_iter(iter).to(DataType::Timestamp(TimeUnit::Nanosecond, None)) + PrimitiveArray::from_trusted_len_iter(iter).to(DataType::Timestamp(tu, None)) } fn add_month(year: i32, month: u32, months: i32) -> chrono::NaiveDate { diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 65626ae990c7..67e5588fa9e4 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -204,24 +204,21 @@ impl ChunkCast for Utf8Chunked { Ok(out) }, #[cfg(feature = "dtype-datetime")] - DataType::Datetime(_tu, tz) => { + DataType::Datetime(tu, tz) => { let out = match tz { #[cfg(feature = "timezones")] Some(tz) => { validate_time_zone(tz)?; let result = cast_chunks( &self.chunks, - &Datetime(TimeUnit::Nanoseconds, Some(tz.clone())), + &Datetime(tu.to_owned(), Some(tz.clone())), true, )?; Series::try_from((self.name(), result)) }, _ => { - let result = cast_chunks( - &self.chunks, - &Datetime(TimeUnit::Nanoseconds, 
None), - true, - )?; + let result = + cast_chunks(&self.chunks, &Datetime(tu.to_owned(), None), true)?; Series::try_from((self.name(), result)) }, }; diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index 8ececf3a535e..13ec031ffc54 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -371,6 +371,7 @@ def test_shift_drop_nulls_10875() -> None: "a" ].to_list() == [1, 2] + def test_utf8_date() -> None: df = pl.DataFrame({"x1": ["2021-01-01"]}).with_columns( **{"x1-date": pl.col("x1").cast(pl.Date)} @@ -382,7 +383,25 @@ def test_utf8_date() -> None: def test_utf8_datetime() -> None: df = pl.DataFrame( - {"x1": ["2021-12-19T16:39:57-02:00", "2022-12-19T16:39:57"]} + {"x1": ["2021-12-19T00:39:57", "2022-12-19T16:39:57"]} + ).with_columns( + **{ + "x1-datetime-ns": pl.col("x1").cast(pl.Datetime(time_unit="ns")), + "x1-datetime-ms": pl.col("x1").cast(pl.Datetime(time_unit="ms")), + "x1-datetime-us": pl.col("x1").cast(pl.Datetime(time_unit="us")), + } + ) + + out = df.select( + pl.col("x1-datetime-ns"), pl.col("x1-datetime-ms"), pl.col("x1-datetime-us") + ) + assert out.shape == (2, 3) + assert out.dtypes == [pl.Datetime, pl.Datetime, pl.Datetime] + + +def test_utf8_datetime_timezone() -> None: + df = pl.DataFrame( + {"x1": ["1996-12-19T16:39:57-02:00", "2022-12-19T00:39:57-03:00"]} ).with_columns( **{ "x1-datetime-ns": pl.col("x1").cast(pl.Datetime(time_unit="ns")), From 8be15bd89d2e4ade5f740b43daed25f75f3344fd Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Mon, 16 Oct 2023 08:57:57 -0300 Subject: [PATCH 04/10] feat: added missing tests for failure scenarios, also fixed casting from int to date. 
--- crates/nano-arrow/src/temporal_conversions.rs | 3 +- crates/polars-core/src/chunked_array/cast.rs | 5 ++- .../src/chunked_array/temporal/mod.rs | 14 +++++++ py-polars/tests/unit/test_lazy.py | 2 +- py-polars/tests/unit/test_queries.py | 38 ++++++++++++++++--- 5 files changed, 53 insertions(+), 9 deletions(-) diff --git a/crates/nano-arrow/src/temporal_conversions.rs b/crates/nano-arrow/src/temporal_conversions.rs index 8ba3d2523678..8ab1b3ec3ffa 100644 --- a/crates/nano-arrow/src/temporal_conversions.rs +++ b/crates/nano-arrow/src/temporal_conversions.rs @@ -407,10 +407,11 @@ fn chrono_tz_utf_to_timestamp( } #[cfg(not(feature = "chrono-tz"))] -fn chrono_tz_utf_to_timestamp_ns( +fn chrono_tz_utf_to_timestamp( _: &Utf8Array, _: &str, timezone: String, + _: TimeUnit, ) -> Result> { Err(Error::InvalidArgumentError(format!( "timezone \"{timezone}\" cannot be parsed (feature chrono-tz is not active)", diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 67e5588fa9e4..45a61d169601 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -5,6 +5,7 @@ use arrow::compute::cast::CastOptions; #[cfg(feature = "dtype-categorical")] use crate::chunked_array::categorical::CategoricalChunkedBuilder; +use crate::chunked_array::temporal::{validate_is_number}; #[cfg(feature = "timezones")] use crate::chunked_array::temporal::validate_time_zone; use crate::prelude::DataType::Datetime; @@ -198,13 +199,13 @@ impl ChunkCast for Utf8Chunked { }, }, #[cfg(feature = "dtype-date")] - DataType::Date => { + DataType::Date if !validate_is_number(&self.chunks) => { let result = cast_chunks(&self.chunks, data_type, true)?; let out = Series::try_from((self.name(), result))?; Ok(out) }, #[cfg(feature = "dtype-datetime")] - DataType::Datetime(tu, tz) => { + DataType::Datetime(tu, tz) if !validate_is_number(&self.chunks) => { let out = match tz { #[cfg(feature = "timezones")] Some(tz) 
=> { diff --git a/crates/polars-core/src/chunked_array/temporal/mod.rs b/crates/polars-core/src/chunked_array/temporal/mod.rs index 737ff5086d47..9e0759a9b31d 100644 --- a/crates/polars-core/src/chunked_array/temporal/mod.rs +++ b/crates/polars-core/src/chunked_array/temporal/mod.rs @@ -15,6 +15,7 @@ use chrono::NaiveDateTime; use chrono::NaiveTime; #[cfg(feature = "timezones")] use chrono_tz::Tz; +use polars_arrow::prelude::ArrayRef; #[cfg(feature = "dtype-time")] pub use time::time_to_time64ns; @@ -35,3 +36,16 @@ pub(crate) fn validate_time_zone(tz: &str) -> PolarsResult<()> { }, } } + +pub(crate) fn validate_is_number(vec_array: &Vec) -> bool { + vec_array.iter().all(|array|is_parsable_as_number(array)) +} + +fn is_parsable_as_number(array: &ArrayRef) -> bool { + if let Some(array) = array.as_any().downcast_ref::() { + array.iter().all(|value| value.expect("Unable to parse int string to datetime").parse::().is_ok()) + } else { + false + } +} + diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index 5a7ba0ee4c74..39e46f6a0846 100644 --- a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -1328,7 +1328,7 @@ def test_quadratic_behavior_4736() -> None: ldf.select(reduce(add, (pl.col(fld) for fld in ldf.columns))) -@pytest.mark.parametrize("input_dtype", [pl.Int64, pl.Float64]) +@pytest.mark.parametrize("input_dtype", [pl.Int64, pl.Float64, pl.Utf8]) def test_from_epoch(input_dtype: pl.PolarsDataType) -> None: ldf = pl.LazyFrame( [ diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index 13ec031ffc54..8db9c1283904 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -5,8 +5,10 @@ import numpy as np import pandas as pd +import pytest import polars as pl +from polars import ComputeError from polars.testing import assert_frame_equal @@ -381,6 +383,13 @@ def test_utf8_date() -> None: assert out.dtypes == [pl.Date] +def 
test_wrong_utf8_date() -> None: + df = pl.DataFrame({"x1": ["2021-01-aa"]}) + + with pytest.raises(ComputeError): + df.with_columns(**{"x1-date": pl.col("x1").cast(pl.Date)}) + + def test_utf8_datetime() -> None: df = pl.DataFrame( {"x1": ["2021-12-19T00:39:57", "2022-12-19T16:39:57"]} @@ -399,19 +408,38 @@ def test_utf8_datetime() -> None: assert out.dtypes == [pl.Datetime, pl.Datetime, pl.Datetime] +def test_wrong_utf8_datetime() -> None: + df = pl.DataFrame({"x1": ["2021-12-19 00:39:57", "2022-12-19 16:39:57"]}) + with pytest.raises(ComputeError): + df.with_columns( + **{"x1-datetime-ns": pl.col("x1").cast(pl.Datetime(time_unit="ns"))} + ) + + def test_utf8_datetime_timezone() -> None: df = pl.DataFrame( - {"x1": ["1996-12-19T16:39:57-02:00", "2022-12-19T00:39:57-03:00"]} + {"x1": ["1996-12-19T16:39:57 +00:00", "2022-12-19T00:39:57 +00:00"]} ).with_columns( **{ - "x1-datetime-ns": pl.col("x1").cast(pl.Datetime(time_unit="ns")), - "x1-datetime-ms": pl.col("x1").cast(pl.Datetime(time_unit="ms")), - "x1-datetime-us": pl.col("x1").cast(pl.Datetime(time_unit="us")), + "x1-datetime-ns": pl.col("x1").cast( + pl.Datetime(time_unit="ns", time_zone="America/Caracas") + ), + "x1-datetime-ms": pl.col("x1").cast( + pl.Datetime(time_unit="ms", time_zone="America/Santiago") + ), + "x1-datetime-us": pl.col("x1").cast( + pl.Datetime(time_unit="us", time_zone="UTC") + ), } ) out = df.select( pl.col("x1-datetime-ns"), pl.col("x1-datetime-ms"), pl.col("x1-datetime-us") ) + assert out.shape == (2, 3) - assert out.dtypes == [pl.Datetime, pl.Datetime, pl.Datetime] + assert out.dtypes == [ + pl.Datetime("ns", "America/Caracas"), + pl.Datetime("ms", "America/Santiago"), + pl.Datetime("us", "UTC"), + ] From de17db621854fa8af5128ad9b2c839b5e673b204 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Mon, 16 Oct 2023 09:31:44 -0300 Subject: [PATCH 05/10] fix: fixed issue regarding arrow libraries import and code formatting --- crates/polars-core/src/chunked_array/cast.rs | 2 +- 
.../polars-core/src/chunked_array/temporal/mod.rs | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index e539182a234d..214ed5f19482 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -5,7 +5,7 @@ use arrow::compute::cast::CastOptions; #[cfg(feature = "dtype-categorical")] use crate::chunked_array::categorical::CategoricalChunkedBuilder; -use crate::chunked_array::temporal::{validate_is_number}; +use crate::chunked_array::temporal::validate_is_number; #[cfg(feature = "timezones")] use crate::chunked_array::temporal::validate_time_zone; use crate::prelude::DataType::Datetime; diff --git a/crates/polars-core/src/chunked_array/temporal/mod.rs b/crates/polars-core/src/chunked_array/temporal/mod.rs index 9e0759a9b31d..c6ea220b7d21 100644 --- a/crates/polars-core/src/chunked_array/temporal/mod.rs +++ b/crates/polars-core/src/chunked_array/temporal/mod.rs @@ -15,13 +15,13 @@ use chrono::NaiveDateTime; use chrono::NaiveTime; #[cfg(feature = "timezones")] use chrono_tz::Tz; -use polars_arrow::prelude::ArrayRef; #[cfg(feature = "dtype-time")] pub use time::time_to_time64ns; pub use self::conversion::*; #[cfg(feature = "timezones")] use crate::prelude::{polars_bail, PolarsResult}; +use crate::prelude::{ArrayRef, LargeStringArray}; pub fn unix_time() -> NaiveDateTime { NaiveDateTime::from_timestamp_opt(0, 0).unwrap() @@ -38,14 +38,18 @@ pub(crate) fn validate_time_zone(tz: &str) -> PolarsResult<()> { } pub(crate) fn validate_is_number(vec_array: &Vec) -> bool { - vec_array.iter().all(|array|is_parsable_as_number(array)) + vec_array.iter().all(|array| is_parsable_as_number(array)) } fn is_parsable_as_number(array: &ArrayRef) -> bool { - if let Some(array) = array.as_any().downcast_ref::() { - array.iter().all(|value| value.expect("Unable to parse int string to datetime").parse::().is_ok()) + if 
let Some(array) = array.as_any().downcast_ref::() { + array.iter().all(|value| { + value + .expect("Unable to parse int string to datetime") + .parse::() + .is_ok() + }) } else { false } } - From 17e14028669eec4e439e1efe7c7b19257e7fd191 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Mon, 16 Oct 2023 18:45:03 -0300 Subject: [PATCH 06/10] fix: fixed validate_is_number import issue, also added missing dataframe validation on unit tests --- crates/polars-core/src/chunked_array/cast.rs | 3 +- .../src/chunked_array/temporal/mod.rs | 4 +- py-polars/tests/unit/test_queries.py | 49 +++++++++++++++++-- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 214ed5f19482..365cd88cafb3 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -5,9 +5,8 @@ use arrow::compute::cast::CastOptions; #[cfg(feature = "dtype-categorical")] use crate::chunked_array::categorical::CategoricalChunkedBuilder; -use crate::chunked_array::temporal::validate_is_number; #[cfg(feature = "timezones")] -use crate::chunked_array::temporal::validate_time_zone; +use crate::chunked_array::temporal::{validate_is_number, validate_time_zone}; use crate::prelude::DataType::Datetime; use crate::prelude::*; diff --git a/crates/polars-core/src/chunked_array/temporal/mod.rs b/crates/polars-core/src/chunked_array/temporal/mod.rs index c6ea220b7d21..3b6a38aede8b 100644 --- a/crates/polars-core/src/chunked_array/temporal/mod.rs +++ b/crates/polars-core/src/chunked_array/temporal/mod.rs @@ -37,8 +37,8 @@ pub(crate) fn validate_time_zone(tz: &str) -> PolarsResult<()> { } } -pub(crate) fn validate_is_number(vec_array: &Vec) -> bool { - vec_array.iter().all(|array| is_parsable_as_number(array)) +pub(crate) fn validate_is_number(vec_array: &[ArrayRef]) -> bool { + vec_array.iter().all(is_parsable_as_number) } fn is_parsable_as_number(array: &ArrayRef) -> bool 
{ diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index 8db9c1283904..af623feb0e2c 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -1,6 +1,6 @@ from __future__ import annotations -from datetime import datetime, timedelta +from datetime import datetime, timedelta, date from typing import Any import numpy as np @@ -378,9 +378,11 @@ def test_utf8_date() -> None: df = pl.DataFrame({"x1": ["2021-01-01"]}).with_columns( **{"x1-date": pl.col("x1").cast(pl.Date)} ) + expected = pl.DataFrame({"x1-date":[date(2021,1,1)]}) out = df.select(pl.col("x1-date")) assert out.shape == (1, 1) assert out.dtypes == [pl.Date] + assert_frame_equal(expected, out) def test_wrong_utf8_date() -> None: @@ -400,12 +402,26 @@ def test_utf8_datetime() -> None: "x1-datetime-us": pl.col("x1").cast(pl.Datetime(time_unit="us")), } ) + first_row = datetime(year=2021, month=12, day=19, hour=00, minute=39, second=57) + second_row = datetime(year=2022, month=12, day=19, hour=16, minute=39, second=57) + expected = pl.DataFrame( + { + "x1-datetime-ns": [first_row, second_row], + "x1-datetime-ms": [first_row, second_row], + "x1-datetime-us": [first_row, second_row] + } + ).select( + pl.col("x1-datetime-ns").dt.cast_time_unit("ns"), + pl.col("x1-datetime-ms").dt.cast_time_unit("ms"), + pl.col("x1-datetime-us").dt.cast_time_unit("us"), + ) out = df.select( pl.col("x1-datetime-ns"), pl.col("x1-datetime-ms"), pl.col("x1-datetime-us") ) assert out.shape == (2, 3) assert out.dtypes == [pl.Datetime, pl.Datetime, pl.Datetime] + assert_frame_equal(expected, out) def test_wrong_utf8_datetime() -> None: @@ -417,22 +433,46 @@ def test_wrong_utf8_datetime() -> None: def test_utf8_datetime_timezone() -> None: + ccs_tz = "America/Caracas" + stg_tz = "America/Santiago" + utc_tz = "UTC" df = pl.DataFrame( {"x1": ["1996-12-19T16:39:57 +00:00", "2022-12-19T00:39:57 +00:00"]} ).with_columns( **{ "x1-datetime-ns": pl.col("x1").cast( - 
pl.Datetime(time_unit="ns", time_zone="America/Caracas") + pl.Datetime(time_unit="ns", time_zone=ccs_tz) ), "x1-datetime-ms": pl.col("x1").cast( - pl.Datetime(time_unit="ms", time_zone="America/Santiago") + pl.Datetime(time_unit="ms", time_zone=stg_tz) ), "x1-datetime-us": pl.col("x1").cast( - pl.Datetime(time_unit="us", time_zone="UTC") + pl.Datetime(time_unit="us", time_zone=utc_tz) ), } ) + expected = pl.DataFrame( + { + "x1-datetime-ns": [ + datetime(year=1996, month=12, day=19, hour=12, minute=39, second=57), + datetime(year=2022, month=12, day=18, hour=20, minute=39, second=57), + ], + "x1-datetime-ms": [ + datetime(year=1996, month=12, day=19, hour=13, minute=39, second=57), + datetime(year=2022, month=12, day=18, hour=21, minute=39, second=57), + ], + "x1-datetime-us": [ + datetime(year=1996, month=12, day=19, hour=16, minute=39, second=57), + datetime(year=2022, month=12, day=19, hour=00, minute=39, second=57), + ], + } + ).select( + pl.col("x1-datetime-ns").dt.cast_time_unit("ns").dt.replace_time_zone(ccs_tz), + pl.col("x1-datetime-ms").dt.cast_time_unit("ms").dt.replace_time_zone(stg_tz), + pl.col("x1-datetime-us").dt.cast_time_unit("us").dt.replace_time_zone(utc_tz), + ) + out = df.select( pl.col("x1-datetime-ns"), pl.col("x1-datetime-ms"), pl.col("x1-datetime-us") ) @@ -443,3 +483,4 @@ def test_utf8_datetime_timezone() -> None: pl.Datetime("ms", "America/Santiago"), pl.Datetime("us", "UTC"), ] + assert_frame_equal(expected, out) From 084a7e1c4838d3e5c98d8fda1bdad5646babd938 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Mon, 16 Oct 2023 19:52:53 -0300 Subject: [PATCH 07/10] fix: fixed linter issues. 
--- crates/polars-core/src/chunked_array/cast.rs | 4 +++- py-polars/tests/unit/test_queries.py | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 365cd88cafb3..96f5fa1ead04 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -5,8 +5,10 @@ use arrow::compute::cast::CastOptions; #[cfg(feature = "dtype-categorical")] use crate::chunked_array::categorical::CategoricalChunkedBuilder; +#[cfg(feature = "temporal")] +use crate::chunked_array::temporal::validate_is_number; #[cfg(feature = "timezones")] -use crate::chunked_array::temporal::{validate_is_number, validate_time_zone}; +use crate::chunked_array::temporal::validate_time_zone; use crate::prelude::DataType::Datetime; use crate::prelude::*; diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index af623feb0e2c..48e116c39656 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -1,6 +1,6 @@ from __future__ import annotations -from datetime import datetime, timedelta, date +from datetime import date, datetime, timedelta from typing import Any import numpy as np @@ -378,7 +378,7 @@ def test_utf8_date() -> None: df = pl.DataFrame({"x1": ["2021-01-01"]}).with_columns( **{"x1-date": pl.col("x1").cast(pl.Date)} ) - expected = pl.DataFrame({"x1-date":[date(2021,1,1)]}) + expected = pl.DataFrame({"x1-date": [date(2021, 1, 1)]}) out = df.select(pl.col("x1-date")) assert out.shape == (1, 1) assert out.dtypes == [pl.Date] @@ -408,7 +408,7 @@ def test_utf8_datetime() -> None: { "x1-datetime-ns": [first_row, second_row], "x1-datetime-ms": [first_row, second_row], - "x1-datetime-us": [first_row, second_row] + "x1-datetime-us": [first_row, second_row], } ).select( pl.col("x1-datetime-ns").dt.cast_time_unit("ns"), From 00082c58ecad5c8c03e2c52cff8c4fe17fc9f8d0 Mon Sep 17 00:00:00 
2001 From: Brayan Jules Date: Tue, 17 Oct 2023 18:37:40 -0300 Subject: [PATCH 08/10] fix: removing additional asserts from unit test, also improved pattern matching on timestamp casting --- crates/polars-arrow/src/compute/cast/mod.rs | 40 ++++----------------- py-polars/tests/unit/test_queries.py | 10 ------ 2 files changed, 6 insertions(+), 44 deletions(-) diff --git a/crates/polars-arrow/src/compute/cast/mod.rs b/crates/polars-arrow/src/compute/cast/mod.rs index 4324526381ca..d33525285c7d 100644 --- a/crates/polars-arrow/src/compute/cast/mod.rs +++ b/crates/polars-arrow/src/compute/cast/mod.rs @@ -580,23 +580,9 @@ pub fn cast( LargeUtf8 => Ok(Box::new(utf8_to_large_utf8( array.as_any().downcast_ref().unwrap(), ))), - Timestamp(TimeUnit::Nanosecond, None) => { - utf8_to_naive_timestamp_dyn::<i32>(array, TimeUnit::Nanosecond) - }, - Timestamp(TimeUnit::Millisecond, None) => { - utf8_to_naive_timestamp_dyn::<i32>(array, TimeUnit::Millisecond) - }, - Timestamp(TimeUnit::Microsecond, None) => { - utf8_to_naive_timestamp_dyn::<i32>(array, TimeUnit::Microsecond) - }, - Timestamp(TimeUnit::Nanosecond, Some(tz)) => { - utf8_to_timestamp_dyn::<i32>(array, tz.clone(), TimeUnit::Nanosecond) - }, - Timestamp(TimeUnit::Millisecond, Some(tz)) => { - utf8_to_timestamp_dyn::<i32>(array, tz.clone(), TimeUnit::Millisecond) - }, - Timestamp(TimeUnit::Microsecond, Some(tz)) => { - utf8_to_timestamp_dyn::<i32>(array, tz.clone(), TimeUnit::Microsecond) + Timestamp(tu, None) => utf8_to_naive_timestamp_dyn::<i32>(array, tu.to_owned()), + Timestamp(tu, Some(tz)) => { + utf8_to_timestamp_dyn::<i32>(array, tz.clone(), tu.to_owned()) }, _ => polars_bail!(InvalidOperation: "casting from {from_type:?} to {to_type:?} not supported", @@ -621,23 +607,9 @@ pub fn cast( to_type.clone(), ) .boxed()), - Timestamp(TimeUnit::Nanosecond, None) => { - utf8_to_naive_timestamp_dyn::<i64>(array, TimeUnit::Nanosecond) - }, - Timestamp(TimeUnit::Millisecond, None) => { - utf8_to_naive_timestamp_dyn::<i64>(array, TimeUnit::Millisecond) - }, - 
Timestamp(TimeUnit::Microsecond, None) => { - utf8_to_naive_timestamp_dyn::<i64>(array, TimeUnit::Microsecond) - }, - Timestamp(TimeUnit::Nanosecond, Some(tz)) => { - utf8_to_timestamp_dyn::<i64>(array, tz.clone(), TimeUnit::Nanosecond) - }, - Timestamp(TimeUnit::Millisecond, Some(tz)) => { - utf8_to_timestamp_dyn::<i64>(array, tz.clone(), TimeUnit::Millisecond) - }, - Timestamp(TimeUnit::Microsecond, Some(tz)) => { - utf8_to_timestamp_dyn::<i64>(array, tz.clone(), TimeUnit::Microsecond) + Timestamp(tu, None) => utf8_to_naive_timestamp_dyn::<i64>(array, tu.to_owned()), + Timestamp(tu, Some(tz)) => { + utf8_to_timestamp_dyn::<i64>(array, tz.clone(), tu.to_owned()) }, _ => polars_bail!(InvalidOperation: "casting from {from_type:?} to {to_type:?} not supported", diff --git a/py-polars/tests/unit/test_queries.py b/py-polars/tests/unit/test_queries.py index 48e116c39656..14835176cc80 100644 --- a/py-polars/tests/unit/test_queries.py +++ b/py-polars/tests/unit/test_queries.py @@ -380,8 +380,6 @@ def test_utf8_date() -> None: ) expected = pl.DataFrame({"x1-date": [date(2021, 1, 1)]}) out = df.select(pl.col("x1-date")) - assert out.shape == (1, 1) - assert out.dtypes == [pl.Date] assert_frame_equal(expected, out) @@ -419,8 +417,6 @@ def test_utf8_datetime() -> None: out = df.select( pl.col("x1-datetime-ns"), pl.col("x1-datetime-ms"), pl.col("x1-datetime-us") ) - assert out.shape == (2, 3) - assert out.dtypes == [pl.Datetime, pl.Datetime, pl.Datetime] assert_frame_equal(expected, out) @@ -477,10 +473,4 @@ def test_utf8_datetime_timezone() -> None: pl.col("x1-datetime-ns"), pl.col("x1-datetime-ms"), pl.col("x1-datetime-us") ) - assert out.shape == (2, 3) - assert out.dtypes == [ - pl.Datetime("ns", "America/Caracas"), - pl.Datetime("ms", "America/Santiago"), - pl.Datetime("us", "UTC"), - ] assert_frame_equal(expected, out) From d847f69767eecee3ed2e88ac86ee2cf25231b679 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Fri, 20 Oct 2023 22:21:50 -0300 Subject: [PATCH 09/10] fix: fixed the bug that
incorrectly enabled the conversion from epoch string to datetime. --- crates/polars-core/src/chunked_array/cast.rs | 6 ++---- .../src/chunked_array/temporal/mod.rs | 19 +------------------ py-polars/tests/unit/test_lazy.py | 19 ++++++++++++++++++- 3 files changed, 21 insertions(+), 23 deletions(-) diff --git a/crates/polars-core/src/chunked_array/cast.rs b/crates/polars-core/src/chunked_array/cast.rs index 96f5fa1ead04..8daddeac1d81 100644 --- a/crates/polars-core/src/chunked_array/cast.rs +++ b/crates/polars-core/src/chunked_array/cast.rs @@ -5,8 +5,6 @@ use arrow::compute::cast::CastOptions; #[cfg(feature = "dtype-categorical")] use crate::chunked_array::categorical::CategoricalChunkedBuilder; -#[cfg(feature = "temporal")] -use crate::chunked_array::temporal::validate_is_number; #[cfg(feature = "timezones")] use crate::chunked_array::temporal::validate_time_zone; use crate::prelude::DataType::Datetime; @@ -199,13 +197,13 @@ impl ChunkCast for Utf8Chunked { }, }, #[cfg(feature = "dtype-date")] - DataType::Date if !validate_is_number(&self.chunks) => { + DataType::Date => { let result = cast_chunks(&self.chunks, data_type, true)?; let out = Series::try_from((self.name(), result))?; Ok(out) }, #[cfg(feature = "dtype-datetime")] - DataType::Datetime(tu, tz) if !validate_is_number(&self.chunks) => { + DataType::Datetime(tu, tz) => { let out = match tz { #[cfg(feature = "timezones")] Some(tz) => { diff --git a/crates/polars-core/src/chunked_array/temporal/mod.rs b/crates/polars-core/src/chunked_array/temporal/mod.rs index 3b6a38aede8b..0a89825f6959 100644 --- a/crates/polars-core/src/chunked_array/temporal/mod.rs +++ b/crates/polars-core/src/chunked_array/temporal/mod.rs @@ -19,9 +19,9 @@ use chrono_tz::Tz; pub use time::time_to_time64ns; pub use self::conversion::*; +use crate::prelude::ArrayRef; #[cfg(feature = "timezones")] use crate::prelude::{polars_bail, PolarsResult}; -use crate::prelude::{ArrayRef, LargeStringArray}; pub fn unix_time() -> NaiveDateTime { 
NaiveDateTime::from_timestamp_opt(0, 0).unwrap() @@ -36,20 +36,3 @@ pub(crate) fn validate_time_zone(tz: &str) -> PolarsResult<()> { }, } } - -pub(crate) fn validate_is_number(vec_array: &[ArrayRef]) -> bool { - vec_array.iter().all(is_parsable_as_number) -} - -fn is_parsable_as_number(array: &ArrayRef) -> bool { - if let Some(array) = array.as_any().downcast_ref::() { - array.iter().all(|value| { - value - .expect("Unable to parse int string to datetime") - .parse::() - .is_ok() - }) - } else { - false - } -} diff --git a/py-polars/tests/unit/test_lazy.py b/py-polars/tests/unit/test_lazy.py index 2fedbf853435..7dc6478ab5d2 100644 --- a/py-polars/tests/unit/test_lazy.py +++ b/py-polars/tests/unit/test_lazy.py @@ -1375,7 +1375,7 @@ def test_quadratic_behavior_4736() -> None: ldf.select(reduce(add, (pl.col(fld) for fld in ldf.columns))) -@pytest.mark.parametrize("input_dtype", [pl.Int64, pl.Float64, pl.Utf8]) +@pytest.mark.parametrize("input_dtype", [pl.Int64, pl.Float64]) def test_from_epoch(input_dtype: pl.PolarsDataType) -> None: ldf = pl.LazyFrame( [ @@ -1415,6 +1415,23 @@ def test_from_epoch(input_dtype: pl.PolarsDataType) -> None: _ = ldf.select(pl.from_epoch(ts_col, time_unit="s2")) # type: ignore[call-overload] +def test_from_epoch_str() -> None: + ldf = pl.LazyFrame( + [ + pl.Series("timestamp_ms", [1147880044 * 1_000]).cast(pl.Utf8), + pl.Series("timestamp_us", [1147880044 * 1_000_000]).cast(pl.Utf8), + ] + ) + + with pytest.raises(ComputeError): + ldf.select( + [ + pl.from_epoch(pl.col("timestamp_ms"), time_unit="ms"), + pl.from_epoch(pl.col("timestamp_us"), time_unit="us"), + ] + ).collect() + + def test_cumagg_types() -> None: ldf = pl.LazyFrame({"a": [1, 2], "b": [True, False], "c": [1.3, 2.4]}) cumsum_lf = ldf.select( From c5459f1226218c09478fb86ffa41453dd62c80a7 Mon Sep 17 00:00:00 2001 From: Brayan Jules Date: Fri, 20 Oct 2023 22:37:26 -0300 Subject: [PATCH 10/10] fix: removed unused import --- crates/polars-core/src/chunked_array/temporal/mod.rs | 1 
- 1 file changed, 1 deletion(-) diff --git a/crates/polars-core/src/chunked_array/temporal/mod.rs b/crates/polars-core/src/chunked_array/temporal/mod.rs index 0a89825f6959..737ff5086d47 100644 --- a/crates/polars-core/src/chunked_array/temporal/mod.rs +++ b/crates/polars-core/src/chunked_array/temporal/mod.rs @@ -19,7 +19,6 @@ use chrono_tz::Tz; pub use time::time_to_time64ns; pub use self::conversion::*; -use crate::prelude::ArrayRef; #[cfg(feature = "timezones")] use crate::prelude::{polars_bail, PolarsResult};