From d4c56a2ad4aa9768b901f890c420f2982aabd5b6 Mon Sep 17 00:00:00 2001 From: Weijie Guo Date: Tue, 31 Oct 2023 19:10:39 +0800 Subject: [PATCH] refactor(rust): Make all functions in binary namespace non-anonymous (#12126) --- crates/polars-plan/src/dsl/binary.rs | 26 +++++++++ .../src/dsl/function_expr/binary.rs | 55 +++++++++++++++++++ .../polars-plan/src/dsl/function_expr/mod.rs | 8 +++ .../src/dsl/function_expr/schema.rs | 7 +-- crates/polars/Cargo.toml | 2 +- py-polars/src/expr/binary.rs | 45 ++------------- 6 files changed, 95 insertions(+), 48 deletions(-) diff --git a/crates/polars-plan/src/dsl/binary.rs b/crates/polars-plan/src/dsl/binary.rs index 57c40df7ed6f..29e7e1891c38 100644 --- a/crates/polars-plan/src/dsl/binary.rs +++ b/crates/polars-plan/src/dsl/binary.rs @@ -33,4 +33,30 @@ impl BinaryNameSpace { true, ) } + + #[cfg(feature = "binary_encoding")] + pub fn hex_decode(self, strict: bool) -> Expr { + self.0 + .map_private(FunctionExpr::BinaryExpr(BinaryFunction::HexDecode(strict))) + } + + #[cfg(feature = "binary_encoding")] + pub fn hex_encode(self) -> Expr { + self.0 + .map_private(FunctionExpr::BinaryExpr(BinaryFunction::HexEncode)) + } + + #[cfg(feature = "binary_encoding")] + pub fn base64_decode(self, strict: bool) -> Expr { + self.0 + .map_private(FunctionExpr::BinaryExpr(BinaryFunction::Base64Decode( + strict, + ))) + } + + #[cfg(feature = "binary_encoding")] + pub fn base64_encode(self) -> Expr { + self.0 + .map_private(FunctionExpr::BinaryExpr(BinaryFunction::Base64Encode)) + } } diff --git a/crates/polars-plan/src/dsl/function_expr/binary.rs b/crates/polars-plan/src/dsl/function_expr/binary.rs index 0aa8688dde13..24b2418b6cbd 100644 --- a/crates/polars-plan/src/dsl/function_expr/binary.rs +++ b/crates/polars-plan/src/dsl/function_expr/binary.rs @@ -9,6 +9,28 @@ pub enum BinaryFunction { Contains, StartsWith, EndsWith, + #[cfg(feature = "binary_encoding")] + HexDecode(bool), + #[cfg(feature = "binary_encoding")] + HexEncode, + #[cfg(feature = "binary_encoding")] + Base64Decode(bool), + #[cfg(feature = "binary_encoding")] + Base64Encode, +} + +impl BinaryFunction { + pub(super) fn get_field(&self, mapper: FieldsMapper) -> PolarsResult { + use BinaryFunction::*; + match self { + Contains { .. } => mapper.with_dtype(DataType::Boolean), + EndsWith | StartsWith => mapper.with_dtype(DataType::Boolean), + #[cfg(feature = "binary_encoding")] + HexDecode(_) | Base64Decode(_) => mapper.with_same_dtype(), + #[cfg(feature = "binary_encoding")] + HexEncode | Base64Encode => mapper.with_dtype(DataType::Utf8), + } + } } impl Display for BinaryFunction { @@ -18,6 +40,14 @@ impl Display for BinaryFunction { Contains { .. } => "contains", StartsWith => "starts_with", EndsWith => "ends_with", + #[cfg(feature = "binary_encoding")] + HexDecode(_) => "hex_decode", + #[cfg(feature = "binary_encoding")] + HexEncode => "hex_encode", + #[cfg(feature = "binary_encoding")] + Base64Decode(_) => "base64_decode", + #[cfg(feature = "binary_encoding")] + Base64Encode => "base64_encode", }; write!(f, "bin.{s}") } @@ -38,6 +68,7 @@ pub(super) fn ends_with(s: &[Series]) -> PolarsResult { .with_name(ca.name()) .into_series()) } + pub(super) fn starts_with(s: &[Series]) -> PolarsResult { let ca = s[0].binary()?; let prefix = s[1].binary()?; @@ -48,6 +79,30 @@ pub(super) fn starts_with(s: &[Series]) -> PolarsResult { .into_series()) } +#[cfg(feature = "binary_encoding")] +pub(super) fn hex_decode(s: &Series, strict: bool) -> PolarsResult { + let ca = s.binary()?; + ca.hex_decode(strict).map(|ok| ok.into_series()) +} + +#[cfg(feature = "binary_encoding")] +pub(super) fn hex_encode(s: &Series) -> PolarsResult { + let ca = s.binary()?; + Ok(ca.hex_encode()) +} + +#[cfg(feature = "binary_encoding")] +pub(super) fn base64_decode(s: &Series, strict: bool) -> PolarsResult { + let ca = s.binary()?; + ca.base64_decode(strict).map(|ok| ok.into_series()) +} + +#[cfg(feature = "binary_encoding")] +pub(super) fn base64_encode(s: &Series) -> PolarsResult { + let ca = s.binary()?; + Ok(ca.base64_encode()) +} + impl From for FunctionExpr { fn from(b: BinaryFunction) -> Self { FunctionExpr::BinaryExpr(b) diff --git a/crates/polars-plan/src/dsl/function_expr/mod.rs b/crates/polars-plan/src/dsl/function_expr/mod.rs index 7bac0f6433d0..04e3a2b50f9c 100644 --- a/crates/polars-plan/src/dsl/function_expr/mod.rs +++ b/crates/polars-plan/src/dsl/function_expr/mod.rs @@ -1121,6 +1121,14 @@ impl From for SpecialEq> { StartsWith => { map_as_slice!(binary::starts_with) }, + #[cfg(feature = "binary_encoding")] + HexDecode(strict) => map!(binary::hex_decode, strict), + #[cfg(feature = "binary_encoding")] + HexEncode => map!(binary::hex_encode), + #[cfg(feature = "binary_encoding")] + Base64Decode(strict) => map!(binary::base64_decode, strict), + #[cfg(feature = "binary_encoding")] + Base64Encode => map!(binary::base64_encode), } } } diff --git a/crates/polars-plan/src/dsl/function_expr/schema.rs b/crates/polars-plan/src/dsl/function_expr/schema.rs index 23d875d47e2d..58d50b7530b4 100644 --- a/crates/polars-plan/src/dsl/function_expr/schema.rs +++ b/crates/polars-plan/src/dsl/function_expr/schema.rs @@ -24,12 +24,7 @@ impl FunctionExpr { SearchSorted(_) => mapper.with_dtype(IDX_DTYPE), #[cfg(feature = "strings")] StringExpr(s) => s.get_field(mapper), - BinaryExpr(s) => { - use BinaryFunction::*; - match s { - Contains { .. } | EndsWith | StartsWith => mapper.with_dtype(DataType::Boolean), - } - }, + BinaryExpr(s) => s.get_field(mapper), #[cfg(feature = "temporal")] TemporalExpr(fun) => fun.get_field(mapper), #[cfg(feature = "range")] diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index ab98a9dc0d85..25209d2272be 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -136,7 +136,7 @@ extract_jsonpath = [ "polars-lazy?/extract_jsonpath", ] string_encoding = ["polars-ops/string_encoding", "polars-core/strings"] -binary_encoding = ["polars-ops/binary_encoding"] +binary_encoding = ["polars-ops/binary_encoding", "polars-lazy?/binary_encoding"] group_by_list = ["polars-core/group_by_list", "polars-ops/group_by_list"] lazy_regex = ["polars-lazy?/regex"] cum_agg = ["polars-ops/cum_agg", "polars-lazy?/cum_agg"] diff --git a/py-polars/src/expr/binary.rs b/py-polars/src/expr/binary.rs index 4a4aad88fa35..7a243de8897c 100644 --- a/py-polars/src/expr/binary.rs +++ b/py-polars/src/expr/binary.rs @@ -1,4 +1,3 @@ -use polars::prelude::*; use pyo3::prelude::*; use crate::PyExpr; @@ -23,57 +22,21 @@ impl PyExpr { #[cfg(feature = "binary_encoding")] fn bin_hex_decode(&self, strict: bool) -> Self { - self.inner - .clone() - .map( - move |s| { - s.binary()? - .hex_decode(strict) - .map(|s| Some(s.into_series())) - }, - GetOutput::same_type(), - ) - .with_fmt("bin.hex_decode") - .into() + self.inner.clone().binary().hex_decode(strict).into() } #[cfg(feature = "binary_encoding")] fn bin_base64_decode(&self, strict: bool) -> Self { - self.inner - .clone() - .map( - move |s| { - s.binary()? - .base64_decode(strict) - .map(|s| Some(s.into_series())) - }, - GetOutput::same_type(), - ) - .with_fmt("bin.base64_decode") - .into() + self.inner.clone().binary().base64_decode(strict).into() } #[cfg(feature = "binary_encoding")] fn bin_hex_encode(&self) -> Self { - self.inner - .clone() - .map( - move |s| s.binary().map(|s| Some(s.hex_encode().into_series())), - GetOutput::from_type(DataType::Utf8), - ) - .with_fmt("bin.hex_encode") - .into() + self.inner.clone().binary().hex_encode().into() } #[cfg(feature = "binary_encoding")] fn bin_base64_encode(&self) -> Self { - self.inner - .clone() - .map( - move |s| s.binary().map(|s| Some(s.base64_encode().into_series())), - GetOutput::from_type(DataType::Utf8), - ) - .with_fmt("bin.base64_encode") - .into() + self.inner.clone().binary().base64_encode().into() } }