diff --git a/Cargo.lock b/Cargo.lock index caba9579aeda..ef9b0fb1bdeb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -30,7 +30,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" dependencies = [ "cfg-if", - "const-random", "getrandom", "once_cell", "version_check", @@ -168,51 +167,6 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" -[[package]] -name = "arrow-array" -version = "53.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd2bf348cf9f02a5975c5962c7fa6dee107a2009a7b41ac5fb1a027e12dc033f" -dependencies = [ - "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "half", - "hashbrown 0.14.5", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "53.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3092e37715f168976012ce52273c3989b5793b0db5f06cbaa246be25e5f0924d" -dependencies = [ - "bytes", - "half", - "num", -] - -[[package]] -name = "arrow-data" -version = "53.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e4ac0c4ee79150afe067dc4857154b3ee9c1cd52b5f40d59a77306d0ed18d65" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num", -] - -[[package]] -name = "arrow-schema" -version = "53.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c85320a3a2facf2b2822b57aa9d6d9d55edb8aee0b6b5d3b8df158e503d10858" - [[package]] name = "arrow2" version = "0.17.4" @@ -1044,26 +998,6 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" -[[package]] -name = "const-random" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" -dependencies = [ - "getrandom", - "once_cell", - "tiny-keccak", -] - [[package]] name = "constant_time_eq" version = "0.3.1" @@ -1691,7 +1625,6 @@ checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" dependencies = [ "cfg-if", "crunchy", - "num-traits", ] [[package]] @@ -2390,20 +2323,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "num" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" -dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", -] - [[package]] name = "num-bigint" version = "0.4.6" @@ -2439,28 +2358,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" @@ -2810,7 +2707,6 @@ version = "0.43.1" dependencies = [ "ahash", "apache-avro", - "arrow-buffer", "avro-schema", "either", "ethnum", @@ -2839,10 +2735,6 @@ name = "polars-arrow" version = "0.43.1" dependencies = [ "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", "async-stream", "atoi", "atoi_simd", @@ -2923,7 +2815,6 @@ name = "polars-core" version = "0.43.1" dependencies = [ "ahash", - "arrow-array", "bincode", "bitflags", "bytemuck", @@ -4583,15 +4474,6 @@ dependencies = [ "time-core", ] -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", -] - [[package]] name = "tinytemplate" version = "1.2.1" diff --git a/Cargo.toml b/Cargo.toml index 884e4f109e62..fe7ec311cc7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -25,10 +25,6 @@ repository = "https://github.com/pola-rs/polars" ahash = ">=0.8.5" aho-corasick = "1.1" arboard = { version = "3.4.0", default-features = false } -arrow-array = { version = ">=41", default-features = false } -arrow-buffer = { version = ">=41", default-features = false } -arrow-data = { version = ">=41", default-features = false } -arrow-schema = { version = ">=41", default-features = false } atoi = "2" atoi_simd = "0.15.5" atomic-waker = "1" diff --git a/crates/polars-arrow/Cargo.toml b/crates/polars-arrow/Cargo.toml index 0cd85cbae483..8f10cd21fcdb 100644 --- a/crates/polars-arrow/Cargo.toml +++ b/crates/polars-arrow/Cargo.toml @@ -67,11 +67,7 @@ multiversion = { workspace = true, optional = true } # Faster hashing ahash = { workspace = true } -# Support conversion to/from arrow-rs -arrow-array = { workspace = true, optional = true } -arrow-buffer = { workspace = true, optional = true } -arrow-data = { workspace = true, optional = true } -arrow-schema = { workspace = true, optional = true } +# For async arrow flight conversion async-stream = { version = "0.3", optional = true } tokio = { workspace = true, optional = true, features = ["io-util"] } @@ -101,7 +97,6 @@ getrandom = { version = "0.2", features = ["js"] } [features] default = [] full = [ - "arrow_rs", "io_ipc", "io_flight", "io_ipc_compression", @@ -114,7 +109,6 @@ full = [ # parses timezones used in timestamp conversions "chrono-tz", ] -arrow_rs = ["arrow-buffer", "arrow-schema", "arrow-data", "arrow-array"] io_ipc = ["arrow-format", "polars-error/arrow-format"] io_ipc_compression = ["lz4", "zstd", "io_ipc"] io_flight = ["io_ipc", "arrow-format/flight-data", "async-stream", "futures", "tokio"] diff --git a/crates/polars-arrow/src/array/binary/data.rs b/crates/polars-arrow/src/array/binary/data.rs deleted file mode 100644 index 2c08d94eb1b0..000000000000 --- a/crates/polars-arrow/src/array/binary/data.rs +++ /dev/null @@ -1,43 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{Arrow2Arrow, BinaryArray}; -use crate::bitmap::Bitmap; -use crate::offset::{Offset, OffsetsBuffer}; - -impl Arrow2Arrow for BinaryArray { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype.clone().into(); - let builder = ArrayDataBuilder::new(dtype) - .len(self.offsets().len_proxy()) - .buffers(vec![ - self.offsets.clone().into_inner().into(), - self.values.clone().into(), - ]) - .nulls(self.validity.as_ref().map(|b| b.clone().into())); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype = data.data_type().clone().into(); - - if data.is_empty() { - // Handle empty offsets - return Self::new_empty(dtype); - } - - let buffers = data.buffers(); - - // SAFETY: ArrayData is valid - let mut offsets = unsafe { OffsetsBuffer::new_unchecked(buffers[0].clone().into()) }; - offsets.slice(data.offset(), data.len() + 1); - - Self { - dtype, - offsets, - values: buffers[1].clone().into(), - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/binary/mod.rs b/crates/polars-arrow/src/array/binary/mod.rs index 9cd06adaaabf..b590a4554597 100644 --- a/crates/polars-arrow/src/array/binary/mod.rs +++ b/crates/polars-arrow/src/array/binary/mod.rs @@ -21,9 +21,6 @@ mod mutable; pub use mutable::*; use polars_error::{polars_bail, PolarsResult}; -#[cfg(feature = "arrow_rs")] -mod data; - /// A [`BinaryArray`] is Arrow's semantically equivalent of an immutable `Vec>>`. /// It implements [`Array`]. /// diff --git a/crates/polars-arrow/src/array/boolean/data.rs b/crates/polars-arrow/src/array/boolean/data.rs deleted file mode 100644 index 6c497896775c..000000000000 --- a/crates/polars-arrow/src/array/boolean/data.rs +++ /dev/null @@ -1,36 +0,0 @@ -use arrow_buffer::{BooleanBuffer, NullBuffer}; -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{Arrow2Arrow, BooleanArray}; -use crate::bitmap::Bitmap; -use crate::datatypes::ArrowDataType; - -impl Arrow2Arrow for BooleanArray { - fn to_data(&self) -> ArrayData { - let buffer = NullBuffer::from(self.values.clone()); - - let builder = ArrayDataBuilder::new(arrow_schema::DataType::Boolean) - .len(buffer.len()) - .offset(buffer.offset()) - .buffers(vec![buffer.into_inner().into_inner()]) - .nulls(self.validity.as_ref().map(|b| b.clone().into())); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - assert_eq!(data.data_type(), &arrow_schema::DataType::Boolean); - - let buffers = data.buffers(); - let buffer = BooleanBuffer::new(buffers[0].clone(), data.offset(), data.len()); - // Use NullBuffer to compute set count - let values = Bitmap::from_null_buffer(NullBuffer::new(buffer)); - - Self { - dtype: ArrowDataType::Boolean, - values, - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/boolean/mod.rs b/crates/polars-arrow/src/array/boolean/mod.rs index 5cd9870fdbf4..c1a17c0f27f3 100644 --- a/crates/polars-arrow/src/array/boolean/mod.rs +++ b/crates/polars-arrow/src/array/boolean/mod.rs @@ -7,8 +7,6 @@ use crate::bitmap::{Bitmap, MutableBitmap}; use crate::datatypes::{ArrowDataType, PhysicalType}; use crate::trusted_len::TrustedLen; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod from; diff --git a/crates/polars-arrow/src/array/dictionary/data.rs b/crates/polars-arrow/src/array/dictionary/data.rs deleted file mode 100644 index a5eda5a0fd73..000000000000 --- a/crates/polars-arrow/src/array/dictionary/data.rs +++ /dev/null @@ -1,49 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{ - from_data, to_data, Arrow2Arrow, DictionaryArray, DictionaryKey, PrimitiveArray, -}; -use crate::datatypes::{ArrowDataType, PhysicalType}; - -impl Arrow2Arrow for DictionaryArray { - fn to_data(&self) -> ArrayData { - let keys = self.keys.to_data(); - let builder = keys - .into_builder() - .data_type(self.dtype.clone().into()) - .child_data(vec![to_data(self.values.as_ref())]); - - // SAFETY: Dictionary is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let key = match data.data_type() { - arrow_schema::DataType::Dictionary(k, _) => k.as_ref(), - d => panic!("unsupported dictionary type {d}"), - }; - - let dtype = ArrowDataType::from(data.data_type().clone()); - assert_eq!( - dtype.to_physical_type(), - PhysicalType::Dictionary(K::KEY_TYPE) - ); - - let key_builder = ArrayDataBuilder::new(key.clone()) - .buffers(vec![data.buffers()[0].clone()]) - .offset(data.offset()) - .len(data.len()) - .nulls(data.nulls().cloned()); - - // SAFETY: Dictionary is valid - let key_data = unsafe { key_builder.build_unchecked() }; - let keys = PrimitiveArray::from_data(&key_data); - let values = from_data(&data.child_data()[0]); - - Self { - dtype, - keys, - values, - } - } -} diff --git a/crates/polars-arrow/src/array/dictionary/mod.rs b/crates/polars-arrow/src/array/dictionary/mod.rs index d53970dacd98..f23c409c48a9 100644 --- a/crates/polars-arrow/src/array/dictionary/mod.rs +++ b/crates/polars-arrow/src/array/dictionary/mod.rs @@ -8,8 +8,6 @@ use crate::scalar::{new_scalar, Scalar}; use crate::trusted_len::TrustedLen; use crate::types::NativeType; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod iterator; diff --git a/crates/polars-arrow/src/array/fixed_size_binary/data.rs b/crates/polars-arrow/src/array/fixed_size_binary/data.rs deleted file mode 100644 index f04be9883f64..000000000000 --- a/crates/polars-arrow/src/array/fixed_size_binary/data.rs +++ /dev/null @@ -1,37 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{Arrow2Arrow, FixedSizeBinaryArray}; -use crate::bitmap::Bitmap; -use crate::buffer::Buffer; -use crate::datatypes::ArrowDataType; - -impl Arrow2Arrow for FixedSizeBinaryArray { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype.clone().into(); - let builder = ArrayDataBuilder::new(dtype) - .len(self.len()) - .buffers(vec![self.values.clone().into()]) - .nulls(self.validity.as_ref().map(|b| b.clone().into())); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype: ArrowDataType = data.data_type().clone().into(); - let size = match dtype { - ArrowDataType::FixedSizeBinary(size) => size, - _ => unreachable!("must be FixedSizeBinary"), - }; - - let mut values: Buffer = data.buffers()[0].clone().into(); - values.slice(data.offset() * size, data.len() * size); - - Self { - size, - dtype, - values, - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/fixed_size_binary/mod.rs b/crates/polars-arrow/src/array/fixed_size_binary/mod.rs index ec3f96626c14..f8f5a1760d45 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/mod.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/mod.rs @@ -3,8 +3,6 @@ use crate::bitmap::Bitmap; use crate::buffer::Buffer; use crate::datatypes::ArrowDataType; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod iterator; diff --git a/crates/polars-arrow/src/array/fixed_size_list/data.rs b/crates/polars-arrow/src/array/fixed_size_list/data.rs deleted file mode 100644 index c1f353db691a..000000000000 --- a/crates/polars-arrow/src/array/fixed_size_list/data.rs +++ /dev/null @@ -1,38 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{from_data, to_data, Arrow2Arrow, FixedSizeListArray}; -use crate::bitmap::Bitmap; -use crate::datatypes::ArrowDataType; - -impl Arrow2Arrow for FixedSizeListArray { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype.clone().into(); - let builder = ArrayDataBuilder::new(dtype) - .len(self.len()) - .nulls(self.validity.as_ref().map(|b| b.clone().into())) - .child_data(vec![to_data(self.values.as_ref())]); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype: ArrowDataType = data.data_type().clone().into(); - let length = data.len() - data.offset(); - let size = match dtype { - ArrowDataType::FixedSizeList(_, size) => size, - _ => unreachable!("must be FixedSizeList type"), - }; - - let mut values = from_data(&data.child_data()[0]); - values.slice(data.offset() * size, data.len() * size); - - Self { - size, - length, - dtype, - values, - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/fixed_size_list/mod.rs b/crates/polars-arrow/src/array/fixed_size_list/mod.rs index 4f1622819813..b8340825d0c7 100644 --- a/crates/polars-arrow/src/array/fixed_size_list/mod.rs +++ b/crates/polars-arrow/src/array/fixed_size_list/mod.rs @@ -2,8 +2,6 @@ use super::{new_empty_array, new_null_array, Array, Splitable}; use crate::bitmap::Bitmap; use crate::datatypes::{ArrowDataType, Field}; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod iterator; diff --git a/crates/polars-arrow/src/array/list/data.rs b/crates/polars-arrow/src/array/list/data.rs deleted file mode 100644 index 0d28583df125..000000000000 --- a/crates/polars-arrow/src/array/list/data.rs +++ /dev/null @@ -1,38 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{from_data, to_data, Arrow2Arrow, ListArray}; -use crate::bitmap::Bitmap; -use crate::offset::{Offset, OffsetsBuffer}; - -impl Arrow2Arrow for ListArray { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype.clone().into(); - - let builder = ArrayDataBuilder::new(dtype) - .len(self.len()) - .buffers(vec![self.offsets.clone().into_inner().into()]) - .nulls(self.validity.as_ref().map(|b| b.clone().into())) - .child_data(vec![to_data(self.values.as_ref())]); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype = data.data_type().clone().into(); - if data.is_empty() { - // Handle empty offsets - return Self::new_empty(dtype); - } - - let mut offsets = unsafe { OffsetsBuffer::new_unchecked(data.buffers()[0].clone().into()) }; - offsets.slice(data.offset(), data.len() + 1); - - Self { - dtype, - offsets, - values: from_data(&data.child_data()[0]), - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/list/mod.rs b/crates/polars-arrow/src/array/list/mod.rs index 9c74201133b1..87f7b709f14b 100644 --- a/crates/polars-arrow/src/array/list/mod.rs +++ b/crates/polars-arrow/src/array/list/mod.rs @@ -4,8 +4,6 @@ use crate::bitmap::Bitmap; use crate::datatypes::{ArrowDataType, Field}; use crate::offset::{Offset, Offsets, OffsetsBuffer}; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod iterator; diff --git a/crates/polars-arrow/src/array/map/data.rs b/crates/polars-arrow/src/array/map/data.rs deleted file mode 100644 index b5530886d817..000000000000 --- a/crates/polars-arrow/src/array/map/data.rs +++ /dev/null @@ -1,38 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{from_data, to_data, Arrow2Arrow, MapArray}; -use crate::bitmap::Bitmap; -use crate::offset::OffsetsBuffer; - -impl Arrow2Arrow for MapArray { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype.clone().into(); - - let builder = ArrayDataBuilder::new(dtype) - .len(self.len()) - .buffers(vec![self.offsets.clone().into_inner().into()]) - .nulls(self.validity.as_ref().map(|b| b.clone().into())) - .child_data(vec![to_data(self.field.as_ref())]); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype = data.data_type().clone().into(); - if data.is_empty() { - // Handle empty offsets - return Self::new_empty(dtype); - } - - let mut offsets = unsafe { OffsetsBuffer::new_unchecked(data.buffers()[0].clone().into()) }; - offsets.slice(data.offset(), data.len() + 1); - - Self { - dtype: data.data_type().clone().into(), - offsets, - field: from_data(&data.child_data()[0]), - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/map/mod.rs b/crates/polars-arrow/src/array/map/mod.rs index 5497c1d7342b..1018c21c830a 100644 --- a/crates/polars-arrow/src/array/map/mod.rs +++ b/crates/polars-arrow/src/array/map/mod.rs @@ -4,8 +4,6 @@ use crate::bitmap::Bitmap; use crate::datatypes::{ArrowDataType, Field}; use crate::offset::OffsetsBuffer; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod iterator; diff --git a/crates/polars-arrow/src/array/mod.rs b/crates/polars-arrow/src/array/mod.rs index 08702e8021d3..a8d0469d5f08 100644 --- a/crates/polars-arrow/src/array/mod.rs +++ b/crates/polars-arrow/src/array/mod.rs @@ -409,115 +409,6 @@ pub fn new_null_array(dtype: ArrowDataType, length: usize) -> Box { } } -/// Trait providing bi-directional conversion between polars_arrow [`Array`] and arrow-rs [`ArrayData`] -/// -/// [`ArrayData`]: arrow_data::ArrayData -#[cfg(feature = "arrow_rs")] -pub trait Arrow2Arrow: Array { - /// Convert this [`Array`] into [`ArrayData`] - fn to_data(&self) -> arrow_data::ArrayData; - - /// Create this [`Array`] from [`ArrayData`] - fn from_data(data: &arrow_data::ArrayData) -> Self; -} - -#[cfg(feature = "arrow_rs")] -macro_rules! to_data_dyn { - ($array:expr, $ty:ty) => {{ - let f = |x: &$ty| x.to_data(); - general_dyn!($array, $ty, f) - }}; -} - -#[cfg(feature = "arrow_rs")] -impl From> for arrow_array::ArrayRef { - fn from(value: Box) -> Self { - value.as_ref().into() - } -} - -#[cfg(feature = "arrow_rs")] -impl From<&dyn Array> for arrow_array::ArrayRef { - fn from(value: &dyn Array) -> Self { - arrow_array::make_array(to_data(value)) - } -} - -#[cfg(feature = "arrow_rs")] -impl From for Box { - fn from(value: arrow_array::ArrayRef) -> Self { - value.as_ref().into() - } -} - -#[cfg(feature = "arrow_rs")] -impl From<&dyn arrow_array::Array> for Box { - fn from(value: &dyn arrow_array::Array) -> Self { - from_data(&value.to_data()) - } -} - -/// Convert an polars_arrow [`Array`] to [`arrow_data::ArrayData`] -#[cfg(feature = "arrow_rs")] -pub fn to_data(array: &dyn Array) -> arrow_data::ArrayData { - use crate::datatypes::PhysicalType::*; - match array.dtype().to_physical_type() { - Null => to_data_dyn!(array, NullArray), - Boolean => to_data_dyn!(array, BooleanArray), - Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { - to_data_dyn!(array, PrimitiveArray<$T>) - }), - Binary => to_data_dyn!(array, BinaryArray), - LargeBinary => to_data_dyn!(array, BinaryArray), - FixedSizeBinary => to_data_dyn!(array, FixedSizeBinaryArray), - Utf8 => to_data_dyn!(array, Utf8Array::), - LargeUtf8 => to_data_dyn!(array, Utf8Array::), - List => to_data_dyn!(array, ListArray::), - LargeList => to_data_dyn!(array, ListArray::), - FixedSizeList => to_data_dyn!(array, FixedSizeListArray), - Struct => to_data_dyn!(array, StructArray), - Union => to_data_dyn!(array, UnionArray), - Dictionary(key_type) => { - match_integer_type!(key_type, |$T| { - to_data_dyn!(array, DictionaryArray::<$T>) - }) - }, - Map => to_data_dyn!(array, MapArray), - BinaryView | Utf8View => todo!(), - } -} - -/// Convert an [`arrow_data::ArrayData`] to polars_arrow [`Array`] -#[cfg(feature = "arrow_rs")] -pub fn from_data(data: &arrow_data::ArrayData) -> Box { - use crate::datatypes::PhysicalType::*; - let dtype: ArrowDataType = data.data_type().clone().into(); - match dtype.to_physical_type() { - Null => Box::new(NullArray::from_data(data)), - Boolean => Box::new(BooleanArray::from_data(data)), - Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { - Box::new(PrimitiveArray::<$T>::from_data(data)) - }), - Binary => Box::new(BinaryArray::::from_data(data)), - LargeBinary => Box::new(BinaryArray::::from_data(data)), - FixedSizeBinary => Box::new(FixedSizeBinaryArray::from_data(data)), - Utf8 => Box::new(Utf8Array::::from_data(data)), - LargeUtf8 => Box::new(Utf8Array::::from_data(data)), - List => Box::new(ListArray::::from_data(data)), - LargeList => Box::new(ListArray::::from_data(data)), - FixedSizeList => Box::new(FixedSizeListArray::from_data(data)), - Struct => Box::new(StructArray::from_data(data)), - Union => Box::new(UnionArray::from_data(data)), - Dictionary(key_type) => { - match_integer_type!(key_type, |$T| { - Box::new(DictionaryArray::<$T>::from_data(data)) - }) - }, - Map => Box::new(MapArray::from_data(data)), - BinaryView | Utf8View => todo!(), - } -} - macro_rules! clone_dyn { ($array:expr, $ty:ty) => {{ let f = |x: &$ty| Box::new(x.clone()); diff --git a/crates/polars-arrow/src/array/null.rs b/crates/polars-arrow/src/array/null.rs index 4960b263667c..e6e840d86860 100644 --- a/crates/polars-arrow/src/array/null.rs +++ b/crates/polars-arrow/src/array/null.rs @@ -213,24 +213,3 @@ impl FromFfi for NullArray { Self::try_new(dtype, array.array().len()) } } - -#[cfg(feature = "arrow_rs")] -mod arrow { - use arrow_data::{ArrayData, ArrayDataBuilder}; - - use super::*; - impl NullArray { - /// Convert this array into [`arrow_data::ArrayData`] - pub fn to_data(&self) -> ArrayData { - let builder = ArrayDataBuilder::new(arrow_schema::DataType::Null).len(self.len()); - - // SAFETY: safe by construction - unsafe { builder.build_unchecked() } - } - - /// Create this array from [`ArrayData`] - pub fn from_data(data: &ArrayData) -> Self { - Self::new(ArrowDataType::Null, data.len()) - } - } -} diff --git a/crates/polars-arrow/src/array/primitive/data.rs b/crates/polars-arrow/src/array/primitive/data.rs deleted file mode 100644 index 56a94107cb89..000000000000 --- a/crates/polars-arrow/src/array/primitive/data.rs +++ /dev/null @@ -1,33 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{Arrow2Arrow, PrimitiveArray}; -use crate::bitmap::Bitmap; -use crate::buffer::Buffer; -use crate::types::NativeType; - -impl Arrow2Arrow for PrimitiveArray { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype.clone().into(); - - let builder = ArrayDataBuilder::new(dtype) - .len(self.len()) - .buffers(vec![self.values.clone().into()]) - .nulls(self.validity.as_ref().map(|b| b.clone().into())); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype = data.data_type().clone().into(); - - let mut values: Buffer = data.buffers()[0].clone().into(); - values.slice(data.offset(), data.len()); - - Self { - dtype, - values, - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/primitive/mod.rs b/crates/polars-arrow/src/array/primitive/mod.rs index 85353ecb5178..8accc161faf2 100644 --- a/crates/polars-arrow/src/array/primitive/mod.rs +++ b/crates/polars-arrow/src/array/primitive/mod.rs @@ -11,8 +11,6 @@ use crate::datatypes::*; use crate::trusted_len::TrustedLen; use crate::types::{days_ms, f16, i256, months_days_ns, NativeType}; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod from_natural; diff --git a/crates/polars-arrow/src/array/struct_/data.rs b/crates/polars-arrow/src/array/struct_/data.rs deleted file mode 100644 index a65b491bfe77..000000000000 --- a/crates/polars-arrow/src/array/struct_/data.rs +++ /dev/null @@ -1,29 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{from_data, to_data, Arrow2Arrow, StructArray}; -use crate::bitmap::Bitmap; - -impl Arrow2Arrow for StructArray { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype.clone().into(); - - let builder = ArrayDataBuilder::new(dtype) - .len(self.len()) - .nulls(self.validity.as_ref().map(|b| b.clone().into())) - .child_data(self.values.iter().map(|x| to_data(x.as_ref())).collect()); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype = data.data_type().clone().into(); - - Self { - dtype, - length: data.len(), - values: data.child_data().iter().map(from_data).collect(), - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/struct_/mod.rs b/crates/polars-arrow/src/array/struct_/mod.rs index 11d0f2de200f..eeaac519bb0d 100644 --- a/crates/polars-arrow/src/array/struct_/mod.rs +++ b/crates/polars-arrow/src/array/struct_/mod.rs @@ -2,8 +2,6 @@ use super::{new_empty_array, new_null_array, Array, Splitable}; use crate::bitmap::Bitmap; use crate::datatypes::{ArrowDataType, Field}; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod iterator; diff --git a/crates/polars-arrow/src/array/union/data.rs b/crates/polars-arrow/src/array/union/data.rs deleted file mode 100644 index 869fdcfc248d..000000000000 --- a/crates/polars-arrow/src/array/union/data.rs +++ /dev/null @@ -1,70 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{from_data, to_data, Arrow2Arrow, UnionArray}; -use crate::buffer::Buffer; -use crate::datatypes::ArrowDataType; - -impl Arrow2Arrow for UnionArray { - fn to_data(&self) -> ArrayData { - let dtype = arrow_schema::DataType::from(self.dtype.clone()); - let len = self.len(); - - let builder = match self.offsets.clone() { - Some(offsets) => ArrayDataBuilder::new(dtype) - .len(len) - .buffers(vec![self.types.clone().into(), offsets.into()]) - .child_data(self.fields.iter().map(|x| to_data(x.as_ref())).collect()), - None => ArrayDataBuilder::new(dtype) - .len(len) - .buffers(vec![self.types.clone().into()]) - .child_data( - self.fields - .iter() - .map(|x| to_data(x.as_ref()).slice(self.offset, len)) - .collect(), - ), - }; - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype: ArrowDataType = data.data_type().clone().into(); - - let fields = data.child_data().iter().map(from_data).collect(); - let buffers = data.buffers(); - let mut types: Buffer = buffers[0].clone().into(); - types.slice(data.offset(), data.len()); - let offsets = match buffers.len() == 2 { - true => { - let mut offsets: Buffer = buffers[1].clone().into(); - offsets.slice(data.offset(), data.len()); - Some(offsets) - }, - false => None, - }; - - // Map from type id to array index - let map = match &dtype { - ArrowDataType::Union(_, Some(ids), _) => { - let mut map = [0; 127]; - for (pos, &id) in ids.iter().enumerate() { - map[id as usize] = pos; - } - Some(map) - }, - ArrowDataType::Union(_, None, _) => None, - _ => unreachable!("must be Union type"), - }; - - Self { - types, - map, - fields, - offsets, - dtype, - offset: data.offset(), - } - } -} diff --git a/crates/polars-arrow/src/array/union/mod.rs b/crates/polars-arrow/src/array/union/mod.rs index e42d268f5c06..f8007a485ed5 100644 --- a/crates/polars-arrow/src/array/union/mod.rs +++ b/crates/polars-arrow/src/array/union/mod.rs @@ -6,8 +6,6 @@ use crate::buffer::Buffer; use crate::datatypes::{ArrowDataType, Field, UnionMode}; use crate::scalar::{new_scalar, Scalar}; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod iterator; diff --git a/crates/polars-arrow/src/array/utf8/data.rs b/crates/polars-arrow/src/array/utf8/data.rs deleted file mode 100644 index 37f73a089aa6..000000000000 --- a/crates/polars-arrow/src/array/utf8/data.rs +++ /dev/null @@ -1,42 +0,0 @@ -use arrow_data::{ArrayData, ArrayDataBuilder}; - -use crate::array::{Arrow2Arrow, Utf8Array}; -use crate::bitmap::Bitmap; -use crate::offset::{Offset, OffsetsBuffer}; - -impl Arrow2Arrow for Utf8Array { - fn to_data(&self) -> ArrayData { - let dtype = self.dtype().clone().into(); - let builder = ArrayDataBuilder::new(dtype) - .len(self.offsets().len_proxy()) - .buffers(vec![ - self.offsets.clone().into_inner().into(), - self.values.clone().into(), - ]) - .nulls(self.validity.as_ref().map(|b| b.clone().into())); - - // SAFETY: Array is valid - unsafe { builder.build_unchecked() } - } - - fn from_data(data: &ArrayData) -> Self { - let dtype = data.data_type().clone().into(); - if data.is_empty() { - // Handle empty offsets - return Self::new_empty(dtype); - } - - let buffers = data.buffers(); - - // SAFETY: ArrayData is valid - let mut offsets = unsafe { OffsetsBuffer::new_unchecked(buffers[0].clone().into()) }; - offsets.slice(data.offset(), data.len() + 1); - - Self { - dtype, - offsets, - values: buffers[1].clone().into(), - validity: data.nulls().map(|n| Bitmap::from_null_buffer(n.clone())), - } - } -} diff --git a/crates/polars-arrow/src/array/utf8/mod.rs b/crates/polars-arrow/src/array/utf8/mod.rs index ebec52b78d28..fffa36ba2f8f 100644 --- a/crates/polars-arrow/src/array/utf8/mod.rs +++ b/crates/polars-arrow/src/array/utf8/mod.rs @@ -11,8 +11,6 @@ use crate::datatypes::ArrowDataType; use crate::offset::{Offset, Offsets, OffsetsBuffer}; use crate::trusted_len::TrustedLen; -#[cfg(feature = "arrow_rs")] -mod data; mod ffi; pub(super) mod fmt; mod from; diff --git a/crates/polars-arrow/src/bitmap/immutable.rs b/crates/polars-arrow/src/bitmap/immutable.rs index 2ba89e68568a..a896651467d2 100644 --- a/crates/polars-arrow/src/bitmap/immutable.rs +++ b/crates/polars-arrow/src/bitmap/immutable.rs @@ -595,22 +595,6 @@ impl Bitmap { ) -> std::result::Result { Ok(MutableBitmap::try_from_trusted_len_iter_unchecked(iterator)?.into()) } - - /// Create a new [`Bitmap`] from an arrow [`NullBuffer`] - /// - /// [`NullBuffer`]: arrow_buffer::buffer::NullBuffer - #[cfg(feature = "arrow_rs")] - pub fn from_null_buffer(value: arrow_buffer::buffer::NullBuffer) -> Self { - let offset = value.offset(); - let length = value.len(); - let unset_bits = value.null_count(); - Self { - storage: SharedStorage::from_arrow_buffer(value.buffer().clone()), - offset, - length, - unset_bit_count_cache: AtomicU64::new(unset_bits as u64), - } - } } impl<'a> IntoIterator for &'a Bitmap { @@ -631,17 +615,6 @@ impl IntoIterator for Bitmap { } } -#[cfg(feature = "arrow_rs")] -impl From for arrow_buffer::buffer::NullBuffer { - fn from(value: Bitmap) -> Self { - let null_count = value.unset_bits(); - let buffer = value.storage.into_arrow_buffer(); - let buffer = arrow_buffer::buffer::BooleanBuffer::new(buffer, value.offset, value.length); - // SAFETY: null count is accurate - unsafe { arrow_buffer::buffer::NullBuffer::new_unchecked(buffer, null_count) } - } -} - impl Splitable for Bitmap { #[inline(always)] fn check_bound(&self, offset: usize) -> bool { diff --git a/crates/polars-arrow/src/buffer/immutable.rs b/crates/polars-arrow/src/buffer/immutable.rs index 9e612427a315..1c6e5b5aa4ff 100644 --- a/crates/polars-arrow/src/buffer/immutable.rs +++ b/crates/polars-arrow/src/buffer/immutable.rs @@ -288,24 +288,6 @@ impl IntoIterator for Buffer { } } -#[cfg(feature = "arrow_rs")] -impl From for Buffer { - fn from(value: arrow_buffer::Buffer) -> Self { - Self::from_storage(SharedStorage::from_arrow_buffer(value)) - } -} - -#[cfg(feature = "arrow_rs")] -impl From> for arrow_buffer::Buffer { - fn from(value: Buffer) -> Self { - let offset = value.offset(); - value - .storage - .into_arrow_buffer() - .slice_with_length(offset * size_of::(), value.length * size_of::()) - } -} - unsafe impl<'a, T: 'a> ArrayAccessor<'a> for Buffer { type Item = &'a T; diff --git a/crates/polars-arrow/src/datatypes/field.rs b/crates/polars-arrow/src/datatypes/field.rs index 8bf18af82f46..b1a5baf5c0ee 100644 --- a/crates/polars-arrow/src/datatypes/field.rs +++ b/crates/polars-arrow/src/datatypes/field.rs @@ -60,60 +60,3 @@ impl Field { &self.dtype } } - -#[cfg(feature = "arrow_rs")] -impl From for arrow_schema::Field { - fn from(value: Field) -> Self { - Self::new( - value.name.to_string(), - value.dtype.into(), - value.is_nullable, - ) - .with_metadata( - value - .metadata - .into_iter() - .map(|(k, v)| (k.to_string(), v.to_string())) - .collect(), - ) - } -} - -#[cfg(feature = "arrow_rs")] -impl From for Field { - fn from(value: arrow_schema::Field) -> Self { - (&value).into() - } -} - -#[cfg(feature = "arrow_rs")] -impl From<&arrow_schema::Field> for Field { - fn from(value: &arrow_schema::Field) -> Self { - let dtype = value.data_type().clone().into(); - let metadata = value - .metadata() - .iter() - .map(|(k, v)| (PlSmallStr::from_str(k), PlSmallStr::from_str(v))) - .collect(); - Self::new( - PlSmallStr::from_str(value.name().as_str()), - dtype, - value.is_nullable(), - ) - .with_metadata(metadata) - } -} - -#[cfg(feature = "arrow_rs")] -impl From for Field { - fn from(value: arrow_schema::FieldRef) -> Self { - value.as_ref().into() - } -} - -#[cfg(feature = "arrow_rs")] -impl From<&arrow_schema::FieldRef> for Field { - fn from(value: &arrow_schema::FieldRef) -> Self { - value.as_ref().into() - } -} diff --git a/crates/polars-arrow/src/datatypes/mod.rs b/crates/polars-arrow/src/datatypes/mod.rs index 8f2226c709e6..c609ffbe432f 100644 --- a/crates/polars-arrow/src/datatypes/mod.rs +++ b/crates/polars-arrow/src/datatypes/mod.rs @@ -175,143 +175,6 @@ pub enum ArrowDataType { Unknown, } -#[cfg(feature = "arrow_rs")] -impl From for arrow_schema::DataType { - fn from(value: ArrowDataType) -> Self { - use arrow_schema::{Field as ArrowField, UnionFields}; - - match value { - ArrowDataType::Null => Self::Null, - ArrowDataType::Boolean => Self::Boolean, - ArrowDataType::Int8 => Self::Int8, - ArrowDataType::Int16 => Self::Int16, - ArrowDataType::Int32 => Self::Int32, - ArrowDataType::Int64 => Self::Int64, - ArrowDataType::UInt8 => Self::UInt8, - ArrowDataType::UInt16 => Self::UInt16, - ArrowDataType::UInt32 => Self::UInt32, - ArrowDataType::UInt64 => Self::UInt64, - ArrowDataType::Float16 => Self::Float16, - ArrowDataType::Float32 => Self::Float32, - ArrowDataType::Float64 => Self::Float64, - ArrowDataType::Timestamp(unit, tz) => { - Self::Timestamp(unit.into(), tz.map(|x| Arc::::from(x.as_str()))) - }, - ArrowDataType::Date32 => Self::Date32, - ArrowDataType::Date64 => Self::Date64, - ArrowDataType::Time32(unit) => Self::Time32(unit.into()), - ArrowDataType::Time64(unit) => Self::Time64(unit.into()), - ArrowDataType::Duration(unit) => Self::Duration(unit.into()), - ArrowDataType::Interval(unit) => Self::Interval(unit.into()), - ArrowDataType::Binary => Self::Binary, - ArrowDataType::FixedSizeBinary(size) => Self::FixedSizeBinary(size as _), - ArrowDataType::LargeBinary => Self::LargeBinary, - ArrowDataType::Utf8 => Self::Utf8, - ArrowDataType::LargeUtf8 => Self::LargeUtf8, - ArrowDataType::List(f) => Self::List(Arc::new((*f).into())), - ArrowDataType::FixedSizeList(f, size) => { - Self::FixedSizeList(Arc::new((*f).into()), size as _) - }, - ArrowDataType::LargeList(f) => Self::LargeList(Arc::new((*f).into())), - ArrowDataType::Struct(f) => Self::Struct(f.into_iter().map(ArrowField::from).collect()), - ArrowDataType::Union(fields, Some(ids), mode) => { - let ids = ids.into_iter().map(|x| x as _); - let fields = fields.into_iter().map(ArrowField::from); - Self::Union(UnionFields::new(ids, fields), mode.into()) - }, - ArrowDataType::Union(fields, None, mode) => { - let ids = 0..fields.len() as i8; - let fields = fields.into_iter().map(ArrowField::from); - Self::Union(UnionFields::new(ids, fields), mode.into()) - }, - ArrowDataType::Map(f, ordered) => Self::Map(Arc::new((*f).into()), ordered), - ArrowDataType::Dictionary(key, value, _) => Self::Dictionary( - Box::new(ArrowDataType::from(key).into()), - Box::new((*value).into()), - ), - ArrowDataType::Decimal(precision, scale) => { - Self::Decimal128(precision as _, scale as _) - }, - ArrowDataType::Decimal256(precision, scale) => { - Self::Decimal256(precision as _, scale as _) - }, - ArrowDataType::Extension(_, d, _) => (*d).into(), - ArrowDataType::BinaryView | ArrowDataType::Utf8View => { - panic!("view datatypes not supported by arrow-rs") - }, - ArrowDataType::Unknown => unimplemented!(), - } - } -} - -#[cfg(feature = "arrow_rs")] -impl From for ArrowDataType { - fn from(value: arrow_schema::DataType) -> Self { - use arrow_schema::DataType; - match value { - DataType::Null => Self::Null, - DataType::Boolean => Self::Boolean, - DataType::Int8 => Self::Int8, - DataType::Int16 => Self::Int16, - DataType::Int32 => Self::Int32, - DataType::Int64 => Self::Int64, - DataType::UInt8 => Self::UInt8, - DataType::UInt16 => Self::UInt16, - DataType::UInt32 => Self::UInt32, - DataType::UInt64 => Self::UInt64, - DataType::Float16 => Self::Float16, - DataType::Float32 => Self::Float32, - DataType::Float64 => Self::Float64, - DataType::Timestamp(unit, tz) => { - Self::Timestamp(unit.into(), tz.map(|x| PlSmallStr::from_str(x.as_ref()))) - }, - DataType::Date32 => Self::Date32, - DataType::Date64 => Self::Date64, - DataType::Time32(unit) => Self::Time32(unit.into()), - DataType::Time64(unit) => Self::Time64(unit.into()), - DataType::Duration(unit) => Self::Duration(unit.into()), - DataType::Interval(unit) => Self::Interval(unit.into()), - DataType::Binary => Self::Binary, - DataType::FixedSizeBinary(size) => Self::FixedSizeBinary(size as _), - DataType::LargeBinary => Self::LargeBinary, - DataType::Utf8 => Self::Utf8, - DataType::LargeUtf8 => Self::LargeUtf8, - DataType::List(f) => Self::List(Box::new(f.into())), - DataType::FixedSizeList(f, size) => Self::FixedSizeList(Box::new(f.into()), size as _), - DataType::LargeList(f) => Self::LargeList(Box::new(f.into())), - DataType::Struct(f) => Self::Struct(f.into_iter().map(Into::into).collect()), - DataType::Union(fields, mode) => { - let ids = fields.iter().map(|(x, _)| x as _).collect(); - let fields = fields.iter().map(|(_, f)| f.into()).collect(); - Self::Union(fields, Some(ids), mode.into()) - }, - DataType::Map(f, ordered) => Self::Map(Box::new(f.into()), ordered), - DataType::Dictionary(key, value) => { - let key = match *key { - DataType::Int8 => IntegerType::Int8, - DataType::Int16 => IntegerType::Int16, - DataType::Int32 => IntegerType::Int32, - DataType::Int64 => IntegerType::Int64, - DataType::UInt8 => IntegerType::UInt8, - DataType::UInt16 => IntegerType::UInt16, - DataType::UInt32 => IntegerType::UInt32, - DataType::UInt64 => IntegerType::UInt64, - d => panic!("illegal dictionary key type: {d}"), - }; - Self::Dictionary(key, Box::new((*value).into()), false) - }, - DataType::Decimal128(precision, scale) => Self::Decimal(precision as _, scale as _), - DataType::Decimal256(precision, scale) => Self::Decimal256(precision as _, scale as _), - DataType::RunEndEncoded(_, _) => { - panic!("Run-end encoding not supported by polars_arrow") - }, - // This ensures that it doesn't fail to compile when new variants are added to Arrow - #[allow(unreachable_patterns)] - dtype => unimplemented!("unsupported datatype: {dtype}"), - } - } -} - /// Mode of [`ArrowDataType::Union`] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -322,26 +185,6 @@ pub enum UnionMode { Sparse, } -#[cfg(feature = "arrow_rs")] -impl From for arrow_schema::UnionMode { - fn from(value: UnionMode) -> Self { - match value { - UnionMode::Dense => Self::Dense, - UnionMode::Sparse => Self::Sparse, - } - } -} - -#[cfg(feature = "arrow_rs")] -impl From for UnionMode { - fn from(value: arrow_schema::UnionMode) -> Self { - match value { - arrow_schema::UnionMode::Dense => Self::Dense, - arrow_schema::UnionMode::Sparse => Self::Sparse, - } - } -} - impl UnionMode { /// Constructs a [`UnionMode::Sparse`] if the input bool is true, /// or otherwise constructs a [`UnionMode::Dense`] @@ -378,30 +221,6 @@ pub enum TimeUnit { Nanosecond, } -#[cfg(feature = "arrow_rs")] -impl From for arrow_schema::TimeUnit { - fn from(value: TimeUnit) -> Self { - match value { - TimeUnit::Nanosecond => Self::Nanosecond, - TimeUnit::Millisecond => Self::Millisecond, - TimeUnit::Microsecond => Self::Microsecond, - TimeUnit::Second => Self::Second, - } - } -} - -#[cfg(feature = "arrow_rs")] -impl From for TimeUnit { - fn from(value: arrow_schema::TimeUnit) -> Self { - match value { - arrow_schema::TimeUnit::Nanosecond => Self::Nanosecond, - arrow_schema::TimeUnit::Millisecond => Self::Millisecond, - arrow_schema::TimeUnit::Microsecond => Self::Microsecond, - arrow_schema::TimeUnit::Second => Self::Second, - } - } -} - /// Interval units defined in Arrow #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -415,28 +234,6 @@ pub enum IntervalUnit { MonthDayNano, } -#[cfg(feature = "arrow_rs")] -impl From for arrow_schema::IntervalUnit { - fn from(value: IntervalUnit) -> Self { - match value { - IntervalUnit::YearMonth => Self::YearMonth, - IntervalUnit::DayTime => Self::DayTime, - IntervalUnit::MonthDayNano => Self::MonthDayNano, - } - } -} - -#[cfg(feature = "arrow_rs")] -impl From for IntervalUnit { - fn from(value: arrow_schema::IntervalUnit) -> Self { - match value { - arrow_schema::IntervalUnit::YearMonth => Self::YearMonth, - arrow_schema::IntervalUnit::DayTime => Self::DayTime, - arrow_schema::IntervalUnit::MonthDayNano => Self::MonthDayNano, - } - } -} - impl ArrowDataType { /// the [`PhysicalType`] of this [`ArrowDataType`]. pub fn to_physical_type(&self) -> PhysicalType { diff --git a/crates/polars-arrow/src/storage.rs b/crates/polars-arrow/src/storage.rs index a193fd519a38..e7656f9d880b 100644 --- a/crates/polars-arrow/src/storage.rs +++ b/crates/polars-arrow/src/storage.rs @@ -7,12 +7,8 @@ use std::sync::atomic::{AtomicU64, Ordering}; use crate::ffi::InternalArrowArray; enum BackingStorage { - Vec { - capacity: usize, - }, + Vec { capacity: usize }, InternalArrowArray(InternalArrowArray), - #[cfg(feature = "arrow_rs")] - ArrowBuffer(arrow_buffer::Buffer), } struct SharedStorageInner { @@ -28,8 +24,6 @@ impl Drop for SharedStorageInner { fn drop(&mut self) { match self.backing.take() { Some(BackingStorage::InternalArrowArray(a)) => drop(a), - #[cfg(feature = "arrow_rs")] - Some(BackingStorage::ArrowBuffer(b)) => drop(b), Some(BackingStorage::Vec { capacity }) => unsafe { drop(Vec::from_raw_parts(self.ptr, self.length, capacity)) }, @@ -96,35 +90,6 @@ impl SharedStorage { } } -#[cfg(feature = "arrow_rs")] -impl SharedStorage { - pub fn from_arrow_buffer(buffer: arrow_buffer::Buffer) -> Self { - let ptr = buffer.as_ptr(); - let align_offset = ptr.align_offset(align_of::()); - assert_eq!(align_offset, 0, "arrow_buffer::Buffer misaligned"); - let length = buffer.len() / size_of::(); - - let inner = SharedStorageInner { - ref_count: AtomicU64::new(1), - ptr: ptr as *mut T, - length, - backing: Some(BackingStorage::ArrowBuffer(buffer)), - phantom: PhantomData, - }; - Self { - inner: NonNull::new(Box::into_raw(Box::new(inner))).unwrap(), - phantom: PhantomData, - } - } - - pub fn into_arrow_buffer(self) -> arrow_buffer::Buffer { - let ptr = NonNull::new(self.as_ptr() as *mut u8).unwrap(); - let len = self.len() * size_of::(); - let arc = std::sync::Arc::new(self); - unsafe { arrow_buffer::Buffer::from_custom_allocation(ptr, len, arc) } - } -} - impl SharedStorage { #[inline(always)] pub fn len(&self) -> usize { diff --git a/crates/polars-core/Cargo.toml b/crates/polars-core/Cargo.toml index 13047c5f600b..d01ae4dd0203 100644 --- a/crates/polars-core/Cargo.toml +++ b/crates/polars-core/Cargo.toml @@ -17,7 +17,6 @@ polars-utils = { workspace = true } ahash = { workspace = true } arrow = { workspace = true } -arrow-array = { workspace = true, optional = true } bitflags = { workspace = true } bytemuck = { workspace = true } chrono = { workspace = true, optional = true } @@ -100,7 +99,6 @@ partition_by = ["algorithm_group_by"] describe = [] timezones = ["temporal", "chrono", "chrono-tz", "arrow/chrono-tz", "arrow/timezones"] dynamic_group_by = ["dtype-datetime", "dtype-date"] -arrow_rs = ["arrow-array", "arrow/arrow_rs"] list_arithmetic = [] # opt-in datatypes for Series diff --git a/crates/polars-core/src/series/mod.rs b/crates/polars-core/src/series/mod.rs index 8ad018a4a678..227e53ec56d7 100644 --- a/crates/polars-core/src/series/mod.rs +++ b/crates/polars-core/src/series/mod.rs @@ -298,11 +298,6 @@ impl Series { Self::try_from((name, array)) } - #[cfg(feature = "arrow_rs")] - pub fn from_arrow_rs(name: PlSmallStr, array: &dyn arrow_array::Array) -> PolarsResult { - Self::from_arrow(name, array.into()) - } - /// Shrink the capacity of this array to fit its length. pub fn shrink_to_fit(&mut self) { self._get_inner_mut().shrink_to_fit() diff --git a/crates/polars/Cargo.toml b/crates/polars/Cargo.toml index 5baefb2e1ec2..c625b276f553 100644 --- a/crates/polars/Cargo.toml +++ b/crates/polars/Cargo.toml @@ -26,8 +26,7 @@ polars-utils = { workspace = true } [dev-dependencies] ahash = { workspace = true } apache-avro = { version = "0.17", features = ["snappy"] } -arrow = { workspace = true, features = ["arrow_rs"] } -arrow-buffer = { workspace = true } +arrow = { workspace = true } avro-schema = { workspace = true, features = ["async"] } either = { workspace = true } ethnum = "1" diff --git a/crates/polars/tests/it/arrow/bitmap/immutable.rs b/crates/polars/tests/it/arrow/bitmap/immutable.rs index 4f2b3f3748b0..336322534de2 100644 --- a/crates/polars/tests/it/arrow/bitmap/immutable.rs +++ b/crates/polars/tests/it/arrow/bitmap/immutable.rs @@ -76,28 +76,3 @@ fn debug() { "Bitmap { len: 7, offset: 2, bytes: [0b111110__, 0b_______1] }" ); } - -#[test] -fn from_arrow() { - use arrow_buffer::buffer::{BooleanBuffer, NullBuffer}; - let buffer = arrow_buffer::Buffer::from_iter(vec![true, true, true, false, false, false, true]); - let bools = BooleanBuffer::new(buffer, 0, 7); - let nulls = NullBuffer::new(bools); - assert_eq!(nulls.null_count(), 3); - - let bitmap = Bitmap::from_null_buffer(nulls.clone()); - assert_eq!(nulls.null_count(), bitmap.unset_bits()); - assert_eq!(nulls.len(), bitmap.len()); - let back = NullBuffer::from(bitmap); - assert_eq!(nulls, back); - - let nulls = nulls.slice(1, 3); - assert_eq!(nulls.null_count(), 1); - assert_eq!(nulls.len(), 3); - - let bitmap = Bitmap::from_null_buffer(nulls.clone()); - assert_eq!(nulls.null_count(), bitmap.unset_bits()); - assert_eq!(nulls.len(), bitmap.len()); - let back = NullBuffer::from(bitmap); - assert_eq!(nulls, back); -} diff --git a/crates/polars/tests/it/arrow/buffer/immutable.rs b/crates/polars/tests/it/arrow/buffer/immutable.rs index 9065b52fba35..cc8742ba73ae 100644 --- a/crates/polars/tests/it/arrow/buffer/immutable.rs +++ b/crates/polars/tests/it/arrow/buffer/immutable.rs @@ -43,73 +43,3 @@ fn from_vec() { assert_eq!(buffer.len(), 3); assert_eq!(buffer.as_slice(), &[0, 1, 2]); } - -#[test] -fn from_arrow() { - let buffer = arrow_buffer::Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]); - let b = Buffer::::from(buffer.clone()); - assert_eq!(b.len(), 3); - assert_eq!(b.as_slice(), &[1, 2, 3]); - let back = arrow_buffer::Buffer::from(b); - assert_eq!(back, buffer); - - let buffer = buffer.slice(4); - let b = Buffer::::from(buffer.clone()); - assert_eq!(b.len(), 2); - assert_eq!(b.as_slice(), &[2, 3]); - let back = arrow_buffer::Buffer::from(b); - assert_eq!(back, buffer); - - let buffer = arrow_buffer::Buffer::from_vec(vec![1_i64, 2_i64]); - let b = Buffer::::from(buffer.clone()); - assert_eq!(b.len(), 4); - assert_eq!(b.as_slice(), &[1, 0, 2, 0]); - let back = arrow_buffer::Buffer::from(b); - assert_eq!(back, buffer); - - let buffer = buffer.slice(4); - let b = Buffer::::from(buffer.clone()); - assert_eq!(b.len(), 3); - assert_eq!(b.as_slice(), &[0, 2, 0]); - let back = arrow_buffer::Buffer::from(b); - assert_eq!(back, buffer); -} - -#[test] -fn from_arrow_vec() { - // Zero-copy vec conversion in arrow-rs - let buffer = arrow_buffer::Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]); - let back: Vec = buffer.into_vec().unwrap(); - - // Zero-copy vec conversion in arrow2 - let buffer = Buffer::::from(back); - let back: Vec = buffer.into_mut().unwrap_right(); - - let buffer = arrow_buffer::Buffer::from_vec(back); - let buffer = Buffer::::from(buffer); - - // But not possible after conversion between buffer representations - let _ = buffer.into_mut().unwrap_left(); - - let buffer = Buffer::::from(vec![1_i32]); - let buffer = arrow_buffer::Buffer::from(buffer); - - // But not possible after conversion between buffer representations - let _ = buffer.into_vec::().unwrap_err(); -} - -#[test] -#[should_panic(expected = "arrow_buffer::Buffer misaligned")] -fn from_arrow_misaligned() { - let buffer = arrow_buffer::Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]).slice(1); - let _ = Buffer::::from(buffer); -} - -#[test] -fn from_arrow_sliced() { - let buffer = arrow_buffer::Buffer::from_vec(vec![1_i32, 2_i32, 3_i32]); - let b = Buffer::::from(buffer); - let sliced = b.sliced(1, 2); - let back = arrow_buffer::Buffer::from(sliced); - assert_eq!(back.typed_data::(), &[2, 3]); -}