From a440bdd8953f61ec622b1ac47a04e15f82cba7ed Mon Sep 17 00:00:00 2001 From: Ian Lai Date: Wed, 5 Feb 2025 06:24:17 +0000 Subject: [PATCH 01/10] refactor: replace uses of arrow_buffer and arrow_array with reexport in arrow --- datafusion/common/src/hash_utils.rs | 5 ++--- datafusion/common/src/scalar/mod.rs | 5 ++--- datafusion/core/tests/dataframe/mod.rs | 2 +- .../functions-aggregate/benches/array_agg.rs | 6 ++++-- .../functions-aggregate/src/correlation.rs | 4 ++-- datafusion/functions-nested/benches/map.rs | 2 +- datafusion/functions-nested/src/array_has.rs | 2 +- datafusion/functions-nested/src/concat.rs | 8 +++++--- datafusion/functions-nested/src/except.rs | 2 +- datafusion/functions-nested/src/extract.rs | 13 ++++--------- datafusion/functions-nested/src/flatten.rs | 2 +- datafusion/functions-nested/src/make_array.rs | 2 +- datafusion/functions-nested/src/map_extract.rs | 2 +- datafusion/functions-nested/src/range.rs | 18 +++++++++--------- datafusion/functions-nested/src/remove.rs | 2 +- datafusion/functions-nested/src/repeat.rs | 2 +- datafusion/functions-nested/src/replace.rs | 6 +++--- datafusion/functions-nested/src/resize.rs | 9 +++++---- datafusion/functions-nested/src/reverse.rs | 2 +- datafusion/functions-nested/src/sort.rs | 4 ++-- datafusion/functions-nested/src/utils.rs | 2 +- datafusion/functions/src/core/greatest.rs | 2 +- datafusion/functions/src/core/least.rs | 2 +- datafusion/functions/src/string/common.rs | 5 ++--- datafusion/functions/src/strings.rs | 6 +++--- datafusion/functions/src/unicode/substr.rs | 4 ++-- .../physical-expr-common/src/binary_map.rs | 8 ++++---- .../src/expressions/is_not_null.rs | 2 +- .../physical-expr/src/expressions/is_null.rs | 2 +- .../group_values/multi_group_by/bytes.rs | 10 +++++----- .../group_values/multi_group_by/bytes_view.rs | 7 ++----- .../group_values/multi_group_by/primitive.rs | 6 ++---- .../aggregates/group_values/null_builder.rs | 3 ++- .../group_values/single_group_by/primitive.rs | 7 ++++--- .../physical-plan/src/joins/hash_join.rs | 2 +- datafusion/physical-plan/src/joins/mod.rs | 2 +- .../src/joins/stream_join_utils.rs | 6 ++++-- datafusion/physical-plan/src/sorts/cursor.rs | 2 +- datafusion/physical-plan/src/unnest.rs | 6 ++++-- 39 files changed, 90 insertions(+), 92 deletions(-) diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index 0d1d93acf1fc..37b4cecc005c 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -25,8 +25,7 @@ use arrow::array::*; use arrow::datatypes::*; #[cfg(not(feature = "force_hash_collisions"))] use arrow::{downcast_dictionary_array, downcast_primitive_array}; -use arrow_buffer::IntervalDayTime; -use arrow_buffer::IntervalMonthDayNano; +use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; #[cfg(not(feature = "force_hash_collisions"))] use crate::cast::{ @@ -700,7 +699,7 @@ mod tests { // Tests actual values of hashes, which are different if forcing collisions #[cfg(not(feature = "force_hash_collisions"))] fn create_hashes_for_struct_arrays() { - use arrow_buffer::Buffer; + use arrow::buffer::Buffer; let boolarr = Arc::new(BooleanArray::from(vec![ false, false, true, true, true, true, diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 9d3429b67796..21b52e019880 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -3958,12 +3958,11 @@ mod tests { }; use crate::assert_batches_eq; - use arrow::buffer::OffsetBuffer; + use arrow::array::{types::Float64Type, NullBufferBuilder}; + use arrow::buffer::{Buffer, OffsetBuffer}; use arrow::compute::{is_null, kernels}; use arrow::error::ArrowError; use arrow::util::pretty::pretty_format_columns; - use arrow_array::types::Float64Type; - use arrow_buffer::{Buffer, NullBufferBuilder}; use arrow_schema::Fields; use chrono::NaiveDate; use rand::Rng; diff --git a/datafusion/core/tests/dataframe/mod.rs b/datafusion/core/tests/dataframe/mod.rs index e570ec75c691..d6ba4d5337c6 100644 --- a/datafusion/core/tests/dataframe/mod.rs +++ b/datafusion/core/tests/dataframe/mod.rs @@ -19,6 +19,7 @@ mod dataframe_functions; mod describe; +use arrow::buffer::ScalarBuffer; use arrow::datatypes::{DataType, Field, Float32Type, Int32Type, Schema, UInt64Type}; use arrow::util::pretty::pretty_format_batches; use arrow::{ @@ -33,7 +34,6 @@ use arrow_array::{ record_batch, Array, BooleanArray, DictionaryArray, Float32Array, Float64Array, Int8Array, UnionArray, }; -use arrow_buffer::ScalarBuffer; use arrow_schema::{ArrowError, SchemaRef, UnionFields, UnionMode}; use datafusion_functions_aggregate::count::count_udaf; use datafusion_functions_aggregate::expr_fn::{ diff --git a/datafusion/functions-aggregate/benches/array_agg.rs b/datafusion/functions-aggregate/benches/array_agg.rs index c4599cdfc9b3..c9792d541a4f 100644 --- a/datafusion/functions-aggregate/benches/array_agg.rs +++ b/datafusion/functions-aggregate/benches/array_agg.rs @@ -17,7 +17,9 @@ use std::sync::Arc; -use arrow::array::{Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray}; +use arrow::array::{ + Array, ArrayRef, ArrowPrimitiveType, AsArray, ListArray, NullBufferBuilder, +}; use arrow::datatypes::Int64Type; use arrow::util::bench_util::create_primitive_array; use arrow_schema::Field; @@ -25,8 +27,8 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_expr::Accumulator; use datafusion_functions_aggregate::array_agg::ArrayAggAccumulator; +use arrow::buffer::OffsetBuffer; use arrow::util::test_util::seedable_rng; -use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; use rand::distributions::{Distribution, Standard}; use rand::Rng; diff --git a/datafusion/functions-aggregate/src/correlation.rs b/datafusion/functions-aggregate/src/correlation.rs index 2741fe4bfc00..ac57256ce882 100644 --- a/datafusion/functions-aggregate/src/correlation.rs +++ b/datafusion/functions-aggregate/src/correlation.rs @@ -23,7 +23,8 @@ use std::mem::size_of_val; use std::sync::Arc; use arrow::array::{ - downcast_array, Array, AsArray, BooleanArray, Float64Array, UInt64Array, + downcast_array, Array, AsArray, BooleanArray, Float64Array, NullBufferBuilder, + UInt64Array, }; use arrow::compute::{and, filter, is_not_null, kernels::cast}; use arrow::datatypes::{Float64Type, UInt64Type}; @@ -31,7 +32,6 @@ use arrow::{ array::ArrayRef, datatypes::{DataType, Field}, }; -use arrow_buffer::NullBufferBuilder; use datafusion_expr::{EmitTo, GroupsAccumulator}; use datafusion_functions_aggregate_common::aggregate::groups_accumulator::accumulate::accumulate_multiple; use log::debug; diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index f92bb6cecf9c..22bef99becef 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -17,8 +17,8 @@ extern crate criterion; +use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow_array::{Int32Array, ListArray, StringArray}; -use arrow_buffer::{OffsetBuffer, ScalarBuffer}; use arrow_schema::{DataType, Field}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::prelude::ThreadRng; diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index df007b5cd60d..e56f5633b2a5 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -18,10 +18,10 @@ //! [`ScalarUDFImpl`] definitions for array_has, array_has_all and array_has_any functions. use arrow::array::{Array, ArrayRef, BooleanArray, OffsetSizeTrait}; +use arrow::buffer::BooleanBuffer; use arrow::datatypes::DataType; use arrow::row::{RowConverter, Rows, SortField}; use arrow_array::{Datum, GenericListArray, Scalar}; -use arrow_buffer::BooleanBuffer; use datafusion_common::cast::as_generic_list_array; use datafusion_common::utils::string_utils::string_array_to_vec; use datafusion_common::{exec_err, Result, ScalarValue}; diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index 93305faad56f..0e98c31ba663 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -20,9 +20,11 @@ use std::sync::Arc; use std::{any::Any, cmp::Ordering}; -use arrow::array::{Capacities, MutableArrayData}; -use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; -use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; +use arrow::array::{ + Array, ArrayRef, Capacities, GenericListArray, MutableArrayData, NullBufferBuilder, + OffsetSizeTrait, +}; +use arrow::buffer::OffsetBuffer; use arrow_schema::{DataType, Field}; use datafusion_common::Result; use datafusion_common::{ diff --git a/datafusion/functions-nested/src/except.rs b/datafusion/functions-nested/src/except.rs index 356c92983ae2..8cb870dba058 100644 --- a/datafusion/functions-nested/src/except.rs +++ b/datafusion/functions-nested/src/except.rs @@ -18,10 +18,10 @@ //! [`ScalarUDFImpl`] definitions for array_except function. use crate::utils::{check_datatypes, make_scalar_function}; +use arrow::buffer::OffsetBuffer; use arrow::row::{RowConverter, SortField}; use arrow_array::cast::AsArray; use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; -use arrow_buffer::OffsetBuffer; use arrow_schema::{DataType, FieldRef}; use datafusion_common::{exec_err, internal_err, HashSet, Result}; use datafusion_expr::{ diff --git a/datafusion/functions-nested/src/extract.rs b/datafusion/functions-nested/src/extract.rs index c87a96dca7a4..849600a24168 100644 --- a/datafusion/functions-nested/src/extract.rs +++ b/datafusion/functions-nested/src/extract.rs @@ -17,17 +17,12 @@ //! [`ScalarUDFImpl`] definitions for array_element, array_slice, array_pop_front, array_pop_back, and array_any_value functions. -use arrow::array::Array; -use arrow::array::ArrayRef; -use arrow::array::ArrowNativeTypeOp; -use arrow::array::Capacities; -use arrow::array::GenericListArray; -use arrow::array::Int64Array; -use arrow::array::MutableArrayData; -use arrow::array::OffsetSizeTrait; +use arrow::array::{ + Array, ArrayRef, ArrowNativeTypeOp, Capacities, GenericListArray, Int64Array, + MutableArrayData, NullBufferBuilder, OffsetSizeTrait, +}; use arrow::buffer::OffsetBuffer; use arrow::datatypes::DataType; -use arrow_buffer::NullBufferBuilder; use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::Field; use datafusion_common::cast::as_int64_array; diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index 30bf2fcbf624..b97b9e3c68a9 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -18,8 +18,8 @@ //! [`ScalarUDFImpl`] definitions for flatten function. use crate::utils::make_scalar_function; +use arrow::buffer::OffsetBuffer; use arrow_array::{ArrayRef, GenericListArray, OffsetSizeTrait}; -use arrow_buffer::OffsetBuffer; use arrow_schema::DataType; use arrow_schema::DataType::{FixedSizeList, LargeList, List, Null}; use datafusion_common::cast::{ diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 0283cdd40275..d43d0b4eae91 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -23,10 +23,10 @@ use std::vec; use crate::utils::make_scalar_function; use arrow::array::{ArrayData, Capacities, MutableArrayData}; +use arrow::buffer::OffsetBuffer; use arrow_array::{ new_null_array, Array, ArrayRef, GenericListArray, NullArray, OffsetSizeTrait, }; -use arrow_buffer::OffsetBuffer; use arrow_schema::DataType::{List, Null}; use arrow_schema::{DataType, Field}; use datafusion_common::utils::SingleRowListArrayBuilder; diff --git a/datafusion/functions-nested/src/map_extract.rs b/datafusion/functions-nested/src/map_extract.rs index 8ccfae0ff93e..268c3235cc49 100644 --- a/datafusion/functions-nested/src/map_extract.rs +++ b/datafusion/functions-nested/src/map_extract.rs @@ -20,9 +20,9 @@ use arrow::array::{ArrayRef, Capacities, MutableArrayData}; use arrow_array::{make_array, ListArray}; +use arrow::buffer::OffsetBuffer; use arrow::datatypes::DataType; use arrow_array::{Array, MapArray}; -use arrow_buffer::OffsetBuffer; use arrow_schema::Field; use datafusion_common::{cast::as_map_array, exec_err, Result}; diff --git a/datafusion/functions-nested/src/range.rs b/datafusion/functions-nested/src/range.rs index ff148f04ac5f..c3f52cef3366 100644 --- a/datafusion/functions-nested/src/range.rs +++ b/datafusion/functions-nested/src/range.rs @@ -18,16 +18,16 @@ //! [`ScalarUDFImpl`] definitions for range and gen_series functions. use crate::utils::make_scalar_function; -use arrow::array::{Array, ArrayRef, Int64Array, ListArray, ListBuilder}; -use arrow::datatypes::{DataType, Field}; -use arrow_array::builder::{Date32Builder, TimestampNanosecondBuilder}; -use arrow_array::temporal_conversions::as_datetime_with_timezone; -use arrow_array::timezone::Tz; -use arrow_array::types::{ - Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT, +use arrow::array::{ + builder::{Date32Builder, TimestampNanosecondBuilder}, + temporal_conversions::as_datetime_with_timezone, + timezone::Tz, + types::{Date32Type, IntervalMonthDayNanoType, TimestampNanosecondType as TSNT}, + Array, ArrayRef, Int64Array, ListArray, ListBuilder, NullArray, NullBufferBuilder, + TimestampNanosecondArray, }; -use arrow_array::{NullArray, TimestampNanosecondArray}; -use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::{DataType, Field}; use arrow_schema::DataType::*; use arrow_schema::IntervalUnit::MonthDayNano; use arrow_schema::TimeUnit::Nanosecond; diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index bf7f4746618f..64b6405176a3 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -19,11 +19,11 @@ use crate::utils; use crate::utils::make_scalar_function; +use arrow::buffer::OffsetBuffer; use arrow_array::cast::AsArray; use arrow_array::{ new_empty_array, Array, ArrayRef, BooleanArray, GenericListArray, OffsetSizeTrait, }; -use arrow_buffer::OffsetBuffer; use arrow_schema::{DataType, Field}; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; diff --git a/datafusion/functions-nested/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs index 2bc4721f3cfa..455fb3dd3023 100644 --- a/datafusion/functions-nested/src/repeat.rs +++ b/datafusion/functions-nested/src/repeat.rs @@ -19,13 +19,13 @@ use crate::utils::make_scalar_function; use arrow::array::{Capacities, MutableArrayData}; +use arrow::buffer::OffsetBuffer; use arrow::compute; use arrow::compute::cast; use arrow_array::{ new_null_array, Array, ArrayRef, GenericListArray, ListArray, OffsetSizeTrait, UInt64Array, }; -use arrow_buffer::OffsetBuffer; use arrow_schema::DataType::{LargeList, List}; use arrow_schema::{DataType, Field}; use datafusion_common::cast::{as_large_list_array, as_list_array, as_uint64_array}; diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 106887c51396..1f12625a52b8 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -18,12 +18,12 @@ //! [`ScalarUDFImpl`] definitions for array_replace, array_replace_n and array_replace_all functions. use arrow::array::{ - Array, ArrayRef, AsArray, Capacities, MutableArrayData, OffsetSizeTrait, + Array, ArrayRef, AsArray, Capacities, GenericListArray, MutableArrayData, + NullBufferBuilder, OffsetSizeTrait, }; use arrow::datatypes::DataType; -use arrow_array::GenericListArray; -use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; +use arrow::buffer::OffsetBuffer; use arrow_schema::Field; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, Result}; diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index 441f44e47f6e..f167134f9b22 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -18,11 +18,12 @@ //! [`ScalarUDFImpl`] definitions for array_resize function. use crate::utils::make_scalar_function; -use arrow::array::{Capacities, MutableArrayData}; -use arrow_array::{ - new_null_array, Array, ArrayRef, GenericListArray, Int64Array, OffsetSizeTrait, +use arrow::array::{ + new_null_array, Array, ArrayRef, Capacities, GenericListArray, Int64Array, + MutableArrayData, NullBufferBuilder, OffsetSizeTrait, }; -use arrow_buffer::{ArrowNativeType, NullBufferBuilder, OffsetBuffer}; +use arrow::buffer::OffsetBuffer; +use arrow::datatypes::ArrowNativeType; use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::{DataType, FieldRef}; use datafusion_common::cast::{as_int64_array, as_large_list_array, as_list_array}; diff --git a/datafusion/functions-nested/src/reverse.rs b/datafusion/functions-nested/src/reverse.rs index 8538ba5cac12..9fd955094ae6 100644 --- a/datafusion/functions-nested/src/reverse.rs +++ b/datafusion/functions-nested/src/reverse.rs @@ -19,8 +19,8 @@ use crate::utils::make_scalar_function; use arrow::array::{Capacities, MutableArrayData}; +use arrow::buffer::OffsetBuffer; use arrow_array::{Array, ArrayRef, GenericListArray, OffsetSizeTrait}; -use arrow_buffer::OffsetBuffer; use arrow_schema::DataType::{LargeList, List, Null}; use arrow_schema::{DataType, FieldRef}; use datafusion_common::cast::{as_large_list_array, as_list_array}; diff --git a/datafusion/functions-nested/src/sort.rs b/datafusion/functions-nested/src/sort.rs index 8e45ccbf74b8..e4dcc02286f3 100644 --- a/datafusion/functions-nested/src/sort.rs +++ b/datafusion/functions-nested/src/sort.rs @@ -18,9 +18,9 @@ //! [`ScalarUDFImpl`] definitions for array_sort function. use crate::utils::make_scalar_function; +use arrow::array::{Array, ArrayRef, ListArray, NullBufferBuilder}; +use arrow::buffer::OffsetBuffer; use arrow::compute; -use arrow_array::{Array, ArrayRef, ListArray}; -use arrow_buffer::{NullBufferBuilder, OffsetBuffer}; use arrow_schema::DataType::{FixedSizeList, LargeList, List}; use arrow_schema::{DataType, Field, SortOptions}; use datafusion_common::cast::{as_list_array, as_string_array}; diff --git a/datafusion/functions-nested/src/utils.rs b/datafusion/functions-nested/src/utils.rs index c54d6d49cecc..e1961dccf54a 100644 --- a/datafusion/functions-nested/src/utils.rs +++ b/datafusion/functions-nested/src/utils.rs @@ -21,11 +21,11 @@ use std::sync::Arc; use arrow::{array::ArrayRef, datatypes::DataType}; +use arrow::buffer::OffsetBuffer; use arrow_array::{ Array, BooleanArray, GenericListArray, ListArray, OffsetSizeTrait, Scalar, UInt32Array, }; -use arrow_buffer::OffsetBuffer; use arrow_schema::{Field, Fields}; use datafusion_common::cast::{as_large_list_array, as_list_array}; use datafusion_common::{ diff --git a/datafusion/functions/src/core/greatest.rs b/datafusion/functions/src/core/greatest.rs index 7ad8c7372896..6864da2d5c06 100644 --- a/datafusion/functions/src/core/greatest.rs +++ b/datafusion/functions/src/core/greatest.rs @@ -17,10 +17,10 @@ use crate::core::greatest_least_utils::GreatestLeastOperator; use arrow::array::{make_comparator, Array, BooleanArray}; +use arrow::buffer::BooleanBuffer; use arrow::compute::kernels::cmp; use arrow::compute::SortOptions; use arrow::datatypes::DataType; -use arrow_buffer::BooleanBuffer; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_doc::Documentation; use datafusion_expr::ColumnarValue; diff --git a/datafusion/functions/src/core/least.rs b/datafusion/functions/src/core/least.rs index 02299feb9b74..a26b14babf2c 100644 --- a/datafusion/functions/src/core/least.rs +++ b/datafusion/functions/src/core/least.rs @@ -17,10 +17,10 @@ use crate::core::greatest_least_utils::GreatestLeastOperator; use arrow::array::{make_comparator, Array, BooleanArray}; +use arrow::buffer::BooleanBuffer; use arrow::compute::kernels::cmp; use arrow::compute::SortOptions; use arrow::datatypes::DataType; -use arrow_buffer::BooleanBuffer; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_doc::Documentation; use datafusion_expr::ColumnarValue; diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 6e5f767013d4..5e0567eafea2 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -23,11 +23,10 @@ use std::sync::Arc; use crate::strings::make_and_append_view; use arrow::array::{ new_null_array, Array, ArrayRef, GenericStringArray, GenericStringBuilder, - OffsetSizeTrait, StringBuilder, StringViewArray, + NullBufferBuilder, OffsetSizeTrait, StringBuilder, StringViewArray, }; -use arrow::buffer::Buffer; +use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::DataType; -use arrow_buffer::{NullBufferBuilder, ScalarBuffer}; use datafusion_common::cast::{as_generic_string_array, as_string_view_array}; use datafusion_common::Result; use datafusion_common::{exec_err, ScalarValue}; diff --git a/datafusion/functions/src/strings.rs b/datafusion/functions/src/strings.rs index bb991c28fe4d..6299b353d57a 100644 --- a/datafusion/functions/src/strings.rs +++ b/datafusion/functions/src/strings.rs @@ -19,11 +19,11 @@ use std::mem::size_of; use arrow::array::{ make_view, Array, ArrayAccessor, ArrayDataBuilder, ArrayIter, ByteView, - GenericStringArray, LargeStringArray, OffsetSizeTrait, StringArray, StringViewArray, - StringViewBuilder, + GenericStringArray, LargeStringArray, NullBufferBuilder, OffsetSizeTrait, + StringArray, StringViewArray, StringViewBuilder, }; +use arrow::buffer::{MutableBuffer, NullBuffer}; use arrow::datatypes::DataType; -use arrow_buffer::{MutableBuffer, NullBuffer, NullBufferBuilder}; /// Abstracts iteration over different types of string arrays. #[deprecated(since = "45.0.0", note = "Use arrow::array::StringArrayType instead")] diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 00737a2fe814..3767166cab51 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -22,10 +22,10 @@ use crate::strings::make_and_append_view; use crate::utils::{make_scalar_function, utf8_to_str_type}; use arrow::array::{ Array, ArrayIter, ArrayRef, AsArray, GenericStringBuilder, Int64Array, - OffsetSizeTrait, StringArrayType, StringViewArray, + NullBufferBuilder, OffsetSizeTrait, StringArrayType, StringViewArray, }; +use arrow::buffer::ScalarBuffer; use arrow::datatypes::DataType; -use arrow_buffer::{NullBufferBuilder, ScalarBuffer}; use datafusion_common::cast::as_int64_array; use datafusion_common::{exec_err, plan_err, Result}; use datafusion_expr::{ diff --git a/datafusion/physical-expr-common/src/binary_map.rs b/datafusion/physical-expr-common/src/binary_map.rs index bdff494518da..809c619e9845 100644 --- a/datafusion/physical-expr-common/src/binary_map.rs +++ b/datafusion/physical-expr-common/src/binary_map.rs @@ -19,14 +19,14 @@ //! StringArray / LargeStringArray / BinaryArray / LargeBinaryArray. use ahash::RandomState; -use arrow::array::cast::AsArray; -use arrow::array::types::{ByteArrayType, GenericBinaryType, GenericStringType}; use arrow::array::{ + cast::AsArray, + types::{ByteArrayType, GenericBinaryType, GenericStringType}, Array, ArrayRef, BufferBuilder, GenericBinaryArray, GenericStringArray, - OffsetSizeTrait, + NullBufferBuilder, OffsetSizeTrait, }; +use arrow::buffer::{NullBuffer, OffsetBuffer, ScalarBuffer}; use arrow::datatypes::DataType; -use arrow_buffer::{NullBuffer, NullBufferBuilder, OffsetBuffer, ScalarBuffer}; use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::proxy::{HashTableAllocExt, VecAllocExt}; use std::any::type_name; diff --git a/datafusion/physical-expr/src/expressions/is_not_null.rs b/datafusion/physical-expr/src/expressions/is_not_null.rs index 4930865f4c98..8e3544622b80 100644 --- a/datafusion/physical-expr/src/expressions/is_not_null.rs +++ b/datafusion/physical-expr/src/expressions/is_not_null.rs @@ -115,12 +115,12 @@ pub fn is_not_null(arg: Arc) -> Result> mod tests { use super::*; use crate::expressions::col; + use arrow::buffer::ScalarBuffer; use arrow::{ array::{BooleanArray, StringArray}, datatypes::*, }; use arrow_array::{Array, Float64Array, Int32Array, UnionArray}; - use arrow_buffer::ScalarBuffer; use datafusion_common::cast::as_boolean_array; #[test] diff --git a/datafusion/physical-expr/src/expressions/is_null.rs b/datafusion/physical-expr/src/expressions/is_null.rs index 6a02d5ecc1f2..ca8d67230557 100644 --- a/datafusion/physical-expr/src/expressions/is_null.rs +++ b/datafusion/physical-expr/src/expressions/is_null.rs @@ -114,12 +114,12 @@ pub fn is_null(arg: Arc) -> Result> { mod tests { use super::*; use crate::expressions::col; + use arrow::buffer::ScalarBuffer; use arrow::{ array::{BooleanArray, StringArray}, datatypes::*, }; use arrow_array::{Array, Float64Array, Int32Array, UnionArray}; - use arrow_buffer::ScalarBuffer; use datafusion_common::cast::as_boolean_array; #[test] diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs index e75c75a235b7..c4525256dbae 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs @@ -17,11 +17,12 @@ use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; -use arrow::array::{AsArray, BufferBuilder, GenericBinaryArray, GenericStringArray}; +use arrow::array::{ + types::GenericStringType, Array, ArrayRef, AsArray, BufferBuilder, + GenericBinaryArray, GenericByteArray, GenericStringArray, OffsetSizeTrait, +}; use arrow::buffer::{OffsetBuffer, ScalarBuffer}; use arrow::datatypes::{ByteArrayType, DataType, GenericBinaryType}; -use arrow_array::types::GenericStringType; -use arrow_array::{Array, ArrayRef, GenericByteArray, OffsetSizeTrait}; use datafusion_common::utils::proxy::VecAllocExt; use datafusion_physical_expr_common::binary_map::{OutputType, INITIAL_BUFFER_CAPACITY}; use itertools::izip; @@ -404,8 +405,7 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::bytes::ByteGroupValueBuilder; - use arrow_array::{ArrayRef, StringArray}; - use arrow_buffer::NullBufferBuilder; + use arrow::array::{ArrayRef, NullBufferBuilder, StringArray}; use datafusion_physical_expr::binary_map::OutputType; use super::GroupColumn; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs index c3d88b894999..d170411b833c 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs @@ -18,10 +18,9 @@ use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; use arrow::array::{make_view, AsArray, ByteView}; -use arrow::buffer::ScalarBuffer; +use arrow::buffer::{Buffer, ScalarBuffer}; use arrow::datatypes::ByteViewType; use arrow_array::{Array, ArrayRef, GenericByteViewArray}; -use arrow_buffer::Buffer; use itertools::izip; use std::marker::PhantomData; use std::mem::{replace, size_of}; @@ -545,10 +544,8 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::bytes_view::ByteViewGroupValueBuilder; - use arrow::array::AsArray; + use arrow::array::{ArrayRef, AsArray, NullBufferBuilder, StringViewArray}; use arrow::datatypes::StringViewType; - use arrow_array::{ArrayRef, StringViewArray}; - use arrow_buffer::NullBufferBuilder; use super::GroupColumn; diff --git a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs index cd5dfae86ee9..c85245d05592 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs @@ -17,9 +17,8 @@ use crate::aggregates::group_values::multi_group_by::{nulls_equal_to, GroupColumn}; use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder; +use arrow::array::{cast::AsArray, Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; use arrow::buffer::ScalarBuffer; -use arrow_array::cast::AsArray; -use arrow_array::{Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; use arrow_schema::DataType; use datafusion_execution::memory_pool::proxy::VecAllocExt; use itertools::izip; @@ -212,9 +211,8 @@ mod tests { use std::sync::Arc; use crate::aggregates::group_values::multi_group_by::primitive::PrimitiveGroupValueBuilder; + use arrow::array::{ArrayRef, Int64Array, NullBufferBuilder}; use arrow::datatypes::Int64Type; - use arrow_array::{ArrayRef, Int64Array}; - use arrow_buffer::NullBufferBuilder; use arrow_schema::DataType; use super::GroupColumn; diff --git a/datafusion/physical-plan/src/aggregates/group_values/null_builder.rs b/datafusion/physical-plan/src/aggregates/group_values/null_builder.rs index a584cf58e50a..369d921d2fc8 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/null_builder.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/null_builder.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -use arrow_buffer::{BooleanBufferBuilder, NullBuffer}; +use arrow::array::BooleanBufferBuilder; +use arrow::buffer::NullBuffer; /// Builder for an (optional) null mask /// diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs index 78a5f619fcd6..f613e8d795e3 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs @@ -17,11 +17,12 @@ use crate::aggregates::group_values::GroupValues; use ahash::RandomState; +use arrow::array::{ + cast::AsArray, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, NullBufferBuilder, + PrimitiveArray, +}; use arrow::datatypes::i256; use arrow::record_batch::RecordBatch; -use arrow_array::cast::AsArray; -use arrow_array::{ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, PrimitiveArray}; -use arrow_buffer::NullBufferBuilder; use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::DataType; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/joins/hash_join.rs b/datafusion/physical-plan/src/joins/hash_join.rs index bac72e8a0cc7..6d33a3a56eac 100644 --- a/datafusion/physical-plan/src/joins/hash_join.rs +++ b/datafusion/physical-plan/src/joins/hash_join.rs @@ -1644,9 +1644,9 @@ mod tests { }; use arrow::array::{Date32Array, Int32Array}; + use arrow::buffer::NullBuffer; use arrow::datatypes::{DataType, Field}; use arrow_array::StructArray; - use arrow_buffer::NullBuffer; use datafusion_common::{ assert_batches_eq, assert_batches_sorted_eq, assert_contains, exec_err, ScalarValue, diff --git a/datafusion/physical-plan/src/joins/mod.rs b/datafusion/physical-plan/src/joins/mod.rs index bfdeb2fd6e27..fdb5cdeb5136 100644 --- a/datafusion/physical-plan/src/joins/mod.rs +++ b/datafusion/physical-plan/src/joins/mod.rs @@ -17,7 +17,7 @@ //! DataFusion Join implementations -use arrow_buffer::BooleanBufferBuilder; +use arrow::array::BooleanBufferBuilder; pub use cross_join::CrossJoinExec; pub use hash_join::HashJoinExec; pub use nested_loop_join::NestedLoopJoinExec; diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index 6d4f06b3aef2..00d9a6b0ae46 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -26,9 +26,11 @@ use crate::joins::utils::{JoinFilter, JoinHashMapType}; use crate::metrics::{ExecutionPlanMetricsSet, MetricBuilder}; use crate::{metrics, ExecutionPlan}; +use arrow::array::{ + ArrowPrimitiveType, BooleanBufferBuilder, NativeAdapter, PrimitiveArray, RecordBatch, +}; use arrow::compute::concat_batches; -use arrow_array::{ArrowPrimitiveType, NativeAdapter, PrimitiveArray, RecordBatch}; -use arrow_buffer::{ArrowNativeType, BooleanBufferBuilder}; +use arrow_buffer::ArrowNativeType; use arrow_schema::{Schema, SchemaRef}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{ diff --git a/datafusion/physical-plan/src/sorts/cursor.rs b/datafusion/physical-plan/src/sorts/cursor.rs index 5cd24b89f5c1..2c298c9327f4 100644 --- a/datafusion/physical-plan/src/sorts/cursor.rs +++ b/datafusion/physical-plan/src/sorts/cursor.rs @@ -18,6 +18,7 @@ use std::cmp::Ordering; use arrow::buffer::ScalarBuffer; +use arrow::buffer::{Buffer, OffsetBuffer}; use arrow::compute::SortOptions; use arrow::datatypes::ArrowNativeTypeOp; use arrow::row::Rows; @@ -25,7 +26,6 @@ use arrow_array::types::ByteArrayType; use arrow_array::{ Array, ArrowPrimitiveType, GenericByteArray, OffsetSizeTrait, PrimitiveArray, }; -use arrow_buffer::{Buffer, OffsetBuffer}; use datafusion_execution::memory_pool::MemoryReservation; /// A comparable collection of values for use with [`Cursor`] diff --git a/datafusion/physical-plan/src/unnest.rs b/datafusion/physical-plan/src/unnest.rs index ef6797c9b10d..942dd7881052 100644 --- a/datafusion/physical-plan/src/unnest.rs +++ b/datafusion/physical-plan/src/unnest.rs @@ -951,9 +951,11 @@ fn repeat_arrs_from_indices( #[cfg(test)] mod tests { use super::*; + use arrow::array::{ + GenericListArray, NullBufferBuilder, OffsetSizeTrait, StringArray, + }; + use arrow::buffer::{NullBuffer, OffsetBuffer}; use arrow::datatypes::{Field, Int32Type}; - use arrow_array::{GenericListArray, OffsetSizeTrait, StringArray}; - use arrow_buffer::{NullBuffer, NullBufferBuilder, OffsetBuffer}; use datafusion_common::assert_batches_eq; // Create a GenericListArray with the following list values: From d6dfddc3dc0b1f2becbf72a5a8fa1b3d1b524428 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:26:05 -0500 Subject: [PATCH 02/10] Remove arrow-buffer in common --- datafusion/common/Cargo.toml | 1 - datafusion/common/src/hash_utils.rs | 2 +- datafusion/common/src/scalar/mod.rs | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/common/Cargo.toml b/datafusion/common/Cargo.toml index fe6d652be700..1050b376be8a 100644 --- a/datafusion/common/Cargo.toml +++ b/datafusion/common/Cargo.toml @@ -52,7 +52,6 @@ apache-avro = { version = "0.17", default-features = false, features = [ ], optional = true } arrow = { workspace = true } arrow-array = { workspace = true } -arrow-buffer = { workspace = true } arrow-ipc = { workspace = true } arrow-schema = { workspace = true } base64 = "0.22.1" diff --git a/datafusion/common/src/hash_utils.rs b/datafusion/common/src/hash_utils.rs index 37b4cecc005c..e78d42257b9c 100644 --- a/datafusion/common/src/hash_utils.rs +++ b/datafusion/common/src/hash_utils.rs @@ -21,11 +21,11 @@ use std::sync::Arc; use ahash::RandomState; +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow::array::*; use arrow::datatypes::*; #[cfg(not(feature = "force_hash_collisions"))] use arrow::{downcast_dictionary_array, downcast_primitive_array}; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; #[cfg(not(feature = "force_hash_collisions"))] use crate::cast::{ diff --git a/datafusion/common/src/scalar/mod.rs b/datafusion/common/src/scalar/mod.rs index 21b52e019880..6cd6a43941c8 100644 --- a/datafusion/common/src/scalar/mod.rs +++ b/datafusion/common/src/scalar/mod.rs @@ -40,6 +40,8 @@ use crate::cast::{ use crate::error::{DataFusionError, Result, _exec_err, _internal_err, _not_impl_err}; use crate::hash_utils::create_hashes; use crate::utils::SingleRowListArrayBuilder; +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; +use arrow::buffer::ScalarBuffer; use arrow::compute::kernels::numeric::*; use arrow::util::display::{array_value_to_string, ArrayFormatter, FormatOptions}; use arrow::{ @@ -54,7 +56,6 @@ use arrow::{ UInt16Type, UInt32Type, UInt64Type, UInt8Type, DECIMAL128_MAX_PRECISION, }, }; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use arrow_schema::{UnionFields, UnionMode}; use crate::format::DEFAULT_CAST_OPTIONS; From b2711ad44bdd3a1ef0d52516ec69c80d2879fcf6 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:28:12 -0500 Subject: [PATCH 03/10] Remove dependency in core --- datafusion/core/Cargo.toml | 1 - datafusion/core/tests/expr_api/simplification.rs | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index b708c18f5b75..de36c5925e0b 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -136,7 +136,6 @@ xz2 = { version = "0.1", optional = true, features = ["static"] } zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] -arrow-buffer = { workspace = true } async-trait = { workspace = true } criterion = { version = "0.5", features = ["async_tokio"] } ctor = { workspace = true } diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 1e6ff8088d0a..246898995315 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -19,7 +19,7 @@ use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::{ArrayRef, Int32Array}; -use arrow_buffer::IntervalDayTime; +use arrow::array::types::IntervalDayTime; use chrono::{DateTime, TimeZone, Utc}; use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*}; use datafusion_common::cast::as_int32_array; From 6b0d7084deb12c16840c4505b90e672ad16bc7a8 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:45:36 -0500 Subject: [PATCH 04/10] remove another ne --- datafusion/functions-aggregate/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 333f0d9cdd79..78e22011b61a 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -40,7 +40,6 @@ path = "src/lib.rs" [dependencies] ahash = { workspace = true } arrow = { workspace = true } -arrow-buffer = { workspace = true } arrow-schema = { workspace = true } datafusion-common = { workspace = true } datafusion-doc = { workspace = true } From 34bf128a2b500af2919492d7382691820c92584e Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:46:30 -0500 Subject: [PATCH 05/10] remove from functions-nested --- datafusion/core/tests/expr_api/simplification.rs | 2 +- datafusion/functions-nested/Cargo.toml | 1 - datafusion/functions-nested/src/map.rs | 3 ++- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/datafusion/core/tests/expr_api/simplification.rs b/datafusion/core/tests/expr_api/simplification.rs index 246898995315..76df4a1f1105 100644 --- a/datafusion/core/tests/expr_api/simplification.rs +++ b/datafusion/core/tests/expr_api/simplification.rs @@ -17,9 +17,9 @@ //! This program demonstrates the DataFusion expression simplification API. +use arrow::array::types::IntervalDayTime; use arrow::datatypes::{DataType, Field, Schema}; use arrow_array::{ArrayRef, Int32Array}; -use arrow::array::types::IntervalDayTime; use chrono::{DateTime, TimeZone, Utc}; use datafusion::{error::Result, execution::context::ExecutionProps, prelude::*}; use datafusion_common::cast::as_int32_array; diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index e7254e4125cb..01fbc73cba12 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -42,7 +42,6 @@ path = "src/lib.rs" [dependencies] arrow = { workspace = true } arrow-array = { workspace = true } -arrow-buffer = { workspace = true } arrow-ord = { workspace = true } arrow-schema = { workspace = true } datafusion-common = { workspace = true } diff --git a/datafusion/functions-nested/src/map.rs b/datafusion/functions-nested/src/map.rs index d484cc834262..8c78de68f86e 100644 --- a/datafusion/functions-nested/src/map.rs +++ b/datafusion/functions-nested/src/map.rs @@ -20,8 +20,9 @@ use std::collections::VecDeque; use std::sync::Arc; use arrow::array::ArrayData; +use arrow::buffer::Buffer; +use arrow::datatypes::ToByteSlice; use arrow_array::{Array, ArrayRef, MapArray, OffsetSizeTrait, StructArray}; -use arrow_buffer::{Buffer, ToByteSlice}; use arrow_schema::{DataType, Field, SchemaBuilder}; use datafusion_common::utils::{fixed_size_list_to_arrays, list_to_arrays}; From 3b339b41925c75d3236724fc3966e3c354f91fc9 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:47:28 -0500 Subject: [PATCH 06/10] remove from physical-expr --- datafusion/physical-expr/Cargo.toml | 1 - datafusion/physical-expr/src/expressions/in_list.rs | 2 +- datafusion/physical-expr/src/intervals/cp_solver.rs | 2 +- datafusion/physical-expr/src/intervals/utils.rs | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index 5e0832673697..d93a402db318 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -39,7 +39,6 @@ path = "src/lib.rs" ahash = { workspace = true } arrow = { workspace = true } arrow-array = { workspace = true } -arrow-buffer = { workspace = true } arrow-schema = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-expr = { workspace = true } diff --git a/datafusion/physical-expr/src/expressions/in_list.rs b/datafusion/physical-expr/src/expressions/in_list.rs index 29577740aab4..dfe9a905dfea 100644 --- a/datafusion/physical-expr/src/expressions/in_list.rs +++ b/datafusion/physical-expr/src/expressions/in_list.rs @@ -25,6 +25,7 @@ use std::sync::Arc; use crate::physical_expr::physical_exprs_bag_equal; use crate::PhysicalExpr; +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow::array::*; use arrow::buffer::BooleanBuffer; use arrow::compute::kernels::boolean::{not, or_kleene}; @@ -32,7 +33,6 @@ use arrow::compute::take; use arrow::datatypes::*; use arrow::util::bit_iterator::BitIndexIterator; use arrow::{downcast_dictionary_array, downcast_primitive_array}; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use datafusion_common::cast::{ as_boolean_array, as_generic_binary_array, as_string_array, }; diff --git a/datafusion/physical-expr/src/intervals/cp_solver.rs b/datafusion/physical-expr/src/intervals/cp_solver.rs index f5a83c58deec..166d2564fdf3 100644 --- a/datafusion/physical-expr/src/intervals/cp_solver.rs +++ b/datafusion/physical-expr/src/intervals/cp_solver.rs @@ -722,8 +722,8 @@ mod tests { use crate::expressions::{BinaryExpr, Column}; use crate::intervals::test_utils::gen_conjunctive_numerical_expr; + use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow::datatypes::TimeUnit; - use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::Field; use datafusion_common::ScalarValue; diff --git a/datafusion/physical-expr/src/intervals/utils.rs b/datafusion/physical-expr/src/intervals/utils.rs index 496db7b454df..56af8238c04e 100644 --- a/datafusion/physical-expr/src/intervals/utils.rs +++ b/datafusion/physical-expr/src/intervals/utils.rs @@ -24,7 +24,7 @@ use crate::{ PhysicalExpr, }; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::{DataType, SchemaRef}; use datafusion_common::{internal_err, Result, ScalarValue}; use datafusion_expr::interval_arithmetic::Interval; From 469979a4a8f30aa7c8624ab890ddfdc7a7159bfc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:47:59 -0500 Subject: [PATCH 07/10] remove from physical-expr-common --- datafusion/physical-expr-common/Cargo.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/physical-expr-common/Cargo.toml b/datafusion/physical-expr-common/Cargo.toml index 00ddb11cc42d..14d6ca64d15e 100644 --- a/datafusion/physical-expr-common/Cargo.toml +++ b/datafusion/physical-expr-common/Cargo.toml @@ -38,7 +38,6 @@ path = "src/lib.rs" [dependencies] ahash = { workspace = true } arrow = { workspace = true } -arrow-buffer = { workspace = true } datafusion-common = { workspace = true, default-features = true } datafusion-expr-common = { workspace = true } hashbrown = { workspace = true } From 920daa81e026dc6a6cd4da607c4edf0f691ec158 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:51:34 -0500 Subject: [PATCH 08/10] Remove from physical-plan --- datafusion/physical-plan/Cargo.toml | 1 - .../src/aggregates/group_values/single_group_by/primitive.rs | 2 +- datafusion/physical-plan/src/aggregates/topk/hash_table.rs | 2 +- datafusion/physical-plan/src/aggregates/topk/heap.rs | 3 ++- datafusion/physical-plan/src/joins/stream_join_utils.rs | 2 +- datafusion/physical-plan/src/joins/symmetric_hash_join.rs | 3 +-- datafusion/physical-plan/src/joins/test_utils.rs | 2 +- datafusion/physical-plan/src/joins/utils.rs | 5 +++-- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion/physical-plan/Cargo.toml b/datafusion/physical-plan/Cargo.toml index a72d19cda3b1..a002e3861f11 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -42,7 +42,6 @@ path = "src/lib.rs" ahash = { workspace = true } arrow = { workspace = true } arrow-array = { workspace = true } -arrow-buffer = { workspace = true } arrow-ord = { workspace = true } arrow-schema = { workspace = true } async-trait = { workspace = true } diff --git a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs index f613e8d795e3..5a6235edb25a 100644 --- a/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs +++ b/datafusion/physical-plan/src/aggregates/group_values/single_group_by/primitive.rs @@ -17,13 +17,13 @@ use crate::aggregates::group_values::GroupValues; use ahash::RandomState; +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow::array::{ cast::AsArray, ArrayRef, ArrowNativeTypeOp, ArrowPrimitiveType, NullBufferBuilder, PrimitiveArray, }; use arrow::datatypes::i256; use arrow::record_batch::RecordBatch; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::DataType; use datafusion_common::Result; use datafusion_execution::memory_pool::proxy::VecAllocExt; diff --git a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs index 23a07ebec305..514214858fa1 100644 --- a/datafusion/physical-plan/src/aggregates/topk/hash_table.rs +++ b/datafusion/physical-plan/src/aggregates/topk/hash_table.rs @@ -20,13 +20,13 @@ use crate::aggregates::group_values::HashValue; use crate::aggregates::topk::heap::Comparable; use ahash::RandomState; +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; use arrow::datatypes::i256; use arrow_array::builder::PrimitiveBuilder; use arrow_array::cast::AsArray; use arrow_array::{ downcast_primitive, Array, ArrayRef, ArrowPrimitiveType, PrimitiveArray, StringArray, }; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; use arrow_schema::DataType; use datafusion_common::DataFusionError; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/aggregates/topk/heap.rs b/datafusion/physical-plan/src/aggregates/topk/heap.rs index ec1277f8fd55..fc68df9b82ed 100644 --- a/datafusion/physical-plan/src/aggregates/topk/heap.rs +++ b/datafusion/physical-plan/src/aggregates/topk/heap.rs @@ -17,10 +17,11 @@ //! A custom binary heap implementation for performant top K aggregation +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; +use arrow::buffer::ScalarBuffer; use arrow::datatypes::i256; use arrow_array::cast::AsArray; use arrow_array::{downcast_primitive, ArrayRef, ArrowPrimitiveType, PrimitiveArray}; -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, ScalarBuffer}; use arrow_schema::DataType; use datafusion_common::DataFusionError; use datafusion_common::Result; diff --git a/datafusion/physical-plan/src/joins/stream_join_utils.rs b/datafusion/physical-plan/src/joins/stream_join_utils.rs index 00d9a6b0ae46..a3b3a37aa7ef 100644 --- a/datafusion/physical-plan/src/joins/stream_join_utils.rs +++ b/datafusion/physical-plan/src/joins/stream_join_utils.rs @@ -30,7 +30,7 @@ use arrow::array::{ ArrowPrimitiveType, BooleanBufferBuilder, NativeAdapter, PrimitiveArray, RecordBatch, }; use arrow::compute::concat_batches; -use arrow_buffer::ArrowNativeType; +use arrow::datatypes::ArrowNativeType; use arrow_schema::{Schema, SchemaRef}; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; use datafusion_common::{ diff --git a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs index 59aab3395ea2..47af4ab9a765 100644 --- a/datafusion/physical-plan/src/joins/symmetric_hash_join.rs +++ b/datafusion/physical-plan/src/joins/symmetric_hash_join.rs @@ -63,9 +63,8 @@ use arrow::array::{ UInt64Array, }; use arrow::compute::concat_batches; -use arrow::datatypes::{Schema, SchemaRef}; +use arrow::datatypes::{ArrowNativeType, Schema, SchemaRef}; use arrow::record_batch::RecordBatch; -use arrow_buffer::ArrowNativeType; use datafusion_common::hash_utils::create_hashes; use datafusion_common::utils::bisect; use datafusion_common::{internal_err, plan_err, HashSet, JoinSide, JoinType, Result}; diff --git a/datafusion/physical-plan/src/joins/test_utils.rs b/datafusion/physical-plan/src/joins/test_utils.rs index 37d6c0aff850..0e3d03fcae94 100644 --- a/datafusion/physical-plan/src/joins/test_utils.rs +++ b/datafusion/physical-plan/src/joins/test_utils.rs @@ -27,12 +27,12 @@ use crate::memory::MemoryExec; use crate::repartition::RepartitionExec; use crate::{common, ExecutionPlan, ExecutionPlanProperties, Partitioning}; +use arrow::array::types::IntervalDayTime; use arrow::util::pretty::pretty_format_batches; use arrow_array::{ ArrayRef, Float64Array, Int32Array, IntervalDayTimeArray, RecordBatch, TimestampMillisecondArray, }; -use arrow_buffer::IntervalDayTime; use arrow_schema::{DataType, Schema}; use datafusion_common::{Result, ScalarValue}; use datafusion_execution::TaskContext; diff --git a/datafusion/physical-plan/src/joins/utils.rs b/datafusion/physical-plan/src/joins/utils.rs index 5327793d01e2..dbe90077bc8c 100644 --- a/datafusion/physical-plan/src/joins/utils.rs +++ b/datafusion/physical-plan/src/joins/utils.rs @@ -37,11 +37,12 @@ use arrow::array::{ UInt32Builder, UInt64Array, }; use arrow::compute; -use arrow::datatypes::{Field, Schema, SchemaBuilder, UInt32Type, UInt64Type}; +use arrow::datatypes::{ + ArrowNativeType, Field, Schema, SchemaBuilder, UInt32Type, UInt64Type, +}; use arrow::record_batch::{RecordBatch, RecordBatchOptions}; use arrow_array::builder::UInt64Builder; use arrow_array::{ArrowPrimitiveType, NativeAdapter, PrimitiveArray}; -use arrow_buffer::ArrowNativeType; use datafusion_common::cast::as_boolean_array; use datafusion_common::stats::Precision; use datafusion_common::tree_node::{Transformed, TransformedResult, TreeNode}; From f476a6454f511c0b34b9061665b345c1640118a7 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 05:52:54 -0500 Subject: [PATCH 09/10] Remove from substrait --- datafusion/substrait/Cargo.toml | 1 - datafusion/substrait/src/logical_plan/consumer.rs | 8 +++++--- datafusion/substrait/src/logical_plan/producer.rs | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/datafusion/substrait/Cargo.toml b/datafusion/substrait/Cargo.toml index 226f65b983dd..f13d2b77a787 100644 --- a/datafusion/substrait/Cargo.toml +++ b/datafusion/substrait/Cargo.toml @@ -31,7 +31,6 @@ rust-version = { workspace = true } workspace = true [dependencies] -arrow-buffer = { workspace = true } async-recursion = "1.0" async-trait = { workspace = true } chrono = { workspace = true } diff --git a/datafusion/substrait/src/logical_plan/consumer.rs b/datafusion/substrait/src/logical_plan/consumer.rs index b17a8967e5bb..89112e3fe84e 100644 --- a/datafusion/substrait/src/logical_plan/consumer.rs +++ b/datafusion/substrait/src/logical_plan/consumer.rs @@ -15,7 +15,8 @@ // specific language governing permissions and limitations // under the License. -use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano, OffsetBuffer}; +use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; +use arrow::buffer::OffsetBuffer; use async_recursion::async_recursion; use datafusion::arrow::array::MapArray; use datafusion::arrow::datatypes::{ @@ -67,7 +68,7 @@ use datafusion::logical_expr::{ }; use datafusion::prelude::{lit, JoinType}; use datafusion::{ - error::Result, logical_expr::utils::split_conjunction, prelude::Column, + arrow, error::Result, logical_expr::utils::split_conjunction, prelude::Column, scalar::ScalarValue, }; use std::collections::HashSet; @@ -3278,7 +3279,8 @@ mod test { from_substrait_literal_without_names, from_substrait_rex, DefaultSubstraitConsumer, }; - use arrow_buffer::IntervalMonthDayNano; + use arrow::array::types::IntervalMonthDayNano; + use datafusion::arrow; use datafusion::common::DFSchema; use datafusion::error::Result; use datafusion::execution::SessionState; diff --git a/datafusion/substrait/src/logical_plan/producer.rs b/datafusion/substrait/src/logical_plan/producer.rs index e501ddf5c698..42c226174932 100644 --- a/datafusion/substrait/src/logical_plan/producer.rs +++ b/datafusion/substrait/src/logical_plan/producer.rs @@ -2535,7 +2535,8 @@ mod test { from_substrait_named_struct, from_substrait_type_without_names, DefaultSubstraitConsumer, }; - use arrow_buffer::{IntervalDayTime, IntervalMonthDayNano}; + use arrow::array::types::{IntervalDayTime, IntervalMonthDayNano}; + use datafusion::arrow; use datafusion::arrow::array::{ GenericListArray, Int64Builder, MapBuilder, StringBuilder, }; From b736f298e08166c689e6cc329db4ceae9cd3e814 Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 5 Feb 2025 08:46:00 -0500 Subject: [PATCH 10/10] fix datafusion-cli/Cargo.lock --- datafusion-cli/Cargo.lock | 6 ------ 1 file changed, 6 deletions(-) diff --git a/datafusion-cli/Cargo.lock b/datafusion-cli/Cargo.lock index cbb4c29563dc..4c5c29965dc4 100644 --- a/datafusion-cli/Cargo.lock +++ b/datafusion-cli/Cargo.lock @@ -1309,7 +1309,6 @@ dependencies = [ "apache-avro", "arrow", "arrow-array", - "arrow-buffer", "arrow-ipc", "arrow-schema", "base64 0.22.1", @@ -1419,7 +1418,6 @@ version = "45.0.0" dependencies = [ "ahash", "arrow", - "arrow-buffer", "arrow-schema", "datafusion-common", "datafusion-doc", @@ -1451,7 +1449,6 @@ version = "45.0.0" dependencies = [ "arrow", "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "datafusion-common", @@ -1537,7 +1534,6 @@ dependencies = [ "ahash", "arrow", "arrow-array", - "arrow-buffer", "arrow-schema", "datafusion-common", "datafusion-expr", @@ -1559,7 +1555,6 @@ version = "45.0.0" dependencies = [ "ahash", "arrow", - "arrow-buffer", "datafusion-common", "datafusion-expr-common", "hashbrown 0.14.5", @@ -1593,7 +1588,6 @@ dependencies = [ "ahash", "arrow", "arrow-array", - "arrow-buffer", "arrow-ord", "arrow-schema", "async-trait",