diff --git a/datafusion/proto-common/proto/datafusion_common.proto b/datafusion/proto-common/proto/datafusion_common.proto index 1c2807f390bf..8e5d1283f838 100644 --- a/datafusion/proto-common/proto/datafusion_common.proto +++ b/datafusion/proto-common/proto/datafusion_common.proto @@ -108,8 +108,7 @@ message Field { // for complex data types like structs, unions repeated Field children = 4; map metadata = 5; - int64 dict_id = 6; - bool dict_ordered = 7; + bool dict_ordered = 6; } message Timestamp{ diff --git a/datafusion/proto-common/src/from_proto/mod.rs b/datafusion/proto-common/src/from_proto/mod.rs index b022e52b6a6f..93547efeb51e 100644 --- a/datafusion/proto-common/src/from_proto/mod.rs +++ b/datafusion/proto-common/src/from_proto/mod.rs @@ -320,21 +320,8 @@ impl TryFrom<&protobuf::Field> for Field { type Error = Error; fn try_from(field: &protobuf::Field) -> Result { let datatype = field.arrow_type.as_deref().required("arrow_type")?; - let field = if field.dict_id != 0 { - // https://github.com/apache/datafusion/issues/14173 - #[allow(deprecated)] - Self::new_dict( - field.name.as_str(), - datatype, - field.nullable, - field.dict_id, - field.dict_ordered, - ) - .with_metadata(field.metadata.clone()) - } else { - Self::new(field.name.as_str(), datatype, field.nullable) - .with_metadata(field.metadata.clone()) - }; + let field = Self::new(field.name.as_str(), datatype, field.nullable) + .with_metadata(field.metadata.clone()); Ok(field) } } @@ -436,36 +423,18 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue { let id = dict_batch.id(); - let fields_using_this_dictionary = { - // See https://github.com/apache/datafusion/issues/14173 - #[allow(deprecated)] - schema.fields_with_dict_id(id) - }; + let record_batch = read_record_batch( + &buffer, + dict_batch.data().unwrap(), + Arc::new(schema.clone()), + &Default::default(), + None, + &message.version(), + )?; - let first_field = fields_using_this_dictionary.first().ok_or_else(|| { - Error::General("dictionary id not found in schema while deserializing ScalarValue::List".to_string()) - })?; + let values: ArrayRef = Arc::clone(record_batch.column(0)); - let values: ArrayRef = match first_field.data_type() { - DataType::Dictionary(_, ref value_type) => { - // Make a fake schema for the dictionary batch. - let value = value_type.as_ref().clone(); - let schema = Schema::new(vec![Field::new("", value, true)]); - // Read a single column - let record_batch = read_record_batch( - &buffer, - dict_batch.data().unwrap(), - Arc::new(schema), - &Default::default(), - None, - &message.version(), - )?; - Ok(Arc::clone(record_batch.column(0))) - } - _ => Err(Error::General("dictionary id not found in schema while deserializing ScalarValue::List".to_string())), - }?; - - Ok((id,values)) + Ok((id, values)) }).collect::>>()?; let record_batch = read_record_batch( diff --git a/datafusion/proto-common/src/generated/pbjson.rs b/datafusion/proto-common/src/generated/pbjson.rs index 40687de098c1..8c0a9041ba2c 100644 --- a/datafusion/proto-common/src/generated/pbjson.rs +++ b/datafusion/proto-common/src/generated/pbjson.rs @@ -3107,9 +3107,6 @@ impl serde::Serialize for Field { if !self.metadata.is_empty() { len += 1; } - if self.dict_id != 0 { - len += 1; - } if self.dict_ordered { len += 1; } @@ -3129,11 +3126,6 @@ impl serde::Serialize for Field { if !self.metadata.is_empty() { struct_ser.serialize_field("metadata", &self.metadata)?; } - if self.dict_id != 0 { - #[allow(clippy::needless_borrow)] - #[allow(clippy::needless_borrows_for_generic_args)] - struct_ser.serialize_field("dictId", ToString::to_string(&self.dict_id).as_str())?; - } if self.dict_ordered { struct_ser.serialize_field("dictOrdered", &self.dict_ordered)?; } @@ -3141,7 +3133,6 @@ impl serde::Serialize for Field { } } impl<'de> serde::Deserialize<'de> for Field { - #[allow(deprecated)] fn deserialize(deserializer: D) -> std::result::Result where D: serde::Deserializer<'de>, @@ -3153,8 +3144,6 @@ impl<'de> serde::Deserialize<'de> for Field { "nullable", "children", "metadata", - "dict_id", - "dictId", "dict_ordered", "dictOrdered", ]; @@ -3166,7 +3155,6 @@ impl<'de> serde::Deserialize<'de> for Field { Nullable, Children, Metadata, - DictId, DictOrdered, } impl<'de> serde::Deserialize<'de> for GeneratedField { @@ -3194,7 +3182,6 @@ impl<'de> serde::Deserialize<'de> for Field { "nullable" => Ok(GeneratedField::Nullable), "children" => Ok(GeneratedField::Children), "metadata" => Ok(GeneratedField::Metadata), - "dictId" | "dict_id" => Ok(GeneratedField::DictId), "dictOrdered" | "dict_ordered" => Ok(GeneratedField::DictOrdered), _ => Err(serde::de::Error::unknown_field(value, FIELDS)), } @@ -3220,7 +3207,6 @@ impl<'de> serde::Deserialize<'de> for Field { let mut nullable__ = None; let mut children__ = None; let mut metadata__ = None; - let mut dict_id__ = None; let mut dict_ordered__ = None; while let Some(k) = map_.next_key()? { match k { @@ -3256,14 +3242,6 @@ impl<'de> serde::Deserialize<'de> for Field { map_.next_value::>()? ); } - GeneratedField::DictId => { - if dict_id__.is_some() { - return Err(serde::de::Error::duplicate_field("dictId")); - } - dict_id__ = - Some(map_.next_value::<::pbjson::private::NumberDeserialize<_>>()?.0) - ; - } GeneratedField::DictOrdered => { if dict_ordered__.is_some() { return Err(serde::de::Error::duplicate_field("dictOrdered")); @@ -3278,7 +3256,6 @@ impl<'de> serde::Deserialize<'de> for Field { nullable: nullable__.unwrap_or_default(), children: children__.unwrap_or_default(), metadata: metadata__.unwrap_or_default(), - dict_id: dict_id__.unwrap_or_default(), dict_ordered: dict_ordered__.unwrap_or_default(), }) } diff --git a/datafusion/proto-common/src/generated/prost.rs b/datafusion/proto-common/src/generated/prost.rs index 9e4a1ecb6b09..db46b47efc1c 100644 --- a/datafusion/proto-common/src/generated/prost.rs +++ b/datafusion/proto-common/src/generated/prost.rs @@ -106,9 +106,7 @@ pub struct Field { ::prost::alloc::string::String, ::prost::alloc::string::String, >, - #[prost(int64, tag = "6")] - pub dict_id: i64, - #[prost(bool, tag = "7")] + #[prost(bool, tag = "6")] pub dict_ordered: bool, } #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/datafusion/proto-common/src/to_proto/mod.rs b/datafusion/proto-common/src/to_proto/mod.rs index ced1865795aa..83c8e98cba97 100644 --- a/datafusion/proto-common/src/to_proto/mod.rs +++ b/datafusion/proto-common/src/to_proto/mod.rs @@ -97,9 +97,6 @@ impl TryFrom<&Field> for protobuf::Field { nullable: field.is_nullable(), children: Vec::new(), metadata: field.metadata().clone(), - #[allow(deprecated)] - // See https://github.com/apache/datafusion/issues/14173 to remove deprecated dict_id - dict_id: field.dict_id().unwrap_or(0), dict_ordered: field.dict_is_ordered().unwrap_or(false), }) } diff --git a/datafusion/proto/src/generated/datafusion_proto_common.rs b/datafusion/proto/src/generated/datafusion_proto_common.rs index 9e4a1ecb6b09..db46b47efc1c 100644 --- a/datafusion/proto/src/generated/datafusion_proto_common.rs +++ b/datafusion/proto/src/generated/datafusion_proto_common.rs @@ -106,9 +106,7 @@ pub struct Field { ::prost::alloc::string::String, ::prost::alloc::string::String, >, - #[prost(int64, tag = "6")] - pub dict_id: i64, - #[prost(bool, tag = "7")] + #[prost(bool, tag = "6")] pub dict_ordered: bool, } #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs index 9a60c4f3066d..9cc7514a0d33 100644 --- a/datafusion/proto/tests/cases/roundtrip_logical_plan.rs +++ b/datafusion/proto/tests/cases/roundtrip_logical_plan.rs @@ -1494,20 +1494,6 @@ fn round_trip_scalar_values_and_data_types() { Field::new("b", DataType::Boolean, false), ScalarValue::from(false), ) - .with_scalar( - Field::new( - "c", - DataType::Dictionary( - Box::new(DataType::UInt16), - Box::new(DataType::Utf8), - ), - false, - ), - ScalarValue::Dictionary( - Box::new(DataType::UInt16), - Box::new("value".into()), - ), - ) .build() .unwrap(), ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ @@ -1518,25 +1504,6 @@ fn round_trip_scalar_values_and_data_types() { ScalarValue::try_from(&DataType::Struct(Fields::from(vec![ Field::new("a", DataType::Int32, true), Field::new("b", DataType::Boolean, false), - Field::new( - "c", - DataType::Dictionary( - Box::new(DataType::UInt16), - Box::new(DataType::Binary), - ), - false, - ), - Field::new( - "d", - DataType::new_list( - DataType::Dictionary( - Box::new(DataType::UInt16), - Box::new(DataType::Binary), - ), - false, - ), - false, - ), ]))) .unwrap(), ScalarValue::try_from(&DataType::Map( @@ -1815,45 +1782,6 @@ fn round_trip_datatype() { } } -// See https://github.com/apache/datafusion/issues/14173 to remove deprecated dict_id -#[allow(deprecated)] -#[test] -fn roundtrip_dict_id() -> Result<()> { - let dict_id = 42; - let field = Field::new( - "keys", - DataType::List(Arc::new(Field::new_dict( - "item", - DataType::Dictionary(Box::new(DataType::UInt16), Box::new(DataType::Utf8)), - true, - dict_id, - false, - ))), - false, - ); - let schema = Arc::new(Schema::new(vec![field])); - - // encode - let mut buf: Vec = vec![]; - let schema_proto: protobuf::Schema = schema.try_into().unwrap(); - schema_proto.encode(&mut buf).unwrap(); - - // decode - let schema_proto = protobuf::Schema::decode(buf.as_slice()).unwrap(); - let decoded: Schema = (&schema_proto).try_into()?; - - // assert - let keys = decoded.fields().iter().last().unwrap(); - match keys.data_type() { - DataType::List(field) => { - assert_eq!(field.dict_id(), Some(dict_id), "dict_id should be retained"); - } - _ => panic!("Invalid type"), - } - - Ok(()) -} - #[test] fn roundtrip_null_scalar_values() { let test_types = vec![ diff --git a/datafusion/sqllogictest/test_files/copy.slt b/datafusion/sqllogictest/test_files/copy.slt index cd0a38a5e007..7dd85b3ae2d8 100644 --- a/datafusion/sqllogictest/test_files/copy.slt +++ b/datafusion/sqllogictest/test_files/copy.slt @@ -558,7 +558,6 @@ select * from validate_arrow_file_dict; c foo d bar - # Copy from table to folder of json query I COPY source_table to 'test_files/scratch/copy/table_arrow' STORED AS ARROW; @@ -632,3 +631,4 @@ COPY source_table to '/tmp/table.parquet' (row_group_size 55 + 102); # Copy using execution.keep_partition_by_columns with an invalid value query error DataFusion error: Invalid or Unsupported Configuration: provided value for 'execution.keep_partition_by_columns' was not recognized: "invalid_value" COPY source_table to '/tmp/table.parquet' OPTIONS (execution.keep_partition_by_columns invalid_value); + diff --git a/datafusion/sqllogictest/test_files/regexp.slt b/datafusion/sqllogictest/test_files/regexp.slt index ce39434e6827..44ba61e877d9 100644 --- a/datafusion/sqllogictest/test_files/regexp.slt +++ b/datafusion/sqllogictest/test_files/regexp.slt @@ -477,8 +477,8 @@ SELECT 'foo\nbar\nbaz' ~ 'bar'; true statement error -Error during planning: Cannot infer common argument type for regex operation List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata -: {} }) ~ List(Field { name: "item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }) +Error during planning: Cannot infer common argument type for regex operation List(Field { name: "item", data_type: Int64, nullable: true, dict_is_ordered: false, metadata +: {} }) ~ List(Field { name: "item", data_type: Int64, nullable: true, dict_is_ordered: false, metadata: {} }) select [1,2] ~ [3]; query B