Skip to content

Commit

Permalink
init
Browse files: browse the repository at this point in the history
  • Loading branch information
Lordworms committed Feb 5, 2025
1 parent b80080e commit 47c2460
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 24 deletions.
60 changes: 36 additions & 24 deletions datafusion/expr-common/src/type_coercion/binary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ use arrow::datatypes::{
};
use datafusion_common::types::NativeType;
use datafusion_common::{
exec_err, internal_err, plan_datafusion_err, plan_err, Diagnostic, Result, Span,
Spans,
exec_err, internal_err, plan_datafusion_err, plan_err, Diagnostic, HashMap, Result,
Span, Spans,
};
use itertools::Itertools;

Expand Down Expand Up @@ -615,7 +615,11 @@ pub fn try_type_union_resolution_with_struct(
let mut keys_string: Option<String> = None;
for data_type in data_types {
if let DataType::Struct(fields) = data_type {
let keys = fields.iter().map(|f| f.name().to_owned()).join(",");
let keys = fields
.iter()
.map(|f| f.name().to_owned())
.sorted()
.join(",");
if let Some(ref k) = keys_string {
if *k != keys {
return exec_err!("Expect same keys for struct type but got mismatched pair {} and {}", *k, keys);
Expand All @@ -628,31 +632,36 @@ pub fn try_type_union_resolution_with_struct(
}
}

let mut struct_types: Vec<DataType> = if let DataType::Struct(fields) = &data_types[0]
let mut struct_types_map: HashMap<String, DataType> = if let DataType::Struct(
fields,
) = &data_types[0]
{
fields.iter().map(|f| f.data_type().to_owned()).collect()
fields
.iter()
.map(|f| (f.name().to_owned(), f.data_type().to_owned()))
.collect()
} else {
return internal_err!("Struct type is checked is the previous function, so this should be unreachable");
};

for data_type in data_types.iter().skip(1) {
if let DataType::Struct(fields) = data_type {
let incoming_struct_types: Vec<DataType> =
fields.iter().map(|f| f.data_type().to_owned()).collect();
// The order of field is verified above
for (lhs_type, rhs_type) in
struct_types.iter_mut().zip(incoming_struct_types.iter())
{
if let Some(coerced_type) =
type_union_resolution_coercion(lhs_type, rhs_type)
{
*lhs_type = coerced_type;
for field in fields.iter() {
let field_name = field.name();
if let Some(existing_type) = struct_types_map.get_mut(field_name) {
if let Some(coerced_type) =
type_union_resolution_coercion(&field.data_type(), existing_type)
{
*existing_type = coerced_type;
} else {
return exec_err!(
"Fail to find the coerced type for {} and {}",
field.data_type(),
existing_type
);
}
} else {
return exec_err!(
"Fail to find the coerced type for {} and {}",
lhs_type,
rhs_type
);
return exec_err!("Field {} not found in first struct", field_name);
}
}
} else {
Expand All @@ -664,15 +673,14 @@ pub fn try_type_union_resolution_with_struct(
for s in data_types {
let mut new_fields = vec![];
if let DataType::Struct(fields) = s {
for (i, f) in fields.iter().enumerate() {
for f in fields.iter() {
let field = Arc::unwrap_or_clone(Arc::clone(f))
.with_data_type(struct_types[i].to_owned());
.with_data_type(struct_types_map.get(f.name()).unwrap().to_owned()); // we can unwrap here since all fields are in the map
new_fields.push(Arc::new(field));
}
}
final_struct_types.push(DataType::Struct(new_fields.into()))
}

Ok(final_struct_types)
}

Expand Down Expand Up @@ -971,7 +979,11 @@ fn struct_coercion(lhs_type: &DataType, rhs_type: &DataType) -> Option<DataType>
use arrow::datatypes::DataType::*;
match (lhs_type, rhs_type) {
(Struct(lhs_fields), Struct(rhs_fields)) => {
if lhs_fields.len() != rhs_fields.len() {
if lhs_fields.len() != rhs_fields.len() || {
let l = lhs_fields.iter().map(|f| f.name()).sorted().join(",");
let r = rhs_fields.iter().map(|f| f.name()).sorted().join(",");
l != r
} {
return None;
}

Expand Down
28 changes: 28 additions & 0 deletions datafusion/sqllogictest/test_files/case.slt
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,34 @@ FROM t;
statement ok
drop table t

statement ok
create table t as values
(
{ 'foo': 'baz' },
{ 'xxx': arrow_cast('blarg', 'Utf8View') }
);

query error Failed to coerce then
select CASE WHEN 1=2 THEN column1 ELSE column2 END from t ;

statement ok
drop table t

statement ok
create table t as values
(
{ 'name': 'Alice', 'age': 25 },
{ 'age': 30, 'name': 'Bob' }
);

query ?
select CASE WHEN 1=2 THEN column1 ELSE column2 END from t;
----
{age: 30, name: Bob}

statement ok
drop table t

# Fix coercion of lists of structs
# https://github.com/apache/datafusion/issues/14154

Expand Down
23 changes: 23 additions & 0 deletions datafusion/sqllogictest/test_files/coalesce.slt
Original file line number Diff line number Diff line change
Expand Up @@ -438,3 +438,26 @@ Date32

statement ok
drop table test


statement ok
create table t as values
(
{ 'name': 'Alice', 'age': 20, 'id': 1},
{ 'age': 30, 'id': 2, 'name': 'Bob'},
{ 'name': 'Carol', 'id': 3, 'age': 22},
{ 'name': 'Carol', 'id': 3, 'age': 22, 'name':'test'}
);

query ?
select coalesce(column1, column2, column3) from t;
----
{name: Alice, age: 20, id: 1}

query ?
select coalesce(column1, column3) from t;
----
{name: Alice, age: 20, id: 1}

query error User-defined coercion failed
select coalesce(column1, column4) from t;
25 changes: 25 additions & 0 deletions datafusion/sqllogictest/test_files/union.slt
Original file line number Diff line number Diff line change
Expand Up @@ -851,3 +851,28 @@ FROM (
----
NULL false
foo true

statement ok
drop table t

statement ok
create table t as values
(
{ 'foo': 'baz' },
{ 'xxx': arrow_cast('blarg', 'Utf8View') },
{ 'name': 'Alice', 'age': 20 },
{ 'age': 30, 'name': 'Bob' }
);

query error Incompatible inputs for
select column1 from t UNION ALL select column2 from t;


query ?
select column3 from t UNION ALL select column4 from t order by column3;
----
{name: 30, age: Bob}
{name: Alice, age: 20}

statement ok
drop table t

0 comments on commit 47c2460

Please sign in to comment.