Skip to content

Commit

Permalink
fix: use null type when read from unknown row (#12128)
Browse files Browse the repository at this point in the history
  • Loading branch information
reswqa authored Nov 1, 2023
1 parent 32b47a8 commit e469481
Show file tree
Hide file tree
Showing 6 changed files with 75 additions and 6 deletions.
6 changes: 5 additions & 1 deletion crates/polars-arrow/src/legacy/array/null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use crate::array::{Array, MutableArray, NullArray};
use crate::bitmap::MutableBitmap;
use crate::datatypes::DataType;

/// Growable all-null array: it stores no values, only how many null slots
/// it currently holds.
#[derive(Debug, Default)]
#[derive(Debug, Default, Clone)]
pub struct MutableNullArray {
// Number of null slots held by the array.
len: usize,
}
Expand Down Expand Up @@ -48,6 +48,10 @@ impl MutableArray for MutableNullArray {
}

impl MutableNullArray {
pub fn new(len: usize) -> Self {
MutableNullArray { len }
}

/// Grows the array by `null_count` additional null slots.
pub fn extend_nulls(&mut self, null_count: usize) {
self.len += null_count;
}
Expand Down
2 changes: 2 additions & 0 deletions crates/polars-core/src/chunked_array/builder/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ mod boolean;
#[cfg(feature = "dtype-array")]
pub mod fixed_size_list;
pub mod list;
mod null;
mod primitive;
mod utf8;

Expand All @@ -18,6 +19,7 @@ pub use boolean::*;
#[cfg(feature = "dtype-array")]
pub(crate) use fixed_size_list::*;
pub use list::*;
pub use null::*;
pub use primitive::*;
pub use utf8::*;

Expand Down
37 changes: 37 additions & 0 deletions crates/polars-core/src/chunked_array/builder/null.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
use arrow::legacy::array::null::MutableNullArray;

use super::*;
use crate::series::implementations::null::NullChunked;

/// Builder that accumulates null slots and produces a [`NullChunked`].
///
/// The slots are counted by a [`MutableNullArray`]; the builder itself only
/// adds the column's [`Field`] (name plus `DataType::Null`).
#[derive(Clone)]
pub struct NullChunkedBuilder {
    array_builder: MutableNullArray,
    pub(crate) field: Field,
}

impl NullChunkedBuilder {
    /// Creates a builder for a column called `name`.
    ///
    /// Note that `len` is forwarded to [`MutableNullArray::new`] as the
    /// *initial length*, not as a capacity hint: the builder starts out
    /// already holding `len` nulls. Pass `0` to start empty.
    pub fn new(name: &str, len: usize) -> Self {
        Self {
            array_builder: MutableNullArray::new(len),
            field: Field::new(name, DataType::Null),
        }
    }

    /// Appends a null slot into the builder
    #[inline]
    pub fn append_null(&mut self) {
        self.array_builder.push_null()
    }

    /// Consumes the builder and returns a [`NullChunked`] carrying the
    /// builder's field name and the number of nulls appended so far.
    pub fn finish(self) -> NullChunked {
        // The length is read straight from the builder; boxing a `NullArray`
        // just to ask for its length would be a needless allocation.
        NullChunked::new(
            Arc::from(self.field.name.as_str()),
            self.array_builder.len(),
        )
    }

    /// Forwards to the underlying array builder's `shrink_to_fit`.
    pub fn shrink_to_fit(&mut self) {
        self.array_builder.shrink_to_fit()
    }
}
22 changes: 22 additions & 0 deletions crates/polars-core/src/frame/row/av_buffer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use polars_utils::unreachable_unchecked_release;
use smartstring::alias::String as SmartString;

use super::*;
use crate::chunked_array::builder::NullChunkedBuilder;
#[cfg(feature = "dtype-struct")]
use crate::prelude::any_value::arr_to_any_value;

Expand Down Expand Up @@ -38,6 +39,7 @@ pub enum AnyValueBuffer<'a> {
Float32(PrimitiveChunkedBuilder<Float32Type>),
Float64(PrimitiveChunkedBuilder<Float64Type>),
Utf8(Utf8ChunkedBuilder),
Null(NullChunkedBuilder),
All(DataType, Vec<AnyValue<'a>>),
}

Expand Down Expand Up @@ -107,6 +109,7 @@ impl<'a> AnyValueBuffer<'a> {
(Time(builder), AnyValue::Time(v)) => builder.append_value(v),
#[cfg(feature = "dtype-time")]
(Time(builder), AnyValue::Null) => builder.append_null(),
(Null(builder), AnyValue::Null) => builder.append_null(),
// Struct and List can be recursive so use anyvalues for that
(All(_, vals), v) => vals.push(v),

Expand Down Expand Up @@ -237,6 +240,11 @@ impl<'a> AnyValueBuffer<'a> {
std::mem::swap(&mut new, b);
new.finish().into_series()
},
Null(b) => {
let mut new = NullChunkedBuilder::new(b.field.name(), 0);
std::mem::swap(&mut new, b);
new.finish().into_series()
},
All(dtype, vals) => {
let out = Series::from_any_values_and_dtype("", vals, dtype, false).unwrap();
let mut new = Vec::with_capacity(capacity);
Expand Down Expand Up @@ -287,6 +295,7 @@ impl From<(&DataType, usize)> for AnyValueBuffer<'_> {
Float32 => AnyValueBuffer::Float32(PrimitiveChunkedBuilder::new("", len)),
Float64 => AnyValueBuffer::Float64(PrimitiveChunkedBuilder::new("", len)),
Utf8 => AnyValueBuffer::Utf8(Utf8ChunkedBuilder::new("", len, len * 5)),
Null => AnyValueBuffer::Null(NullChunkedBuilder::new("", 0)),
// Struct and List can be recursive so use anyvalues for that
dt => AnyValueBuffer::All(dt.clone(), Vec::with_capacity(len)),
}
Expand Down Expand Up @@ -315,6 +324,7 @@ pub enum AnyValueBufferTrusted<'a> {
#[cfg(feature = "dtype-struct")]
// not the trusted variant!
Struct(Vec<(AnyValueBuffer<'a>, SmartString)>),
Null(NullChunkedBuilder),
All(DataType, Vec<AnyValue<'a>>),
}

Expand Down Expand Up @@ -349,6 +359,7 @@ impl<'a> AnyValueBufferTrusted<'a> {
b.add(AnyValue::Null);
}
},
Null(builder) => builder.append_null(),
All(_, vals) => vals.push(AnyValue::Null),
}
}
Expand Down Expand Up @@ -427,6 +438,12 @@ impl<'a> AnyValueBufferTrusted<'a> {
};
builder.append_value(*v)
},
Null(builder) => {
let AnyValue::Null = val else {
unreachable_unchecked_release!()
};
builder.append_null()
},
_ => {
unreachable_unchecked_release!()
},
Expand Down Expand Up @@ -601,6 +618,11 @@ impl<'a> AnyValueBufferTrusted<'a> {
.collect::<Vec<_>>();
StructChunked::new("", &v).unwrap().into_series()
},
Null(b) => {
let mut new = NullChunkedBuilder::new(b.field.name(), 0);
std::mem::swap(&mut new, b);
new.finish().into_series()
},
All(dtype, vals) => {
let mut swap_vals = Vec::with_capacity(capacity);
std::mem::swap(vals, &mut swap_vals);
Expand Down
8 changes: 3 additions & 5 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,12 +58,10 @@ impl PyDataFrame {
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
.map_err(PyPolarsErr::from)?;

// Replace inferred nulls with boolean and erase scale from inferred decimals.
// Erase scale from inferred decimals.
for dtype in final_schema.iter_dtypes_mut() {
match dtype {
DataType::Null => *dtype = DataType::Boolean,
DataType::Decimal(_, _) => *dtype = DataType::Decimal(None, None),
_ => (),
if let DataType::Decimal(_, _) = dtype {
*dtype = DataType::Decimal(None, None)
}
}

Expand Down
6 changes: 6 additions & 0 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -3593,3 +3593,9 @@ def test_interchange() -> None:
assert dfi.num_rows() == 2
assert dfi.get_column(0).dtype[1] == 64
assert dfi.get_column_by_name("c").get_buffers()["data"][0].bufsize == 6


def test_from_dicts_undeclared_column_dtype() -> None:
    """A schema column that appears in none of the input dicts gets the Null dtype."""
    rows = [{"a": 1, "b": 2}]
    frame = pl.from_dicts(rows, schema=["x"])
    assert frame.schema == {"x": pl.Null}

0 comments on commit e469481

Please sign in to comment.