Skip to content

Commit

Permalink
avoid realloc; mutable schema dtype iter/assign
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Oct 27, 2023
1 parent f3e3a26 commit e0f2537
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 19 deletions.
7 changes: 7 additions & 0 deletions crates/polars-core/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,13 @@ impl Schema {
self.inner.iter().map(|(_name, dtype)| dtype)
}

/// Returns an iterator yielding a mutable reference to each dtype in this schema.
///
/// The field names are discarded; only the `&mut DataType` half of every
/// entry produced by `self.inner.iter_mut()` is handed out, so callers can
/// reassign dtypes in place.
pub fn iter_dtypes_mut(
    &mut self,
) -> impl Iterator<Item = &mut DataType> + '_ + ExactSizeIterator {
    // Each entry is a `(name, dtype)` pair; keep just the dtype reference.
    self.inner.iter_mut().map(|entry| entry.1)
}

/// Iterates over references to the names in this schema
pub fn iter_names(&self) -> impl Iterator<Item = &SmartString> + '_ + ExactSizeIterator {
self.inner.iter().map(|(name, _dtype)| name)
Expand Down
32 changes: 13 additions & 19 deletions py-polars/src/dataframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,33 +54,26 @@ impl PyDataFrame {
schema_overrides_by_idx: Option<Vec<(usize, DataType)>>,
) -> PyResult<Self> {
// Object builder must be registered, this is done on import.
let inferred_schema =
let mut final_schema =
rows_to_schema_supertypes(&rows, infer_schema_length.map(|n| std::cmp::max(1, n)))
.map_err(PyPolarsErr::from)?;

// Replace inferred nulls with boolean and reset inferred decimals to `Decimal(None, None)`.
let mut final_schema =
Schema::from_iter(
inferred_schema
.iter_fields()
.map(|mut fld| match fld.data_type() {
DataType::Null => {
fld.coerce(DataType::Boolean);
fld
},
DataType::Decimal(_, _) => {
fld.coerce(DataType::Decimal(None, None));
fld
},
_ => fld,
}),
);
for dtype in final_schema.iter_dtypes_mut() {
match dtype {
DataType::Null => *dtype = DataType::Boolean,
DataType::Decimal(_, _) => *dtype = DataType::Decimal(None, None),
_ => (),
}
}

// Integrate explicit/inferred schema.
if let Some(schema) = schema {
for (i, (name, dtype)) in schema.into_iter().enumerate() {
if let Some((name_, dtype_)) = final_schema.get_at_index_mut(i) {
*name_ = name;

// If user sets dtype unknown, we use the inferred datatype.
// If the schema dtype is Unknown, keep the inferred datatype; otherwise the schema dtype replaces it.
if !matches!(dtype, DataType::Unknown) {
*dtype_ = dtype;
}
Expand All @@ -89,7 +82,8 @@ impl PyDataFrame {
}
}
}
// Optional per-field overrides; supersede default/inferred dtypes.

// Optional per-field overrides; these supersede default/inferred dtypes.
if let Some(overrides) = schema_overrides_by_idx {
for (i, dtype) in overrides {
if let Some((_, dtype_)) = final_schema.get_at_index_mut(i) {
Expand Down

0 comments on commit e0f2537

Please sign in to comment.