Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/barak1412/polars into fix_j…
Browse files Browse the repository at this point in the history
…oin_nulls
  • Loading branch information
barak1412 committed Nov 8, 2024
2 parents e02aa32 + 3cdb7c2 commit c221925
Show file tree
Hide file tree
Showing 102 changed files with 1,266 additions and 823 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/lint-global.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,4 +15,4 @@ jobs:
- name: Lint Markdown and TOML
uses: dprint/check@v2.2
- name: Spell Check with Typos
uses: crate-ci/typos@v1.26.8
uses: crate-ci/typos@v1.27.2
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions crates/polars-arrow/src/array/boolean/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -357,8 +357,8 @@ impl BooleanArray {
(dtype, values, validity)
}

/// Creates a `[BooleanArray]` from its internal representation.
/// This is the inverted from `[BooleanArray::into_inner]`
/// Creates a [`BooleanArray`] from its internal representation.
/// This is the inverse of [`BooleanArray::into_inner`].
///
/// # Safety
/// Callers must ensure all invariants of this struct are upheld.
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/array/primitive/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -311,8 +311,8 @@ impl<T: NativeType> PrimitiveArray<T> {
(dtype, values, validity)
}

/// Creates a `[PrimitiveArray]` from its internal representation.
/// This is the inverted from `[PrimitiveArray::into_inner]`
/// Creates a [`PrimitiveArray`] from its internal representation.
/// This is the inverse of [`PrimitiveArray::into_inner`].
pub fn from_inner(
dtype: ArrowDataType,
values: Buffer<T>,
Expand All @@ -322,8 +322,8 @@ impl<T: NativeType> PrimitiveArray<T> {
Ok(unsafe { Self::from_inner_unchecked(dtype, values, validity) })
}

/// Creates a `[PrimitiveArray]` from its internal representation.
/// This is the inverted from `[PrimitiveArray::into_inner]`
/// Creates a [`PrimitiveArray`] from its internal representation.
/// This is the inverse of [`PrimitiveArray::into_inner`].
///
/// # Safety
/// Callers must ensure all invariants of this struct are upheld.
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-arrow/src/bitmap/immutable.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,8 +472,8 @@ impl Bitmap {
}
}

/// Creates a `[Bitmap]` from its internal representation.
/// This is the inverted from `[Bitmap::into_inner]`
/// Creates a [`Bitmap`] from its internal representation.
/// This is the inverse of [`Bitmap::into_inner`].
///
/// # Safety
/// Callers must ensure all invariants of this struct are upheld.
Expand Down
Empty file.
9 changes: 9 additions & 0 deletions crates/polars-core/src/chunked_array/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,13 @@ impl ArrayChunked {

ArrayChunked::try_from_chunk_iter(self.name().clone(), chunks)
}

/// Descend through nested array levels and return the innermost series.
///
/// Starts from this array's inner series; for as long as that series can
/// itself be viewed as an array (`try_array` yields `Some`), it is replaced
/// by that array's own inner series. The first series for which `try_array`
/// yields `None` is returned.
pub fn get_leaf_array(&self) -> Series {
    let mut leaf = self.get_inner();
    loop {
        // Peel off one nesting level, or stop once the series is no longer an array.
        let inner = match leaf.try_array() {
            Some(nested) => nested.get_inner(),
            None => return leaf,
        };
        leaf = inner;
    }
}
}
9 changes: 9 additions & 0 deletions crates/polars-core/src/chunked_array/from_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,15 @@ where
}
}

impl FromIterator<Option<Column>> for ListChunked {
fn from_iter<T: IntoIterator<Item = Option<Column>>>(iter: T) -> Self {
ListChunked::from_iter(
iter.into_iter()
.map(|c| c.map(|c| c.take_materialized_series())),
)
}
}

impl FromIterator<Option<Series>> for ListChunked {
#[inline]
fn from_iter<I: IntoIterator<Item = Option<Series>>>(iter: I) -> Self {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,7 @@ impl CategoricalChunked {
}
}

/// Create an `[Iterator]` that iterates over the `&str` values of the `[CategoricalChunked]`.
/// Create an [`Iterator`] that iterates over the `&str` values of the [`CategoricalChunked`].
pub fn iter_str(&self) -> CatIter<'_> {
let iter = self.physical().into_iter();
CatIter {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ unsafe fn any_as_u8_slice<T: Sized>(p: &T) -> &[u8] {
std::slice::from_raw_parts((p as *const T) as *const u8, size_of::<T>())
}

/// Create an extension Array that can be sent to arrow and (once wrapped in `[PolarsExtension]` will
/// Create an extension Array that can be sent to arrow and (once wrapped in [`PolarsExtension`]) will
/// also call drop on `T` when the array is dropped.
pub(crate) fn create_extension<I: Iterator<Item = Option<T>> + TrustedLen, T: Sized + Default>(
iter: I,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ impl PolarsExtension {
Self { array: Some(array) }
}

/// Take the Array hold by `[PolarsExtension]` and forget polars extension,
/// Take the Array held by [`PolarsExtension`] and forget polars extension,
/// so that drop is not called
pub(crate) fn take_and_forget(self) -> FixedSizeBinaryArray {
let mut md = ManuallyDrop::new(self);
Expand Down Expand Up @@ -57,15 +57,15 @@ impl PolarsExtension {
}
}

/// Calls the heap allocated function in the `[ExtensionSentinel]` that knows
/// how to convert the `[FixedSizeBinaryArray]` to a `Series` of type `[ObjectChunked<T>]`
/// Calls the heap allocated function in the [`ExtensionSentinel`] that knows
/// how to convert the [`FixedSizeBinaryArray`] to a `Series` of type [`ObjectChunked<T>`]
pub(crate) unsafe fn get_series(&self, name: &PlSmallStr) -> Series {
    self.with_sentinel(|sent| {
        // Look up the stored conversion function first, then the wrapped array;
        // either `unwrap` panics if the corresponding option is `None`.
        let to_series = sent.to_series_fn.as_ref().unwrap();
        let array = self.array.as_ref().unwrap();
        to_series(array, name)
    })
}

// heap allocates a function that converts the binary array to a Series of `[ObjectChunked<T>]`
// heap allocates a function that converts the binary array to a Series of [`ObjectChunked<T>`]
// the `name` will be the `name` of the output `Series` when this function is called (later).
pub(crate) unsafe fn set_to_series_fn<T: PolarsObject>(&mut self) {
let f = Box::new(move |arr: &FixedSizeBinaryArray, name: &PlSmallStr| {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/fill_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ fn fill_with_gather<F: Fn(&Bitmap) -> Vec<IdxSize>>(

let idx = bits_to_idx(validity);

Ok(unsafe { s.take_unchecked_from_slice(&idx) })
Ok(unsafe { s.take_slice_unchecked(&idx) })
}

fn fill_forward_gather(s: &Series) -> PolarsResult<Series> {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ impl StructChunked {
unsafe { DataFrame::new_no_checks(self.len(), columns) }
}

/// Get access to one of this `[StructChunked]`'s fields
/// Get access to one of this [`StructChunked`]'s fields
pub fn field_by_name(&self, name: &str) -> PolarsResult<Series> {
self.fields_as_series()
.into_iter()
Expand Down
69 changes: 3 additions & 66 deletions crates/polars-core/src/frame/column/arithmetic.rs
Original file line number Diff line number Diff line change
@@ -1,70 +1,7 @@
use num_traits::{Num, NumCast};
use polars_error::{polars_bail, PolarsResult};
use polars_error::PolarsResult;

use super::{Column, ScalarColumn, Series};
use crate::utils::Container;

/// Compute the length of the result of a binary operation on `a` and `b`.
///
/// * If `a` has length 1, the result has the length of `b`.
/// * Otherwise, if `b` has length 1 or both lengths are equal, the result has
///   the length of `a`.
///
/// # Errors
/// Returns an `InvalidOperation` error when the lengths differ and neither
/// of them is 1.
fn output_length(a: &Column, b: &Column) -> PolarsResult<usize> {
    let (lhs_len, rhs_len) = (a.len(), b.len());

    // Left side is a unit column: it broadcasts to the right side's length.
    if lhs_len == 1 {
        return Ok(rhs_len);
    }
    // Right side broadcasts to the left, or both sides already agree.
    if rhs_len == 1 || lhs_len == rhs_len {
        return Ok(lhs_len);
    }

    polars_bail!(InvalidOperation: "cannot do arithmetic operation on series of different lengths: got {} and {}", lhs_len, rhs_len)
}

/// Apply `op` to two series of at most one element each and wrap the result
/// in a scalar column of the requested `length`.
///
/// In debug builds, asserts that both inputs have length `<= 1`.
///
/// # Errors
/// Propagates any error returned by `op`.
fn unit_series_op<F: Fn(&Series, &Series) -> PolarsResult<Series>>(
    l: &Series,
    r: &Series,
    op: F,
    length: usize,
) -> PolarsResult<Column> {
    debug_assert!(l.len() <= 1);
    debug_assert!(r.len() <= 1);

    let unit = op(l, r)?;
    let scalar = ScalarColumn::from_single_value_series(unit, length);
    Ok(Column::from(scalar))
}

/// Apply the binary series operation `op` to two columns.
///
/// The output length is validated up front via `output_length`. When one side
/// is a scalar column, it is turned into a single-value series; if the other
/// side also has length 1 (or is a scalar column too), the operation goes
/// through `unit_series_op` and yields a scalar column of the output length.
/// In every other case `op` is applied directly and is relied upon to perform
/// its own broadcasting.
///
/// # Errors
/// Returns the error from `output_length` on a length mismatch, or any error
/// produced by `op`.
fn op_with_broadcast<F: Fn(&Series, &Series) -> PolarsResult<Series>>(
    l: &Column,
    r: &Column,
    op: F,
) -> PolarsResult<Column> {
    // Here we rely on the underlying broadcast operations.

    let length = output_length(l, r)?;
    match (l, r) {
        (Column::Series(lhs), Column::Scalar(rhs)) => {
            let rhs = rhs.as_single_value_series();
            if lhs.len() != 1 {
                return op(lhs, &rhs).map(Column::from);
            }
            unit_series_op(lhs, &rhs, op, length)
        },
        (Column::Scalar(lhs), Column::Series(rhs)) => {
            let lhs = lhs.as_single_value_series();
            if rhs.len() != 1 {
                return op(&lhs, rhs).map(Column::from);
            }
            unit_series_op(&lhs, rhs, op, length)
        },
        (Column::Scalar(lhs), Column::Scalar(rhs)) => {
            let lhs = lhs.as_single_value_series();
            let rhs = rhs.as_single_value_series();
            unit_series_op(&lhs, &rhs, op, length)
        },
        _ => {
            let out = op(l.as_materialized_series(), r.as_materialized_series())?;
            Ok(Column::from(out))
        },
    }
}

fn num_op_with_broadcast<T: Num + NumCast, F: Fn(&Series, T) -> Series>(
c: &'_ Column,
Expand All @@ -90,7 +27,7 @@ macro_rules! broadcastable_ops {

#[inline]
fn $op(self, rhs: Self) -> Self::Output {
op_with_broadcast(&self, &rhs, |l, r| l.$op(r))
self.try_apply_broadcasting_binary_elementwise(&rhs, |l, r| l.$op(r))
}
}

Expand All @@ -99,7 +36,7 @@ macro_rules! broadcastable_ops {

#[inline]
fn $op(self, rhs: Self) -> Self::Output {
op_with_broadcast(self, rhs, |l, r| l.$op(r))
self.try_apply_broadcasting_binary_elementwise(rhs, |l, r| l.$op(r))
}
}
)+
Expand Down
Loading

0 comments on commit c221925

Please sign in to comment.