Skip to content

Commit

Permalink
perf(rust, python): speedup boolean apply (#10073)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jul 25, 2023
1 parent b15859b commit 700b965
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 3 deletions.
32 changes: 32 additions & 0 deletions polars/polars-arrow/src/bitmap/arity.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use arrow::bitmap::utils::{BitChunkIterExact, BitChunksExact};
use arrow::bitmap::{chunk_iter_to_vec, Bitmap};

/// Builds a new [`Bitmap`] by feeding the bits of `lhs` through `op`, one `u64` word at a time.
///
/// `op` is an [`FnMut`], so the closure is allowed to mutate state it captures.
/// The returned bitmap has the same length (in bits) as `lhs`.
///
/// When `lhs` starts at bit offset 0 its bytes are read through
/// [`BitChunksExact`]; otherwise the words come from `Bitmap::chunks`.
pub fn unary_mut<F>(lhs: &Bitmap, op: F) -> Bitmap
where
    F: FnMut(u64) -> u64,
{
    let (bytes, bit_offset, n_bits) = lhs.as_slice();

    if bit_offset != 0 {
        // Non-zero bit offset: let the bitmap hand out its own chunk iterator.
        return unary_impl(lhs.chunks::<u64>(), op, lhs.len());
    }

    // Zero offset: the raw byte slice can be chunked directly.
    unary_impl(BitChunksExact::<u64>::new(bytes, n_bits), op, lhs.len())
}

/// Runs `op` over every word yielded by `iter` plus its remainder and packs the
/// results into a [`Bitmap`] of `length` bits.
///
/// Call order: `op` is invoked on the remainder word first (unconditionally),
/// then on each full chunk in iteration order. The transformed remainder is
/// nevertheless emitted last, after all full chunks.
fn unary_impl<F, I>(iter: I, mut op: F, length: usize) -> Bitmap
where
    I: BitChunkIterExact<u64>,
    F: FnMut(u64) -> u64,
{
    // The remainder has to be read (and transformed) before `iter` is consumed below.
    let tail = std::iter::once(op(iter.remainder()));

    // TODO! this can be done without chaining
    let words = iter.map(op).chain(tail);

    Bitmap::from_u8_vec(chunk_iter_to_vec(words), length)
}
3 changes: 3 additions & 0 deletions polars/polars-arrow/src/bitmap/mod.rs
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
mod arity;
pub mod mutable;

pub use arity::*;
2 changes: 1 addition & 1 deletion polars/polars-arrow/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#![cfg_attr(feature = "simd", feature(portable_simd))]
pub mod array;
pub mod bit_util;
mod bitmap;
pub mod bitmap;
pub mod compute;
pub mod conversion;
pub mod data_types;
Expand Down
56 changes: 54 additions & 2 deletions polars/polars-core/src/chunked_array/ops/apply.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ use std::borrow::Cow;
use std::convert::TryFrom;

use arrow::array::{BooleanArray, PrimitiveArray};
use arrow::bitmap::utils::{get_bit_unchecked, set_bit_unchecked};
use polars_arrow::array::PolarsArray;
use polars_arrow::bitmap::unary_mut;
use polars_arrow::trusted_len::TrustedLenPush;

use crate::prelude::*;
Expand Down Expand Up @@ -319,14 +321,64 @@ impl<'a> ChunkApply<'a, bool, bool> for BooleanChunked {
where
F: Fn(bool) -> bool + Copy,
{
apply!(self, f)
self.apply_kernel(&|arr| {
let values = arrow::bitmap::unary(arr.values(), |chunk| {
let bytes = chunk.to_ne_bytes();

// Write results into a separate output buffer rather than back into `bytes`,
// as that might lead to better internal parallelism.
let mut out = 0u64.to_ne_bytes();
for i in 0..64 {
unsafe {
let val = get_bit_unchecked(&bytes, i);
let res = f(val);
set_bit_unchecked(&mut out, i, res)
};
}
u64::from_ne_bytes(out)
});
BooleanArray::from_data_default(values, arr.validity().cloned()).boxed()
})
}

/// Applies the fallible function `f` to the boolean values of every chunk and
/// returns a new array with the same name.
///
/// Values are processed one 64-bit word at a time through `unary_mut`; the
/// validity bitmap of each chunk is cloned into the output unchanged.
///
/// # Errors
///
/// Returns the first error produced by `f`. Once an error has been recorded,
/// `f` is not called again and the partially built output is discarded.
fn try_apply<F>(&self, f: F) -> PolarsResult<Self>
where
    F: Fn(bool) -> PolarsResult<bool> + Copy,
{
    // NOTE(review): the closure below receives whole 64-bit words, so `f` is
    // also evaluated on bits that are not valid values (padding in the trailing
    // word and, presumably, slots masked out by validity). An error raised for
    // such a bit is reported like any other -- confirm this is intended.
    let mut failed: Option<PolarsError> = None;

    let chunks = self
        .downcast_iter()
        .map(|arr| {
            let values = unary_mut(arr.values(), |chunk| {
                // The result is thrown away after a failure, so skip the work.
                if failed.is_some() {
                    return 0;
                }

                let bytes = chunk.to_ne_bytes();

                // Write results into a separate output buffer rather than back
                // into `bytes`, as that might lead to better internal parallelism.
                let mut out = 0u64.to_ne_bytes();
                for i in 0..64 {
                    // SAFETY: `bytes` holds exactly 64 bits and `i < 64`.
                    let val = unsafe { get_bit_unchecked(&bytes, i) };
                    match f(val) {
                        // SAFETY: `out` holds exactly 64 bits and `i < 64`.
                        Ok(res) => unsafe { set_bit_unchecked(&mut out, i, res) },
                        Err(e) => {
                            // Keep only the first error and stop calling `f`.
                            failed = Some(e);
                            break;
                        }
                    }
                }
                u64::from_ne_bytes(out)
            });
            BooleanArray::from_data_default(values, arr.validity().cloned()).boxed()
        })
        .collect::<Vec<_>>();

    if let Some(e) = failed {
        return Err(e);
    }

    // SAFETY: every chunk was built above as a `BooleanArray`, which is what
    // `BooleanChunked` stores.
    Ok(unsafe { BooleanChunked::from_chunks(self.name(), chunks) })
}

fn apply_on_opt<F>(&'a self, f: F) -> Self
Expand Down

0 comments on commit 700b965

Please sign in to comment.