Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/pola-rs/polars
Browse files Browse the repository at this point in the history
  • Loading branch information
rben01 committed Aug 25, 2023
2 parents 9f433f8 + 5d1b28a commit 704bbf7
Show file tree
Hide file tree
Showing 394 changed files with 8,034 additions and 4,607 deletions.
2 changes: 1 addition & 1 deletion .github/deploy_manylinux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ maturin publish \
# now compile polars with bigidx feature
sed -i 's/name = "polars"/name = "polars-u64-idx"/' pyproject.toml
# a brittle hack to insert the 'bigidx' feature
sed -i 's/"dynamic_groupby",/"dynamic_groupby",\n"bigidx",/' Cargo.toml
sed -i 's/"dynamic_group_by",/"dynamic_group_by",\n"bigidx",/' Cargo.toml

maturin publish \
--skip-existing \
Expand Down
19 changes: 19 additions & 0 deletions .github/workflows/clear-caches.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Clearing caches regularly takes care of Rust caches growing to problematic size over time

name: Clear caches

on:
  # Runs once a week: Mondays at 04:00 UTC.
  schedule:
    - cron: '0 4 * * MON'
  # Also allows triggering the cleanup manually from the Actions tab.
  workflow_dispatch:

jobs:
  clear-caches:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3

      # Delete every GitHub Actions cache entry for this repository.
      - name: Clear all caches
        run: gh cache delete --all
        env:
          # The gh CLI needs a token to authenticate against the GitHub API.
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
4 changes: 3 additions & 1 deletion .github/workflows/lint-py-polars.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ on:
pull_request:
paths:
- crates/**
- Cargo.toml
- py-polars/src/**
- py-polars/Cargo.toml
- .github/workflows/lint-py-polars.yml
Expand All @@ -12,6 +13,7 @@ on:
- main
paths:
- crates/**
- Cargo.toml
- py-polars/src/**
- py-polars/Cargo.toml
- .github/workflows/lint-py-polars.yml
Expand Down Expand Up @@ -43,4 +45,4 @@ jobs:
run: cargo fmt --all -- --check

- name: Run clippy
run: cargo clippy -- -D warnings
run: cargo clippy --locked -- -D warnings
2 changes: 1 addition & 1 deletion .github/workflows/release-python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ jobs:
run: |
sed -i 's/name = "polars"/name = "polars-u64-idx"/' py-polars/pyproject.toml
# A brittle hack to insert the 'bigidx' feature
sed -i 's/"dynamic_groupby",/"dynamic_groupby",\n"bigidx",/' py-polars/Cargo.toml
sed -i 's/"dynamic_group_by",/"dynamic_group_by",\n"bigidx",/' py-polars/Cargo.toml
- name: Publish wheel
uses: PyO3/maturin-action@v1
Expand Down
10 changes: 4 additions & 6 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,10 @@ xxhash-rust = { version = "0.8.6", features = ["xxh3"] }

[workspace.dependencies.arrow]
package = "arrow2"
# git = "https://github.com/jorgecarleitao/arrow2"
# git = "https://github.com/ritchie46/arrow2"
# rev = "9beabec8cfb5502582d31ab898fdd36e7af0873c"
# path = "../arrow2"
# branch = "duration_json"
version = "0.17.4"
git = "https://github.com/jorgecarleitao/arrow2"
rev = "ba6a882bc1542b0b899774b696ebea77482b5c31"
# branch = ""
# version = "0.17.4"
default-features = false
features = [
"compute_aggregate",
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ shape: (5, 8)
>>> # and continue in python
>>> lf = context.execute(query)
>>> (lf.join(other_table)
... .groupby("foo")
... .group_by("foo")
... .agg(
... pl.col("sum_v1").count()
... ).collect())
Expand Down Expand Up @@ -220,7 +220,7 @@ point to the `main` branch of this repo.
polars = { git = "https://github.com/pola-rs/polars", rev = "<optional git tag>" }
```

Required Rust version `>=1.62`
Required Rust version `>=1.65`.

## Contributing

Expand Down
4 changes: 2 additions & 2 deletions crates/polars-algo/src/algo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ pub fn hist(s: &Series, bins: Option<&Series>, bin_count: Option<usize>) -> Resu
DataType::UInt16 => (lit(u32::MIN), AnyValue::UInt16(u16::MAX)),
_ => polars_bail!(
InvalidOperation:
"cannot take histogram of non-numeric types; consider a groupby and count"
"cannot take histogram of non-numeric types; consider a group_by and count"
),
};
let mut bins = bins.extend_constant(max_value, 1)?;
Expand Down Expand Up @@ -92,7 +92,7 @@ pub fn hist(s: &Series, bins: Option<&Series>, bin_count: Option<usize>) -> Resu

let out = out
.select(["category", s.name()])?
.groupby(["category"])?
.group_by(["category"])?
.count()?;

cuts.left_join(&out, [category_str], [category_str])?
Expand Down
8 changes: 4 additions & 4 deletions crates/polars-arrow/src/array/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ macro_rules! iter_to_values {

pub trait ListFromIter {
/// Create a list-array from an iterator.
/// Used in groupby agg-list
/// Used in group_by agg-list
///
/// # Safety
/// Will produce incorrect arrays if size hint is incorrect.
Expand Down Expand Up @@ -136,7 +136,7 @@ pub trait ListFromIter {
}

/// Create a list-array from an iterator.
/// Used in groupby agg-list
/// Used in group_by agg-list
///
/// # Safety
/// Will produce incorrect arrays if size hint is incorrect.
Expand Down Expand Up @@ -166,7 +166,7 @@ pub trait ListFromIter {
}

/// Create a list-array from an iterator.
/// Used in groupby agg-list
/// Used in group_by agg-list
///
/// # Safety
/// Will produce incorrect arrays if size hint is incorrect.
Expand Down Expand Up @@ -212,7 +212,7 @@ pub trait ListFromIter {
}

/// Create a list-array from an iterator.
/// Used in groupby agg-list
/// Used in group_by agg-list
///
/// # Safety
/// Will produce incorrect arrays if size hint is incorrect.
Expand Down
14 changes: 14 additions & 0 deletions crates/polars-arrow/src/kernels/atan2.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use arrow::array::PrimitiveArray;
use arrow::compute::arity::binary;
use arrow::types::NativeType;
use num_traits::Float;

/// Computes the element-wise four-quadrant arctangent `atan2(arr_1[i], arr_2[i])`
/// for two primitive arrays of the same floating-point type.
///
/// The output array reuses `arr_1`'s data type. Null handling follows arrow2's
/// `binary` kernel — presumably the validities are combined so the result is
/// null wherever either input is null (TODO confirm against arrow2 docs).
pub fn atan2<T: NativeType>(
    arr_1: &PrimitiveArray<T>,
    arr_2: &PrimitiveArray<T>,
) -> PrimitiveArray<T>
where
    T: Float,
{
    // Zip the two arrays and apply `Float::atan2` to each pair of values.
    binary(arr_1, arr_2, arr_1.data_type().clone(), |a, b| a.atan2(b))
}
1 change: 1 addition & 0 deletions crates/polars-arrow/src/kernels/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ use arrow::array::BooleanArray;
use arrow::bitmap::utils::BitChunks;
#[cfg(feature = "simd")]
pub mod agg_mean;
pub mod atan2;
#[cfg(feature = "dtype-array")]
pub mod comparison;
pub mod concatenate;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ macro_rules! minmax_window {
};
let empty_overlap = old_last_end <= start;

if entering.is_some_and(|em| $new_is_m(&self.m, em.1) || empty_overlap) {
if entering.map(|em| $new_is_m(&self.m, em.1) || empty_overlap) == Some(true) {
// The entering extremum "beats" the previous extremum so we can ignore the overlap
self.update_m_and_m_idx(entering.unwrap());
return self.m;
Expand Down
3 changes: 2 additions & 1 deletion crates/polars-arrow/src/kernels/rolling/window.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ impl<'a, T: NativeType + IsFloat + PartialOrd> SortedBuf<'a, T> {
if start >= self.last_end {
self.buf.clear();
let new_window = self.slice.get_unchecked(start..end);
self.buf.extend_from_slice(new_window)
self.buf.extend_from_slice(new_window);
sort_buf(&mut self.buf);
} else {
// remove elements that should leave the window
for idx in self.last_start..start {
Expand Down
29 changes: 6 additions & 23 deletions crates/polars-arrow/src/kernels/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ use arrow::temporal_conversions::{
};
use chrono::{LocalResult, NaiveDateTime, TimeZone};
use chrono_tz::Tz;
use polars_error::polars_bail;

use crate::error::PolarsResult;

Expand Down Expand Up @@ -37,55 +36,39 @@ fn convert_to_naive_local(
}
}

fn convert_to_timestamp(
from_tz: Tz,
to_tz: Tz,
pub fn replace_time_zone(
arr: &PrimitiveArray<i64>,
tu: TimeUnit,
from_tz: &Tz,
to_tz: &Tz,
use_earliest: Option<bool>,
) -> PolarsResult<PrimitiveArray<i64>> {
let res = match tu {
TimeUnit::Millisecond => try_unary(
arr,
|value| {
let ndt = timestamp_ms_to_datetime(value);
Ok(convert_to_naive_local(&from_tz, &to_tz, ndt, use_earliest)?.timestamp_millis())
Ok(convert_to_naive_local(from_tz, to_tz, ndt, use_earliest)?.timestamp_millis())
},
ArrowDataType::Int64,
),
TimeUnit::Microsecond => try_unary(
arr,
|value| {
let ndt = timestamp_us_to_datetime(value);
Ok(convert_to_naive_local(&from_tz, &to_tz, ndt, use_earliest)?.timestamp_micros())
Ok(convert_to_naive_local(from_tz, to_tz, ndt, use_earliest)?.timestamp_micros())
},
ArrowDataType::Int64,
),
TimeUnit::Nanosecond => try_unary(
arr,
|value| {
let ndt = timestamp_ns_to_datetime(value);
Ok(convert_to_naive_local(&from_tz, &to_tz, ndt, use_earliest)?.timestamp_nanos())
Ok(convert_to_naive_local(from_tz, to_tz, ndt, use_earliest)?.timestamp_nanos())
},
ArrowDataType::Int64,
),
_ => unreachable!(),
};
Ok(res?)
}

pub fn replace_time_zone(
arr: &PrimitiveArray<i64>,
tu: TimeUnit,
from: &str,
to: &str,
use_earliest: Option<bool>,
) -> PolarsResult<PrimitiveArray<i64>> {
match from.parse::<chrono_tz::Tz>() {
Ok(from_tz) => match to.parse::<chrono_tz::Tz>() {
Ok(to_tz) => convert_to_timestamp(from_tz, to_tz, arr, tu, use_earliest),
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: '{}'", to),
},
Err(_) => polars_bail!(ComputeError: "unable to parse time zone: '{}'", from),
}
}
10 changes: 4 additions & 6 deletions crates/polars-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ lazy = []

# ~40% faster collect, needed until trusted-length iter stabilizes
# more fast paths, slower compilation
performant = ["polars-arrow/performant"]
performant = ["polars-arrow/performant", "reinterpret"]

# extra utilities for Utf8Chunked
strings = ["regex", "polars-arrow/strings", "arrow/compute_substring", "polars-error/regex"]
Expand All @@ -77,7 +77,6 @@ sort_multiple = []
rows = []

# operations
is_in = []
zip_with = []
round_series = []
checked_arithmetic = []
Expand All @@ -92,8 +91,8 @@ row_hash = []
reinterpret = []
take_opt_iter = []
mode = []
# allow groupby operation on list type
groupby_list = []
# allow group_by operation on list type
group_by_list = []
# cumsum, cummin, etc.
cum_agg = []
# rolling window functions
Expand All @@ -114,7 +113,7 @@ semi_anti_join = []
chunked_ids = []
describe = []
timezones = ["chrono-tz", "arrow/chrono-tz", "polars-arrow/timezones"]
dynamic_groupby = ["dtype-datetime", "dtype-date"]
dynamic_group_by = ["dtype-datetime", "dtype-date"]

# opt-in datatypes for Series
dtype-date = ["temporal"]
Expand All @@ -141,7 +140,6 @@ serde-lazy = ["serde", "polars-arrow/serde", "indexmap/serde", "smartstring/serd

docs-selection = [
"ndarray",
"is_in",
"rows",
"docs",
"strings",
Expand Down
1 change: 1 addition & 0 deletions crates/polars-core/src/chunked_array/arithmetic/decimal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use polars_arrow::compute::arithmetics::decimal;

use super::*;
use crate::prelude::DecimalChunked;
use crate::utils::align_chunks_binary;

// TODO: remove
impl ArrayArithmetics for i128 {
Expand Down
18 changes: 4 additions & 14 deletions crates/polars-core/src/chunked_array/arithmetic/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ use polars_arrow::utils::combine_validities_and;

use crate::prelude::*;
use crate::series::IsSorted;
use crate::utils::{align_chunks_binary, align_chunks_binary_owned};
use crate::utils::align_chunks_binary_owned;

pub trait ArrayArithmetics
where
Expand Down Expand Up @@ -148,12 +148,7 @@ impl Add for &BinaryChunked {
};
}

let (lhs, rhs) = align_chunks_binary(self, rhs);
let chunks = lhs
.downcast_iter()
.zip(rhs.downcast_iter())
.map(|(a, b)| concat_binary(a, b));
ChunkedArray::from_chunk_iter(self.name(), chunks)
arity::binary(self, rhs, concat_binary)
}
}

Expand Down Expand Up @@ -194,20 +189,15 @@ impl Add for &BooleanChunked {
if rhs.len() == 1 {
let rhs = rhs.get(0);
return match rhs {
Some(rhs) => self.apply_cast_numeric(|v| v as IdxSize + rhs as IdxSize),
Some(rhs) => self.apply_values_generic(|v| v as IdxSize + rhs as IdxSize),
None => IdxCa::full_null(self.name(), self.len()),
};
}
// Broadcasting path lhs.
if self.len() == 1 {
return rhs.add(self);
}
let (lhs, rhs) = align_chunks_binary(self, rhs);
let chunks = lhs
.downcast_iter()
.zip(rhs.downcast_iter())
.map(|(a, b)| add_boolean(a, b));
ChunkedArray::from_chunk_iter(self.name(), chunks)
arity::binary(self, rhs, add_boolean)
}
}

Expand Down
Loading

0 comments on commit 704bbf7

Please sign in to comment.