From 868b9c0d3f1f24ec4a5947971903a6885e2ce8d0 Mon Sep 17 00:00:00 2001 From: Stijn de Gooijer Date: Sat, 9 Sep 2023 21:35:15 +0200 Subject: [PATCH] Rename --- .../logical/categorical/merge.rs | 4 +- .../logical/categorical/string_cache.rs | 60 +++++++++++-------- crates/polars-io/src/csv/read.rs | 4 +- .../src/csv/read_impl/batched_mmap.rs | 4 +- .../src/csv/read_impl/batched_read.rs | 4 +- crates/polars-io/src/parquet/read_impl.rs | 2 +- .../src/physical_plan/expressions/window.rs | 2 +- .../src/logical_plan/functions/mod.rs | 4 +- crates/polars/tests/it/core/joins.rs | 6 +- py-polars/src/functions/string_cache.rs | 6 +- 10 files changed, 52 insertions(+), 44 deletions(-) diff --git a/crates/polars-core/src/chunked_array/logical/categorical/merge.rs b/crates/polars-core/src/chunked_array/logical/categorical/merge.rs index cf5dbf0c301d9..a5251097bf96f 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/merge.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/merge.rs @@ -153,13 +153,13 @@ impl CategoricalChunked { mod test { use super::*; use crate::chunked_array::categorical::CategoricalChunkedBuilder; - use crate::{disable_string_cache, enable_string_cache, IUseStringCache}; + use crate::{disable_string_cache, enable_string_cache, StringCacheHolder}; #[test] fn test_merge_rev_map() { let _lock = SINGLE_LOCK.lock(); disable_string_cache(); - let _sc = IUseStringCache::hold(); + let _sc = StringCacheHolder::hold(); let mut builder1 = CategoricalChunkedBuilder::new("foo", 10); let mut builder2 = CategoricalChunkedBuilder::new("foo", 10); diff --git a/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs b/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs index 1b7b41c5b7813..d6fbd9532829e 100644 --- a/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs +++ b/crates/polars-core/src/chunked_array/logical/categorical/string_cache.rs @@ -16,44 +16,65 @@ use crate::prelude::InitHashMaps; pub(crate) static USE_STRING_CACHE: AtomicU32 = AtomicU32::new(0); static STRING_CACHE_UUID_CTR: AtomicU32 = AtomicU32::new(0); -/// RAII for the string cache. +/// Enable the global string cache as long as the object is alive ([RAII]). /// -/// If an operation creates categoricals and uses them in a join -/// or comparison that operation must hold this cache via -/// `let handle = IUseStringCache::hold()` -/// The cache is valid until `handle` is dropped. +/// # Examples +/// +/// Enable the string cache by initializing the object: +/// +/// ``` +/// use polars_core::StringCacheHolder; +/// +/// let handle = StringCacheHolder::hold(); +/// ``` +/// +/// The string cache is enabled until `handle` is dropped. /// /// # De-allocation /// /// Multiple threads can hold the string cache at the same time. -/// The contents of the cache will only get dropped when no -/// thread holds it. -pub struct IUseStringCache { +/// The contents of the cache will only get dropped when no thread holds it. +/// +/// [RAII]: https://en.wikipedia.org/wiki/Resource_acquisition_is_initialization +pub struct StringCacheHolder { // only added so that it will never be constructed directly #[allow(dead_code)] private_zst: (), } -impl Default for IUseStringCache { +impl Default for StringCacheHolder { fn default() -> Self { Self::hold() } } -impl IUseStringCache { +impl StringCacheHolder { /// Hold the StringCache - pub fn hold() -> IUseStringCache { + pub fn hold() -> StringCacheHolder { set_string_cache(true); - IUseStringCache { private_zst: () } + StringCacheHolder { private_zst: () } } } -impl Drop for IUseStringCache { +impl Drop for StringCacheHolder { fn drop(&mut self) { set_string_cache(false) } } +/// Increment or decrement the number of string cache uses. +fn set_string_cache(active: bool) { + if active { + USE_STRING_CACHE.fetch_add(1, Ordering::Release); + } else { + let previous = USE_STRING_CACHE.fetch_sub(1, Ordering::Release); + if previous == 0 || previous == 1 { + USE_STRING_CACHE.store(0, Ordering::Release); + STRING_CACHE.clear() + } + } +} + /// Enable the global string cache. /// /// [`Categorical`] columns created under the same global string cache have the @@ -88,19 +109,6 @@ pub fn using_string_cache() -> bool { USE_STRING_CACHE.load(Ordering::Acquire) > 0 } -/// Increment or decrement the number of string cache uses. -fn set_string_cache(active: bool) { - if active { - USE_STRING_CACHE.fetch_add(1, Ordering::Release); - } else { - let previous = USE_STRING_CACHE.fetch_sub(1, Ordering::Release); - if previous == 0 || previous == 1 { - USE_STRING_CACHE.store(0, Ordering::Release); - STRING_CACHE.clear() - } - } -} - // This is the hash and the Index offset in the linear buffer #[derive(Copy, Clone)] struct Key { diff --git a/crates/polars-io/src/csv/read.rs b/crates/polars-io/src/csv/read.rs index 5f0b3a2285965..8d5b4f67dc904 100644 --- a/crates/polars-io/src/csv/read.rs +++ b/crates/polars-io/src/csv/read.rs @@ -584,7 +584,7 @@ where #[cfg(feature = "dtype-categorical")] if _has_cat { - _cat_lock = Some(polars_core::IUseStringCache::hold()) + _cat_lock = Some(polars_core::StringCacheHolder::hold()) } let mut csv_reader = self.core_reader(Some(Arc::new(schema)), to_cast)?; @@ -602,7 +602,7 @@ where }) .unwrap_or(false); if has_cat { - _cat_lock = Some(polars_core::IUseStringCache::hold()) + _cat_lock = Some(polars_core::StringCacheHolder::hold()) } } let mut csv_reader = self.core_reader(self.schema.clone(), vec![])?; diff --git a/crates/polars-io/src/csv/read_impl/batched_mmap.rs b/crates/polars-io/src/csv/read_impl/batched_mmap.rs index 18824d5e08f1a..93251de658bfd 100644 --- a/crates/polars-io/src/csv/read_impl/batched_mmap.rs +++ b/crates/polars-io/src/csv/read_impl/batched_mmap.rs @@ -136,7 +136,7 @@ impl<'a> CoreReader<'a> { // RAII structure that will ensure we maintain a global stringcache #[cfg(feature = "dtype-categorical")] let _cat_lock = if _has_cat { - Some(polars_core::IUseStringCache::hold()) + Some(polars_core::StringCacheHolder::hold()) } else { None }; @@ -196,7 +196,7 @@ pub struct BatchedCsvReaderMmap<'a> { schema: SchemaRef, rows_read: IdxSize, #[cfg(feature = "dtype-categorical")] - _cat_lock: Option, + _cat_lock: Option, #[cfg(not(feature = "dtype-categorical"))] _cat_lock: Option, } diff --git a/crates/polars-io/src/csv/read_impl/batched_read.rs b/crates/polars-io/src/csv/read_impl/batched_read.rs index af3831f00b703..7f6b94c579f12 100644 --- a/crates/polars-io/src/csv/read_impl/batched_read.rs +++ b/crates/polars-io/src/csv/read_impl/batched_read.rs @@ -219,7 +219,7 @@ impl<'a> CoreReader<'a> { // RAII structure that will ensure we maintain a global stringcache #[cfg(feature = "dtype-categorical")] let _cat_lock = if _has_cat { - Some(polars_core::IUseStringCache::hold()) + Some(polars_core::StringCacheHolder::hold()) } else { None }; @@ -279,7 +279,7 @@ pub struct BatchedCsvReaderRead<'a> { schema: SchemaRef, rows_read: IdxSize, #[cfg(feature = "dtype-categorical")] - _cat_lock: Option, + _cat_lock: Option, #[cfg(not(feature = "dtype-categorical"))] _cat_lock: Option, } diff --git a/crates/polars-io/src/parquet/read_impl.rs b/crates/polars-io/src/parquet/read_impl.rs index aadbf06e6f847..f9a4a6546cde9 100644 --- a/crates/polars-io/src/parquet/read_impl.rs +++ b/crates/polars-io/src/parquet/read_impl.rs @@ -254,7 +254,7 @@ pub fn read_parquet( let _string_cache = if n_row_groups > 1 { #[cfg(feature = "dtype-categorical")] { - Some(polars_core::IUseStringCache::hold()) + Some(polars_core::StringCacheHolder::hold()) } #[cfg(not(feature = "dtype-categorical"))] { diff --git a/crates/polars-lazy/src/physical_plan/expressions/window.rs b/crates/polars-lazy/src/physical_plan/expressions/window.rs index cbae8d3705241..7a0e0c7f80a6d 100644 --- a/crates/polars-lazy/src/physical_plan/expressions/window.rs +++ b/crates/polars-lazy/src/physical_plan/expressions/window.rs @@ -495,7 +495,7 @@ impl PhysicalExpr for WindowExpr { // Worst case is that a categorical is created with indexes from the string // cache which is fine, as the physical representation is undefined. #[cfg(feature = "dtype-categorical")] - let _sc = polars_core::IUseStringCache::hold(); + let _sc = polars_core::StringCacheHolder::hold(); let mut ac = self.run_aggregation(df, state, &gb)?; use MapStrategy::*; diff --git a/crates/polars-plan/src/logical_plan/functions/mod.rs b/crates/polars-plan/src/logical_plan/functions/mod.rs index fd7ef573ea0cd..9d46a3cd528de 100644 --- a/crates/polars-plan/src/logical_plan/functions/mod.rs +++ b/crates/polars-plan/src/logical_plan/functions/mod.rs @@ -10,7 +10,7 @@ use std::sync::Arc; use polars_core::prelude::*; #[cfg(feature = "dtype-categorical")] -use polars_core::IUseStringCache; +use polars_core::StringCacheHolder; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use smartstring::alias::String as SmartString; @@ -332,7 +332,7 @@ impl FunctionNode { // we use a global string cache here as streaming chunks all have different rev maps #[cfg(feature = "dtype-categorical")] { - let _hold = IUseStringCache::hold(); + let _hold = StringCacheHolder::hold(); Arc::get_mut(function).unwrap().call_udf(df) } diff --git a/crates/polars/tests/it/core/joins.rs b/crates/polars/tests/it/core/joins.rs index ca9a62c4b5def..f4da92ebd1424 100644 --- a/crates/polars/tests/it/core/joins.rs +++ b/crates/polars/tests/it/core/joins.rs @@ -1,6 +1,6 @@ use polars_core::utils::{accumulate_dataframes_vertical, split_df}; #[cfg(feature = "dtype-categorical")] -use polars_core::{disable_string_cache, IUseStringCache}; +use polars_core::{disable_string_cache, StringCacheHolder}; use super::*; @@ -256,7 +256,7 @@ fn test_join_multiple_columns() { #[cfg_attr(miri, ignore)] #[cfg(feature = "dtype-categorical")] fn test_join_categorical() { - let _lock = IUseStringCache::hold(); + let _lock = StringCacheHolder::hold(); let _lock = polars_core::SINGLE_LOCK.lock(); let (mut df_a, mut df_b) = get_dfs(); @@ -298,7 +298,7 @@ fn test_join_categorical() { disable_string_cache(); // _sc is needed to ensure we hold the string cache. - let _sc = IUseStringCache::hold(); + let _sc = StringCacheHolder::hold(); df_b.try_apply("bar", |s| s.cast(&DataType::Categorical(None))) .unwrap(); diff --git a/py-polars/src/functions/string_cache.rs b/py-polars/src/functions/string_cache.rs index 7613953e11661..6172738028986 100644 --- a/py-polars/src/functions/string_cache.rs +++ b/py-polars/src/functions/string_cache.rs @@ -1,5 +1,5 @@ use polars_core; -use polars_core::IUseStringCache; +use polars_core::StringCacheHolder; use pyo3::prelude::*; #[pyfunction] @@ -19,7 +19,7 @@ pub fn using_string_cache() -> bool { #[pyclass] pub struct PyStringCacheHolder { - _inner: IUseStringCache, + _inner: StringCacheHolder, } #[pymethods] @@ -27,7 +27,7 @@ impl PyStringCacheHolder { #[new] fn new() -> Self { Self { - _inner: IUseStringCache::hold(), + _inner: StringCacheHolder::hold(), } } }