Skip to content

Commit

Permalink
sligtly adapt default reader
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Oct 28, 2023
1 parent a4b33e6 commit 50859d7
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 27 deletions.
7 changes: 0 additions & 7 deletions crates/polars-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,3 @@ pub(crate) fn decimal_is_active() -> bool {
pub fn verbose() -> bool {
std::env::var("POLARS_VERBOSE").as_deref().unwrap_or("") == "1"
}

pub fn concurrent_download_limit() -> usize {
std::env::var("POLARS_CONC_DOWNLOAD_LIMIT")
.ok()
.and_then(|v| v.parse::<usize>().ok())
.unwrap_or(64)
}
22 changes: 2 additions & 20 deletions crates/polars-lazy/src/physical_plan/executors/scan/parquet.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::path::{Path, PathBuf};

use polars_core::config::{concurrent_download_limit, verbose};
use polars_core::config::verbose;
use polars_core::utils::accumulate_dataframes_vertical;
use polars_io::cloud::CloudOptions;
use polars_io::parquet::FileMetaData;
Expand Down Expand Up @@ -165,25 +165,7 @@ impl ParquetExec {
let cloud_options = self.cloud_options.as_ref();

let mut result = vec![];
let batch_size = if let Some(md) = self.metadata.as_ref() {
let n_columns = self
.file_options
.with_columns
.as_ref()
.map(|opt| opt.len())
.unwrap_or(first_schema.len());
let concurrent_per_file = md.row_groups.len() * n_columns;
if verbose {
eprintln!(
"estimated concurrent downloads per file: {}",
concurrent_per_file
);
}
concurrent_download_limit() / concurrent_per_file + 1
} else {
std::cmp::min(POOL.current_num_threads(), 16)
};

let batch_size = 5;
let mut remaining_rows_to_read = self.file_options.n_rows.unwrap_or(usize::MAX);
let mut base_row_count = self.file_options.row_count.take();
let mut processed = 0;
Expand Down

0 comments on commit 50859d7

Please sign in to comment.