diff --git a/crates/polars-io/src/csv/read/mod.rs b/crates/polars-io/src/csv/read/mod.rs index aced5f377208..969be1a58908 100644 --- a/crates/polars-io/src/csv/read/mod.rs +++ b/crates/polars-io/src/csv/read/mod.rs @@ -30,4 +30,3 @@ pub use parser::count_rows; pub use read_impl::batched::{BatchedCsvReader, OwnedBatchedCsvReader}; pub use reader::CsvReader; pub use schema_inference::infer_file_schema; -pub use utils::is_compressed; diff --git a/crates/polars-io/src/csv/read/utils.rs b/crates/polars-io/src/csv/read/utils.rs index 61a14399237a..33c6a6c8f290 100644 --- a/crates/polars-io/src/csv/read/utils.rs +++ b/crates/polars-io/src/csv/read/utils.rs @@ -45,22 +45,6 @@ pub(crate) fn get_file_chunks( offsets } -// magic numbers -const GZIP: [u8; 2] = [31, 139]; -const ZLIB0: [u8; 2] = [0x78, 0x01]; -const ZLIB1: [u8; 2] = [0x78, 0x9C]; -const ZLIB2: [u8; 2] = [0x78, 0xDA]; -const ZSTD: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD]; - -/// check if csv file is compressed -pub fn is_compressed(bytes: &[u8]) -> bool { - bytes.starts_with(&ZLIB0) - || bytes.starts_with(&ZLIB1) - || bytes.starts_with(&ZLIB2) - || bytes.starts_with(&GZIP) - || bytes.starts_with(&ZSTD) -} - #[cfg(any(feature = "decompress", feature = "decompress-fast"))] fn decompress_impl( decoder: &mut R, @@ -145,6 +129,7 @@ pub(crate) fn decompress( quote_char: Option, eol_char: u8, ) -> Option> { + use crate::utils::compression::magic::*; if bytes.starts_with(&GZIP) { let mut decoder = flate2::read::MultiGzDecoder::new(bytes); decompress_impl(&mut decoder, n_rows, separator, quote_char, eol_char) diff --git a/crates/polars-io/src/utils/compression.rs b/crates/polars-io/src/utils/compression.rs new file mode 100644 index 000000000000..d771b4c6ca1e --- /dev/null +++ b/crates/polars-io/src/utils/compression.rs @@ -0,0 +1,19 @@ +// magic numbers +pub mod magic { + pub const GZIP: [u8; 2] = [31, 139]; + pub const ZLIB0: [u8; 2] = [0x78, 0x01]; + pub const ZLIB1: [u8; 2] = [0x78, 0x9C]; + pub const ZLIB2: [u8; 2] = [0x78, 0xDA]; + pub const ZSTD: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD]; +} + +/// check if csv file is compressed +pub fn is_compressed(bytes: &[u8]) -> bool { + use magic::*; + + bytes.starts_with(&ZLIB0) + || bytes.starts_with(&ZLIB1) + || bytes.starts_with(&ZLIB2) + || bytes.starts_with(&GZIP) + || bytes.starts_with(&ZSTD) +} diff --git a/crates/polars-io/src/utils/mod.rs b/crates/polars-io/src/utils/mod.rs index 4711fab55609..a940870c9270 100644 --- a/crates/polars-io/src/utils/mod.rs +++ b/crates/polars-io/src/utils/mod.rs @@ -1,5 +1,7 @@ +pub mod compression; mod other; +pub use compression::is_compressed; pub use other::*; pub const URL_ENCODE_CHAR_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS diff --git a/crates/polars-io/src/utils/other.rs b/crates/polars-io/src/utils/other.rs index 91b94ee24dde..6f31b796776d 100644 --- a/crates/polars-io/src/utils/other.rs +++ b/crates/polars-io/src/utils/other.rs @@ -10,7 +10,6 @@ use polars_error::to_compute_err; use regex::{Regex, RegexBuilder}; use crate::mmap::{MmapBytesReader, ReaderBytes}; -use crate::prelude::is_compressed; pub fn get_reader_bytes<'a, R: Read + MmapBytesReader + ?Sized>( reader: &'a mut R, @@ -50,6 +49,7 @@ pub unsafe fn maybe_decompress_bytes<'a>( out: &'a mut Vec, ) -> PolarsResult<&'a [u8]> { assert!(out.is_empty()); + use crate::prelude::is_compressed; let is_compressed = bytes.len() >= 4 && is_compressed(bytes); if is_compressed {