Skip to content

Commit

Permalink
c
Browse files Browse the repository at this point in the history
  • Loading branch information
nameexhaustion committed Jul 24, 2024
1 parent 82b1b0d commit 1b403b7
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 18 deletions.
1 change: 0 additions & 1 deletion crates/polars-io/src/csv/read/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,3 @@ pub use parser::count_rows;
pub use read_impl::batched::{BatchedCsvReader, OwnedBatchedCsvReader};
pub use reader::CsvReader;
pub use schema_inference::infer_file_schema;
pub use utils::is_compressed;
17 changes: 1 addition & 16 deletions crates/polars-io/src/csv/read/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,22 +45,6 @@ pub(crate) fn get_file_chunks(
offsets
}

// Magic byte prefixes used to recognise compressed input by its leading bytes.

/// Leading two bytes of a gzip stream (`1F 8B`).
const GZIP: [u8; 2] = [0x1F, 0x8B];
/// zlib header variant `78 01`.
const ZLIB0: [u8; 2] = [0x78, 0x01];
/// zlib header variant `78 9C`.
const ZLIB1: [u8; 2] = [0x78, 0x9C];
/// zlib header variant `78 DA`.
const ZLIB2: [u8; 2] = [0x78, 0xDA];
/// Leading four bytes of a zstd frame (`28 B5 2F FD`).
const ZSTD: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];

/// Returns `true` when `bytes` begins with one of the known compression
/// signatures (zlib, gzip or zstd).
///
/// Only the leading bytes are inspected; nothing is decompressed or
/// validated. A slice shorter than a signature never matches it.
pub fn is_compressed(bytes: &[u8]) -> bool {
    // Prefix test shared by every signature below.
    let has_prefix = |signature: &[u8]| bytes.starts_with(signature);

    has_prefix(&ZLIB0)
        || has_prefix(&ZLIB1)
        || has_prefix(&ZLIB2)
        || has_prefix(&GZIP)
        || has_prefix(&ZSTD)
}

#[cfg(any(feature = "decompress", feature = "decompress-fast"))]
fn decompress_impl<R: Read>(
decoder: &mut R,
Expand Down Expand Up @@ -145,6 +129,7 @@ pub(crate) fn decompress(
quote_char: Option<u8>,
eol_char: u8,
) -> Option<Vec<u8>> {
use crate::utils::compression::magic::*;
if bytes.starts_with(&GZIP) {
let mut decoder = flate2::read::MultiGzDecoder::new(bytes);
decompress_impl(&mut decoder, n_rows, separator, quote_char, eol_char)
Expand Down
19 changes: 19 additions & 0 deletions crates/polars-io/src/utils/compression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/// Magic byte prefixes used to recognise compressed input by its leading bytes.
pub mod magic {
    /// Leading two bytes of a gzip stream (`1F 8B`).
    pub const GZIP: [u8; 2] = [0x1F, 0x8B];
    /// zlib header variant `78 01`.
    pub const ZLIB0: [u8; 2] = [0x78, 0x01];
    /// zlib header variant `78 9C`.
    pub const ZLIB1: [u8; 2] = [0x78, 0x9C];
    /// zlib header variant `78 DA`.
    pub const ZLIB2: [u8; 2] = [0x78, 0xDA];
    /// Leading four bytes of a zstd frame (`28 B5 2F FD`).
    pub const ZSTD: [u8; 4] = [0x28, 0xB5, 0x2F, 0xFD];
}

/// Reports whether `bytes` starts with a recognised compression signature.
///
/// The leading bytes are compared against the zlib, gzip and zstd prefixes
/// defined in [`magic`]. Nothing is decompressed or validated, and a slice
/// shorter than a signature never matches it.
pub fn is_compressed(bytes: &[u8]) -> bool {
    use magic::*;

    // Checked in the same order as listed; any single match is sufficient.
    let signatures: [&[u8]; 5] = [&ZLIB0, &ZLIB1, &ZLIB2, &GZIP, &ZSTD];
    signatures
        .iter()
        .any(|signature| bytes.starts_with(signature))
}
2 changes: 2 additions & 0 deletions crates/polars-io/src/utils/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
pub mod compression;
mod other;

pub use compression::is_compressed;
pub use other::*;

pub const URL_ENCODE_CHAR_SET: &percent_encoding::AsciiSet = &percent_encoding::CONTROLS
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-io/src/utils/other.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use polars_error::to_compute_err;
use regex::{Regex, RegexBuilder};

use crate::mmap::{MmapBytesReader, ReaderBytes};
use crate::prelude::is_compressed;

pub fn get_reader_bytes<'a, R: Read + MmapBytesReader + ?Sized>(
reader: &'a mut R,
Expand Down Expand Up @@ -50,6 +49,7 @@ pub unsafe fn maybe_decompress_bytes<'a>(
out: &'a mut Vec<u8>,
) -> PolarsResult<&'a [u8]> {
assert!(out.is_empty());
use crate::prelude::is_compressed;
let is_compressed = bytes.len() >= 4 && is_compressed(bytes);

if is_compressed {
Expand Down

0 comments on commit 1b403b7

Please sign in to comment.