
Commit

depr(python, rust!): Rename write_csv parameter quote to `quote_char` (#11583)
svaningelgem authored Oct 9, 2023
1 parent 9d40f0a commit 3b3c4a0
Showing 30 changed files with 211 additions and 201 deletions.
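For orientation, here is what the rename looks like at a call site. This is a minimal sketch of the reader builder after this commit; the file path, separator, and quote character below are illustrative, not taken from the diff:

    use polars::prelude::*;

    fn read_example() -> PolarsResult<DataFrame> {
        CsvReader::from_path("data.csv")? // illustrative input path
            .has_header(true)
            // renamed from `.with_delimiter(b';')`
            .with_separator(b';')
            // same method name as before, but its parameter is now `quote_char`;
            // `None` disables quoting
            .with_quote_char(Some(b'"'))
            .finish()
    }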
2 changes: 1 addition & 1 deletion crates/polars-io/src/csv/mod.rs
@@ -18,7 +18,7 @@
 //!
 //!     CsvWriter::new(&mut file)
 //!         .has_header(true)
-//!         .with_delimiter(b',')
+//!         .with_separator(b',')
 //!         .finish(df)
 //! }
 //! ```
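Expanded into a compilable form, the updated doc example reads roughly as below; the enclosing function, imports, and output path are assumed here, since the hunk shows only the builder chain:

    use polars::prelude::*;
    use std::fs::File;

    fn write_example(df: &mut DataFrame) -> PolarsResult<()> {
        let mut file = File::create("example.csv")?; // illustrative output path
        CsvWriter::new(&mut file)
            .has_header(true)
            .with_separator(b',') // renamed from `.with_delimiter(b',')`
            .finish(df)
    }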
28 changes: 14 additions & 14 deletions crates/polars-io/src/csv/parser.rs
@@ -30,20 +30,20 @@ pub(crate) fn next_line_position_naive(input: &[u8], eol_char: u8) -> Option<usize>
 pub(crate) fn next_line_position(
     mut input: &[u8],
     mut expected_fields: Option<usize>,
-    delimiter: u8,
+    separator: u8,
     quote_char: Option<u8>,
     eol_char: u8,
 ) -> Option<usize> {
     fn accept_line(
         line: &[u8],
         expected_fields: usize,
-        delimiter: u8,
+        separator: u8,
         eol_char: u8,
         quote_char: Option<u8>,
     ) -> bool {
         let mut count = 0usize;
-        for (field, _) in SplitFields::new(line, delimiter, quote_char, eol_char) {
-            if memchr2_iter(delimiter, eol_char, field).count() >= expected_fields {
+        for (field, _) in SplitFields::new(line, separator, quote_char, eol_char) {
+            if memchr2_iter(separator, eol_char, field).count() >= expected_fields {
                 return false;
             }
             count += 1;
@@ -95,10 +95,10 @@ pub(crate) fn next_line_position(
     match (line, expected_fields) {
         // count the fields, and determine if they are equal to what we expect from the schema
         (Some(line), Some(expected_fields)) => {
-            if accept_line(line, expected_fields, delimiter, eol_char, quote_char) {
+            if accept_line(line, expected_fields, separator, eol_char, quote_char) {
                 let mut valid = true;
                 for line in lines.take(2) {
-                    if !accept_line(line, expected_fields, delimiter, eol_char, quote_char) {
+                    if !accept_line(line, expected_fields, separator, eol_char, quote_char) {
                         valid = false;
                         break;
                     }
@@ -160,13 +160,13 @@ pub(crate) fn skip_whitespace(input: &[u8]) -> &[u8] {
 }
 
 #[inline]
-/// Can be used to skip whitespace, but exclude the delimiter
+/// Can be used to skip whitespace, but exclude the separator
 pub(crate) fn skip_whitespace_exclude(input: &[u8], exclude: u8) -> &[u8] {
     skip_condition(input, |b| b != exclude && (is_whitespace(b)))
 }
 
 #[inline]
-/// Can be used to skip whitespace, but exclude the delimiter
+/// Can be used to skip whitespace, but exclude the separator
 pub(crate) fn skip_whitespace_line_ending_exclude(
     input: &[u8],
     exclude: u8,
@@ -188,7 +188,7 @@ pub(crate) fn get_line_stats(
     n_lines: usize,
     eol_char: u8,
     expected_fields: usize,
-    delimiter: u8,
+    separator: u8,
     quote_char: Option<u8>,
 ) -> Option<(f32, f32)> {
     let mut lengths = Vec::with_capacity(n_lines);
@@ -204,7 +204,7 @@
         let pos = next_line_position(
             bytes_trunc,
             Some(expected_fields),
-            delimiter,
+            separator,
             quote_char,
             eol_char,
         )?;
@@ -350,7 +350,7 @@ fn skip_this_line(bytes: &[u8], quote: Option<u8>, eol_char: u8) -> &[u8] {
 pub(super) fn parse_lines<'a>(
     mut bytes: &'a [u8],
     offset: usize,
-    delimiter: u8,
+    separator: u8,
     comment_char: Option<u8>,
     quote_char: Option<u8>,
     eol_char: u8,
@@ -391,9 +391,9 @@ pub(super) fn parse_lines<'a>(
         // only when we have one column \n should not be skipped
         // other widths should have commas.
         bytes = if schema_len > 1 {
-            skip_whitespace_line_ending_exclude(bytes, delimiter, eol_char)
+            skip_whitespace_line_ending_exclude(bytes, separator, eol_char)
         } else {
-            skip_whitespace_exclude(bytes, delimiter)
+            skip_whitespace_exclude(bytes, separator)
         };
         if bytes.is_empty() {
             return Ok(original_bytes_len);
@@ -416,7 +416,7 @@ pub(super) fn parse_lines<'a>(
         let mut next_projected = unsafe { projection_iter.next().unwrap_unchecked() };
         let mut processed_fields = 0;
 
-        let mut iter = SplitFields::new(bytes, delimiter, quote_char, eol_char);
+        let mut iter = SplitFields::new(bytes, separator, quote_char, eol_char);
         let mut idx = 0u32;
         let mut read_sol = 0;
         loop {
20 changes: 10 additions & 10 deletions crates/polars-io/src/csv/read.rs
@@ -109,7 +109,7 @@ where
     projection: Option<Vec<usize>>,
     /// Optional column names to project/ select.
     columns: Option<Vec<String>>,
-    delimiter: Option<u8>,
+    separator: Option<u8>,
     pub(crate) schema: Option<SchemaRef>,
     encoding: CsvEncoding,
     n_threads: Option<usize>,
@@ -204,9 +204,9 @@ where
         self
     }
 
-    /// Set the CSV file's column delimiter as a byte character
-    pub fn with_delimiter(mut self, delimiter: u8) -> Self {
-        self.delimiter = Some(delimiter);
+    /// Set the CSV file's column separator as a byte character
+    pub fn with_separator(mut self, separator: u8) -> Self {
+        self.separator = Some(separator);
         self
     }
 
@@ -310,8 +310,8 @@ where
     }
 
     /// Set the `char` used as quote char. The default is `b'"'`. If set to `[None]` quoting is disabled.
-    pub fn with_quote_char(mut self, quote: Option<u8>) -> Self {
-        self.quote_char = quote;
+    pub fn with_quote_char(mut self, quote_char: Option<u8>) -> Self {
+        self.quote_char = quote_char;
         self
     }
 
@@ -358,7 +358,7 @@ impl<'a, R: MmapBytesReader + 'a> CsvReader<'a, R> {
             self.skip_rows_before_header,
             std::mem::take(&mut self.projection),
             self.max_records,
-            self.delimiter,
+            self.separator,
             self.has_header,
             self.ignore_errors,
             self.schema.clone(),
@@ -481,7 +481,7 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {
 
         let (inferred_schema, _, _) = infer_file_schema(
             &reader_bytes,
-            self.delimiter.unwrap_or(b','),
+            self.separator.unwrap_or(b','),
             self.max_records,
             self.has_header,
             None,
@@ -510,7 +510,7 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {
 
         let (inferred_schema, _, _) = infer_file_schema(
             &reader_bytes,
-            self.delimiter.unwrap_or(b','),
+            self.separator.unwrap_or(b','),
             self.max_records,
             self.has_header,
             None,
@@ -543,7 +543,7 @@ where
             max_records: Some(128),
             skip_rows_before_header: 0,
             projection: None,
-            delimiter: None,
+            separator: None,
             has_header: true,
             ignore_errors: false,
             schema: None,
18 changes: 9 additions & 9 deletions crates/polars-io/src/csv/read_impl/batched_mmap.rs
@@ -13,7 +13,7 @@ pub(crate) fn get_file_chunks_iterator(
     chunk_size: usize,
     bytes: &[u8],
     expected_fields: usize,
-    delimiter: u8,
+    separator: u8,
     quote_char: Option<u8>,
     eol_char: u8,
 ) {
@@ -27,7 +27,7 @@ pub(crate) fn get_file_chunks_iterator(
         let end_pos = match next_line_position(
             &bytes[search_pos..],
             Some(expected_fields),
-            delimiter,
+            separator,
             quote_char,
             eol_char,
         ) {
@@ -49,7 +49,7 @@ struct ChunkOffsetIter<'a> {
     // not a promise, but something we want
     rows_per_batch: usize,
     expected_fields: usize,
-    delimiter: u8,
+    separator: u8,
     quote_char: Option<u8>,
     eol_char: u8,
 }
@@ -68,7 +68,7 @@ impl<'a> Iterator for ChunkOffsetIter<'a> {
                 let bytes_first_row = next_line_position(
                     &self.bytes[self.last_offset + 2..],
                     Some(self.expected_fields),
-                    self.delimiter,
+                    self.separator,
                     self.quote_char,
                     self.eol_char,
                 )
@@ -84,7 +84,7 @@
                     self.rows_per_batch * bytes_first_row,
                     self.bytes,
                     self.expected_fields,
-                    self.delimiter,
+                    self.separator,
                     self.quote_char,
                     self.eol_char,
                 );
@@ -124,7 +124,7 @@ impl<'a> CoreReader<'a> {
             n_chunks: offset_batch_size,
             rows_per_batch: self.chunk_size,
             expected_fields: self.schema.len(),
-            delimiter: self.delimiter,
+            separator: self.separator,
             quote_char: self.quote_char,
             eol_char: self.eol_char,
         };
@@ -164,7 +164,7 @@ impl<'a> CoreReader<'a> {
             truncate_ragged_lines: self.truncate_ragged_lines,
             n_rows: self.n_rows,
             encoding: self.encoding,
-            delimiter: self.delimiter,
+            separator: self.separator,
             schema: self.schema,
             rows_read: 0,
             _cat_lock,
@@ -192,7 +192,7 @@ pub struct BatchedCsvReaderMmap<'a> {
     ignore_errors: bool,
     n_rows: Option<usize>,
     encoding: CsvEncoding,
-    delimiter: u8,
+    separator: u8,
     schema: SchemaRef,
     rows_read: IdxSize,
     #[cfg(feature = "dtype-categorical")]
@@ -233,7 +233,7 @@ impl<'a> BatchedCsvReaderMmap<'a> {
             .map(|(bytes_offset_thread, stop_at_nbytes)| {
                 let mut df = read_chunk(
                     bytes,
-                    self.delimiter,
+                    self.separator,
                     self.schema.as_ref(),
                     self.ignore_errors,
                     &self.projection,
22 changes: 11 additions & 11 deletions crates/polars-io/src/csv/read_impl/batched_read.rs
@@ -14,7 +14,7 @@ pub(crate) fn get_offsets(
     chunk_size: usize,
     bytes: &[u8],
     expected_fields: usize,
-    delimiter: u8,
+    separator: u8,
     quote_char: Option<u8>,
     eol_char: u8,
 ) {
@@ -29,7 +29,7 @@
         let end_pos = match next_line_position(
             &bytes[search_pos..],
             Some(expected_fields),
-            delimiter,
+            separator,
             quote_char,
             eol_char,
         ) {
@@ -57,7 +57,7 @@ struct ChunkReader<'a> {
     // not a promise, but something we want
     rows_per_batch: usize,
     expected_fields: usize,
-    delimiter: u8,
+    separator: u8,
     quote_char: Option<u8>,
     eol_char: u8,
 }
@@ -67,7 +67,7 @@ impl<'a> ChunkReader<'a> {
         file: &'a File,
         rows_per_batch: usize,
         expected_fields: usize,
-        delimiter: u8,
+        separator: u8,
         quote_char: Option<u8>,
         eol_char: u8,
         page_size: u64,
@@ -85,7 +85,7 @@ impl<'a> ChunkReader<'a> {
             n_chunks: 16,
             rows_per_batch,
             expected_fields,
-            delimiter,
+            separator,
             quote_char,
             eol_char,
         }
@@ -132,7 +132,7 @@ impl<'a> ChunkReader<'a> {
                 bytes_first_row = next_line_position(
                     &self.buf[2..],
                     Some(self.expected_fields),
-                    self.delimiter,
+                    self.separator,
                     self.quote_char,
                     self.eol_char,
                 );
@@ -179,7 +179,7 @@ impl<'a> ChunkReader<'a> {
                     self.rows_per_batch * bytes_first_row,
                     &self.buf,
                     self.expected_fields,
-                    self.delimiter,
+                    self.separator,
                     self.quote_char,
                     self.eol_char,
                 );
@@ -206,7 +206,7 @@ impl<'a> CoreReader<'a> {
             file,
             self.chunk_size,
             self.schema.len(),
-            self.delimiter,
+            self.separator,
             self.quote_char,
             self.eol_char,
             4096,
@@ -247,7 +247,7 @@ impl<'a> CoreReader<'a> {
             truncate_ragged_lines: self.truncate_ragged_lines,
             n_rows: self.n_rows,
             encoding: self.encoding,
-            delimiter: self.delimiter,
+            separator: self.separator,
             schema: self.schema,
             rows_read: 0,
             _cat_lock,
@@ -275,7 +275,7 @@ pub struct BatchedCsvReaderRead<'a> {
     truncate_ragged_lines: bool,
     n_rows: Option<usize>,
     encoding: CsvEncoding,
-    delimiter: u8,
+    separator: u8,
     schema: SchemaRef,
     rows_read: IdxSize,
     #[cfg(feature = "dtype-categorical")]
@@ -330,7 +330,7 @@ impl<'a> BatchedCsvReaderRead<'a> {
                 let stop_at_n_bytes = chunk.len();
                 let mut df = read_chunk(
                     chunk,
-                    self.delimiter,
+                    self.separator,
                     self.schema.as_ref(),
                     self.ignore_errors,
                     &self.projection,
(The remaining 25 changed files are not shown here.)
