depr(python, rust!): Rename write_csv parameter quote to quote_char #11583

Merged 11 commits on Oct 9, 2023
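This PR deprecates the old names across the Rust CSV API: the reader/writer `delimiter` becomes `separator`, and the `quote` parameter becomes `quote_char` (mirroring the `quote` to `quote_char` rename on Python's `write_csv`). As a caller-side sketch of the renamed writer builder, using only methods that appear in this diff (the file path and separator value are illustrative, and the example assumes a polars build with the `csv` feature enabled):

```rust
use std::fs::File;

use polars::prelude::*;

// Write a DataFrame as semicolon-separated CSV using the renamed
// builder method `with_separator` (formerly `with_delimiter`).
fn write_df(df: &mut DataFrame) -> PolarsResult<()> {
    let mut file = File::create("example.csv")?;
    CsvWriter::new(&mut file)
        .has_header(true)
        .with_separator(b';')
        .finish(df)
}
```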
crates/polars-io/src/csv/mod.rs (1 addition & 1 deletion)
@@ -18,7 +18,7 @@
//!
//! CsvWriter::new(&mut file)
//! .has_header(true)
-//! .with_delimiter(b',')
+//! .with_separator(b',')
//! .finish(df)
//! }
//! ```
crates/polars-io/src/csv/parser.rs (14 additions & 14 deletions)
@@ -30,20 +30,20 @@ pub(crate) fn next_line_position_naive(input: &[u8], eol_char: u8) -> Option<usize>
pub(crate) fn next_line_position(
mut input: &[u8],
mut expected_fields: Option<usize>,
-delimiter: u8,
+separator: u8,
quote_char: Option<u8>,
eol_char: u8,
) -> Option<usize> {
fn accept_line(
line: &[u8],
expected_fields: usize,
-delimiter: u8,
+separator: u8,
eol_char: u8,
quote_char: Option<u8>,
) -> bool {
let mut count = 0usize;
-for (field, _) in SplitFields::new(line, delimiter, quote_char, eol_char) {
-if memchr2_iter(delimiter, eol_char, field).count() >= expected_fields {
+for (field, _) in SplitFields::new(line, separator, quote_char, eol_char) {
+if memchr2_iter(separator, eol_char, field).count() >= expected_fields {
return false;
}
count += 1;
@@ -95,10 +95,10 @@ pub(crate) fn next_line_position(
match (line, expected_fields) {
// count the fields, and determine if they are equal to what we expect from the schema
(Some(line), Some(expected_fields)) => {
-if accept_line(line, expected_fields, delimiter, eol_char, quote_char) {
+if accept_line(line, expected_fields, separator, eol_char, quote_char) {
let mut valid = true;
for line in lines.take(2) {
-if !accept_line(line, expected_fields, delimiter, eol_char, quote_char) {
+if !accept_line(line, expected_fields, separator, eol_char, quote_char) {
valid = false;
break;
}
@@ -160,13 +160,13 @@ pub(crate) fn skip_whitespace(input: &[u8]) -> &[u8] {
}

#[inline]
-/// Can be used to skip whitespace, but exclude the delimiter
+/// Can be used to skip whitespace, but exclude the separator
pub(crate) fn skip_whitespace_exclude(input: &[u8], exclude: u8) -> &[u8] {
skip_condition(input, |b| b != exclude && (is_whitespace(b)))
}

#[inline]
-/// Can be used to skip whitespace, but exclude the delimiter
+/// Can be used to skip whitespace, but exclude the separator
pub(crate) fn skip_whitespace_line_ending_exclude(
input: &[u8],
exclude: u8,
@@ -188,7 +188,7 @@ pub(crate) fn get_line_stats(
n_lines: usize,
eol_char: u8,
expected_fields: usize,
-delimiter: u8,
+separator: u8,
quote_char: Option<u8>,
) -> Option<(f32, f32)> {
let mut lengths = Vec::with_capacity(n_lines);
@@ -204,7 +204,7 @@
let pos = next_line_position(
bytes_trunc,
Some(expected_fields),
-delimiter,
+separator,
quote_char,
eol_char,
)?;
@@ -350,7 +350,7 @@ fn skip_this_line(bytes: &[u8], quote: Option<u8>, eol_char: u8) -> &[u8] {
pub(super) fn parse_lines<'a>(
mut bytes: &'a [u8],
offset: usize,
-delimiter: u8,
+separator: u8,
comment_char: Option<u8>,
quote_char: Option<u8>,
eol_char: u8,
@@ -391,9 +391,9 @@ pub(super) fn parse_lines<'a>(
// only when we have one column \n should not be skipped
// other widths should have commas.
bytes = if schema_len > 1 {
-skip_whitespace_line_ending_exclude(bytes, delimiter, eol_char)
+skip_whitespace_line_ending_exclude(bytes, separator, eol_char)
} else {
-skip_whitespace_exclude(bytes, delimiter)
+skip_whitespace_exclude(bytes, separator)
};
if bytes.is_empty() {
return Ok(original_bytes_len);
@@ -416,7 +416,7 @@ pub(super) fn parse_lines<'a>(
let mut next_projected = unsafe { projection_iter.next().unwrap_unchecked() };
let mut processed_fields = 0;

-let mut iter = SplitFields::new(bytes, delimiter, quote_char, eol_char);
+let mut iter = SplitFields::new(bytes, separator, quote_char, eol_char);
let mut idx = 0u32;
let mut read_sol = 0;
loop {
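The `accept_line` logic above treats a byte offset as a valid row start only if the candidate line splits into the expected number of fields. A simplified, self-contained sketch of that field-counting idea follows; it is not the crate's `SplitFields` (which also tracks EOL bytes inside quoted fields and scans with `memchr`), just the core heuristic under a naive quote-toggling assumption:

```rust
// Count the fields of one CSV line: a separator inside a quoted
// section does not start a new field. Quote handling is simplified
// to toggling on each quote byte, with no escape sequences.
fn count_fields(line: &[u8], separator: u8, quote_char: Option<u8>) -> usize {
    let mut in_quotes = false;
    let mut fields = 1;
    for &b in line {
        if quote_char == Some(b) {
            in_quotes = !in_quotes;
        } else if b == separator && !in_quotes {
            fields += 1;
        }
    }
    fields
}

fn main() {
    assert_eq!(count_fields(b"a;b;c", b';', Some(b'"')), 3);
    // The quoted separator is part of the first field.
    assert_eq!(count_fields(b"\"a;b\";c", b';', Some(b'"')), 2);
}
```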
crates/polars-io/src/csv/read.rs (10 additions & 10 deletions)
@@ -109,7 +109,7 @@ where
projection: Option<Vec<usize>>,
/// Optional column names to project/ select.
columns: Option<Vec<String>>,
-delimiter: Option<u8>,
+separator: Option<u8>,
pub(crate) schema: Option<SchemaRef>,
encoding: CsvEncoding,
n_threads: Option<usize>,
@@ -204,9 +204,9 @@ where
self
}

-/// Set the CSV file's column delimiter as a byte character
-pub fn with_delimiter(mut self, delimiter: u8) -> Self {
-self.delimiter = Some(delimiter);
+/// Set the CSV file's column separator as a byte character
+pub fn with_separator(mut self, separator: u8) -> Self {
+self.separator = Some(separator);
self
}

@@ -310,8 +310,8 @@
}

/// Set the `char` used as quote char. The default is `b'"'`. If set to `[None]` quoting is disabled.
-pub fn with_quote_char(mut self, quote: Option<u8>) -> Self {
-self.quote_char = quote;
+pub fn with_quote_char(mut self, quote_char: Option<u8>) -> Self {
+self.quote_char = quote_char;
self
}

@@ -358,7 +358,7 @@ impl<'a, R: MmapBytesReader + 'a> CsvReader<'a, R> {
self.skip_rows_before_header,
std::mem::take(&mut self.projection),
self.max_records,
-self.delimiter,
+self.separator,
self.has_header,
self.ignore_errors,
self.schema.clone(),
@@ -481,7 +481,7 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {

let (inferred_schema, _, _) = infer_file_schema(
&reader_bytes,
-self.delimiter.unwrap_or(b','),
+self.separator.unwrap_or(b','),
self.max_records,
self.has_header,
None,
@@ -510,7 +510,7 @@ impl<'a> CsvReader<'a, Box<dyn MmapBytesReader>> {

let (inferred_schema, _, _) = infer_file_schema(
&reader_bytes,
-self.delimiter.unwrap_or(b','),
+self.separator.unwrap_or(b','),
self.max_records,
self.has_header,
None,
@@ -543,7 +543,7 @@ where
max_records: Some(128),
skip_rows_before_header: 0,
projection: None,
-delimiter: None,
+separator: None,
has_header: true,
ignore_errors: false,
schema: None,
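On the reader side, the renamed options compose like this; a minimal sketch restricted to builder methods visible in this diff (`from_path`, the file name, and the option values are illustrative):

```rust
use polars::prelude::*;

// Read a semicolon-separated file with quoting disabled, using
// `with_separator` (formerly `with_delimiter`) and the renamed
// `quote_char` parameter of `with_quote_char`.
fn read_df() -> PolarsResult<DataFrame> {
    CsvReader::from_path("example.csv")?
        .has_header(true)
        .with_separator(b';')
        .with_quote_char(None) // `None` disables quoting, per the doc comment above
        .finish()
}
```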
crates/polars-io/src/csv/read_impl/batched_mmap.rs (9 additions & 9 deletions)
@@ -13,7 +13,7 @@ pub(crate) fn get_file_chunks_iterator(
chunk_size: usize,
bytes: &[u8],
expected_fields: usize,
-delimiter: u8,
+separator: u8,
quote_char: Option<u8>,
eol_char: u8,
) {
@@ -27,7 +27,7 @@
let end_pos = match next_line_position(
&bytes[search_pos..],
Some(expected_fields),
-delimiter,
+separator,
quote_char,
eol_char,
) {
@@ -49,7 +49,7 @@ struct ChunkOffsetIter<'a> {
// not a promise, but something we want
rows_per_batch: usize,
expected_fields: usize,
-delimiter: u8,
+separator: u8,
quote_char: Option<u8>,
eol_char: u8,
}
@@ -68,7 +68,7 @@ impl<'a> Iterator for ChunkOffsetIter<'a> {
let bytes_first_row = next_line_position(
&self.bytes[self.last_offset + 2..],
Some(self.expected_fields),
-self.delimiter,
+self.separator,
self.quote_char,
self.eol_char,
)
@@ -84,7 +84,7 @@
self.rows_per_batch * bytes_first_row,
self.bytes,
self.expected_fields,
-self.delimiter,
+self.separator,
self.quote_char,
self.eol_char,
);
@@ -124,7 +124,7 @@ impl<'a> CoreReader<'a> {
n_chunks: offset_batch_size,
rows_per_batch: self.chunk_size,
expected_fields: self.schema.len(),
-delimiter: self.delimiter,
+separator: self.separator,
quote_char: self.quote_char,
eol_char: self.eol_char,
};
@@ -164,7 +164,7 @@ impl<'a> CoreReader<'a> {
truncate_ragged_lines: self.truncate_ragged_lines,
n_rows: self.n_rows,
encoding: self.encoding,
-delimiter: self.delimiter,
+separator: self.separator,
schema: self.schema,
rows_read: 0,
_cat_lock,
@@ -192,7 +192,7 @@ pub struct BatchedCsvReaderMmap<'a> {
ignore_errors: bool,
n_rows: Option<usize>,
encoding: CsvEncoding,
-delimiter: u8,
+separator: u8,
schema: SchemaRef,
rows_read: IdxSize,
#[cfg(feature = "dtype-categorical")]
@@ -233,7 +233,7 @@ impl<'a> BatchedCsvReaderMmap<'a> {
.map(|(bytes_offset_thread, stop_at_nbytes)| {
let mut df = read_chunk(
bytes,
-self.delimiter,
+self.separator,
self.schema.as_ref(),
self.ignore_errors,
&self.projection,
crates/polars-io/src/csv/read_impl/batched_read.rs (11 additions & 11 deletions)
@@ -14,7 +14,7 @@ pub(crate) fn get_offsets(
chunk_size: usize,
bytes: &[u8],
expected_fields: usize,
-delimiter: u8,
+separator: u8,
quote_char: Option<u8>,
eol_char: u8,
) {
@@ -29,7 +29,7 @@
let end_pos = match next_line_position(
&bytes[search_pos..],
Some(expected_fields),
-delimiter,
+separator,
quote_char,
eol_char,
) {
@@ -57,7 +57,7 @@ struct ChunkReader<'a> {
// not a promise, but something we want
rows_per_batch: usize,
expected_fields: usize,
-delimiter: u8,
+separator: u8,
quote_char: Option<u8>,
eol_char: u8,
}
@@ -67,7 +67,7 @@ impl<'a> ChunkReader<'a> {
file: &'a File,
rows_per_batch: usize,
expected_fields: usize,
-delimiter: u8,
+separator: u8,
quote_char: Option<u8>,
eol_char: u8,
page_size: u64,
@@ -85,7 +85,7 @@
n_chunks: 16,
rows_per_batch,
expected_fields,
-delimiter,
+separator,
quote_char,
eol_char,
}
@@ -132,7 +132,7 @@ impl<'a> ChunkReader<'a> {
bytes_first_row = next_line_position(
&self.buf[2..],
Some(self.expected_fields),
-self.delimiter,
+self.separator,
self.quote_char,
self.eol_char,
);
@@ -179,7 +179,7 @@ impl<'a> ChunkReader<'a> {
self.rows_per_batch * bytes_first_row,
&self.buf,
self.expected_fields,
-self.delimiter,
+self.separator,
self.quote_char,
self.eol_char,
);
@@ -206,7 +206,7 @@ impl<'a> CoreReader<'a> {
file,
self.chunk_size,
self.schema.len(),
-self.delimiter,
+self.separator,
self.quote_char,
self.eol_char,
4096,
@@ -247,7 +247,7 @@ impl<'a> CoreReader<'a> {
truncate_ragged_lines: self.truncate_ragged_lines,
n_rows: self.n_rows,
encoding: self.encoding,
-delimiter: self.delimiter,
+separator: self.separator,
schema: self.schema,
rows_read: 0,
_cat_lock,
@@ -275,7 +275,7 @@ pub struct BatchedCsvReaderRead<'a> {
truncate_ragged_lines: bool,
n_rows: Option<usize>,
encoding: CsvEncoding,
-delimiter: u8,
+separator: u8,
schema: SchemaRef,
rows_read: IdxSize,
#[cfg(feature = "dtype-categorical")]
@@ -330,7 +330,7 @@ impl<'a> BatchedCsvReaderRead<'a> {
let stop_at_n_bytes = chunk.len();
let mut df = read_chunk(
chunk,
-self.delimiter,
+self.separator,
self.schema.as_ref(),
self.ignore_errors,
&self.projection,
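Both batched readers rely on the same chunking idea: step roughly a chunk's worth of bytes forward, then snap to the next row boundary so no chunk starts mid-record. A simplified sketch of that boundary search (the real `next_line_position` additionally validates the candidate line's field count and quote state):

```rust
// End offset for a chunk starting at `start`: roughly `chunk_size`
// bytes ahead, extended past the next EOL byte so the chunk ends on
// a record boundary.
fn chunk_end(bytes: &[u8], start: usize, chunk_size: usize, eol_char: u8) -> usize {
    let approx = (start + chunk_size).min(bytes.len());
    match bytes[approx..].iter().position(|&b| b == eol_char) {
        Some(p) => approx + p + 1,
        None => bytes.len(),
    }
}

fn main() {
    let data = b"a,b\nc,d\ne,f\n";
    // A ~5-byte chunk from offset 0 extends to just past "c,d\n".
    assert_eq!(chunk_end(data, 0, 5, b'\n'), 8);
}
```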