Skip to content

Commit

Permalink
fix: read_csv for empty lines (#11924)
Browse files Browse the repository at this point in the history
  • Loading branch information
uchiiii authored Oct 22, 2023
1 parent a01f243 commit 5993a57
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 0 deletions.
4 changes: 4 additions & 0 deletions crates/polars-io/src/csv/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,10 @@ pub fn infer_file_schema_inner(
// keep track of the end of the line so that we can determine the number of bytes read
end_ptr = line.as_ptr() as usize + line.len();

if line.is_empty() {
continue;
}

if let Some(c) = comment_char {
// line is a comment -> skip
if line[0] == c {
Expand Down
24 changes: 24 additions & 0 deletions py-polars/tests/unit/io/test_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,6 +527,30 @@ def test_empty_bytes() -> None:
assert_frame_equal(df, pl.DataFrame())


def test_empty_line_with_single_column() -> None:
    """An empty line in single-column CSV input is read back as a null row."""
    # The blank second line sits between the values "a" and "b".
    raw = b"a\n\nb\n"
    result = pl.read_csv(
        raw,
        new_columns=["A"],
        has_header=False,
        comment_char="#",
        use_pyarrow=False,
    )
    # The empty line is expected to surface as None between the two values.
    assert_frame_equal(result, pl.DataFrame({"A": ["a", None, "b"]}))


def test_empty_line_with_multiple_columns() -> None:
    """An empty line in two-column CSV input produces no row in the result."""
    # The blank second line separates the records "a,b" and "c,d".
    raw = b"a,b\n\nc,d\n"
    result = pl.read_csv(
        raw,
        new_columns=["A", "B"],
        has_header=False,
        comment_char="#",
        use_pyarrow=False,
    )
    # Only the two populated records are expected; the empty line is dropped.
    assert_frame_equal(result, pl.DataFrame({"A": ["a", "c"], "B": ["b", "d"]}))


def test_csv_quote_char() -> None:
expected = pl.DataFrame(
[
Expand Down

0 comments on commit 5993a57

Please sign in to comment.