diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 4da87fba..d7fcd10e 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -220,6 +220,15 @@ def keep_back end # trace(__method__, :repos, start, buffer) @scanner.pos = start + last_scanner, last_start, last_buffer = @keeps.last + # Drop the last buffer when the last buffer is the same data + # in the last keep. If we keep it, we have duplicated data + # by the next keep_back. + if last_scanner == @scanner and + last_buffer and + last_buffer == last_scanner.string.byteslice(last_start, start) + @keeps.last[2] = nil + end end read_chunk if @scanner.eos? end diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb index 81327c18..b82bb6c3 100644 --- a/test/csv/parse/test_inputs_scanner.rb +++ b/test/csv/parse/test_inputs_scanner.rb @@ -3,6 +3,20 @@ class TestCSVParseInputsScanner < Test::Unit::TestCase include CSVHelper + def test_scan_keep_nested_back + input = CSV::Parser::UnoptimizedStringIO.new("abcdef") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil) + scanner.keep_start + assert_equal("abc", scanner.scan_all(/[a-c]+/)) + scanner.keep_start + assert_equal("def", scanner.scan_all(/[d-f]+/)) + scanner.keep_back + scanner.keep_back + assert_equal("abcdef", scanner.scan_all(/[a-f]+/)) + end + def test_scan_keep_over_chunks_nested_back input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") scanner = CSV::Parser::InputsScanner.new([input], diff --git a/test/csv/parse/test_skip_lines.rb b/test/csv/parse/test_skip_lines.rb index 2fea02eb..e700a70d 100644 --- a/test/csv/parse/test_skip_lines.rb +++ b/test/csv/parse/test_skip_lines.rb @@ -11,6 +11,17 @@ def test_default assert_nil(csv.skip_lines) end + def parse(data, **options) + # We use Tempfile here to use CSV::Parser::InputsScanner. + Tempfile.open(["csv-", ".csv"]) do |file| + file.print(data) + file.close + CSV.open(file, **options) do |csv| + csv.read + end + end + end + def test_regexp csv = <<-CSV 1 @@ -22,7 +33,7 @@ def test_regexp ["1"], ["4"], ], - CSV.parse(csv, :skip_lines => /\A\s*#/)) + parse(csv, :skip_lines => /\A\s*#/)) end def test_regexp_quoted @@ -37,7 +48,7 @@ def test_regexp_quoted ["#3"], ["4"], ], - CSV.parse(csv, :skip_lines => /\A\s*#/)) + parse(csv, :skip_lines => /\A\s*#/)) end def test_string @@ -51,7 +62,7 @@ def test_string ["1"], ["4"], ], - CSV.parse(csv, :skip_lines => ".")) + parse(csv, :skip_lines => ".")) end class RegexStub @@ -88,7 +99,7 @@ def test_matchable ["1"], ["3"], ], - CSV.parse(csv, :skip_lines => Matchable.new(/\A#/))) + parse(csv, :skip_lines => Matchable.new(/\A#/))) end def test_multibyte_data @@ -98,29 +109,36 @@ def test_multibyte_data value = "\u3042\u3044\u3046" with_chunk_size("5") do assert_equal([[value], [value]], - CSV.parse("#{value}\n#{value}\n", - :skip_lines => /\A#/)) + parse("#{value}\n#{value}\n", + :skip_lines => /\A#/)) end end def test_empty_line_and_liberal_parsing assert_equal([["a", "b"]], - CSV.parse("a,b\n", - :liberal_parsing => true, - :skip_lines => /^$/)) + parse("a,b\n", + :liberal_parsing => true, + :skip_lines => /^$/)) end def test_crlf assert_equal([["a", "b"]], - CSV.parse("a,b\r\n,\r\n", - :skip_lines => /^,+$/)) + parse("a,b\r\n,\r\n", + :skip_lines => /^,+$/)) end def test_crlf_strip_no_last_crlf assert_equal([["a"], ["b"]], - CSV.parse("a\r\nb", - row_sep: "\r\n", - skip_lines: /^ *$/, - strip: true)) + parse("a\r\nb", + row_sep: "\r\n", + skip_lines: /^ *$/, + strip: true)) + end + + def test_crlf_quoted_lf + assert_equal([["\n", ""]], + parse("\"\n\",\"\"\r\n", + row_sep: "\r\n", + skip_lines: /not matched/)) end end