Skip to content

Commit

Permalink
Don't build quoted_fields array when not needed (#312)
Browse files Browse the repository at this point in the history
```
N_ROWS=5000 rake benchmark:write benchmark:parse benchmark:parse_liberal_parsing benchmark:parse_quote_char_nil benchmark:parse_strip
```
```
RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/vladimirkochnev/.asdf/installs/ruby/3.3.3/bin/ruby -v -S benchmark-driver /Users/vladimirkochnev/code/csv/benchmark/write.yaml
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin23]
Calculating -------------------------------------
                          csv 3.3.0      master
   generate_line: fields     29.211      29.755 i/s -     100.000 times in 3.423319s 3.360759s
      generate_line: Row     28.090      28.121 i/s -     100.000 times in 3.560007s 3.556013s
     generate_line: Hash     26.398      26.888 i/s -     100.000 times in 3.788145s 3.719147s
               << fields    130.692     142.421 i/s -     100.000 times in 0.765156s 0.702142s
                  << Row    103.416     107.886 i/s -     100.000 times in 0.966972s 0.926906s
                 << Hash    109.760     114.806 i/s -     100.000 times in 0.911082s 0.871038s
<< fields: write headers    131.147     141.668 i/s -     100.000 times in 0.762501s 0.705878s
   << Row: write headers    102.956     108.919 i/s -     100.000 times in 0.971286s 0.918117s
  << Hash: write headers    109.498     115.403 i/s -     100.000 times in 0.913259s 0.866528s

Comparison:
                generate_line: fields
                  master:        29.8 i/s
               csv 3.3.0:        29.2 i/s - 1.02x  slower

                   generate_line: Row
                  master:        28.1 i/s
               csv 3.3.0:        28.1 i/s - 1.00x  slower

                  generate_line: Hash
                  master:        26.9 i/s
               csv 3.3.0:        26.4 i/s - 1.02x  slower

                            << fields
                  master:       142.4 i/s
               csv 3.3.0:       130.7 i/s - 1.09x  slower

                               << Row
                  master:       107.9 i/s
               csv 3.3.0:       103.4 i/s - 1.04x  slower

                              << Hash
                  master:       114.8 i/s
               csv 3.3.0:       109.8 i/s - 1.05x  slower

             << fields: write headers
                  master:       141.7 i/s
               csv 3.3.0:       131.1 i/s - 1.08x  slower

                << Row: write headers
                  master:       108.9 i/s
               csv 3.3.0:       103.0 i/s - 1.06x  slower

               << Hash: write headers
                  master:       115.4 i/s
               csv 3.3.0:       109.5 i/s - 1.05x  slower

```
```
RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/vladimirkochnev/.asdf/installs/ruby/3.3.3/bin/ruby -v -S benchmark-driver /Users/vladimirkochnev/code/csv/benchmark/parse.yaml
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin23]
Calculating -------------------------------------
                      csv 3.3.0      master
            unquoted     21.798      22.176 i/s -     100.000 times in 4.587570s 4.509469s
              quoted     11.580      12.896 i/s -     100.000 times in 8.635392s 7.754641s
               mixed     14.082      14.139 i/s -     100.000 times in 7.101360s 7.072725s
     include_col_sep      5.206       5.191 i/s -     100.000 times in 19.209061s 19.265310s
     include_row_sep      5.125       5.179 i/s -     100.000 times in 19.513305s 19.307953s
        encode_utf-8     16.247      16.221 i/s -     100.000 times in 6.154900s 6.165029s
         encode_sjis     16.811      16.442 i/s -     100.000 times in 5.948591s 6.082152s

Comparison:
                         unquoted
              master:        22.2 i/s
           csv 3.3.0:        21.8 i/s - 1.02x  slower

                           quoted
              master:        12.9 i/s
           csv 3.3.0:        11.6 i/s - 1.11x  slower

                            mixed
              master:        14.1 i/s
           csv 3.3.0:        14.1 i/s - 1.00x  slower

                  include_col_sep
           csv 3.3.0:         5.2 i/s
              master:         5.2 i/s - 1.00x  slower

                  include_row_sep
              master:         5.2 i/s
           csv 3.3.0:         5.1 i/s - 1.01x  slower

                     encode_utf-8
           csv 3.3.0:        16.2 i/s
              master:        16.2 i/s - 1.00x  slower

                      encode_sjis
           csv 3.3.0:        16.8 i/s
              master:        16.4 i/s - 1.02x  slower

```
```
RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/vladimirkochnev/.asdf/installs/ruby/3.3.3/bin/ruby -v -S benchmark-driver /Users/vladimirkochnev/code/csv/benchmark/parse_liberal_parsing.yaml
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin23]
Calculating -------------------------------------
                                   csv 3.3.0      master
                         unquoted      8.135       8.270 i/s -     100.000 times in 12.291808s 12.091261s
         unquoted_backslash_quote      3.865       3.854 i/s -     100.000 times in 25.872675s 25.946134s
                           quoted      3.627       3.598 i/s -     100.000 times in 27.572211s 27.789378s
quoted_double_quote_outside_quote      2.260       2.216 i/s -     100.000 times in 44.241118s 45.117111s
           quoted_backslash_quote      1.795       1.789 i/s -     100.000 times in 55.721082s 55.903782s
                  include_col_sep      3.622       3.615 i/s -     100.000 times in 27.606966s 27.664617s
                  include_row_sep      3.575       3.611 i/s -     100.000 times in 27.970871s 27.694692s
                     encode_utf-8      8.041       8.175 i/s -     100.000 times in 12.436682s 12.232314s
                      encode_sjis      8.515       8.147 i/s -     100.000 times in 11.744171s 12.274468s

Comparison:
                                      unquoted
                           master:         8.3 i/s
                        csv 3.3.0:         8.1 i/s - 1.02x  slower

                      unquoted_backslash_quote
                        csv 3.3.0:         3.9 i/s
                           master:         3.9 i/s - 1.00x  slower

                                        quoted
                        csv 3.3.0:         3.6 i/s
                           master:         3.6 i/s - 1.01x  slower

             quoted_double_quote_outside_quote
                        csv 3.3.0:         2.3 i/s
                           master:         2.2 i/s - 1.02x  slower

                        quoted_backslash_quote
                        csv 3.3.0:         1.8 i/s
                           master:         1.8 i/s - 1.00x  slower

                               include_col_sep
                        csv 3.3.0:         3.6 i/s
                           master:         3.6 i/s - 1.00x  slower

                               include_row_sep
                           master:         3.6 i/s
                        csv 3.3.0:         3.6 i/s - 1.01x  slower

                                  encode_utf-8
                           master:         8.2 i/s
                        csv 3.3.0:         8.0 i/s - 1.02x  slower

                                   encode_sjis
                        csv 3.3.0:         8.5 i/s
                           master:         8.1 i/s - 1.05x  slower
```
```
RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/vladimirkochnev/.asdf/installs/ruby/3.3.3/bin/ruby -v -S benchmark-driver /Users/vladimirkochnev/code/csv/benchmark/parse_quote_char_nil.yaml
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin23]
Calculating -------------------------------------
                      csv 3.3.0      master
  without_quote_char     22.806      22.552 i/s -     100.000 times in 4.384858s 4.434139s
      quote_char_nil     32.576      44.911 i/s -     100.000 times in 3.069777s 2.226621s
       col_sep_space     12.182      12.341 i/s -     100.000 times in 8.208668s 8.102909s

Comparison:
               without_quote_char
           csv 3.3.0:        22.8 i/s
              master:        22.6 i/s - 1.01x  slower

                   quote_char_nil
              master:        44.9 i/s
           csv 3.3.0:        32.6 i/s - 1.38x  slower

                    col_sep_space
              master:        12.3 i/s
           csv 3.3.0:        12.2 i/s - 1.01x  slower

```
```
RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/vladimirkochnev/.asdf/installs/ruby/3.3.3/bin/ruby -v -S benchmark-driver /Users/vladimirkochnev/code/csv/benchmark/parse_strip.yaml
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin23]
Calculating -------------------------------------
                      csv 3.3.0      master
             default     13.025      13.075 i/s -     100.000 times in 7.677346s 7.648185s
      no_quote_strip      8.823       8.866 i/s -     100.000 times in 11.333992s 11.279182s

Comparison:
                          default
              master:        13.1 i/s
           csv 3.3.0:        13.0 i/s - 1.00x  slower

                   no_quote_strip
              master:         8.9 i/s
           csv 3.3.0:         8.8 i/s - 1.00x  slower

```
  • Loading branch information
marshall-lee authored Jul 21, 2024
1 parent e75132e commit bb93c28
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 11 deletions.
9 changes: 8 additions & 1 deletion lib/csv/fields_converter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,13 @@ class CSV
# Note: Don't use this class directly. This is an internal class.
class FieldsConverter
include Enumerable

NO_QUOTED_FIELDS = [] # :nodoc:
def NO_QUOTED_FIELDS.[](_index)
false
end
NO_QUOTED_FIELDS.freeze

#
# A CSV::FieldsConverter is a data structure for storing the
# fields converter properties to be passed as a parameter
Expand Down Expand Up @@ -44,7 +51,7 @@ def empty?
@converters.empty?
end

def convert(fields, headers, lineno, quoted_fields)
def convert(fields, headers, lineno, quoted_fields=NO_QUOTED_FIELDS)
return fields unless need_convert?

fields.collect.with_index do |field, index|
Expand Down
14 changes: 6 additions & 8 deletions lib/csv/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -757,7 +757,7 @@ def prepare_header
case headers
when Array
@raw_headers = headers
quoted_fields = [false] * @raw_headers.size
quoted_fields = FieldsConverter::NO_QUOTED_FIELDS
@use_headers = true
when String
@raw_headers, quoted_fields = parse_headers(headers)
Expand Down Expand Up @@ -941,11 +941,9 @@ def parse_no_quote(&block)
if line.empty?
next if @skip_blanks
row = []
quoted_fields = []
else
line = strip_value(line)
row = line.split(@split_column_separator, -1)
quoted_fields = [false] * row.size
if @max_field_size
row.each do |column|
validate_field_size(column)
Expand All @@ -959,7 +957,7 @@ def parse_no_quote(&block)
end
end
@last_line = original_line
emit_row(row, quoted_fields, &block)
emit_row(row, &block)
end
end

Expand All @@ -981,7 +979,7 @@ def parse_quotable_loose(&block)
next
end
row = []
quoted_fields = []
quoted_fields = FieldsConverter::NO_QUOTED_FIELDS
elsif line.include?(@cr) or line.include?(@lf)
@scanner.keep_back
@parse_method = :parse_quotable_robust
Expand Down Expand Up @@ -1043,13 +1041,13 @@ def parse_quotable_robust(&block)
quoted_fields << @quoted_column_value
elsif parse_row_end
if row.empty? and value.nil?
emit_row([], [], &block) unless @skip_blanks
emit_row(row, &block) unless @skip_blanks
else
row << value
quoted_fields << @quoted_column_value
emit_row(row, quoted_fields, &block)
row = []
quoted_fields = []
quoted_fields.clear
end
skip_needless_lines
start_row
Expand Down Expand Up @@ -1254,7 +1252,7 @@ def start_row
@scanner.keep_start
end

def emit_row(row, quoted_fields, &block)
def emit_row(row, quoted_fields=FieldsConverter::NO_QUOTED_FIELDS, &block)
@lineno += 1

raw_row = row
Expand Down
3 changes: 1 addition & 2 deletions lib/csv/writer.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@ def <<(row)
@lineno += 1

if @fields_converter
quoted_fields = [false] * row.size
row = @fields_converter.convert(row, nil, lineno, quoted_fields)
row = @fields_converter.convert(row, nil, lineno)
end

i = -1
Expand Down

0 comments on commit bb93c28

Please sign in to comment.