From ce9119802be7685ec93a68741a5d9da3dbe86122 Mon Sep 17 00:00:00 2001 From: Marcelo Date: Mon, 5 Aug 2024 07:31:33 +0200 Subject: [PATCH] Allow CSV.open with StringIO argument (#302) Fix #300 --- lib/csv.rb | 74 +++++++++++++++++++++++++-------- test/csv/interface/test_read.rb | 18 ++++++++ 2 files changed, 74 insertions(+), 18 deletions(-) diff --git a/lib/csv.rb b/lib/csv.rb index 0cf49eb..4166700 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1508,10 +1508,8 @@ def generate_lines(rows, **options) # # :call-seq: - # open(file_path, mode = "rb", **options ) -> new_csv - # open(io, mode = "rb", **options ) -> new_csv - # open(file_path, mode = "rb", **options ) { |csv| ... } -> object - # open(io, mode = "rb", **options ) { |csv| ... } -> object + # open(path_or_io, mode = "rb", **options ) -> new_csv + # open(path_or_io, mode = "rb", **options ) { |csv| ... } -> object # # possible options elements: # keyword form: @@ -1520,7 +1518,7 @@ def generate_lines(rows, **options) # :undef => :replace # replace undefined conversion # :replace => string # replacement string ("?" or "\uFFFD" if not specified) # - # * Argument +path+, if given, must be the path to a file. + # * Argument +path_or_io+ must be a file path or an \IO stream. # :include: ../doc/csv/arguments/io.rdoc # * Argument +mode+, if given, must be a \File mode. # See {Access Modes}[https://docs.ruby-lang.org/en/master/File.html#class-File-label-Access+Modes]. @@ -1544,6 +1542,9 @@ def generate_lines(rows, **options) # path = 't.csv' # File.write(path, string) # + # string_io = StringIO.new + # string_io << "foo,0\nbar,1\nbaz,2\n" + # # --- # # With no block given, returns a new \CSV object. 
@@ -1556,6 +1557,9 @@ def generate_lines(rows, **options) # csv = CSV.open(File.open(path)) # csv # => # # + # Create a \CSV object using a \StringIO: + # csv = CSV.open(string_io) + # csv # => # # --- # # With a block given, calls the block with the created \CSV object; @@ -1573,16 +1577,24 @@ def generate_lines(rows, **options) # Output: # # # + # Using a \StringIO: + # csv = CSV.open(string_io) {|csv| p csv} + # csv # => # + # Output: + # # # --- # # Raises an exception if the argument is not a \String object or \IO object: # # Raises TypeError (no implicit conversion of Symbol into String) # CSV.open(:foo) - def open(filename, mode="r", **options) + def open(filename_or_io, mode="r", **options) # wrap a File opened with the remaining +args+ with no newline # decorator file_opts = {} - may_enable_bom_deletection_automatically(mode, options, file_opts) + may_enable_bom_detection_automatically(filename_or_io, + mode, + options, + file_opts) file_opts.merge!(options) unless file_opts.key?(:newline) file_opts[:universal_newline] ||= false @@ -1592,14 +1604,19 @@ def open(filename, mode="r", **options) options.delete(:replace) options.delete_if {|k, _| /newline\z/.match?(k)} - begin - f = File.open(filename, mode, **file_opts) - rescue ArgumentError => e - raise unless /needs binmode/.match?(e.message) and mode == "r" - mode = "rb" - file_opts = {encoding: Encoding.default_external}.merge(file_opts) - retry + if filename_or_io.is_a?(StringIO) + f = create_stringio(filename_or_io.string, mode, **file_opts) + else + begin + f = File.open(filename_or_io, mode, **file_opts) + rescue ArgumentError => e + raise unless /needs binmode/.match?(e.message) and mode == "r" + mode = "rb" + file_opts = {encoding: Encoding.default_external}.merge(file_opts) + retry + end end + begin csv = new(f, **options) rescue Exception @@ -1886,16 +1903,37 @@ def table(path, **options) private_constant :ON_WINDOWS private - def may_enable_bom_deletection_automatically(mode, options, file_opts) 
- # "bom|utf-8" may be buggy on Windows: - # https://bugs.ruby-lang.org/issues/20526 - return if ON_WINDOWS + def may_enable_bom_detection_automatically(filename_or_io, + mode, + options, + file_opts) + if filename_or_io.is_a?(StringIO) + # StringIO's BOM support isn't available with Ruby 2.6 and earlier: + # https://github.com/ruby/stringio/pull/47 + return if RUBY_VERSION < "2.7" + else + # "bom|utf-8" may be buggy on Windows: + # https://bugs.ruby-lang.org/issues/20526 + return if ON_WINDOWS + end return unless Encoding.default_external == Encoding::UTF_8 return if options.key?(:encoding) return if options.key?(:external_encoding) return if mode.include?(":") file_opts[:encoding] = "bom|utf-8" end + + if RUBY_VERSION < "2.7" + def create_stringio(str, mode, opts) + opts.delete_if {|k, _| k == :universal_newline or DEFAULT_OPTIONS.key?(k)} + raise ArgumentError, "Unsupported options parsing StringIO: #{opts.keys}" unless opts.empty? + StringIO.new(str, mode) + end + else + def create_stringio(str, mode, opts) + StringIO.new(str, mode, **opts) + end + end end # :call-seq: diff --git a/test/csv/interface/test_read.rb b/test/csv/interface/test_read.rb index b0e1e24..7537c76 100644 --- a/test/csv/interface/test_read.rb +++ b/test/csv/interface/test_read.rb @@ -32,6 +32,24 @@ def test_foreach assert_equal(@rows, rows) end + def test_foreach_stringio + string_io = StringIO.new(@data) + rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a + assert_equal(@rows, rows) + end + + def test_foreach_stringio_with_bom + if RUBY_VERSION < "2.7" + # StringIO's BOM support isn't available with Ruby 2.6 and earlier: + # https://github.com/ruby/stringio/pull/47 + omit("StringIO's BOM support isn't available with Ruby < 2.7") + end + + string_io = StringIO.new("\ufeff#{@data}") # U+FEFF ZERO WIDTH NO-BREAK SPACE + rows = CSV.foreach(string_io, col_sep: "\t", row_sep: "\r\n").to_a + assert_equal(@rows, rows) + end + if respond_to?(:ractor) ractor 
def test_foreach_in_ractor