From 189851d20a1a8f9be1a5cb9539f5f652f862f75c Mon Sep 17 00:00:00 2001 From: Bart de Water <496367+bdewater@users.noreply.github.com> Date: Wed, 21 Aug 2024 02:17:37 -0400 Subject: [PATCH] Add Time converter to CSV::Converters (#313) Ruby recommends working with Time objects, unless you have a good reason to use DateTime: https://ruby-doc.org/stdlib-2.5.0/libdoc/date/rdoc/DateTime.html#class-DateTime-label-When+should+you+use+DateTime+and+when+should+you+use+Time-3F This commit adds the missing converter for the common modern use case. --- doc/csv/recipes/parsing.rdoc | 11 ++- lib/csv.rb | 24 +++++- test/csv/test_data_converters.rb | 142 +++++++++++++++++++++++++++++++ 3 files changed, 174 insertions(+), 3 deletions(-) diff --git a/doc/csv/recipes/parsing.rdoc b/doc/csv/recipes/parsing.rdoc index e7bfc072..63673072 100644 --- a/doc/csv/recipes/parsing.rdoc +++ b/doc/csv/recipes/parsing.rdoc @@ -45,6 +45,7 @@ All code snippets on this page assume that the following has been executed: - {Recipe: Convert Fields to Numerics}[#label-Recipe-3A+Convert+Fields+to+Numerics] - {Recipe: Convert Fields to Dates}[#label-Recipe-3A+Convert+Fields+to+Dates] - {Recipe: Convert Fields to DateTimes}[#label-Recipe-3A+Convert+Fields+to+DateTimes] + - {Recipe: Convert Fields to Times}[#label-Recipe-3A+Convert+Fields+to+Times] - {Recipe: Convert Assorted Fields to Objects}[#label-Recipe-3A+Convert+Assorted+Fields+to+Objects] - {Recipe: Convert Fields to Other Objects}[#label-Recipe-3A+Convert+Fields+to+Other+Objects] - {Recipe: Filter Field Strings}[#label-Recipe-3A+Filter+Field+Strings] @@ -339,6 +340,7 @@ There are built-in field converters for converting to objects of certain classes - \Integer - \Date - \DateTime +- \Time Other built-in field converters include: - +:numeric+: converts to \Integer and \Float. 
@@ -381,6 +383,13 @@ Convert fields to \DateTime objects using built-in converter +:date_time+: parsed = CSV.parse(source, headers: true, converters: :date_time) parsed.map {|row| row['DateTime'].class} # => [DateTime, DateTime, DateTime] +===== Recipe: Convert Fields to Times + +Convert fields to \Time objects using built-in converter +:time+: + source = "Name,Time\nfoo,2001-02-03\nbar,2001-02-04\nbaz,2020-05-07T14:59:00-05:00\n" + parsed = CSV.parse(source, headers: true, converters: :time) + parsed.map {|row| row['Time'].class} # => [Time, Time, Time] + ===== Recipe: Convert Assorted Fields to Objects Convert assorted fields to objects using built-in converter +:all+: @@ -542,4 +551,4 @@ Output: # # # - # \ No newline at end of file + # diff --git a/lib/csv.rb b/lib/csv.rb index 41667006..b969d73c 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -91,6 +91,7 @@ require "forwardable" require "date" +require "time" require "stringio" require_relative "csv/fields_converter" @@ -521,6 +522,7 @@ # - :float: converts each \String-embedded float into a true \Float. # - :date: converts each \String-embedded date into a true \Date. # - :date_time: converts each \String-embedded date-time into a true \DateTime +# - :time: converts each \String-embedded time into a true \Time # . # This example creates a converter proc, then stores it: # strip_converter = proc {|field| field.strip } @@ -631,6 +633,7 @@ # [:numeric, [:integer, :float]] # [:date, Proc] # [:date_time, Proc] +# [:time, Proc] # [:all, [:date_time, :numeric]] # # Each of these converters transcodes values to UTF-8 before attempting conversion. 
@@ -675,6 +678,15 @@ # csv = CSV.parse_line(data, converters: :date_time) # csv # => [#, "x"] # +# Converter +time+ converts each field that matches DateTimeMatcher, using Time::parse: +# data = '2020-05-07T14:59:00-05:00,x' +# # Without the converter +# csv = CSV.parse_line(data) +# csv # => ["2020-05-07T14:59:00-05:00", "x"] +# # With the converter +# csv = CSV.parse_line(data, converters: :time) +# csv # => [2020-05-07 14:59:00 -0500, "x"] +# # Converter +:numeric+ converts with both +:date_time+ and +:numeric+.. # # As seen above, method #convert adds \converters to a \CSV instance, @@ -871,10 +883,10 @@ def initialize(encoding, line_number) # A Regexp used to find and convert some common Date formats. DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | \d{4}-\d{2}-\d{2} )\z /x - # A Regexp used to find and convert some common DateTime formats. + # A Regexp used to find and convert some common (Date)Time formats. DateTimeMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} | - # ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse + # ISO-8601 and RFC-3339 (space instead of T) recognized by (Date)Time.parse \d{4}-\d{2}-\d{2} (?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)? )\z /x @@ -912,6 +924,14 @@ def initialize(encoding, line_number) f end }, + time: lambda { |f| + begin + e = f.encode(ConverterEncoding) + e.match?(DateTimeMatcher) ? 
Time.parse(e) : f + rescue # encoding conversion or parse errors + f + end + }, all: [:date_time, :numeric], } diff --git a/test/csv/test_data_converters.rb b/test/csv/test_data_converters.rb index c20a5d1f..6c46cd90 100644 --- a/test/csv/test_data_converters.rb +++ b/test/csv/test_data_converters.rb @@ -187,4 +187,146 @@ def test_builtin_date_time_converter_rfc3339_tab_utc assert_equal(datetime, CSV::Converters[:date_time][rfc3339_string]) end + + def test_builtin_time_converter + # does convert + assert_instance_of(Time, + CSV::Converters[:time][@win_safe_time_str]) + + # does not convert + assert_instance_of(String, CSV::Converters[:time]["junk"]) + end + + def test_builtin_time_converter_iso8601_date + iso8601_string = "2018-01-14" + time = Time.new(2018, 1, 14) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_minute + iso8601_string = "2018-01-14T22:25" + time = Time.new(2018, 1, 14, 22, 25) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_second + iso8601_string = "2018-01-14T22:25:19" + time = Time.new(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_under_second + iso8601_string = "2018-01-14T22:25:19.1" + time = Time.new(2018, 1, 14, 22, 25, 19.1r) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_under_second_offset + iso8601_string = "2018-01-14T22:25:19.1+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_offset + iso8601_string = "2018-01-14T22:25:19+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_iso8601_utc + iso8601_string = 
"2018-01-14T22:25:19Z" + time = Time.utc(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][iso8601_string]) + end + + def test_builtin_time_converter_rfc3339_minute + rfc3339_string = "2018-01-14 22:25" + time = Time.new(2018, 1, 14, 22, 25) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_second + rfc3339_string = "2018-01-14 22:25:19" + time = Time.new(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_under_second + rfc3339_string = "2018-01-14 22:25:19.1" + time = Time.new(2018, 1, 14, 22, 25, 19.1r) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_under_second_offset + rfc3339_string = "2018-01-14 22:25:19.1+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_offset + rfc3339_string = "2018-01-14 22:25:19+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_utc + rfc3339_string = "2018-01-14 22:25:19Z" + time = Time.utc(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_minute + rfc3339_string = "2018-01-14\t22:25" + time = Time.new(2018, 1, 14, 22, 25) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_second + rfc3339_string = "2018-01-14\t22:25:19" + time = Time.new(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_under_second + rfc3339_string = "2018-01-14\t22:25:19.1" + time = Time.new(2018, 1, 14, 22, 25, 19.1r) + assert_equal(time, 
CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_under_second_offset + rfc3339_string = "2018-01-14\t22:25:19.1+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19.1r, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_offset + rfc3339_string = "2018-01-14\t22:25:19+09:00" + time = Time.new(2018, 1, 14, 22, 25, 19, "+09:00") + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end + + def test_builtin_time_converter_rfc3339_tab_utc + rfc3339_string = "2018-01-14\t22:25:19Z" + time = Time.utc(2018, 1, 14, 22, 25, 19) + assert_equal(time, + CSV::Converters[:time][rfc3339_string]) + end end