Add configurable "regards" detection to EmailReplyParser.

EmailReplyParser.configure lets callers list closing phrases such as
"Kind regards". While an email is parsed, a fragment that starts with one of
those phrases is flagged via Fragment#regards? and, like a signature, is
hidden as long as no visible fragment has been found yet.

NOTE(review): the context-line indentation in this patch was restored by hand
and the blob ids on the `index` lines predate the latest edits; apply with
`git apply --ignore-whitespace` if exact matching fails.

diff --git a/email_reply_parser.gemspec b/email_reply_parser.gemspec
index 2154fae..e94f5a4 100644
--- a/email_reply_parser.gemspec
+++ b/email_reply_parser.gemspec
@@ -63,6 +63,7 @@ Gem::Specification.new do |s|
     test/emails/email_sig_delimiter_in_middle_of_line.txt
     test/emails/greedy_on.txt
     test/emails/pathological.txt
+    test/emails/email_with_kind_regards.txt
   ]
   # = MANIFEST =
 
diff --git a/lib/email_reply_parser.rb b/lib/email_reply_parser.rb
index e81cbcb..d3cd259 100644
--- a/lib/email_reply_parser.rb
+++ b/lib/email_reply_parser.rb
@@ -32,6 +32,28 @@
 class EmailReplyParser
   VERSION = "0.5.9"
 
+  class << self
+    # Public: Replaces the Configuration instance. Assigning nil makes the
+    # next call to `configuration` build a fresh default.
+    attr_writer :configuration
+
+    # Public: The current configuration, created lazily on first access.
+    #
+    # Returns a Configuration instance.
+    def configuration
+      @configuration ||= Configuration.new
+    end
+
+    # Public: Configures EmailReplyParser.
+    #
+    # Yields the current Configuration instance so the caller can modify it.
+    #
+    # Returns the value of the block.
+    def configure
+      yield(configuration)
+    end
+  end
+
   # Public: Splits an email body into a list of Fragments.
   #
   # text - A String email body.
@@ -50,6 +72,20 @@ def self.parse_reply(text)
     self.read(text).visible_text
   end
 
+  ### Configuration
+
+  # A Configuration instance holds the user-tunable parser settings.
+  class Configuration
+    # Public: Array of String phrases (e.g. "Kind regards") that mark the
+    # start of a closing fragment. Empty by default, which disables the
+    # regards detection.
+    attr_accessor :regards
+
+    def initialize
+      @regards = []
+    end
+  end
+
   ### Emails
 
   # An Email instance represents a parsed body String.
@@ -141,6 +177,37 @@ def read(text)
       SIG_REGEX = Regexp.new(SIGNATURE)
     end
 
+    # Regular expression matching any of the configured regards.
+    #
+    # Fragment lines are stored reversed, so every phrase is reversed as well
+    # and anchored with `$`: a match means the original line starts with the
+    # phrase. Phrases are matched literally and case-insensitively.
+    #
+    # Returns a Regexp (or RE2::Regexp when the re2 gem is available) if
+    # regards are configured, otherwise nil.
+    def regards_regex
+      regards = Array(EmailReplyParser.configuration.regards)
+      return nil if regards.empty?
+
+      # Compile once per instance and reuse while the phrases are unchanged.
+      return @regards_regex if @regards_regex && @regards_source == regards
+
+      # Regexp.escape keeps punctuation such as "." or "(" in a phrase from
+      # being interpreted as pattern syntax.
+      value = regards.map do |regard|
+        "(#{Regexp.escape(regard.reverse)}$)"
+      end.join('|')
+
+      @regards_source = regards.dup
+      @regards_regex =
+        begin
+          require 're2'
+          RE2::Regexp.new(value, case_sensitive: false)
+        rescue LoadError
+          Regexp.new(value, Regexp::IGNORECASE)
+        end
+    end
+
   ### Line-by-Line Parsing
 
     # Scans the given line of text and figures out which fragment it belongs
@@ -166,6 +233,16 @@ def scan_line(line)
         end
       end
 
+      # Mark the current Fragment as a regards if the current line is empty,
+      # regards are configured and the Fragment starts with one of them. The
+      # cheap checks come first so the pattern is only looked up when needed.
+      if @fragment && line == EMPTY && (regards = regards_regex)
+        if regards.match @fragment.lines.last
+          @fragment.regards = true
+          finish_fragment
+        end
+      end
+
       # If the line matches the current fragment, add it. Note that a common
       # reply header also counts as part of the quoted Fragment, even though
       # it doesn't start with `>`.
@@ -217,7 +294,7 @@ def finish_fragment
       if @fragment
         @fragment.finish
         if !@found_visible
-          if @fragment.quoted? || @fragment.signature? ||
+          if @fragment.quoted? || @fragment.signature? || @fragment.regards? ||
             @fragment.to_s.strip == EMPTY
             @fragment.hidden = true
           else
@@ -235,7 +312,7 @@ def finish_fragment
   # Represents a group of paragraphs in the email sharing common attributes.
   # Paragraphs should get their own fragment if they are a quoted area or a
   # signature.
-  class Fragment < Struct.new(:quoted, :signature, :hidden)
+  class Fragment < Struct.new(:quoted, :signature, :hidden, :regards)
     # This is an Array of String lines of content. Since the content is
     # reversed, this array is backwards, and contains reversed strings.
     attr_reader :lines,
@@ -245,7 +322,7 @@ class Fragment < Struct.new(:quoted, :signature, :hidden)
       :content
 
     def initialize(quoted, first_line)
-      self.signature = self.hidden = false
+      self.signature = self.hidden = self.regards = false
       self.quoted = quoted
       @lines = [first_line]
       @content = nil
@@ -255,6 +332,7 @@ def initialize(quoted, first_line)
     alias quoted? quoted
     alias signature? signature
     alias hidden? hidden
+    alias regards? regards
 
     # Builds the string content by joining the lines and reversing them.
     #
diff --git a/test/email_reply_parser_test.rb b/test/email_reply_parser_test.rb
index df509cb..de1f38a 100644
--- a/test/email_reply_parser_test.rb
+++ b/test/email_reply_parser_test.rb
@@ -9,6 +9,20 @@
 EMAIL_FIXTURE_PATH = dir + 'emails'
 
 class EmailReplyParserTest < Test::Unit::TestCase
+  # Reset the global configuration so the regards set by one test cannot
+  # influence how another test parses its fixture.
+  def teardown
+    EmailReplyParser.configuration = nil
+  end
+
+  def test_regards_configuration
+    EmailReplyParser.configure do |config|
+      config.regards = ['best regards']
+    end
+
+    assert_equal ['best regards'], EmailReplyParser.configuration.regards
+  end
+
   def test_encoding_should_be_maintained
     body = IO.read EMAIL_FIXTURE_PATH.join("email_1_1.txt").to_s
     EmailReplyParser.read body
@@ -222,6 +236,15 @@ def test_doesnt_remove_signature_delimiter_in_mid_line
     assert_equal 1, reply.fragments.size
   end
 
+  def test_kind_regards_signature
+    EmailReplyParser.configure do |config|
+      config.regards = ['Kind regards']
+    end
+    reply = email('email_with_kind_regards')
+    assert_match(/Thats a great idea/, reply.fragments[0].to_s)
+    assert_equal [false, true], reply.fragments.map { |f| f.regards? }
+  end
+
   def email(name)
     body = IO.read EMAIL_FIXTURE_PATH.join("#{name}.txt").to_s
     EmailReplyParser.read body
diff --git a/test/emails/email_with_kind_regards.txt b/test/emails/email_with_kind_regards.txt
new file mode 100644
index 0000000..870761c
--- /dev/null
+++ b/test/emails/email_with_kind_regards.txt
@@ -0,0 +1,9 @@
+Hey,
+
+Thats a great idea!
+
+
+Kind regards
+
+Tim Tommy
+CEO