diff --git a/lib/strip_attributes.rb b/lib/strip_attributes.rb index 287a840..cb1048e 100644 --- a/lib/strip_attributes.rb +++ b/lib/strip_attributes.rb @@ -70,8 +70,9 @@ def self.strip_string(value, options = nil) # U+200D ZERO WIDTH JOINER # U+2060 WORD JOINER # U+FEFF ZERO WIDTH NO-BREAK SPACE - if value.respond_to?(:gsub!) - value.gsub!(/\A[[:space:]\u180E\u200B\u200C\u200D\u2060\uFEFF]+|[[:space:]\u180E\u200B\u200C\u200D\u2060\uFEFF]+\z/, '') + regex = /\A[[:space:]\u180E\u200B\u200C\u200D\u2060\uFEFF]+|[[:space:]\u180E\u200B\u200C\u200D\u2060\uFEFF]+\z/ + if value.respond_to?(:gsub!) && Encoding.compatible?(value, regex) + value.gsub!(regex, '') end elsif value.respond_to?(:strip!) value.strip! diff --git a/test/strip_attributes_test.rb b/test/strip_attributes_test.rb index 751aa3a..427fe7a 100644 --- a/test/strip_attributes_test.rb +++ b/test/strip_attributes_test.rb @@ -267,7 +267,10 @@ def test_should_strip_unicode return if "\u0020" != " " # U200A - HAIR SPACE # U200B - ZERO WIDTH SPACE + # U20AC - EURO SIGN + assert_equal "foo", StripAttributes.strip("\u200A\u200B foo\u200A\u200B ") + assert_equal "foo\u20AC".force_encoding("ASCII-8BIT"), StripAttributes.strip("foo\u20AC ".force_encoding("ASCII-8BIT")) end end end