From 6eb4c44a15eb1707dde60959e85e5c536ef136e2 Mon Sep 17 00:00:00 2001 From: Mikel Lindsaar Date: Tue, 26 Apr 2011 17:40:02 +1000 Subject: [PATCH] Pulling Active Support multibyte requirements over for Ruby 1.8.7 --- Gemfile | 1 - lib/mail.rb | 16 +- lib/mail/core_extensions/string/access.rb | 98 ++++ lib/mail/core_extensions/string/multibyte.rb | 72 +++ lib/mail/indifferent_hash.rb | 136 ++++- lib/mail/multibyte.rb | 42 ++ lib/mail/multibyte/chars.rb | 474 ++++++++++++++++++ lib/mail/multibyte/exceptions.rb | 8 + lib/mail/multibyte/unicode.rb | 392 +++++++++++++++ lib/mail/multibyte/utils.rb | 60 +++ lib/mail/version_specific/ruby_1_8.rb | 30 +- lib/mail/version_specific/ruby_1_9.rb | 9 - .../mail/fields/common/parameter_hash_spec.rb | 4 +- .../fields/content_disposition_field_spec.rb | 4 +- spec/mail/fields/content_type_field_spec.rb | 60 +-- spec/mail/message_spec.rb | 7 +- spec/mail/mime_messages_spec.rb | 4 +- 17 files changed, 1336 insertions(+), 81 deletions(-) create mode 100644 lib/mail/core_extensions/string/access.rb create mode 100644 lib/mail/core_extensions/string/multibyte.rb create mode 100644 lib/mail/multibyte.rb create mode 100644 lib/mail/multibyte/chars.rb create mode 100644 lib/mail/multibyte/exceptions.rb create mode 100644 lib/mail/multibyte/unicode.rb create mode 100644 lib/mail/multibyte/utils.rb diff --git a/Gemfile b/Gemfile index fdad09e19..c195c2721 100644 --- a/Gemfile +++ b/Gemfile @@ -1,6 +1,5 @@ source :rubygems -gem "activesupport", ">= 2.3.6" gem "tlsmail" if RUBY_VERSION <= '1.8.6' gem "mime-types", "~> 1.16" gem "treetop", "~> 1.4.8" diff --git a/lib/mail.rb b/lib/mail.rb index 90e6697e9..52b994d82 100644 --- a/lib/mail.rb +++ b/lib/mail.rb @@ -26,13 +26,19 @@ module Mail # :doc: require 'mail/version' - require 'mail/core_extensions/nil' - require 'mail/core_extensions/string' + # Only load our extensions if AS is not already loaded + unless defined?(ActiveSupport) + require 'mail/core_extensions/nil' + require 
'mail/core_extensions/string' + require 'mail/core_extensions/string/access' + require 'mail/core_extensions/string/multibyte' + require 'mail/core_extensions/object' + require 'mail/multibyte' + require 'mail/indifferent_hash' + end + require 'mail/core_extensions/shellwords' unless String.new.respond_to?(:shellescape) require 'mail/core_extensions/smtp' if RUBY_VERSION < '1.9.3' - require 'mail/core_extensions/object' - - require 'mail/indifferent_hash' require 'mail/patterns' require 'mail/utilities' diff --git a/lib/mail/core_extensions/string/access.rb b/lib/mail/core_extensions/string/access.rb new file mode 100644 index 000000000..8a08acaa0 --- /dev/null +++ b/lib/mail/core_extensions/string/access.rb @@ -0,0 +1,98 @@ + +class String + unless '1.9'.respond_to?(:force_encoding) + # Returns the character at the +position+ treating the string as an array (where 0 is the first character). + # + # Examples: + # "hello".at(0) # => "h" + # "hello".at(4) # => "o" + # "hello".at(10) # => ERROR if < 1.9, nil in 1.9 + def at(position) + mb_chars[position, 1].to_s + end + + # Returns the remaining of the string from the +position+ treating the string as an array (where 0 is the first character). + # + # Examples: + # "hello".from(0) # => "hello" + # "hello".from(2) # => "llo" + # "hello".from(10) # => "" if < 1.9, nil in 1.9 + def from(position) + mb_chars[position..-1].to_s + end + + # Returns the beginning of the string up to the +position+ treating the string as an array (where 0 is the first character). + # + # Examples: + # "hello".to(0) # => "h" + # "hello".to(2) # => "hel" + # "hello".to(10) # => "hello" + def to(position) + mb_chars[0..position].to_s + end + + # Returns the first character of the string or the first +limit+ characters. 
+ # + # Examples: + # "hello".first # => "h" + # "hello".first(2) # => "he" + # "hello".first(10) # => "hello" + def first(limit = 1) + if limit == 0 + '' + elsif limit >= size + self + else + mb_chars[0...limit].to_s + end + end + + # Returns the last character of the string or the last +limit+ characters. + # + # Examples: + # "hello".last # => "o" + # "hello".last(2) # => "lo" + # "hello".last(10) # => "hello" + def last(limit = 1) + if limit == 0 + '' + elsif limit >= size + self + else + mb_chars[(-limit)..-1].to_s + end + end + else + def at(position) + self[position] + end + + def from(position) + self[position..-1] + end + + def to(position) + self[0..position] + end + + def first(limit = 1) + if limit == 0 + '' + elsif limit >= size + self + else + to(limit - 1) + end + end + + def last(limit = 1) + if limit == 0 + '' + elsif limit >= size + self + else + from(-limit) + end + end + end +end diff --git a/lib/mail/core_extensions/string/multibyte.rb b/lib/mail/core_extensions/string/multibyte.rb new file mode 100644 index 000000000..34d372ec7 --- /dev/null +++ b/lib/mail/core_extensions/string/multibyte.rb @@ -0,0 +1,72 @@ +# encoding: utf-8 +require 'mail/multibyte' + +class String + if RUBY_VERSION >= "1.9" + # == Multibyte proxy + # + # +mb_chars+ is a multibyte safe proxy for string methods. + # + # In Ruby 1.8 and older it creates and returns an instance of the Mail::Multibyte::Chars class which + # encapsulates the original string. A Unicode safe version of all the String methods are defined on this proxy + # class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsuled string. + # + # name = 'Claus Müller' + # name.reverse # => "rell??M sualC" + # name.length # => 13 + # + # name.mb_chars.reverse.to_s # => "rellüM sualC" + # name.mb_chars.length # => 12 + # + # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware. 
This means that + # it becomes easy to run one version of your code on multiple Ruby versions. + # + # == Method chaining + # + # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows + # method chaining on the result of any of these methods. + # + # name.mb_chars.reverse.length # => 12 + # + # == Interoperability and configuration + # + # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between + # String and Char work like expected. The bang! methods change the internal string representation in the Chars + # object. Interoperability problems can be resolved easily with a +to_s+ call. + # + # For more information about the methods defined on the Chars proxy see Mail::Multibyte::Chars. For + # information about how to change the default Multibyte behaviour see Mail::Multibyte. + def mb_chars + if Mail::Multibyte.proxy_class.consumes?(self) + Mail::Multibyte.proxy_class.new(self) + else + self + end + end + + def is_utf8? #:nodoc + case encoding + when Encoding::UTF_8 + valid_encoding? + when Encoding::ASCII_8BIT, Encoding::US_ASCII + dup.force_encoding(Encoding::UTF_8).valid_encoding? + else + false + end + end + else + def mb_chars + if Mail::Multibyte.proxy_class.wants?(self) + Mail::Multibyte.proxy_class.new(self) + else + self + end + end + + # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have + # them), returns false otherwise. + def is_utf8? 
+      Mail::Multibyte::Chars.consumes?(self)
+    end
+  end
+end
diff --git a/lib/mail/indifferent_hash.rb b/lib/mail/indifferent_hash.rb
index 58703ecd1..7a5e13b6d 100644
--- a/lib/mail/indifferent_hash.rb
+++ b/lib/mail/indifferent_hash.rb
@@ -1,26 +1,142 @@
 module Mail
   # Sort of like ActiveSupport HashWithIndifferentAccess, but lighter
   class IndifferentHash < Hash
-    def initialize(other=nil)
-      if other.is_a?(Hash)
-        self.default = other.default
-        self.update(other)
+
+    def initialize(constructor = {}) # a Hash is copied in through #update (keys converted); anything else becomes the default value
+      if constructor.is_a?(Hash)
+        super()
+        update(constructor)
+      else
+        super(constructor)
+      end
+    end
+
+    def default(key = nil) # a Symbol key is retried under its String form before the regular default is used
+      if key.is_a?(Symbol) && include?(key = key.to_s)
+        self[key]
       else
         super
       end
     end
 
-    def [](key_name)
-      super(key_name.to_sym)
+    def self.new_from_hash_copying_default(hash) # builds an IndifferentHash from +hash+ and copies hash.default onto it
+      IndifferentHash.new(hash).tap do |new_hash|
+        new_hash.default = hash.default
+      end
     end
 
-    def []=(k, v)
-      super(k.to_sym, v)
+    alias_method :regular_writer, :[]= unless method_defined?(:regular_writer)
+    alias_method :regular_update, :update unless method_defined?(:regular_update)
+
+    # Assigns a new value to the hash (Symbol keys are stored as Strings, nested Hashes are converted):
+    #
+    #   hash = IndifferentHash.new
+    #   hash[:key] = "value"
+    #
+    def []=(key, value)
+      regular_writer(convert_key(key), convert_value(value))
     end
 
+    alias_method :store, :[]=
+
+    # Updates the receiver in place with the values from the second hash and returns the receiver:
+    #
+    #   hash_1 = IndifferentHash.new
+    #   hash_1[:key] = "value"
+    #
+    #   hash_2 = IndifferentHash.new
+    #   hash_2[:key] = "New Value!"
+    #
+    #   hash_1.update(hash_2) # => {"key"=>"New Value!"}
+    #
     def update(other_hash)
-      super(other_hash.inject({}) {|c, (k, v)| c[k.to_sym] = v; c})
+      other_hash.each_pair { |key, value| regular_writer(convert_key(key), convert_value(value)) }
+      self
     end
-    alias merge! update
+
+    alias_method :merge!, :update
+
+    # Checks the hash for a key matching the argument passed in:
+    #
+    #   hash = IndifferentHash.new
+    #   hash["key"] = "value"
+    #   hash.key? :key  # => true
+    #   hash.key? "key" # => true
+    #
+    def key?(key)
+      super(convert_key(key))
+    end
+
+    alias_method :include?, :key?
+    alias_method :has_key?, :key?
+    alias_method :member?, :key?
+
+    # Fetches the value for the specified key, same as doing hash[key]
+    def fetch(key, *extras)
+      super(convert_key(key), *extras)
+    end
+
+    # Returns an array of the values at the specified indices:
+    #
+    #   hash = IndifferentHash.new
+    #   hash[:a] = "x"
+    #   hash[:b] = "y"
+    #   hash.values_at("a", "b") # => ["x", "y"]
+    #
+    def values_at(*indices)
+      indices.collect {|key| self[convert_key(key)]}
+    end
+
+    # Returns a copy of the hash as a new IndifferentHash (built with IndifferentHash.new, so the default value is not carried over).
+    def dup
+      IndifferentHash.new(self)
+    end
+
+    # Merges the receiver and the specified hash together, giving precedence to the values from the second hash.
+    # Does not overwrite the existing hash.
+    def merge(hash)
+      self.dup.update(hash)
+    end
+
+    # Performs the opposite of merge, with the keys and values from the first hash taking precedence over the second.
+    # Written without +super+: Hash#reverse_merge is an ActiveSupport extension and may not be loaded. Always returns an IndifferentHash.
+    def reverse_merge(other_hash)
+      self.class.new_from_hash_copying_default(other_hash).merge(self)
+    end
+
+    def reverse_merge!(other_hash)
+      replace(reverse_merge( other_hash ))
+    end
+
+    # Removes a specified key from the hash.
+    def delete(key)
+      super(convert_key(key))
+    end
+
+    def stringify_keys!; self end
+    def stringify_keys; dup end
+    def symbolize_keys; inject(Hash.new(default)) { |h, (k, v)| h[(k.to_sym rescue k) || k] = v; h } end # plain Hash with Symbol keys; no ActiveSupport needed
+    def to_options!; self end
+
+    def to_hash
+      Hash.new(default).merge!(self)
+    end
+
+    protected
+
+      def convert_key(key)
+        key.kind_of?(Symbol) ? key.to_s : key
+      end
+
+      def convert_value(value) # only exact Hash instances are wrapped; Arrays are converted element by element on a copy
+        if value.class == Hash
+          self.class.new_from_hash_copying_default(value)
+        elsif value.is_a?(Array)
+          value.dup.replace(value.map { |e| convert_value(e) })
+        else
+          value
+        end
+      end
+
   end
 end
diff --git a/lib/mail/multibyte.rb b/lib/mail/multibyte.rb
new file mode 100644
index 000000000..a7dde4824
--- /dev/null
+++ b/lib/mail/multibyte.rb
@@ -0,0 +1,42 @@
+# encoding: utf-8
+module Mail #:nodoc:
+  module Multibyte
+    require 'mail/multibyte/exceptions'
+    require 'mail/multibyte/chars'
+    require 'mail/multibyte/unicode'
+
+    # The proxy class returned when calling mb_chars. You can use this accessor to configure your own proxy
+    # class so you can support other encodings. See the Mail::Multibyte::Chars implementation for
+    # an example how to do this.
+    #
+    # Example:
+    #   Mail::Multibyte.proxy_class = CharsForUTF32
+    def self.proxy_class=(klass)
+      @proxy_class = klass
+    end
+
+    # Returns the current proxy class
+    def self.proxy_class
+      @proxy_class ||= Mail::Multibyte::Chars
+    end
+
+    # Regular expressions that describe valid byte sequences for a character
+    VALID_CHARACTER = {
+      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
+      'UTF-8' => /\A(?:
+                  [\x00-\x7f]                                         |
+                  [\xc2-\xdf] [\x80-\xbf]                             |
+                  \xe0        [\xa0-\xbf] [\x80-\xbf]                 |
+                  [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]                 |
+                  \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
+      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
+      'Shift_JIS' => /\A(?:
+                  [\x00-\x7e\xa1-\xdf]                                     |
+                  [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
+    }
+  end
+end
+
+require 'mail/multibyte/utils'
\ No newline at end of file
diff --git a/lib/mail/multibyte/chars.rb b/lib/mail/multibyte/chars.rb
new file mode 100644
index 000000000..c6d37fa77
--- /dev/null
+++ b/lib/mail/multibyte/chars.rb
@@ -0,0 +1,474 @@
+# encoding: utf-8
+
+module Mail
#:nodoc: + module Multibyte #:nodoc: + # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive + # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an + # encoding safe manner. All the normal String methods are also implemented on the proxy. + # + # String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods + # which would normally return a String object now return a Chars object so methods can be chained. + # + # "The Perfect String ".mb_chars.downcase.strip.normalize # => "the perfect string" + # + # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made. + # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them. + # + # bad.explicit_checking_method "T".mb_chars.downcase.to_s + # + # The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different + # encodings you can write your own multibyte string handler and configure it through + # Mail::Multibyte.proxy_class. + # + # class CharsForUTF32 + # def size + # @wrapped_string.size / 4 + # end + # + # def self.accepts?(string) + # string.length % 4 == 0 + # end + # end + # + # Mail::Multibyte.proxy_class = CharsForUTF32 + class Chars + attr_reader :wrapped_string + alias to_s wrapped_string + alias to_str wrapped_string + + if RUBY_VERSION >= "1.9" + # Creates a new Chars instance by wrapping _string_. + def initialize(string) + @wrapped_string = string + @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen? + end + else + def initialize(string) #:nodoc: + @wrapped_string = string + end + end + + # Forward all undefined methods to the wrapped string. 
+ def method_missing(method, *args, &block) + if method.to_s =~ /!$/ + @wrapped_string.__send__(method, *args, &block) + self + else + result = @wrapped_string.__send__(method, *args, &block) + result.kind_of?(String) ? chars(result) : result + end + end + + # Returns +true+ if _obj_ responds to the given method. Private methods are included in the search + # only if the optional second parameter evaluates to +true+. + def respond_to?(method, include_private=false) + super || @wrapped_string.respond_to?(method, include_private) || false + end + + # Enable more predictable duck-typing on String-like classes. See Object#acts_like?. + def acts_like_string? + true + end + + # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise. + def self.consumes?(string) + # Unpack is a little bit faster than regular expressions. + string.unpack('U*') + true + rescue ArgumentError + false + end + + include Comparable + + # Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before, + # equal or after the object on the right side of the operation. It accepts any object + # that implements +to_s+: + # + # 'é'.mb_chars <=> 'ü'.mb_chars # => -1 + # + # See String#<=> for more details. + def <=>(other) + @wrapped_string <=> other.to_s + end + + if RUBY_VERSION < "1.9" + # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns + # +false+ otherwise. + def self.wants?(string) + $KCODE == 'UTF8' && consumes?(string) + end + + # Returns a new Chars object containing the _other_ object concatenated to the string. + # + # Example: + # ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl" + def +(other) + chars(@wrapped_string + other) + end + + # Like String#=~ only it returns the character offset (in codepoints) instead of the byte offset. 
+ # + # Example: + # 'Café périferôl'.mb_chars =~ /ô/ # => 12 + def =~(other) + translate_offset(@wrapped_string =~ other) + end + + # Inserts the passed string at specified codepoint offsets. + # + # Example: + # 'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl" + def insert(offset, fragment) + unpacked = Unicode.u_unpack(@wrapped_string) + unless offset > unpacked.length + @wrapped_string.replace( + Unicode.u_unpack(@wrapped_string).insert(offset, *Unicode.u_unpack(fragment)).pack('U*') + ) + else + raise IndexError, "index #{offset} out of string" + end + self + end + + # Returns +true+ if contained string contains _other_. Returns +false+ otherwise. + # + # Example: + # 'Café'.mb_chars.include?('é') # => true + def include?(other) + # We have to redefine this method because Enumerable defines it. + @wrapped_string.include?(other) + end + + # Returns the position _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found. + # + # Example: + # 'Café périferôl'.mb_chars.index('ô') # => 12 + # 'Café périferôl'.mb_chars.index(/\w/u) # => 0 + def index(needle, offset=0) + wrapped_offset = first(offset).wrapped_string.length + index = @wrapped_string.index(needle, wrapped_offset) + index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil + end + + # Returns the position _needle_ in the string, counting in + # codepoints, searching backward from _offset_ or the end of the + # string. Returns +nil+ if _needle_ isn't found. + # + # Example: + # 'Café périferôl'.mb_chars.rindex('é') # => 6 + # 'Café périferôl'.mb_chars.rindex(/\w/u) # => 13 + def rindex(needle, offset=nil) + offset ||= length + wrapped_offset = first(offset).wrapped_string.length + index = @wrapped_string.rindex(needle, wrapped_offset) + index ? 
(Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil + end + + # Returns the number of codepoints in the string + def size + Unicode.u_unpack(@wrapped_string).size + end + alias_method :length, :size + + # Strips entire range of Unicode whitespace from the right of the string. + def rstrip + chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, '')) + end + + # Strips entire range of Unicode whitespace from the left of the string. + def lstrip + chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, '')) + end + + # Strips entire range of Unicode whitespace from the right and left of the string. + def strip + rstrip.lstrip + end + + # Returns the codepoint of the first character in the string. + # + # Example: + # 'こんにちは'.mb_chars.ord # => 12371 + def ord + Unicode.u_unpack(@wrapped_string)[0] + end + + # Works just like String#rjust, only integer specifies characters instead of bytes. + # + # Example: + # + # "¾ cup".mb_chars.rjust(8).to_s + # # => " ¾ cup" + # + # "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace + # # => "   ¾ cup" + def rjust(integer, padstr=' ') + justify(integer, :right, padstr) + end + + # Works just like String#ljust, only integer specifies characters instead of bytes. + # + # Example: + # + # "¾ cup".mb_chars.rjust(8).to_s + # # => "¾ cup " + # + # "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace + # # => "¾ cup   " + def ljust(integer, padstr=' ') + justify(integer, :left, padstr) + end + + # Works just like String#center, only integer specifies characters instead of bytes. 
+        #
+        # Example:
+        #
+        #   "¾ cup".mb_chars.center(8).to_s
+        #   # => " ¾ cup  "
+        #
+        #   "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
+        #   # => " ¾ cup  "
+        def center(integer, padstr=' ')
+          justify(integer, :center, padstr)
+        end
+
+      else
+        def =~(other)
+          @wrapped_string =~ other
+        end
+      end
+
+      # Works just like String#split, with the exception that the items in the resulting list are Chars
+      # instances instead of String. This makes chaining methods easier.
+      #
+      # Example:
+      #   'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
+      def split(*args)
+        @wrapped_string.split(*args).map { |i| i.mb_chars }
+      end
+
+      # Like String#[]=, except instead of byte offsets you specify character offsets. Raises IndexError when an integer index is past the end or a substring is not found.
+      #
+      # Example:
+      #
+      #   s = "Müller"
+      #   s.mb_chars[2] = "e" # Replace character with offset 2
+      #   s
+      #   # => "Müeler"
+      #
+      #   s = "Müller"
+      #   s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
+      #   s
+      #   # => "Möler"
+      def []=(*args)
+        replace_by = args.pop
+        # Indexed replace with regular expressions already works
+        if args.first.is_a?(Regexp)
+          @wrapped_string[*args] = replace_by
+        else
+          result = Unicode.u_unpack(@wrapped_string)
+          if args[0].is_a?(Integer) # Integer rather than Fixnum: Fixnum is an Integer subclass on old Rubies and no longer exists on current ones
+            raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
+            min = args[0]
+            max = args[1].nil? ? min : (min + args[1] - 1)
+            range = Range.new(min, max)
+            replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer) # a bare codepoint is turned into its UTF-8 character
+          elsif args.first.is_a?(Range)
+            raise RangeError, "#{args[0]} out of range" if args[0].min >= result.length
+            range = args[0]
+          else
+            needle = args[0].to_s
+            min = index(needle) || raise(IndexError, "string not matched") # a missing needle used to fail with NoMethodError on nil
+            max = min + Unicode.u_unpack(needle).length - 1
+            range = Range.new(min, max)
+          end
+          result[range] = Unicode.u_unpack(replace_by)
+          @wrapped_string.replace(result.pack('U*'))
+        end
+      end
+
+      # Reverses all characters in the string.
+      #
+      # Example:
+      #   'Café'.mb_chars.reverse.to_s # => 'éfaC'
+      def reverse
+        chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
+      end
+
+      # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
+      # character. Returns a Chars instance, or +nil+ when nothing is selected.
+      #
+      # Example:
+      #   'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
+      def slice(*args)
+        if args.size > 2
+          raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
+        elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
+          raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
+        elsif (args.size == 2 && !args[1].is_a?(Numeric))
+          raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
+        elsif args[0].kind_of? Range
+          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
+          result = cps.nil? ? nil : cps.pack('U*')
+        elsif args[0].kind_of? Regexp
+          result = @wrapped_string.slice(*args) # regular expressions are handed to the wrapped String unchanged
+        elsif args.size == 1 && args[0].kind_of?(Numeric)
+          character = Unicode.u_unpack(@wrapped_string)[args[0]]
+          result = character && [character].pack('U')
+        else
+          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
+          result = cps && cps.pack('U*')
+        end
+        result && chars(result)
+      end
+      alias_method :[], :slice
+
+      # Limit the byte size of the string to a number of bytes without breaking characters. Usable
+      # when the storage for a string is limited for some reason.
+      #
+      # Example:
+      #   s = 'こんにちは'
+      #   s.mb_chars.limit(7) # => "こん"
+      def limit(limit)
+        slice(0...translate_offset(limit))
+      end
+
+      # Convert characters in the string to uppercase.
+      #
+      # Example:
+      #   'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
+      def upcase
+        chars(Unicode.apply_mapping @wrapped_string, :uppercase_mapping)
+      end
+
+      # Convert characters in the string to lowercase.
+ # + # Example: + # 'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum" + def downcase + chars(Unicode.apply_mapping @wrapped_string, :lowercase_mapping) + end + + # Converts the first character to uppercase and the remainder to lowercase. + # + # Example: + # 'über'.mb_chars.capitalize.to_s # => "Über" + def capitalize + (slice(0) || chars('')).upcase + (slice(1..-1) || chars('')).downcase + end + + # Capitalizes the first letter of every word, when possible. + # + # Example: + # "ÉL QUE SE ENTERÓ".mb_chars.titleize # => "Él Que Se Enteró" + # "日本語".mb_chars.titleize # => "日本語" + def titleize + chars(downcase.to_s.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping }) + end + alias_method :titlecase, :titleize + + # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for + # passing strings to databases and validations. + # + # * form - The form you want to normalize in. Should be one of the following: + # :c, :kc, :d, or :kd. Default is + # Mail::Multibyte::Unicode.default_normalization_form + def normalize(form = nil) + chars(Unicode.normalize(@wrapped_string, form)) + end + + # Performs canonical decomposition on all the characters. + # + # Example: + # 'é'.length # => 2 + # 'é'.mb_chars.decompose.to_s.length # => 3 + def decompose + chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*')) + end + + # Performs composition on all the characters. + # + # Example: + # 'é'.length # => 3 + # 'é'.mb_chars.compose.to_s.length # => 2 + def compose + chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*')) + end + + # Returns the number of grapheme clusters in the string. + # + # Example: + # 'क्षि'.mb_chars.length # => 4 + # 'क्षि'.mb_chars.g_length # => 3 + def g_length + Unicode.g_unpack(@wrapped_string).length + end + + # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string. 
+      #
+      # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+      def tidy_bytes(force = false)
+        chars(Unicode.tidy_bytes(@wrapped_string, force))
+      end
+
+      %w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
+        # Only define a corresponding bang method for methods defined in the proxy; on 1.9 the proxy will
+        # exclude lstrip!, rstrip! and strip! because they already work as expected on multibyte strings.
+        if public_method_defined?(method)
+          define_method("#{method}!") do |*args|
+            @wrapped_string = send(method, *args).to_s # the wrapped string becomes the result of the non-bang method
+            self
+          end
+        end
+      end
+
+      protected
+
+        def translate_offset(byte_offset) #:nodoc:
+          return nil if byte_offset.nil?
+          return 0 if @wrapped_string == ''
+
+          if @wrapped_string.respond_to?(:force_encoding)
+            @wrapped_string = @wrapped_string.dup.force_encoding(Encoding::ASCII_8BIT)
+          end
+
+          begin
+            @wrapped_string[0...byte_offset].unpack('U*').length
+          rescue ArgumentError
+            byte_offset -= 1 # the cut fell inside a multibyte character: back up one byte and try again
+            retry
+          end
+        end
+
+        def justify(integer, way, padstr=' ') #:nodoc:
+          raise ArgumentError, "zero width padding" if padstr.length == 0
+          padsize = integer - size
+          padsize = padsize > 0 ? padsize : 0
+          case way
+          when :right
+            result = @wrapped_string.dup.insert(0, padding(padsize, padstr))
+          when :left
+            result = @wrapped_string.dup.insert(-1, padding(padsize, padstr))
+          when :center
+            lpad = padding((padsize / 2.0).floor, padstr)
+            rpad = padding((padsize / 2.0).ceil, padstr)
+            result = @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
+          end
+          chars(result)
+        end
+
+        def padding(padsize, padstr=' ') #:nodoc:
+          if padsize != 0
+            chars(padstr * ((padsize / Unicode.u_unpack(padstr).size) + 1)).slice(0, padsize)
+          else
+            ''
+          end
+        end
+
+        def chars(string) #:nodoc:
+          self.class.new(string)
+        end
+    end
+  end
+end
diff --git a/lib/mail/multibyte/exceptions.rb b/lib/mail/multibyte/exceptions.rb
new file mode 100644
index 000000000..2d88f9e8c
--- /dev/null
+++ b/lib/mail/multibyte/exceptions.rb
@@ -0,0 +1,8 @@
+# encoding: utf-8
+
+module Mail #:nodoc:
+  module Multibyte #:nodoc:
+    # Raised when a problem with the encoding was found.
+    class EncodingError < StandardError; end
+  end
+end
\ No newline at end of file
diff --git a/lib/mail/multibyte/unicode.rb b/lib/mail/multibyte/unicode.rb
new file mode 100644
index 000000000..b4036212c
--- /dev/null
+++ b/lib/mail/multibyte/unicode.rb
@@ -0,0 +1,392 @@
+module Mail
+  module Multibyte
+    module Unicode
+
+      extend self
+
+      # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
+      # information about normalization.
+      NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
+
+      # The Unicode version that is supported by the implementation
+      UNICODE_VERSION = '5.2.0'
+
+      # The default normalization used for operations that require normalization. It can be set to any of the
+      # normalizations in NORMALIZATION_FORMS.
+ # + # Example: + # Mail::Multibyte::Unicode.default_normalization_form = :c + attr_accessor :default_normalization_form + @default_normalization_form = :kc + + # Hangul character boundaries and properties + HANGUL_SBASE = 0xAC00 + HANGUL_LBASE = 0x1100 + HANGUL_VBASE = 0x1161 + HANGUL_TBASE = 0x11A7 + HANGUL_LCOUNT = 19 + HANGUL_VCOUNT = 21 + HANGUL_TCOUNT = 28 + HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT + HANGUL_SCOUNT = 11172 + HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT + HANGUL_JAMO_FIRST = 0x1100 + HANGUL_JAMO_LAST = 0x11FF + + # All the unicode whitespace + WHITESPACE = [ + (0x0009..0x000D).to_a, # White_Space # Cc [5] .. + 0x0020, # White_Space # Zs SPACE + 0x0085, # White_Space # Cc + 0x00A0, # White_Space # Zs NO-BREAK SPACE + 0x1680, # White_Space # Zs OGHAM SPACE MARK + 0x180E, # White_Space # Zs MONGOLIAN VOWEL SEPARATOR + (0x2000..0x200A).to_a, # White_Space # Zs [11] EN QUAD..HAIR SPACE + 0x2028, # White_Space # Zl LINE SEPARATOR + 0x2029, # White_Space # Zp PARAGRAPH SEPARATOR + 0x202F, # White_Space # Zs NARROW NO-BREAK SPACE + 0x205F, # White_Space # Zs MEDIUM MATHEMATICAL SPACE + 0x3000, # White_Space # Zs IDEOGRAPHIC SPACE + ].flatten.freeze + + # BOM (byte order mark) can also be seen as whitespace, it's a non-rendering character used to distinguish + # between little and big endian. This is not an issue in utf-8, so it must be ignored. + LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM + + # Returns a regular expression pattern that matches the passed Unicode codepoints + def self.codepoints_to_pattern(array_of_codepoints) #:nodoc: + array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|') + end + TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u + LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u + + # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't + # valid UTF-8. 
+      #
+      # Example:
+      #   Unicode.u_unpack('Café') # => [67, 97, 102, 233]
+      def u_unpack(string)
+        begin
+          string.unpack 'U*'
+        rescue ArgumentError
+          raise EncodingError, 'malformed UTF-8 character'
+        end
+      end
+
+      # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
+      # character class and +false+ otherwise. Valid character classes are: :cr, :lf, :l,
+      # :v, :lv, :lvt and :t.
+      #
+      # Primarily used by the grapheme cluster support.
+      def in_char_class?(codepoint, classes)
+        classes.any? { |c| database.boundary[c] === codepoint }
+      end
+
+      # Unpack the string at grapheme boundaries. Returns a list of character lists.
+      #
+      # Example:
+      #   Unicode.g_unpack('क्षि') # => [[2325, 2381], [2359], [2367]]
+      #   Unicode.g_unpack('Café') # => [[67], [97], [102], [233]]
+      def g_unpack(string)
+        codepoints = u_unpack(string)
+        unpacked = []
+        pos = 0
+        marker = 0
+        eoc = codepoints.length
+        while(pos < eoc)
+          pos += 1
+          previous = codepoints[pos-1]
+          current = codepoints[pos]
+          unless (
+              # CR X LF
+              ( previous == database.boundary[:cr] && current == database.boundary[:lf] ) ||
+              # L X (L|V|LV|LVT)
+              ( database.boundary[:l] === previous && in_char_class?(current, [:l,:v,:lv,:lvt]) ) ||
+              # (LV|V) X (V|T)
+              ( in_char_class?(previous, [:lv,:v]) && in_char_class?(current, [:v,:t]) ) ||
+              # (LVT|T) X (T)
+              ( in_char_class?(previous, [:lvt,:t]) && database.boundary[:t] === current ) ||
+              # X Extend
+              (database.boundary[:extend] === current)
+            )
+            # No "do not break" rule matched: close the cluster that started at +marker+.
+            unpacked << codepoints[marker..pos-1]
+            marker = pos
+          end
+        end
+        unpacked
+      end
+
+      # Reverse operation of g_unpack.
+      #
+      # Example:
+      #   Unicode.g_pack(Unicode.g_unpack('क्षि')) # => 'क्षि'
+      def g_pack(unpacked)
+        (unpacked.flatten).pack('U*')
+      end
+
+      # Re-order codepoints so the string becomes canonical.
+ def reorder_characters(codepoints) + length = codepoints.length- 1 + pos = 0 + while pos < length do + cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos+1]] + if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0) + codepoints[pos..pos+1] = cp2.code, cp1.code + pos += (pos > 0 ? -1 : 1) + else + pos += 1 + end + end + codepoints + end + + # Decompose composed characters to the decomposed form. + def decompose_codepoints(type, codepoints) + codepoints.inject([]) do |decomposed, cp| + # if it's a precomposed hangul syllable (HANGUL_SBASE...HANGUL_SLAST) + if HANGUL_SBASE <= cp and cp < HANGUL_SLAST + sindex = cp - HANGUL_SBASE + ncp = [] # new codepoints + ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT + ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT + tindex = sindex % HANGUL_TCOUNT + ncp << (HANGUL_TBASE + tindex) unless tindex == 0 + decomposed.concat ncp + # if the codepoint is decomposable with the current decomposition type + elsif (ncp = database.codepoints[cp].decomp_mapping) and (!database.codepoints[cp].decomp_type || type == :compatability) + decomposed.concat decompose_codepoints(type, ncp.dup) + else + decomposed << cp + end + end + end + + # Compose decomposed characters to the composed form.
+ def compose_codepoints(codepoints) + pos = 0 + eoa = codepoints.length - 1 + starter_pos = 0 + starter_char = codepoints[0] + previous_combining_class = -1 + while pos < eoa + pos += 1 + lindex = starter_char - HANGUL_LBASE + # -- Hangul (starter is an L jamo: fold a following V, and optionally T, jamo into one syllable) + if 0 <= lindex and lindex < HANGUL_LCOUNT + vindex = codepoints[starter_pos+1] - HANGUL_VBASE rescue vindex = -1 + if 0 <= vindex and vindex < HANGUL_VCOUNT + tindex = codepoints[starter_pos+2] - HANGUL_TBASE rescue tindex = -1 + if 0 <= tindex and tindex < HANGUL_TCOUNT + j = starter_pos + 2 + eoa -= 2 + else + tindex = 0 + j = starter_pos + 1 + eoa -= 1 + end + codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE + end + starter_pos += 1 + starter_char = codepoints[starter_pos] + # -- Other characters (look up starter + current in the composition map) + else + current_char = codepoints[pos] + current = database.codepoints[current_char] + if current.combining_class > previous_combining_class + if ref = database.composition_map[starter_char] + composition = ref[current_char] + else + composition = nil + end + unless composition.nil? + codepoints[starter_pos] = composition + starter_char = composition + codepoints.delete_at pos + eoa -= 1 + pos -= 1 + previous_combining_class = -1 + else + previous_combining_class = current.combining_class + end + else + previous_combining_class = current.combining_class + end + if current.combining_class == 0 + starter_pos = pos + starter_char = codepoints[pos] + end + end + end + codepoints + end + + # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string. + # + # Passing +true+ as +force+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+ def tidy_bytes(string, force = false) + if force + return string.unpack("C*").map do |b| + tidy_byte(b) + end.flatten.compact.pack("C*").unpack("U*").pack("U*") + end + + bytes = string.unpack("C*") + conts_expected = 0 + last_lead = 0 + + bytes.each_index do |i| + + byte = bytes[i] + is_cont = byte > 127 && byte < 192 + is_lead = byte > 191 && byte < 245 + is_unused = byte > 240 + is_restricted = byte > 244 + + # Impossible or highly unlikely byte? Clean it. + if is_unused || is_restricted + bytes[i] = tidy_byte(byte) + elsif is_cont + # Not expecting continuation byte? Clean up. Otherwise, now expect one less. + conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1 + else + if conts_expected > 0 + # Expected continuation, but got ASCII or leading? Clean backwards up to + # the leading byte. + (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])} + conts_expected = 0 + end + if is_lead + # Final byte is leading? Clean it. + if i == bytes.length - 1 + bytes[i] = tidy_byte(bytes.last) + else + # Valid leading byte? Expect continuations determined by position of + # first zero bit, with max of 3. + conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3 + last_lead = i + end + end + end + end + bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*") + end + + # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for + # passing strings to databases and validations. + # + # * string - The string to perform normalization on. + # * form - The form you want to normalize in. Should be one of the following: + # :c, :kc, :d, or :kd.
Default is + # Mail::Multibyte::Unicode.default_normalization_form + def normalize(string, form=nil) + form ||= @default_normalization_form + # See http://www.unicode.org/reports/tr15, Table 1 + codepoints = u_unpack(string) + case form + when :d + reorder_characters(decompose_codepoints(:canonical, codepoints)) + when :c + compose_codepoints(reorder_characters(decompose_codepoints(:canonical, codepoints))) + when :kd + reorder_characters(decompose_codepoints(:compatability, codepoints)) + when :kc + compose_codepoints(reorder_characters(decompose_codepoints(:compatability, codepoints))) + else + raise ArgumentError, "#{form} is not a valid normalization variant", caller + end.pack('U*') + end + + def apply_mapping(string, mapping) #:nodoc: + u_unpack(string).map do |codepoint| + cp = database.codepoints[codepoint] + if cp and (ncp = cp.send(mapping)) and ncp > 0 + ncp + else + codepoint + end + end.pack('U*') + end + + # Holds data about a codepoint in the Unicode database + class Codepoint + attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping + end + + # Holds static data from the Unicode database + class UnicodeDatabase + ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252 + + attr_writer(*ATTRIBUTES) + + def initialize + @codepoints = Hash.new(Codepoint.new) + @composition_exclusion = [] + @composition_map = {} + @boundary = {} + @cp1252 = {} + end + + # Lazy load the Unicode database so it's only loaded when it's actually used + ATTRIBUTES.each do |attr_name| + class_eval(<<-EOS, __FILE__, __LINE__ + 1) + def #{attr_name} # def codepoints + load # load + @#{attr_name} # @codepoints + end # end + EOS + end + + # Loads the Unicode database from the data file into the instance variables of UnicodeDatabase.
+ def load + begin + @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read } + rescue StandardError => e + raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), Mail::Multibyte is unusable") + end + + # Redefine the === method so we can write shorter rules for grapheme cluster breaks + @boundary.each do |k,_| + @boundary[k].instance_eval do + def ===(other) + detect { |i| i === other } ? true : false + end + end if @boundary[k].kind_of?(Array) + end + + # define singleton attr_reader methods on this instance so later reads bypass the lazy-loading accessors + class << self + attr_reader(*ATTRIBUTES) + end + end + + # Returns the directory in which the data files are stored + def self.dirname + File.dirname(__FILE__) + '/../values/' + end + + # Returns the filename for the data file for this version + def self.filename + File.expand_path File.join(dirname, "unicode_tables.dat") + end + end + + private + + def tidy_byte(byte) + if byte < 160 + [database.cp1252[byte] || byte].pack("U").unpack("C*") + elsif byte < 192 + [194, byte] + else + [195, byte - 64] + end + end + + def database + @database ||= UnicodeDatabase.new + end + + end + end +end diff --git a/lib/mail/multibyte/utils.rb b/lib/mail/multibyte/utils.rb new file mode 100644 index 000000000..5371fa536 --- /dev/null +++ b/lib/mail/multibyte/utils.rb @@ -0,0 +1,60 @@ +# encoding: utf-8 + +module Mail #:nodoc: + module Multibyte #:nodoc: + if Kernel.const_defined?(:Encoding) + # Returns a regular expression that matches valid characters in the current encoding + def self.valid_character + VALID_CHARACTER[Encoding.default_external.to_s] + end + else + def self.valid_character + case $KCODE + when 'UTF8' + VALID_CHARACTER['UTF-8'] + when 'SJIS' + VALID_CHARACTER['Shift_JIS'] + end + end + end + + if 'string'.respond_to?(:valid_encoding?) + # Verifies the encoding of a string + def self.verify(string) + string.valid_encoding?
+ end + else + def self.verify(string) + if expression = valid_character + # Splits the string on character boundaries, which are determined based on $KCODE. + string.split(//).all? { |c| expression =~ c } + else + true + end + end + end + + # Verifies the encoding of the string and raises an EncodingError when it's not valid + def self.verify!(string) + raise EncodingError.new("Found characters with invalid encoding") unless verify(string) + end + + if 'string'.respond_to?(:force_encoding) + # Removes all invalid characters from the string. + # + # Note: this method is a no-op when String#force_encoding exists (Ruby 1.9+) + def self.clean(string) + string + end + else + def self.clean(string) + if expression = valid_character + # Splits the string on character boundaries, which are determined based on $KCODE. + string.split(//).grep(expression).join + else + string + end + end + end + end +end diff --git a/lib/mail/version_specific/ruby_1_8.rb b/lib/mail/version_specific/ruby_1_8.rb index 679552635..5558f5e8b 100644 --- a/lib/mail/version_specific/ruby_1_8.rb +++ b/lib/mail/version_specific/ruby_1_8.rb @@ -1,9 +1,5 @@ # encoding: utf-8 -# For multibyte strings in Ruby 1.8 -require 'active_support' -require 'active_support/core_ext/string' - module Mail class Ruby18 require 'base64' @@ -19,13 +15,13 @@ def Ruby18.escape_paren( str ) re = /([\(\)])/ # Only match unescaped parens str.gsub(re) { |s| '\\' + s } end - + def Ruby18.paren( str ) str = $1 if str =~ /^\((.*)?\)$/ str = escape_paren( str ) '(' + str + ')' end - + def Ruby18.escape_bracket( str ) re = /\\\>/ str = str.gsub(re) { |s| '>'} @@ -34,36 +30,36 @@ def Ruby18.escape_bracket( str ) re = /([\<\>])/ # Only match unescaped parens str.gsub(re) { |s| '\\' + s } end - + def Ruby18.bracket( str ) str = $1 if str =~ /^\<(.*)?\>$/ str = escape_bracket( str ) '<' + str + '>' end - + def Ruby18.decode_base64(str) Base64.decode64(str) if str end - + def Ruby18.encode_base64(str) Base64.encode64(str) end - + def Ruby18.has_constant?(klass, string)
klass.constants.include?( string ) end - + def Ruby18.get_constant(klass, string) klass.const_get( string ) end - + def Ruby18.b_value_encode(str, encoding) # Ruby 1.8 requires an encoding to work raise ArgumentError, "Must supply an encoding" if encoding.nil? encoding = encoding.to_s.upcase.gsub('_', '-') [Encodings::Base64.encode(str), encoding] end - + def Ruby18.b_value_decode(str) match = str.match(/\=\?(.+)?\?[Bb]\?(.+)?\?\=/m) if match @@ -72,14 +68,14 @@ def Ruby18.b_value_decode(str) end str end - + def Ruby18.q_value_encode(str, encoding) # Ruby 1.8 requires an encoding to work raise ArgumentError, "Must supply an encoding" if encoding.nil? encoding = encoding.to_s.upcase.gsub('_', '-') [Encodings::QuotedPrintable.encode(str), encoding] end - + def Ruby18.q_value_decode(str) match = str.match(/\=\?(.+)?\?[Qq]\?(.+)?\?\=/m) if match @@ -88,11 +84,11 @@ def Ruby18.q_value_decode(str) end str end - + def Ruby18.param_decode(str, encoding) URI.unescape(str) end - + def Ruby18.param_encode(str) encoding = $KCODE.to_s.downcase language = Configuration.instance.param_encode_language diff --git a/lib/mail/version_specific/ruby_1_9.rb b/lib/mail/version_specific/ruby_1_9.rb index 3c31c8819..f708b0768 100644 --- a/lib/mail/version_specific/ruby_1_9.rb +++ b/lib/mail/version_specific/ruby_1_9.rb @@ -1,14 +1,5 @@ # encoding: utf-8 -unless ''.respond_to?(:mb_chars) - class String - # Compatability with ActiveSupport, which returns self in 1.9 - def mb_chars - self - end - end -end - module Mail class Ruby19 diff --git a/spec/mail/fields/common/parameter_hash_spec.rb b/spec/mail/fields/common/parameter_hash_spec.rb index b3d872516..ad8645c8e 100644 --- a/spec/mail/fields/common/parameter_hash_spec.rb +++ b/spec/mail/fields/common/parameter_hash_spec.rb @@ -5,8 +5,8 @@ it "should return the values in the hash" do hash = Mail::ParameterHash.new hash.merge!({'value1' => 'one', 'value2' => 'two'}) - hash.keys.should include(:value1) - hash.keys.should include(:value2) + 
hash.keys.should include("value1") + hash.keys.should include("value2") hash.values.should include('one') hash.values.should include('two') end diff --git a/spec/mail/fields/content_disposition_field_spec.rb b/spec/mail/fields/content_disposition_field_spec.rb index e4f9b249c..1ba9d19c7 100644 --- a/spec/mail/fields/content_disposition_field_spec.rb +++ b/spec/mail/fields/content_disposition_field_spec.rb @@ -53,12 +53,12 @@ c.decoded.should == 'attachment' end end - + describe "instance methods" do it "should give it's disposition type" do c = Mail::ContentDispositionField.new('Content-Disposition: attachment; filename=File') c.disposition_type.should == 'attachment' - c.parameters.should == {:filename => 'File'} + c.parameters.should == {"filename" => 'File'} end # see spec/fixtures/trec_2005_corpus/missing_content_disposition.eml diff --git a/spec/mail/fields/content_type_field_spec.rb b/spec/mail/fields/content_type_field_spec.rb index 2d526ce7d..8a2d70455 100644 --- a/spec/mail/fields/content_type_field_spec.rb +++ b/spec/mail/fields/content_type_field_spec.rb @@ -147,17 +147,17 @@ it "should return a parameter as a hash" do c = Mail::ContentTypeField.new('text/plain; charset=US-ASCII') - c.parameters.should == {:charset => 'US-ASCII'} + c.parameters.should == {"charset" => 'US-ASCII'} end it "should return multiple parameters as a hash" do c = Mail::ContentTypeField.new('text/plain; charset=US-ASCII; format=flowed') - c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'} + c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'} end it "should return boundry parameters" do c = Mail::ContentTypeField.new('multipart/mixed; boundary=Apple-Mail-13-196941151') - c.parameters.should == {:boundary => 'Apple-Mail-13-196941151'} + c.parameters.should == {"boundary" => 'Apple-Mail-13-196941151'} end it "should be indifferent with the access" do @@ -197,7 +197,7 @@ c.content_type.should == 'application/octet-stream' c.main_type.should == 
'application' c.sub_type.should == 'octet-stream' - c.parameters.should == {:'name*' => "iso-2022-jp'ja'01%20Quien%20Te%20Dij%8aat.%20Pitbull.mp3"} + c.parameters.should == {'name*' => "iso-2022-jp'ja'01%20Quien%20Te%20Dij%8aat.%20Pitbull.mp3"} end it "should handle 'application/pdf;'" do @@ -215,7 +215,7 @@ c.content_type.should == 'application/pdf' c.main_type.should == 'application' c.sub_type.should == 'pdf' - c.parameters.should == {:name => "broken.pdf"} + c.parameters.should == {"name" => "broken.pdf"} end it "should handle 'application/pkcs7-signature;'" do @@ -233,7 +233,7 @@ c.content_type.should == 'application/pkcs7-signature' c.main_type.should == 'application' c.sub_type.should == 'pkcs7-signature' - c.parameters.should == {:name => "smime.p7s"} + c.parameters.should == {"name" => "smime.p7s"} end it "should handle 'application/x-gzip; NAME=blah.gz'" do @@ -242,7 +242,7 @@ c.content_type.should == 'application/x-gzip' c.main_type.should == 'application' c.sub_type.should == 'x-gzip' - c.parameters.should == {:NAME => "blah.gz"} + c.parameters.should == {"NAME" => "blah.gz"} end it "should handle 'image/jpeg'" do @@ -314,7 +314,7 @@ c.content_type.should == 'multipart/alternative' c.main_type.should == 'multipart' c.sub_type.should == 'alternative' - c.parameters.should == {:boundary =>"----=_NextPart_000_0093_01C81419.EB75E850"} + c.parameters.should == {"boundary" =>"----=_NextPart_000_0093_01C81419.EB75E850"} end it "should handle 'multipart/alternative; boundary=----=_NextPart_000_0093_01C81419.EB75E850'" do @@ -323,7 +323,7 @@ c.content_type.should == 'multipart/alternative' c.main_type.should == 'multipart' c.sub_type.should == 'alternative' - c.parameters.should == {:boundary =>"----=_NextPart_000_0093_01C81419.EB75E850"} + c.parameters.should == {"boundary" =>"----=_NextPart_000_0093_01C81419.EB75E850"} end it "should handle 'Multipart/Alternative;boundary=MuLtIpArT_BoUnDaRy'" do @@ -332,7 +332,7 @@ c.content_type.should == 
'multipart/alternative' c.main_type.should == 'multipart' c.sub_type.should == 'alternative' - c.parameters.should == {:boundary =>"MuLtIpArT_BoUnDaRy"} + c.parameters.should == {"boundary" =>"MuLtIpArT_BoUnDaRy"} end it "should handle 'Multipart/Alternative;boundary=MuLtIpArT_BoUnDaRy'" do @@ -341,7 +341,7 @@ c.content_type.should == 'multipart/alternative' c.main_type.should == 'multipart' c.sub_type.should == 'alternative' - c.parameters.should == {:boundary =>"MuLtIpArT_BoUnDaRy"} + c.parameters.should == {"boundary" =>"MuLtIpArT_BoUnDaRy"} end it "should handle 'multipart/mixed'" do @@ -368,7 +368,7 @@ c.content_type.should == 'multipart/mixed' c.main_type.should == 'multipart' c.sub_type.should == 'mixed' - c.parameters.should == {:boundary => "Apple-Mail-13-196941151"} + c.parameters.should == {"boundary" => "Apple-Mail-13-196941151"} end it "should handle 'multipart/mixed; boundary=mimepart_427e4cb4ca329_133ae40413c81ef'" do @@ -377,7 +377,7 @@ c.content_type.should == 'multipart/mixed' c.main_type.should == 'multipart' c.sub_type.should == 'mixed' - c.parameters.should == {:boundary => "mimepart_427e4cb4ca329_133ae40413c81ef"} + c.parameters.should == {"boundary" => "mimepart_427e4cb4ca329_133ae40413c81ef"} end it "should handle 'multipart/report; report-type=delivery-status;'" do @@ -386,7 +386,7 @@ c.content_type.should == 'multipart/report' c.main_type.should == 'multipart' c.sub_type.should == 'report' - c.parameters.should == {:"report-type" => "delivery-status"} + c.parameters.should == {"report-type" => "delivery-status"} end it "should handle 'multipart/signed;'" do @@ -422,7 +422,7 @@ c.content_type.should == 'text/html' c.main_type.should == 'text' c.sub_type.should == 'html' - c.parameters.should == {:charset => 'iso-8859-1'} + c.parameters.should == {"charset" => 'iso-8859-1'} end it "should handle 'TEXT/PLAIN; charset=ISO-8859-1;'" do @@ -431,7 +431,7 @@ c.content_type.should == 'text/plain' c.main_type.should == 'text' c.sub_type.should == 
'plain' - c.parameters.should == {:charset => 'ISO-8859-1'} + c.parameters.should == {"charset" => 'ISO-8859-1'} end it "should handle 'text/plain'" do @@ -458,7 +458,7 @@ c.content_type.should == 'text/plain' c.main_type.should == 'text' c.sub_type.should == 'plain' - c.parameters.should == {:charset => 'ISO-8859-1'} + c.parameters.should == {"charset" => 'ISO-8859-1'} end it "should handle 'text/plain; charset=ISO-8859-1;'" do @@ -467,7 +467,7 @@ c.content_type.should == 'text/plain' c.main_type.should == 'text' c.sub_type.should == 'plain' - c.parameters.should == {:charset => 'ISO-8859-1', :format => 'flowed'} + c.parameters.should == {"charset" => 'ISO-8859-1', "format" => 'flowed'} end it "should handle 'text/plain; charset=us-ascii;'" do @@ -476,7 +476,7 @@ c.content_type.should == 'text/plain' c.main_type.should == 'text' c.sub_type.should == 'plain' - c.parameters.should == {:charset => 'us-ascii'} + c.parameters.should == {"charset" => 'us-ascii'} end it "should handle 'text/plain; charset=US-ASCII; format=flowed'" do @@ -485,7 +485,7 @@ c.content_type.should == 'text/plain' c.main_type.should == 'text' c.sub_type.should == 'plain' - c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'} + c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'} end it "should handle 'text/plain; charset=US-ASCII; format=flowed'" do @@ -494,7 +494,7 @@ c.content_type.should == 'text/plain' c.main_type.should == 'text' c.sub_type.should == 'plain' - c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'} + c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'} end it "should handle 'text/plain; charset=utf-8'" do @@ -503,7 +503,7 @@ c.content_type.should == 'text/plain' c.main_type.should == 'text' c.sub_type.should == 'plain' - c.parameters.should == {:charset => 'utf-8'} + c.parameters.should == {"charset" => 'utf-8'} end it "should handle 'text/plain; charset=utf-8'" do @@ -512,7 +512,7 @@ c.content_type.should == 
'text/plain' c.main_type.should == 'text' c.sub_type.should == 'plain' - c.parameters.should == {:charset => 'X-UNKNOWN'} + c.parameters.should == {"charset" => 'X-UNKNOWN'} end it "should handle 'text/x-ruby-script;'" do @@ -530,7 +530,7 @@ c.content_type.should == 'text/x-ruby-script' c.main_type.should == 'text' c.sub_type.should == 'x-ruby-script' - c.parameters.should == {:name => 'hello.rb'} + c.parameters.should == {"name" => 'hello.rb'} end it "should handle 'multipart/mixed; boundary=\"=_NextPart_Lycos_15031600484464_ID\"" do @@ -539,7 +539,7 @@ c.content_type.should == 'multipart/mixed' c.main_type.should == 'multipart' c.sub_type.should == 'mixed' - c.parameters.should == {:boundary => '=_NextPart_Lycos_15031600484464_ID'} + c.parameters.should == {"boundary" => '=_NextPart_Lycos_15031600484464_ID'} end it "should handle 'multipart/alternative; boundary=----=_=NextPart_000_0093_01C81419.EB75E850" do @@ -548,7 +548,7 @@ c.content_type.should == 'multipart/alternative' c.main_type.should == 'multipart' c.sub_type.should == 'alternative' - c.parameters.should == {:boundary => '----=_=NextPart_000_0093_01C81419.EB75E850'} + c.parameters.should == {"boundary" => '----=_=NextPart_000_0093_01C81419.EB75E850'} end it "should handle 'multipart/alternative; boundary=\"----=_=NextPart_000_0093_01C81419.EB75E850\"" do @@ -557,7 +557,7 @@ c.content_type.should == 'multipart/alternative' c.main_type.should == 'multipart' c.sub_type.should == 'alternative' - c.parameters.should == {:boundary => '----=_=NextPart_000_0093_01C81419.EB75E850'} + c.parameters.should == {"boundary" => '----=_=NextPart_000_0093_01C81419.EB75E850'} end it "should handle 'multipart/related;boundary=1_4626B816_9F1690;Type=\"application/smil\";Start=\"\"'" do @@ -566,7 +566,7 @@ c.content_type.should == 'multipart/related' c.main_type.should == 'multipart' c.sub_type.should == 'related' - c.parameters.should == {:boundary => '1_4626B816_9F1690', :Type => 'application/smil', :Start => ''} + 
c.parameters.should == {"boundary" => '1_4626B816_9F1690', "Type" => 'application/smil', "Start" => ''} end it "should handle 'IMAGE/JPEG; name=\"IM 006.jpg\"'" do @@ -575,7 +575,7 @@ c.content_type.should == 'image/jpeg' c.main_type.should == 'image' c.sub_type.should == 'jpeg' - c.parameters.should == {:name => "IM 006.jpg"} + c.parameters.should == {"name" => "IM 006.jpg"} end end @@ -623,7 +623,7 @@ result = %Q{Content-Type: application/octet-stream;\r\n\sfilename*=sjis'jp'01%20Quien%20Te%20Dij%91at.%20Pitbull.mp3\r\n} end c.filename = string - c.parameters.should == {:filename => string} + c.parameters.should == {"filename" => string} c.encoded.should == result $KCODE = @original if RUBY_VERSION < '1.9' end diff --git a/spec/mail/message_spec.rb b/spec/mail/message_spec.rb index e482e6e1e..a0a811f7a 100644 --- a/spec/mail/message_spec.rb +++ b/spec/mail/message_spec.rb @@ -1065,19 +1065,19 @@ def basic_email mail = Mail.new mail.content_type = ["text", "plain", { :charset => 'US-ASCII' }] mail[:content_type].encoded.should == %Q[Content-Type: text/plain;\r\n\scharset=US-ASCII\r\n] - mail.content_type_parameters.should == {:charset => "US-ASCII"} + mail.content_type_parameters.should == {"charset" => "US-ASCII"} end it "should be able to set a content type with an array and hash with a non-usascii field" do mail = Mail.new mail.content_type = ["text", "plain", { :charset => 'UTF-8' }] mail[:content_type].encoded.should == %Q[Content-Type: text/plain;\r\n\scharset=UTF-8\r\n] - mail.content_type_parameters.should == {:charset => "UTF-8"} + mail.content_type_parameters.should == {"charset" => "UTF-8"} end it "should allow us to specify a content type in a block" do mail = Mail.new { content_type ["text", "plain", { "charset" => "UTF-8" }] } - mail.content_type_parameters.should == {:charset => "UTF-8"} + mail.content_type_parameters.should == {"charset" => "UTF-8"} end end @@ -1494,6 +1494,7 @@ def self.delivering_email(mail) it "shouldn't die with an invalid 
Content-Type header" do mail = Mail.new('Content-Type: invalid/invalid; charset="iso-8859-1"') + mail.attachment? doing { mail.attachment? }.should_not raise_error end diff --git a/spec/mail/mime_messages_spec.rb b/spec/mail/mime_messages_spec.rb index 0b681ed74..b20d9b720 100644 --- a/spec/mail/mime_messages_spec.rb +++ b/spec/mail/mime_messages_spec.rb @@ -53,7 +53,7 @@ it "should return the content-type parameters" do mail = Mail.new("Content-Type: text/plain; charset=US-ASCII; format=flowed") - mail.content_type_parameters.should == {:charset => 'US-ASCII', :format => 'flowed'} + mail.content_type_parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'} end it "should recognize a multipart email" do @@ -455,7 +455,7 @@ m.parts.first[:content_type].content_type.should == 'image/png' m.parts.last[:content_type].content_type.should == 'text/plain' end - + it "should allow you to add a body as text part if you have added a file and not truncate after newlines - issue 208" do m = Mail.new do from 'mikel@from.lindsaar.net'