From 6eb4c44a15eb1707dde60959e85e5c536ef136e2 Mon Sep 17 00:00:00 2001
From: Mikel Lindsaar <mikel@rubyx.com>
Date: Tue, 26 Apr 2011 17:40:02 +1000
Subject: [PATCH] Pulling Active Support multibyte requirements over for Ruby
 1.8.7

---
 Gemfile                                       |   1 -
 lib/mail.rb                                   |  16 +-
 lib/mail/core_extensions/string/access.rb     |  98 ++++
 lib/mail/core_extensions/string/multibyte.rb  |  72 +++
 lib/mail/indifferent_hash.rb                  | 136 ++++-
 lib/mail/multibyte.rb                         |  42 ++
 lib/mail/multibyte/chars.rb                   | 474 ++++++++++++++++++
 lib/mail/multibyte/exceptions.rb              |   8 +
 lib/mail/multibyte/unicode.rb                 | 392 +++++++++++++++
 lib/mail/multibyte/utils.rb                   |  60 +++
 lib/mail/version_specific/ruby_1_8.rb         |  30 +-
 lib/mail/version_specific/ruby_1_9.rb         |   9 -
 .../mail/fields/common/parameter_hash_spec.rb |   4 +-
 .../fields/content_disposition_field_spec.rb  |   4 +-
 spec/mail/fields/content_type_field_spec.rb   |  60 +--
 spec/mail/message_spec.rb                     |   7 +-
 spec/mail/mime_messages_spec.rb               |   4 +-
 17 files changed, 1336 insertions(+), 81 deletions(-)
 create mode 100644 lib/mail/core_extensions/string/access.rb
 create mode 100644 lib/mail/core_extensions/string/multibyte.rb
 create mode 100644 lib/mail/multibyte.rb
 create mode 100644 lib/mail/multibyte/chars.rb
 create mode 100644 lib/mail/multibyte/exceptions.rb
 create mode 100644 lib/mail/multibyte/unicode.rb
 create mode 100644 lib/mail/multibyte/utils.rb

diff --git a/Gemfile b/Gemfile
index fdad09e19..c195c2721 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,6 +1,5 @@
 source :rubygems
 
-gem "activesupport", ">= 2.3.6"
 gem "tlsmail" if RUBY_VERSION <= '1.8.6'
 gem "mime-types", "~> 1.16"
 gem "treetop", "~> 1.4.8"
diff --git a/lib/mail.rb b/lib/mail.rb
index 90e6697e9..52b994d82 100644
--- a/lib/mail.rb
+++ b/lib/mail.rb
@@ -26,13 +26,19 @@ module Mail # :doc:
 
   require 'mail/version'
 
-  require 'mail/core_extensions/nil'
-  require 'mail/core_extensions/string'
+  # Only load our core extensions if AS is not already loaded
+  unless defined?(ActiveSupport)
+    require 'mail/core_extensions/nil'
+    require 'mail/core_extensions/string'
+    require 'mail/core_extensions/string/access'
+    require 'mail/core_extensions/string/multibyte'
+    require 'mail/core_extensions/object'
+    require 'mail/multibyte'
+  end
+  require 'mail/indifferent_hash' # Mail:: namespaced class, needed with or without AS
+
   require 'mail/core_extensions/shellwords' unless String.new.respond_to?(:shellescape)
   require 'mail/core_extensions/smtp' if RUBY_VERSION < '1.9.3'
-  require 'mail/core_extensions/object'
-
-  require 'mail/indifferent_hash'
 
   require 'mail/patterns'
   require 'mail/utilities'
diff --git a/lib/mail/core_extensions/string/access.rb b/lib/mail/core_extensions/string/access.rb
new file mode 100644
index 000000000..8a08acaa0
--- /dev/null
+++ b/lib/mail/core_extensions/string/access.rb
@@ -0,0 +1,98 @@
+
+class String
+  unless '1.9'.respond_to?(:force_encoding)
+    # Returns the character at the +position+ treating the string as an array (where 0 is the first character).
+    #
+    # Examples:
+    #   "hello".at(0)  # => "h"
+    #   "hello".at(4)  # => "o"
+    #   "hello".at(10) # => ERROR if < 1.9, nil in 1.9
+    def at(position)
+      mb_chars[position, 1].to_s
+    end
+
+    # Returns the remaining of the string from the +position+ treating the string as an array (where 0 is the first character).
+    #
+    # Examples:
+    #   "hello".from(0)  # => "hello"
+    #   "hello".from(2)  # => "llo"
+    #   "hello".from(10) # => "" if < 1.9, nil in 1.9
+    def from(position)
+      mb_chars[position..-1].to_s
+    end
+
+    # Returns the beginning of the string up to the +position+ treating the string as an array (where 0 is the first character).
+    #
+    # Examples:
+    #   "hello".to(0)  # => "h"
+    #   "hello".to(2)  # => "hel"
+    #   "hello".to(10) # => "hello"
+    def to(position)
+      mb_chars[0..position].to_s
+    end
+
+    # Returns the first character of the string or the first +limit+ characters.
+    #
+    # Examples:
+    #   "hello".first     # => "h"
+    #   "hello".first(2)  # => "he"
+    #   "hello".first(10) # => "hello"
+    def first(limit = 1)
+      if limit == 0
+        ''
+      elsif limit >= size
+        self
+      else
+        mb_chars[0...limit].to_s
+      end
+    end
+
+    # Returns the last character of the string or the last +limit+ characters.
+    #
+    # Examples:
+    #   "hello".last     # => "o"
+    #   "hello".last(2)  # => "lo"
+    #   "hello".last(10) # => "hello"
+    def last(limit = 1)
+      if limit == 0
+        ''
+      elsif limit >= size
+        self
+      else
+        mb_chars[(-limit)..-1].to_s
+      end
+    end
+  else
+    def at(position)
+      self[position]
+    end
+
+    def from(position)
+      self[position..-1]
+    end
+
+    def to(position)
+      self[0..position]
+    end
+
+    def first(limit = 1)
+      if limit == 0
+        ''
+      elsif limit >= size
+        self
+      else
+        to(limit - 1)
+      end
+    end
+
+    def last(limit = 1)
+      if limit == 0
+        ''
+      elsif limit >= size
+        self
+      else
+        from(-limit)
+      end
+    end
+  end
+end
diff --git a/lib/mail/core_extensions/string/multibyte.rb b/lib/mail/core_extensions/string/multibyte.rb
new file mode 100644
index 000000000..34d372ec7
--- /dev/null
+++ b/lib/mail/core_extensions/string/multibyte.rb
@@ -0,0 +1,72 @@
+# encoding: utf-8
+require 'mail/multibyte'
+
+class String
+  if RUBY_VERSION >= "1.9"
+    # == Multibyte proxy
+    #
+    # +mb_chars+ is a multibyte safe proxy for string methods.
+    #
+    # In Ruby 1.8 and older it creates and returns an instance of the Mail::Multibyte::Chars class which
+    # encapsulates the original string. A Unicode safe version of all the String methods are defined on this proxy
+    # class. If the proxy class doesn't respond to a certain method, it's forwarded to the encapsulated string.
+    #
+    #   name = 'Claus Müller'
+    #   name.reverse # => "rell??M sualC"
+    #   name.length  # => 13
+    #
+    #   name.mb_chars.reverse.to_s # => "rellüM sualC"
+    #   name.mb_chars.length       # => 12
+    #
+    # In Ruby 1.9 and newer +mb_chars+ returns +self+ because String is (mostly) encoding aware. This means that
+    # it becomes easy to run one version of your code on multiple Ruby versions.
+    #
+    # == Method chaining
+    #
+    # All the methods on the Chars proxy which normally return a string will return a Chars object. This allows
+    # method chaining on the result of any of these methods.
+    #
+    #   name.mb_chars.reverse.length # => 12
+    #
+    # == Interoperability and configuration
+    #
+    # The Chars object tries to be as interchangeable with String objects as possible: sorting and comparing between
+    # String and Char work like expected. The bang! methods change the internal string representation in the Chars
+    # object. Interoperability problems can be resolved easily with a +to_s+ call.
+    #
+    # For more information about the methods defined on the Chars proxy see Mail::Multibyte::Chars. For
+    # information about how to change the default Multibyte behaviour see Mail::Multibyte.
+    def mb_chars
+      if Mail::Multibyte.proxy_class.consumes?(self)
+        Mail::Multibyte.proxy_class.new(self)
+      else
+        self
+      end
+    end
+
+    def is_utf8? #:nodoc
+      case encoding
+      when Encoding::UTF_8
+        valid_encoding?
+      when Encoding::ASCII_8BIT, Encoding::US_ASCII
+        dup.force_encoding(Encoding::UTF_8).valid_encoding?
+      else
+        false
+      end
+    end
+  else
+    def mb_chars
+      if Mail::Multibyte.proxy_class.wants?(self)
+        Mail::Multibyte.proxy_class.new(self)
+      else
+        self
+      end
+    end
+
+    # Returns true if the string has UTF-8 semantics (a String used for purely byte resources is unlikely to have
+    # them), returns false otherwise.
+    def is_utf8?
+      Mail::Multibyte::Chars.consumes?(self)
+    end
+  end
+end
diff --git a/lib/mail/indifferent_hash.rb b/lib/mail/indifferent_hash.rb
index 58703ecd1..7a5e13b6d 100644
--- a/lib/mail/indifferent_hash.rb
+++ b/lib/mail/indifferent_hash.rb
@@ -1,26 +1,142 @@
 module Mail
   # Sort of like ActiveSupport HashWithIndifferentAccess, but lighter
   class IndifferentHash < Hash
-    def initialize(other=nil)
-      if other.is_a?(Hash)
-        self.default = other.default
-        self.update(other)
+
+    def initialize(constructor = {})
+      if constructor.is_a?(Hash)
+        super()
+        update(constructor)
+      else
+        super(constructor)
+      end
+    end
+
+    def default(key = nil)
+      if key.is_a?(Symbol) && include?(key = key.to_s)
+        self[key]
       else
         super
       end
     end
 
-    def [](key_name)
-      super(key_name.to_sym)
+    def self.new_from_hash_copying_default(hash)
+      IndifferentHash.new(hash).tap do |new_hash|
+        new_hash.default = hash.default
+      end
     end
 
-    def []=(k, v)
-      super(k.to_sym, v)
+    alias_method :regular_writer, :[]= unless method_defined?(:regular_writer)
+    alias_method :regular_update, :update unless method_defined?(:regular_update)
+
+    # Assigns a new value to the hash:
+    #
+    #   hash = Mail::IndifferentHash.new
+    #   hash[:key] = "value"
+    #
+    def []=(key, value)
+      regular_writer(convert_key(key), convert_value(value))
     end
 
+    alias_method :store, :[]=
+
+    # Updates the receiver with the values from the second hash:
+    #
+    #   hash_1 = Mail::IndifferentHash.new
+    #   hash_1[:key] = "value"
+    #
+    #   hash_2 = Mail::IndifferentHash.new
+    #   hash_2[:key] = "New Value!"
+    #
+    #   hash_1.update(hash_2) # => {"key"=>"New Value!"}
+    #
     def update(other_hash)
-      super(other_hash.inject({}) {|c, (k, v)| c[k.to_sym] = v; c})
+      other_hash.each_pair { |key, value| regular_writer(convert_key(key), convert_value(value)) }
+      self
     end
-    alias merge! update
+
+    alias_method :merge!, :update
+
+    # Checks the hash for a key matching the argument passed in:
+    #
+    #   hash = Mail::IndifferentHash.new
+    #   hash["key"] = "value"
+    #   hash.key? :key  # => true
+    #   hash.key? "key" # => true
+    #
+    def key?(key)
+      super(convert_key(key))
+    end
+
+    alias_method :include?, :key?
+    alias_method :has_key?, :key?
+    alias_method :member?, :key?
+
+    # Fetches the value for the specified key, same as doing hash[key]
+    def fetch(key, *extras)
+      super(convert_key(key), *extras)
+    end
+
+    # Returns an array of the values at the specified indices:
+    #
+    #   hash = Mail::IndifferentHash.new
+    #   hash[:a] = "x"
+    #   hash[:b] = "y"
+    #   hash.values_at("a", "b") # => ["x", "y"]
+    #
+    def values_at(*indices)
+      indices.collect {|key| self[convert_key(key)]}
+    end
+
+    # Returns an exact copy of the hash.
+    def dup
+      IndifferentHash.new(self)
+    end
+
+    # Merges the receiver and the specified hash together, giving precedence to the values from the second hash.
+    # Returns a new hash; the receiver is not modified.
+    def merge(hash)
+      self.dup.update(hash)
+    end
+
+    # Performs the opposite of merge: keys already present in this hash win over those in +other_hash+. Returns a new
+    # IndifferentHash. Hash#reverse_merge only exists when ActiveSupport is loaded, so merge explicitly instead of +super+.
+    def reverse_merge(other_hash)
+      self.class.new_from_hash_copying_default(other_hash).merge(self)
+    end
+
+    def reverse_merge!(other_hash)
+      replace(reverse_merge( other_hash ))
+    end
+
+    # Removes a specified key from the hash.
+    def delete(key)
+      super(convert_key(key))
+    end
+
+    def stringify_keys!; self end
+    def stringify_keys; dup end
+    def symbolize_keys; inject(Hash.new(default)) { |memo, (key, value)| memo[(key.to_sym rescue key) || key] = value; memo } end # plain Hash has no symbolize_keys without AS
+    def to_options!; self end
+
+    def to_hash
+      Hash.new(default).merge!(self)
+    end
+
+  protected
+
+    def convert_key(key)
+      key.kind_of?(Symbol) ? key.to_s : key
+    end
+
+    def convert_value(value)
+      if value.class == Hash
+        self.class.new_from_hash_copying_default(value)
+      elsif value.is_a?(Array)
+        value.dup.replace(value.map { |e| convert_value(e) })
+      else
+        value
+      end
+    end
+
   end
 end
diff --git a/lib/mail/multibyte.rb b/lib/mail/multibyte.rb
new file mode 100644
index 000000000..a7dde4824
--- /dev/null
+++ b/lib/mail/multibyte.rb
@@ -0,0 +1,42 @@
+# encoding: utf-8
+module Mail #:nodoc:
+  module Multibyte
+    require 'mail/multibyte/exceptions'
+    require 'mail/multibyte/chars'
+    require 'mail/multibyte/unicode'
+
+    # The proxy class returned when calling mb_chars. You can use this accessor to configure your own proxy
+    # class so you can support other encodings. See the Mail::Multibyte::Chars implementation for
+    # an example how to do this.
+    #
+    # Example:
+    #   Mail::Multibyte.proxy_class = CharsForUTF32
+    def self.proxy_class=(klass)
+      @proxy_class = klass
+    end
+
+    # Returns the current proxy class
+    def self.proxy_class
+      @proxy_class ||= Mail::Multibyte::Chars
+    end
+
+    # Regular expressions that describe valid byte sequences for a character
+    VALID_CHARACTER = {
+      # Borrowed from the Kconv library by Shinji KONO - (also as seen on the W3C site)
+      'UTF-8' => /\A(?:
+                  [\x00-\x7f]                                         |
+                  [\xc2-\xdf] [\x80-\xbf]                             |
+                  \xe0        [\xa0-\xbf] [\x80-\xbf]                 |
+                  [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]                 |
+                  \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf]     |
+                  \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf])\z /xn,
+      # Quick check for valid Shift-JIS characters, disregards the odd-even pairing
+      'Shift_JIS' => /\A(?:
+                  [\x00-\x7e\xa1-\xdf]                                     |
+                  [\x81-\x9f\xe0-\xef] [\x40-\x7e\x80-\x9e\x9f-\xfc])\z /xn
+    }
+  end
+end
+
+require 'mail/multibyte/utils'
\ No newline at end of file
diff --git a/lib/mail/multibyte/chars.rb b/lib/mail/multibyte/chars.rb
new file mode 100644
index 000000000..c6d37fa77
--- /dev/null
+++ b/lib/mail/multibyte/chars.rb
@@ -0,0 +1,474 @@
+# encoding: utf-8
+
+module Mail #:nodoc:
+  module Multibyte #:nodoc:
+    # Chars enables you to work transparently with UTF-8 encoding in the Ruby String class without having extensive
+    # knowledge about the encoding. A Chars object accepts a string upon initialization and proxies String methods in an
+    # encoding safe manner. All the normal String methods are also implemented on the proxy.
+    #
+    # String methods are proxied through the Chars object, and can be accessed through the +mb_chars+ method. Methods
+    # which would normally return a String object now return a Chars object so methods can be chained.
+    #
+    #   "The Perfect String  ".mb_chars.downcase.strip.normalize # => "the perfect string"
+    #
+    # Chars objects are perfectly interchangeable with String objects as long as no explicit class checks are made.
+    # If certain methods do explicitly check the class, call +to_s+ before you pass chars objects to them.
+    #
+    #   bad.explicit_checking_method "T".mb_chars.downcase.to_s
+    #
+    # The default Chars implementation assumes that the encoding of the string is UTF-8, if you want to handle different
+    # encodings you can write your own multibyte string handler and configure it through
+    # Mail::Multibyte.proxy_class.
+    #
+    #   class CharsForUTF32
+    #     def size
+    #       @wrapped_string.size / 4
+    #     end
+    #
+    #     def self.consumes?(string)
+    #       string.length % 4 == 0
+    #     end
+    #   end
+    #
+    #   Mail::Multibyte.proxy_class = CharsForUTF32
+    class Chars
+      attr_reader :wrapped_string
+      alias to_s wrapped_string
+      alias to_str wrapped_string
+
+      if RUBY_VERSION >= "1.9"
+        # Creates a new Chars instance by wrapping _string_.
+        def initialize(string)
+          @wrapped_string = string
+          @wrapped_string.force_encoding(Encoding::UTF_8) unless @wrapped_string.frozen?
+        end
+      else
+        def initialize(string) #:nodoc:
+          @wrapped_string = string
+        end
+      end
+
+      # Forward all undefined methods to the wrapped string.
+      def method_missing(method, *args, &block)
+        if method.to_s =~ /!$/
+          @wrapped_string.__send__(method, *args, &block)
+          self
+        else
+          result = @wrapped_string.__send__(method, *args, &block)
+          result.kind_of?(String) ? chars(result) : result
+        end
+      end
+
+      # Returns +true+ if _obj_ responds to the given method. Private methods are included in the search
+      # only if the optional second parameter evaluates to +true+.
+      def respond_to?(method, include_private=false)
+        super || @wrapped_string.respond_to?(method, include_private) || false
+      end
+
+      # Enable more predictable duck-typing on String-like classes. See Object#acts_like?.
+      def acts_like_string?
+        true
+      end
+
+      # Returns +true+ when the proxy class can handle the string. Returns +false+ otherwise.
+      def self.consumes?(string)
+        # Unpack is a little bit faster than regular expressions.
+        string.unpack('U*')
+        true
+      rescue ArgumentError
+        false
+      end
+
+      include Comparable
+
+      # Returns -1, 0, or 1, depending on whether the Chars object is to be sorted before,
+      # equal or after the object on the right side of the operation. It accepts any object
+      # that implements +to_s+:
+      #
+      #   'é'.mb_chars <=> 'ü'.mb_chars # => -1
+      #
+      # See <tt>String#<=></tt> for more details.
+      def <=>(other)
+        @wrapped_string <=> other.to_s
+      end
+
+      if RUBY_VERSION < "1.9"
+        # Returns +true+ if the Chars class can and should act as a proxy for the string _string_. Returns
+        # +false+ otherwise.
+        def self.wants?(string)
+          $KCODE == 'UTF8' && consumes?(string)
+        end
+
+        # Returns a new Chars object containing the _other_ object concatenated to the string.
+        #
+        # Example:
+        #   ('Café'.mb_chars + ' périferôl').to_s # => "Café périferôl"
+        def +(other)
+          chars(@wrapped_string + other)
+        end
+
+        # Like <tt>String#=~</tt> only it returns the character offset (in codepoints) instead of the byte offset.
+        #
+        # Example:
+        #   'Café périferôl'.mb_chars =~ /ô/ # => 12
+        def =~(other)
+          translate_offset(@wrapped_string =~ other)
+        end
+
+        # Inserts the passed string at specified codepoint offsets.
+        #
+        # Example:
+        #   'Café'.mb_chars.insert(4, ' périferôl').to_s # => "Café périferôl"
+        def insert(offset, fragment)
+          unpacked = Unicode.u_unpack(@wrapped_string)
+          # Guard first so the happy path is not nested in an unless/else.
+          raise IndexError, "index #{offset} out of string" if offset > unpacked.length
+
+          # Reuse the codepoints unpacked above instead of decoding the string a second time.
+          @wrapped_string.replace(
+            unpacked.insert(offset, *Unicode.u_unpack(fragment)).pack('U*')
+          )
+          self
+        end
+
+        # Returns +true+ if contained string contains _other_. Returns +false+ otherwise.
+        #
+        # Example:
+        #   'Café'.mb_chars.include?('é') # => true
+        def include?(other)
+          # We have to redefine this method because Enumerable defines it.
+          @wrapped_string.include?(other)
+        end
+
+        # Returns the position _needle_ in the string, counting in codepoints. Returns +nil+ if _needle_ isn't found.
+        #
+        # Example:
+        #   'Café périferôl'.mb_chars.index('ô')   # => 12
+        #   'Café périferôl'.mb_chars.index(/\w/u) # => 0
+        def index(needle, offset=0)
+          wrapped_offset = first(offset).wrapped_string.length
+          index = @wrapped_string.index(needle, wrapped_offset)
+          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
+        end
+
+        # Returns the position _needle_ in the string, counting in
+        # codepoints, searching backward from _offset_ or the end of the
+        # string. Returns +nil+ if _needle_ isn't found.
+        #
+        # Example:
+        #   'Café périferôl'.mb_chars.rindex('é')   # => 6
+        #   'Café périferôl'.mb_chars.rindex(/\w/u) # => 13
+        def rindex(needle, offset=nil)
+          offset ||= length
+          wrapped_offset = first(offset).wrapped_string.length
+          index = @wrapped_string.rindex(needle, wrapped_offset)
+          index ? (Unicode.u_unpack(@wrapped_string.slice(0...index)).size) : nil
+        end
+
+        # Returns the number of codepoints in the string
+        def size
+          Unicode.u_unpack(@wrapped_string).size
+        end
+        alias_method :length, :size
+
+        # Strips entire range of Unicode whitespace from the right of the string.
+        def rstrip
+          chars(@wrapped_string.gsub(Unicode::TRAILERS_PAT, ''))
+        end
+
+        # Strips entire range of Unicode whitespace from the left of the string.
+        def lstrip
+          chars(@wrapped_string.gsub(Unicode::LEADERS_PAT, ''))
+        end
+
+        # Strips entire range of Unicode whitespace from the right and left of the string.
+        def strip
+          rstrip.lstrip
+        end
+
+        # Returns the codepoint of the first character in the string.
+        #
+        # Example:
+        #   'こんにちは'.mb_chars.ord # => 12371
+        def ord
+          Unicode.u_unpack(@wrapped_string)[0]
+        end
+
+        # Works just like <tt>String#rjust</tt>, only integer specifies characters instead of bytes.
+        #
+        # Example:
+        #
+        #   "¾ cup".mb_chars.rjust(8).to_s
+        #   # => "   ¾ cup"
+        #
+        #   "¾ cup".mb_chars.rjust(8, " ").to_s # Use non-breaking whitespace
+        #   # => "   ¾ cup"
+        def rjust(integer, padstr=' ')
+          justify(integer, :right, padstr)
+        end
+
+        # Works just like <tt>String#ljust</tt>, only integer specifies characters instead of bytes.
+        #
+        # Example:
+        #
+        #   "¾ cup".mb_chars.ljust(8).to_s
+        #   # => "¾ cup   "
+        #
+        #   "¾ cup".mb_chars.ljust(8, " ").to_s # Use non-breaking whitespace
+        #   # => "¾ cup   "
+        def ljust(integer, padstr=' ')
+          justify(integer, :left, padstr)
+        end
+
+        # Works just like <tt>String#center</tt>, only integer specifies characters instead of bytes.
+        #
+        # Example:
+        #
+        #   "¾ cup".mb_chars.center(8).to_s
+        #   # => " ¾ cup  "
+        #
+        #   "¾ cup".mb_chars.center(8, " ").to_s # Use non-breaking whitespace
+        #   # => " ¾ cup  "
+        def center(integer, padstr=' ')
+          justify(integer, :center, padstr)
+        end
+
+      else
+        def =~(other)
+          @wrapped_string =~ other
+        end
+      end
+
+      # Works just like <tt>String#split</tt>, with the exception that the items in the resulting list are Chars
+      # instances instead of String. This makes chaining methods easier.
+      #
+      # Example:
+      #   'Café périferôl'.mb_chars.split(/é/).map { |part| part.upcase.to_s } # => ["CAF", " P", "RIFERÔL"]
+      def split(*args)
+        @wrapped_string.split(*args).map { |i| i.mb_chars }
+      end
+
+      # Like <tt>String#[]=</tt>, except instead of byte offsets you specify character offsets.
+      #
+      # Example:
+      #
+      #   s = "Müller"
+      #   s.mb_chars[2] = "e" # Replace character with offset 2
+      #   s
+      #   # => "Müeler"
+      #
+      #   s = "Müller"
+      #   s.mb_chars[1, 2] = "ö" # Replace 2 characters at character offset 1
+      #   s
+      #   # => "Möler"
+      def []=(*args)
+        replace_by = args.pop
+        # Indexed replace with regular expressions already works
+        if args.first.is_a?(Regexp)
+          @wrapped_string[*args] = replace_by
+        else
+          result = Unicode.u_unpack(@wrapped_string)
+          if args[0].is_a?(Integer) # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2
+            raise IndexError, "index #{args[0]} out of string" if args[0] >= result.length
+            min = args[0]
+            max = args[1].nil? ? min : (min + args[1] - 1)
+            range = Range.new(min, max)
+            replace_by = [replace_by].pack('U') if replace_by.is_a?(Integer)
+          elsif args.first.is_a?(Range)
+            raise RangeError, "#{args[0]} out of range" if args[0].first >= result.length # Range#min is nil for e.g. 1..-1
+            range = args[0]
+          else
+            needle = args[0].to_s
+            min = index(needle)
+            raise IndexError, "string not matched" if min.nil? # mirror String#[]= instead of failing on nil + Integer
+            range = Range.new(min, min + Unicode.u_unpack(needle).length - 1)
+          end
+          result[range] = Unicode.u_unpack(replace_by)
+          @wrapped_string.replace(result.pack('U*'))
+        end
+      end
+
+      # Reverses all characters in the string.
+      #
+      # Example:
+      #   'Café'.mb_chars.reverse.to_s # => 'éfaC'
+      def reverse
+        chars(Unicode.g_unpack(@wrapped_string).reverse.flatten.pack('U*'))
+      end
+
+      # Implements Unicode-aware slice with codepoints. Slicing on one point returns the codepoints for that
+      # character.
+      #
+      # Example:
+      #   'こんにちは'.mb_chars.slice(2..3).to_s # => "にち"
+      def slice(*args)
+        if args.size > 2
+          raise ArgumentError, "wrong number of arguments (#{args.size} for 1)" # Do as if we were native
+        elsif (args.size == 2 && !(args.first.is_a?(Numeric) || args.first.is_a?(Regexp)))
+          raise TypeError, "cannot convert #{args.first.class} into Integer" # Do as if we were native
+        elsif (args.size == 2 && !args[1].is_a?(Numeric))
+          raise TypeError, "cannot convert #{args[1].class} into Integer" # Do as if we were native
+        elsif args[0].kind_of? Range
+          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
+          result = cps.nil? ? nil : cps.pack('U*')
+        elsif args[0].kind_of? Regexp
+          result = @wrapped_string.slice(*args)
+        elsif args.size == 1 && args[0].kind_of?(Numeric)
+          character = Unicode.u_unpack(@wrapped_string)[args[0]]
+          result = character && [character].pack('U')
+        else
+          cps = Unicode.u_unpack(@wrapped_string).slice(*args)
+          result = cps && cps.pack('U*')
+        end
+        result && chars(result)
+      end
+      alias_method :[], :slice
+
+      # Limit the byte size of the string to a number of bytes without breaking characters. Usable
+      # when the storage for a string is limited for some reason.
+      #
+      # Example:
+      #   s = 'こんにちは'
+      #   s.mb_chars.limit(7) # => "こに"
+      def limit(limit)
+        slice(0...translate_offset(limit))
+      end
+
+      # Convert characters in the string to uppercase.
+      #
+      # Example:
+      #   'Laurent, où sont les tests ?'.mb_chars.upcase.to_s # => "LAURENT, OÙ SONT LES TESTS ?"
+      def upcase
+        chars(Unicode.apply_mapping @wrapped_string, :uppercase_mapping)
+      end
+
+      # Convert characters in the string to lowercase.
+      #
+      # Example:
+      #   'VĚDA A VÝZKUM'.mb_chars.downcase.to_s # => "věda a výzkum"
+      def downcase
+        chars(Unicode.apply_mapping @wrapped_string, :lowercase_mapping)
+      end
+
+      # Converts the first character to uppercase and the remainder to lowercase.
+      #
+      # Example:
+      #  'über'.mb_chars.capitalize.to_s # => "Über"
+      def capitalize
+        (slice(0) || chars('')).upcase + (slice(1..-1) || chars('')).downcase
+      end
+
+      # Capitalizes the first letter of every word, when possible.
+      #
+      # Example:
+      #   "ÉL QUE SE ENTERÓ".mb_chars.titleize    # => "Él Que Se Enteró"
+      #   "日本語".mb_chars.titleize                 # => "日本語"
+      def titleize
+        chars(downcase.to_s.gsub(/\b('?[\S])/u) { Unicode.apply_mapping $1, :uppercase_mapping })
+      end
+      alias_method :titlecase, :titleize
+
+      # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
+      # passing strings to databases and validations.
+      #
+      # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
+      #   <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
+      #   Mail::Multibyte::Unicode.default_normalization_form
+      def normalize(form = nil)
+        chars(Unicode.normalize(@wrapped_string, form))
+      end
+
+      # Performs canonical decomposition on all the characters.
+      #
+      # Example:
+      #   'é'.length                         # => 2
+      #   'é'.mb_chars.decompose.to_s.length # => 3
+      def decompose
+        chars(Unicode.decompose_codepoints(:canonical, Unicode.u_unpack(@wrapped_string)).pack('U*'))
+      end
+
+      # Performs composition on all the characters.
+      #
+      # Example:
+      #   'é'.length                       # => 3
+      #   'é'.mb_chars.compose.to_s.length # => 2
+      def compose
+        chars(Unicode.compose_codepoints(Unicode.u_unpack(@wrapped_string)).pack('U*'))
+      end
+
+      # Returns the number of grapheme clusters in the string.
+      #
+      # Example:
+      #   'क्षि'.mb_chars.length   # => 4
+      #   'क्षि'.mb_chars.g_length # => 3
+      def g_length
+        Unicode.g_unpack(@wrapped_string).length
+      end
+
+      # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
+      #
+      # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+      def tidy_bytes(force = false)
+        chars(Unicode.tidy_bytes(@wrapped_string, force))
+      end
+
+      %w(capitalize downcase lstrip reverse rstrip slice strip tidy_bytes upcase).each do |method|
+        # Only define a corresponding bang method for methods defined in the proxy. On 1.9 the proxy does not define
+        # lstrip, rstrip and strip, so their bang variants are skipped: they already work as expected on multibyte strings.
+        if public_method_defined?(method)
+          define_method("#{method}!") do |*args|
+            @wrapped_string = send(method, *args).to_s
+            self
+          end
+        end
+      end
+
+      protected
+
+        def translate_offset(byte_offset) #:nodoc:
+          return nil if byte_offset.nil?
+          return 0   if @wrapped_string == ''
+
+          # Slice a binary view of the string so the offset counts bytes. Work on a local copy so the
+          # wrapped string keeps its identity and encoding (the proxy must not change as a side effect).
+          bytes = @wrapped_string
+          bytes = bytes.dup.force_encoding(Encoding::ASCII_8BIT) if bytes.respond_to?(:force_encoding)
+          begin
+            bytes[0...byte_offset].unpack('U*').length
+          rescue ArgumentError
+            byte_offset -= 1 # the cut fell inside a multibyte character; back up one byte and try again
+            retry
+          end
+        end
+
+        def justify(integer, way, padstr=' ') #:nodoc:
+          raise ArgumentError, "zero width padding" if padstr.length == 0
+          padsize = integer - size
+          padsize = padsize > 0 ? padsize : 0
+          case way
+          when :right
+            result = @wrapped_string.dup.insert(0, padding(padsize, padstr))
+          when :left
+            result = @wrapped_string.dup.insert(-1, padding(padsize, padstr))
+          when :center
+            lpad = padding((padsize / 2.0).floor, padstr)
+            rpad = padding((padsize / 2.0).ceil, padstr)
+            result = @wrapped_string.dup.insert(0, lpad).insert(-1, rpad)
+          end
+          chars(result)
+        end
+
+        def padding(padsize, padstr=' ') #:nodoc:
+          if padsize != 0
+            chars(padstr * ((padsize / Unicode.u_unpack(padstr).size) + 1)).slice(0, padsize)
+          else
+            ''
+          end
+        end
+
+        def chars(string) #:nodoc:
+          self.class.new(string)
+        end
+    end
+  end
+end
diff --git a/lib/mail/multibyte/exceptions.rb b/lib/mail/multibyte/exceptions.rb
new file mode 100644
index 000000000..2d88f9e8c
--- /dev/null
+++ b/lib/mail/multibyte/exceptions.rb
@@ -0,0 +1,8 @@
+# encoding: utf-8
+
+module Mail #:nodoc:
+  module Multibyte #:nodoc:
+    # Raised when a string turns out not to be valid in the encoding it is expected to be in.
+    class EncodingError < StandardError; end
+  end
+end
\ No newline at end of file
diff --git a/lib/mail/multibyte/unicode.rb b/lib/mail/multibyte/unicode.rb
new file mode 100644
index 000000000..b4036212c
--- /dev/null
+++ b/lib/mail/multibyte/unicode.rb
@@ -0,0 +1,392 @@
+module Mail
+  module Multibyte
+    module Unicode
+
+      extend self
+
+      # A list of all available normalization forms. See http://www.unicode.org/reports/tr15/tr15-29.html for more
+      # information about normalization.
+      NORMALIZATION_FORMS = [:c, :kc, :d, :kd]
+
+      # The Unicode version that is supported by the implementation
+      UNICODE_VERSION = '5.2.0'
+
+      # The default normalization used for operations that require normalization. It can be set to any of the
+      # normalizations in NORMALIZATION_FORMS.
+      #
+      # Example:
+      #   Mail::Multibyte::Unicode.default_normalization_form = :c
+      attr_accessor :default_normalization_form
+      @default_normalization_form = :kc
+
+      # Hangul character boundaries and properties (used by decompose_codepoints and compose_codepoints)
+      HANGUL_SBASE = 0xAC00 # first codepoint treated as a precomposed syllable
+      HANGUL_LBASE = 0x1100 # base of the leading consonant (L) index
+      HANGUL_VBASE = 0x1161 # base of the vowel (V) index
+      HANGUL_TBASE = 0x11A7 # base of the trailing consonant (T) index; T index 0 stands for "no trailing consonant"
+      HANGUL_LCOUNT = 19
+      HANGUL_VCOUNT = 21
+      HANGUL_TCOUNT = 28
+      HANGUL_NCOUNT = HANGUL_VCOUNT * HANGUL_TCOUNT # number of syllables sharing one leading consonant
+      HANGUL_SCOUNT = 11172
+      HANGUL_SLAST = HANGUL_SBASE + HANGUL_SCOUNT # exclusive upper bound: syllables satisfy cp < HANGUL_SLAST
+      HANGUL_JAMO_FIRST = 0x1100
+      HANGUL_JAMO_LAST = 0x11FF
+
+      # All the unicode whitespace
+      WHITESPACE = [
+        (0x0009..0x000D).to_a, # White_Space # Cc   [5] <control-0009>..<control-000D>
+        0x0020,                # White_Space # Zs       SPACE
+        0x0085,                # White_Space # Cc       <control-0085>
+        0x00A0,                # White_Space # Zs       NO-BREAK SPACE
+        0x1680,                # White_Space # Zs       OGHAM SPACE MARK
+        0x180E,                # White_Space # Zs       MONGOLIAN VOWEL SEPARATOR
+        (0x2000..0x200A).to_a, # White_Space # Zs  [11] EN QUAD..HAIR SPACE
+        0x2028,                # White_Space # Zl       LINE SEPARATOR
+        0x2029,                # White_Space # Zp       PARAGRAPH SEPARATOR
+        0x202F,                # White_Space # Zs       NARROW NO-BREAK SPACE
+        0x205F,                # White_Space # Zs       MEDIUM MATHEMATICAL SPACE
+        0x3000,                # White_Space # Zs       IDEOGRAPHIC SPACE
+      ].flatten.freeze
+
+      # The BOM (byte order mark) is treated like whitespace here: it is a non-rendering character used to tell
+      # little from big endian, which is not an issue in utf-8, so LEADERS_PAT and TRAILERS_PAT match it as well.
+      LEADERS_AND_TRAILERS = WHITESPACE + [65279] # ZERO-WIDTH NO-BREAK SPACE aka BOM
+
+      # Returns a regular expression pattern that matches the passed Unicode codepoints
+      def self.codepoints_to_pattern(array_of_codepoints) #:nodoc:
+        array_of_codepoints.collect{ |e| [e].pack 'U*' }.join('|')
+      end
+      TRAILERS_PAT = /(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+\Z/u
+      LEADERS_PAT = /\A(#{codepoints_to_pattern(LEADERS_AND_TRAILERS)})+/u
+
+      # Unpack the string at codepoints boundaries. Raises an EncodingError when the encoding of the string isn't
+      # valid UTF-8.
+      #
+      # Example:
+      #   Unicode.u_unpack('Café') # => [67, 97, 102, 233]
+      def u_unpack(string)
+        begin
+          string.unpack 'U*'
+        rescue ArgumentError
+          raise EncodingError, 'malformed UTF-8 character'
+        end
+      end
+
+      # Detect whether the codepoint is in a certain character class. Returns +true+ when it's in the specified
+      # character class and +false+ otherwise. Valid character classes are: <tt>:cr</tt>, <tt>:lf</tt>, <tt>:l</tt>,
+      # <tt>:v</tt>, <tt>:lv</tt>, <tt>:lvt</tt> and <tt>:t</tt>.
+      #
+      # Primarily used by the grapheme cluster support.
+      def in_char_class?(codepoint, classes)
+        classes.detect { |c| database.boundary[c] === codepoint } ? true : false
+      end
+
+      # Unpack the string at grapheme boundaries. Returns a list of character lists (one list of codepoints per cluster).
+      #
+      # Example:
+      #   Unicode.g_unpack('क्षि') # => [[2325, 2381], [2359], [2367]]
+      #   Unicode.g_unpack('Café') # => [[67], [97], [102], [233]]
+      def g_unpack(string)
+        codepoints = u_unpack(string)
+        unpacked = []
+        pos = 0
+        marker = 0 # index at which the cluster currently being collected starts
+        eoc = codepoints.length
+        while(pos < eoc)
+          pos += 1
+          previous = codepoints[pos-1]
+          current = codepoints[pos] # nil once pos has moved past the last codepoint
+          if (
+              # CR X LF
+              ( previous == database.boundary[:cr] and current == database.boundary[:lf] ) or
+              # L X (L|V|LV|LVT)
+              ( database.boundary[:l] === previous and in_char_class?(current, [:l,:v,:lv,:lvt]) ) or
+              # (LV|V) X (V|T)
+              ( in_char_class?(previous, [:lv,:v]) and in_char_class?(current, [:v,:t]) ) or
+              # (LVT|T) X (T)
+              ( in_char_class?(previous, [:lvt,:t]) and database.boundary[:t] === current ) or
+              # X Extend
+              (database.boundary[:extend] === current)
+            ) # a rule matched: no break between previous and current, keep collecting
+          else # no rule forbids a break here: close the cluster that started at marker
+            unpacked << codepoints[marker..pos-1]
+            marker = pos
+          end
+        end
+        unpacked
+      end
+
+      # Reverse operation of g_unpack.
+      #
+      # Example:
+      #   Unicode.g_pack(Unicode.g_unpack('क्षि')) # => 'क्षि'
+      def g_pack(unpacked)
+        (unpacked.flatten).pack('U*')
+      end
+
+      # Re-order codepoints so the string becomes canonical. Swaps neighbours in place and returns +codepoints+.
+      def reorder_characters(codepoints)
+        length = codepoints.length- 1 # index of the last codepoint; every step looks at the pair (pos, pos+1)
+        pos = 0
+        while pos < length do
+          cp1, cp2 = database.codepoints[codepoints[pos]], database.codepoints[codepoints[pos+1]]
+          if (cp1.combining_class > cp2.combining_class) && (cp2.combining_class > 0)
+            codepoints[pos..pos+1] = cp2.code, cp1.code # swap the out-of-order pair
+            pos += (pos > 0 ? -1 : 1) # step back so the moved codepoint is compared with its new left neighbour
+          else
+            pos += 1
+          end
+        end
+        codepoints
+      end
+
+      # Decompose composed characters to the decomposed form. Mappings carrying a decomp_type are only applied for :compatability.
+      def decompose_codepoints(type, codepoints)
+        codepoints.inject([]) do |decomposed, cp|
+          # if it's a precomposed hangul syllable: split it arithmetically into L, V and (optionally) T
+          if HANGUL_SBASE <= cp and cp < HANGUL_SLAST
+            sindex = cp - HANGUL_SBASE
+            ncp = [] # new codepoints
+            ncp << HANGUL_LBASE + sindex / HANGUL_NCOUNT
+            ncp << HANGUL_VBASE + (sindex % HANGUL_NCOUNT) / HANGUL_TCOUNT
+            tindex = sindex % HANGUL_TCOUNT
+            ncp << (HANGUL_TBASE + tindex) unless tindex == 0
+            decomposed.concat ncp
+          # if the codepoint is decomposable with the current decomposition type
+          elsif (ncp = database.codepoints[cp].decomp_mapping) and (!database.codepoints[cp].decomp_type || type == :compatability)
+            decomposed.concat decompose_codepoints(type, ncp.dup) # the mapping is decomposed again, recursively
+          else
+            decomposed << cp
+          end
+        end
+      end
+
+      # Compose decomposed characters to the composed form. Works in place on +codepoints+ and returns it.
+      def compose_codepoints(codepoints)
+        pos = 0
+        eoa = codepoints.length - 1 # index of the last codepoint; lowered whenever codepoints get merged
+        starter_pos = 0
+        starter_char = codepoints[0]
+        previous_combining_class = -1
+        while pos < eoa
+          pos += 1
+          lindex = starter_char - HANGUL_LBASE
+          # -- Hangul: the starter lies in the leading consonant range, try to build a syllable from L V [T]
+          if 0 <= lindex and lindex < HANGUL_LCOUNT
+            vindex = codepoints[starter_pos+1] - HANGUL_VBASE rescue vindex = -1
+            if 0 <= vindex and vindex < HANGUL_VCOUNT
+              tindex = codepoints[starter_pos+2] - HANGUL_TBASE rescue tindex = -1
+              if 0 <= tindex and tindex < HANGUL_TCOUNT
+                j = starter_pos + 2
+                eoa -= 2
+              else
+                tindex = 0
+                j = starter_pos + 1
+                eoa -= 1
+              end
+              codepoints[starter_pos..j] = (lindex * HANGUL_VCOUNT + vindex) * HANGUL_TCOUNT + tindex + HANGUL_SBASE # replace the run by one syllable
+            end
+            starter_pos += 1
+            starter_char = codepoints[starter_pos]
+          # -- Other characters
+          else
+            current_char = codepoints[pos]
+            current = database.codepoints[current_char]
+            if current.combining_class > previous_combining_class
+              if ref = database.composition_map[starter_char]
+                composition = ref[current_char]
+              else
+                composition = nil
+              end
+              unless composition.nil?
+                codepoints[starter_pos] = composition
+                starter_char = composition
+                codepoints.delete_at pos # the current codepoint has been absorbed into the starter
+                eoa -= 1
+                pos -= 1
+                previous_combining_class = -1
+              else
+                previous_combining_class = current.combining_class
+              end
+            else
+              previous_combining_class = current.combining_class
+            end
+            if current.combining_class == 0 # combining class 0: this codepoint becomes the new starter
+              starter_pos = pos
+              starter_char = codepoints[pos]
+            end
+          end
+        end
+        codepoints
+      end
+
+      # Replaces all ISO-8859-1 or CP1252 characters by their UTF-8 equivalent resulting in a valid UTF-8 string.
+      #
+      # Passing +true+ will forcibly tidy all bytes, assuming that the string's encoding is entirely CP1252 or ISO-8859-1.
+      def tidy_bytes(string, force = false)
+        if force
+          return string.unpack("C*").map do |b|
+            tidy_byte(b)
+          end.flatten.compact.pack("C*").unpack("U*").pack("U*")
+        end
+
+        bytes = string.unpack("C*")
+        conts_expected = 0
+        last_lead = 0
+
+        bytes.each_index do |i|
+
+          byte          = bytes[i]
+          is_cont       = byte > 127 && byte < 192
+          is_lead       = byte > 191 && byte < 245
+          is_unused     = byte > 240
+          is_restricted = byte > 244
+
+          # Impossible or highly unlikely byte? Clean it.
+          if is_unused || is_restricted
+            bytes[i] = tidy_byte(byte)
+          elsif is_cont
+            # Not expecting contination byte? Clean up. Otherwise, now expect one less.
+            conts_expected == 0 ? bytes[i] = tidy_byte(byte) : conts_expected -= 1
+          else
+            if conts_expected > 0
+              # Expected continuation, but got ASCII or leading? Clean backwards up to
+              # the leading byte.
+              (1..(i - last_lead)).each {|j| bytes[i - j] = tidy_byte(bytes[i - j])}
+              conts_expected = 0
+            end
+            if is_lead
+              # Final byte is leading? Clean it.
+              if i == bytes.length - 1
+                bytes[i] = tidy_byte(bytes.last)
+              else
+                # Valid leading byte? Expect continuations determined by position of
+                # first zero bit, with max of 3.
+                conts_expected = byte < 224 ? 1 : byte < 240 ? 2 : 3
+                last_lead = i
+              end
+            end
+          end
+        end
+        bytes.empty? ? "" : bytes.flatten.compact.pack("C*").unpack("U*").pack("U*")
+      end
+
+      # Returns the KC normalization of the string by default. NFKC is considered the best normalization form for
+      # passing strings to databases and validations.
+      #
+      # * <tt>string</tt> - The string to perform normalization on.
+      # * <tt>form</tt> - The form you want to normalize in. Should be one of the following:
+      #   <tt>:c</tt>, <tt>:kc</tt>, <tt>:d</tt>, or <tt>:kd</tt>. Default is
+      #   Mail::Multibyte.default_normalization_form
+      def normalize(string, form=nil)
+        form ||= @default_normalization_form
+        # See http://www.unicode.org/reports/tr15, Table 1
+        codepoints = u_unpack(string)
+        case form
+          when :d
+            reorder_characters(decompose_codepoints(:canonical, codepoints))
+          when :c
+            compose_codepoints(reorder_characters(decompose_codepoints(:canonical, codepoints)))
+          when :kd
+            reorder_characters(decompose_codepoints(:compatability, codepoints))
+          when :kc
+            compose_codepoints(reorder_characters(decompose_codepoints(:compatability, codepoints)))
+          else
+            raise ArgumentError, "#{form} is not a valid normalization variant", caller
+        end.pack('U*')
+      end
+
+      def apply_mapping(string, mapping) #:nodoc:
+        u_unpack(string).map do |codepoint|
+          cp = database.codepoints[codepoint]
+          if cp and (ncp = cp.send(mapping)) and ncp > 0
+            ncp
+          else
+            codepoint
+          end
+        end.pack('U*')
+      end
+
+      # Holds data about a codepoint in the Unicode database (plain accessors; nothing is set on a fresh instance)
+      class Codepoint
+        attr_accessor :code, :combining_class, :decomp_type, :decomp_mapping, :uppercase_mapping, :lowercase_mapping
+      end
+
+      # Holds static data from the Unicode database
+      class UnicodeDatabase
+        ATTRIBUTES = :codepoints, :composition_exclusion, :composition_map, :boundary, :cp1252
+
+        attr_writer(*ATTRIBUTES)
+
+        def initialize
+          @codepoints = Hash.new(Codepoint.new)
+          @composition_exclusion = []
+          @composition_map = {}
+          @boundary = {}
+          @cp1252 = {}
+        end
+
+        # Lazy load the Unicode database so it's only loaded when it's actually used
+        ATTRIBUTES.each do |attr_name|
+          class_eval(<<-EOS, __FILE__, __LINE__ + 1)
+            def #{attr_name}     # def codepoints
+              load               #   load
+              @#{attr_name}      #   @codepoints
+            end                  # end
+          EOS
+        end
+
+        # Loads the Unicode database and returns all the internal objects of UnicodeDatabase.
+        def load
+          begin
+            @codepoints, @composition_exclusion, @composition_map, @boundary, @cp1252 = File.open(self.class.filename, 'rb') { |f| Marshal.load f.read }
+          rescue Exception => e
+              raise IOError.new("Couldn't load the Unicode tables for UTF8Handler (#{e.message}), Mail::Multibyte is unusable")
+          end
+
+          # Redefine the === method so we can write shorter rules for grapheme cluster breaks
+          @boundary.each do |k,_|
+            @boundary[k].instance_eval do
+              def ===(other)
+                detect { |i| i === other } ? true : false
+              end
+            end if @boundary[k].kind_of?(Array)
+          end
+
+          # define attr_reader methods for the instance variables
+          class << self
+            attr_reader(*ATTRIBUTES)
+          end
+        end
+
+        # Returns the directory in which the data files are stored
+        def self.dirname
+          File.dirname(__FILE__) + '/../values/'
+        end
+
+        # Returns the filename for the data file for this version
+        def self.filename
+          File.expand_path File.join(dirname, "unicode_tables.dat")
+        end
+      end
+
+      private
+
+      def tidy_byte(byte)
+        if byte < 160
+          [database.cp1252[byte] || byte].pack("U").unpack("C*")
+        elsif byte < 192
+          [194, byte]
+        else
+          [195, byte - 64]
+        end
+      end
+
+      def database
+        @database ||= UnicodeDatabase.new # memoized; the tables themselves are only read on first attribute access
+      end
+
+    end
+  end
+end
diff --git a/lib/mail/multibyte/utils.rb b/lib/mail/multibyte/utils.rb
new file mode 100644
index 000000000..5371fa536
--- /dev/null
+++ b/lib/mail/multibyte/utils.rb
@@ -0,0 +1,60 @@
+# encoding: utf-8
+
+module Mail #:nodoc:
+  module Multibyte #:nodoc:
+    if Kernel.const_defined?(:Encoding)
+      # Returns a regular expression that matches valid characters in the current encoding
+      def self.valid_character
+        VALID_CHARACTER[Encoding.default_external.to_s]
+      end
+    else
+      def self.valid_character
+        case $KCODE
+        when 'UTF8'
+          VALID_CHARACTER['UTF-8']
+        when 'SJIS'
+          VALID_CHARACTER['Shift_JIS']
+        end
+      end
+    end
+
+    if 'string'.respond_to?(:valid_encoding?)
+      # Verifies the encoding of a string
+      def self.verify(string)
+        string.valid_encoding?
+      end
+    else
+      def self.verify(string)
+        if expression = valid_character
+          # Splits the string on character boundaries, which are determined based on $KCODE.
+          string.split(//).all? { |c| expression =~ c }
+        else
+          true
+        end
+      end
+    end
+
+    # Verifies the encoding of the string and raises an exception when it's not valid
+    def self.verify!(string)
+      raise EncodingError.new("Found characters with invalid encoding") unless verify(string)
+    end
+
+    if 'string'.respond_to?(:force_encoding)
+      # Removes all invalid characters from the string.
+      #
+      # Note: this method is a no-op in Ruby 1.9
+      def self.clean(string)
+        string
+      end
+    else
+      def self.clean(string)
+        if expression = valid_character
+          # Splits the string on character boundaries, which are determined based on $KCODE.
+          string.split(//).grep(expression).join
+        else
+          string
+        end
+      end
+    end
+  end
+end
diff --git a/lib/mail/version_specific/ruby_1_8.rb b/lib/mail/version_specific/ruby_1_8.rb
index 679552635..5558f5e8b 100644
--- a/lib/mail/version_specific/ruby_1_8.rb
+++ b/lib/mail/version_specific/ruby_1_8.rb
@@ -1,9 +1,5 @@
 # encoding: utf-8
 
-# For multibyte strings in Ruby 1.8
-require 'active_support'
-require 'active_support/core_ext/string'
-
 module Mail
   class Ruby18
     require 'base64'
@@ -19,13 +15,13 @@ def Ruby18.escape_paren( str )
       re = /([\(\)])/          # Only match unescaped parens
       str.gsub(re) { |s| '\\' + s }
     end
-    
+
     def Ruby18.paren( str )
       str = $1 if str =~ /^\((.*)?\)$/
       str = escape_paren( str )
       '(' + str + ')'
     end
-    
+
     def Ruby18.escape_bracket( str )
       re = /\\\>/
       str = str.gsub(re) { |s| '>'}
@@ -34,36 +30,36 @@ def Ruby18.escape_bracket( str )
       re = /([\<\>])/          # Only match unescaped parens
       str.gsub(re) { |s| '\\' + s }
     end
-    
+
     def Ruby18.bracket( str )
       str = $1 if str =~ /^\<(.*)?\>$/
       str = escape_bracket( str )
       '<' + str + '>'
     end
-    
+
     def Ruby18.decode_base64(str)
       Base64.decode64(str) if str
     end
-    
+
     def Ruby18.encode_base64(str)
       Base64.encode64(str)
     end
-    
+
     def Ruby18.has_constant?(klass, string)
       klass.constants.include?( string )
     end
-    
+
     def Ruby18.get_constant(klass, string)
       klass.const_get( string )
     end
-    
+
     def Ruby18.b_value_encode(str, encoding)
       # Ruby 1.8 requires an encoding to work
       raise ArgumentError, "Must supply an encoding" if encoding.nil?
       encoding = encoding.to_s.upcase.gsub('_', '-')
       [Encodings::Base64.encode(str), encoding]
     end
-    
+
     def Ruby18.b_value_decode(str)
       match = str.match(/\=\?(.+)?\?[Bb]\?(.+)?\?\=/m)
       if match
@@ -72,14 +68,14 @@ def Ruby18.b_value_decode(str)
       end
       str
     end
-    
+
     def Ruby18.q_value_encode(str, encoding)
       # Ruby 1.8 requires an encoding to work
       raise ArgumentError, "Must supply an encoding" if encoding.nil?
       encoding = encoding.to_s.upcase.gsub('_', '-')
       [Encodings::QuotedPrintable.encode(str), encoding]
     end
-    
+
     def Ruby18.q_value_decode(str)
       match = str.match(/\=\?(.+)?\?[Qq]\?(.+)?\?\=/m)
       if match
@@ -88,11 +84,11 @@ def Ruby18.q_value_decode(str)
       end
       str
     end
-    
+
     def Ruby18.param_decode(str, encoding)
       URI.unescape(str)
     end
-    
+
     def Ruby18.param_encode(str)
       encoding = $KCODE.to_s.downcase
       language = Configuration.instance.param_encode_language
diff --git a/lib/mail/version_specific/ruby_1_9.rb b/lib/mail/version_specific/ruby_1_9.rb
index 3c31c8819..f708b0768 100644
--- a/lib/mail/version_specific/ruby_1_9.rb
+++ b/lib/mail/version_specific/ruby_1_9.rb
@@ -1,14 +1,5 @@
 # encoding: utf-8
 
-unless ''.respond_to?(:mb_chars)
-  class String
-    # Compatability with ActiveSupport, which returns self in 1.9
-    def mb_chars
-      self
-    end
-  end
-end
-
 module Mail
   class Ruby19
 
diff --git a/spec/mail/fields/common/parameter_hash_spec.rb b/spec/mail/fields/common/parameter_hash_spec.rb
index b3d872516..ad8645c8e 100644
--- a/spec/mail/fields/common/parameter_hash_spec.rb
+++ b/spec/mail/fields/common/parameter_hash_spec.rb
@@ -5,8 +5,8 @@
   it "should return the values in the hash" do
     hash = Mail::ParameterHash.new
     hash.merge!({'value1' => 'one', 'value2' => 'two'})
-    hash.keys.should include(:value1)
-    hash.keys.should include(:value2)
+    hash.keys.should include("value1")
+    hash.keys.should include("value2")
     hash.values.should include('one')
     hash.values.should include('two')
   end
diff --git a/spec/mail/fields/content_disposition_field_spec.rb b/spec/mail/fields/content_disposition_field_spec.rb
index e4f9b249c..1ba9d19c7 100644
--- a/spec/mail/fields/content_disposition_field_spec.rb
+++ b/spec/mail/fields/content_disposition_field_spec.rb
@@ -53,12 +53,12 @@
       c.decoded.should == 'attachment'
     end
   end
-  
+
   describe "instance methods" do
     it "should give it's disposition type" do
       c = Mail::ContentDispositionField.new('Content-Disposition: attachment; filename=File')
       c.disposition_type.should == 'attachment'
-      c.parameters.should == {:filename => 'File'}
+      c.parameters.should == {"filename" => 'File'}
     end
 
     # see spec/fixtures/trec_2005_corpus/missing_content_disposition.eml
diff --git a/spec/mail/fields/content_type_field_spec.rb b/spec/mail/fields/content_type_field_spec.rb
index 2d526ce7d..8a2d70455 100644
--- a/spec/mail/fields/content_type_field_spec.rb
+++ b/spec/mail/fields/content_type_field_spec.rb
@@ -147,17 +147,17 @@
 
     it "should return a parameter as a hash" do
       c = Mail::ContentTypeField.new('text/plain; charset=US-ASCII')
-      c.parameters.should == {:charset => 'US-ASCII'}
+      c.parameters.should == {"charset" => 'US-ASCII'}
     end
 
     it "should return multiple parameters as a hash" do
       c = Mail::ContentTypeField.new('text/plain; charset=US-ASCII; format=flowed')
-      c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+      c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
     end
 
     it "should return boundry parameters" do
       c = Mail::ContentTypeField.new('multipart/mixed; boundary=Apple-Mail-13-196941151')
-      c.parameters.should == {:boundary => 'Apple-Mail-13-196941151'}
+      c.parameters.should == {"boundary" => 'Apple-Mail-13-196941151'}
     end
 
     it "should be indifferent with the access" do
@@ -197,7 +197,7 @@
       c.content_type.should == 'application/octet-stream'
       c.main_type.should == 'application'
       c.sub_type.should == 'octet-stream'
-      c.parameters.should == {:'name*' => "iso-2022-jp'ja'01%20Quien%20Te%20Dij%8aat.%20Pitbull.mp3"}
+      c.parameters.should == {'name*' => "iso-2022-jp'ja'01%20Quien%20Te%20Dij%8aat.%20Pitbull.mp3"}
     end
 
     it "should handle 'application/pdf;'" do
@@ -215,7 +215,7 @@
       c.content_type.should == 'application/pdf'
       c.main_type.should == 'application'
       c.sub_type.should == 'pdf'
-      c.parameters.should == {:name => "broken.pdf"}
+      c.parameters.should == {"name" => "broken.pdf"}
     end
 
     it "should handle 'application/pkcs7-signature;'" do
@@ -233,7 +233,7 @@
       c.content_type.should == 'application/pkcs7-signature'
       c.main_type.should == 'application'
       c.sub_type.should == 'pkcs7-signature'
-      c.parameters.should == {:name => "smime.p7s"}
+      c.parameters.should == {"name" => "smime.p7s"}
     end
 
     it "should handle 'application/x-gzip; NAME=blah.gz'" do
@@ -242,7 +242,7 @@
       c.content_type.should == 'application/x-gzip'
       c.main_type.should == 'application'
       c.sub_type.should == 'x-gzip'
-      c.parameters.should == {:NAME => "blah.gz"}
+      c.parameters.should == {"NAME" => "blah.gz"}
     end
 
     it "should handle 'image/jpeg'" do
@@ -314,7 +314,7 @@
       c.content_type.should == 'multipart/alternative'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'alternative'
-      c.parameters.should == {:boundary =>"----=_NextPart_000_0093_01C81419.EB75E850"}
+      c.parameters.should == {"boundary" =>"----=_NextPart_000_0093_01C81419.EB75E850"}
     end
 
     it "should handle 'multipart/alternative; boundary=----=_NextPart_000_0093_01C81419.EB75E850'" do
@@ -323,7 +323,7 @@
       c.content_type.should == 'multipart/alternative'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'alternative'
-      c.parameters.should == {:boundary =>"----=_NextPart_000_0093_01C81419.EB75E850"}
+      c.parameters.should == {"boundary" =>"----=_NextPart_000_0093_01C81419.EB75E850"}
     end
 
     it "should handle 'Multipart/Alternative;boundary=MuLtIpArT_BoUnDaRy'" do
@@ -332,7 +332,7 @@
       c.content_type.should == 'multipart/alternative'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'alternative'
-      c.parameters.should == {:boundary =>"MuLtIpArT_BoUnDaRy"}
+      c.parameters.should == {"boundary" =>"MuLtIpArT_BoUnDaRy"}
     end
 
     it "should handle 'Multipart/Alternative;boundary=MuLtIpArT_BoUnDaRy'" do
@@ -341,7 +341,7 @@
       c.content_type.should == 'multipart/alternative'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'alternative'
-      c.parameters.should == {:boundary =>"MuLtIpArT_BoUnDaRy"}
+      c.parameters.should == {"boundary" =>"MuLtIpArT_BoUnDaRy"}
     end
 
     it "should handle 'multipart/mixed'" do
@@ -368,7 +368,7 @@
       c.content_type.should == 'multipart/mixed'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'mixed'
-      c.parameters.should == {:boundary => "Apple-Mail-13-196941151"}
+      c.parameters.should == {"boundary" => "Apple-Mail-13-196941151"}
     end
 
     it "should handle 'multipart/mixed; boundary=mimepart_427e4cb4ca329_133ae40413c81ef'" do
@@ -377,7 +377,7 @@
       c.content_type.should == 'multipart/mixed'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'mixed'
-      c.parameters.should == {:boundary => "mimepart_427e4cb4ca329_133ae40413c81ef"}
+      c.parameters.should == {"boundary" => "mimepart_427e4cb4ca329_133ae40413c81ef"}
     end
 
     it "should handle 'multipart/report; report-type=delivery-status;'" do
@@ -386,7 +386,7 @@
       c.content_type.should == 'multipart/report'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'report'
-      c.parameters.should == {:"report-type" => "delivery-status"}
+      c.parameters.should == {"report-type" => "delivery-status"}
     end
 
     it "should handle 'multipart/signed;'" do
@@ -422,7 +422,7 @@
       c.content_type.should == 'text/html'
       c.main_type.should == 'text'
       c.sub_type.should == 'html'
-      c.parameters.should == {:charset => 'iso-8859-1'}
+      c.parameters.should == {"charset" => 'iso-8859-1'}
     end
 
     it "should handle 'TEXT/PLAIN; charset=ISO-8859-1;'" do
@@ -431,7 +431,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'ISO-8859-1'}
+      c.parameters.should == {"charset" => 'ISO-8859-1'}
     end
 
     it "should handle 'text/plain'" do
@@ -458,7 +458,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'ISO-8859-1'}
+      c.parameters.should == {"charset" => 'ISO-8859-1'}
     end
 
     it "should handle 'text/plain; charset=ISO-8859-1;'" do
@@ -467,7 +467,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'ISO-8859-1', :format => 'flowed'}
+      c.parameters.should == {"charset" => 'ISO-8859-1', "format" => 'flowed'}
     end
 
     it "should handle 'text/plain; charset=us-ascii;'" do
@@ -476,7 +476,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'us-ascii'}
+      c.parameters.should == {"charset" => 'us-ascii'}
     end
 
     it "should handle 'text/plain; charset=US-ASCII; format=flowed'" do
@@ -485,7 +485,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+      c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
     end
 
     it "should handle 'text/plain; charset=US-ASCII; format=flowed'" do
@@ -494,7 +494,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+      c.parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
     end
 
     it "should handle 'text/plain; charset=utf-8'" do
@@ -503,7 +503,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'utf-8'}
+      c.parameters.should == {"charset" => 'utf-8'}
     end
 
     it "should handle 'text/plain; charset=utf-8'" do
@@ -512,7 +512,7 @@
       c.content_type.should == 'text/plain'
       c.main_type.should == 'text'
       c.sub_type.should == 'plain'
-      c.parameters.should == {:charset => 'X-UNKNOWN'}
+      c.parameters.should == {"charset" => 'X-UNKNOWN'}
     end
 
     it "should handle 'text/x-ruby-script;'" do
@@ -530,7 +530,7 @@
       c.content_type.should == 'text/x-ruby-script'
       c.main_type.should == 'text'
       c.sub_type.should == 'x-ruby-script'
-      c.parameters.should == {:name => 'hello.rb'}
+      c.parameters.should == {"name" => 'hello.rb'}
     end
 
     it "should handle 'multipart/mixed; boundary=\"=_NextPart_Lycos_15031600484464_ID\"" do
@@ -539,7 +539,7 @@
       c.content_type.should == 'multipart/mixed'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'mixed'
-      c.parameters.should == {:boundary => '=_NextPart_Lycos_15031600484464_ID'}
+      c.parameters.should == {"boundary" => '=_NextPart_Lycos_15031600484464_ID'}
     end
 
     it "should handle 'multipart/alternative; boundary=----=_=NextPart_000_0093_01C81419.EB75E850" do
@@ -548,7 +548,7 @@
       c.content_type.should == 'multipart/alternative'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'alternative'
-      c.parameters.should == {:boundary => '----=_=NextPart_000_0093_01C81419.EB75E850'}
+      c.parameters.should == {"boundary" => '----=_=NextPart_000_0093_01C81419.EB75E850'}
     end
 
     it "should handle 'multipart/alternative; boundary=\"----=_=NextPart_000_0093_01C81419.EB75E850\"" do
@@ -557,7 +557,7 @@
       c.content_type.should == 'multipart/alternative'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'alternative'
-      c.parameters.should == {:boundary => '----=_=NextPart_000_0093_01C81419.EB75E850'}
+      c.parameters.should == {"boundary" => '----=_=NextPart_000_0093_01C81419.EB75E850'}
     end
 
     it "should handle 'multipart/related;boundary=1_4626B816_9F1690;Type=\"application/smil\";Start=\"<mms.smil.txt>\"'" do
@@ -566,7 +566,7 @@
       c.content_type.should == 'multipart/related'
       c.main_type.should == 'multipart'
       c.sub_type.should == 'related'
-      c.parameters.should == {:boundary => '1_4626B816_9F1690', :Type => 'application/smil', :Start => '<mms.smil.txt>'}
+      c.parameters.should == {"boundary" => '1_4626B816_9F1690', "Type" => 'application/smil', "Start" => '<mms.smil.txt>'}
     end
 
     it "should handle 'IMAGE/JPEG; name=\"IM 006.jpg\"'" do
@@ -575,7 +575,7 @@
       c.content_type.should == 'image/jpeg'
       c.main_type.should == 'image'
       c.sub_type.should == 'jpeg'
-      c.parameters.should == {:name => "IM 006.jpg"}
+      c.parameters.should == {"name" => "IM 006.jpg"}
     end
 
   end
@@ -623,7 +623,7 @@
         result = %Q{Content-Type: application/octet-stream;\r\n\sfilename*=sjis'jp'01%20Quien%20Te%20Dij%91at.%20Pitbull.mp3\r\n}
       end
       c.filename = string
-      c.parameters.should == {:filename => string}
+      c.parameters.should == {"filename" => string}
       c.encoded.should == result
       $KCODE = @original if RUBY_VERSION < '1.9'
     end
diff --git a/spec/mail/message_spec.rb b/spec/mail/message_spec.rb
index e482e6e1e..a0a811f7a 100644
--- a/spec/mail/message_spec.rb
+++ b/spec/mail/message_spec.rb
@@ -1065,19 +1065,19 @@ def basic_email
           mail = Mail.new
           mail.content_type = ["text", "plain", { :charset => 'US-ASCII' }]
           mail[:content_type].encoded.should == %Q[Content-Type: text/plain;\r\n\scharset=US-ASCII\r\n]
-          mail.content_type_parameters.should == {:charset => "US-ASCII"}
+          mail.content_type_parameters.should == {"charset" => "US-ASCII"}
         end
 
         it "should be able to set a content type with an array and hash with a non-usascii field" do
           mail = Mail.new
           mail.content_type = ["text", "plain", { :charset => 'UTF-8' }]
           mail[:content_type].encoded.should == %Q[Content-Type: text/plain;\r\n\scharset=UTF-8\r\n]
-          mail.content_type_parameters.should == {:charset => "UTF-8"}
+          mail.content_type_parameters.should == {"charset" => "UTF-8"}
         end
 
         it "should allow us to specify a content type in a block" do
           mail = Mail.new { content_type ["text", "plain", { "charset" => "UTF-8" }] }
-          mail.content_type_parameters.should == {:charset => "UTF-8"}
+          mail.content_type_parameters.should == {"charset" => "UTF-8"}
         end
 
       end
@@ -1494,6 +1494,7 @@ def self.delivering_email(mail)
 
     it "shouldn't die with an invalid Content-Type header" do
       mail = Mail.new('Content-Type: invalid/invalid; charset="iso-8859-1"')
+      # attachment? is only called inside the expectation so a raise is reported by should_not raise_error
       doing { mail.attachment? }.should_not raise_error
     end
 
diff --git a/spec/mail/mime_messages_spec.rb b/spec/mail/mime_messages_spec.rb
index 0b681ed74..b20d9b720 100644
--- a/spec/mail/mime_messages_spec.rb
+++ b/spec/mail/mime_messages_spec.rb
@@ -53,7 +53,7 @@
 
       it "should return the content-type parameters" do
         mail = Mail.new("Content-Type: text/plain; charset=US-ASCII; format=flowed")
-        mail.content_type_parameters.should == {:charset => 'US-ASCII', :format => 'flowed'}
+        mail.content_type_parameters.should == {"charset" => 'US-ASCII', "format" => 'flowed'}
       end
 
       it "should recognize a multipart email" do
@@ -455,7 +455,7 @@
         m.parts.first[:content_type].content_type.should == 'image/png'
         m.parts.last[:content_type].content_type.should == 'text/plain'
       end
-      
+
       it "should allow you to add a body as text part if you have added a file and not truncate after newlines - issue 208" do
         m = Mail.new do
           from    'mikel@from.lindsaar.net'