diff --git a/lib/twitter_cldr/core_ext/string.rb b/lib/twitter_cldr/core_ext/string.rb index 10f1020c6..9a786fd91 100644 --- a/lib/twitter_cldr/core_ext/string.rb +++ b/lib/twitter_cldr/core_ext/string.rb @@ -32,7 +32,7 @@ def normalize end def code_points - TwitterCldr::Normalizers::Base.string_to_code_points(@base_obj) + TwitterCldr::Utils::CodePoints.from_string(@base_obj) end def to_s diff --git a/lib/twitter_cldr/normalizers.rb b/lib/twitter_cldr/normalizers.rb index 2ef5340f3..7d2efb5be 100644 --- a/lib/twitter_cldr/normalizers.rb +++ b/lib/twitter_cldr/normalizers.rb @@ -5,7 +5,7 @@ module TwitterCldr module Normalizers - autoload :Base, 'twitter_cldr/normalizers/base' - autoload :NFD, 'twitter_cldr/normalizers/canonical/nfd' + autoload :NFD, 'twitter_cldr/normalizers/nfd' + autoload :NFKD, 'twitter_cldr/normalizers/nfkd' end end \ No newline at end of file diff --git a/lib/twitter_cldr/normalizers/base.rb b/lib/twitter_cldr/normalizers/base.rb deleted file mode 100644 index c3001b3fb..000000000 --- a/lib/twitter_cldr/normalizers/base.rb +++ /dev/null @@ -1,37 +0,0 @@ -# encoding: UTF-8 - -# Copyright 2012 Twitter, Inc -# http://www.apache.org/licenses/LICENSE-2.0 - -module TwitterCldr - module Normalizers - class Base - class << self - def code_point_to_char(code_point) - [code_point.upcase.hex].pack('U*') - end - - def char_to_code_point(char) - code_point = char.unpack('U*').first.to_s(16).upcase - code_point.rjust(4, '0') #Pad to at least 4 digits - end - - def chars_to_code_points(chars) - chars.map { |char| char_to_code_point(char) } - end - - def code_points_to_chars(code_points) - code_points.map { |code_point| code_point_to_char(code_point) } - end - - def string_to_code_points(str) - chars_to_code_points(str.chars.to_a) - end - - def code_points_to_string(code_points) - code_points.inject("") { |str, code_point| str << code_point_to_char(code_point); str } - end - end - end - end -end \ No newline at end of file diff --git a/lib/twitter_cldr/normalizers/canonical/nfd.rb b/lib/twitter_cldr/normalizers/canonical/nfd.rb deleted file mode 100644 index 715da37ee..000000000 --- a/lib/twitter_cldr/normalizers/canonical/nfd.rb +++ /dev/null @@ -1,133 +0,0 @@ -# encoding: UTF-8 - -# Copyright 2012 Twitter, Inc -# http://www.apache.org/licenses/LICENSE-2.0 - -module TwitterCldr - module Normalizers - class NFD < Base - - HANGUL_CONSTANTS = { - :SBase => 0xAC00, - :LBase => 0x1100, - :VBase => 0x1161, - :TBase => 0x11A7, - :LCount => 19, - :VCount => 21, - :TCount => 28, - :NCount => 588, # VCount * TCount - :Scount => 11172 # LCount * NCount - } - - class << self - - def normalize(string) - # Convert string to code points - code_points = string.split('').map { |char| char_to_code_point(char) } - - # Normalize code points - normalized_code_points = normalize_code_points(code_points) - - # Convert normalized code points back to string - normalized_code_points.map { |code_point| code_point_to_char(code_point) }.join - end - - def normalize_code_points(code_points) - code_points = code_points.map { |code_point| decompose code_point }.flatten - reorder(code_points) - end - - # Recursively replace the given code point with the values in its Decomposition_Mapping property. 
- def decompose(code_point) - unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point) - return code_point unless unicode_data - - decomposition_mapping = unicode_data.decomposition.split - - if unicode_data.name.include?('Hangul') - decompose_hangul(code_point) - # Return the code point if compatibility mapping or if no mapping exists - elsif decomposition_mapping.first =~ /<.*>/ || decomposition_mapping.empty? - code_point - else - decomposition_mapping.map do |decomposition_code_point| - decompose(decomposition_code_point) - end.flatten - end - end - - private - - # Special decomposition for Hangul syllables. - # Documented in Section 3.12 at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf - def decompose_hangul(code_point) - s_index = code_point.hex - HANGUL_CONSTANTS[:SBase] - - l_index = s_index / HANGUL_CONSTANTS[:NCount] - v_index = (s_index % HANGUL_CONSTANTS[:NCount]) / HANGUL_CONSTANTS[:TCount] - t_index = s_index % HANGUL_CONSTANTS[:TCount] - - result = [] - - result << (HANGUL_CONSTANTS[:LBase] + l_index).to_s(16).upcase - result << (HANGUL_CONSTANTS[:VBase] + v_index).to_s(16).upcase - result << (HANGUL_CONSTANTS[:TBase] + t_index).to_s(16).upcase if t_index > 0 - - result - end - - # Swap any two adjacent code points A & B if ccc(A) > ccc(B) > 0. - def reorder(code_points) - code_points_with_cc = code_points.map { |cp| [cp, combining_class_for(cp)] } - - result = [] - accum = [] - - code_points_with_cc.each do |cp_with_cc| - if cp_with_cc[1] == 0 - unless accum.empty? - result.concat(stable_sort(accum)) - accum = [] - end - result << cp_with_cc - else - accum << cp_with_cc - end - end - - result.concat(stable_sort(accum)) unless accum.empty? - - result.map { |cp_with_cc| cp_with_cc[0] } - end - - def stable_sort(code_points_with_cc) - n = code_points_with_cc.size - 2 - - code_points_with_cc.size.times do - swapped = false - - (0..n).each do |j| - if code_points_with_cc[j][1] > code_points_with_cc[j + 1][1] - code_points_with_cc[j], code_points_with_cc[j + 1] = code_points_with_cc[j + 1], code_points_with_cc[j] - swapped = true - end - end - - break unless swapped - n -= 1 - end - - code_points_with_cc - end - - def combining_class_for(code_point) - TwitterCldr::Shared::UnicodeData.for_code_point(code_point).combining_class.to_i - rescue NoMethodError - 0 - end - - end - - end - end -end \ No newline at end of file diff --git a/lib/twitter_cldr/normalizers/nfd.rb b/lib/twitter_cldr/normalizers/nfd.rb new file mode 100644 index 000000000..94ca78e08 --- /dev/null +++ b/lib/twitter_cldr/normalizers/nfd.rb @@ -0,0 +1,30 @@ +# encoding: UTF-8 + +# Copyright 2012 Twitter, Inc +# http://www.apache.org/licenses/LICENSE-2.0 + +module TwitterCldr + module Normalizers + + # Implements normalization of a Unicode string to Normalization Form D (NFD). + # This normalization includes only Canonical Decomposition. + # + class NFD < NFKD + + class << self + + protected + + # Returns code point's Decomposition Mapping based on its Unicode data. Returns nil if the mapping has + # compatibility type (it contains compatibility formatting tag). 
+ # + def decomposition_mapping(unicode_data) + mapping = parse_decomposition_mapping(unicode_data) + mapping unless compatibility_decomposition?(mapping) + end + + end + + end + end +end \ No newline at end of file diff --git a/lib/twitter_cldr/normalizers/nfkd.rb b/lib/twitter_cldr/normalizers/nfkd.rb new file mode 100644 index 000000000..399128595 --- /dev/null +++ b/lib/twitter_cldr/normalizers/nfkd.rb @@ -0,0 +1,163 @@ +# encoding: UTF-8 + +# Copyright 2012 Twitter, Inc +# http://www.apache.org/licenses/LICENSE-2.0 + +module TwitterCldr + # Normalizers module includes algorithm for Unicode normalization. Basic information on this topic can be found in the + # Unicode Standard Annex #15 "Unicode Normalization Forms" at http://www.unicode.org/reports/tr15/. More detailed + # description is given in the section "3.11 Normalization Forms" of the Unicode Standard core specification. The + # latest version at the moment (for Unicode 6.1) is available at http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf. + # + module Normalizers + class NFKD + + class << self + + def normalize(string) + code_points = TwitterCldr::Utils::CodePoints.from_string(string) + normalized_code_points = normalize_code_points(code_points) + TwitterCldr::Utils::CodePoints.to_string(normalized_code_points) + end + + def normalize_code_points(code_points) + canonical_ordering(decomposition(code_points)) + end + + protected + + def decomposition(code_points) + code_points.map{ |code_point| decompose_recursively(code_point) }.flatten + end + + # Recursively decomposes a given code point with the values in its Decomposition Mapping property. + # + def decompose_recursively(code_point) + unicode_data = TwitterCldr::Shared::UnicodeData.for_code_point(code_point) + return code_point unless unicode_data + + if unicode_data.name.include?('Hangul') + decompose_hangul(code_point) + else + decompose_regular(code_point, decomposition_mapping(unicode_data)) + end + end + + # Decomposes regular (non-Hangul) code point. + # + def decompose_regular(code_point, mapping) + if mapping && !mapping.empty? + mapping.map{ |cp| decompose_recursively(cp) }.flatten + else + code_point + end + end + + # Returns code point's Decomposition Mapping based on its Unicode data. + # + def decomposition_mapping(unicode_data) + mapping = parse_decomposition_mapping(unicode_data) + mapping.shift if compatibility_decomposition?(mapping) # remove compatibility formatting tag + mapping + end + + def compatibility_decomposition?(mapping) + !!(COMPATIBILITY_FORMATTING_TAG_REGEXP =~ mapping.first) + end + + def parse_decomposition_mapping(unicode_data) + unicode_data.decomposition.split + end + + # Special decomposition for Hangul syllables. 
Documented in Section 3.12 at + # http://www.unicode.org/versions/Unicode6.1.0/ch03.pdf + # + def decompose_hangul(code_point) + s_index = code_point.hex - HANGUL_DECOMPOSITION_CONSTANTS[:SBase] + + l_index = s_index / HANGUL_DECOMPOSITION_CONSTANTS[:NCount] + v_index = (s_index % HANGUL_DECOMPOSITION_CONSTANTS[:NCount]) / HANGUL_DECOMPOSITION_CONSTANTS[:TCount] + t_index = s_index % HANGUL_DECOMPOSITION_CONSTANTS[:TCount] + + result = [] + + result << (HANGUL_DECOMPOSITION_CONSTANTS[:LBase] + l_index).to_s(16).upcase + result << (HANGUL_DECOMPOSITION_CONSTANTS[:VBase] + v_index).to_s(16).upcase + result << (HANGUL_DECOMPOSITION_CONSTANTS[:TBase] + t_index).to_s(16).upcase if t_index > 0 + + result + end + + # Performs the Canonical Ordering Algorithm by stable sorting of every subsequence of combining code points + # (code points that have combining class greater than zero). + # + def canonical_ordering(code_points) + code_points_with_cc = code_points.map { |cp| [cp, combining_class_for(cp)] } + + result = [] + accum = [] + + code_points_with_cc.each do |cp_with_cc| + if cp_with_cc[1] == 0 + unless accum.empty? + result.concat(stable_sort(accum)) + accum = [] + end + result << cp_with_cc + else + accum << cp_with_cc + end + end + + result.concat(stable_sort(accum)) unless accum.empty? + + result.map { |cp_with_cc| cp_with_cc[0] } + end + + # Performs stable sorting of a sequence of [code_point, combining_class] pairs. + # + def stable_sort(code_points_with_cc) + n = code_points_with_cc.size - 2 + + code_points_with_cc.size.times do + swapped = false + + (0..n).each do |j| + if code_points_with_cc[j][1] > code_points_with_cc[j + 1][1] + code_points_with_cc[j], code_points_with_cc[j + 1] = code_points_with_cc[j + 1], code_points_with_cc[j] + swapped = true + end + end + + break unless swapped + n -= 1 + end + + code_points_with_cc + end + + def combining_class_for(code_point) + TwitterCldr::Shared::UnicodeData.for_code_point(code_point).combining_class.to_i + rescue NoMethodError + 0 + end + + end + + COMPATIBILITY_FORMATTING_TAG_REGEXP = /^<.*>$/ + + HANGUL_DECOMPOSITION_CONSTANTS = { + :SBase => 0xAC00, + :LBase => 0x1100, + :VBase => 0x1161, + :TBase => 0x11A7, + :LCount => 19, + :VCount => 21, + :TCount => 28, + :NCount => 588, # VCount * TCount + :Scount => 11172 # LCount * NCount + } + + end + end +end diff --git a/lib/twitter_cldr/utils.rb b/lib/twitter_cldr/utils.rb index f6115d6c8..f38210d59 100644 --- a/lib/twitter_cldr/utils.rb +++ b/lib/twitter_cldr/utils.rb @@ -3,11 +3,11 @@ # Copyright 2012 Twitter, Inc # http://www.apache.org/licenses/LICENSE-2.0 -require 'twitter_cldr/utils/interpolation' - module TwitterCldr module Utils + autoload :CodePoints, 'twitter_cldr/utils/code_points' + class << self # adapted from: http://snippets.dzone.com/posts/show/11121 (first comment) @@ -25,4 +25,6 @@ def deep_symbolize_keys(arg) end end -end \ No newline at end of file +end + +require 'twitter_cldr/utils/interpolation' \ No newline at end of file diff --git a/lib/twitter_cldr/utils/code_points.rb b/lib/twitter_cldr/utils/code_points.rb new file mode 100644 index 000000000..185498f2e --- /dev/null +++ b/lib/twitter_cldr/utils/code_points.rb @@ -0,0 +1,40 @@ +# encoding: UTF-8 + +# Copyright 2012 Twitter, Inc +# http://www.apache.org/licenses/LICENSE-2.0 + +module TwitterCldr + module Utils + module CodePoints + + class << self + + def to_char(code_point) + [code_point.upcase.hex].pack('U*') + end + + def from_char(char) + char.unpack('U*').first.to_s(16).upcase.rjust(4, '0') + end + + def 
from_chars(chars) + chars.map { |char| from_char(char) } + end + + def to_chars(code_points) + code_points.map { |code_point| to_char(code_point) } + end + + def from_string(str) + from_chars(str.chars.to_a) + end + + def to_string(code_points) + code_points.map{ |code_point| to_char(code_point) }.join + end + + end + + end + end +end \ No newline at end of file diff --git a/spec/normalizers/base_spec.rb b/spec/normalizers/base_spec.rb deleted file mode 100644 index a38e03add..000000000 --- a/spec/normalizers/base_spec.rb +++ /dev/null @@ -1,60 +0,0 @@ -# encoding: UTF-8 - -# Copyright 2012 Twitter, Inc -# http://www.apache.org/licenses/LICENSE-2.0 - -require 'spec_helper' - -describe TwitterCldr::Normalizers::Base do - describe "#code_point_to_char" do - it "converts unicode code points to the actual character" do - TwitterCldr::Normalizers::Base.code_point_to_char("221E").should == "∞" - end - end - - describe "#char_to_code_point" do - it "converts a character to a unicode code point" do - TwitterCldr::Normalizers::Base.char_to_code_point("∞").should == "221E" - end - end - - describe "#chars_to_code_points" do - it "should handle an empty array" do - TwitterCldr::Normalizers::Base.chars_to_code_points([]).should == [] - end - - it "converts a char array to an array of unicode code points" do - TwitterCldr::Normalizers::Base.chars_to_code_points(["e", "s", "p"]).should == ["0065", "0073", "0070"] - end - end - - describe "#code_points_to_chars" do - it "should handle an empty array" do - TwitterCldr::Normalizers::Base.code_points_to_chars([]).should == [] - end - - it "converts an array of unicode code points to an array of chars" do - TwitterCldr::Normalizers::Base.code_points_to_chars(["0065", "0073", "0070"]).should == ["e", "s", "p"] - end - end - - describe "#string_to_code_points" do - it "should handle an empty string" do - TwitterCldr::Normalizers::Base.string_to_code_points("").should == [] - end - - it "converts a string into an array of unicode code points" do - TwitterCldr::Normalizers::Base.string_to_code_points("español").should == ["0065", "0073", "0070", "0061", "00F1", "006F", "006C"] - end - end - - describe "#code_points_to_string" do - it "should handle an empty array" do - TwitterCldr::Normalizers::Base.code_points_to_string([]).should == "" - end - - it "converts an array of unicode code points to a string" do - TwitterCldr::Normalizers::Base.code_points_to_string(["0065", "0073", "0070", "0061", "00F1", "006F", "006C"]).should == "español" - end - end -end \ No newline at end of file diff --git a/spec/normalizers/canonical/nfd_spec.rb b/spec/normalizers/canonical/nfd_spec.rb deleted file mode 100644 index 81f38e31b..000000000 --- a/spec/normalizers/canonical/nfd_spec.rb +++ /dev/null @@ -1,79 +0,0 @@ -# encoding: UTF-8 - -# Copyright 2012 Twitter, Inc -# http://www.apache.org/licenses/LICENSE-2.0 - -require 'spec_helper' - -require 'open-uri' - -include TwitterCldr::Normalizers - -describe NFD do - - NORMALIZERS_SPEC_PATH = File.dirname(File.dirname(__FILE__)) - - NORMALIZATION_TEST_URL = 'http://unicode.org/Public/UNIDATA/NormalizationTest.txt' - - describe "#normalize" do - NFD.normalize("庠摪饢鼢豦樄澸脧鱵礩翜艰").should == "庠摪饢鼢豦樄澸脧鱵礩翜艰" - NFD.normalize("䷙䷿").should == "䷙䷿" - NFD.normalize("ᎿᎲᎪᏨᎨᏪᎧᎵᏥ").should == "ᎿᎲᎪᏨᎨᏪᎧᎵᏥ" - NFD.normalize("ᆙᅓᆼᄋᇶ").should == "ᆙᅓᆼᄋᇶ" - NFD.normalize("…‾⁋
⁒‒′‾⁖").should == "…‾⁋
⁒‒′‾⁖" - NFD.normalize("ⶾⷕⶱⷀ").should == "ⶾⷕⶱⷀ" - end - - describe "#decompose" do - it "does not decompose a character with no decomposition mapping" do - code_points = %w[0EB8 041F 0066 1F52C A2D6] - code_points.each do |code_point| - NFD.decompose(code_point).should == code_point - end - end - - it "does not decompose a character with compatibility decomposition mapping" do - code_points = %w[A770 FB02 FC35 FD20 00BC] - code_points.each do |code_point| - NFD.decompose(code_point).should == code_point - end - end - end - - describe "#normalize_code_points" do - it "passes all the tests in NormalizersTestShort.txt" do - open(File.join(NORMALIZERS_SPEC_PATH, 'NormalizationTestShort.txt'), 'r:UTF-8') do |file| - run_normalization_test(file) - end - end - - it "passes all the tests in NormalizersTest.txt", :slow => true do - file_path = File.join(NORMALIZERS_SPEC_PATH, 'NormalizationTest.txt') - - unless File.file?(file_path) - print ' Downloading NormalizationTest.txt ... ' - open(file_path, 'w') { |file| file.write(open(NORMALIZATION_TEST_URL).read) } - puts 'done.' - end - - open(file_path, 'r:UTF-8') do |file| - run_normalization_test(file) - end - end - end - - def run_normalization_test(file) - file.each do |line| - next if line[0,1] =~ /(@|#)/ || line.empty? - - c1, c2, c3, c4, c5 = line.split(';')[0...5].map { |cps| cps.split } - - NFD.normalize_code_points(c1).should == c3 - NFD.normalize_code_points(c2).should == c3 - NFD.normalize_code_points(c3).should == c3 - NFD.normalize_code_points(c4).should == c5 - NFD.normalize_code_points(c5).should == c5 - end - end - -end \ No newline at end of file diff --git a/spec/normalizers/nfd_spec.rb b/spec/normalizers/nfd_spec.rb new file mode 100644 index 000000000..3d00a9c19 --- /dev/null +++ b/spec/normalizers/nfd_spec.rb @@ -0,0 +1,21 @@ +# encoding: UTF-8 + +# Copyright 2012 Twitter, Inc +# http://www.apache.org/licenses/LICENSE-2.0 + +require 'spec_helper' + +include TwitterCldr::Normalizers + +describe NFD do + + describe "#normalize" do + NFD.normalize("庠摪饢鼢豦樄澸脧鱵礩翜艰").should == "庠摪饢鼢豦樄澸脧鱵礩翜艰" + NFD.normalize("䷙䷿").should == "䷙䷿" + NFD.normalize("ᎿᎲᎪᏨᎨᏪᎧᎵᏥ").should == "ᎿᎲᎪᏨᎨᏪᎧᎵᏥ" + NFD.normalize("ᆙᅓᆼᄋᇶ").should == "ᆙᅓᆼᄋᇶ" + NFD.normalize("…‾⁋
⁒‒′‾⁖").should == "…‾⁋
⁒‒′‾⁖" + NFD.normalize("ⶾⷕⶱⷀ").should == "ⶾⷕⶱⷀ" + end + +end \ No newline at end of file diff --git a/spec/normalizers/normalization_spec.rb b/spec/normalizers/normalization_spec.rb new file mode 100644 index 000000000..c605304dd --- /dev/null +++ b/spec/normalizers/normalization_spec.rb @@ -0,0 +1,96 @@ +# encoding: UTF-8 + +# Copyright 2012 Twitter, Inc +# http://www.apache.org/licenses/LICENSE-2.0 + +require 'spec_helper' + +require 'open-uri' + +include TwitterCldr::Normalizers + +describe 'Unicode Normalization Algorithms' do + + NORMALIZERS_SPEC_PATH = File.dirname(__FILE__) + SHORT_TEST_PATH = File.join(NORMALIZERS_SPEC_PATH, 'NormalizationTestShort.txt') + FULL_TEST_PATH = File.join(NORMALIZERS_SPEC_PATH, 'NormalizationTest.txt') + + NORMALIZATION_TEST_URL = 'http://unicode.org/Public/UNIDATA/NormalizationTest.txt' + + shared_examples_for 'a normalization algorithm' do + it 'passes all the tests in NormalizersTestShort.txt' do + run_normalization_test(described_class, invariants, SHORT_TEST_PATH) + end + + it 'passes all the tests in NormalizersTest.txt', :slow => true do + prepare_full_test + run_normalization_test(described_class, invariants, FULL_TEST_PATH) + end + end + + describe NFD do + let(:invariants) { { 3 => [1, 2, 3], 5 => [4, 5] } } + it_behaves_like 'a normalization algorithm' + end + + describe NFKD do + let(:invariants) { { 5 => [1, 2, 3, 4, 5] } } + it_behaves_like 'a normalization algorithm' + end + + # Runs standard Unicode normalization tests from `file_path` for a given `normalizer`. Expected invariants are + # specified via `invariants` hash. + # + # E.g., if `invariants` is { 2 => [1, 2, 3], 4 => [4, 5] } than the following invariants are expected to be true: + # + # c2 == normalized(c1) == normalized(c2) == normalized(c3) + # c4 == normalized(c4) == normalized(c5) + # + # where (c1, c2,...) are columns of the normalization test separated by semicolons and normalized() is the + # normalization function. Note, how expectation and tests columns indexes match the numbers in the `invariants` hash. + # + def run_normalization_test(normalizer, invariants, file_path) + open(file_path, 'r:UTF-8') do |file| + file.each do |line| + next if line.empty? || line =~ /^(@|#)/ + + data = line.split(';')[0...5].map { |cps| cps.split } + + invariants.each do |expected_index, tests| + expected = data[expected_index - 1] + + tests.each do |test_index| + test = data[test_index - 1] + + normalized = normalizer.normalize_code_points(test) + + message = normalization_error_message(line, test, expected, normalized, test_index, expected_index) + normalized.should(eq(expected), message) + end + end + end + end + end + + # Generates helpful error message for normalization test failure. + # + def normalization_error_message(line, test, expected, normalized, test_index, expected_index) + <<-END +Test: "#{line.strip}" +Invariant: normalized(c#{test_index}) == c#{expected_index} +Expected: normalized(#{test.inspect}) == #{expected.inspect} +Got: #{normalized.inspect} + END + end + + # Downloads full Unicode normalization tests suit if necessary. + # + def prepare_full_test + return if File.file?(FULL_TEST_PATH) + + print ' Downloading NormalizationTest.txt ... ' + open(FULL_TEST_PATH, 'w') { |file| file.write(open(NORMALIZATION_TEST_URL).read) } + puts 'done.' 
+ end + +end \ No newline at end of file diff --git a/spec/utils/code_point_spec.rb b/spec/utils/code_point_spec.rb new file mode 100644 index 000000000..b4f63d087 --- /dev/null +++ b/spec/utils/code_point_spec.rb @@ -0,0 +1,62 @@ +# encoding: UTF-8 + +# Copyright 2012 Twitter, Inc +# http://www.apache.org/licenses/LICENSE-2.0 + +require 'spec_helper' + +describe TwitterCldr::Utils::CodePoints do + + describe '#to_char' do + it 'converts unicode code points to the actual character' do + TwitterCldr::Utils::CodePoints.to_char('221E').should == '∞' + end + end + + describe '#from_char' do + it 'converts a character to a unicode code point' do + TwitterCldr::Utils::CodePoints.from_char('∞').should == '221E' + end + end + + describe '#to_chars' do + it 'should handle an empty array' do + TwitterCldr::Utils::CodePoints.to_chars([]).should == [] + end + + it 'converts an array of unicode code points to an array of chars' do + TwitterCldr::Utils::CodePoints.to_chars(%w[0065 0073 0070]).should == %w[e s p] + end + end + + describe '#from_chars' do + it 'should handle an empty array' do + TwitterCldr::Utils::CodePoints.from_chars([]).should == [] + end + + it 'converts an array of chars to an array of unicode code points' do + TwitterCldr::Utils::CodePoints.from_chars(%w[e s p]).should == %w[0065 0073 0070] + end + end + + describe '#to_string' do + it 'should handle an empty array' do + TwitterCldr::Utils::CodePoints.to_string([]).should == '' + end + + it 'converts an array of unicode code points to a string' do + TwitterCldr::Utils::CodePoints.to_string(%w[0065 0073 0070 0061 00F1 006F 006C]).should == 'español' + end + end + + describe '#from_string' do + it 'should handle an empty string' do + TwitterCldr::Utils::CodePoints.from_string('').should == [] + end + + it 'converts a string into an array of unicode code points' do + TwitterCldr::Utils::CodePoints.from_string('español').should == %w[0065 0073 0070 0061 00F1 006F 006C] + end + end + +end \ No newline at end of file
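
A usage sketch of the reorganized API (reviewer notes, not part of the patch). The CodePoints return values mirror the specs added in this diff; the Å/ﬁ and combining-mark expectations below are standard UAX #15 behavior and are assumptions about output rather than values taken from the patch itself.

```ruby
require 'twitter_cldr'

# Code point helpers moved from Normalizers::Base to Utils::CodePoints
# (uppercase hex strings, left-padded to 4 digits).
TwitterCldr::Utils::CodePoints.from_string('español')
# => ["0065", "0073", "0070", "0061", "00F1", "006F", "006C"]
TwitterCldr::Utils::CodePoints.to_string(%w[0065 0073 0070])
# => "esp"

# NFD (canonical decomposition only) is now a thin subclass of NFKD, which
# additionally applies compatibility mappings such as the fi ligature.
TwitterCldr::Normalizers::NFD.normalize("\u00C5")   # Å => "A\u030A" (A + combining ring above)
TwitterCldr::Normalizers::NFD.normalize("\uFB01")   # ﬁ => "\uFB01" (compatibility-only mapping, left intact)
TwitterCldr::Normalizers::NFKD.normalize("\uFB01")  # ﬁ => "fi"

# Both forms reorder combining marks by canonical combining class, so
# dot above (ccc 230) sorts after dot below (ccc 220).
TwitterCldr::Normalizers::NFD.normalize("q\u0307\u0323")  # => "q\u0323\u0307"
```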
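
For reference, a standalone sketch of the Hangul arithmetic used in decompose_hangul, using the same constants as HANGUL_DECOMPOSITION_CONSTANTS in the patch (Unicode 6.1, section 3.12). The helper name and the sample syllables are illustrative only, not part of the library.

```ruby
# Hangul syllables decompose arithmetically rather than via a Decomposition Mapping.
S_BASE, L_BASE, V_BASE, T_BASE = 0xAC00, 0x1100, 0x1161, 0x11A7
N_COUNT, T_COUNT = 588, 28  # NCount = VCount * TCount

def decompose_hangul_example(code_point)
  s_index = code_point.hex - S_BASE

  l_index = s_index / N_COUNT               # leading consonant jamo (L)
  v_index = (s_index % N_COUNT) / T_COUNT   # vowel jamo (V)
  t_index = s_index % T_COUNT               # optional trailing consonant jamo (T)

  result = [L_BASE + l_index, V_BASE + v_index]
  result << (T_BASE + t_index) if t_index > 0
  result.map { |cp| cp.to_s(16).upcase }
end

decompose_hangul_example('AC00')  # 가 => ["1100", "1161"]
decompose_hangul_example('D55C')  # 한 => ["1112", "1161", "11AB"]
```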
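
The invariants hashes in normalization_spec.rb read as "expected column => columns that must normalize to it". Below is a worked example against one representative NormalizationTest.txt entry (columns are source; NFC; NFD; NFKC; NFKD); the specific line is reconstructed in the published test file's format and is illustrative, not taken from this patch.

```ruby
# Representative entry: U+1E0A LATIN CAPITAL LETTER D WITH DOT ABOVE.
line = '1E0A;1E0A;0044 0307;1E0A;0044 0307;'
c1, c2, c3, c4, c5 = line.split(';')[0...5].map { |cps| cps.split }

# The NFD invariants { 3 => [1, 2, 3], 5 => [4, 5] } assert that
#   c3 == NFD(c1) == NFD(c2) == NFD(c3) and c5 == NFD(c4) == NFD(c5).
TwitterCldr::Normalizers::NFD.normalize_code_points(c1)   # => ["0044", "0307"] == c3
TwitterCldr::Normalizers::NFD.normalize_code_points(c4)   # => ["0044", "0307"] == c5

# For NFKD the single invariant { 5 => [1, 2, 3, 4, 5] } requires every column
# to normalize to c5.
TwitterCldr::Normalizers::NFKD.normalize_code_points(c2)  # => ["0044", "0307"] == c5
```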