twingly · jage · Feb 21, 2014 · Feb 20, 2014 · Feb 20, 2014 · Feb 20, 2014
diff --git a/.travis.yml b/.travis.yml
@@ -0,0 +1,14 @@
+language: ruby
+
+rvm:
+  - 2.0.0 # the only Ruby version the build runs against
+
+cache: bundler # cache installed gems between builds
+
+notifications:
+  email: false # no e-mail notifications
+  hipchat:
+    rooms: 0715dd54b78b69f7dc310969a35036@208408 # NOTE(review): looks like a plaintext token@room credential; consider an encrypted `secure:` value
+    on_success: never # stay quiet on passing builds
+    on_failure: change # presumably notifies only when the build status changes; confirm against Travis docs
+    template: '%{repository}#%{build_number} (%{branch} - %{commit} : %{author}): %{message}'
diff --git a/Gemfile b/Gemfile
@@ -0,0 +1,3 @@
+source 'https://rubygems.org/'
+
+gemspec
diff --git a/Gemfile.lock b/Gemfile.lock
@@ -0,0 +1,51 @@
+PATH
+  remote: .
+  specs:
+    twingly-url-normalizer (0.0.1)
+      domainatrix
+      postrank-uri
+
+GEM
+  remote: https://rubygems.org/
+  specs:
+    activesupport (4.0.3)
+      i18n (~> 0.6, >= 0.6.4)
+      minitest (~> 4.2)
+      multi_json (~> 1.3)
+      thread_safe (~> 0.1)
+      tzinfo (~> 0.3.37)
+    addressable (2.3.5)
+    ansi (1.4.3)
+    atomic (1.1.14)
+    domainatrix (0.0.11)
+      addressable
+    i18n (0.6.9)
+    minitest (4.7.5)
+    multi_json (1.8.4)
+    nokogiri (1.5.11)
+    postrank-uri (1.0.17)
+      addressable (~> 2.3.0)
+      nokogiri (~> 1.5.5)
+      public_suffix (~> 1.1.3)
+    public_suffix (1.1.3)
+    rake (10.1.1)
+    shoulda (3.5.0)
+      shoulda-context (~> 1.0, >= 1.0.1)
+      shoulda-matchers (>= 1.4.1, < 3.0)
+    shoulda-context (1.1.6)
+    shoulda-matchers (2.5.0)
+      activesupport (>= 3.0.0)
+    thread_safe (0.1.3)
+      atomic
+    turn (0.9.6)
+      ansi
+    tzinfo (0.3.38)
+
+PLATFORMS
+  ruby
+
+DEPENDENCIES
+  rake
+  shoulda
+  turn
+  twingly-url-normalizer!
diff --git a/README.md b/README.md
@@ -1,5 +1,7 @@
 # twingly-url-normalizer
 
+[![Build Status](https://magnum.travis-ci.com/twingly/twingly-url-normalizer.png?token=ADz8fWxRD3uP4KZPPZQS&branch=master)](https://magnum.travis-ci.com/twingly/twingly-url-normalizer)
+
 Ruby gem for URL normalization
 
 ## Example
@@ -10,3 +12,9 @@ Ruby gem for URL normalization
 [6] pry(main)> Twingly::URL::Normalizer.normalize('duh.se')
 => ["http://www.duh.se/"]
 ```
+
+## Tests
+
+Run the tests with:
+
+    bundle exec rake
diff --git a/Rakefile b/Rakefile
@@ -0,0 +1,19 @@
+require 'bundler/setup'
+
+task default: 'test:unit' # plain `rake` runs the unit tests
+task test:    'test:unit' # `rake test` also runs the unit tests
+
+require 'rake/testtask'
+namespace :test do
+  Rake::TestTask.new(:unit) do |test| # defines the test:unit task
+    test.pattern = "test/unit/*_test.rb"
+    test.libs << 'lib'
+    test.libs << 'test' # puts test/ on the load path so tests can `require 'test_helper'`
+  end
+
+  Rake::TestTask.new(:profile) do |test| # defines the test:profile task (no test/profile files appear in this diff)
+    test.pattern = "test/profile/*_test.rb"
+    test.libs << 'lib'
+    test.libs << 'test'
+  end
+end
diff --git a/lib/twingly-url-normalizer.rb b/lib/twingly-url-normalizer.rb
@@ -2,21 +2,26 @@
 require 'domainatrix'
 require 'uri'
 
-# TODO
-# * Handle blogspot.se -> blogspot.com
-
 module Twingly
   module URL
     class Normalizer
-      def self.normalize(potential_url)
-        PostRank::URI.extract(potential_url).map do |url|
-          subdomain = Domainatrix.parse(url).subdomain
-          uri = URI.parse(url)
-          if subdomain.empty?
-            uri.host = "www.#{uri.host}"
-          end
-          uri.to_s
+      def self.normalize(potential_urls) # extracts every URL from the input and returns the normalized form of each
+        extract_urls(potential_urls).map do |url|
+          normalize_url(url)
+        end
+      end
+
+      def self.extract_urls(potential_urls) # thin wrapper around PostRank::URI.extract; the tests pass it a String, an Array and nil
+        PostRank::URI.extract(potential_urls)
+      end
+
+      def self.normalize_url(url) # returns url as a String, prefixing the host with "www." when Domainatrix reports no subdomain
+        subdomain = Domainatrix.parse(url).subdomain
+        uri = URI.parse(url)
+        if subdomain.empty?
+          uri.host = "www.#{uri.host}"
         end
+        uri.to_s
       end
     end
   end

diff --git a/test/test_helper.rb b/test/test_helper.rb
@@ -0,0 +1,5 @@
+require 'bundler/setup'
+require 'turn/autorun'
+require 'shoulda'
+
+require 'twingly-url-normalizer'
diff --git a/test/unit/normalization_test.rb b/test/unit/normalization_test.rb
@@ -0,0 +1,80 @@
+require 'test_helper'
+
+class NormalizerTest < Test::Unit::TestCase
+  context ".normalize" do
+    setup do
+      @normalizer = Twingly::URL::Normalizer
+    end
+
+    should "accept a String" do
+      assert @normalizer.normalize("")
+    end
+
+    should "accept an Array" do
+      assert @normalizer.normalize([])
+    end
+
+    should "handle URL with ] in it" do
+      url = "http://www.iwaseki.co.jp/cgi/yybbs/yybbs.cgi/%DEuropean]buy"
+      assert @normalizer.normalize(url)
+    end
+
+    should "handle URL with reference to another URL in it" do
+      url = "http://news.google.com/news/url?sa=t&fd=R&usg=AFQjCNGc4A_sfGS6fMMqggiK_8h6yk2miw&url=http:%20%20%20//fansided.com/2013/08/02/nike-decides-to-drop-milwaukee-brewers-ryan-braun"
+      assert @normalizer.normalize(url)
+    end
+  end
+
+  context ".extract_urls" do
+    setup do
+      @normalizer = Twingly::URL::Normalizer
+    end
+
+    should "detect two urls in a String" do
+      urls = "http://blog.twingly.com/ http://twingly.com/"
+      response = @normalizer.extract_urls(urls)
+
+      response.size.must_equal 2
+    end
+
+    should "detect two urls in an Array" do
+      urls = %w(http://blog.twingly.com/ http://twingly.com/)
+      response = @normalizer.extract_urls(urls)
+
+      response.size.must_equal 2
+    end
+
+    should "return an Array" do
+      response = @normalizer.extract_urls(nil)
+
+      response.must_be_instance_of Array
+    end
+  end
+
+  context ".normalize_url" do
+    setup do
+      @normalizer = Twingly::URL::Normalizer
+    end
+
+    should "add www if host is missing a subdomain" do
+      url = "http://twingly.com/"
+      result = @normalizer.normalize_url(url)
+
+      assert_equal "http://www.twingly.com/", result
+    end
+
+    should "not add www if the host has a subdomain" do
+      url = "http://blog.twingly.com/"
+      result = @normalizer.normalize_url(url)
+
+      assert_equal "http://blog.twingly.com/", result
+    end
+
+    should "keep www if the host already has it" do
+      url = "http://www.twingly.com/"
+      result = @normalizer.normalize_url(url)
+
+      assert_equal "http://www.twingly.com/", result
+    end
+  end
+end
diff --git a/twingly-url-normalizer.gemspec b/twingly-url-normalizer.gemspec
@@ -15,6 +15,10 @@ Gem::Specification.new do |s|
   s.add_dependency "postrank-uri"
   s.add_dependency "domainatrix"
 
+  s.add_development_dependency "turn"
+  s.add_development_dependency "rake"
+  s.add_development_dependency "shoulda"
+
   s.files        = Dir.glob("{lib}/**/*") + %w(README.md)
   s.require_path = 'lib'
 end