Skip to content

Commit

Permalink
Describe the behavior with IDN URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
dentarg committed Sep 15, 2016
1 parent ba6c5a4 commit 15414ed
Show file tree
Hide file tree
Showing 3 changed files with 156 additions and 4 deletions.
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,32 @@ url.path # => "/search"
url.without_scheme # => "//www.twingly.co.uk/search"
url.valid? # => "true"

url = Twingly::URL.parse("http://räksmörgås.макдональдс.рф/foo")
url.scheme # => "http"
url.trd # => "räksmörgås"
url.sld # => "макдональдс"
url.tld # => "рф"
url.ttld # => "рф"
url.domain # => "макдональдс.рф"
url.host # => "räksmörgås.макдональдс.рф"
url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
url.path # => "/foo"
url.without_scheme # => "//räksmörgås.макдональдс.рф/foo"
url.valid? # => "true"

url = Twingly::URL.parse("http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo")
url.scheme # => "http"
url.trd # => "xn--rksmrgs-5wao1o"
url.sld # => "xn--80aalb1aicli8a5i"
url.tld # => "xn--p1ai"
url.ttld # => "xn--p1ai"
url.domain # => "xn--80aalb1aicli8a5i.xn--p1ai"
url.host # => "xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
url.origin # => "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai"
url.path # => "/foo"
url.without_scheme # => "//xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
url.valid? # => "true"

url = Twingly::URL.parse("https://admin:correcthorsebatterystaple@example.com/")
url.scheme # => "https"
url.trd # => ""
Expand Down
36 changes: 36 additions & 0 deletions examples/url.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,42 @@

puts

# Example: parse an IDN URL written with Unicode labels and print, for each
# reader in turn, a line of the form `url.<reader> # => "<value>"`.
unicode_idn_url_as_string = "http://räksmörgås.макдональдс.рф/foo"
url = Twingly::URL.parse(unicode_idn_url_as_string)

puts "url = Twingly::URL.parse(\"#{unicode_idn_url_as_string}\")"

# Readers are listed in the order they should appear in the output.
%w[scheme trd sld tld ttld domain host origin path without_scheme valid?].each do |reader|
  puts "url.#{reader} # => \"#{url.public_send(reader)}\""
end

puts

# Example: parse an IDN URL written in its ASCII ("xn--") form and print, for
# each reader in turn, a line of the form `url.<reader> # => "<value>"`.
ascii_idn_url_as_string = "http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
url = Twingly::URL.parse(ascii_idn_url_as_string)

puts "url = Twingly::URL.parse(\"#{ascii_idn_url_as_string}\")"

# Readers are listed in the order they should appear in the output.
%w[scheme trd sld tld ttld domain host origin path without_scheme valid?].each do |reader|
  puts "url.#{reader} # => \"#{url.public_send(reader)}\""
end

puts

url_as_string = "https://admin:correcthorsebatterystaple@example.com/"
url = Twingly::URL.parse(url_as_string)

Expand Down
98 changes: 94 additions & 4 deletions spec/lib/twingly/url_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,14 @@ def valid_urls
end

describe Twingly::URL do
# IDN test URL whose host labels are written with non-ASCII (Latin and
# Cyrillic) characters.
let(:unicode_idn_test_url) do
"http://räksmörgås.макдональдс.рф/foo"
end

# IDN test URL whose host labels are written in ASCII "xn--" form. The #origin
# examples in this spec expect the Unicode URL above to produce this same host.
let(:ascii_idn_test_url) do
"http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai/foo"
end

let(:test_url) do
"http://www.blog.twingly.co.uk/2015/07/01/language-detection-changes/"
end
Expand Down Expand Up @@ -165,24 +173,58 @@ def valid_urls

# NOTE(review): the enclosing describe (and its subject) start outside this
# hunk — presumably this is the "#trd" group; confirm against the full spec.
# A host with no label in front of the domain is expected to give "".
context "when the url contains no trd" do
let(:test_url){ "http://twingly.com" }

it { is_expected.to eq("") }
end

# IDN hosts: the expected value is the first host label, in the same form
# (Unicode or ASCII) as the URL that was given.
context "internationalized domain name" do
describe "given in Unicode" do
let(:test_url) { unicode_idn_test_url }
it { is_expected.to eq("räksmörgås") }
end

describe "given in ASCII" do
let(:test_url) { ascii_idn_test_url }
it { is_expected.to eq("xn--rksmrgs-5wao1o") }
end
end
end

# Expectations for url.sld. Without an override of test_url the expected value
# is "twingly".
describe "#sld" do
subject { url.sld }
it { is_expected.to eq("twingly") }

# IDN hosts: the expected sld is in the same form (Unicode or ASCII) as the
# URL that was given; no conversion between the two forms is expected.
context "internationalized domain name" do
describe "given in Unicode" do
let(:test_url) { unicode_idn_test_url }
it { is_expected.to eq("макдональдс") }
end

describe "given in ASCII" do
let(:test_url) { ascii_idn_test_url }
it { is_expected.to eq("xn--80aalb1aicli8a5i") }
end
end
end

# Expectations for url.tld. Without an override of test_url the expected value
# is the two-label "co.uk".
describe "#tld" do
subject { url.tld }
it { is_expected.to eq("co.uk") }

# NOTE(review): the `context "punycoded TLD with multiple levels"` opener, its
# `let`, and the bare `it` further down duplicate the nested describe of the
# same name below, and the do/end pairs in this span do not balance. This looks
# like the old and new versions of the example shown together — confirm
# against the actual spec file before relying on this structure.
context "punycoded TLD with multiple levels" do
let(:test_url) { "https://foo.sande.xn--mre-og-romsdal-qqb.no/bar" }
# IDN hosts: the expected tld is in the same form (Unicode or ASCII) as the
# URL that was given.
context "internationalized domain name" do
describe "given in Unicode" do
let(:test_url) { unicode_idn_test_url }
it { is_expected.to eq("рф") }
end

describe "given in ASCII" do
let(:test_url) { ascii_idn_test_url }
it { is_expected.to eq("xn--p1ai") }
end

it { is_expected.to eq("sande.xn--mre-og-romsdal-qqb.no") }
# A TLD made of several labels, one of them in "xn--" form, is expected back
# whole and unconverted.
describe "punycoded TLD with multiple levels" do
let(:test_url) { "https://foo.sande.xn--mre-og-romsdal-qqb.no/bar" }
it { is_expected.to eq("sande.xn--mre-og-romsdal-qqb.no") }
end
end
end

Expand All @@ -195,21 +237,69 @@ def valid_urls

it { is_expected.to eq("com") }
end

# NOTE(review): the enclosing describe (and its subject) start outside this
# hunk — presumably this is the "#ttld" group; confirm against the full spec.
# IDN hosts: the expected value is the last host label, in the same form
# (Unicode or ASCII) as the URL that was given.
context "internationalized domain name" do
describe "given in Unicode" do
let(:test_url) { unicode_idn_test_url }
it { is_expected.to eq("рф") }
end

describe "given in ASCII" do
let(:test_url) { ascii_idn_test_url }
it { is_expected.to eq("xn--p1ai") }
end
end
end

# Expectations for url.domain. Without an override of test_url the expected
# value is "twingly.co.uk".
describe "#domain" do
subject { url.domain }
it { is_expected.to eq("twingly.co.uk") }

# IDN hosts: the expected domain is the last two host labels, in the same form
# (Unicode or ASCII) as the URL that was given.
context "internationalized domain name" do
describe "given in Unicode" do
let(:test_url) { unicode_idn_test_url }
it { is_expected.to eq("макдональдс.рф") }
end

describe "given in ASCII" do
let(:test_url) { ascii_idn_test_url }
it { is_expected.to eq("xn--80aalb1aicli8a5i.xn--p1ai") }
end
end
end

# Expectations for url.host. Without an override of test_url the expected
# value is "www.blog.twingly.co.uk".
describe "#host" do
subject { url.host }
it { is_expected.to eq("www.blog.twingly.co.uk") }

# IDN hosts: the expected host is the full host name in the same form
# (Unicode or ASCII) as the URL that was given.
context "internationalized domain name" do
describe "given in Unicode" do
let(:test_url) { unicode_idn_test_url }
it { is_expected.to eq("räksmörgås.макдональдс.рф") }
end

describe "given in ASCII" do
let(:test_url) { ascii_idn_test_url }
it { is_expected.to eq("xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai") }
end
end
end

# Expectations for url.origin. Without an override of test_url the expected
# value is "http://www.blog.twingly.co.uk" (scheme and host, no path).
describe "#origin" do
subject { url.origin }
it { is_expected.to eq("http://www.blog.twingly.co.uk") }

# IDN hosts: unlike #host, the expected origin uses the ASCII "xn--" host for
# both inputs, so the Unicode and ASCII URLs share one expected value.
context "internationalized domain name" do
describe "given in Unicode" do
let(:test_url) { unicode_idn_test_url }
it { is_expected.to eq("http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai") }
end

describe "given in ASCII" do
let(:test_url) { ascii_idn_test_url }
it { is_expected.to eq("http://xn--rksmrgs-5wao1o.xn--80aalb1aicli8a5i.xn--p1ai") }
end
end
end

describe "#path" do
Expand Down

0 comments on commit 15414ed

Please sign in to comment.