Permalink
Browse files

Replace invalid gTLD with valid gTLD.

Add test cases for URLs without protocol.
  • Loading branch information...
keita
keita committed Sep 12, 2011
1 parent 55be804 commit 62627e14ddd14ee5cbbeb09d97057d65bb5fdc21
Showing with 49 additions and 20 deletions.
  1. +11 −7 autolink.yml
  2. +17 −13 extract.yml
  3. +21 −0 validate.yml
View
@@ -389,8 +389,8 @@ tests:
expected: "text <a href=\"http://msdn.com/S(deadbeef)/page.htm\">http://msdn.com/S(deadbeef)/page.htm</a>"
- description: "Autolink url with balanced parens hiding XSS"
- text: 'text http://foo.bar/("onclick="alert(1)")'
- expected: 'text <a href="http://foo.bar/">http://foo.bar/</a>("onclick="alert(1)")'
+ text: 'text http://foo.com/("onclick="alert(1)")'
+ expected: 'text <a href="http://foo.com/">http://foo.com/</a>("onclick="alert(1)")'
- description: "Autolink url should NOT capture unbalanced parens"
text: "Parenthetically bad http://example.com/i_has_a_) thing"
@@ -549,8 +549,8 @@ tests:
expected: "<a href=\"http://www.flickr.com/photos/29674651@N00/foobar\">http://www.flickr.com/photos/29674651@N00/foobar</a>"
- description: "Autolink URL with only a domain followed by a period doesn't swallow the period."
- text: "I think it's proper to end sentences with a period http://tell.me. Even when they contain a URL."
- expected: "I think it's proper to end sentences with a period <a href=\"http://tell.me\">http://tell.me</a>. Even when they contain a URL."
+ text: "I think it's proper to end sentences with a period http://tell.me.com. Even when they contain a URL."
+ expected: "I think it's proper to end sentences with a period <a href=\"http://tell.me.com\">http://tell.me.com</a>. Even when they contain a URL."
- description: "Autolink URL with a path followed by a period doesn't swallow the period."
text: "I think it's proper to end sentences with a period http://tell.me/why. Even when they contain a URL."
@@ -616,10 +616,14 @@ tests:
text: "@user Try http:// example.com/path"
expected: "@user Try http:// example.com/path"
- - decription: "Autolink URL should link paths containing accented characters"
+ - description: "Autolink URL should link paths containing accented characters"
text: "See: http://example.com/café"
expected: "See: <a href=\"http://example.com/café\">http://example.com/café</a>"
+ - description: "Autolink URL should not link URL without protocol"
+ text: "See: www.twitter.com or twitter.com/twitter"
+ expected: "See: www.twitter.com or twitter.com/twitter"
+
all:
- description: "Autolink all does not break on URL with @"
text: "http://www.flickr.com/photos/29674651@N00/4382024406 if you know what's good for you."
@@ -638,5 +642,5 @@ tests:
expected: "See: <a href=\"http://example.com/@user/\">http://example.com/@user/</a>"
- description: "Does not allow an XSS after an @"
- text: "See: http://x.xx/@\"style=\"color:pink\"onmouseover=alert(1)//"
- expected: "See: <a href=\"http://x.xx/\">http://x.xx/</a>@\"style=\"color:pink\"onmouseover=alert(1)//"
+ text: "See: http://x.xx.com/@\"style=\"color:pink\"onmouseover=alert(1)//"
+ expected: "See: <a href=\"http://x.xx.com/\">http://x.xx.com/</a>@\"style=\"color:pink\"onmouseover=alert(1)//"
View
@@ -234,24 +234,28 @@ tests:
expected: []
- description: "Extract a very long hyphenated sub-domain URL (single letter hyphens)"
- text: "text http://word-and-a-number-8-ftw.domain.tld/"
- expected: ["http://word-and-a-number-8-ftw.domain.tld/"]
+ text: "text http://word-and-a-number-8-ftw.domain.com/"
+ expected: ["http://word-and-a-number-8-ftw.domain.com/"]
- description: "Extract a hyphenated TLD (usually a typo)"
- text: "text http://domain.tld-that-you-should-have-put-a-space-after"
- expected: ["http://domain.tld"]
+ text: "text http://domain.com-that-you-should-have-put-a-space-after"
+ expected: ["http://domain.com"]
- description: "Extract URL ending with # value"
text: "text http://foo.com?#foo text"
expected: ["http://foo.com?#foo"]
- - description: "SHOULD NOT Extract URLs without protocol on (com|org|edu|gov|net) domains"
+ - description: "Extract URLs without protocol on (com|org|edu|gov|net) domains"
text: "foo.com foo.net foo.org foo.edu foo.gov"
- expected: []
+ expected: ["foo.com", "foo.net", "foo.org", "foo.edu", "foo.gov"]
- - description: "DO NOT extract URLs withour protocol not on (com|org|edu|gov|net) domains, even when preceded by www."
- text: "foo.bar foo.co.jp www.foo.bar www.foo.co.uk wwwww.foo foo.comm foo.somecom foo.govedu"
- expected: []
+ - description: "Extract URLs without protocol not on (com|org|edu|gov|net) domains"
+ text: "foo.bar foo.co.jp www.foo.bar www.foo.co.uk wwwww.foo foo.comm foo.somecom foo.govedu foo.jp"
+ expected: ["foo.co.jp", "www.foo.co.uk"]
+
+ - description: "Extract URLs with protocol on ccTLD domains"
+ text: "http://foo.jp http://fooooo.jp"
+ expected: ["http://foo.jp", "http://fooooo.jp"]
- description: "Extract URLs with a - or + at the end of the path"
text: "Go to http://example.com/a+ or http://example.com/a-"
@@ -261,13 +265,13 @@ tests:
text: "Go to http://example.com/view/slug-url-?foo=bar"
expected: ["http://example.com/view/slug-url-?foo=bar"]
- - description: "DO NOT extract URLs beginning with a space"
+ - description: "Extract URLs beginning with a space"
text: "@user Try http:// example.com/path"
- expected: []
+ expected: ["example.com/path"]
- - description: "DO NOT extract URLs beginning with a non-breaking space (U+00A0)"
+ - description: "Extract URLs beginning with a non-breaking space (U+00A0)"
text: "@user Try http:// example.com/path"
- expected: []
+ expected: ["example.com/path"]
- description: "Extract URLs with underscores and dashes in the subdomain"
text: "test http://sub_domain-dash.twitter.com"
View
@@ -176,3 +176,24 @@ tests:
- description: "Invalid url: trailing space"
text: "http://example.com/#anchor "
expected: false
+
+ urls_without_protocol:
+ - description: "Valid url without protocol: domain + gTLD"
+ text: "example.com"
+ expected: true
+
+ - description: "Valid url without protocol: subdomain + domain + gTLD"
+ text: "www.example.com"
+ expected: true
+
+ - description: "Valid url without protocol: domain + ccTLD"
+ text: "t.co"
+ expected: true
+
+ - description: "Valid url without protocol: subdomain + domain + ccTLD"
+ text: "foo.co.jp"
+ expected: true
+
+ - description: "Valid url without protocol: domain + path + query"
+ text: "example.com/path/to/resource?search=foo&lang=en"
+ expected: true

0 comments on commit 62627e1

Please sign in to comment.