Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Include json-conformance tests to the repo

  • Loading branch information...
commit bfbb3123ecb1527d5caed03248bbea5f709bf773 1 parent 9413904
@psychs psychs authored
View
1  .gitignore
@@ -3,4 +3,3 @@
*.mode1v3
build/
xcuserdata/
-json-conformance/
View
1,695 test/json-conformance/extract.json
@@ -0,0 +1,1695 @@
+{
+ "tests": {
+ "cashtags": [
+ {
+ "expected": [
+ "TEST",
+ "Stock",
+ "symbol"
+ ],
+ "text": "Example cashtags: $TEST $Stock $symbol",
+ "description": "Extract cashtags"
+ },
+ {
+ "expected": [
+ "TEST.T",
+ "test.tt",
+ "Stock_X",
+ "symbol_ab"
+ ],
+ "text": "Example cashtags: $TEST.T $test.tt $Stock_X $symbol_ab",
+ "description": "Extract cashtags with . or _"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "$123 $test123 $TE123ST",
+ "description": "Do not extract cashtags if they contain numbers"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "$ストック $株",
+ "description": "Do not extract cashtags with non-ASCII characters"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "$ $. $- $@ $! $() $+",
+ "description": "Do not extract cashtags with punctuations"
+ },
+ {
+ "expected": [
+ "TEST",
+ "TEST"
+ ],
+ "text": "$TEST. $TEST_",
+ "description": "Do not include trailing . or _"
+ },
+ {
+ "expected": [
+ "OK"
+ ],
+ "text": "$OK$NG$BAD text$NO .$NG $$NG",
+ "description": "Do not extract cashtags if there is no space before $"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "$CashtagMustBeLessThanSixCharacter",
+ "description": "Do not extract too long cashtags"
+ }
+ ],
+ "urls_with_indices": [
+ {
+ "expected": [
+ {
+ "indices": [
+ 5,
+ 22
+ ],
+ "url": "http://google.com"
+ }
+ ],
+ "text": "text http://google.com",
+ "description": "Extract a URL"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 11,
+ 28
+ ],
+ "url": "http://google.com"
+ }
+ ],
+ "text": "皆さん見てください! http://google.com",
+ "description": "Extract a URL from a Japanese tweet"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 0,
+ 10
+ ],
+ "url": "t.co/abcde"
+ },
+ {
+ "indices": [
+ 11,
+ 23
+ ],
+ "url": "bit.ly/abcde"
+ }
+ ],
+ "text": "t.co/abcde bit.ly/abcde",
+ "description": "Extract URLs without protocol on ccTLD with slash"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 0,
+ 11
+ ],
+ "url": "twitter.com"
+ },
+ {
+ "indices": [
+ 20,
+ 31
+ ],
+ "url": "example.com"
+ },
+ {
+ "indices": [
+ 34,
+ 44
+ ],
+ "url": "t.co/abcde"
+ },
+ {
+ "indices": [
+ 46,
+ 57
+ ],
+ "url": "twitter.com"
+ },
+ {
+ "indices": [
+ 58,
+ 70
+ ],
+ "url": "example2.com"
+ },
+ {
+ "indices": [
+ 73,
+ 90
+ ],
+ "url": "twitter.com/abcde"
+ }
+ ],
+ "text": "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde",
+ "description": "Extract URLs without protocol surrounded by CJK characters"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 0,
+ 19
+ ],
+ "url": "http://twitter.com/"
+ },
+ {
+ "indices": [
+ 28,
+ 39
+ ],
+ "url": "example.com"
+ },
+ {
+ "indices": [
+ 42,
+ 59
+ ],
+ "url": "http://t.co/abcde"
+ },
+ {
+ "indices": [
+ 61,
+ 72
+ ],
+ "url": "twitter.com"
+ },
+ {
+ "indices": [
+ 75,
+ 87
+ ],
+ "url": "example2.com"
+ },
+ {
+ "indices": [
+ 90,
+ 114
+ ],
+ "url": "http://twitter.com/abcde"
+ }
+ ],
+ "text": "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde",
+ "description": "Extract URLs with and without protocol surrounded by CJK characters"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 0,
+ 20
+ ],
+ "url": "http://t.co/pbY2NfTZ"
+ },
+ {
+ "indices": [
+ 23,
+ 43
+ ],
+ "url": "http://t.co/2vYHpAc5"
+ },
+ {
+ "indices": [
+ 45,
+ 65
+ ],
+ "url": "http://t.co/ulYGBYSo"
+ },
+ {
+ "indices": [
+ 67,
+ 87
+ ],
+ "url": "http://t.co/8MkmHU0k"
+ },
+ {
+ "indices": [
+ 90,
+ 110
+ ],
+ "url": "http://t.co/TKLp64dY"
+ },
+ {
+ "indices": [
+ 113,
+ 133
+ ],
+ "url": "http://t.co/8t7G3ddS"
+ },
+ {
+ "indices": [
+ 136,
+ 156
+ ],
+ "url": "http://t.co/FNkPfmii"
+ }
+ ],
+ "text": "http://t.co/pbY2NfTZ's http://t.co/2vYHpAc5; http://t.co/ulYGBYSo: http://t.co/8MkmHU0k+c http://t.co/TKLp64dY.x http://t.co/8t7G3ddS#a http://t.co/FNkPfmii-",
+ "description": "Extract t.co URLs skipping trailing characters and adjusting indices correctly"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 0,
+ 11
+ ],
+ "url": "http://t.co"
+ },
+ {
+ "indices": [
+ 12,
+ 23
+ ],
+ "url": "http://t.co"
+ }
+ ],
+ "text": "http://t.co http://t.co",
+ "description": "Extract correct indices for duplicate instances of the same URL"
+ }
+ ],
+ "mentions_with_indices": [
+ {
+ "expected": [
+ {
+ "screen_name": "username",
+ "indices": [
+ 0,
+ 9
+ ]
+ }
+ ],
+ "text": "@username yo!",
+ "description": "Extract a mention at the start"
+ },
+ {
+ "expected": [
+ {
+ "screen_name": "username",
+ "indices": [
+ 9,
+ 18
+ ]
+ }
+ ],
+ "text": "username @username",
+ "description": "Extract a mention that has the same thing mentioned at the start"
+ },
+ {
+ "expected": [
+ {
+ "screen_name": "username",
+ "indices": [
+ 1,
+ 10
+ ]
+ }
+ ],
+ "text": "の@usernameに到着を待っている",
+ "description": "Extract a mention in the middle of a Japanese tweet"
+ }
+ ],
+ "hashtags": [
+ {
+ "expected": [
+ "hashtag"
+ ],
+ "text": "a #hashtag here",
+ "description": "Extract an all-alpha hashtag"
+ },
+ {
+ "expected": [
+ "hashtag1"
+ ],
+ "text": "this is #hashtag1",
+ "description": "Extract a letter-then-number hashtag"
+ },
+ {
+ "expected": [
+ "1hashtag"
+ ],
+ "text": "#1hashtag is this",
+ "description": "Extract a number-then-letter hashtag"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "On the #16 bus",
+ "description": "DO NOT Extract an all-numeric hashtag"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "#0",
+ "description": "DO NOT Extract a single numeric hashtag"
+ },
+ {
+ "expected": [
+ "hashtag1",
+ "hashtag2",
+ "hashtag3",
+ "hashtag4",
+ "hashtag5",
+ "hashtag6"
+ ],
+ "text": "(#hashtag1 )#hashtag2 [#hashtag3 ]#hashtag4 ’#hashtag5’#hashtag6",
+ "description": "Extract hashtag after bracket"
+ },
+ {
+ "expected": [
+ "mañana"
+ ],
+ "text": "I'll write more tests #mañana",
+ "description": "Extract a hashtag containing ñ"
+ },
+ {
+ "expected": [
+ "café"
+ ],
+ "text": "Working remotely #café",
+ "description": "Extract a hashtag containing é"
+ },
+ {
+ "expected": [
+ "münchen"
+ ],
+ "text": "Getting my Oktoberfest on #münchen",
+ "description": "Extract a hashtag containing ü"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "this is not valid: # 会議中 ハッシュ",
+ "description": "DO NOT Extract a hashtag containing Japanese"
+ },
+ {
+ "expected": [
+ "트위터"
+ ],
+ "text": "What is #트위터 anyway?",
+ "description": "Extract a hashtag in Korean"
+ },
+ {
+ "expected": [
+ "ᆪᆭᄚ"
+ ],
+ "text": "Just random half-width Hangul #ᆪᆭᄚ",
+ "description": "Extract a half-width Hangul hashtag"
+ },
+ {
+ "expected": [
+ "ашок"
+ ],
+ "text": "What is #ашок anyway?",
+ "description": "Extract a hashtag in Russian"
+ },
+ {
+ "expected": [
+ "カタカナ"
+ ],
+ "text": "#カタカナ is a hashtag",
+ "description": "Extract a starting katakana hashtag"
+ },
+ {
+ "expected": [
+ "ひらがな"
+ ],
+ "text": "#ひらがな FTW!",
+ "description": "Extract a starting hiragana hashtag"
+ },
+ {
+ "expected": [
+ "漢字"
+ ],
+ "text": "#漢字 is the future",
+ "description": "Extract a starting kanji hashtag"
+ },
+ {
+ "expected": [
+ "カタカナ"
+ ],
+ "text": "Hashtag #カタカナ",
+ "description": "Extract a trailing katakana hashtag"
+ },
+ {
+ "expected": [
+ "ひらがな"
+ ],
+ "text": "Japanese hashtags #ひらがな",
+ "description": "Extract a trailing hiragana hashtag"
+ },
+ {
+ "expected": [
+ "漢字"
+ ],
+ "text": "Study time #漢字",
+ "description": "Extract a trailing kanji hashtag"
+ },
+ {
+ "expected": [
+ "カタカナ"
+ ],
+ "text": "See my #カタカナ hashtag?",
+ "description": "Extract a central katakana hashtag"
+ },
+ {
+ "expected": [
+ "ひらがな"
+ ],
+ "text": "Study #ひらがな for fun and profit",
+ "description": "Extract a central hiragana hashtag"
+ },
+ {
+ "expected": [
+ "漢字"
+ ],
+ "text": "Some say #漢字 is the past. what do they know?",
+ "description": "Extract a central kanji hashtag"
+ },
+ {
+ "expected": [
+ "日本語ハッシュタグ"
+ ],
+ "text": "日本語ハッシュタグテスト #日本語ハッシュタグ",
+ "description": "Extract a Kanji/Katakana mixed hashtag"
+ },
+ {
+ "expected": [
+ "日本語ハッシュタグ"
+ ],
+ "text": "日本語ハッシュテスト。#日本語ハッシュタグ",
+ "description": "Extract a hashtag after a punctuation"
+ },
+ {
+ "expected": [
+ "日本語ハッシュタグ"
+ ],
+ "text": "#日本語ハッシュタグ。",
+ "description": "DO NOT include a punctuation in a hashtag"
+ },
+ {
+ "expected": [
+ "hashtag123"
+ ],
+ "text": "全角英数字ハッシュタグ #hashtag123",
+ "description": "Extract a full-width Alnum hashtag"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "日本語ハッシュタグ#日本語ハッシュタグ",
+ "description": "DO NOT extract a hashtag without a preceding space"
+ },
+ {
+ "expected": [
+ "サッカー"
+ ],
+ "text": "長音ハッシュタグ。#サッカー",
+ "description": "Hashtag with chouon"
+ },
+ {
+ "expected": [
+ "サッカー"
+ ],
+ "text": "長音ハッシュタグ。#サッカー",
+ "description": "Hashtag with half-width chouon"
+ },
+ {
+ "expected": [
+ "ハッシュタグ",
+ "パピプペポ"
+ ],
+ "text": "#ハッシュタグ #パピプペポ",
+ "description": "Hashtag with half-width voiced sound marks"
+ },
+ {
+ "expected": [
+ "日本語ハッシュタグ"
+ ],
+ "text": "できましたよー!#日本語ハッシュタグ。",
+ "description": "Hashtag with half-width # after full-width !"
+ },
+ {
+ "expected": [
+ "日本語ハッシュタグ"
+ ],
+ "text": "できましたよー!#日本語ハッシュタグ。",
+ "description": "Hashtag with full-width # after full-width !"
+ },
+ {
+ "expected": [
+ "云々",
+ "学問のすゝめ",
+ "いすゞ",
+ "各〻",
+ ""
+ ],
+ "text": "#云々 #学問のすゝめ #いすゞ #各〻 #〃",
+ "description": "Hashtag with ideographic iteration mark"
+ },
+ {
+ "expected": [
+ "Ateş",
+ "qrşt",
+ "ştu",
+ "ş"
+ ],
+ "text": "Here’s a test tweet for you: #Ateş #qrşt #ştu #ş",
+ "description": "Hashtags with ş (U+015F)"
+ },
+ {
+ "expected": [
+ "İn",
+ "ın"
+ ],
+ "text": "Here’s a test tweet for you: #İn #ın",
+ "description": "Hashtags with İ (U+0130) and ı (U+0131)"
+ },
+ {
+ "expected": [
+ "hashtag",
+ "hashtag",
+ "hashtag",
+ "hashtag",
+ "hashtag",
+ "hashtag"
+ ],
+ "text": "#hashtag: #hashtag; #hashtag, #hashtag. #hashtag! #hashtag?",
+ "description": "Hashtag before punctuations"
+ },
+ {
+ "expected": [
+ "hashtag",
+ "hashtag",
+ "hashtag",
+ "hashtag",
+ "hashtag",
+ "hashtag"
+ ],
+ "text": ":#hashtag ;#hashtag ,#hashtag .#hashtag !#hashtag ?#hashtag",
+ "description": "Hashtag after punctuations"
+ },
+ {
+ "expected": [
+ "hashtag",
+ "hashtag2",
+ "hashtag3"
+ ],
+ "text": "#hashtag\ntest\n#hashtag2\ntest\n#hashtag3\n",
+ "description": "Hashtag before newline"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "#http://twitter.com #https://twitter.com",
+ "description": "DO NOT extract hashtag when # is followed by URL"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "http://twitter.com/#hashtag twitter.com/#hashtag",
+ "description": "DO NOT extract hashtag if it's a part of URL"
+ },
+ {
+ "expected": [
+ "Azərbaycanca",
+ "mûǁae",
+ "Čeština",
+ "Ċaoiṁín"
+ ],
+ "text": "#Azərbaycanca #mûǁae #Čeština #Ċaoiṁín",
+ "description": "Extract hashtags with Latin extended characters"
+ },
+ {
+ "expected": [
+ "سیاست",
+ "ایران",
+ "السياسة",
+ "السياح",
+ "لغات",
+ "اتمی",
+ "کنفرانس",
+ "العربية",
+ "الجزيرة",
+ "فارسی"
+ ],
+ "text": "#سیاست #ایران #السياسة #السياح #لغات #اتمی #کنفرانس #العربية #الجزيرة #فارسی",
+ "description": "Extract Arabic hashtags"
+ },
+ {
+ "expected": [
+ "برنامه_نویسی",
+ "رییس_جمهور",
+ "رئيس_الوزراء",
+ "ثبت_نام",
+ "لس_آنجلس"
+ ],
+ "text": "#برنامه_نویسی #رییس_جمهور #رئيس_الوزراء, #ثبت_نام. #لس_آنجلس",
+ "description": "Extract Arabic hashtags with underscore"
+ },
+ {
+ "expected": [
+ "עַל־יְדֵי",
+ "וכו׳",
+ "מ״כ"
+ ],
+ "text": "#עַל־יְדֵי #וכו׳ #מ״כ",
+ "description": "Extract Hebrew hashtags"
+ },
+ {
+ "expected": [
+ "ผู้เริ่ม",
+ "การเมือง",
+ "รายละเอียด",
+ "นักท่องเที่ยว",
+ "ของขวัญ",
+ "สนามบิน",
+ "เดินทาง",
+ "ประธาน"
+ ],
+ "text": "#ผู้เริ่ม #การเมือง #รายละเอียด #นักท่องเที่ยว #ของขวัญ #สนามบิน #เดินทาง #ประธาน",
+ "description": "Extract Thai hashtags"
+ },
+ {
+ "expected": [
+ "أي‌بي‌إم",
+ "می‌خواهم"
+ ],
+ "text": "#أي‌بي‌إم #می‌خواهم",
+ "description": "Extract Arabic hashtags with Zero-Width Non-Joiner"
+ }
+ ],
+ "cashtags_with_indices": [
+ {
+ "expected": [
+ {
+ "indices": [
+ 9,
+ 14
+ ],
+ "cashtag": "TEST"
+ },
+ {
+ "indices": [
+ 15,
+ 22
+ ],
+ "cashtag": "symbol"
+ }
+ ],
+ "text": "Example: $TEST $symbol test",
+ "description": "Extract cashtags"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 9,
+ 16
+ ],
+ "cashtag": "TEST.T"
+ },
+ {
+ "indices": [
+ 22,
+ 32
+ ],
+ "cashtag": "symbol_ab"
+ }
+ ],
+ "text": "Example: $TEST.T test $symbol_ab end",
+ "description": "Extract cashtags with . or _"
+ }
+ ],
+ "replies": [
+ {
+ "expected": "username",
+ "text": "@username reply",
+ "description": "Extract reply at the beginning of a tweet"
+ },
+ {
+ "expected": "username",
+ "text": " @username reply",
+ "description": "Extract reply preceded by only a space"
+ },
+ {
+ "expected": "username",
+ "text": " @username reply",
+ "description": "Extract reply preceded by only a full-width space (U+3000)"
+ },
+ {
+ "expected": null,
+ "text": "a @username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by text"
+ },
+ {
+ "expected": null,
+ "text": ".@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by ."
+ },
+ {
+ "expected": null,
+ "text": "/@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by /"
+ },
+ {
+ "expected": null,
+ "text": "_@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by _"
+ },
+ {
+ "expected": null,
+ "text": "-@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by -"
+ },
+ {
+ "expected": null,
+ "text": "+@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by +"
+ },
+ {
+ "expected": null,
+ "text": "#@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by #"
+ },
+ {
+ "expected": null,
+ "text": "!@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by !"
+ },
+ {
+ "expected": null,
+ "text": "@@username mention, not a reply",
+ "description": "DO NOT Extract reply when preceded by @"
+ },
+ {
+ "expected": null,
+ "text": "@http://twitter.com",
+ "description": "DO NOT Extract reply when followed by URL"
+ }
+ ],
+ "mentions_or_lists_with_indices": [
+ {
+ "expected": [
+ {
+ "list_slug": "",
+ "screen_name": "username",
+ "indices": [
+ 0,
+ 9
+ ]
+ }
+ ],
+ "text": "@username yo!",
+ "description": "Extract a mention"
+ },
+ {
+ "expected": [
+ {
+ "list_slug": "/list-name",
+ "screen_name": "username",
+ "indices": [
+ 0,
+ 19
+ ]
+ }
+ ],
+ "text": "@username/list-name is a great list!",
+ "description": "Extract a list"
+ },
+ {
+ "expected": [
+ {
+ "list_slug": "",
+ "screen_name": "username",
+ "indices": [
+ 4,
+ 13
+ ]
+ },
+ {
+ "list_slug": "/list_name-01",
+ "screen_name": "otheruser",
+ "indices": [
+ 29,
+ 52
+ ]
+ }
+ ],
+ "text": "Hey @username, check out out @otheruser/list_name-01!",
+ "description": "Extract a mention and list"
+ },
+ {
+ "expected": [
+ {
+ "list_slug": "/list_name-01",
+ "screen_name": "username",
+ "indices": [
+ 1,
+ 23
+ ]
+ }
+ ],
+ "text": "の@username/list_name-01に到着を待っている",
+ "description": "Extract a list in the middle of a Japanese tweet"
+ },
+ {
+ "expected": [
+ {
+ "list_slug": "",
+ "screen_name": "username",
+ "indices": [
+ 0,
+ 9
+ ]
+ }
+ ],
+ "text": "@username/7list-name is a great list!",
+ "description": "DO NOT extract a list with slug that starts with a number"
+ }
+ ],
+ "urls": [
+ {
+ "expected": [
+ "http://example.com"
+ ],
+ "text": "http://example.com",
+ "description": "Extract a lone URL"
+ },
+ {
+ "expected": [
+ "http://google.com"
+ ],
+ "text": "text http://google.com",
+ "description": "Extract valid URL: http://google.com"
+ },
+ {
+ "expected": [
+ "http://foobar.com/#"
+ ],
+ "text": "text http://foobar.com/#",
+ "description": "Extract valid URL: http://foobar.com/#"
+ },
+ {
+ "expected": [
+ "http://google.com/#foo"
+ ],
+ "text": "text http://google.com/#foo",
+ "description": "Extract valid URL: http://google.com/#foo"
+ },
+ {
+ "expected": [
+ "http://google.com/#search?q=iphone%20-filter%3Alinks"
+ ],
+ "text": "text http://google.com/#search?q=iphone%20-filter%3Alinks",
+ "description": "Extract valid URL: http://google.com/#search?q=iphone%20-filter%3Alinks"
+ },
+ {
+ "expected": [
+ "http://twitter.com/#search?q=iphone%20-filter%3Alinks"
+ ],
+ "text": "text http://twitter.com/#search?q=iphone%20-filter%3Alinks",
+ "description": "Extract valid URL: http://twitter.com/#search?q=iphone%20-filter%3Alinks"
+ },
+ {
+ "expected": [
+ "http://somedomain.com/index.php?path=/abc/def/"
+ ],
+ "text": "text http://somedomain.com/index.php?path=/abc/def/",
+ "description": "Extract valid URL: http://somedomain.com/index.php?path=/abc/def/"
+ },
+ {
+ "expected": [
+ "http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html"
+ ],
+ "text": "text http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html",
+ "description": "Extract valid URL: http://www.boingboing.net/2007/02/14/katamari_damacy_phon.html"
+ },
+ {
+ "expected": [
+ "http://somehost.com:3000"
+ ],
+ "text": "text http://somehost.com:3000",
+ "description": "Extract valid URL: http://somehost.com:3000"
+ },
+ {
+ "expected": [
+ "http://xo.com/~matthew+%ff-x"
+ ],
+ "text": "text http://xo.com/~matthew+%ff-x",
+ "description": "Extract valid URL: http://xo.com/~matthew+%ff-x"
+ },
+ {
+ "expected": [
+ "http://xo.com/~matthew+%ff-,.;x"
+ ],
+ "text": "text http://xo.com/~matthew+%ff-,.;x",
+ "description": "Extract valid URL: http://xo.com/~matthew+%ff-,.;x"
+ },
+ {
+ "expected": [
+ "http://xo.com/,.;x"
+ ],
+ "text": "text http://xo.com/,.;x",
+ "description": "Extract valid URL: http://xo.com/,.;x"
+ },
+ {
+ "expected": [
+ "http://en.wikipedia.org/wiki/Primer_(film)"
+ ],
+ "text": "text http://en.wikipedia.org/wiki/Primer_(film)",
+ "description": "Extract valid URL: http://en.wikipedia.org/wiki/Primer_(film)"
+ },
+ {
+ "expected": [
+ "http://www.ams.org/bookstore-getitem/item=mbk-59"
+ ],
+ "text": "text http://www.ams.org/bookstore-getitem/item=mbk-59",
+ "description": "Extract valid URL: http://www.ams.org/bookstore-getitem/item=mbk-59"
+ },
+ {
+ "expected": [
+ "http://✪df.ws/ejp"
+ ],
+ "text": "text http://✪df.ws/ejp",
+ "description": "Extract valid URL: http://✪df.ws/ejp"
+ },
+ {
+ "expected": [
+ "http://chilp.it/?77e8fd"
+ ],
+ "text": "text http://chilp.it/?77e8fd",
+ "description": "Extract valid URL: http://chilp.it/?77e8fd"
+ },
+ {
+ "expected": [
+ "http://x.com/oneletterdomain"
+ ],
+ "text": "text http://x.com/oneletterdomain",
+ "description": "Extract valid URL: http://x.com/oneletterdomain"
+ },
+ {
+ "expected": [
+ "http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"
+ ],
+ "text": "text http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx",
+ "description": "Extract valid URL: http://msdn.microsoft.com/ja-jp/library/system.net.httpwebrequest(v=VS.100).aspx"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "text http://domain-dash_2314352345_dfasd.foo-cow_4352.com",
+ "description": "DO NOT extract invalid URL: http://domain-begin_dash_2314352345_dfasd.foo-cow_4352.com"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "text http://-dash_2314352345_dfasd.foo-cow_4352.com",
+ "description": "DO NOT extract invalid URL: http://-begin_dash_2314352345_dfasd.foo-cow_4352.com"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "text http://no-tld",
+ "description": "DO NOT extract invalid URL: http://no-tld"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "text http://tld-too-short.x",
+ "description": "DO NOT extract invalid URL: http://tld-too-short.x"
+ },
+ {
+ "expected": [
+ "http://word-and-a-number-8-ftw.domain.com/"
+ ],
+ "text": "text http://word-and-a-number-8-ftw.domain.com/",
+ "description": "Extract a very long hyphenated sub-domain URL (single letter hyphens)"
+ },
+ {
+ "expected": [
+ "http://domain.com"
+ ],
+ "text": "text http://domain.com-that-you-should-have-put-a-space-after",
+ "description": "Extract a hyphenated TLD (usually a typo)"
+ },
+ {
+ "expected": [
+ "http://foo.com?#foo"
+ ],
+ "text": "text http://foo.com?#foo text",
+ "description": "Extract URL ending with # value"
+ },
+ {
+ "expected": [
+ "foo.com",
+ "foo.net",
+ "foo.org",
+ "foo.edu",
+ "foo.gov"
+ ],
+ "text": "foo.com foo.net foo.org foo.edu foo.gov",
+ "description": "Extract URLs without protocol on (com|org|edu|gov|net) domains"
+ },
+ {
+ "expected": [
+ "foo.co.jp",
+ "www.foo.co.uk"
+ ],
+ "text": "foo.bar foo.co.jp www.foo.bar www.foo.co.uk wwwww.foo foo.comm foo.somecom foo.govedu foo.jp",
+ "description": "Extract URLs without protocol not on (com|org|edu|gov|net) domains"
+ },
+ {
+ "expected": [
+ "t.co/abcde",
+ "bit.ly/abcde"
+ ],
+ "text": "t.co/abcde bit.ly/abcde",
+ "description": "Extract URLs without protocol on ccTLD with slash"
+ },
+ {
+ "expected": [
+ "http://foo.jp",
+ "http://fooooo.jp"
+ ],
+ "text": "http://foo.jp http://fooooo.jp",
+ "description": "Extract URLs with protocol on ccTLD domains"
+ },
+ {
+ "expected": [
+ "http://example.com/a+",
+ "http://example.com/a-"
+ ],
+ "text": "Go to http://example.com/a+ or http://example.com/a-",
+ "description": "Extract URLs with a - or + at the end of the path"
+ },
+ {
+ "expected": [
+ "http://example.com/view/slug-url-?foo=bar"
+ ],
+ "text": "Go to http://example.com/view/slug-url-?foo=bar",
+ "description": "Extract URLs with longer paths ending in -"
+ },
+ {
+ "expected": [
+ "example.com/path"
+ ],
+ "text": "@user Try http:// example.com/path",
+ "description": "Extract URLs beginning with a space"
+ },
+ {
+ "expected": [
+ "example.com/path/index.html",
+ "example.com/path"
+ ],
+ "text": "これは日本語です。example.com/path/index.html中国語example.com/path한국",
+ "description": "Extract long URL without protocol surrounded by CJK characters"
+ },
+ {
+ "expected": [
+ "twitter.com",
+ "example.com",
+ "t.co/abcde",
+ "twitter.com",
+ "example2.com",
+ "twitter.com/abcde"
+ ],
+ "text": "twitter.comこれは日本語です。example.com中国語t.co/abcde한국twitter.com example2.comテストtwitter.com/abcde",
+ "description": "Extract short URL without protocol surrounded by CJK characters"
+ },
+ {
+ "expected": [
+ "http://twitter.com/",
+ "example.com",
+ "http://t.co/abcde",
+ "twitter.com",
+ "example2.com",
+ "http://twitter.com/abcde"
+ ],
+ "text": "http://twitter.com/これは日本語です。example.com中国語http://t.co/abcde한국twitter.comテストexample2.comテストhttp://twitter.com/abcde",
+ "description": "Extract URLs with and without protocol surrounded by CJK characters"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "twitter.jp日本語t.co中国語foo.jp t.co foo.jp",
+ "description": "DO NOT extract short URLs without protocol on ccTLD domains without path"
+ },
+ {
+ "expected": [
+ "example.com/path"
+ ],
+ "text": "@user Try http:// example.com/path",
+ "description": "Extract URLs beginning with a non-breaking space (U+00A0)"
+ },
+ {
+ "expected": [
+ "http://sub_domain-dash.twitter.com"
+ ],
+ "text": "test http://sub_domain-dash.twitter.com",
+ "description": "Extract URLs with underscores and dashes in the subdomain"
+ },
+ {
+ "expected": [
+ "http://a.b.cd"
+ ],
+ "text": "test http://a.b.cd",
+ "description": "Extract URL with minimum number of valid characters"
+ },
+ {
+ "expected": [
+ "http://a_b.c-d.com"
+ ],
+ "text": "test http://a_b.c-d.com",
+ "description": "Extract URLs containing underscores and dashes"
+ },
+ {
+ "expected": [
+ "http://a-b.c.com"
+ ],
+ "text": "test http://a-b.c.com",
+ "description": "Extract URLs containing dashes in the subdomain"
+ },
+ {
+ "expected": [
+ "http://twitter-dash.com"
+ ],
+ "text": "test http://twitter-dash.com",
+ "description": "Extract URLs with dashes in the domain name"
+ },
+ {
+ "expected": [
+ "http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188"
+ ],
+ "text": "http://www.bestbuy.com/site/Currie+Technologies+-+Ezip+400+Scooter/9885188.p?id=1218189013070&skuId=9885188",
+ "description": "Extract URLs with lots of symbols then a period"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://-leadingdash.twitter.com",
+ "description": "DO NOT extract URLs containing leading dashes in the subdomain"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://trailingdash-.twitter.com",
+ "description": "DO NOT extract URLs containing trailing dashes in the subdomain"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://_leadingunderscore.twitter.com",
+ "description": "DO NOT extract URLs containing leading underscores in the subdomain"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://trailingunderscore_.twitter.com",
+ "description": "DO NOT extract URLs containing trailing underscores in the subdomain"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://-twitter.com",
+ "description": "DO NOT extract URLs containing leading dashes in the domain name"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://twitter-.com",
+ "description": "DO NOT extract URLs containing trailing dashes in the domain name"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://twitter_underscore.com",
+ "description": "DO NOT extract URLs containing underscores in the domain name"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "test http://twitter.c_o_m",
+ "description": "DO NOT extract URLs containing underscores in the tld"
+ },
+ {
+ "expected": [
+ "http://www.foo.com/foo/path-with-period./"
+ ],
+ "text": "test http://www.foo.com/foo/path-with-period./",
+ "description": "Extract valid URL http://www.foo.com/foo/path-with-period./"
+ },
+ {
+ "expected": [
+ "http://www.foo.org.za/foo/bar/688.1"
+ ],
+ "text": "test http://www.foo.org.za/foo/bar/688.1",
+ "description": "Extract valid URL http://www.foo.org.za/foo/bar/688.1"
+ },
+ {
+ "expected": [
+ "http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0"
+ ],
+ "text": "test http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0",
+ "description": "Extract valid URL http://www.foo.com/bar-path/some.stm?param1=foo;param2=P1|0||P2|0"
+ },
+ {
+ "expected": [
+ "http://foo.com/bar/123/foo_&_bar/"
+ ],
+ "text": "test http://foo.com/bar/123/foo_&_bar/",
+ "description": "Extract valid URL http://foo.com/bar/123/foo_&_bar/"
+ },
+ {
+ "expected": [
+ "http://www.cp.sc.edu/events/65"
+ ],
+ "text": "test http://www.cp.sc.edu/events/65 test",
+ "description": "Extract valid URL http://www.cp.sc.edu/events/65"
+ },
+ {
+ "expected": [
+ "http://www.andersondaradio.no.comunidades.net/"
+ ],
+ "text": "http://www.andersondaradio.no.comunidades.net/ test test",
+ "description": "Extract valid URL http://www.andersondaradio.no.comunidades.net/"
+ },
+ {
+ "expected": [
+ "ELPAÍS.com"
+ ],
+ "text": "test ELPAÍS.com",
+ "description": "Extract valid URL ELPAÍS.com"
+ },
+ {
+ "expected": [
+ "http://twitter.com/"
+ ],
+ "text": "test http://twitter.com/.",
+ "description": "DO NOT include period at the end of URL"
+ },
+ {
+ "expected": [
+ "http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"
+ ],
+ "text": "http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata",
+ "description": "Extract a URL with '?' in fragment"
+ },
+ {
+ "expected": [
+ "http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata"
+ ],
+ "text": "text http://tn.com.ar/show/00056158/la-estrella-del-certamen-el-turno-de-pamela-anderson?fb_xd_fragment#?=&cb=fe17523f223b7&relation=parent.parent&transport=fragment&type=resize&height=20&ackdata text",
+ "description": "Extract a URL with '?' in fragment in a text"
+ },
+ {
+ "expected": [
+ "http://example.com/path"
+ ],
+ "text": "Test a ton of periods http://example.com/path..........................................",
+ "description": "Extract a URL with a ton of trailing periods"
+ },
+ {
+ "expected": [
+ "http://example.com/"
+ ],
+ "text": "Test a ton of periods http://example.com/,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,",
+ "description": "Extract a URL with a ton of trailing commas"
+ },
+ {
+ "expected": [
+ "http://example.com/path/"
+ ],
+ "text": "Test a ton of periods http://example.com/path/!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!",
+ "description": "Extract a URL with a ton of trailing '!'"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "#test.com @test.com #http://test.com @http://test.com #t.co/abcde @t.co/abcde",
+ "description": "DO NOT extract URLs in hashtag or @mention"
+ },
+ {
+ "expected": [
+ "http://t.co/pbY2NfTZ"
+ ],
+ "text": "I really like http://t.co/pbY2NfTZ's website",
+ "description": "Extract a t.co URL with a trailing apostrophe"
+ },
+ {
+ "expected": [
+ "http://t.co/FNkPfmii"
+ ],
+ "text": "Check this site out http://t.co/FNkPfmii- it's great",
+ "description": "Extract a t.co URL with a trailing hyphen"
+ },
+ {
+ "expected": [
+ "http://t.co/ulYGBYSo"
+ ],
+ "text": "According to http://t.co/ulYGBYSo: the internet is cool",
+ "description": "Extract a t.co URL with a trailing colon"
+ },
+ {
+ "expected": [
+ "http://twitter.com",
+ "http://example.com",
+ "http://example.com/path",
+ "example.com/path",
+ "t.co/abcde"
+ ],
+ "text": "http://twitter.com\nhttp://example.com\nhttp://example.com/path\nexample.com/path\nt.co\nt.co/abcde",
+ "description": "Extract URL before newline"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "$http://twitter.com $twitter.com $http://t.co/abcde $t.co/abcde $t.co $TVI.CA $RBS.CA",
+ "description": "DO NOT extract URL if preceded by $"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "long.test.tar.bz2 test.tar.bz2 tar.bz2",
+ "description": "DO NOT extract .bz2 file name as URL"
+ }
+ ],
+ "hashtags_with_indices": [
+ {
+ "expected": [
+ {
+ "indices": [
+ 0,
+ 8
+ ],
+ "hashtag": "hashtag"
+ }
+ ],
+ "text": "#hashtag here",
+ "description": "Extract a hashtag at the start"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 7,
+ 15
+ ],
+ "hashtag": "hashtag"
+ }
+ ],
+ "text": "test a #hashtag",
+ "description": "Extract a hashtag at the end"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 7,
+ 15
+ ],
+ "hashtag": "hashtag"
+ }
+ ],
+ "text": "test a #hashtag in a string",
+ "description": "Extract a hashtag in the middle"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 7,
+ 15
+ ],
+ "hashtag": "hashtag"
+ }
+ ],
+ "text": "#123 a #hashtag in a string",
+ "description": "Extract only a valid hashtag"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 4,
+ 12
+ ],
+ "hashtag": "hashtag"
+ }
+ ],
+ "text": "会議中 #hashtag 会議中",
+ "description": "Extract a hashtag in a string of multi-byte characters"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 4,
+ 8
+ ],
+ "hashtag": "two"
+ },
+ {
+ "indices": [
+ 15,
+ 20
+ ],
+ "hashtag": "four"
+ }
+ ],
+ "text": "One #two three #four",
+ "description": "Extract multiple valid hashtags"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 12,
+ 20
+ ],
+ "hashtag": "русский"
+ }
+ ],
+ "text": "Hashtags in #русский!",
+ "description": "Extract a non-latin hashtag"
+ },
+ {
+ "expected": [
+ {
+ "indices": [
+ 12,
+ 15
+ ],
+ "hashtag": "中文"
+ },
+ {
+ "indices": [
+ 17,
+ 21
+ ],
+ "hashtag": "日本語"
+ },
+ {
+ "indices": [
+ 23,
+ 27
+ ],
+ "hashtag": "한국말"
+ },
+ {
+ "indices": [
+ 33,
+ 41
+ ],
+ "hashtag": "русский"
+ }
+ ],
+ "text": "Hashtags in #中文, #日本語, #한국말, and #русский! Try it out!",
+ "description": "Extract multiple non-latin hashtags"
+ }
+ ],
+ "mentions": [
+ {
+ "expected": [
+ "username"
+ ],
+ "text": "@username reply",
+ "description": "Extract mention at the beginning of a tweet"
+ },
+ {
+ "expected": [
+ "username"
+ ],
+ "text": "mention @username",
+ "description": "Extract mention at the end of a tweet"
+ },
+ {
+ "expected": [
+ "username"
+ ],
+ "text": "mention @username in the middle",
+ "description": "Extract mention in the middle of a tweet"
+ },
+ {
+ "expected": [
+ "user_name"
+ ],
+ "text": "mention @user_name",
+ "description": "Extract mention of username with underscore"
+ },
+ {
+ "expected": [
+ "12345"
+ ],
+ "text": "mention @12345",
+ "description": "Extract mention of all numeric username"
+ },
+ {
+ "expected": [
+ "username1",
+ "username2"
+ ],
+ "text": "mention @username1 @username2",
+ "description": "Extract mention of multiple usernames"
+ },
+ {
+ "expected": [
+ "username"
+ ],
+ "text": "の@usernameに到着を待っている",
+ "description": "Extract mention in the middle of a Japanese tweet"
+ },
+ {
+ "expected": [
+ "username"
+ ],
+ "text": "Current Status: @_@ (cc: @username)",
+ "description": "DO NOT extract username ending in @"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "@aliceìnheiro something something",
+ "description": "DO NOT extract username followed by accented latin characters"
+ },
+ {
+ "expected": [
+ "username"
+ ],
+ "text": "@username email me @test@example.com",
+ "description": "Extract lone mention but not @user@user (too close to an email)"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "@http://twitter.com",
+ "description": "DO NOT extract 'http' in '@http://' as username"
+ },
+ {
+ "expected": [
+ "username",
+ "mention"
+ ],
+ "text": "@username\n@mention",
+ "description": "Extract mentions before newline"
+ },
+ {
+ "expected": [
+ "username",
+ "mention",
+ "test"
+ ],
+ "text": "RT@username RT:@mention RT @test",
+ "description": "Extract mentions after 'RT'"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "f!@kn",
+ "description": "DO NOT extract username preceded by !"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "f@@kn",
+ "description": "DO NOT extract username preceded by @"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "f#@kn",
+ "description": "DO NOT extract username preceded by #"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "f$@kn",
+ "description": "DO NOT extract username preceded by $"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "f%@kn",
+ "description": "DO NOT extract username preceded by %"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "f&@kn",
+ "description": "DO NOT extract username preceded by &"
+ },
+ {
+ "expected": [
+
+ ],
+ "text": "f*@kn",
+ "description": "DO NOT extract username preceded by *"
+ }
+ ]
+ }
+}
View
288 test/json-conformance/validate.json
@@ -0,0 +1,288 @@
+{
+ "tests": {
+ "hashtags": [
+ {
+ "expected": true,
+ "text": "#hashtag",
+ "description": "Valid hashtag: a-z < 20 characters"
+ },
+ {
+ "expected": true,
+ "text": "#1st",
+ "description": "Valid hashtag: number followed by letters"
+ },
+ {
+ "expected": true,
+ "text": "#that1time",
+ "description": "Valid hashtag: letters and numbers mixed"
+ },
+ {
+ "expected": true,
+ "text": "#easyas123",
+ "description": "Valid hashtag: letter followed by numbers"
+ },
+ {
+ "expected": false,
+ "text": "#12345",
+ "description": "Invalid hashtag: all numbers"
+ },
+ {
+ "expected": true,
+ "text": "#ашок",
+ "description": "Valid hashtag: Russian text"
+ },
+ {
+ "expected": true,
+ "text": "#트위터",
+ "description": "Valid hashtag: Korean text"
+ }
+ ],
+ "usernames": [
+ {
+ "expected": true,
+ "text": "@username",
+ "description": "Valid username: a-z < 20 characters"
+ },
+ {
+ "expected": true,
+ "text": "@12345",
+ "description": "All-numeric usernames are allowed"
+ },
+ {
+ "expected": true,
+ "text": "@example_name",
+ "description": "Usernames should allow the _ character"
+ },
+ {
+ "expected": false,
+ "text": "@example-name",
+ "description": "Usernames SHOULD NOT allow the - character"
+ }
+ ],
+ "urls_without_protocol": [
+ {
+ "expected": true,
+ "text": "example.com",
+ "description": "Valid url without protocol: domain + gTLD"
+ },
+ {
+ "expected": true,
+ "text": "www.example.com",
+ "description": "Valid url without protocol: subdomain + domain + gTLD"
+ },
+ {
+ "expected": true,
+ "text": "t.co",
+ "description": "Valid url without protocol: domain + ccTLD"
+ },
+ {
+ "expected": true,
+ "text": "foo.co.jp",
+ "description": "Valid url without protocol: subdomain + domain + ccTLD"
+ },
+ {
+ "expected": true,
+ "text": "example.com/path/to/resource?search=foo&lang=en",
+ "description": "Valid url without protocol: domain + path + query"
+ }
+ ],
+ "urls": [
+ {
+ "expected": true,
+ "text": "http://example.com",
+ "description": "Valid url: protocol + domain"
+ },
+ {
+ "expected": true,
+ "text": "https://example.com/path/to/resource?search=foo&lang=en",
+ "description": "Valid url: ssl + domain + path + query"
+ },
+ {
+ "expected": true,
+ "text": "http://twitter.com/#!/twitter",
+ "description": "Valid url: protocol + domain + path + fragment"
+ },
+ {
+ "expected": true,
+ "text": "HTTPS://www.ExaMPLE.COM/index.html",
+ "description": "Valid url: cased protocol and domain"
+ },
+ {
+ "expected": true,
+ "text": "http://user:PASSW0RD@example.com:8080/login.php",
+ "description": "Valid url: port and userinfo"
+ },
+ {
+ "expected": true,
+ "text": "http://sports.yahoo.com/nfl/news;_ylt=Aom0;ylu=XyZ?slug=ap-superbowlnotebook",
+ "description": "Valid url: matrix path parameters"
+ },
+ {
+ "expected": true,
+ "text": "http://192.168.0.1/index.html?src=asdf",
+ "description": "Valid url: ipv4"
+ },
+ {
+ "expected": true,
+ "text": "http://[3ffe:1900:4545:3:200:f8ff:fe21:67cf]:80/index.html",
+ "description": "Valid url: ipv6"
+ },
+ {
+ "expected": true,
+ "text": "http://test_underscore.twitter.com",
+ "description": "Valid url: underscore in subdomain"
+ },
+ {
+ "expected": true,
+ "text": "http://example.com?foo=$bar.;baz?BAZ&c=d-#top/?stories+",
+ "description": "Valid url: sub delims and question marks"
+ },
+ {
+ "expected": true,
+ "text": "http://☃.net/",
+ "description": "Valid unicode url: unicode domain"
+ },
+ {
+ "expected": false,
+ "text": "ftp://www.example.com/",
+ "description": "Invalid url: invalid scheme"
+ },
+ {
+ "expected": false,
+ "text": "https://user:pass[word]@www.example.com/",
+ "description": "Invalid url: invalid userinfo characters"
+ },
+ {
+ "expected": false,
+ "text": "http://domain-dash_2314352345_dfasd.foo-cow_4352.com",
+ "description": "Invalid url: underscore in domain"
+ },
+ {
+ "expected": false,
+ "text": "http://www.-domain4352.com/",
+ "description": "Invalid url: domain beginning dash"
+ },
+ {
+ "expected": false,
+ "text": "http://www.domain4352-.com/",
+ "description": "Invalid url: domain trailing dash"
+ },
+ {
+ "expected": false,
+ "text": "http://☃-.net/",
+ "description": "Invalid url: unicode domain trailing dash"
+ },
+ {
+ "expected": false,
+ "text": "http://%e2%98%83.net/",
+ "description": "Invalid url: improperly encoded unicode domain"
+ },
+ {
+ "expected": false,
+ "text": "http://256.1.2.3/",
+ "description": "Invalid url: invalid IP"
+ },
+ {
+ "expected": false,
+ "text": "http://en.wikipedia.org/wiki/\"#Punctuation",
+ "description": "Invalid url: invalid char in path"
+ },
+ {
+ "expected": false,
+ "text": "http://example.com/#anchor ",
+ "description": "Invalid url: trailing space"
+ }
+ ],
+ "lengths": [
+ {
+ "expected": 15,
+ "text": "This is a test.",
+ "description": "Count the number of characters"
+ },
+ {
+ "expected": 20,
+ "text": "http://test.com",
+ "description": "Count a URL starting with http:// as 20 characters"
+ },
+ {
+ "expected": 21,
+ "text": "https://test.com",
+ "description": "Count a URL starting with https:// as 21 characters"
+ },
+ {
+ "expected": 20,
+ "text": "test.com",
+ "description": "Count a URL without protocol as 20 characters"
+ },
+ {
+ "expected": 77,
+ "text": "Test http://test.com test http://test.com test.com test",
+ "description": "Count multiple URLs correctly"
+ }
+ ],
+ "lists": [
+ {
+ "expected": true,
+ "text": "@username/list",
+ "description": "Valid list: a-z < 20 characters"
+ },
+ {
+ "expected": false,
+ "text": "@username",
+ "description": "A username alone SHOULD NOT be considered a valid list"
+ },
+ {
+ "expected": false,
+ "text": "@username/",
+ "description": "A username followed by a slash SHOULD NOT be considered a valid list"
+ },
+ {
+ "expected": false,
+ "text": " @username/list",
+ "description": "Validation SHOULD NOT allow leading spaces"
+ },
+ {
+ "expected": false,
+ "text": "@username/list ",
+ "description": "Validation SHOULD NOT allow trailing spaces"
+ }
+ ],
+ "tweets": [
+ {
+ "expected": true,
+ "text": "I am a Tweet",
+ "description": "Valid Tweet: < 20 characters"
+ },
+ {
+ "expected": true,
+ "text": "A lie gets halfway around the world before the truth has a chance to get its pants on. -- Winston Churchill (1874-1965) http://bit.ly/dJpywL",
+ "description": "Valid Tweet: 140 characters"
+ },
+ {
+ "expected": true,
+ "text": "A lié géts halfway arøünd thé wørld béføré thé truth has a chance tø get its pants øn. -- Winston Churchill (1874-1965) http://bit.ly/dJpywL",
+ "description": "Valid Tweet: 140 characters (with accents)"
+ },
+ {
+ "expected": true,
+ "text": "のののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののののの",
+ "description": "Valid Tweet: 140 characters (double byte characters)"
+ },
+ {
+ "expected": false,
+ "text": "",
+ "description": "Invalid Tweet: no characters (empty)"
+ },
+ {
+ "expected": false,
+ "text": "A lie gets halfway around the world before the truth has a chance to get its pants on. --- Winston Churchill (1874-1965) http://bit.ly/dJpywL",
+ "description": "Invalid Tweet: 141 characters"
+ },
+ {
+ "expected": false,
+ "text": "A lie gets halfway around the world before the truth has a chance to get its pants on. \n-- Winston Churchill (1874-1965) http://bit.ly/dJpywL",
+ "description": "Invalid Tweet: 141 characters (due to newline)"
+ }
+ ]
+ }
+}
Please sign in to comment.
Something went wrong with that request. Please try again.