Skip to content
This repository has been archived by the owner on Sep 18, 2021. It is now read-only.

Commit

Permalink
update tlds in source code
Browse files Browse the repository at this point in the history
  • Loading branch information
jakl committed Oct 28, 2014
1 parent 07862fe commit d78b478
Show file tree
Hide file tree
Showing 2 changed files with 86 additions and 72 deletions.
61 changes: 41 additions & 20 deletions Rakefile
Expand Up @@ -116,30 +116,51 @@ task :package, [:version] => [:pkg] do |t, args|
end

namespace :tlds do
desc "Print tlds to include in twitter-text.js, based on conformance"
task :print do
def line_js(tlds)
quote = "'"
indent = 4
' ' * indent + quote + tlds.join('|')
# Rewrites the validGTLD / validCCTLD regex definitions inside twitter-text.js
# using the TLD lists stored in the conformance suite's tld_lib.yml.
desc "Update tlds in twitter-text.js based on conformance tld_lib.yml"
task :update do
  tld_lib = YAML.load_file(repo_path('test', 'twitter-text-conformance', 'tld_lib.yml'))
  # Both lists are wrapped at 100 columns; country is formatted first, then generic.
  country, generic = %w[country generic].map { |kind| format_tlds(tld_lib[kind], 100) }

  js_source = File.read(repo_path('twitter-text.js'))
  # replace_tlds! edits js_source in place: generic list first, then country-code list.
  replace_tlds!(js_source, 'validGTLD', generic)
  replace_tlds!(js_source, 'validCCTLD', country)

  File.open(repo_path('twitter-text.js'), 'w') { |out| out.write(js_source) }
end
end

tld_yml = repo_path('test', 'twitter-text-conformance', 'tld_lib.yml')
YAML.load_file(tld_yml).each do |type, tlds|
tld_line = []
lines = []
puts '# ' + type
tlds.each do |tld|
tld_line << tld
if line_js(tld_line).length >= 80
lines << line_js(tld_line)
tld_line = []
end
end
lines << line_js(tld_line) if tld_line.length > 0
puts lines.join("|' +\n") + "' +\n"
# Packs a list of TLDs into wrapped JavaScript string-literal fragments.
#
# Every output line is built by line_js (indent, opening quote, TLDs joined
# with '|'). Lines are joined with "|' +\n" and the result ends with "' +",
# so the text can be dropped between two other concatenated JS strings.
#
# tlds        - Array of TLD strings, in the order they should appear.
# line_length - maximum length of a line as rendered by line_js; the trailing
#               "|' +" added when joining is not counted against this limit.
#
# Returns the formatted String. A TLD that is longer than line_length on its
# own is placed alone on a line rather than being split. An empty list yields
# just the closing fragment "' +", so callers should pass a non-empty list.
def format_tlds(tlds, line_length)
  tld_line = []
  lines = []
  tlds.each do |tld|
    # Wrap only when there is something to flush. Flushing an empty line would
    # emit a bare "    '" fragment, which after joining becomes "    '|' +" and
    # introduces an empty alternative into the generated regex.
    if !tld_line.empty? && line_js(tld_line + [tld]).length > line_length
      lines << line_js(tld_line)
      tld_line = [tld]
    else
      tld_line << tld
    end
  end
  lines << line_js(tld_line) unless tld_line.empty?
  lines.join("|' +\n") + "' +"
end

# Renders one line of the TLD table: four spaces of indentation, an opening
# single quote, then the given TLDs joined with '|'. The closing quote is left
# off on purpose; the caller appends it when joining lines.
def line_js(tlds)
  padding = ' ' * 4
  "#{padding}'#{tlds.join('|')}"
end

# Replaces, in place, the JavaScript statement that assigns
# twttr.txt.regexen.<name> inside source_code with a regenerated definition
# that embeds the given TLD lines.
#
# source_code - String of JavaScript source; mutated through String#sub!.
# name        - name of the regexen property to rewrite (the :update task
#               passes 'validGTLD' and 'validCCTLD').
# tlds        - pre-formatted string-literal lines to embed (the :update task
#               passes the output of format_tlds).
#
# Returns whatever String#sub! returns: the mutated string, or nil when no
# matching assignment exists (source_code is then left untouched).
#
# NOTE(review): String#sub! interprets backslash sequences in the replacement
# string, so this relies on tlds containing no backslashes.
# NOTE(review): <<-D keeps the leading whitespace of the heredoc body verbatim;
# the exact indentation of the template lines is not visible in this view, so
# confirm it against twitter-text.js before touching them.
def replace_tlds!(source_code, name, tlds)
# Escape the literal prefix so its dots match literally rather than as wildcards.
start = Regexp.quote("twttr.txt.regexen.#{name} =")
# Non-greedy match with /m (so . crosses newlines): spans from the assignment
# up to and including the first ";\n" that follows it.
source_code.sub!(/#{start}.*?;\n/m, <<-D)
twttr.txt.regexen.#{name} = regexSupplant(RegExp(
'(?:(?:' +
#{tlds}
')(?=[^0-9a-zA-Z@]|$))'));
D
end

def repo_path(*path)
Expand Down
97 changes: 45 additions & 52 deletions twitter-text.js
Expand Up @@ -229,61 +229,54 @@
twttr.txt.regexen.validDomainChars = regexSupplant(/[^#{invalidDomainChars}]/);
twttr.txt.regexen.validSubdomain = regexSupplant(/(?:(?:#{validDomainChars}(?:[_-]|#{validDomainChars})*)?#{validDomainChars}\.)/);
twttr.txt.regexen.validDomainName = regexSupplant(/(?:(?:#{validDomainChars}(?:-|#{validDomainChars})*)?#{validDomainChars}\.)/);
twttr.txt.regexen.validGTLD = regexSupplant(RegExp(
twttr.txt.regexen.validGTLD = regexSupplant(RegExp(
'(?:(?:' +
'abogado|academy|accountants|active|actor|aero|agency|airforce|allfinanz|alsace|' +
'archi|army|arpa|asia|associates|attorney|auction|audio|autos|axa|band|bar|bargains|' +
'bayern|beer|berlin|best|bid|bike|bio|biz|black|blackfriday|blue|bmw|bnpparibas|' +
'boo|boutique|brussels|budapest|build|builders|business|buzz|bzh|cab|cal|camera|' +
'camp|cancerresearch|capetown|capital|caravan|cards|care|career|careers|casa|' +
'cash|cat|catering|center|ceo|cern|channel|cheap|christmas|chrome|church|citic|' +
'city|claims|cleaning|click|clinic|clothing|club|codes|coffee|college|cologne|' +
'com|community|company|computer|condos|construction|consulting|contractors|cooking|' +
'cool|coop|country|credit|creditcard|crs|cruises|cuisinella|cymru|dad|dance|dating|' +
'day|deals|degree|democrat|dental|dentist|desi|diamonds|diet|digital|direct|directory|' +
'discount|dnp|domains|durban|dvag|eat|edu|education|email|emerck|engineer|engineering|' +
'enterprises|equipment|esq|estate|eus|events|exchange|expert|exposed|fail|farm|' +
'feedback|finance|financial|fish|fishing|fitness|flights|florist|flsmidth|fly|' +
'foo|forsale|foundation|frl|frogans|fund|furniture|futbol|gal|gallery|gbiz|gent|' +
'gift|gifts|gives|glass|gle|global|globo|gmail|gmo|gmx|google|gop|gov|graphics|' +
'gratis|green|gripe|guide|guitars|guru|hamburg|haus|healthcare|help|here|hiphop|' +
'hiv|holdings|holiday|homes|horse|host|hosting|house|how|ibm|immo|immobilien|' +
'industries|info|ing|ink|institute|insure|int|international|investments|jetzt|' +
'jobs|joburg|juegos|kaufen|kim|kitchen|kiwi|koeln|krd|kred|lacaixa|land|lawyer|' +
'lease|lgbt|life|lighting|limited|limo|link|loans|london|lotto|ltda|luxe|luxury|' +
'maison|management|mango|market|marketing|media|meet|melbourne|meme|menu|miami|' +
'mil|mini|mobi|moda|moe|monash|mortgage|moscow|motorcycles|mov|museum|nagoya|' +
'name|navy|net|network|neustar|new|nexus|ngo|nhk|ninja|nra|nrw|nyc|okinawa|ong|' +
'onl|ooo|org|organic|otsuka|ovh|paris|partners|parts|pharmacy|photo|photography|' +
'photos|physio|pics|pictures|pink|pizza|place|plumbing|pohl|poker|post|praxi|' +
'press|pro|prod|productions|prof|properties|property|pub|qpon|quebec|realtor|' +
'recipes|red|rehab|reise|reisen|ren|rentals|repair|report|republican|rest|restaurant|' +
'reviews|rich|rio|rip|rocks|rodeo|rsvp|ruhr|ryukyu|saarland|sarl|sca|scb|schmidt|' +
'schule|scot|services|sexy|shiksha|shoes|singles|social|software|sohu|solar|solutions|' +
'soy|space|spiegel|supplies|supply|support|surf|surgery|suzuki|systems|taipei|' +
'tatar|tattoo|tax|technology|tel|tienda|tips|tirol|today|tokyo|tools|top|town|' +
'toys|trade|training|travel|tui|university|uno|uol|vacations|vegas|ventures|vermögensberater|' +
'vermögensberatung|versicherung|vet|viajes|villas|vision|vlaanderen|vodka|vote|' +
'voting|voto|voyage|wales|wang|watch|webcam|website|wed|wedding|whoswho|wien|' +
'wiki|williamhill|wme|work|works|world|wtc|wtf|xxx|xyz|yachts|yandex|yoga|yokohama|' +
'youtube|zip|zone|дети|москва|онлайн|орг|рус|сайт|بازار|شبكة|موقع|संगठन|みんな|世界|' +
'中信|中文网|企业|佛山|公司|公益|商城|商标|在线|广东|我爱你|手机|政务|机构|游戏|移动|组织机构|网址|网络|集团|삼성' +
'abogado|academy|accountants|active|actor|aero|agency|airforce|allfinanz|alsace|archi|army|arpa|' +
'asia|associates|attorney|auction|audio|autos|axa|band|bar|bargains|bayern|beer|berlin|best|bid|' +
'bike|bio|biz|black|blackfriday|blue|bmw|bnpparibas|boo|boutique|brussels|budapest|build|' +
'builders|business|buzz|bzh|cab|cal|camera|camp|cancerresearch|capetown|capital|caravan|cards|' +
'care|career|careers|casa|cash|cat|catering|center|ceo|cern|channel|cheap|christmas|chrome|' +
'church|citic|city|claims|cleaning|click|clinic|clothing|club|codes|coffee|college|cologne|com|' +
'community|company|computer|condos|construction|consulting|contractors|cooking|cool|coop|country|' +
'credit|creditcard|crs|cruises|cuisinella|cymru|dad|dance|dating|day|deals|degree|democrat|' +
'dental|dentist|desi|diamonds|diet|digital|direct|directory|discount|dnp|domains|durban|dvag|eat|' +
'edu|education|email|emerck|engineer|engineering|enterprises|equipment|esq|estate|eus|events|' +
'exchange|expert|exposed|fail|farm|feedback|finance|financial|fish|fishing|fitness|flights|' +
'florist|flsmidth|fly|foo|forsale|foundation|frl|frogans|fund|furniture|futbol|gal|gallery|gbiz|' +
'gent|gift|gifts|gives|glass|gle|global|globo|gmail|gmo|gmx|google|gop|gov|graphics|gratis|green|' +
'gripe|guide|guitars|guru|hamburg|haus|healthcare|help|here|hiphop|hiv|holdings|holiday|homes|' +
'horse|host|hosting|house|how|ibm|immo|immobilien|industries|info|ing|ink|institute|insure|int|' +
'international|investments|jetzt|jobs|joburg|juegos|kaufen|kim|kitchen|kiwi|koeln|krd|kred|' +
'lacaixa|land|lawyer|lease|lgbt|life|lighting|limited|limo|link|loans|london|lotto|ltda|luxe|' +
'luxury|maison|management|mango|market|marketing|media|meet|melbourne|meme|menu|miami|mil|mini|' +
'mobi|moda|moe|monash|mortgage|moscow|motorcycles|mov|museum|nagoya|name|navy|net|network|' +
'neustar|new|nexus|ngo|nhk|ninja|nra|nrw|nyc|okinawa|ong|onl|ooo|org|organic|otsuka|ovh|paris|' +
'partners|parts|pharmacy|photo|photography|photos|physio|pics|pictures|pink|pizza|place|plumbing|' +
'pohl|poker|post|praxi|press|pro|prod|productions|prof|properties|property|pub|qpon|quebec|' +
'realtor|recipes|red|rehab|reise|reisen|ren|rentals|repair|report|republican|rest|restaurant|' +
'reviews|rich|rio|rip|rocks|rodeo|rsvp|ruhr|ryukyu|saarland|sarl|sca|scb|schmidt|schule|scot|' +
'services|sexy|shiksha|shoes|singles|social|software|sohu|solar|solutions|soy|space|spiegel|' +
'supplies|supply|support|surf|surgery|suzuki|systems|taipei|tatar|tattoo|tax|technology|tel|' +
'tienda|tips|tirol|today|tokyo|tools|top|town|toys|trade|training|travel|tui|university|uno|uol|' +
'vacations|vegas|ventures|vermögensberater|vermögensberatung|versicherung|vet|viajes|villas|' +
'vision|vlaanderen|vodka|vote|voting|voto|voyage|wales|wang|watch|webcam|website|wed|wedding|' +
'whoswho|wien|wiki|williamhill|wme|work|works|world|wtc|wtf|xxx|xyz|yachts|yandex|yoga|yokohama|' +
'youtube|zip|zone|дети|москва|онлайн|орг|рус|сайт|بازار|شبكة|موقع|संगठन|みんな|世界|中信|中文网|企业|佛山|公司|' +
'公益|商城|商标|在线|广东|我爱你|手机|政务|机构|游戏|移动|组织机构|网址|网络|集团|삼성' +
')(?=[^0-9a-zA-Z@]|$))'));
twttr.txt.regexen.validCCTLD = regexSupplant(RegExp(
twttr.txt.regexen.validCCTLD = regexSupplant(RegExp(
'(?:(?:' +
'ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|' +
'bj|bl|bm|bn|bo|bq|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|' +
'cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|' +
'ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|' +
'ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|' +
'lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|' +
'mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|' +
'pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|' +
'so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|' +
'ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw|бел|мкд|мон|рф|' +
'срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|' +
'عراق|عمان|فلسطين|قطر|مصر|مليسيا|پاکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|' +
'சிங்கப்பூர்|భారత్|ලංකා|ไทย|გე|中国|中國|台湾|台灣|新加坡|香港|한국' +
'ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bl|bm|bn|bo|bq|' +
'br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cw|cx|cy|cz|de|dj|dk|dm|do|dz|' +
'ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|' +
'gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|' +
'la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mf|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|' +
'my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|' +
'rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|' +
'tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|um|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw|' +
'бел|мкд|мон|рф|срб|укр|қаз|الاردن|الجزائر|السعودية|المغرب|امارات|ایران|بھارت|تونس|سودان|سورية|' +
'عراق|عمان|فلسطين|قطر|مصر|مليسيا|پاکستان|भारत|বাংলা|ভারত|ਭਾਰਤ|ભારત|இந்தியா|இலங்கை|சிங்கப்பூர்|' +
'భారత్|ලංකා|ไทย|გე|中国|中國|台湾|台灣|新加坡|香港|한국' +
')(?=[^0-9a-zA-Z@]|$))'));
twttr.txt.regexen.validPunycode = regexSupplant(/(?:xn--[0-9a-z]+)/);
twttr.txt.regexen.validSpecialCCTLD = regexSupplant(RegExp(
Expand Down

0 comments on commit d78b478

Please sign in to comment.