Permalink
Browse files

Merge pull request #59 from twitter/cashtag

Add support for extracting/auto-linking cashtags
  • Loading branch information...
2 parents c469e31 + 20f4f2f commit dc57a0ddfc7340cfe6cb6b1d3fe48821cb9e448f @keitaf keitaf committed May 31, 2012
Showing with 80 additions and 4 deletions.
  1. +12 −0 test/conformance.html
  2. +68 −4 twitter-text.js
View
@@ -31,6 +31,10 @@
return function(test) {
return twttr.txt.autoLinkUsernamesOrLists(test.text, {suppressNoFollow: true, suppressDataScreenName: true});
};
+ case "cashtags":
+ return function(test) {
+ return twttr.txt.autoLinkCashtags(test.text, {suppressNoFollow: true, suppressDataScreenName: true});
+ };
case "all":
return function(test) {
return twttr.txt.autoLink(test.text, {suppressNoFollow: true, suppressDataScreenName: true});
@@ -86,6 +90,14 @@
return function(test) {
return twttr.txt.extractHashtagsWithIndices(test.text);
};
+ case "cashtags":
+ return function(test) {
+ return twttr.txt.extractCashtags(test.text);
+ };
+ case "cashtags_with_indices":
+ return function(test) {
+ return twttr.txt.extractCashtagsWithIndices(test.text);
+ };
}
case "hit_highlighting":
return function(test) {
View
@@ -276,6 +276,10 @@ if (typeof twttr === "undefined" || twttr === null) {
twttr.txt.regexen.validTcoUrl = /^https?:\/\/t\.co\/[a-z0-9]+/i;
+ // cashtag related regex
+ twttr.txt.regexen.cashtag = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i;
+ twttr.txt.regexen.validCashtag = regexSupplant('(?:^|#{spaces})\\$(#{cashtag})(?=$|\\s|[#{punct}])', 'gi');
+
// These URL validation pattern strings are based on the ABNF from RFC 3986
twttr.txt.regexen.validateUrlUnreserved = /[a-z0-9\-._~]/i;
twttr.txt.regexen.validateUrlPctEncoded = /(?:%[0-9a-f]{2})/i;
@@ -372,11 +376,13 @@ if (typeof twttr === "undefined" || twttr === null) {
var DEFAULT_USERNAME_CLASS = "username";
// Default CSS class for auto-linked hashtags (along with the url class)
var DEFAULT_HASHTAG_CLASS = "hashtag";
+ // Default CSS class for auto-linked cashtags (along with the url class)
+ var DEFAULT_CASHTAG_CLASS = "cashtag";
// HTML attribute for robot nofollow behavior (default)
var HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\"";
// Options which should not be passed as HTML attributes
- var OPTIONS_NOT_ATTRIBUTES = {'urlClass':true, 'listClass':true, 'usernameClass':true, 'hashtagClass':true,
- 'usernameUrlBase':true, 'listUrlBase':true, 'hashtagUrlBase':true,
+ var OPTIONS_NOT_ATTRIBUTES = {'urlClass':true, 'listClass':true, 'usernameClass':true, 'hashtagClass':true, 'cashtagClass':true,
+ 'usernameUrlBase':true, 'listUrlBase':true, 'hashtagUrlBase':true, 'cashtagUrlBase':true,
'usernameUrlBlock':true, 'listUrlBlock':true, 'hashtagUrlBlock':true, 'linkUrlBlock':true,
'usernameIncludeSymbol':true, 'suppressLists':true, 'suppressNoFollow':true,
'suppressDataScreenName':true, 'urlEntities':true, 'before':true
@@ -412,6 +418,22 @@ if (typeof twttr === "undefined" || twttr === null) {
return stringSupplant("#{before}<a href=\"#{hashtagUrlBase}#{text}\" title=\"##{text}\" class=\"#{urlClass} #{hashtagClass}\"#{extraHtml}>#{hash}#{preText}#{text}#{postText}</a>", d);
};
+ twttr.txt.linkToCashtag = function(entity, text, options) {
+ var d = {
+ preText: "",
+ text: twttr.txt.htmlEscape(entity.cashtag),
+ postText: "",
+ extraHtml: options.suppressNoFollow ? "" : HTML_ATTR_NO_FOLLOW
+ };
+ for (var k in options) {
+ if (options.hasOwnProperty(k)) {
+ d[k] = options[k];
+ }
+ }
+
+ return stringSupplant("#{before}<a href=\"#{cashtagUrlBase}#{text}\" title=\"$#{text}\" class=\"#{urlClass} #{cashtagClass}\"#{extraHtml}>$#{preText}#{text}#{postText}</a>", d);
+ };
+
twttr.txt.linkToMentionAndList = function(entity, text, options) {
var at = text.substring(entity.indices[0], entity.indices[0] + 1);
var d = {
@@ -548,6 +570,8 @@ if (typeof twttr === "undefined" || twttr === null) {
options.urlClass = options.urlClass || DEFAULT_URL_CLASS;
options.hashtagClass = options.hashtagClass || DEFAULT_HASHTAG_CLASS;
options.hashtagUrlBase = options.hashtagUrlBase || "https://twitter.com/#!/search?q=%23";
+ options.cashtagClass = options.cashtagClass || DEFAULT_CASHTAG_CLASS;
+ options.cashtagUrlBase = options.cashtagUrlBase || "https://twitter.com/#!/search?q=%24";
options.listClass = options.listClass || DEFAULT_LIST_CLASS;
options.usernameClass = options.usernameClass || DEFAULT_USERNAME_CLASS;
options.usernameUrlBase = options.usernameUrlBase || "https://twitter.com/";
@@ -580,8 +604,10 @@ if (typeof twttr === "undefined" || twttr === null) {
result += twttr.txt.linkToUrl(entity, text, options);
} else if (entity.hashtag) {
result += twttr.txt.linkToHashtag(entity, text, options);
- } else if(entity.screenName) {
+ } else if (entity.screenName) {
result += twttr.txt.linkToMentionAndList(entity, text, options);
+ } else if (entity.cashtag) {
+ result += twttr.txt.linkToCashtag(entity, text, options);
}
beginIndex = entity.indices[1];
}
@@ -641,6 +667,11 @@ if (typeof twttr === "undefined" || twttr === null) {
return twttr.txt.autoLinkEntities(text, entities, options);
};
+ twttr.txt.autoLinkCashtags = function(text, options) {
+ var entities = twttr.txt.extractCashtagsWithIndices(text);
+ return twttr.txt.autoLinkEntities(text, entities, options);
+ };
+
twttr.txt.autoLinkUrlsCustom = function(text, options) {
var entities = twttr.txt.extractUrlsWithIndices(text, {extractUrlWithoutProtocol: false});
return twttr.txt.autoLinkEntities(text, entities, options);
@@ -663,7 +694,8 @@ if (typeof twttr === "undefined" || twttr === null) {
twttr.txt.extractEntitiesWithIndices = function(text, options) {
var entities = twttr.txt.extractUrlsWithIndices(text, options)
.concat(twttr.txt.extractMentionsOrListsWithIndices(text))
- .concat(twttr.txt.extractHashtagsWithIndices(text, {checkUrlOverlap: false}));
+ .concat(twttr.txt.extractHashtagsWithIndices(text, {checkUrlOverlap: false}))
+ .concat(twttr.txt.extractCashtagsWithIndices(text));
if (entities.length == 0) {
return [];
@@ -880,6 +912,38 @@ if (typeof twttr === "undefined" || twttr === null) {
return tags;
};
+ twttr.txt.extractCashtags = function(text) {
+ var cashtagsOnly = [],
+ cashtagsWithIndices = twttr.txt.extractCashtagsWithIndices(text);
+
+ for (var i = 0; i < cashtagsWithIndices.length; i++) {
+ cashtagsOnly.push(cashtagsWithIndices[i].cashtag);
+ }
+
+ return cashtagsOnly;
+ };
+
+ twttr.txt.extractCashtagsWithIndices = function(text) {
+ if (!text || text.indexOf("$") == -1) {
+ return [];
+ }
+
+ var tags = [],
+ position = 0;
+
+ text.replace(twttr.txt.regexen.validCashtag, function(match, cashtag, offset, chunk) {
+ // cashtag doesn't contain $ sign, so need to decrement index by 1.
+ var startPosition = text.indexOf(cashtag, position) - 1;
+ position = startPosition + cashtag.length + 1;
+ tags.push({
+ cashtag: cashtag,
+ indices: [startPosition, position]
+ });
+ });
+
+ return tags;
+ };
+
twttr.txt.modifyIndicesFromUnicodeToUTF16 = function(text, entities) {
twttr.txt.convertUnicodeIndices(text, entities, false);
};

0 comments on commit dc57a0d

Please sign in to comment.