Skip to content
This repository has been archived by the owner on Sep 18, 2021. It is now read-only.

Commit

Permalink
Merge pull request #59 from twitter/cashtag
Browse files Browse the repository at this point in the history
Add support for extracting/auto-linking cashtags
  • Loading branch information
Keita Fujii committed May 31, 2012
2 parents c469e31 + 20f4f2f commit dc57a0d
Show file tree
Hide file tree
Showing 2 changed files with 80 additions and 4 deletions.
12 changes: 12 additions & 0 deletions test/conformance.html
Expand Up @@ -31,6 +31,10 @@
return function(test) {
return twttr.txt.autoLinkUsernamesOrLists(test.text, {suppressNoFollow: true, suppressDataScreenName: true});
};
case "cashtags":
return function(test) {
return twttr.txt.autoLinkCashtags(test.text, {suppressNoFollow: true, suppressDataScreenName: true});
};
case "all":
return function(test) {
return twttr.txt.autoLink(test.text, {suppressNoFollow: true, suppressDataScreenName: true});
Expand Down Expand Up @@ -86,6 +90,14 @@
return function(test) {
return twttr.txt.extractHashtagsWithIndices(test.text);
};
case "cashtags":
return function(test) {
return twttr.txt.extractCashtags(test.text);
};
case "cashtags_with_indices":
return function(test) {
return twttr.txt.extractCashtagsWithIndices(test.text);
};
}
case "hit_highlighting":
return function(test) {
Expand Down
72 changes: 68 additions & 4 deletions twitter-text.js
Expand Up @@ -276,6 +276,10 @@ if (typeof twttr === "undefined" || twttr === null) {

twttr.txt.regexen.validTcoUrl = /^https?:\/\/t\.co\/[a-z0-9]+/i;

// cashtag related regex
twttr.txt.regexen.cashtag = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i;
twttr.txt.regexen.validCashtag = regexSupplant('(?:^|#{spaces})\\$(#{cashtag})(?=$|\\s|[#{punct}])', 'gi');

// These URL validation pattern strings are based on the ABNF from RFC 3986
twttr.txt.regexen.validateUrlUnreserved = /[a-z0-9\-._~]/i;
twttr.txt.regexen.validateUrlPctEncoded = /(?:%[0-9a-f]{2})/i;
Expand Down Expand Up @@ -372,11 +376,13 @@ if (typeof twttr === "undefined" || twttr === null) {
var DEFAULT_USERNAME_CLASS = "username";
// Default CSS class for auto-linked hashtags (along with the url class)
var DEFAULT_HASHTAG_CLASS = "hashtag";
// Default CSS class for auto-linked cashtags (along with the url class)
var DEFAULT_CASHTAG_CLASS = "cashtag";
// HTML attribute for robot nofollow behavior (default)
var HTML_ATTR_NO_FOLLOW = " rel=\"nofollow\"";
// Options which should not be passed as HTML attributes
var OPTIONS_NOT_ATTRIBUTES = {'urlClass':true, 'listClass':true, 'usernameClass':true, 'hashtagClass':true,
'usernameUrlBase':true, 'listUrlBase':true, 'hashtagUrlBase':true,
var OPTIONS_NOT_ATTRIBUTES = {'urlClass':true, 'listClass':true, 'usernameClass':true, 'hashtagClass':true, 'cashtagClass':true,
'usernameUrlBase':true, 'listUrlBase':true, 'hashtagUrlBase':true, 'cashtagUrlBase':true,
'usernameUrlBlock':true, 'listUrlBlock':true, 'hashtagUrlBlock':true, 'linkUrlBlock':true,
'usernameIncludeSymbol':true, 'suppressLists':true, 'suppressNoFollow':true,
'suppressDataScreenName':true, 'urlEntities':true, 'before':true
Expand Down Expand Up @@ -412,6 +418,22 @@ if (typeof twttr === "undefined" || twttr === null) {
return stringSupplant("#{before}<a href=\"#{hashtagUrlBase}#{text}\" title=\"##{text}\" class=\"#{urlClass} #{hashtagClass}\"#{extraHtml}>#{hash}#{preText}#{text}#{postText}</a>", d);
};

twttr.txt.linkToCashtag = function(entity, text, options) {
var d = {
preText: "",
text: twttr.txt.htmlEscape(entity.cashtag),
postText: "",
extraHtml: options.suppressNoFollow ? "" : HTML_ATTR_NO_FOLLOW
};
for (var k in options) {
if (options.hasOwnProperty(k)) {
d[k] = options[k];
}
}

return stringSupplant("#{before}<a href=\"#{cashtagUrlBase}#{text}\" title=\"$#{text}\" class=\"#{urlClass} #{cashtagClass}\"#{extraHtml}>$#{preText}#{text}#{postText}</a>", d);
};

twttr.txt.linkToMentionAndList = function(entity, text, options) {
var at = text.substring(entity.indices[0], entity.indices[0] + 1);
var d = {
Expand Down Expand Up @@ -548,6 +570,8 @@ if (typeof twttr === "undefined" || twttr === null) {
options.urlClass = options.urlClass || DEFAULT_URL_CLASS;
options.hashtagClass = options.hashtagClass || DEFAULT_HASHTAG_CLASS;
options.hashtagUrlBase = options.hashtagUrlBase || "https://twitter.com/#!/search?q=%23";
options.cashtagClass = options.cashtagClass || DEFAULT_CASHTAG_CLASS;
options.cashtagUrlBase = options.cashtagUrlBase || "https://twitter.com/#!/search?q=%24";
options.listClass = options.listClass || DEFAULT_LIST_CLASS;
options.usernameClass = options.usernameClass || DEFAULT_USERNAME_CLASS;
options.usernameUrlBase = options.usernameUrlBase || "https://twitter.com/";
Expand Down Expand Up @@ -580,8 +604,10 @@ if (typeof twttr === "undefined" || twttr === null) {
result += twttr.txt.linkToUrl(entity, text, options);
} else if (entity.hashtag) {
result += twttr.txt.linkToHashtag(entity, text, options);
} else if(entity.screenName) {
} else if (entity.screenName) {
result += twttr.txt.linkToMentionAndList(entity, text, options);
} else if (entity.cashtag) {
result += twttr.txt.linkToCashtag(entity, text, options);
}
beginIndex = entity.indices[1];
}
Expand Down Expand Up @@ -641,6 +667,11 @@ if (typeof twttr === "undefined" || twttr === null) {
return twttr.txt.autoLinkEntities(text, entities, options);
};

twttr.txt.autoLinkCashtags = function(text, options) {
var entities = twttr.txt.extractCashtagsWithIndices(text);
return twttr.txt.autoLinkEntities(text, entities, options);
};

twttr.txt.autoLinkUrlsCustom = function(text, options) {
var entities = twttr.txt.extractUrlsWithIndices(text, {extractUrlWithoutProtocol: false});
return twttr.txt.autoLinkEntities(text, entities, options);
Expand All @@ -663,7 +694,8 @@ if (typeof twttr === "undefined" || twttr === null) {
twttr.txt.extractEntitiesWithIndices = function(text, options) {
var entities = twttr.txt.extractUrlsWithIndices(text, options)
.concat(twttr.txt.extractMentionsOrListsWithIndices(text))
.concat(twttr.txt.extractHashtagsWithIndices(text, {checkUrlOverlap: false}));
.concat(twttr.txt.extractHashtagsWithIndices(text, {checkUrlOverlap: false}))
.concat(twttr.txt.extractCashtagsWithIndices(text));

if (entities.length == 0) {
return [];
Expand Down Expand Up @@ -880,6 +912,38 @@ if (typeof twttr === "undefined" || twttr === null) {
return tags;
};

twttr.txt.extractCashtags = function(text) {
var cashtagsOnly = [],
cashtagsWithIndices = twttr.txt.extractCashtagsWithIndices(text);

for (var i = 0; i < cashtagsWithIndices.length; i++) {
cashtagsOnly.push(cashtagsWithIndices[i].cashtag);
}

return cashtagsOnly;
};

twttr.txt.extractCashtagsWithIndices = function(text) {
if (!text || text.indexOf("$") == -1) {
return [];
}

var tags = [],
position = 0;

text.replace(twttr.txt.regexen.validCashtag, function(match, cashtag, offset, chunk) {
// cashtag doesn't contain $ sign, so need to decrement index by 1.
var startPosition = text.indexOf(cashtag, position) - 1;
position = startPosition + cashtag.length + 1;
tags.push({
cashtag: cashtag,
indices: [startPosition, position]
});
});

return tags;
};

twttr.txt.modifyIndicesFromUnicodeToUTF16 = function(text, entities) {
twttr.txt.convertUnicodeIndices(text, entities, false);
};
Expand Down

0 comments on commit dc57a0d

Please sign in to comment.