Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Add option 'countSupplementaryCharacterAsOne' in extract*().

  • Loading branch information...
commit 23b8dfd0a83c301379c1a3bf8221ce25914efb12 1 parent 430a085
Keita Fujii keitaf authored
Showing with 59 additions and 7 deletions.
  1. +10 −0 test/tests.js
  2. +49 −7 twitter-text.js
10 test/tests.js
View
@@ -33,6 +33,12 @@ test("twttr.txt.splitTags", function() {
}
});
+test("twttr.txt.extract", function() {
+ same(twttr.txt.extractHashtagsWithIndices("\uD801\uDC00 #hashtag"), [{hashtag:"hashtag", indices:[2, 10]}], "Hashtag w/ Supplementary character");
+ same(twttr.txt.extractMentionsOrListsWithIndices("\uD801\uDC00 @mention"), [{screenName:"mention", listSlug:"", indices:[2, 10]}], "Mention w/ Supplementary character");
+ same(twttr.txt.extractUrlsWithIndices("\uD801\uDC00 http://twitter.com"), [{url:"http://twitter.com", indices:[2, 20]}], "Hashtag w/ Supplementary character");
+});
+
test("twttr.txt.autolink", function() {
// Username Overrides
ok(twttr.txt.autoLink("@tw", { before: "!" }).match(/!@<a[^>]+>tw<\/a>/), "Override before");
@@ -77,4 +83,8 @@ test("twttr.txt.autolink", function() {
for (i = 0; i < invalidChars.length; i++) {
equal(twttr.txt.extractUrls("http://twitt" + invalidChars[i] + "er.com").length, 0, 'Should not extract URL with invalid cahracter');
}
+
+ same(twttr.txt.autoLink("\uD801\uDC00 #hashtag \uD801\uDC00 @mention \uD801\uDC00 http://twitter.com"),
+ "\uD801\uDC00 <a href=\"http://twitter.com/#!/search?q=%23hashtag\" title=\"#hashtag\" class=\"tweet-url hashtag\" rel=\"nofollow\">#hashtag</a> \uD801\uDC00 @<a class=\"tweet-url username\" data-screen-name=\"mention\" href=\"http://twitter.com/mention\" rel=\"nofollow\">mention</a> \uD801\uDC00 <a href=\"http://twitter.com\" rel=\"nofollow\" >http://twitter.com</a>",
+ "Autolink hashtag/mentionURL w/ Supplementary character");
});
56 twitter-text.js
View
@@ -440,7 +440,7 @@ if (!window.twttr) {
};
twttr.txt.autoLink = function(text, options) {
- var entities = twttr.txt.extractEntitiesWithIndices(text, {extractUrlWithoutProtocol: false});
+ var entities = twttr.txt.extractEntitiesWithIndices(text, {extractUrlWithoutProtocol: false, countSupplementaryCharacterAsOne: false});
return twttr.txt.autoLinkEntities(text, entities, options);
};
@@ -461,8 +461,8 @@ if (!window.twttr) {
twttr.txt.extractEntitiesWithIndices = function(text, options) {
var entities = twttr.txt.extractUrlsWithIndices(text, options)
- .concat(twttr.txt.extractMentionsOrListsWithIndices(text))
- .concat(twttr.txt.extractHashtagsWithIndices(text));
+ .concat(twttr.txt.extractMentionsOrListsWithIndices(text, options))
+ .concat(twttr.txt.extractHashtagsWithIndices(text, options));
if (entities.length == 0) {
return [];
@@ -495,7 +495,11 @@ if (!window.twttr) {
return screenNamesOnly;
};
- twttr.txt.extractMentionsWithIndices = function(text) {
+ twttr.txt.extractMentionsWithIndices = function(text, options) {
+ if (!options) {
+ options = {countSupplementaryCharacterAsOne: true};
+ }
+
var mentions = [];
var mentionsOrLists = twttr.txt.extractMentionsOrListsWithIndices(text);
@@ -509,6 +513,9 @@ if (!window.twttr) {
}
}
+ if (options.countSupplementaryCharacterAsOne) {
+ twttr.txt.adjustIndices(text, mentions, -1);
+ }
return mentions;
};
@@ -516,11 +523,15 @@ if (!window.twttr) {
* Extract list or user mentions.
* (Presence of listSlug indicates a list)
*/
- twttr.txt.extractMentionsOrListsWithIndices = function(text) {
+ twttr.txt.extractMentionsOrListsWithIndices = function(text, options) {
if (!text || !text.match(twttr.txt.regexen.atSign)) {
return [];
}
+ if (!options) {
+ options = {countSupplementaryCharacterAsOne: true};
+ }
+
var possibleNames = [],
position = 0;
@@ -538,6 +549,9 @@ if (!window.twttr) {
}
});
+ if (options.countSupplementaryCharacterAsOne) {
+ twttr.txt.adjustIndices(text, possibleNames, -1);
+ }
return possibleNames;
};
@@ -569,7 +583,7 @@ if (!window.twttr) {
twttr.txt.extractUrlsWithIndices = function(text, options) {
if (!options) {
- options = {extractUrlsWithoutProtocol: true};
+ options = {extractUrlsWithoutProtocol: true, countSupplementaryCharacterAsOne: true};
}
if (!text || (options.extractUrlsWithoutProtocol ? !text.match(/\./) : !text.match(/:/))) {
@@ -632,6 +646,10 @@ if (!window.twttr) {
}
});
+ if (options.countSupplementaryCharacterAsOne) {
+ twttr.txt.adjustIndices(text, urls, -1);
+ }
+
return urls;
};
@@ -646,11 +664,15 @@ if (!window.twttr) {
return hashtagsOnly;
};
- twttr.txt.extractHashtagsWithIndices = function(text) {
+ twttr.txt.extractHashtagsWithIndices = function(text, options) {
if (!text || !text.match(twttr.txt.regexen.hashSigns)) {
return [];
}
+ if (!options) {
+ options = {countSupplementaryCharacterAsOne: true};
+ }
+
var tags = [],
position = 0;
@@ -666,9 +688,29 @@ if (!window.twttr) {
});
});
+ if (options.countSupplementaryCharacterAsOne) {
+ twttr.txt.adjustIndices(text, tags, -1);
+ }
+
return tags;
};
+ twttr.txt.adjustIndices = function(text, entities, diff) {
+ for (var i = 0; i < text.length - 1; i++) {
+ var c1 = text.charCodeAt(i);
+ var c2 = text.charCodeAt(i + 1);
+ if (0xD800 <= c1 && c1 <= 0xDBFF && 0xDC00 <= c2 && c2 <= 0xDFFF) {
+ // supplementary character
+ for (var j = 0; j < entities.length; j++) {
+ if (entities[j].indices[0] >= i) {
+ entities[j].indices[0] += diff;
+ entities[j].indices[1] += diff;
+ }
+ }
+ }
+ }
+ }
+
// this essentially does text.split(/<|>/)
// except that won't work in IE, where empty strings are omitted
// so "<>".split(/<|>/) => [] in IE, but is ["", "", ""] in all others
Please sign in to comment.
Something went wrong with that request. Please try again.