From a65f65fa7b577911084bbe8b3bbe0375b40ef24e Mon Sep 17 00:00:00 2001 From: keita Date: Tue, 14 Feb 2012 16:37:24 -0800 Subject: [PATCH] Fix a bug in modifyIndicesFromUTF16ToUnicode/modifyIndicesFromUnicodeToUTF16. --- twitter-text.js | 47 +++++++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/twitter-text.js b/twitter-text.js index 926a243..7780186 100644 --- a/twitter-text.js +++ b/twitter-text.js @@ -767,25 +767,44 @@ if (typeof twttr === "undefined" || twttr === null) { }; twttr.txt.modifyIndicesFromUnicodeToUTF16 = function(text, entities) { - twttr.txt.shiftIndices(text, entities, 1); + twttr.txt.shiftIndices(text, entities, false); }; twttr.txt.modifyIndicesFromUTF16ToUnicode = function(text, entities) { - twttr.txt.shiftIndices(text, entities, -1); + twttr.txt.shiftIndices(text, entities, true); }; - twttr.txt.shiftIndices = function(text, entities, diff) { - for (var i = 0; i < text.length - 1; i++) { - var c1 = text.charCodeAt(i); - var c2 = text.charCodeAt(i + 1); - if (0xD800 <= c1 && c1 <= 0xDBFF && 0xDC00 <= c2 && c2 <= 0xDFFF) { - // supplementary character - i++; // skip surrogate pair character - for (var j = 0; j < entities.length; j++) { - if (entities[j].indices[0] >= i) { - entities[j].indices[0] += diff; - entities[j].indices[1] += diff; - } + twttr.txt.shiftIndices = function(text, entities, indicesInUTF16) { + if (entities.length == 0) { + return; + } + + var charIndex = text.length - 1; + // replace surrogate pairs with whitespace, and then count length + var codePointIndex = text.replace(/[\uD800-\uDBFF][\uDC00-\uDFFF]/g, " ").length - 1; + var entityIndex = entities.length - 1; + var entity = entities[entityIndex]; + + while (charIndex != codePointIndex) { + if (entity.indices[0] == (indicesInUTF16 ? charIndex : codePointIndex)) { + var len = entity.indices[1] - entity.indices[0]; + entity.indices[0] = indicesInUTF16 ? codePointIndex : charIndex; + entity.indices[1] = entity.indices[0] + len; + + if (entityIndex == 0) { + // no more entity + break; + } + entity = entities[--entityIndex]; + } + + codePointIndex--; + charIndex--; + var c = text.charCodeAt(charIndex); + if (0xD800 <= c && c <= 0xDBFF && charIndex < text.length - 1) { + c = text.charCodeAt(charIndex + 1); + if (0xDC00 <= c && c <= 0xDFFF) { + charIndex--; } } }