Skip to content

Commit

Permalink
Merge pull request #782 from etcimon/appender-fix
Browse files Browse the repository at this point in the history
Json serialization doesn't support UTF-8 Encoding & UTF-16 surrogate pairs
  • Loading branch information
s-ludwig committed Aug 14, 2014
2 parents 73884b4 + 1f2a332 commit df80d24
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 14 deletions.
2 changes: 1 addition & 1 deletion examples/json/source/app.d
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import vibe.data.json;
import vibe.data.json;

import std.stdio;

Expand Down
89 changes: 76 additions & 13 deletions source/vibe/data/json.d
Original file line number Diff line number Diff line change
Expand Up @@ -904,6 +904,8 @@ unittest {
assert(parseJsonString("[1, 2, 3]") == Json([Json(1), Json(2), Json(3)]));
assert(parseJsonString("{\"a\": 1}") == Json(["a": Json(1)]));
assert(parseJsonString(`"\\\/\b\f\n\r\t\u1234"`).get!string == "\\/\b\f\n\r\t\u1234");
auto json = parseJsonString(`{"hey": "This is @à test éhééhhéhéé !%/??*&?\ud83d\udcec"}`);
assert(json.toPrettyString() == parseJsonString(json.toPrettyString()).toPrettyString());
}


Expand Down Expand Up @@ -1691,15 +1693,54 @@ void writePrettyJsonString(R)(ref R dst, in Json json, int level = 0)
/// private
private void jsonEscape(R)(ref R dst, string s)
{
	// Writes `s` to the output range `dst` as the *contents* of a JSON string
	// literal (the surrounding quotes are not written here).
	//
	//  - '\\' and '"' are backslash-escaped.
	//  - '\b', '\f', '\n', '\r', '\t' use their short escapes.
	//  - Every other control character below U+0020 is written as \u00XX,
	//    because JSON does not allow raw control characters inside strings.
	//  - Every non-ASCII code point is written as \uXXXX, or as a UTF-16
	//    surrogate pair (\uXXXX\uXXXX) when it lies outside the BMP, so the
	//    output is pure ASCII.
	//
	// `std.utf.decode` throws a UTFException if `s` is not valid UTF-8.
	import std.utf : decode;

	static immutable hexDigits = "0123456789ABCDEF";

	// Emits a single 16-bit code unit as `\uXXXX` (upper-case hex digits).
	void putUnicodeEscape(uint unit)
	{
		char[6] buf = void;
		buf[0] = '\\';
		buf[1] = 'u';
		buf[2] = hexDigits[(unit >> 12) & 0xF];
		buf[3] = hexDigits[(unit >> 8) & 0xF];
		buf[4] = hexDigits[(unit >> 4) & 0xF];
		buf[5] = hexDigits[unit & 0xF];
		foreach (c; buf)
			dst.put(c);
	}

	size_t pos = 0;
	while (pos < s.length) {
		immutable char ch = s[pos];

		if (ch >= 0x80) {
			// Lead byte of a multi-byte UTF-8 sequence: decode() consumes the
			// whole sequence and leaves `pos` on the first byte after it.
			uint codepoint = decode(s, pos);
			if (codepoint < 0x10000) {
				// Inside the BMP: a single escape is enough.
				putUnicodeEscape(codepoint);
			} else {
				// Outside the BMP: split into a high/low surrogate pair.
				codepoint -= 0x10000;
				putUnicodeEscape(0xD800 | (codepoint >> 10));
				putUnicodeEscape(0xDC00 | (codepoint & 0x3FF));
			}
			continue;
		}

		pos++;
		switch (ch) {
			case '\\': dst.put("\\\\"); break;
			case '\"': dst.put("\\\""); break;
			case '\b': dst.put("\\b"); break;
			case '\f': dst.put("\\f"); break;
			case '\n': dst.put("\\n"); break;
			case '\r': dst.put("\\r"); break;
			case '\t': dst.put("\\t"); break;
			default:
				// Remaining control characters must not appear raw in JSON.
				if (ch < 0x20) putUnicodeEscape(ch);
				else dst.put(ch);
				break;
		}
	}
}

Expand All @@ -1725,17 +1766,39 @@ private string jsonUnescape(R)(ref R range)
case 'r': ret.put('\r'); range.popFront(); break;
case 't': ret.put('\t'); range.popFront(); break;
case 'u':
range.popFront();
dchar uch = 0;
foreach( i; 0 .. 4 ){
uch *= 16;
enforceJson(!range.empty, "Unicode sequence must be '\\uXXXX'.");
auto dc = range.front;

dchar decode_unicode_escape() {
enforceJson(range.front == 'u');
range.popFront();
if( dc >= '0' && dc <= '9' ) uch += dc - '0';
else if( dc >= 'a' && dc <= 'f' ) uch += dc - 'a' + 10;
else if( dc >= 'A' && dc <= 'F' ) uch += dc - 'A' + 10;
else enforceJson(false, "Unicode sequence must be '\\uXXXX'.");
dchar uch = 0;
foreach( i; 0 .. 4 ){
uch *= 16;
enforceJson(!range.empty, "Unicode sequence must be '\\uXXXX'.");
auto dc = range.front;
range.popFront();

if( dc >= '0' && dc <= '9' ) uch += dc - '0';
else if( dc >= 'a' && dc <= 'f' ) uch += dc - 'a' + 10;
else if( dc >= 'A' && dc <= 'F' ) uch += dc - 'A' + 10;
else enforceJson(false, "Unicode sequence must be '\\uXXXX'.");
}
return uch;
}

auto uch = decode_unicode_escape();

if(0xD800 <= uch && uch <= 0xDBFF) {
/* surrogate pair */
range.popFront(); // backslash '\'
auto uch2 = decode_unicode_escape();
enforceJson(0xDC00 <= uch2 && uch2 <= 0xDFFF, "invalid Unicode");
{
/* valid second surrogate */
uch =
((uch - 0xD800) << 10) +
(uch2 - 0xDC00) +
0x10000;
}
}
ret.put(uch);
break;
Expand Down Expand Up @@ -1831,4 +1894,4 @@ private void enforceJson(string file = __FILE__, size_t line = __LINE__)(bool co
{
static if (__VERSION__ >= 2065) enforceEx!JSONException(cond, message, file, line);
else if (!cond) throw new JSONException(message);
}
}

0 comments on commit df80d24

Please sign in to comment.