Skip to content

Commit

Permalink
Consolidated fixes to character encoders/decoders (#11707)
Browse files Browse the repository at this point in the history
* Fix EUC-JP reference decoder

The relevant step fixed by this pull request says "Return error."; the decoder should therefore proceed to the next iteration rather than run the rest of the handler for the given byte.

* Fix ISO-2022-JP reference implementation

The relevant step in the Encoding Standard says "Prepend _lead_ and _byte_ to _stream_." However, the two bytes are prepended in the wrong order in the reference implementation. Note that under the Encoding Standard, "[w]hen one or more tokens are prepended to a stream, those tokens must be inserted, _in given order_, before the first token in the stream." (Note that the code, at the time of this request, moves _lead_ to the front of the array and then moves _byte_ to the front of the array, which leaves _byte_ ahead of _lead_.)

There may be other issues like this elsewhere in the multiple-byte encoder reference implementations.

* Fix bug in EUC-KR reference implementation

Fixes an "if" condition that was always true (it used `||` where `&&` was intended), so the range check on the byte now actually applies.

* Fix bug in Shift_JIS encoder

* Fix bug in Big5 reference implementation

* Fix Shift_JIS reference decoder

Updates the decoder to conform to the most recent Encoding Standard.
  • Loading branch information
peteroupc authored and Hexcles committed Jul 24, 2018
1 parent bb001a9 commit 0589266
Show file tree
Hide file tree
Showing 6 changed files with 19 additions and 18 deletions.
4 changes: 3 additions & 1 deletion encoding/legacy-mb-japanese/euc-jp/eucjp-decoder.js
Expand Up @@ -16,8 +16,9 @@ function eucjpDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";

var lead, byte, offset, ptr, cp;
var jis0212flag = false;
var eucjpLead = 0x00;
Expand Down Expand Up @@ -68,6 +69,7 @@ function eucjpDecoder(stream) {
}
if (byte >= 0x00 && byte <= 0x7f) bytes.unshift(byte);
out += "�";
continue;
}
if (byte >= 0x00 && byte <= 0x7f) {
out += dec2char(byte);
Expand Down
6 changes: 3 additions & 3 deletions encoding/legacy-mb-japanese/iso-2022-jp/iso2022jp-decoder.js
Expand Up @@ -25,7 +25,7 @@ function iso2022jpDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var endofstream = 2000000;
//bytes.push(endofstream)
var out = "";
Expand Down Expand Up @@ -193,8 +193,8 @@ function iso2022jpDecoder(stream) {
continue;
}
}
bytes.unshift(lead);
bytes.unshift(byte);
// Prepend the sequence (lead, byte) to the stream
bytes.unshift(lead, byte);
outFlag = false;
decState = outState;
out += "�";
Expand Down
4 changes: 2 additions & 2 deletions encoding/legacy-mb-japanese/shift_jis/sjis-decoder.js
Expand Up @@ -16,7 +16,7 @@ function sjisDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";
var lead, byte, leadoffset, offset, ptr, cp;
var sjisLead = 0x00;
Expand Down Expand Up @@ -46,7 +46,7 @@ function sjisDecoder(stream) {
else leadoffset = 0xc1;
if ((byte >= 0x40 && byte <= 0x7e) || (byte >= 0x80 && byte <= 0xfc))
ptr = (lead - leadoffset) * 188 + byte - offset;
if (cp == null && ptr >= 8836 && ptr <= 10528) {
if (ptr != null && ptr >= 8836 && ptr <= 10715) {
out += dec2char(0xe000 + ptr - 8836);
continue;
}
Expand Down
4 changes: 2 additions & 2 deletions encoding/legacy-mb-japanese/shift_jis/sjis-encoder.js
Expand Up @@ -50,7 +50,7 @@ function sjisEncoder(stream) {
var cp;
var finished = false;
var endofstream = 2000000;

var temp, offset, leadoffset, first, second;
while (!finished) {
if (cps.length == 0) cp = endofstream;
else cp = cps.shift();
Expand All @@ -73,7 +73,7 @@ function sjisEncoder(stream) {
}
if (cp >= 0xff61 && cp <= 0xff9f) {
temp = cp - 0xff61 + 0xa1;
out += temp.toString(16).toUpperCase();
out += " " + temp.toString(16).toUpperCase();
continue;
}
if (cp == 0x2212) {
Expand Down
4 changes: 2 additions & 2 deletions encoding/legacy-mb-korean/euc-kr/euckr-decoder.js
Expand Up @@ -16,7 +16,7 @@ function euckrDecoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";
var lead, byte, offset, ptr, cp;
var euckrLead = 0x00;
Expand All @@ -41,7 +41,7 @@ function euckrDecoder(stream) {
lead = euckrLead;
ptr = null;
euckrLead = 0x00;
if (byte >= 0x41 || byte <= 0xfe)
if (byte >= 0x41 && byte <= 0xfe)
ptr = (lead - 0x81) * 190 + (byte - 0x41);
if (ptr == null) cp = null;
else cp = euckr[ptr];
Expand Down
15 changes: 7 additions & 8 deletions encoding/legacy-mb-tchinese/big5/big5-decoder.js
Expand Up @@ -16,7 +16,7 @@ function big5Decoder(stream) {
stream = stream.replace(/%/g, " ");
stream = stream.replace(/[\s]+/g, " ").trim();
var bytes = stream.split(" ");
for (i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
for (var i = 0; i < bytes.length; i++) bytes[i] = parseInt(bytes[i], 16);
var out = "";
var lead, byte, offset, ptr, cp;
var big5lead = 0x00;
Expand Down Expand Up @@ -47,24 +47,23 @@ function big5Decoder(stream) {
ptr = (lead - 0x81) * 157 + (byte - offset);
// "If there is a row in the table below whose first column is pointer, return the two code points listed in its second column"
switch (ptr) {
case "1133":
case 1133:
out += "Ê̄";
continue;
case "1135":
case 1135:
out += "Ê̌";
continue;
case "1164":
case 1164:
out += "ê̄";
continue;
case "1166":
case 1166:
out += "ê̌";
continue;
}
if (ptr == null) cp = null;
else cp = big5[ptr];
if (cp == null && byte >= 0x00 && byte < 0x7f) {
if (cp == null && byte >= 0x00 && byte <= 0x7f) {
bytes.unshift(byte);
continue;
}
if (cp == null) {
out += "�";
Expand All @@ -73,7 +72,7 @@ function big5Decoder(stream) {
out += dec2char(cp);
continue;
}
if (byte >= 0x00 && byte < 0x7f) {
if (byte >= 0x00 && byte <= 0x7f) {
out += dec2char(byte);
continue;
}
Expand Down

0 comments on commit 0589266

Please sign in to comment.