Skip to content

Commit 514a33f

Browse files
committed Jun 16, 2019
use TextEncoder to encode string if available
1 parent 1ad2e68 commit 514a33f

File tree

6 files changed

+71
-14
lines changed

6 files changed

+71
-14
lines changed
 

‎benchmark/decode-string.ts

+2 -2
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
/* eslint-disable no-console */
2-
import { utf8Encode, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/utils/utf8";
2+
import { utf8EncodeJs, utf8Count, utf8DecodeJs, utf8DecodeTD } from "../src/utils/utf8";
33
import { utf8DecodeWasm } from "../src/wasmFunctions";
44

55
// @ts-ignore
@@ -13,7 +13,7 @@ for (const baseStr of ["A", "あ", "🌏"]) {
1313
for (const str of dataSet) {
1414
const byteLength = utf8Count(str);
1515
const bytes = new Uint8Array(new ArrayBuffer(byteLength));
16-
utf8Encode(str, bytes, 0);
16+
utf8EncodeJs(str, bytes, 0);
1717

1818
console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
1919

‎benchmark/encode-string.ts

+33
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,33 @@
1+
/* eslint-disable no-console */
2+
import { utf8EncodeJs, utf8Count, utf8EncodeTE } from "../src/utils/utf8";
3+
4+
// @ts-ignore
5+
import Benchmark from "benchmark";
6+
7+
for (const baseStr of ["A", "あ", "🌏"]) {
8+
const dataSet = [10, 100, 200, 1_000, 10_000, 100_000].map((n) => {
9+
return baseStr.repeat(n);
10+
});
11+
12+
for (const str of dataSet) {
13+
const byteLength = utf8Count(str);
14+
const buffer = new Uint8Array(byteLength);
15+
16+
console.log(`\n## string "${baseStr}" x ${str.length} (byteLength=${byteLength})\n`);
17+
18+
const suite = new Benchmark.Suite();
19+
20+
suite.add("utf8EncodeJs", () => {
21+
utf8EncodeJs(str, buffer, 0);
22+
});
23+
24+
suite.add("utf8DecodeTE", () => {
25+
utf8EncodeTE(str, buffer, 0);
26+
});
27+
suite.on("cycle", (event: any) => {
28+
console.log(String(event.target));
29+
});
30+
31+
suite.run();
32+
}
33+
}

‎package.json

+4 -4
Original file line number | Diff line number | Diff line change
@@ -14,15 +14,15 @@
1414
"prepublishOnly": "run-p 'test:dist:*' && npm run test:browser",
1515
"clean": "rimraf build dist dist.*",
1616
"test": "mocha 'test/**/*.test.ts'",
17-
"test:purejs": "TEXT_DECODER=never MSGPACK_WASM=never mocha 'test/**/*.test.ts'",
18-
"test:wasm": "npm run asbuild:production && TEXT_DECODER=never MSGPACK_WASM=force mocha 'test/**/*.test.ts'",
19-
"test:td": "TEXT_DECODER=force mocha 'test/**/*.test.ts'",
17+
"test:purejs": "TEXT_ENCODING=never MSGPACK_WASM=never mocha 'test/**/*.test.ts'",
18+
"test:wasm": "npm run asbuild:production && TEXT_ENCODING=never MSGPACK_WASM=force mocha 'test/**/*.test.ts'",
19+
"test:te": "TEXT_ENCODING=force mocha 'test/**/*.test.ts'",
2020
"test:dist:purejs": "TS_NODE_PROJECT=tsconfig.test-dist-es5-purejs.json npm run test:purejs -- --reporter=dot",
2121
"test:dist:wasm": "TS_NODE_PROJECT=tsconfig.test-dist-es5-wasm.json npm run test:wasm -- --reporter=dot",
2222
"test:cover": "npm run cover:clean && npm-run-all 'test:cover:*' && npm run cover:report",
2323
"test:cover:purejs": "npx nyc --no-clean npm run test:purejs",
2424
"test:cover:wasm": "npx nyc --no-clean npm run test:wasm",
25-
"test:cover:td": "npx nyc --no-clean npm run test:td",
25+
"test:cover:te": "npx nyc --no-clean npm run test:te",
2626
"cover:clean": "rimraf .nyc_output coverage/",
2727
"cover:report": "nyc report --reporter=text-summary --reporter=html --reporter=json",
2828
"test:browser": "karma start --single-run",

‎src/Decoder.ts

+2 -2
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import { prettyByte } from "./utils/prettyByte";
22
import { ExtensionCodec } from "./ExtensionCodec";
33
import { getInt64, getUint64 } from "./utils/int";
4-
import { utf8DecodeJs, TEXT_DECODER_AVAILABLE, TEXT_DECODER_THRESHOLD, utf8DecodeTD } from "./utils/utf8";
4+
import { utf8DecodeJs, TEXT_ENCODING_AVAILABLE, TEXT_DECODER_THRESHOLD, utf8DecodeTD } from "./utils/utf8";
55
import { createDataView, ensureUint8Array } from "./utils/typedArrays";
66
import { WASM_AVAILABLE, WASM_STR_THRESHOLD, utf8DecodeWasm } from "./wasmFunctions";
77

@@ -482,7 +482,7 @@ export class Decoder {
482482

483483
const offset = this.pos + headerOffset;
484484
let object: string;
485-
if (TEXT_DECODER_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
485+
if (TEXT_ENCODING_AVAILABLE && byteLength > TEXT_DECODER_THRESHOLD) {
486486
object = utf8DecodeTD(this.bytes, offset, byteLength);
487487
} else if (WASM_AVAILABLE && byteLength > WASM_STR_THRESHOLD) {
488488
object = utf8DecodeWasm(this.bytes, offset, byteLength);

‎src/Encoder.ts

+9 -3
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import { utf8Encode, utf8Count } from "./utils/utf8";
1+
import { utf8EncodeJs, utf8Count, TEXT_ENCODING_AVAILABLE, TEXT_ENCODER_THRESHOLD, utf8EncodeTE } from "./utils/utf8";
22
import { ExtensionCodec } from "./ExtensionCodec";
33
import { setInt64, setUint64 } from "./utils/int";
44
import { ensureUint8Array } from "./utils/typedArrays";
@@ -148,7 +148,13 @@ export class Encoder {
148148
const maxHeaderSize = 1 + 4;
149149
const strLength = object.length;
150150

151-
if (WASM_AVAILABLE && strLength > WASM_STR_THRESHOLD) {
151+
if (TEXT_ENCODING_AVAILABLE && strLength > TEXT_ENCODER_THRESHOLD) {
152+
const byteLength = utf8Count(object);
153+
this.ensureBufferSizeToWrite(maxHeaderSize + byteLength);
154+
this.writeStringHeader(byteLength);
155+
utf8EncodeTE(object, this.bytes, this.pos);
156+
this.pos += byteLength;
157+
} else if (WASM_AVAILABLE && strLength > WASM_STR_THRESHOLD) {
152158
// ensure max possible size
153159
const maxSize = maxHeaderSize + strLength * 4;
154160
this.ensureBufferSizeToWrite(maxSize);
@@ -161,7 +167,7 @@ export class Encoder {
161167
const byteLength = utf8Count(object);
162168
this.ensureBufferSizeToWrite(maxHeaderSize + byteLength);
163169
this.writeStringHeader(byteLength);
164-
utf8Encode(object, this.bytes, this.pos);
170+
utf8EncodeJs(object, this.bytes, this.pos);
165171
this.pos += byteLength;
166172
}
167173
}

‎src/utils/utf8.ts

+21 -3
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,6 @@
1+
export const TEXT_ENCODING_AVAILABLE =
2+
process.env.TEXT_ENCODING !== "never" && (typeof TextEncoder !== "undefined" && typeof TextDecoder !== "undefined");
3+
14
export function utf8Count(str: string): number {
25
const strLength = str.length;
36

@@ -38,7 +41,7 @@ export function utf8Count(str: string): number {
3841
return byteLength;
3942
}
4043

41-
export function utf8Encode(str: string, output: Uint8Array, outputOffset: number): void {
44+
export function utf8EncodeJs(str: string, output: Uint8Array, outputOffset: number): void {
4245
const strLength = str.length;
4346
let offset = outputOffset;
4447
let pos = 0;
@@ -81,6 +84,22 @@ export function utf8Encode(str: string, output: Uint8Array, outputOffset: number
8184
}
8285
}
8386

87+
const sharedTextEncoder = TEXT_ENCODING_AVAILABLE ? new TextEncoder() : undefined;
88+
export const TEXT_ENCODER_THRESHOLD = process.env.TEXT_ENCODING !== "force" ? 200 : 0;
89+
90+
function utf8EncodeTEencode(str: string, output: Uint8Array, outputOffset: number): void {
91+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
92+
output.set(sharedTextEncoder!.encode(str), outputOffset);
93+
}
94+
95+
function utf8EncodeTEencodeInto(str: string, output: Uint8Array, outputOffset: number): void {
96+
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
97+
sharedTextEncoder!.encodeInto(str, output.subarray(outputOffset));
98+
}
99+
100+
export const utf8EncodeTE =
101+
sharedTextEncoder && sharedTextEncoder.encodeInto ? utf8EncodeTEencodeInto : utf8EncodeTEencode;
102+
84103
const CHUNK_SIZE = 0x10_000;
85104

86105
export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength: number): string {
@@ -132,8 +151,7 @@ export function utf8DecodeJs(bytes: Uint8Array, inputOffset: number, byteLength:
132151
return result;
133152
}
134153

135-
const sharedTextDecoder = typeof TextDecoder !== "undefined" ? new TextDecoder() : null;
136-
export const TEXT_DECODER_AVAILABLE = process.env.TEXT_DECODER !== "never" && !!sharedTextDecoder;
154+
const sharedTextDecoder = TEXT_ENCODING_AVAILABLE ? new TextDecoder() : null;
137155
export const TEXT_DECODER_THRESHOLD = process.env.TEXT_DECODER !== "force" ? 200 : 0;
138156

139157
export function utf8DecodeTD(bytes: Uint8Array, inputOffset: number, byteLength: number): string {

0 commit comments

Comments
 (0)
Failed to load comments.