-
Notifications
You must be signed in to change notification settings - Fork 1.7k
/
Copy pathbenchmarks.dart
358 lines (325 loc) · 11.2 KB
/
benchmarks.dart
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
// Copyright (c) 2023, the Dart project authors. Please see the AUTHORS file
// for details. All rights reserved. Use of this source code is governed by a
// BSD-style license that can be found in the LICENSE file.
//
/// Benchmark for UTF-8 decoding, complex cases.
///
/// This benckmark complements the `Utf8Decode` benchmarks by exploring
/// different scenarios. There are three axes of variation - input complexity,
/// conversion type, and polymorphism. The variantions are represented in the
/// benchmark name, roughly
///
/// Utf8DecodeComplex.<polymorphism>.<conversion>.<data>.<complexity>.10k
///
/// ### Complexity
///
/// The input complexity is explored by having input sequences that are the
/// UTF-8 encoding of (1) ASCII strings ('ascii'), (2) strings that can be
/// represented by one-byte strings ('1byte'), and (3) strings need to be
/// represented by two-byte strings ('2byte').
///
/// Both of of these benchmarks process 10k bytes of input:
///
/// Utf8DecodeComplex.mono.simple.ascii.10k
/// Utf8DecodeComplex.mono.simple.2byte.10k
///
/// The first has ascii bytes as input, the simplest case. The second requires
/// parsing the variable-length UTF-8 code points.
///
/// ### Conversion
///
/// The conversion variations are 'simple', for a one-shot conversion of a List
/// of bytes to a string, and 'chunked' for a conversion that uses the chunked
/// conversion API to process the 10k bytes in chunks of a few hundred
/// bytes. This exercises different paths through the decoder. We would expect
/// the chuncked version to be slower, but only by a few percent.
///
/// ### Data
///
/// The type of the input is part of the benchmark name. When the input is a
/// modifiable `Unit8List`, there is no `.<data>` part to the name. Otherwise:
///
/// .list - Input is a system List (`List.of`)
/// .unmodifiable - Input is an ummodifiable `Uint8List`.
///
/// ### Polymorphism
///
/// Polymorphism is explored by compiling several programs that run different
/// subsets of the benchmarks.
///
/// Whole-program optimizing compilers like AOT or dart2js can sometimes 'see'
/// that the conversion code is called with a single implementation of List and
/// optimize the code accordingly. This can produce faster code, but might be
/// too optimistic as prediction of real-world performance.
///
/// These two benchmarks run the same code, on a `Uint8List` containing the same
/// values. Other than the name, the benchmarks are identical:
///
/// Utf8DecodeComplex.mono.simple.ascii.10k
/// Utf8DecodeComplex.poly.simple.ascii.10k
///
/// The difference is that the 'mono' benchmark is part of a program that passes
/// only modifiable `Uint8List` lists to `utf8.decode`, whereas the 'poly'
/// benchmark is part of a program that passes several different List
/// implementation type to `utf8.decode`, including system lists (`List.of`) and
/// and unmodifiable Uint8Lists.
///
/// There are three monomorphic entry points which are called from the `main`
/// method of an otherwise trivial program - `mainMono1`, `mainMono2` and
/// `mainMono3`. `mainMono1` does conversions exclusively on the preferred data
/// type, `Uint8List`. `mainMono2` does conversions exclusively on the system
/// list type (`List.of`). `mainMono3` does conversions exclusively on
/// unmodifiable `Uint8List`.
///
/// The primary program calls the `mainPoly` entry point.
///
/// Benchmark results from the different programs can be collected into a single
/// suite.
library;
import 'dart:convert';
import 'dart:math' show min;
import 'dart:typed_data';
import 'package:benchmark_harness/benchmark_harness.dart';
import 'package:expect/expect.dart';
// ASCII values which are start the sequence for quick validation that
// conversion happened.
const bytes0tag = 0x30;
const bytes1tag = 0x31;
const bytes2tag = 0x32;
// Input which decodes to a string where all code units are 7-bit ASCII.
const bytes0 = [
bytes0tag, // 0, U+0030
0x48, 0x45, 0x4C, 0x4C, 0x4f, 0x0A, // "HELLO\n"
];
// Input which decodes to a string where all code units fit in 1 byte.
const bytes1 = [
bytes1tag, // 1, U+0031
0x41, // A, U+0040
0xC2, 0xB1, // ±, U+00B1
0xC3, 0xB7, // ÷, U+00F7
0x0A, // \n, U+000A
];
// Input which decodes to a string where some code units require 2 bytes.
const bytes2 = [
bytes2tag, // 2, U+0032
0x41, // A, U+0040
0xC2, 0xB1, // ±, U+00B1
0xC3, 0xB7, // ÷, U+00F7
0xC4, 0x90, // Đ, U+0111
0xE0, 0xA6, 0x86, // আ, U+0986
0xF0, 0x9F, 0x87, 0xB9, // 🇹, U+1F1F9
0xF0, 0x9F, 0x87, 0xBB, // 🇻, U+1F1FB 🇹🇻
];
const targetSize = 10000;
const chunkSize = 250;
void check(String result, List<int> sequence) {
// Each sequence starts with a different ASCII value so we can quickly 'look
// up' the expected length of the decoded expanded sequence.
final expectedLength = switch (sequence[0]) {
bytes0tag => targetSize,
bytes1tag => 7144,
bytes2tag => 5266,
_ => throw 'Unexpected sequence start: ${sequence[0]}',
};
Expect.equals(expectedLength, result.length);
}
/// Expands a sequence by repetition and padding to `targetSize` bytes.
Uint8List makeSequence(List<int> bytes) {
Expect.equals(
1,
bytes.length.gcd(chunkSize),
'Bad repeated size (${bytes.length}).'
' Repeated sequence should be co-prime with chunk size ($chunkSize)'
' to exercise more UTF-8 boundaries',
);
final repeats = List.filled(
targetSize ~/ bytes.length,
bytes,
).expand((byte) => byte);
final padding = List.filled(targetSize.remainder(bytes.length), 0); // NULs.
final sequence = Uint8List.fromList([...repeats, ...padding]);
Expect.equals(targetSize, sequence.length);
return sequence;
}
final Uint8List sequence0 = makeSequence(bytes0);
final Uint8List sequence1 = makeSequence(bytes1);
final Uint8List sequence2 = makeSequence(bytes2);
class Utf8DecodeBenchmarkBase extends BenchmarkBase {
Utf8DecodeBenchmarkBase(String name) : super('Utf8DecodeComplex.$name');
late int totalInputSize;
@override
void exercise() {
// Only a single run per measurement instead of the usual 10.
run();
}
@override
double measure() {
// Report time per input byte.
return super.measure() / totalInputSize;
}
@override
void report() {
// Report time in nanoseconds.
final double score = measure() * 1000.0;
print('$name(RunTime): $score ns.');
}
}
class Simple extends Utf8DecodeBenchmarkBase {
final List<int> sequence;
Simple(super.name, this.sequence) {
totalInputSize = sequence.length;
}
@override
void run() {
final result = utf8.decode(sequence);
check(result, sequence);
}
}
abstract class ChunkedBase extends Utf8DecodeBenchmarkBase {
final List<int> sequence;
late List<List<int>> chunks;
ChunkedBase(super.name, this.sequence);
List<int> slice(List<int> list, int start, int end);
@override
void setup() {
totalInputSize = sequence.length;
chunks = [];
for (int i = 0; i < totalInputSize; i += chunkSize) {
final chunk = slice(sequence, i, min(i + chunkSize, totalInputSize));
chunks.add(chunk);
}
}
@override
void run() {
late final String result;
final byteSink = const Utf8Decoder().startChunkedConversion(
StringConversionSink.withCallback((s) => result = s),
);
for (final chunk in chunks) {
byteSink.add(chunk);
}
byteSink.close();
check(result, sequence);
}
}
class Chunked extends ChunkedBase {
Chunked(super.name, super.sequence);
@override
List<int> slice(List<int> list, int start, int end) {
return list.sublist(start, end);
}
}
class ChunkedUnmodifiable extends ChunkedBase {
ChunkedUnmodifiable(super.name, Uint8List super.sequence);
@override
Uint8List slice(List<int> list, int start, int end) {
return Uint8List.fromList(list.sublist(start, end)).asUnmodifiableView();
}
}
void runAll(List<BenchmarkBase> benchmarks) {
// Warm up all the benchmarks to avoid overly optimistic results for the first
// few benchmarks due to temporary monomorphism in JIT compilers.
for (var bm in benchmarks) {
bm.setup();
bm.warmup();
}
for (var bm in benchmarks) {
bm.report();
}
}
void mainPoly() {
// Polymorphic: Inputs of several types.
final benchmarks = [
Simple('poly.simple.ascii.10k', sequence0),
Simple('poly.simple.1byte.10k', sequence1),
Simple('poly.simple.2byte.10k', sequence2),
Simple('poly.simple.list.ascii.10k', List.of(sequence0)),
Simple('poly.simple.list.1byte.10k', List.of(sequence1)),
Simple('poly.simple.list.2byte.10k', List.of(sequence2)),
Simple(
'poly.simple.unmodifiable.ascii.10k',
sequence0.asUnmodifiableView(),
),
Simple(
'poly.simple.unmodifiable.1byte.10k',
sequence1.asUnmodifiableView(),
),
Simple(
'poly.simple.unmodifiable.2byte.10k',
sequence2.asUnmodifiableView(),
),
Chunked('poly.chunked.ascii.10k', sequence0),
Chunked('poly.chunked.1byte.10k', sequence1),
Chunked('poly.chunked.2byte.10k', sequence2),
Chunked('poly.chunked.list.ascii.10k', List.of(sequence0)),
Chunked('poly.chunked.list.1byte.10k', List.of(sequence1)),
Chunked('poly.chunked.list.2byte.10k', List.of(sequence2)),
ChunkedUnmodifiable(
'poly.chunked.unmodifiable.ascii.10k',
sequence0.asUnmodifiableView(),
),
ChunkedUnmodifiable(
'poly.chunked.unmodifiable.1byte.10k',
sequence1.asUnmodifiableView(),
),
ChunkedUnmodifiable(
'poly.chunked.unmodifiable.2byte.10k',
sequence2.asUnmodifiableView(),
),
];
runAll(benchmarks);
}
void mainMono1() {
// Monomorphic: All inputs are `Uint8List`s.
final benchmarks = [
Simple('mono.simple.ascii.10k', sequence0),
Simple('mono.simple.1byte.10k', sequence1),
Simple('mono.simple.2byte.10k', sequence2),
Chunked('mono.chunked.ascii.10k', sequence0),
Chunked('mono.chunked.1byte.10k', sequence1),
Chunked('mono.chunked.2byte.10k', sequence2),
];
runAll(benchmarks);
}
void mainMono2() {
// Monomorphic: All inputs are ordinary (system) Lists.
final benchmarks = [
Simple('mono.simple.list.ascii.10k', List.of(sequence0)),
Simple('mono.simple.list.1byte.10k', List.of(sequence1)),
Simple('mono.simple.list.2byte.10k', List.of(sequence2)),
Chunked('mono.chunked.list.ascii.10k', List.of(sequence0)),
Chunked('mono.chunked.list.1byte.10k', List.of(sequence1)),
Chunked('mono.chunked.list.2byte.10k', List.of(sequence2)),
];
runAll(benchmarks);
}
void mainMono3() {
// Monomorphic: All inputs are unmodifiable `Uint8List`s.
final benchmarks = [
Simple(
'mono.simple.unmodifiable.ascii.10k',
sequence0.asUnmodifiableView(),
),
Simple(
'mono.simple.unmodifiable.1byte.10k',
sequence1.asUnmodifiableView(),
),
Simple(
'mono.simple.unmodifiable.2byte.10k',
sequence2.asUnmodifiableView(),
),
ChunkedUnmodifiable(
'mono.chunked.unmodifiable.ascii.10k',
sequence0.asUnmodifiableView(),
),
ChunkedUnmodifiable(
'mono.chunked.unmodifiable.1byte.10k',
sequence1.asUnmodifiableView(),
),
ChunkedUnmodifiable(
'mono.chunked.unmodifiable.2byte.10k',
sequence2.asUnmodifiableView(),
),
];
runAll(benchmarks);
}