-
Notifications
You must be signed in to change notification settings - Fork 3.4k
/
Copy pathbenchmark_utf8.cpp
69 lines (64 loc) · 2.13 KB
/
benchmark_utf8.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
// Copyright 2016 The Emscripten Authors. All rights reserved.
// Emscripten is available under two separate licenses, the MIT license and the
// University of Illinois/NCSA Open Source License. Both these licenses can be
// found in the LICENSE file.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <wchar.h>
#include <cassert>
#include <iostream>
#include <emscripten.h>
double test(const char *str) {
double res = EM_ASM_DOUBLE({
var t0 = _emscripten_get_now();
var str = Module.UTF8ToString($0);
var t1 = _emscripten_get_now();
// out('t: ' + (t1 - t0) + ', len(result): ' + str.length + ', result: ' + str.slice(0, 100));
return (t1-t0);
}, str);
return res;
}
// Contents of utf8_corpus.txt (NUL-terminated) and its size in bytes. Both are
// filled in by the first call to randomString() and then reused by every later
// call; the buffer is never freed.
char *utf8_corpus = 0;
long utf8_corpus_length = 0;

// Returns true if `c` is a UTF-8 continuation byte (bit pattern 10xxxxxx).
static bool isUtf8ContinuationByte(char c) {
  return ((unsigned char)c & 0xC0) == 0x80;
}

// Reads utf8_corpus.txt into utf8_corpus / utf8_corpus_length. The benchmark
// cannot do anything useful without a corpus, so a missing, empty or
// unreadable file is reported on stderr and the process is aborted instead of
// continuing with a NULL FILE* or a partially filled buffer.
static void loadUtf8Corpus() {
  FILE *handle = fopen("utf8_corpus.txt", "rb");
  if (!handle) {
    perror("utf8_corpus.txt");
    abort();
  }

  long size = -1;
  if (fseek(handle, 0, SEEK_END) == 0) {
    size = ftell(handle);
  }
  if (size <= 0 || fseek(handle, 0, SEEK_SET) != 0) {
    fprintf(stderr, "utf8_corpus.txt: file is empty or its size cannot be determined\n");
    fclose(handle);
    abort();
  }

  char *data = new char[size + 1];
  size_t bytesRead = fread(data, 1, (size_t)size, handle);
  fclose(handle);
  if (bytesRead != (size_t)size) {
    fprintf(stderr, "utf8_corpus.txt: short read\n");
    abort();
  }
  data[size] = '\0';

  utf8_corpus = data;
  utf8_corpus_length = size;
}

// Returns a newly allocated, NUL-terminated slice of the corpus that is at
// most `len` bytes long and starts at a randomly chosen position (via rand()).
//
// The slice is adjusted so that it never begins or ends in the middle of a
// multi-byte UTF-8 sequence:
//   - the start is moved forward past any continuation bytes;
//   - at the end, trailing continuation bytes and then a trailing lead byte
//     are removed. This is conservative: a multi-byte character that happens
//     to be complete at the very end of the slice is dropped as well.
// As a consequence the result may be shorter than `len`, and may be empty.
//
// `len` is expected to be positive. A `len` that is not smaller than the
// corpus is clamped to the corpus size.
//
// The caller owns the returned buffer and must release it with delete [].
char *randomString(int len) {
  assert(len > 0);
  if (len < 0) len = 0;  // Keep the allocation below sane when asserts are compiled out.

  if (!utf8_corpus) {
    loadUtf8Corpus();
  }

  long startIdx = 0;
  if (len < utf8_corpus_length) {
    startIdx = rand() % (utf8_corpus_length - len);
  } else {
    // The request covers the whole corpus (or more). Taking the modulo here
    // would divide by zero or produce a negative index.
    len = (int)utf8_corpus_length;
  }

  // Do not start in the middle of a character: skip forward to the next byte
  // that is not a continuation byte (the terminating NUL at the end of the
  // corpus also stops the scan), then shrink len to what is left.
  while (startIdx < utf8_corpus_length && isUtf8ContinuationByte(utf8_corpus[startIdx])) {
    ++startIdx;
  }
  if (len > utf8_corpus_length - startIdx) {
    // May become 0 when the scan ran to the end of the corpus; an empty
    // string is then returned.
    len = (int)(utf8_corpus_length - startIdx);
  }

  char *s = new char[len + 1];
  memcpy(s, utf8_corpus + startIdx, len);
  s[len] = '\0';

  // Do not end in the middle of a character: strip trailing continuation
  // bytes, then the lead byte (11xxxxxx) they belonged to.
  while (len > 0 && isUtf8ContinuationByte(s[len - 1])) {
    s[--len] = '\0';
  }
  while (len > 0 && ((unsigned char)s[len - 1] & 0xC0) == 0xC0) {
    s[--len] = '\0';
  }
  assert(len >= 0);
  return s;
}
// Benchmark driver: decodes 100000 random corpus slices through test() and
// prints both the sum of the per-call timings reported by test() and the
// overall time spent in the loop as measured with emscripten_get_now().
int main() {
  srand(time(NULL));

  double decodeTotal = 0;
  const double wallStart = emscripten_get_now();

  int iteration = 0;
  while (iteration < 100000) {
    // Request lengths cycle through 1-32 so that strings on both sides of the
    // length-16 cutoff at which text decoding switches to TextDecoder are
    // exercised (see UTF8ArrayToString).
    const int requestedLength = (iteration % 32) + 1;
    char *sample = randomString(requestedLength);
    decodeTotal += test(sample);
    delete [] sample;
    ++iteration;
  }

  const double wallEnd = emscripten_get_now();
  printf("OK. Time: %f (%f).\n", decodeTotal, wallEnd - wallStart);
  return 0;
}