-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathcreate-case-folding.ts
118 lines (99 loc) · 3.14 KB
/
create-case-folding.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
import * as fs from "fs";
import * as path from "path";
import { CharSet } from "../src/char-set";
import { printRanges } from "./util";
// Case folding tables from the Unicode 15.0.0 data package. Each table is used as a map from a code point to the
// code point it folds to; the "C" directory is loaded as the common foldings and the "S" directory as the simple ones.
const caseFoldingCommon: ReadonlyMap<number, number> = require("@unicode/unicode-15.0.0/Case_Folding/C/code-points");
const caseFoldingSimple: ReadonlyMap<number, number> = require("@unicode/unicode-15.0.0/Case_Folding/S/code-points");
// Generate one case folding module per canonicalization function: one covering the characters 0..0xFFFF and one
// covering 0..0x10FFFF. The functions called here are declared further down (function declarations are hoisted).
createCaseFoldingFile(canonicalizeIgnoreCaseUTF16, 0xffff, "UTF16", "utf16-case-folding.ts");
createCaseFoldingFile(canonicalizeIgnoreCaseUnicode, 0x10ffff, "Unicode", "unicode/case-folding.ts");
/**
 * Returns the canonical form of a single UTF-16 code unit by upper-casing it, following the steps of the
 * ECMAScript `Canonicalize` operation linked below.
 *
 * The code unit is returned unchanged when its upper case form is not exactly one code unit long, or when
 * upper-casing would turn a non-ASCII code unit (>= 128) into an ASCII one (< 128).
 *
 * @param ch The UTF-16 code unit to canonicalize.
 * @returns The canonical code unit for `ch`.
 */
function canonicalizeIgnoreCaseUTF16(ch: number): number {
    // https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
    const upper = String.fromCharCode(ch).toUpperCase();

    if (upper.length === 1) {
        const upperUnit = upper.charCodeAt(0);
        // A non-ASCII code unit must never be canonicalized to an ASCII one.
        const crossesIntoAscii = ch >= 128 && upperUnit < 128;
        if (!crossesIntoAscii) {
            return upperUnit;
        }
    }

    // Either the upper case form spans several code units (e.g. "ß" -> "SS") or it would cross into ASCII.
    return ch;
}
/**
 * Returns the canonical form of a Unicode code point by looking it up in the case folding tables.
 *
 * The common folding table takes precedence over the simple folding table. A code point that is in neither
 * table is its own canonical form.
 *
 * @param ch The code point to canonicalize.
 * @returns The folded code point, or `ch` itself if no folding is defined for it.
 */
function canonicalizeIgnoreCaseUnicode(ch: number): number {
    // https://tc39.es/ecma262/#sec-runtime-semantics-canonicalize-ch
    const commonFold = caseFoldingCommon.get(ch);
    if (commonFold !== undefined) {
        return commonFold;
    }

    const simpleFold = caseFoldingSimple.get(ch);
    return simpleFold !== undefined ? simpleFold : ch;
}
/**
 * Generates a TypeScript module containing the case folding data induced by the given canonicalization function
 * and writes it to `../src/js/<filename>` (relative to the directory of this script).
 *
 * Two characters are case variations of each other if and only if `canonicalize` maps both to the same value.
 * The generated module exports:
 *
 * - `<variablePrefix>CaseVarying`: the set of all characters that have at least one other case variation.
 * - `<variablePrefix>CaseFolding`: a map from each such character to the list of all its case variations
 *   (including the character itself).
 *
 * The number of case-varying characters is also logged to the console.
 *
 * @param canonicalize Maps a character to the canonical representative of its case folding class.
 * @param maxCharacter The largest character (inclusive) to consider. All characters from 0 up to and including
 * this value are processed.
 * @param variablePrefix The prefix of the names of the constants exported by the generated module.
 * @param filename The `/`-separated path of the generated file relative to `../src/js`. Its depth determines the
 * relative import path of the `char-set` module in the generated code.
 * @throws {Error} If the case folding computed for a character does not contain the character itself.
 */
function createCaseFoldingFile(
    canonicalize: (ch: number) => number,
    maxCharacter: number,
    variablePrefix: string,
    filename: string
): void {
    // Group all characters by their canonical character.
    const classesByCanonical = new Map<number, number[]>();
    for (let ch = 0; ch <= maxCharacter; ch++) {
        const canonical = canonicalize(ch);
        let members = classesByCanonical.get(canonical);
        if (members === undefined) {
            classesByCanonical.set(canonical, (members = []));
        }
        members.push(ch);
    }

    // caseFolding[ch] is the list of all characters in the same class as ch. The list is shared (same array
    // instance) between all members of the class.
    const caseFolding: number[][] = [];
    classesByCanonical.forEach(members => {
        members.forEach(c => {
            caseFolding[c] = members;
        });
    });

    // Validate every character and collect the ones that vary in case. The loop is inclusive of `maxCharacter`
    // so that it covers exactly the same characters as the grouping loop above. Counting happens here, rather
    // than inside the generator below, so the result does not depend on when the generator is consumed.
    const map: Record<number, number[]> = {};
    let count = 0;
    for (let ch = 0; ch <= maxCharacter; ch++) {
        const fold = caseFolding[ch];
        if (fold.indexOf(ch) === -1) {
            throw new Error(`The case folding of ${ch} does not include itself.`);
        }
        if (fold.length > 1) {
            map[ch] = fold;
            count++;
        }
    }

    const CASE_VARYING = CharSet.fromCharacters(
        maxCharacter,
        (function* () {
            // Yields the case-varying characters in ascending order.
            for (let ch = 0; ch <= maxCharacter; ch++) {
                if (caseFolding[ch].length > 1) {
                    yield ch;
                }
            }
        })()
    );

    console.log(`${variablePrefix}: ${count} characters vary in case`);

    // One "../" per path segment of `filename` to get from the generated file back to the `char-set` module.
    const charSetImportPrefix = "../".repeat(filename.split(/\//g).length);
    const caseVaryingRanges = printRanges(CASE_VARYING.ranges);
    // The JSON only contains digits, brackets, braces, commas, colons and double quotes, so it can be embedded
    // in a single-quoted string literal without escaping.
    const caseFoldingJson = JSON.stringify(map);

    const code = `/* eslint-disable */
// DO NOT EDIT!
// THIS FILE IS GENERATED BY scripts/create-case-folding.js
import { CharSet } from "${charSetImportPrefix}char-set";
/**
* A character set of all characters that have at least one case variation.
*/
export const ${variablePrefix}CaseVarying: CharSet = CharSet.empty(${maxCharacter}).union(${caseVaryingRanges});
/**
* A map for a given character to all it case variations. The list of case variations also includes the key character
* itself.
*
* If the given character do not have case variations, it will not be part of this map.
*/
export const ${variablePrefix}CaseFolding: Readonly<Partial<Record<number, readonly number[]>>> = JSON.parse('${caseFoldingJson}');
`;

    fs.writeFileSync(path.join(__dirname, "../src/js", filename), code, "utf-8");
}