-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathregex-stress-test.ts
102 lines (90 loc) · 3.21 KB
/
regex-stress-test.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import { assert } from "chai";
import { ParseOptions, Parser } from "../src/js";
import { PrismRegexes } from "./helper/prism-regex-data";
import { NFA } from "../src/nfa";
import { DFA, ReadonlyDFA } from "../src/dfa";
import { Expression, NoParent } from "../src/ast";
import { CONFIG_ALL_PARSE_OPTIONS, CONFIG_RUN_STRESS_TEST } from "./helper/config";
import { TooManyNodesError } from "../src/errors";
/**
* Setting this to `true` will enable the check that verifies that the language of the generated RE from `toRegex` is
* the same as the language of the NFA/DFA that created it.
*
* The generated RE tends to create NFA that are both large and very non-deterministic. This means that the conversion
* to DFA will create __A LOT__ nodes (sometimes >10k). Both creating and minimizing the DFA takes time (up to a minute
* for a single regex).
*
* Only set this to `true` if you have the time to run it.
*/
const CHECK_RE_LANGUAGE = false as boolean;
const maxNodes = 100_000;
function equalLanguage(expected: ReadonlyDFA, re: NoParent<Expression>, maxCharacter: number): void {
const nfa = NFA.fromRegex(re, { maxCharacter }, { assertions: "disable" });
const dfa = DFA.fromFA(nfa, new DFA.LimitedNodeFactory(maxNodes));
dfa.minimize();
assert.isTrue(expected.structurallyEqual(dfa));
}
const parseOptions: ParseOptions[] = [];
if (CONFIG_ALL_PARSE_OPTIONS) {
for (const assertions of ["parse", "disable", "unknown"] as ParseOptions["assertions"][]) {
for (const backreferences of ["disable", "unknown"] as ParseOptions["backreferences"][]) {
for (const maxBackreferenceWords of [0, 1, 10, 100]) {
for (const simplify of [true, false]) {
parseOptions.push({
assertions,
backreferences,
maxBackreferenceWords,
maxNodes,
simplify,
});
}
}
}
}
} else {
parseOptions.push({ backreferences: "disable", maxNodes });
}
describe("Regex stress test", function () {
if (!CONFIG_RUN_STRESS_TEST) {
return;
}
this.timeout(60 * 1000); // timeout after a minute
parseOptions.forEach(options => {
describe("Parser config: " + JSON.stringify(options), function () {
PrismRegexes.forEach((literal, index) => {
let patternPreview = String(literal);
if (patternPreview.length > 80) {
patternPreview = patternPreview.substr(0, 80) + "...";
}
it(`[${index}]: ${patternPreview}`, function () {
try {
const { expression, maxCharacter } = Parser.fromLiteral(literal).parse(options);
const nfa = NFA.fromRegex(
expression,
{ maxCharacter },
{ assertions: "disable", unknowns: "disable" },
new NFA.LimitedNodeFactory(maxNodes)
);
nfa.countNodes();
const re1 = nfa.toRegex({ maxNodes });
const dfa = DFA.fromFA(nfa, new DFA.LimitedNodeFactory(maxNodes));
const dfaOriginalCount = dfa.countNodes();
dfa.minimize();
assert.isTrue(dfa.countNodes() <= dfaOriginalCount);
if (CHECK_RE_LANGUAGE) {
equalLanguage(dfa, re1, maxCharacter);
}
const re2 = dfa.toRegex({ maxNodes });
if (CHECK_RE_LANGUAGE) {
equalLanguage(dfa, re2, maxCharacter);
}
} catch (e) {
if (!(e instanceof TooManyNodesError)) {
throw e;
}
}
});
});
});
});
});