-
Notifications
You must be signed in to change notification settings - Fork 26
/
regExpToNFA.ts
166 lines (155 loc) · 5.14 KB
/
regExpToNFA.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import {ACCEPT, START} from './NFA';
import clearFlag from './clearFlag';
import getAcceptStates from './getAcceptStates';
import getStartStates from './getStartStates';
import type {Edge, Flags, NFA} from './NFA';
import type {Node} from '../RegExp/RegExpParser';
import setFlag from './setFlag';
export default function regExpToNFA(
node: Node,
genId: () => number = defaultGenId(),
): NFA {
if (node.kind === 'Alternate' || node.kind === 'CharacterClass') {
// CharacterClass is just syntax sugar for an Alternate, so we handle them
// together.
if (node.kind === 'CharacterClass' && node.negated) {
throw new Error('regExpToNFA(): Cannot process negated CharacterClass');
}
const children = node.children.map((child) => {
return regExpToNFA(child, genId);
});
// Return a new start node with epsilon transitions to start nodes of each child.
return {
id: genId(),
edges: children.flatMap((child) => {
return getStartStates(child).map((start) => {
start.flags = clearFlag(start.flags, START);
return epsilonTo(start);
});
}),
flags: START,
};
} else if (
node.kind === 'Anything' ||
node.kind === 'Atom' ||
node.kind === 'Range'
) {
return {
id: genId(),
edges: [
{
on: node,
to: {
id: genId(),
edges: [],
flags: ACCEPT,
},
},
],
flags: START,
};
} else if (node.kind === 'Repeat') {
const child = regExpToNFA(node.child, genId);
if (node.minimum === 0 && node.maximum === 1) {
// "?" quantifier.
getStartStates(child).forEach((child) => {
child.flags = setFlag(child.flags, ACCEPT);
});
return child;
} else if (node.minimum === 0 && node.maximum === Infinity) {
// "*" quantifier (AKA Kleene star).
const start = {
id: genId(),
edges: getStartStates(child).map((start) => {
start.flags = clearFlag(start.flags, START);
return epsilonTo(start);
}),
flags: (START | ACCEPT) as Flags,
};
getAcceptStates(child).forEach((child) => {
child.edges.push(epsilonTo(start));
});
return start;
} else if (node.minimum === 1 && node.maximum === Infinity) {
// "+" quantifier.
const startStates = getStartStates(child);
getAcceptStates(child).forEach((accept) => {
startStates.forEach((start) => {
// TODO avoid pushing duplicate edges here
accept.edges.push(epsilonTo(start));
});
});
return child;
} else {
// {3} or {3,6} etc quantifier.
// "Clone" child by creating additional NFAs (same shape, different state
// `id`s).
const children: Array<NFA> = [child];
for (let i = 0; i < node.maximum - 1; i++) {
children.push(regExpToNFA(node.child, genId));
}
// For required items, link to next item by taking every accept state and
// turning it into a non-final state with an epsilon transition to the
// start state fo the next (ie. same as 'Sequence').
for (let i = 0; i < node.minimum - 1; i++) {
const child = children[i];
const next = children[i + 1];
const startStates = getStartStates(next);
getAcceptStates(child).forEach((state) => {
state.flags = clearFlag(state.flags, ACCEPT);
startStates.forEach((start) => {
start.flags = clearFlag(start.flags, START);
state.edges.push(epsilonTo(start));
});
});
}
// For optional items, allow intermediate accept states to remain,
// and link them together using epsilon transitions.
for (let i = node.minimum - 1; i < node.maximum - 1; i++) {
const child = children[i];
const next = children[i + 1];
const startStates = getStartStates(next);
getAcceptStates(child).forEach((state) => {
startStates.forEach((start) => {
start.flags = clearFlag(start.flags, START);
state.edges.push(epsilonTo(start));
});
});
}
return children[0];
}
} else if (node.kind === 'Sequence') {
// For each child, take every accept state and turn it into a non-final
// state with an epsilon transition to the start states of the next child.
const children = node.children.map((child) => {
return regExpToNFA(child, genId);
});
for (let i = children.length - 2; i >= 0; i--) {
const child = children[i];
const next = children[i + 1];
const startStates = getStartStates(next);
startStates.forEach((start) => {
start.flags = clearFlag(start.flags, START);
});
getAcceptStates(child).forEach((accept) => {
accept.flags = clearFlag(accept.flags, ACCEPT);
startStates.forEach((start) => {
accept.edges.push(epsilonTo(start));
});
});
}
return children[0];
} else {
throw new Error('regExpToNFA(): Unexpected `node.kind`');
}
}
function defaultGenId() {
let id = 0;
return () => id++;
}
function epsilonTo(to: NFA): Edge {
return {
on: null,
to,
};
}