-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFsa.Search.cs
85 lines (78 loc) · 2.9 KB
/
Fsa.Search.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
namespace SourceGenerator;
public partial class Fsa
{
/// <summary>
/// Traverses the FSA in a breadth-first fashion, allowing vectorized
/// traversal of a frontier in case of nondeterministic automata.
///
/// A "frontier" refers to the set of nodes currently being visited. An
/// "epsilon closure" refers to nodes related to the frontier (and the
/// frontier itself) accessible without consuming any characters. Acceptance
/// states are achieved if any node on the frontier or any node in the
/// resulting epsilon closure has a token ID in its accept list.
///
/// Any reached accept state will update the "longest end" tracker, and the
/// last recorded longest match is returned on the first invalid state.
/// </summary>
public (int accepted, string match) Search(string text, int startIndex)
{
// Used for deterministic paths
var node = this;
// Used once determinism ends
List<Fsa> closure = null;
int textIndex = startIndex, longestEnd = -1, match = 0;
var nfaMode = false;
for (;;)
{
if (!nfaMode && (node?.Epsilon?.Count ?? 0) > 0)
{
nfaMode = true;
closure = node.EpsilonClosure().Distinct().ToList();
}
if (nfaMode)
{
// Any accept state in the frontier is a valid match
var acceptState = closure.Where((it) => it.Accepts.Count > 0).ToList();
if (acceptState.Count > 0)
{
longestEnd = textIndex;
match = acceptState.SelectMany((it) => it.Accepts).Min();
}
// "Invalid state" due to end of input or lack of next states
if (textIndex >= text.Length || closure.Count == 0)
{
break;
}
} else
{
// Any accept state in the frontier is a valid match
if ((node?.Accepts?.Count ?? 0) > 0)
{
longestEnd = textIndex;
match = node.Accepts.Min();
}
// "Invalid state" due to end of input or lack of next states
if (textIndex >= text.Length || node is null)
{
break;
}
}
var c = text[textIndex++];
if (nfaMode)
{
var frontier = closure.SelectMany((it) => it.AdjacentSet(c)).Distinct();
closure = frontier.SelectMany((it) => it.EpsilonClosure()).Distinct().ToList();
} else
{
node = node.Next.GetValueOrDefault(c);
}
}
if (longestEnd == -1)
{
return (0, "");
} else
{
return (match, text[startIndex..longestEnd]);
}
}
}