Skip to content

Commit a2ec566

Browse files
committed
automata: respect new 'which_captures' option
The NFA compiler now implements the 'All', 'Implicit' and 'None' options. We also add some targeted unit tests to confirm basic behavior.
1 parent e10c9d7 commit a2ec566

File tree

1 file changed

+87
-3
lines changed

1 file changed

+87
-3
lines changed

regex-automata/src/nfa/thompson/compiler.rs

Lines changed: 87 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1094,8 +1094,13 @@ impl Compiler {
10941094
name: Option<&str>,
10951095
expr: &Hir,
10961096
) -> Result<ThompsonRef, BuildError> {
1097-
if self.config.get_which_captures().is_none() {
1098-
return self.c(expr);
1097+
match self.config.get_which_captures() {
1098+
// No capture states means we always skip them.
1099+
WhichCaptures::None => return self.c(expr),
1100+
// Implicit captures states means we only add when index==0 since
1101+
// index==0 implies the group is implicit.
1102+
WhichCaptures::Implicit if index > 0 => return self.c(expr),
1103+
_ => {}
10991104
}
11001105

11011106
let start = self.add_capture_start(index, name)?;
@@ -1841,7 +1846,7 @@ mod tests {
18411846

18421847
use crate::{
18431848
nfa::thompson::{SparseTransitions, State, Transition, NFA},
1844-
util::primitives::{PatternID, StateID},
1849+
util::primitives::{PatternID, SmallIndex, StateID},
18451850
};
18461851

18471852
use super::*;
@@ -1903,6 +1908,15 @@ mod tests {
19031908
}
19041909
}
19051910

1911+
fn s_cap(next: usize, pattern: usize, index: usize, slot: usize) -> State {
1912+
State::Capture {
1913+
next: sid(next),
1914+
pattern_id: pid(pattern),
1915+
group_index: SmallIndex::new(index).unwrap(),
1916+
slot: SmallIndex::new(slot).unwrap(),
1917+
}
1918+
}
1919+
19061920
fn s_fail() -> State {
19071921
State::Fail
19081922
}
@@ -2144,4 +2158,74 @@ mod tests {
21442158
NFA::compiler().configure(config).build_from_hir(&hir).unwrap();
21452159
assert_eq!(nfa.states(), &[s_fail(), s_match(0)]);
21462160
}
2161+
2162+
/// With `WhichCaptures::All` and no unanchored prefix, compiling `a(b)c`
/// must emit capture states for both group 0 (slots 0 and 1) and the
/// explicit group 1 (slots 2 and 3), and the NFA's group info must report
/// two groups.
#[test]
fn compile_captures_all() {
    let config = NFA::config()
        .unanchored_prefix(false)
        .which_captures(WhichCaptures::All);
    let nfa = NFA::compiler().configure(config).build("a(b)c").unwrap();

    // Arguments to `s_cap` are (next, pattern, group index, slot).
    let expected = [
        s_cap(1, 0, 0, 0),
        s_byte(b'a', 2),
        s_cap(3, 0, 1, 2),
        s_byte(b'b', 4),
        s_cap(5, 0, 1, 3),
        s_byte(b'c', 6),
        s_cap(7, 0, 0, 1),
        s_match(0),
    ];
    assert_eq!(nfa.states(), &expected);
    assert_eq!(2, nfa.group_info().all_group_len());
}
2188+
2189+
/// With `WhichCaptures::Implicit` and no unanchored prefix, compiling
/// `a(b)c` must emit capture states only for group 0 (slots 0 and 1); the
/// explicit group around `b` contributes no capture states, and the NFA's
/// group info must report a single group.
#[test]
fn compile_captures_implicit() {
    let config = NFA::config()
        .unanchored_prefix(false)
        .which_captures(WhichCaptures::Implicit);
    let nfa = NFA::compiler().configure(config).build("a(b)c").unwrap();

    // Arguments to `s_cap` are (next, pattern, group index, slot).
    let expected = [
        s_cap(1, 0, 0, 0),
        s_byte(b'a', 2),
        s_byte(b'b', 3),
        s_byte(b'c', 4),
        s_cap(5, 0, 0, 1),
        s_match(0),
    ];
    assert_eq!(nfa.states(), &expected);
    assert_eq!(1, nfa.group_info().all_group_len());
}
2213+
2214+
/// With `WhichCaptures::None` and no unanchored prefix, compiling `a(b)c`
/// must emit no capture states at all (only the three byte transitions and
/// the match state), and the NFA's group info must report zero groups.
#[test]
fn compile_captures_none() {
    let config = NFA::config()
        .unanchored_prefix(false)
        .which_captures(WhichCaptures::None);
    let nfa = NFA::compiler().configure(config).build("a(b)c").unwrap();

    let expected =
        [s_byte(b'a', 1), s_byte(b'b', 2), s_byte(b'c', 3), s_match(0)];
    assert_eq!(nfa.states(), &expected);
    assert_eq!(0, nfa.group_info().all_group_len());
}
21472231
}

0 commit comments

Comments
 (0)