@@ -30,7 +30,7 @@ pub struct Config {
30
30
reverse : Option < bool > ,
31
31
nfa_size_limit : Option < Option < usize > > ,
32
32
shrink : Option < bool > ,
33
- captures : Option < bool > ,
33
+ which_captures : Option < WhichCaptures > ,
34
34
look_matcher : Option < LookMatcher > ,
35
35
#[ cfg( test) ]
36
36
unanchored_prefix : Option < bool > ,
@@ -178,12 +178,15 @@ impl Config {
178
178
/// ```
179
179
/// use regex_automata::{
180
180
/// dfa::{self, Automaton},
181
- /// nfa::thompson::NFA,
181
+ /// nfa::thompson::{NFA, WhichCaptures},
182
182
/// HalfMatch, Input,
183
183
/// };
184
184
///
185
185
/// let dfa = dfa::dense::Builder::new()
186
- /// .thompson(NFA::config().captures(false).reverse(true))
186
+ /// .thompson(NFA::config()
187
+ /// .which_captures(WhichCaptures::None)
188
+ /// .reverse(true)
189
+ /// )
187
190
/// .build("baz[0-9]+")?;
188
191
/// let expected = Some(HalfMatch::must(0, 3));
189
192
/// assert_eq!(
@@ -277,10 +280,12 @@ impl Config {
277
280
///
278
281
/// ```
279
282
/// # if cfg!(miri) { return Ok(()); } // miri takes too long
280
- /// use regex_automata::nfa::thompson::NFA;
283
+ /// use regex_automata::nfa::thompson::{NFA, WhichCaptures};
281
284
///
282
285
/// // Currently we have to disable captures when enabling reverse NFA.
283
- /// let config = NFA::config().captures(false).reverse(true);
286
+ /// let config = NFA::config()
287
+ /// .which_captures(WhichCaptures::None)
288
+ /// .reverse(true);
284
289
/// let not_shrunk = NFA::compiler()
285
290
/// .configure(config.clone().shrink(false))
286
291
/// .build(r"\w")?;
@@ -314,18 +319,70 @@ impl Config {
314
319
/// require capturing groups to be present in the NFA. Building a Pike VM
315
320
/// with an NFA without capturing groups will result in an error.
316
321
///
322
+ /// (Note that since this method is deprecated, the example below uses
323
+ /// [`Config::which_captures`] to disable capture states.)
324
+ ///
317
325
/// ```
318
- /// use regex_automata::nfa::thompson::{pikevm::PikeVM, NFA};
326
+ /// use regex_automata::nfa::thompson::{
327
+ /// pikevm::PikeVM,
328
+ /// NFA,
329
+ /// WhichCaptures,
330
+ /// };
319
331
///
320
332
/// let nfa = NFA::compiler()
321
- /// .configure(NFA::config().captures(false ))
333
+ /// .configure(NFA::config().which_captures(WhichCaptures::None))
322
334
/// .build(r"[a-z]+")?;
323
335
/// assert!(PikeVM::new_from_nfa(nfa).is_err());
324
336
///
325
337
/// # Ok::<(), Box<dyn std::error::Error>>(())
326
338
/// ```
327
- pub fn captures ( mut self , yes : bool ) -> Config {
328
- self . captures = Some ( yes) ;
339
+ #[ deprecated( since = "0.3.5" , note = "use which_captures instead" ) ]
340
+ pub fn captures ( self , yes : bool ) -> Config {
341
+ self . which_captures ( if yes {
342
+ WhichCaptures :: All
343
+ } else {
344
+ WhichCaptures :: None
345
+ } )
346
+ }
347
+
348
+ /// Configures what kinds of capture groups are compiled into
349
+ /// [`State::Capture`](crate::nfa::thompson::State::Capture) states in a
350
+ /// Thompson NFA.
351
+ ///
352
+ /// Currently, using any option except for [`WhichCaptures::None`] requires
353
+ /// disabling the [`reverse`](Config::reverse) setting. If both are
354
+ /// enabled, then the compiler will return an error. It is expected that
355
+ /// this limitation will be lifted in the future.
356
+ ///
357
+ /// This is set to [`WhichCaptures::All`] by default. Callers may wish to
358
+ /// use [`WhichCaptures::Implicit`] in cases where one wants to avoid the
359
+ /// overhead of capture states for explicit groups. Usually this occurs
360
+ /// when one wants to use the `PikeVM` only for determining the overall
361
+ /// match. Otherwise, the `PikeVM` could use much more memory than is
362
+ /// necessary.
363
+ ///
364
+ /// # Example
365
+ ///
366
+ /// This example demonstrates that some regex engines, like the Pike VM,
367
+ /// require capturing groups to be present in the NFA. Building a Pike VM
368
+ /// with an NFA without capturing groups will result in an error.
369
+ ///
370
+ /// ```
371
+ /// use regex_automata::nfa::thompson::{
372
+ /// pikevm::PikeVM,
373
+ /// NFA,
374
+ /// WhichCaptures,
375
+ /// };
376
+ ///
377
+ /// let nfa = NFA::compiler()
378
+ /// .configure(NFA::config().which_captures(WhichCaptures::None))
379
+ /// .build(r"[a-z]+")?;
380
+ /// assert!(PikeVM::new_from_nfa(nfa).is_err());
381
+ ///
382
+ /// # Ok::<(), Box<dyn std::error::Error>>(())
383
+ /// ```
384
+ pub fn which_captures ( mut self , which_captures : WhichCaptures ) -> Config {
385
+ self . which_captures = Some ( which_captures) ;
329
386
self
330
387
}
331
388
@@ -405,8 +462,14 @@ impl Config {
405
462
}
406
463
407
464
/// Return whether NFA compilation is configured to produce capture states.
465
+ #[ deprecated( since = "0.3.5" , note = "use get_which_captures instead" ) ]
408
466
pub fn get_captures ( & self ) -> bool {
409
- self . captures . unwrap_or ( true )
467
+ self . get_which_captures ( ) . is_any ( )
468
+ }
469
+
470
+ /// Return what kinds of capture states will be compiled into an NFA.
471
+ pub fn get_which_captures ( & self ) -> WhichCaptures {
472
+ self . which_captures . unwrap_or ( WhichCaptures :: All )
410
473
}
411
474
412
475
/// Return the look-around matcher for this NFA.
@@ -439,14 +502,65 @@ impl Config {
439
502
reverse : o. reverse . or ( self . reverse ) ,
440
503
nfa_size_limit : o. nfa_size_limit . or ( self . nfa_size_limit ) ,
441
504
shrink : o. shrink . or ( self . shrink ) ,
442
- captures : o. captures . or ( self . captures ) ,
505
+ which_captures : o. which_captures . or ( self . which_captures ) ,
443
506
look_matcher : o. look_matcher . or_else ( || self . look_matcher . clone ( ) ) ,
444
507
#[ cfg( test) ]
445
508
unanchored_prefix : o. unanchored_prefix . or ( self . unanchored_prefix ) ,
446
509
}
447
510
}
448
511
}
449
512
513
+ /// A configuration indicating which kinds of
514
+ /// [`State::Capture`](crate::nfa::thompson::State::Capture) states to include.
515
+ ///
516
+ /// This configuration can be used with [`Config::which_captures`] to control
517
+ /// which capture states are compiled into a Thompson NFA.
518
+ ///
519
+ /// The default configuration is [`WhichCaptures::All`].
520
+ #[ derive( Clone , Copy , Debug ) ]
521
+ pub enum WhichCaptures {
522
+ /// All capture states, including those corresponding to both implicit and
523
+ /// explicit capture groups, are included in the Thompson NFA.
524
+ All ,
525
+ /// Only capture states corresponding to implicit capture groups are
526
+ /// included. Implicit capture groups appear in every pattern implicitly
527
+ /// and correspond to the overall match of a pattern.
528
+ ///
529
+ /// This is useful when one only cares about the overall match of a
530
+ /// pattern. By excluding capture states from explicit capture groups,
531
+ /// one might be able to reduce the memory usage of a multi-pattern regex
532
+ /// substantially if it was otherwise written to have many explicit capture
533
+ /// groups.
534
+ Implicit ,
535
+ /// No capture states are compiled into the Thompson NFA.
536
+ ///
537
+ /// This is useful when capture states are either not needed (for example,
538
+ /// if one is only trying to build a DFA) or if they aren't supported (for
539
+ /// example, a reverse NFA).
540
+ None ,
541
+ }
542
+
543
+ impl Default for WhichCaptures {
544
+ fn default ( ) -> WhichCaptures {
545
+ WhichCaptures :: All
546
+ }
547
+ }
548
+
549
+ impl WhichCaptures {
550
+ /// Returns true if this configuration indicates that no capture states
551
+ /// should be produced in an NFA.
552
+ pub fn is_none ( & self ) -> bool {
553
+ matches ! ( * self , WhichCaptures :: None )
554
+ }
555
+
556
+ /// Returns true if this configuration indicates that some capture states
557
+ /// should be added to an NFA. Note that this might only include capture
558
+ /// states for implicit capture groups.
559
+ pub fn is_any ( & self ) -> bool {
560
+ !self . is_none ( )
561
+ }
562
+ }
563
+
450
564
/*
451
565
This compiler below uses Thompson's construction algorithm. The compiler takes
452
566
a regex-syntax::Hir as input and emits an NFA graph as output. The NFA graph
@@ -800,7 +914,9 @@ impl Compiler {
800
914
if exprs. len ( ) > PatternID :: LIMIT {
801
915
return Err ( BuildError :: too_many_patterns ( exprs. len ( ) ) ) ;
802
916
}
803
- if self . config . get_reverse ( ) && self . config . get_captures ( ) {
917
+ if self . config . get_reverse ( )
918
+ && self . config . get_which_captures ( ) . is_any ( )
919
+ {
804
920
return Err ( BuildError :: unsupported_captures ( ) ) ;
805
921
}
806
922
@@ -978,7 +1094,7 @@ impl Compiler {
978
1094
name : Option < & str > ,
979
1095
expr : & Hir ,
980
1096
) -> Result < ThompsonRef , BuildError > {
981
- if ! self . config . get_captures ( ) {
1097
+ if self . config . get_which_captures ( ) . is_none ( ) {
982
1098
return self . c ( expr) ;
983
1099
}
984
1100
@@ -1728,9 +1844,15 @@ mod tests {
1728
1844
util:: primitives:: { PatternID , StateID } ,
1729
1845
} ;
1730
1846
1847
+ use super :: * ;
1848
+
1731
1849
fn build ( pattern : & str ) -> NFA {
1732
1850
NFA :: compiler ( )
1733
- . configure ( NFA :: config ( ) . captures ( false ) . unanchored_prefix ( false ) )
1851
+ . configure (
1852
+ NFA :: config ( )
1853
+ . which_captures ( WhichCaptures :: None )
1854
+ . unanchored_prefix ( false ) ,
1855
+ )
1734
1856
. build ( pattern)
1735
1857
. unwrap ( )
1736
1858
}
@@ -1794,7 +1916,7 @@ mod tests {
1794
1916
#[ test]
1795
1917
fn compile_unanchored_prefix ( ) {
1796
1918
let nfa = NFA :: compiler ( )
1797
- . configure ( NFA :: config ( ) . captures ( false ) )
1919
+ . configure ( NFA :: config ( ) . which_captures ( WhichCaptures :: None ) )
1798
1920
. build ( r"a" )
1799
1921
. unwrap ( ) ;
1800
1922
assert_eq ! (
@@ -1827,7 +1949,11 @@ mod tests {
1827
1949
1828
1950
// Check that non-UTF-8 literals work.
1829
1951
let nfa = NFA :: compiler ( )
1830
- . configure ( NFA :: config ( ) . captures ( false ) . unanchored_prefix ( false ) )
1952
+ . configure (
1953
+ NFA :: config ( )
1954
+ . which_captures ( WhichCaptures :: None )
1955
+ . unanchored_prefix ( false ) ,
1956
+ )
1831
1957
. syntax ( crate :: util:: syntax:: Config :: new ( ) . utf8 ( false ) )
1832
1958
. build ( r"(?-u)\xFF" )
1833
1959
. unwrap ( ) ;
@@ -1937,7 +2063,7 @@ mod tests {
1937
2063
let nfa = NFA :: compiler ( )
1938
2064
. configure (
1939
2065
NFA :: config ( )
1940
- . captures ( false )
2066
+ . which_captures ( WhichCaptures :: None )
1941
2067
. reverse ( true )
1942
2068
. shrink ( false )
1943
2069
. unanchored_prefix ( false ) ,
@@ -1965,7 +2091,11 @@ mod tests {
1965
2091
#[ test]
1966
2092
fn compile_many_start_pattern ( ) {
1967
2093
let nfa = NFA :: compiler ( )
1968
- . configure ( NFA :: config ( ) . captures ( false ) . unanchored_prefix ( false ) )
2094
+ . configure (
2095
+ NFA :: config ( )
2096
+ . which_captures ( WhichCaptures :: None )
2097
+ . unanchored_prefix ( false ) ,
2098
+ )
1969
2099
. build_many ( & [ "a" , "b" ] )
1970
2100
. unwrap ( ) ;
1971
2101
assert_eq ! (
@@ -1993,7 +2123,9 @@ mod tests {
1993
2123
use regex_syntax:: hir:: { Class , ClassBytes , Hir } ;
1994
2124
1995
2125
let hir = Hir :: class ( Class :: Bytes ( ClassBytes :: new ( vec ! [ ] ) ) ) ;
1996
- let config = NFA :: config ( ) . captures ( false ) . unanchored_prefix ( false ) ;
2126
+ let config = NFA :: config ( )
2127
+ . which_captures ( WhichCaptures :: None )
2128
+ . unanchored_prefix ( false ) ;
1997
2129
let nfa =
1998
2130
NFA :: compiler ( ) . configure ( config) . build_from_hir ( & hir) . unwrap ( ) ;
1999
2131
assert_eq ! ( nfa. states( ) , & [ s_fail( ) , s_match( 0 ) ] ) ;
@@ -2005,7 +2137,9 @@ mod tests {
2005
2137
use regex_syntax:: hir:: { Class , ClassUnicode , Hir } ;
2006
2138
2007
2139
let hir = Hir :: class ( Class :: Unicode ( ClassUnicode :: new ( vec ! [ ] ) ) ) ;
2008
- let config = NFA :: config ( ) . captures ( false ) . unanchored_prefix ( false ) ;
2140
+ let config = NFA :: config ( )
2141
+ . which_captures ( WhichCaptures :: None )
2142
+ . unanchored_prefix ( false ) ;
2009
2143
let nfa =
2010
2144
NFA :: compiler ( ) . configure ( config) . build_from_hir ( & hir) . unwrap ( ) ;
2011
2145
assert_eq ! ( nfa. states( ) , & [ s_fail( ) , s_match( 0 ) ] ) ;
0 commit comments