@@ -16,6 +16,7 @@ use crate::{
16
16
strategy:: { self , Strategy } ,
17
17
wrappers,
18
18
} ,
19
+ nfa:: thompson:: WhichCaptures ,
19
20
util:: {
20
21
captures:: { Captures , GroupInfo } ,
21
22
iter,
@@ -2429,6 +2430,7 @@ pub struct Config {
2429
2430
utf8_empty : Option < bool > ,
2430
2431
autopre : Option < bool > ,
2431
2432
pre : Option < Option < Prefilter > > ,
2433
+ which_captures : Option < WhichCaptures > ,
2432
2434
nfa_size_limit : Option < Option < usize > > ,
2433
2435
onepass_size_limit : Option < Option < usize > > ,
2434
2436
hybrid_cache_capacity : Option < usize > ,
@@ -2619,6 +2621,75 @@ impl Config {
2619
2621
Config { pre : Some ( pre) , ..self }
2620
2622
}
2621
2623
2624
+ /// Configures what kinds of groups are compiled as "capturing" in the
2625
+ /// underlying regex engine.
2626
+ ///
2627
+ /// This is set to [`WhichCaptures::All`] by default. Callers may wish to
2628
+ /// use [`WhichCaptures::Implicit`] in cases where one wants to avoid the
2629
+ /// overhead of capture states for explicit groups.
2630
+ ///
2631
+ /// Note that another approach to avoiding the overhead of capture groups
2632
+ /// is by using non-capturing groups in the regex pattern. That is,
2633
+ /// `(?:a)` instead of `(a)`. This option is useful when you can't control
2634
+ /// the concrete syntax but know that you don't need the underlying capture
2635
+ /// states. For example, using `WhichCaptures::Implicit` will behave as if
2636
+ /// all explicit capturing groups in the pattern were non-capturing.
2637
+ ///
2638
+ /// Setting this to `WhichCaptures::None` may result in an error when
2639
+ /// building a meta regex.
2640
+ ///
2641
+ /// # Example
2642
+ ///
2643
+ /// This example demonstrates how the results of capture groups can change
2644
+ /// based on this option. First we show the default (all capture groups in
2645
+ /// the pattern are capturing):
2646
+ ///
2647
+ /// ```
2648
+ /// use regex_automata::{meta::Regex, Match, Span};
2649
+ ///
2650
+ /// let re = Regex::new(r"foo([0-9]+)bar")?;
2651
+ /// let hay = "foo123bar";
2652
+ ///
2653
+ /// let mut caps = re.create_captures();
2654
+ /// re.captures(hay, &mut caps);
2655
+ /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
2656
+ /// assert_eq!(Some(Span::from(3..6)), caps.get_group(1));
2657
+ ///
2658
+ /// Ok::<(), Box<dyn std::error::Error>>(())
2659
+ /// ```
2660
+ ///
2661
+ /// And now we show the behavior when we only include implicit capture
2662
+ /// groups. In this case, we can only find the overall match span, but the
2663
+ /// spans of any other explicit group don't exist because they are treated
2664
+ /// as non-capturing. (In effect, when `WhichCaptures::Implicit` is used,
2665
+ /// there is no real point in using [`Regex::captures`] since it will never
2666
+ /// be able to report more information than [`Regex::find`].)
2667
+ ///
2668
+ /// ```
2669
+ /// use regex_automata::{
2670
+ /// meta::Regex,
2671
+ /// nfa::thompson::WhichCaptures,
2672
+ /// Match,
2673
+ /// Span,
2674
+ /// };
2675
+ ///
2676
+ /// let re = Regex::builder()
2677
+ /// .configure(Regex::config().which_captures(WhichCaptures::Implicit))
2678
+ /// .build(r"foo([0-9]+)bar")?;
2679
+ /// let hay = "foo123bar";
2680
+ ///
2681
+ /// let mut caps = re.create_captures();
2682
+ /// re.captures(hay, &mut caps);
2683
+ /// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
2684
+ /// assert_eq!(None, caps.get_group(1));
2685
+ ///
2686
+ /// Ok::<(), Box<dyn std::error::Error>>(())
2687
+ /// ```
2688
+ pub fn which_captures ( mut self , which_captures : WhichCaptures ) -> Config {
2689
+ self . which_captures = Some ( which_captures) ;
2690
+ self
2691
+ }
2692
+
2622
2693
/// Sets the size limit, in bytes, to enforce on the construction of every
2623
2694
/// NFA built by the meta regex engine.
2624
2695
///
@@ -2983,6 +3054,14 @@ impl Config {
2983
3054
self . pre . as_ref ( ) . unwrap_or ( & None ) . as_ref ( )
2984
3055
}
2985
3056
3057
+ /// Returns the capture configuration, as set by
3058
+ /// [`Config::which_captures`].
3059
+ ///
3060
+ /// If it was not explicitly set, then a default value is returned.
3061
+ pub fn get_which_captures ( & self ) -> WhichCaptures {
3062
+ self . which_captures . unwrap_or ( WhichCaptures :: All )
3063
+ }
3064
+
2986
3065
/// Returns NFA size limit, as set by [`Config::nfa_size_limit`].
2987
3066
///
2988
3067
/// If it was not explicitly set, then a default value is returned.
@@ -3126,6 +3205,7 @@ impl Config {
3126
3205
utf8_empty : o. utf8_empty . or ( self . utf8_empty ) ,
3127
3206
autopre : o. autopre . or ( self . autopre ) ,
3128
3207
pre : o. pre . or_else ( || self . pre . clone ( ) ) ,
3208
+ which_captures : o. which_captures . or ( self . which_captures ) ,
3129
3209
nfa_size_limit : o. nfa_size_limit . or ( self . nfa_size_limit ) ,
3130
3210
onepass_size_limit : o
3131
3211
. onepass_size_limit
0 commit comments