Skip to content

Commit 04b11b6

Browse files
committed
automata: add 'which_captures' knob to meta::Regex
This propagates the new Thompson NFA compiler option to the meta regex config API.
1 parent a2ec566 commit 04b11b6

File tree

2 files changed

+81
-1
lines changed

2 files changed

+81
-1
lines changed

regex-automata/src/meta/regex.rs

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ use crate::{
1616
strategy::{self, Strategy},
1717
wrappers,
1818
},
19+
nfa::thompson::WhichCaptures,
1920
util::{
2021
captures::{Captures, GroupInfo},
2122
iter,
@@ -2429,6 +2430,7 @@ pub struct Config {
24292430
utf8_empty: Option<bool>,
24302431
autopre: Option<bool>,
24312432
pre: Option<Option<Prefilter>>,
2433+
which_captures: Option<WhichCaptures>,
24322434
nfa_size_limit: Option<Option<usize>>,
24332435
onepass_size_limit: Option<Option<usize>>,
24342436
hybrid_cache_capacity: Option<usize>,
@@ -2619,6 +2621,75 @@ impl Config {
26192621
Config { pre: Some(pre), ..self }
26202622
}
26212623

2624+
/// Configures what kinds of groups are compiled as "capturing" in the
2625+
/// underlying regex engine.
2626+
///
2627+
/// This is set to [`WhichCaptures::All`] by default. Callers may wish to
2628+
/// use [`WhichCaptures::Implicit`] in cases where one wants to avoid the
2629+
/// overhead of capture states for explicit groups.
2630+
///
2631+
/// Note that another approach to avoiding the overhead of capture groups
2632+
/// is by using non-capturing groups in the regex pattern. That is,
2633+
/// `(?:a)` instead of `(a)`. This option is useful when you can't control
2634+
/// the concrete syntax but know that you don't need the underlying capture
2635+
/// states. For example, using `WhichCaptures::Implicit` will behave as if
2636+
/// all explicit capturing groups in the pattern were non-capturing.
2637+
///
2638+
/// Setting this to `WhichCaptures::None` may result in an error when
2639+
/// building a meta regex.
2640+
///
2641+
/// # Example
2642+
///
2643+
/// This example demonstrates how the results of capture groups can change
2644+
/// based on this option. First we show the default (all capture groups in
2645+
/// the pattern are capturing):
2646+
///
2647+
/// ```
2648+
/// use regex_automata::{meta::Regex, Match, Span};
2649+
///
2650+
/// let re = Regex::new(r"foo([0-9]+)bar")?;
2651+
/// let hay = "foo123bar";
2652+
///
2653+
/// let mut caps = re.create_captures();
2654+
/// re.captures(hay, &mut caps);
2655+
/// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
2656+
/// assert_eq!(Some(Span::from(3..6)), caps.get_group(1));
2657+
///
2658+
/// Ok::<(), Box<dyn std::error::Error>>(())
2659+
/// ```
2660+
///
2661+
/// And now we show the behavior when we only include implicit capture
2662+
/// groups. In this case, we can only find the overall match span, but the
2663+
/// spans of any other explicit group don't exist because they are treated
2664+
/// as non-capturing. (In effect, when `WhichCaptures::Implicit` is used,
2665+
/// there is no real point in using [`Regex::captures`] since it will never
2666+
/// be able to report more information than [`Regex::find`].)
2667+
///
2668+
/// ```
2669+
/// use regex_automata::{
2670+
/// meta::Regex,
2671+
/// nfa::thompson::WhichCaptures,
2672+
/// Match,
2673+
/// Span,
2674+
/// };
2675+
///
2676+
/// let re = Regex::builder()
2677+
/// .configure(Regex::config().which_captures(WhichCaptures::Implicit))
2678+
/// .build(r"foo([0-9]+)bar")?;
2679+
/// let hay = "foo123bar";
2680+
///
2681+
/// let mut caps = re.create_captures();
2682+
/// re.captures(hay, &mut caps);
2683+
/// assert_eq!(Some(Span::from(0..9)), caps.get_group(0));
2684+
/// assert_eq!(None, caps.get_group(1));
2685+
///
2686+
/// Ok::<(), Box<dyn std::error::Error>>(())
2687+
/// ```
2688+
pub fn which_captures(mut self, which_captures: WhichCaptures) -> Config {
2689+
self.which_captures = Some(which_captures);
2690+
self
2691+
}
2692+
26222693
/// Sets the size limit, in bytes, to enforce on the construction of every
26232694
/// NFA built by the meta regex engine.
26242695
///
@@ -2983,6 +3054,14 @@ impl Config {
29833054
self.pre.as_ref().unwrap_or(&None).as_ref()
29843055
}
29853056

3057+
/// Returns the capture configuration, as set by
3058+
/// [`Config::which_captures`].
3059+
///
3060+
/// If it was not explicitly set, then a default value is returned.
3061+
pub fn get_which_captures(&self) -> WhichCaptures {
3062+
self.which_captures.unwrap_or(WhichCaptures::All)
3063+
}
3064+
29863065
/// Returns NFA size limit, as set by [`Config::nfa_size_limit`].
29873066
///
29883067
/// If it was not explicitly set, then a default value is returned.
@@ -3126,6 +3205,7 @@ impl Config {
31263205
utf8_empty: o.utf8_empty.or(self.utf8_empty),
31273206
autopre: o.autopre.or(self.autopre),
31283207
pre: o.pre.or_else(|| self.pre.clone()),
3208+
which_captures: o.which_captures.or(self.which_captures),
31293209
nfa_size_limit: o.nfa_size_limit.or(self.nfa_size_limit),
31303210
onepass_size_limit: o
31313211
.onepass_size_limit

regex-automata/src/meta/strategy.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ impl Core {
452452
.utf8(info.config().get_utf8_empty())
453453
.nfa_size_limit(info.config().get_nfa_size_limit())
454454
.shrink(false)
455-
.which_captures(WhichCaptures::All)
455+
.which_captures(info.config().get_which_captures())
456456
.look_matcher(lookm);
457457
let nfa = thompson::Compiler::new()
458458
.configure(thompson_config.clone())

0 commit comments

Comments
 (0)