Skip to content

Commit 8beff76

Browse files
committed
Adjust rure to use internal regex methods where needed
1 parent 01644e0 commit 8beff76

File tree

2 files changed

+43
-16
lines changed

2 files changed

+43
-16
lines changed

regex-capi/src/rure.rs

Lines changed: 42 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ use ::error::{Error, ErrorKind};
22

33
use ::regex::bytes;
44
use ::regex::internal::{Exec, ExecBuilder, RegexOptions};
5+
use ::regex::internal::RegularExpression;
56
use ::libc::{c_char, size_t};
67

78
use ::std::collections::HashMap;
@@ -22,8 +23,11 @@ pub struct Options {
2223
dfa_size_limit: usize,
2324
}
2425

26+
// The public `RegexSet` API does not yet expose option support or matching
27+
// at an arbitrary start position. To work around this, we use the internal
28+
// `Exec` structure directly.
2529
pub struct RegexSet {
26-
re: bytes::RegexSet,
30+
re: Exec,
2731
pattern_count: usize
2832
}
2933

@@ -61,8 +65,8 @@ impl Deref for Regex {
6165
}
6266

6367
impl Deref for RegexSet {
64-
type Target = bytes::RegexSet;
65-
fn deref(&self) -> &bytes::RegexSet { &self.re }
68+
type Target = Exec;
69+
fn deref(&self) -> &Exec { &self.re }
6670
}
6771

6872
impl Default for Options {
@@ -497,19 +501,42 @@ ffi_fn! {
497501
});
498502
}
499503

500-
match bytes::RegexSet::new(&pats) {
501-
Ok(re) => {
502-
let pat_count = re.len();
504+
// Start with a default set and override values if present.
505+
let mut opts = RegexOptions::default();
506+
let pat_count = pats.len();
507+
opts.pats = pats.into_iter().map(|s| s.to_owned()).collect();
508+
509+
if !options.is_null() {
510+
let options = unsafe { &*options };
511+
opts.size_limit = options.size_limit;
512+
opts.dfa_size_limit = options.dfa_size_limit;
513+
}
514+
515+
opts.case_insensitive = flags & RURE_FLAG_CASEI > 0;
516+
opts.multi_line = flags & RURE_FLAG_MULTI > 0;
517+
opts.dot_matches_new_line = flags & RURE_FLAG_DOTNL > 0;
518+
opts.swap_greed = flags & RURE_FLAG_SWAP_GREED > 0;
519+
opts.ignore_whitespace = flags & RURE_FLAG_SPACE > 0;
520+
opts.unicode = flags & RURE_FLAG_UNICODE > 0;
521+
522+
// `Exec` does not expose a `new` function with appropriate arguments,
523+
// so we construct it through `ExecBuilder` instead.
524+
let builder = ExecBuilder::new_options(opts)
525+
.bytes(true)
526+
.only_utf8(false);
527+
528+
match builder.build() {
529+
Ok(ex) => {
503530
let re = RegexSet {
504-
re: re,
531+
re: ex,
505532
pattern_count: pat_count
506533
};
507534
Box::into_raw(Box::new(re))
508535
}
509536
Err(err) => {
510537
unsafe {
511538
if !error.is_null() {
512-
*error = Error::new(ErrorKind::Regex(err));
539+
*error = Error::new(ErrorKind::Regex(err))
513540
}
514541
ptr::null()
515542
}
@@ -533,7 +560,7 @@ ffi_fn! {
533560
) -> bool {
534561
let re = unsafe { &*re };
535562
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
536-
re.is_match(haystack)
563+
re.searcher().is_match_at(haystack, start)
537564
}
538565
}
539566

@@ -546,23 +573,22 @@ ffi_fn! {
546573
matches: *mut bool
547574
) -> bool {
548575
let re = unsafe { &*re };
549-
let mut results = unsafe {
576+
let mut matches = unsafe {
550577
slice::from_raw_parts_mut(matches, re.pattern_count)
551578
};
552579
let haystack = unsafe { slice::from_raw_parts(haystack, len) };
553-
let matches = re.matches(haystack);
554580

555-
for i in 0..re.pattern_count {
556-
results[i] = matches.matched(i);
581+
// `many_matches_at` isn't guaranteed to set non-matches to false, so clear the output slice first.
582+
for item in matches.iter_mut() {
583+
*item = false;
557584
}
558585

559-
matches.matched_any()
586+
re.searcher().many_matches_at(&mut matches, haystack, start)
560587
}
561588
}
562589

563590
ffi_fn! {
564591
fn rure_set_len(re: *const RegexSet) -> size_t {
565-
let re = unsafe { &*re };
566-
re.len()
592+
unsafe { (*re).pattern_count }
567593
}
568594
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -603,5 +603,6 @@ pub mod internal {
603603
pub use prog::{Program, Inst, EmptyLook, InstRanges};
604604
pub use re_plugin::Plugin;
605605
pub use re_unicode::_Regex;
606+
pub use re_trait::RegularExpression;
606607
pub use re_builder::RegexOptions;
607608
}

0 commit comments

Comments
 (0)