Skip to content

Commit 67fca6c

Browse files
marmeladema authored and BurntSushi committed
capi: add rure_escape_must function
This commit exposes two new functions in regex's C API: rure_escape_must and rure_cstring_free. These permit escaping a pattern such that it contains no special regex meta characters. Currently, we only expose a routine that will abort the process if it fails, but we document the precise error conditions. A more flexible but less convenient routine should ideally be exposed in the future, but that needs a bit more API design than what's here. Closes rust-lang#537
1 parent 525b18b commit 67fca6c

File tree

5 files changed

+113
-4
lines changed

5 files changed

+113
-4
lines changed

regex-capi/ctest/test.c

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,31 @@ bool test_regex_set_options() {
531531
return passed;
532532
}
533533

534+
bool test_escape() {
535+
bool passed = true;
536+
537+
const char *pattern = "^[a-z]+.*$";
538+
const char *expected_escaped = "\\^\\[a\\-z\\]\\+\\.\\*\\$";
539+
540+
const char *escaped = rure_escape_must(pattern);
541+
if (!escaped) {
542+
if (DEBUG) {
543+
fprintf(stderr,
544+
"[test_captures] expected escaped, but got no escaped\n");
545+
}
546+
passed = false;
547+
} else if (strcmp(escaped, expected_escaped) != 0) {
548+
if (DEBUG) {
549+
fprintf(stderr,
550+
"[test_captures] expected \"%s\", but got \"%s\"\n",
551+
expected_escaped, escaped);
552+
}
553+
passed = false;
554+
}
555+
rure_cstring_free((char *) escaped);
556+
return passed;
557+
}
558+
534559
void run_test(bool (test)(), const char *name, bool *passed) {
535560
if (!test()) {
536561
*passed = false;
@@ -557,6 +582,7 @@ int main() {
557582
run_test(test_regex_set_options, "test_regex_set_options", &passed);
558583
run_test(test_regex_set_match_start, "test_regex_set_match_start",
559584
&passed);
585+
run_test(test_escape, "test_escape", &passed);
560586

561587
if (!passed) {
562588
exit(1);

regex-capi/include/rure.h

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -567,6 +567,29 @@ void rure_error_free(rure_error *err);
567567
*/
568568
const char *rure_error_message(rure_error *err);
569569

570+
/*
571+
* rure_escape_must returns a NUL terminated string where all meta characters
572+
* have been escaped. If escaping fails for any reason, an error message is
573+
* printed to stderr and the process is aborted.
574+
*
575+
* The pattern given should be in UTF-8. For convenience, this accepts a C
576+
* string, which means the pattern cannot contain a NUL byte. These correspond
577+
* to the only two failure conditions of this function. That is, if the caller
578+
* guarantees that the given pattern is valid UTF-8 and does not contain a
579+
* NUL byte, then this is guaranteed to succeed (modulo out-of-memory errors).
580+
*
581+
* The pointer returned must not be freed directly. Instead, it should be freed
582+
* by calling rure_cstring_free.
583+
*/
584+
const char *rure_escape_must(const char *pattern);
585+
586+
/*
587+
 * rure_cstring_free frees the string given. The string must be one that was
 * returned by rure_escape_must.
588+
*
589+
* This must be called at most once per string.
590+
*/
591+
void rure_cstring_free(char *s);
592+
570593
#ifdef __cplusplus
571594
}
572595
#endif

regex-capi/src/error.rs

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use ::std::ffi;
12
use ::std::ffi::CString;
23
use ::std::fmt;
34
use ::std::str;
@@ -16,6 +17,7 @@ pub enum ErrorKind {
1617
None,
1718
Str(str::Utf8Error),
1819
Regex(regex::Error),
20+
Nul(ffi::NulError),
1921
}
2022

2123
impl Error {
@@ -29,7 +31,7 @@ impl Error {
2931
pub fn is_err(&self) -> bool {
3032
match self.kind {
3133
ErrorKind::None => false,
32-
ErrorKind::Str(_) | ErrorKind::Regex(_) => true,
34+
ErrorKind::Str(_) | ErrorKind::Regex(_) | ErrorKind::Nul(_) => true,
3335
}
3436
}
3537
}
@@ -40,6 +42,7 @@ impl fmt::Display for Error {
4042
ErrorKind::None => write!(f, "no error"),
4143
ErrorKind::Str(ref e) => e.fmt(f),
4244
ErrorKind::Regex(ref e) => e.fmt(f),
45+
ErrorKind::Nul(ref e) => e.fmt(f),
4346
}
4447
}
4548
}

regex-capi/src/macros.rs

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
macro_rules! ffi_fn {
32
(fn $name:ident($($arg:ident: $arg_ty:ty),*,) -> $ret:ty $body:block) => {
43
ffi_fn!(fn $name($($arg: $arg_ty),*) -> $ret $body);
@@ -35,5 +34,3 @@ macro_rules! ffi_fn {
3534
ffi_fn!(fn $name($($arg: $arg_ty),*) -> () $body);
3635
};
3736
}
38-
39-

regex-capi/src/rure.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -570,3 +570,63 @@ ffi_fn! {
570570
unsafe { (*re).len() }
571571
}
572572
}
573+
574+
// rure_escape_must escapes all regex meta characters in the NUL terminated
// `pattern` and returns a newly allocated NUL terminated string.
//
// The pattern's length is taken from its terminating NUL byte and the bytes
// are forwarded to `rure_escape`. If `pattern` is NULL, or if `rure_escape`
// reports an error, a message is written to stderr and the process is
// aborted; this function therefore never returns on failure.
ffi_fn! {
    fn rure_escape_must(pattern: *const c_char) -> *const c_char {
        // CStr::from_ptr requires a valid, non-null pointer. Reject NULL here
        // through the same "print and abort" path used for every other
        // failure instead of dereferencing it.
        if pattern.is_null() {
            let _ = writeln!(
                &mut io::stderr(), "pattern given to rure_escape_must is NULL");
            let _ = writeln!(
                &mut io::stderr(), "aborting from rure_escape_must");
            unsafe { abort() }
        }
        let len = unsafe { CStr::from_ptr(pattern).to_bytes().len() };
        let pat = pattern as *const u8;
        let mut err = Error::new(ErrorKind::None);
        let esc = rure_escape(pat, len, &mut err);
        if err.is_err() {
            // Write failures are deliberately ignored: we are about to abort
            // and there is nowhere else to report them.
            let _ = writeln!(&mut io::stderr(), "{}", err);
            let _ = writeln!(
                &mut io::stderr(), "aborting from rure_escape_must");
            unsafe { abort() }
        }
        esc
    }
}
589+
590+
/// A helper function that implements fallible escaping in a way that returns
591+
/// an error if escaping failed.
592+
///
593+
/// This should ideally be exposed, but it needs API design work. In
594+
/// particular, this should not return a C string, but a `const uint8_t *`
595+
/// instead, since it may contain a NUL byte.
596+
fn rure_escape(
597+
pattern: *const u8,
598+
length: size_t,
599+
error: *mut Error
600+
) -> *const c_char {
601+
let pat: &[u8] = unsafe { slice::from_raw_parts(pattern, length) };
602+
let str_pat = match str::from_utf8(pat) {
603+
Ok(val) => val,
604+
Err(err) => {
605+
unsafe {
606+
if !error.is_null() {
607+
*error = Error::new(ErrorKind::Str(err));
608+
}
609+
return ptr::null();
610+
}
611+
}
612+
};
613+
let esc_pat = regex::escape(str_pat);
614+
let c_esc_pat = match CString::new(esc_pat) {
615+
Ok(val) => val,
616+
Err(err) => {
617+
unsafe {
618+
if !error.is_null() {
619+
*error = Error::new(ErrorKind::Nul(err));
620+
}
621+
return ptr::null();
622+
}
623+
}
624+
};
625+
c_esc_pat.into_raw() as *const c_char
626+
}
627+
628+
// rure_cstring_free releases a string by rebuilding the owning `CString`
// from the raw pointer and dropping it.
//
// A NULL pointer is ignored, mirroring free(NULL) in C. Any other pointer is
// passed to CString::from_raw, so it must be freed at most once.
ffi_fn! {
    fn rure_cstring_free(s: *mut c_char) {
        // CString::from_raw must never be given a null pointer.
        if s.is_null() {
            return;
        }
        // Dropping the reconstructed CString is what frees the allocation;
        // the explicit drop makes that intent visible.
        unsafe { drop(CString::from_raw(s)); }
    }
}

0 commit comments

Comments
 (0)