Skip to content

Commit

Permalink
fix(bytes)!: Ensure take_while_m_n handles UTF-8 correctly
Browse files Browse the repository at this point in the history
Fixes #93

BREAKING CHANGE: `take_while_m_n` now returns an `ErrorKind::TakeWhileMN` error when `n < m`
  • Loading branch information
epage committed Feb 7, 2023
1 parent dd4951e commit 888a6a0
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 68 deletions.
53 changes: 20 additions & 33 deletions src/bytes/complete.rs
Original file line number Diff line number Diff line change
Expand Up @@ -385,44 +385,31 @@ where
I: Input,
T: ContainsToken<<I as Input>::Token>,
{
match input.offset_for(|c| !list.contains_token(c)) {
Some(idx) => {
if idx >= m {
if idx <= n {
let res: IResult<_, _, Error> = if let Ok(index) = input.offset_at(idx) {
Ok(input.next_slice(index))
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN))
};
res
} else {
let res: IResult<_, _, Error> = if let Ok(index) = input.offset_at(n) {
Ok(input.next_slice(index))
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN))
};
res
}
if n < m {
return Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN));
}

let mut final_count = 0;
for (processed, (offset, token)) in input.iter_offsets().enumerate() {
if !list.contains_token(token) {
if processed < m {
return Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN));
} else {
let e = ErrorKind::TakeWhileMN;
Err(ErrMode::from_error_kind(input, e))
return Ok(input.next_slice(offset));
}
}
None => {
let len = input.input_len();
if len >= n {
match input.offset_at(n) {
Ok(index) => Ok(input.next_slice(index)),
Err(_needed) => Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN)),
}
} else if len >= m && len <= n {
Ok(input.next_slice(len))
} else {
let e = ErrorKind::TakeWhileMN;
Err(ErrMode::from_error_kind(input, e))
} else {
if processed == n {
return Ok(input.next_slice(offset));
}
final_count = processed + 1;
}
}

if m <= final_count {
Ok(input.next_slice(input.input_len()))
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN))
}
}

/// Returns the longest input slice (if any) till a predicate is met.
Expand Down
60 changes: 26 additions & 34 deletions src/bytes/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ where
}

pub(crate) fn take_while_m_n_internal<T, I, Error: ParseError<I>>(
i: I,
input: I,
m: usize,
n: usize,
list: &T,
Expand All @@ -422,44 +422,36 @@ where
I: Input,
T: ContainsToken<<I as Input>::Token>,
{
let input = i;

match input.offset_for(|c| !list.contains_token(c)) {
Some(idx) => {
if idx >= m {
if idx <= n {
let res: IResult<_, _, Error> = if let Ok(index) = input.offset_at(idx) {
Ok(input.next_slice(index))
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN))
};
res
} else {
let res: IResult<_, _, Error> = if let Ok(index) = input.offset_at(n) {
Ok(input.next_slice(index))
} else {
Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN))
};
res
}
if n < m {
return Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN));
}

let mut final_count = 0;
for (processed, (offset, token)) in input.iter_offsets().enumerate() {
if !list.contains_token(token) {
if processed < m {
return Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN));
} else {
let e = ErrorKind::TakeWhileMN;
Err(ErrMode::from_error_kind(input, e))
return Ok(input.next_slice(offset));
}
}
None => {
let len = input.input_len();
if len >= n {
match input.offset_at(n) {
Ok(index) => Ok(input.next_slice(index)),
Err(_needed) => Err(ErrMode::from_error_kind(input, ErrorKind::TakeWhileMN)),
}
} else {
let needed = if m > len { m - len } else { 1 };
Err(ErrMode::Incomplete(Needed::new(needed)))
} else {
if processed == n {
return Ok(input.next_slice(offset));
}
final_count = processed + 1;
}
}

if final_count == n {
Ok(input.next_slice(input.input_len()))
} else {
let needed = if m > input.input_len() {
m - input.input_len()
} else {
1
};
Err(ErrMode::Incomplete(Needed::new(needed)))
}
}

/// Returns the longest input slice (if any) till a predicate is met.
Expand Down
40 changes: 39 additions & 1 deletion src/bytes/tests.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
use super::*;

#[cfg(feature = "std")]
use proptest::prelude::*;

use crate::bytes::tag;
use crate::error::ErrMode;
use crate::error::Error;
Expand All @@ -24,8 +27,43 @@ fn complete_take_while_m_n_utf8_all_matching_substring() {
assert_eq!(result, Ok(("n", "ø")));
}

/// Reference model of the complete `take_while_m_n` parser, used to
/// cross-check the real implementation in property tests.
///
/// `valid` is the count of leading bytes of `input` treated as matching; it is
/// supplied by the caller, not derived from `input`.
///
/// Returns `Ok((remaining, taken))`, where `taken` is the first
/// `min(n, valid)` bytes of `input`, or a `TakeWhileMN` error when `n < m` or
/// when fewer than `m` bytes are valid.
#[cfg(feature = "std")]
fn model_complete_take_while_m_n(
    m: usize,
    n: usize,
    valid: usize,
    input: &str,
) -> IResult<&str, &str> {
    // An inverted range and too few matching bytes fail identically.
    if n < m || valid < m {
        return Err(crate::error::ErrMode::from_error_kind(
            input,
            crate::error::ErrorKind::TakeWhileMN,
        ));
    }

    // Take no more than the upper bound and no more than actually matches.
    let split = valid.min(n);
    let (taken, rest) = input.split_at(split);
    Ok((rest, taken))
}

#[cfg(feature = "std")]
proptest! {
    // Checks `take_while_m_n` against `model_complete_take_while_m_n` for
    // arbitrary bounds and arbitrary lengths of matching / non-matching input.
    #[test]
    fn complete_take_while_m_n_bounds(m in 0..20usize, n in 0..20usize, valid in 0..20usize, invalid in 0..20usize) {
        // `valid` copies of 'a' (accepted) followed by `invalid` copies of 'b' (rejected).
        let mut input = "a".repeat(valid);
        input.push_str(&"b".repeat(invalid));

        let actual = take_while_m_n(m, n, |c: char| c == 'a')(input.as_str());
        let expected = model_complete_take_while_m_n(m, n, valid, &input);
        assert_eq!(expected, actual);
    }
}

#[test]
fn streaming_any_str() {
use super::any;
assert_eq!(
any::<_, Error<Streaming<&str>>, true>(Streaming("Ә")),
Ok((Streaming(""), 'Ә'))
Expand Down Expand Up @@ -446,7 +484,7 @@ fn streaming_take_while_m_n_utf8_range() {
take_while_m_n(1, 2, |c| c == 'A' || c == '😃')(i)
}
assert_eq!(parser(Streaming("A!")), Ok((Streaming("!"), "A")));
assert_eq!(parser(Streaming("😃!")), Ok((Streaming(""), "😃!")));
assert_eq!(parser(Streaming("😃!")), Ok((Streaming("!"), "😃")));
}

#[test]
Expand Down

0 comments on commit 888a6a0

Please sign in to comment.