diff --git a/src/lib.rs b/src/lib.rs index d4ce110..af06840 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,7 +22,7 @@ use std::io::{IsTerminal as _, Read}; use std::path::Path; use uucore::error::{FromIo, UResult, USimpleError}; -#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[derive(Clone, Copy, PartialEq, Eq)] #[doc(hidden)] pub enum RegexMode { Fixed, diff --git a/src/line_buffer.rs b/src/line_buffer.rs index 51ee4da..54e7057 100644 --- a/src/line_buffer.rs +++ b/src/line_buffer.rs @@ -3,7 +3,7 @@ // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -use memchr::{memchr, memrchr}; +use memchr::memchr; use std::fs::File; use std::io::{self, Read as _}; @@ -111,193 +111,4 @@ impl LineBuffer { self.end += n; } } - - /// Read the next run of *complete* lines as a single slice. - /// - /// Returns `Ok(None)` at end of input. Otherwise returns `Ok(Some((chunk, - /// chunk_start)))`, where `chunk` spans one or more whole lines (each ending - /// in the terminator) and `chunk_start` is the absolute byte offset of the - /// first byte of the chunk. The only exception is a final line lacking a - /// terminator, which is returned on its own as the last chunk. - /// - /// This hands back as much buffered data as ends on a line boundary, so a - /// caller can scan many lines with one pass instead of line by line. - pub fn read_chunk(&mut self, file: &mut File) -> io::Result> { - loop { - // Hand back everything up to and including the last terminator. - if self.end > self.beg - && let Some(off) = memrchr(self.line_terminator, &self.buffer[self.beg..self.end]) - { - let beg = self.beg; - let lim = self.beg + off + 1; - let chunk_start = self.next_line_start; - self.next_line_start += (lim - beg) as u64; - self.beg = lim; - self.scan = lim; - return Ok(Some((&self.buffer[beg..lim], chunk_start))); - } - - // No whole line buffered. At EOF, flush any unterminated remainder. - if self.eof { - if self.beg == self.end { - return Ok(None); - } - let beg = self.beg; - let chunk_start = self.next_line_start; - self.next_line_start += (self.end - beg) as u64; - self.beg = self.end; - self.scan = self.end; - return Ok(Some((&self.buffer[beg..self.end], chunk_start))); - } - - // Slide the partial tail to the front to maximize room for reading. - if self.beg > 0 { - self.buffer.copy_within(self.beg..self.end, 0); - self.end -= self.beg; - self.beg = 0; - self.scan = 0; - } - if self.end == self.buffer.len() { - // A single line is longer than the whole buffer; grow it. - self.buffer.resize(self.buffer.len() * 2, 0); - } - - let n = loop { - match file.read(&mut self.buffer[self.end..]) { - Ok(n) => break n, - Err(e) if e.kind() == io::ErrorKind::Interrupted => {} - Err(e) => return Err(e), - } - }; - if n == 0 { - self.eof = true; - } else { - self.end += n; - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - use std::io::{Seek as _, SeekFrom, Write as _}; - use std::sync::atomic::{AtomicU32, Ordering}; - - static COUNTER: AtomicU32 = AtomicU32::new(0); - - /// A temp file pre-loaded with `content`, rewound to the start, and removed - /// from disk when dropped. - struct TempInput { - file: File, - path: std::path::PathBuf, - } - - impl Drop for TempInput { - fn drop(&mut self) { - let _ = std::fs::remove_file(&self.path); - } - } - - fn temp_input(content: &[u8]) -> TempInput { - let mut path = std::env::temp_dir(); - let n = COUNTER.fetch_add(1, Ordering::Relaxed); - path.push(format!("uu_grep_lb_{}_{n}.tmp", std::process::id())); - let mut file = std::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .truncate(true) - .open(&path) - .unwrap(); - file.write_all(content).unwrap(); - file.seek(SeekFrom::Start(0)).unwrap(); - TempInput { file, path } - } - - /// Drain `read_chunk` into a list of (owned bytes, start offset) pairs. - fn chunks(term: u8, content: &[u8]) -> Vec<(Vec, u64)> { - let mut lb = LineBuffer::new(term); - let mut input = temp_input(content); - let mut out = Vec::new(); - while let Some((chunk, start)) = lb.read_chunk(&mut input.file).unwrap() { - out.push((chunk.to_vec(), start)); - } - out - } - - #[test] - fn empty_input_yields_nothing() { - assert!(chunks(b'\n', b"").is_empty()); - } - - #[test] - fn whole_complete_lines_come_back_as_one_chunk() { - // Small input arrives in a single read, so everything up to the final - // terminator is one chunk starting at offset 0. - assert_eq!( - chunks(b'\n', b"a\nbb\nccc\n"), - vec![(b"a\nbb\nccc\n".to_vec(), 0)] - ); - } - - #[test] - fn unterminated_tail_is_a_final_chunk_with_its_own_offset() { - // "a\n" is the complete-line chunk; "bb" is flushed at EOF at offset 2. - assert_eq!( - chunks(b'\n', b"a\nbb"), - vec![(b"a\n".to_vec(), 0), (b"bb".to_vec(), 2)] - ); - } - - #[test] - fn input_without_any_terminator_is_one_chunk() { - assert_eq!(chunks(b'\n', b"abc"), vec![(b"abc".to_vec(), 0)]); - } - - #[test] - fn honors_a_custom_terminator() { - assert_eq!( - chunks(b'\0', b"a\0bb\0c"), - vec![(b"a\0bb\0".to_vec(), 0), (b"c".to_vec(), 5)] - ); - } - - #[test] - fn reassembles_input_larger_than_the_buffer() { - // Force many reads and at least one chunk boundary mid-file. - let mut content = Vec::new(); - for i in 0..50_000u32 { - content.extend_from_slice(format!("line number {i}\n").as_bytes()); - } - assert!(content.len() > 128 * 1024); - - let got = chunks(b'\n', &content); - assert!(got.len() > 1, "expected multiple chunks, got {}", got.len()); - - // Chunks must tile the input exactly, contiguously, each ending on a - // line boundary (the input ends with a terminator). - let mut expected_start = 0u64; - let mut joined = Vec::new(); - for (bytes, start) in &got { - assert_eq!(*start, expected_start); - assert_eq!(*bytes.last().unwrap(), b'\n'); - expected_start += bytes.len() as u64; - joined.extend_from_slice(bytes); - } - assert_eq!(joined, content); - } - - #[test] - fn grows_to_hold_a_single_overlong_line() { - // One line far bigger than the initial 128 KiB buffer, then a short one. - let mut content = vec![b'x'; 300 * 1024]; - content.push(b'\n'); - content.extend_from_slice(b"tail\n"); - - let got = chunks(b'\n', &content); - let joined: Vec = got.iter().flat_map(|(b, _)| b.clone()).collect(); - assert_eq!(joined, content); - assert_eq!(got[0].1, 0); - } } diff --git a/src/matcher.rs b/src/matcher.rs index 6d72b69..49f2a27 100644 --- a/src/matcher.rs +++ b/src/matcher.rs @@ -4,7 +4,6 @@ // file that was distributed with this source code. use crate::{Config, RegexMode}; -use memchr::memmem; use onig::{ EncodedBytes, Regex, RegexOptions, Region, SearchOptions, Syntax, SyntaxBehavior, SyntaxOperator, @@ -15,12 +14,6 @@ use uucore::error::{UResult, USimpleError}; pub struct Matcher<'a> { config: &'a Config<'a>, patterns: Vec, - /// One substring searcher per pattern, present only when *every* pattern is - /// a plain literal that a raw byte search resolves exactly (see - /// [`plain_literal`]). When set, a caller can decide a line matches by - /// looking for any of these needles, bypassing the regex engine entirely. - /// `None` as soon as a single pattern needs real regex evaluation. - literal_searchers: Option>>, } impl<'a> Matcher<'a> { @@ -29,32 +22,7 @@ impl<'a> Matcher<'a> { for raw in config.patterns { patterns.push(CompiledPattern::compile(raw, config)?); } - - // If we can reduce the whole pattern set to literal needles, keep a - // searcher for each so the driver can take a bulk substring-scan path. - let needles: Option>> = config - .patterns - .iter() - .map(|p| plain_literal(p, config.ignore_case, config.regex_mode)) - .collect(); - let literal_searchers = needles.filter(|n| !n.is_empty()).map(|n| { - n.iter() - .map(|w| memmem::Finder::new(w).into_owned()) - .collect() - }); - - Ok(Self { - config, - patterns, - literal_searchers, - }) - } - - /// Per-pattern substring searchers, present only when the pattern set is a - /// pure set of literals (no regex needed). Used by the searcher to scan a - /// whole buffer at once instead of testing line by line. - pub fn literal_searchers(&self) -> Option<&[memmem::Finder<'static>]> { - self.literal_searchers.as_deref() + Ok(Self { config, patterns }) } /// Decide whether `line` matches and return the positions to highlight. @@ -229,25 +197,6 @@ impl Cursor<'_> { } } -/// Return the literal bytes of `pattern` when a raw byte-for-byte substring -/// search is *exactly* equivalent to matching it, otherwise `None`. -/// -/// We accept only ASCII, case-sensitive needles. That keeps the byte search in -/// agreement with the regex engine on every possible input, including bytes that -/// are not valid UTF-8: an ASCII byte can never be part of a multi-byte sequence, -/// so its presence is unambiguous. In the regex modes we also require that no -/// byte could ever act as a metacharacter; under `-F` the text is literal as-is. -fn plain_literal(pattern: &str, ignore_case: bool, mode: RegexMode) -> Option> { - if ignore_case || pattern.is_empty() || !pattern.is_ascii() { - return None; - } - // Every byte that carries special meaning in any of our regex syntaxes. - // A needle without these reads the same as a literal in Basic/Extended/Perl. - const SPECIAL: &[u8] = b".*[]^$\\+?{}()|"; - let plain = mode == RegexMode::Fixed || !pattern.bytes().any(|b| SPECIAL.contains(&b)); - plain.then(|| pattern.as_bytes().to_vec()) -} - struct CompiledPattern { /// Default semantics. It's decently fast and used for searching. leftmost: Regex, @@ -343,49 +292,3 @@ impl CompiledPattern { .is_some() } } - -#[cfg(test)] -mod tests { - use super::plain_literal; - use crate::RegexMode; - - fn lit(p: &str, ic: bool, mode: RegexMode) -> Option> { - plain_literal(p, ic, mode) - } - - #[test] - fn fixed_mode_takes_any_ascii_verbatim() { - // Under -F every byte is literal, even regex metacharacters. - assert_eq!(lit("abc", false, RegexMode::Fixed), Some(b"abc".to_vec())); - assert_eq!(lit("a.*b", false, RegexMode::Fixed), Some(b"a.*b".to_vec())); - assert_eq!(lit("a+b", false, RegexMode::Fixed), Some(b"a+b".to_vec())); - } - - #[test] - fn regex_modes_accept_metacharacter_free_literals() { - for mode in [RegexMode::Basic, RegexMode::Extended, RegexMode::Perl] { - assert_eq!(lit("ing", false, mode), Some(b"ing".to_vec())); - assert_eq!(lit("Hello123", false, mode), Some(b"Hello123".to_vec())); - } - } - - #[test] - fn regex_modes_reject_anything_with_a_metacharacter() { - for mode in [RegexMode::Basic, RegexMode::Extended, RegexMode::Perl] { - for p in [ - "a.b", "a*", "[ab]", "^a", "a$", "a\\b", "a+", "a?", "(a)", "a|b", "a{2}", - ] { - assert_eq!(lit(p, false, mode), None, "pattern {p:?} in {mode:?}"); - } - } - } - - #[test] - fn rejects_empty_case_insensitive_and_non_ascii() { - assert_eq!(lit("", false, RegexMode::Fixed), None); - assert_eq!(lit("abc", true, RegexMode::Fixed), None); // -i - assert_eq!(lit("abc", true, RegexMode::Basic), None); - assert_eq!(lit("café", false, RegexMode::Fixed), None); // non-ASCII - assert_eq!(lit("naïve", false, RegexMode::Basic), None); - } -} diff --git a/src/searcher.rs b/src/searcher.rs index 68e2f97..a5efe01 100644 --- a/src/searcher.rs +++ b/src/searcher.rs @@ -8,8 +8,7 @@ use crate::line_buffer::LineBuffer; use crate::matcher::Matcher; use crate::output::OutputWriter; use crate::{BinaryMode, Config, DeviceMode, DirectoryMode}; -use memchr::memmem::Finder; -use memchr::{memchr, memchr_iter, memrchr}; +use memchr::memchr; use std::ffi::OsStr; use std::fs::File; use std::io; @@ -249,221 +248,12 @@ impl<'a> Searcher<'a> { self.binary_notice_enabled && self.session_binary_detected && self.session_any_match() } - /// Whether the current configuration can use the buffer-at-a-time fast - /// path. It applies only to pure-literal patterns and the simpler output - /// modes — anything needing match positions, context, inversion, or special - /// binary handling falls back to the line-at-a-time [`Self::session_run`]. - fn eligible_for_fast_path(&self) -> bool { - // On Windows the line-at-a-time path strips a trailing CR before - // matching; the fast path mirrors that only for printed output, so a - // literal needle still behaves the same. Nothing else differs. - self.matcher.literal_searchers().is_some() - && !self.config.invert_match - && !self.config.word_regexp - && !self.config.line_regexp - && !self.config.only_matching - && !self.config.use_color - // `has_context` also covers `-C 0`, which still emits `--` separators. - && !self.config.has_context - && !self.config.null_data - && self.config.binary_mode != BinaryMode::WithoutMatch - } - - /// Buffer-at-a-time driver for literal patterns. Instead of testing every - /// line, it scans whole chunks with a substring searcher and only locates - /// line boundaries around the matches it finds. - fn session_run_fast( - &mut self, - lb: &mut LineBuffer, - path: &Path, - reader: &mut File, - ) -> io::Result { - lb.reset(); - if self.config.quiet - || self.config.files_with_matches - || self.config.files_without_match - || self.config.count - { - self.fast_locate(lb, path, reader) - } else { - self.fast_print(lb, path, reader) - } - } - - /// Fast path for modes that only need to know *whether* / *how many* lines - /// match: `-c`, `-l`, `-L`, `-q`. No per-line rendering, so no line numbers, - /// byte offsets, or binary bookkeeping are required (the count of matching - /// lines is unaffected by binary detection, and `-l`/`-L`/`-q` list files - /// regardless). - fn fast_locate( - &mut self, - lb: &mut LineBuffer, - path: &Path, - reader: &mut File, - ) -> io::Result { - let finders = self - .matcher - .literal_searchers() - .expect("eligibility guarantees literal searchers"); - let max = self.config.max_count; - // Existence is enough for these three; only `-c` needs the full tally. - let stop_at_first = - self.config.quiet || self.config.files_with_matches || self.config.files_without_match; - - let mut count: u64 = 0; - let mut matched = false; - 'outer: while let Some((chunk, _)) = lb.read_chunk(reader)? { - let mut p = 0; - while p < chunk.len() { - let Some(rel) = leftmost_match(finders, &chunk[p..]) else { - break; - }; - if max.is_some_and(|mx| count >= mx) { - break 'outer; - } - let (_, line_end) = line_bounds(chunk, p + rel); - count += 1; - matched = true; - if stop_at_first { - break 'outer; - } - // Each line counts once: resume past this line's terminator. - p = line_end + 1; - } - } - - // `-l`/`-L` take precedence over `-c`, matching the line-at-a-time path. - if self.config.quiet { - // Exit status only. - } else if self.config.files_with_matches { - if matched { - self.writer.write_filename(path)?; - } - } else if self.config.files_without_match { - if !matched { - self.writer.write_filename(path)?; - } - } else if self.config.count { - self.writer.write_count(count, path)?; - } - Ok(matched) - } - - /// Fast path that prints whole matching lines (optionally with `-n`, `-b`, - /// filename prefixes, `-m`). Binary files are detected per chunk and reported - /// with the usual notice instead of dumping their lines. - fn fast_print( - &mut self, - lb: &mut LineBuffer, - path: &Path, - reader: &mut File, - ) -> io::Result { - let finders = self - .matcher - .literal_searchers() - .expect("eligibility guarantees literal searchers"); - let max = self.config.max_count; - let want_lineno = self.config.line_number; - let detect_binary = self.config.binary_mode != BinaryMode::Text; - let notice_enabled = self.binary_notice_enabled; - - let mut count: u64 = 0; - let mut matched = false; - let mut binary = false; - // Number of terminators in all previously consumed chunks (for `-n`). - let mut base_lines: u64 = 0; - - 'outer: while let Some((chunk, chunk_off)) = lb.read_chunk(reader)? { - let mut p = 0; - // NUL scanned up to here; terminators counted up to `nl_cursor`. - let mut nul_scanned = 0; - let mut nl_cursor = 0; - let mut nl_before = 0u64; - - while p < chunk.len() { - let Some(rel) = leftmost_match(finders, &chunk[p..]) else { - break; - }; - if max.is_some_and(|mx| count >= mx) { - break 'outer; - } - let (line_beg, line_end) = line_bounds(chunk, p + rel); - - // A NUL anywhere up to this line marks the file binary, as does - // an invalid-UTF-8 matching line. - if detect_binary && !binary { - if memchr(0, &chunk[nul_scanned..line_end]).is_some() { - binary = true; - } - nul_scanned = line_end; - } - - let line = &chunk[line_beg..line_end]; - #[cfg(windows)] - let line = if self.config.strip_cr && line.last() == Some(&b'\r') { - &line[..line.len() - 1] - } else { - line - }; - - if detect_binary && !binary && std::str::from_utf8(line).is_err() { - binary = true; - } - - if binary { - // First match in a binary file: stop and emit the notice - // once at the end instead of dumping the line. - matched = true; - break 'outer; - } - - let line_number = if want_lineno { - nl_before += count_terminators(&chunk[nl_cursor..line_beg]); - nl_cursor = line_beg; - base_lines + nl_before + 1 - } else { - 0 - }; - self.writer.write_line( - &LineView { - line, - line_number, - byte_offset: chunk_off + line_beg as u64, - is_match: true, - match_positions: &[], - }, - path, - )?; - count += 1; - matched = true; - p = line_end + 1; - } - - // Carry NUL detection and the line tally across the chunk boundary. - if detect_binary && !binary && memchr(0, &chunk[nul_scanned..]).is_some() { - binary = true; - } - if want_lineno { - base_lines += nl_before + count_terminators(&chunk[nl_cursor..]); - } - } - - if binary && notice_enabled && matched { - self.writer.report_binary_match(path); - } - Ok(matched) - } - fn session_run( &mut self, lb: &mut LineBuffer, path: &Path, reader: &mut File, ) -> io::Result { - if self.eligible_for_fast_path() { - return self.session_run_fast(lb, path, reader); - } - // Reset all session (per-file) state. self.session_context_buf.clear(); self.session_match_count = 0; @@ -682,31 +472,3 @@ impl<'a> Searcher<'a> { } } } - -/// Offset of the earliest occurrence of any needle in `hay`, or `None`. -fn leftmost_match(finders: &[Finder<'static>], hay: &[u8]) -> Option { - let mut best: Option = None; - for finder in finders { - if let Some(pos) = finder.find(hay) { - best = Some(best.map_or(pos, |b| b.min(pos))); - if best == Some(0) { - break; // Can't start any earlier. - } - } - } - best -} - -/// Count line terminators in `bytes`. -fn count_terminators(bytes: &[u8]) -> u64 { - memchr_iter(b'\n', bytes).count() as u64 -} - -/// Byte range `[start, end)` of the line containing `pos` in `buf`, excluding -/// the trailing terminator. `start` follows the previous terminator (or 0); -/// `end` is the next terminator (or end of buffer). -fn line_bounds(buf: &[u8], pos: usize) -> (usize, usize) { - let start = memrchr(b'\n', &buf[..pos]).map_or(0, |i| i + 1); - let end = memchr(b'\n', &buf[pos..]).map_or(buf.len(), |i| pos + i); - (start, end) -} diff --git a/tests/test_grep.rs b/tests/test_grep.rs index 20b3f83..9a235f7 100644 --- a/tests/test_grep.rs +++ b/tests/test_grep.rs @@ -1343,170 +1343,3 @@ fn repeated_options_are_accepted() { .succeeds() .stdout_only("a\nb\n"); } - -#[test] -fn literal_buffer_path_prefixes_and_max() { - // Plain literals are served by the buffer-at-a-time engine; the line/byte - // prefixes and -m must still be byte-identical to the line-at-a-time path. - - // -n and -b together: "lineno:byteoffset:line". - let (_s, mut c) = ucmd(); - c.args(&["-nb", "foo"]) - .pipe_in("foo\nbar\nfoobar\n") - .succeeds() - .stdout_only("1:0:foo\n3:8:foobar\n"); - - // A line matched more than once is still emitted once. - let (_s, mut c) = ucmd(); - c.args(&["-c", "oo"]) - .pipe_in("oooo\nbar\noo\n") - .succeeds() - .stdout_only("2\n"); - - // -m caps printed matches. - let (_s, mut c) = ucmd(); - c.args(&["-m", "2", "x"]) - .pipe_in("x\ny\nx\nz\nx\n") - .succeeds() - .stdout_only("x\nx\n"); - - // Final line without a trailing terminator still matches and is printed - // with an added newline. - let (_s, mut c) = ucmd(); - c.args(&["foo"]) - .pipe_in("bar\nfoo") - .succeeds() - .stdout_only("foo\n"); -} - -#[test] -fn literal_buffer_path_spans_many_chunks() { - // Build an input far larger than the read buffer so the buffer-at-a-time - // engine crosses several chunk boundaries, and check that line numbers and - // counts stay correct across them. - let mut input = String::new(); - let mut expected_n = String::new(); - let mut count = 0u32; - for i in 1..=100_000u32 { - if i % 7 == 0 { - input.push_str("needle\n"); - expected_n.push_str(&format!("{i}:needle\n")); - count += 1; - } else { - input.push_str("some filler text\n"); - } - } - assert!(input.len() > 512 * 1024, "input must exceed several chunks"); - - let (_s, mut c) = ucmd(); - c.args(&["-c", "needle"]) - .pipe_in(input.clone()) - .succeeds() - .stdout_only(format!("{count}\n")); - - let (_s, mut c) = ucmd(); - c.args(&["-n", "needle"]) - .pipe_in(input) - .succeeds() - .stdout_only(expected_n); -} - -// Plain literals run on the buffer-at-a-time fast path, so the following tests -// use bracket-class patterns (non-literal) to keep the line-at-a-time engine's -// `-l` / `-L` / `-q` and binary-handling paths exercised too. - -#[test] -fn slow_path_list_and_quiet_modes() { - let (scene, _) = ucmd(); - scene.fixtures.write("hit", "yes\n"); - scene.fixtures.write("miss", "no\n"); - - // -l: list matching files. - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["-l", "[y]es", "hit", "miss"]) - .succeeds() - .stdout_is("hit\n"); - - // -L with a match in one file: only the non-matching file is listed. - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["-L", "[y]es", "hit", "miss"]) - .succeeds() - .stdout_is("miss\n"); - - // -L with no match anywhere: both files listed, exit 1. - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["-L", "[z]z", "hit", "miss"]) - .fails_with_code(1) - .stdout_is("hit\nmiss\n"); - - // -q stops at the first match (exit 0) or reports no match (exit 1). - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["-q", "[y]es", "hit"]) - .succeeds() - .no_output(); - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["-q", "[z]z", "hit"]) - .fails_with_code(1) - .no_output(); -} - -#[test] -fn slow_path_binary_handling() { - let (scene, _) = ucmd(); - // NOTE: avoid the name "nul" here — it's a reserved device name on Windows, - // so writing/reading it hits the null device instead of a real file. - scene.fixtures.write_bytes("nulbin", b"hit\0\n"); - scene.fixtures.write_bytes("bad", b"a\x9d\n"); - - // Binary notice on the line-at-a-time engine (regex pattern). - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["[h]it", "nulbin"]) - .succeeds() - .no_stdout() - .stderr_contains("binary file matches"); - - // -a forces text mode: the NUL line is printed verbatim. - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["-a", "[h]it", "nulbin"]) - .succeeds() - .stdout_is_bytes(b"hit\0\n"); - - // --binary-files=without-match bails out on an invalid-UTF-8 match. - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["--binary-files=without-match", "[a]", "bad"]) - .fails_with_code(1) - .no_output(); - - // A NUL after the matched line means binariness is discovered at EOF, so - // the line is printed first and the notice is emitted during finalization. - scene.fixtures.write_bytes("late", b"hit\nno\0\n"); - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["[h]it", "late"]) - .succeeds() - .stdout_is("hit\n") - .stderr_contains("binary file matches"); -} - -#[test] -fn fast_path_binary_detected_after_a_printed_line() { - // A NUL that appears only after the last match in the buffer marks the file - // binary on the fast path *after* an earlier match was already printed: the - // printed line stays and the trailing notice is still emitted. - let (scene, _) = ucmd(); - scene.fixtures.write_bytes("b", b"hit\nno\0\n"); - scene - .cmd(env!("CARGO_BIN_EXE_grep")) - .args(&["hit", "b"]) - .succeeds() - .stdout_is("hit\n") - .stderr_contains("binary file matches"); -}