Skip to content

Commit fd5a2d8

Browse files
committed
Tests: Byte-level hash checks on every SMB copy test
- Added `hash_bytes` / `hash_volume_file` helpers in the test module of `smb.rs`. The hash is computed in a streaming fashion, so the 20 MB tests don't reassemble a multi-megabyte `Vec<u8>` just to fail `assert_eq!` with an unreadable diff.
- `smb_integration_create_and_read_file`, `smb_integration_write_from_stream_single_file`, and `smb_integration_write_from_stream_with_progress` previously only verified the metadata size or the `bytes_written` return value. All three now read the destination back and compare bytes — a pipeline bug that drops, duplicates, or reuses a chunk would have slipped past the old assertions.
- Large-file tests (`open_read_stream_large_file_spans_many_chunks`, `read_stream_large_file_multi_chunk`, `write_from_stream_local_source_large_file`, `write_from_stream_streams_large_file`) replaced their `assert_eq!(readback, data)` on 4–20 MB `Vec<u8>`s with streaming blake3 comparisons, which print a pair of 32-byte hex hashes on mismatch instead of a useless megabyte-sized diff.
- `blake3 = "1.8.3"` added as a dev-dependency (published 2026-01-08; Cargo resolved to 1.8.4 from 2026-03-30, which is fine — blake3 is a well-audited hash crate with no network surface in this use).

Motivation: the SMB write/read pipeline (compound fast-path, streaming fallback, channel-backed reader) is the place where pipeline-corruption bugs can land silently — wrong buffer reused, chunk dropped, concurrency bleed. Tests that only assert `bytes_written == expected` and `metadata.size == N` would let every one of those through. With blake3 comparisons on these tests' hot path, we'll catch the next such bug on the first red run instead of seeing it in a user's file.
1 parent c336dbb commit fd5a2d8

3 files changed

Lines changed: 152 additions & 22 deletions

File tree

Cargo.lock

Lines changed: 27 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

apps/desktop/src-tauri/Cargo.toml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,12 @@ semver = "1"
148148

149149

150150
[dev-dependencies]
151+
# SMB integration tests hash source and destination bytes to catch pipeline
152+
# corruption (dropped/duplicated chunks, concurrency bleed) that a simple
153+
# size-check or `Vec<u8>` equality would either miss or report uselessly
154+
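# (a 20 MB diff isn't actionable). The minimum version was about a month old
155+
# (published 2026-01-08) when added, to avoid 0-day vulns in the freshest release. NOTE: "1.8.3" is a caret requirement, not an exact pin, so Cargo may resolve a newer 1.x release.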
156+
blake3 = "1.8.3"
151157
criterion = { version = "0.8.1", features = ["html_reports"] }
152158
# Dev-only env-driven logger. Tests don't use `tauri-plugin-log` (it requires
153159
# a Tauri runtime), so this gives us `RUST_LOG`-controlled logs for wire

apps/desktop/src-tauri/src/file_system/volume/smb.rs

Lines changed: 119 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,6 +1884,39 @@ mod tests {
18841884
}
18851885
}
18861886

1887+
// ── Byte-level integrity helpers ────────────────────────────────
1888+
//
1889+
// Every SMB copy test that lands a file on a destination hashes the
1890+
// source bytes and the destination bytes and compares the two. A
1891+
// pipeline bug that drops, duplicates, reorders, or reuses a chunk's
1892+
// buffer will change the hash — the old `bytes_written == expected`
1893+
// and `metadata.size == N` assertions would silently pass. blake3 is
1894+
// fast (well over a GB/s single-threaded), so the 20 MB streaming
1895+
// tests pay negligible hashing cost on top of the SMB RTTs.
1896+
//
1897+
// `hash_volume_file` streams the destination through `open_read_stream`
1898+
// so we also avoid buffering e.g. 20 MB into a `Vec<u8>` just to
1899+
// compare with `assert_eq!` (which on mismatch used to print an
1900+
// unreadable megabyte-sized diff). The hex-formatted hash in the
1901+
// assertion message is actionable on failure.
1902+
1903+
fn hash_bytes(data: &[u8]) -> [u8; 32] {
1904+
*blake3::hash(data).as_bytes()
1905+
}
1906+
1907+
async fn hash_volume_file(volume: &dyn Volume, path: &Path) -> [u8; 32] {
1908+
let mut stream = volume
1909+
.open_read_stream(path)
1910+
.await
1911+
.expect("open read stream for hashing");
1912+
let mut hasher = blake3::Hasher::new();
1913+
while let Some(chunk) = stream.next_chunk().await {
1914+
let chunk = chunk.expect("read chunk for hashing");
1915+
hasher.update(&chunk);
1916+
}
1917+
*hasher.finalize().as_bytes()
1918+
}
1919+
18871920
#[tokio::test]
18881921
#[ignore = "Requires Docker SMB containers (./apps/desktop/test/smb-servers/start.sh)"]
18891922
async fn smb_integration_list_directory() {
@@ -1918,6 +1951,16 @@ mod tests {
19181951
assert_eq!(meta.size, Some(content.len() as u64));
19191952
assert!(!meta.is_directory);
19201953

1954+
// Byte-level integrity: read the destination back and compare bytes.
1955+
// Catches any pipeline bug that lets metadata say "N bytes" while the
1956+
// wire payload is something other than the source.
1957+
let mut readback_stream = vol.open_read_stream(Path::new(&file_path)).await.unwrap();
1958+
let mut readback = Vec::new();
1959+
while let Some(Ok(chunk)) = readback_stream.next_chunk().await {
1960+
readback.extend_from_slice(&chunk);
1961+
}
1962+
assert_eq!(readback, content, "destination bytes must match source bytes");
1963+
19211964
// List the directory and verify the file is there
19221965
let entries = vol.list_directory_impl(Path::new(&dir)).await.unwrap();
19231966
assert_eq!(entries.len(), 1);
@@ -2084,6 +2127,17 @@ mod tests {
20842127
let meta = vol.get_metadata(Path::new(&smb_file)).await.unwrap();
20852128
assert_eq!(meta.size, Some(content.len() as u64));
20862129

2130+
// Byte-level integrity: the bytes that landed on the SMB share must
2131+
// be the same bytes the source stream produced. A bug in the write
2132+
// pipeline (wrong chunk reused, compound-write fast-path mis-splitting
2133+
// the buffer) would leave size correct but content wrong.
2134+
let mut verify = vol.open_read_stream(Path::new(&smb_file)).await.unwrap();
2135+
let mut readback = Vec::new();
2136+
while let Some(Ok(chunk)) = verify.next_chunk().await {
2137+
readback.extend_from_slice(&chunk);
2138+
}
2139+
assert_eq!(readback, content, "SMB destination bytes must match source bytes");
2140+
20872141
vol.delete(Path::new(&smb_file)).await.unwrap();
20882142
vol.delete(Path::new(&dir)).await.unwrap();
20892143
}
@@ -2500,6 +2554,20 @@ mod tests {
25002554
);
25012555
assert_eq!(last_bytes.load(Ordering::Relaxed), 200_000);
25022556

2557+
// Byte-level integrity: a progress-reporting write that loses or
2558+
// duplicates chunks would still satisfy the "progress_calls >= 1
2559+
// and final bytes_done == 200_000" assertions — read the destination back
2560+
// and compare it against the source bytes to catch that.
2561+
let mut verify = vol
2562+
.open_read_stream(Path::new(&format!("{}/big.bin", dir)))
2563+
.await
2564+
.unwrap();
2565+
let mut readback = Vec::with_capacity(200_000);
2566+
while let Some(Ok(chunk)) = verify.next_chunk().await {
2567+
readback.extend_from_slice(&chunk);
2568+
}
2569+
assert_eq!(readback, data, "destination bytes must match source bytes");
2570+
25032571
ensure_clean(&vol, &dir).await;
25042572
}
25052573

@@ -2599,18 +2667,30 @@ mod tests {
25992667
let smb_path = format!("{}/big-stream.bin", dir);
26002668
vol.create_file(Path::new(&smb_path), &data).await.unwrap();
26012669

2670+
// Hash chunks as they arrive so a 20 MB mismatch produces a single
2671+
// 32-byte hex pair instead of a 20 MB `Vec<u8>` diff. Also avoids
2672+
// the 20 MB reassembly allocation.
26022673
let mut stream = vol.open_read_stream(Path::new(&smb_path)).await.unwrap();
26032674
assert_eq!(stream.total_size(), size as u64);
26042675

2605-
let mut reassembled = Vec::with_capacity(size);
2676+
let mut hasher = blake3::Hasher::new();
26062677
let mut chunks_seen = 0usize;
2678+
let mut total_read = 0usize;
26072679
while let Some(result) = stream.next_chunk().await {
26082680
let chunk = result.unwrap();
26092681
assert!(!chunk.is_empty(), "should not yield empty chunks");
2610-
reassembled.extend_from_slice(&chunk);
2682+
hasher.update(&chunk);
2683+
total_read += chunk.len();
26112684
chunks_seen += 1;
26122685
}
2613-
assert_eq!(reassembled, data);
2686+
assert_eq!(total_read, size, "total bytes streamed must equal source size");
2687+
let readback_hash = *hasher.finalize().as_bytes();
2688+
let expected_hash = hash_bytes(&data);
2689+
assert_eq!(
2690+
readback_hash, expected_hash,
2691+
"streamed bytes must match source (expected blake3 {:x?}, got {:x?})",
2692+
expected_hash, readback_hash
2693+
);
26142694
assert_eq!(stream.bytes_read(), size as u64);
26152695
assert!(chunks_seen >= 2, "multi-MB file should span multiple chunks");
26162696

@@ -2636,20 +2716,31 @@ mod tests {
26362716
let smb_path = format!("{}/export-large.bin", dir);
26372717
vol.create_file(Path::new(&smb_path), &data).await.unwrap();
26382718

2719+
// Hash chunks as they arrive — see the sibling large-file test for
2720+
// why we avoid `assert_eq!` on 20 MB `Vec<u8>`s.
26392721
let mut stream = vol.open_read_stream(Path::new(&smb_path)).await.unwrap();
26402722
assert_eq!(stream.total_size(), size as u64);
26412723

26422724
let mut chunks_seen = 0usize;
2643-
let mut readback: Vec<u8> = Vec::with_capacity(size);
2725+
let mut hasher = blake3::Hasher::new();
2726+
let mut total_read = 0usize;
26442727
while let Some(Ok(chunk)) = stream.next_chunk().await {
26452728
chunks_seen += 1;
2646-
readback.extend_from_slice(&chunk);
2729+
hasher.update(&chunk);
2730+
total_read += chunk.len();
26472731
}
26482732
assert!(
26492733
chunks_seen >= 2,
26502734
"streaming should yield multiple chunks for a multi-MB file"
26512735
);
2652-
assert_eq!(readback, data);
2736+
assert_eq!(total_read, size, "total bytes streamed must equal source size");
2737+
let readback_hash = *hasher.finalize().as_bytes();
2738+
let expected_hash = hash_bytes(&data);
2739+
assert_eq!(
2740+
readback_hash, expected_hash,
2741+
"streamed bytes must match source (expected blake3 {:x?}, got {:x?})",
2742+
expected_hash, readback_hash
2743+
);
26532744

26542745
ensure_clean(&vol, &dir).await;
26552746
}
@@ -2728,14 +2819,17 @@ mod tests {
27282819
);
27292820
assert_eq!(last_bytes.load(Ordering::Relaxed), size as u64);
27302821

2731-
// Verify content integrity via the streaming reader.
2732-
let mut stream = vol.open_read_stream(Path::new(&smb_path)).await.unwrap();
2733-
assert_eq!(stream.total_size(), size as u64);
2734-
let mut readback = Vec::with_capacity(size);
2735-
while let Some(Ok(chunk)) = stream.next_chunk().await {
2736-
readback.extend_from_slice(&chunk);
2737-
}
2738-
assert_eq!(readback, data);
2822+
// Byte-level integrity: hash the source and the destination and
2823+
// compare. Streaming hash avoids materializing a 4 MB `Vec<u8>`
2824+
// just to `assert_eq!` it, and on mismatch we get a legible hex
2825+
// dump instead of a multi-megabyte diff.
2826+
let expected_hash = hash_bytes(&data);
2827+
let actual_hash = hash_volume_file(&vol as &dyn Volume, Path::new(&smb_path)).await;
2828+
assert_eq!(
2829+
actual_hash, expected_hash,
2830+
"SMB destination bytes must match source (expected blake3 {:x?}, got {:x?})",
2831+
expected_hash, actual_hash
2832+
);
27392833

27402834
let _ = std::fs::remove_dir_all(&local_tmp);
27412835
ensure_clean(&vol, &dir).await;
@@ -2782,14 +2876,17 @@ mod tests {
27822876
);
27832877
assert_eq!(last_bytes.load(Ordering::Relaxed), size as u64);
27842878

2785-
// Verify content integrity via the streaming reader.
2786-
let mut verify = vol.open_read_stream(Path::new(&smb_path)).await.unwrap();
2787-
assert_eq!(verify.total_size(), size as u64);
2788-
let mut readback = Vec::with_capacity(size);
2789-
while let Some(Ok(chunk)) = verify.next_chunk().await {
2790-
readback.extend_from_slice(&chunk);
2791-
}
2792-
assert_eq!(readback, data);
2879+
// Byte-level integrity: streaming hash over the destination catches
2880+
// any chunk drop/duplicate/reuse that "bytes_written == expected"
2881+
// on its own can't see. See the sibling local-source test for the
2882+
// rationale on hashing vs. `assert_eq!` on a 4 MB buffer.
2883+
let expected_hash = hash_bytes(&data);
2884+
let actual_hash = hash_volume_file(&vol as &dyn Volume, Path::new(&smb_path)).await;
2885+
assert_eq!(
2886+
actual_hash, expected_hash,
2887+
"SMB destination bytes must match source (expected blake3 {:x?}, got {:x?})",
2888+
expected_hash, actual_hash
2889+
);
27932890

27942891
ensure_clean(&vol, &dir).await;
27952892
}

0 commit comments

Comments
 (0)