@@ -1884,6 +1884,39 @@ mod tests {
18841884 }
18851885 }
18861886
1887+ // ── Byte-level integrity helpers ────────────────────────────────
1888+ //
1889+ // Every SMB copy test that lands a file on a destination hashes the
1890+ // source bytes and the destination bytes and compares the two. A
1891+ // pipeline bug that drops, duplicates, reorders, or reuses a chunk's
1892+ // buffer will change the hash — the old `bytes_written == expected`
1893+ // and `metadata.size == N` assertions would silently pass. blake3 is
1894+ // fast (well over a GB/s single-threaded), so the 20 MB streaming
1895+ // tests pay negligible hashing cost on top of the SMB RTTs.
1896+ //
1897+ // `hash_volume_file` streams the destination through `open_read_stream`
1898+ // so we also avoid buffering e.g. 20 MB into a `Vec<u8>` just to
1899+ // compare with `assert_eq!` (which on mismatch used to print an
1900+ // unreadable megabyte-sized diff). The hex-formatted hash in the
1901+ // assertion message is actionable on failure.
1902+
1903+ fn hash_bytes ( data : & [ u8 ] ) -> [ u8 ; 32 ] {
1904+ * blake3:: hash ( data) . as_bytes ( )
1905+ }
1906+
1907+ async fn hash_volume_file ( volume : & dyn Volume , path : & Path ) -> [ u8 ; 32 ] {
1908+ let mut stream = volume
1909+ . open_read_stream ( path)
1910+ . await
1911+ . expect ( "open read stream for hashing" ) ;
1912+ let mut hasher = blake3:: Hasher :: new ( ) ;
1913+ while let Some ( chunk) = stream. next_chunk ( ) . await {
1914+ let chunk = chunk. expect ( "read chunk for hashing" ) ;
1915+ hasher. update ( & chunk) ;
1916+ }
1917+ * hasher. finalize ( ) . as_bytes ( )
1918+ }
1919+
18871920 #[ tokio:: test]
18881921 #[ ignore = "Requires Docker SMB containers (./apps/desktop/test/smb-servers/start.sh)" ]
18891922 async fn smb_integration_list_directory ( ) {
@@ -1918,6 +1951,16 @@ mod tests {
19181951 assert_eq ! ( meta. size, Some ( content. len( ) as u64 ) ) ;
19191952 assert ! ( !meta. is_directory) ;
19201953
1954+ // Byte-level integrity: read the destination back and compare bytes.
1955+ // Catches any pipeline bug that lets metadata say "N bytes" while the
1956+ // wire payload is something other than the source.
1957+ let mut readback_stream = vol. open_read_stream ( Path :: new ( & file_path) ) . await . unwrap ( ) ;
1958+ let mut readback = Vec :: new ( ) ;
1959+ while let Some ( Ok ( chunk) ) = readback_stream. next_chunk ( ) . await {
1960+ readback. extend_from_slice ( & chunk) ;
1961+ }
1962+ assert_eq ! ( readback, content, "destination bytes must match source bytes" ) ;
1963+
19211964 // List the directory and verify the file is there
19221965 let entries = vol. list_directory_impl ( Path :: new ( & dir) ) . await . unwrap ( ) ;
19231966 assert_eq ! ( entries. len( ) , 1 ) ;
@@ -2084,6 +2127,17 @@ mod tests {
20842127 let meta = vol. get_metadata ( Path :: new ( & smb_file) ) . await . unwrap ( ) ;
20852128 assert_eq ! ( meta. size, Some ( content. len( ) as u64 ) ) ;
20862129
2130+ // Byte-level integrity: the bytes that landed on the SMB share must
2131+ // be the same bytes the source stream produced. A bug in the write
2132+ // pipeline (wrong chunk reused, compound-write fast-path mis-splitting
2133+ // the buffer) would leave size correct but content wrong.
2134+ let mut verify = vol. open_read_stream ( Path :: new ( & smb_file) ) . await . unwrap ( ) ;
2135+ let mut readback = Vec :: new ( ) ;
2136+ while let Some ( Ok ( chunk) ) = verify. next_chunk ( ) . await {
2137+ readback. extend_from_slice ( & chunk) ;
2138+ }
2139+ assert_eq ! ( readback, content, "SMB destination bytes must match source bytes" ) ;
2140+
20872141 vol. delete ( Path :: new ( & smb_file) ) . await . unwrap ( ) ;
20882142 vol. delete ( Path :: new ( & dir) ) . await . unwrap ( ) ;
20892143 }
@@ -2500,6 +2554,20 @@ mod tests {
25002554 ) ;
25012555 assert_eq ! ( last_bytes. load( Ordering :: Relaxed ) , 200_000 ) ;
25022556
2557+ // Byte-level integrity: a progress-reporting write that loses or
2558+ // duplicates chunks would still satisfy the "progress_calls >= 1
2559+ // and final bytes_done == 200_000" assertions — read the destination
2560+ // back and compare it byte-for-byte with the source to catch that.
2561+ let mut verify = vol
2562+ . open_read_stream ( Path :: new ( & format ! ( "{}/big.bin" , dir) ) )
2563+ . await
2564+ . unwrap ( ) ;
2565+ let mut readback = Vec :: with_capacity ( 200_000 ) ;
2566+ while let Some ( Ok ( chunk) ) = verify. next_chunk ( ) . await {
2567+ readback. extend_from_slice ( & chunk) ;
2568+ }
2569+ assert_eq ! ( readback, data, "destination bytes must match source bytes" ) ;
2570+
25032571 ensure_clean ( & vol, & dir) . await ;
25042572 }
25052573
@@ -2599,18 +2667,30 @@ mod tests {
25992667 let smb_path = format ! ( "{}/big-stream.bin" , dir) ;
26002668 vol. create_file ( Path :: new ( & smb_path) , & data) . await . unwrap ( ) ;
26012669
2670+ // Hash chunks as they arrive so a 20 MB mismatch produces a single
2671+ // 32-byte hex pair instead of a 20 MB `Vec<u8>` diff. Also avoids
2672+ // the 20 MB reassembly allocation.
26022673 let mut stream = vol. open_read_stream ( Path :: new ( & smb_path) ) . await . unwrap ( ) ;
26032674 assert_eq ! ( stream. total_size( ) , size as u64 ) ;
26042675
2605- let mut reassembled = Vec :: with_capacity ( size ) ;
2676+ let mut hasher = blake3 :: Hasher :: new ( ) ;
26062677 let mut chunks_seen = 0usize ;
2678+ let mut total_read = 0usize ;
26072679 while let Some ( result) = stream. next_chunk ( ) . await {
26082680 let chunk = result. unwrap ( ) ;
26092681 assert ! ( !chunk. is_empty( ) , "should not yield empty chunks" ) ;
2610- reassembled. extend_from_slice ( & chunk) ;
2682+ hasher. update ( & chunk) ;
2683+ total_read += chunk. len ( ) ;
26112684 chunks_seen += 1 ;
26122685 }
2613- assert_eq ! ( reassembled, data) ;
2686+ assert_eq ! ( total_read, size, "total bytes streamed must equal source size" ) ;
2687+ let readback_hash = * hasher. finalize ( ) . as_bytes ( ) ;
2688+ let expected_hash = hash_bytes ( & data) ;
2689+ assert_eq ! (
2690+ readback_hash, expected_hash,
2691+ "streamed bytes must match source (expected blake3 {:x?}, got {:x?})" ,
2692+ expected_hash, readback_hash
2693+ ) ;
26142694 assert_eq ! ( stream. bytes_read( ) , size as u64 ) ;
26152695 assert ! ( chunks_seen >= 2 , "multi-MB file should span multiple chunks" ) ;
26162696
@@ -2636,20 +2716,31 @@ mod tests {
26362716 let smb_path = format ! ( "{}/export-large.bin" , dir) ;
26372717 vol. create_file ( Path :: new ( & smb_path) , & data) . await . unwrap ( ) ;
26382718
2719+ // Hash chunks as they arrive — see the sibling large-file test for
2720+ // why we avoid `assert_eq!` on 20 MB `Vec<u8>`s.
26392721 let mut stream = vol. open_read_stream ( Path :: new ( & smb_path) ) . await . unwrap ( ) ;
26402722 assert_eq ! ( stream. total_size( ) , size as u64 ) ;
26412723
26422724 let mut chunks_seen = 0usize ;
2643- let mut readback: Vec < u8 > = Vec :: with_capacity ( size) ;
2725+ let mut hasher = blake3:: Hasher :: new ( ) ;
2726+ let mut total_read = 0usize ;
26442727 while let Some ( Ok ( chunk) ) = stream. next_chunk ( ) . await {
26452728 chunks_seen += 1 ;
2646- readback. extend_from_slice ( & chunk) ;
2729+ hasher. update ( & chunk) ;
2730+ total_read += chunk. len ( ) ;
26472731 }
26482732 assert ! (
26492733 chunks_seen >= 2 ,
26502734 "streaming should yield multiple chunks for a multi-MB file"
26512735 ) ;
2652- assert_eq ! ( readback, data) ;
2736+ assert_eq ! ( total_read, size, "total bytes streamed must equal source size" ) ;
2737+ let readback_hash = * hasher. finalize ( ) . as_bytes ( ) ;
2738+ let expected_hash = hash_bytes ( & data) ;
2739+ assert_eq ! (
2740+ readback_hash, expected_hash,
2741+ "streamed bytes must match source (expected blake3 {:x?}, got {:x?})" ,
2742+ expected_hash, readback_hash
2743+ ) ;
26532744
26542745 ensure_clean ( & vol, & dir) . await ;
26552746 }
@@ -2728,14 +2819,17 @@ mod tests {
27282819 ) ;
27292820 assert_eq ! ( last_bytes. load( Ordering :: Relaxed ) , size as u64 ) ;
27302821
2731- // Verify content integrity via the streaming reader.
2732- let mut stream = vol. open_read_stream ( Path :: new ( & smb_path) ) . await . unwrap ( ) ;
2733- assert_eq ! ( stream. total_size( ) , size as u64 ) ;
2734- let mut readback = Vec :: with_capacity ( size) ;
2735- while let Some ( Ok ( chunk) ) = stream. next_chunk ( ) . await {
2736- readback. extend_from_slice ( & chunk) ;
2737- }
2738- assert_eq ! ( readback, data) ;
2822+ // Byte-level integrity: hash the source and the destination and
2823+ // compare. Streaming hash avoids materializing a 4 MB `Vec<u8>`
2824+ // just to `assert_eq!` it, and on mismatch we get a legible hex
2825+ // dump instead of a multi-megabyte diff.
2826+ let expected_hash = hash_bytes ( & data) ;
2827+ let actual_hash = hash_volume_file ( & vol as & dyn Volume , Path :: new ( & smb_path) ) . await ;
2828+ assert_eq ! (
2829+ actual_hash, expected_hash,
2830+ "SMB destination bytes must match source (expected blake3 {:x?}, got {:x?})" ,
2831+ expected_hash, actual_hash
2832+ ) ;
27392833
27402834 let _ = std:: fs:: remove_dir_all ( & local_tmp) ;
27412835 ensure_clean ( & vol, & dir) . await ;
@@ -2782,14 +2876,17 @@ mod tests {
27822876 ) ;
27832877 assert_eq ! ( last_bytes. load( Ordering :: Relaxed ) , size as u64 ) ;
27842878
2785- // Verify content integrity via the streaming reader.
2786- let mut verify = vol. open_read_stream ( Path :: new ( & smb_path) ) . await . unwrap ( ) ;
2787- assert_eq ! ( verify. total_size( ) , size as u64 ) ;
2788- let mut readback = Vec :: with_capacity ( size) ;
2789- while let Some ( Ok ( chunk) ) = verify. next_chunk ( ) . await {
2790- readback. extend_from_slice ( & chunk) ;
2791- }
2792- assert_eq ! ( readback, data) ;
2879+ // Byte-level integrity: streaming hash over the destination catches
2880+ // any chunk drop/duplicate/reuse that "bytes_written == expected"
2881+ // on its own can't see. See the sibling local-source test for the
2882+ // rationale on hashing vs. `assert_eq!` on a 4 MB buffer.
2883+ let expected_hash = hash_bytes ( & data) ;
2884+ let actual_hash = hash_volume_file ( & vol as & dyn Volume , Path :: new ( & smb_path) ) . await ;
2885+ assert_eq ! (
2886+ actual_hash, expected_hash,
2887+ "SMB destination bytes must match source (expected blake3 {:x?}, got {:x?})" ,
2888+ expected_hash, actual_hash
2889+ ) ;
27932890
27942891 ensure_clean ( & vol, & dir) . await ;
27952892 }
0 commit comments