From 1b3469ebc16981b135b854bbe891c0a67bb903ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dorian=20P=C3=A9ron?= Date: Thu, 7 May 2026 14:01:25 +0200 Subject: [PATCH] cksum: Introduce HashLength newtype to avoid byte/bit misuse --- src/uu/checksum_common/src/lib.rs | 6 +- src/uu/cksum/src/cksum.rs | 7 +- src/uucore/src/lib/features/checksum/mod.rs | 148 ++++++++++++------ .../src/lib/features/checksum/validate.rs | 53 ++++--- src/uucore/src/lib/features/sum.rs | 1 + 5 files changed, 136 insertions(+), 79 deletions(-) diff --git a/src/uu/checksum_common/src/lib.rs b/src/uu/checksum_common/src/lib.rs index 67b729778f0..629c86d435c 100644 --- a/src/uu/checksum_common/src/lib.rs +++ b/src/uu/checksum_common/src/lib.rs @@ -16,7 +16,7 @@ use uucore::checksum::compute::{ ChecksumComputeOptions, OutputFormat, perform_checksum_computation, }; use uucore::checksum::validate::{self, ChecksumValidateOptions, ChecksumVerbose}; -use uucore::checksum::{AlgoKind, ChecksumError, SizedAlgoKind}; +use uucore::checksum::{AlgoKind, ChecksumError, HashLength, SizedAlgoKind}; use uucore::error::UResult; use uucore::line_ending::LineEnding; use uucore::{crate_version, format_usage, localized_help_template, util_name}; @@ -64,7 +64,7 @@ pub fn standalone_with_length_main( algo: AlgoKind, cmd: Command, args: impl uucore::Args, - validate_len: fn(&str) -> UResult, + validate_len: fn(&str) -> UResult, ) -> UResult<()> { let matches = uucore::clap_localization::handle_clap_result(cmd, args)?; let algo = Some(algo); @@ -142,7 +142,7 @@ pub fn standalone_checksum_app(about: String, usage: String) -> Command { /// validation on arguments and proceeds in computing or checking mode. pub fn checksum_main( algo: Option, - length: Option, + length: Option, matches: ArgMatches, output_format: OutputFormat, ) -> UResult<()> { diff --git a/src/uu/cksum/src/cksum.rs b/src/uu/cksum/src/cksum.rs index b097be2c9c4..9af2f6f00ff 100644 --- a/src/uu/cksum/src/cksum.rs +++ b/src/uu/cksum/src/cksum.rs @@ -12,7 +12,8 @@ use uu_checksum_common::{ChecksumCommand, checksum_main, default_checksum_app, o use uucore::checksum::compute::OutputFormat; use uucore::checksum::{ - AlgoKind, BlakeLength, ChecksumError, parse_blake_length, sanitize_sha2_sha3_length_str, + AlgoKind, BlakeLength, ChecksumError, HashLength, parse_blake_length, + sanitize_sha2_sha3_length_str, }; use uucore::error::UResult; use uucore::hardware::{HasHardwareFeatures as _, SimdPolicy}; @@ -47,7 +48,7 @@ fn print_cpu_debug_info() { fn maybe_sanitize_length( algo_cli: Option, input_length: Option<&str>, -) -> UResult> { +) -> UResult> { match (algo_cli, input_length) { // No provided length is not a problem so far. (_, None) => Ok(None), @@ -63,7 +64,7 @@ fn maybe_sanitize_length( // will have its extra bits set to zero. (Some(AlgoKind::Shake128 | AlgoKind::Shake256), Some(len)) => match len.parse::() { Ok(0) => Ok(None), - Ok(l) => Ok(Some(l)), + Ok(l) => Ok(Some(HashLength::from_bits(l))), Err(_) => Err(ChecksumError::InvalidLength(len.into()).into()), }, diff --git a/src/uucore/src/lib/features/checksum/mod.rs b/src/uucore/src/lib/features/checksum/mod.rs index d11cf5d6130..45d4c8b5918 100644 --- a/src/uucore/src/lib/features/checksum/mod.rs +++ b/src/uucore/src/lib/features/checksum/mod.rs @@ -204,15 +204,15 @@ impl AlgoKind { /// When checking untagged format lines, non-XOF non-legacy algorithms /// should report "improperly formatted lines" if the digest length isn't /// equivalent to this. - pub fn expected_digest_bit_len(self) -> Option { + pub fn expected_digest_bit_len(self) -> Option { match self { - Self::Md5 => Some(Md5::BIT_SIZE), - Self::Sm3 => Some(Sm3::BIT_SIZE), - Self::Sha1 => Some(Sha1::BIT_SIZE), - Self::Sha224 => Some(Sha224::BIT_SIZE), - Self::Sha256 => Some(Sha256::BIT_SIZE), - Self::Sha384 => Some(Sha384::BIT_SIZE), - Self::Sha512 => Some(Sha512::BIT_SIZE), + Self::Md5 => Some(HashLength::from_bits(Md5::BIT_SIZE)), + Self::Sm3 => Some(HashLength::from_bits(Sm3::BIT_SIZE)), + Self::Sha1 => Some(HashLength::from_bits(Sha1::BIT_SIZE)), + Self::Sha224 => Some(HashLength::from_bits(Sha224::BIT_SIZE)), + Self::Sha256 => Some(HashLength::from_bits(Sha256::BIT_SIZE)), + Self::Sha384 => Some(HashLength::from_bits(Sha384::BIT_SIZE)), + Self::Sha512 => Some(HashLength::from_bits(Sha512::BIT_SIZE)), _ => None, } } @@ -253,6 +253,46 @@ impl TryFrom for ShaLength { } } +/// Stores a hash length in bits. +#[derive(Debug, Clone, Copy)] +pub struct HashLength { + bit_len: usize, +} + +impl HashLength { + #[must_use] + #[inline] + pub(crate) fn from_bytes(n: usize) -> Self { + Self { bit_len: n * 8 } + } + + #[must_use] + #[inline] + pub fn from_bits(n: usize) -> Self { + Self { bit_len: n } + } + + #[must_use] + #[inline] + pub(crate) fn as_bits(self) -> usize { + self.bit_len + } + + #[must_use] + #[inline] + pub(crate) fn as_bytes(self) -> usize { + self.bit_len.div_ceil(8) + } +} + +impl From for HashLength { + fn from(value: ShaLength) -> Self { + Self { + bit_len: value.as_usize(), + } + } +} + /// Represents an actual determined algorithm. #[derive(Debug, Clone, Copy)] pub enum SizedAlgoKind { @@ -265,16 +305,14 @@ pub enum SizedAlgoKind { Sha1, Sha2(ShaLength), Sha3(ShaLength), - // Note: we store Blake*'s length as BYTES. - Blake2b(usize), - Blake3(usize), - // Shake* length are stored in bits. - Shake128(Option), - Shake256(Option), + Blake2b(HashLength), + Blake3(HashLength), + Shake128(Option), + Shake256(Option), } impl SizedAlgoKind { - pub fn from_unsized(kind: AlgoKind, output_length: Option) -> UResult { + pub fn from_unsized(kind: AlgoKind, output_length: Option) -> UResult { use AlgoKind as ak; match (kind, output_length) { ( @@ -300,12 +338,16 @@ impl SizedAlgoKind { (ak::Sm3, _) => Ok(Self::Sm3), (ak::Sha1, _) => Ok(Self::Sha1), - (ak::Blake2b, l) => Ok(Self::Blake2b(l.unwrap_or(Blake2b::DEFAULT_BYTE_SIZE))), - (ak::Blake3, l) => Ok(Self::Blake3(l.unwrap_or(Blake3::DEFAULT_BYTE_SIZE))), + (ak::Blake2b, l) => Ok(Self::Blake2b( + l.unwrap_or(HashLength::from_bits(Blake2b::DEFAULT_BIT_SIZE)), + )), + (ak::Blake3, l) => Ok(Self::Blake3( + l.unwrap_or(HashLength::from_bits(Blake3::DEFAULT_BIT_SIZE)), + )), (ak::Shake128, l) => Ok(Self::Shake128(l)), (ak::Shake256, l) => Ok(Self::Shake256(l)), - (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l)?)), - (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l)?)), + (ak::Sha2, Some(l)) => Ok(Self::Sha2(ShaLength::try_from(l.as_bits())?)), + (ak::Sha3, Some(l)) => Ok(Self::Sha3(ShaLength::try_from(l.as_bits())?)), (algo @ (ak::Sha2 | ak::Sha3), None) => { Err(ChecksumError::LengthRequiredForSha(algo.to_lowercase().into()).into()) } @@ -323,16 +365,16 @@ impl SizedAlgoKind { Self::Sha1 => "SHA1".into(), Self::Sha2(len) => format!("SHA{}", len.as_usize()), Self::Sha3(len) => format!("SHA3-{}", len.as_usize()), - Self::Blake2b(Blake2b::DEFAULT_BYTE_SIZE) => "BLAKE2b".into(), - Self::Blake2b(byte_len) => format!("BLAKE2b-{}", byte_len * 8), - Self::Blake3(byte_len) => format!("BLAKE3-{}", byte_len * 8), - Self::Shake128(opt_bit_len) => format!( + Self::Blake2b(len) if len.as_bits() == Blake2b::DEFAULT_BIT_SIZE => "BLAKE2b".into(), + Self::Blake2b(len) => format!("BLAKE2b-{}", len.as_bits()), + Self::Blake3(len) => format!("BLAKE3-{}", len.as_bits()), + Self::Shake128(opt_len) => format!( "SHAKE128-{}", - opt_bit_len.unwrap_or(Shake128::DEFAULT_BIT_SIZE) + opt_len.map_or(Shake128::DEFAULT_BIT_SIZE, HashLength::as_bits) ), - Self::Shake256(opt_bit_len) => format!( + Self::Shake256(opt_len) => format!( "SHAKE256-{}", - opt_bit_len.unwrap_or(Shake256::DEFAULT_BIT_SIZE) + opt_len.map_or(Shake256::DEFAULT_BIT_SIZE, HashLength::as_bits) ), Self::Sysv | Self::Bsd | Self::Crc | Self::Crc32b => { panic!("Should not be used for tagging") @@ -358,14 +400,20 @@ impl SizedAlgoKind { Self::Sha3(Len256) => Box::new(Sha3_256::default()), Self::Sha3(Len384) => Box::new(Sha3_384::default()), Self::Sha3(Len512) => Box::new(Sha3_512::default()), - Self::Blake2b(len) => Box::new(Blake2b::with_output_bytes(*len)), - Self::Blake3(len) => Box::new(Blake3::with_output_bytes(*len)), - Self::Shake128(len_opt) => { - Box::new(len_opt.map(Shake128::with_output_bits).unwrap_or_default()) - } - Self::Shake256(len_opt) => { - Box::new(len_opt.map(Shake256::with_output_bits).unwrap_or_default()) - } + Self::Blake2b(len) => Box::new(Blake2b::with_output_bytes(len.as_bytes())), + Self::Blake3(len) => Box::new(Blake3::with_output_bytes(len.as_bytes())), + Self::Shake128(len_opt) => Box::new( + len_opt + .map(HashLength::as_bits) + .map(Shake128::with_output_bits) + .unwrap_or_default(), + ), + Self::Shake256(len_opt) => Box::new( + len_opt + .map(HashLength::as_bits) + .map(Shake256::with_output_bits) + .unwrap_or_default(), + ), } } @@ -380,10 +428,10 @@ impl SizedAlgoKind { Self::Sha1 => 160, Self::Sha2(len) => len.as_usize(), Self::Sha3(len) => len.as_usize(), - Self::Blake2b(len) => len * 8, - Self::Blake3(len) => len * 8, - Self::Shake128(len) => len.unwrap_or(Shake128::DEFAULT_BIT_SIZE), - Self::Shake256(len) => len.unwrap_or(Shake256::DEFAULT_BIT_SIZE), + Self::Blake2b(len) => len.as_bits(), + Self::Blake3(len) => len.as_bits(), + Self::Shake128(len) => len.map_or(Shake128::DEFAULT_BIT_SIZE, HashLength::as_bits), + Self::Shake256(len) => len.map_or(Shake256::DEFAULT_BIT_SIZE, HashLength::as_bits), } } pub fn is_legacy(&self) -> bool { @@ -506,7 +554,7 @@ pub enum BlakeLength<'s> { /// Note: when the input is a string, validation may print error messages. /// Note: when the algo is Blake2b, values that are above 512 /// (Blake2b::DEFAULT_BIT_SIZE) are errors. -pub fn parse_blake_length(algo: AlgoKind, bit_length: BlakeLength<'_>) -> UResult { +pub fn parse_blake_length(algo: AlgoKind, bit_length: BlakeLength<'_>) -> UResult { debug_assert!(matches!(algo, AlgoKind::Blake2b | AlgoKind::Blake3)); let print_error = || { @@ -529,8 +577,8 @@ pub fn parse_blake_length(algo: AlgoKind, bit_length: BlakeLength<'_>) -> UResul if n == 0 { return Ok(match algo { - AlgoKind::Blake2b => Blake2b::DEFAULT_BYTE_SIZE, - AlgoKind::Blake3 => Blake3::DEFAULT_BYTE_SIZE, + AlgoKind::Blake2b => HashLength::from_bits(Blake2b::DEFAULT_BIT_SIZE), + AlgoKind::Blake3 => HashLength::from_bits(Blake3::DEFAULT_BIT_SIZE), _ => unreachable!(), }); } @@ -545,7 +593,7 @@ pub fn parse_blake_length(algo: AlgoKind, bit_length: BlakeLength<'_>) -> UResul return Err(ChecksumError::LengthNotMultipleOf8.into()); } - Ok(n / 8) + Ok(HashLength::from_bits(n)) } pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> UResult { @@ -562,7 +610,7 @@ pub fn validate_sha2_sha3_length(algo_name: AlgoKind, length: Option) -> } } -pub fn sanitize_sha2_sha3_length_str(algo_kind: AlgoKind, length: &str) -> UResult { +pub fn sanitize_sha2_sha3_length_str(algo_kind: AlgoKind, length: &str) -> UResult { // There is a difference in the errors sent when the length is not a number // vs. its an invalid number. // @@ -580,7 +628,7 @@ pub fn sanitize_sha2_sha3_length_str(algo_kind: AlgoKind, length: &str) -> UResu }; if [224, 256, 384, 512].contains(&len) { - Ok(len) + Ok(HashLength::from_bits(len)) } else { show_error!("{}", ChecksumError::InvalidLength(length.into())); Err(ChecksumError::InvalidLengthForSha(algo_kind.to_uppercase().into()).into()) @@ -664,17 +712,23 @@ mod tests { #[test] fn test_calculate_blake2b_length() { assert_eq!( - parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("0")).unwrap(), + parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("0")) + .unwrap() + .as_bytes(), Blake2b::DEFAULT_BYTE_SIZE ); assert!(parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("10")).is_err()); assert!(parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("520")).is_err()); assert_eq!( - parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("512")).unwrap(), + parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("512")) + .unwrap() + .as_bytes(), Blake2b::DEFAULT_BYTE_SIZE ); assert_eq!( - parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("256")).unwrap(), + parse_blake_length(AlgoKind::Blake2b, BlakeLength::String("256")) + .unwrap() + .as_bytes(), 32 ); } diff --git a/src/uucore/src/lib/features/checksum/validate.rs b/src/uucore/src/lib/features/checksum/validate.rs index e9cc58a3834..447f6b0f113 100644 --- a/src/uucore/src/lib/features/checksum/validate.rs +++ b/src/uucore/src/lib/features/checksum/validate.rs @@ -15,8 +15,8 @@ use std::io::{self, BufReader, Read, Write, stderr, stdin}; use os_display::Quotable; use crate::checksum::{ - AlgoKind, BlakeLength, ChecksumError, ReadingMode, ShaLength, SizedAlgoKind, digest_reader, - parse_blake_length, unescape_filename, + AlgoKind, BlakeLength, ChecksumError, HashLength, ReadingMode, ShaLength, SizedAlgoKind, + digest_reader, parse_blake_length, unescape_filename, }; use crate::error::{FromIo, UError, UIoError, UResult, USimpleError}; use crate::quoting_style::{QuotingStyle, locale_aware_escape_name}; @@ -496,7 +496,7 @@ impl LineInfo { } /// Extract the expected digest from the checksum string and decode it -fn get_raw_expected_digest(checksum: &str, bit_len_hint: Option) -> Option> { +fn get_raw_expected_digest(checksum: &str, len_hint: Option) -> Option> { // If the length of the digest is not a multiple of 2, then it must be // improperly formatted (1 byte is 2 hex digits, and base64 strings should // always be a multiple of 4). @@ -504,9 +504,7 @@ fn get_raw_expected_digest(checksum: &str, bit_len_hint: Option) -> Optio return None; } - let byte_len_hint = bit_len_hint.map(|n| n.div_ceil(8)); - - let checks_hint = |len| byte_len_hint.is_none_or(|hint| hint == len); + let checks_hint = |len| len_hint.is_none_or(|hint| hint.as_bytes() == len); // If the length of the string matches the one to be expected (in case it's // given) AND the digest can be decoded as hexadecimal, just go with it. @@ -619,7 +617,7 @@ fn identify_algo_name_and_length( line_info: &LineInfo, algo_name_input: Option, last_algo: &mut Option, -) -> Result<(AlgoKind, Option), LineCheckError> { +) -> Result<(AlgoKind, Option), LineCheckError> { use AlgoKind as ak; let algo_from_line = line_info.algo_name.clone().unwrap_or_default(); let Ok(line_algo) = AlgoKind::from_cksum(algo_from_line.to_lowercase()) else { @@ -641,7 +639,7 @@ fn identify_algo_name_and_length( } } - let bytes = if let Some(bitlen) = line_info.algo_bit_len { + let hash_len = if let Some(bitlen) = line_info.algo_bit_len { match line_algo { algo @ (ak::Blake2b | ak::Blake3) => { match parse_blake_length(algo, BlakeLength::Int(bitlen)) { @@ -649,8 +647,10 @@ fn identify_algo_name_and_length( Err(_) => return Err(LineCheckError::ImproperlyFormatted), } } - ak::Sha2 | ak::Sha3 if [224, 256, 384, 512].contains(&bitlen) => Some(bitlen), - ak::Shake128 | ak::Shake256 => Some(bitlen), + ak::Sha2 | ak::Sha3 if [224, 256, 384, 512].contains(&bitlen) => { + Some(HashLength::from_bits(bitlen)) + } + ak::Shake128 | ak::Shake256 => Some(HashLength::from_bits(bitlen)), // Either // the algo based line is provided with a bit length with an // algorithm that does not support it (only Blake2b, Blake3, sha2, @@ -664,15 +664,15 @@ fn identify_algo_name_and_length( } } else if line_algo == ak::Blake2b { // Default length with BLAKE2b, - Some(Blake2b::DEFAULT_BYTE_SIZE) + Some(HashLength::from_bits(Blake2b::DEFAULT_BIT_SIZE)) } else if line_algo == ak::Blake3 { // Default length with BLAKE3, - Some(Blake3::DEFAULT_BYTE_SIZE) + Some(HashLength::from_bits(Blake3::DEFAULT_BIT_SIZE)) } else { None }; - Ok((line_algo, bytes)) + Ok((line_algo, hash_len)) } /// Given a filename and an algorithm, compute the digest and compare it with @@ -748,15 +748,16 @@ fn process_algo_based_line( // If the digest bitlen is known, we can check the format of the expected // checksum with it. - let digest_bit_length_hint = match (algo_kind, algo_len) { - (AlgoKind::Blake2b | AlgoKind::Blake3, Some(byte_len)) => Some(byte_len * 8), - (AlgoKind::Shake128 | AlgoKind::Shake256, Some(bit_len)) => Some(bit_len), + let len_hint = match (algo_kind, algo_len) { + (AlgoKind::Blake2b | AlgoKind::Blake3, Some(byte_len)) => Some(byte_len.as_bits()), + (AlgoKind::Shake128 | AlgoKind::Shake256, Some(bit_len)) => Some(bit_len.as_bits()), (AlgoKind::Shake128, None) => Some(sum::Shake128::DEFAULT_BIT_SIZE), (AlgoKind::Shake256, None) => Some(sum::Shake256::DEFAULT_BIT_SIZE), _ => None, - }; + } + .map(HashLength::from_bits); - let expected_checksum = get_raw_expected_digest(&line_info.checksum, digest_bit_length_hint) + let expected_checksum = get_raw_expected_digest(&line_info.checksum, len_hint) .ok_or(LineCheckError::ImproperlyFormatted)?; let algo = SizedAlgoKind::from_unsized(algo_kind, algo_len) @@ -770,7 +771,7 @@ fn process_non_algo_based_line( line_number: usize, line_info: &LineInfo, cli_algo_kind: AlgoKind, - cli_algo_length: Option, + cli_algo_length: Option, opts: ChecksumValidateOptions, ) -> Result<(), LineCheckError> { use AlgoKind as ak; @@ -790,20 +791,20 @@ fn process_non_algo_based_line( // When a specific algorithm name is input, use it and use the provided // bits except when dealing with blake2b, sha2 and sha3, where we will // detect the length. - let algo_byte_len = match cli_algo_kind { - ak::Blake2b | ak::Blake3 => Some(expected_checksum.len()), + let algo_len = match cli_algo_kind { + ak::Blake2b | ak::Blake3 => Some(HashLength::from_bytes(expected_checksum.len())), ak::Sha2 | ak::Sha3 => { // multiplication by 8 to get the number of bits Some( ShaLength::try_from(expected_checksum.len() * 8) .map_err(|_| LineCheckError::ImproperlyFormatted)? - .as_usize(), + .into(), ) } _ => cli_algo_length, }; - let algo = SizedAlgoKind::from_unsized(cli_algo_kind, algo_byte_len)?; + let algo = SizedAlgoKind::from_unsized(cli_algo_kind, algo_len)?; compute_and_check_digest_from_file(filename_to_check, &expected_checksum, algo, opts) } @@ -818,7 +819,7 @@ fn process_checksum_line( line: &OsStr, i: usize, cli_algo_name: Option, - cli_algo_length: Option, + cli_algo_length: Option, opts: ChecksumValidateOptions, cached_line_format: &mut Option, last_algo: &mut Option, @@ -851,7 +852,7 @@ fn process_checksum_line( fn process_checksum_file( filename_input: &OsStr, cli_algo_kind: Option, - cli_algo_length: Option, + cli_algo_length: Option, opts: ChecksumValidateOptions, ) -> Result<(), FileCheckError> { use LineCheckError::*; @@ -992,7 +993,7 @@ fn process_checksum_file( pub fn perform_checksum_validation<'a, I>( files: I, algo_kind: Option, - length_input: Option, + length_input: Option, opts: ChecksumValidateOptions, ) -> UResult<()> where diff --git a/src/uucore/src/lib/features/sum.rs b/src/uucore/src/lib/features/sum.rs index 7a9f1a5ce58..4238cfaebc6 100644 --- a/src/uucore/src/lib/features/sum.rs +++ b/src/uucore/src/lib/features/sum.rs @@ -136,6 +136,7 @@ pub struct Blake3 { impl Blake3 { /// Default length for the BLAKE3 digest in bytes. pub const DEFAULT_BYTE_SIZE: usize = 32; + pub const DEFAULT_BIT_SIZE: usize = Self::DEFAULT_BYTE_SIZE * 8; pub fn with_output_bytes(output_bytes: usize) -> Self { Self {