Skip to content

Commit

Permalink
fix: Decode Zip-Info UTF8 name and comment fields (#159)
Browse files Browse the repository at this point in the history
  • Loading branch information
Pr0methean committed Jun 2, 2024
1 parent c74a811 commit e3c8102
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 1 deletion.
2 changes: 1 addition & 1 deletion examples/extract.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ fn real_main() -> i32 {
for i in 0..archive.len() {
let mut file = archive.by_index(i).unwrap();
let outpath = match file.enclosed_name() {
Some(path) => path.to_owned(),
Some(path) => path,
None => continue,
};

Expand Down
2 changes: 2 additions & 0 deletions src/extra_fields/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ impl ExtraFieldVersion for LocalHeaderVersion {}
impl ExtraFieldVersion for CentralHeaderVersion {}

mod extended_timestamp;
mod zipinfo_utf8;

pub use extended_timestamp::*;
pub use zipinfo_utf8::*;

/// contains one extra field
#[derive(Debug, Clone)]
Expand Down
40 changes: 40 additions & 0 deletions src/extra_fields/zipinfo_utf8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
use core::mem::size_of;
use std::io::Read;
use crate::result::{ZipError, ZipResult};
use crate::unstable::LittleEndianReadExt;

/// Info-ZIP Unicode Comment Extra Field (0x6375) or Unicode Path Extra Field (0x7075), as
/// specified in APPNOTE 4.6.8 and 4.6.9 respectively.
#[derive(Debug, Clone)]
pub struct UnicodeExtraField {
    /// Checksum stored in the extra field. It is compared against the CRC-32 of the
    /// header field this extra field replaces before the content is handed out.
    crc32: u32,
    /// Payload bytes that follow the version byte and the checksum.
    content: Box<[u8]>,
}

impl<'a> UnicodeExtraField {

Check failure on line 14 in src/extra_fields/zipinfo_utf8.rs

View workflow job for this annotation

GitHub Actions / style_and_docs (--no-default-features)

this lifetime isn't used in the impl
/// Verifies the checksum and returns the content.
pub fn unwrap_valid(self, ascii_field: &[u8]) -> ZipResult<Box<[u8]>> {
let mut crc32 = crc32fast::Hasher::new();
crc32.update(ascii_field);
let actual_crc32 = crc32.finalize();
if self.crc32 != actual_crc32 {
return Err(ZipError::InvalidArchive("CRC32 checksum failed on Unicode extra field"));
}
Ok(self.content)
}
}

impl UnicodeExtraField {
pub(crate) fn try_from_reader<R: Read>(reader: &mut R, len: u16) -> ZipResult<Self> {
// Read and discard version byte
reader.read_exact(&mut [0u8])?;

let crc32 = reader.read_u32_le()?;
let mut content = vec![0u8; len as usize - size_of::<u8>() - size_of::<u32>()].into_boxed_slice();
reader.read_exact(&mut content)?;
Ok(Self {
crc32,
content
})
}
}
18 changes: 18 additions & 0 deletions src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ use crate::spec::{is_dir, path_to_string};
use crate::types::ffi::S_IFLNK;
use crate::unstable::LittleEndianReadExt;
pub use zip_archive::ZipArchive;
use crate::extra_fields::UnicodeExtraField;

#[allow(clippy::large_enum_variant)]
pub(crate) enum CryptoReader<'a> {
Expand Down Expand Up @@ -1160,6 +1161,7 @@ fn central_header_to_zip_file_inner<R: Read>(
version_made_by: version_made_by as u8,
encrypted,
using_data_descriptor,
is_utf8,
compression_method: CompressionMethod::parse_from_u16(compression_method),
compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
Expand Down Expand Up @@ -1279,6 +1281,22 @@ fn parse_extra_field(file: &mut ZipFileData) -> ZipResult<()> {
// the reader for ExtendedTimestamp consumes `len` bytes
len_left = 0;
}
0x6375 => {
// Info-ZIP Unicode Comment Extra Field
// APPNOTE 4.6.8 and https://libzip.org/specifications/extrafld.txt
if !file.is_utf8 {
file.file_comment = String::from_utf8(
UnicodeExtraField::try_from_reader(&mut reader, len)?.unwrap_valid(file.file_comment.as_bytes())?.into_vec())?.into();
}
}
0x7075 => {
// Info-ZIP Unicode Path Extra Field
// APPNOTE 4.6.9 and https://libzip.org/specifications/extrafld.txt
if !file.is_utf8 {
file.file_name_raw = UnicodeExtraField::try_from_reader(&mut reader, len)?.unwrap_valid(&file.file_name_raw)?;
file.file_name = String::from_utf8(file.file_name_raw.clone().into_vec())?.into_boxed_str();
}
}
_ => {
// Other fields are ignored
}
Expand Down
7 changes: 7 additions & 0 deletions src/result.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use std::error::Error;
use std::fmt;
use std::io;
use std::num::TryFromIntError;
use std::string::FromUtf8Error;

/// Generic result type with ZipError as its error variant
pub type ZipResult<T> = Result<T, ZipError>;
Expand Down Expand Up @@ -68,6 +69,12 @@ impl From<DateTimeRangeError> for ZipError {
}
}

impl From<FromUtf8Error> for ZipError {
fn from(_: FromUtf8Error) -> Self {
ZipError::InvalidArchive("Invalid UTF-8")
}
}

/// Error type for time parsing
#[derive(Debug)]
pub struct DateTimeRangeError;
Expand Down
5 changes: 5 additions & 0 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,8 @@ pub struct ZipFileData {
pub version_made_by: u8,
/// True if the file is encrypted.
pub encrypted: bool,
/// True if `file_name` and `file_comment` are UTF-8 encoded
pub is_utf8: bool,
/// True if the file uses a data-descriptor section
pub using_data_descriptor: bool,
/// Compression method used to store the file
Expand Down Expand Up @@ -612,6 +614,7 @@ impl ZipFileData {
version_made_by: DEFAULT_VERSION,
encrypted: options.encrypt_with.is_some(),
using_data_descriptor: false,
is_utf8: !file_name.is_ascii(),
compression_method,
compression_level: options.compression_level,
last_modified_time: Some(options.last_modified_time),
Expand Down Expand Up @@ -695,6 +698,7 @@ impl ZipFileData {
version_made_by: version_made_by as u8,
encrypted,
using_data_descriptor,
is_utf8,
compression_method,
compression_level: None,
last_modified_time: DateTime::try_from_msdos(last_mod_date, last_mod_time).ok(),
Expand Down Expand Up @@ -1071,6 +1075,7 @@ mod test {
version_made_by: 0,
encrypted: false,
using_data_descriptor: false,
is_utf8: true,
compression_method: crate::compression::CompressionMethod::Stored,
compression_level: None,
last_modified_time: None,
Expand Down

0 comments on commit e3c8102

Please sign in to comment.