Skip to content

Commit

Permalink
Expose Zopfli options as public API. Remove verbosity options
Browse files Browse the repository at this point in the history
As described in #5, users may have good reasons to tweak Zopfli
compression options, but this is not possible with the current API.

To improve the situation, let's refactor the Options struct with proper
data types that guarantee that invariants are being held, and make those
fields public. While at it, let's drop the verbosity options in favor of
using the log crate, which is lightweight and much more flexible and
idiomatic for users.
  • Loading branch information
AlexTMjugador committed Jul 25, 2022
1 parent 47d2ae7 commit c789bc4
Show file tree
Hide file tree
Showing 7 changed files with 106 additions and 70 deletions.
16 changes: 16 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ crc = "2"
adler32 = "1"
byteorder = "1"
typed-arena = "2"
iter-read = "0.3.0"
iter-read = "0.3"
log = "0.4"

[profile.release]
debug = true
25 changes: 14 additions & 11 deletions src/blocksplitter.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::f64;
use log::{debug, log_enabled};

use crate::deflate::calculate_block_size_auto_type;
use crate::lz77::{Lz77Store, ZopfliBlockState};
Expand Down Expand Up @@ -108,7 +109,12 @@ fn find_largest_splittable_block(
}

/// Logs the block split points as decimal and hex values at the debug log level.
#[inline]
fn print_block_split_points(lz77: &Lz77Store, lz77splitpoints: &[usize]) {
if !log_enabled!(log::Level::Debug) {
return;
}

let nlz77points = lz77splitpoints.len();
let mut splitpoints = Vec::with_capacity(nlz77points);

Expand All @@ -129,7 +135,7 @@ fn print_block_split_points(lz77: &Lz77Store, lz77splitpoints: &[usize]) {
}
debug_assert_eq!(splitpoints.len(), nlz77points);

println!(
debug!(
"block split points: {} (hex: {})",
splitpoints
.iter()
Expand All @@ -148,21 +154,20 @@ fn print_block_split_points(lz77: &Lz77Store, lz77splitpoints: &[usize]) {
/// The output splitpoints are indices in the LZ77 data.
/// maxblocks: set a limit to the amount of blocks. Set to 0 to mean no limit.
pub fn blocksplit_lz77(
options: &Options,
lz77: &Lz77Store,
maxblocks: usize,
maxblocks: u16,
splitpoints: &mut Vec<usize>,
) {
if lz77.size() < 10 {
return; /* This code fails on tiny files. */
}

let mut numblocks = 1;
let mut numblocks = 1u32;
let mut done = vec![0; lz77.size()];
let mut lstart = 0;
let mut lend = lz77.size();

while maxblocks != 0 && numblocks < maxblocks {
while maxblocks != 0 && numblocks < maxblocks as u32 {
debug_assert!(lstart < lend);
let find_minimum_result = find_minimum(
|i| estimate_cost(lz77, lstart, i) + estimate_cost(lz77, i, lend),
Expand Down Expand Up @@ -201,9 +206,7 @@ pub fn blocksplit_lz77(
}
}

if options.verbose {
print_block_split_points(lz77, splitpoints);
}
print_block_split_points(lz77, splitpoints);
}

/// Does blocksplitting on uncompressed data.
Expand All @@ -223,7 +226,7 @@ pub fn blocksplit(
in_data: &[u8],
instart: usize,
inend: usize,
maxblocks: usize,
maxblocks: u16,
splitpoints: &mut Vec<usize>,
) {
splitpoints.clear();
Expand All @@ -236,8 +239,8 @@ pub fn blocksplit(
store.greedy(&mut state, in_data, instart, inend);
}

let mut lz77splitpoints = Vec::with_capacity(maxblocks);
blocksplit_lz77(options, &store, maxblocks, &mut lz77splitpoints);
let mut lz77splitpoints = Vec::with_capacity(maxblocks as usize);
blocksplit_lz77(&store, maxblocks, &mut lz77splitpoints);

let nlz77points = lz77splitpoints.len();

Expand Down
33 changes: 13 additions & 20 deletions src/deflate.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
use std::cmp;
use std::io::{self, Read, Write};
use log::{debug, log_enabled};

use crate::blocksplitter::{blocksplit, blocksplit_lz77};
use crate::iter::IsFinalIterator;
Expand Down Expand Up @@ -154,7 +155,6 @@ where

lz77_optimal_fixed(&mut s, in_data, instart, inend, &mut store);
add_lz77_block(
options,
btype,
final_block,
in_data,
Expand Down Expand Up @@ -731,7 +731,6 @@ where
/// set it to `0` to not do the assertion.
/// `bitwise_writer`: writer responsible for appending bits
fn add_lz77_block<W>(
options: &Options,
btype: BlockType,
final_block: bool,
in_data: &[u8],
Expand Down Expand Up @@ -767,12 +766,10 @@ where

let detect_tree_size = bitwise_writer.bytes_written();
add_dynamic_tree(&ll_lengths, &d_lengths, bitwise_writer)?;
if options.verbose {
println!(
"treesize: {}",
bitwise_writer.bytes_written() - detect_tree_size
);
}
debug!(
"treesize: {}",
bitwise_writer.bytes_written() - detect_tree_size
);
(ll_lengths, d_lengths)
}
};
Expand All @@ -796,18 +793,19 @@ where
/* End symbol. */
bitwise_writer.add_huffman_bits(ll_symbols[256], ll_lengths[256])?;

if options.verbose {
if log_enabled!(log::Level::Debug) {
let uncompressed_size = lz77.litlens[lstart..lend]
.iter()
.fold(0, |acc, &x| acc + x.size());
let compressed_size = bitwise_writer.bytes_written() - detect_block_size;
println!(
debug!(
"compressed block size: {} ({}k) (unc: {})",
compressed_size,
compressed_size / 1024,
uncompressed_size
);
}

Ok(())
}

Expand Down Expand Up @@ -1007,7 +1005,6 @@ where

if uncompressedcost < fixedcost && uncompressedcost < dyncost {
add_lz77_block(
options,
BlockType::Uncompressed,
final_block,
in_data,
Expand All @@ -1020,7 +1017,6 @@ where
} else if fixedcost < dyncost {
if expensivefixed {
add_lz77_block(
options,
BlockType::Fixed,
final_block,
in_data,
Expand All @@ -1032,7 +1028,6 @@ where
)
} else {
add_lz77_block(
options,
BlockType::Fixed,
final_block,
in_data,
Expand All @@ -1045,7 +1040,6 @@ where
}
} else {
add_lz77_block(
options,
BlockType::Dynamic,
final_block,
in_data,
Expand Down Expand Up @@ -1115,14 +1109,14 @@ where
let mut lz77 = Lz77Store::new();

/* byte coordinates rather than lz77 index */
let mut splitpoints_uncompressed = Vec::with_capacity(options.blocksplittingmax as usize);
let mut splitpoints_uncompressed = Vec::with_capacity(options.maximum_block_splits as usize);

blocksplit(
options,
in_data,
instart,
inend,
options.blocksplittingmax as usize,
options.maximum_block_splits,
&mut splitpoints_uncompressed,
);
let npoints = splitpoints_uncompressed.len();
Expand All @@ -1132,7 +1126,7 @@ where
for &item in &splitpoints_uncompressed {
let mut s = ZopfliBlockState::new(options, last, item);

let store = lz77_optimal(&mut s, in_data, last, item, options.numiterations);
let store = lz77_optimal(&mut s, in_data, last, item, options.iteration_count.get());
totalcost += calculate_block_size_auto_type(&store, 0, store.size());

// ZopfliAppendLZ77Store(&store, &lz77);
Expand All @@ -1148,7 +1142,7 @@ where

let mut s = ZopfliBlockState::new(options, last, inend);

let store = lz77_optimal(&mut s, in_data, last, inend, options.numiterations);
let store = lz77_optimal(&mut s, in_data, last, inend, options.iteration_count.get());
totalcost += calculate_block_size_auto_type(&store, 0, store.size());

// ZopfliAppendLZ77Store(&store, &lz77);
Expand All @@ -1163,9 +1157,8 @@ where
let mut totalcost2 = 0.0;

blocksplit_lz77(
options,
&lz77,
options.blocksplittingmax as usize,
options.maximum_block_splits,
&mut splitpoints2,
);

Expand Down
68 changes: 46 additions & 22 deletions src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
#![deny(trivial_casts, trivial_numeric_casts)]

//! A reimplementation of the [Zopfli](https://github.com/google/zopfli) compression library in Rust.
//!
//! Zopfli is a state of the art DEFLATE compressor that heavily prioritizes compression over speed.
//! It usually compresses much better than other DEFLATE compressors, generating standard DEFLATE
//! streams that can be decompressed with any DEFLATE decompressor, at the cost of being
//! significantly slower.

mod blocksplitter;
mod cache;
mod deflate;
Expand All @@ -15,58 +22,75 @@ mod util;
mod zlib;

use std::io::{self, Read, Write};
use std::num::NonZeroU8;

use crate::deflate::{deflate, BlockType};
use crate::gzip::gzip_compress;
use crate::zlib::zlib_compress;

/// Options used throughout the program.
/// Options for the Zopfli compression algorithm.
pub struct Options {
/* Whether to print output */
pub verbose: bool,
/* Whether to print more detailed output */
verbose_more: bool,
/*
Maximum amount of times to rerun forward and backward pass to optimize LZ77
compression cost. Good values: 10, 15 for small files, 5 for files over
several MB in size or it will be too slow.
*/
numiterations: i32,
/*
Maximum amount of blocks to split into (0 for unlimited, but this can give
extreme results that hurt compression on some files). Default value: 15.
*/
blocksplittingmax: i32,
/// Maximum amount of times to rerun forward and backward pass to optimize LZ77
/// compression cost. Good values: 10, 15 for small files, 5 for files over
/// several MB in size or it will be too slow.
///
/// Default value: 15.
pub iteration_count: NonZeroU8,
/// Maximum amount of blocks to split into (0 for unlimited, but this can give
/// extreme results that hurt compression on some files).
///
/// Default value: 15.
pub maximum_block_splits: u16,
}

impl Default for Options {
fn default() -> Options {
Options {
verbose: false,
verbose_more: false,
numiterations: 15,
blocksplittingmax: 15,
iteration_count: NonZeroU8::new(15).unwrap(),
maximum_block_splits: 15,
}
}
}

/// The output file format to use to store data compressed with Zopfli.
pub enum Format {
/// The gzip file format, as defined in
/// [RFC 1952](https://datatracker.ietf.org/doc/html/rfc1952).
///
/// This file format can be easily decompressed with the gzip
/// program.
Gzip,
/// The zlib file format, as defined in
/// [RFC 1950](https://datatracker.ietf.org/doc/html/rfc1950).
///
/// The zlib format has less header overhead than gzip, but it
/// stores less metadata about the compressed data, so it may be
/// less suitable when that metadata is needed.
Zlib,
/// The raw DEFLATE stream format, as defined in
/// [RFC 1951](https://datatracker.ietf.org/doc/html/rfc1951).
///
/// Raw DEFLATE streams are not meant to be stored raw because
/// they lack error detection metadata (checksums). They
/// usually are embedded in other file formats, such as gzip
/// and zlib.
Deflate,
}

/// Compresses data from a source with the Zopfli algorithm, using the specified
/// options, and writes the result to a sink in the defined output format.
pub fn compress<R, W>(
options: &Options,
output_type: &Format,
output_format: &Format,
in_data: R,
out: W,
) -> io::Result<()>
where
R: Read,
W: Write,
{
match *output_type {
match *output_format {
Format::Gzip => gzip_compress(options, in_data, out),
Format::Zlib => zlib_compress(options, in_data, out),
Format::Deflate => deflate(options, BlockType::Dynamic, in_data, out),
Expand Down

0 comments on commit c789bc4

Please sign in to comment.