diff --git a/lib/std/compress/flate.zig b/lib/std/compress/flate.zig index 4391a0678760..20c3815a9e95 100644 --- a/lib/std/compress/flate.zig +++ b/lib/std/compress/flate.zig @@ -1,8 +1,7 @@ const std = @import("../std.zig"); -/// When decompressing, the output buffer is used as the history window, so -/// less than this may result in failure to decompress streams that were -/// compressed with a larger window. +/// When compressing and decompressing, the provided buffer is used as the +/// history window, so it must be at least this size. pub const max_window_len = history_len * 2; pub const history_len = 32768; @@ -15,10 +14,6 @@ pub const Compress = @import("flate/Compress.zig"); /// produces the original full-size data. pub const Decompress = @import("flate/Decompress.zig"); -/// Compression without Lempel-Ziv match searching. Faster compression, less -/// memory requirements but bigger compressed sizes. -pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig"); - /// Container of the deflate bit stream body. Container adds header before /// deflate bit stream and footer after. It can bi gzip, zlib or raw (no header, /// no footer, raw bit stream). @@ -112,28 +107,24 @@ pub const Container = enum { switch (h.*) { .raw => {}, .gzip => |*gzip| { - gzip.update(buf); - gzip.count +%= buf.len; + gzip.crc.update(buf); + gzip.count +%= @truncate(buf.len); }, .zlib => |*zlib| { zlib.update(buf); }, - inline .gzip, .zlib => |*x| x.update(buf), } } pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void { - var bits: [4]u8 = undefined; switch (hasher.*) { .gzip => |*gzip| { // GZIP 8 bytes footer // - 4 bytes, CRC32 (CRC-32) - // - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32 - std.mem.writeInt(u32, &bits, gzip.final(), .little); - try writer.writeAll(&bits); - - std.mem.writeInt(u32, &bits, gzip.bytes_read, .little); - try writer.writeAll(&bits); + // - 4 bytes, ISIZE (Input SIZE) - size of the original + // (uncompressed) input data modulo 2^32 + try writer.writeInt(u32, gzip.crc.final(), .little); + try writer.writeInt(u32, gzip.count, .little); }, .zlib => |*zlib| { // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). @@ -141,8 +132,7 @@ pub const Container = enum { // Checksum value of the uncompressed data (excluding any // dictionary data) computed according to Adler-32 // algorithm. - std.mem.writeInt(u32, &bits, zlib.final, .big); - try writer.writeAll(&bits); + try writer.writeInt(u32, zlib.adler, .big); }, .raw => {}, } @@ -174,7 +164,6 @@ pub const Container = enum { }; test { - _ = HuffmanEncoder; _ = Compress; _ = Decompress; } diff --git a/lib/std/compress/flate/BlockWriter.zig b/lib/std/compress/flate/BlockWriter.zig deleted file mode 100644 index 47b1b298cedd..000000000000 --- a/lib/std/compress/flate/BlockWriter.zig +++ /dev/null @@ -1,591 +0,0 @@ -//! Accepts list of tokens, decides what is best block type to write. What block -//! type will provide best compression. Writes header and body of the block. 
-const std = @import("std"); -const assert = std.debug.assert; -const Writer = std.Io.Writer; - -const BlockWriter = @This(); -const flate = @import("../flate.zig"); -const Compress = flate.Compress; -const HuffmanEncoder = flate.HuffmanEncoder; -const Token = @import("Token.zig"); - -const codegen_order = HuffmanEncoder.codegen_order; -const end_code_mark = 255; - -output: *Writer, - -codegen_freq: [HuffmanEncoder.codegen_code_count]u16, -literal_freq: [HuffmanEncoder.max_num_lit]u16, -distance_freq: [HuffmanEncoder.distance_code_count]u16, -codegen: [HuffmanEncoder.max_num_lit + HuffmanEncoder.distance_code_count + 1]u8, -literal_encoding: HuffmanEncoder, -distance_encoding: HuffmanEncoder, -codegen_encoding: HuffmanEncoder, -fixed_literal_encoding: HuffmanEncoder, -fixed_distance_encoding: HuffmanEncoder, -huff_distance: HuffmanEncoder, - -fixed_literal_codes: [HuffmanEncoder.max_num_frequencies]HuffmanEncoder.Code, -fixed_distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code, -distance_codes: [HuffmanEncoder.distance_code_count]HuffmanEncoder.Code, - -pub fn init(output: *Writer) BlockWriter { - return .{ - .output = output, - .codegen_freq = undefined, - .literal_freq = undefined, - .distance_freq = undefined, - .codegen = undefined, - .literal_encoding = undefined, - .distance_encoding = undefined, - .codegen_encoding = undefined, - .fixed_literal_encoding = undefined, - .fixed_distance_encoding = undefined, - .huff_distance = undefined, - .fixed_literal_codes = undefined, - .fixed_distance_codes = undefined, - .distance_codes = undefined, - }; -} - -pub fn initBuffers(bw: *BlockWriter) void { - bw.fixed_literal_encoding = .fixedLiteralEncoder(&bw.fixed_literal_codes); - bw.fixed_distance_encoding = .fixedDistanceEncoder(&bw.fixed_distance_codes); - bw.huff_distance = .huffmanDistanceEncoder(&bw.distance_codes); -} - -/// Flush intrenal bit buffer to the writer. -/// Should be called only when bit stream is at byte boundary. -/// -/// That is after final block; when last byte could be incomplete or -/// after stored block; which is aligned to the byte boundary (it has x -/// padding bits after first 3 bits). -pub fn flush(self: *BlockWriter) Writer.Error!void { - try self.bit_writer.flush(); -} - -fn writeCode(self: *BlockWriter, c: Compress.HuffCode) Writer.Error!void { - try self.bit_writer.writeBits(c.code, c.len); -} - -/// RFC 1951 3.2.7 specifies a special run-length encoding for specifying -/// the literal and distance lengths arrays (which are concatenated into a single -/// array). This method generates that run-length encoding. -/// -/// The result is written into the codegen array, and the frequencies -/// of each code is written into the codegen_freq array. -/// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional -/// information. Code bad_code is an end marker -/// -/// num_literals: The number of literals in literal_encoding -/// num_distances: The number of distances in distance_encoding -/// lit_enc: The literal encoder to use -/// dist_enc: The distance encoder to use -fn generateCodegen( - self: *BlockWriter, - num_literals: u32, - num_distances: u32, - lit_enc: *Compress.LiteralEncoder, - dist_enc: *Compress.DistanceEncoder, -) void { - for (self.codegen_freq, 0..) |_, i| { - self.codegen_freq[i] = 0; - } - - // Note that we are using codegen both as a temporary variable for holding - // a copy of the frequencies, and as the place where we put the result. 
- // This is fine because the output is always shorter than the input used - // so far. - var codegen = &self.codegen; // cache - // Copy the concatenated code sizes to codegen. Put a marker at the end. - var cgnl = codegen[0..num_literals]; - for (cgnl, 0..) |_, i| { - cgnl[i] = @as(u8, @intCast(lit_enc.codes[i].len)); - } - - cgnl = codegen[num_literals .. num_literals + num_distances]; - for (cgnl, 0..) |_, i| { - cgnl[i] = @as(u8, @intCast(dist_enc.codes[i].len)); - } - codegen[num_literals + num_distances] = end_code_mark; - - var size = codegen[0]; - var count: i32 = 1; - var out_index: u32 = 0; - var in_index: u32 = 1; - while (size != end_code_mark) : (in_index += 1) { - // INVARIANT: We have seen "count" copies of size that have not yet - // had output generated for them. - const next_size = codegen[in_index]; - if (next_size == size) { - count += 1; - continue; - } - // We need to generate codegen indicating "count" of size. - if (size != 0) { - codegen[out_index] = size; - out_index += 1; - self.codegen_freq[size] += 1; - count -= 1; - while (count >= 3) { - var n: i32 = 6; - if (n > count) { - n = count; - } - codegen[out_index] = 16; - out_index += 1; - codegen[out_index] = @as(u8, @intCast(n - 3)); - out_index += 1; - self.codegen_freq[16] += 1; - count -= n; - } - } else { - while (count >= 11) { - var n: i32 = 138; - if (n > count) { - n = count; - } - codegen[out_index] = 18; - out_index += 1; - codegen[out_index] = @as(u8, @intCast(n - 11)); - out_index += 1; - self.codegen_freq[18] += 1; - count -= n; - } - if (count >= 3) { - // 3 <= count <= 10 - codegen[out_index] = 17; - out_index += 1; - codegen[out_index] = @as(u8, @intCast(count - 3)); - out_index += 1; - self.codegen_freq[17] += 1; - count = 0; - } - } - count -= 1; - while (count >= 0) : (count -= 1) { - codegen[out_index] = size; - out_index += 1; - self.codegen_freq[size] += 1; - } - // Set up invariant for next time through the loop. - size = next_size; - count = 1; - } - // Marker indicating the end of the codegen. - codegen[out_index] = end_code_mark; -} - -const DynamicSize = struct { - size: u32, - num_codegens: u32, -}; - -/// dynamicSize returns the size of dynamically encoded data in bits. -fn dynamicSize( - self: *BlockWriter, - lit_enc: *Compress.LiteralEncoder, // literal encoder - dist_enc: *Compress.DistanceEncoder, // distance encoder - extra_bits: u32, -) DynamicSize { - var num_codegens = self.codegen_freq.len; - while (num_codegens > 4 and self.codegen_freq[codegen_order[num_codegens - 1]] == 0) { - num_codegens -= 1; - } - const header = 3 + 5 + 5 + 4 + (3 * num_codegens) + - self.codegen_encoding.bitLength(self.codegen_freq[0..]) + - self.codegen_freq[16] * 2 + - self.codegen_freq[17] * 3 + - self.codegen_freq[18] * 7; - const size = header + - lit_enc.bitLength(&self.literal_freq) + - dist_enc.bitLength(&self.distance_freq) + - extra_bits; - - return DynamicSize{ - .size = @as(u32, @intCast(size)), - .num_codegens = @as(u32, @intCast(num_codegens)), - }; -} - -/// fixedSize returns the size of dynamically encoded data in bits. -fn fixedSize(self: *BlockWriter, extra_bits: u32) u32 { - return 3 + - self.fixed_literal_encoding.bitLength(&self.literal_freq) + - self.fixed_distance_encoding.bitLength(&self.distance_freq) + - extra_bits; -} - -const StoredSize = struct { - size: u32, - storable: bool, -}; - -/// storedSizeFits calculates the stored size, including header. -/// The function returns the size in bits and whether the block -/// fits inside a single block. 
-fn storedSizeFits(in: ?[]const u8) StoredSize { - if (in == null) { - return .{ .size = 0, .storable = false }; - } - if (in.?.len <= HuffmanEncoder.max_store_block_size) { - return .{ .size = @as(u32, @intCast((in.?.len + 5) * 8)), .storable = true }; - } - return .{ .size = 0, .storable = false }; -} - -/// Write the header of a dynamic Huffman block to the output stream. -/// -/// num_literals: The number of literals specified in codegen -/// num_distances: The number of distances specified in codegen -/// num_codegens: The number of codegens used in codegen -/// eof: Is it the end-of-file? (end of stream) -fn dynamicHeader( - self: *BlockWriter, - num_literals: u32, - num_distances: u32, - num_codegens: u32, - eof: bool, -) Writer.Error!void { - const first_bits: u32 = if (eof) 5 else 4; - try self.bit_writer.writeBits(first_bits, 3); - try self.bit_writer.writeBits(num_literals - 257, 5); - try self.bit_writer.writeBits(num_distances - 1, 5); - try self.bit_writer.writeBits(num_codegens - 4, 4); - - var i: u32 = 0; - while (i < num_codegens) : (i += 1) { - const value = self.codegen_encoding.codes[codegen_order[i]].len; - try self.bit_writer.writeBits(value, 3); - } - - i = 0; - while (true) { - const code_word: u32 = @as(u32, @intCast(self.codegen[i])); - i += 1; - if (code_word == end_code_mark) { - break; - } - try self.writeCode(self.codegen_encoding.codes[@as(u32, @intCast(code_word))]); - - switch (code_word) { - 16 => { - try self.bit_writer.writeBits(self.codegen[i], 2); - i += 1; - }, - 17 => { - try self.bit_writer.writeBits(self.codegen[i], 3); - i += 1; - }, - 18 => { - try self.bit_writer.writeBits(self.codegen[i], 7); - i += 1; - }, - else => {}, - } - } -} - -fn storedHeader(self: *BlockWriter, length: usize, eof: bool) Writer.Error!void { - assert(length <= 65535); - const flag: u32 = if (eof) 1 else 0; - try self.bit_writer.writeBits(flag, 3); - try self.flush(); - const l: u16 = @intCast(length); - try self.bit_writer.writeBits(l, 16); - try self.bit_writer.writeBits(~l, 16); -} - -fn fixedHeader(self: *BlockWriter, eof: bool) Writer.Error!void { - // Indicate that we are a fixed Huffman block - var value: u32 = 2; - if (eof) { - value = 3; - } - try self.bit_writer.writeBits(value, 3); -} - -/// Write a block of tokens with the smallest encoding. Will choose block type. -/// The original input can be supplied, and if the huffman encoded data -/// is larger than the original bytes, the data will be written as a -/// stored block. -/// If the input is null, the tokens will always be Huffman encoded. -pub fn write(self: *BlockWriter, tokens: []const Token, eof: bool, input: ?[]const u8) Writer.Error!void { - const lit_and_dist = self.indexTokens(tokens); - const num_literals = lit_and_dist.num_literals; - const num_distances = lit_and_dist.num_distances; - - var extra_bits: u32 = 0; - const ret = storedSizeFits(input); - const stored_size = ret.size; - const storable = ret.storable; - - if (storable) { - // We only bother calculating the costs of the extra bits required by - // the length of distance fields (which will be the same for both fixed - // and dynamic encoding), if we need to compare those two encodings - // against stored encoding. - var length_code: u16 = Token.length_codes_start + 8; - while (length_code < num_literals) : (length_code += 1) { - // First eight length codes have extra size = 0. 
- extra_bits += @as(u32, @intCast(self.literal_freq[length_code])) * - @as(u32, @intCast(Token.lengthExtraBits(length_code))); - } - var distance_code: u16 = 4; - while (distance_code < num_distances) : (distance_code += 1) { - // First four distance codes have extra size = 0. - extra_bits += @as(u32, @intCast(self.distance_freq[distance_code])) * - @as(u32, @intCast(Token.distanceExtraBits(distance_code))); - } - } - - // Figure out smallest code. - // Fixed Huffman baseline. - var literal_encoding = &self.fixed_literal_encoding; - var distance_encoding = &self.fixed_distance_encoding; - var size = self.fixedSize(extra_bits); - - // Dynamic Huffman? - var num_codegens: u32 = 0; - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literal_encoding and the distance_encoding. - self.generateCodegen( - num_literals, - num_distances, - &self.literal_encoding, - &self.distance_encoding, - ); - self.codegen_encoding.generate(self.codegen_freq[0..], 7); - const dynamic_size = self.dynamicSize( - &self.literal_encoding, - &self.distance_encoding, - extra_bits, - ); - const dyn_size = dynamic_size.size; - num_codegens = dynamic_size.num_codegens; - - if (dyn_size < size) { - size = dyn_size; - literal_encoding = &self.literal_encoding; - distance_encoding = &self.distance_encoding; - } - - // Stored bytes? - if (storable and stored_size < size) { - try self.storedBlock(input.?, eof); - return; - } - - // Huffman. - if (@intFromPtr(literal_encoding) == @intFromPtr(&self.fixed_literal_encoding)) { - try self.fixedHeader(eof); - } else { - try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); - } - - // Write the tokens. - try self.writeTokens(tokens, &literal_encoding.codes, &distance_encoding.codes); -} - -pub fn storedBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void { - try self.storedHeader(input.len, eof); - try self.bit_writer.writeBytes(input); -} - -/// writeBlockDynamic encodes a block using a dynamic Huffman table. -/// This should be used if the symbols used have a disproportionate -/// histogram distribution. -/// If input is supplied and the compression savings are below 1/16th of the -/// input size the block is stored. -fn dynamicBlock( - self: *BlockWriter, - tokens: []const Token, - eof: bool, - input: ?[]const u8, -) Writer.Error!void { - const total_tokens = self.indexTokens(tokens); - const num_literals = total_tokens.num_literals; - const num_distances = total_tokens.num_distances; - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literal_encoding and the distance_encoding. - self.generateCodegen( - num_literals, - num_distances, - &self.literal_encoding, - &self.distance_encoding, - ); - self.codegen_encoding.generate(self.codegen_freq[0..], 7); - const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.distance_encoding, 0); - const size = dynamic_size.size; - const num_codegens = dynamic_size.num_codegens; - - // Store bytes, if we don't get a reasonable improvement. - - const stored_size = storedSizeFits(input); - const ssize = stored_size.size; - const storable = stored_size.storable; - if (storable and ssize < (size + (size >> 4))) { - try self.storedBlock(input.?, eof); - return; - } - - // Write Huffman table. - try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); - - // Write the tokens. 
- try self.writeTokens(tokens, &self.literal_encoding.codes, &self.distance_encoding.codes); -} - -const TotalIndexedTokens = struct { - num_literals: u32, - num_distances: u32, -}; - -/// Indexes a slice of tokens followed by an end_block_marker, and updates -/// literal_freq and distance_freq, and generates literal_encoding -/// and distance_encoding. -/// The number of literal and distance tokens is returned. -fn indexTokens(self: *BlockWriter, tokens: []const Token) TotalIndexedTokens { - var num_literals: u32 = 0; - var num_distances: u32 = 0; - - for (self.literal_freq, 0..) |_, i| { - self.literal_freq[i] = 0; - } - for (self.distance_freq, 0..) |_, i| { - self.distance_freq[i] = 0; - } - - for (tokens) |t| { - if (t.kind == Token.Kind.literal) { - self.literal_freq[t.literal()] += 1; - continue; - } - self.literal_freq[t.lengthCode()] += 1; - self.distance_freq[t.distanceCode()] += 1; - } - // add end_block_marker token at the end - self.literal_freq[HuffmanEncoder.end_block_marker] += 1; - - // get the number of literals - num_literals = @as(u32, @intCast(self.literal_freq.len)); - while (self.literal_freq[num_literals - 1] == 0) { - num_literals -= 1; - } - // get the number of distances - num_distances = @as(u32, @intCast(self.distance_freq.len)); - while (num_distances > 0 and self.distance_freq[num_distances - 1] == 0) { - num_distances -= 1; - } - if (num_distances == 0) { - // We haven't found a single match. If we want to go with the dynamic encoding, - // we should count at least one distance to be sure that the distance huffman tree could be encoded. - self.distance_freq[0] = 1; - num_distances = 1; - } - self.literal_encoding.generate(&self.literal_freq, 15); - self.distance_encoding.generate(&self.distance_freq, 15); - return TotalIndexedTokens{ - .num_literals = num_literals, - .num_distances = num_distances, - }; -} - -/// Writes a slice of tokens to the output followed by and end_block_marker. -/// codes for literal and distance encoding must be supplied. -fn writeTokens( - self: *BlockWriter, - tokens: []const Token, - le_codes: []Compress.HuffCode, - oe_codes: []Compress.HuffCode, -) Writer.Error!void { - for (tokens) |t| { - if (t.kind == Token.Kind.literal) { - try self.writeCode(le_codes[t.literal()]); - continue; - } - - // Write the length - const le = t.lengthEncoding(); - try self.writeCode(le_codes[le.code]); - if (le.extra_bits > 0) { - try self.bit_writer.writeBits(le.extra_length, le.extra_bits); - } - - // Write the distance - const oe = t.distanceEncoding(); - try self.writeCode(oe_codes[oe.code]); - if (oe.extra_bits > 0) { - try self.bit_writer.writeBits(oe.extra_distance, oe.extra_bits); - } - } - // add end_block_marker at the end - try self.writeCode(le_codes[HuffmanEncoder.end_block_marker]); -} - -/// Encodes a block of bytes as either Huffman encoded literals or uncompressed bytes -/// if the results only gains very little from compression. -pub fn huffmanBlock(self: *BlockWriter, input: []const u8, eof: bool) Writer.Error!void { - // Add everything as literals - histogram(input, &self.literal_freq); - - self.literal_freq[HuffmanEncoder.end_block_marker] = 1; - - const num_literals = HuffmanEncoder.end_block_marker + 1; - self.distance_freq[0] = 1; - const num_distances = 1; - - self.literal_encoding.generate(&self.literal_freq, 15); - - // Figure out smallest code. 
- // Always use dynamic Huffman or Store - var num_codegens: u32 = 0; - - // Generate codegen and codegenFrequencies, which indicates how to encode - // the literal_encoding and the distance_encoding. - self.generateCodegen( - num_literals, - num_distances, - &self.literal_encoding, - &self.huff_distance, - ); - self.codegen_encoding.generate(self.codegen_freq[0..], 7); - const dynamic_size = self.dynamicSize(&self.literal_encoding, &self.huff_distance, 0); - const size = dynamic_size.size; - num_codegens = dynamic_size.num_codegens; - - // Store bytes, if we don't get a reasonable improvement. - const stored_size_ret = storedSizeFits(input); - const ssize = stored_size_ret.size; - const storable = stored_size_ret.storable; - - if (storable and ssize < (size + (size >> 4))) { - try self.storedBlock(input, eof); - return; - } - - // Huffman. - try self.dynamicHeader(num_literals, num_distances, num_codegens, eof); - const encoding = self.literal_encoding.codes[0..257]; - - for (input) |t| { - const c = encoding[t]; - try self.bit_writer.writeBits(c.code, c.len); - } - try self.writeCode(encoding[HuffmanEncoder.end_block_marker]); -} - -fn histogram(b: []const u8, h: *[286]u16) void { - // Clear histogram - for (h, 0..) |_, i| { - h[i] = 0; - } - - var lh = h.*[0..256]; - for (b) |t| { - lh[t] += 1; - } -} diff --git a/lib/std/compress/flate/Compress.zig b/lib/std/compress/flate/Compress.zig index 2249ece4c0a0..d97053befd40 100644 --- a/lib/std/compress/flate/Compress.zig +++ b/lib/std/compress/flate/Compress.zig @@ -1,332 +1,2550 @@ -//! Default compression algorithm. Has two steps: tokenization and token -//! encoding. +//! Allocates statically ~224K (128K lookup, 96K tokens). //! -//! Tokenization takes uncompressed input stream and produces list of tokens. -//! Each token can be literal (byte of data) or match (backrefernce to previous -//! data with length and distance). Tokenization accumulators 32K tokens, when -//! full or `flush` is called tokens are passed to the `block_writer`. Level -//! defines how hard (how slow) it tries to find match. -//! -//! Block writer will decide which type of deflate block to write (stored, fixed, -//! dynamic) and encode tokens to the output byte stream. Client has to call -//! `finish` to write block with the final bit set. -//! -//! Container defines type of header and footer which can be gzip, zlib or raw. -//! They all share same deflate body. Raw has no header or footer just deflate -//! body. -//! -//! Compression algorithm explained in rfc-1951 (slightly edited for this case): -//! -//! The compressor uses a chained hash table `lookup` to find duplicated -//! strings, using a hash function that operates on 4-byte sequences. At any -//! given point during compression, let XYZW be the next 4 input bytes -//! (lookahead) to be examined (not necessarily all different, of course). -//! First, the compressor examines the hash chain for XYZW. If the chain is -//! empty, the compressor simply writes out X as a literal byte and advances -//! one byte in the input. If the hash chain is not empty, indicating that the -//! sequence XYZW (or, if we are unlucky, some other 4 bytes with the same -//! hash function value) has occurred recently, the compressor compares all -//! strings on the XYZW hash chain with the actual input data sequence -//! starting at the current point, and selects the longest match. -//! -//! To improve overall compression, the compressor defers the selection of -//! 
matches ("lazy matching"): after a match of length N has been found, the -//! compressor searches for a longer match starting at the next input byte. If -//! it finds a longer match, it truncates the previous match to a length of -//! one (thus producing a single literal byte) and then emits the longer -//! match. Otherwise, it emits the original match, and, as described above, -//! advances N bytes before continuing. -//! -//! -//! Allocates statically ~400K (192K lookup, 128K tokens, 64K window). +//! The source of an `error.WriteFailed` is always the backing writer. After an +//! `error.WriteFailed`, the `.writer` becomes `.failing` and is unrecoverable. +//! After a `flush`, the writer also becomes `.failing` since the stream has +//! been finished. This behavior also applies to `Raw` and `Huffman`. + +// Implementation details: +// A chained hash table is used to find matches. `drain` always preserves `flate.history_len` +// bytes to use as a history and avoids tokenizing the final bytes since they can be part of +// a longer match with unwritten bytes (unless it is a `flush`). The minimum match searched +// for is of length `seq_bytes`. If a match is made, a longer match is also checked for at +// the next byte (lazy matching) if the last match does not meet the `Options.lazy` threshold. +// +// Up to `block_token` tokens are accumalated in `buffered_tokens` and are outputted in +// `write_block` which determines the optimal block type and frequencies. const builtin = @import("builtin"); const std = @import("std"); -const assert = std.debug.assert; -const testing = std.testing; -const expect = testing.expect; const mem = std.mem; const math = std.math; -const Writer = std.Io.Writer; +const assert = std.debug.assert; +const Io = std.Io; +const Writer = Io.Writer; const Compress = @This(); -const Token = @import("Token.zig"); -const BlockWriter = @import("BlockWriter.zig"); +const token = @import("token.zig"); const flate = @import("../flate.zig"); -const Container = flate.Container; -const Lookup = @import("Lookup.zig"); -const HuffmanEncoder = flate.HuffmanEncoder; -const LiteralNode = HuffmanEncoder.LiteralNode; - -lookup: Lookup = .{}, -tokens: Tokens = .{}, -block_writer: BlockWriter, -level: LevelArgs, -hasher: Container.Hasher, -writer: Writer, -state: State, -// Match and literal at the previous position. -// Used for lazy match finding in processWindow. -prev_match: ?Token = null, -prev_literal: ?u8 = null, +/// Until #104 is implemented, a ?u15 takes 4 bytes, which is unacceptable +/// as it doubles the size of this already massive structure. +/// +/// Also, there are no `to` / `from` methods because LLVM 21 does not +/// optimize away the conversion from and to `?u15`. +const PackedOptionalU15 = packed struct(u16) { + value: u15, + is_null: bool, -pub const State = enum { header, middle, ended }; + pub fn int(p: PackedOptionalU15) u16 { + return @bitCast(p); + } -/// Trades between speed and compression size. -/// Starts with level 4: in [zlib](https://github.com/madler/zlib/blob/abd3d1a28930f89375d4b41408b39f6c1be157b2/deflate.c#L115C1-L117C43) -/// levels 1-3 are using different algorithm to perform faster but with less -/// compression. That is not implemented here. 
-pub const Level = enum(u4) { - level_4 = 4, - level_5 = 5, - level_6 = 6, - level_7 = 7, - level_8 = 8, - level_9 = 9, - - fast = 0xb, - default = 0xc, - best = 0xd, +/// After `flush` is called, all vtable calls will result in `error.WriteFailed`. +writer: Writer, +has_history: bool, +bit_writer: BitWriter, +buffered_tokens: struct { + /// List of `TokenBufferEntryHeader`s and their trailing data. + list: [@as(usize, block_tokens) * 3]u8, + pos: u32, + n: u16, + lit_freqs: [286]u16, + dist_freqs: [30]u16, + + pub const empty: @This() = .{ + .list = undefined, + .pos = 0, + .n = 0, + .lit_freqs = @splat(0), + .dist_freqs = @splat(0), + }; +}, +lookup: struct { + /// Indexes are the hashes of four-byte sequences. + /// + /// Values are the positions in `chain` of the previous four bytes with the same hash. + head: [1 << lookup_hash_bits]PackedOptionalU15, + /// Values are the non-zero number of bytes backwards in the history with the same hash. + /// + /// The relationship of chain indexes and bytes relative to the latest history byte is + /// `chain_pos -% chain_index = history_index`. + chain: [32768]PackedOptionalU15, + /// The index in `chain` of the newest byte of the history. + chain_pos: u15, +}, +container: flate.Container, +hasher: flate.Container.Hasher, +opts: Options, + +const BitWriter = struct { + output: *Writer, + buffered: u7, + buffered_n: u3, + + pub fn init(w: *Writer) BitWriter { + return .{ + .output = w, + .buffered = 0, + .buffered_n = 0, + }; + } + + /// Asserts `bits` is zero-extended + pub fn write(b: *BitWriter, bits: u56, n: u6) Writer.Error!void { + assert(@as(u8, b.buffered) >> b.buffered_n == 0); + assert(@as(u57, bits) >> n == 0); // n may be 56 so u57 is needed + const combined = @shlExact(@as(u64, bits), b.buffered_n) | b.buffered; + const combined_bits = @as(u6, b.buffered_n) + n; + + const out = try b.output.writableSliceGreedy(8); + mem.writeInt(u64, out[0..8], combined, .little); + b.output.advance(combined_bits / 8); + + b.buffered_n = @truncate(combined_bits); + b.buffered = @intCast(combined >> (combined_bits - b.buffered_n)); + } + + /// Asserts one byte can be written to `b.output` without rebasing.
+ pub fn byteAlign(b: *BitWriter) void { + b.output.unusedCapacitySlice()[0] = b.buffered; + b.output.advance(@intFromBool(b.buffered_n != 0)); + b.buffered = 0; + b.buffered_n = 0; + } + + pub fn writeClen( + b: *BitWriter, + hclen: u4, + clen_values: []u8, + clen_extra: []u8, + clen_codes: [19]u16, + clen_bits: [19]u4, + ) Writer.Error!void { + // Write the first four clen entries seperately since they are always present, + // and writing them all at once takes too many bits. + try b.write(clen_bits[token.codegen_order[0]] | + @shlExact(@as(u6, clen_bits[token.codegen_order[1]]), 3) | + @shlExact(@as(u9, clen_bits[token.codegen_order[2]]), 6) | + @shlExact(@as(u12, clen_bits[token.codegen_order[3]]), 9), 12); + + var i = hclen; + var clen_bits_table: u45 = 0; + while (i != 0) { + i -= 1; + clen_bits_table <<= 3; + clen_bits_table |= clen_bits[token.codegen_order[4..][i]]; + } + try b.write(clen_bits_table, @as(u6, hclen) * 3); + + for (clen_values, clen_extra) |value, extra| { + try b.write( + clen_codes[value] | @shlExact(@as(u16, extra), clen_bits[value]), + clen_bits[value] + @as(u3, switch (value) { + 0...15 => 0, + 16 => 2, + 17 => 3, + 18 => 7, + else => unreachable, + }), + ); + } + } +}; + +/// Number of tokens to accumulate before outputing as a block. +/// The maximum value is `math.maxInt(u16) - 1` since one token is reserved for end-of-block. +const block_tokens: u16 = 1 << 15; +const lookup_hash_bits = 15; +const Hash = u16; // `u[lookup_hash_bits]` is not used due to worse optimization (with LLVM 21) +const seq_bytes = 3; // not intended to be changed +const Seq = std.meta.Int(.unsigned, seq_bytes * 8); + +const TokenBufferEntryHeader = packed struct(u16) { + kind: enum(u1) { + /// Followed by non-zero `data` byte literals. + bytes, + /// Followed by the length as a byte + match, + }, + data: u15, +}; + +const BlockHeader = packed struct(u3) { + final: bool, + kind: enum(u2) { stored, fixed, dynamic, _ }, + + pub fn int(h: BlockHeader) u3 { + return @bitCast(h); + } + + pub const Dynamic = packed struct(u17) { + regular: BlockHeader, + hlit: u5, + hdist: u5, + hclen: u4, + + pub fn int(h: Dynamic) u17 { + return @bitCast(h); + } + }; }; +fn outputMatch(c: *Compress, dist: u15, len: u8) Writer.Error!void { + // This must come first. Instead of ensuring a full block is never left buffered, + // draining it is defered to allow end of stream to be indicated. 
+ if (c.buffered_tokens.n == block_tokens) { + @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without + try c.writeBlock(false); + } + const header: TokenBufferEntryHeader = .{ .kind = .match, .data = dist }; + c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header); + c.buffered_tokens.list[c.buffered_tokens.pos + 2] = len; + c.buffered_tokens.pos += 3; + c.buffered_tokens.n += 1; + + c.buffered_tokens.lit_freqs[@as(usize, 257) + token.LenCode.fromVal(len).toInt()] += 1; + c.buffered_tokens.dist_freqs[token.DistCode.fromVal(dist).toInt()] += 1; +} + +fn outputBytes(c: *Compress, bytes: []const u8) Writer.Error!void { + var remaining = bytes; + while (remaining.len != 0) { + if (c.buffered_tokens.n == block_tokens) { + @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without + try c.writeBlock(false); + } + + const n = @min(remaining.len, block_tokens - c.buffered_tokens.n, math.maxInt(u15)); + assert(n != 0); + const header: TokenBufferEntryHeader = .{ .kind = .bytes, .data = n }; + c.buffered_tokens.list[c.buffered_tokens.pos..][0..2].* = @bitCast(header); + @memcpy(c.buffered_tokens.list[c.buffered_tokens.pos + 2 ..][0..n], remaining[0..n]); + c.buffered_tokens.pos += @as(u32, 2) + n; + c.buffered_tokens.n += n; + + for (remaining[0..n]) |b| { + c.buffered_tokens.lit_freqs[b] += 1; + } + remaining = remaining[n..]; + } +} + +fn hash(x: u32) Hash { + return @intCast((x *% 0x9E3779B1) >> (32 - lookup_hash_bits)); +} + +/// Trades between speed and compression size. +/// +/// Default parameters are [taken from zlib] +/// (https://github.com/madler/zlib/blob/v1.3.1/deflate.c#L112) pub const Options = struct { - level: Level = .default, - container: Container = .raw, + /// Perform fewer lookups when a match of at least this length has been found. + good: u16, + /// Stop when a match of at least this length has been found. + nice: u16, + /// Don't attempt a lazy match find when a match of at least this length has been found. + lazy: u16, + /// Check this many previous locations with the same hash for longer matches. + chain: u16, + + // zig fmt: off + pub const level_1: Options = .{ .good = 4, .nice = 8, .lazy = 0, .chain = 4 }; + pub const level_2: Options = .{ .good = 4, .nice = 16, .lazy = 0, .chain = 8 }; + pub const level_3: Options = .{ .good = 4, .nice = 32, .lazy = 0, .chain = 32 }; + pub const level_4: Options = .{ .good = 4, .nice = 16, .lazy = 4, .chain = 16 }; + pub const level_5: Options = .{ .good = 8, .nice = 32, .lazy = 16, .chain = 32 }; + pub const level_6: Options = .{ .good = 8, .nice = 128, .lazy = 16, .chain = 128 }; + pub const level_7: Options = .{ .good = 8, .nice = 128, .lazy = 32, .chain = 256 }; + pub const level_8: Options = .{ .good = 32, .nice = 258, .lazy = 128, .chain = 1024 }; + pub const level_9: Options = .{ .good = 32, .nice = 258, .lazy = 258, .chain = 4096 }; + // zig fmt: on + pub const fastest = level_1; + pub const default = level_6; + pub const best = level_9; }; -pub fn init(output: *Writer, buffer: []u8, options: Options) Compress { +/// It is asserted `buffer` is at least `flate.max_window_len` bytes. +/// It is asserted `output` has a capacity of at least 8 bytes.
+pub fn init( + output: *Writer, + buffer: []u8, + container: flate.Container, + opts: Options, +) Writer.Error!Compress { + assert(output.buffer.len > 8); + assert(buffer.len >= flate.max_window_len); + + // note that disallowing some of these simplifies matching logic + assert(opts.chain != 0); // use `Huffman`, disallowing this simplies matching + assert(opts.good >= 3 and opts.nice >= 3); // a match will (usually) not be found + assert(opts.good <= 258 and opts.nice <= 258); // a longer match will not be found + assert(opts.lazy <= opts.nice); // a longer match will (usually) not be found + if (opts.good <= opts.lazy) assert(opts.chain >= 1 << 2); // chain can be reduced to zero + + try output.writeAll(container.header()); return .{ - .block_writer = .init(output), - .level = .get(options.level), - .hasher = .init(options.container), - .state = .header, .writer = .{ .buffer = buffer, - .vtable = &.{ .drain = drain }, + .vtable = &.{ + .drain = drain, + .flush = flush, + .rebase = rebase, + }, + }, + .has_history = false, + .bit_writer = .init(output), + .buffered_tokens = .empty, + .lookup = .{ + // init `value` is max so there is 0xff pattern + .head = @splat(.{ .value = math.maxInt(u15), .is_null = true }), + .chain = undefined, + .chain_pos = math.maxInt(u15), }, + .container = container, + .opts = opts, + .hasher = .init(container), }; } -// Tokens store -const Tokens = struct { - list: [n_tokens]Token = undefined, - pos: usize = 0, +fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + errdefer w.* = .failing; + // There may have not been enough space in the buffer and the write was sent directly here. + // However, it is required that all data goes through the buffer to keep a history. + // + // Additionally, ensuring the buffer is always full ensures there is always a full history + // after. + const data_n = w.buffer.len - w.end; + _ = w.fixedDrain(data, splat) catch {}; + assert(w.end == w.buffer.len); + try rebaseInner(w, 0, 1, false); + return data_n; +} + +fn flush(w: *Writer) Writer.Error!void { + defer w.* = .failing; + const c: *Compress = @fieldParentPtr("writer", w); + try rebaseInner(w, 0, w.buffer.len - flate.history_len, true); + try c.bit_writer.output.rebase(0, 1); + c.bit_writer.byteAlign(); + try c.hasher.writeFooter(c.bit_writer.output); +} + +fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void { + return rebaseInner(w, preserve, capacity, false); +} + +pub const rebase_min_preserve = flate.history_len; +pub const rebase_reserved_capacity = (token.max_length + 1) + seq_bytes; + +fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void { + if (!eos) { + assert(@max(preserve, rebase_min_preserve) + (capacity + rebase_reserved_capacity) <= w.buffer.len); + assert(w.end >= flate.history_len + rebase_reserved_capacity); // Above assert should + // fail since rebase is only called when `capacity` is not present. This assertion is + // important because a full history is required at the end. 
+ } else { + assert(preserve == 0 and capacity == w.buffer.len - flate.history_len); + } + + const c: *Compress = @fieldParentPtr("writer", w); + const buffered = w.buffered(); + + const start = @as(usize, flate.history_len) * @intFromBool(c.has_history); + const lit_end: usize = if (!eos) + buffered.len - rebase_reserved_capacity - (preserve -| flate.history_len) + else + buffered.len -| (seq_bytes - 1); + + var i = start; + var last_unmatched = i; + // Read from `w.buffer` instead of `buffered` since the latter may not + // have enough bytes. If this is the case, this variable is not used. + var seq: Seq = mem.readInt( + std.meta.Int(.unsigned, (seq_bytes - 1) * 8), + w.buffer[i..][0 .. seq_bytes - 1], + .big, + ); + if (buffered[i..].len < seq_bytes - 1) { + @branchHint(.unlikely); + assert(eos); + seq = undefined; + assert(i >= lit_end); + } + + while (i < lit_end) { + var match_start = i; + seq <<= 8; + seq |= buffered[i + (seq_bytes - 1)]; + var match = c.matchAndAddHash(i, hash(seq), token.min_length - 1, c.opts.chain, c.opts.good); + i += 1; + if (match.len < token.min_length) continue; + + var match_unadded = match.len - 1; + lazy: { + if (match.len >= c.opts.lazy) break :lazy; + if (match.len >= c.writer.buffered()[i..].len) { + @branchHint(.unlikely); // Only end of stream + break :lazy; + } - fn add(self: *Tokens, t: Token) void { - self.list[self.pos] = t; - self.pos += 1; + var chain = c.opts.chain; + var good = c.opts.good; + if (match.len >= good) { + chain >>= 2; + good = math.maxInt(u8); // Reduce only once + } + + seq <<= 8; + seq |= buffered[i + (seq_bytes - 1)]; + const lazy = c.matchAndAddHash(i, hash(seq), match.len, chain, good); + match_unadded -= 1; + i += 1; + + if (lazy.len > match.len) { + match_start += 1; + match = lazy; + match_unadded = match.len - 1; + } + } + + assert(i + match_unadded == match_start + match.len); + assert(mem.eql( + u8, + buffered[match_start..][0..match.len], + buffered[match_start - 1 - match.dist ..][0..match.len], + )); // This assert also seems to help codegen. + + try c.outputBytes(buffered[last_unmatched..match_start]); + try c.outputMatch(@intCast(match.dist), @intCast(match.len - 3)); + + last_unmatched = match_start + match.len; + if (last_unmatched + seq_bytes >= w.end) { + @branchHint(.unlikely); + assert(eos); + i = undefined; + break; + } + + while (true) { + seq <<= 8; + seq |= buffered[i + (seq_bytes - 1)]; + _ = c.addHash(i, hash(seq)); + i += 1; + + match_unadded -= 1; + if (match_unadded == 0) break; + } + assert(i == match_start + match.len); } - fn full(self: *Tokens) bool { - return self.pos == self.list.len; + if (eos) { + i = undefined; // (from match hashing logic) + try c.outputBytes(buffered[last_unmatched..]); + c.hasher.update(buffered[start..]); + try c.writeBlock(true); + return; } - fn reset(self: *Tokens) void { - self.pos = 0; + try c.outputBytes(buffered[last_unmatched..i]); + c.hasher.update(buffered[start..i]); + + const preserved = buffered[i - flate.history_len ..]; + assert(preserved.len > @max(rebase_min_preserve, preserve)); + @memmove(w.buffer[0..preserved.len], preserved); + w.end = preserved.len; + c.has_history = true; +} + +fn addHash(c: *Compress, i: usize, h: Hash) void { + assert(h == hash(mem.readInt(Seq, c.writer.buffer[i..][0..seq_bytes], .big))); + + const l = &c.lookup; + l.chain_pos +%= 1; + + // Equivilent to the below, however LLVM 21 does not optimize `@subWithOverflow` well at all. 
+ // const replaced_i, const no_replace = @subWithOverflow(i, flate.history_len); + // if (no_replace == 0) { + if (i >= flate.history_len) { + @branchHint(.likely); + const replaced_i = i - flate.history_len; + // The following is the same as the below except uses a 32-bit load to help optimizations + // const replaced_seq = mem.readInt(Seq, c.writer.buffer[replaced_i..][0..seq_bytes], .big); + comptime assert(@sizeOf(Seq) <= @sizeOf(u32)); + const replaced_u32 = mem.readInt(u32, c.writer.buffered()[replaced_i..][0..4], .big); + const replaced_seq: Seq = @intCast(replaced_u32 >> (32 - @bitSizeOf(Seq))); + + const replaced_h = hash(replaced_seq); + // The following is equivilent to the below since LLVM 21 doesn't optimize it well. + // l.head[replaced_h].is_null = l.head[replaced_h].is_null or + // l.head[replaced_h].int() == l.chain_pos; + const empty_head = l.head[replaced_h].int() == l.chain_pos; + const null_flag = PackedOptionalU15.int(.{ .is_null = empty_head, .value = 0 }); + l.head[replaced_h] = @bitCast(l.head[replaced_h].int() | null_flag); } - fn tokens(self: *Tokens) []const Token { - return self.list[0..self.pos]; + const prev_chain_index = l.head[h]; + l.chain[l.chain_pos] = @bitCast((l.chain_pos -% prev_chain_index.value) | + (prev_chain_index.int() & PackedOptionalU15.null_bit.int())); // Preserves null + l.head[h] = .{ .value = l.chain_pos, .is_null = false }; +} + +/// If the match is shorter, the returned value can be any value `<= old`. +fn betterMatchLen(old: u16, prev: []const u8, bytes: []const u8) u16 { + assert(old < @min(bytes.len, token.max_length)); + assert(prev.len >= bytes.len); + assert(bytes.len >= token.min_length); + + var i: u16 = 0; + const Block = std.meta.Int(.unsigned, @min(math.divCeil( + comptime_int, + math.ceilPowerOfTwoAssert(usize, @bitSizeOf(usize)), + 8, + ) catch unreachable, 256) * 8); + + if (bytes.len < token.max_length) { + @branchHint(.unlikely); // Only end of stream + + while (bytes[i..].len >= @sizeOf(Block)) { + const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little); + const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little); + const diff = a ^ b; + if (diff != 0) { + @branchHint(.likely); + i += @ctz(diff) / 8; + return i; + } + i += @sizeOf(Block); + } + + while (i != bytes.len and prev[i] == bytes[i]) { + i += 1; + } + assert(i < token.max_length); + return i; } -}; -fn drain(me: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { - _ = data; - _ = splat; - const c: *Compress = @fieldParentPtr("writer", me); - const out = c.block_writer.output; - switch (c.state) { - .header => { - c.state = .middle; - const header = c.hasher.container().header(); - try out.writeAll(header); - return header.len; - }, - .middle => {}, - .ended => unreachable, + if (old >= @sizeOf(Block)) { + // Check that a longer end is present, otherwise the match is always worse + const a = mem.readInt(Block, prev[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little); + const b = mem.readInt(Block, bytes[old + 1 - @sizeOf(Block) ..][0..@sizeOf(Block)], .little); + if (a != b) return i; + } + + while (true) { + const a = mem.readInt(Block, prev[i..][0..@sizeOf(Block)], .little); + const b = mem.readInt(Block, bytes[i..][0..@sizeOf(Block)], .little); + const diff = a ^ b; + if (diff != 0) { + i += @ctz(diff) / 8; + return i; + } + i += @sizeOf(Block); + if (i == 256) break; + } + + const a = mem.readInt(u16, prev[i..][0..2], .little); + const b = mem.readInt(u16, bytes[i..][0..2], .little); + const diff = a ^ b; + i 
+= @ctz(diff) / 8; + assert(i <= token.max_length); + return i; +} + +test betterMatchLen { + try std.testing.fuzz({}, testFuzzedMatchLen, .{}); +} + +fn testFuzzedMatchLen(_: void, input: []const u8) !void { + @disableInstrumentation(); + var r: Io.Reader = .fixed(input); + var buf: [1024]u8 = undefined; + var w: Writer = .fixed(&buf); + var old = r.takeLeb128(u9) catch 0; + var bytes_off = @max(1, r.takeLeb128(u10) catch 258); + const prev_back = @max(1, r.takeLeb128(u10) catch 258); + + while (r.takeByte()) |byte| { + const op: packed struct(u8) { + kind: enum(u2) { splat, copy, insert_imm, insert }, + imm: u6, + + pub fn immOrByte(op_s: @This(), r_s: *Io.Reader) usize { + return if (op_s.imm == 0) op_s.imm else @as(usize, r_s.takeByte() catch 0) + 64; + } + } = @bitCast(byte); + (switch (op.kind) { + .splat => w.splatByteAll(r.takeByte() catch 0, op.immOrByte(&r)), + .copy => write: { + const start = w.buffered().len -| op.immOrByte(&r); + const len = @min(w.buffered().len - start, r.takeByte() catch 3); + break :write w.writeAll(w.buffered()[start..][0..len]); + }, + .insert_imm => w.writeByte(op.imm), + .insert => w.writeAll(r.take( + @min(r.bufferedLen(), @as(usize, op.imm) + 1), + ) catch unreachable), + }) catch break; + } else |_| {} + + w.splatByteAll(0, (1 + 3) -| w.buffered().len) catch unreachable; + bytes_off = @min(bytes_off, @as(u10, @intCast(w.buffered().len - 3))); + const prev_off = bytes_off -| prev_back; + assert(prev_off < bytes_off); + const prev = w.buffered()[prev_off..]; + const bytes = w.buffered()[bytes_off..]; + old = @min(old, bytes.len - 1, token.max_length - 1); + + const diff_index = mem.indexOfDiff(u8, prev, bytes).?; // unwrap since lengths are not same + const expected_len = @min(diff_index, 258); + errdefer std.debug.print( + \\prev : '{any}' + \\bytes: '{any}' + \\old : {} + \\expected: {?} + \\actual : {} + ++ "\n", .{ + prev, bytes, old, + if (old < expected_len) expected_len else null, betterMatchLen(old, prev, bytes), + }); + if (old < expected_len) { + try std.testing.expectEqual(expected_len, betterMatchLen(old, prev, bytes)); + } else { + try std.testing.expect(betterMatchLen(old, prev, bytes) <= old); + } +} + +fn matchAndAddHash(c: *Compress, i: usize, h: Hash, gt: u16, max_chain: u16, good_: u16) struct { + dist: u16, + len: u16, +} { + const l = &c.lookup; + const buffered = c.writer.buffered(); + + var chain_limit = max_chain; + var best_dist: u16 = undefined; + var best_len = gt; + const nice = @min(c.opts.nice, buffered[i..].len); + var good = good_; + + search: { + if (l.head[h].is_null) break :search; + // Actually a u15, but LLVM 21 does not optimize that as well (it truncates it each use). + var dist: u16 = l.chain_pos -% l.head[h].value; + while (true) { + chain_limit -= 1; + + const match_len = betterMatchLen(best_len, buffered[i - 1 - dist ..], buffered[i..]); + if (match_len > best_len) { + best_dist = dist; + best_len = match_len; + if (best_len >= nice) break; + if (best_len >= good) { + chain_limit >>= 2; + good = math.maxInt(u8); // Reduce only once + } + } + + if (chain_limit == 0) break; + const next_chain_index = l.chain_pos -% @as(u15, @intCast(dist)); + // Equivilent to the below, however LLVM 21 optimizes the below worse. 
+ // if (l.chain[next_chain_index].is_null) break; + // dist, const out_of_window = @addWithOverflow(dist, l.chain[next_chain_index].value); + // if (out_of_window == 1) break; + dist +%= l.chain[next_chain_index].int(); // wrapping for potential null bit + comptime assert(flate.history_len == PackedOptionalU15.int(.null_bit)); + // Also, doing >= flate.history_len gives worse codegen with LLVM 21. + if ((dist | l.chain[next_chain_index].int()) & flate.history_len != 0) break; + } + } + + c.addHash(i, h); + return .{ .dist = best_dist, .len = best_len }; +} + +fn clenHlen(freqs: [19]u16) u4 { + // Note that the first four codes (16, 17, 18, and 0) are always present. + if (builtin.mode != .ReleaseSmall and (std.simd.suggestVectorLength(u16) orelse 1) >= 8) { + const V = @Vector(16, u16); + const hlen_mul: V = comptime m: { + var hlen_mul: [16]u16 = undefined; + for (token.codegen_order[3..], 0..) |i, hlen| { + hlen_mul[i] = hlen; + } + break :m hlen_mul; + }; + const encoded = freqs[0..16].* != @as(V, @splat(0)); + return @intCast(@reduce(.Max, @intFromBool(encoded) * hlen_mul)); + } else { + var max: u4 = 0; + for (token.codegen_order[4..], 1..) |i, len| { + max = if (freqs[i] == 0) max else @intCast(len); + } + return max; + } +} + +test clenHlen { + var freqs: [19]u16 = @splat(0); + try std.testing.expectEqual(0, clenHlen(freqs)); + for (token.codegen_order, 1..) |i, len| { + freqs[i] = 1; + try std.testing.expectEqual(len -| 4, clenHlen(freqs)); + freqs[i] = 0; + } +} + +/// Returns the number of values followed by the bitsize of the extra bits. +fn buildClen( + dyn_bits: []const u4, + out_values: []u8, + out_extra: []u8, + out_freqs: *[19]u16, +) struct { u16, u16 } { + assert(dyn_bits.len <= out_values.len); + assert(out_values.len == out_extra.len); + + var len: u16 = 0; + var extra_bitsize: u16 = 0; + + var remaining_bits = dyn_bits; + var prev: u4 = 0; + while (true) { + const b = remaining_bits[0]; + const n_max = @min(@as(u8, if (b != 0) + if (b != prev) 1 else 6 + else + 138), remaining_bits.len); + prev = b; + + var n: u8 = 0; + while (true) { + remaining_bits = remaining_bits[1..]; + n += 1; + if (n == n_max or remaining_bits[0] != b) break; + } + const code, const extra, const xsize = switch (n) { + 0 => unreachable, + 1...2 => .{ b, 0, 0 }, + 3...10 => .{ + @as(u8, 16) + @intFromBool(b == 0), + n - 3, + @as(u8, 2) + @intFromBool(b == 0), + }, + 11...138 => .{ 18, n - 11, 7 }, + else => unreachable, + }; + while (true) { + out_values[len] = code; + out_extra[len] = extra; + out_freqs[code] += 1; + extra_bitsize += xsize; + len += 1; + if (n != 2) { + @branchHint(.likely); + break; + } + // Code needs outputted once more + n = 1; + } + if (remaining_bits.len == 0) break; + } + + return .{ len, extra_bitsize }; +} + +test buildClen { + //dyn_bits: []u4, + //out_values: *[288 + 30]u8, + //out_extra: *[288 + 30]u8, + //out_freqs: *[19]u16, + //struct { u16, u16 } + var out_values: [288 + 30]u8 = undefined; + var out_extra: [288 + 30]u8 = undefined; + var out_freqs: [19]u16 = @splat(0); + const len, const extra_bitsize = buildClen(&([_]u4{ + 1, // A + 2, 2, // B + 3, 3, 3, // C + 4, 4, 4, 4, // D + 5, // E + 5, 5, 5, 5, 5, 5, // + 5, 5, 5, 5, 5, 5, + 5, 5, + 0, 1, // F + 0, 0, 1, // G + } ++ @as([138 + 10]u4, @splat(0)) // H + ), &out_values, &out_extra, &out_freqs); + try std.testing.expectEqualSlices(u8, &.{ + 1, // A + 2, 2, // B + 3, 3, 3, // C + 4, 16, // D + 5, 16, 16, 5, 5, // E + 0, 1, // F + 0, 0, 1, // G + 18, 17, // H + }, out_values[0..len]); + try 
std.testing.expectEqualSlices(u8, &.{ + 0, // A + 0, 0, // B + 0, 0, 0, // C + 0, (0), // D + 0, (3), (3), 0, 0, // E + 0, 0, // F + 0, 0, 0, // G + (127), (7), // H + }, out_extra[0..len]); + try std.testing.expectEqual(2 + 2 + 2 + 7 + 3, extra_bitsize); + try std.testing.expectEqualSlices(u16, &.{ + 3, 3, 2, 3, 1, 3, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 3, 1, 1, + }, &out_freqs); +} + +fn writeBlock(c: *Compress, eos: bool) Writer.Error!void { + const toks = &c.buffered_tokens; + if (!eos) assert(toks.n == block_tokens); + assert(toks.lit_freqs[256] == 0); + toks.lit_freqs[256] = 1; + + var dyn_codes_buf: [286 + 30]u16 = undefined; + var dyn_bits_buf: [286 + 30]u4 = @splat(0); + + const dyn_lit_codes_bitsize, const dyn_last_lit = huffman.build( + &toks.lit_freqs, + dyn_codes_buf[0..286], + dyn_bits_buf[0..286], + 15, + true, + ); + const dyn_lit_len = @max(257, dyn_last_lit + 1); + + const dyn_dist_codes_bitsize, const dyn_last_dist = huffman.build( + &toks.dist_freqs, + dyn_codes_buf[dyn_lit_len..][0..30], + dyn_bits_buf[dyn_lit_len..][0..30], + 15, + true, + ); + const dyn_dist_len = @max(1, dyn_last_dist + 1); + + var clen_values: [288 + 30]u8 = undefined; + var clen_extra: [288 + 30]u8 = undefined; + var clen_freqs: [19]u16 = @splat(0); + const clen_len, const clen_extra_bitsize = buildClen( + dyn_bits_buf[0 .. dyn_lit_len + dyn_dist_len], + &clen_values, + &clen_extra, + &clen_freqs, + ); + + var clen_codes: [19]u16 = undefined; + var clen_bits: [19]u4 = @splat(0); + const clen_codes_bitsize, _ = huffman.build( + &clen_freqs, + &clen_codes, + &clen_bits, + 7, + false, + ); + const hclen = clenHlen(clen_freqs); + + const dynamic_bitsize = @as(u32, 14) + + (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize + + dyn_lit_codes_bitsize + dyn_dist_codes_bitsize; + const fixed_bitsize = n: { + const freq7 = 1; // eos + var freq8: u16 = 0; + var freq9: u16 = 0; + var freq12: u16 = 0; // 7 + 5 - match freqs always have corresponding 5-bit dist freq + var freq13: u16 = 0; // 8 + 5 + for (toks.lit_freqs[0..144]) |f| freq8 += f; + for (toks.lit_freqs[144..256]) |f| freq9 += f; + assert(toks.lit_freqs[256] == 1); + for (toks.lit_freqs[257..280]) |f| freq12 += f; + for (toks.lit_freqs[280..286]) |f| freq13 += f; + break :n @as(u32, freq7) * 7 + + @as(u32, freq8) * 8 + @as(u32, freq9) * 9 + + @as(u32, freq12) * 12 + @as(u32, freq13) * 13; + }; + + stored: { + for (toks.dist_freqs) |n| if (n != 0) break :stored; + // No need to check len frequencies since they each have a corresponding dist frequency + assert(for (toks.lit_freqs[257..]) |f| (if (f != 0) break false) else true); + + // No matches. If the stored size is smaller than the huffman-encoded version, it will be + // outputed in a store block. This is not done with matches since the original input would + // need to be stored since the window may slid, and it may also exceed 65535 bytes. This + // should be OK since most inputs with matches should be more compressable anyways. 
+ const stored_align_bits = -%(c.bit_writer.buffered_n +% 3); + const stored_bitsize = stored_align_bits + @as(u32, 32) + @as(u32, toks.n) * 8; + if (@min(dynamic_bitsize, fixed_bitsize) < stored_bitsize) break :stored; + + try c.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3); + try c.bit_writer.output.rebase(0, 5); + c.bit_writer.byteAlign(); + c.bit_writer.output.writeInt(u16, c.buffered_tokens.n, .little) catch unreachable; + c.bit_writer.output.writeInt(u16, ~c.buffered_tokens.n, .little) catch unreachable; + + // Relatively small buffer since regular draining will + // always consume slightly less than 2 << 15 bytes. + var vec_buf: [4][]const u8 = undefined; + var vec_n: usize = 0; + var i: usize = 0; + + assert(c.buffered_tokens.pos != 0); + while (i != c.buffered_tokens.pos) { + const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*); + assert(h.kind == .bytes); + + i += 2; + vec_buf[vec_n] = toks.list[i..][0..h.data]; + i += h.data; + + vec_n += 1; + if (i == c.buffered_tokens.pos or vec_n == vec_buf.len) { + try c.bit_writer.output.writeVecAll(vec_buf[0..vec_n]); + vec_n = 0; + } + } + + toks.* = .empty; + return; + } + + const lit_codes, const lit_bits, const dist_codes, const dist_bits = + if (dynamic_bitsize < fixed_bitsize) codes: { + try c.bit_writer.write(BlockHeader.Dynamic.int(.{ + .regular = .{ .final = eos, .kind = .dynamic }, + .hlit = @intCast(dyn_lit_len - 257), + .hdist = @intCast(dyn_dist_len - 1), + .hclen = hclen, + }), 17); + try c.bit_writer.writeClen( + hclen, + clen_values[0..clen_len], + clen_extra[0..clen_len], + clen_codes, + clen_bits, + ); + break :codes .{ + dyn_codes_buf[0..dyn_lit_len], + dyn_bits_buf[0..dyn_lit_len], + dyn_codes_buf[dyn_lit_len..][0..dyn_dist_len], + dyn_bits_buf[dyn_lit_len..][0..dyn_dist_len], + }; + } else codes: { + try c.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3); + break :codes .{ + &token.fixed_lit_codes, + &token.fixed_lit_bits, + &token.fixed_dist_codes, + &token.fixed_dist_bits, + }; + }; + + var i: usize = 0; + while (i != toks.pos) { + const h: TokenBufferEntryHeader = @bitCast(toks.list[i..][0..2].*); + i += 2; + if (h.kind == .bytes) { + for (toks.list[i..][0..h.data]) |b| { + try c.bit_writer.write(lit_codes[b], lit_bits[b]); + } + i += h.data; + } else { + const dist = h.data; + const len = toks.list[i]; + i += 1; + const dist_code = token.DistCode.fromVal(dist); + const len_code = token.LenCode.fromVal(len); + const dist_val = dist_code.toInt(); + const lit_val = @as(u16, 257) + len_code.toInt(); + + var out: u48 = lit_codes[lit_val]; + var out_bits: u6 = lit_bits[lit_val]; + out |= @shlExact(@as(u20, len - len_code.base()), @intCast(out_bits)); + out_bits += len_code.extraBits(); + + out |= @shlExact(@as(u35, dist_codes[dist_val]), out_bits); + out_bits += dist_bits[dist_val]; + out |= @shlExact(@as(u48, dist - dist_code.base()), out_bits); + out_bits += dist_code.extraBits(); + + try c.bit_writer.write(out, out_bits); + } + } + try c.bit_writer.write(lit_codes[256], lit_bits[256]); + + toks.* = .empty; +} + +/// Huffman tree construction. +/// +/// The approach for building the huffman tree is [taken from zlib] +/// (https://github.com/madler/zlib/blob/v1.3.1/trees.c#L625) with some modifications. 
+const huffman = struct { + const max_leafs = 286; + const max_nodes = max_leafs * 2; + + const Node = struct { + freq: u16, + depth: u16, + + pub const Index = u16; + + pub fn smaller(a: Node, b: Node) bool { + return if (a.freq != b.freq) a.freq < b.freq else a.depth < b.depth; + } + }; + + fn heapSiftDown(nodes: []Node, heap: []Node.Index, start: usize) void { + var i = start; + while (true) { + var min = i; + const l = i * 2 + 1; + const r = l + 1; + min = if (l < heap.len and nodes[heap[l]].smaller(nodes[heap[min]])) l else min; + min = if (r < heap.len and nodes[heap[r]].smaller(nodes[heap[min]])) r else min; + if (i == min) break; + mem.swap(Node.Index, &heap[i], &heap[min]); + i = min; + } + } + + fn heapRemoveRoot(nodes: []Node, heap: []Node.Index) void { + heap[0] = heap[heap.len - 1]; + heapSiftDown(nodes, heap[0 .. heap.len - 1], 0); + } + + /// Returns the total bits to encode `freqs` followed by the index of the last non-zero bits. + /// For `freqs[i]` == 0, `out_codes[i]` will be undefined. + /// It is asserted `out_bits` is zero-filled. + /// It is asserted `out_bits.len` is at least a length of + /// one if ncomplete trees are allowed and two otherwise. + pub fn build( + freqs: []const u16, + out_codes: []u16, + out_bits: []u4, + max_bits: u4, + incomplete_allowed: bool, + ) struct { u32, u16 } { + assert(out_codes.len - 1 >= @intFromBool(incomplete_allowed)); + // freqs and out_codes are in the loop to assert they are all the same length + for (freqs, out_codes, out_bits) |_, _, n| assert(n == 0); + assert(out_codes.len <= @as(u16, 1) << max_bits); + + // Indexes 0..freqs are leafs, indexes max_leafs.. are internal nodes. + var tree_nodes: [max_nodes]Node = undefined; + var tree_parent_nodes: [max_nodes]Node.Index = undefined; + var nodes_end: u16 = max_leafs; + // Dual-purpose buffer. Nodes are ordered by least frequency or when equal, least depth. + // The start is a min heap of level-zero nodes. + // The end is a sorted buffer of nodes with the greatest first. + var node_buf: [max_nodes]Node.Index = undefined; + var heap_end: u16 = 0; + var sorted_start: u16 = node_buf.len; + + for (0.., freqs) |n, freq| { + tree_nodes[n] = .{ .freq = freq, .depth = 0 }; + node_buf[heap_end] = @intCast(n); + heap_end += @intFromBool(freq != 0); + } + + // There must be at least one code at minimum, + node_buf[heap_end] = 0; + heap_end += @intFromBool(heap_end == 0); + // and at least two if incomplete must be avoided. + if (heap_end == 1 and incomplete_allowed) { + @branchHint(.unlikely); // LLVM 21 optimizes this branch as the more likely without + + // Codes must have at least one-bit, so this is a special case. + out_bits[node_buf[0]] = 1; + out_codes[node_buf[0]] = 0; + return .{ freqs[node_buf[0]], node_buf[0] }; + } + const last_nonzero = @max(node_buf[heap_end - 1], 1); // For heap_end > 1, last is not be 0 + node_buf[heap_end] = @intFromBool(node_buf[0] == 0); + heap_end += @intFromBool(heap_end == 1); + + // Heapify the array of frequencies + const heapify_final = heap_end - 1; + const heapify_start = (heapify_final - 1) / 2; // Parent of final node + var heapify_i = heapify_start; + while (true) { + heapSiftDown(&tree_nodes, node_buf[0..heap_end], heapify_i); + if (heapify_i == 0) break; + heapify_i -= 1; + } + + // Build optimal tree. `max_bits` is not enforced yet. 
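+        // Each iteration pops the two lowest-weight nodes, records them in the sorted
+        // tail of `node_buf`, and pushes a parent node whose weight is their sum.
+        // E.g. (made-up weights) { 1, 1, 2, 4 }: 1+1=2, then 2+2=4, then 4+4=8,
+        // giving leaf depths { 3, 3, 2, 1 }.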
+ while (heap_end > 1) { + const a = node_buf[0]; + heapRemoveRoot(&tree_nodes, node_buf[0..heap_end]); + heap_end -= 1; + const b = node_buf[0]; + + sorted_start -= 2; + node_buf[sorted_start..][0..2].* = .{ b, a }; + + tree_nodes[nodes_end] = .{ + .freq = tree_nodes[a].freq + tree_nodes[b].freq, + .depth = @max(tree_nodes[a].depth, tree_nodes[b].depth) + 1, + }; + defer nodes_end += 1; + tree_parent_nodes[a] = nodes_end; + tree_parent_nodes[b] = nodes_end; + + node_buf[0] = nodes_end; + heapSiftDown(&tree_nodes, node_buf[0..heap_end], 0); + } + sorted_start -= 1; + node_buf[sorted_start] = node_buf[0]; + + var bit_counts: [16]u16 = @splat(0); + buildBits(out_bits, &bit_counts, &tree_parent_nodes, node_buf[sorted_start..], max_bits); + return .{ buildValues(freqs, out_codes, out_bits, bit_counts), last_nonzero }; + } + + fn buildBits( + out_bits: []u4, + bit_counts: *[16]u16, + parent_nodes: *[max_nodes]Node.Index, + sorted: []Node.Index, + max_bits: u4, + ) void { + var internal_node_bits: [max_nodes - max_leafs]u4 = undefined; + var overflowed: u16 = 0; + + internal_node_bits[sorted[0] - max_leafs] = 0; // root + for (sorted[1..]) |i| { + const parent_bits = internal_node_bits[parent_nodes[i] - max_leafs]; + overflowed += @intFromBool(parent_bits == max_bits); + const bits = parent_bits + @intFromBool(parent_bits != max_bits); + bit_counts[bits] += @intFromBool(i < max_leafs); + (if (i >= max_leafs) &internal_node_bits[i - max_leafs] else &out_bits[i]).* = bits; + } + + if (overflowed == 0) { + @branchHint(.likely); + return; + } + + outer: while (true) { + var deepest: u4 = max_bits - 1; + while (bit_counts[deepest] == 0) deepest -= 1; + while (overflowed != 0) { + // Insert an internal node under the leaf and move an overflow as its sibling + bit_counts[deepest] -= 1; + bit_counts[deepest + 1] += 2; + // Only overflow moved. Its sibling's depth is one less, however is still >= depth. + bit_counts[max_bits] -= 1; + overflowed -= 2; + + if (overflowed == 0) break :outer; + deepest += 1; + if (deepest == max_bits) continue :outer; + } + } + + // Reassign bit lengths + assert(bit_counts[0] == 0); + var i: usize = 0; + for (1.., bit_counts[1..]) |bits, all| { + var remaining = all; + while (remaining != 0) { + defer i += 1; + if (sorted[i] >= max_leafs) continue; + out_bits[sorted[i]] = @intCast(bits); + remaining -= 1; + } + } + assert(for (sorted[i..]) |n| { // all leafs consumed + if (n < max_leafs) break false; + } else true); + } + + fn buildValues(freqs: []const u16, out_codes: []u16, bits: []u4, bit_counts: [16]u16) u32 { + var code: u16 = 0; + var base: [16]u16 = undefined; + assert(bit_counts[0] == 0); + for (bit_counts[1..], base[1..]) |c, *b| { + b.* = code; + code +%= c; + code <<= 1; + } + var freq_sums: [16]u16 = @splat(0); + for (out_codes, bits, freqs) |*c, b, f| { + c.* = @bitReverse(base[b]) >> -%b; + base[b] += 1; // For `b == 0` this is fine since v is specified to be undefined. 
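+            // The reversal above converts the MSB-first canonical value into the order
+            // needed by the LSB-first bit writer (deflate packs Huffman codes MSB
+            // first): e.g. a hypothetical 3-bit value 0b011 becomes
+            // @bitReverse(@as(u16, 0b011)) >> 13 == 0b110.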
+ freq_sums[b] += f; + } + return @reduce(.Add, @as(@Vector(16, u32), freq_sums) * std.simd.iota(u32, 16)); + } + + test build { + var codes: [8]u16 = undefined; + var bits: [8]u4 = undefined; + + const regular_freqs: [8]u16 = .{ 1, 1, 0, 8, 8, 0, 2, 4 }; + // The optimal tree for the above frequencies is + // 4 1 1 + // \ / + // 3 2 # + // \ / + // 2 8 8 4 # + // \ / \ / + // 1 # # + // \ / + // 0 # + bits = @splat(0); + var n, var lnz = build(®ular_freqs, &codes, &bits, 15, true); + codes[2] = 0; + codes[5] = 0; + try std.testing.expectEqualSlices(u4, &.{ 4, 4, 0, 2, 2, 0, 3, 2 }, &bits); + try std.testing.expectEqualSlices(u16, &.{ + 0b0111, 0b1111, 0, 0b00, 0b10, 0, 0b011, 0b01, + }, &codes); + try std.testing.expectEqual(54, n); + try std.testing.expectEqual(7, lnz); + // When constrained to 3 bits, it becomes + // 3 1 1 2 4 + // \ / \ / + // 2 8 8 # # + // \ / \ / + // 1 # # + // \ / + // 0 # + bits = @splat(0); + n, lnz = build(®ular_freqs, &codes, &bits, 3, true); + codes[2] = 0; + codes[5] = 0; + try std.testing.expectEqualSlices(u4, &.{ 3, 3, 0, 2, 2, 0, 3, 3 }, &bits); + try std.testing.expectEqualSlices(u16, &.{ + 0b001, 0b101, 0, 0b00, 0b10, 0, 0b011, 0b111, + }, &codes); + try std.testing.expectEqual(56, n); + try std.testing.expectEqual(7, lnz); + + // Empty tree. At least one code should be present + bits = @splat(0); + n, lnz = build(&.{ 0, 0 }, codes[0..2], bits[0..2], 15, true); + try std.testing.expectEqualSlices(u4, &.{ 1, 0 }, bits[0..2]); + try std.testing.expectEqual(0b0, codes[0]); + try std.testing.expectEqual(0, n); + try std.testing.expectEqual(0, lnz); + + // Check all incompletable frequencies are completed + for ([_][2]u16{ .{ 0, 0 }, .{ 0, 1 }, .{ 1, 0 } }) |incomplete| { + // Empty tree. Both codes should be present to prevent incomplete trees + bits = @splat(0); + n, lnz = build(&incomplete, codes[0..2], bits[0..2], 15, false); + try std.testing.expectEqualSlices(u4, &.{ 1, 1 }, bits[0..2]); + try std.testing.expectEqualSlices(u16, &.{ 0b0, 0b1 }, codes[0..2]); + try std.testing.expectEqual(incomplete[0] + incomplete[1], n); + try std.testing.expectEqual(1, lnz); + } + + try std.testing.fuzz({}, checkFuzzedBuildFreqs, .{}); } - const buffered = me.buffered(); - const min_lookahead = Token.min_length + Token.max_length; - const history_plus_lookahead_len = flate.history_len + min_lookahead; - if (buffered.len < history_plus_lookahead_len) return 0; - const lookahead = buffered[flate.history_len..]; + fn checkFuzzedBuildFreqs(_: void, freqs: []const u8) !void { + @disableInstrumentation(); + var r: Io.Reader = .fixed(freqs); + var freqs_limit: u16 = 65535; + var freqs_buf: [max_leafs]u16 = undefined; + var nfreqs: u15 = 0; + + const params: packed struct(u8) { + max_bits: u4, + _: u3, + incomplete_allowed: bool, + } = @bitCast(r.takeByte() catch 255); + while (nfreqs != freqs_buf.len) { + const leb = r.takeLeb128(u16); + const f = if (leb) |f| @min(f, freqs_limit) else |e| switch (e) { + error.ReadFailed => unreachable, + error.EndOfStream => 0, + error.Overflow => freqs_limit, + }; + freqs_buf[nfreqs] = f; + nfreqs += 1; + freqs_limit -= f; + if (leb == error.EndOfStream and nfreqs - 1 > @intFromBool(params.incomplete_allowed)) + break; + } + + var codes_buf: [max_leafs]u16 = undefined; + var bits_buf: [max_leafs]u4 = @splat(0); + const total_bits, const last_nonzero = build( + freqs_buf[0..nfreqs], + codes_buf[0..nfreqs], + bits_buf[0..nfreqs], + @max(math.log2_int_ceil(u15, nfreqs), params.max_bits), + params.incomplete_allowed, + ); + + var 
has_bitlen_one: bool = false; + var expected_total_bits: u32 = 0; + var expected_last_nonzero: ?u16 = null; + var weighted_sum: u32 = 0; + for (freqs_buf[0..nfreqs], bits_buf[0..nfreqs], 0..) |f, nb, i| { + has_bitlen_one = has_bitlen_one or nb == 1; + weighted_sum += @shlExact(@as(u16, 1), 15 - nb) & ((1 << 15) - 1); + expected_total_bits += @as(u32, f) * nb; + if (nb != 0) expected_last_nonzero = @intCast(i); + } + + errdefer std.log.err( + \\ params: {} + \\ freqs: {any} + \\ bits: {any} + \\ # freqs: {} + \\ max bits: {} + \\ weighted sum: {} + \\ has_bitlen_one: {} + \\ expected/actual total bits: {}/{} + \\ expected/actual last nonzero: {?}/{} + ++ "\n", .{ + params, + freqs_buf[0..nfreqs], + bits_buf[0..nfreqs], + nfreqs, + @max(math.log2_int_ceil(u15, nfreqs), params.max_bits), + weighted_sum, + has_bitlen_one, + expected_total_bits, + total_bits, + expected_last_nonzero, + last_nonzero, + }); + + try std.testing.expectEqual(expected_total_bits, total_bits); + try std.testing.expectEqual(expected_last_nonzero, last_nonzero); + if (weighted_sum > 1 << 15) + return error.OversubscribedHuffmanTree; + if (weighted_sum < 1 << 15 and + !(params.incomplete_allowed and has_bitlen_one and weighted_sum == 1 << 14)) + return error.IncompleteHuffmanTree; + } +}; - // TODO tokenize - _ = lookahead; - //c.hasher.update(lookahead[0..n]); - @panic("TODO"); +test { + _ = huffman; } -pub fn end(c: *Compress) !void { - try endUnflushed(c); - const out = c.block_writer.output; - try out.flush(); +/// [0] is a gradient where the probability of lower values decreases across it +/// [1] is completely random and hence uncompressable +fn testingFreqBufs() !*[2][65536]u8 { + const fbufs = try std.testing.allocator.create([2][65536]u8); + var prng: std.Random.DefaultPrng = .init(std.testing.random_seed); + prng.random().bytes(&fbufs[0]); + prng.random().bytes(&fbufs[1]); + for (0.., &fbufs[0], fbufs[1]) |i, *grad, rand| { + const prob = @as(u8, @intCast(255 - i / (fbufs[0].len * 256))); + grad.* /= @max(1, rand / @max(1, prob)); + } + return fbufs; } -pub fn endUnflushed(c: *Compress) !void { - while (c.writer.end != 0) _ = try drain(&c.writer, &.{""}, 1); - c.state = .ended; +fn testingCheckDecompressedMatches( + flate_bytes: []const u8, + expected_size: u32, + expected_hash: flate.Container.Hasher, +) !void { + const container: flate.Container = expected_hash; + var data_hash: flate.Container.Hasher = .init(container); + var data_size: u32 = 0; + var flate_r: Io.Reader = .fixed(flate_bytes); + var deflate_buf: [flate.max_window_len]u8 = undefined; + var deflate: flate.Decompress = .init(&flate_r, container, &deflate_buf); - const out = c.block_writer.output; + while (deflate.reader.peekGreedy(1)) |bytes| { + data_size += @intCast(bytes.len); + data_hash.update(bytes); + deflate.reader.toss(bytes.len); + } else |e| switch (e) { + error.ReadFailed => return deflate.err.?, + error.EndOfStream => {}, + } - // TODO flush tokens + try testingCheckContainerHash( + expected_size, + expected_hash, + data_hash, + data_size, + deflate.container_metadata, + ); +} - switch (c.hasher) { - .gzip => |*gzip| { - // GZIP 8 bytes footer - // - 4 bytes, CRC32 (CRC-32) - // - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32 - const footer = try out.writableArray(8); - std.mem.writeInt(u32, footer[0..4], gzip.crc.final(), .little); - std.mem.writeInt(u32, footer[4..8], @truncate(gzip.count), .little); +fn testingCheckContainerHash( + expected_size: u32, + expected_hash: 
flate.Container.Hasher, + actual_hash: flate.Container.Hasher, + actual_size: u32, + actual_meta: flate.Container.Metadata, +) !void { + try std.testing.expectEqual(expected_size, actual_size); + switch (actual_hash) { + .raw => {}, + .gzip => |gz| { + const expected_crc = expected_hash.gzip.crc.final(); + try std.testing.expectEqual(expected_size, actual_meta.gzip.count); + try std.testing.expectEqual(expected_crc, gz.crc.final()); + try std.testing.expectEqual(expected_crc, actual_meta.gzip.crc); }, - .zlib => |*zlib| { - // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952). - // 4 bytes of ADLER32 (Adler-32 checksum) - // Checksum value of the uncompressed data (excluding any - // dictionary data) computed according to Adler-32 - // algorithm. - std.mem.writeInt(u32, try out.writableArray(4), zlib.adler, .big); + .zlib => |zl| { + const expected_adler = expected_hash.zlib.adler; + try std.testing.expectEqual(expected_adler, zl.adler); + try std.testing.expectEqual(expected_adler, actual_meta.zlib.adler); }, - .raw => {}, } } -pub const Simple = struct { - /// Note that store blocks are limited to 65535 bytes. - buffer: []u8, - wp: usize, - block_writer: BlockWriter, - hasher: Container.Hasher, - strategy: Strategy, +const PackedContainer = packed struct(u2) { + raw: bool, + other: enum(u1) { gzip, zlib }, + + pub fn val(c: @This()) flate.Container { + return if (c.raw) .raw else switch (c.other) { + .gzip => .gzip, + .zlib => .zlib, + }; + } +}; + +test Compress { + const fbufs = try testingFreqBufs(); + defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs); + try std.testing.fuzz(fbufs, testFuzzedCompressInput, .{}); +} + +fn testFuzzedCompressInput(fbufs: *const [2][65536]u8, input: []const u8) !void { + var in: Io.Reader = .fixed(input); + var opts: packed struct(u51) { + container: PackedContainer, + buf_size: u16, + good: u8, + nice: u8, + lazy: u8, + /// Not a `u16` to limit it for performance + chain: u9, + } = @bitCast(in.takeLeb128(u51) catch 0); + var expected_hash: flate.Container.Hasher = .init(opts.container.val()); + var expected_size: u32 = 0; + + var flate_buf: [128 * 1024]u8 = undefined; + var flate_w: Writer = .fixed(&flate_buf); + var deflate_buf: [flate.max_window_len * 2]u8 = undefined; + var deflate_w = try Compress.init( + &flate_w, + deflate_buf[0 .. flate.max_window_len + @as(usize, opts.buf_size)], + opts.container.val(), + .{ + .good = @as(u16, opts.good) + 3, + .nice = @as(u16, opts.nice) + 3, + .lazy = @as(u16, @min(opts.lazy, opts.nice)) + 3, + .chain = @max(1, opts.chain, @as(u8, 4) * @intFromBool(opts.good <= opts.lazy)), + }, + ); + + // It is ensured that more bytes are not written then this to ensure this run + // does not take too long and that `flate_buf` does not run out of space. + const flate_buf_blocks = flate_buf.len / block_tokens; + // Allow a max overhead of 64 bytes per block since the implementation does not gaurauntee it + // writes store blocks when optimal. This comes from taking less than 32 bytes to write an + // optimal dynamic block header of mostly bitlen 8 codes and the end of block literal plus + // `(65536 / 256) / 8`, which is is the maximum number of extra bytes from bitlen 9 codes. An + // extra 32 bytes is reserved on top of that for container headers and footers. 
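+    // For example, with gzip the 32-byte reserve covers the 10-byte header plus the
+    // 8-byte footer with room to spare, and the 64-byte per-block slack is the sum of
+    // under 32 bytes of header and (65536 / 256) / 8 = 32 bytes of possible
+    // 9-bit-code overshoot.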
+ const max_size = flate_buf.len - (flate_buf_blocks * 64 + 32); + + while (true) { + const data: packed struct(u36) { + is_rebase: bool, + is_bytes: bool, + params: packed union { + copy: packed struct(u34) { + len_lo: u5, + dist: u15, + len_hi: u4, + _: u10, + }, + bytes: packed struct(u34) { + kind: enum(u1) { gradient, random }, + off_hi: u4, + len_lo: u10, + off_mi: u4, + len_hi: u5, + off_lo: u8, + _: u2, + }, + rebase: packed struct(u34) { + preserve: u17, + capacity: u17, + }, + }, + } = @bitCast(in.takeLeb128(u36) catch |e| switch (e) { + error.ReadFailed => unreachable, + error.Overflow => 0, + error.EndOfStream => break, + }); + + const buffered = deflate_w.writer.buffered(); + // Required for repeating patterns and since writing from `buffered` is illegal + var copy_buf: [512]u8 = undefined; + + if (data.is_rebase) { + const usable_capacity = deflate_w.writer.buffer.len - rebase_reserved_capacity; + const preserve = @min(data.params.rebase.preserve, usable_capacity); + const capacity = @min(data.params.rebase.capacity, usable_capacity - + @max(rebase_min_preserve, preserve)); + try deflate_w.writer.rebase(preserve, capacity); + continue; + } + + const max_bytes = max_size -| expected_size; + const bytes = if (!data.is_bytes and buffered.len != 0) bytes: { + const dist = @min(buffered.len, @as(u32, data.params.copy.dist) + 1); + const len = @min( + @max(@shlExact(@as(u9, data.params.copy.len_hi), 5) | data.params.copy.len_lo, 1), + max_bytes, + ); + // Reuse the implementation's history. Otherwise our own would need maintained. + const bytes_start = buffered[buffered.len - dist ..]; + const history_bytes = bytes_start[0..@min(bytes_start.len, len)]; + + @memcpy(copy_buf[0..history_bytes.len], history_bytes); + const new_history = len - history_bytes.len; + if (history_bytes.len != len) for ( // check needed for `- dist` + copy_buf[history_bytes.len..][0..new_history], + copy_buf[history_bytes.len - dist ..][0..new_history], + ) |*next, prev| { + next.* = prev; + }; + break :bytes copy_buf[0..len]; + } else bytes: { + const off = @shlExact(@as(u16, data.params.bytes.off_hi), 12) | + @shlExact(@as(u16, data.params.bytes.off_mi), 8) | + data.params.bytes.off_lo; + const len = @shlExact(@as(u16, data.params.bytes.len_hi), 10) | + data.params.bytes.len_lo; + const fbuf = &fbufs[@intFromEnum(data.params.bytes.kind)]; + break :bytes fbuf[off..][0..@min(len, fbuf.len - off, max_bytes)]; + }; + assert(bytes.len <= max_bytes); + try deflate_w.writer.writeAll(bytes); + expected_hash.update(bytes); + expected_size += @intCast(bytes.len); + } + + try deflate_w.writer.flush(); + try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash); +} + +/// Does not compress data +pub const Raw = struct { + /// After `flush` is called, all vtable calls with result in `error.WriteFailed.` + writer: Writer, + output: *Writer, + hasher: flate.Container.Hasher, - pub const Strategy = enum { huffman, store }; + const max_block_size: u16 = 65535; + const full_header: [5]u8 = .{ + BlockHeader.int(.{ .final = false, .kind = .stored }), + 255, + 255, + 0, + 0, + }; - pub fn init(output: *Writer, buffer: []u8, container: Container, strategy: Strategy) !Simple { - const header = container.header(); - try output.writeAll(header); + /// While there is no minimum buffer size, it is recommended + /// to be at least `flate.max_window_len` for optimal output. 
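+    ///
+    /// A minimal usage sketch (`out`, `buf`, and `bytes` are placeholders):
+    ///
+    ///     var raw: Raw = try .init(out, &buf, .gzip);
+    ///     try raw.writer.writeAll(bytes);
+    ///     try raw.writer.flush(); // final stored block + gzip footer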
+ pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Raw { + try output.writeAll(container.header()); return .{ - .buffer = buffer, - .wp = 0, - .block_writer = .init(output), + .writer = .{ + .buffer = buffer, + .vtable = &.{ + .drain = Raw.drain, + .flush = Raw.flush, + .rebase = Raw.rebase, + }, + }, + .output = output, .hasher = .init(container), - .strategy = strategy, }; } - pub fn flush(self: *Simple) !void { - try self.flushBuffer(false); - try self.block_writer.storedBlock("", false); - try self.block_writer.flush(); + fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + errdefer w.* = .failing; + const r: *Raw = @fieldParentPtr("writer", w); + const min_block = @min(w.buffer.len, max_block_size); + const pattern = data[data.len - 1]; + var partial_header: [5]u8 = undefined; + + var vecs: [16][]const u8 = undefined; + var vecs_n: usize = 0; + const data_bytes = Writer.countSplat(data, splat); + const total_bytes = w.end + data_bytes; + var rem_bytes = total_bytes; + var rem_splat = splat; + var rem_data = data; + var rem_data_elem: []const u8 = w.buffered(); + + assert(rem_bytes > min_block); + while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final + // also, it handles the case of `min_block` being zero (no buffer) + const block_size: u16 = @min(rem_bytes, max_block_size); + rem_bytes -= block_size; + + if (vecs_n == vecs.len) { + try r.output.writeVecAll(&vecs); + vecs_n = 0; + } + vecs[vecs_n] = if (block_size == 65535) + &full_header + else header: { + partial_header[0] = BlockHeader.int(.{ .final = false, .kind = .stored }); + mem.writeInt(u16, partial_header[1..3], block_size, .little); + mem.writeInt(u16, partial_header[3..5], ~block_size, .little); + break :header &partial_header; + }; + vecs_n += 1; + + var block_limit: Io.Limit = .limited(block_size); + while (true) { + if (vecs_n == vecs.len) { + try r.output.writeVecAll(&vecs); + vecs_n = 0; + } + + const vec = block_limit.sliceConst(rem_data_elem); + vecs[vecs_n] = vec; + vecs_n += 1; + r.hasher.update(vec); + + const is_pattern = rem_splat != splat and vec.len == pattern.len; + if (is_pattern) assert(pattern.len != 0); // exceeded countSplat + + if (!is_pattern or rem_splat == 0 or pattern.len > @intFromEnum(block_limit) / 2) { + rem_data_elem = rem_data_elem[vec.len..]; + block_limit = block_limit.subtract(vec.len).?; + + if (rem_data_elem.len == 0) { + rem_data_elem = rem_data[0]; + if (rem_data.len != 1) { + rem_data = rem_data[1..]; + } else if (rem_splat != 0) { + rem_splat -= 1; + } else { + // All of `data` has been consumed. + assert(block_limit == .nothing); + assert(rem_bytes == 0); + // Since `rem_bytes` and `block_limit` are zero, these won't be used. + rem_data = undefined; + rem_data_elem = undefined; + rem_splat = undefined; + } + } + if (block_limit == .nothing) break; + } else { + const out_splat = @intFromEnum(block_limit) / pattern.len; + assert(out_splat >= 2); + + try r.output.writeSplatAll(vecs[0..vecs_n], out_splat); + for (1..out_splat) |_| r.hasher.update(vec); + + vecs_n = 0; + block_limit = block_limit.subtract(pattern.len * out_splat).?; + if (rem_splat >= out_splat) { + // `out_splat` contains `rem_data`, however one more needs subtracted + // anyways since the next pattern is also being taken. + rem_splat -= out_splat; + } else { + // All of `data` has been consumed. 
+ assert(block_limit == .nothing); + assert(rem_bytes == 0); + // Since `rem_bytes` and `block_limit` are zero, these won't be used. + rem_data = undefined; + rem_data_elem = undefined; + rem_splat = undefined; + } + if (block_limit == .nothing) break; + } + } + } + + if (vecs_n != 0) { // can be the case if a splat was sent + try r.output.writeVecAll(vecs[0..vecs_n]); + } + + if (rem_bytes > data_bytes) { + assert(rem_bytes - data_bytes == rem_data_elem.len); + assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]); + } + return w.consume(total_bytes - rem_bytes); + } + + fn flush(w: *Writer) Writer.Error!void { + defer w.* = .failing; + try Raw.rebaseInner(w, 0, w.buffer.len, true); } - pub fn finish(self: *Simple) !void { - try self.flushBuffer(true); - try self.block_writer.flush(); - try self.hasher.container().writeFooter(&self.hasher, self.block_writer.output); + fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void { + errdefer w.* = .failing; + try Raw.rebaseInner(w, preserve, capacity, false); } - fn flushBuffer(self: *Simple, final: bool) !void { - const buf = self.buffer[0..self.wp]; - switch (self.strategy) { - .huffman => try self.block_writer.huffmanBlock(buf, final), - .store => try self.block_writer.storedBlock(buf, final), + fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void { + const r: *Raw = @fieldParentPtr("writer", w); + assert(preserve + capacity <= w.buffer.len); + if (eos) assert(capacity == w.buffer.len); + + var partial_header: [5]u8 = undefined; + var footer_buf: [8]u8 = undefined; + const preserved = @min(w.end, preserve); + var remaining = w.buffer[0 .. w.end - preserved]; + + var vecs: [16][]const u8 = undefined; + var vecs_n: usize = 0; + while (remaining.len > max_block_size) { // not >= so there is always a block down below + if (vecs_n == vecs.len) { + try r.output.writeVecAll(&vecs); + vecs_n = 0; + } + vecs[vecs_n + 0] = &full_header; + vecs[vecs_n + 1] = remaining[0..max_block_size]; + r.hasher.update(vecs[vecs_n + 1]); + vecs_n += 2; + remaining = remaining[max_block_size..]; + } + + // eos check required for empty block + if (w.buffer.len - (remaining.len + preserved) < capacity or eos) { + // A partial write is necessary to reclaim enough buffer space + const block_size: u16 = @intCast(remaining.len); + partial_header[0] = BlockHeader.int(.{ .final = eos, .kind = .stored }); + mem.writeInt(u16, partial_header[1..3], block_size, .little); + mem.writeInt(u16, partial_header[3..5], ~block_size, .little); + + if (vecs_n == vecs.len) { + try r.output.writeVecAll(&vecs); + vecs_n = 0; + } + vecs[vecs_n + 0] = &partial_header; + vecs[vecs_n + 1] = remaining[0..block_size]; + r.hasher.update(vecs[vecs_n + 1]); + vecs_n += 2; + remaining = remaining[block_size..]; + assert(remaining.len == 0); + + if (eos and r.hasher != .raw) { + // the footer is done here instead of `flush` so it can be included in the vector + var footer_w: Writer = .fixed(&footer_buf); + r.hasher.writeFooter(&footer_w) catch unreachable; + assert(footer_w.end != 0); + + if (vecs_n == vecs.len) { + try r.output.writeVecAll(&vecs); + return r.output.writeAll(footer_w.buffered()); + } else { + vecs[vecs_n] = footer_w.buffered(); + vecs_n += 1; + } + } } - self.wp = 0; + + try r.output.writeVecAll(vecs[0..vecs_n]); + _ = w.consume(w.end - preserved - remaining.len); } }; -test "generate a Huffman code from an array of frequencies" { - var freqs: [19]u16 = [_]u16{ - 8, // 0 - 1, // 1 - 1, // 2 - 2, // 3 - 5, // 4 - 10, // 5 
- 9, // 6 - 1, // 7 - 0, // 8 - 0, // 9 - 0, // 10 - 0, // 11 - 0, // 12 - 0, // 13 - 0, // 14 - 0, // 15 - 1, // 16 - 3, // 17 - 5, // 18 +test Raw { + const data_buf = try std.testing.allocator.create([4 * 65536]u8); + defer if (!builtin.fuzz) std.testing.allocator.destroy(data_buf); + var prng: std.Random.DefaultPrng = .init(std.testing.random_seed); + prng.random().bytes(data_buf); + try std.testing.fuzz(data_buf, testFuzzedRawInput, .{}); +} + +fn countVec(data: []const []const u8) usize { + var bytes: usize = 0; + for (data) |d| bytes += d.len; + return bytes; +} + +fn testFuzzedRawInput(data_buf: *const [4 * 65536]u8, input: []const u8) !void { + const HashedStoreWriter = struct { + writer: Writer, + state: enum { + header, + block_header, + block_body, + final_block_body, + footer, + end, + }, + block_remaining: u16, + container: flate.Container, + data_hash: flate.Container.Hasher, + data_size: usize, + footer_hash: u32, + footer_size: u32, + + pub fn init(buf: []u8, container: flate.Container) @This() { + return .{ + .writer = .{ + .vtable = &.{ + .drain = @This().drain, + .flush = @This().flush, + }, + .buffer = buf, + }, + .state = .header, + .block_remaining = 0, + .container = container, + .data_hash = .init(container), + .data_size = 0, + .footer_hash = undefined, + .footer_size = undefined, + }; + } + + /// Note that this implementation is somewhat dependent on the implementation of + /// `Raw` by expecting headers / footers to be continous in data elements. It + /// also expects the header to be the same as `flate.Container.header` and not + /// for multiple streams to be concatenated. + fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + errdefer w.* = .failing; + var h: *@This() = @fieldParentPtr("writer", w); + + var rem_splat = splat; + var rem_data = data; + var rem_data_elem: []const u8 = w.buffered(); + + data_loop: while (true) { + const wanted = switch (h.state) { + .header => h.container.headerSize(), + .block_header => 5, + .block_body, .final_block_body => h.block_remaining, + .footer => h.container.footerSize(), + .end => 1, + }; + + if (wanted != 0) { + while (rem_data_elem.len == 0) { + rem_data_elem = rem_data[0]; + if (rem_data.len != 1) { + rem_data = rem_data[1..]; + } else { + if (rem_splat == 0) { + break :data_loop; + } else { + rem_splat -= 1; + } + } + } + } + + const bytes = Io.Limit.limited(wanted).sliceConst(rem_data_elem); + rem_data_elem = rem_data_elem[bytes.len..]; + + switch (h.state) { + .header => { + if (bytes.len < wanted) + return error.WriteFailed; // header eos + if (!mem.eql(u8, bytes, h.container.header())) + return error.WriteFailed; // wrong header + h.state = .block_header; + }, + .block_header => { + if (bytes.len < wanted) + return error.WriteFailed; // store block header eos + const header: BlockHeader = @bitCast(@as(u3, @truncate(bytes[0]))); + if (header.kind != .stored) + return error.WriteFailed; // non-store block + const len = mem.readInt(u16, bytes[1..3], .little); + const nlen = mem.readInt(u16, bytes[3..5], .little); + if (nlen != ~len) + return error.WriteFailed; // wrong nlen + h.block_remaining = len; + h.state = if (!header.final) .block_body else .final_block_body; + }, + .block_body, .final_block_body => { + h.data_hash.update(bytes); + h.data_size += bytes.len; + h.block_remaining -= @intCast(bytes.len); + if (h.block_remaining == 0) { + h.state = if (h.state != .final_block_body) .block_header else .footer; + } + }, + .footer => { + if (bytes.len < wanted) + return 
error.WriteFailed; // footer eos + switch (h.container) { + .raw => {}, + .gzip => { + h.footer_hash = mem.readInt(u32, bytes[0..4], .little); + h.footer_size = mem.readInt(u32, bytes[4..8], .little); + }, + .zlib => { + h.footer_hash = mem.readInt(u32, bytes[0..4], .big); + }, + } + h.state = .end; + }, + .end => return error.WriteFailed, // data past end + } + } + + w.end = 0; + return Writer.countSplat(data, splat); + } + + fn flush(w: *Writer) Writer.Error!void { + defer w.* = .failing; // Clears buffer even if state hasn't reached `end` + _ = try @This().drain(w, &.{""}, 0); + } }; - var codes: [19]HuffmanEncoder.Code = undefined; - var enc: HuffmanEncoder = .{ - .codes = &codes, - .freq_cache = undefined, - .bit_count = undefined, - .lns = undefined, - .lfs = undefined, + var in: Io.Reader = .fixed(input); + const opts: packed struct(u19) { + container: PackedContainer, + buf_len: u17, + } = @bitCast(in.takeLeb128(u19) catch 0); + var output: HashedStoreWriter = .init(&.{}, opts.container.val()); + var r_buf: [2 * 65536]u8 = undefined; + var r: Raw = try .init( + &output.writer, + r_buf[0 .. opts.buf_len +% flate.max_window_len], + opts.container.val(), + ); + + var data_base: u18 = 0; + var expected_hash: flate.Container.Hasher = .init(opts.container.val()); + var expected_size: u32 = 0; + var vecs: [32][]const u8 = undefined; + var vecs_n: usize = 0; + + while (in.seek != in.end) { + const VecInfo = packed struct(u58) { + output: bool, + /// If set, `data_len` and `splat` are reinterpreted as `capacity` + /// and `preserve_len` respectively and `output` is treated as set. + rebase: bool, + block_aligning_len: bool, + block_aligning_splat: bool, + data_len: u18, + splat: u18, + data_off: u18, + }; + var vec_info: VecInfo = @bitCast(in.takeLeb128(u58) catch |e| switch (e) { + error.ReadFailed => unreachable, + error.Overflow, error.EndOfStream => 0, + }); + + { + const buffered = r.writer.buffered().len + countVec(vecs[0..vecs_n]); + const to_align = mem.alignForwardAnyAlign(usize, buffered, Raw.max_block_size) - buffered; + assert((buffered + to_align) % Raw.max_block_size == 0); + + if (vec_info.block_aligning_len) { + vec_info.data_len = @intCast(to_align); + } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and + to_align % vec_info.data_len == 0) + { + vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1; + } + } + + var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1; + add_vec: { + if (vec_info.rebase) break :add_vec; + if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) > + 10 * (1 << 16)) + { + // Skip this vector to avoid this test taking too long. + // 10 maximum sized blocks is choosen as the limit since it is two more + // than the maximum the implementation can output in one drain. 
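+                // Keep `splat` at 1 so the last already-queued vector is not splatted
+                // by this skipped vector's count (and so the `assert(splat == 1)`
+                // below still holds when no drain happens).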
+ splat = 1; + break :add_vec; + } + + vecs[vecs_n] = data_buf[@min( + data_base +% vec_info.data_off, + data_buf.len - vec_info.data_len, + )..][0..vec_info.data_len]; + + data_base +%= vec_info.data_len +% 3; // extra 3 to help catch aliasing bugs + + for (0..splat) |_| expected_hash.update(vecs[vecs_n]); + expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat; + vecs_n += 1; + } + + const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or + in.seek == in.end; + if (want_drain and vecs_n != 0) { + try r.writer.writeSplatAll(vecs[0..vecs_n], splat); + vecs_n = 0; + } else assert(splat == 1); + + if (vec_info.rebase) { + try r.writer.rebase(vec_info.data_len, @min( + r.writer.buffer.len -| vec_info.data_len, + vec_info.splat, + )); + } + } + + try r.writer.flush(); + try output.writer.flush(); + + try std.testing.expectEqual(.end, output.state); + try std.testing.expectEqual(expected_size, output.data_size); + switch (output.data_hash) { + .raw => {}, + .gzip => |gz| { + const expected_crc = expected_hash.gzip.crc.final(); + try std.testing.expectEqual(expected_crc, gz.crc.final()); + try std.testing.expectEqual(expected_crc, output.footer_hash); + try std.testing.expectEqual(expected_size, output.footer_size); + }, + .zlib => |zl| { + const expected_adler = expected_hash.zlib.adler; + try std.testing.expectEqual(expected_adler, zl.adler); + try std.testing.expectEqual(expected_adler, output.footer_hash); + }, + } +} + +/// Only performs huffman compression on data, does no matching. +pub const Huffman = struct { + writer: Writer, + bit_writer: BitWriter, + hasher: flate.Container.Hasher, + + const max_tokens: u16 = 65535 - 1; // one is reserved for EOF + + /// While there is no minimum buffer size, it is recommended + /// to be at least `flate.max_window_len` to improve compression. + /// + /// It is asserted `output` has a capacity of at least 8 bytes. 
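+    ///
+    /// A minimal usage sketch (`out`, `buf`, and `bytes` are placeholders):
+    ///
+    ///     var h: Huffman = try .init(out, &buf, .zlib);
+    ///     try h.writer.writeAll(bytes);
+    ///     try h.writer.flush(); // final block, byte alignment, and zlib footer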
+ pub fn init(output: *Writer, buffer: []u8, container: flate.Container) Writer.Error!Huffman { + assert(output.buffer.len > 8); + + try output.writeAll(container.header()); + return .{ + .writer = .{ + .buffer = buffer, + .vtable = &.{ + .drain = Huffman.drain, + .flush = Huffman.flush, + .rebase = Huffman.rebase, + }, + }, + .bit_writer = .init(output), + .hasher = .init(container), + }; + } + + fn drain(w: *Writer, data: []const []const u8, splat: usize) Writer.Error!usize { + { + //std.debug.print("drain {} (buffered)", .{w.buffered().len}); + //for (data) |d| std.debug.print("\n\t+ {}", .{d.len}); + //std.debug.print(" x {}\n\n", .{splat}); + } + + const h: *Huffman = @fieldParentPtr("writer", w); + const min_block = @min(w.buffer.len, max_tokens); + const pattern = data[data.len - 1]; + + const data_bytes = Writer.countSplat(data, splat); + const total_bytes = w.end + data_bytes; + var rem_bytes = total_bytes; + var rem_splat = splat; + var rem_data = data; + var rem_data_elem: []const u8 = w.buffered(); + + assert(rem_bytes > min_block); + while (rem_bytes > min_block) { // not >= to allow `min_block` blocks to be marked as final + // also, it handles the case of `min_block` being zero (no buffer) + const block_size: u16 = @min(rem_bytes, max_tokens); + rem_bytes -= block_size; + + // Count frequencies + comptime assert(max_tokens != 65535); + var freqs: [257]u16 = @splat(0); + freqs[256] = 1; + + const start_splat = rem_splat; + const start_data = rem_data; + const start_data_elem = rem_data_elem; + + var block_limit: Io.Limit = .limited(block_size); + while (true) { + const bytes = block_limit.sliceConst(rem_data_elem); + const is_pattern = rem_splat != splat and bytes.len == pattern.len; + + const mul = if (!is_pattern) 1 else @intFromEnum(block_limit) / pattern.len; + assert(mul != 0); + if (is_pattern) assert(mul <= rem_splat + 1); // one more for `rem_data` + + for (bytes) |b| freqs[b] += @intCast(mul); + rem_data_elem = rem_data_elem[bytes.len..]; + block_limit = block_limit.subtract(bytes.len * mul).?; + + if (rem_data_elem.len == 0) { + rem_data_elem = rem_data[0]; + if (rem_data.len != 1) { + rem_data = rem_data[1..]; + } else if (rem_splat >= mul) { + // if the counter was not the pattern, `mul` is always one, otherwise, + // `mul` contains `rem_data`, however one more needs subtracted anyways + // since the next pattern is also being taken. + rem_splat -= mul; + } else { + // All of `data` has been consumed. + assert(block_limit == .nothing); + assert(rem_bytes == 0); + // Since `rem_bytes` and `block_limit` are zero, these won't be used. + rem_data = undefined; + rem_data_elem = undefined; + rem_splat = undefined; + } + } + if (block_limit == .nothing) break; + } + + // Output block + rem_splat = start_splat; + rem_data = start_data; + rem_data_elem = start_data_elem; + block_limit = .limited(block_size); + + var codes_buf: CodesBuf = .init; + if (try h.outputHeader(&freqs, &codes_buf, block_size, false)) |table| { + while (true) { + const bytes = block_limit.sliceConst(rem_data_elem); + rem_data_elem = rem_data_elem[bytes.len..]; + block_limit = block_limit.subtract(bytes.len).?; + + h.hasher.update(bytes); + for (bytes) |b| { + try h.bit_writer.write(table.codes[b], table.bits[b]); + } + + if (rem_data_elem.len == 0) { + rem_data_elem = rem_data[0]; + if (rem_data.len != 1) { + rem_data = rem_data[1..]; + } else if (rem_splat != 0) { + rem_splat -= 1; + } else { + // All of `data` has been consumed. 
+ assert(block_limit == .nothing); + assert(rem_bytes == 0); + // Since `rem_bytes` and `block_limit` are zero, these won't be used. + rem_data = undefined; + rem_data_elem = undefined; + rem_splat = undefined; + } + } + if (block_limit == .nothing) break; + } + try h.bit_writer.write(table.codes[256], table.bits[256]); + } else while (true) { + // Store block + + // Write data that is not a full vector element + const in_pattern = rem_splat != splat; + const vec_elem_i, const in_data = + @subWithOverflow(data.len - (rem_data.len - @intFromBool(in_pattern)), 1); + const is_elem = in_data == 0 and data[vec_elem_i].len == rem_data_elem.len; + + if (!is_elem or rem_data_elem.len > @intFromEnum(block_limit)) { + block_limit = block_limit.subtract(rem_data_elem.len) orelse { + try h.bit_writer.output.writeAll(rem_data_elem[0..@intFromEnum(block_limit)]); + h.hasher.update(rem_data_elem[0..@intFromEnum(block_limit)]); + rem_data_elem = rem_data_elem[@intFromEnum(block_limit)..]; + assert(rem_data_elem.len != 0); + break; + }; + try h.bit_writer.output.writeAll(rem_data_elem); + h.hasher.update(rem_data_elem); + } else { + // Put `rem_data_elem` back in `rem_data` + if (!in_pattern) { + rem_data = data[vec_elem_i..]; + } else { + rem_splat += 1; + } + } + rem_data_elem = undefined; // it is always updated below + + // Send through as much of the original vector as possible + var vec_n: usize = 0; + var vlimit = block_limit; + const vec_splat = while (rem_data[vec_n..].len != 1) { + vlimit = vlimit.subtract(rem_data[vec_n].len) orelse break 1; + vec_n += 1; + } else vec_splat: { + // For `pattern.len == 0`, the value of `vec_splat` does not matter. + const vec_splat = @intFromEnum(vlimit) / @max(1, pattern.len); + if (pattern.len != 0) assert(vec_splat <= rem_splat + 1); + vlimit = vlimit.subtract(pattern.len * vec_splat).?; + vec_n += 1; + break :vec_splat vec_splat; + }; + + const n = if (vec_n != 0) n: { + assert(@intFromEnum(block_limit) - @intFromEnum(vlimit) == + Writer.countSplat(rem_data[0..vec_n], vec_splat)); + break :n try h.bit_writer.output.writeSplat(rem_data[0..vec_n], vec_splat); + } else 0; // Still go into the case below to advance the vector + block_limit = block_limit.subtract(n).?; + var consumed: Io.Limit = .limited(n); + + while (rem_data.len != 1) { + const elem = rem_data[0]; + rem_data = rem_data[1..]; + consumed = consumed.subtract(elem.len) orelse { + h.hasher.update(elem[0..@intFromEnum(consumed)]); + rem_data_elem = elem[@intFromEnum(consumed)..]; + break; + }; + h.hasher.update(elem); + } else { + if (pattern.len == 0) { + // All of `data` has been consumed. However, the general + // case below does not work since it divides by zero. + assert(consumed == .nothing); + assert(block_limit == .nothing); + assert(rem_bytes == 0); + // Since `rem_bytes` and `block_limit` are zero, these won't be used. + rem_splat = undefined; + rem_data = undefined; + rem_data_elem = undefined; + break; + } + + const splatted = @intFromEnum(consumed) / pattern.len; + const partial = @intFromEnum(consumed) % pattern.len; + for (0..splatted) |_| h.hasher.update(pattern); + h.hasher.update(pattern[0..partial]); + + const taken_splat = splatted + 1; + if (rem_splat >= taken_splat) { + rem_splat -= taken_splat; + rem_data_elem = pattern[partial..]; + } else { + // All of `data` has been consumed. + assert(partial == 0); + assert(block_limit == .nothing); + assert(rem_bytes == 0); + // Since `rem_bytes` and `block_limit` are zero, these won't be used. 
+ rem_data = undefined; + rem_data_elem = undefined; + rem_splat = undefined; + } + } + + if (block_limit == .nothing) break; + } + } + + if (rem_bytes > data_bytes) { + assert(rem_bytes - data_bytes == rem_data_elem.len); + assert(&rem_data_elem[0] == &w.buffer[total_bytes - rem_bytes]); + } + return w.consume(total_bytes - rem_bytes); + } + + fn flush(w: *Writer) Writer.Error!void { + defer w.* = .failing; + const h: *Huffman = @fieldParentPtr("writer", w); + try Huffman.rebaseInner(w, 0, w.buffer.len, true); + try h.bit_writer.output.rebase(0, 1); + h.bit_writer.byteAlign(); + try h.hasher.writeFooter(h.bit_writer.output); + } + + fn rebase(w: *Writer, preserve: usize, capacity: usize) Writer.Error!void { + errdefer w.* = .failing; + try Huffman.rebaseInner(w, preserve, capacity, false); + } + + fn rebaseInner(w: *Writer, preserve: usize, capacity: usize, eos: bool) Writer.Error!void { + const h: *Huffman = @fieldParentPtr("writer", w); + assert(preserve + capacity <= w.buffer.len); + if (eos) assert(capacity == w.buffer.len); + + const preserved = @min(w.end, preserve); + var remaining = w.buffer[0 .. w.end - preserved]; + while (remaining.len > max_tokens) { // not >= so there is always a block down below + const bytes = remaining[0..max_tokens]; + remaining = remaining[max_tokens..]; + try h.outputBytes(bytes, false); + } + + // eos check required for empty block + if (w.buffer.len - (remaining.len + preserved) < capacity or eos) { + const bytes = remaining; + remaining = &.{}; + try h.outputBytes(bytes, eos); + } + + _ = w.consume(w.end - preserved - remaining.len); + } + + fn outputBytes(h: *Huffman, bytes: []const u8, eos: bool) Writer.Error!void { + comptime assert(max_tokens != 65535); + assert(bytes.len <= max_tokens); + var freqs: [257]u16 = @splat(0); + freqs[256] = 1; + for (bytes) |b| freqs[b] += 1; + h.hasher.update(bytes); + + var codes_buf: CodesBuf = .init; + if (try h.outputHeader(&freqs, &codes_buf, @intCast(bytes.len), eos)) |table| { + for (bytes) |b| { + try h.bit_writer.write(table.codes[b], table.bits[b]); + } + try h.bit_writer.write(table.codes[256], table.bits[256]); + } else { + try h.bit_writer.output.writeAll(bytes); + } + } + + const CodesBuf = struct { + dyn_codes: [258]u16, + dyn_bits: [258]u4, + + pub const init: CodesBuf = .{ + .dyn_codes = @as([257]u16, undefined) ++ .{0}, + .dyn_bits = @as([257]u4, @splat(0)) ++ .{1}, + }; }; - enc.generate(freqs[0..], 7); - - try testing.expectEqual(@as(u32, 141), enc.bitLength(freqs[0..])); - - try testing.expectEqual(@as(usize, 3), enc.codes[0].len); - try testing.expectEqual(@as(usize, 6), enc.codes[1].len); - try testing.expectEqual(@as(usize, 6), enc.codes[2].len); - try testing.expectEqual(@as(usize, 5), enc.codes[3].len); - try testing.expectEqual(@as(usize, 3), enc.codes[4].len); - try testing.expectEqual(@as(usize, 2), enc.codes[5].len); - try testing.expectEqual(@as(usize, 2), enc.codes[6].len); - try testing.expectEqual(@as(usize, 6), enc.codes[7].len); - try testing.expectEqual(@as(usize, 0), enc.codes[8].len); - try testing.expectEqual(@as(usize, 0), enc.codes[9].len); - try testing.expectEqual(@as(usize, 0), enc.codes[10].len); - try testing.expectEqual(@as(usize, 0), enc.codes[11].len); - try testing.expectEqual(@as(usize, 0), enc.codes[12].len); - try testing.expectEqual(@as(usize, 0), enc.codes[13].len); - try testing.expectEqual(@as(usize, 0), enc.codes[14].len); - try testing.expectEqual(@as(usize, 0), enc.codes[15].len); - try testing.expectEqual(@as(usize, 6), enc.codes[16].len); - try 
testing.expectEqual(@as(usize, 5), enc.codes[17].len); - try testing.expectEqual(@as(usize, 3), enc.codes[18].len); - - try testing.expectEqual(@as(u16, 0x0), enc.codes[5].code); - try testing.expectEqual(@as(u16, 0x2), enc.codes[6].code); - try testing.expectEqual(@as(u16, 0x1), enc.codes[0].code); - try testing.expectEqual(@as(u16, 0x5), enc.codes[4].code); - try testing.expectEqual(@as(u16, 0x3), enc.codes[18].code); - try testing.expectEqual(@as(u16, 0x7), enc.codes[3].code); - try testing.expectEqual(@as(u16, 0x17), enc.codes[17].code); - try testing.expectEqual(@as(u16, 0x0f), enc.codes[1].code); - try testing.expectEqual(@as(u16, 0x2f), enc.codes[2].code); - try testing.expectEqual(@as(u16, 0x1f), enc.codes[7].code); - try testing.expectEqual(@as(u16, 0x3f), enc.codes[16].code); + + /// Returns null if the block is stored. + fn outputHeader( + h: *Huffman, + freqs: *const [257]u16, + buf: *CodesBuf, + bytes: u16, + eos: bool, + ) Writer.Error!?struct { + codes: *const [257]u16, + bits: *const [257]u4, + } { + assert(freqs[256] == 1); + const dyn_codes_bitsize, _ = huffman.build( + freqs, + buf.dyn_codes[0..257], + buf.dyn_bits[0..257], + 15, + true, + ); + + var clen_values: [258]u8 = undefined; + var clen_extra: [258]u8 = undefined; + var clen_freqs: [19]u16 = @splat(0); + const clen_len, const clen_extra_bitsize = buildClen( + &buf.dyn_bits, + &clen_values, + &clen_extra, + &clen_freqs, + ); + + var clen_codes: [19]u16 = undefined; + var clen_bits: [19]u4 = @splat(0); + const clen_codes_bitsize, _ = huffman.build( + &clen_freqs, + &clen_codes, + &clen_bits, + 7, + false, + ); + const hclen = clenHlen(clen_freqs); + + const dynamic_bitsize = @as(u32, 14) + + (4 + @as(u6, hclen)) * 3 + clen_codes_bitsize + clen_extra_bitsize + + dyn_codes_bitsize; + const fixed_bitsize = n: { + const freq7 = 1; // eos + var freq9: u16 = 0; + for (freqs[144..256]) |f| freq9 += f; + const freq8: u16 = bytes - freq9; + break :n @as(u32, freq7) * 7 + @as(u32, freq8) * 8 + @as(u32, freq9) * 9; + }; + const stored_bitsize = n: { + const stored_align_bits = -%(h.bit_writer.buffered_n +% 3); + break :n stored_align_bits + @as(u32, 32) + @as(u32, bytes) * 8; + }; + + //std.debug.print("@ {}{{{}}} ", .{ h.bit_writer.output.end, h.bit_writer.buffered_n }); + //std.debug.print("#{} -> s {} f {} d {}\n", .{ bytes, stored_bitsize, fixed_bitsize, dynamic_bitsize }); + + if (stored_bitsize <= @min(dynamic_bitsize, fixed_bitsize)) { + try h.bit_writer.write(BlockHeader.int(.{ .kind = .stored, .final = eos }), 3); + try h.bit_writer.output.rebase(0, 5); + h.bit_writer.byteAlign(); + h.bit_writer.output.writeInt(u16, bytes, .little) catch unreachable; + h.bit_writer.output.writeInt(u16, ~bytes, .little) catch unreachable; + return null; + } + + if (fixed_bitsize <= dynamic_bitsize) { + try h.bit_writer.write(BlockHeader.int(.{ .final = eos, .kind = .fixed }), 3); + return .{ + .codes = token.fixed_lit_codes[0..257], + .bits = token.fixed_lit_bits[0..257], + }; + } else { + try h.bit_writer.write(BlockHeader.Dynamic.int(.{ + .regular = .{ .final = eos, .kind = .dynamic }, + .hlit = 0, + .hdist = 0, + .hclen = hclen, + }), 17); + try h.bit_writer.writeClen( + hclen, + clen_values[0..clen_len], + clen_extra[0..clen_len], + clen_codes, + clen_bits, + ); + return .{ .codes = buf.dyn_codes[0..257], .bits = buf.dyn_bits[0..257] }; + } + } +}; + +test Huffman { + const fbufs = try testingFreqBufs(); + defer if (!builtin.fuzz) std.testing.allocator.destroy(fbufs); + try std.testing.fuzz(fbufs, testFuzzedHuffmanInput, .{}); +} 
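+
+// A self-contained round-trip sketch; the `.raw` container and buffer sizes are
+// illustrative choices, and the fuzz test above remains the real coverage.
+test "Huffman round trip sketch" {
+    const data = "so much repetition, so much repetition";
+    var flate_buf: [512]u8 = undefined;
+    var flate_w: Writer = .fixed(&flate_buf);
+    var h_buf: [flate.max_window_len]u8 = undefined;
+    var h: Huffman = try .init(&flate_w, &h_buf, .raw);
+    try h.writer.writeAll(data);
+    try h.writer.flush();
+
+    var expected_hash: flate.Container.Hasher = .init(.raw);
+    expected_hash.update(data);
+    try testingCheckDecompressedMatches(flate_w.buffered(), data.len, expected_hash);
+}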
+ +/// This function is derived from `testFuzzedRawInput` with a few changes for fuzzing `Huffman`. +fn testFuzzedHuffmanInput(fbufs: *const [2][65536]u8, input: []const u8) !void { + var in: Io.Reader = .fixed(input); + const opts: packed struct(u19) { + container: PackedContainer, + buf_len: u17, + } = @bitCast(in.takeLeb128(u19) catch 0); + var flate_buf: [2 * 65536]u8 = undefined; + var flate_w: Writer = .fixed(&flate_buf); + var h_buf: [2 * 65536]u8 = undefined; + var h: Huffman = try .init( + &flate_w, + h_buf[0 .. opts.buf_len +% flate.max_window_len], + opts.container.val(), + ); + + var expected_hash: flate.Container.Hasher = .init(opts.container.val()); + var expected_size: u32 = 0; + var vecs: [32][]const u8 = undefined; + var vecs_n: usize = 0; + + while (in.seek != in.end) { + const VecInfo = packed struct(u55) { + output: bool, + /// If set, `data_len` and `splat` are reinterpreted as `capacity` + /// and `preserve_len` respectively and `output` is treated as set. + rebase: bool, + block_aligning_len: bool, + block_aligning_splat: bool, + data_off_hi: u8, + random_data: u1, + data_len: u16, + splat: u18, + /// This is less useful as each value is part of the same gradient 'step' + data_off_lo: u8, + }; + var vec_info: VecInfo = @bitCast(in.takeLeb128(u55) catch |e| switch (e) { + error.ReadFailed => unreachable, + error.Overflow, error.EndOfStream => 0, + }); + + { + const buffered = h.writer.buffered().len + countVec(vecs[0..vecs_n]); + const to_align = mem.alignForwardAnyAlign(usize, buffered, Huffman.max_tokens) - buffered; + assert((buffered + to_align) % Huffman.max_tokens == 0); + + if (vec_info.block_aligning_len) { + vec_info.data_len = @intCast(to_align); + } else if (vec_info.block_aligning_splat and vec_info.data_len != 0 and + to_align % vec_info.data_len == 0) + { + vec_info.splat = @divExact(@as(u18, @intCast(to_align)), vec_info.data_len) -% 1; + } + } + + var splat = if (vec_info.output and !vec_info.rebase) vec_info.splat +% 1 else 1; + add_vec: { + if (vec_info.rebase) break :add_vec; + if (expected_size +| math.mulWide(u18, vec_info.data_len, splat) > 4 * (1 << 16)) { + // Skip this vector to avoid this test taking too long. 
+ splat = 1; + break :add_vec; + } + + const data_buf = &fbufs[vec_info.random_data]; + vecs[vecs_n] = data_buf[@min( + (@as(u16, vec_info.data_off_hi) << 8) | vec_info.data_off_lo, + data_buf.len - vec_info.data_len, + )..][0..vec_info.data_len]; + + for (0..splat) |_| expected_hash.update(vecs[vecs_n]); + expected_size += @as(u32, @intCast(vecs[vecs_n].len)) * splat; + vecs_n += 1; + } + + const want_drain = vecs_n == vecs.len or vec_info.output or vec_info.rebase or + in.seek == in.end; + if (want_drain and vecs_n != 0) { + var n = h.writer.buffered().len + Writer.countSplat(vecs[0..vecs_n], splat); + const oos = h.writer.writeSplatAll(vecs[0..vecs_n], splat) == error.WriteFailed; + n -= h.writer.buffered().len; + const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable; + const lim = flate_w.end + 6 * block_lim + n; // 6 since block header may span two bytes + if (flate_w.end > lim) return error.OverheadTooLarge; + if (oos) return; + + vecs_n = 0; + } else assert(splat == 1); + + if (vec_info.rebase) { + const old_end = flate_w.end; + var n = h.writer.buffered().len; + const oos = h.writer.rebase(vec_info.data_len, @min( + h.writer.buffer.len -| vec_info.data_len, + vec_info.splat, + )) == error.WriteFailed; + n -= h.writer.buffered().len; + const block_lim = math.divCeil(usize, n, Huffman.max_tokens) catch unreachable; + const lim = old_end + 6 * block_lim + n; // 6 since block header may span two bytes + if (flate_w.end > lim) return error.OverheadTooLarge; + if (oos) return; + } + } + + { + const old_end = flate_w.end; + const n = h.writer.buffered().len; + const oos = h.writer.flush() == error.WriteFailed; + assert(h.writer.buffered().len == 0); + const block_lim = @max(1, math.divCeil(usize, n, Huffman.max_tokens) catch unreachable); + const lim = old_end + 6 * block_lim + n + opts.container.val().footerSize(); + if (flate_w.end > lim) return error.OverheadTooLarge; + if (oos) return; + } + + try testingCheckDecompressedMatches(flate_w.buffered(), expected_size, expected_hash); } diff --git a/lib/std/compress/flate/Decompress.zig b/lib/std/compress/flate/Decompress.zig index 47570beab201..d9b6a82a63f0 100644 --- a/lib/std/compress/flate/Decompress.zig +++ b/lib/std/compress/flate/Decompress.zig @@ -7,11 +7,10 @@ const Reader = std.Io.Reader; const Container = flate.Container; const Decompress = @This(); -const Token = @import("Token.zig"); +const token = @import("token.zig"); input: *Reader, -next_bits: Bits, -remaining_bits: std.math.Log2Int(Bits), +consumed_bits: u3, reader: Reader, @@ -25,8 +24,6 @@ state: State, err: ?Error, -const Bits = usize; - const BlockType = enum(u2) { stored = 0, fixed = 1, @@ -39,6 +36,8 @@ const State = union(enum) { block_header, stored_block: u16, fixed_block, + fixed_block_literal: u8, + fixed_block_match: u16, dynamic_block, dynamic_block_literal: u8, dynamic_block_match: u16, @@ -87,8 +86,7 @@ pub fn init(input: *Reader, container: Container, buffer: []u8) Decompress { .end = 0, }, .input = input, - .next_bits = 0, - .remaining_bits = 0, + .consumed_bits = 0, .container_metadata = .init(container), .lit_dec = .{}, .dst_dec = .{}, @@ -183,27 +181,25 @@ fn streamIndirectInner(d: *Decompress) Reader.Error!usize { return 0; } -fn decodeLength(self: *Decompress, code: u8) !u16 { - if (code > 28) return error.InvalidCode; - const ml = Token.matchLength(code); - return if (ml.extra_bits == 0) // 0 - 5 extra bits - ml.base - else - ml.base + try self.takeBitsRuntime(ml.extra_bits); +fn decodeLength(self: *Decompress, code_int: u5) 
!u16 { + if (code_int > 28) return error.InvalidCode; + const l: token.LenCode = .fromInt(code_int); + const base = l.base(); + const extra = l.extraBits(); + return token.min_length + (base | try self.takeBits(extra)); } -fn decodeDistance(self: *Decompress, code: u8) !u16 { - if (code > 29) return error.InvalidCode; - const md = Token.matchDistance(code); - return if (md.extra_bits == 0) // 0 - 13 extra bits - md.base - else - md.base + try self.takeBitsRuntime(md.extra_bits); +fn decodeDistance(self: *Decompress, code_int: u5) !u16 { + if (code_int > 29) return error.InvalidCode; + const d: token.DistCode = .fromInt(code_int); + const base = d.base(); + const extra = d.extraBits(); + return token.min_distance + (base | try self.takeBits(extra)); } -// Decode code length symbol to code length. Writes decoded length into -// lens slice starting at position pos. Returns number of positions -// advanced. +/// Decode code length symbol to code length. Writes decoded length into +/// lens slice starting at position pos. Returns number of positions +/// advanced. fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usize { if (pos >= lens.len) return error.InvalidDynamicBlockHeader; @@ -217,7 +213,7 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz 16 => { // Copy the previous code length 3 - 6 times. // The next 2 bits indicate repeat length - const n: u8 = @as(u8, try self.takeBits(u2)) + 3; + const n: u8 = @as(u8, try self.takeIntBits(u2)) + 3; if (pos == 0 or pos + n > lens.len) return error.InvalidDynamicBlockHeader; for (0..n) |i| { @@ -226,17 +222,17 @@ fn dynamicCodeLength(self: *Decompress, code: u16, lens: []u4, pos: usize) !usiz return n; }, // Repeat a code length of 0 for 3 - 10 times. (3 bits of length) - 17 => return @as(u8, try self.takeBits(u3)) + 3, + 17 => return @as(u8, try self.takeIntBits(u3)) + 3, // Repeat a code length of 0 for 11 - 138 times (7 bits of length) - 18 => return @as(u8, try self.takeBits(u7)) + 11, + 18 => return @as(u8, try self.takeIntBits(u7)) + 11, else => return error.InvalidDynamicBlockHeader, } } fn decodeSymbol(self: *Decompress, decoder: anytype) !Symbol { // Maximum code len is 15 bits. 
- const sym = try decoder.find(@bitReverse(try self.peekBits(u15))); - try self.tossBits(sym.code_bits); + const sym = try decoder.find(@bitReverse(try self.peekIntBitsShort(u15))); + try self.tossBitsShort(sym.code_bits); return sym; } @@ -320,11 +316,11 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader .raw => continue :sw .block_header, }, .block_header => { - d.final_block = (try d.takeBits(u1)) != 0; - const block_type: BlockType = @enumFromInt(try d.takeBits(u2)); + d.final_block = (try d.takeIntBits(u1)) != 0; + const block_type: BlockType = @enumFromInt(try d.takeIntBits(u2)); switch (block_type) { .stored => { - d.alignBitsDiscarding(); + d.alignBitsForward(); // everything after this is byte aligned in stored block const len = try in.takeInt(u16, .little); const nlen = try in.takeInt(u16, .little); @@ -333,17 +329,17 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader }, .fixed => continue :sw .fixed_block, .dynamic => { - const hlit: u16 = @as(u16, try d.takeBits(u5)) + 257; // number of ll code entries present - 257 - const hdist: u16 = @as(u16, try d.takeBits(u5)) + 1; // number of distance code entries - 1 - const hclen: u8 = @as(u8, try d.takeBits(u4)) + 4; // hclen + 4 code lengths are encoded + const hlit: u16 = @as(u16, try d.takeIntBits(u5)) + 257; // number of ll code entries present - 257 + const hdist: u16 = @as(u16, try d.takeIntBits(u5)) + 1; // number of distance code entries - 1 + const hclen: u8 = @as(u8, try d.takeIntBits(u4)) + 4; // hclen + 4 code lengths are encoded if (hlit > 286 or hdist > 30) return error.InvalidDynamicBlockHeader; // lengths for code lengths var cl_lens: [19]u4 = @splat(0); - for (flate.HuffmanEncoder.codegen_order[0..hclen]) |i| { - cl_lens[i] = try d.takeBits(u3); + for (token.codegen_order[0..hclen]) |i| { + cl_lens[i] = try d.takeIntBits(u3); } var cl_dec: CodegenDecoder = .{}; try cl_dec.generate(&cl_lens); @@ -352,9 +348,9 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader var dec_lens: [286 + 30]u4 = @splat(0); var pos: usize = 0; while (pos < hlit + hdist) { - const peeked = @bitReverse(try d.peekBits(u7)); + const peeked = @bitReverse(try d.peekIntBitsShort(u7)); const sym = try cl_dec.find(peeked); - try d.tossBits(sym.code_bits); + try d.tossBitsShort(sym.code_bits); pos += try d.dynamicCodeLength(sym.symbol, &dec_lens, pos); } if (pos > hlit + hdist) { @@ -373,9 +369,12 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader } }, .stored_block => |remaining_len| { - const out = try w.writableSliceGreedyPreserve(flate.history_len, 1); + const out: []u8 = if (remaining != 0) + try w.writableSliceGreedyPreserve(flate.history_len, 1) + else + &.{}; var limited_out: [1][]u8 = .{limit.min(.limited(remaining_len)).slice(out)}; - const n = try d.input.readVec(&limited_out); + const n = try in.readVec(&limited_out); if (remaining_len - n == 0) { d.state = if (d.final_block) .protocol_footer else .block_header; } else { @@ -389,8 +388,14 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader const code = try d.readFixedCode(); switch (code) { 0...255 => { - try w.writeBytePreserve(flate.history_len, @intCast(code)); - remaining -= 1; + if (remaining != 0) { + @branchHint(.likely); + try w.writeBytePreserve(flate.history_len, @intCast(code)); + remaining -= 1; + } else { + d.state = .{ .fixed_block_literal = @intCast(code) }; + return @intFromEnum(limit) - remaining; + } }, 256 => { 
d.state = if (d.final_block) .protocol_footer else .block_header; @@ -400,9 +405,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader // Handles fixed block non literal (length) code. // Length code is followed by 5 bits of distance code. const length = try d.decodeLength(@intCast(code - 257)); - const distance = try d.decodeDistance(@bitReverse(try d.takeBits(u5))); - try writeMatch(w, length, distance); - remaining -= length; + continue :sw .{ .fixed_block_match = length }; }, else => return error.InvalidCode, } @@ -410,6 +413,24 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader d.state = .fixed_block; return @intFromEnum(limit) - remaining; }, + .fixed_block_literal => |symbol| { + assert(remaining != 0); + remaining -= 1; + try w.writeBytePreserve(flate.history_len, symbol); + continue :sw .fixed_block; + }, + .fixed_block_match => |length| { + if (remaining >= length) { + @branchHint(.likely); + const distance = try d.decodeDistance(@bitReverse(try d.takeIntBits(u5))); + try writeMatch(w, length, distance); + remaining -= length; + continue :sw .fixed_block; + } else { + d.state = .{ .fixed_block_match = length }; + return @intFromEnum(limit) - remaining; + } + }, .dynamic_block => { // In larger archives most blocks are usually dynamic, so // decompression performance depends on this logic. @@ -429,7 +450,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader }, .match => { // Decode match backreference - const length = try d.decodeLength(sym.symbol); + const length = try d.decodeLength(@intCast(sym.symbol)); continue :sw .{ .dynamic_block_match = length }; }, .end_of_block => { @@ -449,7 +470,7 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader @branchHint(.likely); remaining -= length; const dsm = try d.decodeSymbol(&d.dst_dec); - const distance = try d.decodeDistance(dsm.symbol); + const distance = try d.decodeDistance(@intCast(dsm.symbol)); try writeMatch(w, length, distance); continue :sw .dynamic_block; } else { @@ -458,23 +479,16 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader } }, .protocol_footer => { + d.alignBitsForward(); switch (d.container_metadata) { .gzip => |*gzip| { - d.alignBitsDiscarding(); - gzip.* = .{ - .crc = try in.takeInt(u32, .little), - .count = try in.takeInt(u32, .little), - }; + gzip.crc = try in.takeInt(u32, .little); + gzip.count = try in.takeInt(u32, .little); }, .zlib => |*zlib| { - d.alignBitsDiscarding(); - zlib.* = .{ - .adler = try in.takeInt(u32, .little), - }; - }, - .raw => { - d.alignBitsPreserving(); + zlib.adler = try in.takeInt(u32, .big); }, + .raw => {}, } d.state = .end; return @intFromEnum(limit) - remaining; @@ -487,10 +501,10 @@ fn streamInner(d: *Decompress, w: *Writer, limit: std.Io.Limit) (Error || Reader /// back from current write position, and `length` of bytes. 
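The writeMatch helper that follows copies a back-reference one byte at a time, so an overlapping match (distance smaller than length) deliberately repeats the bytes it has just produced. A self-contained sketch of that behaviour; the buffer, literals, and test name are made up for illustration:

const std = @import("std");

test "overlapping back-reference repeats the pattern" {
    var buf: [8]u8 = undefined;
    @memcpy(buf[0..2], "ab");
    var end: usize = 2;
    const distance: usize = 2;
    // Copy 6 bytes from `distance` back; each source byte may have just been written.
    for (0..6) |_| {
        buf[end] = buf[end - distance];
        end += 1;
    }
    try std.testing.expectEqualStrings("abababab", buf[0..end]);
}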
fn writeMatch(w: *Writer, length: u16, distance: u16) !void { if (w.end < distance) return error.InvalidMatch; - if (length < Token.base_length) return error.InvalidMatch; - if (length > Token.max_length) return error.InvalidMatch; - if (distance < Token.min_distance) return error.InvalidMatch; - if (distance > Token.max_distance) return error.InvalidMatch; + if (length < token.min_length) return error.InvalidMatch; + if (length > token.max_length) return error.InvalidMatch; + if (distance < token.min_distance) return error.InvalidMatch; + if (distance > token.max_distance) return error.InvalidMatch; // This is not a @memmove; it intentionally repeats patterns caused by // iterating one byte at a time. @@ -500,137 +514,71 @@ fn writeMatch(w: *Writer, length: u16, distance: u16) !void { for (dest, src) |*d, s| d.* = s; } -fn takeBits(d: *Decompress, comptime U: type) !U { - const remaining_bits = d.remaining_bits; - const next_bits = d.next_bits; - if (remaining_bits >= @bitSizeOf(U)) { - const u: U = @truncate(next_bits); - d.next_bits = next_bits >> @bitSizeOf(U); - d.remaining_bits = remaining_bits - @bitSizeOf(U); - return u; - } - const in = d.input; - const next_int = in.takeInt(Bits, .little) catch |err| switch (err) { - error.ReadFailed => return error.ReadFailed, - error.EndOfStream => return takeBitsEnding(d, U), +fn peekBits(d: *Decompress, n: u4) !u16 { + const bits = d.input.peekInt(u32, .little) catch |e| return switch (e) { + error.ReadFailed => error.ReadFailed, + error.EndOfStream => d.peekBitsEnding(n), }; - const needed_bits = @bitSizeOf(U) - remaining_bits; - const u: U = @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits); - d.next_bits = next_int >> needed_bits; - d.remaining_bits = @intCast(@bitSizeOf(Bits) - @as(usize, needed_bits)); - return u; + const mask = @shlExact(@as(u16, 1), n) - 1; + return @intCast((bits >> d.consumed_bits) & mask); } -fn takeBitsEnding(d: *Decompress, comptime U: type) !U { - const remaining_bits = d.remaining_bits; - const next_bits = d.next_bits; - const in = d.input; - const n = in.bufferedLen(); - assert(n < @sizeOf(Bits)); - const needed_bits = @bitSizeOf(U) - remaining_bits; - if (n * 8 < needed_bits) return error.EndOfStream; - const next_int = in.takeVarInt(Bits, .little, n) catch |err| switch (err) { - error.ReadFailed => return error.ReadFailed, - error.EndOfStream => unreachable, - }; - const u: U = @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits); - d.next_bits = next_int >> needed_bits; - d.remaining_bits = @intCast(n * 8 - @as(usize, needed_bits)); - return u; +fn peekBitsEnding(d: *Decompress, n: u4) !u16 { + @branchHint(.unlikely); + + const left = d.input.buffered(); + if (left.len * 8 - d.consumed_bits < n) return error.EndOfStream; + const bits = std.mem.readVarInt(u32, left, .little); + const mask = @shlExact(@as(u16, 1), n) - 1; + return @intCast((bits >> d.consumed_bits) & mask); } -fn peekBits(d: *Decompress, comptime U: type) !U { - const remaining_bits = d.remaining_bits; - const next_bits = d.next_bits; - if (remaining_bits >= @bitSizeOf(U)) return @truncate(next_bits); - const in = d.input; - const next_int = in.peekInt(Bits, .little) catch |err| switch (err) { - error.ReadFailed => return error.ReadFailed, - error.EndOfStream => return peekBitsEnding(d, U), - }; - const needed_bits = @bitSizeOf(U) - remaining_bits; - return @intCast(((next_int & ((@as(Bits, 1) << needed_bits) - 1)) << remaining_bits) | next_bits); +/// Safe only 
after `peekBits` has been called with a greater or equal `n` value. +fn tossBits(d: *Decompress, n: u4) void { + d.input.toss((@as(u8, n) + d.consumed_bits) / 8); + d.consumed_bits +%= @truncate(n); } -fn peekBitsEnding(d: *Decompress, comptime U: type) !U { - const remaining_bits = d.remaining_bits; - const next_bits = d.next_bits; - const in = d.input; - var u: Bits = 0; - var remaining_needed_bits = @bitSizeOf(U) - remaining_bits; - var i: usize = 0; - while (remaining_needed_bits > 0) { - const peeked = in.peek(i + 1) catch |err| switch (err) { - error.ReadFailed => return error.ReadFailed, - error.EndOfStream => break, - }; - u |= @as(Bits, peeked[i]) << @intCast(i * 8); - remaining_needed_bits -|= 8; - i += 1; - } - if (remaining_bits == 0 and i == 0) return error.EndOfStream; - return @truncate((u << remaining_bits) | next_bits); -} - -fn tossBits(d: *Decompress, n: u4) !void { - const remaining_bits = d.remaining_bits; - const next_bits = d.next_bits; - if (remaining_bits >= n) { - d.next_bits = next_bits >> n; - d.remaining_bits = remaining_bits - n; - } else { - const in = d.input; - const next_int = in.takeInt(Bits, .little) catch |err| switch (err) { - error.ReadFailed => return error.ReadFailed, - error.EndOfStream => return tossBitsEnding(d, n), - }; - const needed_bits = n - remaining_bits; - d.next_bits = next_int >> needed_bits; - d.remaining_bits = @intCast(@bitSizeOf(Bits) - @as(usize, needed_bits)); - } +fn takeBits(d: *Decompress, n: u4) !u16 { + const bits = try d.peekBits(n); + d.tossBits(n); + return bits; } -fn tossBitsEnding(d: *Decompress, n: u4) !void { - const remaining_bits = d.remaining_bits; - const in = d.input; - const buffered_n = in.bufferedLen(); - if (buffered_n == 0) return error.EndOfStream; - assert(buffered_n < @sizeOf(Bits)); - const needed_bits = n - remaining_bits; - const next_int = in.takeVarInt(Bits, .little, buffered_n) catch |err| switch (err) { - error.ReadFailed => return error.ReadFailed, - error.EndOfStream => unreachable, +fn alignBitsForward(d: *Decompress) void { + d.input.toss(@intFromBool(d.consumed_bits != 0)); + d.consumed_bits = 0; +} + +fn peekBitsShort(d: *Decompress, n: u4) !u16 { + const bits = d.input.peekInt(u32, .little) catch |e| return switch (e) { + error.ReadFailed => error.ReadFailed, + error.EndOfStream => d.peekBitsShortEnding(n), }; - d.next_bits = next_int >> needed_bits; - d.remaining_bits = @intCast(@as(usize, buffered_n) * 8 -| @as(usize, needed_bits)); + const mask = @shlExact(@as(u16, 1), n) - 1; + return @intCast((bits >> d.consumed_bits) & mask); } -fn takeBitsRuntime(d: *Decompress, n: u4) !u16 { - const x = try peekBits(d, u16); - const mask: u16 = (@as(u16, 1) << n) - 1; - const u: u16 = @as(u16, @truncate(x)) & mask; - try tossBits(d, n); - return u; +fn peekBitsShortEnding(d: *Decompress, n: u4) !u16 { + @branchHint(.unlikely); + + const left = d.input.buffered(); + const bits = std.mem.readVarInt(u32, left, .little); + const mask = @shlExact(@as(u16, 1), n) - 1; + return @intCast((bits >> d.consumed_bits) & mask); } -fn alignBitsDiscarding(d: *Decompress) void { - const remaining_bits = d.remaining_bits; - if (remaining_bits == 0) return; - const n_bytes = remaining_bits / 8; - const in = d.input; - in.seek -= n_bytes; - d.remaining_bits = 0; - d.next_bits = 0; +fn tossBitsShort(d: *Decompress, n: u4) !void { + if (d.input.bufferedLen() * 8 + d.consumed_bits < n) return error.EndOfStream; + d.tossBits(n); } -fn alignBitsPreserving(d: *Decompress) void { - const remaining_bits: usize = 
d.remaining_bits; - if (remaining_bits == 0) return; - const n_bytes = (remaining_bits + 7) / 8; - const in = d.input; - in.seek -= n_bytes; - d.remaining_bits = 0; - d.next_bits = 0; +fn takeIntBits(d: *Decompress, T: type) !T { + return @intCast(try d.takeBits(@bitSizeOf(T))); +} + +fn peekIntBitsShort(d: *Decompress, T: type) !T { + return @intCast(try d.peekBitsShort(@bitSizeOf(T))); } /// Reads first 7 bits, and then maybe 1 or 2 more to get full 7,8 or 9 bit code. @@ -646,12 +594,12 @@ fn alignBitsPreserving(d: *Decompress) void { /// 280 - 287 8 11000000 through /// 11000111 fn readFixedCode(d: *Decompress) !u16 { - const code7 = @bitReverse(try d.takeBits(u7)); + const code7 = @bitReverse(try d.takeIntBits(u7)); return switch (code7) { 0...0b0010_111 => @as(u16, code7) + 256, - 0b0010_111 + 1...0b1011_111 => (@as(u16, code7) << 1) + @as(u16, try d.takeBits(u1)) - 0b0011_0000, - 0b1011_111 + 1...0b1100_011 => (@as(u16, code7 - 0b1100000) << 1) + try d.takeBits(u1) + 280, - else => (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, @bitReverse(try d.takeBits(u2))) + 144, + 0b0010_111 + 1...0b1011_111 => (@as(u16, code7) << 1) + @as(u16, try d.takeIntBits(u1)) - 0b0011_0000, + 0b1011_111 + 1...0b1100_011 => (@as(u16, code7 - 0b1100000) << 1) + try d.takeIntBits(u1) + 280, + else => (@as(u16, code7 - 0b1100_100) << 2) + @as(u16, @bitReverse(try d.takeIntBits(u2))) + 144, }; } @@ -807,7 +755,7 @@ fn HuffmanDecoder( return self.findLinked(code, sym.next); } - inline fn findLinked(self: *Self, code: u16, start: u16) !Symbol { + fn findLinked(self: *Self, code: u16, start: u16) !Symbol { var pos = start; while (pos > 0) { const sym = self.symbols[pos]; @@ -898,57 +846,30 @@ test "init/find" { } test "encode/decode literals" { - var codes: [flate.HuffmanEncoder.max_num_frequencies]flate.HuffmanEncoder.Code = undefined; - for (1..286) |j| { // for all different number of codes - var enc: flate.HuffmanEncoder = .{ - .codes = &codes, - .freq_cache = undefined, - .bit_count = undefined, - .lns = undefined, - .lfs = undefined, - }; - // create frequencies - var freq = [_]u16{0} ** 286; - freq[256] = 1; // ensure we have end of block code - for (&freq, 1..) |*f, i| { - if (i % j == 0) - f.* = @intCast(i); - } - - // encoder from frequencies - enc.generate(&freq, 15); - - // get code_lens from encoder - var code_lens = [_]u4{0} ** 286; - for (code_lens, 0..) 
|_, i| { - code_lens[i] = @intCast(enc.codes[i].len); - } - // generate decoder from code lens - var dec: LiteralDecoder = .{}; - try dec.generate(&code_lens); - - // expect decoder code to match original encoder code - for (dec.symbols) |s| { - if (s.code_bits == 0) continue; - const c_code: u16 = @bitReverse(@as(u15, @intCast(s.code))); - const symbol: u16 = switch (s.kind) { - .literal => s.symbol, - .end_of_block => 256, - .match => @as(u16, s.symbol) + 257, - }; - - const c = enc.codes[symbol]; - try testing.expect(c.code == c_code); - } - - // find each symbol by code - for (enc.codes) |c| { - if (c.len == 0) continue; - - const s_code: u15 = @bitReverse(@as(u15, @intCast(c.code))); - const s = try dec.find(s_code); - try testing.expect(s.code == s_code); - try testing.expect(s.code_bits == c.len); + // Check that the example in RFC 1951 section 3.2.2 works (plus some zeroes) + const max_bits = 5; + var decoder: HuffmanDecoder(16, max_bits, 3) = .{}; + try decoder.generate(&.{ 3, 3, 3, 3, 0, 0, 3, 2, 4, 4 }); + + inline for (0.., .{ + @as(u3, 0b010), + @as(u3, 0b011), + @as(u3, 0b100), + @as(u3, 0b101), + @as(u0, 0), + @as(u0, 0), + @as(u3, 0b110), + @as(u2, 0b00), + @as(u4, 0b1110), + @as(u4, 0b1111), + }) |i, code| { + const bits = @bitSizeOf(@TypeOf(code)); + if (bits == 0) continue; + for (0..1 << (max_bits - bits)) |extra| { + const full = (@as(u16, code) << (max_bits - bits)) | @as(u16, @intCast(extra)); + const symbol = try decoder.find(full); + try testing.expectEqual(i, symbol.symbol); + try testing.expectEqual(bits, symbol.code_bits); } } } diff --git a/lib/std/compress/flate/HuffmanEncoder.zig b/lib/std/compress/flate/HuffmanEncoder.zig deleted file mode 100644 index 28a405b88662..000000000000 --- a/lib/std/compress/flate/HuffmanEncoder.zig +++ /dev/null @@ -1,463 +0,0 @@ -const HuffmanEncoder = @This(); -const std = @import("std"); -const assert = std.debug.assert; -const testing = std.testing; - -codes: []Code, -// Reusable buffer with the longest possible frequency table. -freq_cache: [max_num_frequencies + 1]LiteralNode, -bit_count: [17]u32, -lns: []LiteralNode, // sorted by literal, stored to avoid repeated allocation in generate -lfs: []LiteralNode, // sorted by frequency, stored to avoid repeated allocation in generate - -pub const LiteralNode = struct { - literal: u16, - freq: u16, - - pub fn max() LiteralNode { - return .{ - .literal = std.math.maxInt(u16), - .freq = std.math.maxInt(u16), - }; - } -}; - -pub const Code = struct { - code: u16 = 0, - len: u16 = 0, -}; - -/// The odd order in which the codegen code sizes are written. -pub const codegen_order = [_]u32{ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; -/// The number of codegen codes. -pub const codegen_code_count = 19; - -/// The largest distance code. -pub const distance_code_count = 30; - -/// Maximum number of literals. -pub const max_num_lit = 286; - -/// Max number of frequencies used for a Huffman Code -/// Possible lengths are codegen_code_count (19), distance_code_count (30) and max_num_lit (286). -/// The largest of these is max_num_lit. -pub const max_num_frequencies = max_num_lit; - -/// Biggest block size for uncompressed block. -pub const max_store_block_size = 65535; -/// The special code used to mark the end of a block. -pub const end_block_marker = 256; - -/// Update this Huffman Code object to be the minimum code for the specified frequency count. -/// -/// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. 
-/// max_bits The maximum number of bits to use for any literal. -pub fn generate(self: *HuffmanEncoder, freq: []u16, max_bits: u32) void { - var list = self.freq_cache[0 .. freq.len + 1]; - // Number of non-zero literals - var count: u32 = 0; - // Set list to be the set of all non-zero literals and their frequencies - for (freq, 0..) |f, i| { - if (f != 0) { - list[count] = LiteralNode{ .literal = @as(u16, @intCast(i)), .freq = f }; - count += 1; - } else { - list[count] = LiteralNode{ .literal = 0x00, .freq = 0 }; - self.codes[i].len = 0; - } - } - list[freq.len] = LiteralNode{ .literal = 0x00, .freq = 0 }; - - list = list[0..count]; - if (count <= 2) { - // Handle the small cases here, because they are awkward for the general case code. With - // two or fewer literals, everything has bit length 1. - for (list, 0..) |node, i| { - // "list" is in order of increasing literal value. - self.codes[node.literal] = .{ - .code = @intCast(i), - .len = 1, - }; - } - return; - } - self.lfs = list; - std.mem.sort(LiteralNode, self.lfs, {}, byFreq); - - // Get the number of literals for each bit count - const bit_count = self.bitCounts(list, max_bits); - // And do the assignment - self.assignEncodingAndSize(bit_count, list); -} - -pub fn bitLength(self: *HuffmanEncoder, freq: []u16) u32 { - var total: u32 = 0; - for (freq, 0..) |f, i| { - if (f != 0) { - total += @as(u32, @intCast(f)) * @as(u32, @intCast(self.codes[i].len)); - } - } - return total; -} - -/// Return the number of literals assigned to each bit size in the Huffman encoding -/// -/// This method is only called when list.len >= 3 -/// The cases of 0, 1, and 2 literals are handled by special case code. -/// -/// list: An array of the literals with non-zero frequencies -/// and their associated frequencies. The array is in order of increasing -/// frequency, and has as its last element a special element with frequency -/// `math.maxInt(i32)` -/// -/// max_bits: The maximum number of bits that should be used to encode any literal. -/// Must be less than 16. -/// -/// Returns an integer array in which array[i] indicates the number of literals -/// that should be encoded in i bits. -fn bitCounts(self: *HuffmanEncoder, list: []LiteralNode, max_bits_to_use: usize) []u32 { - var max_bits = max_bits_to_use; - const n = list.len; - const max_bits_limit = 16; - - assert(max_bits < max_bits_limit); - - // The tree can't have greater depth than n - 1, no matter what. This - // saves a little bit of work in some small cases - max_bits = @min(max_bits, n - 1); - - // Create information about each of the levels. - // A bogus "Level 0" whose sole purpose is so that - // level1.prev.needed == 0. This makes level1.next_pair_freq - // be a legitimate value that never gets chosen. - var levels: [max_bits_limit]LevelInfo = std.mem.zeroes([max_bits_limit]LevelInfo); - // leaf_counts[i] counts the number of literals at the left - // of ancestors of the rightmost node at level i. - // leaf_counts[i][j] is the number of literals at the left - // of the level j ancestor. - var leaf_counts: [max_bits_limit][max_bits_limit]u32 = @splat(@splat(0)); - - { - var level = @as(u32, 1); - while (level <= max_bits) : (level += 1) { - // For every level, the first two items are the first two characters. - // We initialize the levels as if we had already figured this out. 
- levels[level] = LevelInfo{ - .level = level, - .last_freq = list[1].freq, - .next_char_freq = list[2].freq, - .next_pair_freq = list[0].freq + list[1].freq, - .needed = 0, - }; - leaf_counts[level][level] = 2; - if (level == 1) { - levels[level].next_pair_freq = std.math.maxInt(i32); - } - } - } - - // We need a total of 2*n - 2 items at top level and have already generated 2. - levels[max_bits].needed = 2 * @as(u32, @intCast(n)) - 4; - - { - var level = max_bits; - while (true) { - var l = &levels[level]; - if (l.next_pair_freq == std.math.maxInt(i32) and l.next_char_freq == std.math.maxInt(i32)) { - // We've run out of both leaves and pairs. - // End all calculations for this level. - // To make sure we never come back to this level or any lower level, - // set next_pair_freq impossibly large. - l.needed = 0; - levels[level + 1].next_pair_freq = std.math.maxInt(i32); - level += 1; - continue; - } - - const prev_freq = l.last_freq; - if (l.next_char_freq < l.next_pair_freq) { - // The next item on this row is a leaf node. - const next = leaf_counts[level][level] + 1; - l.last_freq = l.next_char_freq; - // Lower leaf_counts are the same of the previous node. - leaf_counts[level][level] = next; - if (next >= list.len) { - l.next_char_freq = LiteralNode.max().freq; - } else { - l.next_char_freq = list[next].freq; - } - } else { - // The next item on this row is a pair from the previous row. - // next_pair_freq isn't valid until we generate two - // more values in the level below - l.last_freq = l.next_pair_freq; - // Take leaf counts from the lower level, except counts[level] remains the same. - @memcpy(leaf_counts[level][0..level], leaf_counts[level - 1][0..level]); - levels[l.level - 1].needed = 2; - } - - l.needed -= 1; - if (l.needed == 0) { - // We've done everything we need to do for this level. - // Continue calculating one level up. Fill in next_pair_freq - // of that level with the sum of the two nodes we've just calculated on - // this level. - if (l.level == max_bits) { - // All done! - break; - } - levels[l.level + 1].next_pair_freq = prev_freq + l.last_freq; - level += 1; - } else { - // If we stole from below, move down temporarily to replenish it. - while (levels[level - 1].needed > 0) { - level -= 1; - if (level == 0) { - break; - } - } - } - } - } - - // Somethings is wrong if at the end, the top level is null or hasn't used - // all of the leaves. - assert(leaf_counts[max_bits][max_bits] == n); - - var bit_count = self.bit_count[0 .. max_bits + 1]; - var bits: u32 = 1; - const counts = &leaf_counts[max_bits]; - { - var level = max_bits; - while (level > 0) : (level -= 1) { - // counts[level] gives the number of literals requiring at least "bits" - // bits to encode. - bit_count[bits] = counts[level] - counts[level - 1]; - bits += 1; - if (level == 0) { - break; - } - } - } - return bit_count; -} - -/// Look at the leaves and assign them a bit count and an encoding as specified -/// in RFC 1951 3.2.2 -fn assignEncodingAndSize(self: *HuffmanEncoder, bit_count: []u32, list_arg: []LiteralNode) void { - var code = @as(u16, 0); - var list = list_arg; - - for (bit_count, 0..) |bits, n| { - code <<= 1; - if (n == 0 or bits == 0) { - continue; - } - // The literals list[list.len-bits] .. list[list.len-bits] - // are encoded using "bits" bits, and get the values - // code, code + 1, .... The code values are - // assigned in literal order (not frequency order). 
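The removed assignEncodingAndSize hands out canonical codes: within one bit length, codes are consecutive and assigned in literal order, as RFC 1951 section 3.2.2 prescribes. The RFC's reference construction, written as a self-contained sketch; the eight-symbol alphabet and the test name come from the RFC example, not from this codebase:

const std = @import("std");

test "canonical code assignment from code lengths (RFC 1951 3.2.2)" {
    // Code lengths for symbols A..H from the RFC example.
    const lens = [_]u4{ 3, 3, 3, 3, 3, 2, 4, 4 };
    const max_bits = 4;

    // Count how many symbols use each bit length.
    var bl_count = [_]u16{0} ** (max_bits + 1);
    for (lens) |l| bl_count[l] += 1;

    // Smallest code for each bit length.
    var next_code = [_]u16{0} ** (max_bits + 1);
    var code: u16 = 0;
    for (1..max_bits + 1) |bits| {
        code = (code + bl_count[bits - 1]) << 1;
        next_code[bits] = code;
    }

    // Assign codes in symbol order within each length.
    var codes: [lens.len]u16 = undefined;
    for (lens, 0..) |l, i| {
        codes[i] = next_code[l];
        next_code[l] += 1;
    }

    try std.testing.expectEqualSlices(u16, &.{ 2, 3, 4, 5, 6, 0, 14, 15 }, &codes);
}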
- const chunk = list[list.len - @as(u32, @intCast(bits)) ..]; - - self.lns = chunk; - std.mem.sort(LiteralNode, self.lns, {}, byLiteral); - - for (chunk) |node| { - self.codes[node.literal] = .{ - .code = bitReverse(u16, code, @as(u5, @intCast(n))), - .len = @as(u16, @intCast(n)), - }; - code += 1; - } - list = list[0 .. list.len - @as(u32, @intCast(bits))]; - } -} - -fn byFreq(context: void, a: LiteralNode, b: LiteralNode) bool { - _ = context; - if (a.freq == b.freq) { - return a.literal < b.literal; - } - return a.freq < b.freq; -} - -/// Describes the state of the constructed tree for a given depth. -const LevelInfo = struct { - /// Our level. for better printing - level: u32, - /// The frequency of the last node at this level - last_freq: u32, - /// The frequency of the next character to add to this level - next_char_freq: u32, - /// The frequency of the next pair (from level below) to add to this level. - /// Only valid if the "needed" value of the next lower level is 0. - next_pair_freq: u32, - /// The number of chains remaining to generate for this level before moving - /// up to the next level - needed: u32, -}; - -fn byLiteral(context: void, a: LiteralNode, b: LiteralNode) bool { - _ = context; - return a.literal < b.literal; -} - -/// Reverse bit-by-bit a N-bit code. -fn bitReverse(comptime T: type, value: T, n: usize) T { - const r = @bitReverse(value); - return r >> @as(std.math.Log2Int(T), @intCast(@typeInfo(T).int.bits - n)); -} - -test bitReverse { - const ReverseBitsTest = struct { - in: u16, - bit_count: u5, - out: u16, - }; - - const reverse_bits_tests = [_]ReverseBitsTest{ - .{ .in = 1, .bit_count = 1, .out = 1 }, - .{ .in = 1, .bit_count = 2, .out = 2 }, - .{ .in = 1, .bit_count = 3, .out = 4 }, - .{ .in = 1, .bit_count = 4, .out = 8 }, - .{ .in = 1, .bit_count = 5, .out = 16 }, - .{ .in = 17, .bit_count = 5, .out = 17 }, - .{ .in = 257, .bit_count = 9, .out = 257 }, - .{ .in = 29, .bit_count = 5, .out = 23 }, - }; - - for (reverse_bits_tests) |h| { - const v = bitReverse(u16, h.in, h.bit_count); - try std.testing.expectEqual(h.out, v); - } -} - -/// Generates a HuffmanCode corresponding to the fixed literal table -pub fn fixedLiteralEncoder(codes: *[max_num_frequencies]Code) HuffmanEncoder { - var h: HuffmanEncoder = undefined; - h.codes = codes; - var ch: u16 = 0; - - while (ch < max_num_frequencies) : (ch += 1) { - var bits: u16 = undefined; - var size: u16 = undefined; - switch (ch) { - 0...143 => { - // size 8, 000110000 .. 10111111 - bits = ch + 48; - size = 8; - }, - 144...255 => { - // size 9, 110010000 .. 111111111 - bits = ch + 400 - 144; - size = 9; - }, - 256...279 => { - // size 7, 0000000 .. 0010111 - bits = ch - 256; - size = 7; - }, - else => { - // size 8, 11000000 .. 11000111 - bits = ch + 192 - 280; - size = 8; - }, - } - h.codes[ch] = .{ .code = bitReverse(u16, bits, @as(u5, @intCast(size))), .len = size }; - } - return h; -} - -pub fn fixedDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder { - var h: HuffmanEncoder = undefined; - h.codes = codes; - for (h.codes, 0..) |_, ch| { - h.codes[ch] = .{ .code = bitReverse(u16, @as(u16, @intCast(ch)), 5), .len = 5 }; - } - return h; -} - -pub fn huffmanDistanceEncoder(codes: *[distance_code_count]Code) HuffmanEncoder { - var distance_freq: [distance_code_count]u16 = @splat(0); - distance_freq[0] = 1; - // huff_distance is a static distance encoder used for huffman only encoding. - // It can be reused since we will not be encoding distance values. 
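The fixed literal/length table that the removed fixedLiteralEncoder above produced is built at comptime as fixed_lit_codes/fixed_lit_bits in the new token.zig later in this patch. A small sketch checking two entries against the RFC 1951 section 3.2.6 ranges, assuming a test file in the flate directory so that `@import("token.zig")` resolves; the test name and chosen entries are illustrative:

const std = @import("std");
const token = @import("token.zig");

test "fixed literal table follows RFC 1951 3.2.6" {
    // Literal 65 ('A') falls in the 0..143 range: an 8-bit code, 0b00110000 + 65,
    // stored bit-reversed so it can be emitted LSB first.
    try std.testing.expectEqual(8, token.fixed_lit_bits[65]);
    try std.testing.expectEqual(@bitReverse(@as(u8, 0b00110000 + 65)), token.fixed_lit_codes[65]);
    // Symbol 256 (end of block) falls in the 256..279 range: a 7-bit code of 0.
    try std.testing.expectEqual(7, token.fixed_lit_bits[256]);
    try std.testing.expectEqual(0, token.fixed_lit_codes[256]);
}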
- var h: HuffmanEncoder = .{}; - h.codes = codes; - h.generate(distance_freq[0..], 15); - return h; -} - -test "generate a Huffman code for the fixed literal table specific to Deflate" { - var codes: [max_num_frequencies]Code = undefined; - const enc: HuffmanEncoder = .fixedLiteralEncoder(&codes); - for (enc.codes) |c| { - switch (c.len) { - 7 => { - const v = @bitReverse(@as(u7, @intCast(c.code))); - try testing.expect(v <= 0b0010111); - }, - 8 => { - const v = @bitReverse(@as(u8, @intCast(c.code))); - try testing.expect((v >= 0b000110000 and v <= 0b10111111) or - (v >= 0b11000000 and v <= 11000111)); - }, - 9 => { - const v = @bitReverse(@as(u9, @intCast(c.code))); - try testing.expect(v >= 0b110010000 and v <= 0b111111111); - }, - else => unreachable, - } - } -} - -test "generate a Huffman code for the 30 possible relative distances (LZ77 distances) of Deflate" { - var codes: [distance_code_count]Code = undefined; - const enc = fixedDistanceEncoder(&codes); - for (enc.codes) |c| { - const v = @bitReverse(@as(u5, @intCast(c.code))); - try testing.expect(v <= 29); - try testing.expect(c.len == 5); - } -} - -pub const fixed_codes = [_]u8{ - 0b00001100, 0b10001100, 0b01001100, 0b11001100, 0b00101100, 0b10101100, 0b01101100, 0b11101100, - 0b00011100, 0b10011100, 0b01011100, 0b11011100, 0b00111100, 0b10111100, 0b01111100, 0b11111100, - 0b00000010, 0b10000010, 0b01000010, 0b11000010, 0b00100010, 0b10100010, 0b01100010, 0b11100010, - 0b00010010, 0b10010010, 0b01010010, 0b11010010, 0b00110010, 0b10110010, 0b01110010, 0b11110010, - 0b00001010, 0b10001010, 0b01001010, 0b11001010, 0b00101010, 0b10101010, 0b01101010, 0b11101010, - 0b00011010, 0b10011010, 0b01011010, 0b11011010, 0b00111010, 0b10111010, 0b01111010, 0b11111010, - 0b00000110, 0b10000110, 0b01000110, 0b11000110, 0b00100110, 0b10100110, 0b01100110, 0b11100110, - 0b00010110, 0b10010110, 0b01010110, 0b11010110, 0b00110110, 0b10110110, 0b01110110, 0b11110110, - 0b00001110, 0b10001110, 0b01001110, 0b11001110, 0b00101110, 0b10101110, 0b01101110, 0b11101110, - 0b00011110, 0b10011110, 0b01011110, 0b11011110, 0b00111110, 0b10111110, 0b01111110, 0b11111110, - 0b00000001, 0b10000001, 0b01000001, 0b11000001, 0b00100001, 0b10100001, 0b01100001, 0b11100001, - 0b00010001, 0b10010001, 0b01010001, 0b11010001, 0b00110001, 0b10110001, 0b01110001, 0b11110001, - 0b00001001, 0b10001001, 0b01001001, 0b11001001, 0b00101001, 0b10101001, 0b01101001, 0b11101001, - 0b00011001, 0b10011001, 0b01011001, 0b11011001, 0b00111001, 0b10111001, 0b01111001, 0b11111001, - 0b00000101, 0b10000101, 0b01000101, 0b11000101, 0b00100101, 0b10100101, 0b01100101, 0b11100101, - 0b00010101, 0b10010101, 0b01010101, 0b11010101, 0b00110101, 0b10110101, 0b01110101, 0b11110101, - 0b00001101, 0b10001101, 0b01001101, 0b11001101, 0b00101101, 0b10101101, 0b01101101, 0b11101101, - 0b00011101, 0b10011101, 0b01011101, 0b11011101, 0b00111101, 0b10111101, 0b01111101, 0b11111101, - 0b00010011, 0b00100110, 0b01001110, 0b10011010, 0b00111100, 0b01100101, 0b11101010, 0b10110100, - 0b11101001, 0b00110011, 0b01100110, 0b11001110, 0b10011010, 0b00111101, 0b01100111, 0b11101110, - 0b10111100, 0b11111001, 0b00001011, 0b00010110, 0b00101110, 0b01011010, 0b10111100, 0b01100100, - 0b11101001, 0b10110010, 0b11100101, 0b00101011, 0b01010110, 0b10101110, 0b01011010, 0b10111101, - 0b01100110, 0b11101101, 0b10111010, 0b11110101, 0b00011011, 0b00110110, 0b01101110, 0b11011010, - 0b10111100, 0b01100101, 0b11101011, 0b10110110, 0b11101101, 0b00111011, 0b01110110, 0b11101110, - 0b11011010, 0b10111101, 0b01100111, 
0b11101111, 0b10111110, 0b11111101, 0b00000111, 0b00001110, - 0b00011110, 0b00111010, 0b01111100, 0b11100100, 0b11101000, 0b10110001, 0b11100011, 0b00100111, - 0b01001110, 0b10011110, 0b00111010, 0b01111101, 0b11100110, 0b11101100, 0b10111001, 0b11110011, - 0b00010111, 0b00101110, 0b01011110, 0b10111010, 0b01111100, 0b11100101, 0b11101010, 0b10110101, - 0b11101011, 0b00110111, 0b01101110, 0b11011110, 0b10111010, 0b01111101, 0b11100111, 0b11101110, - 0b10111101, 0b11111011, 0b00001111, 0b00011110, 0b00111110, 0b01111010, 0b11111100, 0b11100100, - 0b11101001, 0b10110011, 0b11100111, 0b00101111, 0b01011110, 0b10111110, 0b01111010, 0b11111101, - 0b11100110, 0b11101101, 0b10111011, 0b11110111, 0b00011111, 0b00111110, 0b01111110, 0b11111010, - 0b11111100, 0b11100101, 0b11101011, 0b10110111, 0b11101111, 0b00111111, 0b01111110, 0b11111110, - 0b11111010, 0b11111101, 0b11100111, 0b11101111, 0b10111111, 0b11111111, 0b00000000, 0b00100000, - 0b00001000, 0b00001100, 0b10000001, 0b11000010, 0b11100000, 0b00001000, 0b00100100, 0b00001010, - 0b10001101, 0b11000001, 0b11100010, 0b11110000, 0b00000100, 0b00100010, 0b10001001, 0b01001100, - 0b10100001, 0b11010010, 0b11101000, 0b00000011, 0b10000011, 0b01000011, 0b11000011, 0b00100011, - 0b10100011, -}; diff --git a/lib/std/compress/flate/Lookup.zig b/lib/std/compress/flate/Lookup.zig deleted file mode 100644 index d1d93de50abd..000000000000 --- a/lib/std/compress/flate/Lookup.zig +++ /dev/null @@ -1,130 +0,0 @@ -//! Lookup of the previous locations for the same 4 byte data. Works on hash of -//! 4 bytes data. Head contains position of the first match for each hash. Chain -//! points to the previous position of the same hash given the current location. - -const std = @import("std"); -const testing = std.testing; -const expect = testing.expect; -const flate = @import("../flate.zig"); -const Token = @import("Token.zig"); - -const Lookup = @This(); - -const prime4 = 0x9E3779B1; // 4 bytes prime number 2654435761 -const chain_len = 2 * flate.history_len; - -pub const bits = 15; -pub const len = 1 << bits; -pub const shift = 32 - bits; - -// Maps hash => first position -head: [len]u16 = [_]u16{0} ** len, -// Maps position => previous positions for the same hash value -chain: [chain_len]u16 = [_]u16{0} ** (chain_len), - -// Calculates hash of the 4 bytes from data. -// Inserts `pos` position of that hash in the lookup tables. -// Returns previous location with the same hash value. -pub fn add(self: *Lookup, data: []const u8, pos: u16) u16 { - if (data.len < 4) return 0; - const h = hash(data[0..4]); - return self.set(h, pos); -} - -// Returns previous location with the same hash value given the current -// position. -pub fn prev(self: *Lookup, pos: u16) u16 { - return self.chain[pos]; -} - -fn set(self: *Lookup, h: u32, pos: u16) u16 { - const p = self.head[h]; - self.head[h] = pos; - self.chain[pos] = p; - return p; -} - -// Slide all positions in head and chain for `n` -pub fn slide(self: *Lookup, n: u16) void { - for (&self.head) |*v| { - v.* -|= n; - } - var i: usize = 0; - while (i < n) : (i += 1) { - self.chain[i] = self.chain[i + n] -| n; - } -} - -// Add `len` 4 bytes hashes from `data` into lookup. -// Position of the first byte is `pos`. 
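The removed Lookup kept two tables: head maps a hash of four bytes to the most recent position that produced it, and chain maps each position to the previous position with the same hash, forming a linked list of candidate matches. A minimal sketch of that scheme; the toy one-byte hash, the table sizes, and the test name are simplifications for illustration, not the removed implementation:

const std = @import("std");

test "head/chain lookup yields previous occurrences" {
    const data = "abcdabcdabcd";
    var head = [_]u16{0} ** 256; // hash -> most recent position
    var chain = [_]u16{0} ** data.len; // position -> previous position with the same hash

    for (0..data.len - 3) |pos| {
        // Toy hash: just the first byte; the removed code hashed all four bytes.
        const h: u8 = data[pos];
        chain[pos] = head[h];
        head[h] = @intCast(pos);
    }

    // The "abcd" at position 8 chains back to position 4, and 4 back to 0.
    try std.testing.expectEqual(4, chain[8]);
    try std.testing.expectEqual(0, chain[4]);
}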
-pub fn bulkAdd(self: *Lookup, data: []const u8, length: u16, pos: u16) void { - if (length == 0 or data.len < Token.min_length) { - return; - } - var hb = - @as(u32, data[3]) | - @as(u32, data[2]) << 8 | - @as(u32, data[1]) << 16 | - @as(u32, data[0]) << 24; - _ = self.set(hashu(hb), pos); - - var i = pos; - for (4..@min(length + 3, data.len)) |j| { - hb = (hb << 8) | @as(u32, data[j]); - i += 1; - _ = self.set(hashu(hb), i); - } -} - -// Calculates hash of the first 4 bytes of `b`. -fn hash(b: *const [4]u8) u32 { - return hashu(@as(u32, b[3]) | - @as(u32, b[2]) << 8 | - @as(u32, b[1]) << 16 | - @as(u32, b[0]) << 24); -} - -fn hashu(v: u32) u32 { - return @intCast((v *% prime4) >> shift); -} - -test add { - const data = [_]u8{ - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, - 0x01, 0x02, 0x03, - }; - - var h: Lookup = .{}; - for (data, 0..) |_, i| { - const p = h.add(data[i..], @intCast(i)); - if (i >= 8 and i < 24) { - try expect(p == i - 8); - } else { - try expect(p == 0); - } - } - - const v = Lookup.hash(data[2 .. 2 + 4]); - try expect(h.head[v] == 2 + 16); - try expect(h.chain[2 + 16] == 2 + 8); - try expect(h.chain[2 + 8] == 2); -} - -test bulkAdd { - const data = "Lorem ipsum dolor sit amet, consectetur adipiscing elit."; - - // one by one - var h: Lookup = .{}; - for (data, 0..) |_, i| { - _ = h.add(data[i..], @intCast(i)); - } - - // in bulk - var bh: Lookup = .{}; - bh.bulkAdd(data, data.len, 0); - - try testing.expectEqualSlices(u16, &h.head, &bh.head); - try testing.expectEqualSlices(u16, &h.chain, &bh.chain); -} diff --git a/lib/std/compress/flate/Token.zig b/lib/std/compress/flate/Token.zig deleted file mode 100644 index 138304769381..000000000000 --- a/lib/std/compress/flate/Token.zig +++ /dev/null @@ -1,333 +0,0 @@ -//! Token cat be literal: single byte of data or match; reference to the slice of -//! data in the same stream represented with . Where length -//! can be 3 - 258 bytes, and distance 1 - 32768 bytes. -//! 
-const std = @import("std"); -const assert = std.debug.assert; -const print = std.debug.print; -const expect = std.testing.expect; - -const Token = @This(); - -pub const Kind = enum(u1) { - literal, - match, -}; - -// Distance range 1 - 32768, stored in dist as 0 - 32767 (fits u15) -dist: u15 = 0, -// Length range 3 - 258, stored in len_lit as 0 - 255 (fits u8) -len_lit: u8 = 0, -kind: Kind = .literal, - -pub const base_length = 3; // smallest match length per the RFC section 3.2.5 -pub const min_length = 4; // min length used in this algorithm -pub const max_length = 258; - -pub const min_distance = 1; -pub const max_distance = std.compress.flate.history_len; - -pub fn literal(t: Token) u8 { - return t.len_lit; -} - -pub fn distance(t: Token) u16 { - return @as(u16, t.dist) + min_distance; -} - -pub fn length(t: Token) u16 { - return @as(u16, t.len_lit) + base_length; -} - -pub fn initLiteral(lit: u8) Token { - return .{ .kind = .literal, .len_lit = lit }; -} - -// distance range 1 - 32768, stored in dist as 0 - 32767 (u15) -// length range 3 - 258, stored in len_lit as 0 - 255 (u8) -pub fn initMatch(dist: u16, len: u16) Token { - assert(len >= min_length and len <= max_length); - assert(dist >= min_distance and dist <= max_distance); - return .{ - .kind = .match, - .dist = @intCast(dist - min_distance), - .len_lit = @intCast(len - base_length), - }; -} - -pub fn eql(t: Token, o: Token) bool { - return t.kind == o.kind and - t.dist == o.dist and - t.len_lit == o.len_lit; -} - -pub fn lengthCode(t: Token) u16 { - return match_lengths[match_lengths_index[t.len_lit]].code; -} - -pub fn lengthEncoding(t: Token) MatchLength { - var c = match_lengths[match_lengths_index[t.len_lit]]; - c.extra_length = t.len_lit - c.base_scaled; - return c; -} - -// Returns the distance code corresponding to a specific distance. -// Distance code is in range: 0 - 29. -pub fn distanceCode(t: Token) u8 { - var dist: u16 = t.dist; - if (dist < match_distances_index.len) { - return match_distances_index[dist]; - } - dist >>= 7; - if (dist < match_distances_index.len) { - return match_distances_index[dist] + 14; - } - dist >>= 7; - return match_distances_index[dist] + 28; -} - -pub fn distanceEncoding(t: Token) MatchDistance { - var c = match_distances[t.distanceCode()]; - c.extra_distance = t.dist - c.base_scaled; - return c; -} - -pub fn lengthExtraBits(code: u32) u8 { - return match_lengths[code - length_codes_start].extra_bits; -} - -pub fn matchLength(code: u8) MatchLength { - return match_lengths[code]; -} - -pub fn matchDistance(code: u8) MatchDistance { - return match_distances[code]; -} - -pub fn distanceExtraBits(code: u32) u8 { - return match_distances[code].extra_bits; -} - -pub fn show(t: Token) void { - if (t.kind == .literal) { - print("L('{c}'), ", .{t.literal()}); - } else { - print("M({d}, {d}), ", .{ t.distance(), t.length() }); - } -} - -// Returns index in match_lengths table for each length in range 0-255. 
-const match_lengths_index = [_]u8{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 8, - 9, 9, 10, 10, 11, 11, 12, 12, 12, 12, - 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, - 15, 15, 16, 16, 16, 16, 16, 16, 16, 16, - 17, 17, 17, 17, 17, 17, 17, 17, 18, 18, - 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, - 19, 19, 19, 19, 20, 20, 20, 20, 20, 20, - 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, - 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, - 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, - 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, - 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, - 23, 23, 23, 23, 23, 23, 23, 23, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, - 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, - 26, 26, 26, 26, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, - 27, 27, 27, 27, 27, 28, -}; - -const MatchLength = struct { - code: u16, - base_scaled: u8, // base - 3, scaled to fit into u8 (0-255), same as lit_len field in Token. - base: u16, // 3-258 - extra_length: u8 = 0, - extra_bits: u4, -}; - -// match_lengths represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12) -// -// Extra Extra Extra -// Code Bits Length(s) Code Bits Lengths Code Bits Length(s) -// ---- ---- ------ ---- ---- ------- ---- ---- ------- -// 257 0 3 267 1 15,16 277 4 67-82 -// 258 0 4 268 1 17,18 278 4 83-98 -// 259 0 5 269 2 19-22 279 4 99-114 -// 260 0 6 270 2 23-26 280 4 115-130 -// 261 0 7 271 2 27-30 281 5 131-162 -// 262 0 8 272 2 31-34 282 5 163-194 -// 263 0 9 273 3 35-42 283 5 195-226 -// 264 0 10 274 3 43-50 284 5 227-257 -// 265 1 11,12 275 3 51-58 285 0 258 -// 266 1 13,14 276 3 59-66 -// -pub const length_codes_start = 257; - -const match_lengths = [_]MatchLength{ - .{ .extra_bits = 0, .base_scaled = 0, .base = 3, .code = 257 }, - .{ .extra_bits = 0, .base_scaled = 1, .base = 4, .code = 258 }, - .{ .extra_bits = 0, .base_scaled = 2, .base = 5, .code = 259 }, - .{ .extra_bits = 0, .base_scaled = 3, .base = 6, .code = 260 }, - .{ .extra_bits = 0, .base_scaled = 4, .base = 7, .code = 261 }, - .{ .extra_bits = 0, .base_scaled = 5, .base = 8, .code = 262 }, - .{ .extra_bits = 0, .base_scaled = 6, .base = 9, .code = 263 }, - .{ .extra_bits = 0, .base_scaled = 7, .base = 10, .code = 264 }, - .{ .extra_bits = 1, .base_scaled = 8, .base = 11, .code = 265 }, - .{ .extra_bits = 1, .base_scaled = 10, .base = 13, .code = 266 }, - .{ .extra_bits = 1, .base_scaled = 12, .base = 15, .code = 267 }, - .{ .extra_bits = 1, .base_scaled = 14, .base = 17, .code = 268 }, - .{ .extra_bits = 2, .base_scaled = 16, .base = 19, .code = 269 }, - .{ .extra_bits = 2, .base_scaled = 20, .base = 23, .code = 270 }, - .{ .extra_bits = 2, .base_scaled = 24, .base = 27, .code = 271 }, - .{ .extra_bits = 2, .base_scaled = 28, .base = 31, .code = 272 }, - .{ .extra_bits = 3, .base_scaled = 32, .base = 35, .code = 273 }, - .{ .extra_bits = 3, .base_scaled = 40, .base = 43, .code = 274 }, - .{ .extra_bits = 3, .base_scaled = 48, .base = 51, .code = 275 }, - .{ .extra_bits = 3, .base_scaled = 56, .base = 59, .code = 276 }, - .{ .extra_bits = 4, .base_scaled = 64, .base = 67, .code = 277 }, - .{ .extra_bits = 4, .base_scaled = 80, .base = 83, .code = 278 }, - .{ .extra_bits = 4, .base_scaled = 96, .base = 99, .code = 279 }, 
- .{ .extra_bits = 4, .base_scaled = 112, .base = 115, .code = 280 }, - .{ .extra_bits = 5, .base_scaled = 128, .base = 131, .code = 281 }, - .{ .extra_bits = 5, .base_scaled = 160, .base = 163, .code = 282 }, - .{ .extra_bits = 5, .base_scaled = 192, .base = 195, .code = 283 }, - .{ .extra_bits = 5, .base_scaled = 224, .base = 227, .code = 284 }, - .{ .extra_bits = 0, .base_scaled = 255, .base = 258, .code = 285 }, -}; - -// Used in distanceCode fn to get index in match_distance table for each distance in range 0-32767. -const match_distances_index = [_]u8{ - 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, - 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, - 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, - 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, - 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, -}; - -const MatchDistance = struct { - base_scaled: u16, // base - 1, same as Token dist field - base: u16, - extra_distance: u16 = 0, - code: u8, - extra_bits: u4, -}; - -// match_distances represents table from rfc (https://datatracker.ietf.org/doc/html/rfc1951#page-12) -// -// Extra Extra Extra -// Code Bits Dist Code Bits Dist Code Bits Distance -// ---- ---- ---- ---- ---- ------ ---- ---- -------- -// 0 0 1 10 4 33-48 20 9 1025-1536 -// 1 0 2 11 4 49-64 21 9 1537-2048 -// 2 0 3 12 5 65-96 22 10 2049-3072 -// 3 0 4 13 5 97-128 23 10 3073-4096 -// 4 1 5,6 14 6 129-192 24 11 4097-6144 -// 5 1 7,8 15 6 193-256 25 11 6145-8192 -// 6 2 9-12 16 7 257-384 26 12 8193-12288 -// 7 2 13-16 17 7 385-512 27 12 12289-16384 -// 8 3 17-24 18 8 513-768 28 13 16385-24576 -// 9 3 25-32 19 8 769-1024 29 13 24577-32768 -// -const match_distances = [_]MatchDistance{ - .{ .extra_bits = 0, .base_scaled = 0x0000, .code = 0, .base = 1 }, - .{ .extra_bits = 0, .base_scaled = 0x0001, .code = 1, .base = 2 }, - .{ .extra_bits = 0, .base_scaled = 0x0002, .code = 2, .base = 3 }, - .{ .extra_bits = 0, .base_scaled = 0x0003, .code = 3, .base = 4 }, - .{ .extra_bits = 1, .base_scaled = 0x0004, .code = 4, .base = 5 }, - .{ .extra_bits = 1, .base_scaled = 0x0006, .code = 5, .base = 7 }, - .{ .extra_bits = 2, .base_scaled = 0x0008, .code = 6, .base = 9 }, - .{ .extra_bits = 2, .base_scaled = 0x000c, .code = 7, .base = 13 }, - .{ .extra_bits = 3, .base_scaled = 0x0010, .code = 8, .base = 17 }, - .{ .extra_bits = 3, .base_scaled = 0x0018, .code = 9, .base = 25 }, - .{ .extra_bits = 4, .base_scaled = 0x0020, .code = 10, .base = 33 }, - .{ .extra_bits = 4, .base_scaled = 0x0030, .code = 11, .base = 49 }, - .{ .extra_bits = 5, .base_scaled = 0x0040, .code = 12, .base = 65 }, - .{ .extra_bits = 5, .base_scaled = 0x0060, .code = 13, .base = 97 }, - .{ .extra_bits = 6, .base_scaled = 0x0080, .code = 14, .base = 129 }, - .{ .extra_bits = 6, .base_scaled = 0x00c0, .code = 
15, .base = 193 }, - .{ .extra_bits = 7, .base_scaled = 0x0100, .code = 16, .base = 257 }, - .{ .extra_bits = 7, .base_scaled = 0x0180, .code = 17, .base = 385 }, - .{ .extra_bits = 8, .base_scaled = 0x0200, .code = 18, .base = 513 }, - .{ .extra_bits = 8, .base_scaled = 0x0300, .code = 19, .base = 769 }, - .{ .extra_bits = 9, .base_scaled = 0x0400, .code = 20, .base = 1025 }, - .{ .extra_bits = 9, .base_scaled = 0x0600, .code = 21, .base = 1537 }, - .{ .extra_bits = 10, .base_scaled = 0x0800, .code = 22, .base = 2049 }, - .{ .extra_bits = 10, .base_scaled = 0x0c00, .code = 23, .base = 3073 }, - .{ .extra_bits = 11, .base_scaled = 0x1000, .code = 24, .base = 4097 }, - .{ .extra_bits = 11, .base_scaled = 0x1800, .code = 25, .base = 6145 }, - .{ .extra_bits = 12, .base_scaled = 0x2000, .code = 26, .base = 8193 }, - .{ .extra_bits = 12, .base_scaled = 0x3000, .code = 27, .base = 12289 }, - .{ .extra_bits = 13, .base_scaled = 0x4000, .code = 28, .base = 16385 }, - .{ .extra_bits = 13, .base_scaled = 0x6000, .code = 29, .base = 24577 }, -}; - -test "size" { - try expect(@sizeOf(Token) == 4); -} - -// testing table https://datatracker.ietf.org/doc/html/rfc1951#page-12 -test "MatchLength" { - var c = Token.initMatch(1, 4).lengthEncoding(); - try expect(c.code == 258); - try expect(c.extra_bits == 0); - try expect(c.extra_length == 0); - - c = Token.initMatch(1, 11).lengthEncoding(); - try expect(c.code == 265); - try expect(c.extra_bits == 1); - try expect(c.extra_length == 0); - - c = Token.initMatch(1, 12).lengthEncoding(); - try expect(c.code == 265); - try expect(c.extra_bits == 1); - try expect(c.extra_length == 1); - - c = Token.initMatch(1, 130).lengthEncoding(); - try expect(c.code == 280); - try expect(c.extra_bits == 4); - try expect(c.extra_length == 130 - 115); -} - -test "MatchDistance" { - var c = Token.initMatch(1, 4).distanceEncoding(); - try expect(c.code == 0); - try expect(c.extra_bits == 0); - try expect(c.extra_distance == 0); - - c = Token.initMatch(192, 4).distanceEncoding(); - try expect(c.code == 14); - try expect(c.extra_bits == 6); - try expect(c.extra_distance == 192 - 129); -} - -test "match_lengths" { - for (match_lengths, 0..) |ml, i| { - try expect(@as(u16, ml.base_scaled) + 3 == ml.base); - try expect(i + 257 == ml.code); - } - - for (match_distances, 0..) 
|mo, i| { - try expect(mo.base_scaled + 1 == mo.base); - try expect(i == mo.code); - } -} diff --git a/lib/std/compress/flate/token.zig b/lib/std/compress/flate/token.zig new file mode 100644 index 000000000000..3c0866896d2c --- /dev/null +++ b/lib/std/compress/flate/token.zig @@ -0,0 +1,286 @@ +const std = @import("std"); +const builtin = @import("builtin"); + +pub const min_length = 3; +pub const max_length = 258; + +pub const min_distance = 1; +pub const max_distance = std.compress.flate.history_len; + +pub const codegen_order: [19]u8 = .{ + 16, 17, 18, + 0, 8, // + 7, 9, + 6, 10, + 5, 11, + 4, 12, + 3, 13, + 2, 14, + 1, 15, +}; + +pub const fixed_lit_codes = fixed_lit[0]; +pub const fixed_lit_bits = fixed_lit[1]; +const fixed_lit = blk: { + var codes: [286]u16 = undefined; + var bits: [286]u4 = undefined; + + for (0..143 + 1, 0b00110000..0b10111111 + 1) |i, v| { + codes[i] = @bitReverse(@as(u8, v)); + bits[i] = 8; + } + for (144..255 + 1, 0b110010000..0b111111111 + 1) |i, v| { + codes[i] = @bitReverse(@as(u9, v)); + bits[i] = 9; + } + for (256..279 + 1, 0b0000000..0b0010111 + 1) |i, v| { + codes[i] = @bitReverse(@as(u7, v)); + bits[i] = 7; + } + for (280..287 - 2 + 1, 0b11000000..0b11000111 - 2 + 1) |i, v| { + codes[i] = @bitReverse(@as(u8, v)); + bits[i] = 8; + } + break :blk .{ codes, bits }; +}; + +pub const fixed_dist_codes = fixed_dist[0]; +pub const fixed_dist_bits = fixed_dist[1]; +const fixed_dist = blk: { + var codes: [30]u16 = undefined; + const bits: [30]u4 = @splat(5); + + for (0..30) |i| { + codes[i] = @bitReverse(@as(u5, i)); + } + break :blk .{ codes, bits }; +}; + +// All parameters of codes can be derived mathematically, however some are faster to +// do via a lookup table. For ReleaseSmall, we do everything mathematically to save space. +pub const LenCode = if (builtin.mode != .ReleaseSmall) LookupLenCode else ShortLenCode; +pub const DistCode = if (builtin.mode != .ReleaseSmall) LookupDistCode else ShortDistCode; +const ShortLenCode = ShortCode(u8, u2, u3, true); +const ShortDistCode = ShortCode(u15, u1, u4, false); +/// Length and distance codes have the following format. +/// +/// For example, length code 0b1101 (13 or literal 270) has high_bits=0b01 and high_log2=3 +/// and is 1_01_xx (2 extra bits). It is then offset by the min length of 3. +/// ^ bit 4 = 2 + high_log2 - 1 +/// +/// An exception is length codes, where value 255 is assigned the special zero-bit code 28 or +/// literal 285.
+fn ShortCode(Value: type, HighBits: type, HighLog2: type, len_special: bool) type { + return packed struct(u5) { + /// Bits preceding high bit or start if none + high_bits: HighBits, + /// High bit, 0 means none, otherwise it is at bit `x + high_log2 - 1` + high_log2: HighLog2, + + pub fn fromVal(v: Value) @This() { + if (len_special and v == 255) return .fromInt(28); + const high_bits = @bitSizeOf(HighBits) + 1; + const bits = @bitSizeOf(Value) - @clz(v); + if (bits <= high_bits) return @bitCast(@as(u5, @intCast(v))); + const high = v >> @intCast(bits - high_bits); + return .{ .high_bits = @truncate(high), .high_log2 = @intCast(bits - high_bits + 1) }; + } + + /// `@ctz(return) >= extraBits()` + pub fn base(c: @This()) Value { + if (len_special and c.toInt() == 28) return 255; + if (c.high_log2 <= 1) return @as(u5, @bitCast(c)); + const high_value = (@as(Value, @intFromBool(c.high_log2 != 0)) << @bitSizeOf(HighBits)) | c.high_bits; + const high_start = @as(std.math.Log2Int(Value), c.high_log2 - 1); + return @shlExact(high_value, high_start); + } + + const max_extra = @bitSizeOf(Value) - (1 + @bitSizeOf(HighLog2)); + pub fn extraBits(c: @This()) std.math.IntFittingRange(0, max_extra) { + if (len_special and c.toInt() == 28) return 0; + return @intCast(c.high_log2 -| 1); + } + + pub fn toInt(c: @This()) u5 { + return @bitCast(c); + } + + pub fn fromInt(x: u5) @This() { + return @bitCast(x); + } + }; +} + +const LookupLenCode = packed struct(u5) { + code: ShortLenCode, + + const code_table = table: { + var codes: [256]ShortLenCode = undefined; + for (0.., &codes) |v, *c| { + c.* = .fromVal(v); + } + break :table codes; + }; + + const base_table = table: { + var bases: [29]u8 = undefined; + for (0.., &bases) |c, *b| { + b.* = ShortLenCode.fromInt(c).base(); + } + break :table bases; + }; + + pub fn fromVal(v: u8) LookupLenCode { + return .{ .code = code_table[v] }; + } + + /// `@ctz(return) >= extraBits()` + pub fn base(c: LookupLenCode) u8 { + return base_table[c.toInt()]; + } + + pub fn extraBits(c: LookupLenCode) u3 { + return c.code.extraBits(); + } + + pub fn toInt(c: LookupLenCode) u5 { + return @bitCast(c); + } + + pub fn fromInt(x: u5) LookupLenCode { + return @bitCast(x); + } +}; + +const LookupDistCode = packed struct(u5) { + code: ShortDistCode, + + const base_table = table: { + var bases: [30]u15 = undefined; + for (0.., &bases) |c, *b| { + b.* = ShortDistCode.fromInt(c).base(); + } + break :table bases; + }; + + pub fn fromVal(v: u15) LookupDistCode { + return .{ .code = .fromVal(v) }; + } + + /// `@ctz(return) >= extraBits()` + pub fn base(c: LookupDistCode) u15 { + return base_table[c.toInt()]; + } + + pub fn extraBits(c: LookupDistCode) u4 { + return c.code.extraBits(); + } + + pub fn toInt(c: LookupDistCode) u5 { + return @bitCast(c); + } + + pub fn fromInt(x: u5) LookupDistCode { + return @bitCast(x); + } +}; + +test LenCode { + inline for ([_]type{ ShortLenCode, LookupLenCode }) |Code| { + // Check against the RFC 1951 table + for (0.., [_]struct { + base: u8, + extra_bits: u4, + }{ + // zig fmt: off + .{ .base = 3 - min_length, .extra_bits = 0 }, + .{ .base = 4 - min_length, .extra_bits = 0 }, + .{ .base = 5 - min_length, .extra_bits = 0 }, + .{ .base = 6 - min_length, .extra_bits = 0 }, + .{ .base = 7 - min_length, .extra_bits = 0 }, + .{ .base = 8 - min_length, .extra_bits = 0 }, + .{ .base = 9 - min_length, .extra_bits = 0 }, + .{ .base = 10 - min_length, .extra_bits = 0 }, + .{ .base = 11 - min_length, .extra_bits = 1 }, + .{ .base = 13 - min_length, .extra_bits = 1 
}, + .{ .base = 15 - min_length, .extra_bits = 1 }, + .{ .base = 17 - min_length, .extra_bits = 1 }, + .{ .base = 19 - min_length, .extra_bits = 2 }, + .{ .base = 23 - min_length, .extra_bits = 2 }, + .{ .base = 27 - min_length, .extra_bits = 2 }, + .{ .base = 31 - min_length, .extra_bits = 2 }, + .{ .base = 35 - min_length, .extra_bits = 3 }, + .{ .base = 43 - min_length, .extra_bits = 3 }, + .{ .base = 51 - min_length, .extra_bits = 3 }, + .{ .base = 59 - min_length, .extra_bits = 3 }, + .{ .base = 67 - min_length, .extra_bits = 4 }, + .{ .base = 83 - min_length, .extra_bits = 4 }, + .{ .base = 99 - min_length, .extra_bits = 4 }, + .{ .base = 115 - min_length, .extra_bits = 4 }, + .{ .base = 131 - min_length, .extra_bits = 5 }, + .{ .base = 163 - min_length, .extra_bits = 5 }, + .{ .base = 195 - min_length, .extra_bits = 5 }, + .{ .base = 227 - min_length, .extra_bits = 5 }, + .{ .base = 258 - min_length, .extra_bits = 0 }, + }) |code, params| { + // zig fmt: on + const c: u5 = @intCast(code); + try std.testing.expectEqual(params.extra_bits, Code.extraBits(.fromInt(@intCast(c)))); + try std.testing.expectEqual(params.base, Code.base(.fromInt(@intCast(c)))); + for (params.base..params.base + @shlExact(@as(u16, 1), params.extra_bits) - + @intFromBool(c == 27)) |v| + { + try std.testing.expectEqual(c, Code.fromVal(@intCast(v)).toInt()); + } + } + } +} + +test DistCode { + inline for ([_]type{ ShortDistCode, LookupDistCode }) |Code| { + for (0.., [_]struct { + base: u15, + extra_bits: u4, + }{ + // zig fmt: off + .{ .base = 1 - min_distance, .extra_bits = 0 }, + .{ .base = 2 - min_distance, .extra_bits = 0 }, + .{ .base = 3 - min_distance, .extra_bits = 0 }, + .{ .base = 4 - min_distance, .extra_bits = 0 }, + .{ .base = 5 - min_distance, .extra_bits = 1 }, + .{ .base = 7 - min_distance, .extra_bits = 1 }, + .{ .base = 9 - min_distance, .extra_bits = 2 }, + .{ .base = 13 - min_distance, .extra_bits = 2 }, + .{ .base = 17 - min_distance, .extra_bits = 3 }, + .{ .base = 25 - min_distance, .extra_bits = 3 }, + .{ .base = 33 - min_distance, .extra_bits = 4 }, + .{ .base = 49 - min_distance, .extra_bits = 4 }, + .{ .base = 65 - min_distance, .extra_bits = 5 }, + .{ .base = 97 - min_distance, .extra_bits = 5 }, + .{ .base = 129 - min_distance, .extra_bits = 6 }, + .{ .base = 193 - min_distance, .extra_bits = 6 }, + .{ .base = 257 - min_distance, .extra_bits = 7 }, + .{ .base = 385 - min_distance, .extra_bits = 7 }, + .{ .base = 513 - min_distance, .extra_bits = 8 }, + .{ .base = 769 - min_distance, .extra_bits = 8 }, + .{ .base = 1025 - min_distance, .extra_bits = 9 }, + .{ .base = 1537 - min_distance, .extra_bits = 9 }, + .{ .base = 2049 - min_distance, .extra_bits = 10 }, + .{ .base = 3073 - min_distance, .extra_bits = 10 }, + .{ .base = 4097 - min_distance, .extra_bits = 11 }, + .{ .base = 6145 - min_distance, .extra_bits = 11 }, + .{ .base = 8193 - min_distance, .extra_bits = 12 }, + .{ .base = 12289 - min_distance, .extra_bits = 12 }, + .{ .base = 16385 - min_distance, .extra_bits = 13 }, + .{ .base = 24577 - min_distance, .extra_bits = 13 }, + }) |code, params| { + // zig fmt: on + const c: u5 = @intCast(code); + try std.testing.expectEqual(params.extra_bits, Code.extraBits(.fromInt(@intCast(c)))); + try std.testing.expectEqual(params.base, Code.base(.fromInt(@intCast(c)))); + for (params.base..params.base + @shlExact(@as(u16, 1), params.extra_bits)) |v| { + try std.testing.expectEqual(c, Code.fromVal(@intCast(v)).toInt()); + } + } + } +}
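A worked example of the packed-code arithmetic documented above, exercising LenCode and DistCode exactly as declared in this file; if placed beside the two tests above it needs no extra imports, and only the test name and the concrete values (symbol 270, distance 300) are chosen for illustration:

test "LenCode/DistCode worked example matches RFC 1951" {
    // Length symbol 270 is code 13: lengths 23..26, 2 extra bits.
    const l: LenCode = .fromInt(13);
    try std.testing.expectEqual(2, l.extraBits());
    try std.testing.expectEqual(23 - min_length, l.base());
    // Any length in 23..26 maps back to code 13, e.g. 24.
    try std.testing.expectEqual(13, LenCode.fromVal(24 - min_length).toInt());

    // Distance 300 lies in 257..384, which is distance code 16 with 7 extra bits.
    const d: DistCode = .fromVal(300 - min_distance);
    try std.testing.expectEqual(16, d.toInt());
    try std.testing.expectEqual(7, d.extraBits());
    try std.testing.expectEqual(257 - min_distance, d.base());
}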