Skip to content

Commit a063235

Browse files
quaesitor-scientiam, Richard Wheeler, and claude
authored
compress.deflate: build Huffman trees via the shared hash.huffman builder (#27394)
* compress.deflate: build Huffman trees via the shared hash.huffman builder

  Replace the two in-tree copies of the canonical Huffman code-assignment (RFC 1951 §3.2.2 bl_count / next_code) with calls to hash.huffman, the shared builder added for #27358:

  - Decode: build_huff_tree() now fills its flat LSB-first lookup table via huffman.flat_table(). The table format (entry = (symbol << 5) | length, 0xFFFF_FFFF for invalid) and the decode loop are unchanged. flat_table() assigns each code inline while filling (no intermediate codes array) and skips the invalid pre-fill for complete codes, so it does strictly less work than the old hand-rolled loop on the common path. build_huff_tree() now returns `!HuffTree` so an over-subscribed (malformed) code surfaces as a clean error instead of a silently corrupt table; all call sites propagate with `!`.
  - Encode: fixed_litlen_encode() takes its reversed codes straight from huffman.build(..., bit_order: .lsb_first).codes.

  No behavior change for any valid DEFLATE stream (incomplete codes are still accepted).

  Benchmarked before/after with -prod: decode-dominated throughput (8 MiB fixed-Huffman text, 2 MiB dynamic-Huffman) is unchanged, and the build-dominated paths (thousands of small dynamic-block streams) are ~5-11% faster than the old code, since the fixed trees and most dynamic trees are complete and skip the invalid pre-fill. compress.deflate / gzip / zlib suites stay green.

  This is the second half of #27358 (depends on the hash.huffman module PR).

  Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

* compress.deflate: propagate errors from fixed_litlen_encode instead of panicking

  Address review on #27394: fixed_litlen_encode() now returns `!([]u32, []int)` and deflate_compress_fixed() returns `![]u8`, propagating the (in practice unreachable) huffman.build error instead of panicking. This matches the decode-side build_huff_tree(), which already returns `!` in this PR, and keeps a clean error path end to end. The public compress* APIs are unchanged (already `![]u8`).

  Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>

---------

Co-authored-by: Richard Wheeler <quaesitor.scientiam@gmail.com>
Co-authored-by: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 164b303 commit a063235

3 files changed

Lines changed: 32 additions & 76 deletions

File tree

vlib/compress/deflate/deflate.v

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -129,12 +129,12 @@ pub fn compress(data []u8, format CompressParams) ![]u8 {
129129
return match format.format {
130130
.zlib { compress_zlib(data) }
131131
.gzip { compress_gzip(data) }
132-
.raw_deflate { deflate_compress_fixed(data) }
132+
.raw_deflate { deflate_compress_fixed(data)! }
133133
}
134134
}
135135

136136
pub fn compress_zlib(data []u8) ![]u8 {
137-
payload := deflate_compress_fixed(data)
137+
payload := deflate_compress_fixed(data)!
138138
cksum := adler32.sum(data)
139139
mut out := []u8{cap: 2 + payload.len + 4}
140140
out << u8(0x78) // CMF: CM=8 deflate, CINFO=7 (32K window)
@@ -146,7 +146,7 @@ pub fn compress_zlib(data []u8) ![]u8 {
146146

147147
// compress_gzip compresses data into a gzip stream (RFC 1952).
148148
pub fn compress_gzip(data []u8) ![]u8 {
149-
payload := deflate_compress_fixed(data)
149+
payload := deflate_compress_fixed(data)!
150150
mut out := []u8{cap: 10 + payload.len + 8}
151151
// 10-byte gzip header: ID1 ID2 CM FLG MTIME(4) XFL OS
152152
out << [u8(0x1f), 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff]
@@ -158,7 +158,7 @@ pub fn compress_gzip(data []u8) ![]u8 {
158158

159159
// compress_raw compresses data to a raw RFC 1951 DEFLATE stream.
160160
pub fn compress_raw(data []u8) ![]u8 {
161-
return deflate_compress_fixed(data)
161+
return deflate_compress_fixed(data)!
162162
}
163163

164164
// decompress decompresses a zlib (RFC 1950), gzip (RFC 1952), or raw DEFLATE (RFC 1951) stream.

vlib/compress/deflate/deflate_compress.v

Lines changed: 9 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
module deflate
22

3+
import hash.huffman
4+
35
const deflate_hash_bits = 15
46
const deflate_hash_size = 1 << deflate_hash_bits
57
const deflate_max_chain = 64
@@ -8,36 +10,12 @@ const deflate_max_match = 258
810
const deflate_window = 32768
911

1012
// fixed_litlen_encode returns (reversed_codes, code_lengths) for fixed Huffman lit/len.
11-
fn fixed_litlen_encode() ([]u32, []int) {
13+
// The LSB-first (bit-reversed) codes come straight from the shared canonical
14+
// builder, since the encoder writes bits LSB-first.
15+
fn fixed_litlen_encode() !([]u32, []int) {
1216
lens := fixed_litlen_lengths()
13-
mut max_bits := 0
14-
for l in lens {
15-
if l > max_bits {
16-
max_bits = l
17-
}
18-
}
19-
mut bl_count := []int{len: max_bits + 1}
20-
for l in lens {
21-
if l > 0 {
22-
bl_count[l]++
23-
}
24-
}
25-
mut next_code := []u32{len: max_bits + 1}
26-
mut c := u32(0)
27-
for bits in 1 .. max_bits + 1 {
28-
c = (c + u32(bl_count[bits - 1])) << 1
29-
next_code[bits] = c
30-
}
31-
mut codes := []u32{len: 288}
32-
for sym in 0 .. 288 {
33-
l := lens[sym]
34-
if l == 0 {
35-
continue
36-
}
37-
codes[sym] = bit_reverse(next_code[l], l)
38-
next_code[l]++
39-
}
40-
return codes, lens
17+
t := huffman.build(lengths: lens, max_bits: 9, bit_order: .lsb_first)!
18+
return t.codes, lens
4119
}
4220

4321
// fixed_dist_encode returns (reversed_codes, code_lengths) for fixed Huffman distance.
@@ -135,8 +113,8 @@ fn (mut w BitWriter) flush() {
135113

136114
// deflate_compress_fixed compresses data to RFC 1951 DEFLATE using fixed Huffman codes.
137115
@[direct_array_access]
138-
fn deflate_compress_fixed(data []u8) []u8 {
139-
ll_codes, ll_lens := fixed_litlen_encode()
116+
fn deflate_compress_fixed(data []u8) ![]u8 {
117+
ll_codes, ll_lens := fixed_litlen_encode()!
140118
d_codes, d_lens := fixed_dist_encode()
141119
mut w := BitWriter{}
142120
// BFINAL=1, BTYPE=01 (fixed Huffman)

vlib/compress/deflate/deflate_inflate.v

Lines changed: 19 additions & 41 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
11
module deflate
22

3+
import hash.huffman
4+
35
// vfmt off
46
// RFC 1951 length/distance decode tables
57
const length_bases = [3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
@@ -39,7 +41,7 @@ struct HuffTree {
3941
max_bits int
4042
}
4143

42-
fn build_huff_tree(lengths []int) HuffTree {
44+
fn build_huff_tree(lengths []int) !HuffTree {
4345
mut max_bits := 0
4446
for l in lengths {
4547
if l > max_bits {
@@ -52,36 +54,12 @@ fn build_huff_tree(lengths []int) HuffTree {
5254
max_bits: 0
5355
}
5456
}
55-
mut bl_count := []int{len: max_bits + 1}
56-
for l in lengths {
57-
if l > 0 {
58-
bl_count[l]++
59-
}
60-
}
61-
mut next_code := []u32{len: max_bits + 1}
62-
mut c := u32(0)
63-
for bits in 1 .. max_bits + 1 {
64-
c = (c + u32(bl_count[bits - 1])) << 1
65-
next_code[bits] = c
66-
}
67-
table_size := 1 << max_bits
68-
mut table := []u32{len: table_size, init: 0xffff_ffff}
69-
for sym in 0 .. lengths.len {
70-
l := lengths[sym]
71-
if l == 0 {
72-
continue
73-
}
74-
code := next_code[l]
75-
next_code[l]++
76-
// Reverse code for LSB-first bit reader
77-
rev := bit_reverse(code, l)
78-
step := 1 << l
79-
mut idx := int(rev)
80-
for idx < table_size {
81-
table[idx] = (u32(sym) << 5) | u32(l)
82-
idx += step
83-
}
84-
}
57+
// Build the flat LSB-first lookup table the decode loop expects directly
58+
// from the lengths via the shared canonical builder: entry = (symbol << 5) |
59+
// length, 0xFFFF_FFFF for invalid. flat_table() allocates no intermediate
60+
// codes array, so this matches the hand-rolled loop's cost. Over-subscribed
61+
// lengths now surface as an error instead of a silently corrupt table.
62+
table := huffman.flat_table(lengths: lengths, max_bits: max_bits, bit_order: .lsb_first)!
8563
return HuffTree{
8664
table: table
8765
max_bits: max_bits
@@ -192,8 +170,8 @@ fn inflate_with_consumed(data []u8) !InflateResult {
192170
buf: data
193171
}
194172
mut out := []u8{}
195-
fixed_ll := build_huff_tree(fixed_litlen_lengths())
196-
fixed_d := build_huff_tree([]int{len: 32, init: 5})
173+
fixed_ll := build_huff_tree(fixed_litlen_lengths())!
174+
fixed_d := build_huff_tree([]int{len: 32, init: 5})!
197175
for {
198176
bfinal := r.read_bits(1)!
199177
btype := r.read_bits(2)!
@@ -220,7 +198,7 @@ fn inflate_with_consumed(data []u8) !InflateResult {
220198
for i in 0 .. hclen {
221199
cl_lens[cl_order[i]] = int(r.read_bits(3)!)
222200
}
223-
cl_tree := build_huff_tree(cl_lens)
201+
cl_tree := build_huff_tree(cl_lens)!
224202
mut all_lens := []int{}
225203
for all_lens.len < hlit + hdist {
226204
sym := r.huff_decode(cl_tree)!
@@ -249,8 +227,8 @@ fn inflate_with_consumed(data []u8) !InflateResult {
249227
return error('inflate: bad code length symbol')
250228
}
251229
}
252-
ll_tree := build_huff_tree(all_lens[..hlit])
253-
d_tree := build_huff_tree(all_lens[hlit..])
230+
ll_tree := build_huff_tree(all_lens[..hlit])!
231+
d_tree := build_huff_tree(all_lens[hlit..])!
254232
inflate_block(mut r, mut out, ll_tree, d_tree)!
255233
}
256234
else {
@@ -284,8 +262,8 @@ fn inflate_with_callback(data []u8, cb ChunkCallback, userdata voidptr) !Inflate
284262
mut out := []u8{}
285263
mut state := InflateStreamState{}
286264
mut aborted := false
287-
fixed_ll := build_huff_tree(fixed_litlen_lengths())
288-
fixed_d := build_huff_tree([]int{len: 32, init: 5})
265+
fixed_ll := build_huff_tree(fixed_litlen_lengths())!
266+
fixed_d := build_huff_tree([]int{len: 32, init: 5})!
289267
for {
290268
bfinal := r.read_bits(1)!
291269
btype := r.read_bits(2)!
@@ -318,7 +296,7 @@ fn inflate_with_callback(data []u8, cb ChunkCallback, userdata voidptr) !Inflate
318296
for i in 0 .. hclen {
319297
cl_lens[cl_order[i]] = int(r.read_bits(3)!)
320298
}
321-
cl_tree := build_huff_tree(cl_lens)
299+
cl_tree := build_huff_tree(cl_lens)!
322300
mut all_lens := []int{}
323301
for all_lens.len < hlit + hdist {
324302
sym := r.huff_decode(cl_tree)!
@@ -347,8 +325,8 @@ fn inflate_with_callback(data []u8, cb ChunkCallback, userdata voidptr) !Inflate
347325
return error('inflate: bad code length symbol')
348326
}
349327
}
350-
ll_tree := build_huff_tree(all_lens[..hlit])
351-
d_tree := build_huff_tree(all_lens[hlit..])
328+
ll_tree := build_huff_tree(all_lens[..hlit])!
329+
d_tree := build_huff_tree(all_lens[hlit..])!
352330
if !inflate_block_stream(mut r, mut out, ll_tree, d_tree, cb, userdata, mut state)! {
353331
aborted = true
354332
}

0 commit comments

Comments
 (0)