Skip to content

Commit

Permalink
compress/flate: add Reset() to allow reusing large buffers to compres…
Browse files Browse the repository at this point in the history
…s multiple buffers

This adds a Reset() to compress/flate's decompressor and plumbs that through
to compress/zlib and compress/gzip's Readers so callers can avoid large
allocations when performing many inflate operations. In particular this
preserves the allocation of the decompressor.hist buffer, which is 32kb and
overwritten as needed while inflating.

On the benchmark described in issue 6317, produces the following speedup on
my 2.3ghz Intel Core i7 MBP with go version devel +6b696a34e0af Sun Aug 03
15:14:59 2014 -0700 darwin/amd64:

blocked.text w/out patch vs blocked.text w/ patch:
benchmark           old ns/op      new ns/op      delta
BenchmarkGunzip     8371577533     7927917687     -5.30%

benchmark           old allocs     new allocs     delta
BenchmarkGunzip     176818         148519         -16.00%

benchmark           old bytes     new bytes     delta
BenchmarkGunzip     292184936     12739528      -95.64%

flat.text vs blocked.text w/patch:
benchmark           old ns/op      new ns/op      delta
BenchmarkGunzip     7939447827     7927917687     -0.15%

benchmark           old allocs     new allocs     delta
BenchmarkGunzip     90702          148519         +63.74%

benchmark           old bytes     new bytes     delta
BenchmarkGunzip     9959528       12739528      +27.91%

Similar speedups to those bradfitz saw in  https://golang.org/cl/13416045.

Fixes golang#6317.
Fixes golang#7950.

LGTM=nigeltao
R=golang-codereviews, bradfitz, dan.kortschak, adg, nigeltao, jamesr
CC=golang-codereviews
https://golang.org/cl/97140043
  • Loading branch information
jamesr authored and wheatman committed Jun 26, 2018
1 parent 150e090 commit add7a71
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 27 deletions.
27 changes: 27 additions & 0 deletions src/compress/flate/inflate.go
Expand Up @@ -56,6 +56,15 @@ func (e *WriteError) Error() string {
return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error()
}

// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}

// Note that much of the implementation of huffmanDecoder is also copied
// into gen.go (in package main) for the purpose of precomputing the
// fixed huffman tables so they can be included statically.
Expand Down Expand Up @@ -679,12 +688,28 @@ func makeReader(r io.Reader) Reader {
return bufio.NewReader(r)
}

func (f *decompressor) Reset(r io.Reader, dict []byte) error {
*f = decompressor{
r: makeReader(r),
bits: f.bits,
codebits: f.codebits,
hist: f.hist,
step: (*decompressor).nextBlock,
}
if dict != nil {
f.setDict(dict)
}
return nil
}

// NewReader returns a new ReadCloser that can be used
// to read the uncompressed version of r.
// If r does not also implement io.ByteReader,
// the decompressor may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser
// when finished reading.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) io.ReadCloser {
var f decompressor
f.bits = new([maxLit + maxDist]int)
Expand All @@ -700,6 +725,8 @@ func NewReader(r io.Reader) io.ReadCloser {
// the uncompressed data stream started with the given dictionary,
// which has already been read. NewReaderDict is typically used
// to read data compressed by NewWriterDict.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
var f decompressor
f.r = makeReader(r)
Expand Down
39 changes: 39 additions & 0 deletions src/compress/flate/inflate_test.go
@@ -0,0 +1,39 @@
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

import (
"bytes"
"io"
"testing"
)

func TestReset(t *testing.T) {
ss := []string{
"lorem ipsum izzle fo rizzle",
"the quick brown fox jumped over",
}

deflated := make([]bytes.Buffer, 2)
for i, s := range ss {
w, _ := NewWriter(&deflated[i], 1)
w.Write([]byte(s))
w.Close()
}

inflated := make([]bytes.Buffer, 2)

f := NewReader(&deflated[0])
io.Copy(&inflated[0], f)
f.(Resetter).Reset(&deflated[1], nil)
io.Copy(&inflated[1], f)
f.Close()

for i, s := range ss {
if s != inflated[i].String() {
t.Errorf("inflated[%d]:\ngot %q\nwant %q", i, inflated[i], s)
}
}
}
6 changes: 5 additions & 1 deletion src/compress/gzip/gunzip.go
Expand Up @@ -208,7 +208,11 @@ func (z *Reader) readHeader(save bool) error {
}

z.digest.Reset()
z.decompressor = flate.NewReader(z.r)
if z.decompressor == nil {
z.decompressor = flate.NewReader(z.r)
} else {
z.decompressor.(flate.Resetter).Reset(z.r, nil)
}
return nil
}

Expand Down
80 changes: 54 additions & 26 deletions src/compress/zlib/reader.go
Expand Up @@ -51,46 +51,36 @@ type reader struct {
scratch [4]byte
}

// NewReader creates a new io.ReadCloser.
// Reads from the returned io.ReadCloser read and decompress data from r.
// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to
// to switch to a new underlying Reader. This permits reusing a ReadCloser
// instead of allocating a new one.
type Resetter interface {
// Reset discards any buffered data and resets the Resetter as if it was
// newly initialized with the given reader.
Reset(r io.Reader, dict []byte) error
}

// NewReader creates a new ReadCloser.
// Reads from the returned ReadCloser read and decompress data from r.
// The implementation buffers input and may read more data than necessary from r.
// It is the caller's responsibility to call Close on the ReadCloser when done.
//
// The ReadCloser returned by NewReader also implements Resetter.
func NewReader(r io.Reader) (io.ReadCloser, error) {
return NewReaderDict(r, nil)
}

// NewReaderDict is like NewReader but uses a preset dictionary.
// NewReaderDict ignores the dictionary if the compressed data does not refer to it.
// If the compressed data refers to a different dictionary, NewReaderDict returns ErrDictionary.
//
// The ReadCloser returned by NewReaderDict also implements Resetter.
func NewReaderDict(r io.Reader, dict []byte) (io.ReadCloser, error) {
z := new(reader)
if fr, ok := r.(flate.Reader); ok {
z.r = fr
} else {
z.r = bufio.NewReader(r)
}
_, err := io.ReadFull(z.r, z.scratch[0:2])
err := z.Reset(r, dict)
if err != nil {
return nil, err
}
h := uint(z.scratch[0])<<8 | uint(z.scratch[1])
if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) {
return nil, ErrHeader
}
if z.scratch[1]&0x20 != 0 {
_, err = io.ReadFull(z.r, z.scratch[0:4])
if err != nil {
return nil, err
}
checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
if checksum != adler32.Checksum(dict) {
return nil, ErrDictionary
}
z.decompressor = flate.NewReaderDict(z.r, dict)
} else {
z.decompressor = flate.NewReader(z.r)
}
z.digest = adler32.New()
return z, nil
}

Expand Down Expand Up @@ -131,3 +121,41 @@ func (z *reader) Close() error {
z.err = z.decompressor.Close()
return z.err
}

func (z *reader) Reset(r io.Reader, dict []byte) error {
if fr, ok := r.(flate.Reader); ok {
z.r = fr
} else {
z.r = bufio.NewReader(r)
}
_, err := io.ReadFull(z.r, z.scratch[0:2])
if err != nil {
return err
}
h := uint(z.scratch[0])<<8 | uint(z.scratch[1])
if (z.scratch[0]&0x0f != zlibDeflate) || (h%31 != 0) {
return ErrHeader
}
haveDict := z.scratch[1]&0x20 != 0
if haveDict {
_, err = io.ReadFull(z.r, z.scratch[0:4])
if err != nil {
return err
}
checksum := uint32(z.scratch[0])<<24 | uint32(z.scratch[1])<<16 | uint32(z.scratch[2])<<8 | uint32(z.scratch[3])
if checksum != adler32.Checksum(dict) {
return ErrDictionary
}
}
if z.decompressor == nil {
if haveDict {
z.decompressor = flate.NewReaderDict(z.r, dict)
} else {
z.decompressor = flate.NewReader(z.r)
}
} else {
z.decompressor.(flate.Resetter).Reset(z.r, dict)
}
z.digest = adler32.New()
return nil
}

0 comments on commit add7a71

Please sign in to comment.