forked from quay/clair
/
tar.go
181 lines (156 loc) · 4.76 KB
/
tar.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
// Copyright 2015 clair authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package utils
import (
"archive/tar"
"bufio"
"bytes"
"compress/bzip2"
"compress/gzip"
"errors"
"io"
"io/ioutil"
"os/exec"
"strings"
)
var (
// ErrCouldNotExtract occurs when an extraction fails.
ErrCouldNotExtract = errors.New("utils: could not extract the archive")
// ErrExtractedFileTooBig occurs when a file to extract is too big.
ErrExtractedFileTooBig = errors.New("utils: could not extract one or more files from the archive: file too big")
readLen = 6 // max bytes to sniff
gzipHeader = []byte{0x1f, 0x8b}
bzip2Header = []byte{0x42, 0x5a, 0x68}
xzHeader = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}
)
// XzReader is an io.ReadCloser which decompresses xz compressed data.
type XzReader struct {
io.ReadCloser
cmd *exec.Cmd
closech chan error
}
// NewXzReader shells out to a command line xz executable (if
// available) to decompress the given io.Reader using the xz
// compression format and returns an *XzReader.
// It is the caller's responsibility to call Close on the XzReader when done.
func NewXzReader(r io.Reader) (*XzReader, error) {
rpipe, wpipe := io.Pipe()
ex, err := exec.LookPath("xz")
if err != nil {
return nil, err
}
cmd := exec.Command(ex, "--decompress", "--stdout")
closech := make(chan error)
cmd.Stdin = r
cmd.Stdout = wpipe
go func() {
err := cmd.Run()
wpipe.CloseWithError(err)
closech <- err
}()
return &XzReader{rpipe, cmd, closech}, nil
}
func (r *XzReader) Close() error {
r.ReadCloser.Close()
r.cmd.Process.Kill()
return <-r.closech
}
// TarReadCloser embeds a *tar.Reader and the related io.Closer
// It is the caller's responsibility to call Close on TarReadCloser when
// done.
type TarReadCloser struct {
*tar.Reader
io.Closer
}
func (r *TarReadCloser) Close() error {
return r.Closer.Close()
}
// SelectivelyExtractArchive extracts the specified files and folders
// from targz data read from the given reader and store them in a map indexed by file paths
func SelectivelyExtractArchive(r io.Reader, prefix string, toExtract []string, maxFileSize int64) (map[string][]byte, error) {
data := make(map[string][]byte)
// Create a tar or tar/tar-gzip/tar-bzip2/tar-xz reader
tr, err := getTarReader(r)
if err != nil {
return data, ErrCouldNotExtract
}
defer tr.Close()
// For each element in the archive
for {
hdr, err := tr.Next()
if err == io.EOF {
break
}
if err != nil {
return data, ErrCouldNotExtract
}
// Get element filename
filename := hdr.Name
filename = strings.TrimPrefix(filename, "./")
if prefix != "" {
filename = strings.TrimPrefix(filename, prefix)
}
// Determine if we should extract the element
toBeExtracted := false
for _, s := range toExtract {
if strings.HasPrefix(filename, s) {
toBeExtracted = true
break
}
}
if toBeExtracted {
// File size limit
if maxFileSize > 0 && hdr.Size > maxFileSize {
return data, ErrExtractedFileTooBig
}
// Extract the element
if hdr.Typeflag == tar.TypeSymlink || hdr.Typeflag == tar.TypeLink || hdr.Typeflag == tar.TypeReg {
d, _ := ioutil.ReadAll(tr)
data[filename] = d
}
}
}
return data, nil
}
// getTarReader returns a TarReaderCloser associated with the specified io.Reader.
//
// Gzip/Bzip2/XZ detection is done by using the magic numbers:
// Gzip: the first two bytes should be 0x1f and 0x8b. Defined in the RFC1952.
// Bzip2: the first three bytes should be 0x42, 0x5a and 0x68. No RFC.
// XZ: the first three bytes should be 0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00. No RFC.
func getTarReader(r io.Reader) (*TarReadCloser, error) {
br := bufio.NewReader(r)
header, err := br.Peek(readLen)
if err == nil {
switch {
case bytes.HasPrefix(header, gzipHeader):
gr, err := gzip.NewReader(br)
if err != nil {
return nil, err
}
return &TarReadCloser{tar.NewReader(gr), gr}, nil
case bytes.HasPrefix(header, bzip2Header):
bzip2r := ioutil.NopCloser(bzip2.NewReader(br))
return &TarReadCloser{tar.NewReader(bzip2r), bzip2r}, nil
case bytes.HasPrefix(header, xzHeader):
xzr, err := NewXzReader(br)
if err != nil {
return nil, err
}
return &TarReadCloser{tar.NewReader(xzr), xzr}, nil
}
}
dr := ioutil.NopCloser(br)
return &TarReadCloser{tar.NewReader(dr), dr}, nil
}