From 4f1a977207ec8295bb6c9a2723398118d5b61443 Mon Sep 17 00:00:00 2001 From: Tom Payne Date: Sun, 12 Sep 2021 00:12:25 +0200 Subject: [PATCH] Detect archive format from content if needed --- internal/chezmoi/archivereadersystem.go | 71 +++++++++++++++---- internal/chezmoi/sourcestate.go | 7 +- .../cmd/testdata/scripts/externalguess.txt | 38 ++++++++++ 3 files changed, 103 insertions(+), 13 deletions(-) create mode 100644 internal/cmd/testdata/scripts/externalguess.txt diff --git a/internal/chezmoi/archivereadersystem.go b/internal/chezmoi/archivereadersystem.go index 582a2f02b99..660074da0ff 100644 --- a/internal/chezmoi/archivereadersystem.go +++ b/internal/chezmoi/archivereadersystem.go @@ -14,7 +14,17 @@ import ( "strings" ) -var errUnknownFormat = errors.New("unknown format") +type archiveFormat string + +const ( + archiveFormatUnknown archiveFormat = "" + archiveFormatTar archiveFormat = "tar" + archiveFormatTarGz archiveFormat = "tar.gz" + archiveFormatTarBz2 archiveFormat = "tar.bz2" + archiveFormatZip archiveFormat = "zip" +) + +var errUnknownArchiveFormat = errors.New("unknown archive format") // An walkArchiveFunc is called once for each entry in an archive. 
type walkArchiveFunc func(name string, info fs.FileInfo, r io.Reader, linkname string) error @@ -43,7 +53,12 @@ func NewArchiveReaderSystem(path string, data []byte, options ArchiveReaderSyste linkname: make(map[AbsPath]string), } - if err := walkArchive(path, data, func(name string, info fs.FileInfo, r io.Reader, linkname string) error { + archiveFormat, err := guessArchiveFormat(path, data) + if err != nil { + return nil, err + } + + if err := walkArchive(archiveFormat, data, func(name string, info fs.FileInfo, r io.Reader, linkname string) error { if options.StripComponents > 0 { components := strings.Split(name, "/") if len(components) <= options.StripComponents { @@ -114,26 +129,58 @@ func (s *ArchiveReaderSystem) Readlink(name AbsPath) (string, error) { return "", fs.ErrNotExist } -// walkArchive walks over all the entries in an archive. path is used as a hint -// for the archive format. -func walkArchive(path string, data []byte, f walkArchiveFunc) error { - pathLower := strings.ToLower(path) - if strings.HasSuffix(pathLower, ".zip") { +// guessArchiveFormat guesses the archive format from the path and data. 
+func guessArchiveFormat(path string, data []byte) (archiveFormat, error) {
+	switch pathLower := strings.ToLower(path); { // a recognized extension wins; matching is case-insensitive
+	case strings.HasSuffix(pathLower, ".tar"):
+		return archiveFormatTar, nil
+	case strings.HasSuffix(pathLower, ".tar.bz2") || strings.HasSuffix(pathLower, ".tbz2"):
+		return archiveFormatTarBz2, nil
+	case strings.HasSuffix(pathLower, ".tar.gz") || strings.HasSuffix(pathLower, ".tgz"):
+		return archiveFormatTarGz, nil
+	case strings.HasSuffix(pathLower, ".zip"):
+		return archiveFormatZip, nil
+	}
+
+	switch { // no recognized extension: sniff the content instead; the first matching case wins
+	case bytes.HasPrefix(data, []byte{0x1f, 0x8b, 0x08}): // gzip member header: magic 0x1f 0x8b, method 0x08 (deflate)
+		return archiveFormatTarGz, nil // the gzip payload itself is not inspected here; it is assumed to be a tar
+	case bytes.HasPrefix(data, []byte{'P', 'K', 0x03, 0x04}): // zip local file header signature
+		return archiveFormatZip, nil
+	case isTarArchive(bytes.NewReader(data)): // a tar header parses from the raw bytes
+		return archiveFormatTar, nil
+	case isTarArchive(bzip2.NewReader(bytes.NewReader(data))): // a tar header parses after bzip2 decompression
+		return archiveFormatTarBz2, nil
+	}
+
+	return archiveFormatUnknown, errUnknownArchiveFormat // neither the path nor the content matched
+}
+
+// isTarArchive reports whether the first tar header can be read from r.
+func isTarArchive(r io.Reader) bool {
+	tarReader := tar.NewReader(r)
+	_, err := tarReader.Next() // only the first entry is read; the rest of r is not validated
+	return err == nil
+}
+
+// walkArchive walks over all the entries in an archive.
+func walkArchive(format archiveFormat, data []byte, f walkArchiveFunc) error { + if format == archiveFormatZip { return walkArchiveZip(bytes.NewReader(data), int64(len(data)), f) } var r io.Reader = bytes.NewReader(data) - switch { - case strings.HasSuffix(pathLower, ".tar"): - case strings.HasSuffix(pathLower, ".tar.bz2") || strings.HasSuffix(pathLower, ".tbz2"): + switch format { + case archiveFormatTar: + case archiveFormatTarBz2: r = bzip2.NewReader(r) - case strings.HasSuffix(pathLower, ".tar.gz") || strings.HasSuffix(pathLower, ".tgz"): + case archiveFormatTarGz: var err error r, err = gzip.NewReader(r) if err != nil { return err } default: - return errUnknownFormat + return errUnknownArchiveFormat } return walkArchiveTar(r, f) } diff --git a/internal/chezmoi/sourcestate.go b/internal/chezmoi/sourcestate.go index 98eb0c50f24..5e4b74e3d46 100644 --- a/internal/chezmoi/sourcestate.go +++ b/internal/chezmoi/sourcestate.go @@ -1507,7 +1507,12 @@ func (s *SourceState) readExternalArchive(ctx context.Context, externalRelPath R }, } - if err := walkArchive(path, data, func(name string, info fs.FileInfo, r io.Reader, linkname string) error { + archiveFormat, err := guessArchiveFormat(path, data) + if err != nil { + return nil, err + } + + if err := walkArchive(archiveFormat, data, func(name string, info fs.FileInfo, r io.Reader, linkname string) error { if external.StripComponents > 0 { components := strings.Split(name, "/") if len(components) <= external.StripComponents { diff --git a/internal/cmd/testdata/scripts/externalguess.txt b/internal/cmd/testdata/scripts/externalguess.txt new file mode 100644 index 00000000000..72ad88399ab --- /dev/null +++ b/internal/cmd/testdata/scripts/externalguess.txt @@ -0,0 +1,38 @@ +[windows] skip 'UNIX only' + +httpd www + +# test that chezmoi sniffs the format of tar files +exec tar -cf www/archive.tar archive/ +cp www/archive.tar www/archive +chezmoi apply --force --refresh-externals +cmp $HOME/.dir/dir/file golden/dir/file + +# 
test that chezmoi sniffs the format of tar.bz2 files +exec tar -cjf www/archive.tar.bz2 archive/ +cp www/archive.tar.bz2 www/archive +chezmoi apply --force --refresh-externals +cmp $HOME/.dir/dir/file golden/dir/file + +# test that chezmoi sniffs the format of tar.gz files +exec tar -czf www/archive.tar.gz archive/ +cp www/archive.tar.gz www/archive +chezmoi apply --force --refresh-externals +cmp $HOME/.dir/dir/file golden/dir/file + +# test that chezmoi sniffs the format of zip files +exec zip -r www/archive.zip archive +cp www/archive.zip www/archive +chezmoi apply --force --refresh-externals +cmp $HOME/.dir/dir/file golden/dir/file + +-- archive/dir/file -- +# contents of dir/file +-- golden/dir/file -- +# contents of dir/file +-- home/user/.local/share/chezmoi/.chezmoiexternal.yaml -- +.dir: + type: archive + url: "{{ env "HTTPD_URL" }}/archive" + stripComponents: 1 +-- www/.keep --