Skip to content
This repository has been archived by the owner on Jul 3, 2019. It is now read-only.

Commit

Permalink
feat(tar): switch to tarv3
Browse files Browse the repository at this point in the history
BREAKING CHANGE: this changes the underlying tar library, and thus may introduce some subtle low-level incompatibility. Also:

* The tarball packer built into pacote now works much more like the one npm injects.
* Special characters on Windows will now be escaped the way tar(1) usually does: by replacing them with the `0xf000` masked character on the way out.
* Directories won't be chowned.
  • Loading branch information
zkat committed Aug 16, 2017
1 parent 28c80a9 commit 53899c7
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 232 deletions.
27 changes: 20 additions & 7 deletions extract.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ const BB = require('bluebird')

const cacache = require('cacache')
const extractStream = require('./lib/extract-stream')
const mkdirp = BB.promisify(require('mkdirp'))
const npa = require('npm-package-arg')
const pipe = BB.promisify(require('mississippi').pipe)
const optCheck = require('./lib/util/opt-check')
const retry = require('promise-retry')
const rimraf = BB.promisify(require('rimraf'))
Expand Down Expand Up @@ -59,21 +59,34 @@ function extract (spec, dest, opts) {
}

function extractByDigest (start, spec, dest, opts) {
const xtractor = extractStream(dest, opts)
const cached = cacache.get.stream.byDigest(opts.cache, opts.integrity, opts)
return pipe(cached, xtractor).then(() => {
return mkdirp(dest).then(() => {
const xtractor = extractStream(dest, opts)
const cached = cacache.get.stream.byDigest(opts.cache, opts.integrity, opts)
cached.pipe(xtractor)
return new BB((resolve, reject) => {
cached.on('error', reject)
xtractor.on('error', reject)
xtractor.on('close', resolve)
})
}).then(() => {
opts.log.silly('pacote', `${spec} extracted to ${dest} by content address ${Date.now() - start}ms`)
})
}

let fetch
function extractByManifest (start, spec, dest, opts) {
const xtractor = extractStream(dest, opts)
return BB.resolve(null).then(() => {
return mkdirp(dest).then(() => {
const xtractor = extractStream(dest, opts)
if (!fetch) {
fetch = require('./lib/fetch')
}
return pipe(fetch.tarball(spec, opts), xtractor)
const tardata = fetch.tarball(spec, opts)
tardata.pipe(xtractor)
return new BB((resolve, reject) => {
tardata.on('error', reject)
xtractor.on('error', reject)
xtractor.on('close', resolve)
})
}).then(() => {
opts.log.silly('pacote', `${spec} extracted in ${Date.now() - start}ms`)
})
Expand Down
72 changes: 22 additions & 50 deletions lib/extract-stream.js
Original file line number Diff line number Diff line change
@@ -1,61 +1,33 @@
'use strict'

const gunzip = require('./util/gunzip-maybe')
const path = require('path')
const pipeline = require('mississippi').pipeline
const tar = require('tar-fs')
const tar = require('tar')

module.exports = extractStream
function extractStream (dest, opts) {
function extractStream (dest, opts, cb) {
opts = opts || {}
const sawIgnores = {}
return pipeline(gunzip(), tar.extract(dest, {
map: (header) => {
if (process.platform !== 'win32') {
header.uid = opts.uid == null ? header.uid : opts.uid
header.gid = opts.gid == null ? header.gid : opts.gid
}
// Note: This mirrors logic in the fs read operations that are
// employed during tarball creation, in the fstream-npm module.
// It is duplicated here to handle tarballs that are created
// using other means, such as system tar or git archive.
if (header.type === 'file') {
const base = path.basename(header.name)
if (base === '.npmignore') {
sawIgnores[header.name] = true
} else if (base === '.gitignore') {
const npmignore = header.name.replace(/\.gitignore$/, '.npmignore')
if (!sawIgnores[npmignore]) {
// Rename, may be clobbered later.
header.name = npmignore
}
}
}
return header
},
ignore: makeIgnore(opts.log),
dmode: opts.dmode,
fmode: opts.fmode,
umask: opts.umask,
strip: 1
}))
return tar.x({
cwd: dest,
filter: (name, entry) => !entry.header.type.match(/^.*link$/i),
strip: 1,
onwarn: msg => opts.log && opts.log.warn('tar', msg),
onentry: makeOnEntry(opts),
preserveOwner: opts.uid != null || opts.gid != null
})
}

function makeIgnore (log) {
const sawIgnores = {}
return (name, header) => _ignore(name, header, sawIgnores, log)
function makeOnEntry (opts) {
const sawEntry = {}
return entry => _onentry(entry, sawEntry, opts)
}

function _ignore (name, header, sawIgnores, logger) {
if (header.type.match(/^.*link$/)) {
if (logger) {
logger.warn(
'extract-stream',
'excluding symbolic link',
header.name, '->', header.linkname)
}
return true
function _onentry (entry, sawIgnores, opts) {
if (process.getuid) {
entry.uid = opts.uid == null ? entry.uid : opts.uid
entry.gid = opts.gid == null ? entry.gid : opts.gid
}
if (entry.type.toLowerCase() === 'file') {
entry.mode = opts.fmode & ~(opts.umask || 0)
} else if (entry.type.toLowerCase() === 'directory') {
entry.mode = opts.dmode & ~(opts.umask || 0)
}

return false
}
44 changes: 17 additions & 27 deletions lib/finalize-manifest.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,17 @@ const cacache = require('cacache')
const cacheKey = require('./util/cache-key')
const fetchFromManifest = require('./fetch').fromManifest
const finished = BB.promisify(require('mississippi').finished)
const gunzip = require('./util/gunzip-maybe')
const minimatch = require('minimatch')
const normalize = require('normalize-package-data')
const optCheck = require('./util/opt-check')
const path = require('path')
const pipe = BB.promisify(require('mississippi').pipe)
const ssri = require('ssri')
const tar = require('tar-stream')
const tar = require('tar')

// `finalizeManifest` takes as input the various kinds of manifests that
// manifest handlers ('lib/handlers/*/manifest.js') return, and makes sure they
// are:
// manifest handlers ('lib/fetchers/*.js#manifest()') return, and makes sure
// they are:
//
// * filled out with any required data that the handler couldn't fill in
// * formatted consistently
Expand Down Expand Up @@ -149,23 +148,18 @@ function tarballedProps (pkg, spec, opts) {
} else {
opts = optCheck(opts)
const tarStream = fetchFromManifest(pkg, spec, opts)
const extracted = needsExtract && tar.extract()
extracted && extracted.on('entry', (h, str, next) => {
// Drain it
str.on('data', () => {}).on('end', next).on('error', next)
})
const extracted = needsExtract && new tar.Parse()
return BB.join(
needsShrinkwrap && jsonFromStream('npm-shrinkwrap.json', extracted),
needsManifest && jsonFromStream('package.json', extracted),
needsBin && getPaths(extracted),
needsHash && ssri.fromStream(tarStream, { algorithms: ['sha1'] }),
needsExtract && pipe(tarStream, gunzip(), extracted),
needsExtract && pipe(tarStream, extracted),
(sr, mani, paths, hash) => {
const extraProps = mani || {}
delete extraProps._resolved
// drain out the rest of the tarball
tarStream.unpipe()
tarStream.on('data', () => {})
tarStream.resume()
// if we have directories.bin, we need to collect any matching files
// to add to bin
if (paths && paths.length) {
Expand Down Expand Up @@ -199,25 +193,22 @@ function tarballedProps (pkg, spec, opts) {
function jsonFromStream (filename, dataStream) {
return BB.fromNode(cb => {
dataStream.on('error', cb)
dataStream.on('finish', cb)
dataStream.on('entry', function handler (header, stream, next) {
const filePath = header.name.replace(/[^/]+\//, '')
dataStream.on('close', cb)
dataStream.on('entry', entry => {
const filePath = entry.header.path.replace(/[^/]+\//, '')
if (filePath !== filename) {
next()
entry.resume()
} else {
let data = ''
stream.on('data', d => { data += d })
stream.on('error', cb)
finished(stream).then(() => {
dataStream.removeListener('entry', handler)
entry.on('data', d => { data += d })
entry.on('error', cb)
finished(entry).then(() => {
try {
cb(null, JSON.parse(data))
next()
} catch (err) {
cb(err)
}
}, err => {
dataStream.removeListener('entry', handler)
cb(err)
})
}
Expand All @@ -229,12 +220,11 @@ function getPaths (dataStream) {
return BB.fromNode(cb => {
let paths = []
dataStream.on('error', cb)
dataStream.on('finish', () => cb(null, paths))
dataStream.on('entry', function handler (header, stream, next) {
const filePath = header.name.replace(/[^/]+\//, '')
stream.on('data', () => {})
dataStream.on('close', () => cb(null, paths))
dataStream.on('entry', function handler (entry) {
const filePath = entry.header.path.replace(/[^/]+\//, '')
entry.resume()
paths.push(filePath)
next()
})
})
}
Expand Down
35 changes: 18 additions & 17 deletions lib/util/pack-dir.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,39 +5,40 @@ const BB = require('bluebird')
const cacache = require('cacache')
const cacheKey = require('./cache-key')
const optCheck = require('./opt-check')
const packlist = require('npm-packlist')
const pipe = BB.promisify(require('mississippi').pipe)
const tar = require('tar-fs')
const tar = require('tar')

module.exports = packDir
function packDir (manifest, label, dir, target, opts) {
opts = optCheck(opts)

const packer = opts.dirPacker
? opts.dirPacker(manifest, dir)
: tar.pack(dir, {
map: header => {
header.name = 'package/' + header.name
header.mtime = 0 // make tarballs idempotent
return header
},
ignore: (name) => {
return name.match(/\.git/)
}
})
? BB.resolve(opts.dirPacker(manifest, dir))
: mkPacker(dir)

if (!opts.cache) {
return pipe(packer, target).catch(err => {
throw err
})
return packer.then(packer => pipe(packer, target))
} else {
const cacher = cacache.put.stream(
opts.cache, cacheKey('packed-dir', label), opts
).on('integrity', i => {
target.emit('integrity', i)
})
return BB.all([
return packer.then(packer => BB.all([
pipe(packer, cacher),
pipe(packer, target)
])
]))
}
}

// Builds the default packer for `dir`: asks `packlist` for the list of files
// under `dir`, then hands that list to `tar.c`.
//
// The archive options are fixed: entries are read relative to `dir`, the
// output is gzipped, `portable` is enabled, and every entry path is prefixed
// with 'package/'.
//
// Returns whatever `packlist(...).then(...)` yields, i.e. a promise for the
// value returned by `tar.c`. packDir pipes that value into its targets, so it
// is presumably a readable tar stream -- confirm against the `tar` docs.
function mkPacker (dir) {
  const archiveOpts = {
    cwd: dir,
    gzip: true,
    portable: true,
    prefix: 'package/'
  }
  const listing = packlist({path: dir})
  return listing.then(fileList => tar.c(archiveOpts, fileList))
}
13 changes: 5 additions & 8 deletions test/extract-stream.chown.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,23 @@ require('./util/test-dir')(__filename)

npmlog.level = process.env.LOGLEVEL || 'silent'

test('accepts gid and uid opts', {
skip: !process.getuid
}, function (t) {
test('accepts gid and uid opts', {skip: !process.getuid}, t => {
const pkg = {
'package.json': {
'target/package.json': {
data: JSON.stringify({
name: 'foo',
version: '1.0.0'
})
},
'foo/index.js': 'console.log("hello world!")'
'target/foo/index.js': 'console.log("hello world!")'
}
const NEWUID = process.getuid() + 1
const NEWGID = process.getgid() + 1
// All of this only happens on uid === 0
process.getuid = () => 0
const updatedPaths = []
const fsClone = Object.create(fs)
fsClone.utimes = (_1, _2, _3, cb) => cb()
fsClone.chown = (p, uid, gid, cb) => {
process.nextTick(() => {
t.deepEqual({
Expand All @@ -49,16 +48,14 @@ test('accepts gid and uid opts', {
fs: fsClone
})
return mockTar(pkg, {stream: true}).then(tarStream => {
return pipe(tarStream, extractStream('./target', {
return pipe(tarStream, extractStream('.', {
uid: NEWUID,
gid: NEWGID,
log: npmlog
}))
}).then(() => {
t.deepEqual(updatedPaths, [
'target',
'target/package.json',
'target/foo',
'target/foo/index.js'
], 'extracted files had correct uid/gid set')
})
Expand Down
Loading

0 comments on commit 53899c7

Please sign in to comment.