From b1d3888e2ad7035d7daaacb16c5c5d09adc85633 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kat=20March=C3=A1n?=
Date: Thu, 2 Mar 2017 18:06:50 -0800
Subject: [PATCH] fix(index): who cares about race conditions anyway

PRETTY SURE THIS IS FINE
---
 lib/entry-index.js       | 94 ++++++++++------------------------------
 test/index.insert.js     | 68 +++++++++++------------------
 test/util/cache-index.js |  2 +-
 3 files changed, 48 insertions(+), 116 deletions(-)

diff --git a/lib/entry-index.js b/lib/entry-index.js
index 961abcf..d2ca724 100644
--- a/lib/entry-index.js
+++ b/lib/entry-index.js
@@ -5,7 +5,6 @@ const contentPath = require('./content/path')
 const crypto = require('crypto')
 const fixOwner = require('./util/fix-owner')
 const fs = require('graceful-fs')
-const lockfile = require('lockfile')
 const path = require('path')
 const pipe = require('mississippi').pipe
 const Promise = require('bluebird')
@@ -14,64 +13,30 @@ const through = require('mississippi').through
 
 const indexV = require('../package.json')['cache-version'].index
 
+const appendFileAsync = Promise.promisify(fs.appendFile)
+
 module.exports.insert = insert
 function insert (cache, key, digest, opts) {
   opts = opts || {}
   const bucket = bucketPath(cache, key)
-  const lock = bucket + '.lock'
   return fixOwner.mkdirfix(
     path.dirname(bucket), opts.uid, opts.gid
-  ).then(() => (
-    Promise.fromNode(_cb => {
-      const cb = (err, entry) => {
-        lockfile.unlock(lock, er => {
-          _cb(err || er, entry)
-        })
-      }
-      lockfile.lock(lock, {
-        stale: 60000,
-        retries: 10,
-        wait: 10000
-      }, function (err) {
-        if (err) { return _cb(err) }
-        fs.stat(bucket, function (err, existing) {
-          if (err && err.code !== 'ENOENT' && err.code !== 'EPERM') {
-            return cb(err)
-          }
-          const entry = {
-            key: key,
-            digest: digest,
-            hashAlgorithm: opts.hashAlgorithm,
-            time: +(new Date()),
-            metadata: opts.metadata
-          }
-          // Because of the way these entries work,
-          // the index is safe from fs.appendFile stopping
-          // mid-write so long as newlines are *prepended*
-          //
-          // That is, if a write fails, it will be ignored
-          // by `find`, and the next successful one will be
-          // used.
-          //
-          // This should be -very rare-, since `fs.appendFile`
-          // will often be atomic on most platforms unless
-          // very large metadata has been included, but caches
-          // like this one tend to last a long time. :)
-          // Most corrupted reads are likely to be from attempting
-          // to read the index while it's being written to --
-          // which is safe, but not guaranteed to be atomic.
-          const e = (existing ? '\n' : '') + JSON.stringify(entry)
-          fs.appendFile(bucket, e, function (err) {
-            cb(err, entry)
-          })
-        })
-      })
-    })
-  )).then(entry => {
-    return fixOwner.chownr(bucket, opts.uid, opts.gid).then(() => {
-      return formatEntry(cache, entry)
-    })
-  })
+  ).then(() => {
+    const entry = {
+      key: key,
+      digest: digest,
+      hashAlgorithm: opts.hashAlgorithm,
+      time: +(new Date()),
+      metadata: opts.metadata
+    }
+    return appendFileAsync(
+      bucket, '\n' + JSON.stringify(entry)
+    ).then(() => entry)
+  }).then(entry => (
+    fixOwner.chownr(bucket, opts.uid, opts.gid).then(() => (
+      formatEntry(cache, entry)
+    ))
+  ))
 }
 
 module.exports.find = find
@@ -126,7 +91,7 @@ function lsStream (cache) {
       fs.readFile(path.join(indexDir, bucket, f), 'utf8', function (err, data) {
         if (err) { return cb(err) }
         const entries = {}
-        data.split('\n').forEach(function (entry) {
+        data.split('\n').slice(1).forEach(function (entry) {
           let parsed
           try {
            parsed = JSON.parse(entry)
@@ -186,30 +151,15 @@ function bucketDir (cache) {
 module.exports._bucketPath = bucketPath
 function bucketPath (cache, key) {
   const hashed = hashKey(key)
-  return path.join(bucketDir(cache), hashed.slice(0, 2), hashed)
+  return path.join(bucketDir(cache), hashed.slice(0, 2), hashed.slice(2))
 }
 
 module.exports._hashKey = hashKey
 function hashKey (key) {
-  // NOTE (SECURITY)
-  //
-  // `sha1` conflicts can be generated, but it doesn't matter in this case,
-  // since we intend for there to be regular conflicts anyway. You can have
-  // the entire cache in a single bucket and all that'll do is just make a big
-  // file with a lot of contention, if you can even pull it off in the `key`
-  // string. So whatever. `sha1` is faster and it doesn't trigger the warnings
-  // `md5` tends to (yet?...).
-  //
-  // Not to mention, that in the case of pacote/npm, the amount of control
-  // anyone would have over this key is so minimal that it's incredibly
-  // unlikely that they could intentionally generate a large number of
-  // conflicts just with a package key such that they'd do anything resembling
-  // a hash flood DOS.
   return crypto
-    .createHash('sha1')
-    .update(key.toLowerCase()) // lump case-variant keys into same bucket.
+    .createHash('sha256')
+    .update(key)
     .digest('hex')
-    .slice(0, 7)
 }
 
 function formatEntry (cache, entry) {
diff --git a/test/index.insert.js b/test/index.insert.js
index a25acaa..f046bda 100644
--- a/test/index.insert.js
+++ b/test/index.insert.js
@@ -34,7 +34,7 @@ test('basic insertion', function (t) {
     }, 'formatted entry returned')
     return fs.readFileAsync(BUCKET, 'utf8')
   }).then(data => {
-    t.equal(data[0], '{', 'first entry starts with a {, not \\n')
+    t.equal(data[0], '\n', 'first entry starts with a \\n')
     const entry = JSON.parse(data)
     t.ok(entry.time, 'entry has a timestamp')
     t.deepEqual(entry, {
@@ -55,7 +55,7 @@ test('inserts additional entries into existing key', function (t) {
   )).then(() => {
     return fs.readFileAsync(BUCKET, 'utf8')
   }).then(data => {
-    const entries = data.split('\n').map(JSON.parse)
+    const entries = data.split('\n').slice(1).map(JSON.parse)
     entries.forEach(function (e) { delete e.time })
     t.deepEqual(entries, [{
       key: KEY,
@@ -112,48 +112,30 @@ test('optional arbitrary metadata', function (t) {
 test('key case-sensitivity', function (t) {
   return Promise.join(
     index.insert(CACHE, KEY, DIGEST),
-    index.insert(CACHE, KEY.toUpperCase(), DIGEST)
+    index.insert(CACHE, KEY.toUpperCase(), DIGEST + 'upper')
   ).then(() => {
-    return fs.readFileAsync(BUCKET, 'utf8')
-  }).then(data => {
-    const entries = data.split('\n').map(JSON.parse).sort(e => (
-      e.key === KEY
-      ? -1
-      : 1
-    ))
-    entries.forEach(function (e) { delete e.time })
-    t.deepEqual(entries, [{
-      key: KEY,
-      digest: DIGEST
-    }, {
-      key: KEY.toUpperCase(),
-      digest: DIGEST
-    }], 'all entries present')
-  })
-})
-
-test('hash conflict in same bucket', function (t) {
-  // NOTE - this test will break if `index._hashKey` changes its algorithm.
-  // Adapt to it accordingly.
-  const NEWKEY = KEY.toUpperCase()
-  const CONFLICTING = KEY.toLowerCase()
-  return index.insert(
-    CACHE, NEWKEY, DIGEST
-  ).then(() => (
-    index.insert(CACHE, CONFLICTING, DIGEST)
-  )).then(() => {
-    const bucket = index._bucketPath(CACHE, NEWKEY)
-    return fs.readFileAsync(bucket, 'utf8')
-  }).then(data => {
-    const entries = data.split('\n').map(JSON.parse)
-    entries.forEach(function (e) { delete e.time })
-    t.deepEqual(entries, [{
-      key: NEWKEY,
-      digest: DIGEST
-    }, {
-      key: CONFLICTING,
-      digest: DIGEST
-    }], 'multiple entries for conflicting keys in the same bucket')
+    return Promise.join(
+      index.find(CACHE, KEY),
+      index.find(CACHE, KEY.toUpperCase()),
+      (entry, upperEntry) => {
+        delete entry.time
+        delete upperEntry.time
+        t.deepEqual({
+          key: entry.key,
+          digest: entry.digest
+        }, {
+          key: KEY,
+          digest: DIGEST
+        }, 'regular entry exists')
+        t.deepEqual({
+          key: upperEntry.key,
+          digest: upperEntry.digest
+        }, {
+          key: KEY.toUpperCase(),
+          digest: DIGEST + 'upper'
+        }, 'case-variant entry intact')
+      }
+    )
   })
 })
 
diff --git a/test/util/cache-index.js b/test/util/cache-index.js
index 3302725..3303951 100644
--- a/test/util/cache-index.js
+++ b/test/util/cache-index.js
@@ -26,7 +26,7 @@ function CacheIndex (entries, hashAlgorithm) {
       if (typeof lines.length !== 'number') {
         lines = [lines]
       }
-      serialised = lines.map(JSON.stringify).join('\n')
+      serialised = '\n' + lines.map(JSON.stringify).join('\n')
     }
     insertContent(tree, parts, serialised)
   })
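For illustration, a minimal stand-alone sketch of the append-only bucket format this patch moves to, using only Node core modules. The names bucketPathFor, appendEntry, and readBucketEntries are hypothetical, not cacache's API, and 'index' stands in for cacache's versioned index directory. The idea being relied on: every entry is written as '\n' + JSON.stringify(entry), so an interrupted append can damage at most one line, and readers simply drop lines that fail to parse.

'use strict'

const crypto = require('crypto')
const fs = require('fs')
const path = require('path')

// Hypothetical mirror of the patched bucketPath(): sha256 the key, use the
// first two hex chars as a subdirectory and the rest as the filename.
function bucketPathFor (cache, key) {
  const hashed = crypto.createHash('sha256').update(key).digest('hex')
  return path.join(cache, 'index', hashed.slice(0, 2), hashed.slice(2))
}

// Append one entry. The leading '\n' means a previous partial write can
// never run into this one -- at worst it leaves a single unparseable line.
function appendEntry (bucket, entry, cb) {
  fs.appendFile(bucket, '\n' + JSON.stringify(entry), cb)
}

// Read every entry back, silently skipping torn writes. slice(1) drops the
// empty chunk before the first '\n', matching the patched lsStream read.
function readBucketEntries (bucket, cb) {
  fs.readFile(bucket, 'utf8', (err, data) => {
    if (err) { return cb(err) }
    const entries = []
    data.split('\n').slice(1).forEach(line => {
      try {
        entries.push(JSON.parse(line))
      } catch (e) {
        // corrupted or partial line: ignore it; a later append supersedes it
      }
    })
    cb(null, entries)
  })
}

Because later entries for the same key simply follow earlier ones, the newest parseable line wins and no lockfile is needed for correctness; the cost is that readers must tolerate (and skip) garbage lines instead of assuming the file is always well-formed JSON per line.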