diff --git a/README.md b/README.md
index c709fa5..81073ca 100644
--- a/README.md
+++ b/README.md
@@ -76,10 +76,12 @@ cacache.get.byDigest(cachePath, tarballSha512).then(data => {
 * Extraction by key or by content address (shasum, etc)
 * Multi-hash support - safely host sha1, sha512, etc, in a single cache
 * Automatic content deduplication
-* Fault tolerance and consistency guarantees for both insertion and extraction
+* Fault tolerance (immune to corruption, partial writes, etc)
+* Consistency guarantees on read and write (full data verification)
 * Lockless, high-concurrency cache access
 * Streaming support
 * Promise support
+* Pretty darn fast
 * Arbitrary metadata storage
 * Garbage collection and additional offline verification
 
diff --git a/lib/entry-index.js b/lib/entry-index.js
index 52b620a..c24c539 100644
--- a/lib/entry-index.js
+++ b/lib/entry-index.js
@@ -49,7 +49,7 @@ function insert (cache, key, digest, opts) {
     //
     // Thanks to @isaacs for the whiteboarding session that ended up with this.
     return appendFileAsync(
-      bucket, `\n${stringified.length}\t${stringified}`
+      bucket, `\n${hashEntry(stringified)}\t${stringified}`
     ).then(() => entry)
   }).then(entry => (
     fixOwner.chownr(bucket, opts.uid, opts.gid).then(() => (
@@ -140,9 +140,11 @@ function bucketEntries (cache, bucket, filter) {
   ).then(data => {
     let entries = []
     data.split('\n').forEach(entry => {
+      if (!entry) { return }
       const pieces = entry.split('\t')
-      if (!pieces[1] || pieces[1].length !== parseInt(pieces[0], 10)) {
-        // Length is no good! Corruption ahoy!
+      if (!pieces[1] || hashEntry(pieces[1]) !== pieces[0]) {
+        // Hash is no good! Corruption or malice? Doesn't matter!
+        // EJECT EJECT
         return
       }
       let obj
@@ -175,9 +177,18 @@ function bucketPath (cache, key) {
 
 module.exports._hashKey = hashKey
 function hashKey (key) {
+  return hash(key, 'sha256')
+}
+
+module.exports._hashEntry = hashEntry
+function hashEntry (str) {
+  return hash(str, 'sha1')
+}
+
+function hash (str, digest) {
   return crypto
-    .createHash('sha256')
-    .update(key)
+    .createHash(digest)
+    .update(str)
     .digest('hex')
 }
 
diff --git a/package.json b/package.json
index d69cec8..19068bb 100644
--- a/package.json
+++ b/package.json
@@ -3,7 +3,7 @@
   "version": "6.1.2",
   "cache-version": {
     "content": "2",
-    "index": "2"
+    "index": "3"
   },
   "description": "General content-addressable cache system that maintains a filesystem registry of file data.",
   "main": "index.js",
diff --git a/test/index.find.js b/test/index.find.js
index 312d548..af46a97 100644
--- a/test/index.find.js
+++ b/test/index.find.js
@@ -12,7 +12,6 @@ BB.promisifyAll(fs)
 
 const CACHE = path.join(testDir, 'cache')
 const contentPath = require('../lib/content/path')
-const Dir = Tacks.Dir
 const index = require('../lib/entry-index')
 
 test('index.find cache hit', function (t) {
@@ -178,7 +177,7 @@ test('index.find garbled data in index file', function (t) {
   })
   const fixture = new Tacks(CacheIndex({
     'whatever': '\n' +
-      `${stringified.length}\t${stringified}` +
+      `${index._hashEntry(stringified)}\t${stringified}` +
       '\n{"key": "' + key + '"\noway'
   }))
   fixture.create(CACHE)
diff --git a/test/index.insert.js b/test/index.insert.js
index 7dce6cb..251d56b 100644
--- a/test/index.insert.js
+++ b/test/index.insert.js
@@ -35,7 +35,7 @@ test('basic insertion', function (t) {
   }).then(data => {
     t.equal(data[0], '\n', 'first entry starts with a \\n')
     const split = data.split('\t')
-    t.equal(parseInt(split[0], 10), split[1].length, 'length header correct')
+    t.equal(split[0].slice(1), index._hashEntry(split[1]), 'consistency header correct')
     const entry = JSON.parse(split[1])
     t.ok(entry.time, 'entry has a timestamp')
     t.deepEqual(entry, {
diff --git a/test/util/cache-index.js b/test/util/cache-index.js
index 22f20fe..3e2a51c 100644
--- a/test/util/cache-index.js
+++ b/test/util/cache-index.js
@@ -1,9 +1,12 @@
 'use strict'
 
-const bucketPath = require('../../lib/entry-index')._bucketPath
+const index = require('../../lib/entry-index')
 const path = require('path')
 const Tacks = require('tacks')
 
+const bucketPath = index._bucketPath
+const hashEntry = index._hashEntry
+
 const Dir = Tacks.Dir
 const File = Tacks.File
 
@@ -28,7 +31,7 @@ function CacheIndex (entries, hashAlgorithm) {
     }
     serialised = '\n' + lines.map(line => {
       const stringified = JSON.stringify(line)
-      return `${stringified.length}\t${stringified}`
+      return `${hashEntry(stringified)}\t${stringified}`
     }).join('\n')
   }
   insertContent(tree, parts, serialised)
diff --git a/test/verify.js b/test/verify.js
index 9000422..bba0243 100644
--- a/test/verify.js
+++ b/test/verify.js
@@ -48,10 +48,11 @@ test('removes corrupted index entries from buckets', t => {
       t.equal(stats.totalEntries, 1, 'only one entry counted')
       return fs.readFileAsync(BUCKET, 'utf8')
     }).then(bucketData => {
-      // cleaned-up entries have different timestamps
-      const newTime = bucketData.match(/"time":([0-9]+)/)[1]
-      const target = BUCKETDATA.replace(/"time":[0-9]+/, `"time":${newTime}`)
-      t.deepEqual(bucketData, target, 'bucket only contains good entry')
+      const bucketEntry = JSON.parse(bucketData.split('\t')[1])
+      const targetEntry = JSON.parse(BUCKETDATA.split('\t')[1])
+      targetEntry.time = bucketEntry.time // different timestamps
+      t.deepEqual(
+        bucketEntry, targetEntry, 'bucket only contains good entry')
     })
   })
 })
@@ -75,7 +76,11 @@ test('removes shadowed index entries from buckets', t => {
         time: +(bucketData.match(/"time":([0-9]+)/)[1]),
         metadata: newEntry.metadata
       })
-      t.equal(bucketData, `\n${stringified.length}\t${stringified}`)
+      t.equal(
+        bucketData,
+        `\n${index._hashEntry(stringified)}\t${stringified}`,
+        'only the most recent entry is still in the bucket'
+      )
     })
   })
 })
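
For reviewers, the gist of the format change: each index bucket line previously carried a byte-length prefix (`<length>\t<json>`); it now carries a sha1 checksum of its own JSON payload (`<sha1(json)>\t<json>`), so a partially-appended, truncated, or hand-edited line fails verification and is silently skipped on read. Below is a minimal standalone sketch of that round trip. `formatLine` and `parseBucket` are illustrative names invented for this example, not part of cacache's API; only `hashEntry` mirrors the helper added in `lib/entry-index.js`.

```js
'use strict'

const crypto = require('crypto')

// Mirrors lib/entry-index.js: sha1 hex digest of the serialised entry.
function hashEntry (str) {
  return crypto.createHash('sha1').update(str).digest('hex')
}

// Illustrative only -- serialise an entry the way insert() now appends it:
// a leading newline, the checksum, a tab, then the JSON payload.
function formatLine (entry) {
  const stringified = JSON.stringify(entry)
  return `\n${hashEntry(stringified)}\t${stringified}`
}

// Illustrative only -- read a bucket the way bucketEntries() now does:
// skip blank lines, and drop any line whose checksum doesn't match its
// payload (corruption or malice, doesn't matter).
function parseBucket (data) {
  const entries = []
  data.split('\n').forEach(line => {
    if (!line) { return }
    const pieces = line.split('\t')
    if (!pieces[1] || hashEntry(pieces[1]) !== pieces[0]) { return }
    try {
      entries.push(JSON.parse(pieces[1]))
    } catch (e) { /* garbled JSON is rejected the same way */ }
  })
  return entries
}

// A good line survives; a simulated partial write is rejected.
const good = formatLine({ key: 'foo', digest: 'deadbeef', time: Date.now() })
const bad = good.slice(0, -2) // truncate, as an interrupted append would
console.log(parseBucket(good + bad).length) // => 1
```

This is also why `package.json` bumps `cache-version.index` from `"2"` to `"3"`: v2 buckets use the length prefix and are not readable under the new checksum scheme, so the on-disk index format is marked incompatible rather than letting old entries fail the hash check one by one.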