feat(backups,vhd-lib): implement copyless merge (#6271)
fbeauchamp committed Jun 22, 2022
1 parent 8a71f84 commit fd752fe
Showing 11 changed files with 167 additions and 57 deletions.
16 changes: 3 additions & 13 deletions @xen-orchestra/backups/_cleanVm.js
@@ -59,30 +59,20 @@ async function mergeVhdChain(chain, { handler, logInfo, remove, merge }) {
let done, total
const handle = setInterval(() => {
if (done !== undefined) {
logInfo(`merging children in progress`, { children, parent, doneCount: done, totalCount: total})
logInfo(`merging children in progress`, { children, parent, doneCount: done, totalCount: total })
}
}, 10e3)

const mergedSize = await mergeVhd(handler, parent, handler, children, {
logInfo,
onProgress({ done: d, total: t }) {
done = d
total = t
},
remove,
})

clearInterval(handle)
const mergeTargetChild = children.shift()
await Promise.all([
VhdAbstract.rename(handler, parent, mergeTargetChild),
asyncMap(children, child => {
logInfo(`the VHD child is already merged`, { child })
if (remove) {
logInfo(`deleting merged VHD child`, { child })
return VhdAbstract.unlink(handler, child)
}
}),
])

return mergedSize
}
}
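For context on the backups side: with this commit the rename/unlink cleanup moves into vhd-lib, so a caller only reports progress and passes remove to mergeVhd. A minimal sketch of that calling pattern, assuming the mergeVhd(parentHandler, parentPath, childHandler, childrenPaths, options) signature used in the hunk above; the handler URL and VHD paths are placeholders.

const { getHandler } = require('@xen-orchestra/fs')
const { mergeVhd } = require('vhd-lib')

async function mergeChainExample() {
  // placeholder remote; any @xen-orchestra/fs handler is driven the same way
  const handler = getHandler({ url: 'file:///tmp/backup-repo' })
  const parent = 'vm/parent.vhd'
  const children = ['vm/child1.vhd', 'vm/child2.vhd'] // deepest child first, as in mergeVhdChain

  let done, total
  const timer = setInterval(() => {
    if (done !== undefined) {
      console.log('merging children in progress', { doneCount: done, totalCount: total })
    }
  }, 10e3)

  try {
    // with `remove: true`, vhd-lib renames the parent over the merge target
    // and deletes the already-merged children itself
    return await mergeVhd(handler, parent, handler, children, {
      onProgress({ done: d, total: t }) {
        done = d
        total = t
      },
      remove: true,
    })
  } finally {
    clearInterval(timer)
  }
}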
6 changes: 5 additions & 1 deletion CHANGELOG.unreleased.md
@@ -7,6 +7,8 @@

> Users must be able to say: “Nice enhancement, I'm eager to test it”
- [Backup] Merge delta backups without copying data when using VHD directories on NFS/SMB/local remotes (https://github.com/vatesfr/xen-orchestra/pull/6271)

### Bug fixes

> Users must be able to say: “I had this issue, happy to know it's fixed”
@@ -33,9 +35,11 @@

- @vates/event-listeners-manager patch
- @vates/read-chunk major
- @xen-orchestra/backups minor
- @xen-orchestra/xapi minor
- vhd-lib minor
- xo-remote-parser minor
- xo-server patch
- xo-server minor
- xo-vmdk-to-vhd patch

<!--packages-end-->
2 changes: 1 addition & 1 deletion packages/vhd-lib/Vhd/VhdAbstract.js
@@ -104,7 +104,7 @@ exports.VhdAbstract = class VhdAbstract {
*
* @returns {number} the merged data size
*/
async coalesceBlock(child, blockId) {
async mergeBlock(child, blockId) {
const block = await child.readBlock(blockId)
await this.writeEntireBlock(block)
return block.data.length
18 changes: 12 additions & 6 deletions packages/vhd-lib/Vhd/VhdDirectory.integ.spec.js
@@ -53,19 +53,25 @@ test('Can coalesce block', async () => {
const childDirectoryVhd = yield openVhd(handler, childDirectoryName)
await childDirectoryVhd.readBlockAllocationTable()

await parentVhd.coalesceBlock(childFileVhd, 0)
let childBlockData = (await childDirectoryVhd.readBlock(0)).data
await parentVhd.mergeBlock(childDirectoryVhd, 0)
await parentVhd.writeFooter()
await parentVhd.writeBlockAllocationTable()
let parentBlockData = (await parentVhd.readBlock(0)).data
let childBlockData = (await childFileVhd.readBlock(0)).data
// block should be present in parent
expect(parentBlockData.equals(childBlockData)).toEqual(true)
// block should not be in child since it's a rename for vhd directory
await expect(childDirectoryVhd.readBlock(0)).rejects.toThrowError()

await parentVhd.coalesceBlock(childDirectoryVhd, 0)
childBlockData = (await childFileVhd.readBlock(1)).data
await parentVhd.mergeBlock(childFileVhd, 1)
await parentVhd.writeFooter()
await parentVhd.writeBlockAllocationTable()
parentBlockData = (await parentVhd.readBlock(0)).data
childBlockData = (await childDirectoryVhd.readBlock(0)).data
expect(parentBlockData).toEqual(childBlockData)
parentBlockData = (await parentVhd.readBlock(1)).data
// block should be present in parent in case of mixed vhdfile/vhddirectory
expect(parentBlockData.equals(childBlockData)).toEqual(true)
// block should still be child
await childFileVhd.readBlock(1)
})
})

48 changes: 33 additions & 15 deletions packages/vhd-lib/Vhd/VhdDirectory.js
@@ -142,13 +142,13 @@ exports.VhdDirectory = class VhdDirectory extends VhdAbstract {
return test(this.#blockTable, blockId)
}

_getChunkPath(partName) {
#getChunkPath(partName) {
return this._path + '/' + partName
}

async _readChunk(partName) {
// here we can implement compression and / or crypto
const buffer = await this._handler.readFile(this._getChunkPath(partName))
const buffer = await this._handler.readFile(this.#getChunkPath(partName))

const uncompressed = await this.#compressor.decompress(buffer)
return {
@@ -164,16 +164,20 @@ exports.VhdDirectory = class VhdDirectory extends VhdAbstract {
)

const compressed = await this.#compressor.compress(buffer)
return this._handler.outputFile(this._getChunkPath(partName), compressed, this._opts)
return this._handler.outputFile(this.#getChunkPath(partName), compressed, this._opts)
}

// put block in subdirectories to limit impact when doing directory listing
_getBlockPath(blockId) {
#getBlockPath(blockId) {
const blockPrefix = Math.floor(blockId / 1e3)
const blockSuffix = blockId - blockPrefix * 1e3
return `blocks/${blockPrefix}/${blockSuffix}`
}

_getFullBlockPath(blockId) {
return this.#getChunkPath(this.#getBlockPath(blockId))
}

async readHeaderAndFooter() {
await this.#readChunkFilters()

@@ -200,7 +204,7 @@ exports.VhdDirectory = class VhdDirectory extends VhdAbstract {
if (onlyBitmap) {
throw new Error(`reading 'bitmap of block' ${blockId} in a VhdDirectory is not implemented`)
}
const { buffer } = await this._readChunk(this._getBlockPath(blockId))
const { buffer } = await this._readChunk(this.#getBlockPath(blockId))
return {
id: blockId,
bitmap: buffer.slice(0, this.bitmapSize),
@@ -240,25 +244,39 @@ exports.VhdDirectory = class VhdDirectory extends VhdAbstract {
}

// only works if data are in the same handler
// and if the full block is modified in child ( which is the case whit xcp)
// and if the full block is modified in child ( which is the case with xcp)
// and if the compression type is same on both sides
async coalesceBlock(child, blockId) {
async mergeBlock(child, blockId, isResumingMerge = false) {
const childBlockPath = child._getFullBlockPath?.(blockId)
if (
!(child instanceof VhdDirectory) ||
childBlockPath !== undefined ||
this._handler !== child._handler ||
child.compressionType !== this.compressionType
child.compressionType !== this.compressionType ||
child.compressionType === 'MIXED'
) {
return super.coalesceBlock(child, blockId)
return super.mergeBlock(child, blockId)
}
await this._handler.copy(
child._getChunkPath(child._getBlockPath(blockId)),
this._getChunkPath(this._getBlockPath(blockId))
)
try {
await this._handler.rename(childBlockPath, this._getFullBlockPath(blockId))
} catch (error) {
if (error.code === 'ENOENT' && isResumingMerge === true) {
// when resuming, the blocks moved since the last merge state write are
// not in the child anymore but it should be ok

// it will throw an error if block is missing in parent
// won't detect if the block was already in parent and is broken/missing in child
const { data } = await this.readBlock(blockId)
assert.strictEqual(data.length, this.header.blockSize)
} else {
throw error
}
}
setBitmap(this.#blockTable, blockId)
return sectorsToBytes(this.sectorsPerBlock)
}

async writeEntireBlock(block) {
await this._writeChunk(this._getBlockPath(block.id), block.buffer)
await this._writeChunk(this.#getBlockPath(block.id), block.buffer)
setBitmap(this.#blockTable, block.id)
}

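The copyless part of the merge is the rename in the try block above: when parent and child are both VhdDirectory instances on the same handler with the same compression, merging a block moves one file instead of reading and rewriting the block's data. A simplified sketch of that fast path as a standalone helper, reusing _getFullBlockPath, readBlock and handler.rename from the diff; the function name and argument order are illustrative only.

const assert = require('assert')

// illustrative stand-in for the fast path of VhdDirectory#mergeBlock
async function copylessMergeBlock(handler, parentVhd, childVhd, blockId, isResumingMerge = false) {
  try {
    // same remote, same compression: moving the block file is enough,
    // no block data is read or rewritten
    await handler.rename(childVhd._getFullBlockPath(blockId), parentVhd._getFullBlockPath(blockId))
  } catch (error) {
    if (error.code === 'ENOENT' && isResumingMerge) {
      // when resuming an interrupted merge the block may already have been
      // moved; accept it only if the parent holds a full-size block
      const { data } = await parentVhd.readBlock(blockId)
      assert.strictEqual(data.length, parentVhd.header.blockSize)
    } else {
      throw error
    }
  }
}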
4 changes: 2 additions & 2 deletions packages/vhd-lib/Vhd/VhdFile.integ.spec.js
@@ -222,14 +222,14 @@ test('Can coalesce block', async () => {
const childDirectoryVhd = yield openVhd(handler, childDirectoryName)
await childDirectoryVhd.readBlockAllocationTable()

await parentVhd.coalesceBlock(childFileVhd, 0)
await parentVhd.mergeBlock(childFileVhd, 0)
await parentVhd.writeFooter()
await parentVhd.writeBlockAllocationTable()
let parentBlockData = (await parentVhd.readBlock(0)).data
let childBlockData = (await childFileVhd.readBlock(0)).data
expect(parentBlockData).toEqual(childBlockData)

await parentVhd.coalesceBlock(childDirectoryVhd, 0)
await parentVhd.mergeBlock(childDirectoryVhd, 0)
await parentVhd.writeFooter()
await parentVhd.writeBlockAllocationTable()
parentBlockData = (await parentVhd.readBlock(0)).data
25 changes: 23 additions & 2 deletions packages/vhd-lib/Vhd/VhdSynthetic.js
@@ -43,6 +43,16 @@ const VhdSynthetic = class VhdSynthetic extends VhdAbstract {
}
}

get compressionType() {
const compressionType = this.vhds[0].compressionType
for (let i = 0; i < this.vhds.length; i++) {
if (compressionType !== this.vhds[i].compressionType) {
return 'MIXED'
}
}
return compressionType
}

/**
* @param {Array<VhdAbstract>} vhds the chain of Vhds used to compute this Vhd, from the deepest child (in position 0), to the root (in the last position)
* only the last one can have any type. Other must have type DISK_TYPES.DIFFERENCING (delta)
@@ -74,17 +84,28 @@ const VhdSynthetic = class VhdSynthetic extends VhdAbstract {
}
}

async readBlock(blockId, onlyBitmap = false) {
#getVhdWithBlock(blockId) {
const index = this.#vhds.findIndex(vhd => vhd.containsBlock(blockId))
assert(index !== -1, `no such block ${blockId}`)
return this.#vhds[index]
}

async readBlock(blockId, onlyBitmap = false) {
// only read the content of the first vhd containing this block
return await this.#vhds[index].readBlock(blockId, onlyBitmap)
return await this.#getVhdWithBlock(blockId).readBlock(blockId, onlyBitmap)
}

async mergeBlock(child, blockId) {
throw new Error(`can't coalesce block into a vhd synthetic`)
}

_readParentLocatorData(id) {
return this.#vhds[this.#vhds.length - 1]._readParentLocatorData(id)
}
_getFullBlockPath(blockId) {
const vhd = this.#getVhdWithBlock(blockId)
return vhd?._getFullBlockPath(blockId)
}
}

// add decorated static method
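The new compressionType getter guards the rename fast path: a VhdSynthetic whose children disagree on compression (for example a VhdFile chained with a VhdDirectory) reports 'MIXED', and VhdDirectory#mergeBlock then falls back to the generic copy-based merge. A small stand-alone rendition of that rule, with stubbed vhd objects and placeholder compression names:

// stand-alone rendition of the getter's logic, for illustration only
function chainCompressionType(vhds) {
  const compressionType = vhds[0].compressionType
  // any disagreement along the chain disables the rename-based fast path
  return vhds.every(vhd => vhd.compressionType === compressionType) ? compressionType : 'MIXED'
}

// stubbed vhds with placeholder compression names
console.log(chainCompressionType([{ compressionType: 'brotli' }, { compressionType: 'brotli' }])) // 'brotli'
console.log(chainCompressionType([{ compressionType: 'brotli' }, { compressionType: undefined }])) // 'MIXED'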
3 changes: 2 additions & 1 deletion packages/vhd-lib/index.js
@@ -6,7 +6,8 @@ exports.checkVhdChain = require('./checkChain')
exports.createReadableSparseStream = require('./createReadableSparseStream')
exports.createVhdStreamWithLength = require('./createVhdStreamWithLength')
exports.createVhdDirectoryFromStream = require('./createVhdDirectoryFromStream').createVhdDirectoryFromStream
exports.mergeVhd = require('./merge')
const { mergeVhd } = require('./merge')
exports.mergeVhd = mergeVhd
exports.peekFooterFromVhdStream = require('./peekFooterFromVhdStream')
exports.openVhd = require('./openVhd').openVhd
exports.VhdAbstract = require('./Vhd/VhdAbstract').VhdAbstract
38 changes: 29 additions & 9 deletions packages/vhd-lib/merge.integ.spec.js
@@ -9,6 +9,7 @@ const { getHandler } = require('@xen-orchestra/fs')
const { pFromCallback } = require('promise-toolbox')

const { VhdFile, chainVhd, mergeVhd } = require('./index')
const { _cleanupVhds: cleanupVhds } = require('./merge')

const { checkFile, createRandomFile, convertFromRawToVhd } = require('./tests/utils')

@@ -38,14 +39,15 @@ test('merge works in normal cases', async () => {
await createRandomFile(`${tempDir}/${childRandomFileName}`, mbOfChildren)
await convertFromRawToVhd(`${tempDir}/${childRandomFileName}`, `${tempDir}/${child1FileName}`)
await chainVhd(handler, parentFileName, handler, child1FileName, true)
await checkFile(`${tempDir}/${parentFileName}`)

// merge
await mergeVhd(handler, parentFileName, handler, child1FileName)

// check that vhd is still valid
await checkFile(`${tempDir}/${parentFileName}`)
// check that the merged vhd is still valid
await checkFile(`${tempDir}/${child1FileName}`)

const parentVhd = new VhdFile(handler, parentFileName)
const parentVhd = new VhdFile(handler, child1FileName)
await parentVhd.readHeaderAndFooter()
await parentVhd.readBlockAllocationTable()

@@ -138,11 +140,11 @@ test('it can resume a merge ', async () => {
await mergeVhd(handler, 'parent.vhd', handler, 'child1.vhd')

// reload header, footer and block allocation table, they should succeed
await parentVhd.readHeaderAndFooter()
await parentVhd.readBlockAllocationTable()
await childVhd.readHeaderAndFooter()
await childVhd.readBlockAllocationTable()
let offset = 0
// check that the data are the same as source
for await (const block of parentVhd.blocks()) {
for await (const block of childVhd.blocks()) {
const blockContent = block.data
// first block is marked as already merged, should not be modified
// second block should come from children
@@ -153,7 +155,7 @@
await fs.read(fd, buffer, 0, buffer.length, offset)

expect(buffer.equals(blockContent)).toEqual(true)
offset += parentVhd.header.blockSize
offset += childVhd.header.blockSize
}
})

@@ -183,9 +185,9 @@ test('it merge multiple child in one pass ', async () => {
await mergeVhd(handler, parentFileName, handler, [grandChildFileName, childFileName])

// check that vhd is still valid
await checkFile(parentFileName)
await checkFile(grandChildFileName)

const parentVhd = new VhdFile(handler, parentFileName)
const parentVhd = new VhdFile(handler, grandChildFileName)
await parentVhd.readHeaderAndFooter()
await parentVhd.readBlockAllocationTable()

@@ -206,3 +208,21 @@
offset += parentVhd.header.blockSize
}
})

test('it cleans vhd mergedfiles', async () => {
const handler = getHandler({ url: `file://${tempDir}` })

await handler.writeFile('parent', 'parentData')
await handler.writeFile('child1', 'child1Data')
await handler.writeFile('child2', 'child2Data')
await handler.writeFile('child3', 'child3Data')

// childPath is from the grand children to the children
await cleanupVhds(handler, 'parent', ['child3', 'child2', 'child1'], { remove: true })

// only child3 should stay, with the data of parent
const [child3, ...other] = await handler.list('.')
expect(other.length).toEqual(0)
expect(child3).toEqual('child3')
expect((await handler.readFile('child3')).toString('utf8')).toEqual('parentData')
})