Skip to content

Commit

Permalink
Merge pull request #235 from Sebatyne/verify-on-recovery
Browse files Browse the repository at this point in the history
Repozo : add an option to verify on recovery
  • Loading branch information
Sebatyne committed Mar 6, 2019
2 parents b2895c0 + 1c6a982 commit 334282c
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 37 deletions.
5 changes: 4 additions & 1 deletion CHANGES.rst
Expand Up @@ -5,7 +5,10 @@
5.5.2 (unreleased)
==================

- TBD
- Make repozo's recover mode atomic by recovering the backup in a
temporary file which is then moved to the expected output file.
- Add a new option (-w / --with-verification) to repozo's recover mode which
verifies backup integrity on the fly.

5.5.1 (2018-10-25)
==================
Expand Down
106 changes: 83 additions & 23 deletions src/ZODB/scripts/repozo.py
Expand Up @@ -73,6 +73,13 @@
Note: for the stdout case, the index file will **not** be restored
automatically.
-w
--with-verification
Verify the backup files on the fly while recovering. This option runs
the same checks as when repozo is run in -V/--verify mode, and
allows verifying and recovering a backup in a single step. If a sanity
check fails, the partially recovered ZODB will be left in place.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
Expand Down Expand Up @@ -101,11 +108,19 @@
VERBOSE = False


class WouldOverwriteFiles(Exception):
class RepozoError(Exception):
pass


class WouldOverwriteFiles(RepozoError):
pass


class NoFiles(RepozoError):
pass


class NoFiles(Exception):
class VerificationFail(RepozoError):
pass


Expand Down Expand Up @@ -146,7 +161,7 @@ def error(msg, *args):
def parseargs(argv):
global VERBOSE
try:
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:',
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w',
['backup',
'recover',
'verify',
Expand All @@ -160,6 +175,7 @@ def parseargs(argv):
'kill-old-on-full',
'date=',
'output=',
'with-verification',
])
except getopt.error as msg:
usage(1, msg)
Expand All @@ -174,6 +190,7 @@ class Options(object):
quick = False # -Q flag state
gzip = False # -z flag state
killold = False # -k flag state
withverify = False # -w flag state

options = Options()

Expand Down Expand Up @@ -210,6 +227,8 @@ class Options(object):
options.gzip = True
elif opt in ('-k', '--kill-old-on-full'):
options.killold = True
elif opt in ('-w', '--with-verify'):
options.withverify = True
else:
assert False, (opt, arg)

Expand All @@ -229,6 +248,9 @@ class Options(object):
if options.output is not None:
log('--output option is ignored in backup mode')
options.output = None
if options.withverify is not None:
log('--with-verify option is ignored in backup mode')
options.withverify = None
elif options.mode == RECOVER:
if options.file is not None:
log('--file option is ignored in recover mode')
Expand Down Expand Up @@ -256,6 +278,9 @@ class Options(object):
if options.killold:
log('--kill-old-on-full option is ignored in verify mode')
options.killold = False
if options.withverify is not None:
log('--with-verify option is ignored in verify mode')
options.withverify = None
return options


Expand Down Expand Up @@ -360,8 +385,6 @@ def func(data):
ifp = open(f, 'rb')
bytesread += dofile(func, ifp)
ifp.close()
if ofp:
ofp.close()
return bytesread, sum.hexdigest()


Expand Down Expand Up @@ -649,12 +672,46 @@ def do_recover(options):
log('Recovering file to stdout')
outfp = sys.stdout
else:
# Delete old ZODB before recovering backup as size of
# old ZODB + full partial file may be superior to free disk space
if os.path.exists(options.output):
log('Deleting old %s', options.output)
os.unlink(options.output)
log('Recovering file to %s', options.output)
outfp = open(options.output, 'wb')
reposz, reposum = concat(repofiles, outfp)
if outfp != sys.stdout:
outfp.close()
log('Recovered %s bytes, md5: %s', reposz, reposum)
temporary_output_file = options.output + '.part'
outfp = open(temporary_output_file, 'wb')
if options.withverify:
datfile = os.path.splitext(repofiles[0])[0] + '.dat'
with open(datfile) as fp:
truth_dict = {}
for line in fp:
fn, startpos, endpos, sum = line.split()
startpos = int(startpos)
endpos = int(endpos)
filename = os.path.join(options.repository,
os.path.basename(fn))
truth_dict[filename] = {
'size': endpos - startpos,
'sum': sum,
}
totalsz = 0
for repofile in repofiles:
reposz, reposum = concat([repofile], outfp)
expected_truth = truth_dict[repofile]
if reposz != expected_truth['size']:
raise VerificationFail(
"%s is %d bytes, should be %d bytes" % (
repofile, reposz, expected_truth['size']))
if reposum != expected_truth['sum']:
raise VerificationFail(
"%s has checksum %s instead of %s" % (
repofile, reposum, expected_truth['sum']))
totalsz += reposz
log("Recovered chunk %s : %s bytes, md5: %s", repofile, reposz, reposum)
log("Recovered a total of %s bytes", totalsz)
else:
reposz, reposum = concat(repofiles, outfp)
log('Recovered %s bytes, md5: %s', reposz, reposum)

if options.output is not None:
last_base = os.path.splitext(repofiles[-1])[0]
Expand All @@ -666,6 +723,15 @@ def do_recover(options):
else:
log('No index file to restore: %s', source_index)

if outfp != sys.stdout:
outfp.close()
try:
os.rename(temporary_output_file, options.output)
except OSError:
log("ZODB has been fully recovered as %s, but it cannot be renamed into : %s",
temporary_output_file, options.output)
raise


def do_verify(options):
# Verify the sizes and checksums of all files mentioned in the .dat file
Expand Down Expand Up @@ -725,22 +791,16 @@ def main(argv=None):
if argv is None:
argv = sys.argv[1:]
options = parseargs(argv)
if options.mode == BACKUP:
try:
try:
if options.mode == BACKUP:
do_backup(options)
except WouldOverwriteFiles as e:
sys.exit(str(e))
elif options.mode == RECOVER:
try:
elif options.mode == RECOVER:
do_recover(options)
except NoFiles as e:
sys.exit(str(e))
else:
assert options.mode == VERIFY
try:
else:
assert options.mode == VERIFY
do_verify(options)
except NoFiles as e:
sys.exit(str(e))
except (RepozoError, OSError) as e:
sys.exit(str(e))


if __name__ == '__main__':
Expand Down
81 changes: 68 additions & 13 deletions src/ZODB/scripts/tests/test_repozo.py
Expand Up @@ -371,7 +371,7 @@ def _makeFile(self, name, text, gzip_file=False):
from ZODB.scripts.repozo import _GzipCloser
import tempfile
if self._repository_directory is None:
self._repository_directory = tempfile.mkdtemp()
self._repository_directory = tempfile.mkdtemp(prefix='zodb-test-')
fqn = os.path.join(self._repository_directory, name)
if gzip_file:
_opener = _GzipCloser
Expand Down Expand Up @@ -414,7 +414,7 @@ def close(self):
ofp = Faux()
bytes, sum = self._callFUT(files, ofp)
self.assertEqual(ofp._written, [x.encode() for x in 'ABC'])
self.assertTrue(ofp._closed)
self.assertFalse(ofp._closed)

_marker = object()
class Test_gen_filename(OptionsTestBase, unittest.TestCase):
Expand Down Expand Up @@ -674,7 +674,7 @@ def _callFUT(self, options):

def _makeDB(self):
import tempfile
datadir = self._data_directory = tempfile.mkdtemp()
datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
return OurDB(self._data_directory)

def test_dont_overwrite_existing_file(self):
Expand Down Expand Up @@ -729,7 +729,7 @@ def _callFUT(self, options, reposz, repofiles):

def _makeDB(self):
import tempfile
datadir = self._data_directory = tempfile.mkdtemp()
datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
return OurDB(self._data_directory)

def test_dont_overwrite_existing_file(self):
Expand Down Expand Up @@ -868,23 +868,25 @@ def test_no_files_before_explicit_date(self):

def test_w_full_backup_latest_no_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB')
self._callFUT(options)
self.assertEqual(_read_file(output), b'BBB')

def test_w_full_backup_latest_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.fs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC')
Expand All @@ -894,30 +896,83 @@ def test_w_full_backup_latest_index(self):

def test_w_incr_backup_latest_no_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._callFUT(options)
self.assertEqual(_read_file(output), b'AAABBB')

def test_w_incr_backup_latest_index(self):
import tempfile
dd = self._data_directory = tempfile.mkdtemp()
dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
output = os.path.join(dd, 'Data.fs')
index = os.path.join(dd, 'Data.fs.index')
options = self._makeOptions(date='2010-05-15-13-30-57',
output=output)
output=output,
withverify=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
self._makeFile(4, 5, 6, '.deltafs', 'BBB')
self._makeFile(4, 5, 6, '.index', 'CCC')
self._callFUT(options)
self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC')

def test_w_incr_backup_with_verify_all_is_fine(self):
    """Recover a full + incremental backup with verification enabled.

    The .dat file written here is intended to describe both chunks
    consistently, so the recovery should run to completion: no ``.part``
    file may remain and the output must hold both chunks concatenated.
    """
    import tempfile
    dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    # One line per chunk: "<file> <startpos> <endpos> <checksum>".
    self._makeFile(2, 3, 4, '.dat',
        '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
    self._callFUT(options)
    # The temporary file must have been renamed onto the real output.
    self.assertFalse(os.path.exists(output + '.part'))
    self.assertEqual(_read_file(output), b'AAABBBB')

def test_w_incr_backup_with_verify_sum_inconsistent(self):
    """Recovery with verification must fail on a checksum mismatch.

    The checksum recorded for the ``.deltafs`` chunk differs in its last
    digit from the one used in the matching "all is fine" test, so
    ``VerificationFail`` is expected.  The partially recovered ``.part``
    file is left behind and the final output file is never created.
    """
    import tempfile
    from ZODB.scripts.repozo import VerificationFail
    dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    # One line per chunk: "<file> <startpos> <endpos> <checksum>".
    self._makeFile(2, 3, 4, '.dat',
        '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec61\n')
    self.assertRaises(VerificationFail, self._callFUT, options)
    self.assertTrue(os.path.exists(output + '.part'))
    # A failed verification must not produce the final output file.
    self.assertFalse(os.path.exists(output))

def test_w_incr_backup_with_verify_size_inconsistent(self):
    """Recovery with verification must fail on a size mismatch.

    The .dat file records positions 3..8 (5 bytes) for the ``.deltafs``
    chunk while only 4 bytes ('BBBB') are written for it, so
    ``VerificationFail`` is expected.  The partially recovered ``.part``
    file is left behind and the final output file is never created.
    """
    import tempfile
    from ZODB.scripts.repozo import VerificationFail
    dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-')
    output = os.path.join(dd, 'Data.fs')
    options = self._makeOptions(date='2010-05-15-13-30-57',
                                output=output,
                                withverify=True)
    self._makeFile(2, 3, 4, '.fs', 'AAA')
    self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
    # One line per chunk: "<file> <startpos> <endpos> <checksum>".
    self._makeFile(2, 3, 4, '.dat',
        '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafs 3 8 f50881ced34c7d9e6bce100bf33dec60\n')
    self.assertRaises(VerificationFail, self._callFUT, options)
    self.assertTrue(os.path.exists(output + '.part'))
    # A failed verification must not produce the final output file.
    self.assertFalse(os.path.exists(output))


class Test_do_verify(OptionsTestBase, unittest.TestCase):

Expand Down Expand Up @@ -1069,7 +1124,7 @@ class MonteCarloTests(unittest.TestCase):
def setUp(self):
# compute directory names
import tempfile
self.basedir = tempfile.mkdtemp()
self.basedir = tempfile.mkdtemp(prefix='zodb-test-')
self.backupdir = os.path.join(self.basedir, 'backup')
self.datadir = os.path.join(self.basedir, 'data')
self.restoredir = os.path.join(self.basedir, 'restore')
Expand Down

0 comments on commit 334282c

Please sign in to comment.