Skip to content

Commit

Permalink
repozo --verify
Browse files Browse the repository at this point in the history
Could be used as a workaround for https://bugs.launchpad.net/zodb/+bug/906057
  • Loading branch information
mgedmin committed Oct 10, 2013
1 parent 70a0320 commit 15b6248
Show file tree
Hide file tree
Showing 2 changed files with 176 additions and 23 deletions.
111 changes: 89 additions & 22 deletions src/ZODB/scripts/repozo.py
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.3
#!/usr/bin/env python

# repozo.py -- incremental and full backups of a Data.fs file.
#
Expand All @@ -18,6 +18,9 @@
-R / --recover
Restore a ZODB file from a backup.
-V / --verify
Verify backup integrity.
-v / --verbose
Verbose mode.
Expand Down Expand Up @@ -69,18 +72,17 @@
Note: for the stdout case, the index file will **not** be restored
automatically.
Options for -V/--verify:
-Q / --quick
Verify file sizes only (skip md5 checksums).
"""
from __future__ import print_function
import os
import shutil
import sys
from six.moves import filter
try:
# the hashlib package is available from Python 2.5
from hashlib import md5
except ImportError:
# the md5 package is deprecated in Python 2.6
from md5 import new as md5
from hashlib import md5
import gzip
import time
import errno
Expand All @@ -92,6 +94,7 @@

BACKUP = 1
RECOVER = 2
VERIFY = 3

COMMASPACE = ', '
READCHUNK = 16 * 1024
Expand Down Expand Up @@ -124,12 +127,17 @@ def log(msg, *args):
print(msg % args, file=sys.stderr)


def error(msg, *args):
    """Report a problem on standard error.

    ``msg`` is a %-style format string and ``args`` are the values
    interpolated into it; the formatted text is printed to ``sys.stderr``.
    """
    text = msg % args
    print(text, file=sys.stderr)


def parseargs(argv):
global VERBOSE
try:
opts, args = getopt.getopt(argv, 'BRvhr:f:FQzkD:o:',
opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:',
['backup',
'recover',
'verify'
'verbose',
'help',
'repository=',
Expand All @@ -145,7 +153,7 @@ def parseargs(argv):
usage(1, msg)

class Options:
mode = None # BACKUP or RECOVER
mode = None # BACKUP, RECOVER or VERIFY
file = None # name of input Data.fs file
repository = None # name of directory holding backups
full = False # True forces full backup
Expand All @@ -164,12 +172,16 @@ class Options:
VERBOSE = True
elif opt in ('-R', '--recover'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = RECOVER
elif opt in ('-B', '--backup'):
if options.mode is not None:
usage(1, '-B and -R are mutually exclusive')
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = BACKUP
elif opt in ('-V', '--verify'):
if options.mode is not None:
usage(1, '-B, -R, and -V are mutually exclusive')
options.mode = VERIFY
elif opt in ('-Q', '--quick'):
options.quick = True
elif opt in ('-f', '--file'):
Expand All @@ -195,7 +207,7 @@ class Options:

# Sanity checks
if options.mode is None:
usage(1, 'Either --backup or --recover is required')
usage(1, 'Either --backup, --recover or --verify is required')
if options.repository is None:
usage(1, '--repository is required')
if options.mode == BACKUP:
Expand All @@ -205,14 +217,33 @@ class Options:
if options.output is not None:
log('--output option is ignored in backup mode')
options.output = None
else:
assert options.mode == RECOVER
elif options.mode == RECOVER:
if options.file is not None:
log('--file option is ignored in recover mode')
options.file = None
if options.killold is not None:
if options.killold:
log('--kill-old-on-full option is ignored in recover mode')
options.killold = None
options.killold = False
else:
assert options.mode == VERIFY
if options.date is not None:
log("--date option is ignored in verify mode")
options.date = None
if options.output is not None:
log('--output option is ignored in verify mode')
options.output = None
if options.full:
log('--full option is ignored in verify mode')
options.full = False
if options.gzip:
log('--gzip option is ignored in verify mode')
options.gzip = False
if options.file is not None:
log('--file option is ignored in verify mode')
options.file = None
if options.killold:
log('--kill-old-on-full option is ignored in verify mode')
options.killold = False
return options


Expand Down Expand Up @@ -608,6 +639,39 @@ def do_recover(options):
log('No index file to restore: %s', source_index)


def do_verify(options):
    """Verify the sizes and checksums of all files mentioned in the .dat file.

    The .dat file is the one that shares its base name with the first
    entry returned by ``find_files(options)``.  Each of its lines is split
    on whitespace into four fields: a file name, a start position, an end
    position and a checksum.  Only the base name of the recorded file name
    is used; the file is looked up inside ``options.repository``.

    Every problem found (missing file, wrong size, wrong checksum) is
    reported through ``error()`` and verification continues with the next
    line.  When ``options.quick`` is true only the sizes are compared.

    Raises ``NoFiles`` when ``find_files`` returns nothing.
    """
    repofiles = find_files(options)
    if not repofiles:
        raise NoFiles('No files in repository')
    datfile = os.path.splitext(repofiles[0])[0] + '.dat'
    with open(datfile) as manifest:
        for line in manifest:
            fn, startpos, endpos, expected_sum = line.split()
            # The recorded positions delimit the slice of the database
            # that this backup file holds, so their difference is the
            # size the file on disk must have.
            expected_size = int(endpos) - int(startpos)
            filename = os.path.join(options.repository,
                                    os.path.basename(fn))
            log("Verifying %s", filename)
            # XXX: if the file is gzipped, we need to unzip it
            try:
                backup = open(filename, 'rb')
            except IOError:
                error("%s is missing", filename)
                continue
            # Use a context manager so the handle is released even when
            # fstat() or checksum() raises, and keep it under its own name
            # so it never rebinds the manifest handle being iterated.
            with backup:
                size = os.fstat(backup.fileno()).st_size
                if size != expected_size:
                    error("%s is %d bytes, should be %d bytes", filename,
                          size, expected_size)
                elif not options.quick:
                    actual_sum = checksum(backup, size)
                    if actual_sum != expected_sum:
                        error("%s has checksum %s instead of %s", filename,
                              actual_sum, expected_sum)


def main(argv=None):
if argv is None:
argv = sys.argv[1:]
Expand All @@ -616,15 +680,18 @@ def main(argv=None):
try:
do_backup(options)
except WouldOverwriteFiles as e:
print(str(e), file=sys.stderr)
sys.exit(1)
else:
assert options.mode == RECOVER
sys.exit(str(e))
elif options.mode == RECOVER:
try:
do_recover(options)
except NoFiles as e:
print(str(e), file=sys.stderr)
sys.exit(1)
sys.exit(str(e))
else:
assert options.mode == VERIFY
try:
do_verify(options)
except NoFiles as e:
sys.exit(str(e))


if __name__ == '__main__':
Expand Down
88 changes: 87 additions & 1 deletion src/ZODB/scripts/tests/test_repozo.py
Expand Up @@ -198,9 +198,10 @@ def tearDown(self):

def _makeOptions(self, **kw):
import tempfile
self._repository_directory = tempfile.mkdtemp()
self._repository_directory = tempfile.mkdtemp(prefix='test-repozo-')
class Options(object):
repository = self._repository_directory
date = None
def __init__(self, **kw):
self.__dict__.update(kw)
return Options(**kw)
Expand Down Expand Up @@ -789,6 +790,90 @@ def test_w_incr_backup_latest_index(self):
self.assertEqual(_read_file(output), b'AAABBB')
self.assertEqual(_read_file(index), b'CCC')


class Test_do_verify(OptionsTestBase, unittest.TestCase):
    """Tests for ``repozo.do_verify``.

    ``do_verify`` reports problems by calling ``repozo.error``; the tests
    temporarily replace that function to collect the formatted messages.
    """

    # Manifest used by most tests: a 3-byte .fs file followed by a 4-byte
    # .deltafs file.  test_all_is_fine expects these checksums to match
    # the contents 'AAA' and 'BBBB' respectively.
    _GOOD_DAT = (
        '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')

    def _callFUT(self, options):
        """Run do_verify(options) and return the list of error messages."""
        from ZODB.scripts import repozo
        errors = []
        orig_error = repozo.error

        def _error(msg, *args):
            errors.append(msg % args)

        repozo.error = _error
        try:
            repozo.do_verify(options)
            return errors
        finally:
            # Always restore the real error() so other tests are unaffected.
            repozo.error = orig_error

    def _makeFile(self, hour, min, sec, ext, text=None):
        """Write a repository file named after the given time and extension.

        The file is created inside the directory made by ``_makeOptions``;
        its content is ``text`` (or the file name itself when ``text`` is
        None), encoded to bytes.  Returns the full path.
        """
        assert self._repository_directory, 'call _makeOptions first!'
        name = '2010-05-14-%02d-%02d-%02d%s' % (hour, min, sec, ext)
        if text is None:
            text = name
        fqn = os.path.join(self._repository_directory, name)
        _write_file(fqn, text.encode())
        return fqn

    def test_no_files(self):
        from ZODB.scripts.repozo import NoFiles
        options = self._makeOptions()
        self.assertRaises(NoFiles, self._callFUT, options)

    def test_all_is_fine(self):
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
        self._makeFile(2, 3, 4, '.dat', self._GOOD_DAT)
        self.assertEqual(self._callFUT(options), [])

    def test_missing_file(self):
        options = self._makeOptions(quick=True)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        # The .deltafs file listed in the manifest is deliberately absent.
        self._makeFile(2, 3, 4, '.dat', self._GOOD_DAT)
        self.assertEqual(self._callFUT(options),
                         [options.repository + os.path.sep +
                          '2010-05-14-04-05-06.deltafs is missing'])

    def test_bad_size(self):
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        # One byte shorter than the 4 bytes the manifest records.
        self._makeFile(4, 5, 6, '.deltafs', 'BBB')
        self._makeFile(2, 3, 4, '.dat', self._GOOD_DAT)
        self.assertEqual(self._callFUT(options),
                         [options.repository + os.path.sep +
                          '2010-05-14-04-05-06.deltafs is 3 bytes,'
                          ' should be 4 bytes'])

    def test_bad_checksum(self):
        options = self._makeOptions(quick=False)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        # Same length as recorded, different content.
        self._makeFile(4, 5, 6, '.deltafs', 'BbBB')
        self._makeFile(2, 3, 4, '.dat', self._GOOD_DAT)
        self.assertEqual(self._callFUT(options),
                         [options.repository + os.path.sep +
                          '2010-05-14-04-05-06.deltafs has checksum'
                          ' 36486440db255f0ee6ab109d5d231406 instead of'
                          ' f50881ced34c7d9e6bce100bf33dec60'])

    def test_quick_ignores_checksums(self):
        options = self._makeOptions(quick=True)
        self._makeFile(2, 3, 4, '.fs', 'AAA')
        self._makeFile(4, 5, 6, '.deltafs', 'BBBB')
        # Sizes are right but the recorded checksums are bogus; quick mode
        # must not report them.
        self._makeFile(2, 3, 4, '.dat',
            '/backup/2010-05-14-02-03-04.fs 0 3 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n'
            '/backup/2010-05-14-04-05-06.deltafs 3 7 bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n')
        self.assertEqual(self._callFUT(options), [])


class MonteCarloTests(unittest.TestCase):

layer = ZODB.tests.util.MininalTestLayer('repozo')
Expand Down Expand Up @@ -902,6 +987,7 @@ def test_suite():
unittest.makeSuite(Test_do_incremental_backup),
#unittest.makeSuite(Test_do_backup), #TODO
unittest.makeSuite(Test_do_recover),
unittest.makeSuite(Test_do_verify),
# N.B.: this test take forever to run (~40sec on a fast laptop),
# *and* it is non-deterministic.
unittest.makeSuite(MonteCarloTests),
Expand Down

0 comments on commit 15b6248

Please sign in to comment.