Skip to content

Commit

Permalink
repozo --verify now correctly handles gzipped backups
Browse files Browse the repository at this point in the history
  • Loading branch information
mgedmin committed Oct 10, 2013
1 parent 15b6248 commit bc3008c
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 18 deletions.
56 changes: 45 additions & 11 deletions src/ZODB/scripts/repozo.py
Expand Up @@ -287,6 +287,22 @@ def func(data):
return sum.hexdigest()


def file_size(fp):
    """Return the number of bytes that can be read from fp.

    The stream is drained through dofile() with a callback that throws
    the data away; the value dofile() returns is handed back unchanged.
    NOTE(review): dofile() is defined elsewhere in this module and is
    presumed to return the total byte count it consumed -- confirm.
    """
    def _discard(chunk):
        # Only the byte count matters, so the data itself is ignored.
        return None

    nbytes = dofile(_discard, fp, None)
    return nbytes


def checksum_and_size(fp):
    """Return ``(hexdigest, size)`` for the data readable from fp.

    Every chunk that dofile() delivers is fed into an MD5 digest; the
    value dofile() returns is reported as the size.
    NOTE(review): dofile() is defined elsewhere in this module and is
    presumed to return the total byte count it consumed -- confirm.
    """
    digest = md5()
    # The bound update method is used directly as the per-chunk callback.
    nbytes = dofile(digest.update, fp, None)
    return digest.hexdigest(), nbytes


def copyfile(options, dst, start, n):
# Copy bytes from file src, to file dst, starting at offset start, for n
# length of bytes. For robustness, we first write, flush and fsync
Expand Down Expand Up @@ -654,22 +670,40 @@ def do_verify(options):
os.path.basename(fn))
expected_size = endpos - startpos
log("Verifying %s", filename)
# XXX: if the file is gzipped, we need to unzip it
try:
fp = open(filename, 'rb')
if filename.endswith('fsz'):
fp = gzip.open(filename, 'rb')
when_uncompressed = ' (when uncompressed)'
else:
fp = open(filename, 'rb')
when_uncompressed = ''
except IOError:
error("%s is missing", filename)
continue
try:
fp.seek(0, 2)
except ValueError:
# can't seek in gzipped files
if options.quick:
size = file_size(fp)
actual_sum = None
else:
actual_sum, size = checksum_and_size(fp)
else:
size = os.fstat(fp.fileno()).st_size
if size != expected_size:
error("%s is %d bytes, should be %d bytes", filename,
size, expected_size)
elif not options.quick:
size = fp.tell()
if options.quick or size != expected_size:
actual_sum = None
else:
fp.seek(0)
actual_sum = checksum(fp, size)
if actual_sum != sum:
error("%s has checksum %s instead of %s", filename,
actual_sum, sum)
fp.close()
if size != expected_size:
error("%s is %d bytes%s, should be %d bytes", filename,
size, when_uncompressed, expected_size)
elif not options.quick:
if actual_sum != sum:
error("%s has checksum %s%s instead of %s", filename,
actual_sum, when_uncompressed, sum)
fp.close()


def main(argv=None):
Expand Down
68 changes: 61 additions & 7 deletions src/ZODB/scripts/tests/test_repozo.py
Expand Up @@ -14,12 +14,7 @@
from __future__ import print_function
import unittest
import os
try:
# the hashlib package is available from Python 2.5
from hashlib import md5
except ImportError:
# the md5 package is deprecated in Python 2.6
from md5 import new as md5
from hashlib import md5

import ZODB.tests.util # layer used at class scope

Expand Down Expand Up @@ -812,7 +807,13 @@ def _makeFile(self, hour, min, sec, ext, text=None):
if text is None:
text = name
fqn = os.path.join(self._repository_directory, name)
f = _write_file(fqn, text.encode())
if ext.endswith('fsz'):
_opener = _GzipCloser
else:
_opener = open
with _opener(fqn, 'wb') as f:
f.write(text)
f.flush()
return fqn

def test_no_files(self):
Expand All @@ -829,6 +830,15 @@ def test_all_is_fine(self):
'/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n')
self.assertEqual(self._callFUT(options), [])

def test_all_is_fine_gzip(self):
    """A full (non-quick) verify of matching .fsz/.deltafsz files yields no errors."""
    opts = self._makeOptions(quick=False)
    # Index contents written to the .dat file: name, start, end, checksum.
    index = (
        '/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n'
    )
    self._makeFile(2, 3, 4, '.fsz', 'AAA')
    self._makeFile(4, 5, 6, '.deltafsz', 'BBBB')
    self._makeFile(2, 3, 4, '.dat', index)
    errors = self._callFUT(opts)
    self.assertEqual(errors, [])

def test_missing_file(self):
options = self._makeOptions(quick=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
Expand All @@ -839,6 +849,16 @@ def test_missing_file(self):
[options.repository + os.path.sep +
'2010-05-14-04-05-06.deltafs is missing'])

def test_missing_file_gzip(self):
    """A .deltafsz file listed in the index but absent on disk is reported as missing."""
    opts = self._makeOptions(quick=True)
    # The index names two files, but only the .fsz one is created.
    index = (
        '/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n'
    )
    self._makeFile(2, 3, 4, '.fsz', 'AAA')
    self._makeFile(2, 3, 4, '.dat', index)
    errors = self._callFUT(opts)
    expected = [
        opts.repository + os.path.sep +
        '2010-05-14-04-05-06.deltafsz is missing'
    ]
    self.assertEqual(errors, expected)

def test_bad_size(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
Expand All @@ -851,6 +871,18 @@ def test_bad_size(self):
'2010-05-14-04-05-06.deltafs is 3 bytes,'
' should be 4 bytes'])

def test_bad_size_gzip(self):
    """A .deltafsz file holding 3 bytes where the index implies 4 is reported."""
    opts = self._makeOptions(quick=False)
    # The index gives the delta the range 3..7 (4 bytes); only 3 are written.
    index = (
        '/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n'
    )
    self._makeFile(2, 3, 4, '.fsz', 'AAA')
    self._makeFile(4, 5, 6, '.deltafsz', 'BBB')
    self._makeFile(2, 3, 4, '.dat', index)
    errors = self._callFUT(opts)
    expected = [
        opts.repository + os.path.sep +
        '2010-05-14-04-05-06.deltafsz is 3 bytes (when uncompressed),'
        ' should be 4 bytes'
    ]
    self.assertEqual(errors, expected)

def test_bad_checksum(self):
options = self._makeOptions(quick=False)
self._makeFile(2, 3, 4, '.fs', 'AAA')
Expand All @@ -864,6 +896,19 @@ def test_bad_checksum(self):
' 36486440db255f0ee6ab109d5d231406 instead of'
' f50881ced34c7d9e6bce100bf33dec60'])

def test_bad_checksum_gzip(self):
    """A .deltafsz file of the right size but different content is reported."""
    opts = self._makeOptions(quick=False)
    index = (
        '/backup/2010-05-14-02-03-04.fsz 0 3 e1faffb3e614e6c2fba74296962386b7\n'
        '/backup/2010-05-14-04-05-06.deltafsz 3 7 f50881ced34c7d9e6bce100bf33dec60\n'
    )
    self._makeFile(2, 3, 4, '.fsz', 'AAA')
    # Same length as the other gzip tests' 'BBBB' payload, one character differs.
    self._makeFile(4, 5, 6, '.deltafsz', 'BbBB')
    self._makeFile(2, 3, 4, '.dat', index)
    errors = self._callFUT(opts)
    expected = [
        opts.repository + os.path.sep +
        '2010-05-14-04-05-06.deltafsz has checksum'
        ' 36486440db255f0ee6ab109d5d231406 (when uncompressed) instead of'
        ' f50881ced34c7d9e6bce100bf33dec60'
    ]
    self.assertEqual(errors, expected)

def test_quick_ignores_checksums(self):
options = self._makeOptions(quick=True)
self._makeFile(2, 3, 4, '.fs', 'AAA')
Expand All @@ -873,6 +918,15 @@ def test_quick_ignores_checksums(self):
'/backup/2010-05-14-04-05-06.deltafs 3 7 bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n')
self.assertEqual(self._callFUT(options), [])

def test_quick_ignores_checksums_gzip(self):
    """With quick=True, placeholder checksums in the index produce no errors."""
    opts = self._makeOptions(quick=True)
    # The checksum column holds dummy values; the sizes still match the files.
    index = (
        '/backup/2010-05-14-02-03-04.fsz 0 3 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n'
        '/backup/2010-05-14-04-05-06.deltafsz 3 7 bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n'
    )
    self._makeFile(2, 3, 4, '.fsz', 'AAA')
    self._makeFile(4, 5, 6, '.deltafsz', 'BBBB')
    self._makeFile(2, 3, 4, '.dat', index)
    errors = self._callFUT(opts)
    self.assertEqual(errors, [])


class MonteCarloTests(unittest.TestCase):

Expand Down

0 comments on commit bc3008c

Please sign in to comment.