From d6bcc5a45241b007e19d3268a386050faa374e36 Mon Sep 17 00:00:00 2001 From: Nicolas Wavrant Date: Wed, 21 Nov 2018 09:52:00 +0100 Subject: [PATCH 1/5] repozo: do not close file in concat function But let the caller opening the file closing it --- src/ZODB/scripts/repozo.py | 2 -- src/ZODB/scripts/tests/test_repozo.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py index dfb12f6fd..38aa766ec 100755 --- a/src/ZODB/scripts/repozo.py +++ b/src/ZODB/scripts/repozo.py @@ -360,8 +360,6 @@ def func(data): ifp = open(f, 'rb') bytesread += dofile(func, ifp) ifp.close() - if ofp: - ofp.close() return bytesread, sum.hexdigest() diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py index a4f25543c..5071b62e3 100644 --- a/src/ZODB/scripts/tests/test_repozo.py +++ b/src/ZODB/scripts/tests/test_repozo.py @@ -414,7 +414,7 @@ def close(self): ofp = Faux() bytes, sum = self._callFUT(files, ofp) self.assertEqual(ofp._written, [x.encode() for x in 'ABC']) - self.assertTrue(ofp._closed) + self.assertFalse(ofp._closed) _marker = object() class Test_gen_filename(OptionsTestBase, unittest.TestCase): From 1f394d5a9917add9fb8288f81d83d1e416b8e793 Mon Sep 17 00:00:00 2001 From: Nicolas Wavrant Date: Thu, 22 Nov 2018 07:26:08 +0100 Subject: [PATCH 2/5] repozo: recover action can verify data on the fly --- CHANGES.rst | 3 +- src/ZODB/scripts/repozo.py | 56 +++++++++++++++++++++++-- src/ZODB/scripts/tests/test_repozo.py | 60 +++++++++++++++++++++++++-- 3 files changed, 111 insertions(+), 8 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index e1f7df2cb..6b10de5af 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,7 +5,8 @@ 5.5.2 (unreleased) ================== -- TBD +- Add a new option to repozo in recover mode which allows to verify + backups integrity on the fly. 5.5.1 (2018-10-25) ================== diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py index 38aa766ec..5aff4b508 100755 --- a/src/ZODB/scripts/repozo.py +++ b/src/ZODB/scripts/repozo.py @@ -73,6 +73,12 @@ Note: for the stdout case, the index file will **not** be restored automatically. + -w + --with-verification + Verify on the fly the backup files on recovering. This option runs + the same checks as when repozo is run in -V/--verify mode, and + allows to verify and recover a backup in one single step. + Options for -V/--verify: -Q / --quick Verify file sizes only (skip md5 checksums). @@ -108,6 +114,8 @@ class WouldOverwriteFiles(Exception): class NoFiles(Exception): pass +class VerificationFail(Exception): + pass class _GzipCloser(object): @@ -146,7 +154,7 @@ def error(msg, *args): def parseargs(argv): global VERBOSE try: - opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:', + opts, args = getopt.getopt(argv, 'BRVvhr:f:FQzkD:o:w', ['backup', 'recover', 'verify', @@ -160,6 +168,7 @@ def parseargs(argv): 'kill-old-on-full', 'date=', 'output=', + 'with-verification', ]) except getopt.error as msg: usage(1, msg) @@ -174,6 +183,7 @@ class Options(object): quick = False # -Q flag state gzip = False # -z flag state killold = False # -k flag state + withverify = False # -w flag state options = Options() @@ -210,6 +220,8 @@ class Options(object): options.gzip = True elif opt in ('-k', '--kill-old-on-full'): options.killold = True + elif opt in ('-w', '--with-verify'): + options.withverify = True else: assert False, (opt, arg) @@ -229,6 +241,9 @@ class Options(object): if options.output is not None: log('--output option is ignored in backup mode') options.output = None + if options.withverify is not None: + log('--with-verify option is ignored in backup mode') + options.withverify = None elif options.mode == RECOVER: if options.file is not None: log('--file option is ignored in recover mode') @@ -256,6 +271,9 @@ class Options(object): if options.killold: log('--kill-old-on-full option is ignored in verify mode') options.killold = False + if options.withverify is not None: + log('--with-verify option is ignored in verify mode') + options.withverify = None return options @@ -649,10 +667,40 @@ def do_recover(options): else: log('Recovering file to %s', options.output) outfp = open(options.output, 'wb') - reposz, reposum = concat(repofiles, outfp) + if options.withverify: + datfile = os.path.splitext(repofiles[0])[0] + '.dat' + with open(datfile) as fp: + truth_dict = {} + for line in fp: + fn, startpos, endpos, sum = line.split() + startpos = int(startpos) + endpos = int(endpos) + filename = os.path.join(options.repository, + os.path.basename(fn)) + truth_dict[filename] = { + 'size': endpos - startpos, + 'sum': sum, + } + totalsz = 0 + for repofile in repofiles: + reposz, reposum = concat([repofile], outfp) + expected_truth = truth_dict[repofile] + if reposz != expected_truth['size']: + raise VerificationFail( + "%s is %d bytes, should be %d bytes" % ( + repofile, reposz, expected_truth['size'])) + if reposum != expected_truth['sum']: + raise VerificationFail( + "%s has checksum %s instead of %s" % ( + repofile, reposum, expected_truth['sum'])) + totalsz += reposz + log("Recovered chunk %s : %s bytes, md5: %s", repofile, reposz, reposum) + log("Recovered a total of %s bytes", totalsz) + else: + reposz, reposum = concat(repofiles, outfp) + log('Recovered %s bytes, md5: %s', reposz, reposum) if outfp != sys.stdout: outfp.close() - log('Recovered %s bytes, md5: %s', reposz, reposum) if options.output is not None: last_base = os.path.splitext(repofiles[-1])[0] @@ -728,6 +776,8 @@ def main(argv=None): do_backup(options) except WouldOverwriteFiles as e: sys.exit(str(e)) + except VerificationFail as e: + sys.exit(str(e)) elif options.mode == RECOVER: try: do_recover(options) diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py index 5071b62e3..a4bc0a7e9 100644 --- a/src/ZODB/scripts/tests/test_repozo.py +++ b/src/ZODB/scripts/tests/test_repozo.py @@ -872,7 +872,8 @@ def test_w_full_backup_latest_no_index(self): output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', - output=output) + output=output, + withverify=False) self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(4, 5, 6, '.fs', 'BBB') self._callFUT(options) @@ -884,7 +885,8 @@ def test_w_full_backup_latest_index(self): output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', - output=output) + output=output, + withverify=False) self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(4, 5, 6, '.fs', 'BBB') self._makeFile(4, 5, 6, '.index', 'CCC') @@ -898,7 +900,8 @@ def test_w_incr_backup_latest_no_index(self): output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', - output=output) + output=output, + withverify=False) self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(4, 5, 6, '.deltafs', 'BBB') self._callFUT(options) @@ -910,7 +913,8 @@ def test_w_incr_backup_latest_index(self): output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', - output=output) + output=output, + withverify=False) self._makeFile(2, 3, 4, '.fs', 'AAA') self._makeFile(4, 5, 6, '.deltafs', 'BBB') self._makeFile(4, 5, 6, '.index', 'CCC') @@ -918,6 +922,54 @@ def test_w_incr_backup_latest_index(self): self.assertEqual(_read_file(output), b'AAABBB') self.assertEqual(_read_file(index), b'CCC') + def test_w_incr_backup_with_verify_all_is_fine(self): + import tempfile + dd = self._data_directory = tempfile.mkdtemp() + output = os.path.join(dd, 'Data.fs') + index = os.path.join(dd, 'Data.fs.index') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=True) + self._makeFile(2, 3, 4, '.fs', 'AAA') + self._makeFile(4, 5, 6, '.deltafs', 'BBBB') + self._makeFile(2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' + '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n') + self._callFUT(options) + self.assertEqual(_read_file(output), b'AAABBBB') + + def test_w_incr_backup_with_verify_sum_inconsistent(self): + import tempfile + from ZODB.scripts.repozo import VerificationFail + dd = self._data_directory = tempfile.mkdtemp() + output = os.path.join(dd, 'Data.fs') + index = os.path.join(dd, 'Data.fs.index') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=True) + self._makeFile(2, 3, 4, '.fs', 'AAA') + self._makeFile(4, 5, 6, '.deltafs', 'BBBB') + self._makeFile(2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' + '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec61\n') + self.assertRaises(VerificationFail, self._callFUT, options) + + def test_w_incr_backup_with_verify_size_inconsistent(self): + import tempfile + from ZODB.scripts.repozo import VerificationFail + dd = self._data_directory = tempfile.mkdtemp() + output = os.path.join(dd, 'Data.fs') + index = os.path.join(dd, 'Data.fs.index') + options = self._makeOptions(date='2010-05-15-13-30-57', + output=output, + withverify=True) + self._makeFile(2, 3, 4, '.fs', 'AAA') + self._makeFile(4, 5, 6, '.deltafs', 'BBBB') + self._makeFile(2, 3, 4, '.dat', + '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' + '/backup/2010-05-14-04-05-06.deltafs 3 8 f50881ced34c7d9e6bce100bf33dec60\n') + self.assertRaises(VerificationFail, self._callFUT, options) + class Test_do_verify(OptionsTestBase, unittest.TestCase): From 949a4209d588a659b63cca7303b274c5f55dacfc Mon Sep 17 00:00:00 2001 From: Nicolas Wavrant Date: Thu, 13 Dec 2018 06:59:43 +0100 Subject: [PATCH 3/5] test_repozo: set a prefix to temporary directories created during tests To give users hints about their origin if cleaning isn't done correctly --- src/ZODB/scripts/tests/test_repozo.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py index a4bc0a7e9..28f8052ee 100644 --- a/src/ZODB/scripts/tests/test_repozo.py +++ b/src/ZODB/scripts/tests/test_repozo.py @@ -371,7 +371,7 @@ def _makeFile(self, name, text, gzip_file=False): from ZODB.scripts.repozo import _GzipCloser import tempfile if self._repository_directory is None: - self._repository_directory = tempfile.mkdtemp() + self._repository_directory = tempfile.mkdtemp(prefix='zodb-test-') fqn = os.path.join(self._repository_directory, name) if gzip_file: _opener = _GzipCloser @@ -674,7 +674,7 @@ def _callFUT(self, options): def _makeDB(self): import tempfile - datadir = self._data_directory = tempfile.mkdtemp() + datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') return OurDB(self._data_directory) def test_dont_overwrite_existing_file(self): @@ -729,7 +729,7 @@ def _callFUT(self, options, reposz, repofiles): def _makeDB(self): import tempfile - datadir = self._data_directory = tempfile.mkdtemp() + datadir = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') return OurDB(self._data_directory) def test_dont_overwrite_existing_file(self): @@ -868,7 +868,7 @@ def test_no_files_before_explicit_date(self): def test_w_full_backup_latest_no_index(self): import tempfile - dd = self._data_directory = tempfile.mkdtemp() + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', @@ -881,7 +881,7 @@ def test_w_full_backup_latest_no_index(self): def test_w_full_backup_latest_index(self): import tempfile - dd = self._data_directory = tempfile.mkdtemp() + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', @@ -896,7 +896,7 @@ def test_w_full_backup_latest_index(self): def test_w_incr_backup_latest_no_index(self): import tempfile - dd = self._data_directory = tempfile.mkdtemp() + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', @@ -909,7 +909,7 @@ def test_w_incr_backup_latest_no_index(self): def test_w_incr_backup_latest_index(self): import tempfile - dd = self._data_directory = tempfile.mkdtemp() + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', @@ -924,7 +924,7 @@ def test_w_incr_backup_latest_index(self): def test_w_incr_backup_with_verify_all_is_fine(self): import tempfile - dd = self._data_directory = tempfile.mkdtemp() + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', @@ -941,7 +941,7 @@ def test_w_incr_backup_with_verify_all_is_fine(self): def test_w_incr_backup_with_verify_sum_inconsistent(self): import tempfile from ZODB.scripts.repozo import VerificationFail - dd = self._data_directory = tempfile.mkdtemp() + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', @@ -957,7 +957,7 @@ def test_w_incr_backup_with_verify_sum_inconsistent(self): def test_w_incr_backup_with_verify_size_inconsistent(self): import tempfile from ZODB.scripts.repozo import VerificationFail - dd = self._data_directory = tempfile.mkdtemp() + dd = self._data_directory = tempfile.mkdtemp(prefix='zodb-test-') output = os.path.join(dd, 'Data.fs') index = os.path.join(dd, 'Data.fs.index') options = self._makeOptions(date='2010-05-15-13-30-57', @@ -1121,7 +1121,7 @@ class MonteCarloTests(unittest.TestCase): def setUp(self): # compute directory names import tempfile - self.basedir = tempfile.mkdtemp() + self.basedir = tempfile.mkdtemp(prefix='zodb-test-') self.backupdir = os.path.join(self.basedir, 'backup') self.datadir = os.path.join(self.basedir, 'data') self.restoredir = os.path.join(self.basedir, 'restore') From b94910cb1a1e8587057723fea1f0f27ce1057db5 Mon Sep 17 00:00:00 2001 From: Nicolas Wavrant Date: Tue, 18 Dec 2018 03:23:34 +0100 Subject: [PATCH 4/5] repozo: recover ZODB in a temporary file then rename it So if recovering a ZODB fails for any reason, it doesn't leave behind a partial file which may be confused with the recovered file (as it bears same name). --- CHANGES.rst | 2 ++ src/ZODB/scripts/repozo.py | 22 ++++++++++++++++++---- src/ZODB/scripts/tests/test_repozo.py | 3 +++ 3 files changed, 23 insertions(+), 4 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 6b10de5af..4d2f513a8 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,8 @@ 5.5.2 (unreleased) ================== +- Make repozo's recover mode atomic by recovering the backup in a + temporary file which is then moved to the expected output file. - Add a new option to repozo in recover mode which allows to verify backups integrity on the fly. diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py index 5aff4b508..d68000e3a 100755 --- a/src/ZODB/scripts/repozo.py +++ b/src/ZODB/scripts/repozo.py @@ -77,7 +77,8 @@ --with-verification Verify on the fly the backup files on recovering. This option runs the same checks as when repozo is run in -V/--verify mode, and - allows to verify and recover a backup in one single step. + allows to verify and recover a backup in one single step. If a sanity + check fails, the partially recovered ZODB will be left in place. Options for -V/--verify: -Q / --quick @@ -665,8 +666,14 @@ def do_recover(options): log('Recovering file to stdout') outfp = sys.stdout else: + # Delete old ZODB before recovering backup as size of + # old ZODB + full partial file may be superior to free disk space + if os.path.exists(options.output): + log('Deleting old %s', options.output) + os.unlink(options.output) log('Recovering file to %s', options.output) - outfp = open(options.output, 'wb') + temporary_output_file = options.output + '.part' + outfp = open(temporary_output_file, 'wb') if options.withverify: datfile = os.path.splitext(repofiles[0])[0] + '.dat' with open(datfile) as fp: @@ -699,8 +706,6 @@ def do_recover(options): else: reposz, reposum = concat(repofiles, outfp) log('Recovered %s bytes, md5: %s', reposz, reposum) - if outfp != sys.stdout: - outfp.close() if options.output is not None: last_base = os.path.splitext(repofiles[-1])[0] @@ -712,6 +717,15 @@ def do_recover(options): else: log('No index file to restore: %s', source_index) + if outfp != sys.stdout: + outfp.close() + try: + os.rename(temporary_output_file, options.output) + except OSError: + log("ZODB has been fully recovered as %s, but it cannot be renamed into : %s", + temporary_output_file, options.output) + raise + def do_verify(options): # Verify the sizes and checksums of all files mentioned in the .dat file diff --git a/src/ZODB/scripts/tests/test_repozo.py b/src/ZODB/scripts/tests/test_repozo.py index 28f8052ee..785069f4a 100644 --- a/src/ZODB/scripts/tests/test_repozo.py +++ b/src/ZODB/scripts/tests/test_repozo.py @@ -936,6 +936,7 @@ def test_w_incr_backup_with_verify_all_is_fine(self): '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec60\n') self._callFUT(options) + self.assertFalse(os.path.exists(output + '.part')) self.assertEqual(_read_file(output), b'AAABBBB') def test_w_incr_backup_with_verify_sum_inconsistent(self): @@ -953,6 +954,7 @@ def test_w_incr_backup_with_verify_sum_inconsistent(self): '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' '/backup/2010-05-14-04-05-06.deltafs 3 7 f50881ced34c7d9e6bce100bf33dec61\n') self.assertRaises(VerificationFail, self._callFUT, options) + self.assertTrue(os.path.exists(output + '.part')) def test_w_incr_backup_with_verify_size_inconsistent(self): import tempfile @@ -969,6 +971,7 @@ def test_w_incr_backup_with_verify_size_inconsistent(self): '/backup/2010-05-14-02-03-04.fs 0 3 e1faffb3e614e6c2fba74296962386b7\n' '/backup/2010-05-14-04-05-06.deltafs 3 8 f50881ced34c7d9e6bce100bf33dec60\n') self.assertRaises(VerificationFail, self._callFUT, options) + self.assertTrue(os.path.exists(output + '.part')) class Test_do_verify(OptionsTestBase, unittest.TestCase): From 1c6a9828db5f2268e3a8ccecfd6ad5893e21b586 Mon Sep 17 00:00:00 2001 From: Nicolas Wavrant Date: Tue, 18 Dec 2018 03:54:59 +0100 Subject: [PATCH 5/5] repozo: simplify the handling of Errors in the main function --- src/ZODB/scripts/repozo.py | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/src/ZODB/scripts/repozo.py b/src/ZODB/scripts/repozo.py index d68000e3a..d9422ad2e 100755 --- a/src/ZODB/scripts/repozo.py +++ b/src/ZODB/scripts/repozo.py @@ -108,16 +108,22 @@ VERBOSE = False -class WouldOverwriteFiles(Exception): +class RepozoError(Exception): pass -class NoFiles(Exception): +class WouldOverwriteFiles(RepozoError): pass -class VerificationFail(Exception): + +class NoFiles(RepozoError): + pass + + +class VerificationFail(RepozoError): pass + class _GzipCloser(object): def __init__(self, fqn, mode): @@ -785,24 +791,16 @@ def main(argv=None): if argv is None: argv = sys.argv[1:] options = parseargs(argv) - if options.mode == BACKUP: - try: + try: + if options.mode == BACKUP: do_backup(options) - except WouldOverwriteFiles as e: - sys.exit(str(e)) - except VerificationFail as e: - sys.exit(str(e)) - elif options.mode == RECOVER: - try: + elif options.mode == RECOVER: do_recover(options) - except NoFiles as e: - sys.exit(str(e)) - else: - assert options.mode == VERIFY - try: + else: + assert options.mode == VERIFY do_verify(options) - except NoFiles as e: - sys.exit(str(e)) + except (RepozoError, OSError) as e: + sys.exit(str(e)) if __name__ == '__main__':