-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Adding pg_dump admin command and documentation (#430)
- Loading branch information
Showing
4 changed files
with
133 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
.. _admin_backup: | ||
|
||
============ | ||
Data Backups | ||
============ | ||
|
||
This section describes how to create data backups in VarFish. | ||
The assumption is that you are running VarFish in the recommended way via Docker Compose. | ||
|
||
All valuable state is kept in the VarFish PostgreSQL database. | ||
VarFish provides a convenient way to call the PostgreSQL tool ``pg_dump``. | ||
|
||
You can call it in the following way when VarFish is running under Docker Compose and the postgres container is running as well. | ||
|
||
:: | ||
|
||
# docker exec -it varfish-docker-compose_varfish-web_1 \ | ||
python /usr/src/app/manage.py pg_dump --mode=MODE | ||
|
||
This will execute ``python /usr/src/app/manage.py pg_dump --mode=MODE`` in the docker container that is running the VarFish web server. | ||
|
||
You can use one of the following dump modes. | ||
|
||
``full`` | ||
This will perform a full data dump including all background data. | ||
|
||
``backup-large`` | ||
This will exclude the huge background data tables, e.g., dbSNP and gnomAD. | ||
|
||
``backup-small`` | ||
In addition to the background data tables excluded by ``backup-large``, this will also exclude all imported variant data. | ||
The assumption is that you have a separate backup of the imported TSV files or can easily regenerate them from the VCF files that you still have. | ||
|
||
Here is an example of how to create a compressed "small" dump file named ``varfish-${day_of_week}.sql.gz`` such that you get a rotating daily dump. | ||
|
||
:: | ||
|
||
# docker exec -i varfish-docker-compose_varfish-web_1 \ | ||
python /usr/src/app/manage.py pg_dump --mode=backup-small \ | ||
| gzip -c \ | ||
> varfish-$(date +%a).sql.gz | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,89 @@ | ||
"""Django command that is a convenience wrapper around ``pg_dump``""" | ||
|
||
from itertools import chain | ||
import os | ||
import subprocess | ||
import sys | ||
|
||
from django.core.management.base import BaseCommand, CommandError | ||
from django.conf import settings | ||
|
||
#: The available dump modes, i.e., the values accepted for ``--mode``. | ||
DUMP_MODES = ("full", "backup-large", "backup-small") | ||
#: The tables whose data is ignored in backup-large mode (and, together with | ||
#: ``IGNORE_THIN``, in backup-small mode); passed to ``pg_dump --exclude-table-data``. | ||
IGNORE_LARGE = ( | ||
"clinvar_clinvar", | ||
"conservation_knowngeneaa", | ||
"dbsnp_dbsnp", | ||
"extra_annos_extraanno", | ||
"extra_annos_extraannofield", | ||
"frequencies_exac", | ||
"frequencies_gnomadexomes", | ||
"frequencies_gnomadgenomes", | ||
"frequencies_helixmtdb", | ||
"frequencies_mitomap", | ||
"frequencies_mtdb", | ||
"frequencies_thousandgenomes", | ||
) | ||
#: The table name patterns whose data is additionally ignored in backup-small mode; | ||
#: also passed to ``pg_dump --exclude-table-data``. | ||
IGNORE_THIN = ( | ||
"variants_smallvariant_[0-9]*", | ||
"svs_structuralvariant[0-9]+", | ||
"svs_structuralvariantgeneannotation[0-9]+", | ||
) | ||
|
||
|
||
class Command(BaseCommand):
    """Management command wrapping ``pg_dump`` to dump the default PostgreSQL database.

    The connection parameters are taken from ``settings.DATABASES["default"]``.
    The ``--mode`` option selects how much data is dumped:

    - ``full``: no table is excluded.
    - ``backup-large``: the data of the tables in ``IGNORE_LARGE`` is excluded.
    - ``backup-small``: the data of the tables matching ``IGNORE_LARGE`` and
      ``IGNORE_THIN`` is excluded.

    The dump is written to ``--output-file`` if given and to stdout otherwise.
    """

    #: Help message displayed on the command line.
    help = "Easily create database dumps with ``pg_dump``"

    def add_arguments(self, parser):
        """Add the command's argument to the ``parser``."""
        parser.add_argument(
            "--mode",
            # Reject unknown modes up front; otherwise a typo would silently
            # fall through to a full dump.
            choices=DUMP_MODES,
            help="Backup mode, one of %s" % (DUMP_MODES,),
            required=True,
        )
        parser.add_argument(
            "--output-file", help="Optional path to write output to, default is stdout"
        )
        parser.add_argument(
            "--force-overwrite", default=False, action="store_true", help="Overwrite output file"
        )

    def handle(self, *args, **options):
        """Validate the options, select the output and delegate to ``_run()``.

        :raises CommandError: if the mode is not one of ``DUMP_MODES`` or if the
            output file exists and ``--force-overwrite`` was not given.
        """
        # Guard for programmatic callers that bypass argument parsing; done
        # before opening the output so an existing file is never truncated
        # because of an invalid mode.
        if options["mode"] not in DUMP_MODES:
            raise CommandError(
                "Invalid mode %r, must be one of %s" % (options["mode"], DUMP_MODES)
            )

        output_file = options["output_file"]
        if not output_file:
            self._run(options, sys.stdout)
            return

        if os.path.exists(output_file) and not options["force_overwrite"]:
            self.stderr.write(
                self.style.ERROR(
                    "Refusing to overwrite %s; use --force-overwrite to force overwriting"
                    % output_file
                )
            )
            raise CommandError("Refusing to overwrite %s" % output_file)
        with open(output_file, "wb") as outputf:
            self._run(options, outputf)

    def _run(self, options, outputf):
        """Build the ``pg_dump`` command line, run it and send its stdout to ``outputf``.

        :param options: the parsed command options; only ``options["mode"]`` is read.
        :param outputf: file object handed to the subprocess as its stdout.
        :raises subprocess.CalledProcessError: if ``pg_dump`` exits with a non-zero status.
        """
        database = settings.DATABASES["default"]
        # Hand the password over via the environment so that it appears neither
        # on the command line nor in the command echoed to stderr below.
        env = dict(os.environ)
        env["PGPASSWORD"] = database["PASSWORD"]

        cmd = [
            "/usr/bin/pg_dump",
            "--dbname=%s" % database["NAME"],
            "--host=%s" % database["HOST"],
            "--username=%s" % database["USER"],
        ]
        # Honour a non-default port if one is configured; an empty/missing value
        # keeps the pg_dump default.
        if database.get("PORT"):
            cmd.append("--port=%s" % database["PORT"])

        if options["mode"] == "backup-large":
            cmd += ["--exclude-table-data=%s" % pat for pat in IGNORE_LARGE]
        elif options["mode"] == "backup-small":
            cmd += ["--exclude-table-data=%s" % pat for pat in chain(IGNORE_LARGE, IGNORE_THIN)]

        # Status messages go to stderr so that they never end up in a dump
        # written to stdout.
        self.stderr.write(self.style.NOTICE("Running the following command: %s" % cmd))
        subprocess.check_call(cmd, stdout=outputf, env=env)
        outputf.flush()
        self.stderr.write(self.style.NOTICE("All done. Have a nice day!"))