Skip to content

Commit

Permalink
Implement database composer and partial fetch for Postgres (#1434)
Browse files Browse the repository at this point in the history
* Add Database Composer

* Add skipDirectory option

* Add only databases option

* Style fix

* Update flags

* Update flags

* Files filtering to new ExtractProvider

* Full database composer docker test

* Style issues

* Add partial backup docker test

* Add Docker test to CI & Update config.go

* Add tests to CI

* Docker test: Add check for skipped database

* Add documentation
  • Loading branch information
Catmoonlight committed Mar 18, 2023
1 parent 0b5f127 commit dfb04c8
Show file tree
Hide file tree
Showing 18 changed files with 417 additions and 22 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/dockertests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@ jobs:
'make TEST="pg_delta_backup_wal_delta_test" pg_integration_test',
'make TEST="pg_full_backup_test" pg_integration_test',
'make TEST="pg_full_backup_streamed_test" pg_integration_test',
'make TEST="pg_partial_backup_test" pg_integration_test',
'make TEST="pg_remote_backup_test" pg_integration_test',
'make TEST="pg_ssh_backup_test" pg_integration_test',
'make TEST="pg_receive_wal_test" pg_integration_test',
'make TEST="pg_full_backup_copy_composer_test" pg_integration_test',
'make TEST="pg_full_backup_rating_composer_test" pg_integration_test',
'make TEST="pg_full_backup_database_composer_test" pg_integration_test',
'make TEST="pg_delete_before_name_find_full_test" pg_integration_test',
'make TEST="pg_delete_retain_full_test" pg_integration_test',
'make TEST="pg_delete_before_time_find_full_test" pg_integration_test',
Expand Down
23 changes: 20 additions & 3 deletions cmd/pg/backup_fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ package pg
import (
"fmt"

"github.com/wal-g/wal-g/internal/databases/postgres"

"github.com/spf13/cobra"
"github.com/spf13/viper"
"github.com/wal-g/tracelog"
"github.com/wal-g/wal-g/internal"
"github.com/wal-g/wal-g/internal/databases/postgres"
"github.com/wal-g/wal-g/pkg/storages/storage"
)

Expand All @@ -21,13 +20,16 @@ For information about pattern syntax view: https://golang.org/pkg/path/filepath/
reverseDeltaUnpackDescription = "Unpack delta backups in reverse order (beta feature)"
skipRedundantTarsDescription = "Skip tars with no useful data (requires reverse delta unpack)"
targetUserDataDescription = "Fetch storage backup which has the specified user data"
restoreOnlyDescription = `[Experimental] Downloads only databases specified by passed db ids from default tablespace.
Sets reverse delta unpack & skip redundant tars options automatically`
)

var fileMask string
var restoreSpec string
var reverseDeltaUnpack bool
var skipRedundantTars bool
var fetchTargetUserData string
var onlyDatabases []int

var backupFetchCmd = &cobra.Command{
Use: "backup-fetch destination_directory [backup_name | --target-user-data <data>]",
Expand All @@ -46,9 +48,21 @@ var backupFetchCmd = &cobra.Command{
tracelog.ErrorLogger.FatalOnError(err)

var pgFetcher func(folder storage.Folder, backup internal.Backup)

if onlyDatabases != nil {
skipRedundantTars = true
reverseDeltaUnpack = true
}
reverseDeltaUnpack = reverseDeltaUnpack || viper.GetBool(internal.UseReverseUnpackSetting)
skipRedundantTars = skipRedundantTars || viper.GetBool(internal.SkipRedundantTarsSetting)
extractProv := postgres.ExtractProviderImpl{}

var extractProv postgres.ExtractProvider

if onlyDatabases != nil {
extractProv = postgres.NewExtractProviderDBSpec(onlyDatabases)
} else {
extractProv = postgres.ExtractProviderImpl{}
}

if reverseDeltaUnpack {
pgFetcher = postgres.GetPgFetcherNew(args[0], fileMask, restoreSpec, skipRedundantTars, extractProv)
Expand Down Expand Up @@ -85,5 +99,8 @@ func init() {
false, skipRedundantTarsDescription)
backupFetchCmd.Flags().StringVar(&fetchTargetUserData, "target-user-data",
"", targetUserDataDescription)
backupFetchCmd.Flags().IntSliceVar(&onlyDatabases, "restore-only",
nil, restoreOnlyDescription)

Cmd.AddCommand(backupFetchCmd)
}
11 changes: 11 additions & 0 deletions cmd/pg/backup_push.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const (
storeAllCorruptBlocksFlag = "store-all-corrupt"
useRatingComposerFlag = "rating-composer"
useCopyComposerFlag = "copy-composer"
useDatabaseComposerFlag = "database-composer"
deltaFromUserDataFlag = "delta-from-user-data"
deltaFromNameFlag = "delta-from-name"
addUserDataFlag = "add-user-data"
Expand All @@ -31,6 +32,7 @@ const (
storeAllCorruptBlocksShorthand = "s"
useRatingComposerShorthand = "r"
useCopyComposerShorthand = "c"
useDatabaseComposerShorthand = "b"
)

var (
Expand Down Expand Up @@ -102,6 +104,7 @@ var (
verifyPageChecksums = false
storeAllCorruptBlocks = false
useRatingComposer = false
useDatabaseComposer = false
useCopyComposer = false
deltaFromName = ""
deltaFromUserData = ""
Expand All @@ -116,6 +119,12 @@ func chooseTarBallComposer() postgres.TarBallComposerType {
if useRatingComposer {
tarBallComposerType = postgres.RatingComposer
}

useDatabaseComposer = useDatabaseComposer || viper.GetBool(internal.UseDatabaseComposerSetting)
if useDatabaseComposer {
tarBallComposerType = postgres.DatabaseComposer
}

useCopyComposer = useCopyComposer || viper.GetBool(internal.UseCopyComposerSetting)
if useCopyComposer {
fullBackup = true
Expand All @@ -140,6 +149,8 @@ func init() {
false, "Use rating tar composer (beta)")
backupPushCmd.Flags().BoolVarP(&useCopyComposer, useCopyComposerFlag, useCopyComposerShorthand,
false, "Use copy tar composer (beta)")
backupPushCmd.Flags().BoolVarP(&useDatabaseComposer, useDatabaseComposerFlag, useDatabaseComposerShorthand,
false, "Use database tar composer (experimental)")
backupPushCmd.Flags().StringVar(&deltaFromName, deltaFromNameFlag,
"", "Select the backup specified by name as the target for the delta backup")
backupPushCmd.Flags().StringVar(&deltaFromUserData, deltaFromUserDataFlag,
Expand Down
24 changes: 24 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ services:
command: >
-c 'mkdir -p /export/fullbucket
&& mkdir -p /export/fullratingcomposerbucket
&& mkdir -p /export/fulldatabasecomposerbucket
&& mkdir -p /export/fullcopycomposerbucket
&& mkdir -p /export/fullwithoutfilesmetadatabucket
&& mkdir -p /export/fullscandeltabucket
&& mkdir -p /export/partialbucket
&& mkdir -p /export/remotebucket
&& mkdir -p /export/remotewithoutfilesmetadatabucket
&& mkdir -p /export/cryptobucket
Expand Down Expand Up @@ -278,6 +280,17 @@ services:
links:
- s3

pg_partial_backup_test:
build:
dockerfile: docker/pg_tests/Dockerfile_partial_backup_test
context: .
image: wal-g/partial_backup_test
container_name: wal-g_pg_partial_backup_test
depends_on:
- s3
links:
- s3

pg_remote_backup_test:
build:
dockerfile: docker/pg_tests/Dockerfile_remote_backup_test
Expand Down Expand Up @@ -344,6 +357,17 @@ services:
links:
- s3

pg_full_backup_database_composer_test:
build:
dockerfile: docker/pg_tests/Dockerfile_full_backup_database_composer_test
context: .
image: wal-g/full_backup_database_composer_test
container_name: wal-g_pg_full_backup_database_composer_test
depends_on:
- s3
links:
- s3

pg_full_backup_without_files_metadata_test:
build:
dockerfile: docker/pg_tests/Dockerfile_full_backup_without_files_metadata_test
Expand Down
3 changes: 3 additions & 0 deletions docker/pg_tests/Dockerfile_full_backup_database_composer_test
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Docker-test image: runs the full-backup test against the database tar composer.
FROM wal-g/docker_prefix:latest

# Run the test script as the postgres user (required to start/administer the cluster).
CMD su postgres -c "/tmp/tests/full_backup_database_composer_test.sh"
3 changes: 3 additions & 0 deletions docker/pg_tests/Dockerfile_partial_backup_test
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Docker-test image: runs the partial (per-database) backup-fetch test.
FROM wal-g/docker_prefix:latest

# Run the test script as the postgres user (required to start/administer the cluster).
CMD su postgres -c "/tmp/tests/partial_backup_test.sh"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"WALE_S3_PREFIX": "s3://fulldatabasecomposerbucket",
"WALG_DELTA_MAX_STEPS": "6",
"WALG_PGP_KEY_PATH": "/tmp/PGP_KEY",
"WALG_USE_DATABASE_COMPOSER": "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"WALE_S3_PREFIX": "s3://partialbucket",
"WALG_DELTA_MAX_STEPS": "6",
"WALG_PGP_KEY_PATH": "/tmp/PGP_KEY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh
# Docker integration test: full backup/restore cycle using the database tar composer.
# Builds a merged wal-g config, then delegates to the shared full-backup test routine.
set -e -x
CONFIG_FILE="/tmp/configs/full_backup_database_composer_test_config.json"
COMMON_CONFIG="/tmp/configs/common_config.json"
TMP_CONFIG="/tmp/configs/tmp_config.json"
# Concatenate the test-specific and common config fragments into one JSON object
# (wrap_config_file.sh adds the surrounding braces/formatting).
cat ${CONFIG_FILE} > ${TMP_CONFIG}
echo "," >> ${TMP_CONFIG}
cat ${COMMON_CONFIG} >> ${TMP_CONFIG}
/tmp/scripts/wrap_config_file.sh ${TMP_CONFIG}

# Reuse the generic full-backup scenario; the database composer is enabled
# via WALG_USE_DATABASE_COMPOSER in the test-specific config fragment.
. /tmp/tests/test_functions/test_full_backup.sh
test_full_backup ${TMP_CONFIG}
57 changes: 57 additions & 0 deletions docker/pg_tests/scripts/tests/partial_backup_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/sh
# Docker integration test for partial backup-fetch (--restore-only):
# pushes a backup containing two user databases, restores only one of them
# (plus the system databases), and verifies the restored database works while
# the skipped one errors out.
set -e -x
CONFIG_FILE="/tmp/configs/partial_backup_test_config.json"
COMMON_CONFIG="/tmp/configs/common_config.json"
TMP_CONFIG="/tmp/configs/tmp_config.json"
# Concatenate the test-specific and common config fragments into one JSON object
# (wrap_config_file.sh adds the surrounding braces/formatting).
cat ${CONFIG_FILE} > ${TMP_CONFIG}
echo "," >> ${TMP_CONFIG}
cat ${COMMON_CONFIG} >> ${TMP_CONFIG}
/tmp/scripts/wrap_config_file.sh ${TMP_CONFIG}

# Start from an empty bucket so earlier runs cannot interfere.
wal-g --config=${TMP_CONFIG} delete everything FORCE --confirm

# Initialize a fresh cluster and enable WAL archiving through wal-g.
/usr/lib/postgresql/10/bin/initdb ${PGDATA}

echo "archive_mode = on" >> ${PGDATA}/postgresql.conf
echo "archive_command = 'wal-g --config=${TMP_CONFIG} wal-push %p && echo \"WAL pushing: %p\"'" >> ${PGDATA}/postgresql.conf

/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA} -w start
/tmp/scripts/wait_while_pg_not_ready.sh

# Two user databases: "first" will be restored, "second" will be skipped.
psql -c "CREATE DATABASE first" postgres
psql -c "CREATE DATABASE second" postgres
psql -c "CREATE TABLE tbl1 (data integer); INSERT INTO tbl1 VALUES (1), (2);" first
psql -c "CREATE TABLE tbl2 (data integer); INSERT INTO tbl2 VALUES (3), (4);" second
sleep 1

wal-g --config=${TMP_CONFIG} backup-push ${PGDATA}

# Post-backup inserts must be replayed from archived WAL during recovery.
psql -c "INSERT INTO tbl1 VALUES (5), (6);" first
psql -c "INSERT INTO tbl2 VALUES (7), (8);" second
# Collect the oids needed for --restore-only: the target database plus the
# system databases (template0/template1/postgres) required for a usable cluster.
FIRST_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'first';" -d postgres -A;)
T0_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'template0';" -d postgres -A;)
T1_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'template1';" -d postgres -A;)
PG_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'postgres';" -d postgres -A;)
# Force a WAL segment switch so the inserts above are archived before the fetch.
psql -c "SELECT pg_switch_wal();" postgres
sleep 10

# Wipe the cluster, then fetch only the selected databases from the backup.
/tmp/scripts/drop_pg.sh
wal-g --config=${TMP_CONFIG} backup-fetch ${PGDATA} LATEST --restore-only=${T1_OID},${T0_OID},${PG_OID},${FIRST_OID}
echo "restore_command = 'echo \"WAL file restoration: %f, %p\"&& wal-g --config=${TMP_CONFIG} wal-fetch \"%f\" \"%p\"'" > ${PGDATA}/recovery.conf

/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA} -w start
/tmp/scripts/wait_while_pg_not_ready.sh

# The restored database must contain both pre-backup and WAL-replayed rows.
if [ "$(psql -t -c "select data from tbl1;" -d first -A)" = "$(printf '1\n2\n5\n6')" ]; then
  echo "Partial backup success!!!!!!"
else
  echo "Partial backup doesn't work :("
  exit 1
fi

# The skipped database's files were not restored, so querying it must fail
# with an "is not a valid data directory" style error.
if psql -t -c "select data from tbl2;" -d second -A 2>&1 | grep -q "is not a valid data directory"; then
  echo "Skipped database raises error, as it should be!"
else
  echo "Skipped database responses unexpectedly"
  exit 1
fi
28 changes: 28 additions & 0 deletions docs/PostgreSQL.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,20 @@ Since this feature involves both backup creation and restore process, in order t
wal-g backup-fetch /path LATEST --reverse-unpack --skip-redundant-tars
```

#### Partial backup (experimental)

During a partial backup fetch, WAL-G restores only the specified databases' files in the default tablespace directory (`/base`).

```bash
wal-g backup-fetch /path LATEST --restore-only=1,4,5,16384
```

If `--restore-only` specified, `--skip-redundant-tars` and `--reverse-unpack` are set automatically.

PostgreSQL works fine with the restored databases as long as `template0`, `template1` and `postgres` are restored. Because traces of the other databases still remain in the system tables, it is recommended to drop all unrestored databases.

Currently, only database oids can be specified.

### ``backup-push``

When uploading backups to storage, the user should pass the Postgres data directory as an argument.
Expand Down Expand Up @@ -225,6 +239,20 @@ To activate this feature, do one of the following:
wal-g backup-push /path --copy-composer
```

#### Database composer mode

In the database composer mode, WAL-G separates files from different directories inside the default tablespace and packs them into separate tars. This mode is designed to increase partial backup performance.

To activate this feature, do one of the following:

* set the `WALG_USE_DATABASE_COMPOSER` environment variable
* add the `--database-composer` flag

```bash
wal-g backup-push /path --database-composer
```


#### Backup without metadata

By default, WAL-G tracks metadata of the files backed up. If millions of files are backed up (typically in case of hundreds of databases and thousands of tables in each database), tracking this metadata alone would require GBs of memory.
Expand Down
3 changes: 3 additions & 0 deletions internal/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const (
StoreAllCorruptBlocksSetting = "WALG_STORE_ALL_CORRUPT_BLOCKS"
UseRatingComposerSetting = "WALG_USE_RATING_COMPOSER"
UseCopyComposerSetting = "WALG_USE_COPY_COMPOSER"
UseDatabaseComposerSetting = "WALG_USE_DATABASE_COMPOSER"
WithoutFilesMetadataSetting = "WALG_WITHOUT_FILES_METADATA"
DeltaFromNameSetting = "WALG_DELTA_FROM_NAME"
DeltaFromUserDataSetting = "WALG_DELTA_FROM_USER_DATA"
Expand Down Expand Up @@ -203,6 +204,7 @@ var (
StoreAllCorruptBlocksSetting: "false",
UseRatingComposerSetting: "false",
UseCopyComposerSetting: "false",
UseDatabaseComposerSetting: "false",
WithoutFilesMetadataSetting: "false",
MaxDelayedSegmentsCount: "0",
SerializerTypeSetting: "json_default",
Expand Down Expand Up @@ -283,6 +285,7 @@ var (
StoreAllCorruptBlocksSetting: true,
UseRatingComposerSetting: true,
UseCopyComposerSetting: true,
UseDatabaseComposerSetting: true,
WithoutFilesMetadataSetting: true,
MaxDelayedSegmentsCount: true,
DeltaFromNameSetting: true,
Expand Down
30 changes: 15 additions & 15 deletions internal/databases/greenplum/backup_fetch_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,38 @@ import (

func TestPrepareContentIDsToFetch(t *testing.T) {
testcases := []struct {
fetchContentId []int
segmentConfig []cluster.SegConfig
fetchContentId []int
segmentConfig []cluster.SegConfig
contentIDsToFetch map[int]bool
} {
}{
{
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{},
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{},
contentIDsToFetch: map[int]bool{},
},
{
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{{ContentID: 21}, {ContentID: 42}},
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{{ContentID: 21}, {ContentID: 42}},
contentIDsToFetch: map[int]bool{21: true, 42: true},
},
{
fetchContentId: []int{1},
segmentConfig: []cluster.SegConfig{{ContentID: 1231}, {ContentID: 6743}, {ContentID: 7643}},
fetchContentId: []int{1},
segmentConfig: []cluster.SegConfig{{ContentID: 1231}, {ContentID: 6743}, {ContentID: 7643}},
contentIDsToFetch: map[int]bool{1: true},
},
{
fetchContentId: []int{65, 42, 12, 76, 22},
segmentConfig: []cluster.SegConfig{},
fetchContentId: []int{65, 42, 12, 76, 22},
segmentConfig: []cluster.SegConfig{},
contentIDsToFetch: map[int]bool{65: true, 42: true, 12: true, 76: true, 22: true},
},
{
fetchContentId: []int{5, 4, 3, 2, 1},
segmentConfig: []cluster.SegConfig{{ContentID: 4}, {ContentID: 5}, {ContentID: 6}},
fetchContentId: []int{5, 4, 3, 2, 1},
segmentConfig: []cluster.SegConfig{{ContentID: 4}, {ContentID: 5}, {ContentID: 6}},
contentIDsToFetch: map[int]bool{1: true, 2: true, 3: true, 4: true, 5: true},
},
{
fetchContentId: []int{6, 7, 8, 9, 10},
segmentConfig: []cluster.SegConfig{{ContentID: 1}, {ContentID: 5}, {ContentID: 7}},
fetchContentId: []int{6, 7, 8, 9, 10},
segmentConfig: []cluster.SegConfig{{ContentID: 1}, {ContentID: 5}, {ContentID: 7}},
contentIDsToFetch: map[int]bool{6: true, 7: true, 8: true, 9: true, 10: true},
},
}
Expand Down

0 comments on commit dfb04c8

Please sign in to comment.