Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement database composer and partial fetch for Postgres #1434

Merged
merged 15 commits into from
Mar 18, 2023
Merged
2 changes: 2 additions & 0 deletions .github/workflows/dockertests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,11 +79,13 @@ jobs:
'make TEST="pg_delta_backup_wal_delta_test" pg_integration_test',
'make TEST="pg_full_backup_test" pg_integration_test',
'make TEST="pg_full_backup_streamed_test" pg_integration_test',
'make TEST="pg_partial_backup_test" pg_integration_test',
'make TEST="pg_remote_backup_test" pg_integration_test',
'make TEST="pg_ssh_backup_test" pg_integration_test',
'make TEST="pg_receive_wal_test" pg_integration_test',
'make TEST="pg_full_backup_copy_composer_test" pg_integration_test',
'make TEST="pg_full_backup_rating_composer_test" pg_integration_test',
'make TEST="pg_full_backup_database_composer_test" pg_integration_test',
'make TEST="pg_delete_before_name_find_full_test" pg_integration_test',
'make TEST="pg_delete_retain_full_test" pg_integration_test',
'make TEST="pg_delete_before_time_find_full_test" pg_integration_test',
Expand Down
23 changes: 20 additions & 3 deletions cmd/pg/backup_fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,11 @@ package pg
import (
"fmt"

"github.com/wal-g/wal-g/internal/databases/postgres"

"github.com/spf13/cobra"
"github.com/spf13/viper"
"github.com/wal-g/tracelog"
"github.com/wal-g/wal-g/internal"
"github.com/wal-g/wal-g/internal/databases/postgres"
"github.com/wal-g/wal-g/pkg/storages/storage"
)

Expand All @@ -21,13 +20,16 @@ For information about pattern syntax view: https://golang.org/pkg/path/filepath/
reverseDeltaUnpackDescription = "Unpack delta backups in reverse order (beta feature)"
skipRedundantTarsDescription = "Skip tars with no useful data (requires reverse delta unpack)"
targetUserDataDescription = "Fetch storage backup which has the specified user data"
restoreOnlyDescription = `[Experimental] Downloads only databases specified by passed db ids from default tablespace.
Sets reverse delta unpack & skip redundant tars options automatically`
)

var fileMask string
var restoreSpec string
var reverseDeltaUnpack bool
var skipRedundantTars bool
var fetchTargetUserData string
var onlyDatabases []int

var backupFetchCmd = &cobra.Command{
Use: "backup-fetch destination_directory [backup_name | --target-user-data <data>]",
Expand All @@ -46,9 +48,21 @@ var backupFetchCmd = &cobra.Command{
tracelog.ErrorLogger.FatalOnError(err)

var pgFetcher func(folder storage.Folder, backup internal.Backup)

if onlyDatabases != nil {
skipRedundantTars = true
reverseDeltaUnpack = true
}
reverseDeltaUnpack = reverseDeltaUnpack || viper.GetBool(internal.UseReverseUnpackSetting)
skipRedundantTars = skipRedundantTars || viper.GetBool(internal.SkipRedundantTarsSetting)
extractProv := postgres.ExtractProviderImpl{}

var extractProv postgres.ExtractProvider

if onlyDatabases != nil {
extractProv = postgres.NewExtractProviderDBSpec(onlyDatabases)
} else {
extractProv = postgres.ExtractProviderImpl{}
}

if reverseDeltaUnpack {
pgFetcher = postgres.GetPgFetcherNew(args[0], fileMask, restoreSpec, skipRedundantTars, extractProv)
Expand Down Expand Up @@ -85,5 +99,8 @@ func init() {
false, skipRedundantTarsDescription)
backupFetchCmd.Flags().StringVar(&fetchTargetUserData, "target-user-data",
"", targetUserDataDescription)
backupFetchCmd.Flags().IntSliceVar(&onlyDatabases, "restore-only",
nil, restoreOnlyDescription)

Cmd.AddCommand(backupFetchCmd)
}
11 changes: 11 additions & 0 deletions cmd/pg/backup_push.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ const (
storeAllCorruptBlocksFlag = "store-all-corrupt"
useRatingComposerFlag = "rating-composer"
useCopyComposerFlag = "copy-composer"
useDatabaseComposerFlag = "database-composer"
deltaFromUserDataFlag = "delta-from-user-data"
deltaFromNameFlag = "delta-from-name"
addUserDataFlag = "add-user-data"
Expand All @@ -31,6 +32,7 @@ const (
storeAllCorruptBlocksShorthand = "s"
useRatingComposerShorthand = "r"
useCopyComposerShorthand = "c"
useDatabaseComposerShorthand = "b"
)

var (
Expand Down Expand Up @@ -102,6 +104,7 @@ var (
verifyPageChecksums = false
storeAllCorruptBlocks = false
useRatingComposer = false
useDatabaseComposer = false
useCopyComposer = false
deltaFromName = ""
deltaFromUserData = ""
Expand All @@ -116,6 +119,12 @@ func chooseTarBallComposer() postgres.TarBallComposerType {
if useRatingComposer {
tarBallComposerType = postgres.RatingComposer
}

useDatabaseComposer = useDatabaseComposer || viper.GetBool(internal.UseDatabaseComposerSetting)
if useDatabaseComposer {
tarBallComposerType = postgres.DatabaseComposer
}

useCopyComposer = useCopyComposer || viper.GetBool(internal.UseCopyComposerSetting)
if useCopyComposer {
fullBackup = true
Expand All @@ -140,6 +149,8 @@ func init() {
false, "Use rating tar composer (beta)")
backupPushCmd.Flags().BoolVarP(&useCopyComposer, useCopyComposerFlag, useCopyComposerShorthand,
false, "Use copy tar composer (beta)")
backupPushCmd.Flags().BoolVarP(&useDatabaseComposer, useDatabaseComposerFlag, useDatabaseComposerShorthand,
false, "Use database tar composer (experimental)")
backupPushCmd.Flags().StringVar(&deltaFromName, deltaFromNameFlag,
"", "Select the backup specified by name as the target for the delta backup")
backupPushCmd.Flags().StringVar(&deltaFromUserData, deltaFromUserDataFlag,
Expand Down
24 changes: 24 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@ services:
command: >
-c 'mkdir -p /export/fullbucket
&& mkdir -p /export/fullratingcomposerbucket
&& mkdir -p /export/fulldatabasecomposerbucket
&& mkdir -p /export/fullcopycomposerbucket
&& mkdir -p /export/fullwithoutfilesmetadatabucket
&& mkdir -p /export/fullscandeltabucket
&& mkdir -p /export/partialbucket
&& mkdir -p /export/remotebucket
&& mkdir -p /export/remotewithoutfilesmetadatabucket
&& mkdir -p /export/cryptobucket
Expand Down Expand Up @@ -278,6 +280,17 @@ services:
links:
- s3

pg_partial_backup_test:
build:
dockerfile: docker/pg_tests/Dockerfile_partial_backup_test
context: .
image: wal-g/partial_backup_test
container_name: wal-g_pg_partial_backup_test
depends_on:
- s3
links:
- s3

pg_remote_backup_test:
build:
dockerfile: docker/pg_tests/Dockerfile_remote_backup_test
Expand Down Expand Up @@ -344,6 +357,17 @@ services:
links:
- s3

pg_full_backup_database_composer_test:
build:
dockerfile: docker/pg_tests/Dockerfile_full_backup_database_composer_test
context: .
image: wal-g/full_backup_database_composer_test
container_name: wal-g_pg_full_backup_database_composer_test
depends_on:
- s3
links:
- s3

pg_full_backup_without_files_metadata_test:
build:
dockerfile: docker/pg_tests/Dockerfile_full_backup_without_files_metadata_test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Image for the database-composer full backup integration test.
# Based on the shared test prefix image; runs the test as the postgres user.
FROM wal-g/docker_prefix:latest

CMD su postgres -c "/tmp/tests/full_backup_database_composer_test.sh"
3 changes: 3 additions & 0 deletions docker/pg_tests/Dockerfile_partial_backup_test
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Image for the partial (per-database) backup-fetch integration test.
# Based on the shared test prefix image; runs the test as the postgres user.
FROM wal-g/docker_prefix:latest

CMD su postgres -c "/tmp/tests/partial_backup_test.sh"
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"WALE_S3_PREFIX": "s3://fulldatabasecomposerbucket",
"WALG_DELTA_MAX_STEPS": "6",
"WALG_PGP_KEY_PATH": "/tmp/PGP_KEY",
"WALG_USE_DATABASE_COMPOSER": "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"WALE_S3_PREFIX": "s3://partialbucket",
"WALG_DELTA_MAX_STEPS": "6",
"WALG_PGP_KEY_PATH": "/tmp/PGP_KEY"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh
# Integration test for the database tar composer: runs the generic
# full-backup test scenario with a config that sets
# WALG_USE_DATABASE_COMPOSER (see full_backup_database_composer_test_config.json).
set -e -x
CONFIG_FILE="/tmp/configs/full_backup_database_composer_test_config.json"
COMMON_CONFIG="/tmp/configs/common_config.json"
TMP_CONFIG="/tmp/configs/tmp_config.json"
# Merge the test-specific config fragment with the common config into a
# single JSON file, then wrap it into valid JSON.
cat ${CONFIG_FILE} > ${TMP_CONFIG}
echo "," >> ${TMP_CONFIG}
cat ${COMMON_CONFIG} >> ${TMP_CONFIG}
/tmp/scripts/wrap_config_file.sh ${TMP_CONFIG}

# Reuse the shared full-backup test routine with the merged config.
. /tmp/tests/test_functions/test_full_backup.sh
test_full_backup ${TMP_CONFIG}
57 changes: 57 additions & 0 deletions docker/pg_tests/scripts/tests/partial_backup_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/bin/sh
# Integration test for partial backup-fetch (--restore-only): back up a
# cluster with two user databases, destroy it, restore only one of them
# (plus the system databases), and verify that the restored database is
# fully usable while the skipped one errors out.
set -e -x
CONFIG_FILE="/tmp/configs/partial_backup_test_config.json"
COMMON_CONFIG="/tmp/configs/common_config.json"
TMP_CONFIG="/tmp/configs/tmp_config.json"
# Merge the test-specific config fragment with the common config into a
# single JSON file, then wrap it into valid JSON.
cat ${CONFIG_FILE} > ${TMP_CONFIG}
echo "," >> ${TMP_CONFIG}
cat ${COMMON_CONFIG} >> ${TMP_CONFIG}
/tmp/scripts/wrap_config_file.sh ${TMP_CONFIG}

# Start from an empty storage bucket and a freshly initialized cluster.
wal-g --config=${TMP_CONFIG} delete everything FORCE --confirm

/usr/lib/postgresql/10/bin/initdb ${PGDATA}

# Enable WAL archiving through wal-g so post-backup changes can be replayed.
echo "archive_mode = on" >> ${PGDATA}/postgresql.conf
echo "archive_command = 'wal-g --config=${TMP_CONFIG} wal-push %p && echo \"WAL pushing: %p\"'" >> ${PGDATA}/postgresql.conf

/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA} -w start
/tmp/scripts/wait_while_pg_not_ready.sh

# Create two databases with one table each; only "first" will be restored.
psql -c "CREATE DATABASE first" postgres
psql -c "CREATE DATABASE second" postgres
psql -c "CREATE TABLE tbl1 (data integer); INSERT INTO tbl1 VALUES (1), (2);" first
psql -c "CREATE TABLE tbl2 (data integer); INSERT INTO tbl2 VALUES (3), (4);" second
sleep 1

wal-g --config=${TMP_CONFIG} backup-push ${PGDATA}

# Rows inserted after the base backup must reach the restored cluster via
# WAL replay rather than via the backup itself.
psql -c "INSERT INTO tbl1 VALUES (5), (6);" first
psql -c "INSERT INTO tbl2 VALUES (7), (8);" second
# Collect the oids passed to --restore-only: the system databases plus "first".
FIRST_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'first';" -d postgres -A;)
T0_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'template0';" -d postgres -A;)
T1_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'template1';" -d postgres -A;)
PG_OID=$(psql -t -c "SELECT oid FROM pg_database WHERE datname = 'postgres';" -d postgres -A;)
# Switch to a new WAL segment so the pending inserts get archived.
psql -c "SELECT pg_switch_wal();" postgres
sleep 10

# Destroy the cluster, then fetch only the selected databases from the backup.
/tmp/scripts/drop_pg.sh
wal-g --config=${TMP_CONFIG} backup-fetch ${PGDATA} LATEST --restore-only=${T1_OID},${T0_OID},${PG_OID},${FIRST_OID}
echo "restore_command = 'echo \"WAL file restoration: %f, %p\"&& wal-g --config=${TMP_CONFIG} wal-fetch \"%f\" \"%p\"'" > ${PGDATA}/recovery.conf

/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA} -w start
/tmp/scripts/wait_while_pg_not_ready.sh

# The restored database must contain both pre- and post-backup rows.
if [ "$(psql -t -c "select data from tbl1;" -d first -A)" = "$(printf '1\n2\n5\n6')" ]; then
echo "Partial backup success!!!!!!"
else
echo "Partial backup doesn't work :("
exit 1
fi

# The skipped database's files were not restored, so querying it must fail
# with the expected "is not a valid data directory" error.
if psql -t -c "select data from tbl2;" -d second -A 2>&1 | grep -q "is not a valid data directory"; then
echo "Skipped database raises error, as it should be!"
else
echo "Skipped database responses unexpectedly"
exit 1
fi
28 changes: 28 additions & 0 deletions docs/PostgreSQL.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,20 @@ Since this feature involves both backup creation and restore process, in order t
wal-g backup-fetch /path LATEST --reverse-unpack --skip-redundant-tars
```

#### Partial backup (experimental)

During a partial backup fetch, WAL-G restores only the files of the specified databases from the default tablespace directory (`/base`).

```bash
wal-g backup-fetch /path LATEST --restore-only=1,4,5,16384
```

If `--restore-only` is specified, the `--skip-redundant-tars` and `--reverse-unpack` options are enabled automatically.

PostgreSQL works fine with the restored databases as long as `template0`, `template1` and `postgres` are restored. Because remnants of the unrestored databases are still present in the system catalogs, it is recommended to drop all unrestored databases.

Currently, only database oids can be specified.

### ``backup-push``

When uploading backups to storage, the user should pass the Postgres data directory as an argument.
Expand Down Expand Up @@ -225,6 +239,20 @@ To activate this feature, do one of the following:
wal-g backup-push /path --copy-composer
```

#### Database composer mode

In the database composer mode, WAL-G separates files from different directories inside the default tablespace and packs them into separate tars. This mode is designed to increase partial backup performance.

To activate this feature, do one of the following:

* set the `WALG_USE_DATABASE_COMPOSER` environment variable
* add the --database-composer flag

```bash
wal-g backup-push /path --database-composer
```


#### Backup without metadata

By default, WAL-G tracks metadata of the files backed up. If millions of files are backed up (typically in case of hundreds of databases and thousands of tables in each database), tracking this metadata alone would require GBs of memory.
Expand Down
3 changes: 3 additions & 0 deletions internal/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const (
StoreAllCorruptBlocksSetting = "WALG_STORE_ALL_CORRUPT_BLOCKS"
UseRatingComposerSetting = "WALG_USE_RATING_COMPOSER"
UseCopyComposerSetting = "WALG_USE_COPY_COMPOSER"
UseDatabaseComposerSetting = "WALG_USE_DATABASE_COMPOSER"
WithoutFilesMetadataSetting = "WALG_WITHOUT_FILES_METADATA"
DeltaFromNameSetting = "WALG_DELTA_FROM_NAME"
DeltaFromUserDataSetting = "WALG_DELTA_FROM_USER_DATA"
Expand Down Expand Up @@ -196,6 +197,7 @@ var (
StoreAllCorruptBlocksSetting: "false",
UseRatingComposerSetting: "false",
UseCopyComposerSetting: "false",
UseDatabaseComposerSetting: "false",
WithoutFilesMetadataSetting: "false",
MaxDelayedSegmentsCount: "0",
SerializerTypeSetting: "json_default",
Expand Down Expand Up @@ -275,6 +277,7 @@ var (
StoreAllCorruptBlocksSetting: true,
UseRatingComposerSetting: true,
UseCopyComposerSetting: true,
UseDatabaseComposerSetting: true,
WithoutFilesMetadataSetting: true,
MaxDelayedSegmentsCount: true,
DeltaFromNameSetting: true,
Expand Down
30 changes: 15 additions & 15 deletions internal/databases/greenplum/backup_fetch_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,38 @@ import (

func TestPrepareContentIDsToFetch(t *testing.T) {
testcases := []struct {
fetchContentId []int
segmentConfig []cluster.SegConfig
fetchContentId []int
segmentConfig []cluster.SegConfig
contentIDsToFetch map[int]bool
} {
}{
{
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{},
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{},
contentIDsToFetch: map[int]bool{},
},
{
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{{ContentID: 21}, {ContentID: 42}},
fetchContentId: []int{},
segmentConfig: []cluster.SegConfig{{ContentID: 21}, {ContentID: 42}},
contentIDsToFetch: map[int]bool{21: true, 42: true},
},
{
fetchContentId: []int{1},
segmentConfig: []cluster.SegConfig{{ContentID: 1231}, {ContentID: 6743}, {ContentID: 7643}},
fetchContentId: []int{1},
segmentConfig: []cluster.SegConfig{{ContentID: 1231}, {ContentID: 6743}, {ContentID: 7643}},
contentIDsToFetch: map[int]bool{1: true},
},
{
fetchContentId: []int{65, 42, 12, 76, 22},
segmentConfig: []cluster.SegConfig{},
fetchContentId: []int{65, 42, 12, 76, 22},
segmentConfig: []cluster.SegConfig{},
contentIDsToFetch: map[int]bool{65: true, 42: true, 12: true, 76: true, 22: true},
},
{
fetchContentId: []int{5, 4, 3, 2, 1},
segmentConfig: []cluster.SegConfig{{ContentID: 4}, {ContentID: 5}, {ContentID: 6}},
fetchContentId: []int{5, 4, 3, 2, 1},
segmentConfig: []cluster.SegConfig{{ContentID: 4}, {ContentID: 5}, {ContentID: 6}},
contentIDsToFetch: map[int]bool{1: true, 2: true, 3: true, 4: true, 5: true},
},
{
fetchContentId: []int{6, 7, 8, 9, 10},
segmentConfig: []cluster.SegConfig{{ContentID: 1}, {ContentID: 5}, {ContentID: 7}},
fetchContentId: []int{6, 7, 8, 9, 10},
segmentConfig: []cluster.SegConfig{{ContentID: 1}, {ContentID: 5}, {ContentID: 7}},
contentIDsToFetch: map[int]bool{6: true, 7: true, 8: true, 9: true, 10: true},
},
}
Expand Down