Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PostgreSQL - Reduce memory usage by not tracking metadata of the files backed up #1101

Merged
merged 3 commits into from
Nov 25, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
33 changes: 27 additions & 6 deletions cmd/pg/backup_push.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
deltaFromUserDataFlag = "delta-from-user-data"
deltaFromNameFlag = "delta-from-name"
addUserDataFlag = "add-user-data"
withoutFilesMetadataFlag = "without-files-metadata"

permanentShorthand = "p"
fullBackupShorthand = "f"
Expand Down Expand Up @@ -56,7 +57,8 @@ var (
if useRatingComposer {
tarBallComposerType = postgres.RatingComposer
}
if useCopyComposer || viper.GetBool(internal.UseCopyComposerSetting) {
useCopyComposer = useCopyComposer || viper.GetBool(internal.UseCopyComposerSetting)
if useCopyComposer {
fullBackup = true
tarBallComposerType = postgres.CopyComposer
}
Expand All @@ -66,19 +68,36 @@ var (
if deltaFromUserData == "" {
deltaFromUserData = viper.GetString(internal.DeltaFromUserDataSetting)
}
deltaBaseSelector, err := createDeltaBaseSelector(cmd, deltaFromName, deltaFromUserData)
tracelog.ErrorLogger.FatalOnError(err)

if userDataRaw == "" {
userDataRaw = viper.GetString(internal.SentinelUserDataSetting)
}
withoutFilesMetadata = withoutFilesMetadata || viper.GetBool(internal.WithoutFilesMetadataSetting)
if withoutFilesMetadata {
// files metadata tracking is required for delta backups and copy/rating composers
if useRatingComposer || useCopyComposer {
tracelog.ErrorLogger.Fatalf(
"%s option cannot be used with %s, %s options",
withoutFilesMetadataFlag, useRatingComposerFlag, useCopyComposerFlag)
}
if deltaFromName != "" || deltaFromUserData != "" || userDataRaw != "" {
tracelog.ErrorLogger.Fatalf(
"%s option cannot be used with %s, %s, %s options",
withoutFilesMetadataFlag, deltaFromNameFlag, deltaFromUserDataFlag, addUserDataFlag)
}
tracelog.InfoLogger.Print("Files metadata tracking is disabled")
fullBackup = true
}

deltaBaseSelector, err := createDeltaBaseSelector(cmd, deltaFromName, deltaFromUserData)
tracelog.ErrorLogger.FatalOnError(err)

userData, err := internal.UnmarshalSentinelUserData(userDataRaw)
tracelog.ErrorLogger.FatalfOnError("Failed to unmarshal the provided UserData: %s", err)

arguments := postgres.NewBackupArguments(dataDirectory, utility.BaseBackupPath,
permanent, verifyPageChecksums || viper.GetBool(internal.VerifyPageChecksumsSetting),
fullBackup, storeAllCorruptBlocks || viper.GetBool(internal.StoreAllCorruptBlocksSetting),
tarBallComposerType, deltaBaseSelector, userData)
tarBallComposerType, deltaBaseSelector, userData, withoutFilesMetadata)

backupHandler, err := postgres.NewBackupHandler(arguments)
tracelog.ErrorLogger.FatalOnError(err)
Expand All @@ -94,6 +113,7 @@ var (
deltaFromName = ""
deltaFromUserData = ""
userDataRaw = ""
withoutFilesMetadata = false
)

// create the BackupSelector for delta backup base according to the provided flags
Expand All @@ -115,7 +135,6 @@ func createDeltaBaseSelector(cmd *cobra.Command,
return internal.NewUserDataBackupSelector(targetUserData, postgres.NewGenericMetaFetcher())

default:
tracelog.InfoLogger.Println("Selecting the latest backup as the base for the current delta backup...")
return internal.NewLatestBackupSelector(), nil
}
}
Expand All @@ -141,4 +160,6 @@ func init() {
"", "Select the backup specified by UserData as the target for the delta backup")
backupPushCmd.Flags().StringVar(&userDataRaw, addUserDataFlag,
"", "Write the provided user data to the backup sentinel and metadata files.")
backupPushCmd.Flags().BoolVar(&withoutFilesMetadata, withoutFilesMetadataFlag,
false, "Do not track files metadata, significantly reducing memory usage")
}
26 changes: 25 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@ services:
-c 'mkdir -p /export/fullbucket
&& mkdir -p /export/fullratingcomposerbucket
&& mkdir -p /export/fullcopycomposerbucket
&& mkdir -p /export/fullwithoutfilesmetadatabucket
&& mkdir -p /export/fullscandeltabucket
&& mkdir -p /export/remotebucket
&& mkdir -p /export/remotewithoutfilesmetadatabucket
&& mkdir -p /export/cryptobucket
&& mkdir -p /export/waldeltabucket
&& mkdir -p /export/ghostbucket
Expand Down Expand Up @@ -276,6 +278,17 @@ services:
links:
- s3

pg_remote_backup_without_files_metadata_test:
build:
dockerfile: docker/pg_tests/Dockerfile_remote_backup_without_files_metadata_test
context: .
image: wal-g/remote_backup_without_files_metadata_test
container_name: wal-g_pg_remote_backup_without_files_metadata_test
depends_on:
- s3
links:
- s3

pg_ssh_backup_test:
build:
dockerfile: docker/pg_tests/Dockerfile_ssh_backup_test
Expand All @@ -299,7 +312,7 @@ services:
- s3

pg_full_backup_copy_composer_test:
build:
build:
dockerfile: docker/pg_tests/Dockerfile_full_backup_copy_composer_test
context: .
image: wal-g/full_backup_copy_composer_test
Expand All @@ -320,6 +333,17 @@ services:
links:
- s3

pg_full_backup_without_files_metadata_test:
build:
dockerfile: docker/pg_tests/Dockerfile_full_backup_without_files_metadata_test
context: .
image: wal-g/full_backup_without_files_metadata_test
container_name: wal-g_pg_full_backup_without_files_metadata_test
depends_on:
- s3
links:
- s3

pg_delete_before_time_find_full_test:
build:
dockerfile: docker/pg_tests/Dockerfile_delete_before_time_find_full_test
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM wal-g/docker_prefix:latest

# Run the full-backup-without-files-metadata test script as the postgres user.
CMD su postgres -c "/tmp/tests/full_backup_without_files_metadata_test.sh"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
FROM wal-g/docker_prefix:latest

# Run the remote-backup-without-files-metadata test script as the postgres user.
CMD su postgres -c "/tmp/tests/remote_backup_without_files_metadata_test.sh"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"WALE_S3_PREFIX": "s3://fullwithoutfilesmetadatabucket",
"WALG_PGP_KEY_PATH": "/tmp/PGP_KEY",
"WALG_WITHOUT_FILES_METADATA": "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"WALE_S3_PREFIX": "s3://remotewithoutfilesmetadatabucket",
"WALG_PGP_KEY_PATH": "/tmp/PGP_KEY",
"WALG_WITHOUT_FILES_METADATA": "true"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh
# Full backup test driven by the "without files metadata" test config.
# Assembles a temporary config from the test-specific settings and the
# common settings, then runs the shared full-backup test against it.
set -e -x
CONFIG_FILE="/tmp/configs/full_backup_without_files_metadata_test_config.json"
COMMON_CONFIG="/tmp/configs/common_config.json"
TMP_CONFIG="/tmp/configs/tmp_config.json"

# Concatenate test config + "," + common config into the temporary file.
# All expansions are quoted so the paths are never word-split or globbed.
cat "${CONFIG_FILE}" > "${TMP_CONFIG}"
echo "," >> "${TMP_CONFIG}"
cat "${COMMON_CONFIG}" >> "${TMP_CONFIG}"
# NOTE(review): wrap_config_file.sh presumably wraps the joined members into
# a complete JSON document — confirm against the helper script.
/tmp/scripts/wrap_config_file.sh "${TMP_CONFIG}"

# Source the shared test function and run it with the assembled config.
. /tmp/tests/test_functions/test_full_backup.sh
test_full_backup "${TMP_CONFIG}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/bin/sh
# Remote backup-and-restore test driven by the "without files metadata" test
# config. Assembles a temporary config from the test-specific settings and the
# common settings, then runs the shared remote backup/restore test against it.
set -e -x
CONFIG_FILE="/tmp/configs/remote_backup_without_files_metadata_test_config.json"
COMMON_CONFIG="/tmp/configs/common_config.json"
TMP_CONFIG="/tmp/configs/tmp_config.json"

# Concatenate test config + "," + common config into the temporary file.
# All expansions are quoted so the paths are never word-split or globbed.
cat "${CONFIG_FILE}" > "${TMP_CONFIG}"
echo "," >> "${TMP_CONFIG}"
cat "${COMMON_CONFIG}" >> "${TMP_CONFIG}"
# NOTE(review): wrap_config_file.sh presumably wraps the joined members into
# a complete JSON document — confirm against the helper script.
/tmp/scripts/wrap_config_file.sh "${TMP_CONFIG}"

# Source the shared test function and run it with the assembled config.
. /tmp/tests/test_functions/remote_backup_and_restore_test.sh
remote_backup_and_restore_test "${TMP_CONFIG}"
20 changes: 20 additions & 0 deletions docs/PostgreSQL.md
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,26 @@ To activate this feature, do one of the following:
wal-g backup-push /path --copy-composer
```

#### Backup without metadata

By default, WAL-G tracks metadata of the files backed up. If millions of files are backed up (typically in case of hundreds of databases and thousands of tables in each database), tracking this metadata alone would require GBs of memory.

If `--without-files-metadata` or `WALG_WITHOUT_FILES_METADATA` is enabled, WAL-G does not track metadata of the files backed up. This significantly reduces the memory usage on instances with `> 100k` files.

Limitations

* Cannot be used with `rating-composer`, `copy-composer`
* Cannot be used with `delta-from-user-data`, `delta-from-name`, `add-user-data`

To activate this feature, do one of the following:

* set the `WALG_WITHOUT_FILES_METADATA` environment variable
* add the `--without-files-metadata` flag

```bash
wal-g backup-push /path --without-files-metadata
```

#### Create delta from specific backup
When creating delta backup (`WALG_DELTA_MAX_STEPS` > 0), WAL-G uses the latest backup as the base by default. This behaviour can be changed via following flags:

Expand Down
3 changes: 3 additions & 0 deletions internal/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const (
StoreAllCorruptBlocksSetting = "WALG_STORE_ALL_CORRUPT_BLOCKS"
UseRatingComposerSetting = "WALG_USE_RATING_COMPOSER"
UseCopyComposerSetting = "WALG_USE_COPY_COMPOSER"
WithoutFilesMetadataSetting = "WALG_WITHOUT_FILES_METADATA"
DeltaFromNameSetting = "WALG_DELTA_FROM_NAME"
DeltaFromUserDataSetting = "WALG_DELTA_FROM_USER_DATA"
FetchTargetUserDataSetting = "WALG_FETCH_TARGET_USER_DATA"
Expand Down Expand Up @@ -154,6 +155,7 @@ var (
StoreAllCorruptBlocksSetting: "false",
UseRatingComposerSetting: "false",
UseCopyComposerSetting: "false",
WithoutFilesMetadataSetting: "false",
MaxDelayedSegmentsCount: "0",
SerializerTypeSetting: "json_default",
}
Expand Down Expand Up @@ -222,6 +224,7 @@ var (
StoreAllCorruptBlocksSetting: true,
UseRatingComposerSetting: true,
UseCopyComposerSetting: true,
WithoutFilesMetadataSetting: true,
MaxDelayedSegmentsCount: true,
DeltaFromNameSetting: true,
DeltaFromUserDataSetting: true,
Expand Down
13 changes: 12 additions & 1 deletion internal/databases/postgres/backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,14 @@ func (backup *Backup) GetSentinelAndFilesMetadata() (BackupSentinelDto, FilesMet
}

var filesMetadata FilesMetadataDto

// skip the files metadata fetch if backup was taken without it
if sentinel.FilesMetadataDisabled {
tracelog.InfoLogger.Printf("Files metadata tracking was disabled, skipping the download of %s", FilesMetadataName)
backup.FilesMetadataDto = &filesMetadata
return sentinel, filesMetadata, nil
}

err = backup.FetchDto(&filesMetadata, getFilesMetadataPath(backup.Name))
if err != nil {
// double-check that this is not V2 backup
Expand Down Expand Up @@ -343,7 +351,9 @@ func (backup *Backup) GetFilesToUnwrap(fileMask string) (map[string]bool, error)
if err != nil {
return nil, err
}
if filesMeta.Files == nil { // in case of WAL-E of old WAL-G backup
// in case of WAL-E or old WAL-G backup -or-
// base backup created with WALG_WITHOUT_FILES_METADATA
if len(filesMeta.Files) == 0 {
return UnwrapAll, nil
}
filesToUnwrap := make(map[string]bool)
Expand All @@ -357,6 +367,7 @@ func (backup *Backup) GetFilesToUnwrap(fileMask string) (map[string]bool, error)
}

func shouldUnwrapTar(tarName string, filesMeta FilesMetadataDto, filesToUnwrap map[string]bool) bool {
// in case of base backup created with WALG_WITHOUT_FILES_METADATA
if len(filesMeta.TarFileSets) == 0 {
return true
}
Expand Down
8 changes: 4 additions & 4 deletions internal/databases/postgres/backup_fetch_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,8 @@ func chooseTablespaceSpecification(sentinelDtoSpec, spec *TablespaceSpec) *Table

// TODO : unit tests
// deltaFetchRecursion function composes Backup object and recursively searches for necessary base backup
func deltaFetchRecursionOld(backupName string, folder storage.Folder, dbDataDirectory string,
func deltaFetchRecursionOld(backup Backup, folder storage.Folder, dbDataDirectory string,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change is not related to the files metadata uploading, but helps to avoid downloading it twice.

tablespaceSpec *TablespaceSpec, filesToUnwrap map[string]bool) error {
backup := NewBackup(folder.GetSubFolder(utility.BaseBackupPath), backupName)
sentinelDto, filesMetaDto, err := backup.GetSentinelAndFilesMetadata()
if err != nil {
return err
Expand All @@ -78,7 +77,8 @@ func deltaFetchRecursionOld(backupName string, folder storage.Folder, dbDataDire
if err != nil {
return err
}
err = deltaFetchRecursionOld(*sentinelDto.IncrementFrom, folder, dbDataDirectory, tablespaceSpec, baseFilesToUnwrap)
incrementFrom := NewBackup(folder.GetSubFolder(utility.BaseBackupPath), *sentinelDto.IncrementFrom)
err = deltaFetchRecursionOld(incrementFrom, folder, dbDataDirectory, tablespaceSpec, baseFilesToUnwrap)
if err != nil {
return err
}
Expand All @@ -102,7 +102,7 @@ func GetPgFetcherOld(dbDataDirectory, fileMask, restoreSpecPath string) func(roo
errMessege := fmt.Sprintf("Invalid restore specification path %s\n", restoreSpecPath)
tracelog.ErrorLogger.FatalfOnError(errMessege, err)
}
err = deltaFetchRecursionOld(backup.Name, rootFolder, utility.ResolveSymlink(dbDataDirectory), spec, filesToUnwrap)
err = deltaFetchRecursionOld(pgBackup, rootFolder, utility.ResolveSymlink(dbDataDirectory), spec, filesToUnwrap)
tracelog.ErrorLogger.FatalfOnError("Failed to fetch backup: %v\n", err)
}
}
Expand Down