Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP wal-restore #977

Merged
merged 20 commits into from
Feb 4, 2022
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/workflows/dockertests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ jobs:
'make TEST="pg_wal_perftest" pg_integration_test',
'make TEST="pg_backup_perftest" pg_integration_test',
'make TEST="pg_catchup_test" pg_integration_test',
'make TEST="pg_restore_test" pg_integration_test',
Xaspy marked this conversation as resolved.
Show resolved Hide resolved
'make MYSQL_TEST=mysql_base_tests mysql_integration_test',
'make MYSQL_TEST=mysql_delete_tests mysql_integration_test',
'make MYSQL_TEST=mysql_copy_tests mysql_integration_test',
Expand Down
31 changes: 31 additions & 0 deletions cmd/pg/wal_restore.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
package pg

import (
"github.com/spf13/cobra"
"github.com/wal-g/tracelog"
"github.com/wal-g/wal-g/internal"
"github.com/wal-g/wal-g/internal/databases/postgres"
)

// Texts shown by cobra for the wal-restore command.
const (
	// WalRestoreUsage is the one-line usage string: the command name
	// followed by its two positional arguments.
	WalRestoreUsage = "wal-restore target-pgdata source-pgdata"
	// WalRestoreShortDescription is the short help text.
	WalRestoreShortDescription = "Restores WAL segments from storage."
	// WalRestoreLongDescription is the long help text.
	WalRestoreLongDescription = "Restores the missing WAL segments that will be needed to perform pg_rewind with storage."
)

// walRestoreCmd is the cobra command for wal-restore.
//
// It requires exactly two positional arguments which, per WalRestoreUsage,
// are the target PGDATA followed by the source PGDATA. The storage folder is
// configured first; the configuration error (if any) is handed to
// tracelog's FatalfOnError before the restore handler is invoked.
var walRestoreCmd = &cobra.Command{
	Use:   WalRestoreUsage,
	Short: WalRestoreShortDescription,
	Long:  WalRestoreLongDescription,
	Args:  cobra.ExactArgs(2),
	Run: func(_ *cobra.Command, pgdataDirs []string) {
		// cobra.ExactArgs(2) guarantees both indexes exist.
		targetPgData, sourcePgData := pgdataDirs[0], pgdataDirs[1]

		storageFolder, configureErr := internal.ConfigureFolder()
		tracelog.ErrorLogger.FatalfOnError("Error on configure external folder %v\n", configureErr)

		postgres.HandleWALRestore(targetPgData, sourcePgData, storageFolder)
	},
}

// init registers walRestoreCmd as a subcommand of the package-level Cmd.
func init() {
Cmd.AddCommand(walRestoreCmd)
}
12 changes: 12 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ services:
&& mkdir -p /export/gpdeletebeforenamebucket
&& mkdir -p /export/gpdeletebeforetimebucket
&& mkdir -p /export/storagetoolsbucket
&& mkdir -p /export/restorebucket
Xaspy marked this conversation as resolved.
Show resolved Hide resolved
&& /usr/bin/minio server /export'

s3-another:
Expand Down Expand Up @@ -523,6 +524,17 @@ services:
links:
- s3

# Container for the wal-restore integration test: built from
# docker/pg_tests/Dockerfile_restore_test, depends on and links to the s3 service.
pg_restore_test:
build:
dockerfile: docker/pg_tests/Dockerfile_restore_test
context: .
image: wal-g/restore_test
container_name: wal-g_pg_restore_test
depends_on:
- s3
links:
- s3

mysql:
build:
dockerfile: docker/mysql/Dockerfile
Expand Down
3 changes: 3 additions & 0 deletions docker/pg_tests/Dockerfile_restore_test
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Image for the wal-restore integration test, based on wal-g/docker_prefix.
FROM wal-g/docker_prefix:latest

# Run the test script through `su - postgres`, i.e. as the postgres user.
CMD su - postgres /tmp/tests/restore_test.sh
3 changes: 3 additions & 0 deletions docker/pg_tests/scripts/configs/restore_test_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"WALG_DELTA_MAX_STEPS": "0",
"WALE_S3_PREFIX": "s3://restorebucket",
"WALG_USE_WAL_DELTA": "true"
Empty file modified docker/pg_tests/scripts/tests/archiving_ready_rename.sh
100644 → 100755
Empty file.
124 changes: 124 additions & 0 deletions docker/pg_tests/scripts/tests/restore_test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
#!/bin/sh
# Integration test for `wal-g wal-restore`.
#
# Scenario:
#   1. start cluster "alpha" with WAL archiving through `wal-g wal-push`;
#   2. clone it with pg_basebackup into standby "beta" and load data via alpha;
#   3. stop alpha in immediate mode, promote beta and load data via beta;
#   4. stop beta and delete one WAL segment from beta's pg_wal;
#   5. run `wal-g wal-restore alpha beta`; the test passes only if the
#      deleted segment file exists again afterwards.
set -e -x

PGDATA="/var/lib/postgresql/10/main"
PGDATA_ALPHA="${PGDATA}_alpha"
PGDATA_BETA="${PGDATA}_beta"
ALPHA_PORT=5432
BETA_PORT=5433

# Appends the WAL archiving settings (archive_mode, a wal-g wal-push
# archive_command and archive_timeout) to the postgresql.conf given as $1.
# The backslash-newlines inside the double quotes are line continuations, so
# archive_command ends up on a single line in the config file.
append_archive_settings() {
    echo "archive_mode = on" >> "$1"
    echo "archive_command = '\
AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE \
AWS_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY \
AWS_ENDPOINT=http://s3:9000 \
AWS_S3_FORCE_PATH_STYLE=true \
WALG_COMPRESSION_METHOD=brotli \
WALG_DELTA_MAX_STEPS=3 \
WALG_UPLOAD_CONCURRENCY=10 \
WALG_DISK_RATE_LIMIT=41943040 \
WALG_NETWORK_RATE_LIMIT=10485760 \
PGSSLMODE=allow \
PGDATABASE=postgres \
PGHOST=/var/run/postgresql \
WALE_S3_PREFIX=s3://restorebucket \
WALG_USE_WAL_DELTA=true \
/usr/bin/timeout 600 /usr/bin/wal-g wal-push %p'" >> "$1"
    echo "archive_timeout = 600" >> "$1"
}

# init config: test-specific settings + common settings, wrapped into one file
CONFIG_FILE="/tmp/configs/restore_test_config.json"
COMMON_CONFIG="/tmp/configs/common_config.json"
TMP_CONFIG="/tmp/configs/tmp_config.json"
cp ${CONFIG_FILE} ${TMP_CONFIG}
echo "," >> ${TMP_CONFIG}
cat ${COMMON_CONFIG} >> ${TMP_CONFIG}
/tmp/scripts/wrap_config_file.sh ${TMP_CONFIG}

# init alpha cluster
/usr/lib/postgresql/10/bin/initdb ${PGDATA_ALPHA}

# preparation for replication
cd ${PGDATA_ALPHA}
echo "host replication repl 127.0.0.1/32 md5" >> pg_hba.conf
echo "wal_level = replica" >> postgresql.conf
echo "wal_keep_segments = 100" >> postgresql.conf
echo "max_wal_senders = 2" >> postgresql.conf
echo "hot_standby = on" >> postgresql.conf
echo "listen_addresses = 'localhost'" >> postgresql.conf

# cwd is ${PGDATA_ALPHA}: this configures archiving for alpha
append_archive_settings postgresql.conf

/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA_ALPHA} -w start

PGDATA=${PGDATA_ALPHA} /tmp/scripts/wait_while_pg_not_ready.sh

psql -c "CREATE ROLE repl WITH REPLICATION PASSWORD 'password' LOGIN;"

# init beta cluster (replica of alpha)
/usr/lib/postgresql/10/bin/pg_basebackup --wal-method=stream -D ${PGDATA_BETA} -U repl -h 127.0.0.1 -p ${ALPHA_PORT}
cd ${PGDATA_BETA}
echo "port = ${BETA_PORT}" >> postgresql.conf
echo "hot_standby = on" >> postgresql.conf
cat > recovery.conf << EOF
standby_mode = 'on'
primary_conninfo = 'host=127.0.0.1 port=${ALPHA_PORT} user=repl password=password'
restore_command = 'cp ${PGDATA_BETA}/archive/%f %p'
trigger_file = '/tmp/postgresql.trigger.${BETA_PORT}'
EOF
/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA_BETA} -w start

# fill database postgres
pgbench -i -s 10 -h 127.0.0.1 -p ${ALPHA_PORT} postgres

#                                              db       table            conn_port     row_count
# NOTE(review): the port passed here is alpha's, i.e. the cluster that was just
# filled; confirm against the helper whether beta's port was intended.
/tmp/scripts/wait_while_replication_complete.sh postgres pgbench_accounts ${ALPHA_PORT} 1000000 # 10 * 100000, 10 is value of -s in pgbench
# script above waits only one table, so just in case sleep
sleep 5

/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA_ALPHA} -m immediate -w stop
sleep 3

# cwd is still ${PGDATA_BETA}: the same archiving settings are appended to
# beta's postgresql.conf before it is promoted
append_archive_settings postgresql.conf

/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA_BETA} -w promote

pgbench -i -s 10 -h 127.0.0.1 -p ${BETA_PORT} postgres

# -W: do not wait for the shutdown to finish; the sleep below gives it time
/usr/lib/postgresql/10/bin/pg_ctl -D ${PGDATA_BETA} -m fast -W stop
sleep 3

# for more info to log
ls "${PGDATA_BETA}/pg_wal"

# Hard-coded segment name; presumably a segment on beta's post-promotion
# timeline that the steps above are expected to have produced and archived.
WAL_TO_DELETE_NAME="00000002000000000000000C"
DELETED_WAL="${PGDATA_BETA}/pg_wal/${WAL_TO_DELETE_NAME}"

rm ${DELETED_WAL}

timeout 30 wal-g --config=${TMP_CONFIG} wal-show

sleep 3

timeout 30 wal-g --config=${TMP_CONFIG} wal-restore ${PGDATA_ALPHA} ${PGDATA_BETA}

# the test passes only if wal-restore brought the deleted segment back
if [ -f ${DELETED_WAL} ]; then
    exit 0
else
    exit 1
fi
72 changes: 72 additions & 0 deletions internal/databases/postgres/pg_control_data.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
package postgres

import (
"encoding/binary"
"io"
"os"
"path"

"github.com/wal-g/tracelog"
)

// PgControlData holds the fields this package extracts from a pg_control file.
type PgControlData struct {
	// systemIdentifier is the system ID of the PG cluster, stored in bytes
	// [0, 8) of pg_control.
	systemIdentifier uint64
	// currentTimeline is the current timeline of the PG cluster, stored in
	// bytes [48, 52) of pg_control for pg_control version 1100+ and in bytes
	// [56, 60) for older versions.
	currentTimeline uint32
	// Further pg_control fields can be added here when they are needed.
}

// ExtractPgControl reads the pg_control file found under folder on the local
// filesystem (at path.Join(folder, PgControlPath)) and returns the fields
// parsed from it.
//
// An error from opening, parsing or closing the file is returned together
// with a nil result. When parsing fails the parse error is returned; a
// failure to close the file in that case is only logged as a warning.
func ExtractPgControl(folder string) (*PgControlData, error) {
	pgControlFile, err := os.Open(path.Join(folder, PgControlPath))
	if err != nil {
		return nil, err
	}

	result, err := extractPgControlData(pgControlFile)
	// Close exactly once, whatever the parse outcome was.
	closeErr := pgControlFile.Close()

	if err != nil {
		// Warn only when closing really failed, so that a successful close
		// does not produce a misleading "error on closing: <nil>" log line.
		if closeErr != nil {
			tracelog.WarningLogger.Printf("Error on closing pg_control file: %v\n", closeErr)
		}
		return nil, err
	}
	if closeErr != nil {
		return nil, closeErr
	}

	return result, nil
}

func extractPgControlData(pgControlReader io.Reader) (*PgControlData, error) {
bytes := make([]byte, 8192)

n, err := pgControlReader.Read(bytes)
if err != nil || n < 8192 {
Xaspy marked this conversation as resolved.
Show resolved Hide resolved
return nil, err
}

systemID := binary.LittleEndian.Uint64(bytes[0:8])
pgControlVersion := binary.LittleEndian.Uint32(bytes[8:12])
currentTimeline := uint32(0)

if pgControlVersion < 1100 {
currentTimeline = binary.LittleEndian.Uint32(bytes[56:60])
} else {
currentTimeline = binary.LittleEndian.Uint32(bytes[48:52])
}

// Parse bytes from pg_control file and share this data
return &PgControlData{
systemIdentifier: systemID,
currentTimeline: currentTimeline,
}, nil
}

// GetSystemIdentifier returns the system identifier stored in data.
func (data *PgControlData) GetSystemIdentifier() uint64 {
return data.systemIdentifier
}

// GetCurrentTimeline returns the current timeline stored in data.
func (data *PgControlData) GetCurrentTimeline() uint32 {
return data.currentTimeline
}