Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use separate disks for WAL and data #984

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.idea
1 change: 1 addition & 0 deletions ENVIRONMENT.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ Environment Configuration Settings
- **PGROOT**: a directory where we put the pgdata (by default /home/postgres/pgroot). One may adjust it to point to the mount point of the persistent volume, such as EBS.
- **WALE_TMPDIR**: directory to store WAL-E temporary files. PGROOT/../tmp by default, make sure it has a few GBs of free space.
- **PGDATA**: location of PostgreSQL data directory, by default PGROOT/pgdata.
- **WAL_DIRECTORY**: location where the write-ahead log (WAL) should be stored. By default the WAL is stored inside PGDATA. Set this option if you plan to use separate disks for WAL and data.
- **PGUSER_STANDBY**: username for the replication user, 'standby' by default.
- **PGPASSWORD_STANDBY**: a password for the replication user, 'standby' by default.
- **STANDBY_HOST**: hostname or IP address of the primary to stream from.
Expand Down
23 changes: 21 additions & 2 deletions postgres-appliance/bootstrap/clone_with_wale.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,22 +22,23 @@ def read_configuration():
parser = argparse.ArgumentParser(description="Script to clone from S3 with support for point-in-time-recovery")
parser.add_argument('--scope', required=True, help='target cluster name')
parser.add_argument('--datadir', required=True, help='target cluster postgres data directory')
parser.add_argument('--waldir', required=True, help='target cluster postgres wal directory')
parser.add_argument('--recovery-target-time',
help='the timestamp up to which recovery will proceed (including time zone)',
dest='recovery_target_time_string')
parser.add_argument('--dry-run', action='store_true', help='find a matching backup and build the wal-e '
'command to fetch that backup without running it')
args = parser.parse_args()

options = namedtuple('Options', 'name datadir recovery_target_time dry_run')
options = namedtuple('Options', 'name datadir recovery_target_time dry_run waldir')
if args.recovery_target_time_string:
recovery_target_time = parse(args.recovery_target_time_string)
if recovery_target_time.tzinfo is None:
raise Exception("recovery target time must contain a timezone")
else:
recovery_target_time = None

return options(args.scope, args.datadir, recovery_target_time, args.dry_run)
return options(args.scope, args.datadir, recovery_target_time, args.dry_run, args.waldir)


def build_wale_command(command, datadir=None, backup=None):
Expand Down Expand Up @@ -178,10 +179,28 @@ def run_clone_from_s3(options):
return 0


def create_symbolic_link_wal_directory(pg_data, wal_dir):
    """Make ``<pg_data>/pg_wal`` a symbolic link pointing at ``wal_dir``.

    Nothing is changed when ``pg_wal`` already resolves to a directory; the
    log then states whether it is a symlink or a regular directory, because in
    the latter case the WAL still lives inside the data directory.

    :param pg_data: path of the PostgreSQL data directory.
    :param wal_dir: path the ``pg_wal`` symbolic link should point to.
    :raises Exception: if the symbolic link cannot be created.
    """
    pg_wal = os.path.join(pg_data, 'pg_wal')
    logger.info(f"Examining whether WAL already exists or not. directory={pg_wal}")

    if os.path.isdir(pg_wal):
        if os.path.islink(pg_wal):
            # Report the real target; it may differ from the requested wal_dir.
            logger.info(f"Wal directory with symbolic link to {os.readlink(pg_wal)} already exists.")
        else:
            # Do not claim a symlink exists: the WAL is not on a separate location.
            logger.warning(f"{pg_wal} is a regular directory, not a symbolic link; "
                           f"WAL is not stored in {wal_dir}")
        return

    try:
        # os.symlink avoids spawning an external `ln -s` process.
        os.symlink(wal_dir, pg_wal)
    except OSError as err:
        raise Exception(f"Creating a separate wal directory failed: {err}") from err
    logger.info(f"Successfully created a wal directory with symbolic link to {wal_dir}")


def main():
options = read_configuration()
try:
run_clone_from_s3(options)
logger.info(f'Found waldir={options.waldir}')
if options.waldir:
create_symbolic_link_wal_directory(options.datadir, options.waldir)
except Exception:
logger.exception("Clone failed")
return 1
Expand Down
11 changes: 10 additions & 1 deletion postgres-appliance/scripts/basebackup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ while getopts ":-:" optchar; do
retries=* )
RETRIES=${OPTARG#*=}
;;
wal_dir=* )
WAL_DIR=${OPTARG#*=}
;;
esac
done

Expand All @@ -27,6 +30,12 @@ else
PG_BASEBACKUP_OPTS=()
fi

if [[ -n "$WAL_DIR" ]]; then
PG_WAL_OPTS=(--waldir="$WAL_DIR")
else
PG_WAL_OPTS=()
fi

WAL_FAST=$(dirname "$DATA_DIR")/wal_fast
readonly WAL_FAST
mkdir -p "$WAL_FAST"
Expand Down Expand Up @@ -97,7 +106,7 @@ fi

ATTEMPT=0
while [[ $((ATTEMPT++)) -le $RETRIES ]]; do
pg_basebackup --pgdata="${DATA_DIR}" "${PG_BASEBACKUP_OPTS[@]}" --dbname="${CONNSTR}" &
pg_basebackup --pgdata="${DATA_DIR}" "${PG_WAL_OPTS[@]}" "${PG_BASEBACKUP_OPTS[@]}" --dbname="${CONNSTR}" &
basebackup_pid=$!
wait $basebackup_pid
EXITCODE=$?
Expand Down
9 changes: 8 additions & 1 deletion postgres-appliance/scripts/configure_spilo.py
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ def deep_update(a, b):
method: clone_with_wale
clone_with_wale:
command: envdir "{{CLONE_WALE_ENV_DIR}}" python3 /scripts/clone_with_wale.py
--recovery-target-time="{{CLONE_TARGET_TIME}}"
--recovery-target-time="{{CLONE_TARGET_TIME}}" --waldir="{{WAL_DIRECTORY}}"
recovery_conf:
restore_command: envdir "{{CLONE_WALE_ENV_DIR}}" timeout "{{WAL_RESTORE_TIMEOUT}}"
/scripts/restore_command.sh "%f" "%p"
Expand All @@ -254,6 +254,9 @@ def deep_update(a, b):
--port={{CLONE_PORT}} --user="{{CLONE_USER}}"
{{/CLONE_WITH_BASEBACKUP}}
initdb:
{{#WAL_DIRECTORY}}
# Hand the configured WAL location to initdb instead of a fixed path.
- waldir: "{{WAL_DIRECTORY}}"
{{/WAL_DIRECTORY}}
- encoding: UTF8
- locale: {{INITDB_LOCALE}}.UTF-8
- data-checksums
Expand Down Expand Up @@ -370,17 +373,20 @@ def deep_update(a, b):
threshold_backup_size_percentage: {{WALE_BACKUP_THRESHOLD_PERCENTAGE}}
retries: 2
no_master: 1
wal_dir: "{{WAL_DIRECTORY}}"
{{/USE_WALE}}
basebackup_fast_xlog:
command: /scripts/basebackup.sh
retries: 2
wal_dir: "{{WAL_DIRECTORY}}"
{{#STANDBY_WITH_WALE}}
bootstrap_standby_with_wale:
command: envdir "{{STANDBY_WALE_ENV_DIR}}" bash /scripts/wale_restore.sh
threshold_megabytes: {{WALE_BACKUP_THRESHOLD_MEGABYTES}}
threshold_backup_size_percentage: {{WALE_BACKUP_THRESHOLD_PERCENTAGE}}
retries: 2
no_master: 1
wal_dir: "{{WAL_DIRECTORY}}"
{{/STANDBY_WITH_WALE}}
'''

Expand Down Expand Up @@ -524,6 +530,7 @@ def get_placeholders(provider):
placeholders.setdefault('PGROOT', os.path.join(placeholders['PGHOME'], 'pgroot'))
placeholders.setdefault('WALE_TMPDIR', os.path.abspath(os.path.join(placeholders['PGROOT'], '../tmp')))
placeholders.setdefault('PGDATA', os.path.join(placeholders['PGROOT'], 'pgdata'))
placeholders.setdefault('WAL_DIRECTORY', '')
placeholders.setdefault('HUMAN_ROLE', 'zalandos')
placeholders.setdefault('PGUSER_STANDBY', 'standby')
placeholders.setdefault('PGPASSWORD_STANDBY', 'standby')
Expand Down
2 changes: 1 addition & 1 deletion postgres-appliance/scripts/restore_command.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ readonly wal_destination=$2

wal_dir=$(dirname "$wal_destination")
readonly wal_dir
wal_fast_source=$(dirname "$(dirname "$(realpath "$wal_dir")")")/wal_fast/$wal_filename
# wal_fast is a sibling of the data directory (basebackup.sh creates it as
# $(dirname "$DATA_DIR")/wal_fast), so take only the parent of PGDATA and then
# append the file name; the result must be the WAL file itself for the -f test.
# NOTE(review): assumes PGDATA is set in the restore_command environment - confirm.
wal_fast_source=$(dirname "$PGDATA")/wal_fast/$wal_filename
readonly wal_fast_source

[[ -f $wal_fast_source ]] && exec mv "${wal_fast_source}" "${wal_destination}"
Expand Down
15 changes: 13 additions & 2 deletions postgres-appliance/scripts/wale_restore.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ while getopts ":-:" optchar; do
no_master=*|no-master=* )
NO_MASTER=${OPTARG#*=}
;;
wal_dir=* )
WAL_DIR=${OPTARG#*=}
;;
esac
done

Expand Down Expand Up @@ -91,8 +94,16 @@ while true; do
if $WAL_E backup-fetch "$DATA_DIR" LATEST; then
version=$(<"$DATA_DIR/PG_VERSION")
[[ "$version" =~ \. ]] && wal_name=xlog || wal_name=wal
readonly wal_dir=$DATA_DIR/pg_$wal_name
[[ ! -d $wal_dir ]] && rm -f "$wal_dir" && mkdir "$wal_dir"
readonly pg_wal_location=$DATA_DIR/pg_$wal_name

# Only create a symbolic link when a separate WAL directory is specified.
if [[ -n "$WAL_DIR" ]]; then
PG_WAL_OPTS=(ln -s "$WAL_DIR" "$pg_wal_location")
else
PG_WAL_OPTS=(mkdir "$pg_wal_location")
fi

[[ ! -d $pg_wal_location ]] && rm -f "$pg_wal_location" && "${PG_WAL_OPTS[@]}"
# remove broken symlinks from PGDATA
find "$DATA_DIR" -xtype l -delete
exit 0
Expand Down
34 changes: 34 additions & 0 deletions postgres-appliance/tests/test_spilo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,16 @@ function start_clone_with_basebackup_upgrade_container() {
-d spilo3
}

# Run the "spilo<N>" docker-compose service detached, with SCOPE=separatewal and
# WAL_DIRECTORY=/home/postgres/wal, naming the container "${PREFIX}separatewal<N>".
#   $1 - numeric suffix N selecting the service and the container name
function start_separate_wal_directory_container() {
    local node_id=$1
    local -a run_args=(
        -e SCOPE=separatewal
        -e WAL_DIRECTORY="/home/postgres/wal"
        --name "${PREFIX}separatewal${node_id}"
        -d "spilo${node_id}"
    )

    docker-compose run "${run_args[@]}"
}

function verify_clone_upgrade() {
local type=$2
local from_version=$3
Expand All @@ -249,13 +259,26 @@ function verify_archive_mode_is_on() {
[ "$archive_mode" = "on" ]
}

# Succeed only when pg_wal inside the container is a symbolic link that
# resolves exactly to the separate WAL location.
#   $1 - container name handed to docker_exec
function verify_wal_outside_data_directory() {
    local target_path="/home/postgres/wal"
    local pg_wal_path="/home/postgres/pgdata/pgroot/data/pg_wal"
    # Declared local so the result does not leak into the caller's scope.
    local is_symbolic_link

    is_symbolic_link=$(
        docker_exec "$1" "
            [ -L '$pg_wal_path' ] &&
            readlink -f '$pg_wal_path' | grep -qxF \"$target_path\" &&
            echo true ||
            echo false"
    )
    # grep -x -F above compares the whole resolved path literally, so look-alike
    # paths such as /home/postgres/wal_fast are rejected.
    [ "$is_symbolic_link" = true ]
}


# TEST SUITE 1 - In-place major upgrade 10->11->...->15
# TEST SUITE 2 - Major upgrade 10->15 after wal-e clone (with CLONE_PGVERSION set)
# TEST SUITE 3 - PITR (clone with wal-e) with unreachable target (13+)
# TEST SUITE 4 - Major upgrade 10->11 after wal-e clone (no CLONE_PGVERSION)
# TEST SUITE 5 - Replica bootstrap with wal-e
# TEST SUITE 6 - Major upgrade 11->12 after clone with basebackup
# TEST SUITE 7 - Form a fresh cluster that persists WALs outside of data directory
function test_spilo() {
# TEST SUITE 1
local container=$1
Expand Down Expand Up @@ -355,6 +378,11 @@ function test_spilo() {
basebackup_container=$(start_clone_with_basebackup_upgrade_container "$upgrade_container") # SCOPE=upgrade2 PGVERSION=12 CLONE: _SCOPE=upgrade
log_info "[TS6] Started $basebackup_container for testing major upgrade 11->12 after clone with basebackup"

# TEST SUITE 7
local seapate_wal_container="${PREFIX}separatewal1"
start_separate_wal_directory_container 1 # WAL_DIRECTORY="/home/postgres/wal" SCOPE=separatewal
start_separate_wal_directory_container 2 # WAL_DIRECTORY="/home/postgres/wal" SCOPE=separatewal
log_info "[TS7] Started a fresh cluster to test for persisting WALs on a specified location"

# TEST SUITE 1
# run_test test_pg_upgrade_to_15_check_failed "$container" # pg_upgrade --check complains about timescaledb
Expand All @@ -377,6 +405,12 @@ function test_spilo() {
log_info "[TS6] Testing in-place major upgrade 11->12 after clone with basebackup"
run_test verify_clone_upgrade "$basebackup_container" "basebackup" 11 12
run_test verify_archive_mode_is_on "$basebackup_container"

# TEST SUITE 7
wait_all_streaming "$seapate_wal_container" 1
wait_zero_lag "$seapate_wal_container" 1
run_test verify_wal_outside_data_directory "$seapate_wal_container"
run_test verify_wal_outside_data_directory "${PREFIX}separatewal2"
}

function main() {
Expand Down
Loading