diff --git a/.github/ISSUE_TEMPLATE/postgres-operator-issue-template.md b/.github/ISSUE_TEMPLATE/postgres-operator-issue-template.md index a4dec9409..b96eba1c6 100644 --- a/.github/ISSUE_TEMPLATE/postgres-operator-issue-template.md +++ b/.github/ISSUE_TEMPLATE/postgres-operator-issue-template.md @@ -9,7 +9,7 @@ assignees: '' Please, answer some short questions which should help us to understand your problem / question better? -- **Which image of the operator are you using?** e.g. registry.opensource.zalan.do/acid/postgres-operator:v1.6.0 +- **Which image of the operator are you using?** e.g. registry.opensource.zalan.do/acid/postgres-operator:v1.6.1 - **Where do you run it - cloud or metal? Kubernetes or OpenShift?** [AWS K8s | GCP ... | Bare Metal K8s] - **Are you running Postgres Operator in production?** [yes | no] - **Type of issue?** [Bug report, question, feature request, etc.] diff --git a/README.md b/README.md index 7edb60d84..b29142967 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ The Postgres Operator delivers an easy to run highly-available [PostgreSQL](https://www.postgresql.org/) -clusters on Kubernetes (K8s) powered by [Patroni](https://github.com/zalando/spilo). +clusters on Kubernetes (K8s) powered by [Patroni](https://github.com/zalando/patroni). It is configured only through Postgres manifests (CRDs) to ease integration into automated CI/CD pipelines with no access to Kubernetes API directly, promoting infrastructure as code vs manual operations. @@ -24,7 +24,7 @@ pipelines with no access to Kubernetes API directly, promoting infrastructure as * Support for custom TLS certificates * UI to create and edit Postgres cluster manifests * Works well on Amazon AWS, Google Cloud, OpenShift and locally on Kind -* Base support for AWS EBS gp3 migration (iops, throughput pending) +* Support for AWS EBS gp3 migration ### PostgreSQL features @@ -65,7 +65,7 @@ We introduce the major version into the backup path to smoothen the [major versi The new operator configuration can set a compatibility flag *enable_spilo_wal_path_compat* to make Spilo look for wal segments in the current path but also old format paths. This comes at potential performance costs and should be disabled after a few days. 
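A minimal sketch of how this compatibility flag can be enabled in the CRD-based configuration (the key name comes from the note above; its placement under `configuration:` follows the default configuration manifest in this repo, and it should be set back to `false` once all clusters have moved to the new path layout):

```yaml
apiVersion: "acid.zalan.do/v1"
kind: OperatorConfiguration
metadata:
  name: postgresql-operator-default-configuration
configuration:
  # let Spilo look for WAL segments in both the new (versioned) and old paths
  enable_spilo_wal_path_compat: true
```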
-The new Spilo 13 image is: `registry.opensource.zalan.do/acid/spilo-13:2.0-p2` +The newest Spilo 13 image is: `registry.opensource.zalan.do/acid/spilo-13:2.0-p4` The last Spilo 12 image is: `registry.opensource.zalan.do/acid/spilo-12:1.6-p5` diff --git a/charts/postgres-operator-ui/Chart.yaml b/charts/postgres-operator-ui/Chart.yaml index 9be6c84dd..8ff7f6637 100644 --- a/charts/postgres-operator-ui/Chart.yaml +++ b/charts/postgres-operator-ui/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 name: postgres-operator-ui -version: 1.6.0 -appVersion: 1.6.0 +version: 1.6.1 +appVersion: 1.6.1 home: https://github.com/zalando/postgres-operator description: Postgres Operator UI provides a graphical interface for a convenient database-as-a-service user experience keywords: diff --git a/charts/postgres-operator-ui/index.yaml b/charts/postgres-operator-ui/index.yaml index 948a52274..7706f5bf8 100644 --- a/charts/postgres-operator-ui/index.yaml +++ b/charts/postgres-operator-ui/index.yaml @@ -2,11 +2,10 @@ apiVersion: v1 entries: postgres-operator-ui: - apiVersion: v1 - appVersion: 1.6.0 - created: "2020-12-18T14:19:25.464717041+01:00" - description: Postgres Operator UI provides a graphical interface for a convenient - database-as-a-service user experience - digest: d7813a235dd1015377c38fd5a14e7679a411c7340a25cfcf5f5294405f9a2eb2 + appVersion: 1.6.1 + created: "2021-02-16T12:16:51.963793476+01:00" + description: Postgres Operator UI provides a graphical interface for a convenient database-as-a-service user experience + digest: 3d321352f2f1e7bb7450aa8876e3d818aa9f9da9bd4250507386f0490f2c1969 home: https://github.com/zalando/postgres-operator keywords: - postgres @@ -22,13 +21,12 @@ entries: sources: - https://github.com/zalando/postgres-operator urls: - - postgres-operator-ui-1.6.0.tgz - version: 1.6.0 + - postgres-operator-ui-1.6.1.tgz + version: 1.6.1 - apiVersion: v1 appVersion: 1.5.0 - created: "2020-12-18T14:19:25.464015993+01:00" - description: Postgres Operator UI provides a graphical interface for a convenient - database-as-a-service user experience + created: "2021-02-16T12:16:51.96319758+01:00" + description: Postgres Operator UI provides a graphical interface for a convenient database-as-a-service user experience digest: c91ea39e6d51d57f4048fb1b6ec53b40823f2690eb88e4e4f1a036367b9fdd61 home: https://github.com/zalando/postgres-operator keywords: @@ -47,4 +45,4 @@ entries: urls: - postgres-operator-ui-1.5.0.tgz version: 1.5.0 -generated: "2020-12-18T14:19:25.463104102+01:00" +generated: "2021-02-16T12:16:51.962463462+01:00" diff --git a/charts/postgres-operator-ui/postgres-operator-ui-1.6.0.tgz b/charts/postgres-operator-ui/postgres-operator-ui-1.6.0.tgz deleted file mode 100644 index 68a43b51b..000000000 Binary files a/charts/postgres-operator-ui/postgres-operator-ui-1.6.0.tgz and /dev/null differ diff --git a/charts/postgres-operator-ui/postgres-operator-ui-1.6.1.tgz b/charts/postgres-operator-ui/postgres-operator-ui-1.6.1.tgz new file mode 100644 index 000000000..c59d20b2f Binary files /dev/null and b/charts/postgres-operator-ui/postgres-operator-ui-1.6.1.tgz differ diff --git a/charts/postgres-operator-ui/values.yaml b/charts/postgres-operator-ui/values.yaml index 2aef84f1c..926d99b39 100644 --- a/charts/postgres-operator-ui/values.yaml +++ b/charts/postgres-operator-ui/values.yaml @@ -8,7 +8,7 @@ replicaCount: 1 image: registry: registry.opensource.zalan.do repository: acid/postgres-operator-ui - tag: v1.6.0 + tag: v1.6.1 pullPolicy: "IfNotPresent" # Optionally specify an array of 
imagePullSecrets. diff --git a/charts/postgres-operator/Chart.yaml b/charts/postgres-operator/Chart.yaml index e5a66b6e3..857967a48 100644 --- a/charts/postgres-operator/Chart.yaml +++ b/charts/postgres-operator/Chart.yaml @@ -1,7 +1,7 @@ apiVersion: v1 name: postgres-operator -version: 1.6.0 -appVersion: 1.6.0 +version: 1.6.1 +appVersion: 1.6.1 home: https://github.com/zalando/postgres-operator description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes keywords: diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index ef9b2c84d..3906f9052 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -65,7 +65,7 @@ spec: properties: docker_image: type: string - default: "registry.opensource.zalan.do/acid/spilo-13:2.0-p2" + default: "registry.opensource.zalan.do/acid/spilo-13:2.0-p4" enable_crd_validation: type: boolean default: true @@ -382,7 +382,7 @@ spec: properties: logical_backup_docker_image: type: string - default: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" + default: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1" logical_backup_google_application_credentials: type: string logical_backup_job_prefix: @@ -511,7 +511,7 @@ spec: default: "pooler" connection_pooler_image: type: string - default: "registry.opensource.zalan.do/acid/pgbouncer:master-12" + default: "registry.opensource.zalan.do/acid/pgbouncer:master-14" connection_pooler_max_db_connections: type: integer default: 60 diff --git a/charts/postgres-operator/index.yaml b/charts/postgres-operator/index.yaml index 6b64fd705..a0f705ec9 100644 --- a/charts/postgres-operator/index.yaml +++ b/charts/postgres-operator/index.yaml @@ -2,11 +2,10 @@ apiVersion: v1 entries: postgres-operator: - apiVersion: v1 - appVersion: 1.6.0 - created: "2020-12-17T16:16:25.639708821+01:00" - description: Postgres Operator creates and manages PostgreSQL clusters running - in Kubernetes - digest: 2f5f527bae0a22b02f2f7b1e2352665cecf489a990e18212444fa34450b97604 + appVersion: 1.6.1 + created: "2021-02-16T11:49:43.295433402+01:00" + description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes + digest: ce9cfc0d4838edf307b690b942bd4e1ea73c3b93bb5552ae8ecd2952d55383ea home: https://github.com/zalando/postgres-operator keywords: - postgres @@ -21,13 +20,12 @@ entries: sources: - https://github.com/zalando/postgres-operator urls: - - postgres-operator-1.6.0.tgz - version: 1.6.0 + - postgres-operator-1.6.1.tgz + version: 1.6.1 - apiVersion: v1 appVersion: 1.5.0 - created: "2020-12-17T16:16:25.637262877+01:00" - description: Postgres Operator creates and manages PostgreSQL clusters running - in Kubernetes + created: "2021-02-16T11:49:43.292890391+01:00" + description: Postgres Operator creates and manages PostgreSQL clusters running in Kubernetes digest: 198351d5db52e65cdf383d6f3e1745d91ac1e2a01121f8476f8b1be728b09531 home: https://github.com/zalando/postgres-operator keywords: @@ -45,4 +43,4 @@ entries: urls: - postgres-operator-1.5.0.tgz version: 1.5.0 -generated: "2020-12-17T16:16:25.635647131+01:00" +generated: "2021-02-16T11:49:43.291315248+01:00" diff --git a/charts/postgres-operator/postgres-operator-1.6.0.tgz b/charts/postgres-operator/postgres-operator-1.6.0.tgz deleted file mode 100644 index bf98cd818..000000000 Binary files a/charts/postgres-operator/postgres-operator-1.6.0.tgz and /dev/null differ diff --git 
a/charts/postgres-operator/postgres-operator-1.6.1.tgz b/charts/postgres-operator/postgres-operator-1.6.1.tgz new file mode 100644 index 000000000..48ffb9014 Binary files /dev/null and b/charts/postgres-operator/postgres-operator-1.6.1.tgz differ diff --git a/charts/postgres-operator/values-crd.yaml b/charts/postgres-operator/values-crd.yaml index 42af903cd..a66911f96 100644 --- a/charts/postgres-operator/values-crd.yaml +++ b/charts/postgres-operator/values-crd.yaml @@ -1,7 +1,7 @@ image: registry: registry.opensource.zalan.do repository: acid/postgres-operator - tag: v1.6.0 + tag: v1.6.1 pullPolicy: "IfNotPresent" # Optionally specify an array of imagePullSecrets. @@ -32,7 +32,7 @@ configGeneral: # Select if setup uses endpoints (default), or configmaps to manage leader (DCS=k8s) # kubernetes_use_configmaps: false # Spilo docker image - docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p2 + docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p4 # max number of instances in Postgres cluster. -1 = no limit min_instances: -1 # min number of instances in Postgres cluster. -1 = no limit @@ -252,7 +252,7 @@ configAwsOrGcp: # configure K8s cron job managed by the operator configLogicalBackup: # image for pods of the logical backup job (example runs pg_dumpall) - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" + logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1" # path of google cloud service account json file # logical_backup_google_application_credentials: "" @@ -315,7 +315,7 @@ configConnectionPooler: # db user for pooler to use connection_pooler_user: "pooler" # docker image - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-9" + connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-14" # max db connections the pooler should hold connection_pooler_max_db_connections: 60 # default pooling mode diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index c46e21e1f..e55da4b86 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -1,7 +1,7 @@ image: registry: registry.opensource.zalan.do repository: acid/postgres-operator - tag: v1.6.0 + tag: v1.6.1 pullPolicy: "IfNotPresent" # Optionally specify an array of imagePullSecrets. @@ -35,7 +35,7 @@ configGeneral: # Select if setup uses endpoints (default), or configmaps to manage leader (DCS=k8s) # kubernetes_use_configmaps: "false" # Spilo docker image - docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p2 + docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p4 # max number of instances in Postgres cluster. -1 = no limit min_instances: "-1" # min number of instances in Postgres cluster. 
-1 = no limit @@ -242,7 +242,7 @@ configAwsOrGcp: # configure K8s cron job managed by the operator configLogicalBackup: # image for pods of the logical backup job (example runs pg_dumpall) - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" + logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1" # path of google cloud service account json file # logical_backup_google_application_credentials: "" @@ -309,7 +309,7 @@ configConnectionPooler: # db user for pooler to use connection_pooler_user: "pooler" # docker image - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-9" + connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-14" # max db connections the pooler should hold connection_pooler_max_db_connections: "60" # default pooling mode diff --git a/docs/administrator.md b/docs/administrator.md index 30b612ded..715b8f74e 100644 --- a/docs/administrator.md +++ b/docs/administrator.md @@ -135,6 +135,26 @@ Every other Postgres cluster which lacks the annotation will be ignored by this operator. Conversely, operators without a defined `CONTROLLER_ID` will ignore clusters with defined ownership of another operator. +## Understanding rolling update of Spilo pods + +The operator logs reasons for a rolling update with the `info` level and a diff +between the old and new StatefulSet specs with the `debug` level. To benefit +from numerous escape characters in the latter log entry, view it in CLI with +`echo -e`. Note that the resultant message will contain some noise because the +`PodTemplate` used by the operator is yet to be updated with the default values +used internally in K8s. + +The operator also support lazy updates of the Spilo image. That means the pod +template of a PG cluster's stateful set is updated immediately with the new +image, but no rolling update follows. This feature saves you a switchover - and +hence downtime - when you know pods are re-started later anyway, for instance +due to the node rotation. To force a rolling update, disable this mode by +setting the `enable_lazy_spilo_upgrade` to `false` in the operator configuration +and restart the operator pod. With the standard eager rolling updates the +operator checks during Sync all pods run images specified in their respective +statefulsets. The operator triggers a rolling upgrade for PG clusters that +violate this condition. + ## Delete protection via annotations To avoid accidental deletes of Postgres clusters the operator can check the @@ -196,7 +216,6 @@ On the next sync event it should change to `Running`. However, as it is in fact a new resource for K8s, the UID will differ which can trigger a rolling update of the pods because the UID is used as part of backup path to S3. - ## Role-based access control for the operator The manifest [`operator-service-account-rbac.yaml`](../manifests/operator-service-account-rbac.yaml) @@ -393,21 +412,24 @@ spec: ## Custom Pod Environment Variables -It is possible to configure a ConfigMap as well as a Secret which are used by the Postgres pods as -an additional provider for environment variables. One use case is to customize -the Spilo image and configure it with environment variables. Another case could be to provide custom -cloud provider or backup settings. -In general the Operator will give preference to the globally configured variables, to not have the custom -ones interfere with core functionality. 
Variables with the 'WAL_' and 'LOG_' prefix can be overwritten though, to allow -backup and logshipping to be specified differently. +It is possible to configure a ConfigMap as well as a Secret which are used by +the Postgres pods as an additional provider for environment variables. One use +case is a customized Spilo image configured by extra environment variables. +Another case could be to provide custom cloud provider or backup settings. +In general the Operator will give preference to the globally configured +variables, to not have the custom ones interfere with core functionality. +Variables with the 'WAL_' and 'LOG_' prefix can be overwritten though, to +allow backup and log shipping to be specified differently. ### Via ConfigMap -The ConfigMap with the additional settings is referenced in the operator's main configuration. -A namespace can be specified along with the name. If left out, the configured -default namespace of your K8s client will be used and if the ConfigMap is not -found there, the Postgres cluster's namespace is taken when different: + +The ConfigMap with the additional settings is referenced in the operator's +main configuration. A namespace can be specified along with the name. If left +out, the configured default namespace of your K8s client will be used and if +the ConfigMap is not found there, the Postgres cluster's namespace is taken +when different: **postgres-operator ConfigMap** @@ -446,15 +468,15 @@ data: MY_CUSTOM_VAR: value ``` -The key-value pairs of the ConfigMap are then added as environment variables to the -Postgres StatefulSet/pods. - +The key-value pairs of the ConfigMap are then added as environment variables +to the Postgres StatefulSet/pods. ### Via Secret -The Secret with the additional variables is referenced in the operator's main configuration. -To protect the values of the secret from being exposed in the pod spec they are each referenced -as SecretKeyRef. -This does not allow for the secret to be in a different namespace as the pods though + +The Secret with the additional variables is referenced in the operator's main +configuration. To protect the values of the secret from being exposed in the +pod spec they are each referenced as SecretKeyRef. This does not allow for the +secret to be in a different namespace as the pods though **postgres-operator ConfigMap** @@ -493,8 +515,8 @@ data: MY_CUSTOM_VAR: dmFsdWU= ``` -The key-value pairs of the Secret are all accessible as environment variables to the -Postgres StatefulSet/pods. +The key-value pairs of the Secret are all accessible as environment variables +to the Postgres StatefulSet/pods. ## Limiting the number of min and max instances in clusters @@ -503,8 +525,8 @@ instances permitted by each Postgres cluster managed by the operator. If either `min_instances` or `max_instances` is set to a non-zero value, the operator may adjust the number of instances specified in the cluster manifest to match either the min or the max boundary. For instance, of a cluster manifest has 1 -instance and the `min_instances` is set to 3, the cluster will be created with 3 -instances. By default, both parameters are set to `-1`. +instance and the `min_instances` is set to 3, the cluster will be created with +3 instances. By default, both parameters are set to `-1`. ## Load balancers and allowed IP ranges @@ -579,59 +601,6 @@ maintaining and troubleshooting, and (c) additional teams, superuser teams or members associated with the owning team. 
The latter is managed via the [PostgresTeam CRD](user.md#additional-teams-and-members-per-cluster). - -## Understanding rolling update of Spilo pods - -The operator logs reasons for a rolling update with the `info` level and a diff -between the old and new StatefulSet specs with the `debug` level. To benefit -from numerous escape characters in the latter log entry, view it in CLI with -`echo -e`. Note that the resultant message will contain some noise because the -`PodTemplate` used by the operator is yet to be updated with the default values -used internally in K8s. - -The operator also support lazy updates of the Spilo image. That means the pod -template of a PG cluster's stateful set is updated immediately with the new -image, but no rolling update follows. This feature saves you a switchover - and -hence downtime - when you know pods are re-started later anyway, for instance -due to the node rotation. To force a rolling update, disable this mode by -setting the `enable_lazy_spilo_upgrade` to `false` in the operator configuration -and restart the operator pod. With the standard eager rolling updates the -operator checks during Sync all pods run images specified in their respective -statefulsets. The operator triggers a rolling upgrade for PG clusters that -violate this condition. - -## Logical backups - -The operator can manage K8s cron jobs to run logical backups of Postgres -clusters. The cron job periodically spawns a batch job that runs a single pod. -The backup script within this pod's container can connect to a DB for a logical -backup. The operator updates cron jobs during Sync if the job schedule changes; -the job name acts as the job identifier. These jobs are to be enabled for each -individual Postgres cluster by setting `enableLogicalBackup: true` in its -manifest. Notes: - -1. The [example image](../docker/logical-backup/Dockerfile) implements the -backup via `pg_dumpall` and upload of compressed and encrypted results to an S3 -bucket; the default image ``registry.opensource.zalan.do/acid/logical-backup`` -is the same image built with the Zalando-internal CI pipeline. `pg_dumpall` -requires a `superuser` access to a DB and runs on the replica when possible. - -2. Due to the [limitation of K8s cron jobs](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) -it is highly advisable to set up additional monitoring for this feature; such -monitoring is outside of the scope of operator responsibilities. - -3. The operator does not remove old backups. - -4. You may use your own image by overwriting the relevant field in the operator -configuration. Any such image must ensure the logical backup is able to finish -[in presence of pod restarts](https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#handling-pod-and-container-failures) -and [simultaneous invocations](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) -of the backup cron job. - -5. For that feature to work, your RBAC policy must enable operations on the -`cronjobs` resource from the `batch` API group for the operator service account. 
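A sketch of the kind of ClusterRole rule this implies for the operator service account (the verb list here is illustrative; the example RBAC manifest referenced next is the authoritative source):

```yaml
rules:
- apiGroups:
  - batch
  resources:
  - cronjobs
  verbs: ["create", "delete", "get", "list", "patch", "update"]
```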
-See [example RBAC](../manifests/operator-service-account-rbac.yaml) - ## Access to cloud resources from clusters in non-cloud environment To access cloud resources like S3 from a cluster on bare metal you can use @@ -649,26 +618,127 @@ A secret can be pre-provisioned in different ways: * Automatically provisioned via a custom K8s controller like [kube-aws-iam-controller](https://github.com/mikkeloscar/kube-aws-iam-controller) -## Google Cloud Platform setup +## WAL archiving and physical basebackups + +Spilo is shipped with [WAL-E](https://github.com/wal-e/wal-e) and its successor +[WAL-G](https://github.com/wal-g/wal-g) to perform WAL archiving. By default, +WAL-E is used for backups because it is more battle-tested. In addition to the +continuous backup stream WAL-E/G pushes a physical base backup every night and +01:00 am UTC. + +These are the pre-configured settings in the docker image: +```bash +BACKUP_NUM_TO_RETAIN: 5 +BACKUP_SCHEDULE: '00 01 * * *' +USE_WALG_BACKUP: false (true for Azure and SSH) +USE_WALG_RESTORE: false (true for S3, Azure and SSH) +``` + +Within Postgres you can check the pre-configured commands for archiving and +restoring WAL files. You can find the log files to the respective commands +under `$HOME/pgdata/pgroot/pg_log/postgres-?.log`. + +```bash +archive_command: `envdir "{WALE_ENV_DIR}" {WALE_BINARY} wal-push "%p"` +restore_command: `envdir "{{WALE_ENV_DIR}}" /scripts/restore_command.sh "%f" "%p"` +``` + +You can produce a basebackup manually with the following command and check +if it ends up in your specified WAL backup path: -To configure the operator on GCP there are some prerequisites that are needed: +```bash +envdir "/run/etc/wal-e.d/env" /scripts/postgres_backup.sh "/home/postgres/pgdata/pgroot/data" +``` + +Depending on the cloud storage provider different [environment variables](https://github.com/zalando/spilo/blob/master/ENVIRONMENT.rst) +have to be set for Spilo. Not all of them are generated automatically by the +operator by changing its configuration. In this case you have to use an +[extra configmap or secret](#custom-pod-environment-variables). + +### Using AWS S3 or compliant services + +When using AWS you have to reference the S3 backup path, the IAM role and the +AWS region in the configuration. 
+ +**postgres-operator ConfigMap** + +```yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: postgres-operator +data: + aws_region: eu-central-1 + kube_iam_role: postgres-pod-role + wal_s3_bucket: your-backup-path +``` + +**OperatorConfiguration** + +```yaml +apiVersion: "acid.zalan.do/v1" +kind: OperatorConfiguration +metadata: + name: postgresql-operator-configuration +configuration: + aws_or_gcp: + aws_region: eu-central-1 + kube_iam_role: postgres-pod-role + wal_s3_bucket: your-backup-path +``` + +The referenced IAM role should contain the following privileges to make sure +Postgres can send compressed WAL files to the given S3 bucket: + +```yaml + PostgresPodRole: + Type: "AWS::IAM::Role" + Properties: + RoleName: "postgres-pod-role" + Path: "/" + Policies: + - PolicyName: "SpiloS3Access" + PolicyDocument: + Version: "2012-10-17" + Statement: + - Action: "s3:*" + Effect: "Allow" + Resource: + - "arn:aws:s3:::your-backup-path" + - "arn:aws:s3:::your-backup-path/*" +``` + +This should produce the following settings for the essential environment +variables: + +```bash +AWS_ENDPOINT='https://s3.eu-central-1.amazonaws.com:443' +WALE_S3_ENDPOINT='https+path://s3.eu-central-1.amazonaws.com:443' +WALE_S3_PREFIX=$WAL_S3_BUCKET/spilo/{WAL_BUCKET_SCOPE_PREFIX}{SCOPE}{WAL_BUCKET_SCOPE_SUFFIX}/wal/{PGVERSION} +``` + +If the prefix is not specified Spilo will generate it from `WAL_S3_BUCKET`. +When the `AWS_REGION` is set `AWS_ENDPOINT` and `WALE_S3_ENDPOINT` are +generated automatically. `WALG_S3_PREFIX` is identical to `WALE_S3_PREFIX`. +`SCOPE` is the Postgres cluster name. + +### Google Cloud Platform setup + +To configure the operator on GCP these prerequisites that are needed: * A service account with the proper IAM setup to access the GCS bucket for the WAL-E logs * The credentials file for the service account. -The configuration paramaters that we will be using are: +The configuration parameters that we will be using are: * `additional_secret_mount` * `additional_secret_mount_path` * `gcp_credentials` * `wal_gs_bucket` -### Generate a K8s secret resource - -Generate the K8s secret resource that will contain your service account's +1. Generate the K8s secret resource that will contain your service account's credentials. It's highly recommended to use a service account and limit its scope to just the WAL-E bucket. - ```yaml apiVersion: v1 kind: Secret @@ -681,11 +751,9 @@ stringData: ``` -### Setup your operator configuration values - -With the `psql-wale-creds` resource applied to your cluster, ensure that -the operator's configuration is set up like the following: - +2. Setup your operator configuration values. With the `psql-wale-creds` +resource applied to your cluster, ensure that the operator's configuration +is set up like the following: ```yml ... aws_or_gcp: @@ -700,9 +768,8 @@ aws_or_gcp: ... ``` -### Setup pod environment configmap - -To make postgres-operator work with GCS, use following configmap: +3. Setup pod environment configmap that instructs the operator to use WAL-G, +instead of WAL-E, for backup and restore. ```yml apiVersion: v1 kind: ConfigMap @@ -715,9 +782,8 @@ data: USE_WALG_RESTORE: "true" CLONE_USE_WALG_RESTORE: "true" ``` -This configmap will instruct operator to use WAL-G, instead of WAL-E, for backup and restore. -Then provide this configmap in postgres-operator settings: +4. Then provide this configmap in postgres-operator settings: ```yml ... 
# namespaced name of the ConfigMap with environment variables to populate on every pod @@ -725,6 +791,62 @@ pod_environment_configmap: "postgres-operator-system/pod-env-overrides" ... ``` +### Restoring physical backups + +If cluster members have to be (re)initialized restoring physical backups +happens automatically either from the backup location or by running +[pg_basebackup](https://www.postgresql.org/docs/13/app-pgbasebackup.html) +on one of the other running instances (preferably replicas if they do not lag +behind). You can test restoring backups by [cloning](user.md#how-to-clone-an-existing-postgresql-cluster) +clusters. + +## Logical backups + +The operator can manage K8s cron jobs to run logical backups (SQL dumps) of +Postgres clusters. The cron job periodically spawns a batch job that runs a +single pod. The backup script within this pod's container can connect to a DB +for a logical backup. The operator updates cron jobs during Sync if the job +schedule changes; the job name acts as the job identifier. These jobs are to +be enabled for each individual Postgres cluster by updating the manifest: + +```yaml +apiVersion: "acid.zalan.do/v1" +kind: postgresql +metadata: + name: demo-cluster +spec: + enableLogicalBackup: true +``` + +There a few things to consider when using logical backups: + +1. Logical backups should not be seen as a proper alternative to basebackups +and WAL archiving which are described above. At the moment, the operator cannot +restore logical backups automatically and you do not get point-in-time recovery +but only snapshots of your data. In its current state, see logical backups as a +way to quickly create SQL dumps that you can easily restore in an empty test +cluster. + +2. The [example image](../docker/logical-backup/Dockerfile) implements the backup +via `pg_dumpall` and upload of compressed and encrypted results to an S3 bucket. +`pg_dumpall` requires a `superuser` access to a DB and runs on the replica when +possible. + +3. Due to the [limitation of K8s cron jobs](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) +it is highly advisable to set up additional monitoring for this feature; such +monitoring is outside of the scope of operator responsibilities. + +4. The operator does not remove old backups. + +5. You may use your own image by overwriting the relevant field in the operator +configuration. Any such image must ensure the logical backup is able to finish +[in presence of pod restarts](https://kubernetes.io/docs/concepts/workloads/controllers/jobs-run-to-completion/#handling-pod-and-container-failures) +and [simultaneous invocations](https://kubernetes.io/docs/concepts/workloads/controllers/cron-jobs/#cron-job-limitations) +of the backup cron job. + +6. For that feature to work, your RBAC policy must enable operations on the +`cronjobs` resource from the `batch` API group for the operator service account. +See [example RBAC](../manifests/operator-service-account-rbac.yaml) ## Sidecars for Postgres clusters @@ -739,6 +861,7 @@ configuration: name: global-sidecar ports: - containerPort: 80 + protocol: TCP volumeMounts: - mountPath: /custom-pgdata-mountpoint name: pgdata @@ -814,7 +937,7 @@ make docker # build in image in minikube docker env eval $(minikube docker-env) -docker build -t registry.opensource.zalan.do/acid/postgres-operator-ui:v1.3.0 . +docker build -t registry.opensource.zalan.do/acid/postgres-operator-ui:v1.6.1 . 
# apply UI manifests next to a running Postgres Operator kubectl apply -f manifests/ diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 54d13ffc2..eb1d855b7 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -565,7 +565,7 @@ grouped under the `logical_backup` key. runs `pg_dumpall` on a replica if possible and uploads compressed results to an S3 bucket under the key `/spilo/pg_cluster_name/cluster_k8s_uuid/logical_backups`. The default image is the same image built with the Zalando-internal CI - pipeline. Default: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" + pipeline. Default: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1" * **logical_backup_google_application_credentials** Specifies the path of the google cloud service account json file. Default is empty. diff --git a/docs/user.md b/docs/user.md index ec5941d9e..8ba649bdd 100644 --- a/docs/user.md +++ b/docs/user.md @@ -30,7 +30,7 @@ spec: databases: foo: zalando postgresql: - version: "12" + version: "13" ``` Once you cloned the Postgres Operator [repository](https://github.com/zalando/postgres-operator) @@ -509,6 +509,25 @@ spec: defaultUsers: true ``` +### Schema `search_path` for default roles + +The schema [`search_path`](https://www.postgresql.org/docs/13/ddl-schemas.html#DDL-SCHEMAS-PATH) +for each role will include the role name and the schemas, this role should have +access to. So `foo_bar_writer` does not have to schema-qualify tables from +schemas `foo_bar_writer, bar`, while `foo_writer` can look up `foo_writer` and +any schema listed under `schemas`. To register the default `public` schema in +the `search_path` (because some extensions are installed there) one has to add +the following (assuming no extra roles are desired only for the public schema): + +```yaml +spec: + preparedDatabases: + foo: + schemas: + public: + defaultRoles: false +``` + ### Database extensions Prepared databases also allow for creating Postgres extensions. They will be @@ -625,6 +644,10 @@ spec: - pci ``` +## In-place major version upgrade + +Starting with Spilo 13, operator supports in-place major version upgrade to a higher major version (e.g. from PG 10 to PG 12). To trigger the upgrade, simply increase the version in the manifest. It is your responsibility to test your applications against the new version before the upgrade; downgrading is not supported. The easiest way to do so is to try the upgrade on the cloned cluster first. For details of how Spilo does the upgrade [see here](https://github.com/zalando/spilo/pull/488), operator implementation is described [in the admin docs](administrator.md#minor-and-major-version-upgrade). + ## How to clone an existing PostgreSQL cluster You can spin up a new cluster as a clone of the existing one, using a `clone` @@ -636,10 +659,6 @@ section in the spec. There are two options here: Note, that cloning can also be used for [major version upgrades](administrator.md#minor-and-major-version-upgrade) of PostgreSQL. -## In-place major version upgrade - -Starting with Spilo 13, operator supports in-place major version upgrade to a higher major version (e.g. from PG 10 to PG 12). To trigger the upgrade, simply increase the version in the manifest. It is your responsibility to test your applications against the new version before the upgrade; downgrading is not supported. The easiest way to do so is to try the upgrade on the cloned cluster first. 
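As a minimal sketch (cluster name and versions are only examples, and other required manifest fields are omitted), the trigger is nothing more than raising `version` in the cluster manifest:

```yaml
apiVersion: "acid.zalan.do/v1"
kind: postgresql
metadata:
  name: demo-cluster
spec:
  postgresql:
    version: "13"  # previously "12"; increasing it starts the in-place upgrade
```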
For details of how Spilo does the upgrade [see here](https://github.com/zalando/spilo/pull/488), operator implementation is described [in the admin docs](administrator.md#minor-and-major-version-upgrade). - ### Clone from S3 Cloning from S3 has the advantage that there is no impact on your production @@ -687,7 +706,8 @@ spec: ### Clone directly -Another way to get a fresh copy of your source DB cluster is via basebackup. To +Another way to get a fresh copy of your source DB cluster is via +[pg_basebackup](https://www.postgresql.org/docs/13/app-pgbasebackup.html). To use this feature simply leave out the timestamp field from the clone section. The operator will connect to the service of the source cluster by name. If the cluster is called test, then the connection string will look like host=test @@ -875,8 +895,8 @@ size of volumes that correspond to the previously running pods is not changed. ## Logical backups -You can enable logical backups from the cluster manifest by adding the following -parameter in the spec section: +You can enable logical backups (SQL dumps) from the cluster manifest by adding +the following parameter in the spec section: ```yaml spec: diff --git a/manifests/complete-postgres-manifest.yaml b/manifests/complete-postgres-manifest.yaml index 9f2d19639..7f7b34d98 100644 --- a/manifests/complete-postgres-manifest.yaml +++ b/manifests/complete-postgres-manifest.yaml @@ -9,7 +9,7 @@ metadata: # "delete-date": "2020-08-31" # can only be deleted on that day if "delete-date "key is configured # "delete-clustername": "acid-test-cluster" # can only be deleted when name matches if "delete-clustername" key is configured spec: - dockerImage: registry.opensource.zalan.do/acid/spilo-13:2.0-p2 + dockerImage: registry.opensource.zalan.do/acid/spilo-13:2.0-p4 teamId: "acid" numberOfInstances: 2 users: # Application/Robot users @@ -148,18 +148,22 @@ spec: image: busybox command: [ "/bin/date" ] # sidecars: -# - name: "telegraf-sidecar" -# image: "telegraf:latest" -# resources: -# limits: -# cpu: 500m -# memory: 500Mi -# requests: -# cpu: 100m -# memory: 100Mi -# env: -# - name: "USEFUL_VAR" -# value: "perhaps-true" +# - name: "telegraf-sidecar" +# image: "telegraf:latest" +# ports: +# name: metrics +# containerPort: 8094 +# protocol: TCP +# resources: +# limits: +# cpu: 500m +# memory: 500Mi +# requests: +# cpu: 100m +# memory: 100Mi +# env: +# - name: "USEFUL_VAR" +# value: "perhaps-true" # Custom TLS certificate. Disabled unless tls.secretName has a value. 
tls: diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index f1bde6811..c35344b4d 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -16,7 +16,7 @@ data: # connection_pooler_default_cpu_request: "500m" # connection_pooler_default_memory_limit: 100Mi # connection_pooler_default_memory_request: 100Mi - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-12" + connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-14" # connection_pooler_max_db_connections: 60 # connection_pooler_mode: "transaction" # connection_pooler_number_of_instances: 2 @@ -32,7 +32,7 @@ data: # default_memory_request: 100Mi # delete_annotation_date_key: delete-date # delete_annotation_name_key: delete-clustername - docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p2 + docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p4 # downscaler_annotations: "deployment-time,downscaler/*" # enable_admin_role_for_users: "true" # enable_crd_validation: "true" @@ -63,7 +63,7 @@ data: # inherited_labels: application,environment # kube_iam_role: "" # log_s3_bucket: "" - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" + logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1" # logical_backup_google_application_credentials: "" logical_backup_job_prefix: "logical-backup-" logical_backup_provider: "s3" @@ -125,4 +125,4 @@ data: # wal_gs_bucket: "" # wal_s3_bucket: "" watched_namespace: "*" # listen to all namespaces - workers: "16" + workers: "8" diff --git a/manifests/minimal-fake-pooler-deployment.yaml b/manifests/minimal-fake-pooler-deployment.yaml index 5ee8cf05f..823b4f24d 100644 --- a/manifests/minimal-fake-pooler-deployment.yaml +++ b/manifests/minimal-fake-pooler-deployment.yaml @@ -23,7 +23,7 @@ spec: serviceAccountName: postgres-operator containers: - name: postgres-operator - image: registry.opensource.zalan.do/acid/pgbouncer:master-12 + image: registry.opensource.zalan.do/acid/pgbouncer:master-14 imagePullPolicy: IfNotPresent resources: requests: diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index 7388a765b..227ce6689 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -61,7 +61,7 @@ spec: properties: docker_image: type: string - default: "registry.opensource.zalan.do/acid/spilo-13:2.0-p2" + default: "registry.opensource.zalan.do/acid/spilo-13:2.0-p4" enable_crd_validation: type: boolean default: true @@ -378,7 +378,7 @@ spec: properties: logical_backup_docker_image: type: string - default: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" + default: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1" logical_backup_google_application_credentials: type: string logical_backup_job_prefix: @@ -507,7 +507,7 @@ spec: default: "pooler" connection_pooler_image: type: string - default: "registry.opensource.zalan.do/acid/pgbouncer:master-12" + default: "registry.opensource.zalan.do/acid/pgbouncer:master-14" connection_pooler_max_db_connections: type: integer default: 60 diff --git a/manifests/postgres-operator.yaml b/manifests/postgres-operator.yaml index a03959805..5c134345b 100644 --- a/manifests/postgres-operator.yaml +++ b/manifests/postgres-operator.yaml @@ -19,7 +19,7 @@ spec: serviceAccountName: postgres-operator containers: - name: postgres-operator - image: registry.opensource.zalan.do/acid/postgres-operator:v1.6.0 + image: 
registry.opensource.zalan.do/acid/postgres-operator:v1.6.1 imagePullPolicy: IfNotPresent resources: requests: diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 18680fbb0..0c0daa924 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -3,7 +3,7 @@ kind: OperatorConfiguration metadata: name: postgresql-operator-default-configuration configuration: - docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p2 + docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p4 # enable_crd_validation: true # enable_lazy_spilo_upgrade: false enable_pgversion_env_var: true @@ -21,6 +21,7 @@ configuration: # name: global-sidecar-1 # ports: # - containerPort: 80 + # protocol: TCP workers: 8 users: replication_username: standby @@ -117,7 +118,7 @@ configuration: # wal_gs_bucket: "" # wal_s3_bucket: "" logical_backup: - logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.0" + logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1" # logical_backup_google_application_credentials: "" logical_backup_job_prefix: "logical-backup-" logical_backup_provider: "s3" @@ -156,7 +157,7 @@ configuration: connection_pooler_default_cpu_request: "500m" connection_pooler_default_memory_limit: 100Mi connection_pooler_default_memory_request: 100Mi - connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-9" + connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-14" # connection_pooler_max_db_connections: 60 connection_pooler_mode: "transaction" connection_pooler_number_of_instances: 2 diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 6ef7a2f42..dbdd4afb4 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -39,7 +39,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.EnableSpiloWalPathCompat = fromCRD.EnableSpiloWalPathCompat result.EtcdHost = fromCRD.EtcdHost result.KubernetesUseConfigMaps = fromCRD.KubernetesUseConfigMaps - result.DockerImage = util.Coalesce(fromCRD.DockerImage, "registry.opensource.zalan.do/acid/spilo-13:2.0-p2") + result.DockerImage = util.Coalesce(fromCRD.DockerImage, "registry.opensource.zalan.do/acid/spilo-13:2.0-p4") result.Workers = util.CoalesceUInt32(fromCRD.Workers, 8) result.MinInstances = fromCRD.MinInstances result.MaxInstances = fromCRD.MaxInstances @@ -146,7 +146,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur // logical backup config result.LogicalBackupSchedule = util.Coalesce(fromCRD.LogicalBackup.Schedule, "30 00 * * *") - result.LogicalBackupDockerImage = util.Coalesce(fromCRD.LogicalBackup.DockerImage, "registry.opensource.zalan.do/acid/logical-backup:v1.6.0") + result.LogicalBackupDockerImage = util.Coalesce(fromCRD.LogicalBackup.DockerImage, "registry.opensource.zalan.do/acid/logical-backup:v1.6.1") result.LogicalBackupProvider = util.Coalesce(fromCRD.LogicalBackup.BackupProvider, "s3") result.LogicalBackupS3Bucket = fromCRD.LogicalBackup.S3Bucket result.LogicalBackupS3Region = fromCRD.LogicalBackup.S3Region diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 4f4940567..93fceff01 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -113,7 +113,7 @@ type Scalyr struct { // LogicalBackup defines configuration for 
logical backup type LogicalBackup struct { LogicalBackupSchedule string `name:"logical_backup_schedule" default:"30 00 * * *"` - LogicalBackupDockerImage string `name:"logical_backup_docker_image" default:"registry.opensource.zalan.do/acid/logical-backup:v1.6.0"` + LogicalBackupDockerImage string `name:"logical_backup_docker_image" default:"registry.opensource.zalan.do/acid/logical-backup:v1.6.1"` LogicalBackupProvider string `name:"logical_backup_provider" default:"s3"` LogicalBackupS3Bucket string `name:"logical_backup_s3_bucket" default:""` LogicalBackupS3Region string `name:"logical_backup_s3_region" default:""` @@ -151,7 +151,7 @@ type Config struct { WatchedNamespace string `name:"watched_namespace"` // special values: "*" means 'watch all namespaces', the empty string "" means 'watch a namespace where operator is deployed to' KubernetesUseConfigMaps bool `name:"kubernetes_use_configmaps" default:"false"` EtcdHost string `name:"etcd_host" default:""` // special values: the empty string "" means Patroni will use K8s as a DCS - DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-13:2.0-p2"` + DockerImage string `name:"docker_image" default:"registry.opensource.zalan.do/acid/spilo-13:2.0-p4"` SidecarImages map[string]string `name:"sidecar_docker_images"` // deprecated in favour of SidecarContainers SidecarContainers []v1.Container `name:"sidecars"` PodServiceAccountName string `name:"pod_service_account_name" default:"postgres-pod"`
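For deployments that pin these settings explicitly in the ConfigMap-based configuration rather than relying on the compiled-in defaults above, the same bumps correspond to the following keys (values copied verbatim from the `manifests/configmap.yaml` hunk in this diff):

```yaml
apiVersion: v1
kind: ConfigMap
metadata:
  name: postgres-operator
data:
  # image defaults and worker count as updated in this release
  docker_image: registry.opensource.zalan.do/acid/spilo-13:2.0-p4
  logical_backup_docker_image: "registry.opensource.zalan.do/acid/logical-backup:v1.6.1"
  connection_pooler_image: "registry.opensource.zalan.do/acid/pgbouncer:master-14"
  workers: "8"
```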