From db96273ab3871e7f55305bc864abfb73f1962777 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Tue, 16 May 2023 11:17:06 +0200 Subject: [PATCH 1/3] Init add liveness probe Signed-off-by: DavidSpek --- .../crds/operatorconfigurations.yaml | 133 +++++++++++++++++ .../postgres-operator/crds/postgresqls.yaml | 134 ++++++++++++++++++ charts/postgres-operator/values.yaml | 13 ++ docs/reference/cluster_manifest.md | 4 + docs/reference/operator_parameters.md | 4 + manifests/configmap.yaml | 10 ++ manifests/operatorconfiguration.crd.yaml | 133 +++++++++++++++++ ...gresql-operator-default-configuration.yaml | 10 ++ manifests/postgresql.crd.yaml | 134 ++++++++++++++++++ .../v1/operator_configuration_type.go | 1 + pkg/apis/acid.zalan.do/v1/postgresql_type.go | 1 + pkg/cluster/k8sres.go | 15 ++ pkg/controller/operator_config.go | 1 + pkg/util/config/config.go | 1 + 14 files changed, 594 insertions(+) diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index 46d135fa5..9b5bf0875 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -260,6 +260,139 @@ spec: type: array items: type: string + liveness_probe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: One and only one of the following should be specified. + Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. 
+ items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. 
+ format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a TCP + port. TCP hooks not yet supported TODO: implement a realistic + TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to + terminate gracefully upon probe failure. The grace period + is the duration in seconds after the processes running in + the pod are sent a termination signal and the time when + the processes are forcibly halted with a kill signal. Set + this value longer than the expected cleanup time for your + process. If this value is nil, the pod's terminationGracePeriodSeconds + will be used. Otherwise, this value overrides the value + provided by the pod spec. Value must be non-negative integer. + The value zero indicates stop immediately via the kill signal + (no opportunity to shut down). This is a beta field and + requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is + used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. 
More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object master_pod_move_timeout: type: string default: "20m" diff --git a/charts/postgres-operator/crds/postgresqls.yaml b/charts/postgres-operator/crds/postgresqls.yaml index 6f938cf8f..ba4683a71 100644 --- a/charts/postgres-operator/crds/postgresqls.yaml +++ b/charts/postgres-operator/crds/postgresqls.yaml @@ -202,6 +202,140 @@ spec: items: type: object x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. 
+ items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. 
+ x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object init_containers: type: array description: deprecated diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 3032cd618..3cbc10ba1 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -205,6 +205,19 @@ configKubernetes: # whether the Spilo container should run with additional permissions other than parent. 
# required by cron which needs setuid spilo_allow_privilege_escalation: true + + # liveness probe for the spilo pod + # liveness_probe: + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 + # storage resize strategy, available options are: ebs, pvc, off or mixed storage_resize_mode: pvc # pod toleration assigned to instances of every Postgres cluster diff --git a/docs/reference/cluster_manifest.md b/docs/reference/cluster_manifest.md index 8caf55b94..a7706f9cd 100644 --- a/docs/reference/cluster_manifest.md +++ b/docs/reference/cluster_manifest.md @@ -85,6 +85,10 @@ These parameters are grouped directly under the `spec` key in the manifest. requires a custom Spilo image. Note the FSGroup of a Pod cannot be changed without recreating a new Pod. Optional. +* **livenessProbe** + custom liveness probe for the Spilo container, given as a Kubernetes probe + spec. Overrides the operator-wide `liveness_probe` default. Optional. + * **enableMasterLoadBalancer** boolean flag to override the operator defaults (set by the `enable_master_load_balancer` parameter) to define whether to enable the load diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index 5818d817d..ef4876e06 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -469,6 +469,10 @@ configuration they are grouped under the `kubernetes` key. process. Required by cron which needs setuid. Without this parameter, certification rotation & backups will not be done. The default is `true`. +* **liveness_probe** + default liveness probe (a Kubernetes probe spec) for the Spilo container of + every cluster unless the manifest sets `livenessProbe`. Not set by default. + * **additional_pod_capabilities** list of additional capabilities to be added to the postgres container's SecurityContext (e.g. SYS_NICE etc.). 
Please, make sure first that the diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index 2388c22b2..4d6d957fa 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -152,6 +152,16 @@ data: # spilo_runasgroup: 103 # spilo_fsgroup: 103 spilo_privileged: "false" + # liveness_probe: |- + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 storage_resize_mode: "pvc" super_username: postgres # target_major_version: "15" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index f60d8426e..7bc8ca08f 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -258,6 +258,139 @@ spec: type: array items: type: string + liveness_probe: + description: Probe describes a health check to be performed against + a container to determine whether it is alive or ready to receive + traffic. + properties: + exec: + description: One and only one of the following should be specified. + Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute inside + the container, the working directory for the command is + root ('/') in the container's filesystem. The command + is simply exec'd, it is not run inside a shell, so traditional + shell instructions ('|', etc) won't work. To use a shell, + you need to explicitly call out to that shell. Exit + status of 0 is treated as live/healthy and non-zero + is unhealthy. + items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe to + be considered failed after having succeeded. Defaults to + 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. 
+ properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header to + be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has started + before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe to + be considered successful after having failed. Defaults to + 1. Must be 1 for liveness and startup. Minimum value is + 1. + format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a TCP + port. TCP hooks not yet supported TODO: implement a realistic + TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' 
+ type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on the + container. Number must be in the range 1 to 65535. Name + must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs to + terminate gracefully upon probe failure. The grace period + is the duration in seconds after the processes running in + the pod are sent a termination signal and the time when + the processes are forcibly halted with a kill signal. Set + this value longer than the expected cleanup time for your + process. If this value is nil, the pod's terminationGracePeriodSeconds + will be used. Otherwise, this value overrides the value + provided by the pod spec. Value must be non-negative integer. + The value zero indicates stop immediately via the kill signal + (no opportunity to shut down). This is a beta field and + requires enabling ProbeTerminationGracePeriod feature gate. + Minimum value is 1. spec.terminationGracePeriodSeconds is + used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. 
More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object master_pod_move_timeout: type: string default: "20m" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index ad0f99da3..c39427d64 100644 --- a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -78,6 +78,16 @@ configuration: # inherited_labels: # - application # - environment + # liveness_probe: + # httpGet: + # scheme: HTTP + # path: /liveness + # port: 8008 + # initialDelaySeconds: 10 + # periodSeconds: 10 + # timeoutSeconds: 5 + # successThreshold: 1 + # failureThreshold: 3 master_pod_move_timeout: 20m # node_readiness_label: # status: ready diff --git a/manifests/postgresql.crd.yaml b/manifests/postgresql.crd.yaml index 6066abad1..001b8ed8b 100644 --- a/manifests/postgresql.crd.yaml +++ b/manifests/postgresql.crd.yaml @@ -200,6 +200,140 @@ spec: items: type: object x-kubernetes-preserve-unknown-fields: true + livenessProbe: + description: 'Periodic probe of container liveness. Container + will be restarted if the probe fails. Cannot be updated. More + info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + properties: + exec: + description: One and only one of the following should be + specified. Exec specifies the action to take. + properties: + command: + description: Command is the command line to execute + inside the container, the working directory for the + command is root ('/') in the container's filesystem. + The command is simply exec'd, it is not run inside + a shell, so traditional shell instructions ('|', etc) + won't work. To use a shell, you need to explicitly + call out to that shell. Exit status of 0 is treated + as live/healthy and non-zero is unhealthy. 
+ items: + type: string + type: array + type: object + failureThreshold: + description: Minimum consecutive failures for the probe + to be considered failed after having succeeded. Defaults + to 3. Minimum value is 1. + format: int32 + type: integer + httpGet: + description: HTTPGet specifies the http request to perform. + properties: + host: + description: Host name to connect to, defaults to the + pod IP. You probably want to set "Host" in httpHeaders + instead. + type: string + httpHeaders: + description: Custom headers to set in the request. HTTP + allows repeated headers. + items: + description: HTTPHeader describes a custom header + to be used in HTTP probes + properties: + name: + description: The header field name + type: string + value: + description: The header field value + type: string + required: + - name + - value + type: object + type: array + path: + description: Path to access on the HTTP server. + type: string + port: + anyOf: + - type: integer + - type: string + description: Name or number of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + scheme: + description: Scheme to use for connecting to the host. + Defaults to HTTP. + type: string + required: + - port + type: object + initialDelaySeconds: + description: 'Number of seconds after the container has + started before liveness probes are initiated. More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + periodSeconds: + description: How often (in seconds) to perform the probe. + Default to 10 seconds. Minimum value is 1. + format: int32 + type: integer + successThreshold: + description: Minimum consecutive successes for the probe + to be considered successful after having failed. Defaults + to 1. Must be 1 for liveness and startup. Minimum value + is 1. 
+ format: int32 + type: integer + tcpSocket: + description: 'TCPSocket specifies an action involving a + TCP port. TCP hooks not yet supported TODO: implement + a realistic TCP lifecycle hook' + properties: + host: + description: 'Optional: Host name to connect to, defaults + to the pod IP.' + type: string + port: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the container. Number must be in the range 1 to 65535. + Name must be an IANA_SVC_NAME. + x-kubernetes-int-or-string: true + required: + - port + type: object + terminationGracePeriodSeconds: + description: Optional duration in seconds the pod needs + to terminate gracefully upon probe failure. The grace + period is the duration in seconds after the processes + running in the pod are sent a termination signal and the + time when the processes are forcibly halted with a kill + signal. Set this value longer than the expected cleanup + time for your process. If this value is nil, the pod's + terminationGracePeriodSeconds will be used. Otherwise, + this value overrides the value provided by the pod spec. + Value must be non-negative integer. The value zero indicates + stop immediately via the kill signal (no opportunity to + shut down). This is a beta field and requires enabling + ProbeTerminationGracePeriod feature gate. Minimum value + is 1. spec.terminationGracePeriodSeconds is used if unset. + format: int64 + type: integer + timeoutSeconds: + description: 'Number of seconds after which the probe times + out. Defaults to 1 second. Minimum value is 1. 
More info: + https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes' + format: int32 + type: integer + type: object init_containers: type: array description: deprecated diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index d966aa1aa..a6fe92196 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -102,6 +102,7 @@ type KubernetesMetaConfiguration struct { PodManagementPolicy string `json:"pod_management_policy,omitempty"` EnableReadinessProbe bool `json:"enable_readiness_probe,omitempty"` EnableCrossNamespaceSecret bool `json:"enable_cross_namespace_secret,omitempty"` + LivenessProbe *v1.Probe `json:"liveness_probe,omitempty"` } // PostgresPodResourcesDefaults defines the spec of default resources diff --git a/pkg/apis/acid.zalan.do/v1/postgresql_type.go b/pkg/apis/acid.zalan.do/v1/postgresql_type.go index 67007b522..31139a6f7 100644 --- a/pkg/apis/acid.zalan.do/v1/postgresql_type.go +++ b/pkg/apis/acid.zalan.do/v1/postgresql_type.go @@ -87,6 +87,7 @@ type PostgresSpec struct { AdditionalVolumes []AdditionalVolume `json:"additionalVolumes,omitempty"` Streams []Stream `json:"streams,omitempty"` Env []v1.EnvVar `json:"env,omitempty"` + LivenessProbe *v1.Probe `json:"livenessProbe,omitempty"` // deprecated json tags InitContainersOld []v1.Container `json:"init_containers,omitempty"` diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 8be32f09c..c0e99f93f 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1207,6 +1207,19 @@ func generateSpiloReadinessProbe() *v1.Probe { } } +func generateSpiloLivenessProbe(probe, defaultProbe *v1.Probe) *v1.Probe { + + if probe != nil { + return probe + } + + if defaultProbe != nil { + return defaultProbe + } + + return nil +} + func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.StatefulSet, error) 
{ var ( @@ -1326,6 +1339,8 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef spiloContainer.ReadinessProbe = generateSpiloReadinessProbe() } + spiloContainer.LivenessProbe = generateSpiloLivenessProbe(spec.LivenessProbe, c.OpConfig.LivenessProbe) + // generate container specs for sidecars specified in the cluster manifest clusterSpecificSidecars := []v1.Container{} if spec.Sidecars != nil && len(spec.Sidecars) > 0 { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index 8aaf8759c..d6264fec7 100644 --- a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -125,6 +125,7 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.PodAntiAffinityTopologyKey = util.Coalesce(fromCRD.Kubernetes.PodAntiAffinityTopologyKey, "kubernetes.io/hostname") result.PodAntiAffinityPreferredDuringScheduling = fromCRD.Kubernetes.PodAntiAffinityPreferredDuringScheduling result.PodToleration = fromCRD.Kubernetes.PodToleration + result.LivenessProbe = fromCRD.Kubernetes.LivenessProbe // Postgres Pod resources result.DefaultCPURequest = util.Coalesce(fromCRD.PostgresPodResources.DefaultCPURequest, "100m") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 82b878b61..29b0b158d 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -245,6 +245,7 @@ type Config struct { TargetMajorVersion string `name:"target_major_version" default:"15"` PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"` PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"` + LivenessProbe *v1.Probe `json:"liveness_probe,omitempty"` EnablePatroniFailsafeMode *bool `name:"enable_patroni_failsafe_mode" default:"false"` } From 033ff473f8345b46543cbf7a92c61b091938ba58 Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Tue, 16 May 2023 11:17:06 +0200 Subject: [PATCH 2/3] update statefulset if probes change 
Signed-off-by: DavidSpek --- pkg/cluster/cluster.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 29c321efb..a361989db 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -561,6 +561,10 @@ func (c *Cluster) compareContainers(description string, setA, setB []v1.Containe func(a, b v1.Container) bool { return !reflect.DeepEqual(a.SecurityContext, b.SecurityContext) }), newCheck("new statefulset %s's %s (index %d) volume mounts do not match the current one", func(a, b v1.Container) bool { return !reflect.DeepEqual(a.VolumeMounts, b.VolumeMounts) }), + newCheck("new statefulset %s's %s (index %d) readiness probe do not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.ReadinessProbe, b.ReadinessProbe) }), + newCheck("new statefulset %s's %s (index %d) liveness probe do not match the current one", + func(a, b v1.Container) bool { return !reflect.DeepEqual(a.LivenessProbe, b.LivenessProbe) }), } if !c.OpConfig.EnableLazySpiloUpgrade { From fee5a7074640c2fa3f072570e7613d7fd7e657cb Mon Sep 17 00:00:00 2001 From: DavidSpek Date: Tue, 16 May 2023 11:17:23 +0200 Subject: [PATCH 3/3] update deepcopy and crd Signed-off-by: DavidSpek --- pkg/apis/acid.zalan.do/v1/crds.go | 252 ++++++++++++++++++ .../acid.zalan.do/v1/zz_generated.deepcopy.go | 10 + pkg/util/config/config.go | 1 + 3 files changed, 263 insertions(+) diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 558a03f0f..a8650c37a 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -363,6 +363,132 @@ var PostgresCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "livenessProbe": { + Description: "Periodic probe of container liveness. Container will be restarted if the probe fails. Cannot be updated. 
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "exec": { + Description: "One and only one of the following should be specified. Exec specifies the action to take.", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "command": { + Description: "Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "string", + }, + }, + }, + }, + }, + "failureThreshold": { + Description: "Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "httpGet": { + Description: "HTTPGet specifies the http request to perform.", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Host name to connect to, defaults to the pod IP. You probably want to set \"Host\" in httpHeaders instead.", + Type: "string", + }, + "httpHeaders": { + Description: "Custom headers to set in the request. 
HTTP allows repeated headers.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Description: "HTTPHeader describes a custom header to be used in HTTP probes", + Type: "object", + Required: []string{"name", "value"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "name": { + Description: "The header field name", + Type: "string", + }, + "value": { + Description: "The header field value", + Type: "string", + }, + }, + }, + }, + }, + "path": { + Description: "Path to access on the HTTP server.", + Type: "string", + }, + "port": { + Description: "Name or number of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + XIntOrString: true, + }, + "scheme": { + Description: "Scheme to use for connecting to the host. Defaults to HTTP.", + Type: "string", + }, + }, + }, + "initialDelaySeconds": { + Description: "Number of seconds after the container has started before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + "periodSeconds": { + Description: "How often (in seconds) to perform the probe. Default to 10 seconds. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "successThreshold": { + Description: "Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "tcpSocket": { + Description: "TCPSocket specifies an action involving a TCP port. 
TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Optional: Host name to connect to, defaults to the pod IP.", + Type: "string", + }, + "port": { + Description: "Number or name of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + XIntOrString: true, + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + }, + }, + }, + "terminationGracePeriodSeconds": { + Description: "Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.", + Type: "integer", + Format: "int64", + }, + "timeoutSeconds": { + Description: "Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + }, + }, "nodeAffinity": { Type: "object", Properties: map[string]apiextv1.JSONSchemaProps{ @@ -1360,6 +1486,132 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ }, }, }, + "liveness_probe": { + Description: "Periodic probe of container liveness. 
Container will be restarted if the probe fails. Cannot be updated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "exec": { + Description: "One and only one of the following should be specified. Exec specifies the action to take.", + Type: "object", + Properties: map[string]apiextv1.JSONSchemaProps{ + "command": { + Description: "Command is the command line to execute inside the container, the working directory for the command is root ('/') in the container's filesystem. The command is simply exec'd, it is not run inside a shell, so traditional shell instructions ('|', etc) won't work. To use a shell, you need to explicitly call out to that shell. Exit status of 0 is treated as live/healthy and non-zero is unhealthy.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Type: "string", + }, + }, + }, + }, + }, + "failureThreshold": { + Description: "Minimum consecutive failures for the probe to be considered failed after having succeeded. Defaults to 3. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "httpGet": { + Description: "HTTPGet specifies the http request to perform.", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Host name to connect to, defaults to the pod IP. You probably want to set \"Host\" in httpHeaders instead.", + Type: "string", + }, + "httpHeaders": { + Description: "Custom headers to set in the request. 
HTTP allows repeated headers.", + Type: "array", + Items: &apiextv1.JSONSchemaPropsOrArray{ + Schema: &apiextv1.JSONSchemaProps{ + Description: "HTTPHeader describes a custom header to be used in HTTP probes", + Type: "object", + Required: []string{"name", "value"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "name": { + Description: "The header field name", + Type: "string", + }, + "value": { + Description: "The header field value", + Type: "string", + }, + }, + }, + }, + }, + "path": { + Description: "Path to access on the HTTP server.", + Type: "string", + }, + "port": { + Description: "Name or number of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + XIntOrString: true, + }, + "scheme": { + Description: "Scheme to use for connecting to the host. Defaults to HTTP.", + Type: "string", + }, + }, + }, + "initialDelaySeconds": { + Description: "Number of seconds after the container has started before liveness probes are initiated. More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + "periodSeconds": { + Description: "How often (in seconds) to perform the probe. Default to 10 seconds. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "successThreshold": { + Description: "Minimum consecutive successes for the probe to be considered successful after having failed. Defaults to 1. Must be 1 for liveness and startup. Minimum value is 1.", + Type: "integer", + Format: "int32", + }, + "tcpSocket": { + Description: "TCPSocket specifies an action involving a TCP port. 
TCP hooks not yet supported TODO: implement a realistic TCP lifecycle hook", + Type: "object", + Required: []string{"port"}, + Properties: map[string]apiextv1.JSONSchemaProps{ + "host": { + Description: "Optional: Host name to connect to, defaults to the pod IP.", + Type: "string", + }, + "port": { + Description: "Number or name of the port to access on the container. Number must be in the range 1 to 65535. Name must be an IANA_SVC_NAME.", + XIntOrString: true, + AnyOf: []apiextv1.JSONSchemaProps{ + { + Type: "integer", + }, + { + Type: "string", + }, + }, + }, + }, + }, + "terminationGracePeriodSeconds": { + Description: "Optional duration in seconds the pod needs to terminate gracefully upon probe failure. The grace period is the duration in seconds after the processes running in the pod are sent a termination signal and the time when the processes are forcibly halted with a kill signal. Set this value longer than the expected cleanup time for your process. If this value is nil, the pod's terminationGracePeriodSeconds will be used. Otherwise, this value overrides the value provided by the pod spec. Value must be non-negative integer. The value zero indicates stop immediately via the kill signal (no opportunity to shut down). This is a beta field and requires enabling ProbeTerminationGracePeriod feature gate. Minimum value is 1. spec.terminationGracePeriodSeconds is used if unset.", + Type: "integer", + Format: "int64", + }, + "timeoutSeconds": { + Description: "Number of seconds after which the probe times out. Defaults to 1 second. Minimum value is 1. 
More info: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle#container-probes", + Type: "integer", + Format: "int32", + }, + }, + }, "master_pod_move_timeout": { Type: "string", }, diff --git a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go index a43c995c5..80b136fd6 100644 --- a/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go +++ b/pkg/apis/acid.zalan.do/v1/zz_generated.deepcopy.go @@ -260,6 +260,11 @@ func (in *KubernetesMetaConfiguration) DeepCopyInto(out *KubernetesMetaConfigura } } out.PodEnvironmentConfigMap = in.PodEnvironmentConfigMap + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(corev1.Probe) + (*in).DeepCopyInto(*out) + } return } @@ -832,6 +837,11 @@ func (in *PostgresSpec) DeepCopyInto(out *PostgresSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.LivenessProbe != nil { + in, out := &in.LivenessProbe, &out.LivenessProbe + *out = new(corev1.Probe) + (*in).DeepCopyInto(*out) + } if in.InitContainersOld != nil { in, out := &in.InitContainersOld, &out.InitContainersOld *out = make([]corev1.Container, len(*in)) diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 29b0b158d..352011bfd 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -61,6 +61,7 @@ type Resources struct { NodeReadinessLabel map[string]string `name:"node_readiness_label" default:""` NodeReadinessLabelMerge string `name:"node_readiness_label_merge" default:"OR"` ShmVolume *bool `name:"enable_shm_volume" default:"true"` + LivenessProbe *v1.Probe `name:"liveness_probe"` MaxInstances int32 `name:"max_instances" default:"-1"` MinInstances int32 `name:"min_instances" default:"-1"`