From f2807df6c34e2451ff725764da5eb92df49ffa78 Mon Sep 17 00:00:00 2001 From: Jociele Padilha Date: Tue, 1 Mar 2022 16:57:41 +0100 Subject: [PATCH] add new parameter for Patroni API (PatroniAPICheckInterval, PatroniAPICheckTimeout) --- .../crds/operatorconfigurations.yaml | 6 ++++++ charts/postgres-operator/values.yaml | 4 ++++ docs/reference/operator_parameters.md | 7 +++++++ manifests/configmap.yaml | 2 ++ manifests/operatorconfiguration.crd.yaml | 6 ++++++ .../postgresql-operator-default-configuration.yaml | 2 ++ pkg/apis/acid.zalan.do/v1/crds.go | 6 ++++++ .../v1/operator_configuration_type.go | 14 ++++++++------ pkg/cluster/pod.go | 10 +++++----- pkg/cluster/pod_test.go | 10 +++++++++- pkg/controller/operator_config.go | 2 ++ pkg/util/config/config.go | 2 ++ 12 files changed, 59 insertions(+), 12 deletions(-) diff --git a/charts/postgres-operator/crds/operatorconfigurations.yaml b/charts/postgres-operator/crds/operatorconfigurations.yaml index f510e08f5..52e3f5155 100644 --- a/charts/postgres-operator/crds/operatorconfigurations.yaml +++ b/charts/postgres-operator/crds/operatorconfigurations.yaml @@ -349,6 +349,12 @@ spec: timeouts: type: object properties: + patroni_api_check_interval: + type: string + default: "1s" + patroni_api_check_timeout: + type: string + default: "5s" pod_label_wait_timeout: type: string default: "10m" diff --git a/charts/postgres-operator/values.yaml b/charts/postgres-operator/values.yaml index 288efe763..a3be38af0 100644 --- a/charts/postgres-operator/values.yaml +++ b/charts/postgres-operator/values.yaml @@ -204,6 +204,10 @@ configPostgresPodResources: # timeouts related to some operator actions configTimeouts: + # interval between consecutive attempts of operator calling the Patroni API + patroni_api_check_interval: 1s + # timeout when waiting for successful response from Patroni API + patroni_api_check_timeout: 5s # timeout when waiting for the Postgres pods to be deleted pod_deletion_wait_timeout: 10m # timeout when waiting for 
pod role and cluster labels diff --git a/docs/reference/operator_parameters.md b/docs/reference/operator_parameters.md index f3d9be88f..923d001e3 100644 --- a/docs/reference/operator_parameters.md +++ b/docs/reference/operator_parameters.md @@ -498,6 +498,13 @@ configuration `resource_check_interval` and `resource_check_timeout` have no effect, and the parameters are grouped under the `timeouts` key in the CRD-based configuration. +* **patroni_api_check_interval** + the interval between consecutive attempts of the operator calling the + Patroni API. The default is `1s`. + +* **patroni_api_check_timeout** + the timeout when waiting for a successful response from the Patroni API. The default is `5s`. + * **resource_check_interval** interval to wait between consecutive attempts to check for the presence of some Kubernetes resource (i.e. `StatefulSet` or `PodDisruptionBudget`). The diff --git a/manifests/configmap.yaml b/manifests/configmap.yaml index b3aaa3c66..9b81bfdb5 100644 --- a/manifests/configmap.yaml +++ b/manifests/configmap.yaml @@ -93,6 +93,8 @@ data: # pam_configuration: | # https://info.example.com/oauth2/tokeninfo?access_token= uid realm=/employees # pam_role_name: zalandos + patroni_api_check_interval: "1s" + patroni_api_check_timeout: "5s" # password_rotation_interval: "90" # password_rotation_user_retention: "180" pdb_name_format: "postgres-{cluster}-pdb" diff --git a/manifests/operatorconfiguration.crd.yaml b/manifests/operatorconfiguration.crd.yaml index d086998cf..ae6104c1a 100644 --- a/manifests/operatorconfiguration.crd.yaml +++ b/manifests/operatorconfiguration.crd.yaml @@ -347,6 +347,12 @@ spec: timeouts: type: object properties: + patroni_api_check_interval: + type: string + default: "1s" + patroni_api_check_timeout: + type: string + default: "5s" pod_label_wait_timeout: type: string default: "10m" diff --git a/manifests/postgresql-operator-default-configuration.yaml b/manifests/postgresql-operator-default-configuration.yaml index 87b5436d5..ddd74594a 100644 --- 
a/manifests/postgresql-operator-default-configuration.yaml +++ b/manifests/postgresql-operator-default-configuration.yaml @@ -107,6 +107,8 @@ configuration: # min_cpu_limit: 250m # min_memory_limit: 250Mi timeouts: + patroni_api_check_interval: 1s + patroni_api_check_timeout: 5s pod_label_wait_timeout: 10m pod_deletion_wait_timeout: 10m ready_wait_interval: 4s diff --git a/pkg/apis/acid.zalan.do/v1/crds.go b/pkg/apis/acid.zalan.do/v1/crds.go index 9dc3d167e..03dbd15a2 100644 --- a/pkg/apis/acid.zalan.do/v1/crds.go +++ b/pkg/apis/acid.zalan.do/v1/crds.go @@ -1432,6 +1432,12 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{ "timeouts": { Type: "object", Properties: map[string]apiextv1.JSONSchemaProps{ + "patroni_api_check_interval": { + Type: "string", + }, + "patroni_api_check_timeout": { + Type: "string", + }, "pod_label_wait_timeout": { Type: "string", }, diff --git a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go index 1298c6834..f1758d646 100644 --- a/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go +++ b/pkg/apis/acid.zalan.do/v1/operator_configuration_type.go @@ -111,12 +111,14 @@ type PostgresPodResourcesDefaults struct { // OperatorTimeouts defines the timeout of ResourceCheck, PodWait, ReadyWait type OperatorTimeouts struct { - ResourceCheckInterval Duration `json:"resource_check_interval,omitempty"` - ResourceCheckTimeout Duration `json:"resource_check_timeout,omitempty"` - PodLabelWaitTimeout Duration `json:"pod_label_wait_timeout,omitempty"` - PodDeletionWaitTimeout Duration `json:"pod_deletion_wait_timeout,omitempty"` - ReadyWaitInterval Duration `json:"ready_wait_interval,omitempty"` - ReadyWaitTimeout Duration `json:"ready_wait_timeout,omitempty"` + ResourceCheckInterval Duration `json:"resource_check_interval,omitempty"` + ResourceCheckTimeout Duration `json:"resource_check_timeout,omitempty"` + PodLabelWaitTimeout Duration 
`json:"pod_label_wait_timeout,omitempty"` + PodDeletionWaitTimeout Duration `json:"pod_deletion_wait_timeout,omitempty"` + ReadyWaitInterval Duration `json:"ready_wait_interval,omitempty"` + ReadyWaitTimeout Duration `json:"ready_wait_timeout,omitempty"` + PatroniAPICheckInterval Duration `json:"patroni_api_check_interval,omitempty"` + PatroniAPICheckTimeout Duration `json:"patroni_api_check_timeout,omitempty"` } // LoadBalancerConfiguration defines the LB configuration diff --git a/pkg/cluster/pod.go b/pkg/cluster/pod.go index 9e8ded844..26c4c332d 100644 --- a/pkg/cluster/pod.go +++ b/pkg/cluster/pod.go @@ -67,7 +67,7 @@ func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error { return fmt.Errorf("could not form patch for pod's rolling update flag: %v", err) } - err = retryutil.Retry(1*time.Second, 5*time.Second, + err = retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout, func() (bool, error) { _, err2 := c.KubeClient.Pods(pod.Namespace).Patch( context.TODO(), @@ -356,7 +356,7 @@ func (c *Cluster) getPatroniConfig(pod *v1.Pod) (acidv1.Patroni, map[string]stri pgParameters map[string]string ) podName := util.NameFromMeta(pod.ObjectMeta) - err := retryutil.Retry(1*time.Second, 5*time.Second, + err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout, func() (bool, error) { var err error patroniConfig, pgParameters, err = c.patroni.GetConfig(pod) @@ -377,7 +377,7 @@ func (c *Cluster) getPatroniConfig(pod *v1.Pod) (acidv1.Patroni, map[string]stri func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error) { var memberData patroni.MemberData - err := retryutil.Retry(1*time.Second, 5*time.Second, + err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout, func() (bool, error) { var err error memberData, err = c.patroni.GetMemberData(pod) @@ -403,7 +403,7 @@ func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, 
error) { defer c.unregisterPodSubscriber(podName) stopChan := make(chan struct{}) - err := retryutil.Retry(1*time.Second, 5*time.Second, + err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout, func() (bool, error) { err2 := c.KubeClient.Pods(podName.Namespace).Delete( context.TODO(), @@ -492,7 +492,7 @@ func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, e candidates := make([]patroni.ClusterMember, 0) syncCandidates := make([]patroni.ClusterMember, 0) - err := retryutil.Retry(1*time.Second, 5*time.Second, + err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout, func() (bool, error) { var err error members, err = c.patroni.GetClusterMembers(master) diff --git a/pkg/cluster/pod_test.go b/pkg/cluster/pod_test.go index a533ebafd..068145312 100644 --- a/pkg/cluster/pod_test.go +++ b/pkg/cluster/pod_test.go @@ -6,11 +6,13 @@ import ( "io/ioutil" "net/http" "testing" + "time" "github.com/golang/mock/gomock" "github.com/zalando/postgres-operator/mocks" acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1" "github.com/zalando/postgres-operator/pkg/spec" + "github.com/zalando/postgres-operator/pkg/util/config" "github.com/zalando/postgres-operator/pkg/util/k8sutil" "github.com/zalando/postgres-operator/pkg/util/patroni" ) @@ -22,7 +24,13 @@ func TestGetSwitchoverCandidate(t *testing.T) { ctrl := gomock.NewController(t) defer ctrl.Finish() - var cluster = New(Config{}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder) + var cluster = New( + Config{ + OpConfig: config.Config{ + PatroniAPICheckInterval: time.Duration(1), + PatroniAPICheckTimeout: time.Duration(5), + }, + }, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder) // simulate different member scenarios tests := []struct { diff --git a/pkg/controller/operator_config.go b/pkg/controller/operator_config.go index fbf12bfb9..bb1ea2f77 100644 --- 
a/pkg/controller/operator_config.go +++ b/pkg/controller/operator_config.go @@ -137,6 +137,8 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur result.PodDeletionWaitTimeout = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.PodDeletionWaitTimeout), "10m") result.ReadyWaitInterval = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.ReadyWaitInterval), "4s") result.ReadyWaitTimeout = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.ReadyWaitTimeout), "30s") + result.PatroniAPICheckInterval = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.PatroniAPICheckInterval), "1s") + result.PatroniAPICheckTimeout = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.PatroniAPICheckTimeout), "5s") // load balancer config result.DbHostedZone = util.Coalesce(fromCRD.LoadBalancer.DbHostedZone, "db.example.com") diff --git a/pkg/util/config/config.go b/pkg/util/config/config.go index 0dc1004a7..99b514443 100644 --- a/pkg/util/config/config.go +++ b/pkg/util/config/config.go @@ -221,6 +221,8 @@ type Config struct { MajorVersionUpgradeTeamAllowList []string `name:"major_version_upgrade_team_allow_list" default:""` MinimalMajorVersion string `name:"minimal_major_version" default:"9.6"` TargetMajorVersion string `name:"target_major_version" default:"14"` + PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"` + PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"` } // MustMarshal marshals the config or panics