Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Patroni API timeout #1803

Merged
merged 1 commit into from
Mar 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
6 changes: 6 additions & 0 deletions charts/postgres-operator/crds/operatorconfigurations.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,12 @@ spec:
timeouts:
type: object
properties:
patroni_api_check_interval:
type: string
default: "1s"
patroni_api_check_timeout:
type: string
default: "5s"
pod_label_wait_timeout:
type: string
default: "10m"
Expand Down
4 changes: 4 additions & 0 deletions charts/postgres-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@ configPostgresPodResources:

# timeouts related to some operator actions
configTimeouts:
# interval between consecutive attempts of operator calling the Patroni API
patroni_api_check_interval: 1s
# timeout when waiting for successful response from Patroni API
patroni_api_check_timeout: 5s
# timeout when waiting for the Postgres pods to be deleted
pod_deletion_wait_timeout: 10m
# timeout when waiting for pod role and cluster labels
Expand Down
7 changes: 7 additions & 0 deletions docs/reference/operator_parameters.md
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,13 @@ configuration `resource_check_interval` and `resource_check_timeout` have no
effect, and the parameters are grouped under the `timeouts` key in the
CRD-based configuration.

* **patroni_api_check_interval**
the interval between consecutive attempts of the operator to call the
Patroni API while waiting for a successful response. The default is `1s`.

* **patroni_api_check_timeout**
the timeout when waiting for a successful response from the Patroni API.
The default is `5s`.

* **resource_check_interval**
interval to wait between consecutive attempts to check for the presence of
some Kubernetes resource (i.e. `StatefulSet` or `PodDisruptionBudget`). The
Expand Down
2 changes: 2 additions & 0 deletions manifests/configmap.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,8 @@ data:
# pam_configuration: |
# https://info.example.com/oauth2/tokeninfo?access_token= uid realm=/employees
# pam_role_name: zalandos
patroni_api_check_interval: "1s"
patroni_api_check_timeout: "5s"
# password_rotation_interval: "90"
# password_rotation_user_retention: "180"
pdb_name_format: "postgres-{cluster}-pdb"
Expand Down
6 changes: 6 additions & 0 deletions manifests/operatorconfiguration.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,12 @@ spec:
timeouts:
type: object
properties:
patroni_api_check_interval:
type: string
default: "1s"
patroni_api_check_timeout:
type: string
default: "5s"
pod_label_wait_timeout:
type: string
default: "10m"
Expand Down
2 changes: 2 additions & 0 deletions manifests/postgresql-operator-default-configuration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ configuration:
# min_cpu_limit: 250m
# min_memory_limit: 250Mi
timeouts:
patroni_api_check_interval: 1s
patroni_api_check_timeout: 5s
pod_label_wait_timeout: 10m
pod_deletion_wait_timeout: 10m
ready_wait_interval: 4s
Expand Down
6 changes: 6 additions & 0 deletions pkg/apis/acid.zalan.do/v1/crds.go
Original file line number Diff line number Diff line change
Expand Up @@ -1432,6 +1432,12 @@ var OperatorConfigCRDResourceValidation = apiextv1.CustomResourceValidation{
"timeouts": {
Type: "object",
Properties: map[string]apiextv1.JSONSchemaProps{
"patroni_api_check_interval": {
Type: "string",
},
"patroni_api_check_timeout": {
Type: "string",
},
"pod_label_wait_timeout": {
Type: "string",
},
Expand Down
14 changes: 8 additions & 6 deletions pkg/apis/acid.zalan.do/v1/operator_configuration_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,12 +111,14 @@ type PostgresPodResourcesDefaults struct {

// OperatorTimeouts defines the intervals and timeouts the operator uses for
// resource checks, pod label and pod deletion waits, readiness waits and
// Patroni API checks. Each field is read from the JSON key named in its tag
// and is omitted from the output when empty.
type OperatorTimeouts struct {
ResourceCheckInterval Duration `json:"resource_check_interval,omitempty"`
ResourceCheckTimeout Duration `json:"resource_check_timeout,omitempty"`
PodLabelWaitTimeout Duration `json:"pod_label_wait_timeout,omitempty"`
PodDeletionWaitTimeout Duration `json:"pod_deletion_wait_timeout,omitempty"`
ReadyWaitInterval Duration `json:"ready_wait_interval,omitempty"`
ReadyWaitTimeout Duration `json:"ready_wait_timeout,omitempty"`
ResourceCheckInterval Duration `json:"resource_check_interval,omitempty"`
ResourceCheckTimeout Duration `json:"resource_check_timeout,omitempty"`
PodLabelWaitTimeout Duration `json:"pod_label_wait_timeout,omitempty"`
PodDeletionWaitTimeout Duration `json:"pod_deletion_wait_timeout,omitempty"`
ReadyWaitInterval Duration `json:"ready_wait_interval,omitempty"`
ReadyWaitTimeout Duration `json:"ready_wait_timeout,omitempty"`
PatroniAPICheckInterval Duration `json:"patroni_api_check_interval,omitempty"`
PatroniAPICheckTimeout Duration `json:"patroni_api_check_timeout,omitempty"`
}

// LoadBalancerConfiguration defines the LB configuration
Expand Down
10 changes: 5 additions & 5 deletions pkg/cluster/pod.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (c *Cluster) markRollingUpdateFlagForPod(pod *v1.Pod, msg string) error {
return fmt.Errorf("could not form patch for pod's rolling update flag: %v", err)
}

err = retryutil.Retry(1*time.Second, 5*time.Second,
err = retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout,
func() (bool, error) {
_, err2 := c.KubeClient.Pods(pod.Namespace).Patch(
context.TODO(),
Expand Down Expand Up @@ -356,7 +356,7 @@ func (c *Cluster) getPatroniConfig(pod *v1.Pod) (acidv1.Patroni, map[string]stri
pgParameters map[string]string
)
podName := util.NameFromMeta(pod.ObjectMeta)
err := retryutil.Retry(1*time.Second, 5*time.Second,
err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout,
func() (bool, error) {
var err error
patroniConfig, pgParameters, err = c.patroni.GetConfig(pod)
Expand All @@ -377,7 +377,7 @@ func (c *Cluster) getPatroniConfig(pod *v1.Pod) (acidv1.Patroni, map[string]stri

func (c *Cluster) getPatroniMemberData(pod *v1.Pod) (patroni.MemberData, error) {
var memberData patroni.MemberData
err := retryutil.Retry(1*time.Second, 5*time.Second,
err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout,
func() (bool, error) {
var err error
memberData, err = c.patroni.GetMemberData(pod)
Expand All @@ -403,7 +403,7 @@ func (c *Cluster) recreatePod(podName spec.NamespacedName) (*v1.Pod, error) {
defer c.unregisterPodSubscriber(podName)
stopChan := make(chan struct{})

err := retryutil.Retry(1*time.Second, 5*time.Second,
err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout,
func() (bool, error) {
err2 := c.KubeClient.Pods(podName.Namespace).Delete(
context.TODO(),
Expand Down Expand Up @@ -492,7 +492,7 @@ func (c *Cluster) getSwitchoverCandidate(master *v1.Pod) (spec.NamespacedName, e
candidates := make([]patroni.ClusterMember, 0)
syncCandidates := make([]patroni.ClusterMember, 0)

err := retryutil.Retry(1*time.Second, 5*time.Second,
err := retryutil.Retry(c.OpConfig.PatroniAPICheckInterval, c.OpConfig.PatroniAPICheckTimeout,
func() (bool, error) {
var err error
members, err = c.patroni.GetClusterMembers(master)
Expand Down
10 changes: 9 additions & 1 deletion pkg/cluster/pod_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,13 @@ import (
"io/ioutil"
"net/http"
"testing"
"time"

"github.com/golang/mock/gomock"
"github.com/zalando/postgres-operator/mocks"
acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1"
"github.com/zalando/postgres-operator/pkg/spec"
"github.com/zalando/postgres-operator/pkg/util/config"
"github.com/zalando/postgres-operator/pkg/util/k8sutil"
"github.com/zalando/postgres-operator/pkg/util/patroni"
)
Expand All @@ -22,7 +24,13 @@ func TestGetSwitchoverCandidate(t *testing.T) {
ctrl := gomock.NewController(t)
defer ctrl.Finish()

var cluster = New(Config{}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)
var cluster = New(
Config{
OpConfig: config.Config{
PatroniAPICheckInterval: time.Duration(1),
PatroniAPICheckTimeout: time.Duration(5),
},
}, k8sutil.KubernetesClient{}, acidv1.Postgresql{}, logger, eventRecorder)

// simulate different member scenarios
tests := []struct {
Expand Down
2 changes: 2 additions & 0 deletions pkg/controller/operator_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,8 @@ func (c *Controller) importConfigurationFromCRD(fromCRD *acidv1.OperatorConfigur
result.PodDeletionWaitTimeout = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.PodDeletionWaitTimeout), "10m")
result.ReadyWaitInterval = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.ReadyWaitInterval), "4s")
result.ReadyWaitTimeout = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.ReadyWaitTimeout), "30s")
result.PatroniAPICheckInterval = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.PatroniAPICheckInterval), "1s")
result.PatroniAPICheckTimeout = util.CoalesceDuration(time.Duration(fromCRD.Timeouts.PatroniAPICheckTimeout), "5s")

// load balancer config
result.DbHostedZone = util.Coalesce(fromCRD.LoadBalancer.DbHostedZone, "db.example.com")
Expand Down
2 changes: 2 additions & 0 deletions pkg/util/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,8 @@ type Config struct {
MajorVersionUpgradeTeamAllowList []string `name:"major_version_upgrade_team_allow_list" default:""`
MinimalMajorVersion string `name:"minimal_major_version" default:"9.6"`
TargetMajorVersion string `name:"target_major_version" default:"14"`
PatroniAPICheckInterval time.Duration `name:"patroni_api_check_interval" default:"1s"`
PatroniAPICheckTimeout time.Duration `name:"patroni_api_check_timeout" default:"5s"`
}

// MustMarshal marshals the config or panics
Expand Down