From fa98d6748f43d82a463bf35cedd5436acb0bda95 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Wed, 23 Jun 2021 14:18:01 +0200
Subject: [PATCH 01/16] refactor restarting instances

---
 pkg/cluster/sync.go | 130 ++++++++++++++++++++------------------------
 1 file changed, 58 insertions(+), 72 deletions(-)

diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 53552f558..e0257dfea 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -261,7 +261,10 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {
 }
 
 func (c *Cluster) syncStatefulSet() error {
-	var instancesRestartRequired bool
+	var (
+		masterPod               *v1.Pod
+		instanceRestartRequired bool
+	)
 	podsToRecreate := make([]v1.Pod, 0)
 	switchoverCandidates := make([]spec.NamespacedName, 0)
 
@@ -381,21 +384,52 @@ func (c *Cluster) syncStatefulSet() error {
 	// Apply special PostgreSQL parameters that can only be set via the Patroni API.
 	// it is important to do it after the statefulset pods are there, but before the rolling update
 	// since those parameters require PostgreSQL restart.
-	instancesRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration()
+	pods, err = c.listPods()
 	if err != nil {
-		return fmt.Errorf("could not set cluster-wide PostgreSQL configuration options: %v", err)
+		c.logger.Infof("could not list pods of the statefulset: %v", err)
 	}
+	for i, pod := range pods {
+		role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
+
+		if role == Master {
+			masterPod = &pods[i]
+			continue
+		}
+
+		podName := util.NameFromMeta(pods[i].ObjectMeta)
+		config, err := c.patroni.GetConfig(&pod)
+		if err != nil {
+			return fmt.Errorf("could not get config for pod %s: %v", podName, err)
+		}
+
+		instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod)
+		if err != nil {
+			return fmt.Errorf("could not set cluster-wide PostgreSQL configuration options: %v", err)
+		}
 
-	if instancesRestartRequired {
-		c.logger.Debugln("restarting Postgres server within pods")
-		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pods")
-		if err := c.restartInstances(); err != nil {
-			c.logger.Warningf("could not restart Postgres server within pods: %v", err)
+		if instanceRestartRequired {
+			c.logger.Debugln("restarting Postgres server within pods")
+			ttl, ok := config["ttl"].(int32)
+			if !ok {
+				ttl = 30
+			}
+			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pod "+pod.Name)
+			if err := c.restartInstance(&pod, ttl); err != nil {
+				c.logger.Warningf("could not restart Postgres server within pod %s: %v", podName, err)
+			}
+			c.logger.Infof("Postgres server successfuly restarted in pod %s", podName)
+			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Postgres server restart done for pod "+pod.Name)
 		}
-		c.logger.Infof("Postgres server successfuly restarted on all pods")
-		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Postgres server restart done - all instances have been restarted")
 	}
+
+	if masterPod != nil {
+		masterPodName := util.NameFromMeta(masterPod.ObjectMeta)
+		if err := c.restartInstance(masterPod, 0); err != nil {
+			c.logger.Warningf("could not restart Postgres master within pod %s: %v", masterPodName, err)
+		}
+	}
+
 	// if we get here we also need to re-create the pods (either leftovers from the old
 	// statefulset or those that got their configuration from the outdated statefulset)
 	if len(podsToRecreate) > 0 {
@@ -409,53 +443,12 @@ func (c *Cluster) syncStatefulSet() error {
 	return nil
 }
 
-func (c *Cluster) restartInstances() error {
-	c.setProcessName("starting to restart Postgres servers")
-	ls := c.labelsSet(false)
-	namespace := c.Namespace
-
-	listOptions := metav1.ListOptions{
-		LabelSelector: ls.String(),
-	}
-
-	pods, err := c.KubeClient.Pods(namespace).List(context.TODO(), listOptions)
-	if err != nil {
-		return fmt.Errorf("could not get the list of pods: %v", err)
-	}
-	c.logger.Infof("there are %d pods in the cluster which resquire Postgres server restart", len(pods.Items))
-
-	var (
-		masterPod *v1.Pod
-	)
-	for i, pod := range pods.Items {
-		role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
-
-		if role == Master {
-			masterPod = &pods.Items[i]
-			continue
-		}
-
-		podName := util.NameFromMeta(pods.Items[i].ObjectMeta)
-		config, err := c.patroni.GetConfig(&pod)
-		if err != nil {
-			return fmt.Errorf("could not get config for pod %s: %v", podName, err)
-		}
-		ttl, ok := config["ttl"].(int32)
-		if !ok {
-			ttl = 30
-		}
-		if err = c.patroni.Restart(&pod); err != nil {
-			return fmt.Errorf("could not restart Postgres server on pod %s: %v", podName, err)
-		}
-		time.Sleep(time.Duration(ttl) * time.Second)
-	}
+func (c *Cluster) restartInstance(pod *v1.Pod, ttl int32) error {
 
-	if masterPod != nil {
-		podName := util.NameFromMeta(masterPod.ObjectMeta)
-		if err = c.patroni.Restart(masterPod); err != nil {
-			return fmt.Errorf("could not restart postgres server on masterPod %s: %v", podName, err)
-		}
+	if err := c.patroni.Restart(pod); err != nil {
+		return err
 	}
+	time.Sleep(time.Duration(ttl) * time.Second)
 
 	return nil
 }
@@ -493,8 +486,8 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[stri
 }
 
 // checkAndSetGlobalPostgreSQLConfiguration checks whether cluster-wide API parameters
-// (like max_connections) has changed and if necessary sets it via the Patroni API
-func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration() (bool, error) {
+// (like max_connections) have changed and if necessary sets it via the Patroni API
+func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod) (bool, error) {
 	var (
 		err             error
 		pods            []v1.Pod
@@ -515,24 +508,17 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration() (bool, error) {
 		return restartRequired, nil
 	}
 
-	if pods, err = c.listPods(); err != nil {
-		return restartRequired, err
-	}
-	if len(pods) == 0 {
-		return restartRequired, fmt.Errorf("could not call Patroni API: cluster has no pods")
-	}
 	// try all pods until the first one that is successful, as it doesn't matter which pod
 	// carries the request to change configuration through
-	for _, pod := range pods {
-		podName := util.NameFromMeta(pod.ObjectMeta)
-		c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %v",
-			podName, optionsToSet)
-		if err = c.patroni.SetPostgresParameters(&pod, optionsToSet); err == nil {
-			restartRequired = true
-			return restartRequired, nil
-		}
-		c.logger.Warningf("could not patch postgres parameters with a pod %s: %v", podName, err)
+	podName := util.NameFromMeta(pod.ObjectMeta)
+	c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %v",
+		podName, optionsToSet)
+	if err = c.patroni.SetPostgresParameters(pod, optionsToSet); err == nil {
+		restartRequired = true
+		return restartRequired, nil
 	}
+	c.logger.Warningf("could not patch postgres parameters with a pod %s: %v", podName, err)
 
+	return restartRequired, fmt.Errorf("could not reach Patroni API to set Postgres options: failed on every pod (%d total)",
+		len(pods))
 }

From e9b46fd06f01a0b03800bf7d165c34628006f7de Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Wed, 23 Jun 2021 15:31:10 +0200
Subject: [PATCH 02/16] only add option to set if it differs from effective config

---
 pkg/cluster/sync.go | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index e0257dfea..85f562243 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -403,7 +403,7 @@ func (c *Cluster) syncStatefulSet() error {
 			return fmt.Errorf("could not get config for pod %s: %v", podName, err)
 		}
 
-		instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod)
+		instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, config)
 		if err != nil {
 			return fmt.Errorf("could not set cluster-wide PostgreSQL configuration options: %v", err)
 		}
@@ -487,7 +487,7 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[stri
 
 // checkAndSetGlobalPostgreSQLConfiguration checks whether cluster-wide API parameters
 // (like max_connections) have changed and if necessary sets it via the Patroni API
-func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod) (bool, error) {
+func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig map[string]interface{}) (bool, error) {
 	var (
 		err             error
 		pods            []v1.Pod
@@ -496,11 +496,14 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod) (bool, e
 
 	// we need to extract those options from the cluster manifest.
 	optionsToSet := make(map[string]string)
-	pgOptions := c.Spec.Parameters
-
-	for k, v := range pgOptions {
-		if isBootstrapOnlyParameter(k) {
-			optionsToSet[k] = v
+	desiredConfig := c.Spec.Parameters
+	effectiveConfig := patroniConfig["postgresql"].(map[string]interface{})
+	effectiveParameters := effectiveConfig["parameters"].(map[string]string)
+
+	for desiredOption, desiredValue := range desiredConfig {
+		effectiveValue, exists := effectiveParameters[desiredOption]
+		if isBootstrapOnlyParameter(desiredOption) && (effectiveValue != desiredValue || !exists) {
+			optionsToSet[desiredOption] = desiredValue
 		}
 	}

From fb1dc2265c0316e98c10b3063d16ba8296514dd8 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 24 Jun 2021 11:38:47 +0200
Subject: [PATCH 03/16] fix parsing patroni config

---
 pkg/cluster/sync.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 85f562243..e2484f05f 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -498,7 +498,7 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniC
 	optionsToSet := make(map[string]string)
 	desiredConfig := c.Spec.Parameters
 	effectiveConfig := patroniConfig["postgresql"].(map[string]interface{})
-	effectiveParameters := effectiveConfig["parameters"].(map[string]string)
+	effectiveParameters := effectiveConfig["parameters"].(map[string]interface{})
 
 	for desiredOption, desiredValue := range desiredConfig {
 		effectiveValue, exists := effectiveParameters[desiredOption]
From b3f58f2f16be69ab8646a610a2a6654eac3111cb Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Wed, 30 Jun 2021 14:58:38 +0200
Subject: [PATCH 04/16] patch Pg config on spec.Patroni changes

---
 pkg/apis/acid.zalan.do/v1/marshal.go |  2 +-
 pkg/cluster/sync.go                  | 72 ++++++++++++++++++++++------
 pkg/util/patroni/patroni.go          | 15 ++++++
 3 files changed, 74 insertions(+), 15 deletions(-)

diff --git a/pkg/apis/acid.zalan.do/v1/marshal.go b/pkg/apis/acid.zalan.do/v1/marshal.go
index 9521082fc..f4167ce92 100644
--- a/pkg/apis/acid.zalan.do/v1/marshal.go
+++ b/pkg/apis/acid.zalan.do/v1/marshal.go
@@ -81,7 +81,7 @@ func (ps *PostgresStatus) UnmarshalJSON(data []byte) error {
 	if err != nil {
 		metaErr := json.Unmarshal(data, &status)
 		if metaErr != nil {
-			return fmt.Errorf("Could not parse status: %v; err %v", string(data), metaErr)
+			return fmt.Errorf("could not parse status: %v; err %v", string(data), metaErr)
 		}
 		tmp.PostgresClusterStatus = status
 	}
diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index e2484f05f..88bb066fe 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -2,7 +2,9 @@ package cluster
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
+	"reflect"
 	"regexp"
 	"strings"
 	"time"
@@ -409,7 +411,7 @@ func (c *Cluster) syncStatefulSet() error {
 		}
 
 		if instanceRestartRequired {
-			c.logger.Debugln("restarting Postgres server within pods")
+			c.logger.Debugf("restarting Postgres server within pod %s", podName)
 			ttl, ok := config["ttl"].(int32)
 			if !ok {
 				ttl = 30
@@ -494,29 +496,71 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniC
 		restartRequired bool
 	)
 
-	// we need to extract those options from the cluster manifest.
-	optionsToSet := make(map[string]string)
-	desiredConfig := c.Spec.Parameters
-	effectiveConfig := patroniConfig["postgresql"].(map[string]interface{})
-	effectiveParameters := effectiveConfig["parameters"].(map[string]interface{})
+	configToSet := make(map[string]interface{})
+	parametersToSet := make(map[string]string)
+	effectivePgParameters := make(map[string]interface{})
 
-	for desiredOption, desiredValue := range desiredConfig {
-		effectiveValue, exists := effectiveParameters[desiredOption]
-		if isBootstrapOnlyParameter(desiredOption) && (effectiveValue != desiredValue || !exists) {
-			optionsToSet[desiredOption] = desiredValue
+	// read effective Patroni config if set
+	if patroniConfig != nil {
+		effectivePostgresql := patroniConfig["postgresql"].(map[string]interface{})
+		effectivePgParameters = effectivePostgresql[patroniPGParametersParameterName].(map[string]interface{})
+	}
+
+	// compare parameters under postgresql section with c.Spec.Postgresql.Parameters from manifest
+	desiredPgParameters := c.Spec.Parameters
+	for desiredOption, desiredValue := range desiredPgParameters {
+		effectiveValue := effectivePgParameters[desiredOption]
+		if isBootstrapOnlyParameter(desiredOption) && (effectiveValue != desiredValue) {
+			parametersToSet[desiredOption] = desiredValue
 		}
 	}
 
-	if len(optionsToSet) == 0 {
+	if len(parametersToSet) > 0 {
+		configToSet["postgresql"] = map[string]interface{}{patroniPGParametersParameterName: parametersToSet}
+	}
+
+	// compare other options from config with c.Spec.Patroni from manifest
+	desiredPatroniConfig := c.Spec.Patroni
+	if desiredPatroniConfig.LoopWait > 0 && desiredPatroniConfig.LoopWait != uint32(patroniConfig["loop_wait"].(float64)) {
+		configToSet["loop_wait"] = desiredPatroniConfig.LoopWait
+	}
+	if desiredPatroniConfig.MaximumLagOnFailover > 0 && desiredPatroniConfig.MaximumLagOnFailover != float32(patroniConfig["maximum_lag_on_failover"].(float64)) {
+		configToSet["maximum_lag_on_failover"] = desiredPatroniConfig.MaximumLagOnFailover
+	}
+	if desiredPatroniConfig.PgHba != nil && !reflect.DeepEqual(desiredPatroniConfig.PgHba, (patroniConfig["pg_hba"])) {
+		configToSet["pg_hba"] = desiredPatroniConfig.PgHba
+	}
+	if desiredPatroniConfig.RetryTimeout > 0 && desiredPatroniConfig.RetryTimeout != uint32(patroniConfig["retry_timeout"].(float64)) {
+		configToSet["retry_timeout"] = desiredPatroniConfig.RetryTimeout
+	}
+	if desiredPatroniConfig.Slots != nil && !reflect.DeepEqual(desiredPatroniConfig.Slots, patroniConfig["slots"]) {
+		configToSet["slots"] = desiredPatroniConfig.Slots
+	}
+	if desiredPatroniConfig.SynchronousMode != patroniConfig["synchronous_mode"] {
+		configToSet["synchronous_mode"] = desiredPatroniConfig.SynchronousMode
+	}
+	if desiredPatroniConfig.SynchronousModeStrict != patroniConfig["synchronous_mode_strict"] {
+		configToSet["synchronous_mode_strict"] = desiredPatroniConfig.SynchronousModeStrict
+	}
+	if desiredPatroniConfig.TTL > 0 && desiredPatroniConfig.TTL != uint32(patroniConfig["ttl"].(float64)) {
+		configToSet["ttl"] = desiredPatroniConfig.TTL
+	}
+
+	if len(configToSet) == 0 {
 		return restartRequired, nil
 	}
 
+	configToSetJson, err := json.Marshal(configToSet)
+	if err != nil {
+		c.logger.Debugf("could not convert config patch to JSON: %v", err)
+	}
+
 	// try all pods until the first one that is successful, as it doesn't matter which pod
 	// carries the request to change configuration through
 	podName := util.NameFromMeta(pod.ObjectMeta)
-	c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %v",
-		podName, optionsToSet)
-	if err = c.patroni.SetPostgresParameters(pod, optionsToSet); err == nil {
+	c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %s",
+		podName, configToSetJson)
+	if err = c.patroni.SetConfig(pod, configToSet); err == nil {
 		restartRequired = true
 		return restartRequired, nil
 	}
diff --git a/pkg/util/patroni/patroni.go b/pkg/util/patroni/patroni.go
index 1f2c95552..a9cadafba 100644
--- a/pkg/util/patroni/patroni.go
+++ b/pkg/util/patroni/patroni.go
@@ -32,6 +32,7 @@ type Interface interface {
 	GetMemberData(server *v1.Pod) (MemberData, error)
 	Restart(server *v1.Pod) error
 	GetConfig(server *v1.Pod) (map[string]interface{}, error)
+	SetConfig(server *v1.Pod, config map[string]interface{}) error
 }
 
 // Patroni API client
@@ -163,6 +164,20 @@ func (p *Patroni) SetPostgresParameters(server *v1.Pod, parameters map[string]st
 	return p.httpPostOrPatch(http.MethodPatch, apiURLString+configPath, buf)
 }
 
+//SetConfig sets Patroni options via Patroni patch API call.
+func (p *Patroni) SetConfig(server *v1.Pod, config map[string]interface{}) error {
+	buf := &bytes.Buffer{}
+	err := json.NewEncoder(buf).Encode(config)
+	if err != nil {
+		return fmt.Errorf("could not encode json: %v", err)
+	}
+	apiURLString, err := apiURL(server)
+	if err != nil {
+		return err
+	}
+	return p.httpPostOrPatch(http.MethodPatch, apiURLString+configPath, buf)
+}
+
 // MemberDataPatroni child element
 type MemberDataPatroni struct {
 	Version string `json:"version"`
From 3bed6bce6dbb4ba4b7dbbaa70c703a66f2a537bd Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Wed, 30 Jun 2021 19:52:16 +0200
Subject: [PATCH 05/16] update e2e test for updating Postgres config

---
 e2e/tests/k8s_api.py  | 14 ++++++++++
 e2e/tests/test_e2e.py | 64 +++++++++++++++++++++++++++++--------------
 2 files changed, 57 insertions(+), 21 deletions(-)

diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py
index d28ea69ad..afba6b307 100644
--- a/e2e/tests/k8s_api.py
+++ b/e2e/tests/k8s_api.py
@@ -243,6 +243,13 @@ def exec_with_kubectl(self, pod, cmd):
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
 
+    def patroni_rest(self, pod, path):
+        r = self.exec_with_kubectl(pod, "curl localhost:8008/" + path)
+        if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{":
+            return None
+
+        return json.loads(r.stdout.decode())
+
     def get_patroni_state(self, pod):
         r = self.exec_with_kubectl(pod, "patronictl list -f json")
         if not r.returncode == 0 or not r.stdout.decode()[0:1] == "[":
@@ -496,6 +503,13 @@ def exec_with_kubectl(self, pod, cmd):
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
 
+    def patroni_rest(self, pod, path):
+        r = self.exec_with_kubectl(pod, "curl localhost:8008/" + path)
+        if not r.returncode == 0 or not r.stdout.decode()[0:1] == "{":
+            return None
+
+        return json.loads(r.stdout.decode())
+
     def get_patroni_state(self, pod):
         r = self.exec_with_kubectl(pod, "patronictl list -f json")
         if not r.returncode == 0 or not r.stdout.decode()[0:1] == "[":
diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 30d0cfe2f..6a78dd571 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -1419,14 +1419,16 @@ def test_zzzz_cluster_deletion(self):
         k8s.update_config(patch_delete_annotations)
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_decrease_max_connections(self):
+    def test_patroni_config_update(self):
         '''
-           Test decreasing max_connections and restarting cluster through rest api
+           Change Postgres config under Spec.Postgresql.Parameters and Spec.Patroni
+           and query Patroni config endpoint to check if manifest changes got applied
+           via restarting cluster through Patroni's rest api
         '''
         k8s = self.k8s
         cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
         labels = 'spilo-role=master,' + cluster_label
-        new_max_connections_value = "99"
+        new_max_connections_value = "50"
         pods = k8s.api.core_v1.list_namespaced_pod(
             'default', label_selector=labels).items
         self.assert_master_is_unique()
@@ -1434,35 +1436,55 @@ def test_decrease_max_connections(self):
         creationTimestamp = masterPod.metadata.creation_timestamp
 
         # adjust max_connection
-        pg_patch_max_connections = {
+        pg_patch_config = {
             "spec": {
                 "postgresql": {
                     "parameters": {
                         "max_connections": new_max_connections_value
                     }
+                },
+                "patroni": {
+                    "slots": {
+                        "test_slot": {
+                            "type": "physical"
+                        }
+                    },
+                    "ttl": 29,
+                    "loop_wait": 9,
+                    "retry_timeout": 9,
+                    "synchronous_mode": True
                 }
             }
         }
         k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections)
-
-        def get_max_connections():
-            pods = k8s.api.core_v1.list_namespaced_pod(
-                'default', label_selector=labels).items
-            self.assert_master_is_unique()
-            masterPod = pods[0]
-            get_max_connections_cmd = '''psql -At -U postgres -c "SELECT setting FROM pg_settings WHERE name = 'max_connections';"'''
-            result = k8s.exec_with_kubectl(masterPod.metadata.name, get_max_connections_cmd)
-            max_connections_value = int(result.stdout)
-            return max_connections_value
-
-        #Make sure that max_connections decreased
-        self.eventuallyEqual(get_max_connections, int(new_max_connections_value), "max_connections didn't decrease")
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config)
+
+        self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+
+        def compare_config():
+            effective_config = k8s.patroni_rest(masterPod.metadata.name, "config")
+            desired_patroni = pg_patch_config["spec"]["patroni"]
+            desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"]
+            effective_parameters = effective_config["postgresql"]["parameters"]
+            self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"],
+                             "max_connectoins not updated")
+            self.assertTrue(effective_config["slots"] is not None)
+            self.assertEqual(desired_patroni["ttl"], effective_config["ttl"],
+                             "ttl not updated")
+            self.assertEqual(desired_patroni["loop_wait"], effective_config["loop_wait"],
+                             "loop_wait not updated")
+            self.assertEqual(desired_patroni["retry_timeout"], effective_config["retry_timeout"],
+                             "retry_timeout not updated")
+            self.assertEqual(desired_patroni["synchronous_mode"], effective_config["synchronous_mode"],
+                             "synchronous_mode not updated")
+            return True
+
+        # make sure that max_connections decreased
+        self.eventuallyTrue(compare_config, "Postgres config not applied")
         pods = k8s.api.core_v1.list_namespaced_pod(
             'default', label_selector=labels).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
-        #Make sure that pod didn't restart
+
+        # make sure that Postgres was not restarted in Pod
         self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp,
                          "Master pod creation timestamp is updated")
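The patroni_rest helper added above just curls the local REST endpoint and decodes the JSON body, so the same inspection works from any HTTP client. A hedged Go sketch of reading the effective config back, with host and port assumed as in the test:

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

// getPatroniConfig mirrors what the e2e helper does with
// `curl localhost:8008/config`: fetch and decode the effective config.
func getPatroniConfig(host string) (map[string]interface{}, error) {
	resp, err := http.Get(fmt.Sprintf("http://%s:8008/config", host))
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	config := make(map[string]interface{})
	if err := json.NewDecoder(resp.Body).Decode(&config); err != nil {
		return nil, err
	}
	return config, nil
}

func main() {
	config, err := getPatroniConfig("localhost")
	if err != nil {
		fmt.Println(err)
		return
	}
	// numbers arrive as float64, as discussed after patch 03
	if ttl, ok := config["ttl"].(float64); ok {
		fmt.Printf("effective ttl: %d\n", int(ttl))
	}
}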
From 6847a710cfb052c3c17e3deeaeccf624433211f0 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 1 Jul 2021 18:12:10 +0200
Subject: [PATCH 06/16] minor changes to e2e test

---
 e2e/tests/test_e2e.py | 81 +++++++++++++++++++++++--------------------
 1 file changed, 43 insertions(+), 38 deletions(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 6a78dd571..c0d153acf 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -1426,13 +1426,8 @@ def test_patroni_config_update(self):
            via restarting cluster through Patroni's rest api
         '''
         k8s = self.k8s
-        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster'
-        labels = 'spilo-role=master,' + cluster_label
-        new_max_connections_value = "50"
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector=labels).items
-        self.assert_master_is_unique()
-        masterPod = pods[0]
+        masterPod = k8s.get_cluster_leader_pod()
+        labels = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role=master'
         creationTimestamp = masterPod.metadata.creation_timestamp
 
         # adjust max_connection
@@ -1440,7 +1435,7 @@ def test_patroni_config_update(self):
             "spec": {
                 "postgresql": {
                     "parameters": {
-                        "max_connections": new_max_connections_value
+                        "max_connections": "50"
                     }
                 },
                 "patroni": {
@@ -1456,37 +1451,47 @@ def test_patroni_config_update(self):
                 }
             }
         }
-        k8s.api.custom_objects_api.patch_namespaced_custom_object(
-            "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config)
-
-        self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
-
-        def compare_config():
-            effective_config = k8s.patroni_rest(masterPod.metadata.name, "config")
-            desired_patroni = pg_patch_config["spec"]["patroni"]
-            desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"]
-            effective_parameters = effective_config["postgresql"]["parameters"]
-            self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"],
-                             "max_connectoins not updated")
-            self.assertTrue(effective_config["slots"] is not None)
-            self.assertEqual(desired_patroni["ttl"], effective_config["ttl"],
-                             "ttl not updated")
-            self.assertEqual(desired_patroni["loop_wait"], effective_config["loop_wait"],
-                             "loop_wait not updated")
-            self.assertEqual(desired_patroni["retry_timeout"], effective_config["retry_timeout"],
-                             "retry_timeout not updated")
-            self.assertEqual(desired_patroni["synchronous_mode"], effective_config["synchronous_mode"],
-                             "synchronous_mode not updated")
-            return True
-
-        # make sure that max_connections decreased
-        self.eventuallyTrue(compare_config, "Postgres config not applied")
-        pods = k8s.api.core_v1.list_namespaced_pod(
-            'default', label_selector=labels).items
-
-        # make sure that Postgres was not restarted in Pod
-        self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp,
-                         "Master pod creation timestamp is updated")
+        try:
+            k8s.api.custom_objects_api.patch_namespaced_custom_object(
+                "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config)
+
+            self.eventuallyEqual(lambda: self.k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+
+            def compare_config():
+                effective_config = k8s.patroni_rest(masterPod.metadata.name, "config")
+                desired_patroni = pg_patch_config["spec"]["patroni"]
+                desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"]
+                effective_parameters = effective_config["postgresql"]["parameters"]
+                self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"],
+                                 "max_connectoins not updated")
+                self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added")
+                self.assertEqual(desired_patroni["ttl"], effective_config["ttl"],
+                                 "ttl not updated")
+                self.assertEqual(desired_patroni["loop_wait"], effective_config["loop_wait"],
+                                 "loop_wait not updated")
+                self.assertEqual(desired_patroni["retry_timeout"], effective_config["retry_timeout"],
+                                 "retry_timeout not updated")
+                self.assertEqual(desired_patroni["synchronous_mode"], effective_config["synchronous_mode"],
+                                 "synchronous_mode not updated")
+                return True
+
+            self.eventuallyTrue(compare_config, "Postgres config not applied")
+            pods = k8s.api.core_v1.list_namespaced_pod(
+                'default', label_selector=labels).items
+
+            # make sure that pod wasn't recreated
+            self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp,
+                             "Master pod creation timestamp is updated")
+
+        except timeout_decorator.TimeoutError:
+            print('Operator log: {}'.format(k8s.get_operator_log()))
+            raise
+
+        # make sure cluster is in a good state for further tests
+        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+        self.eventuallyEqual(lambda: k8s.count_running_pods(), 2,
+                             "No 2 pods running")
 
     def get_failover_targets(self, master_node, replica_nodes):
         '''

From ebc1d6ad9fdc8bd9fdf11c4c169b00e4e1984964 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Mon, 5 Jul 2021 17:33:16 +0200
Subject: [PATCH 07/16] moving multi namespace test to the end

---
 e2e/tests/test_e2e.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index c0d153acf..e53156cfa 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -844,7 +844,7 @@ def verify_pod_limits():
         self.eventuallyTrue(verify_pod_limits, "Pod limits where not adjusted")
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
-    def test_multi_namespace_support(self):
+    def test_zzz_multi_namespace_support(self):
         '''
         Create a customized Postgres cluster in a non-default namespace.
         '''
@@ -858,6 +858,7 @@ def test_multi_namespace_support(self):
         try:
             k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
             k8s.wait_for_pod_start("spilo-role=master", self.test_namespace)
+            k8s.wait_for_pod_start('spilo-role=replica')
             self.assert_master_is_unique(self.test_namespace, "acid-test-cluster")
 
         except timeout_decorator.TimeoutError:
@@ -1226,6 +1227,7 @@ def test_zzz_taint_based_eviction(self):
         self.assert_distributed_pods(new_master_node, new_replica_nodes, cluster_label)
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    @unittest.skip("Skipping this test until fixed")
     def test_node_affinity(self):
         '''
         Add label to a node and update postgres cluster spec to deploy only on a node with that label

From ce522ffeeb19728476effee485a4d804e58b4f44 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 16 Jul 2021 14:54:04 +0200
Subject: [PATCH 08/16] enhance e2e test and improve log messages in sync

---
 e2e/tests/test_e2e.py | 15 +++++++++++----
 pkg/cluster/sync.go   |  6 +++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index e53156cfa..0970953a9 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -1431,13 +1431,14 @@ def test_patroni_config_update(self):
         masterPod = k8s.get_cluster_leader_pod()
         labels = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role=master'
         creationTimestamp = masterPod.metadata.creation_timestamp
+        new_max_connections_value = "50"
 
         # adjust max_connection
         pg_patch_config = {
             "spec": {
                 "postgresql": {
                     "parameters": {
-                        "max_connections": "50"
+                        "max_connections": new_max_connections_value
                     }
                 },
                 "patroni": {
@@ -1466,7 +1467,7 @@ def compare_config():
                 desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"]
                 effective_parameters = effective_config["postgresql"]["parameters"]
                 self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"],
-                                 "max_connectoins not updated")
+                                 "max_connections not updated")
                 self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added")
                 self.assertEqual(desired_patroni["ttl"], effective_config["ttl"],
                                  "ttl not updated")
@@ -1479,8 +1480,14 @@ def compare_config():
                 return True
 
             self.eventuallyTrue(compare_config, "Postgres config not applied")
-            pods = k8s.api.core_v1.list_namespaced_pod(
-                'default', label_selector=labels).items
+
+            setting_query = """
+               SELECT setting
+                 FROM pg_settings
+                WHERE name = 'max_connections';
+            """
+            self.eventuallyEqual(lambda: self.query_database(masterPod.metadata.name, "postgres", setting_query)[0], new_max_connections_value,
+                                 "New max_connections setting not applied", 10, 5)
 
             # make sure that pod wasn't recreated
             self.assertEqual(creationTimestamp, masterPod.metadata.creation_timestamp,
diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 88bb066fe..91ae40e5b 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -273,7 +273,7 @@ func (c *Cluster) syncStatefulSet() error {
 
 	pods, err := c.listPods()
 	if err != nil {
-		c.logger.Infof("could not list pods of the statefulset: %v", err)
+		c.logger.Warnf("could not list pods of the statefulset: %v", err)
 	}
 
 	// NB: Be careful to consider the codepath that acts on podsRollingUpdateRequired before returning early.
@@ -388,7 +388,7 @@ func (c *Cluster) syncStatefulSet() error {
 	// since those parameters require PostgreSQL restart.
 	pods, err = c.listPods()
 	if err != nil {
-		c.logger.Infof("could not list pods of the statefulset: %v", err)
+		c.logger.Warnf("could not get list of pods to apply special PostgreSQL parameters only to be set via Patroni API: %v", err)
 	}
 
 	for i, pod := range pods {
@@ -407,7 +407,7 @@ func (c *Cluster) syncStatefulSet() error {
 
 		instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, config)
 		if err != nil {
-			return fmt.Errorf("could not set cluster-wide PostgreSQL configuration options: %v", err)
+			return fmt.Errorf("could not set PostgreSQL configuration options for pod %s: %v", podName, err)
 		}

From f6b3afd993652f0a86c7894342ee14c0223b60bf Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 16 Jul 2021 14:55:44 +0200
Subject: [PATCH 09/16] fix e2e multi_namespace_support

---
 e2e/tests/test_e2e.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 0970953a9..2eb563018 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -858,7 +858,7 @@ def test_zzz_multi_namespace_support(self):
         try:
             k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
             k8s.wait_for_pod_start("spilo-role=master", self.test_namespace)
-            k8s.wait_for_pod_start('spilo-role=replica')
+            k8s.wait_for_pod_start('spilo-role=replica', self.test_namespace)
             self.assert_master_is_unique(self.test_namespace, "acid-test-cluster")
 
         except timeout_decorator.TimeoutError:
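Patch 08's SQL probe checks the live value rather than the config endpoint, since a changed max_connections only shows up in pg_settings once the restart actually went through. The same check can be expressed directly in Go; the github.com/lib/pq driver and the connection string are assumptions (they are not part of this PR), only the query itself comes from the patch:

package main

import (
	"database/sql"
	"fmt"

	_ "github.com/lib/pq" // driver choice is an assumption, not part of the PR
)

// currentSetting reads the live value of a parameter, mirroring the query
// the e2e test runs: SELECT setting FROM pg_settings WHERE name = '...';
func currentSetting(db *sql.DB, name string) (string, error) {
	var setting string
	err := db.QueryRow("SELECT setting FROM pg_settings WHERE name = $1", name).Scan(&setting)
	return setting, err
}

func main() {
	// connection parameters are placeholders
	db, err := sql.Open("postgres", "host=localhost user=postgres sslmode=disable")
	if err != nil {
		fmt.Println(err)
		return
	}
	defer db.Close()

	value, err := currentSetting(db, "max_connections")
	fmt.Println(value, err)
}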
From 3e3b9b3036cf4fda1e37548a99715de9d37e2dad Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 29 Jul 2021 16:59:16 +0200
Subject: [PATCH 10/16] check for restart within restartInstance

---
 pkg/cluster/sync.go | 73 ++++++++++++++++++++-------------------
 1 file changed, 32 insertions(+), 41 deletions(-)

diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 76b34da32..50f1092b7 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -263,10 +263,7 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {
 }
 
 func (c *Cluster) syncStatefulSet() error {
-	var (
-		masterPod               *v1.Pod
-		instanceRestartRequired bool
-	)
+	var masterPod *v1.Pod
 	podsToRecreate := make([]v1.Pod, 0)
 	switchoverCandidates := make([]spec.NamespacedName, 0)
 
@@ -393,43 +390,15 @@ func (c *Cluster) syncStatefulSet() error {
 
 	for i, pod := range pods {
 		role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
-
 		if role == Master {
 			masterPod = &pods[i]
 			continue
 		}
-
-		podName := util.NameFromMeta(pods[i].ObjectMeta)
-		config, err := c.patroni.GetConfig(&pod)
-		if err != nil {
-			return fmt.Errorf("could not get config for pod %s: %v", podName, err)
-		}
-
-		instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, config)
-		if err != nil {
-			return fmt.Errorf("could not set PostgreSQL configuration options for pod %s: %v", podName, err)
-		}
-
-		if instanceRestartRequired {
-			c.logger.Debugf("restarting Postgres server within pod %s", podName)
-			ttl, ok := config["ttl"].(int32)
-			if !ok {
-				ttl = 30
-			}
-			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pod "+pod.Name)
-			if err := c.restartInstance(&pod, ttl); err != nil {
-				c.logger.Warningf("could not restart Postgres server within pod %s: %v", podName, err)
-			}
-			c.logger.Infof("Postgres server successfuly restarted in pod %s", podName)
-			c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Postgres server restart done for pod "+pod.Name)
-		}
+		c.restartInstance(&pod)
 	}
 
 	if masterPod != nil {
-		masterPodName := util.NameFromMeta(masterPod.ObjectMeta)
-		if err := c.restartInstance(masterPod, 0); err != nil {
-			c.logger.Warningf("could not restart Postgres master within pod %s: %v", masterPodName, err)
-		}
+		c.restartInstance(masterPod)
 	}
 
 	// if we get here we also need to re-create the pods (either leftovers from the old
@@ -445,14 +414,36 @@ func (c *Cluster) syncStatefulSet() error {
 	return nil
 }
 
-func (c *Cluster) restartInstance(pod *v1.Pod, ttl int32) error {
+func (c *Cluster) restartInstance(pod *v1.Pod) {
 
-	if err := c.patroni.Restart(pod); err != nil {
-		return err
+	podName := util.NameFromMeta(pod.ObjectMeta)
+	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
+
+	config, err := c.patroni.GetConfig(pod)
+	if err != nil {
+		c.logger.Warningf("could not get config for %s pod %s: %v", role, podName, err)
+		return
 	}
-	time.Sleep(time.Duration(ttl) * time.Second)
 
-	return nil
+	instanceRestartRequired, err := c.checkAndSetPostgreSQLConfiguration(pod, config)
+	if err != nil {
+		c.logger.Warningf("could not set PostgreSQL configuration options for %s pod %s: %v", role, podName, err)
+		return
+	}
+
+	if instanceRestartRequired {
+		c.logger.Debugf("restarting Postgres server within %s pod %s", podName)
+		ttl, ok := config["ttl"].(int32)
+		if !ok {
+			ttl = 30
+		}
+		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pod "+pod.Name)
+		if err := c.patroni.Restart(pod); err != nil {
+			c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err)
+		}
+		time.Sleep(time.Duration(ttl) * time.Second)
+		c.logger.Infof("Postgres server successfuly restarted in %s pod %s", role, podName)
+		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Postgres server restart done for pod "+pod.Name)
+	}
 }
 
 // AnnotationsToPropagate get the annotations to update if required
@@ -487,9 +478,9 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[stri
 	return nil
 }
 
-// checkAndSetGlobalPostgreSQLConfiguration checks whether cluster-wide API parameters
+// checkAndSetPostgreSQLConfiguration checks whether cluster-wide API parameters
 // (like max_connections) have changed and if necessary sets it via the Patroni API
-func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig map[string]interface{}) (bool, error) {
+func (c *Cluster) checkAndSetPostgreSQLConfiguration(pod *v1.Pod, patroniConfig map[string]interface{}) (bool, error) {
 	var (
 		err             error
 		pods            []v1.Pod

From 20ca09507adc896e4d5cd01ace5be2e7d2b3ba70 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Thu, 29 Jul 2021 17:13:34 +0200
Subject: [PATCH 11/16] update log messages on restartInstances

---
 pkg/cluster/sync.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 50f1092b7..5e31929ae 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -431,18 +431,18 @@ func (c *Cluster) restartInstance(pod *v1.Pod) {
 	}
 
 	if instanceRestartRequired {
-		c.logger.Debugf("restarting Postgres server within %s pod %s", podName)
+		c.logger.Debugf("restarting Postgres server within %s pod %s", role, podName)
 		ttl, ok := config["ttl"].(int32)
 		if !ok {
 			ttl = 30
 		}
-		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "restarting Postgres server within pod "+pod.Name)
+		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name))
 		if err := c.patroni.Restart(pod); err != nil {
 			c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err)
 		}
 		time.Sleep(time.Duration(ttl) * time.Second)
 		c.logger.Infof("Postgres server successfuly restarted in %s pod %s", role, podName)
-		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", "Postgres server restart done for pod "+pod.Name)
+		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for pod %s pod %s", role, pod.Name))
 	}
 }

From 8d5df0c758b24fe81195944745ce60e95b42c863 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 30 Jul 2021 14:37:43 +0200
Subject: [PATCH 12/16] rename restartInstances to syncPostgreSQLConfiguration

---
 e2e/tests/test_e2e.py | 3 +--
 pkg/cluster/sync.go   | 6 +++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 99006819c..6a4bf78ca 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -852,7 +852,7 @@ def test_multi_namespace_support(self):
         try:
             k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
             k8s.wait_for_pod_start("spilo-role=master", self.test_namespace)
-            k8s.wait_for_pod_start('spilo-role=replica', self.test_namespace)
+            k8s.wait_for_pod_start("spilo-role=replica", self.test_namespace)
             self.assert_master_is_unique(self.test_namespace, "acid-test-cluster")
 
         except timeout_decorator.TimeoutError:
@@ -1501,7 +1501,6 @@ def test_zz_cluster_deletion(self):
         }
         k8s.update_config(patch_delete_annotations)
 
-
     def get_failover_targets(self, master_node, replica_nodes):
         '''
            If all pods live on the same node, failover will happen to other worker(s)
diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 5e31929ae..a464089f8 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -394,11 +394,11 @@ func (c *Cluster) syncStatefulSet() error {
 			masterPod = &pods[i]
 			continue
 		}
-		c.restartInstance(&pod)
+		c.syncPostgreSQLConfiguration(&pod)
 	}
 
 	if masterPod != nil {
-		c.restartInstance(masterPod)
+		c.syncPostgreSQLConfiguration(masterPod)
 	}
 
 	// if we get here we also need to re-create the pods (either leftovers from the old
@@ -414,7 +414,7 @@ func (c *Cluster) syncStatefulSet() error {
 	return nil
 }
 
-func (c *Cluster) restartInstance(pod *v1.Pod) {
+func (c *Cluster) syncPostgreSQLConfiguration(pod *v1.Pod) {
 
 	podName := util.NameFromMeta(pod.ObjectMeta)
 	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
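Worth spelling out why patches 10 through 12 keep reshuffling the same loop: replicas are restarted before the master, so that a restart which misbehaves cannot knock out the leader first and force an avoidable failover. The control flow in isolation, as a standalone sketch with simplified types and hypothetical names:

package main

import "fmt"

type pod struct {
	name string
	role string // value of the operator's pod role label
}

// restartReplicasFirst applies the ordering syncStatefulSet enforces:
// remember the master while looping, restart every replica, then the master.
func restartReplicasFirst(pods []pod, restart func(pod) error) {
	var master *pod
	for i, p := range pods {
		if p.role == "master" {
			master = &pods[i]
			continue
		}
		if err := restart(p); err != nil {
			fmt.Printf("could not restart replica %s: %v\n", p.name, err)
		}
	}
	if master != nil {
		if err := restart(*master); err != nil {
			fmt.Printf("could not restart master %s: %v\n", master.name, err)
		}
	}
}

func main() {
	cluster := []pod{{"pod-0", "master"}, {"pod-1", "replica"}, {"pod-2", "replica"}}
	restartReplicasFirst(cluster, func(p pod) error {
		fmt.Println("restarting", p.name)
		return nil
	})
}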
From f5067e302ce0070219a5824ae25292c215b940f4 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 30 Jul 2021 15:49:54 +0200
Subject: [PATCH 13/16] patch config only once

---
 pkg/cluster/sync.go | 96 +++++++++++++++++++++++----------------------
 1 file changed, 50 insertions(+), 46 deletions(-)

diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index a464089f8..04fb9d8d4 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -263,7 +263,11 @@ func (c *Cluster) syncPodDisruptionBudget(isUpdate bool) error {
 }
 
 func (c *Cluster) syncStatefulSet() error {
-	var masterPod *v1.Pod
+	var (
+		masterPod               *v1.Pod
+		postgresConfig          map[string]interface{}
+		instanceRestartRequired bool
+	)
 	podsToRecreate := make([]v1.Pod, 0)
 	switchoverCandidates := make([]spec.NamespacedName, 0)
 
@@ -388,17 +392,42 @@ func (c *Cluster) syncStatefulSet() error {
 		c.logger.Warnf("could not get list of pods to apply special PostgreSQL parameters only to be set via Patroni API: %v", err)
 	}
 
+	// get Postgres config, compare with manifest and update via Patroni PATCH endpoint if it differs
+	// Patroni's config endpoint is just a "proxy" to DCS. It is enough to patch it only once and it doesn't matter which pod is used.
 	for i, pod := range pods {
-		role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
-		if role == Master {
-			masterPod = &pods[i]
+		podName := util.NameFromMeta(pods[i].ObjectMeta)
+		config, err := c.patroni.GetConfig(&pod)
+		if err != nil {
+			c.logger.Warningf("could not get Postgres config from pod %s: %v", podName, err)
+			continue
+		}
+		instanceRestartRequired, err = c.checkAndSetGlobalPostgreSQLConfiguration(&pod, config)
+		if err != nil {
+			c.logger.Warningf("could not set PostgreSQL configuration options for pod %s: %v", podName, err)
 			continue
 		}
-		c.syncPostgreSQLConfiguration(&pod)
+		break
 	}
 
-	if masterPod != nil {
-		c.syncPostgreSQLConfiguration(masterPod)
+	// if the config update requires a restart, call Patroni restart for replicas first, then master
+	if instanceRestartRequired {
+		c.logger.Info("restarting Postgres server within pods")
+		ttl, ok := postgresConfig["ttl"].(int32)
+		if !ok {
+			ttl = 30
+		}
+		for i, pod := range pods {
+			role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
+			if role == Master {
+				masterPod = &pods[i]
+				continue
+			}
+			c.restartInstance(&pod, ttl)
+		}
+
+		if masterPod != nil {
+			c.restartInstance(masterPod, ttl)
+		}
 	}
 
 	// if we get here we also need to re-create the pods (either leftovers from the old
@@ -414,36 +443,20 @@ func (c *Cluster) syncStatefulSet() error {
 	return nil
 }
 
-func (c *Cluster) syncPostgreSQLConfiguration(pod *v1.Pod) {
-
+func (c *Cluster) restartInstance(pod *v1.Pod, ttl int32) {
 	podName := util.NameFromMeta(pod.ObjectMeta)
 	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
 
-	config, err := c.patroni.GetConfig(pod)
-	if err != nil {
-		c.logger.Warningf("could not get config for %s pod %s: %v", role, podName, err)
-		return
-	}
+	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name))
 
-	instanceRestartRequired, err := c.checkAndSetPostgreSQLConfiguration(pod, config)
-	if err != nil {
-		c.logger.Warningf("could not set PostgreSQL configuration options for %s pod %s: %v", role, podName, err)
+	if err := c.patroni.Restart(pod); err != nil {
+		c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err)
 		return
 	}
 
-	if instanceRestartRequired {
-		c.logger.Debugf("restarting Postgres server within %s pod %s", role, podName)
-		ttl, ok := config["ttl"].(int32)
-		if !ok {
-			ttl = 30
-		}
-		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("restarting Postgres server within %s pod %s", role, pod.Name))
-		if err := c.patroni.Restart(pod); err != nil {
-			c.logger.Warningf("could not restart Postgres server within %s pod %s: %v", role, podName, err)
-		}
-		time.Sleep(time.Duration(ttl) * time.Second)
-		c.logger.Infof("Postgres server successfuly restarted in %s pod %s", role, podName)
-		c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for pod %s pod %s", role, pod.Name))
-	}
+	time.Sleep(time.Duration(ttl) * time.Second)
+	c.logger.Debugf("Postgres server successfuly restarted in %s pod %s", role, podName)
+	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for pod %s pod %s", role, pod.Name))
 }
 
 // AnnotationsToPropagate get the annotations to update if required
@@ -487,15 +500,9 @@ func (c *Cluster) AnnotationsToPropagate(annotations map[string]string) map[stri
 	return nil
 }
 
-// checkAndSetPostgreSQLConfiguration checks whether cluster-wide API parameters
+// checkAndSetGlobalPostgreSQLConfiguration checks whether cluster-wide API parameters
 // (like max_connections) have changed and if necessary sets it via the Patroni API
-func (c *Cluster) checkAndSetPostgreSQLConfiguration(pod *v1.Pod, patroniConfig map[string]interface{}) (bool, error) {
-	var (
-		err             error
-		pods            []v1.Pod
-		restartRequired bool
-	)
-
+func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniConfig map[string]interface{}) (bool, error) {
 	configToSet := make(map[string]interface{})
 	parametersToSet := make(map[string]string)
 	effectivePgParameters := make(map[string]interface{})
@@ -538,7 +545,7 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniC
 	}
 
 	if len(configToSet) == 0 {
-		return restartRequired, nil
+		return false, nil
 	}
 
 	configToSetJson, err := json.Marshal(configToSet)
@@ -551,14 +558,11 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniC
 	podName := util.NameFromMeta(pod.ObjectMeta)
 	c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %s",
 		podName, configToSetJson)
-	if err = c.patroni.SetConfig(pod, configToSet); err == nil {
-		restartRequired = true
-		return restartRequired, nil
+	if err = c.patroni.SetConfig(pod, configToSet); err != nil {
+		return true, fmt.Errorf("could not patch postgres parameters with a pod %s: %v", podName, err)
 	}
-	c.logger.Warningf("could not patch postgres parameters with a pod %s: %v", podName, err)
 
-	return restartRequired, fmt.Errorf("could not reach Patroni API to set Postgres options: failed on every pod (%d total)",
-		len(pods))
+	return true, nil
 }
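The shape patch 13 converges on rests on one observation, stated in its new comment: /config is a proxy to the DCS, so the PATCH has to succeed against only one member and every other pod picks the change up from there; only the restarts must then visit each pod. The break-on-first-success skeleton in isolation (a sketch; checkAndSet stands in for the GetConfig plus checkAndSetGlobalPostgreSQLConfiguration pair):

package main

import "fmt"

type pod struct{ name string }

// patchConfigOnce asks members one by one until a single PATCH succeeds;
// after that the new config lives in the DCS for the whole cluster.
func patchConfigOnce(pods []pod, checkAndSet func(pod) (bool, error)) bool {
	restartRequired := false
	for _, p := range pods {
		required, err := checkAndSet(p)
		if err != nil {
			fmt.Printf("skipping pod %s: %v\n", p.name, err)
			continue // any member can carry the request, so try the next one
		}
		restartRequired = required
		break // one successful PATCH is enough; no other pod needs to be asked
	}
	return restartRequired
}

func main() {
	cluster := []pod{{"pod-0"}, {"pod-1"}}
	fmt.Println(patchConfigOnce(cluster, func(p pod) (bool, error) {
		return true, nil // pretend the PATCH succeeded and requires a restart
	}))
}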
From d1edb11041fe0df254fd4ccdcf28ff14a6273f7a Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Fri, 30 Jul 2021 15:58:02 +0200
Subject: [PATCH 14/16] minor updates to log messages

---
 pkg/cluster/sync.go | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 04fb9d8d4..7f8e2b859 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -411,7 +411,7 @@ func (c *Cluster) syncStatefulSet() error {
 
 	// if the config update requires a restart, call Patroni restart for replicas first, then master
 	if instanceRestartRequired {
-		c.logger.Info("restarting Postgres server within pods")
+		c.logger.Debug("restarting Postgres server within pods")
 		ttl, ok := postgresConfig["ttl"].(int32)
 		if !ok {
 			ttl = 30
@@ -456,7 +456,7 @@ func (c *Cluster) restartInstance(pod *v1.Pod, ttl int32) {
 
 	time.Sleep(time.Duration(ttl) * time.Second)
 	c.logger.Debugf("Postgres server successfuly restarted in %s pod %s", role, podName)
-	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for pod %s pod %s", role, pod.Name))
+	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, pod.Name))
 }
 
 // AnnotationsToPropagate get the annotations to update if required
@@ -556,7 +556,7 @@ func (c *Cluster) checkAndSetGlobalPostgreSQLConfiguration(pod *v1.Pod, patroniC
 	// try all pods until the first one that is successful, as it doesn't matter which pod
 	// carries the request to change configuration through
 	podName := util.NameFromMeta(pod.ObjectMeta)
-	c.logger.Debugf("calling Patroni API on a pod %s to set the following Postgres options: %s",
+	c.logger.Debugf("patching Postgres config via Patroni API on pod %s with following options: %s",
 		podName, configToSetJson)
 	if err = c.patroni.SetConfig(pod, configToSet); err != nil {
 		return true, fmt.Errorf("could not patch postgres parameters with a pod %s: %v", podName, err)

From d13b02d6ed14f2096c283170083881aff9008e78 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Mon, 9 Aug 2021 15:21:14 +0200
Subject: [PATCH 15/16] wait after restart outside of function

---
 pkg/cluster/k8sres_test.go | 1 +
 pkg/cluster/sync.go        | 8 ++++----
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go
index d411dd004..f3277cf79 100644
--- a/pkg/cluster/k8sres_test.go
+++ b/pkg/cluster/k8sres_test.go
@@ -505,6 +505,7 @@ func TestCloneEnv(t *testing.T) {
 	var cluster = New(
 		Config{
 			OpConfig: config.Config{
+				AWSRegion:      "test-region",
 				WALES3Bucket:   "wale-bucket",
 				ProtectedRoles: []string{"admin"},
 				Auth: config.Auth{
diff --git a/pkg/cluster/sync.go b/pkg/cluster/sync.go
index 7f8e2b859..4937a2034 100644
--- a/pkg/cluster/sync.go
+++ b/pkg/cluster/sync.go
@@ -422,11 +422,12 @@ func (c *Cluster) syncStatefulSet() error {
 			masterPod = &pods[i]
 			continue
 		}
-		c.restartInstance(&pod, ttl)
+		c.restartInstance(&pod)
+		time.Sleep(time.Duration(ttl) * time.Second)
 	}
 
 	if masterPod != nil {
-		c.restartInstance(masterPod, ttl)
+		c.restartInstance(masterPod)
 	}
 }
 
@@ -443,7 +444,7 @@ func (c *Cluster) syncStatefulSet() error {
 	return nil
 }
 
-func (c *Cluster) restartInstance(pod *v1.Pod, ttl int32) {
+func (c *Cluster) restartInstance(pod *v1.Pod) {
 
 	podName := util.NameFromMeta(pod.ObjectMeta)
 	role := PostgresRole(pod.Labels[c.OpConfig.PodRoleLabel])
@@ -454,7 +455,6 @@ func (c *Cluster) restartInstance(pod *v1.Pod) {
 		return
 	}
 
-	time.Sleep(time.Duration(ttl) * time.Second)
 	c.logger.Debugf("Postgres server successfuly restarted in %s pod %s", role, podName)
 	c.eventRecorder.Event(c.GetReference(), v1.EventTypeNormal, "Update", fmt.Sprintf("Postgres server restart done for %s pod %s", role, pod.Name))
 }

From ae6fe808a19294a11fbb45c62932ab2882056580 Mon Sep 17 00:00:00 2001
From: Felix Kunde
Date: Mon, 9 Aug 2021 15:22:39 +0200
Subject: [PATCH 16/16] revert change in k8sres_test

---
 pkg/cluster/k8sres_test.go | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go
index f3277cf79..d411dd004 100644
--- a/pkg/cluster/k8sres_test.go
+++ b/pkg/cluster/k8sres_test.go
@@ -505,7 +505,6 @@ func TestCloneEnv(t *testing.T) {
 	var cluster = New(
 		Config{
 			OpConfig: config.Config{
-				AWSRegion:      "test-region",
 				WALES3Bucket:   "wale-bucket",
 				ProtectedRoles: []string{"admin"},
 				Auth: config.Auth{