From b3c5b4066ced69bb64d6614106e0ddb22fba9088 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 15:35:10 +0100 Subject: [PATCH 01/11] fix unit test and improve stability in e2e test --- e2e/tests/test_e2e.py | 4 ---- pkg/cluster/cluster_test.go | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index e47b4bf4c..94644ece8 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -208,8 +208,6 @@ def test_additional_pod_capabilities(self): try: k8s.update_config(patch_capabilities) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") # changed security context of postgres container should trigger a rolling update k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) @@ -1002,7 +1000,6 @@ def test_min_resource_limits(self): } k8s.api.custom_objects_api.patch_namespaced_custom_object( "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") # wait for switched over k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) @@ -1109,7 +1106,6 @@ def test_node_affinity(self): plural="postgresqls", name="acid-minimal-cluster", body=patch_node_affinity_config) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") # node affinity change should cause replica to relocate from replica node to master node due to node affinity requirement k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label) diff --git a/pkg/cluster/cluster_test.go b/pkg/cluster/cluster_test.go index 98cc40f05..50b17c3e1 100644 --- a/pkg/cluster/cluster_test.go +++ b/pkg/cluster/cluster_test.go @@ -11,6 +11,7 @@ import ( acidv1 "github.com/zalando/postgres-operator/pkg/apis/acid.zalan.do/v1" fakeacidv1 "github.com/zalando/postgres-operator/pkg/generated/clientset/versioned/fake" "github.com/zalando/postgres-operator/pkg/spec" + "github.com/zalando/postgres-operator/pkg/util" "github.com/zalando/postgres-operator/pkg/util/config" "github.com/zalando/postgres-operator/pkg/util/constants" "github.com/zalando/postgres-operator/pkg/util/k8sutil" @@ -167,8 +168,17 @@ func TestInitAdditionalOwnerRoles(t *testing.T) { } cl.initAdditionalOwnerRoles() - if !reflect.DeepEqual(cl.pgUsers, expectedUsers) { - t.Errorf("%s expected: %#v, got %#v", testName, expectedUsers, cl.pgUsers) + + for _, additionalOwnerRole := range cl.Config.OpConfig.AdditionalOwnerRoles { + expectedPgUser := expectedUsers[additionalOwnerRole] + existingPgUser, exists := cl.pgUsers[additionalOwnerRole] + if !exists { + t.Errorf("%s additional owner role %q not initialized", testName, additionalOwnerRole) + } + if !util.IsEqualIgnoreOrder(expectedPgUser.MemberOf, existingPgUser.MemberOf) { + t.Errorf("%s unexpected membership of additional owner role %q: expected member of %#v, got member of %#v", + testName, additionalOwnerRole, expectedPgUser.MemberOf, existingPgUser.MemberOf) + } } } From c84600699b783d1eeb2c3c11964de7cc7b647d47 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 16:15:38 +0100 Subject: [PATCH 02/11] remove waiting from member test too --- e2e/tests/test_e2e.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 94644ece8..8a9f5a701 100644 --- a/e2e/tests/test_e2e.py +++ 
b/e2e/tests/test_e2e.py @@ -238,8 +238,6 @@ def test_additional_teams_and_members(self): }, } k8s.update_config(enable_postgres_team_crd) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") k8s.api.custom_objects_api.patch_namespaced_custom_object( 'acid.zalan.do', 'v1', 'default', From 2b0f90aa7af1624eecc11edb744c7a97f72f6ecb Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 16:41:34 +0100 Subject: [PATCH 03/11] wait after failover --- e2e/tests/test_e2e.py | 1 + 1 file changed, 1 insertion(+) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 8a9f5a701..0a90d8552 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -213,6 +213,7 @@ def test_additional_pod_capabilities(self): k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") self.eventuallyEqual(lambda: k8s.count_pods_with_container_capabilities(capabilities, cluster_label), 2, "Container capabilities not updated") From 064289d2b04fefd92c9c00df2b47118bda95aa6e Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 17:04:29 +0100 Subject: [PATCH 04/11] wait after failover in min resources test, too --- e2e/tests/test_e2e.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 0a90d8552..0f2c62efe 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -1003,7 +1003,8 @@ def test_min_resource_limits(self): # wait for switched over k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members()), 2, "Postgres status did not enter running") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") def verify_pod_limits(): pods = k8s.api.core_v1.list_namespaced_pod('default', label_selector="cluster-name=acid-minimal-cluster,application=spilo").items From 4b859af2c32ef73d548d3061ee8418710a235c8c Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 17:37:54 +0100 Subject: [PATCH 05/11] fix resources in unit test --- pkg/cluster/k8sres_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/cluster/k8sres_test.go b/pkg/cluster/k8sres_test.go index daa8aa4e4..85305504f 100644 --- a/pkg/cluster/k8sres_test.go +++ b/pkg/cluster/k8sres_test.go @@ -1578,7 +1578,7 @@ func TestEnableLoadBalancers(t *testing.T) { EnableReplicaLoadBalancer: util.False(), EnableReplicaPoolerLoadBalancer: util.False(), NumberOfInstances: 1, - Resources: acidv1.Resources{ + Resources: &acidv1.Resources{ ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, }, @@ -1625,7 +1625,7 @@ func TestEnableLoadBalancers(t *testing.T) { EnableReplicaLoadBalancer: util.True(), EnableReplicaPoolerLoadBalancer: util.True(), NumberOfInstances: 1, - Resources: acidv1.Resources{ + Resources: &acidv1.Resources{ ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, }, @@ -1720,7 +1720,7 @@ func TestVolumeSelector(t *testing.T) { return acidv1.PostgresSpec{ TeamID: "myapp", NumberOfInstances: 0, - Resources: 
acidv1.Resources{ + Resources: &acidv1.Resources{ ResourceRequests: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, ResourceLimits: acidv1.ResourceDescription{CPU: "1", Memory: "10"}, }, From 0d55e2e60cf05f151034df0a66a1365b8959e53e Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 18:54:51 +0100 Subject: [PATCH 06/11] fix resource handling --- pkg/cluster/cluster.go | 5 +++++ pkg/cluster/k8sres.go | 26 +++++++++++++------------- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 80c59f872..220644d19 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -683,12 +683,17 @@ func (c *Cluster) enforceMinResourceLimits(spec *acidv1.PostgresSpec) error { err error ) + if spec.Resources == nil { + return nil + } + // setting limits too low can cause unnecessary evictions / OOM kills minCPULimit := c.OpConfig.MinCPULimit minMemoryLimit := c.OpConfig.MinMemoryLimit cpuLimit := spec.Resources.ResourceLimits.CPU if cpuLimit != "" { + cpuLimit := spec.Resources.ResourceLimits.CPU isSmaller, err = util.IsSmallerQuantity(cpuLimit, minCPULimit) if err != nil { return fmt.Errorf("could not compare defined CPU limit %s with configured minimum value %s: %v", cpuLimit, minCPULimit, err) } diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index 7e96adacc..a23b9e195 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -1007,14 +1007,14 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef // controller adjusts the default memory request at operator startup - request := spec.Resources.ResourceRequests.Memory - if request == "" { - request = c.OpConfig.Resources.DefaultMemoryRequest - } + var request, limit string - limit := spec.Resources.ResourceLimits.Memory - if limit == "" { + if spec.Resources == nil { + request = c.OpConfig.Resources.DefaultMemoryRequest limit = c.OpConfig.Resources.DefaultMemoryLimit + } else { + request = spec.Resources.ResourceRequests.Memory + limit = spec.Resources.ResourceLimits.Memory } isSmaller, err := util.IsSmallerQuantity(request, limit) @@ -1034,14 +1034,14 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef for _, sidecar := range spec.Sidecars { // TODO #413 - sidecarRequest := sidecar.Resources.ResourceRequests.Memory - if request == "" { - request = c.OpConfig.Resources.DefaultMemoryRequest - } + var sidecarRequest, sidecarLimit string - sidecarLimit := sidecar.Resources.ResourceLimits.Memory - if limit == "" { - limit = c.OpConfig.Resources.DefaultMemoryLimit + if sidecar.Resources == nil { + sidecarRequest = c.OpConfig.Resources.DefaultMemoryRequest + sidecarLimit = c.OpConfig.Resources.DefaultMemoryLimit + } else { + sidecarRequest = sidecar.Resources.ResourceRequests.Memory + sidecarLimit = sidecar.Resources.ResourceLimits.Memory } isSmaller, err := util.IsSmallerQuantity(sidecarRequest, sidecarLimit) From cd5b440ec6ce6a6eda734ede14a6d3d3fa58ad1d Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 20:44:29 +0100 Subject: [PATCH 07/11] minor change --- pkg/cluster/cluster.go | 1 - 1 file changed, 1 deletion(-) diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go index 220644d19..9c1aada79 100644 --- a/pkg/cluster/cluster.go +++ b/pkg/cluster/cluster.go @@ -693,7 +693,6 @@ func (c *Cluster) enforceMinResourceLimits(spec *acidv1.PostgresSpec) error { cpuLimit := spec.Resources.ResourceLimits.CPU if cpuLimit != "" { - cpuLimit := spec.Resources.ResourceLimits.CPU 
isSmaller, err = util.IsSmallerQuantity(cpuLimit, minCPULimit) if err != nil { return fmt.Errorf("could not compare defined CPU limit %s with configured minimum value %s: %v", cpuLimit, minCPULimit, err) From 5b4282101bf308c87fc0d5732bf9a145e4ca5eff Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Fri, 18 Mar 2022 22:55:43 +0100 Subject: [PATCH 08/11] revert change in min resource test --- e2e/tests/test_e2e.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 0f2c62efe..88cb6792d 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -999,12 +999,12 @@ def test_min_resource_limits(self): } k8s.api.custom_objects_api.patch_namespaced_custom_object( "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_resources) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") # wait for switched over k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") def verify_pod_limits(): pods = k8s.api.core_v1.list_namespaced_pod('default', label_selector="cluster-name=acid-minimal-cluster,application=spilo").items From c9c71130d4d1b4d2d795183337b0a11bff2248af Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Sat, 19 Mar 2022 00:06:42 +0100 Subject: [PATCH 09/11] more minor changes --- pkg/cluster/k8sres.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pkg/cluster/k8sres.go b/pkg/cluster/k8sres.go index a23b9e195..23f3b9cd6 100644 --- a/pkg/cluster/k8sres.go +++ b/pkg/cluster/k8sres.go @@ -139,8 +139,8 @@ func (c *Cluster) makeDefaultResources() acidv1.Resources { func generateResourceRequirements(resources *acidv1.Resources, defaultResources acidv1.Resources) (*v1.ResourceRequirements, error) { var err error - var specRequests acidv1.ResourceDescription - var specLimits acidv1.ResourceDescription + var specRequests, specLimits acidv1.ResourceDescription + if resources == nil { specRequests = acidv1.ResourceDescription{} specLimits = acidv1.ResourceDescription{} @@ -1024,7 +1024,6 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef if isSmaller { c.logger.Warningf("The memory request of %v for the Postgres container is increased to match the memory limit of %v.", request, limit) spec.Resources.ResourceRequests.Memory = limit - } // controller adjusts the Scalyr sidecar request at operator startup @@ -1057,7 +1056,6 @@ func (c *Cluster) generateStatefulSet(spec *acidv1.PostgresSpec) (*appsv1.Statef } defaultResources := c.makeDefaultResources() - resourceRequirements, err := generateResourceRequirements(spec.Resources, defaultResources) if err != nil { return nil, fmt.Errorf("could not generate resource requirements: %v", err) From 1d2b3f52917f450d614fa5af1052c71a7a6e539f Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Sat, 19 Mar 2022 17:30:58 +0100 Subject: [PATCH 10/11] revert change for node affinity test --- e2e/tests/test_e2e.py | 802 +----------------------------------------- 1 file changed, 2 insertions(+), 800 deletions(-) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index 88cb6792d..a346e09ad 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -158,806 +158,6 @@ def setUpClass(cls): print('Operator log: {}'.format(k8s.get_operator_log())) raise - 
@timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_additional_owner_roles(self): - ''' - Test adding additional member roles to existing database owner roles - ''' - k8s = self.k8s - - # enable PostgresTeam CRD and lower resync - owner_roles = { - "data": { - "additional_owner_roles": "cron_admin", - }, - } - k8s.update_config(owner_roles) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - leader = k8s.get_cluster_leader_pod() - owner_query = """ - SELECT a2.rolname - FROM pg_catalog.pg_authid a - JOIN pg_catalog.pg_auth_members am - ON a.oid = am.member - AND a.rolname = 'cron_admin' - JOIN pg_catalog.pg_authid a2 - ON a2.oid = am.roleid - WHERE a2.rolname IN ('zalando', 'bar_owner', 'bar_data_owner'); - """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", owner_query)), 3, - "Not all additional users found in database", 10, 5) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_additional_pod_capabilities(self): - ''' - Extend postgres container capabilities - ''' - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' - capabilities = ["SYS_NICE","CHOWN"] - patch_capabilities = { - "data": { - "additional_pod_capabilities": ','.join(capabilities), - }, - } - - # get node and replica (expected target of new master) - _, replica_nodes = k8s.get_pg_nodes(cluster_label) - - try: - k8s.update_config(patch_capabilities) - - # changed security context of postgres container should trigger a rolling update - k8s.wait_for_pod_failover(replica_nodes, 'spilo-role=master,' + cluster_label) - k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.count_pods_with_container_capabilities(capabilities, cluster_label), - 2, "Container capabilities not updated") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_additional_teams_and_members(self): - ''' - Test PostgresTeam CRD with extra teams and members - ''' - k8s = self.k8s - - # enable PostgresTeam CRD and lower resync - enable_postgres_team_crd = { - "data": { - "enable_postgres_team_crd": "true", - "enable_team_member_deprecation": "true", - "role_deletion_suffix": "_delete_me", - "resync_period": "15s", - "repair_period": "15s", - }, - } - k8s.update_config(enable_postgres_team_crd) - - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresteams', 'custom-team-membership', - { - 'spec': { - 'additionalTeams': { - 'acid': [ - 'e2e' - ] - }, - 'additionalMembers': { - 'e2e': [ - 'kind' - ] - } - } - }) - - leader = k8s.get_cluster_leader_pod() - user_query = """ - SELECT rolname - FROM pg_catalog.pg_roles - WHERE rolname IN ('elephant', 'kind'); - """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, - "Not all additional users found in database", 10, 5) - - # replace additional member and check if the removed member's role is renamed - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresteams', 'custom-team-membership', - { - 'spec': { - 'additionalMembers': { - 'e2e': [ - 'tester' - ] - }, - } - }) - - user_query = """ - SELECT rolname - FROM pg_catalog.pg_roles - WHERE (rolname = 
'tester' AND rolcanlogin) - OR (rolname = 'kind_delete_me' AND NOT rolcanlogin); - """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, - "Database role of replaced member in PostgresTeam not renamed", 10, 5) - - # re-add additional member and check if the role is renamed back - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresteams', 'custom-team-membership', - { - 'spec': { - 'additionalMembers': { - 'e2e': [ - 'kind' - ] - }, - } - }) - - user_query = """ - SELECT rolname - FROM pg_catalog.pg_roles - WHERE (rolname = 'kind' AND rolcanlogin) - OR (rolname = 'tester_delete_me' AND NOT rolcanlogin); - """ - self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, - "Database role of recreated member in PostgresTeam not renamed back to original name", 10, 5) - - # revert config change - revert_resync = { - "data": { - "resync_period": "4m", - "repair_period": "1m", - }, - } - k8s.update_config(revert_resync) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_config_update(self): - ''' - Change Postgres config under Spec.Postgresql.Parameters and Spec.Patroni - and query Patroni config endpoint to check if manifest changes got applied - via restarting cluster through Patroni's rest api - ''' - k8s = self.k8s - leader = k8s.get_cluster_leader_pod() - replica = k8s.get_cluster_replica_pod() - masterCreationTimestamp = leader.metadata.creation_timestamp - replicaCreationTimestamp = replica.metadata.creation_timestamp - new_max_connections_value = "50" - - # adjust Postgres config - pg_patch_config = { - "spec": { - "postgresql": { - "parameters": { - "max_connections": new_max_connections_value - } - }, - "patroni": { - "slots": { - "test_slot": { - "type": "physical" - } - }, - "ttl": 29, - "loop_wait": 9, - "retry_timeout": 9, - "synchronous_mode": True - } - } - } - - try: - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - def compare_config(): - effective_config = k8s.patroni_rest(leader.metadata.name, "config") - desired_config = pg_patch_config["spec"]["patroni"] - desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"] - effective_parameters = effective_config["postgresql"]["parameters"] - self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"], - "max_connections not updated") - self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added") - self.assertEqual(desired_config["ttl"], effective_config["ttl"], - "ttl not updated") - self.assertEqual(desired_config["loop_wait"], effective_config["loop_wait"], - "loop_wait not updated") - self.assertEqual(desired_config["retry_timeout"], effective_config["retry_timeout"], - "retry_timeout not updated") - self.assertEqual(desired_config["synchronous_mode"], effective_config["synchronous_mode"], - "synchronous_mode not updated") - return True - - # check if Patroni config has been updated - self.eventuallyTrue(compare_config, "Postgres config not applied") - - # make sure that pods were not recreated - leader = k8s.get_cluster_leader_pod() - replica = 
k8s.get_cluster_replica_pod() - self.assertEqual(masterCreationTimestamp, leader.metadata.creation_timestamp, - "Master pod creation timestamp is updated") - self.assertEqual(replicaCreationTimestamp, replica.metadata.creation_timestamp, - "Master pod creation timestamp is updated") - - # query max_connections setting - setting_query = """ - SELECT setting - FROM pg_settings - WHERE name = 'max_connections'; - """ - self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], new_max_connections_value, - "New max_connections setting not applied on master", 10, 5) - self.eventuallyNotEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, - "Expected max_connections not to be updated on replica since Postgres was restarted there first", 10, 5) - - # the next sync should restart the replica because it has pending_restart flag set - # force next sync by deleting the operator pod - k8s.delete_operator_pod() - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, - "New max_connections setting not applied on replica", 10, 5) - - # decrease max_connections again - # this time restart will be correct and new value should appear on both instances - lower_max_connections_value = "30" - pg_patch_max_connections = { - "spec": { - "postgresql": { - "parameters": { - "max_connections": lower_max_connections_value - } - } - } - } - - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - # check Patroni config again - pg_patch_config["spec"]["postgresql"]["parameters"]["max_connections"] = lower_max_connections_value - self.eventuallyTrue(compare_config, "Postgres config not applied") - - # and query max_connections setting again - self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, - "Previous max_connections setting not applied on master", 10, 5) - self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, - "Previous max_connections setting not applied on replica", 10, 5) - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - # make sure cluster is in a good state for further tests - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No 2 pods running") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_cross_namespace_secrets(self): - ''' - Test secrets in different namespace - ''' - k8s = self.k8s - - # enable secret creation in separate namespace - patch_cross_namespace_secret = { - "data": { - "enable_cross_namespace_secret": "true" - } - } - k8s.update_config(patch_cross_namespace_secret, - step="cross namespace secrets enabled") - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - # create secret in test namespace - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 
'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'users':{ - 'test.db_user': [], - } - } - }) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), - 1, "Secret not created for user in namespace") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_enable_disable_connection_pooler(self): - ''' - For a database without connection pooler, then turns it on, scale up, - turn off and on again. Test with different ways of doing this (via - enableConnectionPooler or connectionPooler configuration section). At - the end turn connection pooler off to not interfere with other tests. - ''' - k8s = self.k8s - pooler_label = 'application=db-connection-pooler,cluster-name=acid-minimal-cluster' - master_pooler_label = 'connection-pooler=acid-minimal-cluster-pooler' - replica_pooler_label = master_pooler_label + '-repl' - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': True, - 'enableReplicaConnectionPooler': True, - } - }) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 2, "Deployment replicas is 2 default") - self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), 2, "No pooler pods found") - self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), 2, "No pooler replica pods found") - self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), 2, "No pooler service found") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), 1, "Pooler secret not created") - - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableMasterPoolerLoadBalancer': True, - 'enableReplicaPoolerLoadBalancer': True, - } - }) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - self.eventuallyEqual(lambda: k8s.get_service_type(master_pooler_label+","+pooler_label), - 'LoadBalancer', - "Expected LoadBalancer service type for master pooler pod, found {}") - self.eventuallyEqual(lambda: k8s.get_service_type(replica_pooler_label+","+pooler_label), - 'LoadBalancer', - "Expected LoadBalancer service type for replica pooler pod, found {}") - - # Turn off only master connection pooler - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': False, - 'enableReplicaConnectionPooler': True, - } - }) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(name="acid-minimal-cluster-pooler-repl"), 2, - "Deployment replicas is 2 default") - self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), - 0, "Master pooler pods not deleted") - self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), - 2, "Pooler replica pods not found") - self.eventuallyEqual(lambda: 
k8s.count_services_with_label(pooler_label), - 1, "No pooler service found") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), - 1, "Secret not created") - - # Turn off only replica connection pooler - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': True, - 'enableReplicaConnectionPooler': False, - 'enableMasterPoolerLoadBalancer': False, - } - }) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 2, - "Deployment replicas is 2 default") - self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), - 2, "Master pooler pods not found") - self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), - 0, "Pooler replica pods not deleted") - self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), - 1, "No pooler service found") - self.eventuallyEqual(lambda: k8s.get_service_type(master_pooler_label+","+pooler_label), - 'ClusterIP', - "Expected LoadBalancer service type for master, found {}") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), - 1, "Secret not created") - - # scale up connection pooler deployment - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'connectionPooler': { - 'numberOfInstances': 3, - }, - } - }) - - self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 3, - "Deployment replicas is scaled to 3") - self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), - 3, "Scale up of pooler pods does not work") - - # turn it off, keeping config should be overwritten by false - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'enableConnectionPooler': False, - 'enableReplicaConnectionPooler': False, - 'enableReplicaPoolerLoadBalancer': False, - } - }) - - self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), - 0, "Pooler pods not scaled down") - self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), - 0, "Pooler service not removed") - self.eventuallyEqual(lambda: k8s.count_secrets_with_label('application=spilo,cluster-name=acid-minimal-cluster'), - 4, "Secrets not deleted") - - # Verify that all the databases have pooler schema installed. - # Do this via psql, since otherwise we need to deal with - # credentials. 
- db_list = [] - - leader = k8s.get_cluster_leader_pod() - schemas_query = """ - SELECT schema_name - FROM information_schema.schemata - WHERE schema_name = 'pooler' - """ - - db_list = self.list_databases(leader.metadata.name) - for db in db_list: - self.eventuallyNotEqual(lambda: len(self.query_database(leader.metadata.name, db, schemas_query)), 0, - "Pooler schema not found in database {}".format(db)) - - # remove config section to make test work next time - k8s.api.custom_objects_api.patch_namespaced_custom_object( - 'acid.zalan.do', 'v1', 'default', - 'postgresqls', 'acid-minimal-cluster', - { - 'spec': { - 'connectionPooler': None, - 'EnableReplicaConnectionPooler': False, - } - }) - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_enable_load_balancer(self): - ''' - Test if services are updated when enabling/disabling load balancers in Postgres manifest - ''' - - k8s = self.k8s - cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role={}' - - self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")), - 'ClusterIP', - "Expected ClusterIP type initially, found {}") - - try: - # enable load balancer services - pg_patch_enable_lbs = { - "spec": { - "enableMasterLoadBalancer": True, - "enableReplicaLoadBalancer": True - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_lbs) - - self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")), - 'LoadBalancer', - "Expected LoadBalancer service type for master, found {}") - - self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("replica")), - 'LoadBalancer', - "Expected LoadBalancer service type for master, found {}") - - # disable load balancer services again - pg_patch_disable_lbs = { - "spec": { - "enableMasterLoadBalancer": False, - "enableReplicaLoadBalancer": False - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_lbs) - - self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")), - 'ClusterIP', - "Expected LoadBalancer service type for master, found {}") - - self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("replica")), - 'ClusterIP', - "Expected LoadBalancer service type for master, found {}") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_infrastructure_roles(self): - ''' - Test using external secrets for infrastructure roles - ''' - k8s = self.k8s - # update infrastructure roles description - secret_name = "postgresql-infrastructure-roles" - roles = "secretname: postgresql-infrastructure-roles-new, userkey: user,"\ - "rolekey: memberof, passwordkey: password, defaultrolevalue: robot_zmon" - patch_infrastructure_roles = { - "data": { - "infrastructure_roles_secret_name": secret_name, - "infrastructure_roles_secrets": roles, - }, - } - k8s.update_config(patch_infrastructure_roles) - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, - "Operator does not get in sync") - - try: - # check that new roles are represented in the config by requesting the - # operator configuration via API - - def verify_role(): - try: - operator_pod = k8s.get_operator_pod() - get_config_cmd = "wget --quiet -O - localhost:8080/config" - result = 
k8s.exec_with_kubectl(operator_pod.metadata.name, - get_config_cmd) - try: - roles_dict = (json.loads(result.stdout) - .get("controller", {}) - .get("InfrastructureRoles")) - except: - return False - - if "robot_zmon_acid_monitoring_new" in roles_dict: - role = roles_dict["robot_zmon_acid_monitoring_new"] - role.pop("Password", None) - self.assertDictEqual(role, { - "Name": "robot_zmon_acid_monitoring_new", - "Namespace":"", - "Flags": None, - "MemberOf": ["robot_zmon"], - "Parameters": None, - "AdminRole": "", - "Origin": 2, - "IsDbOwner": False, - "Deleted": False - }) - return True - except: - pass - - return False - - self.eventuallyTrue(verify_role, "infrastructure role setup is not loaded") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_lazy_spilo_upgrade(self): - ''' - Test lazy upgrade for the Spilo image: operator changes a stateful set - but lets pods run with the old image until they are recreated for - reasons other than operator's activity. That works because the operator - configures stateful sets to use "onDelete" pod update policy. - The test covers: - 1) enabling lazy upgrade in existing operator deployment - 2) forcing the normal rolling upgrade by changing the operator - configmap and restarting its pod - ''' - - k8s = self.k8s - - pod0 = 'acid-minimal-cluster-0' - pod1 = 'acid-minimal-cluster-1' - - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - - patch_lazy_spilo_upgrade = { - "data": { - "docker_image": SPILO_CURRENT, - "enable_lazy_spilo_upgrade": "false" - } - } - k8s.update_config(patch_lazy_spilo_upgrade, - step="Init baseline image version") - - self.eventuallyEqual(lambda: k8s.get_statefulset_image(), SPILO_CURRENT, - "Statefulset not updated initially") - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No 2 pods running") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), - SPILO_CURRENT, "Rolling upgrade was not executed") - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), - SPILO_CURRENT, "Rolling upgrade was not executed") - - # update docker image in config and enable the lazy upgrade - conf_image = SPILO_LAZY - patch_lazy_spilo_upgrade = { - "data": { - "docker_image": conf_image, - "enable_lazy_spilo_upgrade": "true" - } - } - k8s.update_config(patch_lazy_spilo_upgrade, - step="patch image and lazy upgrade") - self.eventuallyEqual(lambda: k8s.get_statefulset_image(), conf_image, - "Statefulset not updated to next Docker image") - - try: - # restart the pod to get a container with the new image - k8s.api.core_v1.delete_namespaced_pod(pod0, 'default') - - # verify only pod-0 which was deleted got new image from statefulset - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), - conf_image, "Delete pod-0 did not get new spilo image") - self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, - "No two pods running after lazy rolling upgrade") - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - self.assertNotEqual(lambda: k8s.get_effective_pod_image(pod1), - SPILO_CURRENT, - "pod-1 should not have change Docker image to 
{}".format(SPILO_CURRENT)) - - # clean up - unpatch_lazy_spilo_upgrade = { - "data": { - "enable_lazy_spilo_upgrade": "false", - } - } - k8s.update_config(unpatch_lazy_spilo_upgrade, step="patch lazy upgrade") - - # at this point operator will complete the normal rolling upgrade - # so we additionally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), - conf_image, "Rolling upgrade was not executed", - 50, 3) - self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), - conf_image, "Rolling upgrade was not executed", - 50, 3) - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), - 2, "Postgres status did not enter running") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - def test_logical_backup_cron_job(self): - ''' - Ensure we can (a) create the cron job at user request for a specific PG cluster - (b) update the cluster-wide image for the logical backup pod - (c) delete the job at user request - Limitations: - (a) Does not run the actual batch job because there is no S3 mock to upload backups to - (b) Assumes 'acid-minimal-cluster' exists as defined in setUp - ''' - - k8s = self.k8s - - # create the cron job - schedule = "7 7 7 7 *" - pg_patch_enable_backup = { - "spec": { - "enableLogicalBackup": True, - "logicalBackupSchedule": schedule - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_backup) - - try: - self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 1, "failed to create logical backup job") - - job = k8s.get_logical_backup_job().items[0] - self.assertEqual(job.metadata.name, "logical-backup-acid-minimal-cluster", - "Expected job name {}, found {}" - .format("logical-backup-acid-minimal-cluster", job.metadata.name)) - self.assertEqual(job.spec.schedule, schedule, - "Expected {} schedule, found {}" - .format(schedule, job.spec.schedule)) - - # update the cluster-wide image of the logical backup pod - image = "test-image-name" - patch_logical_backup_image = { - "data": { - "logical_backup_docker_image": image, - } - } - k8s.update_config(patch_logical_backup_image, step="patch logical backup image") - - def get_docker_image(): - jobs = k8s.get_logical_backup_job().items - return jobs[0].spec.job_template.spec.template.spec.containers[0].image - - self.eventuallyEqual(get_docker_image, image, - "Expected job image {}, found {}".format(image, "{}")) - - # delete the logical backup cron job - pg_patch_disable_backup = { - "spec": { - "enableLogicalBackup": False, - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_backup) - - self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 0, "failed to create logical backup job") - - except timeout_decorator.TimeoutError: - print('Operator log: {}'.format(k8s.get_operator_log())) - raise - - # ensure cluster is healthy after tests - self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") - - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) - @unittest.skip("Skipping this test until fixed") - def test_major_version_upgrade(self): - k8s = self.k8s - result = 
k8s.create_with_kubectl("manifests/minimal-postgres-manifest-12.yaml") - self.eventuallyEqual(lambda: k8s.count_running_pods(labels="application=spilo,cluster-name=acid-upgrade-test"), 2, "No 2 pods running") - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - pg_patch_version = { - "spec": { - "postgres": { - "version": "14" - } - } - } - k8s.api.custom_objects_api.patch_namespaced_custom_object( - "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version) - - self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") - - def check_version_14(): - p = k8s.get_patroni_state("acid-upgrade-test-0") - version = p["server_version"][0:2] - return version - - self.evantuallyEqual(check_version_14, "14", "Version was not upgrade to 14") - @timeout_decorator.timeout(TEST_TIMEOUT_SEC) def test_min_resource_limits(self): ''' @@ -1106,6 +306,8 @@ def test_node_affinity(self): plural="postgresqls", name="acid-minimal-cluster", body=patch_node_affinity_config) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") # node affinity change should cause replica to relocate from replica node to master node due to node affinity requirement k8s.wait_for_pod_failover(master_nodes, 'spilo-role=replica,' + cluster_label) From e1f5173dafb1a1e8e357a9bcf6852e65f05126d0 Mon Sep 17 00:00:00 2001 From: Felix Kunde Date: Sun, 20 Mar 2022 11:52:43 +0100 Subject: [PATCH 11/11] bring back all e2e tests --- e2e/tests/test_e2e.py | 800 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 800 insertions(+) diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py index a346e09ad..44c72cb44 100644 --- a/e2e/tests/test_e2e.py +++ b/e2e/tests/test_e2e.py @@ -158,6 +158,806 @@ def setUpClass(cls): print('Operator log: {}'.format(k8s.get_operator_log())) raise + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_additional_owner_roles(self): + ''' + Test adding additional member roles to existing database owner roles + ''' + k8s = self.k8s + + # enable PostgresTeam CRD and lower resync + owner_roles = { + "data": { + "additional_owner_roles": "cron_admin", + }, + } + k8s.update_config(owner_roles) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + + leader = k8s.get_cluster_leader_pod() + owner_query = """ + SELECT a2.rolname + FROM pg_catalog.pg_authid a + JOIN pg_catalog.pg_auth_members am + ON a.oid = am.member + AND a.rolname = 'cron_admin' + JOIN pg_catalog.pg_authid a2 + ON a2.oid = am.roleid + WHERE a2.rolname IN ('zalando', 'bar_owner', 'bar_data_owner'); + """ + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", owner_query)), 3, + "Not all additional users found in database", 10, 5) + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_additional_pod_capabilities(self): + ''' + Extend postgres container capabilities + ''' + k8s = self.k8s + cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster' + capabilities = ["SYS_NICE","CHOWN"] + patch_capabilities = { + "data": { + "additional_pod_capabilities": ','.join(capabilities), + }, + } + + # get node and replica (expected target of new master) + _, replica_nodes = k8s.get_pg_nodes(cluster_label) + + try: + k8s.update_config(patch_capabilities) + + # changed security context of postgres container should trigger a rolling update + k8s.wait_for_pod_failover(replica_nodes, 
'spilo-role=master,' + cluster_label) + k8s.wait_for_pod_start('spilo-role=replica,' + cluster_label) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_pods_with_container_capabilities(capabilities, cluster_label), + 2, "Container capabilities not updated") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_additional_teams_and_members(self): + ''' + Test PostgresTeam CRD with extra teams and members + ''' + k8s = self.k8s + + # enable PostgresTeam CRD and lower resync + enable_postgres_team_crd = { + "data": { + "enable_postgres_team_crd": "true", + "enable_team_member_deprecation": "true", + "role_deletion_suffix": "_delete_me", + "resync_period": "15s", + "repair_period": "15s", + }, + } + k8s.update_config(enable_postgres_team_crd) + + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresteams', 'custom-team-membership', + { + 'spec': { + 'additionalTeams': { + 'acid': [ + 'e2e' + ] + }, + 'additionalMembers': { + 'e2e': [ + 'kind' + ] + } + } + }) + + leader = k8s.get_cluster_leader_pod() + user_query = """ + SELECT rolname + FROM pg_catalog.pg_roles + WHERE rolname IN ('elephant', 'kind'); + """ + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + "Not all additional users found in database", 10, 5) + + # replace additional member and check if the removed member's role is renamed + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresteams', 'custom-team-membership', + { + 'spec': { + 'additionalMembers': { + 'e2e': [ + 'tester' + ] + }, + } + }) + + user_query = """ + SELECT rolname + FROM pg_catalog.pg_roles + WHERE (rolname = 'tester' AND rolcanlogin) + OR (rolname = 'kind_delete_me' AND NOT rolcanlogin); + """ + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + "Database role of replaced member in PostgresTeam not renamed", 10, 5) + + # re-add additional member and check if the role is renamed back + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresteams', 'custom-team-membership', + { + 'spec': { + 'additionalMembers': { + 'e2e': [ + 'kind' + ] + }, + } + }) + + user_query = """ + SELECT rolname + FROM pg_catalog.pg_roles + WHERE (rolname = 'kind' AND rolcanlogin) + OR (rolname = 'tester_delete_me' AND NOT rolcanlogin); + """ + self.eventuallyEqual(lambda: len(self.query_database(leader.metadata.name, "postgres", user_query)), 2, + "Database role of recreated member in PostgresTeam not renamed back to original name", 10, 5) + + # revert config change + revert_resync = { + "data": { + "resync_period": "4m", + "repair_period": "1m", + }, + } + k8s.update_config(revert_resync) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_config_update(self): + ''' + Change Postgres config under Spec.Postgresql.Parameters and Spec.Patroni + and query Patroni config endpoint to check if manifest changes got applied + via restarting cluster through Patroni's rest api + ''' + k8s = self.k8s + leader = k8s.get_cluster_leader_pod() + replica = k8s.get_cluster_replica_pod() + masterCreationTimestamp = 
leader.metadata.creation_timestamp + replicaCreationTimestamp = replica.metadata.creation_timestamp + new_max_connections_value = "50" + + # adjust Postgres config + pg_patch_config = { + "spec": { + "postgresql": { + "parameters": { + "max_connections": new_max_connections_value + } + }, + "patroni": { + "slots": { + "test_slot": { + "type": "physical" + } + }, + "ttl": 29, + "loop_wait": 9, + "retry_timeout": 9, + "synchronous_mode": True + } + } + } + + try: + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_config) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + def compare_config(): + effective_config = k8s.patroni_rest(leader.metadata.name, "config") + desired_config = pg_patch_config["spec"]["patroni"] + desired_parameters = pg_patch_config["spec"]["postgresql"]["parameters"] + effective_parameters = effective_config["postgresql"]["parameters"] + self.assertEqual(desired_parameters["max_connections"], effective_parameters["max_connections"], + "max_connections not updated") + self.assertTrue(effective_config["slots"] is not None, "physical replication slot not added") + self.assertEqual(desired_config["ttl"], effective_config["ttl"], + "ttl not updated") + self.assertEqual(desired_config["loop_wait"], effective_config["loop_wait"], + "loop_wait not updated") + self.assertEqual(desired_config["retry_timeout"], effective_config["retry_timeout"], + "retry_timeout not updated") + self.assertEqual(desired_config["synchronous_mode"], effective_config["synchronous_mode"], + "synchronous_mode not updated") + return True + + # check if Patroni config has been updated + self.eventuallyTrue(compare_config, "Postgres config not applied") + + # make sure that pods were not recreated + leader = k8s.get_cluster_leader_pod() + replica = k8s.get_cluster_replica_pod() + self.assertEqual(masterCreationTimestamp, leader.metadata.creation_timestamp, + "Master pod creation timestamp is updated") + self.assertEqual(replicaCreationTimestamp, replica.metadata.creation_timestamp, + "Master pod creation timestamp is updated") + + # query max_connections setting + setting_query = """ + SELECT setting + FROM pg_settings + WHERE name = 'max_connections'; + """ + self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], new_max_connections_value, + "New max_connections setting not applied on master", 10, 5) + self.eventuallyNotEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, + "Expected max_connections not to be updated on replica since Postgres was restarted there first", 10, 5) + + # the next sync should restart the replica because it has pending_restart flag set + # force next sync by deleting the operator pod + k8s.delete_operator_pod() + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], new_max_connections_value, + "New max_connections setting not applied on replica", 10, 5) + + # decrease max_connections again + # this time restart will be correct and new value should appear on both instances + lower_max_connections_value = "30" + pg_patch_max_connections = { + "spec": { + "postgresql": { + "parameters": { + "max_connections": lower_max_connections_value + } + } + } + } + + 
k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_max_connections) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + # check Patroni config again + pg_patch_config["spec"]["postgresql"]["parameters"]["max_connections"] = lower_max_connections_value + self.eventuallyTrue(compare_config, "Postgres config not applied") + + # and query max_connections setting again + self.eventuallyEqual(lambda: self.query_database(leader.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, + "Previous max_connections setting not applied on master", 10, 5) + self.eventuallyEqual(lambda: self.query_database(replica.metadata.name, "postgres", setting_query)[0], lower_max_connections_value, + "Previous max_connections setting not applied on replica", 10, 5) + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + # make sure cluster is in a good state for further tests + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_running_pods(), 2, + "No 2 pods running") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_cross_namespace_secrets(self): + ''' + Test secrets in different namespace + ''' + k8s = self.k8s + + # enable secret creation in separate namespace + patch_cross_namespace_secret = { + "data": { + "enable_cross_namespace_secret": "true" + } + } + k8s.update_config(patch_cross_namespace_secret, + step="cross namespace secrets enabled") + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + + # create secret in test namespace + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'users':{ + 'test.db_user': [], + } + } + }) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, + "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label("cluster-name=acid-minimal-cluster,application=spilo", self.test_namespace), + 1, "Secret not created for user in namespace") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_enable_disable_connection_pooler(self): + ''' + For a database without connection pooler, then turns it on, scale up, + turn off and on again. Test with different ways of doing this (via + enableConnectionPooler or connectionPooler configuration section). At + the end turn connection pooler off to not interfere with other tests. 
+ ''' + k8s = self.k8s + pooler_label = 'application=db-connection-pooler,cluster-name=acid-minimal-cluster' + master_pooler_label = 'connection-pooler=acid-minimal-cluster-pooler' + replica_pooler_label = master_pooler_label + '-repl' + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': True, + 'enableReplicaConnectionPooler': True, + } + }) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 2, "Deployment replicas is 2 default") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), 2, "No pooler pods found") + self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), 2, "No pooler replica pods found") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), 2, "No pooler service found") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), 1, "Pooler secret not created") + + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableMasterPoolerLoadBalancer': True, + 'enableReplicaPoolerLoadBalancer': True, + } + }) + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + self.eventuallyEqual(lambda: k8s.get_service_type(master_pooler_label+","+pooler_label), + 'LoadBalancer', + "Expected LoadBalancer service type for master pooler pod, found {}") + self.eventuallyEqual(lambda: k8s.get_service_type(replica_pooler_label+","+pooler_label), + 'LoadBalancer', + "Expected LoadBalancer service type for replica pooler pod, found {}") + + # Turn off only master connection pooler + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': False, + 'enableReplicaConnectionPooler': True, + } + }) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(name="acid-minimal-cluster-pooler-repl"), 2, + "Deployment replicas is 2 default") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 0, "Master pooler pods not deleted") + self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), + 2, "Pooler replica pods not found") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), + 1, "No pooler service found") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), + 1, "Secret not created") + + # Turn off only replica connection pooler + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': True, + 'enableReplicaConnectionPooler': False, + 'enableMasterPoolerLoadBalancer': False, + } + }) + + self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync") + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 2, + "Deployment replicas is 2 default") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 2, "Master 
pooler pods not found") + self.eventuallyEqual(lambda: k8s.count_running_pods(replica_pooler_label), + 0, "Pooler replica pods not deleted") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), + 1, "No pooler service found") + self.eventuallyEqual(lambda: k8s.get_service_type(master_pooler_label+","+pooler_label), + 'ClusterIP', + "Expected LoadBalancer service type for master, found {}") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label(pooler_label), + 1, "Secret not created") + + # scale up connection pooler deployment + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'connectionPooler': { + 'numberOfInstances': 3, + }, + } + }) + + self.eventuallyEqual(lambda: k8s.get_deployment_replica_count(), 3, + "Deployment replicas is scaled to 3") + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 3, "Scale up of pooler pods does not work") + + # turn it off, keeping config should be overwritten by false + k8s.api.custom_objects_api.patch_namespaced_custom_object( + 'acid.zalan.do', 'v1', 'default', + 'postgresqls', 'acid-minimal-cluster', + { + 'spec': { + 'enableConnectionPooler': False, + 'enableReplicaConnectionPooler': False, + 'enableReplicaPoolerLoadBalancer': False, + } + }) + + self.eventuallyEqual(lambda: k8s.count_running_pods(master_pooler_label), + 0, "Pooler pods not scaled down") + self.eventuallyEqual(lambda: k8s.count_services_with_label(pooler_label), + 0, "Pooler service not removed") + self.eventuallyEqual(lambda: k8s.count_secrets_with_label('application=spilo,cluster-name=acid-minimal-cluster'), + 4, "Secrets not deleted") + + # Verify that all the databases have pooler schema installed. + # Do this via psql, since otherwise we need to deal with + # credentials. 
+        db_list = []
+
+        leader = k8s.get_cluster_leader_pod()
+        schemas_query = """
+            SELECT schema_name
+            FROM information_schema.schemata
+            WHERE schema_name = 'pooler'
+        """
+
+        db_list = self.list_databases(leader.metadata.name)
+        for db in db_list:
+            self.eventuallyNotEqual(lambda: len(self.query_database(leader.metadata.name, db, schemas_query)), 0,
+                "Pooler schema not found in database {}".format(db))
+
+        # remove config section to make test work next time
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            'acid.zalan.do', 'v1', 'default',
+            'postgresqls', 'acid-minimal-cluster',
+            {
+                'spec': {
+                    'connectionPooler': None,
+                    'enableReplicaConnectionPooler': False,
+                }
+            })
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_enable_load_balancer(self):
+        '''
+        Test if services are updated when enabling/disabling load balancers in Postgres manifest
+        '''
+
+        k8s = self.k8s
+        cluster_label = 'application=spilo,cluster-name=acid-minimal-cluster,spilo-role={}'
+
+        self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")),
+                             'ClusterIP',
+                             "Expected ClusterIP type initially, found {}")
+
+        try:
+            # enable load balancer services
+            pg_patch_enable_lbs = {
+                "spec": {
+                    "enableMasterLoadBalancer": True,
+                    "enableReplicaLoadBalancer": True
+                }
+            }
+            k8s.api.custom_objects_api.patch_namespaced_custom_object(
+                "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_lbs)
+
+            self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")),
+                                 'LoadBalancer',
+                                 "Expected LoadBalancer service type for master, found {}")
+
+            self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("replica")),
+                                 'LoadBalancer',
+                                 "Expected LoadBalancer service type for replica, found {}")
+
+            # disable load balancer services again
+            pg_patch_disable_lbs = {
+                "spec": {
+                    "enableMasterLoadBalancer": False,
+                    "enableReplicaLoadBalancer": False
+                }
+            }
+            k8s.api.custom_objects_api.patch_namespaced_custom_object(
+                "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_lbs)
+
+            self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("master")),
+                                 'ClusterIP',
+                                 "Expected ClusterIP service type for master, found {}")
+
+            self.eventuallyEqual(lambda: k8s.get_service_type(cluster_label.format("replica")),
+                                 'ClusterIP',
+                                 "Expected ClusterIP service type for replica, found {}")
+
+        except timeout_decorator.TimeoutError:
+            print('Operator log: {}'.format(k8s.get_operator_log()))
+            raise
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_infrastructure_roles(self):
+        '''
+        Test using external secrets for infrastructure roles
+        '''
+        k8s = self.k8s
+        # update infrastructure roles description
+        secret_name = "postgresql-infrastructure-roles"
+        roles = "secretname: postgresql-infrastructure-roles-new, userkey: user,"\
+            "rolekey: memberof, passwordkey: password, defaultrolevalue: robot_zmon"
+        patch_infrastructure_roles = {
+            "data": {
+                "infrastructure_roles_secret_name": secret_name,
+                "infrastructure_roles_secrets": roles,
+            },
+        }
+        k8s.update_config(patch_infrastructure_roles)
+        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"},
+                             "Operator does not get in sync")
+
+        try:
+            # check that new roles are represented in the config by requesting the
+            # operator configuration via API
+
+            def verify_role():
+                try:
+                    operator_pod = k8s.get_operator_pod()
+                    get_config_cmd = "wget --quiet -O - localhost:8080/config"
+                    result = k8s.exec_with_kubectl(operator_pod.metadata.name,
+                                                   get_config_cmd)
+                    try:
+                        roles_dict = (json.loads(result.stdout)
+                                      .get("controller", {})
+                                      .get("InfrastructureRoles"))
+                    except:
+                        return False
+
+                    if "robot_zmon_acid_monitoring_new" in roles_dict:
+                        role = roles_dict["robot_zmon_acid_monitoring_new"]
+                        role.pop("Password", None)
+                        self.assertDictEqual(role, {
+                            "Name": "robot_zmon_acid_monitoring_new",
+                            "Namespace": "",
+                            "Flags": None,
+                            "MemberOf": ["robot_zmon"],
+                            "Parameters": None,
+                            "AdminRole": "",
+                            "Origin": 2,
+                            "IsDbOwner": False,
+                            "Deleted": False
+                        })
+                        return True
+                except:
+                    pass
+
+                return False
+
+            self.eventuallyTrue(verify_role, "infrastructure role setup is not loaded")
+
+        except timeout_decorator.TimeoutError:
+            print('Operator log: {}'.format(k8s.get_operator_log()))
+            raise
+
+    @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
+    def test_lazy_spilo_upgrade(self):
+        '''
+        Test lazy upgrade for the Spilo image: operator changes a stateful set
+        but lets pods run with the old image until they are recreated for
+        reasons other than operator's activity. That works because the operator
+        configures stateful sets to use "onDelete" pod update policy.
+        The test covers:
+        1) enabling lazy upgrade in existing operator deployment
+        2) forcing the normal rolling upgrade by changing the operator
+           configmap and restarting its pod
+        '''
+
+        k8s = self.k8s
+
+        pod0 = 'acid-minimal-cluster-0'
+        pod1 = 'acid-minimal-cluster-1'
+
+        self.eventuallyEqual(lambda: k8s.count_running_pods(), 2,
+                             "No 2 pods running")
+        self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)),
+                             2, "Postgres status did not enter running")
+
+        patch_lazy_spilo_upgrade = {
+            "data": {
+                "docker_image": SPILO_CURRENT,
+                "enable_lazy_spilo_upgrade": "false"
+            }
+        }
+        k8s.update_config(patch_lazy_spilo_upgrade,
+                          step="Init baseline image version")
+
+        self.eventuallyEqual(lambda: k8s.get_statefulset_image(), SPILO_CURRENT,
+                             "Statefulset not updated initially")
+        self.eventuallyEqual(lambda: k8s.count_running_pods(), 2,
+                             "No 2 pods running")
+        self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)),
+                             2, "Postgres status did not enter running")
+
+        self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0),
+                             SPILO_CURRENT, "Rolling upgrade was not executed")
+        self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1),
+                             SPILO_CURRENT, "Rolling upgrade was not executed")
+
+        # update docker image in config and enable the lazy upgrade
+        conf_image = SPILO_LAZY
+        patch_lazy_spilo_upgrade = {
+            "data": {
+                "docker_image": conf_image,
+                "enable_lazy_spilo_upgrade": "true"
+            }
+        }
+        k8s.update_config(patch_lazy_spilo_upgrade,
+                          step="patch image and lazy upgrade")
+        self.eventuallyEqual(lambda: k8s.get_statefulset_image(), conf_image,
+                             "Statefulset not updated to next Docker image")
+
+        try:
+            # restart the pod to get a container with the new image
+            k8s.api.core_v1.delete_namespaced_pod(pod0, 'default')
+
+            # verify only pod-0 which was deleted got new image from statefulset
+            self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0),
+                                 conf_image, "Delete pod-0 did not get new spilo image")
+            self.eventuallyEqual(lambda: k8s.count_running_pods(), 2,
+                                 "No two pods running after lazy rolling upgrade")
+            self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)),
+                                 2, "Postgres status did not enter running")
+            self.assertNotEqual(k8s.get_effective_pod_image(pod1),
+                                conf_image,
+                                "pod-1 should not have changed Docker image to {}".format(conf_image))
{}".format(SPILO_CURRENT)) + + # clean up + unpatch_lazy_spilo_upgrade = { + "data": { + "enable_lazy_spilo_upgrade": "false", + } + } + k8s.update_config(unpatch_lazy_spilo_upgrade, step="patch lazy upgrade") + + # at this point operator will complete the normal rolling upgrade + # so we additionally test if disabling the lazy upgrade - forcing the normal rolling upgrade - works + self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod0), + conf_image, "Rolling upgrade was not executed", + 50, 3) + self.eventuallyEqual(lambda: k8s.get_effective_pod_image(pod1), + conf_image, "Rolling upgrade was not executed", + 50, 3) + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members(pod0)), + 2, "Postgres status did not enter running") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + def test_logical_backup_cron_job(self): + ''' + Ensure we can (a) create the cron job at user request for a specific PG cluster + (b) update the cluster-wide image for the logical backup pod + (c) delete the job at user request + Limitations: + (a) Does not run the actual batch job because there is no S3 mock to upload backups to + (b) Assumes 'acid-minimal-cluster' exists as defined in setUp + ''' + + k8s = self.k8s + + # create the cron job + schedule = "7 7 7 7 *" + pg_patch_enable_backup = { + "spec": { + "enableLogicalBackup": True, + "logicalBackupSchedule": schedule + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_enable_backup) + + try: + self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 1, "failed to create logical backup job") + + job = k8s.get_logical_backup_job().items[0] + self.assertEqual(job.metadata.name, "logical-backup-acid-minimal-cluster", + "Expected job name {}, found {}" + .format("logical-backup-acid-minimal-cluster", job.metadata.name)) + self.assertEqual(job.spec.schedule, schedule, + "Expected {} schedule, found {}" + .format(schedule, job.spec.schedule)) + + # update the cluster-wide image of the logical backup pod + image = "test-image-name" + patch_logical_backup_image = { + "data": { + "logical_backup_docker_image": image, + } + } + k8s.update_config(patch_logical_backup_image, step="patch logical backup image") + + def get_docker_image(): + jobs = k8s.get_logical_backup_job().items + return jobs[0].spec.job_template.spec.template.spec.containers[0].image + + self.eventuallyEqual(get_docker_image, image, + "Expected job image {}, found {}".format(image, "{}")) + + # delete the logical backup cron job + pg_patch_disable_backup = { + "spec": { + "enableLogicalBackup": False, + } + } + k8s.api.custom_objects_api.patch_namespaced_custom_object( + "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_disable_backup) + + self.eventuallyEqual(lambda: len(k8s.get_logical_backup_job().items), 0, "failed to create logical backup job") + + except timeout_decorator.TimeoutError: + print('Operator log: {}'.format(k8s.get_operator_log())) + raise + + # ensure cluster is healthy after tests + self.eventuallyEqual(lambda: len(k8s.get_patroni_running_members("acid-minimal-cluster-0")), 2, "Postgres status did not enter running") + + @timeout_decorator.timeout(TEST_TIMEOUT_SEC) + @unittest.skip("Skipping this test until fixed") + def test_major_version_upgrade(self): + k8s = self.k8s + result = 
+        self.eventuallyEqual(lambda: k8s.count_running_pods(labels="application=spilo,cluster-name=acid-upgrade-test"), 2, "No 2 pods running")
+        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+
+        pg_patch_version = {
+            "spec": {
+                "postgres": {
+                    "version": "14"
+                }
+            }
+        }
+        k8s.api.custom_objects_api.patch_namespaced_custom_object(
+            "acid.zalan.do", "v1", "default", "postgresqls", "acid-upgrade-test", pg_patch_version)
+
+        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
+
+        def check_version_14():
+            p = k8s.get_patroni_state("acid-upgrade-test-0")
+            version = p["server_version"][0:2]
+            return version
+
+        self.eventuallyEqual(check_version_14, "14", "Version was not upgraded to 14")
+
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_min_resource_limits(self):
         '''