From e10b2e31da9be84e9d3bfd4b74ba5cd62ef4e989 Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Mon, 9 Nov 2020 08:16:17 +0100
Subject: [PATCH 1/9] fix relevant flake8 violations

---
 e2e/tests/k8s_api.py  | 19 +++++++++----------
 e2e/tests/test_e2e.py | 21 ++++++++-------------
 2 files changed, 17 insertions(+), 23 deletions(-)

diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py
index f2abd8e0c..3f2e47cbf 100644
--- a/e2e/tests/k8s_api.py
+++ b/e2e/tests/k8s_api.py
@@ -11,9 +11,11 @@
 from kubernetes import client, config
 from kubernetes.client.rest import ApiException
 
+
 def to_selector(labels):
     return ",".join(["=".join(l) for l in labels.items()])
 
+
 class K8sApi:
 
     def __init__(self):
@@ -181,10 +183,10 @@ def count_deployments_with_label(self, labels, namespace='default'):
 
     def count_pdbs_with_label(self, labels, namespace='default'):
         return len(self.api.policy_v1_beta1.list_namespaced_pod_disruption_budget(
             namespace, label_selector=labels).items)
-    
+
     def count_running_pods(self, labels='application=spilo,cluster-name=acid-minimal-cluster', namespace='default'):
         pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
-        return len(list(filter(lambda x: x.status.phase=='Running', pods)))
+        return len(list(filter(lambda x: x.status.phase == 'Running', pods)))
 
     def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
         pod_phase = 'Failing over'
@@ -211,7 +213,6 @@ def wait_for_logical_backup_job_creation(self):
         self.wait_for_logical_backup_job(expected_num_of_jobs=1)
 
     def delete_operator_pod(self, step="Delete operator deplyment"):
-        operator_pod = self.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name
         self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, time.time())}}}}})
         self.wait_for_operator_pod_start()
@@ -241,7 +242,7 @@ def get_patroni_state(self, pod):
 
     def get_operator_state(self):
         pod = self.get_operator_pod()
-        if pod == None:
+        if pod is None:
             return None
         pod = pod.metadata.name
@@ -251,7 +252,6 @@ def get_operator_state(self):
 
         return json.loads(r.stdout.decode())
 
-
     def get_patroni_running_members(self, pod="acid-minimal-cluster-0"):
         result = self.get_patroni_state(pod)
         return list(filter(lambda x: "State" in x and x["State"] == "running", result))
@@ -260,9 +260,9 @@ def get_deployment_replica_count(self, name="acid-minimal-cluster-pooler", names
         try:
             deployment = self.api.apps_v1.read_namespaced_deployment(name, namespace)
             return deployment.spec.replicas
-        except ApiException as e:
+        except ApiException:
             return None
-    
+
     def get_statefulset_image(self, label_selector="application=spilo,cluster-name=acid-minimal-cluster", namespace='default'):
         ssets = self.api.apps_v1.list_namespaced_stateful_set(namespace, label_selector=label_selector, limit=1)
         if len(ssets.items) == 0:
@@ -463,7 +463,6 @@ def wait_for_logical_backup_job_creation(self):
         self.wait_for_logical_backup_job(expected_num_of_jobs=1)
 
     def delete_operator_pod(self, step="Delete operator deplyment"):
-        operator_pod = self.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name
         self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, time.time())}}}}})
         self.wait_for_operator_pod_start()
@@ -521,7 +520,7 @@ def __init__(self, labels="name=postgres-operator", namespace="default"):
 
 class K8sPostgres(K8sBase):
     def __init__(self, labels="cluster-name=acid-minimal-cluster", namespace="default"):
         super().__init__(labels, namespace)
-    
+
     def get_pg_nodes(self):
         master_pod_node = ''
         replica_pod_nodes = []
@@ -532,4 +531,4 @@ def get_pg_nodes(self):
             elif pod.metadata.labels.get('spilo-role') == 'replica':
                 replica_pod_nodes.append(pod.spec.node_name)
 
-        return master_pod_node, replica_pod_nodes
\ No newline at end of file
+        return master_pod_node, replica_pod_nodes

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 05cc09a70..4b7f33f3c 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -2,13 +2,11 @@
 import unittest
 import time
 import timeout_decorator
-import subprocess
-import warnings
 import os
 import yaml
 
 from datetime import datetime
-from kubernetes import client, config
+from kubernetes import client
 
 from tests.k8s_api import K8s
@@ -170,9 +168,6 @@ def test_enable_disable_connection_pooler(self):
             'connection-pooler': 'acid-minimal-cluster-pooler',
         })
 
-        pod_selector = to_selector(pod_labels)
-        service_selector = to_selector(service_labels)
-
         # enable connection pooler
         k8s.api.custom_objects_api.patch_namespaced_custom_object(
             'acid.zalan.do', 'v1', 'default',
@@ -746,12 +741,12 @@ def test_statefulset_annotation_propagation(self):
         }
         k8s.api.custom_objects_api.patch_namespaced_custom_object(
             "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_crd_annotations)
-    
+
         annotations = {
             "deployment-time": "2020-04-30 12:00:00",
             "downscaler/downtime_replicas": "0",
         }
-    
+
         self.eventuallyTrue(lambda: k8s.check_statefulset_annotations(cluster_label, annotations), "Annotations missing")
@@ -823,14 +818,14 @@ def test_zzzz_cluster_deletion(self):
             }
         }
         k8s.update_config(patch_delete_annotations)
-        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
+        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
 
         try:
             # this delete attempt should be omitted because of missing annotations
             k8s.api.custom_objects_api.delete_namespaced_custom_object(
                 "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster")
             time.sleep(5)
-            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
+            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
 
             # check that pods and services are still there
             k8s.wait_for_running_pods(cluster_label, 2)
@@ -841,7 +836,7 @@ def test_zzzz_cluster_deletion(self):
 
             # wait a little before proceeding
             time.sleep(10)
-            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
+            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
 
             # add annotations to manifest
             delete_date = datetime.today().strftime('%Y-%m-%d')
@@ -855,7 +850,7 @@ def test_zzzz_cluster_deletion(self):
             }
             k8s.api.custom_objects_api.patch_namespaced_custom_object(
                 "acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_delete_annotations)
-            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
+            self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
 
             # wait a little before proceeding
             time.sleep(20)
@@ -882,7 +877,7 @@ def test_zzzz_cluster_deletion(self):
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
 
-        #reset configmap
+        # reset configmap
         patch_delete_annotations = {
             "data": {
                 "delete_annotation_date_key": "",
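A side note on the `to_selector` helper that patch 1 separates out with proper blank lines (its unused call sites in `test_e2e.py` were removed above): it flattens a label dict into the comma-separated selector string that the Kubernetes list calls expect. A minimal standalone sketch — the sample labels here are only an illustration:

```python
# to_selector as it appears in e2e/tests/k8s_api.py: each dict item is a
# (key, value) tuple, joined with "=" and then with "," into one selector.
def to_selector(labels):
    return ",".join(["=".join(l) for l in labels.items()])


if __name__ == "__main__":
    labels = {"application": "spilo", "cluster-name": "acid-minimal-cluster"}
    # prints: application=spilo,cluster-name=acid-minimal-cluster
    print(to_selector(labels))
```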
From af412e7b2f62cd257dcd69b8ad5dddc39fe3199e Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Mon, 9 Nov 2020 08:23:13 +0100
Subject: [PATCH 2/9] log actual exceptions on debugging

---
 e2e/tests/test_e2e.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 4b7f33f3c..2e44f6e86 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -9,6 +9,7 @@
 from kubernetes import client
 
 from tests.k8s_api import K8s
+from kubernetes.client.rest import ApiException
 
 SPILO_CURRENT = "registry.opensource.zalan.do/acid/spilo-12:1.6-p5"
 SPILO_LAZY = "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p114"
@@ -87,8 +88,8 @@ def setUpClass(cls):
         # remove existing local storage class and create hostpath class
         try:
             k8s.api.storage_v1_api.delete_storage_class("standard")
-        except:
-            print("Storage class has already been remove")
+        except ApiException as e:
+            print("Failed to delete the 'standard' storage class: {0}".format(e))
 
         # operator deploys pod service account there on start up
         # needed for test_multi_namespace_support()
@@ -96,8 +97,8 @@ def setUpClass(cls):
         cls.namespace = "test"
         try:
             v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.namespace))
             k8s.api.core_v1.create_namespace(v1_namespace)
-        except:
-            print("Namespace already present")
+        except ApiException as e:
+            print("Failed to create the '{0}' namespace: {1}".format(cls.namespace, e))
 
         # submit the most recent operator image built on the Docker host
         with open("manifests/postgres-operator.yaml", 'r+') as f:

From 1e3938ca7f984e18d4c70517ad119742679c0896 Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Mon, 9 Nov 2020 08:26:00 +0100
Subject: [PATCH 3/9] rename namespace variable to be more descriptive

---
 e2e/tests/test_e2e.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 2e44f6e86..2a2680e11 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -93,12 +93,12 @@ def setUpClass(cls):
 
         # operator deploys pod service account there on start up
         # needed for test_multi_namespace_support()
-        cls.namespace = "test"
+        cls.test_namespace = "test"
         try:
-            v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.namespace))
+            v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.test_namespace))
             k8s.api.core_v1.create_namespace(v1_namespace)
         except ApiException as e:
-            print("Failed to create the '{0}' namespace: {1}".format(cls.namespace, e))
+            print("Failed to create the '{0}' namespace: {1}".format(cls.test_namespace, e))
 
         # submit the most recent operator image built on the Docker host
         with open("manifests/postgres-operator.yaml", 'r+') as f:
@@ -600,13 +600,13 @@ def test_multi_namespace_support(self):
 
         with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
             pg_manifest = yaml.safe_load(f)
-            pg_manifest["metadata"]["namespace"] = self.namespace
+            pg_manifest["metadata"]["namespace"] = self.test_namespace
             yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)
 
         try:
             k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
-            k8s.wait_for_pod_start("spilo-role=master", self.namespace)
-            self.assert_master_is_unique(self.namespace, "acid-test-cluster")
+            k8s.wait_for_pod_start("spilo-role=master", self.test_namespace)
+            self.assert_master_is_unique(self.test_namespace, "acid-test-cluster")
         except timeout_decorator.TimeoutError:
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
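Patch 2 above replaces bare `except:` clauses with `ApiException` handlers so that real API failures surface in the test logs instead of being swallowed. One step further would be to treat only the expected failure as benign; the sketch below illustrates that idea under the assumption that the caller passes in a `CoreV1Api` client — the 409 check is an illustration, not part of the patch series:

```python
from kubernetes import client
from kubernetes.client.rest import ApiException


def ensure_namespace(core_v1, name):
    """Create a namespace, tolerating only the 'already exists' case."""
    try:
        metadata = client.V1ObjectMeta(name=name)
        core_v1.create_namespace(client.V1Namespace(metadata=metadata))
    except ApiException as e:
        if e.status == 409:  # HTTP 409 Conflict: the namespace is already there
            print("Namespace '{0}' already exists".format(name))
        else:
            print("Failed to create the '{0}' namespace: {1}".format(name, e))
            raise
```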
From f0542ba9952b1d71de1f71a225d23e5ece879301 Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Mon, 9 Nov 2020 08:48:08 +0100
Subject: [PATCH 4/9] don't log a msg about version change if the version has not actually changed

---
 pkg/cluster/cluster.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/cluster/cluster.go b/pkg/cluster/cluster.go
index 18778fd41..1764ea726 100644
--- a/pkg/cluster/cluster.go
+++ b/pkg/cluster/cluster.go
@@ -626,14 +626,14 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
 		}
 	}()
 
-	if oldSpec.Spec.PostgresqlParam.PgVersion >= newSpec.Spec.PostgresqlParam.PgVersion {
+	if oldSpec.Spec.PostgresqlParam.PgVersion > newSpec.Spec.PostgresqlParam.PgVersion {
 		c.logger.Warningf("postgresql version change(%q -> %q) has no effect",
 			oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
 		c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "PostgreSQL",
 			"postgresql version change(%q -> %q) has no effect",
 			oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
 		// we need that hack to generate statefulset with the old version
 		newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
-	} else {
+	} else if oldSpec.Spec.PostgresqlParam.PgVersion < newSpec.Spec.PostgresqlParam.PgVersion {
 		c.logger.Infof("postgresql version increased (%q -> %q), major version upgrade can be done manually after StatefulSet Sync",
 			oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
 	}

From 554fa2b5307e2b28d550490688e9fd88a22f541c Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Tue, 10 Nov 2020 07:39:42 +0100
Subject: [PATCH 5/9] document debugging

---
 e2e/README.md | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/e2e/README.md b/e2e/README.md
index ce8931f62..3bba6ccc3 100644
--- a/e2e/README.md
+++ b/e2e/README.md
@@ -56,12 +56,24 @@ NOCLEANUP=True ./run.sh main tests.test_e2e.EndToEndTestCase.test_lazy_spilo_upg
 
 ## Inspecting Kind
 
-If you want to inspect Kind/Kubernetes cluster, use the following script to exec into the K8s setup and then use `kubectl`
+If you want to inspect the Kind/Kubernetes cluster, switch the `kubeconfig` file and context
+```bash
+# save the old config in case you have it
+export KUBECONFIG_SAVED=$KUBECONFIG
+
+# use the one created by e2e tests
+export KUBECONFIG=/tmp/kind-config-postgres-operator-e2e-tests
+
+# this kubeconfig defines a single context
+kubectl config use-context kind-postgres-operator-e2e-tests
+```
+
+or use the following script to exec into the K8s setup and then use `kubectl`
 
 ```bash
 ./exec_into_env.sh
 
-# use kube ctl
+# use kubectl
 kubectl get pods
 
 # watch relevant objects
@@ -71,6 +83,14 @@ kubectl get pods
 ./scripts/get_logs.sh
 ```
 
+If you want to inspect the state of the `kind` cluster manually with a single command, add a `context` flag
+```bash
+kubectl get pods --context kind-postgres-operator-e2e-tests
+```
+or set the context for a few commands at once with `kubectl config use-context` as shown above.
+
+
 ## Cleaning up Kind
 
 To cleanup kind and start fresh
@@ -79,6 +99,12 @@ To cleanup kind and start fresh
 e2e/run.sh cleanup
 ```
 
+That also helps in case you see the following error
+```
+ERROR: no nodes found for cluster "postgres-operator-e2e-tests"
+```
+that happens when the `kind` cluster was deleted manually but its configuration file was not.
+
 ## Covered use cases
 
 The current tests are all bundled in [`test_e2e.py`](tests/test_e2e.py):
From 6f4ea06584e6d1c8e06d584f9b3c42bfbd692837 Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Tue, 10 Nov 2020 08:14:25 +0100
Subject: [PATCH 6/9] document pod deletion

---
 e2e/tests/k8s_api.py  | 5 +++--
 e2e/tests/test_e2e.py | 6 ++----
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/e2e/tests/k8s_api.py b/e2e/tests/k8s_api.py
index 3f2e47cbf..93280dd53 100644
--- a/e2e/tests/k8s_api.py
+++ b/e2e/tests/k8s_api.py
@@ -212,8 +212,9 @@ def wait_for_logical_backup_job_deletion(self):
     def wait_for_logical_backup_job_creation(self):
         self.wait_for_logical_backup_job(expected_num_of_jobs=1)
 
-    def delete_operator_pod(self, step="Delete operator deplyment"):
-        self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, time.time())}}}}})
+    def delete_operator_pod(self, step="Delete operator pod"):
+        # patching the pod template in the deployment restarts the operator pod
+        self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, datetime.fromtimestamp(time.time()))}}}}})
         self.wait_for_operator_pod_start()
 
     def update_config(self, config_map_patch, step="Updating operator deployment"):

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 2a2680e11..2344004a8 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -134,10 +134,8 @@ def setUpClass(cls):
 
         # make sure we start a new operator on every new run,
         # this tackles the problem when kind is reused
-        # and the Docker image is infact changed (dirty one)
+        # and the Docker image is in fact changed (dirty one)
 
-        # patch resync period, this can catch some problems with hanging e2e tests
-        # k8s.update_config({"data": {"resync_period":"30s"}},step="TestSuite setup")
         k8s.update_config({}, step="TestSuite Startup")
 
         actual_operator_image = k8s.api.core_v1.list_namespaced_pod(
@@ -343,7 +341,7 @@ def test_infrastructure_roles(self):
             },
         }
         k8s.update_config(patch_infrastructure_roles)
-        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
+        self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")
 
         try:
             # check that new roles are represented in the config by requesting the

From b47232910121dc7393d1382b0faabd8c441f4387 Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Tue, 10 Nov 2020 08:23:44 +0100
Subject: [PATCH 7/9] clean up after test_multi_namespace support

---
 e2e/tests/test_e2e.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 2344004a8..7e260e679 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -609,6 +609,8 @@ def test_multi_namespace_support(self):
         except timeout_decorator.TimeoutError:
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
+        finally:
+            k8s.api.core_v1.delete_namespace(self.test_namespace)
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_zz_node_readiness_label(self):
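The two patches below finish the cleanup with a fixed `time.sleep(5)` after deleting the cluster. As a sketch, the wait could instead poll the `count_running_pods` helper from patch 1 until the pods are actually gone — the timeout and interval values here are arbitrary assumptions, not taken from the test suite:

```python
import time


def wait_for_cluster_teardown(k8s, labels, namespace="test",
                              timeout_sec=60, interval_sec=2):
    """Poll until no pod matching the labels is Running, instead of a fixed sleep."""
    deadline = time.time() + timeout_sec
    while time.time() < deadline:
        # count_running_pods is defined on the K8s helper in e2e/tests/k8s_api.py
        if k8s.count_running_pods(labels=labels, namespace=namespace) == 0:
            return
        time.sleep(interval_sec)
    raise TimeoutError("pods matching '{0}' still running after {1}s".format(labels, timeout_sec))
```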
From e2fda96eedb5a57ecd7543f6c9d86ecf1ab418cd Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Wed, 11 Nov 2020 06:42:47 +0100
Subject: [PATCH 8/9] delete the cluster and not the namespace

---
 e2e/tests/test_e2e.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 7e260e679..11d24c1a7 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -610,7 +610,13 @@ def test_multi_namespace_support(self):
             print('Operator log: {}'.format(k8s.get_operator_log()))
             raise
         finally:
-            k8s.api.core_v1.delete_namespace(self.test_namespace)
+            # delete the new cluster so that k8s_api.get_operator_state works correctly in subsequent tests
+            # ideally we should delete the 'test' namespace here, but the pods
+            # inside the namespace get stuck in the Terminating state, making the test time out
+            k8s.api.custom_objects_api.delete_namespaced_custom_object(
+                "acid.zalan.do", "v1", self.test_namespace, "postgresqls", "acid-minimal-cluster")
+            time.sleep(5)
+
 
     @timeout_decorator.timeout(TEST_TIMEOUT_SEC)
     def test_zz_node_readiness_label(self):

From c0d923c452b15bfd083ac0e1b445dc2b2d2a155d Mon Sep 17 00:00:00 2001
From: Sergey Dudoladov
Date: Wed, 11 Nov 2020 06:47:54 +0100
Subject: [PATCH 9/9] delete correct cluster

---
 e2e/tests/test_e2e.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/e2e/tests/test_e2e.py b/e2e/tests/test_e2e.py
index 11d24c1a7..0fc60bf42 100644
--- a/e2e/tests/test_e2e.py
+++ b/e2e/tests/test_e2e.py
@@ -614,7 +614,7 @@ def test_multi_namespace_support(self):
             # ideally we should delete the 'test' namespace here, but the pods
             # inside the namespace get stuck in the Terminating state, making the test time out
             k8s.api.custom_objects_api.delete_namespaced_custom_object(
-                "acid.zalan.do", "v1", self.test_namespace, "postgresqls", "acid-minimal-cluster")
+                "acid.zalan.do", "v1", self.test_namespace, "postgresqls", "acid-test-cluster")
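For context on the `{"0": "idle"}` assertions used throughout the series: `get_operator_state` (patch 1) parses the operator's per-worker status JSON, and the tests poll it until every worker reports `idle`. A sketch of that polling idea follows — the name mirrors the suite's `eventuallyEqual` helper, but this body is a plausible reimplementation under stated assumptions, not the project's code:

```python
import time


def eventually_equal(fetch, expected, message, timeout_sec=30, interval_sec=1):
    """Re-evaluate fetch() until it equals `expected` or the timeout expires."""
    deadline = time.time() + timeout_sec
    last = None
    while time.time() < deadline:
        last = fetch()
        if last == expected:
            return
        time.sleep(interval_sec)
    raise AssertionError("{0}: got {1!r}, expected {2!r}".format(message, last, expected))


# usage, assuming a K8s helper object like the one in e2e/tests/k8s_api.py:
# eventually_equal(lambda: k8s.get_operator_state(), {"0": "idle"},
#                  "Operator does not get in sync")
```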