30 changes: 28 additions & 2 deletions e2e/README.md
@@ -56,12 +56,24 @@ NOCLEANUP=True ./run.sh main tests.test_e2e.EndToEndTestCase.test_lazy_spilo_upg

## Inspecting Kind

If you want to inspect Kind/Kubernetes cluster, use the following script to exec into the K8s setup and then use `kubectl`
If you want to inspect the Kind/Kubernetes cluster, switch the `kubeconfig` file and context
```bash
# save the old config in case you have it
export KUBECONFIG_SAVED=$KUBECONFIG

# use the one created by e2e tests
export KUBECONFIG=/tmp/kind-config-postgres-operator-e2e-tests

# this kubeconfig defines a single context
kubectl config use-context kind-postgres-operator-e2e-tests
```
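
When you are done inspecting, you can restore the config you saved above:

```bash
export KUBECONFIG=$KUBECONFIG_SAVED
```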

Alternatively, use the following script to exec into the K8s setup and then use `kubectl`

```bash
./exec_into_env.sh

# use kube ctl
# use kubectl
kubectl get pods

# watch relevant objects
@@ -71,6 +83,14 @@ kubectl get pods
./scripts/get_logs.sh
```

If you want to inspect the state of the `kind` cluster manually with a single command, add the `--context` flag
```bash
kubectl get pods --context kind-postgres-operator-e2e-tests
```
or set the context for a few commands at once
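A minimal sketch of that, assuming the kubeconfig and context created by the e2e setup above:

```bash
# make the e2e context the default for every following command
kubectl config use-context kind-postgres-operator-e2e-tests

kubectl get pods
kubectl get services
```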

## Cleaning up Kind

To cleanup kind and start fresh
@@ -79,6 +99,12 @@ To cleanup kind and start fresh
e2e/run.sh cleanup
```

That also helps if you see the following error
```
ERROR: no nodes found for cluster "postgres-operator-e2e-tests"
```
which happens when the `kind` cluster was deleted manually but its configuration file was not.
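
If you are unsure which clusters `kind` still tracks, you can list them (assuming the `kind` CLI is on your `PATH`):

```bash
kind get clusters
```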

## Covered use cases

The current tests are all bundled in [`test_e2e.py`](tests/test_e2e.py):
24 changes: 12 additions & 12 deletions e2e/tests/k8s_api.py
@@ -11,9 +11,11 @@
from kubernetes import client, config
from kubernetes.client.rest import ApiException


def to_selector(labels):
return ",".join(["=".join(l) for l in labels.items()])


class K8sApi:

def __init__(self):
@@ -181,10 +183,10 @@ def count_deployments_with_label(self, labels, namespace='default'):
def count_pdbs_with_label(self, labels, namespace='default'):
return len(self.api.policy_v1_beta1.list_namespaced_pod_disruption_budget(
namespace, label_selector=labels).items)

def count_running_pods(self, labels='application=spilo,cluster-name=acid-minimal-cluster', namespace='default'):
pods = self.api.core_v1.list_namespaced_pod(namespace, label_selector=labels).items
return len(list(filter(lambda x: x.status.phase=='Running', pods)))
return len(list(filter(lambda x: x.status.phase == 'Running', pods)))

def wait_for_pod_failover(self, failover_targets, labels, namespace='default'):
pod_phase = 'Failing over'
@@ -210,9 +212,9 @@ def wait_for_logical_backup_job_deletion(self):
def wait_for_logical_backup_job_creation(self):
self.wait_for_logical_backup_job(expected_num_of_jobs=1)

def delete_operator_pod(self, step="Delete operator deplyment"):
operator_pod = self.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name
self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, time.time())}}}}})
def delete_operator_pod(self, step="Delete operator pod"):
# patching the pod template in the deployment restarts the operator pod
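# the timestamp makes the annotation value unique, so every patch triggers a fresh rollout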
self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, datetime.fromtimestamp(time.time()))}}}}})
self.wait_for_operator_pod_start()

def update_config(self, config_map_patch, step="Updating operator deployment"):
@@ -241,7 +243,7 @@ def get_patroni_state(self, pod):

def get_operator_state(self):
pod = self.get_operator_pod()
if pod == None:
if pod is None:
return None
pod = pod.metadata.name

@@ -251,7 +253,6 @@ def get_operator_state(self):

return json.loads(r.stdout.decode())


def get_patroni_running_members(self, pod="acid-minimal-cluster-0"):
result = self.get_patroni_state(pod)
return list(filter(lambda x: "State" in x and x["State"] == "running", result))
@@ -260,9 +261,9 @@ def get_deployment_replica_count(self, name="acid-minimal-cluster-pooler", names
try:
deployment = self.api.apps_v1.read_namespaced_deployment(name, namespace)
return deployment.spec.replicas
except ApiException as e:
except ApiException:
return None

def get_statefulset_image(self, label_selector="application=spilo,cluster-name=acid-minimal-cluster", namespace='default'):
ssets = self.api.apps_v1.list_namespaced_stateful_set(namespace, label_selector=label_selector, limit=1)
if len(ssets.items) == 0:
@@ -463,7 +464,6 @@ def wait_for_logical_backup_job_creation(self):
self.wait_for_logical_backup_job(expected_num_of_jobs=1)

def delete_operator_pod(self, step="Delete operator deplyment"):
operator_pod = self.api.core_v1.list_namespaced_pod('default', label_selector="name=postgres-operator").items[0].metadata.name
self.api.apps_v1.patch_namespaced_deployment("postgres-operator","default", {"spec":{"template":{"metadata":{"annotations":{"step":"{}-{}".format(step, time.time())}}}}})
self.wait_for_operator_pod_start()

@@ -521,7 +521,7 @@ def __init__(self, labels="name=postgres-operator", namespace="default"):
class K8sPostgres(K8sBase):
def __init__(self, labels="cluster-name=acid-minimal-cluster", namespace="default"):
super().__init__(labels, namespace)

def get_pg_nodes(self):
master_pod_node = ''
replica_pod_nodes = []
@@ -532,4 +532,4 @@ def get_pg_nodes(self):
elif pod.metadata.labels.get('spilo-role') == 'replica':
replica_pod_nodes.append(pod.spec.node_name)

return master_pod_node, replica_pod_nodes
return master_pod_node, replica_pod_nodes
54 changes: 28 additions & 26 deletions e2e/tests/test_e2e.py
@@ -2,15 +2,14 @@
import unittest
import time
import timeout_decorator
import subprocess
import warnings
import os
import yaml

from datetime import datetime
from kubernetes import client, config
from kubernetes import client

from tests.k8s_api import K8s
from kubernetes.client.rest import ApiException

SPILO_CURRENT = "registry.opensource.zalan.do/acid/spilo-12:1.6-p5"
SPILO_LAZY = "registry.opensource.zalan.do/acid/spilo-cdp-12:1.6-p114"
@@ -89,17 +88,17 @@ def setUpClass(cls):
# remove existing local storage class and create hostpath class
try:
k8s.api.storage_v1_api.delete_storage_class("standard")
except:
print("Storage class has already been remove")
except ApiException as e:
print("Failed to delete the 'standard' storage class: {0}".format(e))

# operator deploys pod service account there on start up
# needed for test_multi_namespace_support()
cls.namespace = "test"
cls.test_namespace = "test"
try:
v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.namespace))
v1_namespace = client.V1Namespace(metadata=client.V1ObjectMeta(name=cls.test_namespace))
k8s.api.core_v1.create_namespace(v1_namespace)
except:
print("Namespace already present")
except ApiException as e:
print("Failed to create the '{0}' namespace: {1}".format(cls.test_namespace, e))

# submit the most recent operator image built on the Docker host
with open("manifests/postgres-operator.yaml", 'r+') as f:
@@ -135,10 +134,8 @@ def setUpClass(cls):

# make sure we start a new operator on every new run,
# this tackles the problem when kind is reused
# and the Docker image is infact changed (dirty one)
# and the Docker image is in fact changed (dirty one)

# patch resync period, this can catch some problems with hanging e2e tests
# k8s.update_config({"data": {"resync_period":"30s"}},step="TestSuite setup")
k8s.update_config({}, step="TestSuite Startup")

actual_operator_image = k8s.api.core_v1.list_namespaced_pod(
@@ -170,9 +167,6 @@ def test_enable_disable_connection_pooler(self):
'connection-pooler': 'acid-minimal-cluster-pooler',
})

pod_selector = to_selector(pod_labels)
service_selector = to_selector(service_labels)

# enable connection pooler
k8s.api.custom_objects_api.patch_namespaced_custom_object(
'acid.zalan.do', 'v1', 'default',
@@ -347,7 +341,7 @@ def test_infrastructure_roles(self):
},
}
k8s.update_config(patch_infrastructure_roles)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

try:
# check that new roles are represented in the config by requesting the
@@ -604,17 +598,25 @@ def test_multi_namespace_support(self):

with open("manifests/complete-postgres-manifest.yaml", 'r+') as f:
pg_manifest = yaml.safe_load(f)
pg_manifest["metadata"]["namespace"] = self.namespace
pg_manifest["metadata"]["namespace"] = self.test_namespace
yaml.dump(pg_manifest, f, Dumper=yaml.Dumper)

try:
k8s.create_with_kubectl("manifests/complete-postgres-manifest.yaml")
k8s.wait_for_pod_start("spilo-role=master", self.namespace)
self.assert_master_is_unique(self.namespace, "acid-test-cluster")
k8s.wait_for_pod_start("spilo-role=master", self.test_namespace)
self.assert_master_is_unique(self.test_namespace, "acid-test-cluster")

except timeout_decorator.TimeoutError:
print('Operator log: {}'.format(k8s.get_operator_log()))
raise
finally:
# delete the new cluster so that the k8s_api.get_operator_state works correctly in subsequent tests
# ideally we should delete the 'test' namespace here but
# the pods inside the namespace get stuck in the Terminating state, making the test time out
k8s.api.custom_objects_api.delete_namespaced_custom_object(
"acid.zalan.do", "v1", self.test_namespace, "postgresqls", "acid-test-cluster")
time.sleep(5)


@timeout_decorator.timeout(TEST_TIMEOUT_SEC)
def test_zz_node_readiness_label(self):
@@ -746,12 +748,12 @@ def test_statefulset_annotation_propagation(self):
}
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_crd_annotations)

annotations = {
"deployment-time": "2020-04-30 12:00:00",
"downscaler/downtime_replicas": "0",
}

self.eventuallyTrue(lambda: k8s.check_statefulset_annotations(cluster_label, annotations), "Annotations missing")


@@ -823,14 +825,14 @@ def test_zzzz_cluster_deletion(self):
}
}
k8s.update_config(patch_delete_annotations)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

try:
# this delete attempt should be omitted because of missing annotations
k8s.api.custom_objects_api.delete_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster")
time.sleep(5)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

# check that pods and services are still there
k8s.wait_for_running_pods(cluster_label, 2)
@@ -841,7 +843,7 @@

# wait a little before proceeding
time.sleep(10)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

# add annotations to manifest
delete_date = datetime.today().strftime('%Y-%m-%d')
@@ -855,7 +857,7 @@
}
k8s.api.custom_objects_api.patch_namespaced_custom_object(
"acid.zalan.do", "v1", "default", "postgresqls", "acid-minimal-cluster", pg_patch_delete_annotations)
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0":"idle"}, "Operator does not get in sync")
self.eventuallyEqual(lambda: k8s.get_operator_state(), {"0": "idle"}, "Operator does not get in sync")

# wait a little before proceeding
time.sleep(20)
@@ -882,7 +884,7 @@ def test_zzzz_cluster_deletion(self):
print('Operator log: {}'.format(k8s.get_operator_log()))
raise

#reset configmap
# reset configmap
patch_delete_annotations = {
"data": {
"delete_annotation_date_key": "",
4 changes: 2 additions & 2 deletions pkg/cluster/cluster.go
@@ -626,14 +626,14 @@ func (c *Cluster) Update(oldSpec, newSpec *acidv1.Postgresql) error {
}
}()

if oldSpec.Spec.PostgresqlParam.PgVersion >= newSpec.Spec.PostgresqlParam.PgVersion {
if oldSpec.Spec.PostgresqlParam.PgVersion > newSpec.Spec.PostgresqlParam.PgVersion {
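// only an attempted downgrade lands here now; an unchanged version no longer triggers the warning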
c.logger.Warningf("postgresql version change(%q -> %q) has no effect",
oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
c.eventRecorder.Eventf(c.GetReference(), v1.EventTypeWarning, "PostgreSQL", "postgresql version change(%q -> %q) has no effect",
oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
// we need that hack to generate statefulset with the old version
newSpec.Spec.PostgresqlParam.PgVersion = oldSpec.Spec.PostgresqlParam.PgVersion
} else {
} else if oldSpec.Spec.PostgresqlParam.PgVersion < newSpec.Spec.PostgresqlParam.PgVersion {
c.logger.Infof("postgresql version increased (%q -> %q), major version upgrade can be done manually after StatefulSet Sync",
oldSpec.Spec.PostgresqlParam.PgVersion, newSpec.Spec.PostgresqlParam.PgVersion)
}