## Check cluster network connectivity

* pod2pod connection on same node
* pod2pod connection on different node
* pod2service connection on same node
* pod2service connection on different node
* pod2node connection
* pod2service connection on same pod

### deployments
* 2 pod/service pairs on each node
* a global service that selects all pods

### Setup (Only run on ready nodes)

In [None]:
from k8sclient.keywords import list_ready_nodes
from k8sclient.Components import ServicePort
import datetime

# Only nodes that list_ready_nodes() reports take part in the check.
nodes = list_ready_nodes()

# Timestamp suffix appended to every resource name created by this run.
uid = datetime.datetime.now().strftime("-%Y-%m-%d-%H-%M-%S")

# Namespace and name of the service that every test pod is attached to.
namespace = "health-check"
global_service_name = "health-check" + uid

# Container settings shared by all test pods.
image = "127.0.0.1:30100/library/memcached:check"
args = "memcached -m 1028 -u root -v"

# 11211 is memcached's default port; the two numeric arguments are presumably
# the service port and the container port -- confirm against ServicePort.
client_port = ServicePort("clientport", 11211, 11211)

### Create a global service

In [None]:
from k8sclient.K8SClient import ServiceBuilder
# Builder for the service that every test pod gets attached to; it is only
# described here and is deployed in a later cell.
global_service = ServiceBuilder(
    global_service_name,
    namespace,
).add_port(client_port)

### Create 2 pods and services on each node

In [None]:
from k8sclient.Components import PodBuilder

def deploy(node):
    """Deploy two pod/service pairs pinned to ``node``.

    Resource names are built from the node name (dots replaced by dashes),
    the pair index (0 or 1) and the run-wide ``uid`` suffix. Each pod is
    attached both to its own service and to the global service.

    :param node: name of the node the pods are pinned to.
    """
    node_mark = node.replace(".", "-")
    for index in (0, 1):
        pod_name = "pod-%s-%d" % (node_mark, index) + uid
        service_name = "service-%s-%d" % (node_mark, index) + uid
        service = ServiceBuilder(service_name, namespace).add_port(client_port)

        pod = PodBuilder(pod_name, namespace).set_node(node)
        pod = pod.add_container(
            pod_name,
            image=image,
            args=args,
            ports=[client_port],
            # Zero requests -- presumably so the pods schedule on any node
            # regardless of its current load.
            requests={'cpu': '0', 'memory': '0'}
        )
        pod = pod.attache_service(service)
        pod = pod.attache_service(global_service)

        # The pod is deployed before the service that fronts it.
        pod.deploy()
        service.deploy()


### Deploy the pod/service pairs on each ready node

In [None]:
# Create the two pod/service pairs on every ready node.
for node in nodes:
    deploy(node)

### Deploy the global service

In [None]:
# Deploy the global service that deploy() attached to every test pod.
global_service.deploy()

### Connection test on same node

In [None]:
from k8sclient.keywords import get_pod_ip, pod_exec
import random


def check_service(namespace, pod, service):
    """Run ``/opt/check.sh <service>`` inside ``pod``, retrying once.

    :param namespace: namespace the pod lives in.
    :param pod: name of the pod the check is executed in.
    :param service: target handed to the check script.
    :return: the result of the first attempt if it is truthy, otherwise
        the result of the second attempt.
    """
    outcome = None
    for _attempt in range(2):
        outcome = pod_exec(namespace, pod, ["/opt/check.sh", service])
        if outcome:
            # A truthy result ends the check; no retry needed.
            break
    return outcome

def check_ping(namespace, pod, ip):
    """Ping ``ip`` from inside ``pod``, retrying once.

    The ping sends 2 packets (``-c 2``) with a 5 second deadline (``-w 5``).

    :param namespace: namespace the pod lives in.
    :param pod: name of the pod the ping is executed in.
    :param ip: address or host name to ping.
    :return: the result of the first attempt if it is truthy, otherwise
        the result of the second attempt.
    """
    ping_command = "ping %s -c 2 -w 5" % ip
    outcome = None
    for _attempt in range(2):
        outcome = pod_exec(namespace, pod, ["bash", "-c", ping_command])
        if outcome:
            # A truthy result ends the check; no retry needed.
            break
    return outcome

def check_pod(namespace, pod, targets):
    """Check that ``pod`` can reach every entry of ``targets``.

    Targets are tried in order with ``check_service`` and the scan stops at
    the first failure.

    :param namespace: namespace the pod lives in.
    :param pod: name of the pod the checks are executed in.
    :param targets: iterable of targets to connect to.
    :return: an error message naming the first unreachable target, or
        ``None`` when every target was reached.
    """
    for endpoint in targets:
        if check_service(namespace, pod, endpoint):
            continue
        return "Fail to connect %s on %s." % (endpoint, pod)
    return None
        
def check_local(node):
    """Check connectivity between the two test pods deployed on ``node``.

    One of the two pods is picked at random as the client. From it, the
    other pod's IP, the other pod's own service and the global service are
    checked in that order.

    :param node: name of the node whose pod pair is tested.
    :return: an error message for the first failed target, or ``None`` when
        every target was reached.
    """
    # Pick the client pod at random; the peer is the other pod of the pair.
    id_1 = random.randint(0, 1)
    id_2 = (id_1 + 1) % 2
    node_mark = "-".join(node.split("."))
    pod_1 = ("pod-%s-%d" % (node_mark, id_1)) + uid
    pod_2 = ("pod-%s-%d" % (node_mark, id_2)) + uid
    pod_2_ip = get_pod_ip(namespace, pod_2)
    pod_2_service = ("service-%s-%d" % (node_mark, id_2)) + uid
    return check_pod(namespace, pod_1, [pod_2_ip, pod_2_service, global_service_name])

In [None]:
# Run the same-node check on every ready node. Each node prints its name
# followed by the check result: an error message, or None on success.
# print() is called with a single argument so the output is identical on
# Python 2 and Python 3.
for node in nodes:
    print("checking %s" % node)
    print(check_local(node))

### Connection test across nodes

In [None]:
# Pod name -> pod IP, filled in by the cross-node loop below.
pod_ips = dict()
# Node name -> the dash-separated form used inside pod and service names.
node_marks = {name: name.replace(".", "-") for name in nodes}
def check_host(namespace, pod, hosts):
    """Check that ``pod`` can ping every entry of ``hosts``.

    Hosts are tried in order with ``check_ping`` and the scan stops at the
    first failure.

    :param namespace: namespace the pod lives in.
    :param pod: name of the pod the pings are executed in.
    :param hosts: iterable of addresses or host names to ping.
    :return: an error message naming the first unreachable host, or
        ``None`` when every host answered.
    """
    for address in hosts:
        if check_ping(namespace, pod, address):
            continue
        return "Fail to ping %s on %s." % (address, pod)
    return None
        
# For every ordered pair of distinct ready nodes, pick one random pod on each
# side and check, from the first pod:
#   * the second pod's IP and its own service (check_pod), and
#   * ping to both nodes themselves (check_host).
# Only failures are printed.
for node_1 in nodes:
    print("checking on %s" % node_1)
    id_1 = random.randint(0, 1)
    node_1_mark = node_marks[node_1]
    pod_1 = ("pod-%s-%d" % (node_1_mark, id_1)) + uid
    for node_2 in nodes:
        if node_1 == node_2:
            continue
        id_2 = random.randint(0, 1)
        node_2_mark = node_marks[node_2]
        pod_2 = ("pod-%s-%d" % (node_2_mark, id_2)) + uid
        pod_2_service = ("service-%s-%d" % (node_2_mark, id_2)) + uid
        # Look the IP up only on a cache miss. dict.get(key, default) would
        # evaluate get_pod_ip() on every iteration, cached or not.
        if pod_2 not in pod_ips:
            pod_ips[pod_2] = get_pod_ip(namespace, pod_2)
        pod_2_ip = pod_ips[pod_2]
        r = check_pod(namespace, pod_1, [pod_2_ip, pod_2_service])
        if r:
            print(r)
        r = check_host(namespace, pod_1, [node_1, node_2])
        if r:
            print(r)
        