# Play with k8s cluster using python

- __Installation__
  * pip
  * kubernetes
  * pandas (Only needed for cluster data analysis)
  * k8sclient (wrapper for kubernetes)


- __Collect cluster information and data analysis with pandas__
  * setup
  * pod
  * node
  * service
  * etc.
 
  
- __Deploy pod/replicaset/service__
  * pods, volumes, services, etc.
  * replicaset
  * network connectivity test
  * file system test (fio), network throughput (iperf), pod stress test, etc.
  * service search
  * query api (hubot)

## Installation
#### [pip](https://pip.pypa.io/en/stable/installing/)
>Be cautious if you're using a Python install that's managed by your operating system or another package manager. get-pip.py does not coordinate with those tools, and may leave your system in an inconsistent state.
>To install pip via package manager, use package name python-pip
>Python 3 is recommended, though it has not been confirmed that the numpy/pandas packages work well with Python 3.

```shell
wget https://bootstrap.pypa.io/get-pip.py
python get-pip.py
```

#### [kubernetes](https://github.com/kubernetes-incubator/client-python/)
```shell
pip install kubernetes
```
> Useful documentation links
   * [auto generated docs](https://github.com/kubernetes-incubator/client-python/blob/master/kubernetes/README.md)
   * [api references](https://kubernetes.io/docs/api-reference/v1.6/)


#### [pandas](https://pandas.pydata.org/pandas-docs/stable/)
```shell
pip install pandas
```

#### k8sclient
```shell
git clone ssh://git@10.19.248.200:30884/DavidWang/k8sft.git
cd k8sft && pip install -e .
```

## Collect cluster information

### Setup

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd

### Pod information

#### all pods under specific namespace

In [None]:
from k8sclient.dataframes import collect_namespaced_pods
# Collect pod data for the "4tools" namespace only.
# NOTE(review): presumably returns the same table mapping as collect_all_pods
# (indexable by 'pod', ...) -- confirm against k8sclient.dataframes.
dfs = collect_namespaced_pods("4tools")
# Uncomment to display the pod table:
# dfs['pod']

#### collect all pods information

In [None]:
from k8sclient.dataframes import collect_all_pods
# `pods` is reused by the following cells, which index it by table name:
# pods['pod'], pods['container'] and pods['volume'].
pods = collect_all_pods()

#### list abnormal pods

In [None]:
# Join every container row to its pod row (pod 'uid' == container 'pod_id').
container = pd.merge(pods['pod'], pods['container'], left_on="uid", right_on="pod_id")
# Keep rows whose state is not 'running' or whose phase is not 'Running'.
bads = container[(container['state'] != 'running') | (container['phase'] != 'Running')]
# Displayed as the cell output.
bads[['namespace', 'pod', 'name', 'phase', 'state']]
# Optional: number of abnormal rows per (namespace, phase).
# c = bads.groupby(['namespace', "phase"]).size().reset_index().set_index(["namespace","phase"])
# c.columns = ['count']
# c

In [None]:
# Show the column names available in the pod table.
pods['pod'].columns
# Example filter (uses `container` from the previous cell): rows whose
# command contains "exe".
# container[container.command.str.contains("exe")]

In [None]:
# Count host-path volume usage per namespace.
v = pods['volume']
# Keep only host_path volumes; dropna(axis=1) then drops every column that
# still contains a missing value.
hp = v[v.vtype == "host_path"].dropna(axis=1)
# Attach pod information to each host-path volume row (inner join on pod uid).
hpp = pd.merge(pods['pod'], hp, left_on="uid", right_on="pod_id")
# Number of joined rows (one per pod/volume pair) per namespace, displayed as
# a frame indexed by namespace.
hpp[["name", "path", "host_ip", "namespace", "pod"]].groupby("namespace").size().reset_index().set_index("namespace")

In [None]:
# Find running containers that have no memory or CPU limit (value 0).
container = pd.merge(pods['pod'], pods['container'], left_on="uid", right_on="pod_id")
# Only rows that are both state 'running' and phase 'Running'.
runnings = container[(container['state'] == 'running') & (container['phase'] == 'Running')]
# lmemory / lcpu equal to 0 is treated as "no limit set".
bads = runnings[(runnings.lmemory == 0) | (runnings.lcpu == 0)]
# One row per namespace with the number of matching rows.
pd.DataFrame({"#pods without memory/cpu limit": bads.groupby("namespace").size()})

In [None]:
# List running containers whose CPU request (rcpu) is greater than 1.
# NOTE(review): the unit of rcpu is not visible here (presumably cores) -- confirm.
container = pd.merge(pods['pod'], pods['container'], left_on="uid", right_on="pod_id")
runnings = container[(container['state'] == 'running') & (container['phase'] == 'Running')]
bads = runnings[(runnings.rcpu > 1) ]
# Largest CPU request first.
bads[['namespace', "pod", 'rcpu', 'rmemory']].sort_values(by="rcpu", ascending=False)

#### simple plot

In [None]:
# Number of rows in the pod table per namespace, drawn as a bar chart.
bins = pods['pod'].groupby(['namespace']).size()
bins.plot(kind="bar", figsize=(20,10))


In [None]:
# Plot, per node, the requested CPU and memory as a percentage of the node's
# "a-cpu" / "a-memory" values.
container = pd.merge(pods['pod'], pods['container'], left_on="uid", right_on="pod_id")
# Stricter filter (also requires state 'running'), currently disabled:
# runnings = container[(container['state'] == 'running') & (container['phase'] == 'Running')]
# Only the phase is checked here.
runnings = container[(container['phase'] == 'Running')]
res = runnings[["host_ip", "rcpu", "rmemory",]]# "lcpu", "lmemory", "pod"]]
# res.groupby('host_ip').sum().plot(kind='bar')
# percentage
# Total rcpu / rmemory per host_ip.
a = res.groupby('host_ip').sum()
from k8sclient.dataframes import collect_nodes
# Note: this rebinds the global `nodes` to a two-column frame indexed by node name.
# NOTE(review): "a-cpu"/"a-memory" are presumably the allocatable amounts -- confirm.
nodes = collect_nodes().set_index("name")[["a-cpu", "a-memory"]]
# concat(axis=1) aligns on the index, so this relies on node names being equal
# to the host_ip values of the pods -- TODO confirm.
b = pd.concat([a, nodes], axis=1)
b["cpu-percent"] = b['rcpu']*100/b['a-cpu']
b["memory-percent"] = b['rmemory']*100/b['a-memory']
b[['cpu-percent', "memory-percent"]].plot(kind='bar', figsize=(12,8))

In [None]:
# Find running containers whose memory request or limit looks misconfigured:
# rmemory or lmemory greater than 64.
# NOTE(review): the unit of rmemory/lmemory is not visible here -- confirm.
container = pd.merge(pods['pod'], pods['container'], left_on="uid", right_on="pod_id")
runnings = container[(container['state'] == 'running') & (container['phase'] == 'Running')]
bads = runnings[(runnings.rmemory > 64) | (runnings.lmemory > 64)]
bads[['namespace', "pod", 'rmemory']]

In [None]:
# Bar chart of the number of pods in phase 'Running' per host_ip.
p = pods['pod']
runnings = p[p['phase'] == 'Running']
runnings.groupby('host_ip').size().plot(kind='bar')

In [None]:
# Bar chart of the number of running containers per host_ip.
# Uses the pod/container join, so each container contributes one row.
container = pd.merge(pods['pod'], pods['container'], left_on="uid", right_on="pod_id")
runnings = container[(container['state'] == 'running') & (container['phase'] == 'Running')]
runnings.groupby('host_ip').size().plot(kind='bar')

### read pod logs

In [None]:
from k8sclient.keywords import tail_pod_logs
# Fetch the last 20 log lines of pod "elasticsearch-data-734v4" in the
# "monitor-essential-service" namespace.
r = tail_pod_logs("monitor-essential-service", "elasticsearch-data-734v4", lines=20)
# To get the log as a list of lines instead:
# r.strip().split("\n")
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the statement form `print r` is a SyntaxError on Python 3.
print(r)

### wait for pod complete

In [None]:
from k8sclient.K8SClient import k8sclient
# Look up pod "client-1" in namespace "k8sft" and report whether it has completed.
# This is a single check: it does not poll or block until the pod finishes.
r = k8sclient.get_pod_info("k8sft", "client-1")
# print(...) with a single argument behaves the same on Python 2 and Python 3,
# whereas the statement form `print x` is a SyntaxError on Python 3.
print(r.status.phase == "Succeeded")

### Node information

In [None]:
from k8sclient.dataframes import collect_nodes
# Collect the node table; the following cells filter it by `name` and `nvidiagpu`.
nodes = collect_nodes()
# Uncomment for a compact per-node overview indexed by node name:
# nodes[["name","a-cpu", "a-memory", "a-pods", "c-cpu", "c-memory", "purpose", "role"]].set_index("name")

In [None]:
# Show every field of the node named '10.19.248.43'; .T transposes the result
# so the fields are listed as rows.
nodes[nodes.name=='10.19.248.43'].T

In [None]:
# List nodes whose nvidiagpu value is greater than 0, indexed by node name.
n = nodes.set_index("name")
n[n.nvidiagpu > 0][["a-cpu", "a-memory", "a-pods", "addition", "nvidiagpu"]]

### Service information

In [None]:
%matplotlib inline
from k8sclient.dataframes import collect_services
# collect_services() yields three tables that the following cells reuse.
# NOTE(review): `eips` presumably holds the services' external IPs -- confirm.
services, ports, eips = collect_services()
# Bar chart of the number of services per service type.
services.groupby("type").size().plot(kind="bar")

In [None]:
# Find services that expose the same (external_ip, port) pair.
import pandas as pd
# With no `on=` argument, merge joins on the columns the two frames have in common.
# NOTE(review): presumably that shared column is service_id -- confirm.
external_ports = pd.merge(ports, eips)
a = pd.merge(services, external_ports, left_on="uid", right_on="service_id")
b = a[['namespace', "name_x", "type", "port", "target_port", "external_ip"]]
# keep=False marks every member of a duplicated (external_ip, port) group,
# not only the second and later occurrences.
c = b[["external_ip", "port"]].duplicated(keep=False)
# Conflicting rows grouped together by external_ip and port.
b[c].set_index(["external_ip","port"]).sort_index()

In [None]:
# Find services that share the same node_port.
import pandas as pd
a = pd.merge(services, ports, left_on="uid", right_on="service_id")
b = a[['namespace', "name_x", "type", "node_port", "target_port"]]
# keep=False marks every member of a duplicated group. Rows whose node_port is
# the string "None" are excluded.
# NOTE(review): this assumes missing node ports are stored as the text "None"
# rather than as None/NaN -- confirm against collect_services.
c = b["node_port"].duplicated(keep=False) & (b.node_port != "None")
b[c].set_index(["node_port"]).sort_index()

In [None]:
# Find NodePort services whose node_port equals a port exposed on an external IP.
import pandas as pd
# With no `on=` argument, merge joins on the columns the two frames have in common.
external_ports = pd.merge(ports, eips)
a = pd.merge(services, ports, left_on="uid", right_on="service_id")
node_ports = a[a.type=="NodePort"][['namespace', "name_x", "type", "node_port", "target_port"]]
# The outer merge keeps unmatched rows from both sides with missing values;
# dropna() then discards every row that has any missing value, which removes
# those unmatched rows again.
pd.merge(node_ports, external_ports, left_on="node_port", right_on="port", how="outer").dropna()
# external_ports

In [None]:
# Bar chart of the number of services per namespace, one bar per service type.
# unstack() turns the types into columns; fillna(0) fills namespace/type
# combinations that have no services.
%matplotlib inline
services.groupby(["namespace", "type"]).size().unstack().fillna(0).plot(kind="bar", figsize=(12,8))

## Deployment

### Create pod

In [None]:
from k8sclient.Components import PodBuilder
from k8sclient.keywords import (
    wait_for_pod_state,
    RUNNING,
    delete_pod,
    NOT_FOUND
)

In [None]:
# Shared settings read by the deploy()/un_deploy() examples that follow.
namespace = "monkey"
image = "127.0.0.1:30100/library/python-tools:v20170619"
args = "loggen.py"
# Only used by the "Node select" example (passed to set_node).
node = "10.19.137.151"
# Used both as the pod name and as the container name.
name = "logstress"


def deploy():
    """Create the example pod with one container and wait for it to be RUNNING.

    Reads the module-level ``name``, ``namespace``, ``image`` and ``args``.
    The value 60 is passed as the third argument of ``wait_for_pod_state``
    (presumably a timeout in seconds -- confirm in k8sclient.keywords).
    """
    pod = PodBuilder(name, namespace)
    pod = pod.add_container(name, image=image, args=args)
    pod.deploy()
    wait_for_pod_state(namespace, name, 60, RUNNING)


def un_deploy():
    """Delete the example pod and wait until it is reported as NOT_FOUND.

    Reads the module-level ``name`` and ``namespace``. The value 60 is passed
    as the third argument of ``wait_for_pod_state`` (presumably a timeout in
    seconds -- confirm in k8sclient.keywords).
    """
    pod_namespace, pod_name = namespace, name
    delete_pod(pod_namespace, pod_name)
    wait_for_pod_state(pod_namespace, pod_name, 60, NOT_FOUND)

### Use volumes

In [None]:
from k8sclient.Components import (
    HostPathVolume, RBDVolume, CephFSVolume
)
# Host-path volume example.
# NOTE(review): the positional arguments are presumably
# (volume name, mount path in the container, path on the host) -- confirm
# against k8sclient.Components.
volume_hostpath = HostPathVolume(
        "containers",
        "/apt/containers",
        "/data/docker/containers"
    )

# Ceph connection settings shared by the RBD and CephFS volumes below.
ceph_monitors = "10.19.137.144:6789,10.19.137.145:6789,10.19.137.146:6789"
ceph_pool = "monkey"
ceph_fstype = "xfs"
ceph_secret = "ceph-secret"
# Ceph RBD volume example: image "default" from pool `ceph_pool`, created with
# read_only=True and sub_path "writetest".
volume_rbd = RBDVolume(
        "rbd",
        "/apt/data",
        fs_type=ceph_fstype,
        image="default",
        pool=ceph_pool,
        monitors=ceph_monitors,
        secret_name=ceph_secret,
        sub_path="writetest",
        read_only=True
    )
# CephFS volume example using the same monitors and secret.
volume_cephfs = CephFSVolume(
            "cephfs",
            "/tmp",
            monitors=ceph_monitors,
            secret_name=ceph_secret,
            fs_path="scrapy",
            sub_path="scrapy_name"
        )

def deploy():
    """Create the example pod with the host-path, RBD and CephFS volumes attached.

    Reads the module-level ``name``, ``namespace``, ``image``, ``args`` and the
    three ``volume_*`` objects. Unlike the basic example, this variant does not
    wait for the pod to reach any state.
    """
    attached_volumes = [volume_hostpath, volume_rbd, volume_cephfs]
    pod = PodBuilder(name, namespace)
    pod = pod.add_container(
        name,
        image=image,
        args=args,
        volumes=attached_volumes,
    )
    pod.deploy()

### Use service

In [None]:
from k8sclient.Components import ServicePort, ServiceBuilder

# Two ports: "dataport" 8125 over UDP and "httpport" 80 (no protocol given).
# NOTE(review): the two numbers are presumably (container port, service port)
# or the reverse -- confirm against k8sclient.Components.ServicePort.
udp_port = ServicePort("dataport", 8125, 8125, protocol="UDP")
http_port = ServicePort("httpport", 80, 80)
# One service per port; the HTTP service is created with service_type="NodePort".
udp_service = ServiceBuilder("graphite", namespace).add_port(udp_port)
http_service = ServiceBuilder("graphite-ui", namespace, service_type="NodePort").add_port(http_port)

def deploy():
    """Create the example pod exposing both ports, with both services attached.

    Reads the module-level ``name``, ``namespace``, ``image``, ``udp_port``,
    ``http_port``, ``udp_service`` and ``http_service``.

    Both services are attached through ``attache_service``, the spelling used
    by every other call in this notebook. The previous mix of
    ``attache_service`` and ``attach_service`` in one chain would fail with
    an AttributeError on whichever name the builder does not define.
    """
    PodBuilder(
        name,
        namespace,
    ).add_container(
        name=name,
        image=image,
        ports=[udp_port, http_port]
    ).attache_service(
        udp_service
    ).attache_service(
        http_service
    ).deploy()

### Env variables

In [None]:
def deploy():
    """Create the example pod, passing BATCH_SIZE and BATCH_COUNT to the container.

    Reads the module-level ``name``, ``namespace``, ``image`` and ``args``.
    NOTE(review): the extra keyword arguments are presumably turned into
    container environment variables by ``add_container`` -- confirm in
    k8sclient.Components.
    """
    pod = PodBuilder(name, namespace)
    pod = pod.add_container(
        name,
        image=image,
        args=args,
        BATCH_SIZE=10000,
        BATCH_COUNT=1000,
    )
    pod.deploy()

### Node select

In [None]:
def deploy():
    """Create the example pod after calling ``set_node(node)`` on the builder.

    Reads the module-level ``name``, ``namespace``, ``node``, ``image`` and
    ``args``. ``set_node`` is called before the container is added, as in the
    chained form of this example.
    """
    pod = PodBuilder(name, namespace)
    pod = pod.set_node(node)
    pod = pod.add_container(
        name,
        image=image,
        args=args,
        BATCH_SIZE=10000,
        BATCH_COUNT=1000,
    )
    pod.deploy()

### Replicaset example

In [None]:
from k8sclient.Components import ServicePort, ServiceBuilder, ReplicaSetBuilder

# Settings for the graphite example. These rebind the global `namespace`,
# `name` and `image` used by the earlier pod examples.
namespace = "k8sft"
name = "graphite"
image = "127.0.0.1:30100/yangtze/graphite-statsd"
# ports
udp_port = ServicePort("dataport", 8125, 8125, protocol="UDP")
http_port = ServicePort("httpport", 80, 80)
# service
udp_service = ServiceBuilder("graphite", namespace).add_port(udp_port)
http_service = ServiceBuilder("graphite-ui", namespace, service_type="NodePort").add_port(http_port)
# replica set
# One container named "<name>-container" exposing both ports, with both
# services attached and the hostname set to `name`. Nothing is created in the
# cluster here; that happens in deploy().
rs = ReplicaSetBuilder(
    name, namespace
).add_container(
    name=name + "-container",
    image=image,
    ports=[udp_port, http_port]
).attache_service(
    http_service
).attache_service(
    udp_service
).set_hostname(name)


def deploy():
    """Deploy the replica set first, then the UDP service, then the HTTP service."""
    for component in (rs, udp_service, http_service):
        component.deploy()


def un_deploy():
    """Remove the UDP service, then the HTTP service, and finally the replica set."""
    for component in (udp_service, http_service, rs):
        component.un_deploy()

### switch cluster

In [None]:
from k8sclient.keywords import switch_cluster, register_cluster
# Register two clusters under a short name, each with its own kubeconfig path.
register_cluster("yancheng", "~/.kube/config-yancheng")
register_cluster("shanghai", "~/.kube/config-shanghai")
# Select the "yancheng" cluster.
# NOTE(review): presumably all later k8sclient calls then target it -- confirm
# in k8sclient.keywords.
switch_cluster("yancheng")