In [None]:
import pandas

from datetime import datetime, timedelta
from matplotlib import pyplot

In [None]:
# Read the measurement export into the notebook-wide `df` frame; the bare
# name on the last line makes the frame the cell's displayed output.
df = pandas.read_csv(filepath_or_buffer="first_part.csv")
df

In [None]:
# Overall time window of the data set: earliest startTime and latest endTime.
# Both are divided by 1000 before being passed to datetime.fromtimestamp,
# i.e. they are treated as epoch milliseconds. fromtimestamp returns naive
# datetimes expressed in the local timezone of the machine running the kernel.
start_timestamp = df["startTime"].min() / 1000
end_timestamp = df["endTime"].max() / 1000
start_datetime, end_datetime = (
    datetime.fromtimestamp(epoch_seconds)
    for epoch_seconds in (start_timestamp, end_timestamp)
)

print(start_datetime, end_datetime, sep="\n")

In [None]:
pyplot.figure(figsize=(10, 10))
# One line per uuid: the number of measurementID entries per distinct
# startTime, shifted up by the group's ordinal so the lines are stacked
# instead of drawn on top of each other.
for number, (uuid_key, lambdadf) in enumerate(df.groupby("uuid")):
    counts_per_start = lambdadf.groupby("startTime")["measurementID"].count()
    pyplot.plot(counts_per_start + number)
pyplot.xlabel("Timestamp")
pyplot.ylabel("Lambda invokation")

In [None]:
# Earliest startTime per uuid, scaled by 1/1000 (the same scaling this
# notebook applies before converting timestamps with datetime.fromtimestamp).
minstartdf = df.groupby("uuid")[["startTime"]].min().reset_index()
minstartdf["startTime"] = minstartdf["startTime"].div(1000)

pyplot.figure(figsize=(10, 10))
# y: each uuid's first start relative to the earliest first start overall;
# x: the row position of the uuid after reset_index().
start_offsets = minstartdf["startTime"] - minstartdf["startTime"].min()
pyplot.plot(minstartdf.index, start_offsets)

In [None]:
# Per-uuid span from the earliest startTime to the latest endTime, divided by
# 1000 (the same scaling used for timestamps elsewhere in this notebook).
uuid_time_columns = df.groupby("uuid")[["startTime", "endTime"]]
runtimedf = uuid_time_columns.agg(
    minstart=("startTime", "min"),
    maxend=("endTime", "max"),
).reset_index()
runtimedf["runtime"] = runtimedf["maxend"].sub(runtimedf["minstart"]).div(1000)
# Bare expression in the middle of the cell: by default only a cell's last
# expression is rendered, so this line does not display the frame.
runtimedf

pyplot.figure(figsize=(10, 10))
pyplot.plot(runtimedf.index, runtimedf["runtime"])

In [None]:
# Narrow `df` to the identifier, the time window and the user-CPU column.
cpu_columns = ["uuid", "startTime", "endTime", "cpuUsr"]
cpudf = df[cpu_columns]
cpudf

In [None]:
# Per-uuid CPU delta: for every row, the difference between the *next* row's
# cpuUsr and this row's cpuUsr within the same uuid (diff() then shift(-1)),
# so the last row of each uuid is NaN.
# NOTE(review): the column is named cpuPercent, but the code only computes a
# difference of consecutive cpuUsr samples - confirm the intended unit.
#
# `.assign` builds a new frame per group instead of writing a column into the
# frame handed out by groupby; writing into that frame can raise
# SettingWithCopyWarning and is ambiguous about what gets modified.
dfs = [
    curdf.assign(cpuPercent=curdf["cpuUsr"].diff().shift(-1))
    for _, curdf in cpudf.groupby("uuid")
]

# Groups are concatenated in groupby order and re-indexed 0..n-1, so the index
# used on the x-axis below is a plain row counter, not a timestamp.
cpupercentdf = pandas.concat(dfs).reset_index(drop=True)
# startTime is divided by 1000 before conversion (treated as epoch ms);
# fromtimestamp yields naive datetimes in the kernel machine's local timezone.
cpupercentdf["startDateTime"] = (cpupercentdf["startTime"] / pow(10, 3)).apply(datetime.fromtimestamp)
pyplot.plot(cpupercentdf.index, cpupercentdf["cpuPercent"])

In [None]:
# Independent copy of the columns used for the time-binned analysis, so that
# columns added to `metrics` later do not touch `df`.
metric_columns = ["startTime", "cpuUsr", "network_rx_bytes.vinternal_1"]
metrics = df[metric_columns].copy()
metrics

In [None]:
# Add a datetime view of startTime: the value is divided by 1000 (treated as
# epoch milliseconds) and converted with datetime.fromtimestamp, which yields
# naive datetimes in the local timezone of the machine running the kernel.
start_seconds = metrics["startTime"].div(10 ** 3)
metrics["startDateTime"] = start_seconds.apply(datetime.fromtimestamp)
metrics

In [None]:
# Bin edges for the per-second aggregation: start_datetime, start_datetime+1s,
# ... followed by end_datetime as the closing edge of the last (possibly
# shorter) bin.
#
# The loop uses a strict `<`: with `<=`, an end_datetime that falls exactly on
# a 1-second step would be appended by the loop *and* by the final append,
# producing a duplicate edge that pandas.cut rejects ("Bin edges must be
# unique").
timebins = []
current_datetime = start_datetime
while current_datetime < end_datetime:
    timebins.append(current_datetime)
    current_datetime += timedelta(seconds=1)
timebins.append(end_datetime)


In [None]:
# Assign every row to its time bin.
# pandas.cut builds right-closed intervals and, by default, excludes the very
# first edge. The first edge in `timebins` is derived from the minimum
# startTime, so without include_lowest=True the earliest sample(s) would get a
# NaN bin and silently drop out of every later groupby on "bins".
metrics["bins"] = pandas.cut(metrics["startDateTime"], timebins, include_lowest=True)
metrics

In [None]:
# Print each field of the row labelled 0 on its own line.
first_row = metrics.loc[0]
for cell_value in first_row:
    print(cell_value)

In [None]:
# Print how many `metrics` rows fall into each time bin.
# "bins" is categorical (it is produced by pandas.cut). observed=False is
# passed explicitly to pin the long-standing default, so the grouping does not
# depend on the installed pandas version and no FutureWarning is emitted.
for grouplabel, groupdf in metrics.groupby("bins", observed=False):
    print(f"Entries: {len(groupdf)}")

In [None]:
# Assign every row to its time bin.
# pandas.cut builds right-closed intervals and, by default, excludes the very
# first edge. The first edge in `timebins` is derived from the minimum
# startTime, so without include_lowest=True the earliest sample(s) would get a
# NaN bin and silently drop out of every later groupby on "bins".
cpupercentdf["bins"] = pandas.cut(cpupercentdf["startDateTime"], timebins, include_lowest=True)
cpupercentdf

In [None]:
# Print how many `cpupercentdf` rows fall into each time bin.
# "bins" is categorical (it is produced by pandas.cut). observed=False is
# passed explicitly to pin the long-standing default, so the grouping does not
# depend on the installed pandas version and no FutureWarning is emitted.
for grouplabel, groupdf in cpupercentdf.groupby("bins", observed=False):
    print(f"Entries: {len(groupdf)}")

In [None]:
# Per-bin totals: sum of cpuUsr and number of non-null cpuUsr samples.
# observed=False keeps bins that contain no rows (sum 0, count 0). Later cells
# plot this frame against its row index as a time axis, which is only valid if
# every bin has a row; newer pandas versions drop empty categories by default.
cpusumdf = (
    metrics
    .groupby("bins", observed=False)
    .agg(cpu_sum=("cpuUsr", "sum"), numlambdas=("cpuUsr", "count"))
    .reset_index()
)
cpusumdf

In [None]:
# Per-bin totals: sum of the cpuPercent deltas and number of non-null cpuUsr
# samples.
# observed=False keeps bins that contain no rows (sum 0, count 0). Later cells
# plot this frame against its row index as a time axis, which is only valid if
# every bin has a row; newer pandas versions drop empty categories by default.
cpusumdfpercent = (
    cpupercentdf
    .groupby("bins", observed=False)
    .agg(cpu_sum=("cpuPercent", "sum"), numlambdas=("cpuUsr", "count"))
    .reset_index()
)
cpusumdfpercent

In [None]:
# Summed cpuUsr per time bin, plotted against the bin's row position.
cpu_sum_series = cpusumdf["cpu_sum"]
pyplot.plot(cpusumdf.index, cpu_sum_series)

In [None]:
# For each bin: the next bin's cpu_sum minus this bin's (last row is NaN).
next_bin_delta = cpusumdf["cpu_sum"].diff().shift(periods=-1)
next_bin_delta.plot()

In [None]:
# Per-bin sum of the cpuPercent deltas on a 10x10 inch figure. The label is
# attached to the line but no legend is drawn in this cell.
pyplot.figure(figsize=(10, 10))
cpu_usage_series = cpusumdfpercent["cpu_sum"]
pyplot.plot(cpu_usage_series, label="CPU Usage")

In [None]:
# Received bytes summed per time bin.
# observed=False keeps bins without any rows (as 0), so the row index still
# corresponds to consecutive time increments when this frame is plotted;
# newer pandas versions drop empty categories by default.
networksumdf = (
    metrics[["bins", "network_rx_bytes.vinternal_1"]]
    .groupby("bins", observed=False)
    .sum()
    .reset_index()
)
networksumdf.plot()

In [None]:
# Running total of received bytes across time bins.
# observed=False keeps bins without any rows (they add 0 to the running
# total), so there is one row per bin regardless of the installed pandas
# version; newer versions drop empty categories by default.
networkcumsumdf = (
    metrics[["bins", "network_rx_bytes.vinternal_1"]]
    .groupby("bins", observed=False)
    .sum()
    .cumsum()
    .reset_index()
)
networkcumsumdf

In [None]:
# Running total of cpuUsr across time bins.
# observed=False keeps bins without any rows (they add 0 to the running
# total), so there is one row per bin regardless of the installed pandas
# version; newer versions drop empty categories by default.
cpucumsumdf = (
    metrics[["bins", "cpuUsr"]]
    .groupby("bins", observed=False)
    .sum()
    .cumsum()
    .reset_index()
)
cpucumsumdf.plot()

In [None]:
# Combined figure: per-bin CPU sum on the left axis and per-bin received data
# (bytes divided by 10**9) on a second y-axis sharing the same x-axis.
fig, ax = pyplot.subplots(figsize=(10, 10))

cpu_color = "green"
ax.plot(cpusumdfpercent["cpu_sum"], color=cpu_color)
ax.set_ylabel("CPU Usage (Percentage)", color=cpu_color)

ax2 = ax.twinx()
network_color = "blue"
received_gb = networksumdf["network_rx_bytes.vinternal_1"] / 10 ** 9
ax2.plot(received_gb, color=network_color)
ax2.set_ylabel("Transfered data (GB)", color=network_color)

ax.set_xlabel("Time increment (s)")

# Written relative to the kernel's working directory.
fig.savefig("cpuandnetwork.png", format="png", dpi=300, bbox_inches="tight")

In [None]:
# Number of non-null cpuUsr samples per time bin, saved as a PNG.
fig, ax = pyplot.subplots(figsize=(10, 10))
ax.plot(cpusumdfpercent["numlambdas"])
# `ax` is the current axes right after pyplot.subplots(), so the Axes methods
# below act on the same axes the pyplot-level helpers would.
ax.set_yticks([*range(0, 70, 5)])
ax.set_xlabel("Time increment(s)")
ax.set_ylabel("Number of Lambdas")
# Written relative to the kernel's working directory.
fig.savefig("numlambdas.png", format="png", dpi=300, bbox_inches="tight")