In [None]:
import pandas

from datetime import datetime, timedelta
from matplotlib import pyplot

In [None]:
# Load the raw measurement table; the trailing expression lists the available
# columns as the cell output.
df = pandas.read_csv(filepath_or_buffer="cpu_bound_4_times_1e9.csv")
df.columns

In [None]:
# Overall measurement window. The division by 1000 converts the startTime /
# endTime columns to the seconds that datetime.fromtimestamp expects (i.e. the
# code treats them as epoch milliseconds).
start_timestamp, end_timestamp = (
    df["startTime"].min() / 1000,
    df["endTime"].max() / 1000,
)
# fromtimestamp yields naive datetimes in the machine's local timezone.
start_datetime, end_datetime = (
    datetime.fromtimestamp(start_timestamp),
    datetime.fromtimestamp(end_timestamp),
)

print(start_datetime, end_datetime, sep="\n")
print(f"Total runtime: {round(end_timestamp - start_timestamp, 2)} seconds")

In [None]:
pyplot.figure(figsize=(10, 10))
# One line per uuid: the number of rows recorded at each startTime, shifted
# upwards by the group's position so the lines are stacked instead of overlapping.
for offset, (_, group) in enumerate(df.groupby("uuid")):
    rows_per_start = group.groupby("startTime")["measurementID"].count()
    pyplot.plot(rows_per_start + offset)
pyplot.xlabel("Timestamp")
pyplot.ylabel("Lambda invokation")

In [None]:
# Earliest startTime per uuid, converted from milliseconds to seconds.
minstartdf = (
    df.groupby("uuid")[["startTime"]]
    .min()
    .reset_index()
    .assign(startTime=lambda frame: frame["startTime"] / 1000)
)
# Offset of every uuid's first start from the earliest start overall.
start_offsets = minstartdf["startTime"] - minstartdf["startTime"].min()

pyplot.figure(figsize=(12, 12))
pyplot.title("Starting time of each lambda relative to the first one")
pyplot.xlabel("Lambda index")
pyplot.ylabel("Time (seconds)")
pyplot.plot(minstartdf.index, start_offsets)

In [None]:
# Latest endTime per uuid, converted from milliseconds to seconds.
maxenddf = (
    df.groupby("uuid")[["endTime"]]
    .max()
    .reset_index()
    .assign(endTime=lambda frame: frame["endTime"] / 1000)
)
# Offset of every uuid's last end from the earliest of those ends.
end_offsets = maxenddf["endTime"] - maxenddf["endTime"].min()

pyplot.figure(figsize=(12, 12))
pyplot.title("Ending time of each lambda relative to the first one")
pyplot.xlabel("Lambda index")
pyplot.ylabel("Time (seconds)")
pyplot.plot(maxenddf.index, end_offsets)

In [None]:
# Per-uuid runtime: earliest startTime to latest endTime, converted from
# milliseconds to seconds. The named aggregation already picks its own input
# columns, so no column pre-selection is needed.
runtimedf = (
    df.groupby("uuid")
    .agg(minstart=("startTime", "min"), maxend=("endTime", "max"))
    .reset_index()
)
runtimedf["runtime"] = (runtimedf["maxend"] - runtimedf["minstart"]) / 1000

# The x axis is the row position after reset_index, not the uuid itself.
pyplot.figure(figsize=(10,10))
pyplot.ylabel("Runtime (seconds)")
pyplot.xlabel("Lambda ID")
pyplot.plot(runtimedf.index, runtimedf["runtime"])

In [None]:
# Columns needed for the per-uuid CPU analysis. Take an explicit copy (as the
# `metrics` cell does) so later column assignments never write into, or warn
# about, a slice of `df`.
cpudf = df[["uuid","startTime","endTime","cpuUsr"]].copy()
cpudf

In [None]:
# For every uuid, cpuPercent is the increase of cpuUsr from a row to the next
# row of the same uuid (diff() then shift(-1)); the last row of each uuid has
# no successor and therefore gets NaN.
# assign() returns a new frame per group instead of writing into the frame
# handed out by groupby, which avoids SettingWithCopyWarning.
dfs = [
    curdf.assign(cpuPercent=curdf["cpuUsr"].diff().shift(-1))
    for _, curdf in cpudf.groupby("uuid")
]

# Groups are concatenated in groupby order (sorted by uuid) and renumbered.
cpupercentdf = pandas.concat(dfs).reset_index(drop=True)
# startTime is divided by 1000 to get seconds; fromtimestamp yields naive
# local-time datetimes, matching how start_datetime / end_datetime are built.
cpupercentdf["startDateTime"] = (cpupercentdf["startTime"]/pow(10,3)).apply(datetime.fromtimestamp)
cpupercentdf

In [None]:
# Independent copy of the columns used for the binned CPU / network metrics,
# so that columns added later do not touch `df`.
metrics = df.loc[
    :, ["uuid", "startTime", "cpuUsr", "network_rx_bytes.vinternal_1"]
].copy()
metrics

In [None]:
# startTime divided by 1000 gives seconds, which are turned into naive
# local-time datetimes (the same conversion used for start_datetime).
start_seconds = metrics["startTime"] / 1000
metrics["startDateTime"] = start_seconds.apply(datetime.fromtimestamp)
metrics

In [None]:
def round_seconds(obj: datetime, how="previous") -> datetime:
    """Move ``obj`` by one second and drop its microseconds.

    Args:
        obj: The datetime to adjust.
        how: ``"previous"`` moves one second back; any other value moves one
            second forward.

    Returns:
        A whole-second datetime that is strictly earlier than ``obj`` for
        ``"previous"`` and strictly later than ``obj`` otherwise.
    """
    one_second = timedelta(seconds=1)
    shifted = obj - one_second if how == "previous" else obj + one_second
    return shifted.replace(microsecond=0)

# Bin edges spaced two seconds apart, starting at the whole second before
# start_datetime and continuing while the edge does not exceed end_datetime.
# A final edge after end_datetime closes the last bin, which may therefore be
# narrower than two seconds.
bin_step = timedelta(seconds=2)
current_datetime = round_seconds(start_datetime)
timebins = []
while not current_datetime > end_datetime:
    timebins.append(current_datetime)
    current_datetime = current_datetime + bin_step
timebins.append(round_seconds(end_datetime, how="next"))
timebins

In [None]:
# Label every row with the time interval (from `timebins`) that contains its
# startDateTime.
metrics["bins"] = pandas.cut(x=metrics["startDateTime"], bins=timebins)
metrics

In [None]:
pyplot.figure(figsize=(10, 10))
# One line per uuid: the number of cpuUsr samples at each startTime, shifted
# upwards by the group's position so the lines are stacked instead of overlapping.
for offset, (_, group) in enumerate(metrics.groupby("uuid")):
    samples_per_start = group.groupby("startTime")["cpuUsr"].count()
    pyplot.plot(samples_per_start + offset)
pyplot.xlabel("Timestamp")
pyplot.ylabel("Lambda invokation")

In [None]:
# Print each field of the row labelled 0 on its own line, to inspect the raw
# values without DataFrame formatting.
first_row = metrics.loc[0]
for cell in first_row:
    print(cell)

In [None]:
# Sanity check per time bin: number of rows, then number of distinct uuids.
# `bins` is categorical; observed=False is stated explicitly so empty bins
# are kept regardless of the pandas version's implicit default. Grouping by
# the scalar key also avoids the length-1-list key warning some versions emit.
for grouplabel, groupdf in metrics.groupby("bins", observed=False):
    print(f"Entries: {len(groupdf)}")
    print(len(groupdf.uuid.unique()))

In [None]:
# Label every row with the time interval (from `timebins`) that contains its
# startDateTime, using the same edges as for `metrics`.
cpupercentdf["bins"] = pandas.cut(x=cpupercentdf["startDateTime"], bins=timebins)
cpupercentdf

In [None]:
# Sanity check per time bin: number of rows, then number of distinct uuids.
# `bins` is categorical; observed=False is stated explicitly so empty bins
# are kept regardless of the pandas version's implicit default. Grouping by
# the scalar key also avoids the length-1-list key warning some versions emit.
for grouplabel, groupdf in cpupercentdf.groupby("bins", observed=False):
    print(f"Entries: {len(groupdf)}")
    print(len(groupdf.uuid.unique()))

In [None]:
def f(series):
    """Return how many distinct values ``series`` contains.

    Used as an aggregation function to count the different uuids per bin.
    """
    distinct_values = series.unique()
    return len(distinct_values)
# Per time bin: sum of cpuUsr and number of distinct uuids.
# observed=False is explicit so that every bin yields a row even when it is
# empty; later cells plot by row position and rely on one row per bin.
cpusumdf = (
    metrics.groupby("bins", observed=False)
    .agg(cpu_sum=("cpuUsr", "sum"), numlambdas=("uuid", f))
    .reset_index()
)
cpusumdf

In [None]:
# Per time bin: sum of the per-row cpuPercent deltas and number of distinct uuids.
# observed=False is explicit so that every bin yields a row even when it is
# empty; later cells plot by row position and rely on one row per bin.
cpusumdfpercent = (
    cpupercentdf.groupby("bins", observed=False)
    .agg(cpu_sum=("cpuPercent", "sum"), numlambdas=("uuid", f))
    .reset_index()
)
cpusumdfpercent

In [None]:
# Summed cpuPercent per bin, plotted against the left edge of each bin.
# NOTE(review): the division by 2 presumably normalises by the 2-second bin
# width used to build `timebins` -- confirm.
bin_starts = cpusumdfpercent["bins"].apply(lambda interval: interval.left)
cpu_per_second = cpusumdfpercent["cpu_sum"] / 2

fig, ax = pyplot.subplots(figsize=(10, 10))
ax.plot(bin_starts, cpu_per_second, label="CPU Usage")
ax.set_xlabel("Absolute time")
ax.set_ylabel("CPU usage (percentage)")
fig.savefig("cpuusage_cpubound.png", format="png", dpi=300, bbox_inches="tight")

In [None]:
# Show the left edge of every bin (the x values used in the CPU usage plot).
cpusumdfpercent["bins"].apply(lambda interval: interval.left)

In [None]:
# Per time bin: sum of network_rx_bytes.vinternal_1 and number of distinct uuids.
# observed=False is explicit so that every bin yields a row even when it is
# empty; later cells plot by row position and rely on one row per bin.
networksumdf = (
    metrics.groupby("bins", observed=False)
    .agg(network_sum=("network_rx_bytes.vinternal_1", "sum"), numlambdas=("uuid", f))
    .reset_index()
)
networksumdf

In [None]:
# Running total over the time bins of the per-bin network_rx_bytes.vinternal_1 sums.
# observed=False is explicit so that empty bins stay in the sequence (adding 0
# to the running total) regardless of the pandas version's implicit default.
networkcumsumdf = (
    metrics[["bins", "network_rx_bytes.vinternal_1"]]
    .groupby("bins", observed=False)
    .sum()
    .cumsum()
    .reset_index()
)
networkcumsumdf.plot()

In [None]:
# Running total over the time bins of the per-bin cpuUsr sums.
# observed=False is explicit so that empty bins stay in the sequence (adding 0
# to the running total) regardless of the pandas version's implicit default.
cpucumsumdf = (
    cpusumdf[["bins", "cpu_sum"]]
    .groupby("bins", observed=False)
    .sum()
    .cumsum()
    .reset_index()
)

# Plotted against the left edge of each bin.
# NOTE(review): the running total is halved like the per-bin plots are;
# confirm that dividing a cumulative sum by the bin width is intended.
fig, ax = pyplot.subplots(figsize=(10,10))
pyplot.plot(cpucumsumdf["bins"].apply(lambda x: x.left), cpucumsumdf["cpu_sum"]/2, label="CPU Usage")
pyplot.xlabel("Absolute time")
pyplot.ylabel("CPU usage (sum)")
fig.savefig('cpuusage_sum.png',
            format='png',
            dpi=300,
            bbox_inches='tight')

In [None]:
# CPU (left axis, green) and received network data (right axis, blue) per bin.
# Both series share the same x positions, so the bars are drawn half as wide
# and shifted left / right of each position; at the default width the bars on
# the twin axis would be painted over the CPU bars and hide them.
bar_width = 0.4
fig, ax = pyplot.subplots(figsize=(10, 10))
# NOTE(review): cpu_sum is plotted unscaled here, whereas the other CPU plots
# divide it by 2 -- confirm which scaling is intended.
ax.bar(
    cpusumdfpercent.index - bar_width / 2,
    cpusumdfpercent["cpu_sum"],
    width=bar_width,
    color="green",
)
ax.set_ylabel("CPU Usage (Percentage)", color="green")
ax2 = ax.twinx()
# Bytes are scaled by 10**9 to match the GB axis label.
ax2.bar(
    networksumdf.index + bar_width / 2,
    networksumdf["network_sum"] / pow(10, 9),
    width=bar_width,
    color="blue",
)
ax2.set_ylabel("Transferred data (GB)", color="blue")
ax.set_xlabel("Time increment (2 seconds)")
fig.savefig('cpuandnetwork.png',
            format='png',
            dpi=300,
            bbox_inches='tight')

In [None]:
# Number of distinct uuids seen in each time bin, by bin position.
fig, ax = pyplot.subplots(figsize=(10, 10))
ax.plot(cpusumdf["numlambdas"])
# Fixed tick marks every 5 units from 0 to 65.
ax.set_yticks(list(range(0, 70, 5)))
ax.set_xlabel("Time increment (2 seconds)")
ax.set_ylabel("Number of Lambdas")
fig.savefig("numlambdas.png", format="png", dpi=300, bbox_inches="tight")

In [None]:
# Bar chart of the halved per-bin cpuPercent sums, by bin position.
halved_cpu_sum = cpusumdfpercent["cpu_sum"] / 2
fig, ax = pyplot.subplots(figsize=(10, 10))
ax.bar(cpusumdfpercent.index, halved_cpu_sum)
ax.set_ylabel("CPU Usage (Percentage)", color="green")
