In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from prometheus_pandas import query
from IPython.display import Markdown, display

pd.__version__

'2.0.0'

In [2]:
p = query.Prometheus('http://192.168.178.64:9090')
step = "1s"

In [3]:
# We're scraping much more data than we actually need.
# This is especially true for devices.
# In this section we define which devices we actually want to analyze.

NET_DEVICES=["docker0","enp7s0","lo"]
DISK_DEVICES=["sda"]

# Utils

In [4]:
def printmd(string):
    display(Markdown(string))
    
def remove_meta_columns(df):
    cols = [col for col in df.columns if col not in ['measurement','seconds']]
    return df[cols]

    
def describe_measurement_dataframe(df):
    printmd("### Total ")
    display(remove_meta_columns(df).describe())
    
    printmd("### Per Measurement")
    display(df[df.columns.difference(['seconds'])].groupby('measurement').describe().unstack(1))

def prepare_query_range_as_dataframe(measurement_no, query, start, end, step=step):
    print(f"[Measurement {measurement_no}] Querying '{query}' from {start} to {end} with step {step} ...")
    metrics = p.query_range(query, start, end, step)
    metrics = metrics.assign(seconds=range(len(metrics)))
    metrics = metrics.assign(measurement=measurement_no)
    
    summarize_outliers(metrics.reset_index())
    
    return metrics


def prepare_query_metrics_as_dataframe(measurement_no, query, end):
    print(f"[Measurement {measurement_no}] Querying '{query}' at {end} ...")
    metrics = p.query(query, end).to_frame()
    metrics.columns = ['value']
    metrics = metrics.assign(measurement=measurement_no)

    summarize_outliers(metrics.reset_index())
    
    return metrics


def prepare_dataframe(query_fn, df):
    result = [prepare_query_range_as_dataframe(idx, query_fn(start, duration, end), start, end) for idx, (start, duration, end) in enumerate(zip(df['Start'], df['Duration'], df['End']), 1)]
    result = pd.concat(result)
    return result


def prepare_series(query_fn, df):
    result = [prepare_query_metrics_as_dataframe(idx, query_fn(start, duration, end), end) for idx, (start, duration, end) in enumerate(zip(df['Start'], df['Duration'], df['End']), 1)]
    result = pd.concat(result)
    return result


def mark_outliers(df, fence=2.22):
    # Select only numeric columns
    cols = df.select_dtypes('number').columns
    df_sub = df.loc[:, cols]
    
    # Calculate IQR
    iqr = df_sub.quantile(0.75, numeric_only=False) - df_sub.quantile(0.25, numeric_only=False)
    lim = np.abs((df_sub - df_sub.median()) / iqr) < fence
    
    # Replace Outliers with NaN
    df.loc[:, cols] = df_sub.where(lim, np.nan)
    
    # Replace all NaN values
    return df


def summarize_outliers(df, fence=2.22):
    marked_outliers = mark_outliers(df, fence)
    diff = df.compare(marked_outliers)
    
    # No Outliers, no problem
    if diff.size == 0:
        return
    
    diff.name = 'Outliers'
    print('Outliers detected, please check manually')
    print(diff)
    

def drop_outliers(df, fence=2.22):
    cols = df.select_dtypes('number').columns
    return mark_outliners(df, fence).dropna(how='any', subset=cols)

# Baseline Analysis

Here we take a look at the Baseline of our DUT


In [5]:
bm = pd.read_csv('baseline.csv')
bm

Unnamed: 0,Start,End,Duration,Energy
0,1681634834,1681635134,5m,4


## Power Stats

In [6]:
result = prepare_dataframe(lambda start,duration,end: '{__name__=~"epc1202PowerActive|epc1202Current|epc1202Voltage"}',bm)
result.columns = ['Current', 'Watts', 'Voltage', 'seconds', 'measurement']

[Measurement 1] Querying '{__name__=~"epc1202PowerActive|epc1202Current|epc1202Voltage"}' from 1681634834 to 1681635134 with step 1s ...


In [7]:
describe_measurement_dataframe(result)

### Total 

Unnamed: 0,Current,Watts,Voltage
count,301.0,301.0,301.0
mean,262.259136,47.438538,237.239203
std,0.786951,0.497034,0.427307
min,261.0,47.0,237.0
25%,262.0,47.0,237.0
50%,262.0,47.0,237.0
75%,263.0,48.0,237.0
max,265.0,48.0,238.0


### Per Measurement

                measurement
Current  count  1              301.000000
         mean   1              262.259136
         std    1                0.786951
         min    1              261.000000
         25%    1              262.000000
         50%    1              262.000000
         75%    1              263.000000
         max    1              265.000000
Voltage  count  1              301.000000
         mean   1              237.239203
         std    1                0.427307
         min    1              237.000000
         25%    1              237.000000
         50%    1              237.000000
         75%    1              237.000000
         max    1              238.000000
Watts    count  1              301.000000
         mean   1               47.438538
         std    1                0.497034
         min    1               47.000000
         25%    1               47.000000
         50%    1               47.000000
         75%    1               48.000000
      

In [8]:
printmd("### Current Measurment Plots")
sns.lmplot(
      data=result, x="seconds", y="Current", col="measurement", col_wrap=5, height=3
)

### Current Measurment Plots

NameError: name 'sns' is not defined

In [None]:
printmd("### Voltage Measurment Plots")
sns.lmplot(
    data=result, x="seconds", y="Voltage", col="measurement", col_wrap=5, height=3
)

In [None]:
printmd("### Watts Measurment Plots")
sns.lmplot(
    data=result, x="seconds", y="Watts", col="measurement", col_wrap=5, height=3
)

In [None]:
printmd("### Total Mean Plots")

grouped_total_mean = result.groupby(by=['seconds', 'measurement']).mean()

fig, axes = plt.subplots(nrows=2, ncols=2)
grouped_total_mean['Current'].plot(drawstyle='steps', ax=axes[0,0]);
axes[0,0].set_title('Current')
axes[0,0].set_ylim(bottom=250, top=280)
grouped_total_mean['Voltage'].plot(drawstyle='steps', ax=axes[0,1]);
axes[0,1].set_title('Voltage')
axes[0,1].set_ylim(bottom=230, top=250)
grouped_total_mean['Watts'].plot(drawstyle='steps', ax=axes[1,0]);
axes[1,0].set_title('Watts')
axes[1,0].set_ylim(bottom=0, top=50)

## CPU

### CPU Time

In [None]:
result = prepare_series(lambda start,duration,end: f"avg by (mode) (increase(node_cpu_seconds_total[{duration}]))",bm)
describe_measurement_dataframe(result)

total_mean_result = remove_meta_columns(result).groupby(level=0).mean()

display(total_mean_result)
total_mean_result.plot.bar(ylabel='CPU seconds')

### CPU Utilization

In [None]:
result = prepare_dataframe(lambda start, duration, end: 'avg by (mode) (rate(node_cpu_seconds_total[10s]) * 100)', bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(by=['seconds']).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
grouped_mean.plot(drawstyle='steps',figsize=(10,3),ylim=(-5,105),ylabel='%')

## Memory

In [None]:
rng = "10s"
query = f"""
    100 * (1 - ((avg_over_time(node_memory_MemFree_bytes[{rng}]) + avg_over_time(node_memory_Cached_bytes[{rng}]) + avg_over_time(node_memory_Buffers_bytes[{rng}])) / avg_over_time(node_memory_MemTotal_bytes[{rng}])))
    """

result = prepare_dataframe(lambda start, duration, end: query, bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(by=['seconds']).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
grouped_mean.plot(drawstyle='steps',figsize=(10,3),ylim=(-5,105),ylabel='%')

## Network

In [None]:
# This is going to be the label filter we're going to use
net_filter_label = f"{{device=~\"{'|'.join(NET_DEVICES)}\"}}"

### Received KBytes

In [None]:
result = prepare_series(lambda start, duration, end: f"sum by(device) (increase(node_network_receive_bytes_total{net_filter_label}[{duration}])) / 1000", bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(level=0).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
grouped_mean.plot.bar(ylabel='Received KBytes')

In [None]:
result = prepare_dataframe(lambda start, duration, end: f"avg by (device) (rate(node_network_receive_bytes_total{net_filter_label}[10s]))", bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(by=['seconds']).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
grouped_mean.plot(drawstyle='steps',figsize=(10,3),ylabel='Bytes')

### Sent KByte

In [None]:
result = prepare_series(lambda start, duration, end: f"sum by(device) (increase(node_network_transmit_bytes_total{net_filter_label}[{duration}])) / 1000", bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(level=0).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
grouped_mean.plot.bar(ylabel='Sent KBytes')

In [None]:
result = prepare_dataframe(lambda start, duration, end: f"avg by (device) (rate(node_network_transmit_bytes_total{net_filter_label}[10s]))", bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(by=['seconds']).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
grouped_mean.plot(drawstyle='steps',figsize=(10,3),ylabel='Bytes')

# Disk

In [None]:
# This is going to be the label filter we're going to use
disk_filter_label = f"{{device=~\"{'|'.join(DISK_DEVICES)}\"}}"

In [None]:
result = prepare_series(lambda start, duration, end: f"increase(node_disk_io_time_seconds_total{disk_filter_label}[{duration}])", bm)

grouped_result = result.groupby(level=0).mean()
display(grouped_result)

In [None]:
result = prepare_dataframe(lambda start, duration, end: f"rate(node_disk_io_time_seconds_total{disk_filter_label}[10s])", bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(by=['seconds']).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
grouped_mean.plot(drawstyle='steps',figsize=(10,3),ylabel='I/O seconds')

# Interrupts

In [None]:
interrupts_filter = f"{{devices=\"\"}}"

In [None]:
result = prepare_series(lambda start, duration, end: f"sum by (devices, type) (idelta(node_interrupts_total{interrupts_filter}[{duration}]) > 0)", bm)
describe_measurement_dataframe(result)

grouped_mean = result.groupby(level=0).mean()
grouped_mean.drop(['measurement'], inplace=True, axis=1)
display(grouped_mean)
grouped_mean.plot.bar(ylabel='Interrupts')

# Software Experiment

In [None]:
# Those are the variants we have measured
VARIANTS=["no-cache", "caffeine-cache", "redis-cache", "caffeine-redis-cache"]