In [None]:
import os, re, sys, argparse, glob
import csv
import itertools
from datetime import datetime
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import *
from matplotlib_helper import *

In [None]:
def removeprefix(self: str, prefix: str, /) -> str:
    """Return ``self`` with a leading ``prefix`` stripped.

    When ``self`` does not start with ``prefix`` the whole string is
    returned. Function-style counterpart of ``str.removeprefix`` for
    interpreters that lack the method.
    """
    # Slice from the end of the prefix when it matches, otherwise from 0.
    start = len(prefix) if self.startswith(prefix) else 0
    return self[start:]

def removesuffix(self: str, suffix: str, /) -> str:
    """Return ``self`` with a trailing ``suffix`` stripped.

    When ``suffix`` is empty or ``self`` does not end with it, the whole
    string is returned. Function-style counterpart of ``str.removesuffix``
    for interpreters that lack the method.
    """
    # Guard first: an empty suffix must never reach the negative slice,
    # because self[:-0] would evaluate to self[:0], i.e. ''.
    if not suffix or not self.endswith(suffix):
        return self[:]
    return self[:-len(suffix)]

In [None]:
def common_prefix(l):
    """Return the longest leading string shared by every entry of ``l``.

    The comparison is character-wise (not per path component). An empty
    input yields ``''``.
    """
    if not l:
        return ''
    # Only the lexicographically smallest and largest entries need to be
    # compared: whatever prefix they share is shared by everything between.
    lowest, highest = min(l), max(l)
    for position, (low_char, high_char) in enumerate(zip(lowest, highest)):
        if low_char != high_char:
            return lowest[:position]
    return lowest

In [None]:
def plot_cdf_array(array, label, include_count = False, index=0, color=None):
    """Draw the empirical CDF of ``array`` on the current pyplot axes.

    Args:
        array: sized collection of sortable values.
        label: legend label for the curve.
        include_count: when true, append ' (<number of samples>)' to the label.
        index: forwarded to ``get_linestyle`` to choose the line style.
        color: line color; when None one is requested from ``get_next_color``.

    ``get_next_color`` and ``get_linestyle`` are not defined in this
    notebook - presumably provided by the ``matplotlib_helper`` star import.
    """
    sorted_values = sorted(array)
    sample_count = len(array)
    # n evenly spaced cumulative fractions 1/n, 2/n, ..., 1 (leading 0 dropped).
    cumulative_fraction = np.linspace(0., 1., sample_count + 1)[1:]
    legend_label = (label + ' (%d)' % sample_count) if include_count else label
    line_color = get_next_color() if color is None else color
    plt.plot(
        sorted_values,
        cumulative_fraction,
        label=legend_label,
        color=line_color,
        linestyle=get_linestyle(index),
    )

In [None]:
def plot_timeseries(data_array, plot_axis=None, timestamp_column_name='timestamp', prefix=None, use_relative_time=False, color=None, index=0, marker=None):
    """Plot every non-timestamp column of a list-of-dicts time series.

    Args:
        data_array: sequence of dicts that all carry ``timestamp_column_name``
            plus one or more value columns. The set of value columns is read
            from the first entry.
        plot_axis: axes to draw on; defaults to ``plt.gca()``.
        timestamp_column_name: key holding the x value of each entry.
        prefix: optional text prepended to each legend label as '<prefix> - '.
        use_relative_time: when true, x values become seconds elapsed since
            the first entry (timestamps must support subtraction yielding an
            object with ``total_seconds()``).
        color: color passed to every plotted line (None lets matplotlib pick).
        index: forwarded to ``get_linestyle``; every column plotted by one
            call shares the resulting line style.
        marker: marker passed to every plotted line.

    Returns:
        List with one line object per plotted column, in column order.
        An empty ``data_array`` produces an empty list.
    """
    # Nothing to draw: bail out before data_array[0] / x[0] can raise IndexError.
    if len(data_array) == 0:
        return []

    x = [entry[timestamp_column_name] for entry in data_array]
    if use_relative_time:
        start_time = x[0]
        x = [(t - start_time).total_seconds() for t in x]

    data_keys = [key for key in data_array[0].keys() if key != timestamp_column_name]

    # Resolve the default axes once, and only if something will be plotted,
    # so that plt.gca() does not create a figure needlessly.
    if plot_axis is None and data_keys:
        plot_axis = plt.gca()

    label_prefix = '%s - ' % prefix if prefix else ''
    lines = []
    for key in data_keys:
        data_series = [entry[key] for entry in data_array]
        line = plot_axis.plot(x, data_series, color=color, linestyle=get_linestyle(index), label=label_prefix + key, marker=marker)
        # Axes.plot returns a list of lines; one x/y pair yields one line.
        lines.append(line[0])
    return lines

In [None]:
def get_throughput_data(throughput_log_file):
    """Parse per-parallelism throughput results out of a log file.

    Only lines starting with 'parallel: ' are considered; each must have the
    form 'parallel: <int>, sender: <number>, receiver: <number>'. The sender
    figure is reported as 'throughput'; the receiver figure is matched but
    not returned.

    Returns:
        List of {'parallelism': int, 'throughput': float} in file order.

    Raises:
        AssertionError: a 'parallel: ' line does not match the expected form.
    """
    line_format = re.compile(r'parallel: (\d+), sender: ([\d.]+), receiver: ([\d.]+)')
    records = []
    with open(throughput_log_file, 'r') as log:
        for raw_line in log:
            if raw_line.startswith('parallel: '):
                match = line_format.match(raw_line)
                assert match, "Cannot parse throughput line: %s" % raw_line
                parallelism_text, sender_text = match.group(1, 2)
                record = {
                    'parallelism': int(parallelism_text),
                    'throughput': float(sender_text),
                }
                records.append(record)
    return records

In [None]:
def get_cpu_mem_usage_data(usage_log_file):
    """Load a CPU/memory usage CSV into a list of per-sample dicts.

    The CSV must have a header row containing at least the columns
    'timestamp', 'cpu-user', 'cpu-kernel', 'cpu-idle', 'mem-used' and
    'mem-free'. Timestamps are parsed with ``datetime.fromisoformat``; all
    other required columns are parsed as floats.

    Rows whose 'mem-used' + 'mem-free' is below 1 are echoed to stderr as a
    warning but are still included in the result.

    Returns:
        List of dicts with keys 'timestamp', 'cpu-total' (user + kernel),
        'cpu-user' and 'cpu-kernel', in file order.

    Raises:
        AssertionError: the header is absent or lacks a required column.
        ValueError: a required field cannot be parsed.
    """
    required_columns = ('timestamp', 'cpu-user', 'cpu-kernel', 'cpu-idle', 'mem-used', 'mem-free')
    data_array = []
    # newline='' lets the csv module do its own newline handling.
    with open(usage_log_file, 'r', newline='') as f:
        csv_reader = csv.DictReader(f)
        # fieldnames is None for an empty file; treat that as "no columns".
        column_names = csv_reader.fieldnames or []
        missing_columns = [name for name in required_columns if name not in column_names]
        # The previous check asserted a non-empty list, which is always
        # truthy, so missing columns were never detected here.
        assert not missing_columns, "Missing required columns in %s: %s" % (usage_log_file, ', '.join(missing_columns))
        for row in csv_reader:
            timestamp = datetime.fromisoformat(row['timestamp'])
            cpu_user = float(row['cpu-user'])
            cpu_kernel = float(row['cpu-kernel'])
            float(row['cpu-idle'])  # parsed for validation only; not reported
            mem_used = float(row['mem-used'])
            mem_free = float(row['mem-free'])
            if mem_used + mem_free < 1:
                # Suspicious memory totals: surface the raw row for inspection.
                print(row, file=sys.stderr)
            data_array.append({
                'timestamp': timestamp,
                'cpu-total': cpu_user + cpu_kernel,
                'cpu-user': cpu_user,
                'cpu-kernel': cpu_kernel,
            })
    return data_array

In [None]:
# data_array is a list of dicts, each of which has 'timestamp', 'total_intel_energy', ...
# it's already sorted by timestamp
def get_energy_stats(data_array):
    """Split an energy time series into idle / workload / idle phases and summarise it.

    Args:
        data_array: list of dicts, each with a 'timestamp' (values whose
            pairwise differences expose ``total_seconds()``) and a numeric
            'total_intel_energy'. Expected to be sorted by timestamp.

    Returns:
        dict with:
            'duration': number of workload samples times the sample interval.
            'start_index': index at which the workload slice begins.
            'total_energy': sum of the per-sample power values in the workload slice.
            'delta_energy': that sum minus (workload sample count * idle power).
            'sample_interval_s': spacing between samples, in whole seconds.
            'idle_power': average idle power used as the baseline.

    Raises:
        AssertionError: fewer than three samples, unevenly spaced samples, or
            idle power whose std/mean ratio is not below IDLE_POWER_STD_THRESHOLD.
    """
    assert len(data_array) > 2, "Time series is too short"
    l_timestamp = np.array([entry['timestamp'] for entry in data_array])
    # Deltas are truncated to whole seconds before being compared.
    # NOTE(review): a sub-second sampling interval would truncate to 0 and
    # break the division below - confirm intervals are always >= 1s.
    delta_timestamps = [int(delta.total_seconds()) for delta in np.diff(l_timestamp, n=1)]
    sample_interval_s = delta_timestamps[0]
    # Every sample must be spaced exactly like the first pair.
    assert all(delta == sample_interval_s for delta in delta_timestamps)
#     print('Sample interval: %ds' % sample_interval_s)

    # detect idle power draw
    # Per-sample value divided by the interval; presumably energy per sample
    # converted to average power - TODO confirm units of 'total_intel_energy'.
    l_power = np.array([entry['total_intel_energy']/sample_interval_s for entry in data_array])
    # print(l_power)
    # delta_power[i] == l_power[i + 1] - l_power[i]
    delta_power = np.diff(l_power, n=1)
    # print(delta_power)
    assert len(l_power) == len(delta_power) + 1
    POWER_DIFF_THRESHOLD = 1
    IDLE_POWER_STD_THRESHOLD = 0.01
    # First index whose following sample rises by more than the threshold.
    # np.argmax on a boolean array yields 0 when nothing is True, which would
    # leave the "idle before" slice empty.
    # NOTE(review): this is the index of the sample *before* the jump, so the
    # workload slice starts with that pre-jump sample - confirm this is intended.
    index_workload_start = np.argmax(delta_power > POWER_DIFF_THRESHOLD)
    # Scan from the end for the last drop larger than the threshold; the result
    # is the index of the first sample after that drop (exclusive workload end).
    index_workload_end = len(delta_power) - np.argmax(delta_power[::-1] < -POWER_DIFF_THRESHOLD)
    l_power_idle_before = l_power[:index_workload_start]
    l_power_workload = l_power[index_workload_start:index_workload_end]
    l_power_idle_after = l_power[index_workload_end:]
    avg_power_idle_before = np.average(l_power_idle_before)
    std_power_idle_before = np.std(l_power_idle_before)
    avg_power_idle_after = np.average(l_power_idle_after)
    std_power_idle_after = np.std(l_power_idle_after)
    # print(avg_power_idle_before, std_power_idle_before, avg_power_idle_after, std_power_idle_after)

    # Both idle phases must be stable: std / mean below the threshold.
    assert std_power_idle_before / avg_power_idle_before < IDLE_POWER_STD_THRESHOLD, "Idle power std is too high"
    assert std_power_idle_after / avg_power_idle_after < IDLE_POWER_STD_THRESHOLD, "Idle power std is too high"
    
    # "Idle power before/after difference is too high"
    # Baseline is the mean of both idle phases, unless they disagree by more
    # than POWER_DIFF_THRESHOLD, in which case only the "before" phase is used.
    avg_power_idle = np.average([avg_power_idle_before, avg_power_idle_after])
    if np.abs(avg_power_idle_before - avg_power_idle_after) > POWER_DIFF_THRESHOLD:
        avg_power_idle = avg_power_idle_before

#     print('Workload duration: %ds' % len(l_power_workload) * sample_interval_s)
#     print('Idle power: %.fW' % (avg_power_idle / sample_interval_s))
    # NOTE(review): 'total_energy' and 'delta_energy' sum per-sample power
    # values without multiplying by sample_interval_s, so they only equal
    # energy when the interval is 1s - confirm the intended units.
    return {
        'duration': len(l_power_workload) * sample_interval_s,
        'start_index': index_workload_start,
        'total_energy': np.sum(l_power_workload),
        'delta_energy': np.sum(l_power_workload) - len(l_power_workload) * avg_power_idle,
        'sample_interval_s': sample_interval_s,
        'idle_power': avg_power_idle,
    }

In [None]:
def auto_detect_log_files(dirpath):
    """Pair up throughput logs and usage CSVs found directly inside ``dirpath``.

    ``dirpath`` may start with '~'. Files are matched by the suffixes
    '.throughput.log' and '.usage.csv'; after sorting, the two lists must
    have equal length and identical names once the suffix is removed.

    Returns:
        List of (throughput_log_path, usage_csv_path) tuples, sorted by path.

    Raises:
        AssertionError: the two sets of files do not line up one-to-one.
    """
    THROUGHPUT_LOGFILE_SUFFIX = ".throughput.log"
    USAGE_LOGFILE_SUFFIX = ".usage.csv"
    base_dir = os.path.expanduser(dirpath)

    def find_sorted(suffix):
        # All entries of base_dir whose name ends with `suffix`, sorted.
        return sorted(glob.glob(os.path.join(base_dir, '*' + suffix)))

    throughput_log_files = find_sorted(THROUGHPUT_LOGFILE_SUFFIX)
    usage_log_files = find_sorted(USAGE_LOGFILE_SUFFIX)
    assert len(throughput_log_files) == len(usage_log_files)

    throughput_stems = [removesuffix(path, THROUGHPUT_LOGFILE_SUFFIX) for path in throughput_log_files]
    usage_stems = [removesuffix(path, USAGE_LOGFILE_SUFFIX) for path in usage_log_files]
    assert throughput_stems == usage_stems

    return list(zip(throughput_log_files, usage_log_files))

In [None]:
def align_throughput_to_usage_data(data_usage, data_throughput):
    """Give each throughput measurement a timestamp taken from the usage series.

    Busy periods are located in ``data_usage`` (entries whose 'cpu-total'
    exceeds 0.5). A period opens at the first busy sample and closes at the
    first non-busy sample lying more than 10 samples after the opening one;
    a period still open when the data ends is not recorded.

    The throughput entries, sorted by 'parallelism', are then matched to the
    busy periods in order of appearance, and each receives the timestamp of
    the sample at the middle of its period.

    Returns:
        List of {'timestamp': ..., 'throughput': ...}, ordered by parallelism.

    Raises:
        AssertionError: the number of busy periods differs from the number
            of throughput entries.
    """
    busy_flags = [sample['cpu-total'] > 0.5 for sample in data_usage]

    busy_periods = []
    period_start = None  # None while no busy period is open
    for position, is_busy in enumerate(busy_flags):
        if period_start is None:
            if is_busy:
                period_start = position
        elif not is_busy and position > period_start + 10:
            busy_periods.append((period_start, position))
            period_start = None

    assert len(busy_periods) == len(data_throughput), "Failed to align throughput to usage timeseries data"

    by_parallelism = sorted(data_throughput, key=lambda e: e['parallelism'])
    aligned = []
    for measurement, (first, last) in zip(by_parallelism, busy_periods):
        midpoint = (first + last) // 2
        aligned.append({
            'timestamp': data_usage[midpoint]['timestamp'],
            'throughput': measurement['throughput']
        })
    return aligned

# align_throughput_to_usage_data(data_sender_usage, data_throughput)

In [None]:
# Experiments to plot; each name is the common stem of its three log files.
experiments = [
    'iperf-1-16',
    'iperf-32'
]

ROOT_DIR = os.path.relpath('../logs/with-tc')

# Figure size per experiment, matched by position (None -> matplotlib default).
figsizes = [
    (10, 6),
    None
]

for experiment, figsize in zip(experiments, figsizes):
    # Locate the three log files belonging to this experiment.
    path_sender_usage = os.path.join(ROOT_DIR, '%s.sender.usage.csv' % experiment)
    path_receiver_usage = os.path.join(ROOT_DIR, '%s.receiver.usage.csv' % experiment)
    path_throughput = os.path.join(ROOT_DIR, '%s.throughput.log' % experiment)

    data_sender_usage = get_cpu_mem_usage_data(path_sender_usage)
    data_receiver_usage = get_cpu_mem_usage_data(path_receiver_usage)
    data_throughput = get_throughput_data(path_throughput)

    # One figure per endpoint: CPU usage on the left axis, throughput on the right.
    usage_by_endpoint = (
        ("sender", data_sender_usage),
        ("receiver", data_receiver_usage),
    )
    for prefix, data_usage in usage_by_endpoint:
        fig = plt.figure(figsize=figsize)
        ax1 = fig.subplots()
        ax2 = ax1.twinx()

        lines1 = plot_timeseries(data_usage, prefix=None, plot_axis=ax1)
        # Throughput points are placed at the middle of each detected busy period.
        data_throughput_aligned = align_throughput_to_usage_data(data_usage, data_throughput)
        lines2 = plot_timeseries(data_throughput_aligned, prefix=None, plot_axis=ax2, color='black', index=1, marker='o')

        ax1.set_ylim(0, 10)
        ax1.set_xlabel('Time')
        ax1.set_ylabel('CPU usage (%)')
        ax2.set_ylim(0, 100)
        ax2.set_ylabel('Throughput (Gbps)')
        plt.grid(which='major', linestyle='-')
        plt.title('%s - %s' % (experiment, prefix))

        # Single legend covering the lines of both axes.
        lines = lines1 + lines2
        ax1.legend(lines, [line.get_label() for line in lines])
        fig.savefig('%s.%s.png' % (experiment, prefix))
