In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json
from pathlib import Path

from utils.cachelib_analyzer import *
from utils.plot import *

In [None]:
# Sweep axes: these values fill the '%dcore' / '%dfiber' fields of the stats
# file names when the logs are loaded.
NUM_CORES_LIST = [1, 2, 4, 8, 16]
NUM_THREADS_LIST = [8, 16]

# Directory holding the benchmark stats files, relative to this notebook.
BASE_PATH = Path('..') / 'cachelib' / 'CacheLib-log'

# Memory latency settings. The single DRAM entry (0) comes first, followed by
# the CXL latency steps; units are not stated here -- presumably ns, confirm.
DRAM_LATENCY_LIST = [0]
CXL_LATENCY_LIST = [500, 1000, 1500, 2000, 2500, 3000, 4000, 5000, 10000, 20000]
MEM_LATENCY_LIST = [*DRAM_LATENCY_LIST, *CXL_LATENCY_LIST]

# Measured throughputs are divided by this ratio before plotting
# (presumably the fraction of operations that are GETs -- confirm).
GET_OP_RATIO = 0.65

In [None]:
# Stats-file name templates for the 100M-item runs. The two %d fields take the
# core count and the fiber/thread count (presumably filled in by get_stats --
# confirm in utils.cachelib_analyzer).
DRAM_LOGS = 'cachelib-benchmark-dram-%dcore-%dfiber-100000000-20000000000-1.0.stats'
CXL_LOGS = 'cachelib-benchmark-cxl-%dcore-%dfiber-100000000-20000000000-1.0.stats'

# Spacing between consecutive CXL latency changes within one run.
CXL_LATENCY_CHANGE_TIME_STEP = 30

# Time of the first CXL latency change for each configuration.
# num_cores -> [8 threads, 16 threads]
CXL_LATENCY_CHANGE_TIME_BASE = {
    1: [441, 390],
    2: [240, 250],
    4: [150, 151],
    8: [100, 100],
    16: [60, 90],
}

# Load the DRAM stats first, then the CXL stats, for every (cores, threads) pair.
dram_stats_dict, cxl_stats_dict = (
    get_stats(BASE_PATH, log_pattern, NUM_CORES_LIST, NUM_THREADS_LIST)
    for log_pattern in (DRAM_LOGS, CXL_LOGS)
)

In [None]:
# num_cores -> first value returned by get_throughput_and_latency (stored as
# the "max throughput" series for that core count).
max_throughput_dict = {}
for num_cores in NUM_CORES_LIST:
    # One dict per CXL latency step: num_threads -> time at which that step
    # begins (per-configuration base time plus a fixed step per latency index).
    cxl_latency_change_times = [
        {
            num_threads: (CXL_LATENCY_CHANGE_TIME_BASE[num_cores][thread_idx]
                          + CXL_LATENCY_CHANGE_TIME_STEP * step_idx)
            for thread_idx, num_threads in enumerate(NUM_THREADS_LIST)
        }
        for step_idx in range(len(CXL_LATENCY_LIST))
    ]
    # The empty list goes in the argument slot where a later cell passes
    # PERCENTILE_LIST; only the first of the three results is kept here.
    max_t, _, _ = get_throughput_and_latency(
        dram_stats_dict[num_cores],
        cxl_stats_dict[num_cores],
        cxl_latency_change_times,
        [],
    )
    max_throughput_dict[num_cores] = max_t

In [None]:
# Parameters handed to plot_with_models for the single-core, 100M-item run.
# NOTE(review): the meaning and units of these constants (and of the 62 that
# is subtracted from each measured time) are not visible here -- confirm
# against utils.plot before changing them.
NUM_IOS_PER_OP = 1.3

SWITCH_TIME = 90
NUM_PREFETCHES = 7
NUM_CHASES = 5.8 / NUM_IOS_PER_OP
MEMORY_TIME = 286.2 - 62
IO_TIME_PRE = 2446.4 - 62
IO_TIME_POST = 1293.7 - 62
TITLE = 'CacheLib (100M items, single core)'
NAME = 'cachelib100m_1core'
NUM_CORES = 1

# Drop the x500 latency points above 1000 (1500, 2500, ...) and everything
# above 15000; keep the remaining (latency, throughput) pairs in order.
kept_points = [
    (lat, t)
    for lat, t in zip(MEM_LATENCY_LIST, max_throughput_dict[NUM_CORES])
    if not ((lat > 1000 and lat % 1000 == 500) or lat > 15000)
]
latencies = [point[0] for point in kept_points]
throughputs = [point[1] for point in kept_points]

_ = plot_with_models(
    latencies, throughputs,
    NUM_CHASES, MEMORY_TIME, IO_TIME_PRE, IO_TIME_POST,
    SWITCH_TIME, NUM_PREFETCHES, 'Aerospike/CacheLib', TITLE, NAME,
)

In [None]:
# latency -> [throughput for each entry of NUM_CORES_LIST], with every value
# divided by GET_OP_RATIO.
#
# A plain dict with setdefault() is used instead of defaultdict(list):
# `defaultdict` is not imported explicitly anywhere in this notebook, so it
# would only resolve if one of the star imports happened to re-export it.
# The resulting mapping (and the JSON written below) is the same.
throughput_dict = {}
for i, lat in enumerate(MEM_LATENCY_LIST):
    # Same point selection as the single-core plot: skip x500 latencies above
    # 1000 and anything above 15000.
    if (lat > 1000 and lat % 1000 == 500) or lat > 15000:
        continue
    for num_cores in NUM_CORES_LIST:
        throughput_dict.setdefault(lat, []).append(
            max_throughput_dict[num_cores][i] / GET_OP_RATIO)

# Persist the table next to the notebook; json converts the integer latency
# keys to strings.
with open('cachelib.json', 'w') as f:
    json.dump(throughput_dict, f, indent=4)

plot_core_scaling(NUM_CORES_LIST, throughput_dict, 'CacheLib (100M items)')

In [None]:
# 16-core run of the 100M-item benchmark: one throughput value per entry of
# MEM_LATENCY_LIST, divided by GET_OP_RATIO before plotting.
NUM_CORES = 16
throughputs = np.array(max_throughput_dict[NUM_CORES]) / GET_OP_RATIO
plot_throughputs(
    MEM_LATENCY_LIST, throughputs, 2.1, 'C1',
    'CacheLib (100M items)', 'cachelib100m_16core',
)

In [None]:
def plot_throughput_over_time(stats_dict, title, time_ticks):
    """Plot GET throughput against time, one line per thread count.

    Args:
        stats_dict: mapping of num_threads -> stats, where each stats object
            is indexed by 'time' and 'cache_gets'.
        title: figure title.
        time_ticks: x positions at which gray vertical lines are drawn, or
            None to draw none.

    The 'cache_gets' values are divided by 60 and scaled by 1e-6, which
    matches the 'M gets/sec' axis label if they are per-minute counts
    (assumed -- confirm against the stats parser).
    """
    plt.figure(figsize=(15, 5))

    # Tallest plotted value so far; used as the top of the vertical markers.
    peak = 0
    for thread_count in stats_dict:
        series = stats_dict[thread_count]
        mgets_per_sec = np.array(series['cache_gets']) / 60 * 1e-6
        plt.plot(series['time'], mgets_per_sec, label='%d threads' % thread_count)
        peak = max(peak, np.amax(mgets_per_sec))

    if time_ticks is not None:
        plt.vlines(time_ticks, 0, peak, colors='gray')

    plt.xlabel('Time [min]')
    plt.ylabel('Throughput [M gets/sec]')
    plt.legend()
    plt.title(title)
    plt.show()

In [None]:
def plot_latency_over_time(stats_dict, percentile, title, time_ticks):
    """Plot one GET-latency percentile against time, one line per thread count.

    Args:
        stats_dict: mapping of num_threads -> stats, where each stats object
            is indexed by 'time' and 'find_p<percentile>'.
        percentile: integer percentile; selects the 'find_p%d' series and
            appears in the y-axis label.
        title: figure title.
        time_ticks: x positions at which gray vertical lines are drawn, or
            None to draw none.

    Values are scaled by 1e-3 and labelled in usec, so the stored series is
    presumably in nanoseconds (confirm against the stats parser).
    """
    plt.figure(figsize=(15, 5))

    series_key = 'find_p%d' % percentile
    # Tallest plotted value so far; used as the top of the vertical markers.
    peak = 0
    for thread_count in stats_dict:
        series = stats_dict[thread_count]
        latency_usec = np.array(series[series_key]) * 1e-3
        plt.plot(series['time'], latency_usec, label='%d threads' % thread_count)
        peak = max(peak, np.amax(latency_usec))

    if time_ticks is not None:
        plt.vlines(time_ticks, 0, peak, colors='gray')

    plt.xlabel('Time [min]')
    plt.ylabel('P%d GET latency [usec]' % percentile)
    plt.legend()
    plt.title(title)
    plt.show()

In [None]:
def plot_hit_ratio_over_time(stats_dict, tier, title, time_ticks):
    """Plot a cache hit ratio against time, one line per thread count.

    Args:
        stats_dict: mapping of num_threads -> stats, where each stats object
            is indexed by 'time' and by 'hit' or 'hit_<tier>'.
        tier: tier name; a falsy value selects the 'hit' series and labels
            the axis 'Total', otherwise the 'hit_<tier>' series is used and
            the upper-cased tier name appears in the label.
        title: figure title.
        time_ticks: x positions at which gray vertical lines are drawn, or
            None to draw none.
    """
    plt.figure(figsize=(15, 5))

    key_suffix = '_' + tier if tier else ''
    series_key = 'hit%s' % key_suffix
    # Tallest plotted value so far; used as the top of the vertical markers.
    peak = 0
    for thread_count in stats_dict:
        series = stats_dict[thread_count]
        hit_ratio = np.array(series[series_key])
        plt.plot(series['time'], hit_ratio, label='%d threads' % thread_count)
        peak = max(peak, np.amax(hit_ratio))

    if time_ticks is not None:
        plt.vlines(time_ticks, 0, peak, colors='gray')

    tier_label = tier.upper() if tier else 'Total'
    plt.xlabel('Time [min]')
    plt.ylabel('%s hit ratio [%%]' % tier_label)
    plt.legend()
    plt.title(title)
    plt.show()

In [None]:
def plot_over_time(stats_dict, title, percentile_list, time_ticks=None):
    """Draw the full set of over-time figures for one set of runs.

    Produces, in order: the throughput figure, one latency figure per entry
    of ``percentile_list``, then hit-ratio figures for the total ('') series
    and for the 'ram' and 'nvm' tiers.

    Args:
        stats_dict: mapping of num_threads -> stats, forwarded unchanged.
        title: title used for every figure.
        percentile_list: percentiles to plot latency for.
        time_ticks: optional x positions for vertical marker lines,
            forwarded unchanged (None draws none).
    """
    plot_throughput_over_time(stats_dict, title, time_ticks)

    for pct in percentile_list:
        plot_latency_over_time(stats_dict, pct, title, time_ticks)

    # '' selects the overall hit ratio; the others select a named tier.
    for tier in ('', 'ram', 'nvm'):
        plot_hit_ratio_over_time(stats_dict, tier, title, time_ticks)

In [None]:
# Configuration for the 400M-item ("x4") runs, 16 cores only.
NUM_CORES = 16
DRAM_LOGS = 'cachelib-benchmark-x4-dram-%dcore-%dfiber-400000000-20000000000-1.0.stats'
CXL_LOGS = 'cachelib-benchmark-x4-cxl-%dcore-%dfiber-400000000-20000000000-1.0.stats'

PERCENTILE_LIST = [50, 90, 99]

# NOTE(review): from here on CXL_LATENCY_CHANGE_TIME_BASE is a single number
# shared by every thread count, whereas the 100M-item cells define it as a
# dict indexed by core count. Re-running those earlier cells after this one
# requires re-running their definitions first. The same applies to DRAM_LOGS,
# CXL_LOGS, dram_stats_dict and cxl_stats_dict, which are also rebound here.
CXL_LATENCY_CHANGE_TIME_BASE = 320
CXL_LATENCY_CHANGE_TIME_STEP = 30

# One dict per CXL latency step: every thread count maps to the same time.
cxl_latency_change_times = [
    dict.fromkeys(
        NUM_THREADS_LIST,
        CXL_LATENCY_CHANGE_TIME_BASE + CXL_LATENCY_CHANGE_TIME_STEP * step_idx,
    )
    for step_idx in range(len(CXL_LATENCY_LIST))
]

dram_stats_dict = get_stats(BASE_PATH, DRAM_LOGS, [NUM_CORES], NUM_THREADS_LIST)
cxl_stats_dict = get_stats(BASE_PATH, CXL_LOGS, [NUM_CORES], NUM_THREADS_LIST)

In [None]:
# DRAM runs get no markers; CXL runs get one vertical line at each time the
# injected latency changes (base time plus a fixed step per latency index).
cxl_time_ticks = [
    CXL_LATENCY_CHANGE_TIME_BASE + CXL_LATENCY_CHANGE_TIME_STEP * step_idx
    for step_idx in range(len(CXL_LATENCY_LIST))
]

plot_over_time(dram_stats_dict[NUM_CORES], 'DRAM', PERCENTILE_LIST)
plot_over_time(cxl_stats_dict[NUM_CORES], 'CXL', PERCENTILE_LIST, cxl_time_ticks)

In [None]:
# Only the second and third results are used here; both are indexed by thread
# count below. The first result is discarded.
_, throughput_dict, get_latency_dict = get_throughput_and_latency(
    dram_stats_dict[NUM_CORES],
    cxl_stats_dict[NUM_CORES],
    cxl_latency_change_times,
    PERCENTILE_LIST,
)

# Plot the 8-thread configuration; throughput is divided by GET_OP_RATIO
# before plotting.
NUM_THREADS = 8
throughputs = np.array(throughput_dict[NUM_THREADS]) / GET_OP_RATIO
plot_throughputs(
    MEM_LATENCY_LIST, throughputs, None, '#9400d3',
    'CacheLib Operation Throughput (400M items)', 'cachelib400m_16core',
)

plot_latencies(
    MEM_LATENCY_LIST, get_latency_dict[NUM_THREADS], PERCENTILE_LIST, 1e3,
    'CacheLib Operation Latency (400M items)', 'cachelib400m_16core_latency',
)