# Benchmark Analysis

## Setup

In [30]:
import os
from os.path import exists
from sys import path
cwd = os.getcwd()
# Put the parent directory on the module search path (presumably where config.py lives).
# The membership check keeps sys.path free of duplicate entries when this cell is re-run.
parent_dir = f"{cwd}/.."
if parent_dir not in path:
  path.append(parent_dir)

from config import *  # "global" benchmarking variables shared with benchmark.ipynb

# Directory that stores the benchmark results for this total_size/chunk_size combination.
# NOTE: from here on `path` is this string and no longer refers to sys.path.
path = f"./out/{total_size}/{chunk_size}"
# makedirs creates every missing intermediate directory in one call; exist_ok makes re-runs a no-op
os.makedirs(path, exist_ok=True)

### DOI distributions per use case in histograms

In [31]:
import altair as alt
# numpy/pandas are imported explicitly so that this cell does not depend on names leaking in from
# `from config import *` or from a cell further down the notebook
import numpy as np
import pandas as pd

charts = []

# list the DOI result files that exist in the out directory; the loops over the context and update
# strategies only process the combinations that have a file in this listing
available_test_cases = os.listdir(f"{path}/doi")

all_doi_values_df = pd.DataFrame()

# approach2: manually compute the histogram over all groups in the data, then visualize those bins
# number of equal-width bins used for every DOI histogram
no_bins = 10
def get_doi_bins_df(doi_df: pd.DataFrame, n_bins: "int | None" = None) -> pd.DataFrame:
  """Count the values of the "doi" column per equal-width bin on the interval [0, 1].

  Parameters:
    doi_df: dataframe with a numeric "doi" column.
    n_bins: number of bins; when omitted, the notebook-level `no_bins` is used.

  Returns:
    A dataframe with one row per bin and a single column `0` holding the bin counts.
    Values outside [0, 1] are not counted (np.histogram ignores values outside `range`).
  """
  if n_bins is None:
    n_bins = no_bins
  # np.histogram returns (counts, bin_edges); only the counts are needed here
  counts, _ = np.histogram(doi_df["doi"], bins=n_bins, range=(0, 1))
  return pd.DataFrame(counts)

# compute the ground truth bins
ground_truth_df = pd.read_csv(f"{path}/doi/__ground_truth__.csv")
ground_truth_bins = get_doi_bins_df(ground_truth_df)

# One dataframe of bins per test case. They are collected in a list and concatenated once after
# the loop: DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every call.
bins_per_test_case = []

# compute the bins for each combination of strategies and then compare it to the ground truth in a
# layered histogram
for c_strat in context_strategies:
  for u_strat in update_strategies:
    # check if that test case exists; the first element of each strategy entry is its label
    test_case = f"{c_strat[0]}-{u_strat[0]}.csv"
    if test_case not in available_test_cases:
      continue

    # read the benchmark results
    df = pd.read_csv(f"{path}/doi/{test_case}")

    # compute the bins on the interval [0, 1] over the "doi" column
    bins_df = get_doi_bins_df(df)

    # difference to the ground truth bin counts, divided by total_size
    bins_df["delta"] = (bins_df[0] - ground_truth_bins[0]) / total_size
    # note: the "doi" column holds the bin count of this test case, not a doi value
    bins_df.columns = ["doi", "delta"]

    # add context info; "bin" is the lower edge of each bin on [0, 1]
    bins_df["bin"] = bins_df.index / no_bins
    bins_df["context_strategy"] = c_strat[0]
    bins_df["update_strategy"] = u_strat[0]

    bins_per_test_case.append(bins_df)

# pd.concat raises on an empty list, so fall back to an empty frame when no test case was found
all_doi_bins_df = pd.concat(bins_per_test_case) if bins_per_test_case else pd.DataFrame()

alt.data_transformers.disable_max_rows()
alt.data_transformers.enable("data_server")
alt.Chart(all_doi_bins_df).mark_bar().encode(
  x=alt.X("bin:Q"),
  y=alt.Y("delta:Q"),
).properties(
  width=100,
  height=100
).facet(
  row="context_strategy",
  column="update_strategy",
  spacing=10
)

### Per item error

In [3]:
import numpy as np
import pandas as pd
import altair as alt
from database import ID, DOI


gt = pd.read_csv(f"{path}/doi/__ground_truth__.csv")
# column names of the id and doi columns in the csv files; named *_col so that the builtin id()
# is not shadowed for the rest of the kernel session
id_col = ID.lower()
doi_col = DOI.lower()

# List the DOI result files here instead of relying on `available_test_cases`, which other cells
# rebind to the contents of the times directory.
doi_test_cases = os.listdir(f"{path}/doi")

rows = []

# compute the overlap between the ground truth and each test case
for c_strat in context_strategies:
  for u_strat in update_strategies:
    # check if that test case exists
    test_case = f"{c_strat[0]}-{u_strat[0]}.csv"
    if test_case not in doi_test_cases:
      continue

    # read the benchmark results
    df = pd.read_csv(f"{path}/doi/{test_case}")

    # Pair up the items by id rather than by row position: an element-wise `gt[id] == df[id]`
    # raises when the two files differ in length and miscounts when their row order differs.
    # NOTE(review): assumes ids are unique within each file - confirm, duplicates would be
    # paired more than once by the merge.
    paired = gt[[id_col, doi_col]].merge(
      df[[id_col, doi_col]], on=id_col, suffixes=("_gt", "_test")
    )
    same_doi = paired[f"{doi_col}_gt"] == paired[f"{doi_col}_test"]

    # hits: same id and identical doi; fails: same id but a different doi
    hits = int(same_doi.sum())
    fails = int((~same_doi).sum())
    rows.append([c_strat[0], u_strat[0], hits, fails])

results = pd.DataFrame(
  rows,
  columns=["context_strategy", "update_strategy", "hits", "fails"]
)

# fold hits/fails into key/value pairs so that both are drawn as colored bars per strategy
alt.Chart(results).transform_fold(
  ["hits", "fails"]
).mark_bar().encode(
  x="context_strategy:N",
  y="value:Q",
  color="key:N",
  column="update_strategy:N",
)

### Time per test case in boxplots

In [32]:
import altair as alt

charts = []

# list the timing result files that exist in the out directory
available_test_cases = os.listdir(f"{path}/times")

# One dataframe of timings per test case. They are collected in a list and concatenated once after
# the loop: DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every call.
time_frames = []

# label the timings of each test case with the strategies that were used to generate them
for c_strat in context_strategies:
  for u_strat in update_strategies:
    # check if that test case exists
    test_case = f"{c_strat[0]}-{u_strat[0]}.csv"
    if test_case not in available_test_cases:
      continue

    df = pd.read_csv(f"{path}/times/{test_case}")
    df["context_strategy"] = c_strat[0]
    df["update_strategy"] = u_strat[0]
    time_frames.append(df)

# ignore_index=True yields the same fresh 0..n-1 index as the former reset_index(drop=True);
# pd.concat raises on an empty list, so fall back to an empty frame when no test case was found.
# (despite its name, this dataframe holds timing data, not doi values)
all_doi_values_df = (
  pd.concat(time_frames, ignore_index=True) if time_frames else pd.DataFrame()
)

# total time per update strategy, one column of boxplots per context strategy
chart1 = alt.Chart(all_doi_values_df).mark_boxplot().encode(
  x="update_strategy:N",
  y={"field": "total_time", "type": "quantitative", "scale": {"type": "linear"}, "title": "time (s)"},
  column="context_strategy:N",
).properties(
  width=190,
  height=250
)

# the same data with the roles swapped: per context strategy, one column per update strategy
chart2 = alt.Chart(all_doi_values_df).mark_boxplot().encode(
  x="context_strategy:N",
  y={"field": "total_time", "type": "quantitative", "scale": {"type": "linear"}, "title": "time (s)"},
  column="update_strategy:N",
).properties(
  width=190,
  height=250
)

alt.vconcat(chart1, chart2)

### Time series per test case

In [23]:
import altair as alt

charts = []

# list the timing result files that exist in the out directory
available_test_cases = os.listdir(f"{path}/times")

# One dataframe of timings per test case. They are collected in a list and concatenated once after
# the loop: DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every call.
timeseries_frames = []

# label the timings of each test case with the strategies that were used to generate them
for c_strat in context_strategies:
  for u_strat in update_strategies:
    # check if that test case exists
    test_case = f"{c_strat[0]}-{u_strat[0]}.csv"
    if test_case not in available_test_cases:
      continue

    df = pd.read_csv(f"{path}/times/{test_case}")
    df["context_strategy"] = c_strat[0]
    df["update_strategy"] = u_strat[0]
    timeseries_frames.append(df)

# ignore_index=True yields the same fresh 0..n-1 index as the former reset_index(drop=True);
# pd.concat raises on an empty list, so fall back to an empty frame when no test case was found
all_timeseries_df = (
  pd.concat(timeseries_frames, ignore_index=True) if timeseries_frames else pd.DataFrame()
)

alt.data_transformers.disable_max_rows()
# total time over the chunks: one row of line charts per context strategy, one line per update
# strategy
alt.Chart(all_timeseries_df).mark_line().encode(
  x="chunk:Q",
  y={"field": "total_time", "type": "quantitative", "scale": {"type": "linear"}, "title": "time (s)"},
  row="context_strategy:N",
  color="update_strategy:N",
).properties(
  width=800,
  height=120
)

### Time series for each test case step

In [29]:
import altair as alt
import pandas as pd

# List the timing result files in this cell as well, so that it does not depend on whichever cell
# assigned `available_test_cases` last.
available_test_cases = os.listdir(f"{path}/times")

# One dataframe of timings per test case. They are collected in a list and concatenated once after
# the loop: DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on every call.
timeseries_frames = []

for c_strat in context_strategies:
  for u_strat in update_strategies:
    # check if that test case exists
    test_case = f"{c_strat[0]}-{u_strat[0]}.csv"
    if test_case not in available_test_cases:
      continue

    df = pd.read_csv(f"{path}/times/{test_case}")
    df["context_strategy"] = c_strat[0]
    df["update_strategy"] = u_strat[0]
    timeseries_frames.append(df)

# ignore_index=True yields the same fresh 0..n-1 index as the former reset_index(drop=True);
# pd.concat raises on an empty list, so fall back to an empty frame when no test case was found
all_timeseries_df = (
  pd.concat(timeseries_frames, ignore_index=True) if timeseries_frames else pd.DataFrame()
)

# fold the per-step timing columns into key/value pairs so that every step becomes its own line
alt.Chart(all_timeseries_df).transform_fold(
  ["chunk_time", "storage_time", "context_time", "outdated_time", "old_doi_time", "store_new_time", 
   "update_dois_time", "total_time"]
).mark_line().encode(
  x="chunk:N",
  y="value:Q",
  color="key:N",
  column="update_strategy:N",
  row="context_strategy"
)