# Evaluations

## Installs

In [36]:
pip install tqdm

Collecting tqdm
  Downloading tqdm-4.53.0-py2.py3-none-any.whl (70 kB)
[K     |████████████████████████████████| 70 kB 1.5 MB/s eta 0:00:01
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.53.0
You should consider upgrading via the '/Users/phillip/.local/share/virtualenvs/actix-telepathy-syeEGSfw/bin/python -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


## Imports

In [37]:
%matplotlib inline
import pandas as pd
import sqlite3
import matplotlib.pyplot as plt
import tqdm

In [38]:
# Default (width, height) in inches for every matplotlib figure created in this notebook.
plt.rcParams["figure.figsize"] = (15, 10)

## Helper Functions

In [63]:
def load_data(where: str) -> "tuple[pd.DataFrame, pd.DataFrame]":
    """Load evaluation results and aggregate them per configuration and step.

    Joins the ``experiments`` and ``evaluations`` tables of ``global_db.db`` on
    experiment id and rank, rebases the timestamps of every (experiment, rank)
    series so that its earliest timestamp becomes 0, and aggregates ``value`` and
    ``timestamp`` over all rows sharing ``update_every``, ``group_size`` and ``step``.

    Args:
        where: SQL boolean expression used as an additional filter, e.g.
            ``"group_size <= 5"``. It is interpolated into the query unescaped, so
            it must only ever come from trusted notebook code.

    Returns:
        ``(eva_mean, eva_std)``: two frames with the columns ``update_every``,
        ``group_size``, ``step``, ``value`` and ``timestamp`` holding the mean and
        the sample standard deviation of each group respectively.
    """
    group_keys = ["update_every", "group_size", "step"]
    metrics = ["value", "timestamp"]

    # Parenthesise the caller's filter: without the parentheses an `or` inside
    # `where` would bind looser than the join conditions and produce a cross product.
    query = (
        "select exp.*, eva.* from experiments as exp, evaluations as eva "
        "where exp.id = eva.experiment_id and exp.rank = eva.rank "
        f"and ({where})"
    )

    print("Load data from database")
    conn = sqlite3.connect("global_db.db")
    try:
        eva = pd.read_sql(query, conn)
    finally:
        # A sqlite3 connection is not closed by garbage-collection-friendly idioms
        # such as `with conn:`, so release it explicitly.
        conn.close()

    # `exp.*` and `eva.*` both contribute some column names; keep the first occurrence.
    eva = eva.loc[:, ~eva.columns.duplicated()].copy()

    print("Transform timestamps to relative time")
    # One vectorised pass instead of a Python loop with a full-frame mask per
    # (experiment, rank) pair.
    start_times = eva.groupby(["experiment_id", "rank"])["timestamp"].transform("min")
    eva["timestamp"] = eva["timestamp"] - start_times

    print("Aggregate data")
    # Select the metric columns *before* aggregating so that non-numeric experiment
    # columns are never fed into mean()/std().
    grouped = eva.groupby(group_keys)[metrics]
    eva_mean = grouped.mean().reset_index()
    eva_std = grouped.std().reset_index()
    return eva_mean, eva_std

# Plots

In [64]:
from ipywidgets import interact, IntSlider

In [65]:
# Load the experiments with group_size <= 5; the plotting cells below read both frames
# (slider ranges are taken from eva_mean when each plot function is defined).
eva_mean, eva_std = load_data(where="group_size <= 5")

Load data from database


HBox(children=(HTML(value='Transform timestamps to relative time'), FloatProgress(value=0.0, max=18.0), HTML(v…


Aggregate data


## Single

In [53]:
@interact
def plot(
    update_every=IntSlider(min=eva_mean.update_every.min(), max=eva_mean.update_every.max()),
    group_size=IntSlider(min=eva_mean.group_size.min(), max=eva_mean.group_size.max()),
    timestamp=False
):
    """Plot the curve of a single (update_every, group_size) configuration.

    Draws the aggregated ``value`` from ``eva_mean`` with a shaded band of plus/minus
    the matching ``value`` from ``eva_std``.

    Args:
        update_every: Slider-selected ``update_every`` value to show.
        group_size: Slider-selected ``group_size`` value to show.
        timestamp: When true the x-axis uses the ``timestamp`` column, otherwise ``step``.
    """
    x_column = "timestamp" if timestamp else "step"
    x_label = "Seconds" if timestamp else "Epoch"

    mean_mask = (eva_mean["update_every"] == update_every) & (eva_mean["group_size"] == group_size)
    std_mask = (eva_std["update_every"] == update_every) & (eva_std["group_size"] == group_size)
    mean_rows = eva_mean.loc[mean_mask]
    std_rows = eva_std.loc[std_mask]

    x_values = mean_rows[x_column]
    lower = mean_rows["value"] - std_rows["value"]
    upper = mean_rows["value"] + std_rows["value"]

    plt.plot(x_values, mean_rows["value"])
    plt.fill_between(x_values, lower, upper, alpha=0.3)
    plt.title(f"update every {update_every} epochs with group size {group_size}")
    plt.xlabel(x_label)
    plt.ylabel("Test Accuracy")
    plt.show()

interactive(children=(IntSlider(value=1, description='update_every', max=5, min=1), IntSlider(value=3, descrip…

## Update Every

In [66]:
@interact
def plot(
    group_size=IntSlider(min=eva_mean.group_size.min(), max=eva_mean.group_size.max()),
    timestamp=False
):
    """Compare all ``update_every`` settings for one group size.

    One line (plus a shaded band of plus/minus the ``eva_std`` value) is drawn for
    every distinct ``update_every`` found in ``eva_mean``.

    Args:
        group_size: Slider-selected ``group_size`` value to show.
        timestamp: When true the x-axis uses the ``timestamp`` column, otherwise ``step``.
    """
    x_column = "timestamp" if timestamp else "step"
    x_label = "Seconds" if timestamp else "Epoch"

    for update_every in eva_mean.update_every.unique():
        mean_mask = (eva_mean["update_every"] == update_every) & (eva_mean["group_size"] == group_size)
        std_mask = (eva_std["update_every"] == update_every) & (eva_std["group_size"] == group_size)
        mean_rows = eva_mean.loc[mean_mask]
        std_rows = eva_std.loc[std_mask]

        x_values = mean_rows[x_column]
        lower = mean_rows["value"] - std_rows["value"]
        upper = mean_rows["value"] + std_rows["value"]

        plt.plot(x_values, mean_rows["value"], label=update_every)
        plt.fill_between(x_values, lower, upper, alpha=0.3)

    plt.title(f"update every nth epochs with group size {group_size}")
    plt.xlabel(x_label)
    plt.ylabel("Test Accuracy")
    plt.legend()
    plt.show()

interactive(children=(IntSlider(value=3, description='group_size', max=5, min=3), Checkbox(value=False, descri…

## Group Size

In [67]:
@interact
def plot(
    update_every=IntSlider(min=eva_mean.update_every.min(), max=eva_mean.update_every.max()),
    timestamp=False
):
    """Compare all group sizes for one ``update_every`` setting.

    One line (plus a shaded band of plus/minus the ``eva_std`` value) is drawn for
    every distinct ``group_size`` found in ``eva_mean``.

    Args:
        update_every: Slider-selected ``update_every`` value to show.
        timestamp: When true the x-axis uses the ``timestamp`` column, otherwise ``step``.
    """
    x_column = "timestamp" if timestamp else "step"
    x_label = "Seconds" if timestamp else "Epoch"

    for group_size in eva_mean.group_size.unique():
        mean_mask = (eva_mean["update_every"] == update_every) & (eva_mean["group_size"] == group_size)
        std_mask = (eva_std["update_every"] == update_every) & (eva_std["group_size"] == group_size)
        mean_rows = eva_mean.loc[mean_mask]
        std_rows = eva_std.loc[std_mask]

        x_values = mean_rows[x_column]
        lower = mean_rows["value"] - std_rows["value"]
        upper = mean_rows["value"] + std_rows["value"]

        plt.plot(x_values, mean_rows["value"], label=group_size)
        plt.fill_between(x_values, lower, upper, alpha=0.3)

    plt.title(f"update every {update_every} epochs with group size n")
    plt.xlabel(x_label)
    plt.ylabel("Test Accuracy")
    plt.legend()
    plt.show()

interactive(children=(IntSlider(value=1, description='update_every', max=5, min=1), Checkbox(value=False, desc…

## Fewer Updates

In [73]:
# Rebind both frames to the experiments with group_size > 5 and update_every > 5.
# The plot functions defined after this cell take their slider ranges from this subset.
eva_mean, eva_std = load_data(where="group_size > 5 and update_every > 5")

Load data from database


HBox(children=(HTML(value='Transform timestamps to relative time'), FloatProgress(value=0.0, max=27.0), HTML(v…


Aggregate data


In [74]:
@interact
def plot(
    group_size=IntSlider(min=eva_mean.group_size.min(), max=eva_mean.group_size.max()),
    timestamp=False
):
    """Overlay the curves of every ``update_every`` setting for the chosen group size.

    Each distinct ``update_every`` in ``eva_mean`` gets its own labelled line with a
    shaded band spanning the ``eva_mean`` value minus/plus the ``eva_std`` value.

    Args:
        group_size: Slider-selected ``group_size`` value to show.
        timestamp: When true the x-axis uses the ``timestamp`` column, otherwise ``step``.
    """
    if timestamp:
        x_column, x_label = "timestamp", "Seconds"
    else:
        x_column, x_label = "step", "Epoch"

    for update_every in eva_mean.update_every.unique():
        curve = eva_mean.loc[(eva_mean["update_every"] == update_every) & (eva_mean["group_size"] == group_size)]
        spread = eva_std.loc[(eva_std["update_every"] == update_every) & (eva_std["group_size"] == group_size)]
        xs = curve[x_column]

        plt.plot(xs, curve["value"], label=update_every)
        plt.fill_between(
            xs,
            curve["value"] - spread["value"],
            curve["value"] + spread["value"],
            alpha=0.3,
        )

    plt.title(f"update every nth epochs with group size {group_size}")
    plt.xlabel(x_label)
    plt.ylabel("Test Accuracy")
    plt.legend()
    plt.show()

interactive(children=(IntSlider(value=6, description='group_size', max=8, min=6), Checkbox(value=False, descri…

## Bigger Groups

In [75]:
@interact
def plot(
    update_every=IntSlider(min=eva_mean.update_every.min(), max=eva_mean.update_every.max()),
    timestamp=False
):
    """Overlay the curves of every group size for the chosen ``update_every`` setting.

    Each distinct ``group_size`` in ``eva_mean`` gets its own labelled line with a
    shaded band spanning the ``eva_mean`` value minus/plus the ``eva_std`` value.

    Args:
        update_every: Slider-selected ``update_every`` value to show.
        timestamp: When true the x-axis uses the ``timestamp`` column, otherwise ``step``.
    """
    if timestamp:
        x_column, x_label = "timestamp", "Seconds"
    else:
        x_column, x_label = "step", "Epoch"

    for group_size in eva_mean.group_size.unique():
        curve = eva_mean.loc[(eva_mean["update_every"] == update_every) & (eva_mean["group_size"] == group_size)]
        spread = eva_std.loc[(eva_std["update_every"] == update_every) & (eva_std["group_size"] == group_size)]
        xs = curve[x_column]

        plt.plot(xs, curve["value"], label=group_size)
        plt.fill_between(
            xs,
            curve["value"] - spread["value"],
            curve["value"] + spread["value"],
            alpha=0.3,
        )

    plt.title(f"update every {update_every} epochs with group size n")
    plt.xlabel(x_label)
    plt.ylabel("Test Accuracy")
    plt.legend()
    plt.show()

interactive(children=(IntSlider(value=7, description='update_every', max=15, min=7), Checkbox(value=False, des…