In [None]:
import os
import sys
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import matplotlib.pyplot as plt
import seaborn as sns
import IPython
sns.set()

# Every relative path used in this notebook (the "./<run>/measures.feather"
# inputs and the "../figures/..." outputs) is resolved against the experiment
# log directory. Set the SMOOTH_LOGS_DIR environment variable to run the
# notebook from another machine or account; when it is unset, the original
# hard-coded location is used.
os.chdir(
    os.environ.get(
        "SMOOTH_LOGS_DIR",
        "/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/",
    )
)

In [None]:
%load_ext autoreload
%aimport smooth.config
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
%autoreload 1

In [None]:
def should_plot_as_log(measure_name):
    """Tell whether a measure should be plotted on a log10 scale.

    A measure qualifies when its name contains at least one of the
    substrings listed below.
    """
    log_scale_substrings = (
        "loss",
        # "weights_product" is currently disabled.
    )
    return any(fragment in measure_name for fragment in log_scale_substrings)

def plot_measure(ms, measure_name, groupby, xlabel=None):
    """Draw a horizontal box plot of one measure with a swarm plot on top.

    Args:
        ms: DataFrame holding the `measure_name` and `groupby` columns.
            A copy is made first, so the caller's frame is not modified.
        measure_name: Column plotted along the x axis. When
            `should_plot_as_log` accepts the name, a derived
            ``log10_<measure_name>`` column is plotted instead.
        groupby: Column whose values define the rows (y axis) of the plot.
        xlabel: Optional x-axis label; when falsy, the label is left as
            seaborn set it.

    Returns:
        The figure that owns the axes returned by the swarm plot.
    """
    data = ms.copy()
    x_col = measure_name
    if should_plot_as_log(measure_name):
        x_col = "log10_{}".format(measure_name)
        data[x_col] = np.log10(data[measure_name])

    # Both layers share the same data and axis mapping.
    shared_kwargs = dict(data=data, x=x_col, y=groupby, orient="h")
    sns.boxplot(**shared_kwargs)
    # Overlay the individual observations as small grey points.
    ax = sns.swarmplot(size=2, color=".3", linewidth=0, **shared_kwargs)

    if xlabel:
        ax.set(xlabel=xlabel)

    return ax.get_figure()

def plot_all_measures(ms, additional_measures=None, groupby="kind"):
    """Print group sizes, then show one `plot_measure` figure per measure.

    Args:
        ms: DataFrame of measures, handed unchanged to `plot_measure`.
        additional_measures: Optional list of extra column names that are
            plotted after the default ones.
        groupby: Column used for the printed per-group counts and as the
            grouping column of every plot.
    """
    # For each group, print its key and the non-null count of the first column.
    for group_key, group_rows in ms.groupby(groupby):
        print(group_key, group_rows.count().iloc[0])

    default_measures = [
        "loss_train",
        "loss_test",
        "gradient_norm_test",
        "weights_product",
        "path_length_f_test",
        "path_length_d_test",
        "actual_epochs",
    ]
    extra_measures = additional_measures or []

    for measure in default_measures + extra_measures:
        plot_measure(ms, measure, groupby)
        plt.show()

## path_length_f

In [None]:
# Load the measures stored for run 0508-163442.
# kind_cols presumably controls how the "kind" column (the default grouping of
# plot_all_measures) is derived from the regularization coefficient -- confirm
# in smooth.analysis.load_measures.
# NOTE(review): in the visible notebook, `ms` is reassigned by the
# "0508_path_length_f_reg" cell before anything reads it, so this cell does not
# influence a top-to-bottom run.
ms = smooth.analysis.load_measures(
    "./0508-163442/measures.feather",
    kind_cols=[
        ("model.path_length_f_reg_coef", "coef"),
    ],
)

In [None]:
# Load run 0508-163442 again and keep only the rows whose path_length_f
# regularization coefficient is 0.
# NOTE(review): in the visible notebook, `ms0` is reassigned by the
# "0422_path_length_f_reg" load before anything reads it, so this cell does not
# influence a top-to-bottom run.
ms0 = smooth.analysis.load_measures(
    "./0508-163442/measures.feather",
    kind_cols=[
        ("model.path_length_f_reg_coef", "coef"),
    ],
)
ms0 = ms0[ms0["model.path_length_f_reg_coef"] == 0]

In [None]:
# Measures of the "0508_path_length_f_reg" experiment.
ms = smooth.analysis.load_measures(
    "./0508_path_length_f_reg/measures.feather",
    kind_cols=[
        ("model.path_length_f_reg_coef", "coef"),
    ],
)

# Measures of the "0422_path_length_f_reg" experiment; only its rows with a
# regularization coefficient of 0 are kept (see the filter below).
ms0 = smooth.analysis.load_measures(
    "./0422_path_length_f_reg/measures.feather",
#     "./0421_path_length_f_reg/measures.feather",
    kind_cols=[
        ("model.path_length_f_reg_coef", "coef"),
    ],
)

ms0 = ms0[ms0["model.path_length_f_reg_coef"] == 0]
ms = pd.concat([ms0, ms], sort=False)

# Keep only the datasets that have at least one row with coef == 0.0001.
converged_datasets = ms[(ms["model.path_length_f_reg_coef"] == 0.0001)]
# The explicit .copy() detaches the filtered frame from the concatenated one,
# so adding a column below does not raise pandas' SettingWithCopyWarning.
ms = ms[ms["dataset.name"].isin(converged_datasets["dataset.name"])].copy()
# Shift the measure so that a path length of 1 maps to 0.
ms["path_length_f_test_baselined"] = ms["path_length_f_test"] - 1

# The rows with coef == 0 are passed as the reference mask.
# NOTE(review): the plot below expects get_ratios to add a column with the
# suffix "_normalized", whereas the path_length_d cells of this notebook expect
# the suffix "_normed" -- confirm which one smooth.analysis.get_ratios produces.
ms = smooth.analysis.get_ratios(ms, ms["model.path_length_f_reg_coef"] == 0, "path_length_f_test_baselined")

plot_all_measures(ms, additional_measures=["path_length_f_test_baselined_normalized"])

# sns.relplot(
#     data=ms,
#     x="model.path_length_f_reg_coef",
#     y="path_length_f_train",
#     hue="dataset.name",
#     kind="line",
# )

In [None]:
# Work on a copy of `ms` that exposes the regularization coefficient under a
# readable column name, and group the plot by that column.
ms1 = ms.assign(**{"Regularization coef": ms["model.path_length_f_reg_coef"]})

fig1 = plot_measure(
    ms1,
    "path_length_f_test",
    groupby="Regularization coef",
    xlabel="Function path length",
)
fig1.show()
# fig1.savefig("../figures/teatalk_path_length_f.png", bbox_inches="tight")
# plot_all_measures(ms1, "Regularization coef")

In [None]:
# Same plot as for "path_length_f_test", but for the baselined and normalized
# column. Relies on `ms1` (with its "Regularization coef" column) already
# existing in the kernel.
fig1 = plot_measure(
    ms1, "path_length_f_test_baselined_normalized",
    groupby="Regularization coef",
    xlabel="Function path length - baselined and normalized"
)
fig1.show()
# fig1.savefig("../figures/teatalk_path_length_f_normalized.png", bbox_inches="tight")

## path_length_d

For a redo of this experiment with a longer training time, see `figures_explicit_regularization.ipynb`.

In [None]:
# Load two runs; both use the path_length_d regularization coefficient for
# kind_cols.
ms = smooth.analysis.load_measures(
    "./0423-105330/measures.feather",
    kind_cols=[
        ("model.path_length_d_reg_coef", "coef"),
    ],
)

# NOTE(review): the name `ms1` is reused by the plotting cell that follows,
# where it is rebound to a copy of the combined `ms`.
ms1 = smooth.analysis.load_measures(
    "./0426-120233/measures.feather",
    kind_cols=[
        ("model.path_length_d_reg_coef", "coef"),
    ],
)

# Restrict the first run to the datasets that also occur in the second run.
ms = ms[ms["dataset.name"].isin(set(ms1["dataset.name"]))]

ms = pd.concat([ms, ms1], sort=False)
# The rows with coef == 0 are passed as the reference mask.
# NOTE(review): the plot below expects get_ratios to add a column with the
# suffix "_normed", whereas the path_length_f cells of this notebook expect the
# suffix "_normalized" -- confirm which one smooth.analysis.get_ratios produces.
ms = smooth.analysis.get_ratios(ms, ms["model.path_length_d_reg_coef"] == 0, "path_length_d_test")

# Disabled filter; it refers to the path_length_f coefficient rather than the
# path_length_d one used in this cell.
# converged_datasets = ms[(ms["model.path_length_f_reg_coef"] == 0.0001)]
# ms = ms[ms["dataset.name"].isin(converged_datasets["dataset.name"])]

plot_all_measures(ms, additional_measures=["path_length_d_test_normed"])

In [None]:
# Work on a copy of `ms` that exposes the regularization coefficient under a
# readable column name, and group the plot by that column.
ms1 = ms.assign(**{"Regularization coef": ms["model.path_length_d_reg_coef"]})

fig1 = plot_measure(
    ms1,
    "path_length_d_test",
    groupby="Regularization coef",
    xlabel="Gradient path length",
)
fig1.show()
# The output path is relative to the current working directory.
fig1.savefig("../figures/teatalk_path_length_d.png", bbox_inches="tight")
# plot_all_measures(ms1, "Regularization coef")

In [None]:
# Same plot as for "path_length_d_test", but for the normalized column.
# Relies on `ms1` (with its "Regularization coef" column) already existing in
# the kernel.
fig1 = plot_measure(
    ms1, "path_length_d_test_normed",
    groupby="Regularization coef",
    xlabel="Gradient path length - normalized"
)
fig1.show()
# The output path is relative to the current working directory.
fig1.savefig("../figures/teatalk_path_length_d_normalized.png", bbox_inches="tight")

In [None]:
# Inspection cell: display the normalized gradient path length column of `ms`.
ms["path_length_d_test_normed"]

In [None]:
# Debugging aid: print the first three rows of `ms`; iterrows() yields each row
# as an (index, Series) tuple.
for row in ms[:3].iterrows():
    print(row)