# One-dimensional case

In [None]:
import os
import sys
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import matplotlib.pyplot as plt
import seaborn as sns
import IPython
import scipy.stats
# Apply seaborn's default theme to the figures drawn in this notebook.
sns.set()

# Work from the experiment log directory so that the relative
# "./<run>/measures.feather" paths used in later cells resolve against it.
os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/")

In [None]:
%load_ext autoreload
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
%autoreload 1

In [None]:
# Small noise-free Gaussian-process dataset (10 training samples) used for the
# single-model sanity check in the next cells.
dataset = smooth.datasets.GaussianProcessDataset(
    seed=1,
    samples_train=10,
    lengthscale=1.0,
    noise_var=0.0,
)

In [None]:
# Fit a shallow model to `dataset`; training output is silenced (verbose=0)
# because the run spans 100000 epochs.
model = smooth.model.train_shallow(
    dataset,
    epochs=100000,
    batch_size=64,
    learning_rate=0.01,
    init_scale=0.01,
    verbose=0,
)

In [None]:
# Visual check of the trained model against the dataset it was fit to
# (plot_shallow lives in smooth.analysis).
smooth.analysis.plot_shallow(model, dataset)

In [None]:
# Compute the measures for this model/dataset pair; the result is shown as the
# cell output.
# NOTE(review): precise_in_1d=False presumably selects the approximate rather
# than the exact 1D computation -- confirm in smooth.measures.
smooth.measures.get_measures(model, dataset, precise_in_1d=False)

In [None]:
def plot_measure(ms, measure_name, y_log=False):
    """Show a line plot of one measure against the training-set size.

    The x-axis is the "dataset.samples_train" column and the lines are
    coloured by the "dataset.seed" column. The figure is displayed
    immediately via ``plt.show()``; nothing is returned.

    Args:
        ms: Data handed to ``sns.relplot``; it must provide the
            "dataset.samples_train" and "dataset.seed" columns as well as
            ``measure_name``.
        measure_name: Name of the column plotted on the y-axis.
        y_log: When true, the y-axis uses a logarithmic scale.
    """
    facets = sns.relplot(
        x="dataset.samples_train",
        y=measure_name,
        hue="dataset.seed",
        kind="line",
        data=ms,
    )

    # relplot returns a grid of axes; with no row/col faceting the single
    # plot sits at position [0][0]. (The x-axis is deliberately left linear.)
    if y_log:
        facets.axes[0][0].set_yscale("log")

    plt.show()
    

def plot_measures(ms):
    """Plot every tracked measure of ``ms`` with ``plot_measure``, one figure each.

    Measures whose name contains "loss" are drawn on a logarithmic y-axis;
    all others use a linear one.

    Args:
        ms: Measures table forwarded unchanged to ``plot_measure``.
    """
    # "gradient_norm_train" is intentionally not part of this list.
    measure_names = (
        "loss_train",
        "loss_test",
        "gradient_norm_test",
        "weights_product",
        "path_length_f_test",
        "path_length_d_test",
    )

    for name in measure_names:
        plot_measure(ms, name, "loss" in name)

In [None]:
# def get_kendall(ms, col_1, col_2, get_pvalues=False):
#     tau = scipy.stats.kendalltau(ms[col_1], ms[col_2])

#     if get_pvalues:
#         return tau.pvalue
#     else:
#         return tau.correlation

# def get_kendalls(ms, col_1, cols, get_pvalues=False):
#     res = [
#         get_kendall(ms, col_1, col, get_pvalues)
#         for col in cols
#     ]
#     return pd.Series(res, index=cols)

# measures = ["gradient_norm_test", "path_length_f_test", "path_length_d_test", "weights_product"]

# def summarize_kendalls(ms, get_pvalues=False):
#     return ms.groupby("dataset.seed").apply(
#         lambda df: get_kendalls(df, "dataset.samples_train", measures, get_pvalues)
#     ).describe()

In [None]:
# Run "0516_gp_1d": load its measures table.
# NOTE(review): remove_unconverged=False presumably keeps runs that did not
# converge -- confirm in smooth.analysis.load_measures.
ms = smooth.analysis.load_measures(
    "./0516_gp_1d/measures.feather",
    remove_unconverged=False,
    kind_cols=[("dataset.seed", "seed"), ("dataset.samples_train", "samples")],
)

# Kendall summary of every measure against the training-set size, grouped by
# seed, asking for p-values (get_pvalues=True). Shown as the cell output.
smooth.analysis.summarize_kendalls(
    ms,
    get_pvalues=True,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
)

In [None]:
# Run "0517_gp_1d_no_loss_threshold": load its measures table.
# NOTE(review): remove_unconverged=False presumably keeps runs that did not
# converge -- confirm in smooth.analysis.load_measures.
ms = smooth.analysis.load_measures(
    "./0517_gp_1d_no_loss_threshold/measures.feather",
    remove_unconverged=False,
    kind_cols=[("dataset.seed", "seed"), ("dataset.samples_train", "samples")],
)

# Kendall summary of every measure against the training-set size, grouped by
# seed, asking for p-values (get_pvalues=True). Shown as the cell output.
smooth.analysis.summarize_kendalls(
    ms,
    get_pvalues=True,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
)

In [None]:
# Run "0517_gp_1d_lr_0.1": load its measures table.
# NOTE(review): remove_unconverged=False presumably keeps runs that did not
# converge -- confirm in smooth.analysis.load_measures.
ms = smooth.analysis.load_measures(
    "./0517_gp_1d_lr_0.1/measures.feather",
    remove_unconverged=False,
    kind_cols=[("dataset.seed", "seed"), ("dataset.samples_train", "samples")],
)

# Kendall summary of every measure against the training-set size, grouped by
# seed, asking for p-values (get_pvalues=True). Shown as the cell output.
smooth.analysis.summarize_kendalls(
    ms,
    get_pvalues=True,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
)

In [None]:
# Run "0517_gp_1d_100k_epochs": load its measures table.
# NOTE(review): remove_unconverged=False presumably keeps runs that did not
# converge -- confirm in smooth.analysis.load_measures.
ms = smooth.analysis.load_measures(
    "./0517_gp_1d_100k_epochs/measures.feather",
    remove_unconverged=False,
    kind_cols=[("dataset.seed", "seed"), ("dataset.samples_train", "samples")],
)

# Kendall summary of every measure against the training-set size, grouped by
# seed, asking for p-values (get_pvalues=True). Shown as the cell output.
smooth.analysis.summarize_kendalls(
    ms,
    get_pvalues=True,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
)

In [None]:
# Run "0517_gp_1d_big": load its measures table.
# NOTE(review): remove_unconverged=False presumably keeps runs that did not
# converge -- confirm in smooth.analysis.load_measures.
ms = smooth.analysis.load_measures(
    "./0517_gp_1d_big/measures.feather",
    remove_unconverged=False,
    kind_cols=[("dataset.seed", "seed"), ("dataset.samples_train", "samples")],
)

# Kendall summary (p-values requested) of every measure against the
# training-set size, grouped by seed; describe() then condenses the result
# into summary statistics, which are shown as the cell output.
smooth.analysis.summarize_kendalls(
    ms,
    get_pvalues=True,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
).describe()

In [None]:
# plot_measures(ms)

In [None]:
# Per-seed Kendall statistics of four measures against the training-set size,
# condensed with describe().
measures = ["gradient_norm_test", "path_length_f_test", "path_length_d_test", "weights_product"]

# There is no notebook-level `get_kendalls` (only a commented-out draft), so a
# bare call raises NameError. Use the implementation in smooth.analysis, with
# the same positional arguments the other per-seed cell of this notebook
# passes: (df, x_col, cols, get_pvalues).
ms.groupby("dataset.seed").apply(
    lambda df: smooth.analysis.get_kendalls(df, "dataset.samples_train", measures, False)
).describe()

In [None]:
# Run "0518_gp_1d_lengthscale_0.5": load its measures table.
# NOTE(review): remove_unconverged=False presumably keeps runs that did not
# converge -- confirm in smooth.analysis.load_measures.
ms = smooth.analysis.load_measures(
    "./0518_gp_1d_lengthscale_0.5/measures.feather",
    kind_cols=[
        ("dataset.seed", "seed"),
        ("dataset.samples_train", "samples"),
    ],
    remove_unconverged=False,
)

# Pass the grouping column, x column and measure columns explicitly, exactly
# like every other summarize_kendalls call in this notebook; the short
# two-argument form matched an older, notebook-local signature.
# NOTE(review): confirm that these arguments are what this run should use.
smooth.analysis.summarize_kendalls(
    ms,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
    get_pvalues=True,
)

In [None]:
# One figure per measure for the run currently held in `ms`.
plot_measures(ms)

In [None]:
# Run "0519_gp_1d": load its measures table. Unlike the earlier runs in this
# notebook, this one is loaded with remove_unconverged=True.
ms = smooth.analysis.load_measures(
    "./0519_gp_1d/measures.feather",
    remove_unconverged=True,
    kind_cols=[("dataset.seed", "seed"), ("dataset.samples_train", "samples")],
)

# This one is used in the thesis.
# Kendall summary (get_pvalues=False, so no p-values are requested) of every
# measure against the training-set size, grouped by seed, condensed with
# describe() and kept in `taus` for the cells below.
taus = smooth.analysis.summarize_kendalls(
    ms,
    get_pvalues=False,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
).describe()

In [None]:
# Number of rows in `ms` whose training loss is below 0.1.
len(ms[ms["loss_train"] < 0.1])

In [None]:
# Training losses of all rows, sorted in ascending order.
ms.sort_values("loss_train")["loss_train"]

In [None]:
# Display the summary table stored in `taus`.
taus

In [None]:
# One figure per measure for the run currently held in `ms`.
plot_measures(ms)

## Polynomials

In [None]:
# Measures of polynomial interpolants on Gaussian-process datasets
# (seeds 1..20, 2..10 training samples each).
#
# The table is read from the feather file when it exists; otherwise it is
# computed from scratch. A bare top-level `return` cannot be used to skip the
# computation: it is a SyntaxError, which prevents the whole cell from running.
if os.path.exists("0519_gp_1d/measures_polynomials.feather"):
    ms_poly = pd.read_feather("0519_gp_1d/measures_polynomials.feather")
else:
    # Collect the rows first and build the frame once: DataFrame.append was
    # removed in pandas 2.0, and appending row by row copies the frame each time.
    poly_rows = []

    # tqdm.notebook.tqdm replaces the deprecated tqdm.tqdm_notebook alias.
    for seed in tqdm.notebook.tqdm(range(1, 21)):
        for n_samples in tqdm.notebook.tqdm(range(2, 11), leave=False):
            dataset = smooth.datasets.GaussianProcessDataset(
                samples_train=n_samples, lengthscale=0.5, seed=seed
            )
            model = smooth.model.interpolate_polynomial(dataset)
            row = smooth.measures.get_measures(model, dataset, samples=1000)
            # Tag the row with the configuration that produced it so it can be
            # grouped by seed and compared across training-set sizes.
            row["dataset.seed"] = seed
            row["dataset.samples_train"] = n_samples
            poly_rows.append(row)

    ms_poly = pd.DataFrame(poly_rows)


In [None]:

# Same Kendall summary as for the trained networks, now on the polynomial
# measures in `ms_poly`: grouped by seed, no p-values requested, condensed
# with describe().
taus = smooth.analysis.summarize_kendalls(
    ms_poly,
    get_pvalues=False,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
).describe()

In [None]:
# ms_poly.to_feather("0519_gp_1d/measures_polynomials.feather")

In [None]:
def format_for_table(ms):
    """Print LaTeX table rows of the form ``$NAME$ & $mean \\pm std$``.

    One row is printed for each of the four measures GN, PL_0, PL_1 and WP,
    with both numbers rounded to two decimals. Nothing is returned.

    Args:
        ms: Table indexable as ``ms.loc["mean", column]`` and
            ``ms.loc["std", column]`` (for example the result of
            ``DataFrame.describe()``), containing the columns
            "gradient_norm_test", "path_length_f_test", "path_length_d_test"
            and "weights_product".
    """
    measures = [
        ("gradient_norm_test", "GN"),
        ("path_length_f_test", "PL_0"),
        ("path_length_d_test", "PL_1"),
        ("weights_product", "WP"),
    ]

    for measure_col, measure_tex in measures:
        measure_tex = "${}$".format(measure_tex)
        # Raw string: "\p" is not a valid escape sequence, so a plain literal
        # triggers an invalid-escape warning. The printed text is unchanged.
        print(r"{} & ${:.2f} \pm {:.2f}$".format(
            # Pad the label so the "&" separators line up across rows.
            measure_tex.ljust(10),
            ms.loc["mean", measure_col],
            ms.loc["std", measure_col]
        ))

# Print the LaTeX rows for the summary currently stored in `taus`.
format_for_table(taus)

In [None]:
$GN$       & $0.30 \pm 0.18$
$PL_0$     & $0.49 \pm 0.16$
$PL_1$     & $0.59 \pm 0.13$
$WP$       & $0.38 \pm 0.15$

In [None]:
$GN$       & $0.32 \pm 0.16$
$PL_0$     & $0.48 \pm 0.16$
$PL_1$     & $0.46 \pm 0.11$
$WP$       & $0.16 \pm 0.21$

In [None]:
# Keep the un-summarised Kendall table for the polynomial measures in `taus`
# (describe() is deliberately not applied to the stored value) ...
taus = smooth.analysis.summarize_kendalls(
    ms_poly,
    get_pvalues=False,
    groupby="dataset.seed",
    x_col="dataset.samples_train",
    y_cols=smooth.analysis.get_measure_names(),
)

# ... and only display its summary statistics as the cell output.
taus.describe()

In [None]:
# Apply smooth.analysis.get_kendalls to each seed's rows of `ms_poly`
# separately (last argument False: no p-values requested); the per-seed
# result is shown as the cell output.
ms_poly.groupby("dataset.seed").apply(
    lambda seed_rows: smooth.analysis.get_kendalls(
        seed_rows,
        "dataset.samples_train",
        smooth.analysis.get_measure_names(),
        False,
    )
)

In [None]:
# Display the polynomial measures table.
ms_poly

In [None]:
# Display the polynomial measures table.
ms_poly