# Non-smooth NNs on GP datasets

Can we set hyperparameters such that the learned function is not smooth?

In [None]:
import os
import sys
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
import IPython
sns.set()

# NOTE: CUDA_VISIBLE_DEVICES is set to "-1" above, so with that setting
# TensorFlow should report no GPUs and the branch below is skipped.
gpus = tf.config.experimental.list_physical_devices('GPU')
# Pin TensorFlow to the second GPU. This is only done when a second GPU exists;
# indexing gpus[1] on a single-GPU machine would raise IndexError.
if len(gpus) > 1:
    tf.config.experimental.set_visible_devices([gpus[1]], 'GPU')

# All relative paths used below ("measures.feather", model directories, ...)
# are resolved against this log directory.
os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/0304_gp_nn")

In [None]:
# Enable IPython's autoreload extension and register the project modules that
# are edited alongside this notebook.
%load_ext autoreload
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
# Mode 1: reload only the modules registered with %aimport above.
%autoreload 1

### Measures of shallow relu neural networks

In [None]:
# Read the neural-network measures and expand the dataset description into
# separate columns.
ms_nn = smooth.analysis.expand_dataset_columns(pd.read_feather("measures.feather"))
# The return value of this call is not used.
smooth.analysis.remove_constant_columns(ms_nn, verbose=True)

# Report the row count before and after discarding rows whose
# "path_length_f" is not finite.
print(len(ms_nn))
finite_path_length = np.isfinite(ms_nn["path_length_f"])
ms_nn = ms_nn.loc[finite_path_length]
print(len(ms_nn))

ms_nn.head()

### Measures of the original GP model

In [None]:
# Read the measures of the GP model and preview the first rows.
gp_measures_file = "measures_gp.feather"
ms_gp = pd.read_feather(gp_measures_file)
ms_gp.head()

Normalized measures: each measure is divided by the corresponding measure of the GP model on the same dataset.

In [None]:
def normalize(df1, df2, join_col, cols):
    """Return ``df1`` with extra ``<col>_n`` columns: df1's value / df2's value.

    Rows of ``df1`` are matched to rows of ``df2`` on ``join_col``. The merge
    uses pandas' default inner join, so rows of ``df1`` without a match in
    ``df2`` are dropped from the result.

    Args:
        df1: frame whose measures are normalized; must contain ``join_col``
            and every column in ``cols``.
        df2: reference frame providing the denominators; must contain the
            same columns.
        join_col: name of the column used to match rows of the two frames.
        cols: names of the measure columns to normalize (any iterable of str).

    Returns:
        A new DataFrame with the columns of ``df1`` plus one ``<col>_n``
        column per entry of ``cols``. Neither input frame is modified.
    """
    # Accept tuples / other iterables, not only lists (list + tuple would fail).
    cols = list(cols)
    assert set(cols + [join_col]).issubset(set(df1.columns))
    # Reference columns get the temporary "_0" suffix; df1's keep their names.
    df = pd.merge(df1, df2[cols + [join_col]], on=join_col, suffixes=("", "_0"))
    for col in cols:
        reference = df.pop(col + "_0")
        df[col + "_n"] = df[col] / reference
    return df

measure_cols = ["train_loss", "test_loss", "path_length_f"]
# `ms_kr` is not created anywhere in this notebook, so normalizing it
# unconditionally raises NameError on a fresh kernel. Only normalize it when
# it has been loaded beforehand.
if "ms_kr" in globals():
    ms_kr = normalize(ms_kr, ms_gp, "dataset", measure_cols)
# Adds "<measure>_n" columns: each NN measure divided by the GP measure on the
# same dataset.
ms_nn = normalize(ms_nn, ms_gp, "dataset", measure_cols)

In [None]:
# Rows with dim == 8 and init_scale == 10. The trailing `& ()` of the original
# expression was an unfinished third condition that broke the boolean mask.
ms_nn.loc[(ms_nn["dim"] == 8) & (ms_nn["init_scale"] == 10.)]

In [None]:
# Combine the two training hyperparameters into a single
# "<init_scale>,<learning_rate>" string label.
init_scale_str = ms_nn["init_scale"].map(str)
learning_rate_str = ms_nn["learning_rate"].map(str)
ms_nn["learning_params"] = init_scale_str + "," + learning_rate_str

In [None]:
# Recomputing the GP measures is disabled by default: `ms_gp` has already been
# read from "measures_gp.feather". The original cell used a top-level `return`
# to skip this step, which is a SyntaxError; an explicit flag keeps the cell
# runnable. Set it to True to overwrite `ms_gp` with freshly computed measures.
RECOMPUTE_GP_MEASURES = False
if RECOMPUTE_GP_MEASURES:
    ms_gp = smooth.analysis.get_gp_measures(ms_nn["dataset"].unique())

In [None]:
# Compute the GP measures for every dataset that occurs in the NN results.
nn_dataset_names = ms_nn["dataset"].unique()
ms_gp2 = smooth.analysis.get_gp_measures(nn_dataset_names)

In [None]:
# Inspect the GP measures computed in the previous cell.
ms_gp2

In [None]:
def add_normalized_cols(ms):
    """Add the path-length / bound ratio columns to ``ms`` in place.

    Two columns are written:
      * "plc"  = "path_length_f" / "path_length_f_bound"
      * "plct" = "path_length_f_train" / "path_length_f_train_bound"

    Returns the same (mutated) DataFrame so the call can end a cell.
    """
    ratios = {
        "plc": ("path_length_f", "path_length_f_bound"),
        "plct": ("path_length_f_train", "path_length_f_train_bound"),
    }
    for target, (numerator, denominator) in ratios.items():
        ms[target] = ms[numerator] / ms[denominator]
    return ms

# Add the "plc"/"plct" ratio columns to both GP frames (they are modified in
# place). The second call is the cell's last expression, so its return value
# (ms_gp2) is what gets displayed.
add_normalized_cols(ms_gp)
add_normalized_cols(ms_gp2)

In [None]:
# GP test loss against the input dimension, coloured by training-set size,
# with a logarithmic x axis.
grid = sns.relplot(
    x="dim",
    y="test_loss",
    hue="samples_train",
    data=ms_gp2,
)
ax = grid.axes[0, 0]
ax.set_xscale("log")

In [None]:
measure_cols = ["train_loss", "test_loss", "path_length_f", "path_length_d", "weights_rms"]

# Only networks with 64 hidden units are compared. Neither the selection nor
# the palette depends on the measure, so both are computed once.
ms1 = ms_nn[(ms_nn["hidden_size"] == 64)]
init_scale_palette = smooth.analysis.make_palette(ms1["init_scale"].unique())

# One figure per measure, with one facet per input dimension.
for measure in measure_cols:
    # The heading names the measure being plotted. The original formatted the
    # variable `dim`, which is not defined by this loop.
    IPython.display.display(IPython.display.Markdown("### {}".format(measure)))
    grid = sns.relplot(
        data=ms1,
        x="samples_train",
        y=measure,
        hue="init_scale",
        style="learning_rate",
        col="dim",
        col_wrap=3,
        kind="line",
        palette=init_scale_palette,
    )
    # With col_wrap the grid's axes array is flat, so axes[0] is the first
    # facet. NOTE: this relies on the facets sharing their axes (seaborn's
    # default) for the log scale to apply to every panel.
    ax = grid.axes[0]
    ax.set_xscale("log")
    # Every measure is drawn on a logarithmic y axis.
    ax.set_yscale("log")
    plt.show()

In [None]:
# Build a frame in which the bound columns take the place of the measured
# path lengths, so the bounds can be plotted like any other source.
measured_cols = ["path_length_f", "path_length_f_train", "train_loss", "test_loss"]
bound_renames = {
    "path_length_f_bound": "path_length_f",
    "path_length_f_train_bound": "path_length_f_train",
}
ms_bound = ms_gp.drop(columns=measured_cols).rename(columns=bound_renames)


In [None]:
# Inspect the GP measures frame.
ms_gp

In [None]:
def plot_compare(groups, filter_f=None):
    """Plot several measures against the training-set size, one line per source.

    Args:
        groups: iterable of ``(group_name, group)`` pairs, where ``group`` is
            an iterable of ``(source_name, DataFrame)`` pairs. Every frame is
            copied and tagged with "source" and "group" columns, then all
            frames are concatenated.
        filter_f: optional callable applied to the concatenated frame before
            plotting; when omitted (or None) no extra filtering is done.

    NOTE: after ``filter_f``, rows are additionally restricted to
    ``dim == <notebook-level variable dim>`` and ``lengthscale == dim``, so the
    global ``dim`` must be set before calling (e.g. by an enclosing loop).

    One figure is shown per measure, with one facet per group.
    """
    filter_f = filter_f or (lambda df: df)

    # Tag a copy of every frame with where it came from.
    tagged_frames = [
        ms_cur.assign(source=name, group=group_name)
        for group_name, group in groups
        for name, ms_cur in group
    ]

    ms_all = pd.concat(tagged_frames, sort=False)
    ms_all = filter_f(ms_all)
    ms_all = ms_all.loc[
        (ms_all["dim"] == dim)
        & (ms_all["lengthscale"] == ms_all["dim"])
    ]

    # Measures drawn with a logarithmic y axis.
    log_y_measures = ["train_loss", "test_loss", "path_length_f"]

    for measure in ["train_loss", "test_loss", "path_length_f", "path_length_f_train"]:
        grid = sns.relplot(
            data=ms_all,
            x="samples_train",
            y=measure,
            hue="source",
            col="group",
            kind="line",
        )
        ax = grid.axes[0][0]
        ax.set_xscale("log")
        if measure in log_y_measures:
            ax.set_yscale("log")
        plt.show()

# Sources to compare: one entry per init scale (networks with 64 hidden units
# trained with learning rate 0.01), followed by the two GP frames and the bound.
nn_group = []
for init in sorted(ms_nn["init_scale"].unique()):
    selected = (
        (ms_nn["hidden_size"] == 64)
        & (ms_nn["init_scale"] == init)
        & (ms_nn["learning_rate"] == 0.01)
    )
    label = "nn, is={:02}".format(init)
    nn_group.append((label, ms_nn.loc[selected]))

nn_group += [
    ("gp_noise", ms_gp),
    ("gp_noiseless", ms_gp2),
    ("bound", ms_bound),
]

def filter_f(ms):
    """Keep the rows for the current ``dim`` whose lengthscale equals their dim.

    Reads the notebook-level variable ``dim``, which must be set before the call.
    """
    is_current_dim = ms["dim"] == dim
    lengthscale_equals_dim = ms["lengthscale"] == ms["dim"]
    return ms.loc[is_current_dim & lengthscale_equals_dim]

# One comparison per input dimension. The loop variable must be named `dim`:
# both plot_compare and filter_f read it as a global.
all_dims = sorted(ms_nn["dim"].unique())
for dim in all_dims:
    heading = "### dim = {}".format(dim)
    display(IPython.display.Markdown(heading))
    plot_compare([("nn", nn_group)], filter_f)

In [None]:
# Inspect the neural-network measures frame.
ms_nn

In [None]:
# Build a "gp" dataset from explicit parameters.
dataset = smooth.datasets.from_params(
    name="gp",
    dim=16,
    lengthscale=16.0,
    seed=123,
    samples_train=100,
)

In [None]:
# Tiny hand-made dataset: the same three values are passed as the first two
# arguments, and the last two arguments are empty.
# Other inputs tried here: [0., 1., 2.] and np.random.randn(6).
y = [0.0, 3.0, 2.0]
dataset = smooth.datasets.Dataset(y, y, [], [])

In [None]:
# Print the library's lower bound, then a brute-force reference value for
# comparison: the mean absolute difference over all ordered pairs of training
# targets (including each target paired with itself).
print(smooth.measures.path_length_f_lower_bound(dataset, use_test_set=False))

y = dataset.y_train
act = sum(np.abs(y1 - y2) for y1 in y for y2 in y)

print(act / (len(y) ** 2))

In [None]:
# Load two datasets whose names differ only in the last field.
dataset1, dataset2 = (
    smooth.datasets.from_name(dataset_name)
    for dataset_name in ("gp-2-1-1.0-100-0-0", "gp-2-1-1.0-100-0-1")
)

In [None]:
# Inspect the `disjoint` attribute of the first dataset.
dataset1.disjoint

In [None]:
# First ten test inputs of the second dataset.
dataset2.x_test[:10]

In [None]:
# Print the first ten training inputs of each dataset for comparison.
for ds in (dataset1, dataset2):
    print(ds.x_train[:10])

In [None]:
# First ten training inputs of the first dataset (rich display).
dataset1.x_train[:10]

In [None]:
# Load one trained model (path is relative to the log directory) and load the
# dataset named in its directory name.
model_path = "./bs=64_d=gp-128-1-128.0-1000_e=100000_hs=16_is=0.1_i=0_lr=0.001/model.h5"
model = tf.keras.models.load_model(model_path)
dataset = smooth.datasets.from_name("gp-128-1-128.0-1000")

In [None]:
# Compute the measures of the loaded model on the loaded dataset.
measures = smooth.measures.get_measures(model, dataset)

In [None]:
# Inspect the computed measures.
measures

In [None]:
# Scratch check of dict.update: start with two keys, then add a third.
a = {"a": 1, "b": 2}
a.update(c=3)

In [None]:
# Show the dict after the update.
a

In [None]:
# Shapes of all weight arrays of the loaded model.
[weights.shape for weights in model.get_weights()]