In [None]:
import os
import sys
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import scipy.stats

import matplotlib.pyplot as plt

# Restrict TensorFlow to a single GPU when any are visible. With
# CUDA_VISIBLE_DEVICES set to "-1" earlier in this cell, no GPUs are expected
# to be listed, so this branch only matters when that override is disabled.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Prefer the second GPU as before, but fall back to the only available one
    # on single-GPU machines instead of raising IndexError on gpus[1].
    preferred_gpu = gpus[1] if len(gpus) > 1 else gpus[0]
    tf.config.experimental.set_visible_devices([preferred_gpu], 'GPU')

# Extend the import path for the `smooth` modules used below (presumably the
# package lives under this directory). The membership check keeps re-runs of
# this cell from piling up duplicate sys.path entries.
smooth_root = "/nfs/scistore12/chlgrp/vvolhejn/smooth"
if smooth_root not in sys.path:
    sys.path.append(smooth_root)

# Relative paths used later in the notebook (e.g. "measures.feather" and the
# per-run log directories) are resolved against this working directory.
os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/0224_gp2/")

In [None]:
# Enable IPython's autoreload extension and register the project modules with
# %aimport. In mode 1 only modules registered this way are reloaded before
# code is executed, so edits to them are picked up without a kernel restart.
%load_ext autoreload
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
%autoreload 1

In [None]:
# Load the measures table; the relative path is resolved against the current
# working directory.
df = pd.read_feather("measures.feather")
# NOTE(review): the return value is discarded, so this presumably drops
# constant columns from df in place -- confirm against smooth.analysis.
smooth.analysis.remove_constant_columns(df)
# Unlike the call above, the result of this helper is assigned back to df.
df = smooth.analysis.expand_dataset_columns(df)
# Keep only the last "/"-separated component of each log_dir value.
df["log_dir"] = df["log_dir"].str.split("/").str.get(-1)
df.head()

In [None]:
# Quick sanity-check histograms of a few columns of df.
for col in ["actual_epochs", "train_loss", "test_loss"]:
    # A listed column may be absent from df (e.g. if it was dropped as constant
    # when the table was prepared); skip it instead of aborting with KeyError.
    if col not in df.columns:
        print(f"Skipping {col!r}: column not present in df")
        continue
    # Object-dtype (non-numeric) columns are skipped.
    if df[col].dtype == "object":
        continue
    # One explicit figure per column so each histogram stands alone.
    fig, ax = plt.subplots()
    ax.hist(df[col], bins=20)
    ax.set_title(col)
    ax.set_xlabel(col)
    ax.set_ylabel("count")
    plt.show()

In [None]:
def get_interpolation_measures(dataset_names, use_test_set=False):
    """Compute measures of an interpolating ReLU network for each dataset.

    For every name, the dataset is loaded with
    ``smooth.datasets.GaussianProcessDataset.from_name``, a model is built by
    ``smooth.model.interpolate_relu_network(dataset, use_test_set)`` and
    ``smooth.measures.get_measures`` is evaluated on the dataset's
    ``x_test`` / ``y_test`` with ``include_training_measures=False``.

    Args:
        dataset_names: Iterable of dataset names. It is materialised into a
            list once, so single-pass iterables (e.g. generators) are safe.
        use_test_set: Forwarded unchanged to ``interpolate_relu_network``
            (presumably selects which split is interpolated -- confirm there).

    Returns:
        pandas.DataFrame built from the objects returned by ``get_measures``,
        one row per dataset name, indexed by the dataset names.
    """
    # The names are consumed twice (loop + index). A generator would be
    # exhausted after the loop, leaving the index empty and making the
    # DataFrame constructor fail on a length mismatch.
    dataset_names = list(dataset_names)

    res = []
    for dataset_name in tqdm.notebook.tqdm(dataset_names):
        dataset = smooth.datasets.GaussianProcessDataset.from_name(dataset_name)
        model = smooth.model.interpolate_relu_network(dataset, use_test_set)
        measures = smooth.measures.get_measures(
            model,
            dataset.x_test, dataset.y_test,
            include_training_measures=False,
        )
        res.append(measures)

    return pd.DataFrame(res, index=dataset_names)

# Measures of the interpolating baseline for every distinct dataset in df:
# once with the default use_test_set=False and once with use_test_set=True.
unique_dataset_names = df["dataset"].unique()
im_train = get_interpolation_measures(unique_dataset_names)
im_test = get_interpolation_measures(unique_dataset_names, use_test_set=True)

In [None]:
# seg_total_variation against samples_train for seed 1 and lengthscale 0.1,
# restricted to runs whose train_loss is below 10.
df1 = df[(df["seed"] == 1) & (df["lengthscale"] == 0.1) & (df["train_loss"] < 10)]
fig, ax = plt.subplots()
for hs in [100, 1000]:
    df2 = df1[df1["hidden_size"] == hs]
    # Label each series so the two hidden sizes can be told apart in the plot.
    ax.scatter(
        df2["samples_train"], df2["seg_total_variation"],
        alpha=0.3, label=f"hidden_size={hs}",
    )
ax.set_xlabel("samples_train")
ax.set_ylabel("seg_total_variation")
ax.legend()
plt.show()

In [None]:
# Overlay the predictions of every selected trained model on the test curve
# and training points of the dataset taken from the first selected run.
df1 = df[(df["seed"] == 2) & (df["lengthscale"] == 0.3) & (df["train_loss"] < 1)]
df1 = df1[df1["samples_train"] == 33]
print(len(df1))

# Fail with an explicit message instead of an opaque IndexError from
# df1.iloc[0] when no run matches the filters above.
if df1.empty:
    raise ValueError(
        "No runs match seed == 2, lengthscale == 0.3, train_loss < 1, "
        "samples_train == 33"
    )

# NOTE(review): the dataset is read from the first row only; this assumes all
# selected runs share the same dataset -- confirm.
dataset = smooth.datasets.GaussianProcessDataset.from_name(df1.iloc[0]["dataset"])
x = dataset.x_test

fig, ax = plt.subplots()
ax.plot(x, dataset.y_test, color="C0", label="test targets")
ax.scatter(dataset.x_train, dataset.y_train, color="C0", label="training points")

for run_idx, (_, row) in enumerate(df1.iterrows()):
    log_dir = row["log_dir"]
    # The model path is relative, so it resolves against the current working
    # directory.
    model = tf.keras.models.load_model(os.path.join(log_dir, "model.h5"))
    y = model.predict(x)
    color = "C1"
    # Only the first model line gets a label, so the legend shows one entry
    # for all model curves.
    ax.plot(
        x, y, alpha=1, color=color,
        label="model predictions" if run_idx == 0 else None,
    )

#     smooth.analysis.plot_shallow(model, dataset, title=log_dir)

ax.set_xlabel("x")
ax.set_ylabel("y")
ax.legend()
plt.show()

In [None]:
# Display the runs currently selected in df1 (rich DataFrame output).
df1