# Kernel ridge regression on GP datasets 2


In [None]:
import os
import sys
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import scipy.stats
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()

# Restrict TensorFlow to a single GPU when any are listed.
# NOTE(review): CUDA_VISIBLE_DEVICES is set to "-1" above, so this list is
# presumably empty and the branch is skipped -- confirm.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Prefer the second GPU as before, but fall back to the only one present
    # instead of raising IndexError on a single-GPU machine.
    tf.config.experimental.set_visible_devices(
        [gpus[1] if len(gpus) > 1 else gpus[0]], 'GPU'
    )

# Work from the experiment's log directory: the relative paths passed to
# pd.read_feather in the cells below are resolved against it.
os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/0302_gp_krr/")

In [None]:
%load_ext autoreload
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
%autoreload 1

In [None]:
# Kernel ridge regression measures: two result files stacked into one table.
ms_kr = pd.read_feather("measures.feather")
ms_kr_2 = pd.read_feather("measures_alpha0.feather")
ms_kr = smooth.analysis.expand_dataset_columns(
    pd.concat([ms_kr, ms_kr_2], ignore_index=True)
)
# The return value is ignored; presumably this edits the frame in place.
smooth.analysis.remove_constant_columns(ms_kr, verbose=True)
# Use the name "path_length_f" that the plotting cells below expect.
ms_kr.rename(columns={"path_length_f_test": "path_length_f"}, inplace=True)
ms_kr.head()

In [None]:
# Measures from the sibling run directory 0302-173545 (the rows carry a
# hidden_size column, which the comparison cell below labels as "nn").
ms_nn = smooth.analysis.expand_dataset_columns(
    pd.read_feather("../0302-173545/measures.feather")
)
# The return value is ignored; presumably this edits the frame in place.
smooth.analysis.remove_constant_columns(ms_nn, verbose=True)
ms_nn.head()

In [None]:
# Histogram of every non-object column, after dropping the lowest and highest
# trim/2 fraction of its values.
# NOTE(review): `ms` is not assigned in any cell shown here (only ms_kr and
# ms_nn are) -- confirm which frame this is meant to inspect.
trim = 0.1

for col in ms.columns:
    if ms[col].dtype != "object":
        # Keep the values between the two quantiles, bounds included.
        data = ms.loc[
            ms[col].between(
                ms[col].quantile(trim / 2),
                ms[col].quantile(1 - trim / 2),
            ),
            col,
        ]

        plt.hist(data, bins=20)
        plt.title(col)
        plt.show()

In [None]:
def get_optimal_path_length_f(dataset_name):
    """Return the mean absolute pairwise difference of a dataset's test targets.

    The dataset is loaded with ``smooth.datasets.from_name`` and the result is
    ``sum(|a - b| for a in y for b in y) / n ** 2``, where ``y`` is the
    flattened ``y_test`` and ``n = len(x_test)``.

    The double sum is evaluated in closed form on the sorted values instead of
    with two nested Python loops: the k-th smallest value (0-based, ``m``
    values in total) is the larger element of ``k`` unordered pairs and the
    smaller element of ``m - 1 - k`` pairs, so it enters the sum over
    unordered pairs with weight ``2 * k - (m - 1)``. The sum over ordered
    pairs is twice that. The result matches the explicit double loop up to
    floating-point rounding.

    Args:
        dataset_name: name understood by ``smooth.datasets.from_name``.

    Returns:
        The mean absolute difference as a float.
    """
    dataset = smooth.datasets.from_name(dataset_name)
    # The normaliser stays tied to x_test, exactly as in the loop version.
    n = len(dataset.x_test)
    y = np.sort(np.asarray(dataset.y_test, dtype=float).reshape(-1))
    m = len(y)
    weights = 2 * np.arange(m) - (m - 1)
    return 2 * np.dot(weights, y) / (n ** 2)


# Dataset names with their last dash-separated field removed, de-duplicated.
# NOTE(review): `ms` is not assigned in any cell shown here -- confirm which
# frame is meant.
datasets = (
    ms["dataset"]
    .str.split("-")
    .str.slice(0, -1)
    .str.join("-")
    .unique()
)

# One reference value per dataset family, always computed with "-77" appended
# as the final name field.
optimal_lengths = {
    name: get_optimal_path_length_f("{}-77".format(name))
    for name in tqdm.notebook.tqdm(datasets)
}

optimal_lengths

In [None]:
# One line plot per (dim, measure): measure against training-set size, coloured
# by polynomial degree, for the rows with lengthscale == dim and alpha == 0.0001.
seed = 2  # not used by the active filter below (rows of all seeds are kept)
measure_cols = ["train_loss", "test_loss", "path_length_f"]

for dim in sorted(ms_kr["dim"].unique()):
    # The row selection does not depend on the measure, so build it once per dim.
    ms1 = ms_kr[
        (ms_kr["dim"] == ms_kr["lengthscale"])
        & (ms_kr["dim"] == dim)
        & (ms_kr["alpha"] == 0.0001)
    ]

    for measure in measure_cols:
        grid = sns.relplot(
            data=ms1,
            x="samples_train",
            y=measure,
            hue="degree",
            style="alpha",
            kind="line",
            palette=smooth.analysis.make_palette(ms1["degree"].unique()),
        )
        ax = grid.axes[0][0]

        # Sample counts are always shown on a log axis; only losses get log y.
        ax.set_xscale("log")
        if "loss" in measure:
            ax.set_yscale("log")
        plt.title("dim={}".format(dim))
        plt.show()

In [None]:
# Display the kernel ridge regression measures table loaded above.
ms_kr

In [None]:
# Display the measures table loaded from ../0302-173545 above.
ms_nn

In [None]:
# Inspect what the project's palette helper returns for three hue values.
smooth.analysis.make_palette([1,2,3])

In [None]:
def plot_compare(groups, dim):
    """Plot losses and path length against training-set size for several result sets.

    Every frame is tagged with its source and group name, the frames are
    concatenated, and the rows with ``dim == dim``, ``seed == 1`` and
    ``lengthscale < dim`` are kept. One seaborn ``relplot`` is then shown for
    each of ``train_loss``, ``test_loss`` and ``path_length_f``, with one
    column of facets per group and one hue per source.

    Args:
        groups: iterable of ``(group_name, members)`` pairs, where ``members``
            is an iterable of ``(source_name, DataFrame)`` pairs. The frames
            are not modified.
        dim: value of the ``dim`` column to plot.
    """
    frames = []
    for group_name, group in groups:
        for name, ms_cur in group:
            # assign() returns a labelled copy, so the caller's frames (often
            # .loc slices of a larger table) are left untouched.
            frames.append(ms_cur.assign(source=name, group=group_name))

    ms_all = pd.concat(frames, sort=False)
    ms_all = ms_all.loc[
        (ms_all["dim"] == dim)
        & (ms_all["seed"] == 1)
        & (ms_all["lengthscale"] < ms_all["dim"])
    ]

    for measure in ["train_loss", "test_loss", "path_length_f"]:
        grid = sns.relplot(
            data=ms_all,
            x="samples_train",
            y=measure,
            hue="source",
            col="group",
            kind="line",
        )
        # NOTE(review): only the first facet is configured explicitly;
        # presumably the scale reaches the other facets through relplot's
        # shared axes -- confirm.
        ax = grid.axes[0][0]
        ax.set_xscale("log")
        # All three plotted measures use a log y axis.
        ax.set_yscale("log")
        plt.title("dim={}".format(dim))
        plt.show()

# Kernel ridge rows with alpha == 0, one labelled frame per polynomial degree 1..5.
kr_group = [
    (
        "krr, deg={}".format(deg),
        ms_kr.loc[(ms_kr["degree"] == deg) & (ms_kr["alpha"] == 0.000)],
    )
    for deg in range(1, 6)
]

# Rows of ms_nn, one labelled frame per hidden size present in the data.
nn_group = [
    (
        "nn, hs={:02}".format(hs),
        ms_nn.loc[ms_nn["hidden_size"] == hs],
    )
    for hs in sorted(ms_nn["hidden_size"].unique())
]

# Compare both families side by side for every input dimension found in ms_nn.
for dim in sorted(ms_nn["dim"].unique()):
    plot_compare([("krr", kr_group), ("nn", nn_group)], dim)

# plot_compare({
#     "nn, hs=010": ms_nn.loc[(ms_nn["hidden_size"] == 32)],
# #     "nn, hs=030": ms_nn.loc[(ms["hidden_size"] == 30) & (ms["seed"] == 1)],
# #     "nn, hs=100": ms_nn.loc[(ms["hidden_size"] == 100) & (ms["seed"] == 1)],
#     "krr, deg=1": ms_kr.loc[(ms_kr["degree"] == 1) & (ms_kr["alpha"] == 0.0001)],
# #     "krr, deg=2": ms_kr.loc[(ms_krr["degree"] == 2) & (ms_krr["alpha"] == 0.0001)],
# #     "krr, deg=3": ms_kr.loc[(ms_krr["degree"] == 3) & (ms_krr["alpha"] == 0.0001)],
# #     "krr, deg=4": ms_kr.loc[(ms_krr["degree"] == 4) & (ms_krr["alpha"] == 0.0001)],
# #     "krr, deg=5": ms_kr.loc[(ms_krr["degree"] == 5) & (ms_krr["alpha"] == 0.0001)],
# })

In [None]:
# Load the dataset named "gp-64-1-64.0-1000" through the project's dataset factory.
# NOTE(review): the meaning of the four name fields is not visible in this
# notebook -- check smooth.datasets.from_name.
dataset = smooth.datasets.from_name("gp-{}-{}-{}-{}".format(64, 1, 64.0, 1000))

In [None]:
import sklearn.kernel_ridge

# Fit one polynomial-kernel ridge regressor (degree 4, coef0=1) on the first
# 100 training points of `dataset`.
# NOTE(review): `alpha` is assigned here, but the estimator below is built with
# the literal alpha=0, so this variable is unused in this cell -- confirm
# which value was intended.
alpha = 1e-50
degree = 4
krr = sklearn.kernel_ridge.KernelRidge(
    alpha=0,
    kernel="poly",
    degree=degree,
#     degree=len(dataset.x_train) + 10,
    coef0=1,
)
krr.fit(dataset.x_train[:100], dataset.y_train[:100])

In [None]:
# Evaluate the fitted model on `dataset` with the project's measure_krr helper
# and display the result.
# NOTE(review): smooth.train_kernel_models is not among the %aimport lines in
# the cells shown here -- confirm it is importable as an attribute of `smooth`.
m = smooth.train_kernel_models.measure_krr(krr, dataset)
m

In [None]:
def get_A(x, deg):
    """Return the polynomial design matrix of ``x`` up to degree ``deg``.

    Entry ``[i, j]`` is ``x[i] ** j`` for ``j = 0 .. deg``, so the columns are
    in increasing-power order (the coefficient order expected by
    ``np.polynomial.polynomial.polyval``).

    Args:
        x: 1-D array-like of sample locations; may be empty.
        deg: highest power to include (non-negative integer).

    Returns:
        Array of shape ``(len(x), deg + 1)``.
    """
    # A column vector raised to a row of exponents broadcasts to the full
    # matrix, and unlike the tile/reshape form it also works for empty ``x``.
    return np.power(np.asarray(x).reshape(-1, 1), np.arange(deg + 1))

In [None]:
import numpy.linalg
# Least-squares polynomial fits of increasing degree (0..14) through four
# sample points, each drawn in its own figure.
x = np.array([0, 1.4, 2, 3])
y = np.array([-1, 0.2, -0.9, -0.5])
# The evaluation grid is the same for every degree, so build it once.
x_pred = np.linspace(0, 3)

for deg in range(15):
    A = get_A(x, deg)
    # lstsq returns a tuple whose first item holds the fitted coefficients.
    p = np.linalg.lstsq(A, y, rcond=None)[0]
    y_pred = np.polynomial.polynomial.polyval(x_pred, p)

    plt.plot(x, y, 'o', label='Original data', markersize=10)
    plt.plot(x_pred, y_pred, 'r', label='Fitted line')
    plt.legend()
    plt.show()