# Smoother models through changing hyperparameters

What happens to smoothness if we train models with different hyperparameters? Specifically, we might try decreasing the init scale, as this has yielded smoother models in the past. This is an updated version of that experiment, run to see how the choice of hyperparameters interacts with explicit regularization. Can we get smoother models just by varying the hyperparameters?

In [None]:
import os
import sys
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import matplotlib.pyplot as plt
import seaborn as sns
import IPython
sns.set()

In [None]:
# Load IPython's autoreload extension so edits to the project modules are
# picked up without restarting the kernel.
%load_ext autoreload
# Import each `smooth` submodule used in this notebook and mark it for
# automatic reloading.
%aimport smooth.config
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
# Mode 1: before running a cell, reload only the modules marked with %aimport.
%autoreload 1

In [None]:
# Every path below is relative to the experiment log directory.
os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/")

# Earlier runs, kept for reference:
# ms = pd.read_feather("0326_mnist_binary/measures.feather")
# ms2 = pd.read_feather("0329_mnist_binary_gradient_norm/measures.feather")

# First fine-tuning batch. Each run is labelled by its init scale (IS) and
# learning rate (LR); the label is what the plots group by.
ms_finetune = pd.read_feather("./0402_mnist_binary_finetune/measures.feather")
ms_finetune["kind"] = (
    "IS: "
    + ms_finetune["model.init_scale"].map(str)
    + ", LR: "
    + ms_finetune["model.learning_rate"].map(str)
)

# Second fine-tuning batch. It gets no "kind" label here, so its rows carry
# NaN in that column after the concatenation below.
ms_finetune_2 = pd.read_feather("./0406_mnist_binary_finetune_wp/measures.feather")

# Stack both batches (keeping the column order as-is) and renumber the rows.
ms = pd.concat([ms_finetune, ms_finetune_2], sort=False).reset_index(drop=True)
# ms = pd.concat([ms2, ms_finetune], sort=False)

# print("Removing {} entries".format(sum(ms["gradient_norm_test"].isna())))
# ms = ms[~ms["gradient_norm_test"].isna()]

# Missing weights-product coefficients (NaN) are replaced by 0.
ms["model.weights_product_reg_coef"] = ms["model.weights_product_reg_coef"].fillna(0)

# NOTE(review): the return value is discarded, so this presumably drops the
# constant columns from `ms` in place -- confirm against smooth.analysis.
smooth.analysis.remove_constant_columns(ms, verbose=True)

# Last expression of the cell: shows a preview of the combined frame.
ms.head()

In [None]:
def plot_measure(ms, measure_name, bins=10):
    """Plot the distribution of one measure, overlaid per "kind" group.

    One `sns.distplot` is drawn per distinct value of the "kind" column, all
    on the same axes, followed by a legend; the figure is then shown.

    Args:
        ms: DataFrame containing a "kind" column and a `measure_name` column.
        measure_name: Name of the column to plot. If the name contains
            "loss", the base-10 logarithm of the values is plotted instead.
        bins: Number of histogram bins used for every group.
    """
    # Rows with a missing kind can never match a group below, and a NaN mixed
    # in with string labels would make sorted() raise a TypeError, so they
    # are left out of the group list.
    kinds = sorted(ms["kind"].dropna().unique())
    data = ms[[measure_name, "kind"]].copy()

    if "loss" in measure_name:
        log_measure_name = "log10_{}".format(measure_name)
        data[log_measure_name] = np.log10(data[measure_name])
        measure_name = log_measure_name

    # One range computed over the whole column (not per group), so that all
    # groups are binned over the same interval and stay comparable.
    hist_range = (data[measure_name].min(), data[measure_name].max())

    # NOTE(review): sns.distplot is deprecated in recent seaborn releases;
    # kept here to preserve the existing output.
    for kind in kinds:
        sns.distplot(
            data.loc[data["kind"] == kind, measure_name],
            label=kind,
            hist_kws={"range": hist_range},
            bins=bins,
        )

    plt.legend()
    plt.show()

In [None]:
# Draw one figure per measure for the `ms_finetune` frame, using the default
# number of bins.
for measure in (
    "loss_train",
    "loss_test",
    "gradient_norm_test",
    "weights_product",
    "path_length_f_test",
    "path_length_d_test",
):
    plot_measure(ms_finetune, measure)

In [None]:
# Keep only the runs trained with init scale 0.1. The explicit copy makes the
# result an independent frame, so adding the "kind" column below does not
# trigger pandas' SettingWithCopyWarning.
ms = ms.loc[ms["model.init_scale"] == 0.1].copy()
# Relabel every remaining run by its two regularization coefficients:
# "gn" is the gradient-norm coefficient, "wp" the weights-product coefficient.
ms["kind"] = ("gn: " + ms["model.gradient_norm_reg_coef"].map(str) +
              ", wp: " + ms["model.weights_product_reg_coef"].map(str))

In [None]:
# Draw one figure per measure for the current `ms` frame, using 20 bins
# instead of the default.
for measure in (
    "loss_train",
    "loss_test",
    "gradient_norm_test",
    "weights_product",
    "path_length_f_test",
    "path_length_d_test",
):
    plot_measure(ms, measure, bins=20)