# Measures on MNIST - small models

These models have small hidden sizes, so we may be able to observe the double-descent risk curve.

In [None]:
import os
import sys
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import scipy.stats
import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns
sns.set()

# Restrict TensorFlow to a single GPU (only relevant when CUDA has not been
# disabled through CUDA_VISIBLE_DEVICES).
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Use the second GPU when there is one; on a single-GPU machine fall back
    # to the only device instead of failing with an IndexError.
    chosen_gpu = gpus[1] if len(gpus) > 1 else gpus[0]
    tf.config.experimental.set_visible_devices([chosen_gpu], 'GPU')

# Make the project checkout importable (the `smooth` package is used below).
sys.path.append("/nfs/scistore12/chlgrp/vvolhejn/smooth")

# All relative paths used later are resolved against this experiment's log
# directory. Alternative experiment:
# os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/0226_mnist/")
os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/0227_mnist_small/")

In [None]:
%load_ext autoreload
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
%autoreload 1

In [None]:
# Load the per-model measures and expand the dataset description into
# separate columns.
ms_raw = smooth.analysis.expand_dataset_columns(pd.read_feather("measures.feather"))
# NOTE: `ms` is the same object as `ms_raw`, not a copy.
ms = ms_raw

print("Remaining:", len(ms))
# The return value is discarded, so this presumably works in place -- TODO confirm.
smooth.analysis.remove_constant_columns(ms, verbose=True)

# Keep only the last path component of each log directory.
log_dir_parts = ms["log_dir"].str.split("/")
ms.loc[:, "log_dir"] = log_dir_parts.str[-1]

# Use the "path length" names for the total-variation measures.
new_names = {
    "seg_total_variation": "path_length_f",
    "seg_total_variation_derivative": "path_length_d",
}
ms = ms.rename(columns=new_names)

ms.head()

In [None]:
# Load the second batch of measures and key it by log directory, taken from
# the third "/"-separated component of the model path.
ms2 = pd.read_feather("measures2.feather")
ms2["log_dir"] = ms2["model_path"].str.split("/").str[2]
ms2 = ms2.drop(columns="model_path")

# Bring only the softmax path lengths into `ms`; rows are matched on `log_dir`.
softmax_cols = ["log_dir", "path_length_f_softmax", "path_length_d_softmax"]
ms = ms.merge(ms2[softmax_cols], on="log_dir")

In [None]:
# Display the merged measures table.
ms

In [None]:
# Plot a histogram for every non-object column of `ms`, keeping only the
# values between the trim/2 and 1 - trim/2 quantiles so that outliers do not
# dominate the bins.
# (To inspect only a few columns, use e.g.
#  cols = ["actual_epochs", "train_loss", "test_loss"] instead.)
cols = ms.columns

trim = 0.1
lower_q = trim / 2
upper_q = 1 - trim / 2

for col in cols:
    column = ms[col]
    if column.dtype == "object":
        continue

    lower = column.quantile(lower_q)
    upper = column.quantile(upper_q)
    data = column[(column >= lower) & (column <= upper)]

    plt.hist(data, bins=20)
    plt.title(col)
    plt.show()

In [None]:
# Sort by training-set size so that, within each (hidden_size, iteration)
# group, the first row is the run with the fewest training samples and the
# last row is the run with the most.
ms1 = ms.sort_values("samples_train")
groups = ms1.groupby(["hidden_size", "iteration"])

measure_cols = ["gradient_norm",
                "path_length_f", "path_length_f_softmax",
                "path_length_d", "path_length_d_softmax",
               ]

# log10 of (value at the smallest training set / value at the largest one).
# The measure columns are selected *before* aggregating: applying the lambda
# to every column would also divide string columns such as `log_dir`, which
# raises a TypeError on pandas versions that no longer drop failing columns
# silently, and does needless work on older versions.
ratios = groups[measure_cols].agg(lambda g: np.log10(g.iloc[0] / g.iloc[-1]))

ratios.describe()

In [None]:
# Plot each measure (plus accuracies, losses and L2) against the training-set
# size, coloured by hidden size.
# To look at a subset of the runs, filter here instead, e.g.
#   ms1 = ms.loc[ms["hidden_size"].isin([2, 4, 8, 16])]
ms1 = ms

# Human-readable hidden-size label. It does not depend on the plotted measure,
# so it is computed once instead of on every loop iteration.
# NOTE: `ms1` is `ms` itself here, so the column is added to `ms` as well.
ms1.loc[:, "hidden_size_s"] = ms1["hidden_size"].astype(str) + " units"

plotted_measures = measure_cols + [
    "train_accuracy", "test_accuracy", "train_loss", "test_loss", "l2",
]

for measure in plotted_measures:
    grid = sns.relplot(
        data=ms1,
        x="samples_train",
        y=measure,
        hue="hidden_size",
        palette=smooth.analysis.make_palette(ms1["hidden_size"]),
        kind="line",
    )
    ax = grid.axes[0][0]
    # Training-set sizes are shown on a logarithmic x axis.
    ax.set_xscale("log")
    # Losses are shown on a logarithmic y axis as well.
    if measure in ("train_loss", "test_loss"):
        ax.set_yscale("log")
    plt.show()


In [None]:
%%time
# Recompute the measures on the MNIST test set for every model with
# 10 hidden units trained with batch size 128.
mnist = smooth.datasets.get_mnist()

is_selected = (ms["hidden_size"] == 10) & (ms["batch_size"] == 128)
mss = ms[is_selected]

# Maps the row label of each selected model in `ms` to its measures.
ms2_dict = {}
for i, row in tqdm.notebook.tqdm(mss.iterrows(), total=len(mss)):
    # `log_dir` is a relative path, so it is resolved against the current
    # working directory.
    model_file = os.path.join(row["log_dir"], "model.h5")
    model = tf.keras.models.load_model(model_file)
    measures = smooth.measures.get_measures(
        model,
        mnist.x_test, mnist.y_test,
        include_training_measures=False,
        is_classification=True,
        samples=100,
    )
    ms2_dict[i] = measures

In [None]:
# Attach the training-set size to every row of `ms2`, matching on `log_dir`.
# A plain `ms2["samples_train"] = ms["samples_train"]` aligns on the row index
# instead; `ms` received a fresh integer index from the merge, so equal index
# labels in the two frames need not refer to the same model.
# Rows of `ms2` whose `log_dir` does not occur in `ms` get NaN.
samples_by_log_dir = (
    ms.drop_duplicates("log_dir").set_index("log_dir")["samples_train"]
)
ms2["samples_train"] = ms2["log_dir"].map(samples_by_log_dir)

In [None]:
# Display `ms2`, now including the `samples_train` column.
ms2