# Polynomial interpolation is not smooth

To compare the smoothness of neural networks trained with SGD against a different class of models, we investigate polynomial interpolation.

In [None]:
import os
import sys
import warnings
# If we don't need CUDA, do this before importing TF
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import tensorflow as tf
import numpy as np
import pandas as pd
import tqdm
import tqdm.notebook
import scipy.stats

import matplotlib.pyplot as plt

# Restrict TensorFlow to a single GPU when any are visible.
# NOTE(review): with CUDA_VISIBLE_DEVICES set to "-1" earlier in this cell the
# list below is presumably empty, so this branch only matters once that line
# is disabled.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    # Prefer the second GPU, but fall back to the only available one on a
    # single-GPU machine instead of raising IndexError on gpus[1].
    preferred_gpu = gpus[min(1, len(gpus) - 1)]
    tf.config.experimental.set_visible_devices([preferred_gpu], 'GPU')

# Make the project-local `smooth` package importable from this notebook.
sys.path.append("/nfs/scistore12/chlgrp/vvolhejn/smooth")

# Run the rest of the notebook from inside this experiment's log directory
# (presumably so that relative paths resolve against its saved results).
os.chdir("/nfs/scistore12/chlgrp/vvolhejn/smooth/logs/0224_gp2/")

In [None]:
# Enable IPython's autoreload extension for this kernel.
%load_ext autoreload
# Import the project modules and mark each of them for automatic reloading.
%aimport smooth.datasets
%aimport smooth.model
%aimport smooth.analysis
%aimport smooth.callbacks
%aimport smooth.measures
%aimport smooth.util
# Mode 1: before running a cell, reload only the modules marked with %aimport.
%autoreload 1

## Visualisation of polynomial fits

The polynomials are unnecessarily "wiggly", as we can see here.
The training points are selected so that a smaller training set is always a subset of a larger one.

In [None]:
# Fit a polynomial to progressively larger training sets (5, 7, ..., 29
# samples) and show every fit in its own figure.
for n in range(5, 30, 2):
    dataset_name = "gp-1-123-0.3-{}".format(n)
    dataset = smooth.datasets.GaussianProcessDataset.from_name(dataset_name)
    model = smooth.model.interpolate_polynomial(dataset)

    # Training points as translucent dots ...
    plt.scatter(dataset.x_train, dataset.y_train, alpha=0.5)
    # ... and the fitted model evaluated on the test inputs as a curve.
    x_test = dataset.x_test
    plt.plot(x_test, model(x_test))

    plt.title("samples_train={}".format(n))
    plt.show()

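Now let's plot our measures as a function of the number of training samples. Because each training set contains all the smaller ones,
adding points can only add constraints on the fitted function, so for smooth training procedures the plots should be nondecreasing.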

In [None]:
# One dataset name per value of the trailing field, 2..99 (presumably the
# number of training samples, as in the "samples_train" naming used elsewhere
# in this notebook).
datasets = ["gp-1-123-0.1-{}".format(x) for x in range(2, 100)]

# Measures for the default interpolation (labelled "piecewise linear" in the
# plots of this notebook), requested without and with the test set.
im_train = smooth.analysis.get_interpolation_measures(datasets, use_test_set=False)
# Fixed: the flag was spelled `rue`, which raised NameError when the cell ran.
im_test = smooth.analysis.get_interpolation_measures(datasets, use_test_set=True)
# The same measures for the polynomial interpolation.
im_poly = smooth.analysis.get_interpolation_measures(datasets, use_polynomial=True)
# im_train = smooth.analysis.expand_dataset_columns(im_train.reset_index())
# im_test = smooth.analysis.expand_dataset_columns(im_test.reset_index())
# im_poly = smooth.analysis.expand_dataset_columns(im_poly.reset_index())

In [None]:
def plot_by_samples(df, measure_name, label=None):
    """Plot one measure against the training-set size.

    Draws ``df[measure_name]`` versus ``df["samples_train"]``, with the rows
    ordered by ``samples_train``, using ``plt.plot`` and sets the plot title
    to ``measure_name``. The figure is neither created nor shown here, so
    several calls can be overlaid before the caller calls ``plt.show()``.

    Args:
        df: Table with a ``samples_train`` column and a ``measure_name``
            column.
        measure_name: Column plotted on the y-axis; also used as the title.
        label: Legend label for the plotted line.

    Returns:
        The distinct ``samples_train`` values, sorted in ascending order.
    """
    by_size = df.sort_values("samples_train")
    sizes = by_size["samples_train"]

    distinct_sizes = sizes.unique()
    distinct_sizes.sort()

    plt.plot(sizes, by_size[measure_name], label=label)
    plt.title(measure_name)
    return distinct_sizes

# One figure per measure, overlaying the three result tables so that the
# polynomial curve can be compared with the two piecewise-linear ones.
curves = [
    (im_train, "piecewise linear - training set"),
    (im_test, "piecewise linear - test set"),
    (im_poly, "polynomial - training set"),
]
for measure in ["gradient_norm", "seg_total_variation", "seg_total_variation_derivative"]:
    for frame, curve_label in curves:
        plot_by_samples(frame, measure, label=curve_label)
    plt.legend(loc='upper right')
    plt.show()

We can see that for polynomials, the trend is indeed far from nondecreasing.