# overfit_demo.ipynb
# WESmith 06/27/23
## Demonstrate underfitting/overfitting of simple functions using pytorch
## using some techniques from
### https://pytorch.org/tutorials/beginner/pytorch_with_examples.html?highlight=polynomial

In [None]:
import torch
import torch.nn as nn
import math
import matplotlib.pyplot as plt

In [None]:
# Settings for the polynomial-data experiment.
lim = 1.5          # x runs from -lim to +lim
npts = 1000        # number of sample points along x
max_order = 5      # maximum polynomial order to use
noise_scale = 1.0  # multiplier applied to the random noise added to the clean signal

In [None]:
# x limits; named x_min/x_max so that the builtins min() and max() are not
# rebound to floats for the rest of the notebook session
x_min, x_max = -lim, lim  # alternative range: (-0.1, 0.1)
# npts evenly spaced sample locations covering [x_min, x_max]
x = torch.linspace(x_min, x_max, npts)

## POLYNOMIAL BASIS USED TO CREATE DATA AND TO FIT THE NOISY DATA

In [None]:
# exponents 0, 1, ..., max_order
p = torch.arange(max_order + 1)
# x is viewed as an (npts, 1) column so the power broadcasts against the
# exponents in p: column k of xx holds x**k
xx = x[:, None] ** p
# plot every basis function against x
fig = plt.figure(figsize=(6, 6))
plt.plot(x, xx)
plt.grid()
plt.show()

In [None]:
# set seed here if desired (before the first torch.randn call below)
# random coefficients, one per basis column
coeffs = torch.randn(max_order + 1)
# clean random signal: linear combination of the polynomial basis columns
y = torch.matmul(xx, coeffs)
# two independent noisy copies of the clean signal
train = y + torch.randn(npts) * noise_scale
test = y + torch.randn(npts) * noise_scale

In [None]:
# clean signal as a red line, noisy training/testing samples as blue/green dots
fig = plt.figure(figsize=(12, 6))
plt.plot(x, y, 'r', label='original')
plt.plot(x, train, 'b.', label='training')
plt.plot(x, test, 'g.', label='testing')
plt.grid()
plt.legend()
plt.show()

In [None]:
# The model takes the fixed polynomial basis as its input, so training only
# estimates the coefficients of the max_order + 1 basis columns (held in the
# linear layer). Flatten(0, 1) collapses the linear layer's trailing
# size-1 dimension so the output is a plain vector.
model = nn.Sequential(
    nn.Linear(in_features=max_order + 1, out_features=1),
    nn.Flatten(start_dim=0, end_dim=1),
)

In [None]:
# explanation of model dimensions:
# (n_samples, max_order + 1) input array times the (max_order + 1) trainable weights (plus a bias) gives an (n_samples, 1) array
# nn.Flatten(0, 1) reshapes that (n_samples, 1) array into an (n_samples,) output vector
# nn.Flatten(start_dim, end_dim) merges dimensions start_dim through end_dim into one, whose size is the product of their sizes
# see nn.Flatten? examples
#nn.Flatten?

In [None]:
# squared error summed (not averaged) over all samples
loss_fn = nn.MSELoss(reduction="sum")

In [None]:
# learning rate handed to the optimizer
lr = 1e-3
# RMSprop updates every trainable parameter of the model
optimizer = torch.optim.RMSprop(
    model.parameters(),
    lr=lr,
)

In [None]:
def train_model(n_iter, train, test, log_every=200,
                net=None, basis=None, criterion=None, opt=None):
    """Fit the polynomial coefficients by gradient descent, logging progress.

    NOTE that this isn't the usual paradigm for NN training: normally the
    training set is input to the model and the target is compared to the
    predictions. Here the model input is the fixed polynomial basis, the
    model holds the learned coefficients, and `train` is the target.

    Args:
        n_iter: number of optimisation steps to run.
        train: target values the model output is fitted to.
        test: second set of target values; its loss is only reported,
            never back-propagated.
        log_every: print both losses every `log_every` iterations;
            0 or None disables printing.
        net: model to train; defaults to the notebook-level `model`.
        basis: model input; defaults to the notebook-level `xx`.
        criterion: loss callable; defaults to the notebook-level `loss_fn`.
        opt: optimizer that updates `net`; defaults to the notebook-level
            `optimizer`.

    Returns:
        None. `net` is updated in place through `opt`.
    """
    # Fallbacks are resolved at call time, so a cell that rebinds `model`,
    # `xx`, `loss_fn` or `optimizer` is picked up by the next call.
    net = model if net is None else net
    basis = xx if basis is None else basis
    criterion = loss_fn if criterion is None else criterion
    opt = optimizer if opt is None else opt

    for t in range(n_iter):
        y_pred = net(basis)
        loss = criterion(y_pred, train)
        if log_every and t % log_every == 0:
            with torch.no_grad():  # the reported test loss must not enter the autograd graph
                loss_test = criterion(y_pred, test)
            print(f'iter: {t:5}, train: {loss.item():10.3f}, test: {loss_test.item():10.3f}')
        # standard update: clear old gradients, back-propagate, take a step
        opt.zero_grad()
        loss.backward()
        opt.step()

In [None]:
# 2001 iterations so that the last progress line (iteration 2000) is printed
train_model(n_iter=2001, train=train, test=test)

In [None]:
#model[0].bias, model[0].weight, coeffs

In [None]:
# sanity check: the flattened model output should hold one value per sample
model(xx).size()

In [None]:
# compare the clean signal (red) with the fitted model output (blue)
fig = plt.figure(figsize=(12, 6))
plt.plot(x, y, 'r', label='true')
plt.plot(
    x,
    model(xx).detach().numpy(),  # detach from autograd before converting to numpy
    'b',
    label='estimate',
)
plt.legend()
plt.grid()
plt.show()

## FIT POLYNOMIAL TO SINE

In [None]:
# Settings for the sine-fitting experiment.
npts = 1000        # number of sample points along x
max_order = 3      # maximum polynomial order to use
noise_scale = 1.0  # NOTE(review): not referenced by the later cells of this section shown here

In [None]:
# npts evenly spaced points spanning [-pi, pi]
x = torch.linspace(start=-math.pi, end=math.pi, steps=npts)

In [None]:
# exponents 0, 1, ..., max_order
p = torch.arange(max_order + 1)
# x is viewed as an (npts, 1) column so the power broadcasts against the
# exponents in p: column k of xx holds x**k
xx = x[:, None] ** p

In [None]:
# target for this section: the noise-free sine evaluated on the x grid
ysin = torch.sin(x)
fig = plt.figure(figsize=(10, 6))
plt.plot(x, ysin)
plt.grid()
# explicit show(), as in every other plotting cell, so the figure is not left
# pending when this code runs outside an inline notebook backend
plt.show()

In [None]:
# Start the sine fit from scratch: a new linear model sized for the current
# max_order, with a newly created loss and RMSprop optimizer bound to it.
model = nn.Sequential(
    nn.Linear(in_features=max_order + 1, out_features=1),
    nn.Flatten(start_dim=0, end_dim=1),
)
loss_fn = nn.MSELoss(reduction='sum')
lr = 1e-3  # learning rate
optimizer = torch.optim.RMSprop(
    model.parameters(),
    lr=lr,
)

In [None]:
# the clean sine serves both as the fitting target and as the reported "test" set
train_model(n_iter=2001, train=ysin, test=ysin)

In [None]:
# compare the sine (red) with the fitted polynomial (blue)
fig = plt.figure(figsize=(12, 6))
plt.plot(x, ysin, 'r', label='true')
plt.plot(
    x,
    model(xx).detach().numpy(),  # detach from autograd before converting to numpy
    'b',
    label='estimate',
)
plt.legend()
plt.grid()
plt.show()