In [1]:
# %matplotlib inline

import numpy as np
import matplotlib.pyplot as plt

In [2]:
N = 100  # number of evenly spaced points sampled from the curve
SEED = 0  # fixed seed: later cells draw training subsets with np.random.choice
np.random.seed(SEED)

# Ground-truth signal: sin(x) evaluated on [0, 6*pi].
X = np.linspace(0, 6*np.pi, N)
Y = np.sin(X)

plt.plot(X, Y)
plt.title('sin(x)')
plt.xlabel('x')
plt.ylabel('y')
plt.show()

In [3]:
def make_poly(X, deg):
    """Build a polynomial design matrix from the inputs `X`.

    Column 0 is a bias column of ones and column k (1 <= k <= deg) holds
    X**k. For a 1-D `X` the result has shape (len(X), deg + 1).
    """
    bias = np.ones(len(X))
    powers = [X**k for k in range(1, deg + 1)]
    # Stack the features as rows, then transpose so each sample is a row.
    return np.vstack([bias] + powers).T

In [4]:
def fit(X, Y):
    """Return the least-squares weights w that minimise ||X.dot(w) - Y||^2.

    Parameters
    ----------
    X : ndarray
        Design matrix with one row per sample and one column per feature.
    Y : ndarray
        Targets, one entry (or row) per sample.

    Returns
    -------
    ndarray
        Weight vector with one entry per column of `X`.

    Notes
    -----
    Uses ``np.linalg.lstsq`` rather than solving the normal equations
    ``X.T X w = X.T Y`` directly. Forming ``X.T X`` squares the condition
    number and makes ``np.linalg.solve`` raise ``LinAlgError`` whenever the
    matrix is singular (e.g. duplicate rows or more columns than distinct
    samples). ``lstsq`` returns the minimum-norm solution in that case.
    """
    w, _residuals, _rank, _singular_values = np.linalg.lstsq(X, Y, rcond=None)
    return w

In [8]:
def fit_and_display(X, Y, sample, deg):
    """Fit a degree-`deg` polynomial to a random subset of (X, Y) and plot it.

    Parameters
    ----------
    X, Y : ndarray
        Full set of inputs and targets; indexed with the sampled positions.
    sample : int
        Number of training points to draw from the data.
    deg : int
        Degree of the polynomial passed to `make_poly`.

    Shows two figures: a scatter of the sampled training points, then the
    full curve, the fitted polynomial and the training points together.
    """
    N = len(X)
    # np.random.choice samples WITH replacement by default, which can pick the
    # same point several times and leave fewer distinct training points than
    # requested. Draw distinct points whenever the data is large enough; fall
    # back to replacement only when more points are requested than exist.
    train_idx = np.random.choice(N, sample, replace=sample > N)
    Xtrain = X[train_idx]
    Ytrain = Y[train_idx]

    plt.scatter(Xtrain, Ytrain)
    plt.show()

    # Fit on the sampled points only.
    Xtrain_poly = make_poly(Xtrain, deg)
    w = fit(Xtrain_poly, Ytrain)

    # Evaluate the fitted polynomial on every point to compare with the truth.
    X_poly = make_poly(X, deg)
    Y_hat = X_poly.dot(w)
    plt.plot(X, Y, label='target')
    plt.plot(X, Y_hat, label='prediction')
    plt.scatter(Xtrain, Ytrain, label='train samples')
    plt.title('deg={}'.format(deg))
    plt.xlabel('x')
    plt.ylabel('y')
    plt.legend()
    plt.show()

In [9]:
# Keep the sample size at 10 points and sweep the polynomial degree from 5 through 9.
for deg in range(5, 10):
    fit_and_display(X, Y, sample=10, deg=deg)

In [10]:
def get_mse(Y, Y_hat):
    """Mean squared error between targets `Y` and predictions `Y_hat`.

    Computed as the dot product of the residual with itself divided by the
    number of residual entries along the first axis.
    """
    residual = Y - Y_hat
    sum_of_squares = np.dot(residual.T, residual)
    return sum_of_squares / len(residual)

In [11]:
def plot_train_test_curves(X, Y, sample=20, max_deg=20):
    """Plot train and test MSE of polynomial fits for degrees 1..max_deg.

    Parameters
    ----------
    X, Y : ndarray
        Full set of inputs and targets.
    sample : int, optional
        Number of points drawn at random for training; every index that was
        not drawn forms the test set.
    max_deg : int, optional
        Highest polynomial degree to fit (inclusive).

    One train/test split is drawn and reused for every degree.
    """
    N = len(X)
    # np.random.choice samples WITH replacement by default, which would put
    # duplicate points in the training set. Draw distinct indices whenever the
    # data is large enough; fall back to replacement only if sample > N.
    train_idx = np.random.choice(N, sample, replace=sample > N)
    Xtrain, Ytrain = X[train_idx], Y[train_idx]

    # Held-out set: every index that was not drawn for training (sorted).
    test_idx = np.setdiff1d(np.arange(N), train_idx)
    Xtest, Ytest = X[test_idx], Y[test_idx]

    degrees = list(range(1, max_deg + 1))
    mse_trains, mse_tests = [], []
    for deg in degrees:
        Xtrain_poly = make_poly(Xtrain, deg)
        w = fit(Xtrain_poly, Ytrain)
        mse_trains.append(get_mse(Ytrain, Xtrain_poly.dot(w)))

        Xtest_poly = make_poly(Xtest, deg)
        mse_tests.append(get_mse(Ytest, Xtest_poly.dot(w)))

    # Plot against the actual degree; plotting the lists alone would put the
    # degree-1 result at x=0 and shift every point by one.
    plt.plot(degrees, mse_trains, label='train_mse')
    plt.plot(degrees, mse_tests, label='test_mse')
    plt.xlabel('polynomial degree')
    plt.ylabel('MSE')
    plt.legend()
    plt.show()
        

In [12]:
# Compare train and test error on the sine data, spelling out the default settings.
plot_train_test_curves(X, Y, sample=20, max_deg=20)