# Caltech Machine Learning Homework # 7

In [1]:
import numpy as np
from sklearn.linear_model import Perceptron
import random
import math
import matplotlib.pyplot as plt
from typing import List
from itertools import product
import scipy.special
from scipy import optimize
import scipy.optimize as spo
from sympy import Symbol, Derivative
import functools

def dbg():
    """Debugging helper: start an interactive pdb session."""
    import pdb
    pdb.set_trace()

Instructions: https://work.caltech.edu/homework/hw7.pdf

Answers: http://work.caltech.edu/homework/hw7_sol.pdf

## Validation

![](imgs/val1.png)

In [23]:
# In-sample data: inputs are every column but the last, labels are column 2.
data = np.loadtxt('data/hw7/in.dta.txt')

# Split sizes: the leading rows train, the trailing rows validate.
N_train, N_val = 25, 10

# Training split (first N_train rows).
X_train, Y_train = data[:N_train, :-1], data[:N_train, 2]

# Validation split (last N_val rows).
X_val, Y_val = data[-N_val:, :-1], data[-N_val:, 2]

def phi(X, k):
    """Non-linear feature map for a single 2-D point.

    Builds the 8-element vector
    (1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|)
    and returns its first ``k + 1`` entries (features 0 through k).

    X must be an array of shape (2,); anything else trips the assertion.
    """
    assert X.shape == (2,)
    x1, x2 = X

    abs_diff = np.abs(x1 - x2)
    abs_sum = np.abs(x1 + x2)

    # Build the full vector first and slice afterwards, so the result dtype
    # does not depend on k.
    all_features = np.array([
        1,
        x1,
        x2,
        x1 * x1,
        x2 * x2,
        x1 * x2,
        abs_diff,
        abs_sum,
    ])
    return all_features[:k + 1]

# Fit one linear-regression classifier per feature count k and score it on
# the validation split.
for k in range(3, 8):
    # Non-linear transformation: keep features phi_0..phi_k of every point.
    Z_train = np.apply_along_axis(phi, 1, X_train, k)
    Z_val = np.apply_along_axis(phi, 1, X_val, k)

    # Linear regression through the pseudo-inverse. np.linalg.pinv works from
    # the SVD of Z_train, so Z^T Z is never inverted explicitly (that squares
    # the condition number and raises LinAlgError when it is singular).
    X_dagger = np.linalg.pinv(Z_train)
    W = X_dagger @ Y_train

    # Validation error: fraction of validation points whose predicted sign
    # differs from the label.
    preds_val = np.sign(Z_val @ W)
    E_VAL = np.mean(preds_val != Y_val)

    print(f"E_VAL for k={k} is {E_VAL}")


E_VAL for k=3 is 0.3
E_VAL for k=4 is 0.5
E_VAL for k=5 is 0.2
E_VAL for k=6 is 0.0
E_VAL for k=7 is 0.1


So the smallest classification error on the validation set occurs for `k=6`, **[d]**.

![](imgs/val2.png)

In [24]:
# Out-of-sample data: inputs are every column but the last, labels are column 2.
test = np.loadtxt('data/hw7/out.dta.txt')

X_test, Y_test = test[:, :-1], test[:, 2]
N_test = len(Y_test)

# Refit each model on the training split and score it on the test set.
for k in range(3, 8):
    # Non-linear transformation: keep features phi_0..phi_k of every point.
    Z_train = np.apply_along_axis(phi, 1, X_train, k)
    Z_test = np.apply_along_axis(phi, 1, X_test, k)

    # Linear regression through the pseudo-inverse. np.linalg.pinv works from
    # the SVD of Z_train, so Z^T Z is never inverted explicitly (that squares
    # the condition number and raises LinAlgError when it is singular).
    X_dagger = np.linalg.pinv(Z_train)
    W = X_dagger @ Y_train

    # Out-of-sample error: fraction of test points whose predicted sign
    # differs from the label.
    preds_test = np.sign(Z_test @ W)
    E_OUT = np.mean(preds_test != Y_test)

    print(f"E_OUT for k={k} is {E_OUT}")

E_OUT for k=3 is 0.42
E_OUT for k=4 is 0.416
E_OUT for k=5 is 0.188
E_OUT for k=6 is 0.084
E_OUT for k=7 is 0.072


So the smallest out-of-sample error occurs for `k=7`. The validation set almost predicted this correctly (it picked `k=6`), but not quite. The answer is **[e]**

![](imgs/val3.png)

I expect the validation estimates to be much more reliable now (25 validation points instead of 10), and a simpler model to "win" since we have fewer training examples (10 instead of 25). Let's see :)

In [29]:
# Same in-sample file as before, but with the roles of the two splits swapped.
data = np.loadtxt('data/hw7/in.dta.txt')

# Split sizes: the trailing rows train, the leading rows validate.
N_train, N_val = 10, 25

# Training split (last N_train rows).
X_train, Y_train = data[-N_train:, :-1], data[-N_train:, 2]

# Validation split (first N_val rows).
X_val, Y_val = data[:N_val, :-1], data[:N_val, 2]

def phi(X, k):
    """Map one 2-D point to its first ``k + 1`` non-linear features.

    The full feature vector is
    (1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|).
    X must be an array of shape (2,); anything else trips the assertion.
    """
    assert X.shape == (2,)
    x1, x2 = X

    # Assemble all eight features, then truncate; slicing after np.array keeps
    # the result dtype independent of k.
    linear_terms = [1, x1, x2]
    quadratic_terms = [x1 * x1, x2 * x2, x1 * x2]
    absolute_terms = [np.abs(x1 - x2), np.abs(x1 + x2)]
    all_features = np.array(linear_terms + quadratic_terms + absolute_terms)
    return all_features[:k + 1]

# Fit one linear-regression classifier per feature count k and score it on
# the validation split.
for k in range(3, 8):
    # Non-linear transformation: keep features phi_0..phi_k of every point.
    Z_train = np.apply_along_axis(phi, 1, X_train, k)
    Z_val = np.apply_along_axis(phi, 1, X_val, k)

    # Linear regression through the pseudo-inverse. np.linalg.pinv works from
    # the SVD of Z_train, so Z^T Z is never inverted explicitly (that squares
    # the condition number and raises LinAlgError when it is singular).
    X_dagger = np.linalg.pinv(Z_train)
    W = X_dagger @ Y_train

    # Validation error: fraction of validation points whose predicted sign
    # differs from the label.
    preds_val = np.sign(Z_val @ W)
    E_VAL = np.mean(preds_val != Y_val)

    print(f"E_VAL for k={k} is {E_VAL}")

E_VAL for k=3 is 0.28
E_VAL for k=4 is 0.36
E_VAL for k=5 is 0.2
E_VAL for k=6 is 0.08
E_VAL for k=7 is 0.12


So the answer is still **[d]**, as in problem 1. The relative ranking of the models barely changes compared to problem 1; the main difference is that the validation error of the selected model is higher (0.08 instead of 0.0), as expected.

![](imgs/val4.png)

In [30]:
# Out-of-sample data: inputs are every column but the last, labels are column 2.
test = np.loadtxt('data/hw7/out.dta.txt')

X_test, Y_test = test[:, :-1], test[:, 2]
N_test = len(Y_test)

# Refit each model on the training split and score it on the test set.
for k in range(3, 8):
    # Non-linear transformation: keep features phi_0..phi_k of every point.
    Z_train = np.apply_along_axis(phi, 1, X_train, k)
    Z_test = np.apply_along_axis(phi, 1, X_test, k)

    # Linear regression through the pseudo-inverse. np.linalg.pinv works from
    # the SVD of Z_train, so Z^T Z is never inverted explicitly (that squares
    # the condition number and raises LinAlgError when it is singular).
    X_dagger = np.linalg.pinv(Z_train)
    W = X_dagger @ Y_train

    # Out-of-sample error: fraction of test points whose predicted sign
    # differs from the label.
    preds_test = np.sign(Z_test @ W)
    E_OUT = np.mean(preds_test != Y_test)

    print(f"E_OUT for k={k} is {E_OUT}")

E_OUT for k=3 is 0.396
E_OUT for k=4 is 0.388
E_OUT for k=5 is 0.284
E_OUT for k=6 is 0.192
E_OUT for k=7 is 0.196


And indeed the validation set picks the right model this time: the smallest out-of-sample error is also at `k=6`, so the answer is **[d]** as well.

![](imgs/val5.png)

For the model (d) chosen in 1., we had

E_OUT for k=6 is `0.084`

and for the model (d) chosen in 3., we had 

E_OUT for k=6 is `0.192`

That's closest to answer **[b]**

## Validation Bias