# Caltech Machine Learning Homework #6

In [1]:
import numpy as np
from sklearn.linear_model import Perceptron
import random
import math
import matplotlib.pyplot as plt
from typing import List
from itertools import product
import scipy.special
from scipy import optimize
import scipy.optimize as spo
from sympy import Symbol, Derivative

def dbg():
    """Pause execution and open an interactive pdb session.

    Debugging aid only; it is not called anywhere in the visible part of
    this notebook.
    """
    from pdb import set_trace
    set_trace()

Instructions: https://work.caltech.edu/homework/hw6.pdf

Answers: http://work.caltech.edu/homework/hw6_sol.pdf

## Overfitting and Deterministic Noise

![](imgs/overfitting.png)

**[b]**

## Regularization with Weight Decay

![](imgs/regdecay1.png)

In [3]:
# Load the in-sample and out-of-sample data sets as 2-D float arrays.
# NOTE(review): the files are read from data/hw5 although this notebook is
# homework 6 -- confirm the path is intended.
train = np.loadtxt('data/hw5/in.dta.txt')
test = np.loadtxt('data/hw5/out.dta.txt')

# Inputs are every column except the last; the label is read from column 2.
X_train, Y_train = train[:, :-1], train[:, 2]
N_train = X_train.shape[0]

X_test, Y_test = test[:, :-1], test[:, 2]
N_test = X_test.shape[0]

def theta(X):
    """Expand a single 2-D point into the 8-term non-linear feature vector.

    Args:
        X: 1-D array of shape ``(2,)`` holding the coordinates ``(x1, x2)``.

    Returns:
        1-D array ``[1, x1, x2, x1^2, x2^2, x1*x2, |x1 - x2|, |x1 + x2|]``.

    Raises:
        AssertionError: if ``X`` does not have shape ``(2,)``.
    """
    assert X.shape == (2,)
    first, second = X[0], X[1]
    features = [
        1,                    # bias term
        first,
        second,
        first * first,
        second * second,
        first * second,
        abs(first - second),
        abs(first + second),
    ]
    return np.array(features)

# Non-linear Transformation: map every input row through theta
Z_train = np.array([theta(point) for point in X_train])
Z_test = np.array([theta(point) for point in X_test])

# Linear Regression: one-shot solution W = (Z^T Z)^-1 Z^T y
X_dagger = np.linalg.inv(Z_train.T @ Z_train) @ Z_train.T
W = X_dagger @ Y_train

# In-sample Error: fraction of training points whose predicted sign is wrong
preds_train = np.sign(Z_train @ W)
E_IN = np.mean(preds_train != Y_train)

# Out-of-sample Error: same measure on the held-out points
preds_test = np.sign(Z_test @ W)
E_OUT = np.mean(preds_test != Y_test)

print(f"E_IN is {E_IN}")
print(f"E_OUT is {E_OUT}")


E_IN is 0.02857142857142857
E_OUT is 0.084


**[a]**

![](imgs/regdecay2.png)

In [5]:
# Weight decay strength: lambda = 10^k
k = -3
lambd = 10 ** k

# Linear Regression with Regularization: W_reg = (Z^T Z + lambda I)^-1 Z^T y
X_dagger_reg = np.linalg.inv(Z_train.T @ Z_train + lambd * np.eye(W.size)) @ Z_train.T
W_reg = X_dagger_reg @ Y_train

# In-sample Error: fraction of training points whose predicted sign is wrong
preds_train_reg = np.sign(Z_train @ W_reg)
E_IN_REG = np.mean(preds_train_reg != Y_train)

# Out-of-sample Error: same measure on the held-out points
preds_test_reg = np.sign(Z_test @ W_reg)
E_OUT_REG = np.mean(preds_test_reg != Y_test)

print(f"E_IN_REG is {E_IN_REG}")
print(f"E_OUT_REG is {E_OUT_REG}")

E_IN_REG is 0.02857142857142857
E_OUT_REG is 0.08


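Not much changes here: with $k=-3$ the regularization strength $\lambda = 10^{-3}$ is so small that `E_IN` is unchanged and `E_OUT` only moves from $0.084$ to $0.08$.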

**[d]**

![](imgs/regdecay3.png)

In [7]:
# Weight decay strength: lambda = 10^k
k = 3
lambd = 10 ** k

# Linear Regression with Regularization: W_reg = (Z^T Z + lambda I)^-1 Z^T y
X_dagger_reg = np.linalg.inv(Z_train.T @ Z_train + lambd * np.eye(W.size)) @ Z_train.T
W_reg = X_dagger_reg @ Y_train

# In-sample Error: fraction of training points whose predicted sign is wrong
preds_train_reg = np.sign(Z_train @ W_reg)
E_IN_REG = np.mean(preds_train_reg != Y_train)

# Out-of-sample Error: same measure on the held-out points
preds_test_reg = np.sign(Z_test @ W_reg)
E_OUT_REG = np.mean(preds_test_reg != Y_test)

print(f"E_IN_REG is {E_IN_REG}")
print(f"E_OUT_REG is {E_OUT_REG}")

E_IN_REG is 0.37142857142857144
E_OUT_REG is 0.436


**[e]**

![](imgs/regdecay4.png)

In [10]:
def run_with_k(k):
    """Return the out-of-sample classification error for weight decay 10**k.

    Fits linear regression with weight decay on the notebook-level
    ``Z_train`` / ``Y_train`` and scores the sign of the fitted values
    against ``Z_test`` / ``Y_test``.

    Args:
        k: Exponent of the regularization strength; ``lambda = 10 ** k``.

    Returns:
        Fraction of test points whose predicted sign differs from ``Y_test``.
    """
    lambd = 10 ** k

    # Linear Regression with Regularization: W_reg = (Z^T Z + lambda I)^-1 Z^T y.
    # The identity is sized from Z_train's column count rather than from the
    # global W, so this function does not rely on the unregularized fit.
    n_features = Z_train.shape[1]
    X_dagger_reg = np.dot(
        np.linalg.inv(np.dot(Z_train.T, Z_train) + lambd * np.identity(n_features)),
        Z_train.T,
    )
    W_reg = np.dot(X_dagger_reg, Y_train)

    # Out-of-sample Error (the only quantity callers use, so the in-sample
    # error is not computed here).
    preds_test_reg = np.sign(np.dot(Z_test, W_reg))
    return sum(preds_test_reg != Y_test) / N_test

# Out-of-sample error for k = -2, -1, 0, 1, 2 (in that order)
print(list(map(run_with_k, range(-2, 3))))

[0.084, 0.056, 0.092, 0.124, 0.228]


Smallest `E_OUT` is $0.056$ at index `1`, so the answer is $k = -1$ **[d]**

![](imgs/regdecay5.png)

We know that $k=3$ is over-regularizing, and $k=-3$ is under-regularizing, so we can limit ourselves to the integer values between $-3$ and $3$:

In [16]:
# Out-of-sample error for every integer k from -3 to 3 (in that order)
print(list(map(run_with_k, range(-3, 4))))

[0.08, 0.084, 0.056, 0.092, 0.124, 0.228, 0.436]


So the smallest `E_OUT` ($0.056$) is indeed achieved at $k=-1$, and that value is closest to answer **[b]**