## Problema 1

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Read the housing data; header=None makes pandas number the columns instead
# of taking names from the first row of the file.
all_data = pd.read_csv(filepath_or_buffer="housing.csv", header=None)
# Preview the first five rows.
all_data.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


### Definición de funciones importantes

In [3]:
# Performs a linear regression over the training set Xt/Yt using least squares
# with an L2 (ridge) penalty.
def regularized_lq_l2(Xt, Yt, lambda_val, degree):
    """Return the L2-regularized least-squares parameters fitted on Xt/Yt.

    The design matrix ``Phi`` is a column of ones followed by
    ``polynomial_expansion(Xt, degree)``. The returned parameters solve

        (lambda_val * I + Phi^T Phi) theta = Phi^T Yt

    Parameters
    ----------
    Xt : array with a ``shape`` attribute
        Training inputs; ``Xt.shape[0]`` is used as the number of samples.
    Yt : array-like
        Training targets.
    lambda_val : float
        Weight of the L2 penalty. It is added to every diagonal entry, so the
        intercept parameter is penalized as well.
    degree : int
        Degree forwarded to ``polynomial_expansion``.

    Returns
    -------
    theta : numpy.ndarray
        Fitted parameters, intercept first.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the regularized normal matrix is singular.
    """
    # NOTE(review): polynomial_expansion is defined elsewhere in the notebook;
    # presumably it returns the expanded features without a bias column, since
    # the column of ones is prepended here -- confirm.
    phi_Xt = np.column_stack((np.ones(Xt.shape[0]), polynomial_expansion(Xt, degree)))

    # Normal-equation matrix plus the ridge term lambda * I.
    XTX = np.dot(phi_Xt.T, phi_Xt)
    ridge_matrix = lambda_val * np.identity(XTX.shape[0])

    # Regularized least squares: solve the linear system directly rather than
    # forming the explicit inverse, which is slower and numerically less stable.
    theta = np.linalg.solve(ridge_matrix + XTX, np.dot(phi_Xt.T, Yt))

    return theta

In [4]:
# Mean squared error of the linear model `theta` evaluated on the inputs X
# against their labels Y.
def mse_error(X, Y, theta):
    """Return the MSE between ``Y`` and the predictions ``[1, X] . theta``.

    A column of ones is prepended to ``X`` before the product, so ``theta``
    must hold one more entry than ``X`` has columns (intercept first).
    """
    n_samples = X.shape[0]
    design = np.column_stack((np.ones(n_samples), X))
    residuals = Y - np.dot(design, theta)
    squared_error_total = np.square(residuals).sum()
    return squared_error_total / Y.size

In [5]:
# Performs several regularized least-squares regressions over the sets, each
# one using a different set of basis functions (a polynomial expansion of
# increasing degree).
# Returns the MSE on the training and validation sets for every degree.
def regularized_lq_errors(Xt, Yt, Xv, Yv, lambda_val, min_degree, max_degree, rule=2):
    """Fit one regularized model per polynomial degree and collect its errors.

    Parameters
    ----------
    Xt, Yt : array-like
        Training inputs and targets.
    Xv, Yv : array-like
        Validation inputs and targets.
    lambda_val : float
        Regularization weight forwarded to the fitting routine.
    min_degree, max_degree : int
        Inclusive range of polynomial degrees to evaluate.
    rule : int, optional
        ``1`` fits with ``regularized_lq_l1``, ``2`` (the default) fits with
        ``regularized_lq_l2``.

    Returns
    -------
    (list, list)
        Training MSE values and validation MSE values, one entry per degree
        in increasing degree order. The fitted parameters are not returned.

    Raises
    ------
    ValueError
        If ``rule`` is neither 1 nor 2.
    """
    # Fail early with a clear message; otherwise `theta` would never be bound
    # inside the loop and the error would surface as an obscure NameError.
    if rule not in (1, 2):
        raise ValueError("rule must be 1 (L1) or 2 (L2), got %r" % (rule,))

    mse_training_values = []
    mse_validation_values = []

    for degree in range(min_degree, max_degree + 1):
        if rule == 1:
            # NOTE(review): regularized_lq_l1 is not defined in the visible part
            # of the notebook; its original argument list is kept -- confirm it
            # matches the definition.
            theta = regularized_lq_l1(Xt, Yt, Xv, Yv, lambda_val, degree)
        else:
            # regularized_lq_l2 takes only the training set, lambda and degree.
            theta = regularized_lq_l2(Xt, Yt, lambda_val, degree)

        # Training set
        phi_Xt = polynomial_expansion(Xt, degree)
        mse_training_values.append(mse_error(phi_Xt, Yt, theta))

        # Validation set
        phi_Xv = polynomial_expansion(Xv, degree)
        mse_validation_values.append(mse_error(phi_Xv, Yv, theta))

    return mse_training_values, mse_validation_values

In [6]:
# Plot a comparison between the training MSE and the validation MSE, given the
# min and max degrees and the lambda parameter.
def plot_errors(Xt, Xv, Yt, Yv, lambda_val, min_degree, max_degree, rule=2):
    """Plot training and validation MSE against the polynomial degree.

    Parameters
    ----------
    Xt, Xv : array-like
        Training and validation inputs.
    Yt, Yv : array-like
        Training and validation targets.
    lambda_val : float
        Regularization weight; also shown in the plot title.
    min_degree, max_degree : int
        Inclusive range of polynomial degrees on the x axis.
    rule : int, optional
        Forwarded to ``regularized_lq_errors`` to choose the fitting routine.
        That function requires this argument, so it is passed explicitly;
        the default is 2.
    """
    mse_t, mse_v = regularized_lq_errors(
        Xt, Yt, Xv, Yv, lambda_val, min_degree, max_degree, rule
    )
    degree = np.arange(min_degree, max_degree + 1)

    # Red squares for training, blue squares for validation, joined by lines.
    plt.plot(degree, mse_t, 'rs', label= 'Entrenamiento', linestyle='-')
    plt.plot(degree, mse_v, 'bs', label= u'Validación', linestyle='-')
    plt.legend()
    plt.grid(True)
    plt.xlabel('Degree of the polynomial expansion')
    plt.ylabel('MSE')
    plt.title('MSE for lambda = ' + str(lambda_val))
    plt.show()