## Problema 1

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures

In [11]:
# Complete data set, read without a header row; only previewed here.
all_data = pd.read_csv("housing.csv", header=None)
all_data.head()

# Training / validation splits, each stored in its own header-less CSV file.
x_training = pd.read_csv('x_entrenamiento.csv', header=None)
y_training = pd.read_csv('y_entrenamiento.csv', header=None)
x_validation = pd.read_csv('x_validacion.csv', header=None)
y_validation = pd.read_csv('y_validacion.csv', header=None)

# Plain NumPy views of the frames, used by the numerical routines below.
Xt, Yt = x_training.values, y_training.values
Xv, Yv = x_validation.values, y_validation.values

### Definición de funciones importantes

In [12]:
# Scikit-learn Lasso (L1) and Ridge (L2) functions for LS linear regression
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

In [None]:
def regularized_lq(Xt, Yt, alpha, degree):
    """Fit an L2-regularized (Ridge) least-squares model and return its weights.

    Parameters:
        Xt: training inputs, one sample per row.
        Yt: training targets.
        alpha: regularization strength handed to ``Ridge``.
        degree: accepted but not used by this function.
            NOTE(review): presumably the caller is expected to pass inputs
            that are already polynomially expanded -- confirm.

    Returns:
        The fitted parameters with the intercept first, followed by the
        feature coefficients. The result is a column per target when ``Yt``
        is 2-D and a flat vector when ``Yt`` is 1-D, so that it can be
        multiplied by a design matrix whose first column is all ones.
    """
    # NOTE: `normalize` was deprecated in scikit-learn 1.0 and removed in 1.2.
    # It is kept untouched so the fit is unchanged on the version this
    # notebook was written for; newer versions need an explicit scaling step.
    clf = Ridge(alpha, copy_X=True, fit_intercept=True, normalize=True)
    ridge = clf.fit(Xt, Yt)

    # get_params() only echoes the constructor hyperparameters; the learned
    # weights live in intercept_ and coef_.
    intercept = np.atleast_1d(ridge.intercept_)   # (n_targets,)
    coef = np.atleast_2d(ridge.coef_)             # (n_targets, n_features)
    theta = np.vstack((intercept, coef.T))        # (n_features + 1, n_targets)

    # Mirror the layout of the targets so predictions subtract element-wise.
    if np.ndim(Yt) == 1:
        theta = theta.ravel()
    return theta

In [4]:
def mse_error(X, Y, theta):
    """Return the mean squared error of the linear model ``theta`` on (X, Y).

    A column of ones is prepended to ``X`` before the product, so the first
    entry (row) of ``theta`` multiplies the constant term.

    Parameters:
        X: inputs, one sample per row.
        Y: labels matching the rows of ``X``.
        theta: parameter vector (or column) with one more entry than ``X``
            has columns.

    Returns:
        Sum of squared residuals divided by the number of label entries.
    """
    phi_X = np.column_stack((np.ones(X.shape[0]), X))
    Yh = np.dot(phi_X, theta)

    # Labels loaded as an (n, 1) column combined with a flat theta (or the
    # reverse) would make `Y - Yh` broadcast to an (n, n) matrix and silently
    # inflate the error. Align the prediction with the label layout first.
    if Yh.shape != np.shape(Y) and Yh.size == Y.size:
        Yh = Yh.reshape(np.shape(Y))

    sse = np.square(Y - Yh).sum()
    return sse / Y.size

In [5]:
def regularized_lq_errors(Xt, Yt, Xv, Yv, lambda_val, min_degree, max_degree, rule=2):
    """Fit one regularized regression per polynomial degree and score each.

    For every degree in ``[min_degree, max_degree]`` a model is fitted with
    the selected regularization rule, then evaluated with ``mse_error`` on
    the polynomially expanded training and validation inputs.

    Parameters:
        Xt, Yt: training inputs and labels.
        Xv, Yv: validation inputs and labels.
        lambda_val: regularization weight forwarded to the fitting routine.
        min_degree, max_degree: inclusive range of polynomial degrees.
        rule: 1 selects ``regularized_lq_l1``, 2 selects ``regularized_lq_l2``.
            Defaults to 2 so that callers which omit it keep working.
            NOTE(review): confirm that L2 is the intended default.

    Returns:
        ``(mse_training_values, mse_validation_values)``: two lists with one
        entry per degree, in increasing degree order. The fitted parameters
        themselves are not returned.

    Raises:
        ValueError: if ``rule`` is neither 1 nor 2.
    """
    # Reject an unknown rule explicitly instead of failing later on an
    # unassigned `theta`.
    if rule not in (1, 2):
        raise ValueError("rule must be 1 (L1) or 2 (L2), got %r" % (rule,))

    mse_training_values = []
    mse_validation_values = []

    for degree in range(min_degree, max_degree + 1):
        if rule == 1:
            theta = regularized_lq_l1(Xt, Yt, Xv, Yv, lambda_val, degree)
        else:
            theta = regularized_lq_l2(Xt, Yt, Xv, Yv, lambda_val, degree)

        # Training set
        phi_Xt = polynomial_expansion(Xt, degree)
        mse_training_values.append(mse_error(phi_Xt, Yt, theta))

        # Validation set
        phi_Xv = polynomial_expansion(Xv, degree)
        mse_validation_values.append(mse_error(phi_Xv, Yv, theta))

    return mse_training_values, mse_validation_values

In [6]:
def plot_errors(Xt, Xv, Yt, Yv, lambda_val, min_degree, max_degree, rule=2):
    """Plot training MSE against validation MSE for a range of degrees.

    Parameters:
        Xt, Xv: training and validation inputs.
        Yt, Yv: training and validation labels.
        lambda_val: regularization weight; also shown in the plot title.
        min_degree, max_degree: inclusive range of polynomial degrees placed
            on the x axis.
        rule: regularization rule forwarded to ``regularized_lq_errors``
            (1 or 2). NOTE(review): the default of 2 is an assumption --
            the original call did not pass a rule at all; confirm.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # regularized_lq_errors takes the data as (Xt, Yt, Xv, Yv) and needs the
    # rule; omitting the rule made this call fail with a TypeError.
    mse_t, mse_v = regularized_lq_errors(
        Xt, Yt, Xv, Yv, lambda_val, min_degree, max_degree, rule
    )
    degree = np.arange(min_degree, max_degree + 1)

    plt.plot(degree, mse_t, 'rs', label='Entrenamiento', linestyle='-')
    plt.plot(degree, mse_v, 'bs', label=u'Validación', linestyle='-')
    plt.legend()
    plt.grid(True)
    plt.xlabel('Degree of the polynomial expansion')
    plt.ylabel('MSE')
    plt.title('MSE for lambda = ' + str(lambda_val))
    plt.show()