In [57]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import RidgeCV
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error

In [58]:
def transform_data(X):
    """
    This function transforms the 5 input features of matrix X (x_i denoting the i-th component of X)
    into 21 new features phi(X) in the following manner:
    5 linear features: phi_1(X) = x_1, phi_2(X) = x_2, phi_3(X) = x_3, phi_4(X) = x_4, phi_5(X) = x_5
    5 quadratic features: phi_6(X) = x_1^2, phi_7(X) = x_2^2, phi_8(X) = x_3^2, phi_9(X) = x_4^2, phi_10(X) = x_5^2
    5 exponential features: phi_11(X) = exp(x_1), phi_12(X) = exp(x_2), phi_13(X) = exp(x_3), phi_14(X) = exp(x_4), phi_15(X) = exp(x_5)
    5 cosine features: phi_16(X) = cos(x_1), phi_17(X) = cos(x_2), phi_18(X) = cos(x_3), phi_19(X) = cos(x_4), phi_20(X) = cos(x_5)
    1 constant feature: phi_21(X) = 1

    Parameters
    ----------
    X: matrix of floats, dim = (n,5), inputs with 5 features (n = 700 for the training set)

    Returns
    ----------
    X_transformed: array of floats: dim = (n,21), transformed input with 21 features
    """
    X = np.asarray(X)
    # Take the row count from the input instead of hard-coding 700, so the same
    # transformation can be applied to data sets of any size.
    n_samples = X.shape[0]

    X_square = np.square(X)
    X_exp = np.exp(X)
    X_cos = np.cos(X)
    const = np.ones((n_samples, 1))

    # Column order must match the phi_1 ... phi_21 ordering documented above.
    X_transformed = np.concatenate((X, X_square, X_exp, X_cos, const), 1)

    # 4 feature families * 5 inputs + 1 constant column = 21; fails if X does not have 5 columns.
    assert X_transformed.shape == (n_samples, 21)
    return X_transformed

In [59]:
def _fit_and_report(model, label, X_transformed, y):
    """
    Fit `model` on the transformed data, print `label` followed by the RMSE of the
    fitted model on that same data, and return the fitted coefficients.

    Parameters
    ----------
    model: unfitted estimator exposing fit / predict / coef_ (LassoCV, RidgeCV, ElasticNetCV here)
    label: str, tag printed on the line before the RMSE value
    X_transformed: array of floats, transformed inputs as returned by transform_data
    y: array of floats, labels

    Returns
    ----------
    coef: array of floats, model.coef_ after fitting
    """
    model.fit(X_transformed, y)
    coef = model.coef_
    # In-sample error: the model is evaluated on the data it was fitted on.
    # Arguments follow the documented (y_true, y_pred) order.
    rmse = mean_squared_error(y, model.predict(X_transformed)) ** 0.5
    print(label)
    print(rmse)
    return coef


def fit(X, y):
    """
    This function receives training data points, transforms them with transform_data, and then
    fits three cross-validated regularised linear models (LassoCV, RidgeCV, ElasticNetCV) on the
    transformed data. The training RMSE of each model is printed for comparison; only the
    weights of the RidgeCV model are returned.

    Parameters
    ----------
    X: matrix of floats, inputs with 5 features (any shape accepted by transform_data)
    y: array of floats, input labels, one per row of X

    Returns
    ----------
    w: array of floats: dim = (21,), coefficients of the fitted RidgeCV model
    """
    X_transformed = transform_data(X)

    # All models use fit_intercept=False; presumably because the constant feature
    # phi_21 added by transform_data already plays the role of the intercept.

    # Lasso with cross-validation: 100 candidate alphas evenly spaced in [0.01, 0.1]
    lasso_cv = LassoCV(cv=10, fit_intercept=False, random_state=0, alphas=1e-2*np.linspace(1,10, num=100), max_iter = 10000)
    _fit_and_report(lasso_cv, 'LCV', X_transformed, y)

    # Ridge with cross-validation: 100 candidate alphas evenly spaced in [1, 100]
    ridge_cv = RidgeCV(cv=50, alphas=np.linspace(1,100, num=100), fit_intercept=False)
    w = _fit_and_report(ridge_cv, 'RCV', X_transformed, y)

    # ElasticNet with cross-validation, using the estimator's default alpha grid
    elastic_cv = ElasticNetCV(cv=100, random_state=0, fit_intercept=False, max_iter = 10000)
    _fit_and_report(elastic_cv, 'ECV', X_transformed, y)

    # The Ridge solution is the one handed back to the caller.
    assert w.shape == (21,)
    return w

In [60]:
if __name__ == "__main__":
    # Read the training table from disk
    raw = pd.read_csv("train.csv")

    # Labels come from the "y" column
    y = raw["y"].to_numpy()

    # Everything except the "Id" and "y" columns is used as model input
    data = raw.drop(columns=["Id", "y"])
    X = data.to_numpy()

    # Fit the models and retrieve the weight vector returned by fit()
    w = fit(X, y)

    # Show the weights, then write them out one value per line with 12 decimals
    print(w)
    np.savetxt("./results_RCV_50.csv", w, fmt="%.12f")


LCV
1.947677101210053
RCV
1.9523913047078831
ECV
1.9507974819471696
[ 0.08500373 -0.17014155 -0.25048316  0.20604224  0.06642821 -0.0801026
  0.03668773  0.0452679  -0.09223525  0.01007583 -0.54192089 -0.73899124
 -0.81603454 -0.42280554 -0.51610987 -0.54614158 -0.60417397 -0.60843203
 -0.54060893 -0.5906718  -0.58594306]
