In [10]:
import numpy as np
import pandas as pd
from sklearn import linear_model
from sklearn.linear_model import ElasticNetCV

In [2]:
def transform_data(X):
    """
    Expand the 5 input features of X (x_i denoting the i-th column of X) into 21 features phi(X):

    5 linear features:      phi_1(X)  = x_1,      ..., phi_5(X)  = x_5
    5 quadratic features:   phi_6(X)  = x_1^2,    ..., phi_10(X) = x_5^2
    5 exponential features: phi_11(X) = exp(x_1), ..., phi_15(X) = exp(x_5)
    5 cosine features:      phi_16(X) = cos(x_1), ..., phi_20(X) = cos(x_5)
    1 constant feature:     phi_21(X) = 1

    The number of rows is taken from the input instead of being fixed to 700, so the same
    transformation can be applied to any split of the data (training set, CV fold, test set).

    Parameters
    ----------
    X: matrix of floats, dim = (n_samples, 5), inputs with 5 features

    Returns
    ----------
    X_transformed: array of floats, dim = (n_samples, 21), transformed input with 21 features

    Raises
    ----------
    AssertionError: if the transformed matrix does not have exactly 21 columns
                    (i.e. X does not have 5 feature columns)
    """
    # Derive the row count from the data so the constant column always matches X.
    n_samples = np.shape(X)[0]

    X_square = np.square(X)
    X_exp = np.exp(X)
    X_cos = np.cos(X)
    const = np.ones((n_samples, 1))

    # Column order matters: downstream weights are reported in this exact feature order.
    X_transformed = np.concatenate((X, X_square, X_exp, X_cos, const), 1)

    assert X_transformed.shape == (n_samples, 21)
    return X_transformed

In [16]:
def fit(X, y):
    """
    Fit a cross-validated elastic-net regression on the transformed training data.

    The raw inputs are first expanded with transform_data; an ElasticNetCV model is then
    fitted on the expanded features and its coefficient vector is printed and returned.

    Parameters
    ----------
    X: matrix of floats, 5 feature columns (row-count requirements are those of transform_data;
       the provided training set has 700 rows)
    y: array of floats, one label per row of X

    Returns
    ----------
    w: array of floats: dim = (21,), coefficients of the fitted model, one per transformed feature
    """
    features = transform_data(X)

    # Alternatives that were tried earlier and are intentionally not used here:
    #   * closed-form least squares, solving X^T*X*w = X^T*y:
    #       w = np.linalg.solve(np.dot(features.T, features), np.dot(features.T, y))
    #   * sklearn's plain linear regression:
    #       w = linear_model.LinearRegression(fit_intercept=False).fit(features, y).coef_

    # Elastic net with 5-fold cross-validation. No separate intercept is fitted because
    # the transformed features are documented to already contain a constant column.
    model = ElasticNetCV(
        cv=5,
        random_state=0,
        fit_intercept=False,
        max_iter=10000,
    )
    model.fit(features, y)
    w = model.coef_

    print(w)

    assert w.shape == (21,)
    return w

In [17]:
if __name__ == "__main__":
    # Read the training table from disk.
    data = pd.read_csv("train.csv")

    # Separate the target column, then keep only the feature columns
    # (the "Id" and "y" columns are dropped).
    y = data.loc[:, "y"].to_numpy()
    data = data.drop(["Id", "y"], axis="columns")

    # Show the first few feature rows as a quick sanity check.
    print(data.head())

    # Fit on the raw feature matrix; fit() handles the feature transformation itself.
    X = data.to_numpy()
    w = fit(X, y)

    # Write the weights, one per line with 12 decimal places.
    np.savetxt("./results.csv", w, fmt="%.12f")

     x1    x2    x3    x4    x5
0  0.02  0.05 -0.09 -0.43 -0.08
1 -0.13  0.11 -0.08 -0.29 -0.03
2  0.08  0.06 -0.07 -0.41 -0.03
3  0.02 -0.12  0.01 -0.43 -0.02
4 -0.14 -0.12 -0.08 -0.02 -0.08
[ 0.         -0.         -0.          0.          0.         -0.
  0.          0.         -0.          0.         -0.46047154 -0.91661701
 -1.09949531 -0.16160283 -0.43410309 -0.51023268 -0.63959144 -0.64157583
 -0.46239592 -0.59503649 -0.58923516]
