In [4]:
import os
import numpy as np
import pandas as pd
from sklearn import metrics
from sklearn import linear_model
from sklearn import model_selection

# load data
data = pd.read_csv('./train.csv')
y = data['y']
raw_X = data.iloc[:, 2:]

# create the features that are required for the task:
quadratic = raw_X**2
exponential = np.exp(raw_X)
cosine = np.cos(raw_X)
constant = pd.Series(np.ones(raw_X.shape[0]))

# merge features to dataframe
X = pd.concat([raw_X, quadratic, exponential, cosine, constant], axis=1)

# rename columns
column_names = list()
for i in range(1, 22):
    column_names.append('x{}'.format(i))
X.columns = column_names

In [52]:
# gridsearch for alpha and l1_ratio with elastic net and 10-fold cross validation
regr = linear_model.ElasticNet(random_state=0, max_iter=1e5,
                              fit_intercept=False)
rmse = metrics.make_scorer(metrics.mean_squared_error, squared=False,
                          greater_is_better=False)
param = {'alpha': np.linspace(0.055, 0.057, 10),
        'l1_ratio': np.linspace(0.385, 0.39, 10)}
gs = model_selection.GridSearchCV(estimator=regr, cv=10, scoring=rmse,
                                  param_grid=param)
gs.fit(X, y)

GridSearchCV(cv=10,
             estimator=ElasticNet(fit_intercept=False, max_iter=100000.0,
                                  random_state=0),
             param_grid={'alpha': array([0.055     , 0.05522222, 0.05544444, 0.05566667, 0.05588889,
       0.05611111, 0.05633333, 0.05655556, 0.05677778, 0.057     ]),
                         'l1_ratio': array([0.385     , 0.38555556, 0.38611111, 0.38666667, 0.38722222,
       0.38777778, 0.38833333, 0.38888889, 0.38944444, 0.39      ])},
             scoring=make_scorer(mean_squared_error, greater_is_better=False, squared=False))

In [54]:
# evaluate gridsearch
print('Best RMSE: %0.6f: %s)' % (gs.best_score_, gs.best_params_))

Best RMSE: -1.951069: {'alpha': 0.05655555555555556, 'l1_ratio': 0.3877777777777778})


In [55]:
# fit with best estimator
final_regr = gs.best_estimator_.fit(X, y)

# get weights
weights = final_regr.coef_
weights

array([ 0.        , -0.        , -0.        ,  0.        ,  0.        ,
       -0.        ,  0.        ,  0.        , -0.        ,  0.        ,
       -0.5260155 , -0.75233812, -0.83954689, -0.25700654, -0.50748563,
       -0.56977302, -0.62992376, -0.64010492, -0.53087588, -0.62468602,
       -0.6240199 ])

In [56]:
# create submission file
result = pd.DataFrame({'weights': weights})
filename = 'submission_elasticnet.csv'
result.to_csv(os.path.join('.', filename), index=False, header=False)