## Elastic Net Model

Libraries and Datasets:

In [None]:
from sklearn.linear_model import ElasticNet,ElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from scipy import stats
import numpy as np
import pandas as pd

In [None]:
# Read the pre-processed train/test CSVs (the "_norm" suffix suggests they were
# normalised upstream — confirm against the preprocessing step).
x_train = pd.read_csv('../data/processed/X_train_norm.csv')
y_train = pd.read_csv('../data/processed/y_train_norm.csv')
xtest = pd.read_csv('../data/processed/X_test_norm.csv')

# Test features without the 'Id' column; xtest itself keeps 'Id' for the submission.
x_test = xtest.drop(columns='Id')

### Initial Model Implementation

Looking for a reasonable starting point for alpha.

In [None]:
# Candidate regularisation strengths, spanning several orders of magnitude.
alphas = [1e-5, 0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1, 10]

# One report line per alpha. All metrics are computed on the training data
# itself, so they describe in-sample fit only.
report_line = "Alpha:{0:.5f}, R2:{1:.3f}, MSE:{2:.3f}, RMSE:{3:.3f}"

for a in alphas:
    elastic_mdl = ElasticNet(alpha=a)
    elastic_mdl.fit(x_train, y_train)
    score = elastic_mdl.score(x_train, y_train)
    pred_y = elastic_mdl.predict(x_train)
    mse = mean_squared_error(y_train, pred_y)
    print(report_line.format(a, score, mse, np.sqrt(mse)))

In [None]:
# Baseline model at alpha=0.001, fitted on the full training set.
elastic = ElasticNet(alpha=0.001)
elastic = elastic.fit(x_train, y_train)


In [None]:
# Let 5-fold cross-validation pick alpha from the candidate list.
elastic_cv = ElasticNetCV(cv=5, alphas=alphas)
elastic_mdl = elastic_cv.fit(x_train, y_train)

# Report the selected alpha and the fitted intercept, in that order.
for fitted_value in (elastic_mdl.alpha_, elastic_mdl.intercept_):
    print(fitted_value)

In [None]:
# Inspect the mean-squared-error path recorded on the ElasticNetCV fitted above.
elastic_cv.mse_path_

In [None]:
# In-sample score: evaluated on the same x_train / y_train the model was fitted on.
elastic_cv.score(x_train,y_train)

## Hyperparameter tuning

Looking for the best combination of alpha and l1_ratio.

In [None]:
# Base estimator passed to the grid searches in the following cells.
# max_iter must be an integer: 1e7 is a float literal, which recent
# scikit-learn releases reject during parameter validation, so it is
# converted explicitly.
# NOTE(review): GridSearchCV clones the estimator for each fit, so
# warm_start probably has no effect here — confirm before relying on it.
elasticnet_mdl = ElasticNet(warm_start=True, max_iter=int(1e7))

Define the parameter ranges for an initial broad search.

In [None]:
# Broad search grid: alpha across several orders of magnitude, and ten evenly
# spaced l1_ratio values from 0.1 to 1.
alphas = [1e-5, 0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1, 10]
l1_ratios = np.linspace(0.1, 1, 10)
elastic_params = {'alpha': alphas, 'l1_ratio': l1_ratios}

In [None]:
# Broad grid search: 10-fold cross-validation over every alpha / l1_ratio pair
# in elastic_params, keeping the training scores alongside the test scores.
elastic_param_search = GridSearchCV(
    estimator=elasticnet_mdl,
    param_grid=elastic_params,
    cv=10,
    verbose=2,
    return_train_score=True,
)

In [None]:
# Run the broad grid search on the training data.
elastic_param_search.fit(x_train,y_train)

In [None]:
# Parameter combination selected by the broad search.
elastic_param_search.best_params_

In [None]:
# Highest mean test score across all parameter combinations of the broad search.
elastic_param_search.cv_results_['mean_test_score'].max()

### Refine search

In [None]:
# Refined grid: 50 evenly spaced alphas in [5e-5, 1e-3] and 20 evenly spaced
# l1_ratio values in [0.01, 1].
alphas2 = np.linspace(start=5e-5, stop=1e-3, num=50)
l1_ratios2 = np.linspace(start=0.01, stop=1, num=20)
elastic_params2 = dict(alpha=alphas2, l1_ratio=l1_ratios2)

In [None]:
# First refinement: 10-fold cross-validated grid search over elastic_params2,
# keeping the training scores alongside the test scores.
elastic_param_search2 = GridSearchCV(
    estimator=elasticnet_mdl,
    param_grid=elastic_params2,
    cv=10,
    verbose=2,
    return_train_score=True,
)

In [None]:
# Run the refined grid search on the training data.
elastic_param_search2.fit(x_train,y_train)

In [None]:
# Parameter combination selected by the refined search.
elastic_param_search2.best_params_

In [None]:
# Highest mean test score across all parameter combinations of the refined search.
elastic_param_search2.cv_results_['mean_test_score'].max()

Second tuning

In [None]:
# Narrow grid for the second refinement round: 50 alphas in [0.0001, 0.0003]
# and 20 l1_ratio values in [0.001, 1].
alphas3 = np.linspace(0.0001, 0.0003, 50)
l1_ratios3 = np.linspace(0.001, 1, 20)
# Build the grid from the ranges defined in this cell; using the broad-search
# `alphas` / `l1_ratios` here would simply repeat the first grid search.
elastic_params3 = {'alpha': alphas3, 'l1_ratio': l1_ratios3}

In [None]:
# Second refinement: 10-fold cross-validated grid search over elastic_params3,
# keeping the training scores alongside the test scores.
elastic_param_search3 = GridSearchCV(
    estimator=elasticnet_mdl,
    param_grid=elastic_params3,
    cv=10,
    verbose=2,
    return_train_score=True,
)

In [None]:
# Run the grid search defined by elastic_params3 on the training data.
elastic_param_search3.fit(x_train,y_train)

In [None]:
# Parameter combination selected by this search.
elastic_param_search3.best_params_

In [None]:
# Highest mean test score across all parameter combinations of this search.
elastic_param_search3.cv_results_['mean_test_score'].max()

## Model submission


In [None]:
# Predict on every test column except 'Id'.
test_features = xtest.loc[:, xtest.columns != 'Id']
raw_predictions = elastic_param_search3.predict(test_features)

# expm1 is applied to the raw predictions — presumably undoing a log1p
# transform of the target; confirm against the preprocessing step.
y_predict = np.expm1(raw_predictions)

# Pair each test Id with its predicted SalePrice and write the submission file.
submission = pd.DataFrame({'Id': xtest['Id'], 'SalePrice': y_predict})
submission.to_csv('submission_elasticnet.csv', index=False)

submission