# Lasso & Ridge

In [4]:
%matplotlib inline
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import pickle
from sklearn.metrics import mean_squared_error as mse
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import Lasso, LassoCV, Ridge,RidgeCV
import warnings


warnings.filterwarnings("ignore")

# environment settings
data_path = 'Data/'

# Deserialize previously saved data from "preprocessing"
with open(data_path+'train_pp.obj', 'rb') as train_pp, \
open(data_path+'test_pp.obj','rb') as test_pp:
    train_df = pickle.load(train_pp)
    test_df = pickle.load(test_pp)
train_df["SalePrice"] = np.log1p(train_df["SalePrice"])

In [5]:
X = train_df.loc[:,'Id':'SaleCondition_Partial']
y = train_df['SalePrice']
print("Shape of training set {}.\nShape of test set {}".format(X.shape,y.shape))

Shape of training set (1460, 303).
Shape of test set (1460,)


In [6]:
lasso = LassoCV(alphas = [1, 0.1, 0.001, 0.0005],
                      cv=KFold(10, shuffle=True, 
                       random_state=12345678)).fit(X, y)
ridge = RidgeCV(alphas = [1, 0.1, 0.001, 0.0005],
                      cv=KFold(10, shuffle=True, 
                       random_state=12345678)).fit(X, y)

kf = KFold(5, shuffle=True, random_state=42).get_n_splits(X)
rmse_cv_lasso= np.sqrt(-cross_val_score(lasso, X, y, scoring="neg_mean_squared_error", cv = kf))
rmse_cv_ridge= np.sqrt(-cross_val_score(ridge, X, y, scoring="neg_mean_squared_error", cv = kf))


print("The 10-fold crossvalidation RMSE of Lasso is {:.5f} +/- {:.3f} , alpha :{}".format(rmse_cv_lasso.mean(),
                                                                                          rmse_cv_lasso.std(),
                                                                                          lasso.alpha_))
print("The 10-fold crossvalidation RMSE of Ridge is {:.5f} +/- {:.3f} , alpha :{}".format(rmse_cv_ridge.mean(),
                                                                                          rmse_cv_ridge.std(),
                                                                                          ridge.alpha_))

The 10-fold crossvalidation RMSE of Lasso is 0.12430 +/- 0.015 , alpha :0.0005
The 10-fold crossvalidation RMSE of Ridge is 0.13352 +/- 0.015 , alpha :1


In [7]:
#Prediction on real test set using lasso
lasso = Lasso(alpha=0.0005)
lasso.fit(X,y)
pred_results =lasso.predict(test_df)
pred_results =np.expm1(pred_results)
result_df = pd.DataFrame(data={'Id': test_df["Id"].values,
                               'SalePrice': pred_results})
#Create output csv file
result_df.to_csv(data_path+"outputs/lasso_alpha_0005", index=False)