In [None]:
from sklearn.linear_model import Ridge,RidgeCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import matplotlib.pyplot as plt
from scipy import stats
import numpy as np
import pandas as pd

In [None]:
# Load the processed train/test tables (file names suggest they are already
# normalised upstream -- confirm against the preprocessing notebook).
x_train = pd.read_csv('../data/processed/X_train_norm.csv')
y_train = pd.read_csv('../data/processed/y_train_norm.csv')
xtest = pd.read_csv('../data/processed/X_test_norm.csv')

# `xtest` keeps the 'Id' column because the submission cell needs it;
# `x_test` is the feature-only frame that is passed to predict().
x_test = xtest.drop('Id', axis=1)

# Ridge Feature Selection


In [None]:
# Set-up for the coefficient-path plot:
#   ridge_coefs  -- accumulator, one coefficient vector per alpha
#   ridge_alphas -- 1000 log-spaced penalties from 1e-1 to 1e7
#   ridge_mdl    -- a single Ridge estimator that is re-fitted for every alpha
ridge_coefs = []
ridge_alphas = np.logspace(-1, 7, 1000)
# max_iter has to be an integer: scikit-learn releases that validate estimator
# parameters reject the float 1e7 when fit() is called.
ridge_mdl = Ridge(max_iter = int(1e7))

In [None]:
# Fit the Ridge estimator once per alpha and keep one flat coefficient vector
# per fit. The estimator defined above is bound to `ridge_mdl` (the previous
# `ridge_lm` was never defined and raised NameError on a fresh kernel).
# NOTE: `ridge_mdl` is rebound to a RidgeCV model further down, so this cell
# must run before the hyperparameter-tuning section.
# The list is rebuilt from scratch so that re-running this cell does not keep
# appending duplicate rows.
ridge_coefs = []
for alpha in ridge_alphas:
    ridge_mdl.set_params(alpha = alpha).fit(x_train, y_train)
    # coef_ is (1, n_features) for a single-column 2-D target and (n_features,)
    # for a 1-D target; ravel() yields one row per alpha in both cases.
    ridge_coefs.append(np.ravel(ridge_mdl.coef_))

In [None]:
# Preview only the first few coefficient vectors; displaying the whole list
# (one array per alpha) floods the notebook output.
ridge_coefs[:3]

In [None]:
# Tabulate the coefficient path: one row per alpha (used as the index), one
# column per training feature, then draw every path on a log-scaled alpha axis.
title = 'Ridge coefficients as a function of the regularization'
ridge_df_coef = pd.DataFrame(data = ridge_coefs, columns = x_train.columns, index = ridge_alphas)

coef_path_ax = ridge_df_coef.plot(title = title, logx = True, legend = False)
coef_path_ax.set_xlabel('alpha')
coef_path_ax.set_ylabel('Coefficients')
plt.show()

In [None]:
# Plot the paths of a few randomly chosen features so that individual
# trajectories stay readable. A fixed seed makes the figure reproducible under
# Restart & Run All, and the sample size is capped at the number of columns so
# the cell also works on narrow frames.
sample_rng = np.random.default_rng(42)
n_coef_columns = ridge_df_coef.shape[1]
sampled_columns = sample_rng.choice(n_coef_columns, size = min(5, n_coef_columns), replace = False)
ridge_df_coef.iloc[:, sampled_columns].plot(logx=True, title=title);

In [None]:
# Show only the first rows of the coefficient table instead of the full frame
# (one row per alpha).
ridge_df_coef.head()

## Hyperparameter tuning

In [None]:
# Coarse search: 100 log-spaced candidate penalties between 1e-1 and 1e7.
# NOTE: this rebinds `ridge_alphas`, which held the 1000-point grid used for
# the coefficient-path plot above.
ridge_alphas = np.logspace(start=-1, stop=7, num=100)

# 5-fold cross-validated ridge over the candidate grid. fit() returns the
# estimator itself, so `ridge_mdl` and `ridge_cv` name the same fitted object.
ridge_cv = RidgeCV(cv=5, alphas=ridge_alphas)
ridge_mdl = ridge_cv.fit(x_train, y_train)
# Selected penalty, then the fitted intercept, one per line.
print(ridge_mdl.alpha_, ridge_mdl.intercept_, sep='\n')

y_pred = ridge_mdl.predict(x_test)



In [None]:
# Score of the coarse-search model evaluated on the training data itself
# (in-sample, so it is an optimistic figure).
ridge_cv.score(X=x_train, y=y_train)

## Refined search

In [None]:
# Finer grid: 200 log-spaced candidate penalties between 10**0 and 10**2.
ridge_alphas2 = np.logspace(start=0, stop=2, num=200)


In [None]:
# Second pass: 5-fold cross-validated ridge restricted to the finer alpha grid.
# fit() returns the estimator, so `ridge_mdl2` and `ridge_cv2` are the same
# fitted object.
ridge_cv2 = RidgeCV(cv=5, alphas=ridge_alphas2)
ridge_mdl2 = ridge_cv2.fit(x_train, y_train)
# Selected penalty, then the fitted intercept, one per line.
print(ridge_mdl2.alpha_, ridge_mdl2.intercept_, sep='\n')

y_pred = ridge_mdl2.predict(x_test)

In [None]:
# Score of the refined-search model evaluated on the training data itself
# (in-sample, so it is an optimistic figure).
ridge_cv2.score(X=x_train, y=y_train)

## Model Submission

In [None]:
# Predict on the test features (every column except 'Id') with the refined
# model, then apply expm1 to the predictions.
# NOTE(review): expm1 presumably undoes a log1p transform applied to the target
# during preprocessing -- confirm against the data-preparation step.
test_features = xtest.drop(columns='Id')
y_predict = np.expm1(ridge_cv2.predict(test_features))

# Pair each test 'Id' with its flattened prediction.
sale_price = pd.Series(y_predict.ravel())
submission = pd.DataFrame({'Id': xtest['Id'], 'SalePrice': sale_price})

# Write the submission file without the DataFrame index.
submission.to_csv('submission_ridge.csv', index=False)

submission