In [1]:
# Data handling, filesystem access, regularized linear models, metrics and
# model-selection utilities for this notebook.
import pandas as pd
import os
# NOTE(review): hard-coded absolute Windows path. The relative CSV reads in
# later cells ("Housing.csv", "tstHousing.csv") resolve against this folder,
# so the notebook only runs on a machine that has this exact directory.
os.chdir("C:/Training/Academy/Statistics (Python)/Cases/Real Estate")
from sklearn.linear_model import Ridge, Lasso, ElasticNet
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold, GridSearchCV, cross_val_score
import numpy as np

In [2]:
# Read the labeled housing table from the working directory.
hous = pd.read_csv(filepath_or_buffer="Housing.csv")
# Dummy-encode the non-numeric columns; drop_first=True drops the first level
# of each encoded column so only k-1 indicator columns are produced.
dum_hous = pd.get_dummies(data=hous, drop_first=True)

In [3]:
# Split the encoded frame into the response ('price') and the predictors
# (every remaining column).
y = dum_hous['price']
X = dum_hous.drop(columns='price')

### Lasso

In [4]:
# Tune the Lasso penalty strength by exhaustive grid search.
# Candidate grid: 20 evenly spaced alpha values from 0.001 to 10.
params = dict(alpha=np.linspace(start=0.001, stop=10, num=20))
# 5 shuffled folds; the fixed random_state makes the split repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)
lasso = Lasso()
# No `scoring` is passed, so the estimator's own score method is used.
gcv_ls = GridSearchCV(estimator=lasso, param_grid=params, cv=kfold)
gcv_ls.fit(X, y)
# Report the winning alpha and its mean cross-validated score, one per line.
print(gcv_ls.best_params_, gcv_ls.best_score_, sep="\n")

{'alpha': 2.1060526315789474}
0.6494122887917475


In [5]:
# Keep a handle on the Lasso estimator that the grid search selected.
best_lasso = gcv_ls.best_estimator_

### Ridge

In [6]:
# Tune the Ridge penalty strength by exhaustive grid search.
# Candidate grid: 20 evenly spaced alpha values from 0.001 to 10.
params = dict(alpha=np.linspace(start=0.001, stop=10, num=20))
# 5 shuffled folds; the fixed random_state makes the split repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=23)
ridge = Ridge()
# No `scoring` is passed, so the estimator's own score method is used.
gcv_rg = GridSearchCV(estimator=ridge, param_grid=params, cv=kfold)
gcv_rg.fit(X, y)
# Report the winning alpha and its mean cross-validated score, one per line.
print(gcv_rg.best_params_, gcv_rg.best_score_, sep="\n")

{'alpha': 5.263631578947369}
0.6504158844201877


In [7]:
# Keep a handle on the Ridge estimator that the grid search selected.
best_rg = gcv_rg.best_estimator_

### Elastic Net

In [8]:
# Tune Elastic Net over a 2-D grid: 20 alpha values (0.001..10) crossed with
# 10 l1_ratio values (0.001..1), i.e. 200 candidate settings.
params = dict(
    alpha=np.linspace(start=0.001, stop=10, num=20),
    l1_ratio=np.linspace(start=0.001, stop=1, num=10),
)
elastic = ElasticNet()
# Reuses the `kfold` splitter created in an earlier cell.
gcv_el = GridSearchCV(estimator=elastic, param_grid=params, cv=kfold)
gcv_el.fit(X, y)

In [9]:
# Keep a handle on the Elastic Net estimator that the grid search selected.
best_el = gcv_el.best_estimator_

In [10]:
# Winning (alpha, l1_ratio) pair and its mean cross-validated score, one per line.
print(gcv_el.best_params_, gcv_el.best_score_, sep="\n")

{'alpha': 0.001, 'l1_ratio': 0.001}
0.6495959483887184


In [11]:
# Display the fitted intercept and the coefficient array of the selected
# Elastic Net model as a tuple.
best_el.intercept_, best_el.coef_

(-3983.6508997455094,
 array([3.55476306e+00, 1.84566798e+03, 1.42821017e+04, 6.56857962e+03,
        4.25341746e+03, 6.63558901e+03, 4.50361226e+03, 5.45173108e+03,
        1.25195763e+04, 1.25567436e+04, 9.31743499e+03]))

### Inferencing

#### Unlabeled Data

In [12]:
# Read the unlabeled housing rows (no 'price' column) that will be scored.
tst = pd.read_csv(filepath_or_buffer="tstHousing.csv")
# Preview the first three rows.
tst.head(n=3)

Unnamed: 0,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea
0,3450,3,1,2,yes,no,yes,no,no,1,no
1,2309,1,1,1,no,no,no,no,no,0,no
2,8940,2,2,3,yes,no,yes,yes,yes,1,yes


In [13]:
# Dummy-encode the unlabeled rows with the same get_dummies settings used for
# the training frame.
# NOTE(review): this encoding is computed independently of the training data,
# so the resulting columns match X only if every categorical column here has
# the same set of levels -- confirm against X.columns.
dum_tst = pd.get_dummies(data=tst, drop_first=True)
dum_tst.columns

Index(['lotsize', 'bedrooms', 'bathrms', 'stories', 'garagepl', 'driveway_yes',
       'recroom_yes', 'fullbase_yes', 'gashw_yes', 'airco_yes',
       'prefarea_yes'],
      dtype='object')

In [14]:
# Show the training feature columns for a visual comparison with dum_tst.columns.
X.columns

Index(['lotsize', 'bedrooms', 'bathrms', 'stories', 'garagepl', 'driveway_yes',
       'recroom_yes', 'fullbase_yes', 'gashw_yes', 'airco_yes',
       'prefarea_yes'],
      dtype='object')

We predict with the Ridge regression model, because its cross-validated R² score came out to be the best of the three models.

In [15]:
# Score the unlabeled rows with the selected Ridge model and attach the
# predictions to the original (un-encoded) test frame.
#
# The features are selected through the training column index (X.columns)
# rather than passing dum_tst as-is. This guarantees the model receives
# exactly the columns it was fitted on, in fit order, and raises a KeyError
# naming the column if one is missing. It also keeps this cell re-runnable:
# because 'Pred_Price' is written into `tst`, re-encoding `tst` afterwards
# would otherwise carry 'Pred_Price' into the feature matrix.
tst['Pred_Price'] = best_rg.predict(dum_tst[X.columns])
tst

Unnamed: 0,lotsize,bedrooms,bathrms,stories,driveway,recroom,fullbase,gashw,airco,garagepl,prefarea,Pred_Price
0,3450,3,1,2,yes,no,yes,no,no,1,no,57951.739108
1,2309,1,1,1,no,no,no,no,no,0,no,27282.770437
2,8940,2,2,3,yes,no,yes,yes,yes,1,yes,127578.009065
3,5810,3,1,2,no,yes,no,no,no,0,no,54975.551589
