# California House Pricing Prediction

## Imports

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import seaborn as sns

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import KFold, cross_val_score
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

## Dataset Import

In [None]:
# Fetch the California housing dataset; its .data, .feature_names and .target
# fields are used by the following cells.
dataset = fetch_california_housing(return_X_y=False)

In [None]:
# Wrap the raw feature matrix in a DataFrame labelled with the dataset's
# feature names.
data = pd.DataFrame(
    data=dataset.data,
    columns=dataset.feature_names,
)

In [None]:
# Preview the first five rows of the feature table.
data.head(n=5)

In [None]:
# Append the regression target as the last column, named 'Price'.
# Later cells rely on it being the final column when splitting X and y.
data = data.assign(Price=dataset.target)

In [None]:
# Preview the first five rows again, now including the 'Price' column.
data.head(n=5)

In [None]:
# Summarise the frame (column dtypes and non-null counts) to check for
# missing values before modelling.
data.info()

## EDA

In [None]:
# Pairwise Pearson correlation between all columns, including 'Price'.
data.corr(method='pearson')

In [None]:
# List the column labels (the dataset's feature names plus the added 'Price').
data.columns

In [None]:
# Set the label size *before* plotting: rcParams only affect artists created
# after they are set, so setting it afterwards leaves this figure unchanged.
plt.rcParams["axes.labelsize"] = 12

# One row of panels showing 'Price' against every column.
# seaborn's pairplot creates and sizes its own figure (tune it with the
# `height` / `aspect` arguments), so no plt.figure() call is made here --
# it would only produce an additional, empty figure.
price_panel_vars = ['Price', 'MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
                    'Population', 'AveOccup', 'Latitude', 'Longitude']
sns.pairplot(data, x_vars=price_panel_vars, y_vars=['Price'], diag_kind='hist')
plt.show()

In [None]:
# Lower-triangle pair plot with a fitted regression line in every panel.
# With kind='reg', plot_kws is forwarded to sns.regplot, which has no
# top-level `alpha` argument: point transparency has to be passed through
# `scatter_kws`, and the line colour through `line_kws`.
sns.pairplot(
    data,
    corner=True,
    kind='reg',
    plot_kws={
        'line_kws': {'color': 'red'},
        'scatter_kws': {'alpha': 0.6},
    },
)

## Data - Features & Target

In [None]:
# Split by position: every column except the last is a feature, and the last
# column ('Price', appended earlier) is the target.
n_features = data.shape[1] - 1
X = data.iloc[:, :n_features]
y = data.iloc[:, n_features]

## Model Building

### Multiple Linear Regression

In [None]:
# Sanity check: a single observation reshaped to 2-D has shape (1, n_features),
# which is the layout estimators expect for a one-row prediction.
X.iloc[0, :].to_numpy()[np.newaxis, :].shape

In [None]:
# Multiple Linear Regression with all features included

ml_regr = LinearRegression()
ml_regr.fit(X, y)

# Predict for one observation. Double brackets keep it a one-row DataFrame so
# the model receives the same column names it was fitted with (a bare NumPy
# row triggers scikit-learn's "X does not have valid feature names" warning).
y_pred = ml_regr.predict(X.iloc[[1]])

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: the model is scored on the data it was fitted on, so this is a
# training R-squared, not an estimate of out-of-sample performance.
score_train = ml_regr.score(X, y)
print('Train R-Squared: ', score_train)

In [None]:
# Reduced feature set: every predictor except 'AveBedrms'.
features_ml_regr = [
    'MedInc', 'HouseAge', 'AveRooms', 'Population',
    'AveOccup', 'Latitude', 'Longitude',
]
X_features = X.loc[:, features_ml_regr]

In [None]:
# Multiple Linear Regression with one of the multicollinear features excluded

X_features = X[features_ml_regr]
ml_regr = LinearRegression()
ml_regr.fit(X_features, y)

# Predict for one observation. Double brackets keep it a one-row DataFrame so
# the model receives the same column names it was fitted with (a bare NumPy
# row triggers scikit-learn's "X does not have valid feature names" warning).
y_pred = ml_regr.predict(X_features.iloc[[1]])

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: the model is scored on the data it was fitted on, so this is a
# training R-squared, not an estimate of out-of-sample performance.
score_train = ml_regr.score(X_features, y)
print('Train R-Squared: ', score_train)

### Decision Tree Regression

In [None]:
# Decision Tree Regression with all features included
dt_regr = DecisionTreeRegressor(random_state=42)
dt_regr.fit(X, y)

# Predict for one observation, passed as a one-row DataFrame so the column
# names match those seen during fit (avoids the feature-name warning raised
# for bare NumPy input).
y_pred = dt_regr.predict(X.iloc[[1]])

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: training R-squared only -- the tree is scored on the data it was
# fitted on, so this says nothing about generalisation.
score_train = dt_regr.score(X, y)
print('Train R-Squared: ', score_train)

In [None]:
# Decision Tree Regression with one of the multicollinear features excluded
dt_regr = DecisionTreeRegressor(random_state=42)
dt_regr.fit(X_features, y)

# Predict for one observation, passed as a one-row DataFrame so the column
# names match those seen during fit (avoids the feature-name warning raised
# for bare NumPy input).
y_pred = dt_regr.predict(X_features.iloc[[1]])

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: training R-squared only -- the tree is scored on the data it was
# fitted on, so this says nothing about generalisation.
score_train = dt_regr.score(X_features, y)
print('Train R-Squared: ', score_train)

### Random Forest Regression

In [None]:
# Random Forest Regression with all features included
rf_regr = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regr.fit(X, y)

# Predict for one observation, passed as a one-row DataFrame so the column
# names match those seen during fit (avoids the feature-name warning raised
# for bare NumPy input).
y_pred = rf_regr.predict(X.iloc[[1]])

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: training R-squared only -- the forest is scored on the data it was
# fitted on, so this says nothing about generalisation.
score_train = rf_regr.score(X, y)
print('Train R-Squared: ', score_train)

In [None]:
# Random Forest Regression with one of the multicollinear features excluded
rf_regr = RandomForestRegressor(n_estimators=100, random_state=42)
rf_regr.fit(X_features, y)

# Predict for one observation, passed as a one-row DataFrame so the column
# names match those seen during fit (avoids the feature-name warning raised
# for bare NumPy input).
y_pred = rf_regr.predict(X_features.iloc[[1]])

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: training R-squared only -- the forest is scored on the data it was
# fitted on, so this says nothing about generalisation.
score_train = rf_regr.score(X_features, y)
print('Train R-Squared: ', score_train)

### Support Vector Regression

In [None]:
# Feature scaling: standardise both the features and the target.
sc_X = StandardScaler()
sc_y = StandardScaler()

X_train_scaled = sc_X.fit_transform(X)

# StandardScaler standardises each *column* of a 2-D (n_samples, n_features)
# array, so the target must be a single column of shape (n_samples, 1).
# Expanding on axis=0 would instead give one row with n_samples "features",
# and every scaled value would come out as 0.
y_train_scaled = sc_y.fit_transform(np.expand_dims(y, axis=1))

# Any held-out data must be scaled with sc_X.transform / sc_y.transform
# (not fit_transform) so it reuses the statistics learned here.

#### Linear Kernel

In [None]:
# Support Vector Regression with linear kernel and all features included.
# The model is trained, queried and scored in the *scaled* space throughout;
# only the final prediction is mapped back to price units.

# (Re)fit the target scaler on a column vector of shape (n_samples, 1) and
# flatten the result, since SVR expects a 1-D target. Doing it here guarantees
# that inverse_transform below matches the shape used for training.
y_scaled_1d = sc_y.fit_transform(y.to_numpy().reshape(-1, 1)).ravel()

svr_regr = SVR(kernel='linear')
svr_regr.fit(X_train_scaled, y_scaled_1d)

# Predict for one observation using its scaled features (kept 2-D).
y_pred_scaled = svr_regr.predict(X_train_scaled[[1]])
# inverse_transform requires 2-D input; flatten back afterwards.
y_pred = sc_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: training R-squared only -- scored on the data the model was fitted on.
score_train = svr_regr.score(X_train_scaled, y_scaled_1d)
print('Train R-Squared: ', score_train)

#### Non-Linear Kernel

In [None]:
# Support Vector Regression with non-linear (RBF) kernel and all features
# included. The model is trained, queried and scored in the *scaled* space
# throughout; only the final prediction is mapped back to price units.

# (Re)fit the target scaler on a column vector of shape (n_samples, 1) and
# flatten the result, since SVR expects a 1-D target. Doing it here guarantees
# that inverse_transform below matches the shape used for training.
y_scaled_1d = sc_y.fit_transform(y.to_numpy().reshape(-1, 1)).ravel()

svr_nl_regr = SVR(kernel='rbf')
svr_nl_regr.fit(X_train_scaled, y_scaled_1d)

# Predict for one observation using its scaled features (kept 2-D).
y_pred_scaled = svr_nl_regr.predict(X_train_scaled[[1]])
# inverse_transform requires 2-D input; flatten back afterwards.
y_pred = sc_y.inverse_transform(y_pred_scaled.reshape(-1, 1)).ravel()

print('Predicted Price: ', y_pred[0])
# Positional lookup, matching the positional row selection above.
print('Actual Price: ', y.iloc[1])

# NOTE: training R-squared only -- scored on the data the model was fitted on.
score_train = svr_nl_regr.score(X_train_scaled, y_scaled_1d)
print('Train R-Squared: ', score_train)