## Real Estate (99acres.com) Model Building

## 1. Import Libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.pylab as pylab
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

## 2. Import Dataset

In [None]:
# Load the dataset; the relative path is resolved against the current
# working directory.
NJ_prop = pd.read_csv('Final_Project.csv')
# Bare expression so the notebook renders the DataFrame as the cell output.
NJ_prop

## 3. Data Understanding

In [None]:
# (number of rows, number of columns) of the DataFrame.
NJ_prop.shape

In [None]:
# Column names, dtypes, non-null counts and memory usage.
NJ_prop.info()

In [None]:
# Number of missing values in each column.
NJ_prop.isna().sum()

In [None]:
# Summary statistics (numeric columns by default), rounded to 0 decimals.
NJ_prop.describe().round()

## 4. Feature Engineering

### 4.1 Drop Unwanted Columns

In [None]:
# Preview the first five rows.
NJ_prop.head()

In [None]:
# Columns excluded from modelling; they are removed from the DataFrame in place.
unwanted_columns = ['Property_Name', 'Location', 'Availability', 'Bathroom']
NJ_prop.drop(columns=unwanted_columns, inplace=True)

# Confirm the new (rows, columns) shape after the drop.
print('Shape of data :', NJ_prop.shape)

### 4.2 Label Encoding for Categorical Columns

In [None]:
# Encoder that maps category labels to integer codes.
le = LabelEncoder()

In [None]:
# Encode every object-dtype column as integer codes.
# A separate fitted encoder is kept per column: re-fitting one shared encoder
# would discard all but the last column's mapping, making it impossible to
# decode the codes or to encode new data the same way later on.
label_encoders = {}
# select_dtypes yields an empty selection (loop is a no-op) when there are no
# object columns, whereas describe(include='object') raises in that case.
for column in NJ_prop.select_dtypes(include='object').columns:
    column_encoder = LabelEncoder()
    NJ_prop[column] = column_encoder.fit_transform(NJ_prop[column])
    label_encoders[column] = column_encoder

In [None]:
# Summary statistics rounded to 2 decimals, transposed so each column of the
# DataFrame becomes a row of the table.
NJ_prop.describe().round(2).T

In [None]:
# Bare expression so the notebook renders the DataFrame as the cell output.
NJ_prop

In [None]:
# Column dtypes and non-null counts (this cell sits after the encoding step).
NJ_prop.info()

### 4.3 Looking for Minimum & Maximum

In [None]:
# Print the smallest and largest value of every column, one line per column.
for column_name in NJ_prop.columns:
    column_values = NJ_prop[column_name]
    print(column_name, 'Min value :', column_values.min(), 'Max value :', column_values.max())

### 4.4 Correlation Heatmap

In [None]:
# Label sizes for the notebook's figures. They are applied BEFORE the figure
# and axes are created: matplotlib reads these settings when ticks and axis
# labels are built, so updating them after plotting would not affect this
# figure.
rcParams = {'xtick.labelsize':'14','ytick.labelsize':'14','axes.labelsize':'16'}
pylab.rcParams.update(rcParams)

fig = plt.figure(figsize=(9, 8))
# Pairwise correlation of all columns, annotated with 2 decimals
# (assumes every column is numeric at this point).
sns.heatmap(NJ_prop.corr(), annot=True, linewidths=.5, cbar_kws={"shrink": .5}, fmt='.2f', cmap='coolwarm')
fig.suptitle('Heatmap New Jersey Property Data', fontsize=18, fontweight="bold")
fig.tight_layout()
plt.show()

# No file extension is given, so matplotlib uses its default output format.
fig.savefig('Heatmap_Encoding', dpi = 250)

## 5. Model Building

In [None]:
# Preview the first five rows of the data used for modelling.
NJ_prop.head()

### 5.1 Train Test Split

In [None]:
# Separate the target column from the predictors.
target_column = 'Price'
y = NJ_prop[target_column]
X = NJ_prop.drop(columns=[target_column])

In [None]:
# Hold out 20 % of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.20,
    random_state=12,
)

In [None]:
# Shapes of the training and test feature matrices.
print(X_train.shape, X_test.shape)

### 5.2 Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Linear regression baseline; fit() returns the estimator itself.
linear = LinearRegression().fit(X_train, y_train)

# score() is the coefficient of determination (R^2) for regressors.
linear_train_score = linear.score(X_train, y_train)
linear_test_score = linear.score(X_test, y_test)
print("Training Accuracy = ", linear_train_score)
print("Test Accuracy     = ", linear_test_score)

### 5.3 Decision Tree Regressor

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Seeded like the train/test split and the random forests in this notebook:
# without random_state, ties between equally good splits are broken randomly
# and the reported scores can change from run to run.
dt = DecisionTreeRegressor(min_samples_split=2, random_state=12)
dt.fit(X_train, y_train)

# score() is the coefficient of determination (R^2) for regressors.
print("Training Accuracy = ", dt.score(X_train, y_train))
print("Test Accuracy     = ", dt.score(X_test, y_test))

### 5.4 Random Forest Regressor

In [None]:
from sklearn.ensemble import RandomForestRegressor

# 1000 depth-limited trees with a fixed seed for reproducible results.
rf_params = {'n_estimators': 1000, 'max_depth': 5, 'random_state': 12}
rf = RandomForestRegressor(**rf_params)
rf.fit(X_train, y_train)

# Report the score on each split (R^2 for regressors).
for split_label, features, target in (
    ("Training Accuracy = ", X_train, y_train),
    ("Test Accuracy     = ", X_test, y_test),
):
    print(split_label, rf.score(features, target))

### 5.5 Polynomial Features

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# The degree-2 expansion happens inside the pipeline, so it is fitted on the
# training split only. (A stand-alone PolynomialFeatures previously fitted on
# the full X here had its result discarded and was removed as dead code.)

# Define the pipeline and train model
poly_model = Pipeline([('poly', PolynomialFeatures(degree=2)),
                       ('rf', RandomForestRegressor(n_estimators = 1000, max_depth=5, random_state = 12))])
poly_model.fit(X_train, y_train)

# Calculate the Score
print("Training Accuracy = ", poly_model.score(X_train, y_train))
print("Test Accuracy     = ", poly_model.score(X_test, y_test))

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures

# The degree-2 expansion happens inside the pipeline, so it is fitted on the
# training split only. (A stand-alone PolynomialFeatures previously fitted on
# the full X here had its result discarded and was removed as dead code.)

# Define the pipeline and train model
# fit_intercept=False: PolynomialFeatures already adds a constant bias column
# by default, which plays the role of the intercept.
poly_model = Pipeline([('poly', PolynomialFeatures(degree=2)), ('linear', LinearRegression(fit_intercept=False))])
poly_model.fit(X_train, y_train)

# Calculate the Score
print("Training Accuracy = ", poly_model.score(X_train, y_train))
print("Test Accuracy     = ", poly_model.score(X_test, y_test))

## Observation:
### 1. We select the Polynomial Features model as the final model.
### 2. It achieves 98.73 % model accuracy.

## 6. Final Model Evaluation

In [None]:
def evaluate(model, test_features, test_labels):
    """Print the mean absolute error and the score of ``model`` on a dataset.

    Args:
        model: Fitted estimator exposing ``predict(X)`` and ``score(X, y)``.
        test_features: Features passed to ``predict`` and ``score``.
        test_labels: True target values aligned with ``test_features``.

    Returns:
        None. Both metrics are printed to stdout.
    """
    predictions = model.predict(test_features)
    # Mean absolute error, in the same units as the target values.
    mean_abs_error = np.mean(np.abs(predictions - test_labels))
    # score() returns a fraction (R^2 for scikit-learn regressors), so it is
    # scaled by 100 before being printed next to a percent sign.
    accuracy = model.score(test_features, test_labels)

    print('Average Error  = {:0.4f}'.format(mean_abs_error))
    print('Model Accuracy = {:0.4f} %'.format(100 * accuracy))

In [None]:
# Print the average absolute error and the score on the training split.
evaluate(poly_model, X_train, y_train)

In [None]:
# Print the average absolute error and the score on the held-out test split.
evaluate(poly_model, X_test, y_test)

### 6.1 Visualizing Results

In [None]:
# Predicted target values for the test features.
pred = poly_model.predict(X_test)

In [None]:
# Apply the label sizes before the axes exist; matplotlib reads them when
# ticks and axis labels are created.
pylab.rcParams.update(rcParams)
fig = plt.figure(figsize=(8,7))

# Actual values on x, predictions on y. The vectors must be passed by keyword:
# seaborn >= 0.12 no longer accepts them positionally and raises a TypeError.
sns.scatterplot(x=y_test, y=pred)
fig.suptitle('Prediction using Polynomial', fontsize= 18 , fontweight='bold')
plt.xlabel("Actual")
plt.ylabel("Prediction")
fig.tight_layout()
# Leave room above the axes so the suptitle does not overlap the plot.
fig.subplots_adjust(top=0.92)
plt.show()

#fig.savefig('Prediction_Polynomial', dpi = 500)

## 7. Model Deployment

In [None]:
from pickle import dump

In [None]:
# Serialize the fitted pipeline to disk. The context manager guarantees the
# file is flushed and closed even if pickling fails part-way through.
# NOTE: only load this pickle from a trusted source; unpickling can execute
# arbitrary code.
with open('regression_model.pkl', 'wb') as model_file:
    dump(poly_model, model_file)

# The End !!!