In [161]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
from sklearn import preprocessing

## Preprocessing

In [162]:
df = pd.read_csv('MagicBricks.csv')

In [163]:
df.head()

Unnamed: 0,Area,BHK,Bathroom,Furnishing,Parking,Price,Status,Transaction,Type,Per_Sqft
0,800.0,3,2.0,Semi-Furnished,1.0,6500000,Ready_to_move,New_Property,Builder_Floor,
1,750.0,2,2.0,Semi-Furnished,1.0,5000000,Ready_to_move,New_Property,Apartment,6667.0
2,950.0,2,2.0,Furnished,1.0,15500000,Ready_to_move,Resale,Apartment,6667.0
3,600.0,2,2.0,Semi-Furnished,1.0,4200000,Ready_to_move,Resale,Builder_Floor,6667.0
4,650.0,2,2.0,Semi-Furnished,1.0,6200000,Ready_to_move,New_Property,Builder_Floor,6667.0


In [164]:
df = df.dropna()

In [165]:
df['Parking'].unique()

array([ 1.,  5.,  2.,  4.,  9.,  3., 10.])

In [166]:
ord_enc = OrdinalEncoder()

df["Furnishing"] = ord_enc.fit_transform(df[["Furnishing"]])
df["Transaction"] = ord_enc.fit_transform(df[["Transaction"]])
df["Type"] = ord_enc.fit_transform(df[["Type"]])


In [167]:
df.head()

Unnamed: 0,Area,BHK,Bathroom,Furnishing,Parking,Price,Status,Transaction,Type,Per_Sqft
1,750.0,2,2.0,1.0,1.0,5000000,Ready_to_move,0.0,0.0,6667.0
2,950.0,2,2.0,0.0,1.0,15500000,Ready_to_move,1.0,0.0,6667.0
3,600.0,2,2.0,1.0,1.0,4200000,Ready_to_move,1.0,1.0,6667.0
4,650.0,2,2.0,1.0,1.0,6200000,Ready_to_move,0.0,1.0,6667.0
5,1300.0,4,3.0,1.0,1.0,15500000,Ready_to_move,0.0,1.0,6667.0


In [168]:
y = df['Price']

In [169]:
df.drop(['Status','Price','Per_Sqft'],inplace=True,axis=1)

In [170]:
df.head()

Unnamed: 0,Area,BHK,Bathroom,Furnishing,Parking,Transaction,Type
1,750.0,2,2.0,1.0,1.0,0.0,0.0
2,950.0,2,2.0,0.0,1.0,1.0,0.0
3,600.0,2,2.0,1.0,1.0,1.0,1.0
4,650.0,2,2.0,1.0,1.0,0.0,1.0
5,1300.0,4,3.0,1.0,1.0,0.0,1.0


In [172]:
X = df

In [173]:
X = X.to_numpy()
y = y.to_numpy()

In [174]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42)

In [175]:
y_train.shape, y_test.shape

((954,), (51,))

In [176]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1005 entries, 1 to 1258
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Area         1005 non-null   float64
 1   BHK          1005 non-null   int64  
 2   Bathroom     1005 non-null   float64
 3   Furnishing   1005 non-null   float64
 4   Parking      1005 non-null   float64
 5   Transaction  1005 non-null   float64
 6   Type         1005 non-null   float64
dtypes: float64(6), int64(1)
memory usage: 62.8 KB


## Scaling data

In [177]:
scaler = preprocessing.MinMaxScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
scaler = preprocessing.MinMaxScaler().fit(X_test)
X_test_scaled = scaler.transform(X_test)

In [178]:
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

(954, 7)
(51, 7)
(954,)
(51,)


## Building the model

In [179]:
from sklearn import linear_model
lm = linear_model.LinearRegression()
lm.fit(X_train, y_train)


## Making the Prediction

In [180]:
y_pred = lm.predict(X_test)

## Calculating Accuracy and Errors

In [181]:
from sklearn import metrics
print('MAE:', metrics.mean_absolute_error(y_test, y_pred))  
print('MSE:', metrics.mean_squared_error(y_test, y_pred))  
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
print('VarScore:',metrics.explained_variance_score(y_test,y_pred))

MAE: 11497260.242338276
MSE: 430838419869544.3
RMSE: 20756647.60671974
VarScore: 0.14096743773719345


## Obtaining Weights and Intercepts

In [182]:
lm.coef_

array([ 4.96481366e+03, -9.87760010e+04,  1.35124290e+07,  1.41005566e+06,
        4.22475548e+06, -3.62600660e+06, -7.37972534e+05])

In [183]:
lm.intercept_

-24853306.80254696