In [1]:
import pandas as pd

df = pd.read_csv('real_estate_dataset.csv')

In [2]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ID                  500 non-null    int64  
 1   Square_Feet         500 non-null    float64
 2   Num_Bedrooms        500 non-null    int64  
 3   Num_Bathrooms       500 non-null    int64  
 4   Num_Floors          500 non-null    int64  
 5   Year_Built          500 non-null    int64  
 6   Has_Garden          500 non-null    int64  
 7   Has_Pool            500 non-null    int64  
 8   Garage_Size         500 non-null    int64  
 9   Location_Score      500 non-null    float64
 10  Distance_to_Center  500 non-null    float64
 11  Price               500 non-null    float64
dtypes: float64(4), int64(8)
memory usage: 47.0 KB


In [3]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LinearRegression, ElasticNet
from sklearn.pipeline import Pipeline
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.preprocessing import StandardScaler

In [4]:
X = df.drop('Price', axis=1)
y = df['Price']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## <b>Linear Regression Model</b>

In [5]:
Linear_model = Pipeline(
    steps=[
        ('scaler_model', StandardScaler()),
        ('linear_reg', LinearRegression())
    ]
)

Linear_model.fit(X_train, y_train)

In [6]:
y_pred = Linear_model.predict(X_test)
print(f"Mean square error : {mean_squared_error(y_test, y_pred)}")
print(f"R2 square : {r2_score(y_test, y_pred)}")

Mean square error : 434103062.40461963
R2 square : 0.9698235434054303


## <b>ElasticNet Regression Model With Optimized Search Method (Randomizedsearch)</b>

In [7]:
elastic_model = Pipeline(
    steps=[
    ('elastic_scaler', StandardScaler()),
    ('Elastic_Net', ElasticNet())
])

In [8]:
import numpy as np

elastic_param = {
    'Elastic_Net__alpha': np.logspace(-5, 2, 8),  
    'Elastic_Net__l1_ratio': [0.0, 0.1, 0.25, 0.5, 0.75, 1.0],  
    'Elastic_Net__max_iter': [1000, 5000, 10000], 
    'Elastic_Net__tol': [1e-3, 1e-4, 1e-5],  
}

In [9]:
elastic_search = RandomizedSearchCV(
    estimator=elastic_model,
    param_distributions=elastic_param,
    cv=5,
    n_iter=20,
    verbose=1,
    random_state=42,
    n_jobs=-1,
    scoring='neg_mean_squared_error'
)

In [10]:
elastic_search.fit(X_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


In [11]:
print(f"best elastic model parameters : {elastic_search.best_params_}")
print(f"best elastic model score : {elastic_search.best_score_}")

best elastic model parameters : {'Elastic_Net__tol': 0.0001, 'Elastic_Net__max_iter': 1000, 'Elastic_Net__l1_ratio': 0.75, 'Elastic_Net__alpha': np.float64(0.01)}
best elastic model score : -355764643.9081


In [12]:
best_elastic_model = elastic_search.best_estimator_
pred_y = best_elastic_model.predict(X_test)
print(f"Mean square error : {mean_squared_error(y_test, pred_y)}")
print(f"R2 square : {r2_score(y_test, pred_y)}")

Mean square error : 436464169.7687147
R2 square : 0.9696594122115773


In [None]:
import joblib

joblib.dump(Linear_model, 'Linear_Model.pkl')
joblib.dump(best_elastic_model, 'Elastic_Linear_Model.pkl')

['Elastic_Linear_Model.pkl']