Step 1: Import Required Libraries

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import (
    AdaBoostRegressor,
    GradientBoostingRegressor,
    RandomForestRegressor,
)
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
from sklearn.tree import DecisionTreeRegressor


Step 2: Load Dataset

In [2]:
# Location of the housing CSV, given relative to the current working directory.
# Replace with your dataset path.
dataset_path = './Housing.csv'
# Read the whole CSV into a DataFrame; the following cells refer to it as `df`.
df = pd.read_csv(dataset_path)


Step 3: Preprocess Categorical Data

In [3]:
# Columns stored with the generic `object` dtype are treated as categorical.
categorical_cols = df.select_dtypes(include=['object']).columns.to_list()

# Replace each category with an ordinal code, column by column.
ordinal_encoder = OrdinalEncoder()
encoded_categorical_cols = ordinal_encoder.fit_transform(df[categorical_cols])

# Wrap the encoder output back into a DataFrame that carries the original
# column names AND the original row index. pd.concat(axis=1) aligns rows on
# the index, so without `index=df.index` the encoded frame would receive a
# fresh 0..n-1 index and silently misalign (producing NaNs) whenever `df` is
# not default-indexed, e.g. after rows have been filtered out.
encoded_categorical_df = pd.DataFrame(
    encoded_categorical_cols,
    columns=categorical_cols,
    index=df.index,
)

# Numerical columns come first, encoded categorical columns after them; the
# later positional slicing of the scaled array depends on this column order.
numerical_df = df.drop(categorical_cols, axis=1)
encoded_df = pd.concat([numerical_df, encoded_categorical_df], axis=1)


Step 4: Normalize the Dataset

In [4]:
# Standardize every column of the encoded frame and keep the fitted scaler
# around as `normalizer`.
# NOTE(review): the scaler is fitted on all rows before the train/validation
# split and on every column, including the one later used as the target, so
# the reported MAE/MSE are in standardized units rather than original units.
normalizer = StandardScaler().fit(encoded_df)
dataset_arr = normalizer.transform(encoded_df)


Step 5: Split Data into Features (X) and Target (y)

In [5]:
# Column 0 of the scaled array is the regression target; all remaining
# columns form the feature matrix.
y = dataset_arr[:, 0]
X = dataset_arr[:, 1:]


Step 6: Split Data into Train and Validation Sets

In [9]:
# Hold-out settings: 30% of the rows go to validation, rows are shuffled
# first, and the seed is fixed so the split is repeatable. `random_state`
# is reused by the model cells below.
test_size = 0.3
random_state = 1
is_shuffle = True

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=random_state,
    shuffle=is_shuffle,
)


Step 7: Train and Evaluate Models

Decision Tree:

In [10]:
# Fit a decision tree (default hyperparameters, fixed seed) on the training split.
dt_regressor = DecisionTreeRegressor(random_state=random_state).fit(X_train, y_train)

# Score the fitted tree on the held-out validation split.
y_pred_dt = dt_regressor.predict(X_val)
mse_dt = mean_squared_error(y_val, y_pred_dt)
mae_dt = mean_absolute_error(y_val, y_pred_dt)
print(f'Decision Tree - MAE: {mae_dt}, MSE: {mse_dt}')


Decision Tree - MAE: 0.594233095728814, MSE: 0.7245255619360014


Random Forest:

In [11]:
# Fit a random forest (default hyperparameters, fixed seed) on the training split.
rf_regressor = RandomForestRegressor(random_state=random_state).fit(X_train, y_train)

# Score the fitted forest on the held-out validation split.
y_pred_rf = rf_regressor.predict(X_val)
mse_rf = mean_squared_error(y_val, y_pred_rf)
mae_rf = mean_absolute_error(y_val, y_pred_rf)
print(f'Random Forest - MAE: {mae_rf}, MSE: {mse_rf}')


Random Forest - MAE: 0.46093873321571177, MSE: 0.37944418523089524


AdaBoost:

In [12]:
# AdaBoostRegressor is imported in the notebook's import cell together with
# the other estimators, so this cell only trains and scores the model.

# Fit AdaBoost (default hyperparameters, fixed seed) on the training split.
ada_regressor = AdaBoostRegressor(random_state=random_state)
ada_regressor.fit(X_train, y_train)

# Score the fitted ensemble on the held-out validation split.
y_pred_ada = ada_regressor.predict(X_val)
mae_ada = mean_absolute_error(y_val, y_pred_ada)
mse_ada = mean_squared_error(y_val, y_pred_ada)
print(f'AdaBoost - MAE: {mae_ada}, MSE: {mse_ada}')


AdaBoost - MAE: 0.567680019897059, MSE: 0.5739244030038942


Gradient Boosting:

In [13]:
# GradientBoostingRegressor is imported in the notebook's import cell together
# with the other estimators, so this cell only trains and scores the model.

# Fit gradient boosting (default hyperparameters, fixed seed) on the training split.
gb_regressor = GradientBoostingRegressor(random_state=random_state)
gb_regressor.fit(X_train, y_train)

# Score the fitted ensemble on the held-out validation split.
y_pred_gb = gb_regressor.predict(X_val)
mae_gb = mean_absolute_error(y_val, y_pred_gb)
mse_gb = mean_squared_error(y_val, y_pred_gb)
print(f'Gradient Boosting - MAE: {mae_gb}, MSE: {mse_gb}')


Gradient Boosting - MAE: 0.4516626127750995, MSE: 0.39610445936979427


Step 9: Model Evaluation