In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import ElasticNet
from sklearn.preprocessing import MinMaxScaler
from scipy.optimize import minimize

# Load the dataset
data = pd.read_csv("Boston House Prices Dataset.csv")

# Handling Outliers
def cap_outliers(df, column):
    Q1 = df[column].quantile(0.25)
    Q3 = df[column].quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    df[column] = np.where(df[column] > upper_bound, upper_bound, df[column])
    df[column] = np.where(df[column] < lower_bound, lower_bound, df[column])

for column in ['CRIM', 'ZN', 'INDUS', 'NOX', 'RM', 'AGE', 'DIS', 'RAD', 'TAX', 'PTRATIO', 'B', 'LSTAT', 'MEDV']:
    cap_outliers(data, column)

# Feature Scaling - Normalization:
X = data.drop(columns=['MEDV'])
y = data['MEDV']
scaler = MinMaxScaler()
X_normalized = scaler.fit_transform(X)
data_normalized = pd.DataFrame(X_normalized, columns=X.columns)
data_normalized['MEDV'] = y

# Feature Selection:
selected_features = ['RM', 'PTRATIO', 'LSTAT', 'NOX', 'CRIM', 'TAX', 'INDUS']
X = data[selected_features]
y = data['MEDV']

# Split the Dataset into Training and Test Sets:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=52)

# Define Base Models for Weighted Ensemble:
rf_model = RandomForestRegressor(n_estimators=300, max_depth=15, min_samples_split=3, min_samples_leaf=2, max_features='sqrt', bootstrap=True, random_state=52)

xgb_model = XGBRegressor(n_estimators=300, learning_rate=0.03, max_depth=6, subsample=0.9, colsample_bytree=0.85, random_state=52)

elastic_net_model = ElasticNet(alpha=0.1, l1_ratio=0.5, random_state=52)

# Train base models
rf_model.fit(X_train, y_train)
xgb_model.fit(X_train, y_train)
elastic_net_model.fit(X_train, y_train)

# Get predictions from the base models:
rf_preds_test = rf_model.predict(X_test)
xgb_preds_test = xgb_model.predict(X_test)
elastic_net_preds_test = elastic_net_model.predict(X_test)
predictions = np.column_stack((rf_preds_test, xgb_preds_test, elastic_net_preds_test))

# Function to optimize weights
def loss_function(weights):
    weighted_pred = np.dot(predictions, weights)
    return mean_squared_error(y_test, weighted_pred)

# Initial weights
def constraint(weights):
    return np.sum(weights) - 1

initial_weights = [1/3, 1/3, 1/3]
bounds = [(0,1)] * 3
constraints = ({'type': 'eq', 'fun': constraint})

# Optimize weights
result = minimize(loss_function, initial_weights, bounds=bounds, constraints=constraints)
best_weights = result.x

# Final weighted prediction
final_preds = np.dot(predictions, best_weights)

# Evaluate performance
mae = mean_absolute_error(y_test, final_preds)
mse = mean_squared_error(y_test, final_preds)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, final_preds)

print(f"Optimized Weights: {best_weights}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-Squared (R²): {r2}")


Optimized Weights: [7.77156117e-16 1.00000000e+00 5.82867088e-15]
Mean Absolute Error (MAE): 1.765175806307326
Mean Squared Error (MSE): 6.823825559875199
Root Mean Squared Error (RMSE): 2.612245310049422
R-Squared (R²): 0.9021308987360973
