In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the training data and one-hot encode the categorical
# "Lifestyle Activities" column (first level dropped).
train_df = pd.read_csv('train.csv')
processed_df = pd.get_dummies(
    train_df,
    columns=["Lifestyle Activities"],
    drop_first=True,
)

# Target column, plus a feature frame with both the target and the "Id"
# column removed.
labels = processed_df["Recovery Index"]
features = processed_df.drop(columns=["Recovery Index", "Id"])

In [6]:
from sklearn.ensemble import BaggingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import Ridge, Lasso, LinearRegression


In [7]:
# Split the encoded frame into predictors (X) and target (y).
# NOTE(review): only the target is dropped here, so X still contains the
# "Id" column and every model below is fit with "Id" as a feature.
# Confirm this is intended before changing it: the submission cell
# predicts on a frame that also carries "Id".
X = processed_df.drop(columns=['Recovery Index'])
y = processed_df['Recovery Index']


In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [9]:
# Bagged ensemble of 20 Ridge regressors, each fit on a bootstrap sample
# of 80% of the training rows.
ridge_bagging_model = BaggingRegressor(
    estimator=Ridge(alpha=1.0),
    n_estimators=20,
    max_samples=0.8,
    random_state=24,
)
# Kept as the last expression so the fitted estimator is displayed.
ridge_bagging_model.fit(X_train, y_train)

0,1,2
,estimator,Ridge()
,n_estimators,20
,max_samples,0.8
,max_features,1.0
,bootstrap,True
,bootstrap_features,False
,oob_score,False
,warm_start,False
,n_jobs,
,random_state,24

0,1,2
,alpha,1.0
,fit_intercept,True
,copy_X,True
,max_iter,
,tol,0.0001
,solver,'auto'
,positive,False
,random_state,


In [11]:
def calc_and_print_metrics(true_values, predictions, model_name):
    """Print a small regression report for one model.

    Computes R², MAE, MSE and RMSE (the square root of the MSE) between
    ``true_values`` and ``predictions`` and prints them, formatted to four
    decimals, under a header containing ``model_name``. A 40-character
    dashed rule closes the report. Nothing is returned.
    """
    r2 = r2_score(true_values, predictions)
    mae = mean_absolute_error(true_values, predictions)
    mse = mean_squared_error(true_values, predictions)
    rmse = np.sqrt(mse)

    report_lines = [
        f"{model_name} Performance Metrics:",
        f"R² Score: {r2:.4f}",
        f"Mean Absolute Error (MAE): {mae:.4f}",
        f"Mean Squared Error (MSE): {mse:.4f}",
        f"Root Mean Squared Error (RMSE): {rmse:.4f}",
        "-" * 40,
    ]
    print("\n".join(report_lines))

In [12]:
# Score the bagged Ridge ensemble on the held-out split.
test_predictions_ridge_bagging = ridge_bagging_model.predict(X_test)
calc_and_print_metrics(
    true_values=y_test,
    predictions=test_predictions_ridge_bagging,
    model_name="Ridge Bagging Regressor",
)

Ridge Bagging Regressor Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5970
Mean Squared Error (MSE): 4.0265
Root Mean Squared Error (RMSE): 2.0066
----------------------------------------


In [13]:
# Refit the bagged Ridge ensemble (200 estimators) for every seed in 0..100
# and remember the seed whose model has the lowest MSE on the held-out split.
# NOTE(review): the seed is selected by scoring on X_test / y_test, so the
# winning score is an optimistic estimate; a separate validation split or
# cross-validation would give an unbiased comparison.
best_rs = -1
best_mse = float('inf')
for rs in range(101):
    ridge_bagging_model = BaggingRegressor(estimator=Ridge(alpha=1.0), n_estimators=200, random_state=rs, max_samples=0.8)
    ridge_bagging_model.fit(X_train, y_train)
    test_predictions = ridge_bagging_model.predict(X_test)

    # Keep the seed if this model beats the best MSE seen so far.
    current_mse = mean_squared_error(y_test, test_predictions)
    if current_mse < best_mse:
        best_mse = current_mse
        best_rs = rs

# best_mse holds a mean squared error (it was previously printed under an
# "RMSE" label); report it as MSE and derive the RMSE from it.
print(f"Best Random State: {best_rs} with MSE: {best_mse} (RMSE: {np.sqrt(best_mse)})")

Best Random State: 100 with RMSE: 4.027204329929526


In [14]:
# Refit the bagged Ridge ensemble with a fixed seed of 11 and 100 estimators,
# then score it on the held-out split. This model is the one bound to
# `ridge_bagging_model` when the submission file is written.
# NOTE(review): the recorded seed-search output reports seed 100 (searched
# with 200 estimators) as best, which does not match the values hard-coded
# here. Confirm which configuration is intended.
ridge_bagging_model = BaggingRegressor(
    estimator=Ridge(alpha=1.0),
    n_estimators=100,
    max_samples=0.8,
    random_state=11,
)
ridge_bagging_model.fit(X_train, y_train)
test_predictions_ridge_bagging = ridge_bagging_model.predict(X_test)
calc_and_print_metrics(
    y_test,
    test_predictions_ridge_bagging,
    "Ridge Bagging Regressor with rs = 11",
)

Ridge Bagging Regressor with rs = 11 Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5973
Mean Squared Error (MSE): 4.0269
Root Mean Squared Error (RMSE): 2.0067
----------------------------------------


In [15]:
def convert_into_csv(og_df, predictions, filename):
    """Write a two-column submission file for a set of predictions.

    Pairs the ``Id`` column of ``og_df`` with ``predictions`` (stored under
    the ``Recovery Index`` header) and saves the result, without the index,
    to ``submission_<filename>.csv`` in the current working directory.
    """
    output_path = f"submission_{filename}.csv"
    submission_columns = {'Id': og_df['Id'], 'Recovery Index': predictions}
    pd.DataFrame(submission_columns).to_csv(output_path, index=False)

In [16]:
# Build the submission for the bagged Ridge model currently bound to
# `ridge_bagging_model`.
test_df = pd.read_csv('test.csv')

# Encode the test set with the category levels observed in the training data.
# Calling get_dummies independently on each file derives the levels (and the
# level removed by drop_first) from that file alone, so a level absent from
# test.csv would silently change or remove dummy columns. Fixing the levels
# up front yields the same dummy columns as the training frame.
activity_levels = sorted(train_df["Lifestyle Activities"].dropna().unique())
test_activity = pd.Categorical(test_df["Lifestyle Activities"], categories=activity_levels)
processed_test_df = pd.get_dummies(
    test_df.assign(**{"Lifestyle Activities": test_activity}),
    columns=["Lifestyle Activities"],
    drop_first=True,
)

test_predictions_final = ridge_bagging_model.predict(processed_test_df)
convert_into_csv(test_df, test_predictions_final, "ridge_bagging")


Bagging with Linear Regression

In [17]:
# Bagged ensemble of 20 ordinary least-squares regressors, each fit on a
# bootstrap sample of 80% of the training rows.
linear_bagging_model = BaggingRegressor(
    estimator=LinearRegression(),
    n_estimators=20,
    max_samples=0.8,
    random_state=42,
)
# Kept as the last expression so the fitted estimator is displayed.
linear_bagging_model.fit(X_train, y_train)

0,1,2
,estimator,LinearRegression()
,n_estimators,20
,max_samples,0.8
,max_features,1.0
,bootstrap,True
,bootstrap_features,False
,oob_score,False
,warm_start,False
,n_jobs,
,random_state,42

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


In [18]:
# Score the bagged LinearRegression ensemble on the held-out split.
test_predictions_linear_bagging = linear_bagging_model.predict(X_test)
# These predictions come from the LinearRegression ensemble, not a Lasso
# model; the old name is kept only as an alias so any code that still
# refers to it keeps working.
test_prediction_lasso_bagging = test_predictions_linear_bagging
calc_and_print_metrics(y_test, test_predictions_linear_bagging, "Linear Bagging Regressor")

Linear Bagging Regressor Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5979
Mean Squared Error (MSE): 4.0290
Root Mean Squared Error (RMSE): 2.0072
----------------------------------------


Alpha hyperparameter tuning

In [19]:
# Sweep the Ridge regularisation strength and print held-out metrics for
# each value, keeping every other ensemble setting fixed.
# NOTE(review): each iteration rebinds `ridge_bagging_model`, so after this
# cell the name refers to the last model fitted here (alpha=10.0), not the
# one used for the submission file. The recorded output for the visible runs
# shows identical metrics to four decimals across alphas.
alphas = [0.1, 0.5, 1.0, 5.0, 10.0]

for alpha in alphas:
    ridge_bagging_model = BaggingRegressor(
        estimator=Ridge(alpha=alpha),
        n_estimators=20,
        max_samples=0.8,
        random_state=42,
    )
    ridge_bagging_model.fit(X_train, y_train)
    test_predictions = ridge_bagging_model.predict(X_test)
    calc_and_print_metrics(
        y_test,
        test_predictions,
        f"Ridge Bagging Regressor (alpha={alpha})",
    )

Ridge Bagging Regressor (alpha=0.1) Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5979
Mean Squared Error (MSE): 4.0290
Root Mean Squared Error (RMSE): 2.0072
----------------------------------------
Ridge Bagging Regressor (alpha=0.5) Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5979
Mean Squared Error (MSE): 4.0290
Root Mean Squared Error (RMSE): 2.0072
----------------------------------------
Ridge Bagging Regressor (alpha=1.0) Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5979
Mean Squared Error (MSE): 4.0290
Root Mean Squared Error (RMSE): 2.0072
----------------------------------------
Ridge Bagging Regressor (alpha=5.0) Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5979
Mean Squared Error (MSE): 4.0290
Root Mean Squared Error (RMSE): 2.0072
----------------------------------------
Ridge Bagging Regressor (alpha=10.0) Performance Metrics:
R² Score: 0.9891
Mean Absolute Error (MAE): 1.5979
Mean Sq