In [266]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [267]:
# Silence every warning for the rest of the session: no category or module
# filter is given, so deprecation and convergence warnings are hidden as well.
import warnings
warnings.filterwarnings("ignore")

In [268]:
# Load the raw rental listings from a CSV file located next to the notebook.
df = pd.read_csv("./House_Rent_Dataset.csv")

In [269]:
# Preview the first five raw rows.
df.head()

Unnamed: 0,Posted On,BHK,Rent,Size,Floor,Area Type,Area Locality,City,Furnishing Status,Tenant Preferred,Bathroom,Point of Contact
0,5/18/2022,2,10000.0,1100.0,Ground out of 2,Super Area,Bandel,Kolkata,Unfurnished,Bachelors/Family,2.0,Contact Owner
1,5/13/2022,2,20000.0,,1 out of 3,Super Area,"Phool Bagan, Kankurgachi",Kolkata,Semi-Furnished,Bachelors/Family,1.0,Contact Owner
2,5/16/2022,2,17000.0,1000.0,1 out of 3,Super Area,Salt Lake City Sector 2,Kolkata,Semi-Furnished,Bachelors/Family,1.0,Contact Owner
3,7/4/2022,2,10000.0,,1 out of 2,Super Area,Dumdum Park,Kolkata,Unfurnished,Bachelors/Family,1.0,Contact Owner
4,5/9/2022,2,7500.0,850.0,1 out of 2,Carpet Area,South Dum Dum,Kolkata,Unfurnished,Bachelors,1.0,Contact Owner


In [270]:
# Remove the posting date and locality columns; they are not used further on.
# Rebinding with errors="ignore" (instead of inplace=True) keeps the cell
# re-runnable: executing it a second time no longer raises KeyError.
df = df.drop(columns=["Posted On", "Area Locality"], errors="ignore")

In [271]:
# Column dtypes and non-null counts after dropping the unused columns.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4746 entries, 0 to 4745
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   BHK                4746 non-null   int64  
 1   Rent               4741 non-null   float64
 2   Size               4738 non-null   float64
 3   Floor              4746 non-null   object 
 4   Area Type          4739 non-null   object 
 5   City               4746 non-null   object 
 6   Furnishing Status  4746 non-null   object 
 7   Tenant Preferred   4746 non-null   object 
 8   Bathroom           4742 non-null   float64
 9   Point of Contact   4746 non-null   object 
dtypes: float64(3), int64(1), object(6)
memory usage: 370.9+ KB


In [272]:
# Summary statistics of the numeric columns (before any cleaning).
df.describe()

Unnamed: 0,BHK,Rent,Size,Bathroom
count,4746.0,4741.0,4738.0,4742.0
mean,2.08386,35023.4,967.936049,1.965837
std,0.832256,78142.14,634.562635,0.884904
min,1.0,1200.0,10.0,1.0
25%,2.0,10000.0,550.0,1.0
50%,2.0,16000.0,850.0,2.0
75%,3.0,33000.0,1200.0,2.0
max,6.0,3500000.0,8000.0,10.0


In [273]:
# Number of missing values per column.
df.isnull().sum()

BHK                  0
Rent                 5
Size                 8
Floor                0
Area Type            7
City                 0
Furnishing Status    0
Tenant Preferred     0
Bathroom             4
Point of Contact     0
dtype: int64

In [274]:
# Discard every row that has a missing value in any column (modifies df in place).
df.dropna(inplace=True)

In [275]:
# Report how many fully duplicated rows exist, then remove them in place
# (the first occurrence of each duplicate group is kept by default).
duplicate_mask = df.duplicated()
print("Number of duplicate rows:", duplicate_mask.sum())
df.drop_duplicates(inplace=True)

Number of duplicate rows: 40


In [276]:
# Sanity check after cleaning: both numbers are expected to be 0.
# The first value is the grand total over all columns (sum of the per-column
# sums), so the label says "total" rather than "in each column".
print("Total number of null values:", df.isnull().sum().sum())
print("Number of duplicate rows:", df.duplicated().sum())

Number of null values in each column: 0
Number of duplicate rows: 0


In [277]:
# Every column whose dtype is not `object` is treated as numerical.
non_object_frame = df.select_dtypes(exclude="object")
num_cols = list(non_object_frame.columns)
print("Length of numerical columns:", len(num_cols))
print("Numerical columns:", num_cols)

Length of numerical columns: 4
Numerical columns: ['BHK', 'Rent', 'Size', 'Bathroom']


In [278]:
def cap_outliers_iqr(df, cols):
    """Cap outliers in the given columns using the 1.5 * IQR rule.

    For each column, values below ``Q1 - 1.5 * IQR`` are raised to that bound
    and values above ``Q3 + 1.5 * IQR`` are lowered to it. Values inside the
    bounds, and missing values, are left untouched.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is not modified; the work is done on a copy.
    cols : iterable of str
        Names of the numeric columns to cap.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which the listed columns have been capped. The
        bounds can be fractional, so a capped integer column may come back
        as float.
    """
    df_capped = df.copy()
    for col in cols:
        q1, q3 = df_capped[col].quantile([0.25, 0.75])
        iqr = q3 - q1
        # Vectorised clip replaces the original per-element Python lambda.
        df_capped[col] = df_capped[col].clip(
            lower=q1 - 1.5 * iqr,
            upper=q3 + 1.5 * iqr,
        )
    return df_capped

In [279]:
# Cap outliers in every numerical column and rebind df to the capped copy.
# NOTE(review): num_cols includes the target `Rent` and the count columns
# `BHK` / `Bathroom`, so the target is capped too and the counts can become
# fractional (e.g. 4.5) — confirm this is intended.
df = cap_outliers_iqr(df, num_cols)

# Summary statistics after capping.
print(df.describe())

               BHK          Rent         Size     Bathroom
count  4686.000000   4686.000000  4686.000000  4686.000000
mean      2.082053  24674.311353   933.925416     1.924242
std       0.816087  19949.151318   508.414594     0.756418
min       1.000000   1200.000000    10.000000     1.000000
25%       2.000000  10000.000000   555.750000     1.000000
50%       2.000000  16000.000000   850.000000     2.000000
75%       3.000000  33000.000000  1200.000000     2.000000
max       4.500000  67500.000000  2166.375000     3.500000


In [280]:
# Columns stored with the `object` dtype are treated as categorical.
object_frame = df.select_dtypes(include="object")
cat_cols = list(object_frame.columns)
print("Length of categorical columns:", len(cat_cols))
print("Categorical columns:", cat_cols)

Length of categorical columns: 6
Categorical columns: ['Floor', 'Area Type', 'City', 'Furnishing Status', 'Tenant Preferred', 'Point of Contact']


In [281]:
# Frequency table for each of the first three categorical columns.
for col in cat_cols[0:3]:
    counts = df[col].value_counts()
    print(counts)

Floor
1 out of 2         370
Ground out of 2    336
2 out of 3         310
2 out of 4         302
1 out of 3         289
                  ... 
1 out of 11          1
6 out of 29          1
28 out of 31         1
1 out of 15          1
2 out of 11          1
Name: count, Length: 480, dtype: int64
Area Type
Super Area     2414
Carpet Area    2270
Built Area        2
Name: count, dtype: int64
City
Mumbai       968
Chennai      880
Bangalore    874
Hyderabad    864
Delhi        599
Kolkata      501
Name: count, dtype: int64


In [282]:
# Frequency table for categorical columns four to six.
for col in cat_cols[3:6]:
    counts = df[col].value_counts()
    print(counts)

Furnishing Status
Semi-Furnished    2228
Unfurnished       1786
Furnished          672
Name: count, dtype: int64
Tenant Preferred
Bachelors/Family    3397
Bachelors            819
Family               470
Name: count, dtype: int64
Point of Contact
Contact Owner      3173
Contact Agent      1512
Contact Builder       1
Name: count, dtype: int64


In [283]:
# Split and clean both parts
split_floor = df["Floor"].str.split("out of", expand=True)

# Strip whitespace from both columns
split_floor[0] = split_floor[0].fillna("").str.strip()
split_floor[1] = split_floor[1].fillna("").str.strip()

In [284]:
# Translate the textual floor labels into numeric strings; the dict is applied
# to whole cell values, so entries other than these four labels are left as is.
floor_label_to_number = {
    "Ground": "0",
    "Upper Basement": "-1",
    "Basement": "-2",
    "Lower Basement": "-3",
}
split_floor[0] = split_floor[0].replace(floor_label_to_number)

In [285]:
# Convert both floor parts to nullable integers; text that cannot be parsed
# (including the "" placeholder) is coerced to a missing value.
for new_column, part in (("Current Floor", 0), ("Total Floors", 1)):
    parsed = pd.to_numeric(split_floor[part], errors="coerce")
    df[new_column] = parsed.astype("Int64")

In [286]:
# Drop rows that contain a missing value (here: floor parts that could not be
# parsed and were coerced to NA), then remove the original text column.
# Rebinding with errors="ignore" keeps the cell re-runnable: a second run no
# longer raises KeyError because "Floor" is already gone.
df = df.dropna().drop(columns=["Floor"], errors="ignore")

In [287]:
# Recompute the categorical column list now that "Floor" has been removed.
object_frame = df.select_dtypes(include="object")
cat_cols = list(object_frame.columns)
print("Length of categorical columns:", len(cat_cols))
print("Categorical columns:", cat_cols)

Length of categorical columns: 5
Categorical columns: ['Area Type', 'City', 'Furnishing Status', 'Tenant Preferred', 'Point of Contact']


In [288]:
# One-hot encode the categorical columns; drop_first=True omits the first
# level of each column so the remaining indicators are not redundant.
df_encoded = pd.get_dummies(df, columns=cat_cols, drop_first=True)

In [289]:
# Column dtypes and non-null counts of the encoded frame.
df_encoded.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4682 entries, 0 to 4745
Data columns (total 19 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   BHK                                4682 non-null   float64
 1   Rent                               4682 non-null   float64
 2   Size                               4682 non-null   float64
 3   Bathroom                           4682 non-null   float64
 4   Current Floor                      4682 non-null   Int64  
 5   Total Floors                       4682 non-null   Int64  
 6   Area Type_Carpet Area              4682 non-null   bool   
 7   Area Type_Super Area               4682 non-null   bool   
 8   City_Chennai                       4682 non-null   bool   
 9   City_Delhi                         4682 non-null   bool   
 10  City_Hyderabad                     4682 non-null   bool   
 11  City_Kolkata                       4682 non-null   bool   
 1

In [290]:
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor, GradientBoostingRegressor, VotingRegressor, StackingRegressor
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.base import clone

In [291]:
# Preview the first five rows of the encoded frame.
df_encoded.head()

Unnamed: 0,BHK,Rent,Size,Bathroom,Current Floor,Total Floors,Area Type_Carpet Area,Area Type_Super Area,City_Chennai,City_Delhi,City_Hyderabad,City_Kolkata,City_Mumbai,Furnishing Status_Semi-Furnished,Furnishing Status_Unfurnished,Tenant Preferred_Bachelors/Family,Tenant Preferred_Family,Point of Contact_Contact Builder,Point of Contact_Contact Owner
0,2.0,10000.0,1100.0,2.0,0,2,False,True,False,False,False,True,False,False,True,True,False,False,True
2,2.0,17000.0,1000.0,1.0,1,3,False,True,False,False,False,True,False,True,False,True,False,False,True
4,2.0,7500.0,850.0,1.0,1,2,True,False,False,False,False,True,False,False,True,False,False,False,True
5,2.0,7000.0,600.0,2.0,0,1,False,True,False,False,False,True,False,False,True,True,False,False,True
6,2.0,10000.0,700.0,2.0,0,4,False,True,False,False,False,True,False,False,True,False,False,False,False


In [292]:
# Standardize the numeric feature columns in place (the target `Rent` is not in the list).
# NOTE(review): the scaler is fit on all rows, i.e. before the train/test split
# done in a later cell, so test-set statistics influence the training features.
# NOTE(review): regression_analysis() below applies its own scalers on top of
# these already-standardized columns — confirm the double scaling is intended.
scaler = StandardScaler()
numerical_cols = ["BHK", "Size", "Bathroom", "Current Floor", "Total Floors"]
df_encoded[numerical_cols] = scaler.fit_transform(df_encoded[numerical_cols])

In [293]:
# Target is the rent; every other encoded column is a feature.
y = df_encoded["Rent"]
X = df_encoded.drop(columns=["Rent"])

In [294]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [295]:
# Collects {"R2": ..., "MSE": ...} per model name; filled by the cells below.
model_results = {}

In [296]:
# Linear Regression
# Fit ordinary least squares on the training split (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Test-set metrics.
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

model_results["LinearRegression"] = {"R2": r2, "MSE": mse}
print(f"🔹 Linear Regression\nR2 Score: {r2:.4f}\nMSE: {mse:.2f}")

🔹 Linear Regression
R2 Score: 0.7861
MSE: 93239157.82


In [297]:
# Compare fit quality on the training and the held-out rows for the linear model.
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("Train R2 Score:", train_r2)
print("Test R2 Score:", test_r2)

Train R2 Score: 0.7796790549441979
Test R2 Score: 0.7860553276097247


In [298]:
# SVR Regression
# Small grid over the regularisation strength C and the epsilon tube width, RBF kernel only.
# NOTE(review): the recorded output of this cell shows R2 ≈ -0.01. The target is
# left unscaled here, whereas regression_analysis() standardizes y for SVR —
# presumably that is why this run scores so low; confirm.
param_grid = {
    "C": [1, 10],
    "epsilon": [0.1, 0.2],
    "kernel": ["rbf"]
}

# 3-fold cross-validated search scored by R2, using all CPU cores.
grid = GridSearchCV(SVR(), param_grid, cv=3, scoring="r2", n_jobs=-1)
grid.fit(X_train, y_train)
y_pred = grid.predict(X_test)

# Test-set metrics.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

model_results["SVR"] = {"R2": r2, "MSE": mse}
print(f"🔹 SVR\nBest Params: {grid.best_params_}\nR2 Score: {r2:.4f}\nMSE: {mse:.2f}")

🔹 SVR
Best Params: {'C': 10, 'epsilon': 0.2, 'kernel': 'rbf'}
R2 Score: -0.0126
MSE: 441315814.10


In [299]:
# Compare fit quality on the training and the held-out rows for the tuned SVR.
y_train_pred, y_test_pred = grid.predict(X_train), grid.predict(X_test)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("Train R2 Score:", train_r2)
print("Test R2 Score:", test_r2)

Train R2 Score: 0.002895586636805225
Test R2 Score: -0.012634278093251794


In [300]:
# KNN Regression
# Search over the neighbourhood size and the neighbour weighting scheme.
param_grid = {
    "n_neighbors": [3, 5, 7],
    "weights": ["uniform", "distance"]
}

# 3-fold cross-validated search scored by R2 (fit() returns the search object).
grid = GridSearchCV(
    KNeighborsRegressor(),
    param_grid,
    cv=3,
    scoring="r2",
    n_jobs=-1,
).fit(X_train, y_train)
y_pred = grid.predict(X_test)

# Test-set metrics.
r2, mse = r2_score(y_test, y_pred), mean_squared_error(y_test, y_pred)

model_results["KNN"] = {"R2": r2, "MSE": mse}
print(f"🔹 KNN\nBest Params: {grid.best_params_}\nR2 Score: {r2:.4f}\nMSE: {mse:.2f}")

🔹 KNN
Best Params: {'n_neighbors': 7, 'weights': 'distance'}
R2 Score: 0.8126
MSE: 81670731.36


In [301]:
# Compare fit quality on the training and the held-out rows for the tuned KNN.
y_train_pred, y_test_pred = grid.predict(X_train), grid.predict(X_test)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("Train R2 Score:", train_r2)
print("Test R2 Score:", test_r2)

Train R2 Score: 0.9978941712537969
Test R2 Score: 0.8126000033332346


In [302]:
# Decision Tree Regression
# Search over the tree depth and the minimum number of samples needed to split a node.
param_grid = {
    "max_depth": [5, 10, None],
    "min_samples_split": [2, 5]
}

# The estimator is seeded (same seed as the train/test split) so that a rerun
# builds the same trees and the selected parameters and scores are reproducible.
grid = GridSearchCV(DecisionTreeRegressor(random_state=42), param_grid, cv=3, scoring="r2", n_jobs=-1)
grid.fit(X_train, y_train)
y_pred = grid.predict(X_test)

# Test-set metrics.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

model_results["DecisionTree"] = {"R2": r2, "MSE": mse}
print(f"🔹 Decision Tree\nBest Params: {grid.best_params_}\nR2 Score: {r2:.4f}\nMSE: {mse:.2f}")

🔹 Decision Tree
Best Params: {'max_depth': 5, 'min_samples_split': 5}
R2 Score: 0.8216
MSE: 77736125.25


In [303]:
# Compare fit quality on the training and the held-out rows for the tuned decision tree.
y_train_pred, y_test_pred = grid.predict(X_train), grid.predict(X_test)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("Train R2 Score:", train_r2)
print("Test R2 Score:", test_r2)

Train R2 Score: 0.8195783261284956
Test R2 Score: 0.8216282703730831


In [304]:
# Random Forest Regression
# Search over the forest size, the tree depth and the minimum split size.
param_grid = {
    "n_estimators": [50, 100],
    "max_depth": [None, 10],
    "min_samples_split": [2, 5]
}

# The forest is seeded (same seed as the train/test split): bootstrapping is
# random, so without a seed every rerun can pick different parameters and scores.
grid = GridSearchCV(RandomForestRegressor(random_state=42), param_grid, cv=3, scoring="r2", n_jobs=-1)
grid.fit(X_train, y_train)
y_pred = grid.predict(X_test)

# Test-set metrics.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

model_results["RandomForest"] = {"R2": r2, "MSE": mse}
print(f"🔹 Random Forest\nBest Params: {grid.best_params_}\nR2 Score: {r2:.4f}\nMSE: {mse:.2f}")

🔹 Random Forest
Best Params: {'max_depth': 10, 'min_samples_split': 5, 'n_estimators': 50}
R2 Score: 0.8488
MSE: 65872879.81


In [305]:
# Compare fit quality on the training and the held-out rows for the tuned random forest.
y_train_pred, y_test_pred = grid.predict(X_train), grid.predict(X_test)
train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("Train R2 Score:", train_r2)
print("Test R2 Score:", test_r2)

Train R2 Score: 0.9226349680133248
Test R2 Score: 0.8488494317186301


In [306]:
# One row per model, one column per metric.
print("🔸 Compare all models:")
results_df = pd.DataFrame.from_dict(model_results, orient="index")
print(results_df)

🔸 Compare all models:
                        R2           MSE
LinearRegression  0.786055  9.323916e+07
SVR              -0.012634  4.413158e+08
KNN               0.812600  8.167073e+07
DecisionTree      0.821628  7.773613e+07
RandomForest      0.848849  6.587288e+07


In [307]:
def _make_regressors(random_state):
    """Return the name -> unfitted estimator mapping (base models, then ensembles)."""
    return {
        "LinearRegression": LinearRegression(),
        "SVM Regression": SVR(),
        "KNN Regression": KNeighborsRegressor(),
        "DecisionTree": DecisionTreeRegressor(random_state=random_state),
        "RandomForest": RandomForestRegressor(random_state=random_state),
        "Voting": VotingRegressor([
            ("lr", LinearRegression()),
            ("dt", DecisionTreeRegressor(random_state=random_state)),
            ("rf", RandomForestRegressor(random_state=random_state)),
        ]),
        "Bagging": BaggingRegressor(
            RandomForestRegressor(random_state=random_state),
            n_estimators=50,
            random_state=random_state,
        ),
        "Boosting": GradientBoostingRegressor(n_estimators=100, random_state=random_state),
        "Stacking": StackingRegressor(
            estimators=[
                ("lr", LinearRegression()),
                ("svm", SVR()),
                ("rf", RandomForestRegressor(random_state=random_state)),
            ],
            final_estimator=LinearRegression(),
        ),
    }


def _search_spaces():
    """Return the per-model hyper-parameter grids, keyed by model name.

    Keys use the ``model__`` prefix because every estimator is the ``model``
    step of a pipeline. Models without an entry are fit with their defaults.
    """
    return {
        "SVM Regression": {
            "model__C": [1, 10],
            "model__kernel": ["rbf"],
            "model__epsilon": [0.05, 0.1],
        },
        "KNN Regression": {
            "model__n_neighbors": [7, 9],
            "model__weights": ["distance"],
            "model__p": [2],
        },
        "DecisionTree": {
            "model__max_depth": [3, 5],
            "model__min_samples_split": [5, 10],
            "model__min_samples_leaf": [3, 5],
        },
        "RandomForest": {
            "model__n_estimators": [100],
            "model__max_depth": [6],
            "model__min_samples_split": [5],
            "model__min_samples_leaf": [2],
        },
        "Bagging": {
            "model__n_estimators": [100],
            "model__max_samples": [0.8],
            "model__max_features": [0.8],
        },
        "Boosting": {
            "model__n_estimators": [100],
            "model__learning_rate": [0.05],
            "model__max_depth": [3],
        },
    }


def _evaluate_candidate(scaler_name, scaler, model_name, model, param_grid,
                        X_train, y_train, X_test, y_test, scale_target):
    """Grid-search one scaler/model combination and return its metrics as a dict."""
    # The scaler is a pipeline step, so inside cross-validation it is fit on
    # the training folds only (no statistics leak from the validation fold).
    pipe = Pipeline([
        ("scale", clone(scaler)),
        ("select", SelectKBest(score_func=f_regression, k="all")),
        ("model", model),
    ])
    grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, scoring="r2", n_jobs=-1)

    if scale_target:
        # Train on a standardized target and map predictions back to the
        # original units before scoring.
        y_scaler = StandardScaler()
        y_fit = y_scaler.fit_transform(
            np.asarray(y_train, dtype=float).reshape(-1, 1)
        ).ravel()
        grid.fit(X_train, y_fit)

        def predict(features):
            scaled = np.asarray(grid.predict(features)).reshape(-1, 1)
            return y_scaler.inverse_transform(scaled).ravel()
    else:
        grid.fit(X_train, y_train)
        predict = grid.predict

    y_pred = predict(X_test)
    y_train_pred = predict(X_train)
    test_r2 = r2_score(y_test, y_pred)

    return {
        "Scaler": scaler_name,
        "Model": model_name,
        "Best Params": grid.best_params_,
        "R2 Score": test_r2,
        "MSE": mean_squared_error(y_test, y_pred),
        "MAE": mean_absolute_error(y_test, y_pred),
        "Train Score": r2_score(y_train, y_train_pred),
        "Test Score": test_r2,
    }


def _print_group_average(df_results, key):
    """Print the mean R2 / MSE / MAE per value of ``key`` ("Model" or "Scaler")."""
    averages = (
        df_results.groupby(key)[["R2 Score", "MSE", "MAE"]]
        .mean()
        .reset_index()
        .sort_values(by="R2 Score", ascending=False)
    )
    print(f"\n📌 Average Performance per {key}:\n")
    print(averages.to_string(index=False, formatters={
        "R2 Score": "{:.4f}".format,
        "MSE": "{:.2e}".format,
        "MAE": "{:,.2f}".format,
    }))


def regression_analysis(X_train, y_train, X_test, y_test, random_state=42):
    """Compare several regressors under three feature scalers and print a report.

    Every scaler/model combination is tuned with a 5-fold ``GridSearchCV``
    (scored by R2) on the training data and then evaluated on the test data.
    The SVM and KNN regressors are trained on a standardized target; their
    predictions are converted back to the original units before scoring.

    The printed report contains: all results ranked by test R2, average
    metrics per model and per scaler, best/worst entries per scaler, the
    train vs. test R2 of each tuned model, and the best result per model.

    Parameters
    ----------
    X_train, X_test : array-like of shape (n_samples, n_features)
        Training and test features.
    y_train, y_test : array-like of shape (n_samples,)
        Training and test targets.
    random_state : int or None, default=42
        Seed passed to the tree-based and ensemble estimators so that
        repeated runs give the same numbers. ``None`` leaves them unseeded.

    Returns
    -------
    pandas.DataFrame
        One row per scaler/model combination, sorted by descending test R2.
        ``R2 Score`` is rounded to four decimals and ``MSE`` / ``MAE`` are
        formatted strings (display format); ``Train Score`` and
        ``Test Score`` hold the unrounded R2 values.
    """
    scalers = {
        "StandardScaler": StandardScaler(),
        "MinMaxScaler": MinMaxScaler(),
        "RobustScaler": RobustScaler(),
    }
    regressors = _make_regressors(random_state)
    search_spaces = _search_spaces()
    models_need_y_scaling = {"SVM Regression", "KNN Regression"}

    results = [
        _evaluate_candidate(
            scaler_name, scaler, model_name, model,
            search_spaces.get(model_name, {}),
            X_train, y_train, X_test, y_test,
            scale_target=model_name in models_need_y_scaling,
        )
        for scaler_name, scaler in scalers.items()
        for model_name, model in regressors.items()
    ]

    # Numeric results (used for all aggregations) and a display copy whose
    # metric columns are formatted; the display copy is what gets returned.
    df_results = pd.DataFrame(results)
    df_results_sorted = df_results.sort_values(by="R2 Score", ascending=False)
    df_results_sorted["R2 Score"] = df_results_sorted["R2 Score"].apply(lambda x: round(x, 4))
    df_results_sorted["MSE"] = df_results_sorted["MSE"].apply(lambda x: f"{x:.2e}")
    df_results_sorted["MAE"] = df_results_sorted["MAE"].apply(lambda x: f"{x:,.2f}")
    df_display = df_results_sorted[["Model", "Scaler", "R2 Score", "MSE", "MAE", "Best Params"]]

    print("\n📊 🔝 Top Results by R² Score:\n")
    print(df_display.to_string(index=False))

    _print_group_average(df_results, "Model")
    _print_group_average(df_results, "Scaler")

    # Best / worst entries within each scaler.
    metric_cols = ["Model", "R2 Score", "MSE", "MAE"]
    extremes = (
        ("🔺 Highest R²:", "R2 Score", "idxmax"),
        ("\n🔻 Lowest R²:", "R2 Score", "idxmin"),
        ("\n🔻 Lowest MSE:", "MSE", "idxmin"),
        ("\n🔻 Lowest MAE:", "MAE", "idxmin"),
    )
    for scaler in df_results["Scaler"].unique():
        subset = df_results[df_results["Scaler"] == scaler]
        print(f"\n📊 Scaler Results: {scaler}\n")
        for heading, column, pick in extremes:
            print(heading)
            row_label = getattr(subset[column], pick)()
            print(subset.loc[row_label, metric_cols].to_string())

    # Train vs. test R2 of the tuned models. The scores were already computed
    # during the search above, so nothing is refit here and the numbers refer
    # to the same models as the rest of the report.
    print("\n🧪 Train vs Test R² Score Comparison:\n")
    for scaler_name in scalers:
        print(f"\n🚀 Scaler: {scaler_name}")
        for _, row in df_results[df_results["Scaler"] == scaler_name].iterrows():
            print(
                f"🔹 {row['Model']:18} | Train R²: {row['Train Score']:.4f} "
                f"| Test R²: {row['Test Score']:.4f}"
            )

    # Best scaler for each model.
    best_per_model = df_results.loc[df_results.groupby("Model")["R2 Score"].idxmax()]
    best_per_model = best_per_model.sort_values(by="R2 Score", ascending=False)
    print("📈 Best result for each model:\n")
    for _, row in best_per_model.iterrows():
        print(f"🧠 Model        : {row['Model']}")
        print(f"   🔧 Scaler    : {row['Scaler']}")
        print(f"   🎯 R2 Score  : {row['R2 Score']:.4f}")
        print(f"   🧪 MSE       : {row['MSE']:.2f}")
        print(f"   📉 MAE       : {row['MAE']:.2f}")
        print(f"   ⚙️ Best Params: {row['Best Params']}\n")

    return df_results_sorted

In [308]:
# Run the full scaler x model comparison; the returned frame is sorted by test R2.
df_results = regression_analysis(X_train, y_train, X_test, y_test)


📊 🔝 Top Results by R² Score:

           Model         Scaler  R2 Score      MSE      MAE                                                                                                      Best Params
         Bagging StandardScaler    0.8507 6.51e+07 5,438.19                              {'model__max_features': 0.8, 'model__max_samples': 0.8, 'model__n_estimators': 100}
  SVM Regression StandardScaler    0.8495 6.56e+07 5,216.76                                                   {'model__C': 1, 'model__epsilon': 0.1, 'model__kernel': 'rbf'}
        Stacking   RobustScaler    0.8493 6.57e+07 5,427.29                                                                                                               {}
         Bagging   RobustScaler    0.8489 6.58e+07 5,477.61                              {'model__max_features': 0.8, 'model__max_samples': 0.8, 'model__n_estimators': 100}
         Bagging   MinMaxScaler    0.8481 6.62e+07 5,499.52                              {'model__max_fe