In [9]:
# Import necessary libraries
from sklearn.linear_model import Ridge, LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score,log_loss
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import GridSearchCV,train_test_split ,StratifiedKFold,KFold
from sklearn.svm import SVC
from xgboost import XGBClassifier,XGBRFRegressor,XGBRegressor
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.tree import DecisionTreeClassifier,DecisionTreeRegressor,ExtraTreeClassifier
from sklearn.ensemble import VotingClassifier, BaggingClassifier,BaggingRegressor,RandomForestClassifier,AdaBoostClassifier,StackingClassifier,StackingRegressor
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.svm import SVC
from sklearn.pipeline import *
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
import os

# NOTE(review): this silences every warning category for the rest of the
# session, including deprecation notices from the libraries imported above —
# confirm that this is really intended.
warnings.simplefilter('ignore')

# Raw string literal: in a normal string '\D' is an invalid escape sequence,
# which newer Python versions warn about when the cell is compiled. The
# resulting path value is unchanged.
# NOTE(review): absolute, machine-specific path — the notebook only runs where
# this directory exists.
os.chdir(r'D:\Datasets')

# Read the dataset from the working directory selected above.
conc = pd.read_csv('Concrete_Data.csv')
# Bare last expression so the notebook renders the frame as the cell output.
conc

Unnamed: 0,Cement,Blast,Fly,Water,Superplasticizer,Coarse,Fine,Age,Strength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.30
...,...,...,...,...,...,...,...,...,...
1025,276.4,116.0,90.3,179.6,8.9,870.1,768.3,28,44.28
1026,322.2,0.0,115.6,196.0,10.4,817.9,813.4,28,31.18
1027,148.5,139.4,108.6,192.7,6.1,892.4,780.0,28,23.70
1028,159.1,186.7,0.0,175.6,11.3,989.6,788.9,28,32.77


In [3]:
# Target: the 'Strength' column.
y = conc['Strength']
# Features: every remaining column of the frame.
x = conc.drop(columns='Strength')

In [21]:
# First-layer (base) regressors of the stack.
lr = LinearRegression()
knn = KNeighborsRegressor()
dtc = DecisionTreeRegressor(random_state=24)

# Second-layer regressor that is fitted on the base regressors' predictions.
xgbm = XGBRegressor(random_state=24)

# Assemble the stacking ensemble.
# Each base regressor is given as a (name, estimator) tuple.
stack = StackingRegressor(
    estimators=[
        ('LR', lr),
        ('KNN', knn),
        ('TREE', dtc),
    ],
    final_estimator=xgbm,
    # With passthrough enabled the final estimator also receives the original
    # input features, not only the base regressors' predictions.
    passthrough=True,
)

In [22]:
# List every parameter of the stack, nested estimator parameters included
# (deep=True is the default; it is spelled out here only for readability).
print(stack.get_params(deep=True))

{'cv': None, 'estimators': [('LR', LinearRegression()), ('KNN', KNeighborsRegressor()), ('TREE', DecisionTreeRegressor(random_state=24))], 'final_estimator__objective': 'reg:squarederror', 'final_estimator__base_score': None, 'final_estimator__booster': None, 'final_estimator__callbacks': None, 'final_estimator__colsample_bylevel': None, 'final_estimator__colsample_bynode': None, 'final_estimator__colsample_bytree': None, 'final_estimator__device': None, 'final_estimator__early_stopping_rounds': None, 'final_estimator__enable_categorical': False, 'final_estimator__eval_metric': None, 'final_estimator__feature_types': None, 'final_estimator__gamma': None, 'final_estimator__grow_policy': None, 'final_estimator__importance_type': None, 'final_estimator__interaction_constraints': None, 'final_estimator__learning_rate': None, 'final_estimator__max_bin': None, 'final_estimator__max_cat_threshold': None, 'final_estimator__max_cat_to_onehot': None, 'final_estimator__max_delta_step': None, 'fin

# Hyperparameter Tuning for Stacking Regressor

In [24]:
# Cross-validation splitter: 5 folds, rows shuffled before splitting, and a
# fixed seed so the same folds are produced on every run.
# Plain KFold is used because this is a regression task; there are no class
# labels to stratify on.
kfold = KFold(n_splits=5, shuffle=True, random_state=24)

# Hyperparameter grid to search over.
# Keys use the '<estimator name>__<parameter>' form: 'final_estimator__*'
# targets the XGBoost final estimator, 'KNN__*' and 'TREE__*' target the base
# regressors registered under those names in the stack.
params = {
    'final_estimator__learning_rate': [0.1, 0.3, 0.5],
    'final_estimator__max_depth': [1, 3, 5],
    'final_estimator__n_estimators': [25, 50],
    'KNN__n_neighbors': [3, 5, 9],
    'TREE__max_depth': [None, 3]
}

# Exhaustive grid search over every combination in `params`.
# 'scoring="r2"' states the metric explicitly; it is the same R^2 score a
# regressor reports by default, so the results are unchanged.
# 'cv=kfold' reuses the splitter defined above.
# 'verbose=3' prints one progress line per fit, including the fold score.
gcv = GridSearchCV(stack, param_grid=params, scoring='r2', cv=kfold, verbose=3)

# Run the search: every candidate is fitted and scored on each fold.
gcv.fit(x, y)

# Parameter combination with the best mean cross-validated score.
print("Best Parameters:", gcv.best_params_)

# Mean cross-validated R^2 of that combination. Higher is better; this is NOT
# a mean squared error.
print("Best Score:", gcv.best_score_)

# Best candidate, available because GridSearchCV refits it by default
# (refit=True) once the search is finished.
best_model = gcv.best_estimator_

Fitting 5 folds for each of 108 candidates, totalling 540 fits
[CV 1/5] END KNN__n_neighbors=3, TREE__max_depth=None, final_estimator__learning_rate=0.1, final_estimator__max_depth=1, final_estimator__n_estimators=25;, score=0.742 total time=   0.0s
[CV 2/5] END KNN__n_neighbors=3, TREE__max_depth=None, final_estimator__learning_rate=0.1, final_estimator__max_depth=1, final_estimator__n_estimators=25;, score=0.722 total time=   0.0s
[CV 3/5] END KNN__n_neighbors=3, TREE__max_depth=None, final_estimator__learning_rate=0.1, final_estimator__max_depth=1, final_estimator__n_estimators=25;, score=0.711 total time=   0.0s
[CV 4/5] END KNN__n_neighbors=3, TREE__max_depth=None, final_estimator__learning_rate=0.1, final_estimator__max_depth=1, final_estimator__n_estimators=25;, score=0.664 total time=   0.0s
[CV 5/5] END KNN__n_neighbors=3, TREE__max_depth=None, final_estimator__learning_rate=0.1, final_estimator__max_depth=1, final_estimator__n_estimators=25;, score=0.734 total time=   0.0s
[C