**Stacking Ensamble Learning**

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.model_selection import train_test_split

In [2]:
#individual learners of the model
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
#1) Logistic Regression
model1 = LogisticRegression()
#2) Naive Bayes
model2 = GaussianNB()
#3) Decision Tree
model3 = DecisionTreeClassifier()

In [3]:
#Meta Learner (the blender)
metaLearner = LogisticRegression()

In [4]:
df = datasets.load_breast_cancer()

In [5]:
X = df.data
y = df.target

In [6]:
#Splitting Dataset into Train and Test
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size = 0.25, random_state=1)

#Splitting Train Dataset into training and holdout 
xtraining,x_holdout,ytraining,y_holdout = train_test_split(X_train,y_train,test_size=0.5)

In [7]:
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
xtraining = sc.fit_transform(xtraining)
X_test = sc.transform(X_test)
x_holdout = sc.transform(x_holdout)

In [8]:
#training the initial learners
model1.fit(xtraining,ytraining)
model2.fit(xtraining,ytraining)
model3.fit(xtraining,ytraining)

DecisionTreeClassifier()

In [9]:
#making predictions for the validation data
train_preds1 = model1.predict(x_holdout)
train_preds2 = model2.predict(x_holdout)
train_preds3 = model3.predict(x_holdout)

#making predictions for the test data
test_preds1 = model1.predict(X_test)
test_preds2 = model2.predict(X_test)
test_preds3 = model3.predict(X_test)

In [10]:
#making a new dataset for training our final model by stacking the predictions on the validation data
train_stack = np.column_stack((train_preds1,train_preds2,train_preds3))

#making the final test set for our final model by stacking the predictions on the test data
test_stack = np.column_stack((test_preds1,test_preds2,test_preds3))



In [11]:
metaLearner.fit(train_stack, y_holdout)

LogisticRegression()

In [12]:
final_predictions = metaLearner.predict(test_stack)

In [13]:
print(accuracy_score(y_test, final_predictions))

0.958041958041958


**Bagging and RandomForest**

In [14]:
#import libraries
from sklearn.ensemble import BaggingClassifier

In [15]:
#Load Dataset
df1 = datasets.load_breast_cancer()

In [16]:
X1 = df1.data
y1 = df1.target

In [17]:
#Splitting Dataset into Train and Test
X_train1, X_test1, y_train1, y_test1 = train_test_split( X1, y1, test_size = 0.20)

In [18]:
model = BaggingClassifier(DecisionTreeClassifier())
model.fit(X_train1, y_train1)

BaggingClassifier(base_estimator=DecisionTreeClassifier())

In [19]:
model.score(X_test1, y_test1)

0.956140350877193

**AdaBoost**

In [20]:
from sklearn.ensemble import AdaBoostClassifier

In [21]:
#Load Dataset
df2 = datasets.load_breast_cancer()

In [22]:
X2 = df2.data
y2 = df2.target

In [23]:
#Splitting Dataset into Train and Test
X_train2, X_test2, y_train2, y_test2 = train_test_split( X2, y2, test_size = 0.20)

In [24]:
adaboost = BaggingClassifier(DecisionTreeClassifier())
adaboost.fit(X_train2, y_train2)

BaggingClassifier(base_estimator=DecisionTreeClassifier())

In [25]:
adaboost.score(X_test2, y_test2)

0.9298245614035088

**Q4 Adaboost regressor on Diabetes Dataset. What is the MSE loss value of the trained model?**

In [26]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import mean_squared_error

In [27]:
df3 = datasets.load_diabetes()

In [28]:
X3 = df3.data
y3 = df3.target

In [29]:
X_train3, X_test3, y_train3, y_test3 = train_test_split( X3, y3, test_size = 0.20)

In [30]:
clf = AdaBoostRegressor().fit(X_train3, y_train3)
y_pred = clf.predict(X_test3)

In [31]:
print(clf.score(X_test3, y_test3))

0.4670943147399125


In [32]:
#MSE LOSS
from sklearn.metrics import mean_squared_error
mean_squared_error(y_test3,y_pred)

3354.2951455799866

**Q6 Implement Adaboost Regression on concrete_data.csv.**

In [35]:
data = pd.read_csv("/content/drive/MyDrive/Sem 6/ML/Lab 8/Contrete.csv")


In [36]:
# All rows, all columns except last
X4 = data.iloc[:, :-1].values
# Only last column
y4 = data.iloc[:, -1].values

In [37]:
X_train4, X_test4, y_train4, y_test4 = train_test_split( X4, y4, random_state = 117, test_size = 0.5)

In [38]:
model = AdaBoostRegressor()
model.fit(X_train4, y_train4)
model.score(X_test4,y_test4)

0.77674323433603