### Ensemble Learning

In [1]:
from sklearn import datasets
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from itertools import product
from sklearn.ensemble import VotingClassifier

In [3]:
# Load the iris dataset and keep a two-feature subset: columns 1 and 2
# (0-based) of the feature matrix; see iris.feature_names for their names.
iris = datasets.load_iris()
X = iris.data[:, 1:3]
y = iris.target

In [5]:
# Base learners for the hard-voting ensemble. The two estimators that take a
# random_state are seeded so the scores are reproducible.
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(n_estimators=50, random_state=1)
clf3 = GaussianNB()

# Hard voting: the ensemble's prediction is the majority class label among
# the base learners' predictions.
eclf = VotingClassifier(
    estimators=[('lr', clf1), ('rf', clf2), ('gnb', clf3)],
    voting='hard',
)

# No explicit .fit(X, y) is needed before scoring: cross_val_score clones each
# estimator and fits the clone on every training fold, so a prior fit on the
# full data would never be used by the reported scores.
for clf, label in zip(
    [clf1, clf2, clf3, eclf],
    ['Logistic Regression', 'Random Forest', 'Naive Bayes', 'Ensemble'],
):
    # 5-fold cross-validated accuracy; print mean and standard deviation.
    scores = cross_val_score(clf, X, y, scoring='accuracy', cv=5)
    print('Accuracy: %0.2f (+/- %0.2f) [%s]' % (scores.mean(), scores.std(), label))

Accuracy: 0.95 (+/- 0.04) [Logistic Regression]
Accuracy: 0.94 (+/- 0.04) [Random Forest]
Accuracy: 0.91 (+/- 0.04) [Naive Bayes]
Accuracy: 0.95 (+/- 0.04) [Ensemble]


In [7]:
# Base learners for the soft-voting ensemble. The tree and the SVC's
# probability calibration both use randomness, so they are seeded (same seed
# as the hard-voting cell) to make the scores reproducible across runs.
clf1 = DecisionTreeClassifier(max_depth=4, random_state=1)
clf2 = KNeighborsClassifier(n_neighbors=7)
# probability=True is required so the SVC exposes predict_proba for soft voting.
clf3 = SVC(kernel='rbf', probability=True, random_state=1)

# Soft voting: average the predicted class probabilities, weighted 2:1:2
# (tree : k-NN : SVC), and predict the class with the highest weighted mean.
eclf = VotingClassifier(
    estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)],
    voting='soft',
    weights=[2, 1, 2],
)

# No explicit .fit(X, y) is needed before scoring: cross_val_score clones each
# estimator and fits the clone on every training fold, so a prior fit on the
# full data would never be used by the reported scores.
for clf, label in zip(
    [clf1, clf2, clf3, eclf],
    ['Decision Tree', 'K-NN', 'SVM', 'Ensemble'],
):
    # 5-fold cross-validated accuracy; print mean and standard deviation.
    scores = cross_val_score(clf, X, y, scoring='accuracy', cv=5)
    print('Accuracy: %0.2f (+/- %0.2f) [%s]' % (scores.mean(), scores.std(), label))

Accuracy: 0.92 (+/- 0.06) [Decisin Tree]
Accuracy: 0.95 (+/- 0.04) [K-NN]
Accuracy: 0.95 (+/- 0.04) [SVM]
Accuracy: 0.93 (+/- 0.07) [Ensemble]


In [9]:
# Application on the digits dataset.
# Note: X and y are rebound here, replacing the iris arrays used above.
digits = datasets.load_digits()
X, y = digits.data, digits.target

In [11]:
# Base learners for the soft-voting ensemble on the digits data. The tree and
# the SVC's probability calibration both use randomness, so they are seeded
# to make the scores reproducible across runs.
clf1 = DecisionTreeClassifier(max_depth=4, random_state=1)
clf2 = KNeighborsClassifier(n_neighbors=7)
# probability=True is required so the SVC exposes predict_proba for soft voting.
clf3 = SVC(kernel='rbf', probability=True, random_state=1)

# Soft voting: average the predicted class probabilities, weighted 1:3:3
# (tree : k-NN : SVC), and predict the class with the highest weighted mean.
eclf = VotingClassifier(
    estimators=[('dt', clf1), ('knn', clf2), ('svc', clf3)],
    voting='soft',
    weights=[1, 3, 3],
)

# cross_val_score clones each estimator and fits the clone on every training
# fold, so the scores below do not depend on any prior fit.
for clf, label in zip(
    [clf1, clf2, clf3, eclf],
    ['Decision Tree', 'K-NN', 'SVM', 'Ensemble'],
):
    # 5-fold cross-validated accuracy; print mean and standard deviation.
    scores = cross_val_score(clf, X, y, scoring='accuracy', cv=5)
    print('Accuracy: %0.2f (+/- %0.2f) [%s]' % (scores.mean(), scores.std(), label))

# Final fit on the full data so the models can be used for prediction after
# this cell; the cross-validation above worked on clones and left these
# objects unfitted.
clf1 = clf1.fit(X, y)
clf2 = clf2.fit(X, y)
clf3 = clf3.fit(X, y)
eclf = eclf.fit(X, y)

Accuracy: 0.55 (+/- 0.05) [Decisin Tree]
Accuracy: 0.96 (+/- 0.02) [K-NN]
Accuracy: 0.96 (+/- 0.02) [SVM]
Accuracy: 0.97 (+/- 0.01) [Ensemble]
