# Custom Ensemble Machine Learning Algorithms

In [1]:
# Generate a synthetic binary-classification dataset: 1000 samples with
# 20 features (15 informative, 5 redundant). The seed is fixed so the same
# data is produced on every run.
from sklearn.datasets import make_classification

X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=1,
)

In [2]:
from collections import Counter
# Tally how many samples carry each class label to check the class balance.
count = Counter(y)
count

Counter({0: 501, 1: 499})

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,VotingClassifier
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score,RepeatedStratifiedKFold
from sklearn.pipeline import Pipeline

# Base learners for the voting ensemble. Each estimator is wrapped in a
# single-step Pipeline whose only step is named 'm'.
# NOTE(review): the decision tree and the random forest are created without
# a random_state, so scores will presumably differ between runs -- confirm
# whether these models should be seeded like the data and the CV splitter.
decision_tree = Pipeline(steps=[('m', DecisionTreeClassifier())])
random_forest = Pipeline(steps=[('m', RandomForestClassifier())])
svc = Pipeline(steps=[('m', SVC())])

# (name, estimator) tuples, in the form VotingClassifier takes as `estimators`.
models = [
    ('decision', decision_tree),
    ('randomforest', random_forest),
    ('svc', svc),
]

# voting='hard': the ensemble combines the base learners' predicted class
# labels by majority rule.
ensemble = VotingClassifier(estimators=models, voting='hard')

In [4]:
# Display the (name, estimator) pairs that make up the ensemble.
models

[('decision', Pipeline(steps=[('m', DecisionTreeClassifier())])),
 ('randomforest', Pipeline(steps=[('m', RandomForestClassifier())])),
 ('svc', Pipeline(steps=[('m', SVC())]))]

In [5]:
# Display the configured (not yet fitted) voting classifier.
ensemble

VotingClassifier(estimators=[('decision',
                              Pipeline(steps=[('m',
                                               DecisionTreeClassifier())])),
                             ('randomforest',
                              Pipeline(steps=[('m',
                                               RandomForestClassifier())])),
                             ('svc', Pipeline(steps=[('m', SVC())]))])

In [6]:
# Stratified 10-fold cross-validation repeated 3 times (30 train/test splits),
# with a fixed seed so the splits are the same on every run.
cv = RepeatedStratifiedKFold(
    n_splits=10,
    n_repeats=3,
    random_state=1,
)

# Accuracy of the ensemble on each split; n_jobs=-1 requests all available cores.
n_scores = cross_val_score(
    ensemble,
    X,
    y,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)

In [7]:
# Display the per-split accuracy scores returned by cross_val_score.
n_scores

array([0.93, 0.95, 0.95, 0.92, 0.9 , 0.96, 0.93, 0.92, 0.95, 0.94, 0.97,
       0.93, 0.91, 0.94, 0.92, 0.92, 0.93, 0.94, 0.95, 0.95, 0.95, 0.98,
       0.97, 0.98, 0.92, 0.88, 0.97, 0.94, 0.93, 0.89])

In [8]:
# Mean accuracy over all cross-validation splits.
n_scores.mean()

0.9373333333333335