### Import

In [1]:
import numpy as np
import pandas as pd
import sklearn
import os
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [5]:
df=pd.read_table('H:\SELF\Yashu\Files\iris.csv',
                sep=',')
df.head(5)

Unnamed: 0.1,Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
0,1,5.1,3.5,1.4,0.2,setosa
1,2,4.9,3.0,1.4,0.2,setosa
2,3,4.7,3.2,1.3,0.2,setosa
3,4,4.6,3.1,1.5,0.2,setosa
4,5,5.0,3.6,1.4,0.2,setosa


In [6]:
# encoding the class to integers
X = df.iloc[:, :-1].values
Y = df.iloc[:, -1].values
# encode the class with integers
le = preprocessing.LabelEncoder()
Y = le.fit_transform(Y)

In [8]:
# ideal practice is to use test as 20% - 30% of training data defined by test_size in train_test_split()
# random_state is required to avoid sequential biasness in the data distribution
def data_split(X, Y):
    X_train, X_test, Y_train, Y_test = train_test_split( X, Y, test_size=0.30, random_state = 10)
    return X_train, X_test, Y_train, Y_test

X_train, X_test, Y_train, Y_test = data_split(X, Y)
print (X_train.shape, X_test.shape)

(105, 5) (45, 5)


In [9]:
# this class takes care for scaling the features to the scale of 0-1 we are doing the scaling with this cap because we use sigmoid activation fxn in logistic which 
# also has the range from 0-1
class Normalizer:

    def __init__(self):
        self.sc = StandardScaler()
    
    def scale(self, X, dtype):
        if dtype=='train':
            XX = self.sc.fit_transform(X)
        elif dtype=='test':
            XX = self.sc.transform(X)
        else:
            return None
        return XX

In [10]:
norm = Normalizer()
X_train = norm.scale(X_train, 'train')
X_test = norm.scale(X_test, 'test')

### Model 1 (Logistic)  

In [12]:
from sklearn.linear_model import LogisticRegression
# train the model
classifier = LogisticRegression()
model = classifier.fit(X_train, Y_train)
predictions_lr = model.predict_proba(X_test)
print (sklearn.metrics.accuracy_score(Y_test, np.argmax(predictions_lr, axis=1)))

0.8888888888888888




### Model 2 (Decision Tree)

In [14]:
from sklearn import tree
# train the model
classifier = tree.DecisionTreeClassifier()
model = classifier.fit(X_train, Y_train)
predictions_dtree = model.predict_proba(X_test)
print (sklearn.metrics.accuracy_score(Y_test, np.argmax(predictions_dtree, axis=1)))

1.0


### Model 3 (KNN)

In [16]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=3)
model = knn.fit(X_train, Y_train)
predictions_knn = model.predict_proba(X_test)
print (sklearn.metrics.accuracy_score(Y_test, np.argmax(predictions_knn, axis=1)))

1.0


### Meta Model (Esemble)

In [17]:
class Ensemble(object):
    """
    Implements averaging voting ensemble technique
    Each model is given equal weight
    """
    def __init__(self, samples=None, classes=None, classifiers=None):
        self.classes = classes
        self.samples = samples
        self.classifiers = classifiers
    
    def mixmatch(self, predictions):
        if not self.classifiers:
            self.classifiers = len(predictions)
            
        if not self.samples:
            self.samples = len(predictions[0])
        
        if not self.classes:
            self.classes = len(predictions[0][0])
        
        final_pred = np.array([0]*self.classes)
        for s in range(self.samples):
            s_pred = np.array([0]*self.classes)
            for c in range(self.classifiers):
                pred = predictions[c][s]
                s_pred = np.vstack((s_pred, pred))
            s_pred = s_pred[1:, :]
            s_pred_avg = np.average(s_pred, axis=0)
            final_pred = np.vstack((final_pred, s_pred_avg))
        return final_pred[1:, :]

In [18]:
ensemble = Ensemble(45, 3, 3)
pred = np.argmax(ensemble.mixmatch([predictions_lr, predictions_dtree, predictions_knn]), axis=1)
print (sklearn.metrics.accuracy_score(Y_test, pred))

1.0
