# Bagged Ensemble Classifier Mode Data

# Import Libraries

In [1]:
import pandas as pd, numpy as np
from sklearn.ensemble import BaggingClassifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from scipy import stats

In [2]:
# Read the train and test splits; the first CSV column is used as the index.
df_train, df_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('mode_train.csv', 'mode_test.csv')
)

In [3]:
# Sanity check: both frames must have the same number of columns, because the
# features and the target are separated by column position further down.
df_train.shape[1] == df_test.shape[1]

True

# Separate Data into X and y

In [4]:
# Every column except the last goes to X; the last column is y.
X_train, y_train = df_train.iloc[:, :-1], df_train.iloc[:, -1]

In [5]:
# Same positional split for the test frame: all but the last column -> X,
# last column -> y.
X_test, y_test = df_test.iloc[:, :-1], df_test.iloc[:, -1]

# Individual Algorithms

In [6]:
def rforest(X_train, y_train, X_test, random_state=None):
    """Fit a bagged random forest on the training data and predict ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed unchanged to ``fit``.
    X_test
        Features to predict on.
    random_state : optional
        Forwarded to ``BaggingClassifier``. The default ``None`` leaves the
        run unseeded, exactly as before this parameter existed.

    Returns
    -------
    Whatever ``BaggingClassifier.predict(X_test)`` returns.
    """
    # The wrapped estimator is passed positionally on purpose: its keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
    # old name was removed in 1.4, so the positional form works with both.
    model = BaggingClassifier(RandomForestClassifier(), random_state=random_state)
    model.fit(X_train, y_train)
    return model.predict(X_test)

In [7]:
def knn(X_train, y_train, X_test, random_state=None):
    """Fit a bagged k-nearest-neighbours classifier and predict ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed unchanged to ``fit``.
    X_test
        Features to predict on.
    random_state : optional
        Forwarded to ``BaggingClassifier``. The default ``None`` leaves the
        run unseeded, exactly as before this parameter existed.

    Returns
    -------
    Whatever ``BaggingClassifier.predict(X_test)`` returns.
    """
    # The wrapped estimator is passed positionally on purpose: its keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
    # old name was removed in 1.4, so the positional form works with both.
    model = BaggingClassifier(KNeighborsClassifier(), random_state=random_state)
    model.fit(X_train, y_train)
    return model.predict(X_test)

In [8]:
def nb(X_train, y_train, X_test, random_state=None):
    """Fit a bagged Gaussian naive Bayes classifier and predict ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed unchanged to ``fit``.
    X_test
        Features to predict on.
    random_state : optional
        Forwarded to ``BaggingClassifier``. The default ``None`` leaves the
        run unseeded, exactly as before this parameter existed.

    Returns
    -------
    Whatever ``BaggingClassifier.predict(X_test)`` returns.
    """
    # The wrapped estimator is passed positionally on purpose: its keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
    # old name was removed in 1.4, so the positional form works with both.
    model = BaggingClassifier(GaussianNB(), random_state=random_state)
    model.fit(X_train, y_train)
    return model.predict(X_test)


In [9]:
def lr(X_train, y_train, X_test, random_state=None):
    """Fit a bagged logistic regression and predict ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed unchanged to ``fit``.
    X_test
        Features to predict on.
    random_state : optional
        Forwarded to ``BaggingClassifier``. The default ``None`` leaves the
        run unseeded, exactly as before this parameter existed.

    Returns
    -------
    Whatever ``BaggingClassifier.predict(X_test)`` returns.
    """
    # The wrapped estimator is passed positionally on purpose: its keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
    # old name was removed in 1.4, so the positional form works with both.
    model = BaggingClassifier(LogisticRegression(), random_state=random_state)
    model.fit(X_train, y_train)
    return model.predict(X_test)

In [10]:
def svm(X_train, y_train, X_test, random_state=None):
    """Fit a bagged support-vector classifier and predict ``X_test``.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, passed unchanged to ``fit``.
    X_test
        Features to predict on.
    random_state : optional
        Forwarded to ``BaggingClassifier``. The default ``None`` leaves the
        run unseeded, exactly as before this parameter existed.

    Returns
    -------
    Whatever ``BaggingClassifier.predict(X_test)`` returns.
    """
    # The wrapped estimator is passed positionally on purpose: its keyword was
    # renamed from `base_estimator` to `estimator` in scikit-learn 1.2 and the
    # old name was removed in 1.4, so the positional form works with both.
    model = BaggingClassifier(SVC(), random_state=random_state)
    model.fit(X_train, y_train)
    return model.predict(X_test)

# Ensemble Function


In [11]:
def ensemble(X_train, y_train, X_test):
    """Majority-vote the predictions of the bagged forest, k-NN and
    logistic-regression models.

    Each of ``rforest``, ``knn`` and ``lr`` is fitted on the training data and
    asked to predict ``X_test``; for every test sample the most frequent label
    among the three predictions is kept. ``nb`` and ``svm`` are not part of
    the vote.

    Parameters
    ----------
    X_train, y_train
        Training features and targets, forwarded to each model function.
    X_test
        Features to predict on.

    Returns
    -------
    list
        One voted label per prediction. If several labels are tied for most
        frequent, the smallest one (in sort order) is chosen.
    """
    # One row per model, one column per test sample. The call order
    # (forest, k-NN, logistic regression) is kept as-is.
    votes = np.array([
        rforest(X_train, y_train, X_test),
        knn(X_train, y_train, X_test),
        lr(X_train, y_train, X_test),
    ])

    final_y = []
    for sample_votes in votes.T:
        # np.unique returns the labels sorted, and argmax picks the first
        # maximum, so ties resolve to the smallest label. This replaces
        # `stats.mode(...)[0][0]`, which fails on SciPy >= 1.11 where `mode`
        # returns scalars for 1-D input.
        labels, counts = np.unique(sample_votes, return_counts=True)
        final_y.append(labels[np.argmax(counts)])

    return final_y

# Mode Bagged Ensemble

In [12]:
# Fit the three voting models on the training split and collect the
# majority-vote label for every row of the test split.
trial_run = ensemble(X_train=X_train, y_train=y_train, X_test=X_test)

In [13]:
for i in range(4):
    print(['tn', 'fp', 'fn', 'tp'][i],confusion_matrix(trial_run, y_test).ravel()[i])

tn 11540
fp 1972
fn 895
tp 1874


In [14]:
# Ground truth first, predictions second, as sklearn's (y_true, y_pred)
# signature expects. Accuracy is symmetric, so the value is unaffected.
accuracy_score(y_test, trial_run)

0.82390516553037285

In [15]:
# Ground truth first, predictions second. Precision is not symmetric: with the
# arguments reversed this call returns the recall instead.
precision_score(y_test, trial_run)

0.67677862044059223

In [16]:
# Ground truth first, predictions second. Recall is not symmetric: with the
# arguments reversed this call returns the precision instead.
recall_score(y_test, trial_run)

0.48725949037961519

In [17]:
# Ground truth first, predictions second, as sklearn's (y_true, y_pred)
# signature expects. For these binary labels F1 is symmetric in its two
# arguments, so the value is unaffected.
f1_score(y_test, trial_run)

0.56659108087679522