# Ensemble Classifier KNN Data

# Import Libraries

In [17]:
import pandas as pd, numpy as np
from sklearn.ensemble import BaggingClassifier

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

from scipy import stats

In [18]:
# Load the training and test tables from CSV files in the working directory.
df_train, df_test = (
    pd.read_csv(csv_name)
    for csv_name in ('train_knn_norm.csv', 'test_knn_norm.csv')
)

In [19]:
# Sanity check: both tables must have the same number of columns.
df_train.shape[1] == df_test.shape[1]

True

In [20]:
# Row counts of the training and test tables.
print(df_train.shape[0], df_test.shape[0])

32561 16281


# Separate Data into X and y

In [21]:
# Every column except the last is a feature; the last column is the label.
X_train, y_train = df_train.iloc[:, :-1], df_train.iloc[:, -1]

In [22]:
# Same split for the test table: features first, label in the last column.
X_test, y_test = df_test.iloc[:, :-1], df_test.iloc[:, -1]

# Individual Algorithms

In [23]:
def rforest(X_train, y_train, X_test, random_state=None):
    """Fit a bagged random-forest classifier and predict labels for X_test.

    A BaggingClassifier (with its default number of bagged estimators) wraps
    a RandomForestClassifier of 90 trees. The model is fitted on the training
    data and then used to predict X_test.

    Parameters
    ----------
    X_train : training feature matrix.
    y_train : training labels.
    X_test : feature matrix to predict.
    random_state : optional seed forwarded to BaggingClassifier. The default
        (None) keeps the original, unseeded behaviour.

    Returns
    -------
    The predicted labels for X_test, as returned by ``predict``.
    """
    # The estimator is passed positionally on purpose: the keyword was renamed
    # from ``base_estimator`` to ``estimator`` in scikit-learn 1.2 (old name
    # removed in 1.4), while the first positional slot is the same in both.
    model = BaggingClassifier(RandomForestClassifier(n_estimators=90),
                              random_state=random_state)
    model.fit(X_train, y_train)

    return model.predict(X_test)

In [24]:
def knn(X_train, y_train, X_test, random_state=None):
    """Fit a bagged k-nearest-neighbours classifier and predict labels for X_test.

    A BaggingClassifier (with its default number of bagged estimators) wraps
    a KNeighborsClassifier using 50 neighbours. The model is fitted on the
    training data and then used to predict X_test.

    Parameters
    ----------
    X_train : training feature matrix.
    y_train : training labels.
    X_test : feature matrix to predict.
    random_state : optional seed forwarded to BaggingClassifier. The default
        (None) keeps the original, unseeded behaviour.

    Returns
    -------
    The predicted labels for X_test, as returned by ``predict``.
    """
    # The estimator is passed positionally on purpose: the keyword was renamed
    # from ``base_estimator`` to ``estimator`` in scikit-learn 1.2 (old name
    # removed in 1.4), while the first positional slot is the same in both.
    model = BaggingClassifier(KNeighborsClassifier(n_neighbors=50),
                              random_state=random_state)
    model.fit(X_train, y_train)

    return model.predict(X_test)

In [25]:
def nb(X_train, y_train, X_test, random_state=None):
    """Fit a bagged Gaussian naive-Bayes classifier and predict labels for X_test.

    A BaggingClassifier (with its default number of bagged estimators) wraps
    a GaussianNB model. The model is fitted on the training data and then
    used to predict X_test.

    Parameters
    ----------
    X_train : training feature matrix.
    y_train : training labels.
    X_test : feature matrix to predict.
    random_state : optional seed forwarded to BaggingClassifier. The default
        (None) keeps the original, unseeded behaviour.

    Returns
    -------
    The predicted labels for X_test, as returned by ``predict``.
    """
    # The estimator is passed positionally on purpose: the keyword was renamed
    # from ``base_estimator`` to ``estimator`` in scikit-learn 1.2 (old name
    # removed in 1.4), while the first positional slot is the same in both.
    model = BaggingClassifier(GaussianNB(), random_state=random_state)
    model.fit(X_train, y_train)

    return model.predict(X_test)


# Ensemble Function


In [26]:
def ensemble(X_train, y_train, X_test):
    """Predict X_test by majority vote of the three bagged classifiers.

    ``rforest``, ``knn`` and ``nb`` are each trained on the training data and
    asked to predict X_test. For every test row, the label predicted by most
    of the three models is kept; if all three disagree, ``stats.mode``
    returns the smallest label.

    Parameters
    ----------
    X_train : training feature matrix.
    y_train : training labels.
    X_test : feature matrix to predict.

    Returns
    -------
    list
        One voted label per row of X_test.
    """
    # One row of votes per model, one column per test sample.
    votes = np.vstack([
        rforest(X_train, y_train, X_test),
        knn(X_train, y_train, X_test),
        nb(X_train, y_train, X_test),
    ])

    # A single column-wise mode replaces one stats.mode call per test row.
    # Depending on the SciPy version the mode array has shape (1, n) or (n,),
    # and for 1-D input newer releases return a scalar, on which the former
    # ``[0][0]`` indexing fails. Flatten the result instead of indexing it.
    winners = stats.mode(votes, axis=0)[0]

    return list(np.ravel(winners))

# Optimized Ensemble B KNN Data

In [27]:
# Train the three models on the training split and vote on the test split.
trial_run = ensemble(X_train=X_train, y_train=y_train, X_test=X_test)

In [28]:
for i in range(4):
    print(['tn', 'fp', 'fn', 'tp'][i],confusion_matrix(trial_run, y_test).ravel()[i])

tn 11761
fp 1609
fn 674
tp 2237


In [29]:
# Metric signature is (y_true, y_pred). Accuracy is symmetric, so the value
# is the same either way, but the documented order is used.
accuracy_score(y_test, trial_run)

0.85977519808365577

In [30]:
# Metric signature is (y_true, y_pred). With the arguments swapped this cell
# was actually reporting recall.
precision_score(y_test, trial_run)

0.76846444520783241

In [31]:
# Metric signature is (y_true, y_pred). With the arguments swapped this cell
# was actually reporting precision.
recall_score(y_test, trial_run)

0.58164326573062919

In [32]:
# Metric signature is (y_true, y_pred). Binary F1 is symmetric in precision
# and recall, so the value is unchanged, but the documented order is used.
f1_score(y_test, trial_run)

0.66212816338611802