In [194]:
from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [154]:
# Load the comma-separated integer table from "transfusion.data".
# The last column of every row is the class label; all columns before it are
# the features.
# NOTE(review): every non-blank line must parse as integers, so a textual
# header row would raise ValueError here -- confirm the file has none.
feature_rows = []
label_values = []
with open("transfusion.data") as datafile:
    for line in datafile:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        row = [int(x) for x in line.split(",")]
        feature_rows.append(row[:-1])
        # Take the label from the same boundary used for the feature slice so
        # the two can never disagree about which column is the target.
        label_values.append(row[-1])

data = np.array(feature_rows)
labels = np.array(label_values)

# Hold out 20% of the rows for testing; stratify so both splits keep the same
# class proportions, and fix the seed so the split is reproducible.
data_train, data_test, labels_train, labels_test = train_test_split(
    data, labels, test_size=0.20, random_state=37, stratify=labels
)


In [155]:
# Result accumulators, filled by the training/evaluation loop.
predictions = []
accuracies = []
matricies = []

# Display names, index-aligned with `models` below.
names = [
    "Random Forest",
    "AdaBoost",
    "Gaussian NB",
    "MLP NN",
    "KNN",
    "Logistic Regression",
    "Decision Tree",
]

# Every estimator that accepts a seed is given one so that a fresh
# "Restart & Run All" reproduces the same scores.
models = [
    RandomForestClassifier(n_jobs=2, random_state=68, n_estimators=30, max_depth=5),
    # The explicit algorithm='SAMME.R' argument is omitted: it was the default
    # in the scikit-learn releases that accept it, and newer releases
    # deprecate/reject it.
    # NOTE(review): confirm against the installed scikit-learn version.
    AdaBoostClassifier(n_estimators=80, learning_rate=0.1, random_state=0),
    GaussianNB(),
    MLPClassifier(solver='lbfgs', alpha=1e-4, hidden_layer_sizes=(6, 9), random_state=1),
    KNeighborsClassifier(n_neighbors=9),
    # NOTE(review): `multi_class` looks deprecated in recent scikit-learn
    # releases; it is kept here because dropping it may change the fitted
    # model -- confirm before removing.
    LogisticRegression(random_state=0, solver='lbfgs', multi_class='multinomial'),
    DecisionTreeClassifier(max_depth=3, random_state=0),
]

assert len(names) == len(models), "names and models must stay index-aligned"

In [156]:
# Fit every classifier on the training split and score it on the held-out
# split, printing the accuracy and confusion matrix of each.
#
# The result lists are rebuilt from scratch here so that re-running this cell
# does not append a second copy of every entry, which would leave them longer
# than `models` and misaligned with any per-model weights indexed against them.
assert len(names) == len(models), "names and models must stay index-aligned"
predictions = []
accuracies = []
matricies = []
for name, model in zip(names, models):
    model.fit(data_train, labels_train)
    y_pred = model.predict(data_test)
    predictions.append(y_pred)

    # Fraction of correctly classified test samples (the default behaviour
    # of accuracy_score, so no `normalize` argument is needed).
    acc = accuracy_score(labels_test, y_pred)
    print("\n" + name + ' {0:.2f}%'.format(acc * 100))
    accuracies.append(acc)

    # Rows are the true classes, columns the predicted classes.
    matrix = confusion_matrix(labels_test, y_pred)
    print(matrix)
    matricies.append(matrix)


Random Forest 82.00%
[[106   8]
 [ 19  17]]

AdaBoost 79.33%
[[114   0]
 [ 31   5]]

Gaussian NB 74.67%
[[105   9]
 [ 29   7]]

MLP NN 80.00%
[[107   7]
 [ 23  13]]

KNN 78.67%
[[111   3]
 [ 29   7]]

Logistic Regression 78.67%
[[112   2]
 [ 30   6]]

Decision Tree 77.33%
[[102  12]
 [ 22  14]]


In [192]:
# Weighted majority vote restricted to the classifiers at index 2 and later:
# the first two entries of `predictions` (and therefore of `weights_2`) take
# no part in the vote.
# NOTE(review): confirm that leaving out the first two models is intentional.
weights_1 = [1,1,1,1,1,1,1]
weights_2 = [18,6,2,7,4,4,4]
ensemble_y = []
for sample in range(len(predictions[0])):
    # Pair each participating model index with its vote for this test sample.
    ballots = [(m, predictions[m][sample]) for m in range(2, len(predictions))]
    score_0 = sum(weights_2[m] for m, vote in ballots if vote == 0)
    score_1 = sum(weights_2[m] for m, vote in ballots if vote == 1)
    # Class 0 wins only with strictly more weight; a tie goes to class 1.
    ensemble_y.append(1 if score_0 <= score_1 else 0)

acc = accuracy_score(labels_test, ensemble_y)
matrix = confusion_matrix(labels_test, ensemble_y)
print("Ensemble " + ' {0:.2f}%'.format(acc * 100))
print(matrix)

Ensemble  80.67%
[[111   3]
 [ 26  10]]


In [193]:
# Weighted majority vote over every classifier in `predictions`; model m
# contributes weights_2[m] to whichever class (0 or 1) it predicted.
weights_1 = [1,1,1,1,1,1,1]
weights_2 = [8,6,2,7,4,4,4]
ensemble_y = []
for sample in range(len(predictions[0])):
    weight_for = {0: 0, 1: 0}
    for m, model_pred in enumerate(predictions):
        vote = model_pred[sample]
        if vote == 0:
            weight_for[0] += weights_2[m]
        elif vote == 1:
            weight_for[1] += weights_2[m]
    # Class 0 wins only with strictly more weight; a tie goes to class 1.
    if weight_for[0] > weight_for[1]:
        ensemble_y.append(0)
    else:
        ensemble_y.append(1)

acc = accuracy_score(labels_test, ensemble_y)
matrix = confusion_matrix(labels_test, ensemble_y)
print("Ensemble " + ' {0:.2f}%'.format(acc * 100))
print(matrix)

Ensemble  82.67%
[[112   2]
 [ 24  12]]
