## Neural network

In [1]:
import numpy as np
import pandas as pd
from functools import reduce
from sklearn.metrics import confusion_matrix, accuracy_score, f1_score, classification_report
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [2]:
import os

# Switch the working directory to the folder that holds the exported CSV.
# NOTE(review): absolute, machine-specific path - the notebook will not run
# on another machine without editing this line.
os.chdir('/Users/hainayan/Downloads')

# Read the file, then keep everything except the first two columns and the
# last column.
df = pd.read_csv('final result.csv')
kept_columns = df.columns[2:-1].to_list()
df = df[kept_columns]

# The label is looked up by name; the features are every retained column
# except the first one.
# NOTE(review): this presumes 'Mortality (1= death)' is the first retained
# column - if it is not, the label would remain inside X. Confirm against the
# CSV header.
y = df['Mortality (1= death)'].values
feature_columns = df.columns[1:]
X = df[feature_columns].values

# Hold out 25% of the rows for evaluation; the split is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=0,
)

In [3]:
class BaggingTreeClassifier:
    """Bagged ensemble of ``MLPClassifier`` models trained on balanced resamples.

    Despite the class name, the base learner is a multi-layer perceptron, not a
    decision tree. Every ensemble member is fitted on a resample that contains
    as many negative rows (``y == 0``) as positive rows (``y == 1``), and
    ``predict`` averages the members' positive-class probabilities.
    """

    def __init__(self, n=100):
        """Store the ensemble size.

        Parameters
        ----------
        n : int, default 100
            Number of classifiers trained by ``fit``.
        """
        self.n = n

    def fit(self, X, y, random_state=42, criterion='entropy'):
        """Train ``self.n`` classifiers, each on its own balanced resample.

        Parameters
        ----------
        X : array-like, indexed by row along axis 0
            Feature matrix.
        y : array-like
            Binary labels; rows equal to 1 are positives, rows equal to 0 are
            negatives. Rows with any other label are ignored.
        random_state : int, default 42
            Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
            *global* generator as a side effect.
        criterion : str, default 'entropy'
            Unused. Kept only so existing calls that pass it keep working.

        Raises
        ------
        ValueError
            If ``y`` contains no positive rows or no negative rows.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        pos_mask = y == 1
        neg_mask = y == 0
        X_train_p, y_train_p = X[pos_mask], y[pos_mask]
        X_train_n, y_train_n = X[neg_mask], y[neg_mask]

        # Row counts are loop-invariant; compute them once. The resample size
        # is the number of positive rows (counted, rather than summing labels,
        # so it is always an integer regardless of the label dtype).
        n_pos = X_train_p.shape[0]
        n_neg = X_train_n.shape[0]
        if n_pos == 0 or n_neg == 0:
            raise ValueError(
                'fit() needs at least one positive (y == 1) and one negative '
                f'(y == 0) sample; got {n_pos} positive and {n_neg} negative.'
            )

        clf_list = []
        np.random.seed(seed=random_state)
        for i in range(self.n):
            # Sample with replacement: negatives first, then positives. The
            # order matters for reproducing a given random_state.
            idx_n = np.random.choice(n_neg, n_pos)
            idx_p = np.random.choice(n_pos, n_pos)
            X_train_f = np.concatenate([X_train_n[idx_n, :], X_train_p[idx_p]])
            y_train_f = np.concatenate([y_train_n[idx_n], y_train_p[idx_p]])
            # Every member is built with random_state=0, so members differ
            # only through the resampled training rows.
            clf = MLPClassifier(solver='lbfgs', random_state=0)
            clf.fit(X_train_f, y_train_f)
            clf_list.append(clf)
            print(f'\rClassifier {i+1} completed.', end='')
        self.clfs = clf_list

    def predict(self, X):
        """Return the mean positive-class probability over all fitted members.

        The result is a score per row of ``X`` (not a hard 0/1 label); callers
        threshold it themselves.

        Raises
        ------
        RuntimeError
            If no classifiers have been fitted yet.
        """
        clfs = getattr(self, 'clfs', None)
        if not clfs:
            raise RuntimeError(
                'BaggingTreeClassifier has no fitted classifiers; call fit() first.'
            )
        # Column 1 of predict_proba is taken as the positive-class probability.
        return np.mean([clf.predict_proba(X)[:, 1] for clf in clfs], axis=0)

    def report(self, X, y):
        """Print a classification report for ``X`` against the labels ``y``.

        Scores from ``predict`` are thresholded at 0.5 to obtain 0./1. labels.
        """
        y_hat = self.predict(X)
        # Score against the labels passed in, not a notebook-level global.
        print(classification_report(y, 1. * (y_hat > 0.5)))

In [25]:
# Fit a 500-member ensemble on the training split.
btc = BaggingTreeClassifier(n=500)
btc.fit(X_train, y_train)

# predict() returns averaged positive-class scores; cut them at 0.5 and store
# the result as 0./1. floats.
y_hat = (btc.predict(X_test) > 0.5).astype(float)

Classifier 500 completed.

In [27]:
# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_test, y_hat))

              precision    recall  f1-score   support

           0       0.99      0.96      0.97     10477
           1       0.56      0.90      0.69       641

    accuracy                           0.95     11118
   macro avg       0.78      0.93      0.83     11118
weighted avg       0.97      0.95      0.96     11118



In [28]:
# F1 of the positive class (label 1) on the held-out split.
f1_score(y_test, y_hat)

0.6893667861409798

In [31]:
# Overall accuracy on the held-out split.
accuracy_score(y_test, y_hat)

0.9532289980212268

In [32]:
# Confusion matrix on the held-out split: rows are true labels, columns are
# predicted labels.
confusion_matrix(y_test, y_hat)

array([[10021,   456],
       [   64,   577]])