In [131]:
# importing module
from pandas import *
from statistics import mean
import numpy as np
from helper760 import read_inputs

In [132]:
# test case list 
# test_case 0 -> Use CT Data to detect cancer (Keras Neural Network)
# test_case 1 -> Use CT Data to detect cancer (KNN)
# test_case 2 -> Use CT Data + Clinic Data to detect death (Keras Neural Network)
# test_case 3 -> Use CT Data to detect death (Keras Neural Network)
# test_case 4 -> Use CT Data to predict all outcomes results (Keras Neural Network with multiple outputs)
test_case = 4

In [133]:
Clininc_Data,Outcome_Data,CT_Data = read_inputs()
def kFoldCV(k, X, y, model):
    len_x = X.shape[0]
    ind_step = int(len_x  / k)
    ind = np.array([i * ind_step for i in range(k)])
    accuracy = np.zeros(k)
    precision = np.zeros(k)
    recall = np.zeros(k)
    # first k - 1 fold
    for i in range(k - 1):
        X_train_k = np.vstack((X[:ind[i],:],X[ind[i+1]:,:]))
        X_test_k = X[ind[i]:ind[i + 1]]
        y_train_k = np.hstack((y[:ind[i]],y[ind[i+1]:]))
        y_test_k = y[ind[i]:ind[i + 1]]
        _, accuracy[i],precision[i],recall[i] = model.evaluate(X_test_k, y_test_k)       
    # last fold
    X_train_k = X[:ind[k - 1],:]
    y_train_k = y[:ind[k - 1]]
    X_test_k = X[ind[k - 1]:,:]
    y_test_k = y[ind[k - 1]:]
    _, accuracy[k - 1],precision[k - 1],recall[k - 1] = model.evaluate(X_test_k, y_test_k)
    return accuracy,precision,recall

# Test Case 0 - Keras CT-To-Cancer Prediction
This is a test run perform prediction of cancer using CT Data. This test run uses a neural network constructed using Keras

In [134]:
if test_case == 0:
    import numpy as np
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import StandardScaler 

    X = np.array(CT_Data)
    X = X.T
    X = X.astype(float)
    y = np.array(Outcome_Data[21])
    y = y.astype(int)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)

    # define the keras model
    model = Sequential()
    model.add(Dense(32, input_dim=11, use_bias=True))
    model.add(Dense(64, activation='relu',use_bias=True))
    model.add(Dense(128, activation='relu',use_bias=True))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    # compile the keras model
    model.compile(loss='BinaryFocalCrossentropy', optimizer=opt, metrics=['accuracy','Precision','Recall'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=1000, batch_size=50,verbose=1)
    # evaluate the keras model
    accuracy,precision,recall = kFoldCV(10, X, y, model)
    print("K fold average accuracy is: {0:.4%},average precision is: {1:.4%},average recall is: {2:.4%}" \
          .format(np.mean(accuracy),np.mean(precision),np.mean(recall)))


    # =========================================================
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    test_y = model.predict(X)
    print(X.shape)
    print(test_y.shape)
    test_y = test_y > 0.5
    right_count = 0
    wrong_count = 0
    cancer_dectected = 0
    for i in range(len(test_y)):
        if test_y[i] == y[i]:
            right_count = right_count + 1
            if test_y[i] == True:
                cancer_dectected = cancer_dectected + 1
        else:
            wrong_count = wrong_count + 1
    print(right_count)
    print(wrong_count)
    print(cancer_dectected)
    print("validation accuracy is: "+str(right_count / (right_count + wrong_count)) + ". Successfully detected cancer: "+str(cancer_dectected))
    print("precision score is: {0:.4%}".format(precision_score(y, test_y)))
    print("recall_score is: {0:.4%}".format(recall_score(y, test_y)))
    print("f1_score: {0:.4%}".format(f1_score(y, test_y)))

validation accuracy is: 0.9664968014745744. Successfully detected cancer: 918
precision score is: 98.0769%
recall_score is: 75.9305%
f1_score: 85.5944%

# Test Case 1 - KNN CT-To-Cancer Prediction
In this test run we tried to use CT_Data to predict cancer, the algorithm we used here is K-nearest-Neighbours

In [135]:
if test_case == 1:
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    param_grid = [
        {
            'weights': ['uniform'], 
            'n_neighbors': [i for i in range(1, 20)]
        },
        {
            'weights': ['distance'],
            'n_neighbors': [i for i in range(1, 20)], 
            'p': [i for i in range(1, 6)]
        }
    ]



    X = np.array(CT_Data)
    X = X.T
    X = X.astype(float)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)
    y = np.array(Outcome_Data[21])
    y = y.astype(int)
    print(X.shape)
    print(y.shape)
    knn_clf = KNeighborsClassifier()
    grid_search = GridSearchCV(knn_clf, param_grid , cv = 5, scoring = 'average_precision')
    grid_search.fit(X, y)
    print(grid_search.best_estimator_)
    print(grid_search.best_score_)


    # ============================================
    def predict_Knn(test_vec, train_data, train_label, k):
        res_list = []
        dis_list = []
        train_data = train_data.astype(float)
        test_vec = test_vec.astype(float)
        for i in range(train_data.shape[1]):
            diff = (test_vec.reshape((train_data.shape[0],1))) - train_data[:,i]
            dis_list.append(np.linalg.norm(diff))

            
        # Smallest K elements indices
        # using sorted() + lambda + list slicing
        res = sorted(range(len(dis_list)), key = lambda sub: dis_list[sub])[:k] 

        for i in range(len(res)):
            res_list.append(train_label[res[i]])

        count_0 = 0
        count_1 = 0

        for i in range(len(res_list)):
            if res_list[i] == 0:
                count_0 = count_0 + 1
            else:
                count_1 = count_1 + 1

        #if count_1 > count_0:
        #    return 1
        #else:
        #    return 0        

        # as long as one nearest neighbour has cancer,
        # than we predict cancer
        if count_1 > 0:
            return 1
        else:
            return 0
    

# Test Case 2 - Keras CT+Clinic_Data-To-Death Prediction
This is a test run perform prediction of death using (CT Data + Clinic Data). This test run uses a neural network constructed using Keras

In [136]:
if test_case == 2:
    import numpy as np
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import StandardScaler 

    X1 = np.array(CT_Data)
    X1 = X1.T
    X1 = X1.astype(float)
    print(X1.shape)

    X2 = np.array(Clininc_Data)
    X2 = X2.T
    X2 = X2.astype(float)
    print(X2.shape)

    X = np.hstack((X1,X2))
    print(X.shape)

    y = np.array(Outcome_Data[0])
    y = y.astype(int)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)

    # define the keras model
    model = Sequential()
    model.add(Dense(32, input_dim=25, use_bias=True))
    model.add(Dense(64, activation='relu',use_bias=True))
    model.add(Dense(128, activation='relu',use_bias=True))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    # compile the keras model
    model.compile(loss='BinaryFocalCrossentropy', optimizer=opt, metrics=['accuracy','Precision','Recall'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=1000, batch_size=50,verbose=1)
    # evaluate the keras model
    _, accuracy,precision,recall = model.evaluate(X, y)
    print('Accuracy: %.2f' % (accuracy*100))
    print('precision: %.2f' % (precision*100))
    print('recall rate: %.2f' % (recall*100))
    accuracy,precision,recall = kFoldCV(10, X, y, model)
    print("K fold average accuracy is: {0:.4%},average precision is: {1:.4%},average recall is: {2:.4%}" \
          .format(np.mean(accuracy),np.mean(precision),np.mean(recall)))


    # =========================================================
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    test_y = model.predict(X)
    print(X.shape)
    print(test_y.shape)
    test_y = test_y > 0.5
    right_count = 0
    wrong_count = 0
    death_dectected = 0
    for i in range(len(test_y)):
        if test_y[i] == y[i]:
            right_count = right_count + 1
            if test_y[i] == True:
                death_dectected = death_dectected + 1
        else:
            wrong_count = wrong_count + 1
    print(right_count)
    print(wrong_count)
    print(death_dectected)
    print("validation accuracy is: "+str(right_count / (right_count + wrong_count)) + ". Successfully detected death: "+str(death_dectected))
    print("precision score is: {0:.4%}".format(precision_score(y, test_y)))
    print("recall_score is: {0:.4%}".format(recall_score(y, test_y)))
    print("f1_score: {0:.4%}".format(f1_score(y, test_y)))

validation accuracy is: 0.9998915754093028. Successfully detected death: 548
precision score is: 100.0000%
recall_score is: 99.8179%
f1_score: 99.9088%

# Test Case 3 - Keras CT-To-Death Prediction
This is a test run perform prediction of death using (CT Data Only). This test run uses a neural network constructed using Keras

In [137]:
if test_case == 3:
    import numpy as np
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import StandardScaler 

    X = np.array(CT_Data)
    X = X.T
    X = X.astype(float)


    y = np.array(Outcome_Data[0])
    y = y.astype(int)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)

    # define the keras model
    model = Sequential()
    model.add(Dense(32, input_dim=25, use_bias=True))
    model.add(Dense(64, activation='relu',use_bias=True))
    model.add(Dense(128, activation='relu',use_bias=True))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    # compile the keras model
    model.compile(loss='BinaryFocalCrossentropy', optimizer=opt, metrics=['accuracy','Precision','Recall'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=1000, batch_size=50,verbose=1)
    # evaluate the keras model
    _, accuracy,precision,recall = model.evaluate(X, y)
    print('Accuracy: %.2f' % (accuracy*100))
    print('precision: %.2f' % (precision*100))
    print('recall rate: %.2f' % (recall*100))
    


    # =========================================================
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    test_y = model.predict(X)
    print(X.shape)
    print(test_y.shape)
    test_y = test_y > 0.5
    right_count = 0
    wrong_count = 0
    death_dectected = 0
    for i in range(len(test_y)):
        if test_y[i] == y[i]:
            right_count = right_count + 1
            if test_y[i] == True:
                death_dectected = death_dectected + 1
        else:
            wrong_count = wrong_count + 1
    print(right_count)
    print(wrong_count)
    print(death_dectected)
    print("validation accuracy is: "+str(right_count / (right_count + wrong_count)) + ". Successfully detected death: "+str(death_dectected))
    print("precision score is: {0:.4%}".format(precision_score(y, test_y)))
    print("recall_score is: {0:.4%}".format(recall_score(y, test_y)))
    print("f1_score: {0:.4%}".format(f1_score(y, test_y)))

# Test Case 4 - Keras CT-To-AllOutcome Prediction
This is a test run perform prediction of all outcomes using (CT Data Only). This test run uses a neural network constructed using Keras

In [138]:
if test_case == 4:
    import numpy as np
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import StandardScaler 

    X = np.array(CT_Data)
    X = X.T
    X = X.astype(float)


    y = np.array(Outcome_Data)
    y = y.T
    y = y.astype(int)

    # the predictions are:
    # Idx 1 -> CVD
    # Idx 3 -> Heart Failure
    # Idx 5 -> MI DX
    # Idx 7 -> Type 2 Diabetes
    # Idx 9 -> Femoral Neck Fracture
    # Idx 11 -> Unspec Femoral Fracture
    # Idx 13 -> Forearm Fracture
    # Idx 15 -> Humerus Fracture
    # Idx 17 -> Pathologic Fracture
    # Idx 19 -> Alzheimers
    # Idx 21 -> Cancer (weather the patience has or doesn't have, regardless of 1 or 2 types)
    y = y[:,[1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]]

    print(X.shape)
    print(y.shape)

    # define the keras model
    model = Sequential()
    model.add(Dense(32, input_dim=11, use_bias=True))
    model.add(Dense(64, activation='relu',use_bias=True))
    model.add(Dense(128, activation='relu',use_bias=True))
    model.add(Dense(256, activation='relu',use_bias=True))
    model.add(Dense(512, activation='relu',use_bias=True))
    model.add(Dense(11, activation='sigmoid'))
    opt = keras.optimizers.Adam(learning_rate=0.00005)
    # compile the keras model
    model.compile(loss='BinaryFocalCrossentropy', optimizer=opt, metrics=['accuracy','Precision','Recall'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=1500, batch_size=50,verbose=1)
    # evaluate the keras model
    _, accuracy,precision,recall = model.evaluate(X, y)
    print('Accuracy: %.2f' % (accuracy*100))
    print('precision: %.2f' % (precision*100))
    print('recall rate: %.2f' % (recall*100))
    


    # =========================================================
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    test_y = model.predict(X)
    print("test_y.shape:"+str(test_y.shape))
    
    count_right = 0
    count_wrong = 0

    count_disease = 0
    count_mis_disease = 0
    count_wrong_disease = 0
    #for i in range((test_y.shape)[0]):
    for i in range((test_y.shape)[0]):
        for j in range((test_y.shape)[1]):
            if test_y[i][j]>0.5:
                test_y[i][j] = 1
            else:
                test_y[i][j] = 0

            if test_y[i][j] == y[i][j]:
                count_right = count_right + 1
                if y[i][j] == 1:
                    count_disease = count_disease + 1
            else:
                count_wrong = count_wrong + 1
                if y[i][j] == 1:
                    count_mis_disease = count_mis_disease + 1
                else:
                    count_wrong_disease = count_wrong_disease + 1

    print("accuracy: "+str(count_right/(count_right + count_wrong)))


    print("count_right: "+str(count_right))
    print("count_wrong: "+str(count_wrong))

    print("detected disease case: "+str(count_disease))
    print("missed disease case: "+str(count_mis_disease))     
    print("wrongly detected disease case: "+str(count_wrong_disease))
           

(9223, 11)
(9223, 11)
Epoch 1/1500
Epoch 2/1500
Epoch 3/1500
Epoch 4/1500
Epoch 5/1500
Epoch 6/1500
Epoch 7/1500
Epoch 8/1500
Epoch 9/1500
Epoch 10/1500
Epoch 11/1500
Epoch 12/1500
Epoch 13/1500
Epoch 14/1500
Epoch 15/1500
Epoch 16/1500
Epoch 17/1500
Epoch 18/1500
Epoch 19/1500
Epoch 20/1500
Epoch 21/1500
Epoch 22/1500
Epoch 23/1500
Epoch 24/1500
Epoch 25/1500
Epoch 26/1500
Epoch 27/1500
Epoch 28/1500
Epoch 29/1500
Epoch 30/1500
Epoch 31/1500
Epoch 32/1500
Epoch 33/1500
Epoch 34/1500
Epoch 35/1500
Epoch 36/1500
Epoch 37/1500
Epoch 38/1500
Epoch 39/1500
Epoch 40/1500
Epoch 41/1500
Epoch 42/1500
Epoch 43/1500
Epoch 44/1500
Epoch 45/1500
Epoch 46/1500
Epoch 47/1500
Epoch 48/1500
Epoch 49/1500
Epoch 50/1500
Epoch 51/1500
Epoch 52/1500
Epoch 53/1500
Epoch 54/1500
Epoch 55/1500
Epoch 56/1500
Epoch 57/1500
Epoch 58/1500
Epoch 59/1500
Epoch 60/1500
Epoch 61/1500
Epoch 62/1500
Epoch 63/1500
Epoch 64/1500
Epoch 65/1500
Epoch 66/1500
Epoch 67/1500
Epoch 68/1500
Epoch 69/1500
Epoch 70/1500
Epoch 7