In [74]:
# importing module
from pandas import *
from statistics import mean
import numpy as np
from helper760 import read_inputs

In [75]:
# test case list 
# test_case 0 -> Use CT Data to detect cancer (Keras Neural Network)
# test_case 1 -> Use CT Data to detect cancer (KNN)
# test_case 2 -> Use CT Data + Clinic Data to detect death (Keras Neural Network)
test_case = 2

In [76]:
Clininc_Data,Outcome_Data,CT_Data = read_inputs()

# Test Case 0 - Keras CT-To-Cancer Prediction
This is a test run perform prediction of cancer using CT Data. This test run uses a neural network constructed using Keras

In [77]:
if test_case == 0:
    import numpy as np
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import StandardScaler 

    X = np.array(CT_Data)
    X = X.T
    X = X.astype(float)
    y = np.array(Outcome_Data[21])
    y = y.astype(int)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)

    # define the keras model
    model = Sequential()
    model.add(Dense(32, input_dim=11, use_bias=True))
    model.add(Dense(64, activation='relu',use_bias=True))
    model.add(Dense(128, activation='relu',use_bias=True))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    # compile the keras model
    model.compile(loss='BinaryFocalCrossentropy', optimizer=opt, metrics=['accuracy','Precision','Recall'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=700, batch_size=50,verbose=1)
    # evaluate the keras model
    _, accuracy,precision,recall = model.evaluate(X, y)
    print('Accuracy: %.2f' % (accuracy*100))
    print('precision: %.2f' % (precision*100))
    print('recall rate: %.2f' % (recall*100))



    # =========================================================
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    test_y = model.predict(X)
    print(X.shape)
    print(test_y.shape)
    test_y = test_y > 0.5
    right_count = 0
    wrong_count = 0
    cancer_dectected = 0
    for i in range(len(test_y)):
        if test_y[i] == y[i]:
            right_count = right_count + 1
            if test_y[i] == True:
                cancer_dectected = cancer_dectected + 1
        else:
            wrong_count = wrong_count + 1
    print(right_count)
    print(wrong_count)
    print(cancer_dectected)
    print("validation accuracy is: "+str(right_count / (right_count + wrong_count)) + ". Successfully detected cancer: "+str(cancer_dectected))
    print("precision score is: {0:.4%}".format(precision_score(y, test_y)))
    print("recall_score is: {0:.4%}".format(recall_score(y, test_y)))
    print("f1_score: {0:.4%}".format(f1_score(y, test_y)))

validation accuracy is: 0.9664968014745744. Successfully detected cancer: 918
precision score is: 98.0769%
recall_score is: 75.9305%
f1_score: 85.5944%

# Test Run - KNN CT-To-Cancer Prediction
In this test run we tried to use CT_Data to predict cancer, the algorithm we used here is K-nearest-Neighbours

In [78]:
if test_case == 1:
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.model_selection import GridSearchCV
    from sklearn.pipeline import Pipeline
    from sklearn.preprocessing import StandardScaler

    param_grid = [
        {
            'weights': ['uniform'], 
            'n_neighbors': [i for i in range(1, 20)]
        },
        {
            'weights': ['distance'],
            'n_neighbors': [i for i in range(1, 20)], 
            'p': [i for i in range(1, 6)]
        }
    ]



    X = np.array(CT_Data)
    X = X.T
    X = X.astype(float)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)
    y = np.array(Outcome_Data[21])
    y = y.astype(int)
    print(X.shape)
    print(y.shape)
    knn_clf = KNeighborsClassifier()
    grid_search = GridSearchCV(knn_clf, param_grid , cv = 5, scoring = 'average_precision')
    grid_search.fit(X, y)
    print(grid_search.best_estimator_)
    print(grid_search.best_score_)


    # ============================================
    def predict_Knn(test_vec, train_data, train_label, k):
        res_list = []
        dis_list = []
        train_data = train_data.astype(float)
        test_vec = test_vec.astype(float)
        for i in range(train_data.shape[1]):
            diff = (test_vec.reshape((train_data.shape[0],1))) - train_data[:,i]
            dis_list.append(np.linalg.norm(diff))

            
        # Smallest K elements indices
        # using sorted() + lambda + list slicing
        res = sorted(range(len(dis_list)), key = lambda sub: dis_list[sub])[:k] 

        for i in range(len(res)):
            res_list.append(train_label[res[i]])

        count_0 = 0
        count_1 = 0

        for i in range(len(res_list)):
            if res_list[i] == 0:
                count_0 = count_0 + 1
            else:
                count_1 = count_1 + 1

        #if count_1 > count_0:
        #    return 1
        #else:
        #    return 0        

        # as long as one nearest neighbour has cancer,
        # than we predict cancer
        if count_1 > 0:
            return 1
        else:
            return 0
    

# Test Case 2 - Keras CT+Clinic_Data-To-Death Prediction
This is a test run perform prediction of death using (CT Data + Clinic Data). This test run uses a neural network constructed using Keras

In [79]:
if test_case == 2:
    import numpy as np
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import StandardScaler 

    X1 = np.array(CT_Data)
    X1 = X1.T
    X1 = X1.astype(float)
    print(X1.shape)

    X2 = np.array(Clininc_Data)
    X2 = X2.T
    X2 = X2.astype(float)
    print(X2.shape)

    X = np.hstack((X1,X2))
    print(X.shape)

    y = np.array(Outcome_Data[0])
    y = y.astype(int)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)

    # define the keras model
    model = Sequential()
    model.add(Dense(32, input_dim=25, use_bias=True))
    model.add(Dense(64, activation='relu',use_bias=True))
    model.add(Dense(128, activation='relu',use_bias=True))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    # compile the keras model
    model.compile(loss='BinaryFocalCrossentropy', optimizer=opt, metrics=['accuracy','Precision','Recall'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=1000, batch_size=50,verbose=1)
    # evaluate the keras model
    _, accuracy,precision,recall = model.evaluate(X, y)
    print('Accuracy: %.2f' % (accuracy*100))
    print('precision: %.2f' % (precision*100))
    print('recall rate: %.2f' % (recall*100))



    # =========================================================
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    test_y = model.predict(X)
    print(X.shape)
    print(test_y.shape)
    test_y = test_y > 0.5
    right_count = 0
    wrong_count = 0
    death_dectected = 0
    for i in range(len(test_y)):
        if test_y[i] == y[i]:
            right_count = right_count + 1
            if test_y[i] == True:
                death_dectected = death_dectected + 1
        else:
            wrong_count = wrong_count + 1
    print(right_count)
    print(wrong_count)
    print(death_dectected)
    print("validation accuracy is: "+str(right_count / (right_count + wrong_count)) + ". Successfully detected death: "+str(death_dectected))
    print("precision score is: {0:.4%}".format(precision_score(y, test_y)))
    print("recall_score is: {0:.4%}".format(recall_score(y, test_y)))
    print("f1_score: {0:.4%}".format(f1_score(y, test_y)))

(9223, 11)
(9223, 14)
(9223, 25)
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1

validation accuracy is: 0.9998915754093028. Successfully detected death: 548
precision score is: 100.0000%
recall_score is: 99.8179%
f1_score: 99.9088%

# Test Case 3 - Keras CT-To-Death Prediction
This is a test run perform prediction of death using (CT Data Only). This test run uses a neural network constructed using Keras

In [None]:
if test_case == 3:
    import numpy as np
    from tensorflow import keras
    from keras.models import Sequential
    from keras.layers import Dense
    from sklearn.preprocessing import StandardScaler 

    X = np.array(CT_Data)
    X = X.T
    X = X.astype(float)


    y = np.array(Outcome_Data[0])
    y = y.astype(int)
    standardScalar = StandardScaler()
    standardScalar.fit(X)
    standardScalar.transform(X)

    # define the keras model
    model = Sequential()
    model.add(Dense(32, input_dim=25, use_bias=True))
    model.add(Dense(64, activation='relu',use_bias=True))
    model.add(Dense(128, activation='relu',use_bias=True))
    model.add(Dense(256, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    opt = keras.optimizers.Adam(learning_rate=0.0001)
    # compile the keras model
    model.compile(loss='BinaryFocalCrossentropy', optimizer=opt, metrics=['accuracy','Precision','Recall'])
    # fit the keras model on the dataset
    model.fit(X, y, epochs=1000, batch_size=50,verbose=1)
    # evaluate the keras model
    _, accuracy,precision,recall = model.evaluate(X, y)
    print('Accuracy: %.2f' % (accuracy*100))
    print('precision: %.2f' % (precision*100))
    print('recall rate: %.2f' % (recall*100))



    # =========================================================
    from sklearn.metrics import precision_score
    from sklearn.metrics import recall_score
    from sklearn.metrics import f1_score
    test_y = model.predict(X)
    print(X.shape)
    print(test_y.shape)
    test_y = test_y > 0.5
    right_count = 0
    wrong_count = 0
    death_dectected = 0
    for i in range(len(test_y)):
        if test_y[i] == y[i]:
            right_count = right_count + 1
            if test_y[i] == True:
                death_dectected = death_dectected + 1
        else:
            wrong_count = wrong_count + 1
    print(right_count)
    print(wrong_count)
    print(death_dectected)
    print("validation accuracy is: "+str(right_count / (right_count + wrong_count)) + ". Successfully detected death: "+str(death_dectected))
    print("precision score is: {0:.4%}".format(precision_score(y, test_y)))
    print("recall_score is: {0:.4%}".format(recall_score(y, test_y)))
    print("f1_score: {0:.4%}".format(f1_score(y, test_y)))