In [1]:
import numpy as np
import tensorflow as tf
import pandas
import pickle
import os

In [2]:
# Our code
import lrp
import EmbedHelper
import DataLoader
import Models



{'Dermatology': 'Deri ve Zührevi Hastalıkları (Cildiye)', 'Internal Medicine': 'İç Hastalıkları (Dahiliye)', 'Neurology': 'Nöroloji', 'Obstetrics & Gynecology': 'Kadın Hastalıkları ve Doğum', 'Ophthalmology': 'Göz Hastalıkları', 'Orthopaedic Surgery': 'Ortopedi ve Travmatoloji', 'Otolaryngology': 'Kulak Burun Boğaz Hastalıkları', 'Pediatrics': 'Çocuk Sağlığı ve Hastalıkları', 'Psychiatry': 'Ruh Sağlığı ve Hastalıkları', 'Radiology-Diagnostic': 'Radyoloji', 'Surgery-General': 'Genel Cerrahi', 'Urology': 'Üroloji'}


In [3]:
# Catalogue of available embeddings (index -> name), printed for reference.
embedDict = EmbedHelper.EmbeddingHandler.embedDict
print(embedDict)

# Run-wide settings read by the cells below.
configs = dict(
    vectorSize=300,
    trainNewModel=True,
    dataColumn="question",
    maxLength=128,
    batchSize=8,
    embeddingType=embedDict[2],
    ELMo=True,
    PreEmbed=True,
    restore=True,
)

# Convenience aliases used when the model is built.
vectorSize = configs["vectorSize"]
inputSize = configs["maxLength"]

{1: 'Fast Text', 2: 'Google News', 3: 'HealthTap', 4: 'Pubmed', 5: 'Glove', 6: 'iCliniq Trigram', 7: 'iCliniq default'}


In [4]:
# Load the embedding selected in configs["embeddingType"].
# NOTE(review): the literal 300 equals configs["vectorSize"]; if this argument is the
# vector size, it should probably read from the config instead — confirm against EmbedHelper.
EmbedModel = EmbedHelper.EmbeddingHandler(configs["embeddingType"], False, 300, "Embeddings")

Loading Google News


In [5]:
# Larger iCliniq data set (~9800 instances); every file shares the same path prefix.
_icliniqPrefix = "data//icliniq//iCliniq_14K//icliniq_14k_"

# Questions and their targets for the train / test splits.
trainData = np.load(_icliniqPrefix + "train_questions.npy")
trainTarget = np.load(_icliniqPrefix + "train_target.npy")
testData = np.load(_icliniqPrefix + "test_questions.npy")
testTarget = np.load(_icliniqPrefix + "test_target.npy")

# Raw variants of the same questions.
trainData_raw = np.load(_icliniqPrefix + "train_questions_raw.npy")
testData_raw = np.load(_icliniqPrefix + "test_questions_raw.npy")

# Class dictionary; its size fixes the number of model outputs.
# NOTE(review): pickle.load executes arbitrary code from the file — only load trusted pickles.
ClassDict = {}
with open('fold0classDict.pkl', 'rb') as classDictFile:
    ClassDict = pickle.load(classDictFile)
outputSize = len(ClassDict)

# Train-Test

In [8]:
def getTokenLengths(token):
    """Return a list holding `len(item)` for every item of `token`, in order."""
    return list(map(len, token))

In [9]:
def evaluatePerformance(nnModel,sess,testData,testTarget,batchSize,uncertaintyCoef):
    """Evaluate `nnModel` on a data set in mini-batches and aggregate the metrics.

    Reads the notebook globals `ClassDict`, `configs`, `getTokenLengths` and,
    only when configs["PreEmbed"] is truthy, `EmbedModel`.

    Args:
        nnModel: model object exposing the placeholders and tensors fed and fetched below.
        sess: session whose `run` evaluates the fetched tensors.
        testData: array of inputs; `testData.shape[0]` is used as the sample count.
        testTarget: targets aligned with `testData`.
        batchSize: number of samples per `sess.run` call (must be non-zero).
        uncertaintyCoef: value fed to `nnModel.uncertaintyRatio`.

    Returns:
        dict with the keys
        "Accuracy", "TotalEvidenceTrue", "TotalEvidenceFalse", "UncertaintyAccuracy",
        "DataRate": per-batch values averaged with each batch weighted by its size;
        "Truth", "Prediction": the per-batch truths / predictions concatenated;
        "Top3": one entry per evaluated sample, each the three
        [className, probability] pairs with the highest probability.
        (Previously only the first sample of every batch was ranked, so "Top3"
        was incomplete for batchSize > 1; for batchSize == 1 nothing changes.)
    """
    reverseClassDict = {value:key for key,value in ClassDict.items()}
    top3 = []

    dataSize = testData.shape[0]

    totalAcc = 0
    totalUcAcc = 0
    totalDataRate = 0

    testTruth = np.array([])
    testPred = np.array([])

    testEvTrue = 0
    testEvFail = 0

    for start in range(0, dataSize, batchSize):
        end = start + batchSize
        data = np.array(testData[start:end])
        dataClean = data

        if(configs["PreEmbed"]):
            data = EmbedModel.vectorizeBatch(data)

        outputData = np.array(testTarget[start:end])
        # The last batch may be shorter than batchSize; weight its metrics accordingly.
        cutSize = data.shape[0]
        tokens_length = getTokenLengths(data)

        fd = {nnModel.nn_inputs:dataClean,nnModel.nn_vector_inputs:data,nnModel.nn_outputs:outputData,nnModel.isTraining:False,nnModel.token_lengths:tokens_length,
             nnModel.uncertaintyRatio:uncertaintyCoef}

        prob, testBAcc,nnTruth,nnPrediction,_,evCor,evFail,ucAcc,dataRate = sess.run([nnModel.prob, nnModel.accuracy,nnModel.truths,nnModel.predictions
                                                                       ,nnModel.correct_predictions,nnModel.mean_ev_succ,nnModel.mean_ev_fail,nnModel.ucAccuracy,
                                                                                     nnModel.dataRatio]
                                                                      ,feed_dict=fd)
        # Rank the classes of EVERY sample in the batch (rows of `prob`), not just the first one.
        for sampleProb in prob:
            ranked = sorted(
                ([reverseClassDict[i], sampleProb[i]] for i in range(len(sampleProb))),
                key=lambda pair: pair[1],
                reverse=True,
            )
            top3.append(ranked[0:3])

        testTruth = np.append(testTruth,nnTruth,axis=0)
        testPred = np.append(testPred,nnPrediction,axis=0)
        testEvTrue += evCor*cutSize
        testEvFail += evFail*cutSize

        totalAcc += testBAcc*cutSize
        totalUcAcc += ucAcc*cutSize
        totalDataRate += dataRate*cutSize

    outputs = {
        "Accuracy":totalAcc/dataSize,
        "TotalEvidenceTrue":testEvTrue/dataSize,
        "TotalEvidenceFalse":testEvFail/dataSize,
        "UncertaintyAccuracy":totalUcAcc/dataSize,
        "DataRate":totalDataRate/dataSize,
        "Truth":testTruth,
        "Prediction":testPred,
        "Top3":top3
    }

    return outputs

In [10]:
def trainModel(nnModel, iterations, trainData, trainTarget, testData, testTarget, configs, accList,
               evalInterval=50000, logInterval=20):
    """Run `iterations` training steps and periodically evaluate on the test split.

    Reads the notebook globals `sess` (the session used for every training step
    and evaluation), `getTokenLengths`, `evaluatePerformance` and, only when
    configs["PreEmbed"] is truthy, `EmbedModel`.

    Args:
        nnModel: model exposing the placeholders / ops fed and fetched below.
        iterations: number of training steps to run.
        trainData, trainTarget: training inputs and targets handed to the batch iterator.
        testData, testTarget: evaluation inputs and targets.
        configs: settings dict; "batchSize" and "PreEmbed" are read here.
        accList: list that receives one row
            [iteration, trainAcc, htTestAcc, testAcc, loss, ucAcc] per evaluation
            (mutated in place).
        evalInterval: evaluate every this many iterations (never at iteration 0).
        logInterval: refresh the one-line progress message every this many iterations.

    Returns:
        dict with
        "accList": the same list object that was passed in;
        "confidenceMatrix": array of rows [confidence, dataRate, uncertaintyAccuracy]
            from the most recent evaluation, or None if no evaluation ran;
        "evidenceTrue" / "evidenceFalse": "TotalEvidenceTrue" / "TotalEvidenceFalse"
            recorded at each evaluation.
        (These values used to be computed and then discarded.)
    """
    batcher = DataLoader.DataHandler.batchIterator(trainData, trainTarget, configs["batchSize"])
    # One batch is drawn and dropped before the loop, as before, so the sequence
    # of batches used for training is unchanged.
    next(batcher)

    print("trainData shape : ", trainData.shape)
    print("testData shape : ", testData.shape)
    print("trainTarget shape : ", trainTarget.shape)
    print("testTarget shape : ", testTarget.shape)

    htTestAcc=0  # never updated here; kept because it is part of the log line and the accList rows
    fold0TestAcc = 0
    ucAcc = 0
    dataRate = 0

    L_test_ev_s=[]
    L_test_ev_f=[]
    confidenceMatrix = None

    print("")
    for i in range(iterations):
        data, target = next(batcher)
        dataClean = data

        if(configs["PreEmbed"]):
            data = EmbedModel.vectorizeBatch(data)

        tokens_length = getTokenLengths(data)
        fd = {nnModel.nn_inputs:dataClean, nnModel.nn_vector_inputs:data,nnModel.nn_outputs:target,
              nnModel.isTraining:True,nnModel.token_lengths:tokens_length,nnModel.annealing_step:0.00005*i}
        _, acc, los = sess.run([nnModel.train_op,nnModel.accuracy,nnModel.loss],feed_dict=fd)

        if(i%logInterval==0):
            title = ("[Current iteration = "+str(i)+" Train Acc:"+str(acc)+" HT Test Acc:"+str(htTestAcc)+" fold0Test: ("+str(fold0TestAcc)+') ucAcc :'+str(ucAcc)
                +" dataRatio  :"+str(dataRate)+' ]')
            # "\r" keeps the progress message on a single, continuously rewritten line.
            print(title, end="\r")

        if(i%evalInterval==0 and i != 0):
            testOutputs = evaluatePerformance(nnModel, sess, testData, testTarget, configs["batchSize"], 0.1)

            fold0TestAcc = testOutputs["Accuracy"]
            ucAcc = testOutputs["UncertaintyAccuracy"]
            dataRate = testOutputs["DataRate"]
            L_test_ev_s.append(testOutputs["TotalEvidenceTrue"])
            L_test_ev_f.append(testOutputs["TotalEvidenceFalse"])

            # Sweep several confidence levels; each is evaluated with uncertainty ratio 1 - confidence.
            confidences = [0.995,0.98,0.90,0.70,0.5]
            confidenceMatrix = np.zeros(shape=[len(confidences),3])
            for idx, level in enumerate(confidences):
                sweepOutputs = evaluatePerformance(nnModel, sess, testData, testTarget, configs["batchSize"],1-level)
                confidenceMatrix[idx,0] = level
                confidenceMatrix[idx,1] = sweepOutputs["DataRate"]
                confidenceMatrix[idx,2] = sweepOutputs["UncertaintyAccuracy"]

            # TODO: checkpoint the session here when the test accuracy improves (was a disabled saveSession call).
            accList.append([i, acc, htTestAcc, fold0TestAcc, los, ucAcc])

    return {
        "accList": accList,
        "confidenceMatrix": confidenceMatrix,
        "evidenceTrue": L_test_ev_s,
        "evidenceFalse": L_test_ev_f,
    }

In [11]:
# When True, the variables are loaded from the checkpoint at model_path after initialisation.
should_load = True
model_path = "NNModels/icliniq14k_GoogleNews_onelayer_pad128/model.ckpt"

# Same values as in the config cell; set again here before the model is built.
configs["maxLength"] = 128 
inputSize = configs["maxLength"]
configs["batchSize"] = 8
# Original model
nnModel = Models.PyramidCNNVShort(inputSize=inputSize, vectorSize=vectorSize, outputSize=outputSize)

# Alternative model (disabled)
# nnModel = Models.myModel_CNN_TEXT(inputSize=inputSize, vectorSize=vectorSize, outputSize=outputSize)

# Open an interactive session on the model's own graph, then initialise variables and tables.
sess = tf.InteractiveSession(graph=nnModel.paperGraph)
tf.global_variables_initializer().run()
sess.run(tf.tables_initializer())

# The restore runs after the initialisers, so checkpoint values replace the fresh ones.
if should_load:
    tf.train.Saver().restore(sess, model_path)

fullvectorsize:  300
(?, 126, 1, 250)
Instructions for updating:
keep_dims is deprecated, use keepdims instead
INFO:tensorflow:Restoring parameters from NNModels/icliniq14k_GoogleNews_onelayer_pad128/model.ckpt


In [12]:
# Evaluate the test split one sample per batch.
confidence = 0.9  # the uncertainty ratio handed to the model is its complement
results = evaluatePerformance(
    nnModel=nnModel,
    sess=sess,
    testData=testData,
    testTarget=testTarget,
    batchSize=1,
    uncertaintyCoef=1-confidence,
)
results["Accuracy"]

0.7432911392405064

# Understanding NN - LRP

We get layers from output to input so that we can backpropagate.

Then we calculate word importances for each word in input.

The current model has only one conv-pool layer, so `layer_count` is 1. The medspecsearch models have 3 layers, so this model differs from them. We will use this one-layer model for LRP purposes.

( Maybe remove stop words? )

In [13]:
# Number of conv-pool layers in the loaded model; the cells below branch on 1 vs 3.
layer_count = 1

In [14]:
# Collect weights, biases and activations for the lrp helpers, ordered from output to input.
weights = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='.*kernel.*')[::-1]
biases = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='.*bias.*')[::-1]

# Activations are listed input -> output first; only the attributes of the
# selected architecture are touched.
if layer_count == 1:
    _forward_activations = [nnModel.cnnInput, nnModel.conv1, nnModel.blockPool,
                            nnModel.h_pool_flat, nnModel.fc1, nnModel.scores]
elif layer_count == 3:
    _forward_activations = [nnModel.cnnInput, nnModel.conv1, nnModel.blockPool,
                            nnModel.conv2, nnModel.blockPool2,
                            nnModel.conv3, nnModel.blockPool3,
                            nnModel.h_pool_flat, nnModel.fc1, nnModel.scores]
else:
    _forward_activations = []

activations = list(reversed(_forward_activations))

In [15]:
# The three-layer model has three parallel conv-pool pairs, which have to be
# separated when backpropagating. Each branch is picked out of the reversed
# lists by hand-written index tables.
# TODO: find a cleaner way to derive these splits.
if layer_count == 3:
    _param_picks = [[0,1,4], [0,1,3], [0,1,2]]
    _activation_picks = [[0,1,2,7,8,9], [0,1,2,5,6,9], [0,1,2,3,4,9]]

    biases_splitted = [np.array(biases)[pick] for pick in _param_picks]
    weights_splitted = [np.array(weights)[pick] for pick in _param_picks]
    activations_splitted = [np.array(activations)[pick] for pick in _activation_picks]

    # Per-branch names, bound to the same objects held in the lists above.
    biases_0, biases_1, biases_2 = biases_splitted
    weights_0, weights_1, weights_2 = weights_splitted
    activations_0, activations_1, activations_2 = activations_splitted

    pool_biases = [[1,126,1,1], [1,125,1,1], [1,124,1,1]]

In [16]:
# Small batch for trying out LRP: the first 21 training questions, embedded,
# together with their one-hot encoded targets.
batch_x = EmbedModel.vectorizeBatch(trainData[0:21])
batch_y = sess.run(tf.one_hot(trainTarget[0:21],outputSize))

In [17]:
# alpha is handed to both lrp helpers used in this notebook
# (presumably the alpha coefficient of the LRP propagation rule — TODO confirm in lrp.py).
alpha = 1
# Presumably builds the per-layer backpropagation structure from the output -> input ordered lists; see lrp.py.
backprop_layers = lrp.lrp_layers(alpha, layer_count, activations, weights, biases)

In [18]:
# Word relevances for the first training question: batch_x[0:1] is its embedded form, trainData[0] the matching entry of trainData.
word_importances, results_combined = lrp.get_word_relevances(alpha, backprop_layers, layer_count, batch_x[0:1], trainData[0], sess, nnModel, activations, weights, biases)

In [19]:
# Display the (word, relevance) pairs; the trailing '[None]' entries are presumably padding.
word_importances

[('hello', -0.00826842975688232),
 ('doctor', -0.10804215329674047),
 ('i', 0.020342812725022037),
 ('have', -0.30172094642857256),
 ('burning', 0.44691338759829063),
 ('sensation', -0.005695157685182687),
 ('while', -0.006010421393746959),
 ('urinating', -0.016021350662397415),
 ('and', 0.0),
 ('a', 0.0),
 ('frequent', 0.16106620664576704),
 ('urge', -0.012891234366653742),
 ('to', 0.0),
 ('urinate', -0.702877483046213),
 ('can', 0.04179421897966783),
 ('it', 0.09591377357002948),
 ('be', 0.24346296059100556),
 ('due', -0.12945817197134266),
 ('to', 0.0),
 ('sexual', -0.08989811715872369),
 ('contact', -0.27382462013709),
 ('i', 0.020263645015194495),
 ('am', -0.007761488160500732),
 ('a', 0.0),
 ('year', 0.0011631109154077452),
 ('old', -0.0023620034744476037),
 ('male', -0.07802475127381596),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0.0),
 ('[None]', 0

# TF-IDF

In [20]:
def get_word_imps_all_classes(path, default=True):
    """Read the keyword tables stored as text files below `path`.

    Files are taken from the "default" sub-directory of `path`, or from
    "stemmed" when `default` is False. In each file the first line is treated
    as a title and skipped; every other non-blank line is split on single spaces.

    Args:
        path: directory containing the "default" / "stemmed" sub-directories
            (with or without a trailing separator).
        default: choose "default" (True) or "stemmed" (False).

    Returns:
        A list with one entry per file, in sorted file-name order; each entry
        is the list of split lines of that file.
        NOTE(review): callers index this list by class id, so the sorted file
        order is assumed to match the class ids — confirm against ClassDict.
    """
    folder = os.path.join(path, "default" if default else "stemmed")

    word_imps_all_classes = []
    # os.listdir returns entries in arbitrary order; sort so positions are reproducible.
    for file_name in sorted(os.listdir(folder)):
        file_path = os.path.join(folder, file_name)
        # Skip sub-directories (e.g. notebook checkpoint folders) instead of failing in open().
        if not os.path.isfile(file_path):
            continue
        with open(file_path) as handle:
            next(handle, None)  # drop the title line
            # Strip only the line terminator so a final line without a newline keeps its last character.
            rows = [line.rstrip("\n").split(' ') for line in handle if line.strip()]
        word_imps_all_classes.append(rows)

    return word_imps_all_classes

# Asking keywords to user

In [22]:
def ask_user(relevant_words, index, raw_user_input):
    """Ask whether the keyword at `index` applies, then hand over to `is_relevant`.

    Args:
        relevant_words: sequence of entries whose first element is the keyword text.
        index: position of the keyword to ask about.
        raw_user_input: the user's text so far, forwarded unchanged to `is_relevant`.

    Returns:
        None. When `index` is past the last keyword the dialogue ends with a
        short notice instead of raising IndexError.
    """
    if index >= len(relevant_words):
        print("No more keywords to ask about.")
        return

    print("Is '" + relevant_words[index][0] + "' keyword related with your situation?")

    is_relevant(relevant_words, index, raw_user_input)

In [23]:
def is_relevant(relevant_words, index, raw_user_input):
    """Read the user's answer about keyword `index` and continue the dialogue.

    Reads the notebook globals `nnModel`, `sess`, `confidence`,
    `desired_confidence`, `process_user_input`, `evaluatePerformance` and `ask_user`.

    Flow:
        * "exit" at either prompt ends the dialogue.
        * An affirmative answer ("True", "yes" or "y", case-insensitive) asks
          for more detail, appends it to the text so far, re-runs the model on
          the combined text and prints the top-3 result. If the best
          probability is still below `desired_confidence`, the next keyword is
          asked about, carrying the combined text forward.
        * Any other answer moves on to the next keyword with the text unchanged.

    Args:
        relevant_words: sequence of entries whose first element is the keyword text.
        index: position of the keyword the user was just asked about.
        raw_user_input: the user's text collected so far.

    Returns:
        None.
    """
    answer = input()
    if answer == "exit": return
    if answer.strip().lower() in ("true", "yes", "y"):
        print("Please provide more explanation about '" + relevant_words[index][0] + "'")
        answer = input()
        if answer == "exit": return
        # Keep the text and the model input apart: the text is what gets carried
        # into the next round, the processed form is only for this evaluation.
        combined_text = str(raw_user_input) + " " + str(answer)
        model_input = process_user_input([combined_text])

        # [0] is a placeholder target; only the class probabilities are used.
        new_results = evaluatePerformance(nnModel, sess, model_input, [0], 1, 1-confidence)
        highest_confidence = new_results["Top3"][0][0]
        print("Results: ", new_results["Top3"][0])
        if highest_confidence[1] < desired_confidence:
            ask_user(relevant_words, index + 1, combined_text)

    else:
        ask_user(relevant_words, index + 1, raw_user_input)

In [24]:
def process_user_input(user_input, max_length=128):
    """Clean the given texts and convert them into word lists for the model.

    Args:
        user_input: list of raw text strings.
        max_length: length passed to `DataHandler.textIntoWordList`; defaults
            to 128, the value that was previously hard-coded here.
            NOTE(review): presumably this has to match configs["maxLength"] — confirm.

    Returns:
        numpy array built from the first element of the `textIntoWordList` result.
    """
    cleaned = DataLoader.DataHandler.cleanTextData(user_input)
    word_lists = DataLoader.DataHandler.textIntoWordList(cleaned, max_length)[0]

    return np.array(word_lists)

def get_relevant_words(confidence_top3, amount, tfidf_words):
    """Gather the leading keyword entries of every category in `confidence_top3`.

    Each item of `confidence_top3` starts with a category name. The name is
    mapped to an index through the global `ClassDict`, and the first `amount`
    entries of `tfidf_words[index]` are appended to the result, category by
    category in the order given.
    """
    return [
        entry
        for ranked in confidence_top3
        for entry in tfidf_words[ClassDict[ranked[0]]][0:amount]
    ]

# Example

Let's say confidence is low and we want to ask more about the user's situation according to TF-IDF keywords.
raw_user_input is the original input of user

user enters input until confidence reaches a high enough value
(maybe there should be option for exiting early)

In this example, confidence is 0.55 for most confident category, we want it to be higher than desired confidence value, so we keep asking about relevant words

In [25]:
# Load the keyword tables from the "default" sub-folder of the TF-IDF results (one table per file).
tfidf_words = get_word_imps_all_classes("data//icliniq//iCliniq_14K//tfidf_results//")

In [26]:
# options:
desired_confidence = 0.9  # the follow-up dialogue continues while the top class stays below this

raw_user_input = "my hair is transparent"
user_input = process_user_input([raw_user_input])

# [0] is a placeholder target rather than a real label, so the printed accuracy is not meaningful here.
results = evaluatePerformance(
    nnModel=nnModel,
    sess=sess,
    testData=user_input,
    testTarget=[0],
    batchSize=1,
    uncertaintyCoef=1-confidence,
)
print(results["Accuracy"], results["Top3"], sep="\n")

# Top-3 categories of the single input, then the first 5 keyword entries of each of them.
confidence_top3 = results["Top3"][0]

relevant_words = get_relevant_words(confidence_top3=confidence_top3, amount=5, tfidf_words=tfidf_words)
relevant_words

1.0
[[['Dermatology', 0.55359864], ['Otolaryngology', 0.046154037], ['Ophthalmology', 0.043179028]]]


[['acne', '0.335'],
 ['hair', '0.299'],
 ['cream', '0.268'],
 ['penis', '0.239'],
 ['itching', '0.226'],
 ['ear', '0.738'],
 ['tonsil', '0.203'],
 ['ent', '0.181'],
 ['hearing', '0.163'],
 ['tinnitus', '0.145'],
 ['eye', '0.842'],
 ['eyes', '0.298'],
 ['vision', '0.265'],
 ['lasik', '0.095'],
 ['drops', '0.084']]

In [233]:
# Start the interactive keyword dialogue at the first relevant word; answers are read with input().
ask_user(relevant_words, 0, raw_user_input)

Is 'ecg' keyword related with your situation?
False
Is 'dl' keyword related with your situation?
no
Is 'tsh' keyword related with your situation?
no
Is 'count' keyword related with your situation?
no
Is 'cholesterol' keyword related with your situation?
no
Is 'acne' keyword related with your situation?
True
Please provide more explanation about 'acne'
i have acne
Results:  [['Dermatology', 0.99923104], ['Otolaryngology', 7.039149e-05], ['Orthopaedic Surgery', 7.037149e-05]]
