### Training the model

In [11]:
import IPython
from classes.vqa_model_trainer import VqaModelTrainer
from common.model_utils import get_trainable_params_distribution
from common.functions import get_highlighted_function_code
from common.settings import data_access
from common.utils import VerboseTimer
from data_access.model_folder import ModelFolder
from classes.DataGenerator import DataGenerator

In [2]:
import logging
import vqa_logger  # NOTE(review): never referenced in the visible cells; presumably imported for its logging-setup side effects — confirm
# Logger for this notebook's own messages (not used by the cells visible here).
logger = logging.getLogger(__name__)

In [3]:
# Folders of previously trained models (named by run timestamp).
# NOTE(review): these are absolute, machine-specific paths; consider moving them to shared settings.
best_model_location = 'C:\\Users\\Public\\Documents\\Data\\2019\\models\\20190223_2239_45\\'
alternative_model_location = 'C:\\Users\\Public\\Documents\\Data\\2019\\models\\20190222_1346_47\\'

# Choose the folder to continue training from. The alternative gets its own name
# so that `model_location` is assigned exactly once (previously it was assigned
# and then immediately overwritten).
model_location = best_model_location
model_folder = ModelFolder(model_location)
model_folder

ModelFolder(folder=C:\Users\Public\Documents\Data\2019\models\20190223_2239_45)

### Loading the model to train:

In [4]:
# Training configuration for this run.
# NOTE(review): `epochs` is not passed to anything in the visible cells — confirm it is still needed.
epochs = 1
batch_size = 75
augmentations = 10

# Build the trainer around the model stored in `model_folder`.
mt = VqaModelTrainer(
    model_folder,
    augmentations=augmentations,
    batch_size=batch_size,
    data_access=data_access,
)

[2019-03-15 09:17:08][common.utils][DEBUG] Loading Model: 0:00:03.601068


#### Let's take a look at the parameters:

In [5]:
# Show how the trainable parameters are distributed over the model's layers
# (the recorded output lists per-layer counts and the overall total).
get_trainable_params_distribution(mt.model)
# Alternative view, currently disabled:
# mt.model.summary()

Got a total of 120,220 trainable parameters


Unnamed: 0,index,layer,trainable_params,pretty_value
0,4,dense_1/kernel:0,81920,81920
1,0,model_output_sigmoid_dense/kernel:0,27328,27328
2,2,embedding_batch_normalization/gamma:0,4608,4608
3,7,embedding_batch_normalization/beta:0,4608,4608
4,3,model_output_sigmoid_dense/bias:0,1708,1708


#### And a look at the data:

In [18]:
from IPython.display import display_html

# Metadata holds (at least) the 'answers' and 'words' tables used below.
meta = data_access.load_meta()
df_meta_answers, df_words = meta['answers'], meta['words']

# The processed model-input data, used further down for the label sanity check.
df_data = data_access.load_processed_data()

def display_side_by_side(*args):
    """Render several tables next to each other in the notebook output.

    Args:
        *args: Objects exposing ``to_html()`` (e.g. pandas DataFrames). They are
            rendered left to right in the order given.

    The HTML of all tables is concatenated and every opening ``<table`` tag gets
    an inline ``display:inline`` style so the tables flow horizontally instead of
    stacking vertically. The result is handed to ``display_html`` as raw HTML.
    """
    html_str = ''.join(df.to_html() for df in args)
    # Patch only the opening tags. Replacing the bare word 'table' would also
    # rewrite the closing '</table>' tags (producing invalid markup) and corrupt
    # any cell whose text happens to contain the word "table".
    inline_html = html_str.replace('<table', '<table style="display:inline"')
    display_html(inline_html, raw=True)
    
# Preview ten random rows of each metadata table next to each other.
# The sampling is unseeded, so the rows shown differ between runs.
answers_preview = df_meta_answers.sample(10)
words_preview = df_words.sample(10)
display_side_by_side(answers_preview, words_preview)



[2019-03-15 09:22:32][data_access.api][DEBUG] loading processed data from:
C:\Users\avitu\Documents\GitHub\VQA-MED\VQA-MED\VQA.Python\data\model_input.parquet
[2019-03-15 09:22:32][data_access.api][DEBUG] loading parquet from:
C:\Users\avitu\Documents\GitHub\VQA-MED\VQA-MED\VQA.Python\data\model_input.parquet
[2019-03-15 09:22:35][common.utils][DEBUG] Loading parquet: 0:00:03.031529
[2019-03-15 09:22:36][common.utils][DEBUG] Converting to pandas: 0:00:00.541541


Unnamed: 0,answer
1043,metastatic malignant melanoma
1668,citrobacter koseri osteomyelitis/discitis
1487,chordoma
826,eosinophilic esophagitis
486,hypertensive basal ganglia hemorrhage
783,cecal volvulus
491,blowout fracture orbit
1666,rickets due renal failure
1683,spontaneous pneumomediastinum
391,pediatric depressed skull fracture

Unnamed: 0,word
1174,endograft
352,paget
171,long
1696,augmentation
1478,persistent
1810,trichobezoar
1218,injection
771,osteoma
329,lucid
2079,pneumopericardium


### Overview of preparations for training:

##### The functions for getting the features & labels:

In [15]:
from common.functions import get_features, sentences_to_hot_vector, hot_vector_to_words

# Displayable source of the helpers that build labels and features (comments removed).
code_generate_data = get_highlighted_function_code(DataGenerator._generate_data, remove_comments=True)
code_get_features = get_highlighted_function_code(get_features, remove_comments=True)
code_hot_vector = get_highlighted_function_code(sentences_to_hot_vector, remove_comments=True)

# Each entry pairs a heading with the source shown right below it.
sections = (
    ('Getting the label using a hot vector\n', code_generate_data),
    ('\n\nThe underlying method:\n', code_hot_vector),
    ('\n\nGetting the features using question embeding concatenation:\n', code_get_features),
)
for heading, highlighted_source in sections:
    print(heading)
    IPython.display.display(highlighted_source)

Getting the label using a hot vector





The underlying method:





Getting the features using question embeding concatenation:



##### Example of the hot vector of an answer (AKA the label)

In [10]:
df = mt.data_train

class_df = mt.class_df
class_count = len(class_df)

# For every training answer, keep the entries of `class_df` that equal one of the
# answer's lower-cased, whitespace-separated tokens; then collect their index labels.
classes_indices_df = [class_df.loc[class_df.isin(ans.lower().split())] for ans in df.answer]
classes_indices = [list(d.index) for d in classes_indices_df]

idx_sample = 9
# `classes_indices` is a plain list built in row order, i.e. it is positional.
# Read the answer by position as well (`.iloc`) so both lines refer to the same row
# even when the training frame's index is not 0..n-1.
print(df.answer.iloc[idx_sample])
classes_indices[idx_sample]

t2


[]

##### We will transform the sentences into vectors and back using the following:

In [11]:
# Show the source of the inverse transform, this time keeping its comments.
code = get_highlighted_function_code(hot_vector_to_words, remove_comments=False)
IPython.display.display(code)

##### Check it looks sane by inverting the binarization:

In [29]:
prediction_vector = model_folder.prediction_vector
print(f'prediction data name: {model_folder.prediction_data_name}')

# Build the labels inside this cell. Previously `categorial_labels` was only
# defined by a commented-out line, so the cell raised NameError on a fresh kernel
# (Restart & Run All) and only worked thanks to leftover kernel state.
# NOTE(review): call restored verbatim from the commented-out line — confirm the
# signature/return order of DataGenerator._generate_data and that running it on
# the full `df_data` is affordable.
arr_one_hot_vector, categorial_labels = DataGenerator._generate_data(df_data, prediction_vector=prediction_vector)

idx = 0
# NOTE(review): `.loc` is a label lookup while `categorial_labels[idx]` is
# positional; this assumes `df_data` keeps a default 0..n-1 index — confirm.
answer = df_data.answer.loc[idx]
print(f'The sentence:\n{answer}')

# Map the label vector back to words; it should reproduce the sentence above.
one_hot_vector = categorial_labels[idx]
label_words = hot_vector_to_words(one_hot_vector, prediction_vector)
print('\n\nThe reversed answer from hot vector:')
label_words

prediction data name: answers
The sentence:
cta - ct angiography


The reversed answer from hot vector:


943    cta - ct angiography
Name: answer, dtype: object

In [13]:
# Run the training; the returned history is handed to `mt.save` in the save cell.
# Long-running: the recorded run logged roughly 27 minutes for this step.
history = mt.train()

[2019-02-22 12:50:43][DEBUG] Getting 2000 validation features: 0:00:16.860666
[2019-02-22 12:50:44][DEBUG] Getting 2000 validation labels: 0:00:00.209300
[2019-02-22 12:50:44][DEBUG] loading processed data from:
C:\Users\avitu\Documents\GitHub\VQA-MED\VQA-MED\VQA.Python\data\model_input.parquet
[2019-02-22 12:50:44][DEBUG] loading parquet from:
C:\Users\avitu\Documents\GitHub\VQA-MED\VQA-MED\VQA.Python\data\model_input.parquet
[2019-02-22 12:50:46][DEBUG] Loading parquet: 0:00:02.802816
[2019-02-22 12:50:46][DEBUG] Converting to pandas: 0:00:00.016047
[2019-02-22 12:50:46][DEBUG] Loading augmentations:
C:\Users\avitu\Documents\GitHub\VQA-MED\VQA-MED\VQA.Python\data\augmentations.parquet
[2019-02-22 12:50:46][DEBUG] loading parquet from:
C:\Users\avitu\Documents\GitHub\VQA-MED\VQA-MED\VQA.Python\data\augmentations.parquet
[2019-02-22 12:50:47][DEBUG] Loading parquet: 0:00:00.021333
[2019-02-22 12:50:47][DEBUG] Converting to pandas: 0:00:00.008826


ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long long'

Exception ignored in: 'pandas._libs.lib.is_bool_array'
ValueError: Buffer dtype mismatch, expected 'Python object' but got 'long long'


[2019-02-22 12:50:48][DEBUG] Expected shape: [(None, 4608, 1), (None, None, None, 3)]
[2019-02-22 12:50:48][DEBUG] ---------------------------------------------------------------------------
[2019-02-22 12:50:48][DEBUG] Actual training shape:((75, 4608, 1), (75, 224, 224, 3))
[2019-02-22 12:50:48][DEBUG] Actual Validation shape:((2000, 4608, 1), (2000, 224, 224, 3))
[2019-02-22 12:50:48][DEBUG] ---------------------------------------------------------------------------
[2019-02-22 12:50:48][DEBUG] Train Labels shape:(75, 2119)
[2019-02-22 12:50:48][DEBUG] Validation Labels shape:(2000, 2119)
Epoch 1/1




[2019-02-22 13:18:13][DEBUG] Training Model: 0:27:29.132835


### Save trained model:

In [14]:
# Persist the trained model together with its training history; VerboseTimer
# logs how long the save took (see the "Saving trained Model" line in the output).
with VerboseTimer("Saving trained Model"):
    # NOTE: this rebinds `model_folder` to the value returned by `mt.save`. In the
    # recorded run that is a new timestamped folder, not the one loaded at the top,
    # so cells re-run after this point see a different `model_folder`.
    model_folder = mt.save(mt.model, mt.model_folder, history)


[2019-02-22 13:19:24][DEBUG] model saved
[2019-02-22 13:19:24][DEBUG] saving prediction vector
[2019-02-22 13:19:24][DEBUG] saved prediction vector
[2019-02-22 13:19:24][DEBUG] Writing Summary
[2019-02-22 13:19:24][DEBUG] Done Writing Summary
[2019-02-22 13:19:24][DEBUG] Saving image
[2019-02-22 13:19:28][DEBUG] Image saved ('C:\Users\Public\Documents\Data\2019\models\20190222_1319_20\model.png')
[2019-02-22 13:19:28][DEBUG] Saving History
[2019-02-22 13:19:28][DEBUG] History saved to 'C:\Users\Public\Documents\Data\2019\models\20190222_1319_20\model_history.pkl'
[2019-02-22 13:19:28][DEBUG] Saving trained Model: 0:00:08.050156
[2019-02-22 13:19:28][INFO] Summary: C:\Users\Public\Documents\Data\2019\models\20190222_1319_20\model_summary.txt
Image: C:\Users\Public\Documents\Data\2019\models\20190222_1319_20\model.png
History: C:\Users\Public\Documents\Data\2019\models\20190222_1319_20\model_history.pkl

[2019-02-22 13:19:28][INFO] model_location = 'C:\Users\Public\Documents\Data\2019\mo

In [15]:
# Location of the model file that was just saved.
print(model_folder.model_path)

C:\Users\Public\Documents\Data\2019\models\20190222_1319_20\vqa_model.h5
