In [None]:
import os

# Select TensorFlow's asynchronous CUDA allocator. This cell runs before the
# cell that imports TensorFlow -- presumably so the variable is already in the
# environment when TensorFlow sets up its GPU allocator (TODO confirm).
os.environ.update({'TF_GPU_ALLOCATOR': 'cuda_malloc_async'})

In [1]:
# Name of the JSON file that holds the default training configuration; it is
# opened relative to './' when the configuration is loaded.
input_default_json_conf_file = 'cnn_emotion4_training_default.json'

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import datetime
import json

In [3]:
import sys

# Make the project's local packages (under ../library) importable.
# The membership test keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path.
library_dir = '../library'
if library_dir not in sys.path:
    sys.path.append(library_dir)

In [4]:
# Report how many GPU devices TensorFlow lists on this machine.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

Num GPUs Available:  0


In [5]:
## Load the default configuration from the JSON file.
# The context manager closes the file even if json.load() raises, which the
# previous explicit open()/close() pair did not guarantee.
with open(os.path.join('./', input_default_json_conf_file)) as fd:
    DATA = json.load(fd)

# Global variables

In [6]:
## Seed used for the NumPy and TensorFlow random generators
seed_number = 0

## Dataset: base directory and labels file of the training split ...
dataset_base_dir, dataset_labels_file = (
    DATA['dataset_train_base_dir'],
    DATA['dataset_train_labels_file'],
)

## ... and of the test split
dataset_base_test_dir, dataset_labels_test_file = (
    DATA['dataset_test_base_dir'],
    DATA['dataset_test_labels_file'],
)

## Dataset identifier, used as a component of the output path
dataset_name = DATA['dataset_name']

## Training hyperparameters
EPOCAS, BATCH_SIZE = DATA["epochs"], DATA["batch_size"]

## Network architecture. Options listed by the author:
## 'mobilenet_v3', 'efficientnet_b3', 'inception_v3', 'inception_resnet_v2', 'resnet_v2_50'
model_type = DATA["model_type"]

## Root directory under which all outputs are written
output_base_dir = DATA["output_base_dir"]

## Whether a second (fine-tuning) training stage is run after the first one
fine_tuning = DATA["fine_tuning"]

# Input parameters

In [7]:
# Command-line overrides: each flag below replaces the corresponding default
# loaded from the JSON configuration. Every flag takes exactly one value, which
# is the argument that immediately follows it.
cli_flags = (
    '--dataset-train-dir',
    '--dataset-train-file',
    '--dataset-test-dir',
    '--dataset-test-file',
    '--dataset-name',
    '--model',
    '--epochs',
    '--batch-size',
    '--fine-tuning',
    '--output-dir',
)

cli_overrides = {}
for n, arg in enumerate(sys.argv):
    if arg not in cli_flags:
        continue
    if n + 1 >= len(sys.argv):
        # A flag given as the very last argument has no value; this used to
        # surface as a bare IndexError from sys.argv[n+1].
        raise ValueError('Missing value for command-line option ' + arg)
    # When a flag is repeated, the last occurrence wins.
    cli_overrides[arg] = sys.argv[n + 1]

# String-valued options fall back to the value they already have.
dataset_base_dir         = cli_overrides.get('--dataset-train-dir', dataset_base_dir)
dataset_labels_file      = cli_overrides.get('--dataset-train-file', dataset_labels_file)
dataset_base_test_dir    = cli_overrides.get('--dataset-test-dir', dataset_base_test_dir)
dataset_labels_test_file = cli_overrides.get('--dataset-test-file', dataset_labels_test_file)
dataset_name             = cli_overrides.get('--dataset-name', dataset_name)
model_type               = cli_overrides.get('--model', model_type)
output_base_dir          = cli_overrides.get('--output-dir', output_base_dir)

# Typed options are converted only when they were actually supplied.
if '--epochs' in cli_overrides:
    EPOCAS = int(cli_overrides['--epochs'])
if '--batch-size' in cli_overrides:
    BATCH_SIZE = int(cli_overrides['--batch-size'])
if '--fine-tuning' in cli_overrides:
    # Only the text "true" (case-insensitive) enables fine tuning.
    fine_tuning = cli_overrides['--fine-tuning'].lower() == 'true'

# Echo the effective configuration so that it is recorded in the run log.
print('        dataset_base_dir:',dataset_base_dir)
print('     dataset_labels_file:',dataset_labels_file)
print('   dataset_base_test_dir:',dataset_base_test_dir)
print('dataset_labels_test_file:',dataset_labels_test_file)
print('            dataset_name:',dataset_name)
print('              model_type:',model_type)
print('                  EPOCAS:',EPOCAS)
print('              BATCH_SIZE:',BATCH_SIZE)
print('             fine_tuning:',fine_tuning)
print('         output_base_dir:',output_base_dir)

        dataset_base_dir: /media/fernando/Expansion/DATASET/TESE/PATIENT-RECOGNITION/PATIENT-IMAGES/perwi/dataset/train/
     dataset_labels_file: labels-emotion4-v1.csv
   dataset_base_test_dir: /media/fernando/Expansion/DATASET/TESE/PATIENT-RECOGNITION/PATIENT-IMAGES/perwi/dataset/test/
dataset_labels_test_file: labels-emotion4-v1.csv
            dataset_name: perwi
              model_type: mobilenet_v3
                  EPOCAS: 50
              BATCH_SIZE: 32
         output_base_dir: /media/fernando/Expansion/OUTPUTS/DOCTORADO2/cnn_emotion4


# Set the seed of the random number generators


In [8]:
# Seed the Keras/TensorFlow side first, then NumPy's global generator.
# Both calls use the same seed_number.
tf.keras.utils.set_random_seed(seed_number)
np.random.seed(seed_number)

# Loading the dataset

In [9]:
# Training/validation table, read from the labels CSV inside the training
# directory; its 'filename' and 'label' columns are used further below.
train_labels_path = os.path.join(dataset_base_dir, dataset_labels_file)
train_val_data = pd.read_csv(train_labels_path)
print(train_val_data)

# Label column (kept as a one-column DataFrame) and the number of rows.
Y = train_val_data[['label']]
L = Y.shape[0]

# Test table, read the same way from the test directory.
test_labels_path = os.path.join(dataset_base_test_dir, dataset_labels_test_file)
test_data = pd.read_csv(test_labels_path)
print(test_data)


                   filename     label
0     patient/filename1.png  Negative
1     patient/filename2.png  Negative
2     patient/filename3.png      Pain
3     patient/filename4.png  Negative
4     patient/filename5.png      Pain
..                      ...       ...
626  people/filename349.png    Neutro
627  people/filename350.png    Neutro
628  people/filename351.png    Neutro
629  people/filename352.png    Neutro
630  people/filename354.png  Positive

[631 rows x 2 columns]
                    filename     label
0    patient/filename302.png  Positive
1    patient/filename303.png  Positive
2    patient/filename304.png  Positive
3    patient/filename305.png  Positive
4    patient/filename306.png  Positive
..                       ...       ...
268   people/filename522.png  Positive
269   people/filename525.png    Neutro
270   people/filename526.png  Positive
271   people/filename527.png    Neutro
272   people/filename528.png    Neutro

[273 rows x 2 columns]


# Setting the hold-out train/validation split


In [10]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 hold-out split of the labelled training table.
# The seed is passed explicitly so the partition depends on seed_number itself
# rather than on whatever state NumPy's global generator is in when this cell
# runs; re-running the cell or executing cells out of order therefore yields
# the same train/validation partition.
training_data, validation_data = train_test_split(
    train_val_data,
    test_size=0.2,
    shuffle=True,
    stratify=Y,
    random_state=seed_number,
)

# Data augmentation configuration

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Every generator multiplies the pixel values by 1/255.
pixel_scale = 1./255

# Random geometric transformations applied to the training images only.
augmentation = dict(
    rotation_range=10,
    width_shift_range=0.07,
    height_shift_range=0.07,
    horizontal_flip=True,
    shear_range=1.25,
    zoom_range=[0.75, 1.25],
)

idg = ImageDataGenerator(rescale=pixel_scale, **augmentation)

# Validation and test images are only rescaled.
idg_val = ImageDataGenerator(rescale=pixel_scale)
idg_test = ImageDataGenerator(rescale=pixel_scale)


# Creating output directory

In [12]:
# Fine-tuning runs get a different sub-directory from plain hold-out runs.
run_folder = ('training_validation_holdout_fine_tuning' if fine_tuning
              else 'training_validation_holdout')
output_dir = os.path.join(output_base_dir, dataset_name, run_folder, model_type)

# Create the base directory and the run directory if they do not exist yet.
for folder in (output_base_dir, output_dir):
    os.makedirs(folder, exist_ok=True)

# Create new model

In [13]:
import BodyEmotion4Lib.lib_model as mpp

# Build the network for the selected architecture. create_model also returns
# target_size, which is passed to the data generators as the image size.
model, target_size = mpp.create_model(load_weights=False, model_type=model_type)
model.summary()

# Export the model parameters to parameters_stats.m in the run's output directory.
stats_path = os.path.join(output_dir, 'parameters_stats.m')
mpp.save_model_parameters(model, stats_path);


Loading architecture mobilenet_v3

        url: https://tfhub.dev/google/imagenet/mobilenet_v3_small_100_224/feature_vector/5
target_size: (224, 224)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 keras_layer (KerasLayer)    (None, 1024)              1529968   
                                                                 
 dense (Dense)               (None, 4)                 4100      
                                                                 
Total params: 1,534,068
Trainable params: 4,100
Non-trainable params: 1,529,968
_________________________________________________________________


# Defining the data generators

In [14]:

# One explicit class list shared by all three generators. Without it,
# flow_from_dataframe derives the classes from the frame it is given, so a
# split that happens to lack one label would get a different label -> index
# mapping and be scored against the wrong output units. Sorting reproduces the
# ordering Keras infers on its own when every label is present.
class_names = sorted(train_val_data['label'].dropna().unique())


def _make_generator(datagen, frame, directory, shuffle):
    """Build a categorical image iterator over the rows of ``frame``.

    Args:
        datagen: ImageDataGenerator that loads and transforms the images.
        frame: DataFrame with a "filename" and a "label" column.
        directory: Base directory the filenames are relative to.
        shuffle: Whether the iterator shuffles the rows.

    Returns:
        The iterator returned by ``datagen.flow_from_dataframe``.
    """
    return datagen.flow_from_dataframe(frame,
                                       directory=directory,
                                       target_size=target_size,
                                       x_col="filename",
                                       y_col="label",
                                       classes=class_names,
                                       batch_size=BATCH_SIZE,
                                       class_mode="categorical",
                                       shuffle=shuffle)


# Training data is augmented (idg) and shuffled.
train_data_generator = _make_generator(idg, training_data, dataset_base_dir, shuffle=True)

# Data used only for evaluation keeps its original order, so any per-sample
# prediction stays aligned with the rows of its DataFrame.
valid_data_generator = _make_generator(idg_val, validation_data, dataset_base_dir, shuffle=False)
test_data_generator = _make_generator(idg_test, test_data, dataset_base_test_dir, shuffle=False)

Found 504 validated image filenames belonging to 4 classes.
Found 127 validated image filenames belonging to 4 classes.
Found 273 validated image filenames belonging to 4 classes.


# Training and validation


In [15]:
import matplotlib.pyplot as plt


def _compile_classifier(net):
    """Compile ``net`` with the loss, optimizer and metric shared by both training stages."""
    net.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['categorical_accuracy'])


def _new_tensorboard(prefix):
    """Return ``(log_dir, callback)`` for a TensorBoard run named ``prefix`` plus a timestamp."""
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    run_dir = os.path.join(output_dir, "logs", "fit", prefix + stamp)
    return run_dir, tf.keras.callbacks.TensorBoard(log_dir=run_dir, histogram_freq=1)


def _fit_and_record(net, callbacks, history_file):
    """Train ``net`` for EPOCAS epochs and save its history as ``history_file`` in output_dir."""
    run = net.fit(train_data_generator,
                  steps_per_epoch=STEPS_BY_EPOCHS,
                  epochs=EPOCAS,
                  validation_data=valid_data_generator,
                  callbacks=callbacks,
                  verbose=1)
    mpp.save_model_history(run,
                           os.path.join(output_dir, history_file),
                           show=False,
                           labels=['categorical_accuracy', 'loss'])
    return run


# One step per batch of the training generator.
STEPS_BY_EPOCHS = len(train_data_generator)

# ---- Stage 1: train the freshly created model ----
_compile_classifier(model)

# The checkpoint keeps only the weights of the epoch with the lowest val_loss.
best_model_file = os.path.join(output_dir, 'model.h5')
checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=best_model_file,
                                                save_weights_only=True,
                                                monitor='val_loss',
                                                save_best_only=True,
                                                verbose=1)

log_dir, tensorboard_callback = _new_tensorboard('coarse_tunning-')

history = _fit_and_record(model, [checkpoint, tensorboard_callback], "historical.csv")

# ---- Stage 2 (optional): rebuild the model and train every layer ----
if fine_tuning:
    # Drop the first-stage model and its history before building the new one.
    tf.keras.backend.clear_session()
    del model, history

    # NOTE(review): load_weights=False is passed together with file_of_weight;
    # presumably create_model restores the stage-1 checkpoint from
    # file_of_weight -- verify in lib_model.
    model, target_size = mpp.create_model(model_type=model_type,
                                          load_weights=False,
                                          file_of_weight=best_model_file)

    # Unfreeze all layers; the model must be recompiled for this to take effect.
    for layer in model.layers:
        layer.trainable = True
    _compile_classifier(model)

    model.summary()

    log_dir, tensorboard_callback = _new_tensorboard('fine_tunning-')

    # NOTE(review): the same ModelCheckpoint instance is reused here; whether
    # its best val_loss carries over from stage 1 depends on the Keras
    # version -- confirm this is the intended behaviour.
    history = _fit_and_record(model,
                              [checkpoint, tensorboard_callback],
                              "historical-fine_tuning.csv")

Epoch 1/50
Epoch 1: val_loss improved from inf to 1.16100, saving model to /media/fernando/Expansion/OUTPUTS/DOCTORADO2/cnn_emotion4/perwi/training_validation_holdout/mobilenet_v3/model.h5
Epoch 2/50
Epoch 2: val_loss improved from 1.16100 to 1.06395, saving model to /media/fernando/Expansion/OUTPUTS/DOCTORADO2/cnn_emotion4/perwi/training_validation_holdout/mobilenet_v3/model.h5
Epoch 3/50
Epoch 3: val_loss improved from 1.06395 to 1.01001, saving model to /media/fernando/Expansion/OUTPUTS/DOCTORADO2/cnn_emotion4/perwi/training_validation_holdout/mobilenet_v3/model.h5
Epoch 4/50
Epoch 4: val_loss improved from 1.01001 to 0.97112, saving model to /media/fernando/Expansion/OUTPUTS/DOCTORADO2/cnn_emotion4/perwi/training_validation_holdout/mobilenet_v3/model.h5
Epoch 5/50
Epoch 5: val_loss improved from 0.97112 to 0.94727, saving model to /media/fernando/Expansion/OUTPUTS/DOCTORADO2/cnn_emotion4/perwi/training_validation_holdout/mobilenet_v3/model.h5
Epoch 6/50
Epoch 6: val_loss improved f

# Evaluate best model

In [16]:
# Restore the best checkpoint, then measure it on every data split.
model.load_weights(best_model_file)
data_results = {}

# (title printed, key prefix in the JSON report, generator to evaluate)
# NOTE(review): train_data_generator is built from the augmenting generator
# idg, so the "training" metrics are measured on augmented images.
evaluation_plan = (
    ('training',   'train_', train_data_generator),
    ('validation', 'val_',   valid_data_generator),
    ('testing',    'test_',  test_data_generator),
)

for split_title, key_prefix, split_generator in evaluation_plan:
    # evaluate() runs first; metrics_names is read only afterwards.
    results = model.evaluate(split_generator)
    results = dict(zip(model.metrics_names, results))
    print(split_title, results, "\n\n")
    data_results.update({key_prefix + key: value for key, value in results.items()})

data_results['number_of_parameters'] = mpp.get_model_parameters(model)

# JSON report with the metrics of all three splits.
report_path = os.path.join(output_dir, "training_data_results.json")
with open(report_path, 'w') as f:
    json.dump(data_results, f, indent=4)

# Text report with the test metrics only: after the loop, `results` still
# holds the metrics of the last split evaluated, i.e. the test split.
with open(os.path.join(output_dir, "results_testing.txt"), 'w') as f:
    f.writelines('%s=%s;\n' % item for item in results.items())

tf.keras.backend.clear_session()


training {'loss': 0.4786042273044586, 'categorical_accuracy': 0.8373016119003296} 


validation {'loss': 0.8832107782363892, 'categorical_accuracy': 0.6535432934761047} 


testing {'loss': 1.062915563583374, 'categorical_accuracy': 0.5567765831947327} 




In [17]:

# Give the checkpoint its final, architecture-specific file name.
tmp_name = 'model_' + model_type + '.h5'

# os.replace overwrites an existing destination on every platform, whereas
# os.rename raises on Windows when a file from a previous run is already there.
os.replace(best_model_file, os.path.join(output_dir, tmp_name))