In [1]:
import os, sys, platform

# Variables

In [2]:
# ---- Reproducibility --------------------------------------------------------
# Seed shared by every random number generator used in this notebook.
SEED_NUMBER = 0

# ---- Cross-validation -------------------------------------------------------
# Number of folds, and the JSON file (inside the output directory) that records
# the metrics of the folds finished so far.
K = 5
FOLD_STATUS_FILE = 'fold_status.json'

# ---- Training hyperparameters -----------------------------------------------
EPOCHS, BATCH_SIZE = 80, 32

# ---- Network architecture ---------------------------------------------------
# Alternative value: 'model_sector4'
MODEL_TYPE = 'model_max'

# ---- Dataset ----------------------------------------------------------------
# Alternative value: 'mcfer2023'
DATASET_NAME = 'ck+48'

# ---- Output -----------------------------------------------------------------
# Root directory under which all results are written.
OUTPUT_BASE_DIR = 'output'

# External libraries

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
import datetime

  from ._conv import register_converters as _register_converters
  'nearest': pil_image.NEAREST,
  'bilinear': pil_image.BILINEAR,
  'bicubic': pil_image.BICUBIC,
  'hamming': pil_image.HAMMING,
  'box': pil_image.BOX,
  'lanczos': pil_image.LANCZOS,


In [4]:
# Report how many GPU devices TensorFlow can see in this environment.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))


Num GPUs Available:  0


# Local library


In [5]:
# Make the project-local helper modules importable; the two directories are
# appended to the import search path in this order.
sys.path.extend(['library', '../src'])

import tools_dataset as toolsd
import tools_model   as toolsm

# Command-line overrides (when run as a script)

In [6]:
# Command-line overrides for the defaults defined in the variables cell.
# sys.argv is scanned for "--flag value" pairs; every entry that is not one of
# the recognised flags below is ignored.
_CLI_FLAGS = ('--model', '--epochs', '--batch-size', '--dataset', '--output-base-dir')

for n in range(len(sys.argv)):
    cli_flag = sys.argv[n]
    if cli_flag not in _CLI_FLAGS:
        continue

    # A recognised flag must be followed by its value. Without this check a
    # flag given as the very last argument failed with a bare IndexError that
    # did not say which option was incomplete.
    if n + 1 >= len(sys.argv):
        raise ValueError('Missing value for command-line option ' + cli_flag)
    cli_value = sys.argv[n + 1]

    if cli_flag == '--model':
        MODEL_TYPE = cli_value
    elif cli_flag == '--epochs':
        EPOCHS = int(cli_value)          # raises ValueError if not an integer
    elif cli_flag == '--batch-size':
        BATCH_SIZE = int(cli_value)      # raises ValueError if not an integer
    elif cli_flag == '--dataset':
        DATASET_NAME = cli_value
    elif cli_flag == '--output-base-dir':
        OUTPUT_BASE_DIR = cli_value

# Echo the effective configuration so it is recorded with the run's output.
print('     MODEL_TYPE:',MODEL_TYPE)
print('         EPOCHS:',EPOCHS)
print('     BATCH_SIZE:',BATCH_SIZE)
print('   DATASET_NAME:',DATASET_NAME)
print('OUTPUT_BASE_DIR:',OUTPUT_BASE_DIR)

     MODEL_TYPE: model_max
         EPOCHS: 80
     BATCH_SIZE: 32
   DATASET_NAME: ck+48
OUTPUT_BASE_DIR: output


# Load data

In [7]:
# Ask the project helper for everything that describes the selected dataset:
# the train/test CSV index files, the image base directory, the network input
# shape and the number of outputs.
(
    dataset_csv_train_file,
    dataset_csv_test_file,
    dataset_train_base_dir,
    input_shape,
    nout,
) = toolsd.load_dataset(dataset_name=DATASET_NAME)

for caption, csv_path in (('dataset_csv_train_file:', dataset_csv_train_file),
                          (' dataset_csv_test_file:', dataset_csv_test_file)):
    print(caption, csv_path)

# Results of this run are grouped in a per-dataset sub-directory.
# NOTE(review): this reassigns OUTPUT_BASE_DIR from itself, so re-running the
# cell without re-running the configuration nests the dataset name again.
OUTPUT_BASE_DIR = os.path.join(OUTPUT_BASE_DIR, DATASET_NAME)


dataset_csv_train_file: /mnt/boveda/DATASETs/FACE-EMOTION/CK+48/CK+48/labels.csv
 dataset_csv_test_file: /mnt/boveda/DATASETs/FACE-EMOTION/CK+48/CK+48/labels.csv


# Set seed of random variables


In [8]:
# Seed NumPy's global generator explicitly, then let Keras seed the random
# number generators it manages, both from the same configured value.
seed_value = int(SEED_NUMBER)
np.random.seed(SEED_NUMBER)
tf.keras.utils.set_random_seed(seed_value)

# Setting the cross-validation kfold


In [9]:
from sklearn.model_selection import KFold, StratifiedKFold

# Directory collecting every artefact of this run:
# <OUTPUT_BASE_DIR>/skfold<K>_<MODEL_TYPE>
output_dir = os.path.join(OUTPUT_BASE_DIR, f'skfold{K}_{MODEL_TYPE}')

# Stratified K-fold splitter; shuffling is seeded so the folds are the same
# on every run (which the resume logic further down relies on).
kf = StratifiedKFold(n_splits=K, shuffle=True, random_state=SEED_NUMBER)

# JSON file in which the per-fold validation metrics are accumulated.
fold_status_path = os.path.join(output_dir, FOLD_STATUS_FILE)

print('      output_dir:', output_dir)
print('fold_status_path:', fold_status_path)

      output_dir: output/ck+48/skfold5_model_max
fold_status_path: output/ck+48/skfold5_model_max/fold_status.json


# Loading data of dataset

In [10]:
# Read the CSV index of the training set (file names and their labels).
train_data = pd.read_csv(dataset_csv_train_file)
print(train_data)

# Labels, kept as a one-column DataFrame; used to stratify the folds.
Y = train_data[['label']]

# Number of samples in the training index.
L = Y.shape[0]

                           filename     label
0       anger/S010_004_00000017.png     anger
1     sadness/S011_002_00000020.png   sadness
2        fear/S011_003_00000012.png      fear
3    surprise/S010_002_00000012.png  surprise
4       happy/S010_006_00000013.png     happy
..                              ...       ...
745  surprise/S137_001_00000013.png  surprise
746  surprise/S137_001_00000014.png  surprise
747  surprise/S138_004_00000011.png  surprise
748  surprise/S138_004_00000012.png  surprise
749  surprise/S138_004_00000013.png  surprise

[750 rows x 2 columns]


# Data augmentation configuration

In [11]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random transformations applied to training images only.
augmentation_options = dict(
    rotation_range=10,
    width_shift_range=0.07,
    height_shift_range=0.07,
    horizontal_flip=True,
    shear_range=1.25,
    zoom_range=[0.9, 1.1],
)

# Training generator: rescale pixel values by 1/255 and augment.
idg = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Validation generator: rescale only, no augmentation.
idg_val = ImageDataGenerator(rescale=1./255)



# Auxiliary function

In [12]:
def get_model_name(k):
    """Return the weights file name used for fold ``k``: ``model_<k>.h5``."""
    return f'model_{k!s}.h5'

# Creating output directory

In [13]:

# Create the output directory (and any missing parents). An already existing
# directory is fine; any other failure (permissions, invalid path, ...) is
# raised here instead of being silently swallowed and surfacing later when the
# first result file is written.
os.makedirs(output_dir, exist_ok=True)


# Cross-validation

In [14]:
import tools_model as mpp
import matplotlib.pyplot as plt


# Pre-compute the train/validation row indices of every fold. A zero array of
# length L is passed as the feature placeholder; the split is stratified on Y.
list_train_index=[];
list_val_index=[];
for train_index, val_index in kf.split(np.zeros(L),Y):
    list_train_index.append(train_index);
    list_val_index.append(val_index);

import json
# Validation metrics of the folds finished so far (one entry per fold);
# written to fold_status_path after every fold.
data_fold =  {'VALIDATION_ACCURACY': [],'VALIDATION_LOSS': [] };

# Folds are numbered starting at 1.
fold_var=1;

# Resume support: if a status file from a previous run exists, reload its
# metrics and continue with the first fold that has no recorded accuracy.
if os.path.isfile(fold_status_path):
    # Read JSON file
    with open(fold_status_path) as data_file:
        data_fold = json.load(data_file)
        fold_var=len(data_fold['VALIDATION_ACCURACY'])+1;
    
while fold_var<=K:
    # Rows of the CSV index belonging to this fold (fold_var is 1-based).
    training_data   = train_data.iloc[list_train_index[fold_var-1]]
    validation_data = train_data.iloc[list_val_index[fold_var-1]]

    print('\nFold',fold_var,'of',K);
    print('length train:',len(list_train_index[fold_var-1]),'elements');
    print('length val  :',len(list_val_index[fold_var-1]),'elements');

    # CREATE NEW MODEL
    # A fresh network is built for every fold; the helper also returns the
    # image size that the generators below must resize to.
    # NOTE(review): file_of_weight='' presumably means "no pretrained weights"
    # -- confirm against tools_model.create_model.
    
    model, target_size = mpp.create_model(  file_of_weight='',
                                            model_type=MODEL_TYPE,
                                            input_shape=input_shape,
                                            nout=nout)
    
    model.summary()
    
    # Training batches: augmented images read from dataset_train_base_dir,
    # with one-hot ("categorical") labels taken from the "label" column.
    train_data_generator = idg.flow_from_dataframe(training_data, 
                                                   directory = dataset_train_base_dir,
                                                   target_size=target_size,
                                                   x_col = "filename", 
                                                   y_col = "label",
                                                   batch_size=BATCH_SIZE,
                                                   class_mode="categorical",
                                                   shuffle = True);
    
    # Validation batches: same source directory, rescaling only.
    # NOTE(review): shuffle=True is also set here; shuffling is not needed for
    # evaluation -- confirm it is intentional.
    valid_data_generator  = idg_val.flow_from_dataframe(validation_data, 
                                                    directory = dataset_train_base_dir,
                                                    target_size=target_size,
                                                    x_col = "filename", 
                                                    y_col = "label",
                                                    batch_size=BATCH_SIZE,
                                                    class_mode="categorical",
                                                    shuffle = True)
    
    #STEPS_BY_EPOCHS=len(train_data_generator);
    

    
    # COMPILE NEW MODEL
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['categorical_accuracy'])
    
    # CREATE CALLBACKS
    # Checkpoint: write only the weights, and only when the validation
    # categorical accuracy improves, to <output_dir>/model_<fold>.h5.
    best_model_file=os.path.join(output_dir,get_model_name(fold_var));
    checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath=best_model_file, 
                                                    save_weights_only=True,
                                                    monitor='val_categorical_accuracy', 
                                                    save_best_only=True, 
                                                    verbose=1);
    
    # TensorBoard logs go to a timestamped directory, so each fold (and each
    # re-run) gets its own log folder.
    log_dir = os.path.join(output_dir,"logs","fit" , datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

    
    # FIT THE MODEL
    # The checkpoint callback keeps the best weights seen during training.
    history = model.fit(train_data_generator,
                        #steps_per_epoch=STEPS_BY_EPOCHS,
                        epochs=EPOCHS,
                        validation_data=valid_data_generator,
                        callbacks=[checkpoint,tensorboard_callback],
                        verbose=1
                       );
    print('***** Saving the history *****')

    #PLOT HISTORY
    # NOTE(review): presumably writes the accuracy/loss curves of this fold to
    # the given CSV path without displaying them (show=False) -- confirm
    # against tools_model.save_model_history.
    mpp.save_model_history(history,os.path.join(output_dir,"historical_"+str(fold_var)+".csv"), labels=['categorical_accuracy','loss'],show=False);
    
    print('***** Loding the best model file ... *****')
    # LOAD BEST MODEL to evaluate the performance of the model
    # (the model in memory holds the last-epoch weights, not the best ones).
    model.load_weights(best_model_file);
    
    print('***** Evaluating the model ... *****')
    # evaluate() returns the metric values as a list; pair them with their
    # names so they can be looked up by key below.
    results = model.evaluate(valid_data_generator)
    results = dict(zip(model.metrics_names,results))
    print(results,"\n\n");
    
    data_fold['VALIDATION_ACCURACY'].append(results['categorical_accuracy'])
    data_fold['VALIDATION_LOSS'].append(results['loss'])
    
    # Data fold
    # Persist the metrics after every fold so an interrupted run can resume
    # from the next fold instead of starting over.
    with open(fold_status_path, 'w') as f:
        json.dump(data_fold, f);

    # Reset the Keras global state before the next fold's model is built.
    tf.keras.backend.clear_session()
    
    fold_var += 1


Fold 1 of 5
length train: 600 elements
length val  : 150 elements
Loaded model_max
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 96, 96, 16)        5824      
                                                                 
 conv2d_1 (Conv2D)           (None, 96, 96, 4)         5188      
                                                                 
 max_pooling2d (MaxPooling2D  (None, 48, 48, 4)        0         
 )                                                               
                                                                 
 conv2d_2 (Conv2D)           (None, 48, 48, 16)        5200      
                                                                 
 conv2d_3 (Conv2D)           (None, 48, 48, 4)         3140      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 2

NameError: name 'np' is not defined

In [None]:
# Summarise the cross-validation run: the helper is given the per-fold metrics
# and the CSV path, and returns the mean validation accuracy printed below.
fpath = os.path.join(output_dir, "final_stats.csv")
mean_val_acc = mpp.save_model_stat_kfold(data_fold, fpath)

# `model` is only bound inside the fold loop. When the run is resumed and every
# fold is already recorded in the status file, the loop body never executes and
# `model` would be undefined here (NameError). In that case rebuild the network
# of the last fold and restore its checkpointed weights, which is the state
# `model` has after a complete, uninterrupted run.
if 'model' not in globals():
    model, _unused_target_size = mpp.create_model(file_of_weight='',
                                                  model_type=MODEL_TYPE,
                                                  input_shape=input_shape,
                                                  nout=nout)
    model.load_weights(os.path.join(output_dir, get_model_name(K)))

# NOTE(review): presumably writes a report about the model's parameters to the
# given text file -- confirm against tools_model.save_model_parameters.
mpp.save_model_parameters(model, os.path.join(output_dir, 'parameters_stats.txt'))

print(mean_val_acc)