## Càrrega de llibreries

In [None]:
#Llibreries requerides
import os
import time
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img, ImageDataGenerator
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input as resnet50_in_prep
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Input, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras.utils import to_categorical
from keras.utils.vis_utils import plot_model
from tensorflow.keras.callbacks  import EarlyStopping, ModelCheckpoint, Callback
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
#Additional library that provides models based on Squeeze-and-Excitation architectures
!pip install git+https://github.com/qubvel/classification_models.git

from classification_models.keras import Classifiers

## Definició de constants

In [None]:
# Size (in pixels) of the images to read
IM_WIDTH, IM_HEIGHT = 224, 224
SIZE = IM_WIDTH, IM_HEIGHT
# Labels, listed by class index: 0-negative, 1-neutral, 2-positive
CLASSES_NAMES = ['Negatiu', 'Neutre', 'Positiu']
NUM_CLASSES = len(CLASSES_NAMES)

In [None]:
# Working directories
# Presumably the Twitter test dataset; not referenced in the visible part of this file
TWITTER_DS_DIR = '/kaggle/input/twittertestdataset/'
# Dataset root used for training: create_datagens reads its 'train/' and 'val/' subfolders
BT4SA_DS_DIR = '/kaggle/input/iteracio3/iteracio3/'
# Presumably the writable output directory; not referenced in the visible part of this file
WORK_DIR = '/kaggle/working/'

## Definició de funcions

### Generació dels models

In [None]:
# build_ResNet50 returns the base model built on ResNet50
def build_ResNet50(weights=None):
    """Build a frozen ResNet50 base model without its classification head.

    Args:
        weights: Forwarded to ``ResNet50`` as ``weights`` (e.g. ImageNet
            pre-trained weights, or None).

    Returns:
        The ResNet50 model with ``include_top=False`` and average pooling,
        with every layer marked as non-trainable.
    """
    # NOTE(review): the shape is given as (IM_WIDTH, IM_HEIGHT, 3); this is
    # only indistinguishable from (height, width, 3) while both constants
    # are equal - confirm before using non-square images.
    image_input = Input(shape=(IM_WIDTH, IM_HEIGHT, 3))
    backbone = ResNet50(
        include_top=False,  # the fully-connected layers are added separately as needed
        weights=weights,
        pooling='avg',
        input_tensor=image_input,
    )
    # Do not train the layers of the base ResNet50 model
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False
    return backbone

def build_SEResNet50(weights=None):
    """Build a frozen SEResNet50 base model and return it with its preprocessor.

    Args:
        weights: Forwarded to the SEResNet50 constructor as ``weights``.

    Returns:
        A ``(model, preprocess_input)`` tuple: the SEResNet50 model created
        with ``include_top=False`` and every layer marked as non-trainable,
        plus the preprocessing function returned by ``Classifiers.get``.
    """
    model_factory, seresnet50_in_prep = Classifiers.get('seresnet50')
    backbone = model_factory(
        input_shape=(IM_WIDTH, IM_HEIGHT, 3),
        weights=weights,
        include_top=False,
    )

    # Do not train the layers of the base model
    for frozen_layer in backbone.layers:
        frozen_layer.trainable = False

    return backbone, seresnet50_in_prep

def create_model(pretrained_model, weights, add_pooling=False):
    """Create the final classifier on top of a pre-built base model.

    The head consists of Dense(1024, relu) -> Dropout(0.4) -> Dense(512, relu)
    -> Dropout(0.4) -> Dense(NUM_CLASSES, softmax), attached to the output of
    ``pretrained_model``.

    Args:
        pretrained_model: Base model whose ``output`` feeds the new head and
            whose ``inputs`` become the inputs of the returned model.
        weights: Path of a weights file to load into the complete model, or
            None to skip loading.
        add_pooling: When true, a GlobalAveragePooling2D layer is inserted
            between the base model output and the head.

    Returns:
        The assembled (uncompiled) Keras ``Model``.
    """
    features = pretrained_model.output
    if add_pooling:
        features = GlobalAveragePooling2D()(features)

    # Layers are created in the same order as before (Dense, Dropout, Dense,
    # Dropout) so previously saved weights still line up with the topology.
    for units in (1024, 512):
        features = Dense(units, activation='relu')(features)
        features = Dropout(0.4)(features)

    output = Dense(NUM_CLASSES, activation='softmax')(features)
    return_model = Model(inputs=pretrained_model.inputs, outputs=output)

    # Load the weights passed as a parameter, if any
    if weights is not None:
        return_model.load_weights(weights)

    return return_model


### Càrrega de dades

In [None]:
def create_datagens(model_preprocessor, batch_size):
    """Create the training and validation directory iterators.

    Both iterators read from ``BT4SA_DS_DIR`` ('train/' and 'val/'
    subfolders), resize images to ``(IM_WIDTH, IM_HEIGHT)``, use categorical
    labels and a fixed seed of 42.

    Args:
        model_preprocessor: Function applied to every image through
            ``ImageDataGenerator(preprocessing_function=...)``.
        batch_size: Batch size used by both iterators.

    Returns:
        A ``(train_generator, val_generator)`` tuple.
    """
    def _flow_from(split_dir):
        # One independent ImageDataGenerator per split, same settings for both
        datagen = ImageDataGenerator(preprocessing_function=model_preprocessor)
        return datagen.flow_from_directory(
            BT4SA_DS_DIR + split_dir,
            target_size=(IM_WIDTH, IM_HEIGHT),
            batch_size=batch_size,
            seed=42,
            class_mode='categorical',
        )

    train_generator = _flow_from('train/')
    val_generator = _flow_from('val/')

    return train_generator, val_generator

### Callbacks

In [None]:
#Custom callback available in Kaggle
class TimerCallback(Callback):
    """Stop Keras training shortly before a wall-clock time budget runs out.

    After every epoch (or every batch, see ``byBatch``) the callback compares
    the time remaining in the budget with the duration of the longest
    epoch/batch seen so far. When the remaining time is shorter than that
    duration, it sets ``model.stop_training = True`` and, if provided, calls
    ``on_interrupt``.

    Arguments:
        maxExecutionTime (number): Time in minutes. The model will keep
            training until shortly before this limit.
        byBatch (boolean): If true, will try to interrupt training at the end
            of each batch. If false, will try to interrupt the model at the
            end of each epoch (use ``byBatch = True`` only if each epoch is
            going to take hours).
        on_interrupt (callable or None): Called when training is interrupted,
            with signature ``func(model, elapsedTime)`` where ``model`` is the
            model being trained and ``elapsedTime`` is the time in seconds
            from the beginning of training until the interruption.
    """

    def __init__(self, maxExecutionTime, byBatch=False, on_interrupt=None):
        # Let the Keras base class set up its own attributes first
        super().__init__()

        # The budget is given in minutes but tracked in seconds
        self.maxExecutionTime = maxExecutionTime * 60
        self.on_interrupt = on_interrupt

        # The same handler is used for checking each batch or each epoch
        if byBatch:
            # on_batch_end is called by Keras every time a batch finishes
            self.on_batch_end = self.on_end_handler
        else:
            # on_epoch_end is called by Keras every time an epoch finishes
            self.on_epoch_end = self.on_end_handler

    def on_train_begin(self, logs=None):
        """Record the start time; Keras calls this when training begins."""
        self.startTime = time.time()
        self.longestTime = 0            # time taken by the longest epoch or batch
        self.lastTime = self.startTime  # time when the last epoch or batch finished

    def on_end_handler(self, index, logs=None):
        """Check the time budget at the end of an epoch or batch.

        Used in place of the Keras methods ``on_batch_end(batch, logs)`` or
        ``on_epoch_end(epoch, logs)``; ``index`` and ``logs`` are accepted for
        signature compatibility and are not used.
        """
        currentTime = time.time()
        self.elapsedTime = currentTime - self.startTime  # total time taken until now
        thisTime = currentTime - self.lastTime           # time taken by this epoch or batch
        self.lastTime = currentTime

        # Verifications are made based on the longest epoch or batch
        if thisTime > self.longestTime:
            self.longestTime = thisTime

        # If the (assumed) time taken by the next epoch or batch is greater
        # than the remaining time, stop training
        remainingTime = self.maxExecutionTime - self.elapsedTime
        if remainingTime < self.longestTime:
            self.model.stop_training = True  # tells Keras not to continue training
            print(f"\n\nTimerCallback: Finishing model training before it takes too much time. (Elapsed time: {self.elapsedTime/60.} minutes )\n\n")

            # If an `on_interrupt` callback was passed, call it here
            if self.on_interrupt is not None:
                self.on_interrupt(self.model, self.elapsedTime)
                

In [None]:
# Builds the callbacks needed to train the model
def generate_callbacks(model_CP_weights_file, patience=10, max_minutes=530):
    """Build the list of callbacks used while training the model.

    Args:
        model_CP_weights_file: File where ModelCheckpoint stores the best
            model seen so far, judged by validation loss.
        patience: Number of epochs without ``val_loss`` improvement after
            which EarlyStopping halts training. Defaults to 10.
        max_minutes: Time budget in minutes handed to TimerCallback. The
            default of 530 follows the original author's note that Kaggle GPU
            kernels run for at most 9 h (540 min), leaving a safety margin.

    Returns:
        ``[checkpoint, early_stopping, timer]`` callbacks, in that order.
    """
    # Keep only the best model, monitoring the loss on the validation set
    checkpoint = ModelCheckpoint(model_CP_weights_file, monitor='val_loss', save_best_only=True)
    # Stop training when the validation loss stops improving
    early_stopping = EarlyStopping(monitor='val_loss', patience=patience, mode='min', verbose=1)
    # Stop training before the time budget is exhausted
    timer = TimerCallback(max_minutes)

    return [checkpoint, early_stopping, timer]
    

## Entrenament models

#### Paràmetres globals

In [None]:
# continue_training: True  - load the weights saved by the last training session into the whole model
#                    False - use the ImageNet weights in the base model for feature extraction (first training run)
continue_training = True
# Architecture to train; the model-creation cell accepts 'ResNet50' or 'SEResNet50'
model_to_train = 'ResNet50'  #ResNet50 o SEResNet50
# Number of epochs passed to model.fit
epochs = 100
# Batch size passed to create_datagens
BATCH_SIZE = 32

#### Paràmetres per al model basat en ResNet50

In [None]:
## Files to create
# File where the checkpoint callback saves the training weights (.hdf5 extension)
resnet50_CP_callback_weights = 'resnet50_CP_weights_adamlr01_part2.hdf5'
# File where the training history is saved (.npy extension)
resnet50_fit_hist  = 'resnet50_fit_hist_adamlr01_part2.npy'

## Files to read
# Weights saved by the checkpoint callback in the previous part of the training
trained_model_resnet_cp_weights = '/kaggle/input/tfm-models-train/resnet50_CP_weights_adamlr01_part1.hdf5'

#### Paràmetres per al model basat en SEResNet50

In [None]:
## Files to create
# File where the checkpoint callback saves the training weights (.hdf5 extension)
# NOTE(review): this file name is the same as the one read below ("..._nadam_part1.hdf5");
# confirm the intended part suffix before continuing a SEResNet50 training run.
seresnet50_CP_callback_weights = 'seresnet50_CP_weights_nadam_part1.hdf5'
# File where the training history is saved (.npy extension)
seresnet50_fit_hist  = 'seresnet50_fit_hist_nadam_part1.npy'
## Files to read
# Weights saved by the checkpoint callback in the previous part of the training
trained_model_seresnet_cp_weights = '/kaggle/input/seresnetsdgpart1/seresnet50_CP_weights_nadam_part1.hdf5'


#### Creació i compilació del model

In [None]:
# Fail fast on an unknown architecture name: continuing would either crash
# later with a NameError or silently reuse stale notebook variables.
if model_to_train not in ('ResNet50', 'SEResNet50'):
    raise ValueError("\n\nEl model especificat en la variable model_to_train no és vàlid (valors permesos: ResNet50 o SEResNet50)")

# First training run: the base model starts from the ImageNet weights and no
# checkpoint is loaded. Continuation: the base model is built without weights
# and the whole model is restored from the previous session's checkpoint.
base_weights = None if continue_training else "imagenet"

if model_to_train == 'ResNet50':
    model_weights = trained_model_resnet_cp_weights if continue_training else None

    # Files generated by this training session
    model_CP_callback_weights = resnet50_CP_callback_weights
    model_fit_hist = resnet50_fit_hist

    # Base model for image feature extraction, based on ResNet50.
    # build_ResNet50 already requests average pooling, so none is added later.
    add_pooling_layer = False
    modelx = build_ResNet50(base_weights)
    model_preprocessor = resnet50_in_prep
else:
    model_weights = trained_model_seresnet_cp_weights if continue_training else None

    # Files generated by this training session
    model_CP_callback_weights = seresnet50_CP_callback_weights
    model_fit_hist = seresnet50_fit_hist

    # Base model for image feature extraction, based on SEResNet50
    # (the builder also returns its associated preprocessing function)
    add_pooling_layer = True
    modelx, model_preprocessor = build_SEResNet50(base_weights)

# Optimizer. `learning_rate` replaces the deprecated `lr` argument name.
# Alternatives tried by the author:
#   tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9, nesterov=True)
#   tf.keras.optimizers.Nadam()
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Complete model: base model + custom classification layers
model = create_model(modelx, model_weights, add_pooling_layer)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

#### Càrrega de dades

In [None]:
# Directory iterators for the training and validation splits
train_generator, val_generator = create_datagens(
    model_preprocessor=model_preprocessor,
    batch_size=BATCH_SIZE,
)

# Whole batches per epoch (integer division of sample count by batch size)
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = val_generator.n // val_generator.batch_size

#### Entrenament

In [None]:
# Callbacks (the checkpoint file name was selected in the model-creation cell)
callback_list = generate_callbacks(model_CP_callback_weights)

# Train the model
model_fit = model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=val_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=epochs,
    callbacks=callback_list,
)

# Save the training history returned by fit
np.save(model_fit_hist, model_fit.history)