<a href="https://colab.research.google.com/github/viniciusrpb/116319_estruturasdedados/blob/main/selenastraceae_greenalga_classification_bin_hard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
#from google.colab import drive
#drive.mount('/content/drive')

In [13]:
#!pip install tensorflow_addons

In [14]:
from tensorflow.keras.models import Sequential
from keras.layers import Dense,GlobalAveragePooling2D ,MaxPooling2D,Activation,Flatten,Conv2D,BatchNormalization,Dropout
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import SGD,Adam
from sklearn.model_selection import KFold, StratifiedKFold
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau
import numpy as np
import pandas as pd
import tensorflow_addons as tfa

In [15]:
#!cp -r "/content/drive/My Drive/alga/alga_bin_easy" "alga_bin_easy"
#path_data = "alga_bin_easy"

#!cp -r "/content/drive/My Drive/alga/alga_bin_hard" "alga_bin_hard"
path_data = "alga_bin_hard"

In [16]:
def get_model_name(sel_model,k):
    return sel_model+"_"+str(k)+'.h5'

In [17]:
def classification_model(sel_model,learning_rate,activation_f,num_labels,prob,number_of_neurons):

    f1_score = tfa.metrics.F1Score(num_classes=num_labels, average='macro',threshold=0.5)

    model = Sequential()

    if sel_model == 'resnet50':

        pre_trained_model = ResNet50(input_shape=(224,224,3),include_top=False,pooling ='avg',weights='imagenet', classes=num_labels)

        for layer in pre_trained_model.layers:
            layer.trainable = False

        model.add(pre_trained_model)

        model.add(GlobalAveragePooling2D())
        model.add(Flatten())
        model.add(Dense(number_of_neurons,activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(prob))
        model.add(Dense(num_labels,activation=activation_f))

        # It is preferable to use SGD in combination to ResNet50

        sgd = SGD(learning_rate = 0.01, decay = 1e-6, momentum = 0.9, nesterov = True)

        model.compile(optimizer = SGD, 
              loss = 'categorical_crossentropy', 
              metrics = [f1_score])

    elif sel_model == 'vgg16':

        pre_trained_model = VGG16(input_shape=(224,224,3),include_top=False,pooling ='avg',weights='imagenet', classes=num_labels)

        for layer in pre_trained_model.layers:
            layer.trainable = False

        model.add(pre_trained_model)

        model.add(GlobalAveragePooling2D())
        model.add(Flatten())
        model.add(Dense(number_of_neurons,activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(prob))
        model.add(Dense(num_labels,activation=activation_f))

        adams_family = Adam(learning_rate=learning_rate,decay=0.01)
        
        model.compile(optimizer = adams_family, 
              loss = 'categorical_crossentropy', 
              metrics = [f1_score])
    
    return model

Main function

Organize training and test sets

In [18]:
import os
list_subfolders = os.listdir(path_data)
    
list_subfolders.sort()

dataset_dict = {}

dataset_dict['filename'] = []
dataset_dict['label'] = []

for folder in list_subfolders:

    list_images_path = os.listdir(path_data+"/"+folder)
    
    list_images_path.sort()

    for image_name in list_images_path:

        dataset_dict['filename'].append(folder+"/"+image_name)

        dataset_dict['label'].append(folder)

In [19]:
df = pd.DataFrame.from_dict(dataset_dict)

In [20]:
df

Unnamed: 0,filename,label
0,ankisdensus/128_10_2.png,ankisdensus
1,ankisdensus/128_11_1.png,ankisdensus
2,ankisdensus/128_11_orig.png,ankisdensus
3,ankisdensus/128_12_16.png,ankisdensus
4,ankisdensus/128_12_orig.png,ankisdensus
...,...,...
62,ankisfusiformis/CujubimD_7dia_1_99.png,ankisfusiformis
63,ankisfusiformis/CujubimD_7dia_3_1.png,ankisfusiformis
64,ankisfusiformis/CujubimD_7dia_3_orig.png,ankisfusiformis
65,ankisfusiformis/CujubimD_7dia_5_1.png,ankisfusiformis


Define Stratified KFold

In [21]:
skf_outer = StratifiedKFold(n_splits = 10, random_state = 7, shuffle = True)

outer_results = list()

X = np.array(df['filename'])
y = np.array(df['label'])

num_labels = 2#len(y.unique)
sel_model = 'resnet50'
activation_f = 'softmax'
lr = 0.01
epochs = 15
prob = 0.5
batch_size = 16
num_neurons = 256

f1_score = tfa.metrics.F1Score(num_classes=num_labels, average='macro',threshold=0.5)

agnostic_datagen = ImageDataGenerator(rescale=1./255)

In [22]:
test_f1score = []
test_fold = []
test_loss = []

trial = 1

for train_ix, test_ix in skf_outer.split(X,y):

    val_f1score = []
    val_loss = []
    
    train_list = []
    #y_train_list = []
    test_list = []

    for ind in train_ix:
        train_list.append([X[ind],y[ind]])
        #y_train_list.append(y[ind])
    
    for ind in test_ix:
        test_list.append([X[ind],y[ind]])

    X_train = pd.DataFrame(train_list, columns =['filename','label'])
    X_test = pd.DataFrame(test_list, columns =['filename','label'])

    test_generator = agnostic_datagen.flow_from_dataframe(X_test, directory = path_data,
							x_col = "filename", y_col = "label",
							class_mode = "categorical", shuffle = True)
    
    skf_inner = StratifiedKFold(n_splits = 10, random_state = 7, shuffle = True)

    nfold = 1

    for train_index, val_index in skf_inner.split(X_train['filename'],X_train['label']):
        
        train_list = []
        valid_list = []

        for ind in train_ix:
            train_list.append([X[ind],y[ind]])
        
        for ind in test_ix:
            valid_list.append([X[ind],y[ind]])

        X_train = pd.DataFrame(train_list, columns =['filename','label'])
        X_valid = pd.DataFrame(valid_list, columns =['filename','label'])


        train_generator = agnostic_datagen.flow_from_dataframe(X_train, directory = path_data,
                                                               x_col = "filename", y_col = "label",
                                                               class_mode = "categorical", shuffle = True)
        
        validation_generator = agnostic_datagen.flow_from_dataframe(X_valid, directory = path_data,
                                                                 x_col = "filename", y_col = "label",
                                                                 class_mode = "categorical", shuffle = True)
        
        #model = classification_model(sel_model,lr,activation_f,num_labels,prob,num_neurons)
        model = Sequential()
        pre_trained_model = VGG16(input_shape=(224,224,3),include_top=False,pooling ='avg',weights='imagenet', classes=num_labels)

        for layer in pre_trained_model.layers:
            layer.trainable = False

        model.add(pre_trained_model)

        #model.add(GlobalAveragePooling2D())
        model.add(Flatten())
        model.add(Dense(num_neurons,activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(prob))
        model.add(Dense(num_labels,activation=activation_f))

        print(model.summary())
        
        #fname = "weights-improvement-{epoch:02d}-{val_f1_score:.2f}.hdf5"
        
        #checkpoint = ModelCheckpoint(fname, monitor='val_f1_score', verbose=1, save_best_only=True, mode='max')

        early_stopping = EarlyStopping(monitor='loss', patience=5)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.05, patience=5, min_lr=0.000002)
        callbacklist = [early_stopping,reduce_lr]

        adams_family = Adam(learning_rate=lr,decay=0.01)
        
        model.compile(optimizer = adams_family, 
              loss = 'categorical_crossentropy', 
              metrics = [f1_score])

        history_fine = model.fit(train_generator,
                         epochs=epochs,
                         batch_size=batch_size,
                         validation_data=validation_generator,
                         callbacks=callbacklist
                         )
        
        f1 = history_fine.history['f1_score']
        val_f1 = history_fine.history['val_f1_score']

        loss = history_fine.history['loss']
        val_loss = history_fine.history['val_loss']

        val_f1score.append(np.mean(np.array(val_f1)))
        val_loss.append(np.mean(np.array(val_loss)))

        nfold+=1

        print(f'NFold {nfold} Validation loss: {val_loss} / Validation f-score: {val_f1}')
        #del model
    
        score = model.evaluate(test_generator,batch_size=batch_size) 
        print(f'Trial {trial} Test loss: {score[0]} / Test accuracy: {score[1]}\n')

        trial+=1

        test_f1score.append(np.mean(np.array(val_f1score)))
        test_loss.append(np.mean(np.array(val_loss)))

        del model

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 14/15
Epoch 15/15
NFold 5 Validation loss: [0.5267212986946106, 0.7453022599220276, 0.92562335729599, 0.8398712873458862, 0.66100013256073, 0.5101197361946106, 0.4645169675350189, 0.4586654603481293, 0.44430795311927795, 0.44249263405799866, 0.44377946853637695, 0.4441787898540497, 0.4295171797275543, 0.4134924113750458, 0.43026819825172424, 0.5453238089879354] / Validation f-score: [0.6499999761581421, 0.6499999761581421, 0.6499999761581421, 0.6499999761581421, 0.6499999761581421, 0.6499999761581421, 0.6499999761581421, 0.8444445133209229, 0.8444445133209229, 0.6499999761581421, 0.6499999761581421, 0.6499999761581421, 0.6499999761581421, 0.7083333730697632, 0.8571428656578064]
Trial 14 Test loss: 0.43026819825172424 / Test accuracy: 0.8571428656578064

Found 60 validated image filenames belonging to 2 classes.
Found 7 validated image filenames belonging to 2 classes.
Model: "sequential_114"
________________________