<a href="https://colab.research.google.com/github/viniciusrpb/cloud_image_classification/blob/main/selenastraceae_greenalga_classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install tensorflow_addons

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [3]:
from tensorflow.keras.models import Sequential
from keras.layers import Dense,GlobalAveragePooling2D ,MaxPooling2D,Activation,Flatten,Conv2D,BatchNormalization,Dropout
from tensorflow.keras.losses import sparse_categorical_crossentropy
from tensorflow.keras.optimizers import SGD,Adam
from sklearn.model_selection import KFold, StratifiedKFold
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint,EarlyStopping,ReduceLROnPlateau
import numpy as np
import pandas as pd
import tensorflow_addons as tfa

In [4]:
!cp -r "/content/drive/My Drive/alga/alga_full" "alga_full"
path_data = "alga_full"

In [5]:
def get_model_name(sel_model,k):
    return sel_model+"_"+str(k)+'.h5'

In [6]:
def classification_model(sel_model,learning_rate,activation_f,num_labels,prob,number_of_neurons):

    f1_score = tfa.metrics.F1Score(num_classes=num_labels, average='macro',threshold=0.5)

    model = Sequential()

    if sel_model == 'resnet50':

        pre_trained_model = ResNet50(input_shape=(224,224,3),include_top=False,pooling ='avg',weights='imagenet', classes=num_labels)

        for layer in pre_trained_model.layers:
            layer.trainable = False

        model.add(pre_trained_model)

        model.add(GlobalAveragePooling2D())
        model.add(Flatten())
        model.add(Dense(number_of_neurons,activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(prob))
        model.add(Dense(num_labels,activation=activation_f))

        # It is preferable to use SGD in combination to ResNet50

        sgd = SGD(learning_rate = 0.01, decay = 1e-6, momentum = 0.9, nesterov = True)

        model.compile(optimizer = SGD, 
              loss = 'categorical_crossentropy', 
              metrics = [f1_score])

    elif sel_model == 'vgg16':

        pre_trained_model = VGG16(input_shape=(224,224,3),include_top=False,pooling ='avg',weights='imagenet', classes=num_labels)

        for layer in pre_trained_model.layers:
            layer.trainable = False

        model.add(pre_trained_model)

        model.add(GlobalAveragePooling2D())
        model.add(Flatten())
        model.add(Dense(number_of_neurons,activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(prob))
        model.add(Dense(num_labels,activation=activation_f))

        adams_family = Adam(learning_rate=learning_rate,decay=0.01)
        
        model.compile(optimizer = adams_family, 
              loss = 'categorical_crossentropy', 
              metrics = [f1_score])
    
    return model

Main function

Organize training and test sets

In [7]:
import os
list_subfolders = os.listdir(path_data)
    
list_subfolders.sort()

dataset_dict = {}

dataset_dict['filename'] = []
dataset_dict['label'] = []

for folder in list_subfolders:

    list_images_path = os.listdir(path_data+"/"+folder)
    
    list_images_path.sort()

    for image_name in list_images_path:

        dataset_dict['filename'].append(folder+"/"+image_name)

        dataset_dict['label'].append(folder)

In [8]:
df = pd.DataFrame.from_dict(dataset_dict)

In [9]:
df

Unnamed: 0,filename,label
0,alga_full/ankisdensus,alga_full
1,alga_full/ankisfusiformis,alga_full
2,alga_full/kirchneriela,alga_full
3,alga_full/monorapcontortum,alga_full
4,alga_full/monorapgriff,alga_full
...,...,...
406,selenastrumbrib/JamariA_7dia_8_14.png,selenastrumbrib
407,selenastrumbrib/JamariA_7dia_8_2.png,selenastrumbrib
408,selenastrumbrib/JamariA_7dia_8_9.png,selenastrumbrib
409,selenastrumbrib/JamariA_7dia_8_orig.png,selenastrumbrib


Define Stratified KFold

In [10]:
skf_outer = StratifiedKFold(n_splits = 10, random_state = 7, shuffle = True)

outer_results = list()

X = np.array(df['filename'])
y = np.array(df['label'])

num_labels = 8#len(y.unique)
sel_model = 'resnet55'
activation_f = 'softmax'
lr = 0.00001
epochs = 15
prob = 0.5
batch_size = 16
num_neurons = 256

f1_score = tfa.metrics.F1Score(num_classes=num_labels, average='macro',threshold=0.5)

agnostic_datagen = ImageDataGenerator(rescale=1./255)

In [12]:
test_f1score = []
test_fold = []
test_loss = []

trial = 1

for train_ix, test_ix in skf_outer.split(X,y):

    val_f1score = []
    val_loss = []
    
    train_list = []
    #y_train_list = []
    test_list = []

    for ind in train_ix:
        train_list.append([X[ind],y[ind]])
        #y_train_list.append(y[ind])
    
    for ind in test_ix:
        test_list.append([X[ind],y[ind]])

    X_train = pd.DataFrame(train_list, columns =['filename','label'])
    X_test = pd.DataFrame(test_list, columns =['filename','label'])

    test_generator = agnostic_datagen.flow_from_dataframe(X_test, directory = path_data,
							x_col = "filename", y_col = "label",
							class_mode = "categorical", shuffle = True)
    
    skf_inner = StratifiedKFold(n_splits = 10, random_state = 7, shuffle = True)

    nfold = 1

    for train_index, val_index in skf_inner.split(X_train['filename'],X_train['label']):
        
        train_list = []
        valid_list = []

        for ind in train_ix:
            train_list.append([X[ind],y[ind]])
        
        for ind in test_ix:
            valid_list.append([X[ind],y[ind]])

        X_train = pd.DataFrame(train_list, columns =['filename','label'])
        X_valid = pd.DataFrame(valid_list, columns =['filename','label'])


        train_generator = agnostic_datagen.flow_from_dataframe(X_train, directory = path_data,
                                                               x_col = "filename", y_col = "label",
                                                               class_mode = "categorical", shuffle = True)
        
        validation_generator = agnostic_datagen.flow_from_dataframe(X_valid, directory = path_data,
                                                                 x_col = "filename", y_col = "label",
                                                                 class_mode = "categorical", shuffle = True)
        
        #model = classification_model(sel_model,lr,activation_f,num_labels,prob,num_neurons)
        model = Sequential()
        pre_trained_model = VGG16(input_shape=(224,224,3),include_top=False,pooling ='avg',weights='imagenet', classes=num_labels)

        for layer in pre_trained_model.layers:
            layer.trainable = False

        model.add(pre_trained_model)

        #model.add(GlobalAveragePooling2D())
        model.add(Flatten())
        model.add(Dense(num_neurons,activation="relu"))
        model.add(BatchNormalization())
        model.add(Dropout(prob))
        model.add(Dense(num_labels,activation=activation_f))

        print(model.summary())
        
        fname = "weights-improvement-{epoch:02d}-{val_f1_score:.2f}.hdf5"
        
        checkpoint = ModelCheckpoint(fname, monitor='val_f1_score', verbose=1, save_best_only=True, mode='max')

        early_stopping = EarlyStopping(monitor='loss', patience=5)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.05, patience=5, min_lr=0.000002)
        callbacklist = [checkpoint,early_stopping,reduce_lr]

        adams_family = Adam(learning_rate=0.01,decay=0.01)
        
        model.compile(optimizer = adams_family, 
              loss = 'categorical_crossentropy', 
              metrics = [f1_score])

        history_fine = model.fit(train_generator,
                         epochs=epochs,
                         batch_size=batch_size,
                         validation_data=validation_generator,
                         callbacks=callbacklist
                         )
        
        f1 = history_fine.history['f1_score']
        val_f1 = history_fine.history['val_f1_score']

        loss = history_fine.history['loss']
        val_loss = history_fine.history['val_loss']

        val_f1score.append(np.mean(np.array(val_f1)))
        val_loss.append(np.mean(np.array(val_loss)))

        nfold+=1

        print(f'NFold {nfold} Validation loss: {val_loss} / Validation f-score: {val_f1}')
        #del model
    
        score = model.evaluate(test_generator,batch_size=batch_size) 
        print(f'Trial {trial} Test loss: {score[0]} / Test accuracy: {score[1]}\n')

        trial+=1

        test_f1score.append(np.mean(np.array(val_f1score)))
        test_loss.append(np.mean(np.array(val_loss)))

        del model

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Found 41 validated image filenames belonging to 8 classes.
Found 362 validated image filenames belonging to 8 classes.
Found 41 validated image filenames belonging to 8 classes.
Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_10 (Flatten)        (None, 512)               0         
                                                                 
 dense_20 (Dense)            (None, 256)               131328    
                                                                 
 batch_normalization_10 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_10 (Dropout)        (None, 256)               0         
       

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Model: "sequential_20"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_20 (Flatten)        (None, 512)               0         
                                                                 
 dense_40 (Dense)            (None, 256)               131328    
                                                                 
 batch_normalization_20 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_20 (Dropout)        (None, 256)               0         
                                                                 
 dense_41 (Dense)            (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Model: "sequential_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_30 (Flatten)        (None, 512)               0         
                                                                 
 dense_60 (Dense)            (None, 256)               131328    
                                                                 
 batch_normalization_30 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_30 (Dropout)        (None, 256)               0         
                                                                 
 dense_61 (Dense)            (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Model: "sequential_40"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_40 (Flatten)        (None, 512)               0         
                                                                 
 dense_80 (Dense)            (None, 256)               131328    
                                                                 
 batch_normalization_40 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_40 (Dropout)        (None, 256)               0         
                                                                 
 dense_81 (Dense)            (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Model: "sequential_50"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_50 (Flatten)        (None, 512)               0         
                                                                 
 dense_100 (Dense)           (None, 256)               131328    
                                                                 
 batch_normalization_50 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_50 (Dropout)        (None, 256)               0         
                                                                 
 dense_101 (Dense)           (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Model: "sequential_60"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_60 (Flatten)        (None, 512)               0         
                                                                 
 dense_120 (Dense)           (None, 256)               131328    
                                                                 
 batch_normalization_60 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_60 (Dropout)        (None, 256)               0         
                                                                 
 dense_121 (Dense)           (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Model: "sequential_70"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_70 (Flatten)        (None, 512)               0         
                                                                 
 dense_140 (Dense)           (None, 256)               131328    
                                                                 
 batch_normalization_70 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_70 (Dropout)        (None, 256)               0         
                                                                 
 dense_141 (Dense)           (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)
  .format(n_invalid, x_col)


Model: "sequential_80"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_80 (Flatten)        (None, 512)               0         
                                                                 
 dense_160 (Dense)           (None, 256)               131328    
                                                                 
 batch_normalization_80 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_80 (Dropout)        (None, 256)               0         
                                                                 
 dense_161 (Dense)           (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)


Model: "sequential_90"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_90 (Flatten)        (None, 512)               0         
                                                                 
 dense_180 (Dense)           (None, 256)               131328    
                                                                 
 batch_normalization_90 (Bat  (None, 256)              1024      
 chNormalization)                                                
                                                                 
 dropout_90 (Dropout)        (None, 256)               0         
                                                                 
 dense_181 (Dense)           (None, 8)                 2056      
                                                     

  .format(n_invalid, x_col)


Model: "sequential_100"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg16 (Functional)          (None, 512)               14714688  
                                                                 
 flatten_100 (Flatten)       (None, 512)               0         
                                                                 
 dense_200 (Dense)           (None, 256)               131328    
                                                                 
 batch_normalization_100 (Ba  (None, 256)              1024      
 tchNormalization)                                               
                                                                 
 dropout_100 (Dropout)       (None, 256)               0         
                                                                 
 dense_201 (Dense)           (None, 8)                 2056      
                                                    

KeyboardInterrupt: ignored