In [1]:
import tensorflow as tf
import plotly.express as px
from tensorflow.keras.applications import EfficientNetB4
from tensorflow.keras.applications.efficientnet import preprocess_input

## Loading

In [2]:
directory = 'images/final/'
width = 128
height = 128
channels = 4
image_size = (width, height)
input_shape = (width, height, channels)
batch_size = 32

In [3]:
train_ds, val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    directory,
    batch_size=batch_size,
    validation_split=0.2,
    subset="both",
    labels="inferred",
    label_mode="categorical",
    image_size=image_size,
    crop_to_aspect_ratio=True,
    interpolation="bilinear",
    color_mode="rgba",
    shuffle=True,
    seed=905,
)

class_names = train_ds.class_names

Found 13876 files belonging to 905 classes.
Using 11101 files for training.
Using 2775 files for validation.


In [25]:
def plot_image(image, label, labels={}):
    fig = px.imshow(image, width=width, height=height)
    fig.update_layout(
        title=f"Label: {label}",
        width=500,
        height=350,
        margin={'l': 0},
        # remove axis but leave a border
        xaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            visible=False,
        ),
        yaxis=dict(
            showgrid=False,
            zeroline=False,
            showticklabels=False,
            visible=False,
        ),
        # add a solid contour of the image
        shapes=[
            dict(
                type="rect",
                xref="x",
                yref="y",
                x0=0,
                y0=0,
                x1=width,
                y1=height,
                line=dict(
                    color="black",
                    width=2,
                ),
            ),
        ],       
    )
    if labels:
        # display all 5 elements of the dictionary with the key: value
        i = 0
        for key, value in labels.items():
            fig.add_annotation(
                x=1,
                y=1-0.25*i,
                xref="paper",
                yref="paper",
                text=f"{key}: {value*100:.0f}%",
                showarrow=False,
                font=dict(
                    family="Courier New, monospace",
                    size=12,
                    color="#ffffff"
                ),
                align="left",
                bordercolor="#c7c7c7",
                borderwidth=1,
                borderpad=2,
                bgcolor="#ff7f0e",
                opacity=0.8,
            )
            # add image from corresponding folder in images/final
            fig.add_layout_image(
                dict(
                    source=f"images/final/{key}/1.png",
                    xref="paper",
                    yref="paper",
                    x=1,
                    y=1.1-0.25*i,
                    sizex=0.3,
                    sizey=0.3,
                    # sizing="stretch",
                    # opacity=0.8,
                    layer="below",
                )
            )
            i += 1

    fig.show()

In [5]:
def plot_n_images(ds, n):
    for image, label in ds.take(1):
        for i in range(n):
            print(image[i][0][0])
            print(image[i].shape)
            plot_image(image[i], class_names[int(tf.argmax(tf.reshape(label[i], [-1, 1]), axis=0))])

In [6]:
plot_n_images(train_ds, 1)

tf.Tensor([255. 255. 255.   0.], shape=(4,), dtype=float32)
(128, 128, 4)


### Preprocessing

In [7]:
flip_and_rotate = tf.keras.Sequential([
    tf.keras.layers.experimental.preprocessing.RandomFlip("horizontal"),
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.1, fill_mode="constant", fill_value=255),
    # tf.keras.layers.experimental.preprocessing.RandomZoom(0.1),
])

def preprocess_image(image):
    # print(image.shape)
    alpha_channel = image[:, :, :, 3]
    rgb_channels = image[:, :, :, :3]
    alpha_bool = alpha_channel > 0
    alpha_bool = tf.expand_dims(alpha_bool, axis=-1)
    # print(alpha_bool.shape)
    # print(rgb_channels.shape)
    rgb_channels = tf.where(alpha_bool, rgb_channels, 255)
    
    # Normalize the pixel values to be between 0 and 1
    image = preprocess_input(rgb_channels)
    
    return image

def prepare(ds, shuffle=False, augment=False):
    ds = ds.map(lambda x, y: (preprocess_image(x), y), num_parallel_calls=tf.data.AUTOTUNE)

    # Use data augmentation only on the training set
    if augment:
        ds = ds.map(lambda x, y: (flip_and_rotate(x, training=True), y), num_parallel_calls=tf.data.AUTOTUNE)
    
    # Use buffered prefecting on all datasets
    if shuffle:
        ds = ds.shuffle(1000)

    # ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    return ds

In [8]:
train_ds = prepare(train_ds, shuffle=True, augment=True)
val_ds = prepare(val_ds)

channels = 3
input_shape = (width, height, channels)



In [9]:
plot_n_images(train_ds, 5)

tf.Tensor([255. 255. 255.], shape=(3,), dtype=float32)
(128, 128, 3)


tf.Tensor([255. 255. 255.], shape=(3,), dtype=float32)
(128, 128, 3)


tf.Tensor([255. 255. 255.], shape=(3,), dtype=float32)
(128, 128, 3)


tf.Tensor([255. 255. 255.], shape=(3,), dtype=float32)
(128, 128, 3)


tf.Tensor([255. 255. 255.], shape=(3,), dtype=float32)
(128, 128, 3)


In [10]:
# Define the EfficientNetB4 model
base_model = EfficientNetB4(include_top=False, input_shape=input_shape, pooling='avg', weights='imagenet')

# Freeze the base model
base_model.trainable = False

# Define the model inputs
inputs = tf.keras.Input(shape=input_shape)

x = base_model(inputs, training=False)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(256, activation='relu')(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(len(class_names), activation='softmax')(x)

# Define the model
model = tf.keras.Model(inputs, x)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'top_k_categorical_accuracy'])

In [11]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 128, 128, 3)]     0         
                                                                 
 efficientnetb4 (Functional)  (None, 1792)             17673823  
                                                                 
 dense (Dense)               (None, 512)               918016    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               131328    
                                                                 
 dropout_1 (Dropout)         (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 905)               232585

In [12]:
history = model.fit(train_ds, batch_size=batch_size, validation_data=val_ds, epochs=15)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [13]:
cnn = tf.keras.Sequential()
cnn.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
cnn.add(tf.keras.layers.Dropout(0.4))

cnn.add(tf.keras.layers.Conv2D(64, (5, 5), activation='relu'))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
cnn.add(tf.keras.layers.Dropout(0.4))

cnn.add(tf.keras.layers.Conv2D(64, (5, 5), activation='relu'))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
cnn.add(tf.keras.layers.Dropout(0.4))

cnn.add(tf.keras.layers.Conv2D(128, (8, 8), activation='relu'))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.MaxPooling2D((2, 2)))
cnn.add(tf.keras.layers.Dropout(0.4))

cnn.add(tf.keras.layers.Flatten())
cnn.add(tf.keras.layers.Dense(512, activation='relu'))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.Dropout(0.5))
cnn.add(tf.keras.layers.Dense(256, activation='relu'))
cnn.add(tf.keras.layers.BatchNormalization())
cnn.add(tf.keras.layers.Dropout(0.5))
cnn.add(tf.keras.layers.Dense(len(class_names), activation='softmax'))

cnn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy', 'top_k_categorical_accuracy'])

In [14]:
cnn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 126, 126, 32)      896       
                                                                 
 batch_normalization (BatchN  (None, 126, 126, 32)     128       
 ormalization)                                                   
                                                                 
 max_pooling2d (MaxPooling2D  (None, 63, 63, 32)       0         
 )                                                               
                                                                 
 dropout_2 (Dropout)         (None, 63, 63, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 59, 59, 64)        51264     
                                                                 
 batch_normalization_1 (Batc  (None, 59, 59, 64)      

In [15]:
history_cnn = cnn.fit(train_ds, batch_size=batch_size, validation_data=val_ds, epochs=25)

Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25


In [77]:
training_results = {
    "efficientnetB4": 
        {"training_time" : 1073,
            "trainable_weights" : 1281929,
            "non_trainable_weights" : 17673823,
        },
    "CNN":
        {"training_time" : 637,
            "trainable_weights" : 1307721,
            "non_trainable_weights" : 2112,
        }
}

In [78]:
import pandas as pd
df_plot = pd.DataFrame(training_results).T
print(df_plot)

                training_time  trainable_weights  non_trainable_weights
efficientnetB4           1073            1281929               17673823
CNN                       637            1307721                   2112


In [79]:
# plot weights and then training time on 2 different y axes
fig = go.Figure()
fig.add_trace(go.Bar(x=df_plot.index, y=df_plot["trainable_weights"], name="Trainable Weights"))
fig.add_trace(go.Bar(x=df_plot.index, y=df_plot["non_trainable_weights"], name="Non-Trainable Weights"))
fig.update_layout(barmode='stack', title="Trainable and Non-Trainable Weights")
fig.update_layout(width=800, height=500)
fig.show()

In [14]:
import plotly.graph_objects as go
fig = go.Figure()
fig.add_trace(go.Scatter(x=history.epoch, y=history.history['loss'], name="Training Loss"))
fig.add_trace(go.Scatter(x=history.epoch, y=history.history['val_loss'], name="Validation Loss"))
# add accuracy on different y axis
fig.update_layout(yaxis2=dict(overlaying='y', side='right', range=[0, 1], title="Accuracy"))
fig.add_trace(go.Scatter(x=history.epoch, y=history.history['accuracy'], name="Training Accuracy", yaxis="y2"))
fig.add_trace(go.Scatter(x=history.epoch, y=history.history['val_accuracy'], name="Validation Accuracy", yaxis="y2"))
fig.update_layout(title="EfficientNetB4 Loss & accuracy",
                  legend=dict(
                        orientation="h",
                        yanchor="bottom",
                        y=1.02,
                        xanchor="right",
                        x=1
                        ),
                  yaxis=dict(range=[0, 5],title="Loss"),
                  xaxis=dict(title="Epochs")
                  )
fig.update_layout(width=1000, height=600)
fig.show()

In [23]:
# same plot but give blue color to train and orange to validation
fig = go.Figure()
fig.add_trace(go.Scatter(x=history_cnn.epoch, y=history_cnn.history['loss'], name="Training Loss"))
fig.add_trace(go.Scatter(x=history_cnn.epoch, y=history_cnn.history['val_loss'], name="Validation Loss"))
# add accuracy on different y axis
fig.update_layout(yaxis2=dict(overlaying='y', side='right', range=[0, 1], title="Accuracy"))
fig.add_trace(go.Scatter(x=history_cnn.epoch, y=history_cnn.history['accuracy'], name="Training Accuracy", yaxis="y2"))
fig.add_trace(go.Scatter(x=history_cnn.epoch, y=history_cnn.history['val_accuracy'], name="Validation Accuracy", yaxis="y2"))
fig.update_layout(title="CNN Loss & Accuracy",
                  legend=dict(
                        orientation="h",
                        yanchor="bottom",
                        y=1.02,
                        xanchor="right",
                        x=1
                        ),
                  yaxis=dict(range=[0, 5],title="Loss"),
                  xaxis=dict(title="Epochs")
                  )
fig.update_layout(width=1000, height=600)
fig.show()

## TEST

### rapport

In [23]:
def top_k(y_pred, k=3):
    # retrieve the top k predictions as sorted dictionary of {class: probability}
    top_k = tf.math.top_k(y_pred, k=k)
    prob = top_k.values.numpy().tolist()
    prob = [round(p, 2) for p in prob]
    # convert the dictionary to a list of class indices
    name = top_k.indices.numpy().tolist()
    name = [class_names[i] for i in name]
    ret = {}
    for i in range(len(prob)):
        if prob[i] > 0.05:
            ret[name[i]] = prob[i]
    return ret

In [17]:
y_pred_cnn = cnn.predict(val_ds)
y_pred_eff = model.predict(val_ds)



In [18]:
y_test = []
for i in val_ds:
    y_test.extend(i[1].numpy())
import numpy as np
y_test = np.array(y_test)

In [19]:
# argmax all
y_pred_cnn_ = np.argmax(y_pred_cnn, axis=1).reshape(-1, 1)
y_pred_eff_ = np.argmax(y_pred_eff, axis=1).reshape(-1, 1)

y_test = np.argmax(y_test, axis=1).reshape(-1, 1)

In [40]:
from sklearn.metrics import cohen_kappa_score
kappa_eff = cohen_kappa_score(y_test, y_pred_eff_)
kappa_cnn = cohen_kappa_score(y_test, y_pred_cnn_)

print("Kappa score for EfficientNetB4: ", kappa_eff)
print("Kappa score for CNN: ", kappa_cnn)

Kappa score for EfficientNetB4:  0.7446011818478946
Kappa score for CNN:  0.28485547620141827


In [42]:
from sklearn.metrics import classification_report
print("Classification report for EfficientNetB4: ")
print(classification_report(y_test, y_pred_eff_))

Classification report for EfficientNetB4: 
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       0.71      1.00      0.83         5
           2       0.71      1.00      0.83         5
           3       1.00      1.00      1.00         3
           4       1.00      0.75      0.86         4
           5       1.00      0.50      0.67         2
           6       1.00      0.33      0.50         3
           7       0.56      1.00      0.71         5
           8       1.00      0.57      0.73         7
           9       0.96      1.00      0.98        49
          10       1.00      1.00      1.00         1
          11       1.00      0.75      0.86         8
          12       0.33      1.00      0.50         1
          13       1.00      1.00      1.00         2
          14       0.00      0.00      0.00         2
          15       0.86      1.00      0.92         6
          16       0.75      0.75     


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.



In [43]:
print("Classification report for CNN: ")
print(classification_report(y_test, y_pred_cnn_))

Classification report for CNN: 
              precision    recall  f1-score   support

           0       0.21      0.75      0.33         4
           1       1.00      0.20      0.33         5
           2       0.14      0.20      0.17         5
           3       0.50      0.33      0.40         3
           4       1.00      0.25      0.40         4
           5       0.25      0.50      0.33         2
           6       0.25      0.33      0.29         3
           7       0.00      0.00      0.00         5
           8       0.80      0.57      0.67         7
           9       0.96      0.55      0.70        49
          10       0.00      0.00      0.00         1
          11       0.25      0.38      0.30         8
          12       0.00      0.00      0.00         1
          13       0.67      1.00      0.80         2
          14       0.00      0.00      0.00         2
          15       0.43      1.00      0.60         6
          16       0.30      0.75      0.43      


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.


Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Recall and F-score are ill-defined and being set to 0.0 in labels with no true samples. Use `zero_division` parameter to control this behavior.



In [28]:
for image, label in val_ds.take(1):
    for i in range(10):
        plot_image(image[i], class_names[int(tf.argmax(tf.reshape(label[i], (-1, 1))))], top_k(y_pred_eff[i], k=5))
        plot_image(image[i], class_names[int(tf.argmax(tf.reshape(label[i], (-1, 1))))], top_k(y_pred_cnn[i], k=5))
        

### charmander test

In [140]:
# load images/test/charmander.png to test
test_image = tf.keras.preprocessing.image.load_img('images/test/charmander.png', color_mode='rgba', target_size=image_size)
test_image = tf.keras.preprocessing.image.img_to_array(test_image)
test_image = tf.expand_dims(test_image, axis=0)
test_image = preprocess_image(test_image)
print(test_image.shape)

(1, 128, 128, 3)


In [147]:
pred = model.predict(test_image)



In [159]:
top_k(pred[0], k=5)

{'Scraggy': 0.51, 'Charmander': 0.26, 'Buizel': 0.12}

In [169]:
plot_image(test_image[0], "Charmander", top_k(pred[0], k=5))