In [1]:
import tensorflow as tf
import keras
from keras.models import Sequential, Model, load_model
from keras import layers, applications, optimizers
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator, load_img
from tensorflow.keras.backend import clear_session
from tensorflow.keras.applications import EfficientNetB3
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from sklearn.metrics import classification_report

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [2]:
# Image roots of the two training sources; per-image names are joined onto
# these later in the notebook.
train_path = '../input/cassava-leaf-disease-classification/train_images'
train_path_second = '../input/more-cassava-disease/train/train/train'

# image_id / label table that accompanies the first image root.
train = pd.read_csv(filepath_or_buffer='../input/cassava-leaf-disease-classification/train.csv')

In [3]:
# Index the second dataset: one sub-folder per class under train_path_second.
# Folder name -> label string, in the order cbb, cbsd, cgm, cmd, healthy.
second_class_labels = {'cbb': '0', 'cbsd': '1', 'cgm': '2', 'cmd': '3', 'healthy': '4'}

second_id = []
second_label = []

for class_dir, class_label in second_class_labels.items():
    # sorted(): os.listdir returns entries in arbitrary order, which would make
    # the row order (and any seeded split built on it) vary between machines.
    for img in sorted(os.listdir(os.path.join(train_path_second, class_dir))):
        # Keep the id relative (no leading '/'): os.path.join discards every
        # earlier component when a later one is absolute, so '/cbb/x.jpg'
        # would silently drop train_path_second when the full path is built.
        second_id.append(class_dir + '/' + img)
        second_label.append(class_label)

In [4]:
# Two-column frame (image_id, label) describing the second dataset.
train_second = pd.DataFrame(dict(image_id=second_id, label=second_label))

train_second.head()

In [5]:
# The lists now live inside train_second; drop the standalone names.
del second_id, second_label

In [6]:
def image_path_first(image):
    """Return `image` joined onto `train_path` with os.path.join."""
    full_path = os.path.join(train_path, image)
    return full_path


def image_path_second(image):
    """Return `image` joined onto `train_path_second` with os.path.join."""
    full_path = os.path.join(train_path_second, image)
    return full_path


# Turn the image ids of both frames into paths, overwriting the column.
# Not idempotent: re-running this cell joins the root onto the paths again.
train['image_id'] = train['image_id'].map(image_path_first)
train_second['image_id'] = train_second['image_id'].map(image_path_second)

In [7]:
# Cast the label column to str so it has the same type as the string labels
# built for the second dataset before the two frames are concatenated.
train = train.astype({'label': str})

In [8]:
# Stack both label frames row-wise into one training frame with a fresh
# 0..n-1 index.
combined_train = pd.concat(objs=(train, train_second), axis=0, ignore_index=True)

In [9]:
# Rebind `train` to the merged frame, then drop every name that is no longer
# needed so the per-source frames can be garbage-collected.
train = combined_train
del train_second, combined_train, train_path, train_path_second

In [10]:
from sklearn.model_selection import StratifiedKFold

# Five shuffled, label-stratified folds; the fixed random_state keeps the
# split reproducible for a given row order.
SKF = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [11]:
# Training-time augmentation: flips, rotation, zoom, shear, shifts and
# brightness jitter, plus scaling by 1/255.
# NOTE(review): tf.keras documents EfficientNet's preprocess_input as a
# pass-through and the model as expecting 0-255 pixel values; with
# rescale=1/255 the network is fed 0-1 inputs instead. Left unchanged so
# training and inference stay consistent — confirm before altering.
train_gen = ImageDataGenerator(
    rescale=1./255,
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
    rotation_range=270,
    zoom_range=0.2,
    shear_range=10,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.7, 1.3],
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='nearest',
)

# Validation images are only rescaled, never augmented.
val_gen = ImageDataGenerator(rescale=1./255)

In [18]:
def build_efficientnet_b3():
    """Build the uncompiled classifier used for every fold.

    Returns:
        A Sequential model: an ImageNet-initialised EfficientNetB3 backbone
        (300x300x3 input, no top) followed by global average pooling, a
        256-unit ReLU layer, 50% dropout and a 5-way softmax output.
    """
    backbone = EfficientNetB3(input_shape=(300,300,3), include_top=False, weights='imagenet')
    head = [
        GlobalAveragePooling2D(),
        Dense(256, activation='relu'),
        Dropout(0.5),
        Dense(5, activation='softmax'),
    ]
    return Sequential([backbone, *head])

In [13]:
# Keep the index arrays of the last (train, validation) pair yielded by
# SKF.split; fall back to empty lists if nothing is yielded.
fold_splits = list(SKF.split(train, train['label']))
train_idx, val_idx = fold_splits[-1] if fold_splits else ([], [])

In [14]:
# --- data iterators for this fold ---
# Arguments shared by the training and validation iterators. directory=None
# means the x_col values are used as paths directly.
flow_kwargs = dict(directory=None, x_col='image_id', y_col='label',
                   batch_size=16, seed=1,
                   class_mode='categorical', target_size=(300,300))

train_generator = train_gen.flow_from_dataframe(
    dataframe=train.iloc[train_idx], shuffle=True, **flow_kwargs)

validation_generator = val_gen.flow_from_dataframe(
    dataframe=train.iloc[val_idx], shuffle=False, **flow_kwargs)

# The iterators hold everything they need; release the frame, the generator
# factories and the index arrays before training starts.
del train, train_gen, val_gen, train_idx, val_idx

print('Fold num: 5')
print('Train length:', len(train_generator))
print('Val length:', len(validation_generator))

# --- model ---
model = build_efficientnet_b3()
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# --- callbacks, all driven by validation loss ---
# Stop after 3 epochs without improvement and roll back to the best weights.
early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=3,
                               restore_best_weights=True, verbose=1)
# Write a checkpoint only when validation loss improves.
model_checkpoint = ModelCheckpoint('./k_folds_model/checkpoint_5.h5', monitor='val_loss', mode='min',
                                   save_best_only=True, verbose=1)
# Multiply the learning rate by 0.2 after 1 epoch without improvement.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.2,
                              patience=1, min_lr=0, verbose=1)

# --- training ---
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=30,
    callbacks=[early_stopping, model_checkpoint, reduce_lr],
    verbose=1,
)

# Save the model as it stands after fit(), separately from the checkpoint.
model.save('./k_folds_model/5.h5')

clear_session()

In [21]:
# Mean of five hard-coded scores — presumably the per-fold validation
# accuracies copied from the training logs; verify against the actual runs.
fold_score_total = 0.8684 + 0.8697 + 0.8724 + 0.8798 + 0.8708
print(fold_score_total / 5)

In [7]:
# Test image ids and labels, read from the sample-submission file.
# NOTE(review): a sample submission normally carries placeholder labels, not
# ground truth — confirm these labels are real before trusting any metric
# computed against them.
test = pd.read_csv(filepath_or_buffer='../input/cassava-disease/sample_submission_file.csv')

In [8]:
test_path = '../input/cassava-disease/test/test/0'


def test_image_path(image):
    """Return `image` joined onto `test_path` with os.path.join."""
    joined = os.path.join(test_path, image)
    return joined


# Replace the image ids with paths under test_path, overwriting the column.
# Not idempotent: re-running this cell joins test_path onto the paths again.
test['image_id'] = test['image_id'].map(test_image_path)

test.tail()

Unnamed: 0,label,image_id
3769,cbb,../input/cassava-disease/test/test/0\test-img-...
3770,cgm,../input/cassava-disease/test/test/0\test-img-...
3771,healthy,../input/cassava-disease/test/test/0\test-img-...
3772,cbb,../input/cassava-disease/test/test/0\test-img-...
3773,healthy,../input/cassava-disease/test/test/0\test-img-...


In [9]:
# Map class names to the '0'..'4' label strings; values not listed in the
# mapping are left untouched.
# The result is assigned back instead of calling replace(..., inplace=True)
# on test['label']: that form is a chained in-place update on a temporary
# Series, which warns on recent pandas and does not modify `test` at all
# under Copy-on-Write.
label_by_class = {'cbb': '0', 'cbsd': '1', 'cgm': '2', 'cmd': '3', 'healthy': '4'}
test['label'] = test['label'].replace(label_by_class)

In [10]:
# Keep only the image_id and label columns, in that order.
test = test.loc[:, ['image_id', 'label']]

test.tail()

Unnamed: 0,image_id,label
3769,../input/cassava-disease/test/test/0\test-img-...,0
3770,../input/cassava-disease/test/test/0\test-img-...,2
3771,../input/cassava-disease/test/test/0\test-img-...,4
3772,../input/cassava-disease/test/test/0\test-img-...,0
3773,../input/cassava-disease/test/test/0\test-img-...,4


In [11]:
# Spot-check the first joined path.
first_test_path = test['image_id'][0]
print(first_test_path)

../input/cassava-disease/test/test/0\test-img-0.jpg


In [12]:
# Inference-time generator: scales pixel values by 1/255, no augmentation.
val_gen = ImageDataGenerator(
    rescale=1./255,
)

In [14]:
# Unshuffled iterator over the test frame, so predictions line up with
# test_generator.classes. directory=None means the x_col values are used as
# paths directly.
# `preprocessing_function` is an ImageDataGenerator constructor option, not a
# flow_from_dataframe argument; passed to this call it is swallowed by
# **kwargs and has no effect, so it is deliberately not passed here. Pixel
# scaling comes from val_gen.
test_generator = val_gen.flow_from_dataframe(dataframe=test, directory=None, x_col='image_id', y_col='label',
                                              class_mode='categorical', target_size=(300,300), shuffle=False, seed=1)

Found 3774 validated image filenames belonging to 5 classes.


In [19]:
# Rebuild the architecture, then load the checkpoint_5 weights into it.
checkpoint_5_path = './k_folds_model/checkpoint_5.h5'
model = build_efficientnet_b3()
model.load_weights(checkpoint_5_path)

In [20]:
# Predicted class = index of the largest output along axis 1 for each image.
probs_5 = model.predict(test_generator)
output_5 = probs_5.argmax(axis=1)

# Per-class precision/recall/F1 against the labels held by the generator.
class_names = ['CBB', 'CBSD', 'CGM', 'CMD', 'Healthy']
report = classification_report(test_generator.classes, output_5, target_names=class_names)
print(report)

              precision    recall  f1-score   support

         CBB       0.22      0.06      0.10       753
        CBSD       0.20      0.18      0.19       731
         CGM       0.19      0.17      0.18       706
         CMD       0.20      0.41      0.27       800
     Healthy       0.19      0.16      0.17       784

    accuracy                           0.20      3774
   macro avg       0.20      0.20      0.18      3774
weighted avg       0.20      0.20      0.18      3774



In [24]:
# Reset Keras' global state, then load the full model saved as checkpoint_1.
clear_session()
model_1 = load_model(filepath='./k_folds_model/checkpoint_1.h5')

In [25]:
# Predicted class = index of the largest output along axis 1 for each image.
probs_1 = model_1.predict(test_generator)
output_1 = probs_1.argmax(axis=1)

# Per-class precision/recall/F1 against the labels held by the generator.
class_names = ['CBB', 'CBSD', 'CGM', 'CMD', 'Healthy']
report = classification_report(test_generator.classes, output_1, target_names=class_names)
print(report)

              precision    recall  f1-score   support

         CBB       0.22      0.07      0.10       753
        CBSD       0.20      0.18      0.19       731
         CGM       0.18      0.18      0.18       706
         CMD       0.21      0.37      0.26       800
     Healthy       0.19      0.18      0.18       784

    accuracy                           0.20      3774
   macro avg       0.20      0.19      0.18      3774
weighted avg       0.20      0.20      0.18      3774



In [26]:
# Reset Keras' global state, then load the full model saved as checkpoint_2.
clear_session()
model_2 = load_model(filepath='./k_folds_model/checkpoint_2.h5')

In [27]:
# Predicted class = index of the largest output along axis 1 for each image.
probs_2 = model_2.predict(test_generator)
output_2 = probs_2.argmax(axis=1)

# Per-class precision/recall/F1 against the labels held by the generator.
class_names = ['CBB', 'CBSD', 'CGM', 'CMD', 'Healthy']
report = classification_report(test_generator.classes, output_2, target_names=class_names)
print(report)

              precision    recall  f1-score   support

         CBB       0.24      0.07      0.11       753
        CBSD       0.19      0.16      0.18       731
         CGM       0.18      0.16      0.17       706
         CMD       0.20      0.37      0.26       800
     Healthy       0.21      0.22      0.21       784

    accuracy                           0.20      3774
   macro avg       0.20      0.20      0.18      3774
weighted avg       0.20      0.20      0.19      3774



In [28]:
# Reset Keras' global state, then load the full model saved as checkpoint_3.
clear_session()
model_3 = load_model(filepath='./k_folds_model/checkpoint_3.h5')

In [29]:
# Predicted class = index of the largest output along axis 1 for each image.
probs_3 = model_3.predict(test_generator)
output_3 = probs_3.argmax(axis=1)

# Per-class precision/recall/F1 against the labels held by the generator.
class_names = ['CBB', 'CBSD', 'CGM', 'CMD', 'Healthy']
report = classification_report(test_generator.classes, output_3, target_names=class_names)
print(report)

              precision    recall  f1-score   support

         CBB       0.19      0.06      0.09       753
        CBSD       0.20      0.18      0.19       731
         CGM       0.18      0.15      0.16       706
         CMD       0.20      0.42      0.27       800
     Healthy       0.20      0.17      0.18       784

    accuracy                           0.20      3774
   macro avg       0.20      0.19      0.18      3774
weighted avg       0.20      0.20      0.18      3774



In [30]:
# Reset Keras' global state, then load the full model saved as checkpoint_4.
clear_session()
model_4 = load_model(filepath='./k_folds_model/checkpoint_4.h5')

In [31]:
# Predicted class = index of the largest output along axis 1 for each image.
probs_4 = model_4.predict(test_generator)
output_4 = probs_4.argmax(axis=1)

# Per-class precision/recall/F1 against the labels held by the generator.
class_names = ['CBB', 'CBSD', 'CGM', 'CMD', 'Healthy']
report = classification_report(test_generator.classes, output_4, target_names=class_names)
print(report)

              precision    recall  f1-score   support

         CBB       0.24      0.09      0.13       753
        CBSD       0.20      0.17      0.19       731
         CGM       0.18      0.18      0.18       706
         CMD       0.20      0.38      0.27       800
     Healthy       0.20      0.18      0.19       784

    accuracy                           0.20      3774
   macro avg       0.21      0.20      0.19      3774
weighted avg       0.21      0.20      0.19      3774

