In [None]:
import tensorflow as tf
import keras
from keras.models import Sequential, Model
from keras import layers, applications, optimizers
from keras.layers import Dropout, Flatten, Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator, load_img
from keras import backend as K
from tensorflow.keras.applications import ResNet50
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [None]:
# Edge length (pixels) that images are resized to, and samples per batch.
image_size, batch_size = 350, 16

In [None]:
# Primary dataset: the image directory and the CSV with image ids and labels.
train_path = '../input/cassava-leaf-disease-classification/train_images'
train = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')

# Secondary dataset: images are stored in one sub-folder per class.
train_path_second = '../input/more-cassava-disease/train/train/train'

In [None]:
# Sub-folder name in the secondary dataset -> string class label.
second_class_labels = {'cbb': '0', 'cbsd': '1', 'cgm': '2', 'cmd': '3', 'healthy': '4'}

second_id = []
second_label = []

for class_dir, class_label in second_class_labels.items():
    # os.listdir returns entries in arbitrary order; sorting keeps the row
    # order (and with it the seeded fold split) stable from run to run.
    for img in sorted(os.listdir(os.path.join(train_path_second, class_dir))):
        second_id.append(train_path_second + '/' + class_dir + '/' + img)
        second_label.append(class_label)

In [None]:
# Frame for the secondary images: file path and string class label per row.
train_second = pd.DataFrame(dict(image_id=second_id, label=second_label))

train_second.tail()

In [None]:
# The lists have been copied into train_second; drop the names.
del second_id, second_label

In [None]:
def image_path_first(image):
    """Return the path of `image` inside the primary training image directory."""
    full_path = os.path.join(train_path, image)
    return full_path

# Turn the bare file names in the CSV into paths the generators can open.
train['image_id'] = train['image_id'].apply(image_path_first)

In [None]:
# Store labels as strings, the same type used for the secondary dataset labels.
train['label'] = train['label'].astype(str)

In [None]:
# Preview the last rows of train.
train.tail()

In [None]:
# Stack the secondary rows under the primary ones and renumber the index.
train = pd.concat(objs=[train, train_second], ignore_index=True)

train.tail()

In [None]:
# Drop the secondary frame and both path constants from the namespace.
del train_second, train_path, train_path_second

In [None]:
from sklearn.model_selection import StratifiedKFold

# 5-fold stratified splitter, shuffled with a fixed seed.
SKF = StratifiedKFold(shuffle=True, random_state=1, n_splits=5)

In [None]:
# ResNet50's preprocess_input is the only pixel normalisation used. It works on
# raw 0-255 pixel values, so it must not be combined with rescale=1./255, and
# both generators share it so training and validation inputs are scaled alike.
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

# Training generator: random augmentation followed by the ResNet50 normalisation.
train_gen = ImageDataGenerator(preprocessing_function=resnet_preprocess,
                               horizontal_flip=True, vertical_flip=True, fill_mode='nearest', brightness_range=[0.7, 1.3],
                               rotation_range=270, zoom_range=0.2, shear_range=10, width_shift_range=0.2, height_shift_range=0.2)

# Validation/test generator: no augmentation, same normalisation as training.
val_gen = ImageDataGenerator(preprocessing_function=resnet_preprocess)

In [None]:
def build_resnet50():
    """Build the 5-class classifier: an ImageNet ResNet50 base plus a dense head.

    The base takes (image_size, image_size, 3) inputs and has no top layers.
    The head is global average pooling, two Dense+BatchNormalization stages
    (128 and 32 units), dropout of 0.3 and a 5-way softmax.

    Returns:
        The uncompiled Sequential model.
    """
    model = Sequential()
    model.add(ResNet50(input_shape=(image_size,image_size,3), include_top=False, weights='imagenet'))
    model.add(GlobalAveragePooling2D())
    model.add(Dense(128, activation='relu'))
    # BatchNormalization is not among the names imported from keras.layers,
    # so reach it through the already-imported `layers` module.
    model.add(layers.BatchNormalization())
    model.add(Dense(32, activation='relu'))
    model.add(layers.BatchNormalization())
    model.add(Dropout(0.3))
    model.add(Dense(5, activation='softmax'))

    return model

In [None]:
import seaborn as sns

# Pass the column by keyword: current seaborn takes the first positional
# argument as `data`, so a bare Series is not read as the x variable.
sns.countplot(x=train['label'])
plt.title('Count of disease types')
plt.grid()
plt.show()

In [None]:
# Validation accuracy recorded for each fold by the training loop.
all_val_acc = list()

# 1-based number of the fold being trained; used in messages and file names.
fold_var = 1

In [None]:
# Checkpoints and final models are written here; create the directory up front
# so saving cannot fail because it is missing.
os.makedirs('./k_folds_resnet', exist_ok=True)

n_folds = SKF.get_n_splits()

for train_idx, val_idx in SKF.split(train, train['label']):
    training_data = train.iloc[train_idx]
    validation_data = train.iloc[val_idx]

    # generator
    train_generator = train_gen.flow_from_dataframe(dataframe=training_data, directory=None, x_col='image_id', y_col='label',
                                                batch_size=batch_size, seed=1, shuffle=True,
                                                class_mode='categorical', target_size=(image_size,image_size))

    validation_generator = val_gen.flow_from_dataframe(dataframe=validation_data, directory=None, x_col='image_id', y_col='label',
                                                   batch_size=batch_size, seed=1, shuffle=False,
                                                   class_mode='categorical', target_size=(image_size,image_size))

    print('Fold num:', fold_var)
    print('Train length:', len(train_generator))
    print('Val length:', len(validation_generator))

    # build model
    model = build_resnet50()

    # compile
    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
                  metrics=['accuracy'])

    # callbacks
    checkpoint_filename = './k_folds_resnet/checkpoint_' + str(fold_var) + '.h5'

    early_stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=3, restore_best_weights=True)
    model_checkpoint = ModelCheckpoint(checkpoint_filename, monitor='val_loss', mode='min', verbose=1, save_best_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', mode='min', factor=0.1, patience=1, min_lr=0, verbose=1)

    # fit
    history = model.fit(train_generator, epochs=30, validation_data=validation_generator, verbose=1,
                        callbacks=[early_stopping, model_checkpoint, reduce_lr])

    # save model
    model_filename = './k_folds_resnet/' + str(fold_var) + '.h5'
    model.save(model_filename)

    # learning curves for this fold
    results = pd.DataFrame(history.history)
    fig, axs = plt.subplots(1,2,figsize=(15,5))
    axs[0].plot(results[['loss', 'val_loss']])
    axs[0].set_title('Loss')
    axs[0].legend(['loss', 'val_loss'])
    axs[1].plot(results[['accuracy', 'val_accuracy']])
    axs[1].set_title('Accuracy')
    axs[1].legend(['accuracy', 'val_accuracy'])
    plt.show()

    # Record the accuracy of the epoch with the lowest val_loss, the quantity
    # both EarlyStopping and ModelCheckpoint monitor. A fixed offset from the
    # end of the history is only right for one particular stopping pattern
    # and fails on runs shorter than that offset.
    best_epoch = int(np.argmin(history.history['val_loss']))
    all_val_acc.append(history.history['val_accuracy'][best_epoch])

    # Release the model between folds, but keep the last one: later cells
    # call model.predict and need `model` to still be defined.
    if fold_var < n_folds:
        K.clear_session()
        del model

    fold_var += 1

In [None]:
# all_val_acc is a plain list, which has no .mean() method; average it with numpy.
print(np.mean(all_val_acc))

In [None]:
# Load the secondary dataset's sample submission file as the test frame.
test = pd.read_csv('../input/more-cassava-disease/sample_submission_file.csv')

In [None]:
test_path = '../input/more-cassava-disease/test/test/test/0'

def test_image_path(image):
    """Return the path of `image` inside the test image directory."""
    return os.path.join(test_path,image)

test['image_id'] = test['image_id'].apply(test_image_path)

# Series.replace returns a new Series rather than modifying the column, so the
# result must be assigned back. One mapping converts every class name at once.
test['label'] = test['label'].replace({'cbb': '0', 'cbsd': '1', 'cgm': '2', 'cmd': '3', 'healthy': '4'})

In [None]:
# Keep only the two columns the generator reads, in this order.
test = test.loc[:, ['image_id', 'label']]

test.tail()

In [None]:
# Pixel normalisation is configured on val_gen itself; flow_from_dataframe has
# no preprocessing_function parameter, so none is passed here.
# target_size must match the input shape the model was built with, and
# shuffle=False keeps test_generator.classes in the same order as the rows
# returned by model.predict.
test_generator = val_gen.flow_from_dataframe(dataframe=test, directory=None, x_col='image_id', y_col='label',
                                             batch_size=batch_size, shuffle=False,
                                             class_mode='categorical', target_size=(image_size,image_size))

In [None]:
# Predict on every test batch; relies on a `model` object still being in scope
# from the training cell.
output = model.predict(test_generator)

In [None]:
from sklearn.metrics import classification_report

# `output` holds one row of class probabilities per image, while
# classification_report expects class indices: take the most probable class.
predicted_classes = np.argmax(output, axis=1)

# Listing the labels explicitly keeps the five target names valid even when
# some class is absent from the test labels or from the predictions.
report = classification_report(test_generator.classes, predicted_classes, labels=[0, 1, 2, 3, 4],
                               target_names=['CBB', 'CBSD', 'CGM', 'CMD', 'Healthy'])
print(report)

In [None]:
# model.predict needs a normalised float batch, not a PIL image: resize to the
# model's input size, convert to an array, apply the same normalisation the
# validation generator uses, then add the leading batch axis.
sample_image = load_img('../input/cassava-leaf-disease-classification/test_images/2216849948.jpg',
                        target_size=(image_size, image_size))
sample_batch = np.array(sample_image, dtype='float32')
sample_batch = val_gen.standardize(sample_batch)
sample_batch = np.expand_dims(sample_batch, axis=0)

print(model.predict(sample_batch))