In [1]:
import os
import time
import shutil
from collections import Counter
import tensorflow as tf
import numpy as np
import pandas as pd
import cv2
import keras
import gc
import warnings
from PIL import Image
%matplotlib inline
from keras.models import Sequential,Model
from tensorflow.keras.applications import ResNet50, ResNet50V2
from keras.layers import Dense, Activation, Dropout
import keras.backend as K
from tensorflow.keras.optimizers import Adam, RMSprop, SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import BatchNormalization
from keras.layers import Conv2D, MaxPool2D, AvgPool2D, Flatten, GlobalAveragePooling2D, AveragePooling2D
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix, classification_report

warnings.filterwarnings("ignore")

In [3]:
img_size = (512,512)
batch_size = 16
# optimizer = SGD(learning_rate=5e-5, momentum=0.9)
optimizer = Adam(learning_rate=1e-4)
foldId = 0

In [5]:
train_datagen = ImageDataGenerator(rotation_range=90,
                            brightness_range=[0.7, 1.3],
                            zoom_range=[0.5,1.0],
                            horizontal_flip=True,
                            vertical_flip=True)
    
train_df = pd.read_csv("../input/diabetic-retinopathy-preprocessed-data/trainLabels.csv")

indexes = []
for i in range(len(train_df['image'])):
    if train_df['kfold'][i] == foldId:
        indexes.append(i)
train_df.drop(indexes, axis=0, inplace=True)

train_df['image'] = [str(x)+'.jpeg' for x in train_df['image']]
train_df['level'] = [str(x) for x in train_df['level']]
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory="../input/diabetic-retinopathy-preprocessed-data/Data/Data/train",
    x_col="image",
    y_col='level',
    subset="training",
    batch_size=16,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=img_size,
    save_to_dir="./",
    save_format='jpeg')

Found 25210 validated image filenames belonging to 5 classes.


In [7]:
val_datagen = ImageDataGenerator()
val_df = pd.read_csv("../input/diabetic-retinopathy-preprocessed-data/trainLabels.csv")
indexes = []
for i in range(len(val_df['image'])):
    if val_df['kfold'][i] != foldId:
        indexes.append(i)
val_df.drop(indexes, axis=0, inplace=True)

val_df['image'] = [str(x)+'.jpeg' for x in val_df['image']]
val_df['level'] = [str(x) for x in val_df['level']]
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory="../input/diabetic-retinopathy-preprocessed-data/Data/Data/train",
    x_col="image",
    y_col='level',
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode="categorical",
    target_size=img_size)

Found 8404 validated image filenames belonging to 5 classes.


In [8]:
test_datagen = ImageDataGenerator()
test_df = pd.read_csv("../input/diabetic-retinopathy-preprocessed-data/Data/Data/test_labels.csv")
test_df['image'] = [str(x)+'.jpeg' for x in test_df['image']]
test_df['level'] = [str(x) for x in test_df['level']]
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory="../input/diabetic-retinopathy-preprocessed-data/Data/Data/test",
    x_col="image",
    y_col='level',
    batch_size=1,
    seed=42,
    shuffle=False,
    class_mode="categorical",
    target_size=img_size)

Found 53574 validated image filenames belonging to 5 classes.


In [9]:
resnet = ResNet50V2(include_top=False, weights='imagenet', input_shape=(img_size[0], img_size[1], 3))
output = resnet.layers[-1].output
output = keras.layers.Flatten()(output)
resnet = Model(resnet.input, output)

model = Sequential()
model.add(resnet)
model.add(Dropout(0.3))
model.add(Dense(5, activation='softmax'))

model.compile(optimizer, 
            loss='categorical_crossentropy',
            metrics=['accuracy'])

checkpoint_filepath = "./trainedmodel.h5"
model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=checkpoint_filepath,
                          save_weights_only=False,
                          monitor='val_loss',
                          mode='auto',
                          save_best_only=True)

early_stopping = EarlyStopping(
                monitor="val_loss",
                patience=3,
                verbose=1,
                mode="auto",
)

model.summary()

2022-04-21 20:32:52.640332: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-21 20:32:52.726587: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-21 20:32:52.727461: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-21 20:32:52.728628: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model (Functional)           (None, 524288)            23564800  
_________________________________________________________________
dropout (Dropout)            (None, 524288)            0         
_________________________________________________________________
dense (Dense)                (None, 5)                 2621445   
Total params: 26,186,245
Trainable params: 26,140,805
Non-trainable params: 45,440
_________________________________________________________________


In [10]:
STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
STEP_SIZE_VALID=val_generator.n//val_generator.batch_size
STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

In [11]:
model.fit_generator(generator=train_generator,
                    steps_per_epoch=STEP_SIZE_TRAIN,
                    validation_data=val_generator,
                    validation_steps=STEP_SIZE_VALID,
                    epochs=25, 
                    callbacks=[model_checkpoint_callback, early_stopping])

2022-04-21 20:32:58.419613: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/25


2022-04-21 20:33:05.283783: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 00013: early stopping


<keras.callbacks.History at 0x7f55ced880d0>

In [12]:
test_generator.reset()
pred=model.predict_generator(test_generator,
                            steps=STEP_SIZE_TEST,
                            verbose=1)



In [13]:
predicted_class_indices=np.argmax(pred,axis=1)
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())

predictions = [labels[k] for k in predicted_class_indices]
filenames=test_generator.filenames

res = {filenames[i]: predictions[i] for i in range(len(filenames))}

In [14]:
from collections import Counter
P = Counter(predictions)
T = Counter(test_df['level'])
print(sorted(P.items()))
print(sorted(T.items()))

[('0', 46536), ('1', 248), ('2', 5764), ('3', 802), ('4', 224)]
[('0', 39533), ('1', 3762), ('2', 7861), ('3', 1214), ('4', 1206)]


In [15]:
correct = 0
predLabels = []
indexes = []
for i in range(len(test_df['image'])):
    if test_df['image'][i] in res:
        predLabels.append(res[test_df['image'][i]])
        if res[test_df['image'][i]] == test_df['level'][i]:
            correct += 1
    else:
        predLabels.append("NA")
        indexes.append(i)
acc = (correct / len(test_df['level'].head(300))) * 100
print(acc)

test_df['predLevel'] = predLabels
test_df.drop(indexes, axis=0, inplace=True)
test_df.to_csv("./predictions.csv", index=False)

model.save('model.h5')

14213.666666666666


In [16]:
print(classification_report(test_df['level'], test_df['predLevel']))

print("Test Accuracy score : ", accuracy_score(test_df['level'], test_df['predLevel']))
print("Test Weighted F1 score : ", f1_score(test_df['level'], test_df['predLevel'], average="weighted"))
print("Test Micro F1 score : ", f1_score(test_df['level'], test_df['predLevel'], average="micro"))
print("Test Precision score : ", precision_score(test_df['level'], test_df['predLevel'], average="weighted"))
print("Test Recall score : ", recall_score(test_df['level'], test_df['predLevel'], average="weighted"))

              precision    recall  f1-score   support

           0       0.83      0.98      0.90     39532
           1       0.34      0.02      0.04      3762
           2       0.59      0.43      0.50      7860
           3       0.44      0.29      0.35      1214
           4       0.79      0.15      0.25      1206

    accuracy                           0.80     53574
   macro avg       0.60      0.37      0.41     53574
weighted avg       0.75      0.80      0.75     53574

Test Accuracy score :  0.7959271288311495
Test Weighted F1 score :  0.7520691078470904
Test Micro F1 score :  0.7959271288311495
Test Precision score :  0.7507110811034535
Test Recall score :  0.7959271288311495


# Testing

In [17]:
# train_datagen = ImageDataGenerator(rescale=1./255)
# train_df = pd.read_csv("../input/diabetic-retinopathy-preprocessed-data/trainLabels.csv")

# indexes = []
# for i in range(len(train_df['image'])):
#     if train_df['kfold'][i] == foldId:
#         indexes.append(i)
# train_df.drop(indexes, axis=0, inplace=True)

# train_df['image'] = [str(x)+'.jpeg' for x in train_df['image']]
# train_df['level'] = [str(x) for x in train_df['level']]
# train_generator = train_datagen.flow_from_dataframe(
#     dataframe=train_df,
#     directory="../input/diabetic-retinopathy-preprocessed-data/Data/Data/train",
#     x_col="image",
#     y_col='level',
#     subset="training",
#     batch_size=16,
#     seed=42,
#     shuffle=True,
#     class_mode="categorical",
#     target_size=img_size)

# test_datagen = ImageDataGenerator(rescale=1./255)
# test_df = pd.read_csv("../input/diabetic-retinopathy-preprocessed-data/Data/Data/test_labels.csv")
# test_df['image'] = [str(x)+'.jpeg' for x in test_df['image']]
# test_df['level'] = [str(x) for x in test_df['level']]
# test_generator = test_datagen.flow_from_dataframe(
#     dataframe=test_df,
#     directory="../input/diabetic-retinopathy-preprocessed-data/Data/Data/test",
#     x_col="image",
#     y_col='level',
#     batch_size=1,
#     seed=42,
#     shuffle=False,
#     class_mode="categorical",
#     target_size=img_size)

# resnet = ResNet50V2(include_top=False, weights='imagenet', input_shape=(img_size[0], img_size[1], 3))
# output = resnet.layers[-1].output
# output = keras.layers.Flatten()(output)
# resnet = Model(resnet.input, output)

# model = Sequential()
# model.add(resnet)
# model.add(Dropout(0.1))
# model.add(Dense(5, activation='softmax'))

# model.compile(optimizer, 
#             loss='categorical_crossentropy',
#             metrics=['accuracy'])

# checkpoint_filepath = "./trainedmodel.h5"
# model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint( filepath=checkpoint_filepath,
#                           save_weights_only=True,
#                           monitor='val_loss',
#                           mode='auto',
#                           save_best_only=True)

# early_stopping = EarlyStopping(
#                 monitor="val_loss",
#                 patience=3,
#                 verbose=1,
#                 mode="auto",
# )

# model.summary()

# model.load_weights("../input/resnet50v2-full-run-model-file/trainedmodel.h5")

# STEP_SIZE_TRAIN=train_generator.n//train_generator.batch_size
# STEP_SIZE_TEST=test_generator.n//test_generator.batch_size

# test_generator.reset()
# pred=model.predict_generator(test_generator,
#                             steps=STEP_SIZE_TEST,
#                             verbose=1)

# predicted_class_indices=np.argmax(pred,axis=1)
# labels = (train_generator.class_indices)
# labels = dict((v,k) for k,v in labels.items())

# predictions = [labels[k] for k in predicted_class_indices]
# filenames=test_generator.filenames

# res = {filenames[i]: predictions[i] for i in range(len(filenames))}

# from collections import Counter
# P = Counter(predictions)
# T = Counter(test_df['level'])
# print(sorted(P.items()))
# print(sorted(T.items()))

In [18]:
# correct = 0
# for i in range(len(test_df['image'])):
#     if test_df['image'][i] in res:
#         if res[test_df['image'][i]] == test_df['level'][i]:
#             correct += 1

# acc = (correct / len(test_df['level'])) * 100 
# print(acc)

In [19]:
# P = [('0', 47049), ('1', 685), ('2', 4520), ('3', 787), ('4', 533)]
# T = [('0', 39533), ('1', 3762), ('2', 7861), ('3', 1214), ('4', 1206)]