In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# **Eye Disease Detection**

**Steps taken to solve problem**
* Step : Import common libraries
* Step : Setup GPU
* Step : Getting the data
* Step : Picking the right estimator (Model)
* Step : Fit the data and make a prediction
* Step : Evaluate the model
* Step : Improving the model
* Step : Presenting results visually
* Step : Make observations
* Step : Recreate with the best model & hyperturning parameters
* Step : Save and load the model

# **Global variables**

---



In [1]:
Image_height = 300
Image_width = 196

## **Step 1: Import common libraries**

---



In [2]:
#Common machine learning libraries
import pandas as pd
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
%matplotlib inline

#Used for setting up GPU and image classifcation
import tensorflow as tf
import tensorflow_hub as hub 

#Used for seeing how many images we have within each folder
import os

#Used for split / viewing images later on
import glob

#Libraries needed for image classification
from tensorflow import keras 
from keras.optimizers import Adam
from tensorflow.python.keras.layers import Input, Activation, Conv2D, MaxPool2D,MaxPooling2D, BatchNormalization, UpSampling2D, Lambda
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential

## **Step 2: Setup GPU**

---



In [3]:
print("GPU is working" if tf.config.list_physical_devices("GPU") else " GPU is not working. Please change the runtime to use GPU")

GPU is working


## **Step 3: Getting Data**

---



In [4]:
# Getting the file of the training set and testing set
train_folder = "/content/drive/My Drive/Research/train"
test_folder = "/content/drive/My Drive/Research/test"

In [7]:
#Code to iterate over all the files (Sub directories included). This will be used to see how many training and images we have
CNV_train = os.listdir("/content/drive/My Drive/Research/train/CNV") 
DME_train = os.listdir("/content/drive/My Drive/Research/train/DME")
DRUSEN_train = os.listdir("/content/drive/My Drive/Research/train/DRUSEN")
NORMAL_train = os.listdir("/content/drive/My Drive/Research/train/NORMAL")

CNV_test = os.listdir("/content/drive/My Drive/Research/test/CNV") 
DME_test = os.listdir("/content/drive/My Drive/Research/test/DME")
DRUSEN_test = os.listdir("/content/drive/My Drive/Research/test/DRUSEN")
NORMAL_test = os.listdir("/content/drive/My Drive/Research/test/NORMAL")

total_training = len(CNV_train + DME_train + DRUSEN_train + NORMAL_train)
total_testing = len(CNV_test + DME_test + DRUSEN_test + NORMAL_test)

print("We have",total_training, "total images in the training folder and", total_testing, "images in the testing folder")
print("Allocating 85% for training 15% for validation on the training set we get:")
print(total_training*0.85, "images for training")
print(total_training*0.15, "images for validation")

We have 83484 total images in the training folder and 1000 images in the testing folder
Allocating 85% for training 15% for validation on the training set we get:
70961.4 images for training
12522.6 images for validation


In [8]:
#Creating batches
train_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input,validation_split=0.15) \
    .flow_from_directory(directory=train_folder, target_size=(Image_height,Image_width), classes=['CNV','DME','DRUSEN','NORMAL'], batch_size=32,class_mode="categorical",
                              subset="training")
validation_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input,validation_split=0.15) \
    .flow_from_directory(directory=train_folder, target_size=(Image_height,Image_width), classes=['CNV','DME','DRUSEN','NORMAL'], batch_size=32,class_mode="categorical",
                              subset="validation")
test_batches = ImageDataGenerator(preprocessing_function=tf.keras.applications.vgg16.preprocess_input) \
                       .flow_from_directory(test_folder, target_size=(Image_height,Image_width), 
                         classes=['CNV','DME','DRUSEN','NORMAL'], batch_size=32,class_mode="categorical")

Found 70963 images belonging to 4 classes.
Found 12521 images belonging to 4 classes.
Found 1000 images belonging to 4 classes.


## **Step 4: Build/Train a CNN**

---



In [8]:
#building the model to train a CNN. This will be used to extract features of images
model = Sequential([
    Conv2D(filters=32, kernel_size=(3, 3), activation='relu', input_shape=(Image_height,Image_width,3)),
    MaxPool2D(pool_size=(2, 2), strides=2),
    Conv2D(filters=64, kernel_size=(3, 3), activation='relu'),
    MaxPool2D(pool_size=(2, 2), strides=2),
    Flatten(),
    Dense(units=4, activation='softmax')
])

In [9]:
#Checking the models summary
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 298, 194, 32)      896       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 149, 97, 32)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 147, 95, 64)       18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 73, 47, 64)        0         
_________________________________________________________________
flatten (Flatten)            (None, 219584)            0         
_________________________________________________________________
dense (Dense)                (None, 4)                 878340    
Total params: 897,732
Trainable params: 897,732
Non-trainable params: 0
__________________________________________________

## **Step 5: Fit the data**
---




In [106]:
#Compiling the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [110]:
#Fitting the data
model.fit(train_batches, steps_per_epoch = 128, epochs = 10, 
            validation_data = validation_batches, validation_steps = 32,shuffle = True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f3c380d6c50>

## **Step 6: Improve the model**

---


In [14]:
#Using VGG16 pretrained model. Changing some parameters in order to allow us reshape the image
vgg16_model = keras.applications.vgg16.VGG16(input_tensor=Input(shape=(Image_height, Image_width, 3)),input_shape=(Image_height,Image_width,3), include_top = False)

In [15]:
#View summary of the new model
vgg16_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 300, 196, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 300, 196, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 300, 196, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 150, 98, 64)       0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 150, 98, 128)      73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 150, 98, 128)      147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 75, 49, 128)       0     

In [11]:
#Check to see what type of model
type(vgg16_model)

tensorflow.python.keras.engine.functional.Functional

In [30]:
#We want to change it to sequential
model = Sequential()
for layer in vgg16_model.layers:
  model.add(layer)
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 300, 196, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 300, 196, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 150, 98, 64)       0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 150, 98, 128)      73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 150, 98, 128)      147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 75, 49, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 75, 49, 256)      

In [31]:
#This is so when we want to tune the model, it does affect the sequential setup above
for layer in model.layers:
  layer.trainable = False

In [32]:
#Adding a layer so it tells the model to seperate it into 4 categories
model.add(Dense(4,activation="softmax"))
model.summary()

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 300, 196, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 300, 196, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 150, 98, 64)       0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 150, 98, 128)      73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 150, 98, 128)      147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 75, 49, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 75, 49, 256)      

## **Step 7: Fit improved model**

---



In [34]:
model.compile(Adam(lr=0.0001),loss="categorical_crossentropy",metrics=['accuracy'])

In [35]:
model.fit_generator(train_batches, steps_per_epoch=2087,
                    validation_data=validation_batches,validation_steps=521, epochs=10,verbose=2)

Epoch 1/10


ValueError: ignored

## **Step 7: Evaluate the model**

---



## **Step 8: Presenting results visually**

---




## **Step 9: Make changes if needed**

## **Step 10: Save model**