# 1. Loading data and setting up folders

In [None]:
#This cell is purely for loading the data and setting the directories for convenience

!wget --no-check-certificate \
https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip \
-O /tmp/cats_and_dogs_filtered.zip

#This loads in the dataset you'll be using for this project.
import os
import zipfile

local_zip = '/tmp/cats_and_dogs_filtered.zip'
zip_ref = zipfile.ZipFile(local_zip, 'r')
zip_ref.extractall('/tmp')
zip_ref.close()

# Root folder produced by extracting the archive.
base_dir = '/tmp/cats_and_dogs_filtered'

# 'train' is split into training/validation data later on, while the folder that
# is *named* 'validation' is reserved for testing (despite the directory's name).
train_dir, testing_dir = (
  os.path.join(base_dir, split_name) for split_name in ('train', 'validation')
)

# Per-class picture folders inside the training directory
train_cats_dir, train_dogs_dir = (
  os.path.join(train_dir, class_name) for class_name in ('cats', 'dogs')
)

# Per-class picture folders inside the testing directory
testing_cats_dir, testing_dogs_dir = (
  os.path.join(testing_dir, class_name) for class_name in ('cats', 'dogs')
)


--2022-02-02 01:51:38--  https://storage.googleapis.com/mledu-datasets/cats_and_dogs_filtered.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.31.128, 173.194.210.128, 173.194.211.128, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.31.128|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 68606236 (65M) [application/zip]
Saving to: ‘/tmp/cats_and_dogs_filtered.zip’


2022-02-02 01:51:39 (193 MB/s) - ‘/tmp/cats_and_dogs_filtered.zip’ saved [68606236/68606236]



In [None]:
#this code block is useful for demonstrating a later point...
#The folders are listed by path instead of os.chdir()-ing into them, so the
#kernel's working directory is left untouched for every cell that follows.

print(os.listdir(train_dir))  #one sub-folder per class

cat_files = os.listdir(os.path.join(train_dir, 'cats'))
print("{} files, for example: {}".format(len(cat_files), cat_files[:10]))  #a short sample instead of the full listing

['dogs', 'cats']
['cat.368.jpg', 'cat.578.jpg', 'cat.646.jpg', 'cat.483.jpg', 'cat.724.jpg', 'cat.587.jpg', 'cat.827.jpg', 'cat.267.jpg', 'cat.410.jpg', 'cat.737.jpg', 'cat.461.jpg', 'cat.48.jpg', 'cat.375.jpg', 'cat.582.jpg', 'cat.344.jpg', 'cat.31.jpg', 'cat.898.jpg', 'cat.469.jpg', 'cat.161.jpg', 'cat.268.jpg', 'cat.303.jpg', 'cat.677.jpg', 'cat.374.jpg', 'cat.676.jpg', 'cat.797.jpg', 'cat.929.jpg', 'cat.438.jpg', 'cat.54.jpg', 'cat.89.jpg', 'cat.339.jpg', 'cat.650.jpg', 'cat.637.jpg', 'cat.431.jpg', 'cat.598.jpg', 'cat.838.jpg', 'cat.590.jpg', 'cat.221.jpg', 'cat.292.jpg', 'cat.399.jpg', 'cat.265.jpg', 'cat.125.jpg', 'cat.632.jpg', 'cat.18.jpg', 'cat.763.jpg', 'cat.377.jpg', 'cat.823.jpg', 'cat.332.jpg', 'cat.274.jpg', 'cat.283.jpg', 'cat.69.jpg', 'cat.32.jpg', 'cat.974.jpg', 'cat.798.jpg', 'cat.231.jpg', 'cat.519.jpg', 'cat.840.jpg', 'cat.512.jpg', 'cat.406.jpg', 'cat.518.jpg', 'cat.530.jpg', 'cat.351.jpg', 'cat.854.jpg', 'cat.328.jpg', 'cat.489.jpg', 'cat.501.jpg', 'cat.385.jpg',

In [None]:
import numpy as np  #storing the data using this
import cv2  #for reading the images into grayscale vectors
from PIL import Image  #used to manipulate the images into the appropriate sizes
import matplotlib.pyplot as plt
%matplotlib inline

# 2. Resizing the images

In [None]:
from keras.preprocessing.image import ImageDataGenerator as IDG  #keras's built in image generator, which we can use to specify image size and such

size = (160, 160)  #the new size of each image after resizing. Dimensions are still up in the air

#Fraction of the images in train_dir that is held out for validation.
#subset='training' / subset='validation' only take effect when the generator was
#created with a validation_split; without it the validation generator is empty
#("Found 0 images belonging to 2 classes.").
validation_fraction = 0.2

#No augmentation parameters are used by either generator; they only normalize
#pixel values into the [0, 1] range.
normal_generator = IDG(rescale=1./255)  #used for the testing data, which is never split
split_generator = IDG(rescale=1./255, validation_split=validation_fraction)  #used for the training/validation split

#Since the images are already organised into directories, flow_from_directory can
#stream them straight from disk.

#needed data generators
training_generator = split_generator.flow_from_directory(train_dir,
                                                         target_size=size,  #force resizes all input images, super nice.
                                                         batch_size=20,
                                                         class_mode='binary',
                                                         subset='training')

validation_generator = split_generator.flow_from_directory(train_dir,
                                                           target_size=size,
                                                           batch_size=10,
                                                           class_mode='binary',
                                                           subset='validation')

testing_generator = normal_generator.flow_from_directory(testing_dir,
                                                         target_size=size,
                                                         batch_size=20,
                                                         class_mode='binary')

#This is why we have the data separated into two subdirectories in the train and validation (testing) directories.
#The way the data can tell the cats from the dogs is simply due to the fact that they are in two different
#sub directories as they flow in from their main directories. The generator automatically applies labels to
#them this way. Super useful. And since both directories have the same subdirectories, the labels will
#be the same as well, which is required.

Found 2000 images belonging to 2 classes.
Found 0 images belonging to 2 classes.
Found 1000 images belonging to 2 classes.


# 3. Creating the neural network model (cnn)

In [None]:
from tensorflow.keras.models import Sequential as seq
from tensorflow.keras.layers import Dense, Conv2D, Flatten,  MaxPooling2D, Dropout
#Standard model creation that can be used for most projects. You can play around with layers if you would like to increase or decrease accuracy.
def createModel():
  """Build the (uncompiled) CNN used to classify an image as a cat or a dog.

  The network is four Conv2D + MaxPooling2D stages (32, 64, 128 and 256 filters),
  followed by Dropout(0.5), Flatten, a 1024-unit ReLU Dense layer and a single
  sigmoid output unit. The input shape is read from the module-level `size`
  tuple, with 3 colour channels.

  A summary of the layers is printed as a side effect.

  Returns:
    The Sequential model. It is not compiled here.
  """
  model = seq()

  #The 3 indicates RGB, since we don't need to grayscale.
  model.add(Conv2D(32, kernel_size=3, activation='relu', input_shape=(size[0], size[1], 3)))
  model.add(MaxPooling2D((2,2)))  #reduces dimensionality for computational ease

  model.add(Conv2D(64, kernel_size=3, activation='relu'))
  model.add(MaxPooling2D((2,2)))

  model.add(Conv2D(128, kernel_size=3, activation='relu'))
  model.add(MaxPooling2D((2,2)))

  model.add(Conv2D(256, kernel_size=3, activation='relu'))
  model.add(MaxPooling2D((2,2)))

  model.add(Dropout(0.5))

  model.add(Flatten())  #allows the data from the previous layer to be fed into a standard dense layer

  #the rest of these is architecture of the normal deep neural network

  model.add(Dense(1024, activation='relu'))
  model.add(Dense(1, activation='sigmoid'))

  #summary() prints the table itself and returns None, so wrapping it in print()
  #would add a stray "None" line to the output.
  model.summary()
  return model

myModel = createModel()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 158, 158, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 79, 79, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 77, 77, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 38, 38, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 36, 36, 128)       73856     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 18, 18, 128)      0

Note that precision and recall are not especially important in this case. Misidentifying a cat as a dog is no more costly than misidentifying a dog as a cat, so there is no real reason to track precision and recall separately; accuracy alone is a good enough metric for judging our CNN. Based on this, we can now compile and train our model with accuracy as its sole metric.

# 4. Training the model

In [None]:
from tensorflow.keras.optimizers import RMSprop  #optimizer that scales each update by a running average of recent gradient magnitudes

#`learning_rate` is the current name of the argument; the old `lr` alias is deprecated.
myModel.compile(loss='binary_crossentropy',
                optimizer=RMSprop(learning_rate=2e-4),  #learning rate of 0.0002 originally
                metrics=['accuracy'])

#Model.fit accepts generators directly; fit_generator is deprecated in its favour.
t_history = myModel.fit(training_generator,
                        validation_data=validation_generator,
                        validation_steps=40,  #validation batches evaluated at the end of each epoch
                        epochs=100,
                        steps_per_epoch=80)  #training batches drawn per epoch
                                
  

  super(RMSprop, self).__init__(name, **kwargs)
  # This is added back by InteractiveShellApp.init_path()


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

# 5. Checking our model's stats

In [None]:
 #Model.evaluate accepts generators directly; evaluate_generator is deprecated in its favour.
 testing_loss, testing_acc = myModel.evaluate(testing_generator)
 #just for you to see loss and accuracy(Note: these are decimals out of 1)
 print("Testing loss: {}".format(testing_loss))
 print("Testing accuracy: {}".format(testing_acc))

  """Entry point for launching an IPython kernel.


Testing loss: 1.841797113418579
Testing accuracy: 0.7720000147819519


# 6. Saving the model

In [None]:
#make a directory for saving models to

#exist_ok=True lets this cell be re-run without raising FileExistsError, and using
#the full path avoids changing the kernel's working directory.
os.makedirs(os.path.join("/content", "saved_models"), exist_ok=True)

In [None]:
#save the current model to a file

model_num = 3  #change this manually after the run. this was the best model we had

#Save by full path so the kernel's working directory is not changed as a side effect.
model_path = os.path.join("/content", "saved_models", "model_{}.h5".format(model_num))
myModel.save(model_path)  #remember to download it locally

# 7. Try out the model!

In [None]:
from PIL import Image
import requests

def getImage2(url, timeout=10):
  """Download the image at `url` and return it as a PIL Image.

  Args:
    url: Address of the image to fetch.
    timeout: Seconds to wait for the server before giving up.

  Returns:
    The PIL Image opened from the downloaded bytes.

  Raises:
    requests.HTTPError: if the server answers with an error status.
  """
  from io import BytesIO  #local import so this cell does not rely on an import made elsewhere

  response = requests.get(url, timeout=timeout)
  #Fail with a clear HTTP error instead of handing an error page to PIL.
  response.raise_for_status()
  #response.content is the fully downloaded, content-decoded body.
  return Image.open(BytesIO(response.content))

In [None]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

print("If you give me an image, I'll try to classify it as a dog or a cat!")
img_url = input("Enter the image URL:").strip()  #input() already returns a str; strip stray whitespace from pasting
retr_img = getImage2(img_url)
plt.imshow(retr_img)

#adjust the image appropriately
#convert("RGB") guarantees exactly 3 channels, so grayscale, palette or RGBA
#images do not break the reshape below.
retr_img = retr_img.convert("RGB").resize((size[1], size[0]))  #PIL's resize takes (width, height)
retr_img = np.divide(np.array(retr_img), 255)  #same 0-1 scaling as the generators' rescale=1./255

retr_img = retr_img.reshape(1, size[0], size[1], 3)  #a batch containing the single image

#print the prediction!
prediction = myModel.predict(retr_img)[0][0]
if prediction <= 0.5:
  print("Classified as cat.")
else:
  print("Classified as dog.")

If you give me an image, I'll try to classify it as a dog or a cat!
