In [11]:
"""
    author: William Darko (repurposed from original author Francois Chollet)
    date: July, 2021
    description: Basic ConvNet example using the Kaggle dogs vs cats dataset from the dogs vs cats competition
"""

# imports

import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras import models
from keras import optimizer_v2 as optimizers
from keras.preprocessing.image import ImageDataGenerator

import os
import shutil



In [None]:
# copying images into training, validation and test directories
#
# Builds a smaller dataset from the original training images: for each class
# ('cat', 'dog') images 0-999 go to training, 1000-1499 to validation and
# 1500-1999 to testing. The cell is safe to re-run: existing directories are
# reused and files are simply copied over again.

init_train_dataset_dir = '../../datasets/catsvdogs_train'
init_test_dataset_dir = '../../datasets/catsvdogs_test'
base_dataset_dir = '../../datasets/smaller_datasets'

# root directories for training, validation, and testing data
train_dir = os.path.join(base_dataset_dir, 'catsvdogs_train_dir')
validation_dir = os.path.join(base_dataset_dir, 'catsvdogs_valid_dir')
test_dir = os.path.join(base_dataset_dir, 'catsvdogs_test_dir')

# per-class sub directories for both cat and dog
train_cats_dir = os.path.join(train_dir, 'cats')
validation_cats_dir = os.path.join(validation_dir, 'cats')
test_cats_dir = os.path.join(test_dir, 'cats')

train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
test_dogs_dir = os.path.join(test_dir, 'dogs')

# makedirs(exist_ok=True) also creates missing parents (base_dataset_dir and
# the split roots) and does not raise when the cell is executed a second time,
# unlike os.mkdir.
for _directory in (train_cats_dir, validation_cats_dir, test_cats_dir,
                   train_dogs_dir, validation_dogs_dir, test_dogs_dir):
    os.makedirs(_directory, exist_ok=True)


def _copy_image_range(label, indices, dest_dir, src_dir=init_train_dataset_dir):
    """Copy the files named '<label>.<i>.jpg' for every i in `indices`.

    Args:
        label: file name prefix, 'cat' or 'dog'.
        indices: iterable of integer image indices to copy.
        dest_dir: directory the files are copied into.
        src_dir: directory the files are read from.

    Raises:
        FileNotFoundError: if a source image does not exist.
    """
    for i in indices:
        fname = '{}.{}.jpg'.format(label, i)
        shutil.copyfile(os.path.join(src_dir, fname), os.path.join(dest_dir, fname))


# index ranges of the training, validation, and test splits (same for both classes)
_split_indices = (range(1000), range(1000, 1500), range(1500, 2000))

# destination directories per class, ordered to match _split_indices
_class_split_dirs = (
    ('cat', (train_cats_dir, validation_cats_dir, test_cats_dir)),
    ('dog', (train_dogs_dir, validation_dogs_dir, test_dogs_dir)),
)

for _label, _dest_dirs in _class_split_dirs:
    for _indices, _dest_dir in zip(_split_indices, _dest_dirs):
        _copy_image_range(_label, _indices, _dest_dir)

# sanity check

print("TOTAL CATS DATA COUNT \n TRAINING: {}, VALIDATION: {}, TESTING: {} \n".format(
    len(os.listdir(train_cats_dir)),
    len(os.listdir(validation_cats_dir)),
    len(os.listdir(test_cats_dir))))

print("TOTAL DOGS DATA COUNT \n TRAINING: {}, VALIDATION: {}, TESTING: {}".format(
    len(os.listdir(train_dogs_dir)),
    len(os.listdir(validation_dogs_dir)),
    len(os.listdir(test_dogs_dir))))



In [None]:
# defining the model

# Four Conv2D + MaxPooling2D stages (32, 64, 128, 128 filters) applied to
# 150x150 RGB inputs, then a flattened 512-unit dense layer and a single
# sigmoid output unit.
model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(150, 150, 3)),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Conv2D(128, (3,3), activation='relu'),
    layers.MaxPooling2D((2,2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [12]:
# configuring the model for training, and preprocessing step

# The optimizer class in keras.optimizer_v2.rmsprop is spelled `RMSprop`
# (lowercase "p"); `RMSProp` does not exist there and raises AttributeError.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.rmsprop.RMSprop(learning_rate=1e-4),
    metrics=['acc'],
)

# Because our training and testing data are still just jpeg files on disk, we
# must make them floating point tensors before feeding them into the convnet.
#
# preprocessing steps:
#   - read image files
#   - decode jpeg into rgb grids of pixels
#   - convert into floating point tensors
#   - rescale pixel values to [0,1] interval (preferable for neural networks)
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Both generators resize images to 150x150 and yield batches of 20 images;
# class_mode='binary' matches the binary_crossentropy loss used above.
train_data_generator = train_datagen.flow_from_directory(
    train_dir, target_size=(150, 150), batch_size=20, class_mode='binary')
validation_data_generator = test_datagen.flow_from_directory(
    validation_dir, target_size=(150, 150), batch_size=20, class_mode='binary')