In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import os
import zipfile

In [2]:
# Unpack both input archives into the current working directory so the
# individual files can be listed and read.
zip_files = ['test1', 'train']
for zip_file in zip_files:
    archive_path = "../input/{}.zip".format(zip_file)
    with zipfile.ZipFile(archive_path, "r") as archive:
        archive.extractall(".")
        print("{} unzipped".format(zip_file))

In [3]:
# Show what the input directory contains.
input_entries = os.listdir('../input')
print(input_entries)

In [4]:
# Directory holding the extracted training images.
folder_path="../working/train"
# os.listdir() returns entries in arbitrary order. Sorting fixes the row order
# of everything built from this list, so a seeded shuffle/split of those rows
# picks the same files on every run and on every filesystem.
file_names=sorted(os.listdir(folder_path))

In [5]:
# The label of each image is whatever precedes the first "." in its file name.
targets=[name.split(".")[0] for name in file_names]
# Full path of each image, in the same order as the labels.
full_paths=[os.path.join(folder_path, name) for name in file_names]

# One row per image: its path on disk and its label.
dataset=pd.DataFrame({'image_path': full_paths, 'target': targets})

In [6]:
# Preview the first ten rows of the path/label table.
dataset.head(n=10)

In [7]:
# Count the rows per label and report the two classes.
target_counts=dataset['target'].value_counts()
dog_message="Number of dogs in the dataset:{}".format(target_counts['dog'])
print(dog_message)
cat_message="Number of cats in the dataset:{}".format(target_counts['cat'])
print(cat_message)

In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state seeds the shuffle.
dataset_train, dataset_test=train_test_split(
    dataset,
    test_size=0.2,
    random_state=42,
)

In [9]:
# Training-time preprocessing: scale pixel values by 1/255 and apply random
# rotation, shear, zoom, shift and horizontal-flip augmentation.
augmentation_settings=dict(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
)
train_datagen=ImageDataGenerator(**augmentation_settings)

# Stream the training rows from disk as 200x200 images with binary labels,
# 64 images per batch.
train_datagenerator=train_datagen.flow_from_dataframe(
    dataframe=dataset_train,
    x_col="image_path",
    y_col="target",
    target_size=(200,200),
    class_mode="binary",
    batch_size=64,
)

In [10]:
# Held-out images are only scaled by 1/255 -- no augmentation is applied.
test_datagen=ImageDataGenerator(rescale=1./255)

# Stream the held-out rows from disk with the same image size, label mode and
# batch size as the training generator.
test_datagenerator=test_datagen.flow_from_dataframe(
    dataframe=dataset_test,
    x_col="image_path",
    y_col="target",
    target_size=(200,200),
    class_mode="binary",
    batch_size=64,
)

# Creating a Baseline Model

In [11]:
# Baseline network: three conv -> max-pool -> dropout stages (32, 64, 128
# filters) followed by a 128-unit dense layer and a single sigmoid output.
baseline_layers = [
    Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(200, 200, 3)),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'),
    MaxPooling2D((2, 2)),
    Dropout(0.2),
    Flatten(),
    Dense(128, activation='relu', kernel_initializer='he_uniform'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
]
model = Sequential(baseline_layers)

# Compile with SGD + momentum and binary cross-entropy, tracking accuracy.
opt = SGD(learning_rate=0.001, momentum=0.9)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

In [12]:
# Train the baseline model for 50 epochs, validating on the held-out generator.
# The step counts come from the generators themselves: len() of a Keras
# iterator is its number of batches per pass, so every epoch trains and
# validates on all of the data. Dividing the row count by a hard-coded number
# that differs from the generators' batch_size covers only part of it.
history = model.fit(train_datagenerator,
                    epochs=50,
                    validation_data=test_datagenerator,
                    validation_steps=len(test_datagenerator),
                    steps_per_epoch=len(train_datagenerator)
                    )

In [14]:
# Report the last-epoch accuracies of the baseline run.
# Depending on the Keras version, metrics=['accuracy'] is logged either as
# 'accuracy'/'val_accuracy' or as 'acc'/'val_acc'; accept whichever is present
# instead of failing with a KeyError.
baseline_metrics = history.history
baseline_acc_key = 'accuracy' if 'accuracy' in baseline_metrics else 'acc'
print("Train Accuracy:{:.3f}".format(baseline_metrics[baseline_acc_key][-1]))
print("Test Accuracy:{:.3f}".format(baseline_metrics['val_' + baseline_acc_key][-1]))

# Improving the Model

In [15]:
# Deeper network: four stages of conv -> conv -> batch-norm -> max-pool ->
# dropout (32, 64, 128, 64 filters), then a 512-unit dense layer and a single
# sigmoid output.
model2=Sequential()
for stage_index, stage_filters in enumerate((32, 64, 128, 64)):
    # Only the very first layer declares the input image shape.
    input_spec={"input_shape": (200,200, 3)} if stage_index == 0 else {}
    model2.add(Conv2D(stage_filters, (3,3), activation="relu", **input_spec))
    model2.add(Conv2D(stage_filters, (3,3), activation="relu"))
    model2.add(BatchNormalization())
    # 2x2 pooling window; the stride defaults to the window size.
    model2.add(MaxPooling2D((2,2)))
    model2.add(Dropout(0.25))

# Classifier head.
model2.add(Flatten())
model2.add(Dense(512, activation="relu"))
model2.add(Dropout(0.5))
model2.add(Dense(1, activation="sigmoid"))
model2.summary()

# Compile with SGD + momentum and binary cross-entropy, tracking accuracy.
opt = SGD(learning_rate=0.01, momentum=0.9)
model2.compile(loss="binary_crossentropy", optimizer=opt, metrics=["accuracy"])

In [16]:
# Train the deeper model for 50 epochs, validating on the held-out generator.
# The step counts come from the generators themselves: len() of a Keras
# iterator is its number of batches per pass, so every epoch trains and
# validates on all of the data. Dividing the row count by a hard-coded number
# that differs from the generators' batch_size covers only part of it.
history2 = model2.fit(train_datagenerator, 
                     epochs = 50, 
                     validation_data=test_datagenerator,
                     validation_steps=len(test_datagenerator), 
                     steps_per_epoch = len(train_datagenerator))

In [17]:
# Report the last-epoch accuracies of the second model's run.
# Depending on the Keras version, metrics=["accuracy"] is logged either as
# 'accuracy'/'val_accuracy' or as 'acc'/'val_acc'; accept whichever is present
# instead of failing with a KeyError.
model2_metrics = history2.history
model2_acc_key = 'accuracy' if 'accuracy' in model2_metrics else 'acc'
print("Train Accuracy:{:.3f}".format(model2_metrics[model2_acc_key][-1]))
print("Test Accuracy:{:.3f}".format(model2_metrics['val_' + model2_acc_key][-1]))