In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import keras

# Model related
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, MaxPooling2D, AveragePooling2D, Dropout, Flatten, GlobalAveragePooling2D
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ReduceLROnPlateau

# Dataset locations. The training folder is read through its 'dogs/' and
# 'cats/' sub-folders; the test folder is listed directly.
train_dir = "../input/ml-marathon-final/data/kaggle_dogcat/train/"
test_dir = "../input/ml-marathon-final/data/kaggle_dogcat/test/"

In [None]:
# Network input shape as (height, width, channels).
img_size = (224, 224, 3)

def load_image(path):
    """Read one image file and return it as an RGB pixel array.

    The image is resized to the height and width stored in ``img_size``
    using nearest-neighbour interpolation.

    Args:
        path: Location of the image file to read.

    Returns:
        A numpy array built from the loaded image. Pixel values are left
        unscaled; normalisation is done by the caller.
    """
    # load_img takes target_size as (height, width). img_size also carries the
    # channel count, so only its first two entries are forwarded.
    # The deprecated `grayscale` flag is not passed: color_mode='rgb' already
    # selects a three-channel image.
    img = load_img(path,
                   color_mode='rgb',
                   target_size=img_size[:2],
                   interpolation='nearest')
    return np.array(img)

In [None]:
x_train = []
x_val = []
y_train = []
y_val = []

# Each class lives in its own sub-folder: dogs are labelled 1, cats 0.
# Dogs are processed first so the sample order matches the label order.
for class_folder, label in (('dogs/', 1), ('cats/', 0)):
    class_dir = train_dir + class_folder

    # List the folder once (not once per image) and sort it so that the
    # train/validation split is the same on every run.
    filenames = sorted(os.listdir(class_dir))

    # Files whose index is at most 90% of the class size go to training,
    # the remainder to validation.
    train_limit = len(filenames) * 0.9

    for i, filename in enumerate(filenames):
        img = load_image(class_dir + filename)
        if i <= train_limit:
            x_train.append(img)
            y_train.append(label)
        else:
            x_val.append(img)
            y_val.append(label)

In [None]:
# Preprocessing: cast the image lists to float32 arrays and divide by 255.
# NOTE: this cell rebinds x_*/y_* and is not idempotent - re-running it
# without re-running the loading cell divides the pixels by 255 again.
x_train = np.asarray(x_train, dtype='float32') / 255
x_val = np.asarray(x_val, dtype='float32') / 255

# One-hot encode the integer labels into two columns (index 0 = cat, 1 = dog).
y_train = keras.utils.to_categorical(np.array(y_train), num_classes=2)
y_val = keras.utils.to_categorical(np.array(y_val), num_classes=2)

# Report the resulting array shapes, one per line.
print(
    f'x_train: {x_train.shape}',
    f'x_val:   {x_val.shape}',
    f'y_train: {y_train.shape}',
    f'y_val:   {y_val.shape}',
    sep='\n',
)

In [None]:
# Convolutional base: ResNet50 without its classification top, taking
# 224x224 three-channel inputs. The `weights` argument is left at the
# library default.
trans_model = ResNet50(
    input_shape=(224, 224, 3),
    input_tensor=None,
    include_top=False,
)
trans_model.summary()

In [None]:
# Classification head stacked on the ResNet50 feature maps:
# 2x2 average pooling -> flatten -> three dense/dropout stages -> softmax.
x = trans_model.output
x = AveragePooling2D((2,2))(x)
x = Flatten()(x)
x = Dense(units=1024, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(units=512, activation="relu")(x)
x = Dropout(0.5)(x)
x = Dense(units=512, activation="relu")(x)
x = Dropout(0.5)(x)
# Two output units to match the two-column one-hot labels (cat, dog).
output_layer = Dense(units=2, activation="softmax")(x)

model = Model(inputs=trans_model.input, outputs=output_layer)
# `learning_rate` replaces the deprecated `lr` keyword, which newer Keras
# releases no longer accept.
model.compile(optimizer=Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

In [None]:
# Training hyper-parameters: samples per batch and number of passes over
# the training data.
batch_size, epochs = 32, 10

In [None]:
# Build the ImageDataGenerator used for training and specify the value
# ranges of the random augmentations (rotation, shifts, horizontal flip).
train_generator = ImageDataGenerator(rotation_range=10, 
                                       width_shift_range=0.1, 
                                       height_shift_range=0.1, 
                                       horizontal_flip=True
                                      )

# Validation data is passed through unchanged: random augmentation here would
# make the validation loss/accuracy noisy and not comparable between epochs,
# which also disturbs callbacks that monitor val_loss.
val_generator = ImageDataGenerator()

In [None]:
# Multiply the learning rate by `factor` when the validation loss has not
# improved for `patience` epochs, never going below `min_lr`.
# (The keyword was previously misspelled as `onitor`.)
reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.01,
                              patience=5,
                              min_lr=1e-12
                             )

In [None]:
# Whole number of batches needed to see every sample once per epoch
# (the last batch may be smaller than batch_size).
train_steps = int(np.ceil(x_train.shape[0] / batch_size))
# Validation steps are derived from x_val: x_test is only created in a later
# cell and is not the data being validated on.
val_steps = int(np.ceil(x_val.shape[0] / batch_size))

# NOTE(review): fit_generator is deprecated in recent TensorFlow releases in
# favour of model.fit, which accepts generators - switch when upgrading.
history = model.fit_generator(train_generator.flow(x_train, y_train, batch_size=batch_size),
                              steps_per_epoch = train_steps,
                              epochs = epochs,
                              validation_data = val_generator.flow(x_val, y_val, batch_size=batch_size),
                              validation_steps = val_steps,
                              verbose=1,
                              shuffle=True,
                              callbacks = [reduce_lr]
                             )

In [None]:
# Write the trained weights to model.h5 in the working directory.
model.save_weights("model.h5")

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

# Plot training & validation accuracy values
plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Plot training & validation loss values
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Test'], loc='upper left')
plt.show()

In [None]:
# Restore the weights previously written to model.h5 before predicting.
model.load_weights("model.h5")

In [None]:
# Read every file of the test folder in sorted (lexicographic) filename order.
# NOTE(review): presumably this order has to match the row order of the
# sample submission file - confirm against the file naming scheme.
test_list = sorted(os.listdir(test_dir))

# Same preprocessing as the training data: float32 pixels divided by 255.
x_test = np.array(
    [load_image(test_dir + filename) for filename in test_list],
    dtype='float32',
) / 255

print(f'x_test: {x_test.shape}')

In [None]:
# Start from the provided template so the row layout is preserved.
submission = pd.read_csv('../input/ml-marathon-final/sample_submission.csv')

# Column 1 of the softmax output is the class that was labelled 1 (dogs)
# when the training labels were built.
dog_probability = model.predict(x_test)[:, 1]
submission['Predicted'] = dog_probability

# Write the file with explicit column names and without the index column.
submission.to_csv("submission.csv", index=False, header=["ID", "Predicted"])

In [None]:
# Show the submission table as the cell output for a visual sanity check.
submission