## 載入及整理資料

In [1]:
import os
import cv2
import keras
from keras.preprocessing import image
import numpy as np
import matplotlib.pyplot as plt

# An empty CUDA_VISIBLE_DEVICES list hides all GPUs from the backend.
os.environ['CUDA_VISIBLE_DEVICES'] = ""

# Root folder of the training images and the entries found directly in it.
data_path = "kaggle_dogcat/train"
class_list = os.listdir(data_path)

# Accumulators for the training samples and their labels.
x_train, y_train = [], []

# Spatial size used when resizing images and as the CNN input shape.
row_size, column_size = 300, 300

# Augmenter: random rotation of up to 30 degrees, shifts of up to 10% along
# each axis, and pixel values scaled by 1/255.
image_gen = image.ImageDataGenerator(
    rescale=1/255,
    rotation_range=30,
    width_shift_range=0.1,
    height_shift_range=0.1)

# How many augmented variants are drawn per source image.
image_batch_size = 10
# NOTE: everything between the triple quotes below is a bare string literal,
# not executed code. The loading, augmentation and shuffling steps it contains
# are therefore disabled in this cell: x_train / y_train stay the empty lists
# created above, and x_test is not defined here.
'''
# 整理訓練用資料
for n in ['cats','dogs']:
    class_path = os.path.join(data_path,n)
    file_list = os.listdir(class_path)
    for data in file_list:
        img = cv2.imread(os.path.join(class_path,data))
        
        # 標籤
        if n == 'cats':
            train_label = [1]
        elif n == 'dogs':
            train_label = [0]
        
        #x_train.append(img)
        #y_train.append(train_label)
        
        # 使用imageGenerator產製圖片
        gen_img = image_gen.flow(np.array([img]),train_label,
                                 batch_size=image_batch_size)
        for i in range(image_batch_size):
            gen_img_data = gen_img.next()
            x_train.append(gen_img_data[0][0])
            y_train.append(train_label)
            
x_test = []
# 整理測試資料
data_path = "kaggle_dogcat/test"
file_list = os.listdir(data_path)
for data in file_list:
    img = cv2.imread(os.path.join(data_path,data))
    x_test.append(img)

# 順序打散
index_list = list(range(len(x_train)))
np.random.shuffle(index_list)    

x_train = np.array(x_train)[index_list]
y_train = np.array(y_train)[index_list]
x_test = np.array(x_test)
'''


Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


(40000,)


## 圖像預處理
圖像的尺寸都不是固定的，但是CNN的input必須要固定下來
所以必須先將圖片的格式統一
作法為找出資料中維度最大值，並將其補齊
（註：下方程式碼實際上是以 cv2.resize 將每張圖片縮放為 row_size × column_size 的固定大小。）

In [None]:
from keras.utils import np_utils


# Bring every image to the same spatial size so the CNN gets a fixed input.
# Training images:
resize_x_train = [
    cv2.resize(sample, dsize=(row_size, column_size), interpolation=cv2.INTER_CUBIC)
    for sample in x_train
]

# Test images:
resize_x_test = [
    cv2.resize(sample, dsize=(row_size, column_size), interpolation=cv2.INTER_CUBIC)
    for sample in x_test
]

# The un-resized copies are no longer needed; drop the references.
del x_train, x_test

# One-hot encode the integer class labels.
y_train = np_utils.to_categorical(y_train)

# Not enough memory to feed everything at once, so train from batches instead.
def data_generator(data_x,data_y,batch_size):
    """Yield ``(x_batch, y_batch)`` NumPy array pairs forever.

    The data is walked front to back in slices of ``batch_size`` items; when
    the end is reached the walk restarts from the beginning. The final slice
    of a pass is shorter when ``len(data_x)`` is not a multiple of
    ``batch_size``.

    Args:
        data_x: Sliceable sequence of samples.
        data_y: Sliceable sequence of labels, indexed in step with ``data_x``.
        batch_size: Positive integer number of items per batch.

    Yields:
        Tuple ``(np.array(x_slice), np.array(y_slice))``.

    Raises:
        ValueError: If ``batch_size`` is not positive or ``data_x`` is empty.
            Without this check the endless loop would spin without ever
            yielding and the consumer would hang. Because this is a generator
            function, the error surfaces on the first ``next()`` call.
    """
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer, got %r" % (batch_size,))
    if len(data_x) == 0:
        raise ValueError("data_x is empty; there is nothing to generate batches from")

    while True:
        for start in range(0, len(data_x), batch_size):
            stop = start + batch_size
            yield np.array(data_x[start:stop]), np.array(data_y[start:stop])



## 建立模型
keras.layers.BatchNormalization(),

In [None]:
   

def cnn_model():
    """Assemble the (uncompiled) CNN used as the two-class classifier.

    Architecture, in order:
      * Conv2D, 5 filters, 10x10 kernel, ReLU, on inputs of shape
        ``(row_size, column_size, 3)``, followed by batch normalization and
        2x2 max pooling
      * Conv2D, 10 filters, 5x5 kernel, ReLU, then 2x2 max pooling
      * Conv2D, 15 filters, 3x3 kernel, ReLU, then 2x2 max pooling
      * Flatten, Dense(150, ReLU), Dense(2, softmax)

    Returns:
        A ``keras.models.Sequential`` instance; compiling is left to the caller.
    """
    layers = keras.layers
    net = keras.models.Sequential()

    # Stage 1: the only stage with batch normalization.
    net.add(layers.Conv2D(filters=5,
                          kernel_size=(10, 10),
                          input_shape=(row_size, column_size, 3),
                          activation='relu'))
    net.add(layers.BatchNormalization())
    net.add(layers.MaxPooling2D((2, 2)))

    # Stage 2.
    net.add(layers.Conv2D(filters=10, kernel_size=(5, 5), activation='relu'))
    net.add(layers.MaxPooling2D((2, 2)))

    # Stage 3.
    net.add(layers.Conv2D(filters=15, kernel_size=(3, 3), activation='relu'))
    net.add(layers.MaxPooling2D((2, 2)))

    # Classification head.
    net.add(layers.Flatten())
    net.add(layers.Dense(units=150, activation='relu'))
    net.add(layers.Dense(units=2, activation='softmax'))
    return net

# Build the network. This rebinds the name ``cnn_model`` from the builder
# function to the model instance it returns.
cnn_model = cnn_model()
cnn_model.summary()
cnn_model.compile(
    optimizer=keras.optimizers.Adam(),
    loss="categorical_crossentropy",
    metrics=['accuracy'])

# Feed the resized training data to the model ten samples at a time.
batch_size = 10
trainning_generator = data_generator(resize_x_train, y_train, batch_size)

# One epoch covers as many whole batches as fit into the training set.
cnn_model.fit_generator(
    trainning_generator,
    steps_per_epoch=len(resize_x_train) // batch_size,
    epochs=20)



In [None]:
import pandas as pd

# Keep only column 1 of the two-column softmax output (score of class index 1).
pred = cnn_model.predict(np.array(resize_x_test))[:, 1]

# IDs are the zero-padded row positions ("000", "001", ...), paired with the
# corresponding prediction.
output_df = pd.DataFrame(
    [str(position).zfill(3) for position in range(len(pred))],
    columns=['ID'],
).assign(Predicted=pred)

# Write the table to disk, then show its first 20 rows as the cell result.
output_df.to_csv('res.csv', index=False)
output_df.head(20)

