In [1]:
import numpy as np
import os
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf

%matplotlib inline

In [3]:
# Side length of the square output images. It has to be 128 because the data
# loaders and the model's input_shape in this notebook all require (128, 128, 3).
IMAGE_SIZE = 128
# Image size conversion: scale each picture so its longer side equals
# IMAGE_SIZE, centre it on a black IMAGE_SIZE x IMAGE_SIZE canvas and save the
# result as input/my_<split>/<original file name>.
counter = 0
for root_path in ('train', 'test'):
    out_dir = os.path.join('input', 'my_' + root_path)
    # Image.save does not create missing directories.
    os.makedirs(out_dir, exist_ok=True)
    for file_name in os.listdir(root_path):
        # The context manager closes the source file as soon as it is resized,
        # so file handles do not pile up across tens of thousands of images.
        with Image.open(os.path.join(root_path, file_name)) as img:
            scale = IMAGE_SIZE / max(img.size)
            # Round to the nearest pixel; clamp to 1 so a very elongated image
            # never produces a zero-sized dimension, which resize() rejects.
            to_size = (max(1, int(img.size[0] * scale + 0.5)),
                       max(1, int(img.size[1] * scale + 0.5)))
            resized = img.resize(to_size)
        black_bg = Image.new("RGB", (IMAGE_SIZE, IMAGE_SIZE))
        black_bg.paste(resized, ((IMAGE_SIZE - to_size[0]) // 2, (IMAGE_SIZE - to_size[1]) // 2))
        black_bg.save(os.path.join(out_dir, file_name))
        counter += 1
        # Lightweight progress report every 10000 images.
        if counter % 10000 == 0:
            print(counter)

10000
20000
30000


In [62]:
from sklearn.cross_validation import train_test_split

def get_train(shuffle=True, seed=None):
    """Load the preprocessed training images and their one-hot labels.

    Every file in ``input/my_train/`` is opened with PIL. Images whose pixel
    array is not exactly (128, 128, 3) are skipped. The label is ``[0, 1]``
    when the file name contains ``'cat'`` and ``[1, 0]`` otherwise.

    Args:
        shuffle: when true, the samples and labels are permuted together.
        seed: seed for the permutation. ``None`` gives a different order on
            every call; any integer (including 0) gives a reproducible order.

    Returns:
        X_train, Y_train: image array of shape (n, 128, 128, 3) in the dtype
        PIL decodes to, and an integer label array of shape (n, 2).
    """
    root_path = 'input/my_train/'
    expected_shape = (128, 128, 3)

    images = []
    labels = []
    # os.listdir order is unspecified; sorting makes a seeded shuffle reproducible.
    for name in sorted(os.listdir(root_path)):
        # Close each file right after decoding instead of leaking the handle.
        with Image.open(root_path + name) as img:
            pixels = np.array(img)
        # Filter before stacking: arrays of mixed shapes cannot be combined
        # into one regular ndarray.
        if pixels.shape != expected_shape:
            continue
        images.append(pixels)
        labels.append([0, 1] if 'cat' in name else [1, 0])

    if not images:
        # Keep the documented ranks even when nothing usable was found.
        return (np.empty((0,) + expected_shape, dtype=np.uint8),
                np.empty((0, 2), dtype=int))

    X_train = np.stack(images)
    Y_train = np.array(labels)

    if shuffle:
        # One shared permutation keeps every image aligned with its label.
        # RandomState(None) draws fresh entropy; an explicit seed (0 included)
        # is honoured.
        order = np.random.RandomState(seed).permutation(len(X_train))
        X_train = X_train[order]
        Y_train = Y_train[order]
    return X_train, Y_train
    
def get_test():
    """Load the preprocessed test images.

    Every file in ``input/my_test/`` is opened with PIL, in sorted file-name
    order, so row ``i`` of the result corresponds to the ``i``-th entry of
    ``sorted(os.listdir('input/my_test/'))``.

    Returns:
        X_test: image array of shape (n, 128, 128, 3).

    Raises:
        ValueError: if any image does not decode to shape (128, 128, 3).
    """
    root_path = 'input/my_test/'
    expected_shape = (128, 128, 3)

    images = []
    for index, name in enumerate(sorted(os.listdir(root_path))):
        path = root_path + name
        # Close each file right after decoding instead of leaking the handle.
        with Image.open(path) as img:
            pixels = np.array(img)
        # Validate before stacking so a bad file is reported by name instead of
        # surfacing as a ragged-array failure.
        if pixels.shape != expected_shape:
            raise ValueError(
                "test image #%d (%s) has shape %r, expected %r"
                % (index, path, pixels.shape, expected_shape)
            )
        images.append(pixels)

    if not images:
        # Keep the documented rank for an empty directory.
        return np.empty((0,) + expected_shape, dtype=np.uint8)
    return np.stack(images)
    

In [2]:
# NOTE(review): the wildcard import hides which names cat_read_data provides;
# presumably it supplies get_train (and get_test) — confirm and import them by name.
from cat_read_data import *

# Load the training images and labels using get_train's default arguments.
X_train, Y_train = get_train()

In [5]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.optimizers import SGD

# Network input: 128x128 images with 3 channels, passed as (128, 128, 3) tensors.
# The layers are applied in the order they are listed.
model = Sequential([
    # Stage 1: two 3x3 convolutions with 32 filters each, 2x2 max-pooling, dropout.
    Convolution2D(32, 3, 3, border_mode='valid', input_shape=(128, 128, 3)),
    Activation('relu'),
    Convolution2D(32, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Stage 2: same layout with 64 filters per convolution.
    Convolution2D(64, 3, 3, border_mode='valid'),
    Activation('relu'),
    Convolution2D(64, 3, 3),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten, one 256-unit hidden layer, 2-way softmax.
    # Keras infers the flattened size automatically.
    Flatten(),
    Dense(256),
    Activation('relu'),
    Dropout(0.5),
    Dense(2),
    Activation('softmax'),
])

# SGD with Nesterov momentum; the loss matches the two-column one-hot labels.
sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy')


Using TensorFlow backend.


In [12]:
# Quick smoke run: fit on the first 128 samples only, for a single epoch,
# holding out 15% of them for validation.
model.fit(
    X_train[:128],
    Y_train[:128],
    batch_size=32,
    nb_epoch=1,
    validation_split=0.15,
)

Train on 108 samples, validate on 20 samples
Epoch 1/1


<keras.callbacks.History at 0x20900c225c0>

In [None]:
# np.float was only an alias for the builtin float (float64) and no longer
# exists in current NumPy, so the dtype is spelled out explicitly.
# astype returns a new array; X_train itself keeps its original dtype.
X_train.astype(np.float64)