In [1]:
import os
from PIL import Image
import numpy as np
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD, RMSprop, Adam
from keras.layers import Conv2D, MaxPooling2D

In [3]:
# Collect the training file names. os.listdir returns entries in arbitrary
# order, so sort them to make the sample order reproducible, and skip hidden
# entries (e.g. .DS_Store, .ipynb_checkpoints) that are not images and would
# break both image loading and the "<class>_..." label parsing further down.
ima_train = sorted(f for f in os.listdir('./train') if not f.startswith('.'))


def read_train_image(filename):
    """Load one training image as an RGB pixel array.

    Args:
        filename: Name of a file located inside ``./train``.

    Returns:
        A NumPy array of shape (height, width, 3) built from the
        RGB-converted image. The notebook's recorded output shows the
        training images are 100x100, but no resizing is done here.
    """
    # The context manager releases the file handle as soon as the pixels
    # have been copied, instead of leaving it to the garbage collector.
    with Image.open('./train/' + filename) as img:
        return np.array(img.convert('RGB'))

# Load every training image and stack them into a single 4-D array:
# (num_images, height, width, channels) -> (840, 100, 100, 3).
x_train = np.array([read_train_image(name) for name in ima_train])

# Each file name starts with its class id followed by '_'. The images were
# renamed with 1-based ids (1/2/3/4), so subtract 1 to obtain the 0-based
# labels (0/1/2/3) that one-hot encoding expects. Resulting shape: (840,).
y_train = np.array([int(name.split('_')[0]) for name in ima_train]) - 1

print(x_train.shape)
print(y_train.shape)

(840, 100, 100, 3)
(840,)


In [4]:
# Collect the test file names. os.listdir returns entries in arbitrary order,
# so sort them to make the sample order reproducible, and skip hidden entries
# (e.g. .DS_Store, .ipynb_checkpoints) that are not images and would break
# both image loading and the "<class>_..." label parsing further down.
ima_test = sorted(f for f in os.listdir('./test') if not f.startswith('.'))


def read_test_image(filename):
    """Load one test image as an RGB pixel array.

    Args:
        filename: Name of a file located inside ``./test``.

    Returns:
        A NumPy array of shape (height, width, 3) built from the
        RGB-converted image. No resizing is done here.
    """
    # The context manager releases the file handle as soon as the pixels
    # have been copied, instead of leaving it to the garbage collector.
    with Image.open('./test/' + filename) as img:
        return np.array(img.convert('RGB'))

# Load every test image and stack them into a single 4-D array:
# (num_images, height, width, channels).
x_test = np.array([read_test_image(name) for name in ima_test])

# The class id is the part of the file name before the first '_'; it is
# 1-based on disk, so shift it down to 0-based labels.
y_test = np.array([int(name.split('_')[0]) for name in ima_test]) - 1

print(x_test.shape)
print(y_test.shape)

(188, 100, 100, 3)
(188,)


In [5]:
# Convert the integer class labels to one-hot vectors (the format expected by
# the categorical cross-entropy loss used for training).
#
# num_classes is pinned to 4 (labels 0-3, matching the Dense(4) output layer):
# without it, to_categorical infers the width from the largest label present,
# so a split that happens to lack the highest class would get fewer columns
# and no longer match the model output.
#
# The ndim check keeps the cell idempotent: labels that are already one-hot
# (2-D) are left alone when the cell is re-run.
if y_train.ndim == 1:
    y_train = np_utils.to_categorical(y_train, num_classes=4)
if y_test.ndim == 1:
    y_test = np_utils.to_categorical(y_test, num_classes=4)
print( y_test)

[[1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0. 0.]
 [0. 1. 0.

In [6]:
# Scale pixel values from the 0-255 range down to [0, 1] as float32.
#
# The dtype guard keeps this cell idempotent: the freshly loaded image arrays
# are integer-typed, so the scaling runs exactly once. Re-running the cell on
# the already converted float32 data would otherwise divide by 255 again and
# silently shrink the inputs.
if x_train.dtype != np.float32:
    x_train = x_train.astype('float32') / 255
if x_test.dtype != np.float32:
    x_test = x_test.astype('float32') / 255

print(x_train.shape)
print(y_train.shape)

(840, 100, 100, 3)
(840, 4)


In [None]:
# Sequential model: layers are stacked one after another, like building
# blocks, in the order listed below.
model = Sequential([
    # Block 1: two convolution layers with 32 filters of size 3x3 and ReLU
    # activation. input_shape is the shape of a single image
    # (height, width, channels).
    Conv2D(32, (3, 3), activation='relu', input_shape=(100, 100, 3)),
    Conv2D(32, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    # Dropout guards against overfitting: during training, 25% of the
    # values passed to this layer are randomly zeroed.
    Dropout(0.25),

    # Block 2: same layout with 64 filters.
    Conv2D(64, (3, 3), activation='relu'),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten the feature maps, one hidden dense layer,
    # then a 4-way softmax (one output per class).
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(4, activation='softmax'),
])

# Compile the model: SGD with Nesterov momentum, categorical cross-entropy
# loss (labels are one-hot), and accuracy reported as the metric.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# Train for 32 epochs. With the 840 training images and a batch size of 10,
# one epoch consists of 84 batches; once the last batch has been fed, the
# epoch ends and the next one starts another pass over the same images.
model.fit(x_train, y_train, batch_size=10, epochs=32)

In [None]:
# Persist the trained weights to disk. These are the learned parameters of
# the network (the counterpart of "k" and "b" in a simple house-price linear
# regression). An existing file at this path is overwritten.
model.save_weights(filepath='./dog_weights.h5', overwrite=True)

In [None]:
# Evaluate the trained model on the test split. With the loss and metric
# configured at compile time, the result holds the test loss followed by the
# test accuracy.
score = model.evaluate(x=x_test, y=y_test, batch_size=10)
print(score)