In [1]:
import numpy as np
import os
import pickle

from keras.utils import np_utils
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, Dropout, Flatten, Input, concatenate
from keras.layers.convolutional import Conv2D, Convolution2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adadelta
from matplotlib import pyplot as plt 
import string

Using TensorFlow backend.
  return f(*args, **kwds)


### 配置参数

In [2]:
# Alphabet a captcha may contain: digits, then lowercase, then uppercase letters.
captcha_word = string.digits + string.ascii_lowercase + string.ascii_uppercase

# Image size the samples are resized to.
width, height = 200, 80

# Characters per captcha and number of distinct characters per position.
word_len = 4
word_class = len(captcha_word)

# Location of the training images and of the text file holding their labels.
train_dir = '/home/wei/data/third/third_train'
train_label = '/home/wei/data/third/third_label.txt'

# Build the character <-> index lookup tables
char_indices = {symbol: position for position, symbol in enumerate(captcha_word)}
indices_char = dict(enumerate(captcha_word))


# Convert a captcha string into a vector
def captcha_to_vec(captcha):
    """Encode ``captcha`` as a flat vector of ``word_len * word_class`` entries.

    The vector consists of consecutive groups of ``word_class`` entries, one
    group per character position. In the group for position ``k`` the entry at
    ``char_indices[captcha[k]]`` is set to 1; every other entry stays 0.

    Raises:
        KeyError: if a character is not a key of ``char_indices``.
        IndexError: if ``captcha`` has more than ``word_len`` characters, since
            the computed index then falls outside the vector.
    """
    encoded = np.zeros(word_len * word_class)
    for position, symbol in enumerate(captcha):
        # Offset of this position's group plus the character's slot inside it.
        encoded[position * word_class + char_indices[symbol]] = 1
    return encoded

# Convert a vector back into text
def vec_to_captcha(vec):
    """Decode a flat score vector into the characters it marks as active.

    Every entry that is not below 0.5 counts as active. For each active index,
    taken in ascending order, the character ``captcha_word[index % word_class]``
    is emitted. The number of active entries is not checked, so the result has
    one character per active entry, however many there are.

    Unlike the previous version, ``vec`` is left unmodified: the threshold is
    applied through a boolean mask instead of zeroing the small entries of the
    caller's array in place.

    Args:
        vec: 1-D array (or sequence) of scores, laid out as produced by
            ``captcha_to_vec``.

    Returns:
        str: the decoded characters joined together.
    """
    # Same selection as zeroing everything < 0.5 and then taking the non-zero
    # entries, but without writing into the input.
    active = ~(np.asarray(vec) < 0.5)
    char_pos = active.nonzero()[0]
    return ''.join(captcha_word[pos % word_class] for pos in char_pos)


In [3]:
# Collect the names of the samples in the training directory.
# os.listdir gives no ordering guarantee, so the list order is whatever the OS returns.
image_list = list(os.listdir(train_dir))

print(len(image_list))

ok
9700


In [4]:

# Pre-allocate the image tensor (N, height, width, 3) and the label matrix
# (N, word_len * word_class); both are filled row by row below.
X = np.zeros((len(image_list), height, width, 3), dtype = np.uint8)
y = np.zeros((len(image_list), word_len * word_class), dtype = np.uint8)

with open(train_label) as file_object:
    p = file_object.readlines()

# An image is paired with the label line that shares its first four characters.
# Indexing the lines once replaces a linear scan of the label file per image.
id_len = 4
label_by_id = {}
for line in p:
    # setdefault keeps the first line seen for a prefix, matching the old
    # "first match wins" scan. The captcha text is the last word_len characters
    # of the line once the line ending is removed, so a final line without a
    # trailing newline (or a CRLF file) is still read correctly.
    label_by_id.setdefault(line[:id_len], line.rstrip('\r\n')[-word_len:])

for i,img in enumerate(image_list):
    # Fail loudly when an image has no label; previously such an image silently
    # received the label of the last line in the file.
    try:
        label = label_by_id[img[:id_len]]
    except KeyError:
        raise ValueError("no label line in %s matches image %r" % (train_label, img)) from None

    img_path = os.path.join(train_dir, img)
    raw_img = image.load_img(img_path, target_size=(height, width))
    X[i] = image.img_to_array(raw_img)
    y[i] = captcha_to_vec(label)



ok


In [5]:
# Save the arrays as a pkl file.
# The `with` block closes the handle as soon as the dump finishes, so all
# buffered bytes are flushed to disk before "ok" is printed.
with open('/home/wei/data/third/third_train_data.pkl','wb') as file:
    pickle.dump((X,y) , file)
print("ok")

ok


### 加载数据，读取pickle文件

In [3]:
# Read the pickle file back into X (images) and y (labels).
# NOTE(review): pickle.load can execute arbitrary code while loading; only use
# this on a file written by this notebook.
with open('/home/wei/data/third/third_train_data.pkl', 'rb') as file:
    X, y = pickle.load(file)
print("ok")

ok


### 创建模型

In [9]:
# Network input: one image of height x width pixels with 3 channels.
input_tensor = Input(shape=(height, width, 3))

# Three convolutional stages. Each stage applies two same-padded ReLU
# convolutions with the given filter count / kernel size, then 2x2 max pooling.
x = input_tensor
for n_filters, kernel_size in ((32, 5), (64, 3), (128, 3)):
    for _ in range(2):
        x = Conv2D(n_filters, kernel_size, padding='same', activation='relu')(x)
    x = MaxPooling2D((2, 2))(x)

x = Flatten()(x)
x = Dropout(0.3)(x)

# Attach four classifiers at the end: one softmax head per character position
# (named c1..c4), joined into a single vector of word_len * word_class scores.
x = [
    Dense(word_class, activation='softmax', name='c%d'%(i+1))(x)
    for i in range(word_len)
]
output = concatenate(x)

model = Model(inputs=input_tensor, outputs=output)

# NOTE(review): 'categorical_crossentropy' and 'accuracy' are applied to the
# joined vector as if it were one distribution, although it holds four separate
# softmax groups. The logged val_loss plateaus near 5.55 (about 4 * ln 4), which
# looks consistent with that - confirm whether a per-character loss/metric was
# intended.
opt = Adadelta(lr=0.1)
model.compile(
    loss='categorical_crossentropy',
    optimizer=opt,
    metrics=['accuracy'],
)

# Save the weights after every epoch so that training can be resumed later
checkpointer = ModelCheckpoint(
    filepath="/home/wei/weight_3/weights.{epoch:02d}--{val_loss:.2f}-{val_acc:.4f}.hdf5",
    verbose=2,
    save_weights_only=True,
)

ok


### 开始训练模型

In [6]:
# Train for 45 epochs, holding out 10% of the samples for validation; the
# checkpoint callback writes the weights to disk after each epoch.
model.fit(
    X,
    y,
    epochs=45,
    validation_split=0.1,
    callbacks=[checkpointer],
)

Train on 8730 samples, validate on 970 samples
Epoch 1/45
Epoch 00001: saving model to /home/wei/weight_3/weights.01--7.50-0.3031.hdf5
Epoch 2/45
Epoch 00002: saving model to /home/wei/weight_3/weights.02--5.71-0.2670.hdf5
Epoch 3/45
Epoch 00003: saving model to /home/wei/weight_3/weights.03--5.63-0.2866.hdf5
Epoch 4/45
Epoch 00004: saving model to /home/wei/weight_3/weights.04--5.58-0.3062.hdf5
Epoch 5/45
Epoch 00005: saving model to /home/wei/weight_3/weights.05--5.58-0.3072.hdf5
Epoch 6/45
Epoch 00006: saving model to /home/wei/weight_3/weights.06--5.56-0.4196.hdf5
Epoch 7/45
Epoch 00007: saving model to /home/wei/weight_3/weights.07--5.56-0.4866.hdf5
Epoch 8/45
Epoch 00008: saving model to /home/wei/weight_3/weights.08--5.55-0.4876.hdf5
Epoch 9/45
Epoch 00009: saving model to /home/wei/weight_3/weights.09--5.56-0.6608.hdf5
Epoch 10/45
Epoch 00010: saving model to /home/wei/weight_3/weights.10--5.55-0.6866.hdf5
Epoch 11/45
Epoch 00011: saving model to /home/wei/weight_3/weights.11--

KeyboardInterrupt: 

In [12]:
# Save the weights and the model: first the weights alone, then the full model.
weights_path = '/home/wei/data/third/third_model_weights.h5'
model_path = '/home/wei/data/third/third_model.h5'

model.save_weights(weights_path)
model.save(model_path)