In [1]:
from __future__ import division
import numpy as np
import tensorflow as tf
from captcha.image import ImageCaptcha

In [2]:
# Captcha image geometry (pixels).
width, height = 100, 60
imageSize = width * height

# Alphabet the captcha characters are drawn from, and characters per captcha.
charset = '0123456789'
captchaLength = 4

# Learning rate handed to the optimizer.
alpha = 0.001

In [3]:
def getCode():
    """Return a random captcha string.

    The string has ``captchaLength`` characters, each picked from
    ``charset`` by one ``np.random.randint`` draw of indices.
    """
    indices = np.random.randint(0, len(charset), captchaLength)
    return ''.join([charset[i] for i in indices])


# Shared generator used to render codes into images of the configured size.
captcha = ImageCaptcha(width=width, height=height)

In [4]:
def getData(n = 10):
    """Generate ``n`` random captcha samples.

    Args:
        n: number of (image, code) pairs to produce.

    Returns:
        A tuple ``(imageList, codeList)`` of two lists of length ``n``:
        the images returned by ``captcha.generate_image`` and the code
        strings they were rendered from, in matching order.
    """
    codeList = [getCode() for _ in range(n)]
    # Build a real list rather than using map(): on Python 3 map() is a lazy,
    # single-use iterator, which cannot be indexed or iterated twice.
    imageList = [captcha.generate_image(code) for code in codeList]
    return imageList, codeList

In [5]:
def weight_variable(shape):
    """Create a ``tf.Variable`` of the given shape.

    Initial values are drawn from ``tf.random_normal`` with stddev 0.01.
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.01))

In [6]:
def bias_variable(shape):
    """Create a ``tf.Variable`` of the given shape.

    Initial values are drawn from ``tf.random_normal`` with stddev 0.1.
    """
    return tf.Variable(tf.random_normal(shape, stddev=0.1))

In [7]:
def conv2d(x, W):
    """Convolve ``x`` with filter ``W`` using unit strides and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

In [8]:
def max_pool(x):
    """Apply 2x2 max pooling with stride 2 and 'SAME' padding to ``x``."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [9]:
# Define the network inputs and outputs.
# x: flattened grayscale images, one row of `imageSize` values per sample.
x = tf.placeholder(tf.float32, shape=[None, imageSize])
# y: concatenated one-hot labels, `len(charset)` slots per captcha character.
y = tf.placeholder(tf.float32, shape=[None, len(charset) * captchaLength])
# keep_prob: dropout keep probability, fed at run time.
keep_prob = tf.placeholder(tf.float32)
# The flat vectors come from a NumPy array built from a PIL image, which is
# laid out row-major as (height, width). Restore that order (rows first) so
# the convolutions see the real 2-D image; reshaping to (width, height)
# would scramble the pixel neighbourhoods.
x_image = tf.reshape(x, shape=[-1, height, width, 1])

In [10]:
# First convolution layer: 5x5 filters, 1 input channel -> 32 feature maps,
# followed by ReLU and 2x2 max pooling.
conv_layer1_weight = weight_variable([5, 5, 1, 32])
conv_layer1_bias = bias_variable([32])
conv_layer1_act = tf.nn.relu(
    tf.add(conv2d(x_image, conv_layer1_weight), conv_layer1_bias))
pool_layer1 = max_pool(conv_layer1_act)

In [11]:
# Second convolution layer: 5x5 filters, 32 -> 64 feature maps,
# followed by ReLU and 2x2 max pooling.
conv_layer2_weight = weight_variable([5, 5, 32, 64])
conv_layer2_bias = bias_variable([64])
conv_layer2_act = tf.nn.relu(
    tf.add(conv2d(pool_layer1, conv_layer2_weight), conv_layer2_bias))
pool_layer2 = max_pool(conv_layer2_act)

In [12]:
# Third convolution layer: 5x5 filters, 64 -> 64 feature maps,
# followed by ReLU and 2x2 max pooling.
conv_layer3_weight = weight_variable([5, 5, 64, 64])
conv_layer3_bias = bias_variable([64])
conv_layer3_act = tf.nn.relu(
    tf.add(conv2d(pool_layer2, conv_layer3_weight), conv_layer3_bias))
pool_layer3 = max_pool(conv_layer3_act)

In [13]:
def _pooled_size(size, num_pools=3):
    """Return the length of one spatial dimension after stride-2 'SAME' pooling.

    Each 2x2/stride-2 'SAME' pool maps ``n`` to ``ceil(n / 2)``, so the
    ceiling-halving is applied ``num_pools`` times. Integer arithmetic is
    used instead of ``round(n / 8)``, which gives the wrong size for some
    inputs (e.g. 97 -> 12 instead of 13) and depends on the interpreter's
    rounding of .5 values.
    """
    for _ in range(num_pools):
        size = (size + 1) // 2
    return size


# Spatial extent of the feature maps after the three pooling layers.
lastWidth = _pooled_size(width)
lastHeight = _pooled_size(height)

In [14]:
# Fully connected layer: flatten the last pooled feature maps and project
# them onto 1024 ReLU units.
flat_features = lastWidth * lastHeight * 64
fc_layer_weight = weight_variable([flat_features, 1024])
fc_layer_bias = bias_variable([1024])
pool_layer3_flat = tf.reshape(pool_layer3, [-1, flat_features])
fc_layer_linear = tf.add(tf.matmul(pool_layer3_flat, fc_layer_weight), fc_layer_bias)
fc_layer = tf.nn.relu(fc_layer_linear)

In [15]:
# Dropout layer: the keep probability is supplied through the `keep_prob`
# placeholder at run time.
fc_layer_drop = tf.nn.dropout(fc_layer, keep_prob=keep_prob)

In [16]:
# Readout layer: one logit per (character position, charset symbol) pair.
num_outputs = len(charset) * captchaLength
output_layer_weight = weight_variable([1024, num_outputs])
output_layer_bias = bias_variable([num_outputs])
output_layer_linear = tf.matmul(fc_layer_drop, output_layer_weight)
y_conv = tf.add(output_layer_linear, output_layer_bias)

In [17]:
# Loss, optimizer and evaluation ops.
# NOTE(review): each character position is a one-of-N choice; a per-position
# softmax loss may suit this better than independent sigmoids -- confirm.
element_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_conv)
loss = tf.reduce_mean(element_loss)
optimizer = tf.train.AdamOptimizer(alpha).minimize(loss)

# View logits and labels as (batch, position, symbol) and take the argmax
# over symbols to get one class index per character position.
per_char_shape = [-1, captchaLength, len(charset)]
prediction = tf.argmax(tf.reshape(y_conv, per_char_shape), 2)
correct = tf.argmax(tf.reshape(y, per_char_shape), 2)

# Accuracy is the fraction of character positions predicted correctly.
correct_prediction = tf.equal(prediction, correct)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [18]:
# Create the saver and the session, then initialise all graph variables.
saver = tf.train.Saver()
session = tf.Session()
init_op = tf.global_variables_initializer()
session.run(init_op)

In [19]:
def imageToVertor(image):
    """Convert an image into its flat vector representation.

    The image is converted to mode "L", turned into a NumPy array,
    flattened to ``width * height`` entries and divided by 255.
    """
    pixels = np.asarray(image.convert("L"))
    flat = pixels.reshape([width * height])
    return flat / 255

In [None]:
def codeToVertor(code):
    """Convert a captcha code into its vector representation.

    Builds a ``captchaLength`` x ``len(charset)`` one-hot matrix (row i marks
    the charset index of ``code[i]``) and returns it flattened to length
    ``len(charset) * captchaLength``.
    """
    symbol_ids = [charset.index(code[pos]) for pos in range(captchaLength)]
    onehot = np.zeros([captchaLength, len(charset)])
    onehot[np.arange(captchaLength), symbol_ids] = 1
    return onehot.reshape(-1)

In [None]:
# Training loop: each step draws a fresh batch of 10 random captchas, runs one
# optimizer update with dropout keep probability 0.75, and logs the batch loss.
for step in range(1, 1000):
    imageList, codeList = getData(10)
    # Materialise real lists: a lazy map object (Python 3) cannot be fed.
    x_data = [imageToVertor(image) for image in imageList]
    y_data = [codeToVertor(code) for code in codeList]
    _, batch_loss = session.run(
        [optimizer, loss],
        feed_dict={x: x_data, y: y_data, keep_prob: .75})
    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print('[%d]: loss: %f' % (step, batch_loss))

[1]: loss: 0.690429
[2]: loss: 0.610761
[3]: loss: 0.464669
[4]: loss: 0.319703
[5]: loss: 0.466319
[6]: loss: 0.451865
[7]: loss: 0.380793
[8]: loss: 0.323238
[9]: loss: 0.350861
[10]: loss: 0.367005
[11]: loss: 0.377611
[12]: loss: 0.366303
[13]: loss: 0.348796
[14]: loss: 0.333631
[15]: loss: 0.336407
[16]: loss: 0.343376
[17]: loss: 0.338347
[18]: loss: 0.346365
[19]: loss: 0.347115
[20]: loss: 0.357813
[21]: loss: 0.338703
[22]: loss: 0.335340
[23]: loss: 0.347702
[24]: loss: 0.341254
[25]: loss: 0.336141
[26]: loss: 0.332695
[27]: loss: 0.332490
[28]: loss: 0.342500
[29]: loss: 0.337657
[30]: loss: 0.334766
[31]: loss: 0.334872
[32]: loss: 0.334144
[33]: loss: 0.331498
[34]: loss: 0.330046
[35]: loss: 0.341723
[36]: loss: 0.327263
[37]: loss: 0.321013
[38]: loss: 0.325559
[39]: loss: 0.325214
[40]: loss: 0.340154
[41]: loss: 0.335449
[42]: loss: 0.325814
[43]: loss: 0.333847
[44]: loss: 0.336624
[45]: loss: 0.335985
[46]: loss: 0.337576
[47]: loss: 0.334604
[48]: loss: 0.334794
[

[379]: loss: 0.323300
[380]: loss: 0.331969
[381]: loss: 0.325438
[382]: loss: 0.326304
[383]: loss: 0.331097
[384]: loss: 0.326804
[385]: loss: 0.326017
[386]: loss: 0.327932
[387]: loss: 0.325412
[388]: loss: 0.322468
[389]: loss: 0.328122
[390]: loss: 0.333956
[391]: loss: 0.331000
[392]: loss: 0.328226
[393]: loss: 0.326085
[394]: loss: 0.329281
[395]: loss: 0.326034
[396]: loss: 0.330590
[397]: loss: 0.325503
[398]: loss: 0.324399
[399]: loss: 0.327867
[400]: loss: 0.325860
[401]: loss: 0.332431
[402]: loss: 0.337104
[403]: loss: 0.328161
[404]: loss: 0.327221
[405]: loss: 0.327249
[406]: loss: 0.322370
[407]: loss: 0.324754
[408]: loss: 0.335523
[409]: loss: 0.330091
[410]: loss: 0.329635
[411]: loss: 0.329600
[412]: loss: 0.326460
[413]: loss: 0.331663
[414]: loss: 0.327535
[415]: loss: 0.328605
[416]: loss: 0.324455
[417]: loss: 0.322541
[418]: loss: 0.334118
[419]: loss: 0.327895
[420]: loss: 0.325881
[421]: loss: 0.328798
[422]: loss: 0.328621
[423]: loss: 0.326543
[424]: los