In [1]:
import os
import random
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
from PIL import Image



# Character classes that may appear in a captcha.
NUMBER = list('0123456789')
LOW_CASE = list('abcdefghijklmnopqrstuvwxyz')
UP_CASE = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
# Full alphabet: a character's position in this list is its class index.
CAPTCHA_LIST = NUMBER + LOW_CASE + UP_CASE
# Number of characters in one captcha.
CAPTCHA_LEN = 5
# Image dimensions in pixels.
CAPTCHA_HEIGHT, CAPTCHA_WIDTH = 30, 150
# Directories holding the training images and the held-out evaluation images.
TRAIN_PATH = 'train'
TEST_PATH = 'train-1'
# Row counts used when loading each of the two directories above.
TRAIN_NUM = 19994
TEST_NUM = 1114

In [45]:
def convert2gray(img):
    '''
    Collapse a multi-channel image array to a single channel.

    Arrays with more than two dimensions are averaged over their last axis
    (every channel weighted equally); arrays with two or fewer dimensions are
    returned unchanged, as the very same object.
    '''
    if len(img.shape) <= 2:
        # Already single-channel: nothing to average.
        return img
    return np.mean(img, axis=-1)


def text2vec(text, captcha_len=CAPTCHA_LEN, captcha_list=CAPTCHA_LIST):
    '''
    Encode captcha text as a flat one-hot vector.

    The vector has captcha_len * len(captcha_list) entries, laid out as one
    segment of len(captcha_list) slots per character position. For the
    character at position p, the slot whose offset inside segment p equals
    the character's index in captcha_list is set to 1.

    Raises ValueError (from list.index) for a character that is not in
    captcha_list.
    '''
    n_classes = len(captcha_list)
    one_hot = np.zeros(captcha_len * n_classes)
    for position, char in enumerate(text):
        one_hot[captcha_list.index(char) + position * n_classes] = 1
    return one_hot




def next_batch(path, num, width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT):
    '''
    Load labelled captcha images from a directory.

    path   -- directory containing the image files; the first CAPTCHA_LEN
              characters of each file name are taken as the image's label.
    num    -- number of rows in the returned arrays; at most this many files
              are read.
    width, height -- expected image size; each image must flatten to
              width * height values.

    Returns (batch_x, batch_y):
      batch_x -- [num, width * height] array of grayscale pixels divided by 255
      batch_y -- [num, CAPTCHA_LEN * len(CAPTCHA_LIST)] one-hot label array
    If the directory holds fewer than num files, the remaining rows stay zero.
    '''
    batch_x = np.zeros([num, width * height])
    batch_y = np.zeros([num, CAPTCHA_LEN * len(CAPTCHA_LIST)])
    # os.listdir returns names in arbitrary order, so sort for a reproducible
    # row order; slicing to num keeps the writes below inside the arrays when
    # the directory contains more files than requested.
    img_names = sorted(os.listdir(path))[:num]
    for row, img_name in enumerate(img_names):
        # The context manager closes the underlying file once it is decoded.
        with Image.open(os.path.join(path, img_name)) as img:
            image = convert2gray(np.array(img))
        # Flattened pixels and the matching label share the same row index.
        batch_x[row, :] = image.flatten() / 255
        batch_y[row, :] = text2vec(img_name[0:CAPTCHA_LEN])
    return batch_x, batch_y

def test_img(path, num, width=CAPTCHA_WIDTH, height=CAPTCHA_HEIGHT):
    '''
    Load unlabelled captcha images from a directory.

    path   -- directory containing the image files.
    num    -- number of rows in the returned array; at most this many files
              are read.
    width, height -- expected image size; each image must flatten to
              width * height values.

    Returns a [num, width * height] array of grayscale pixels divided by 255.
    Rows follow the sorted (lexicographic) order of the file names. If the
    directory holds fewer than num files, the remaining rows stay zero.
    '''
    batch_x = np.zeros([num, width * height])
    # os.listdir returns names in arbitrary order, so sort to make the mapping
    # from row index to file reproducible; slicing to num keeps the writes
    # below inside the array when the directory contains extra files.
    # NOTE(review): lexicographic order puts '10' before '2' - confirm it
    # matches the id order expected by whoever consumes the predictions.
    img_names = sorted(os.listdir(path))[:num]
    for row, img_name in enumerate(img_names):
        # The context manager closes the underlying file once it is decoded.
        with Image.open(os.path.join(path, img_name)) as img:
            image = convert2gray(np.array(img))
        batch_x[row, :] = image.flatten() / 255
    return batch_x


In [46]:
def weight_variable(shape, w_alpha=0.01):
    '''
    Create a trainable weight tensor of the given shape.

    Initial values are draws from tf.random_normal scaled by w_alpha.
    '''
    return tf.Variable(w_alpha * tf.random_normal(shape))


def bias_variable(shape, b_alpha=0.1):
    '''
    Create a trainable bias tensor of the given shape.

    Initial values are draws from tf.random_normal scaled by b_alpha.
    '''
    return tf.Variable(b_alpha * tf.random_normal(shape))


def conv2d(x, w):
    '''
    2-D convolution of x with filter w, using stride 1 in every dimension.

    'SAME' padding zero-pads the borders so the spatial size of the output
    matches that of the input.
    '''
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, w, strides=unit_strides, padding='SAME')


def max_pool_2x2(x):
    '''
    2x2 max pooling with stride 2.

    Keeps the largest value of every 2x2 window, which halves the spatial
    size of the feature map.
    '''
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=window, padding='SAME')

In [47]:
def cnn_graph(x, keep_prob, size, captcha_list=CAPTCHA_LIST, captcha_len=CAPTCHA_LEN):
    '''
    Build the three-stage convolutional network and return its output logits.

    x         -- flattened images, reshaped here to [-1, height, width, 1]
    keep_prob -- keep probability passed to every dropout layer
    size      -- (height, width) of one image

    Each convolution stage is: 3x3 convolution -> bias -> ReLU -> 2x2 max-pool
    -> dropout. The stages are followed by a 1024-unit fully connected layer
    (ReLU + dropout) and a linear output layer with
    len(captcha_list) * captcha_len units. No activation is applied to the
    returned tensor.
    '''
    rows, cols = size
    # Restore the 2-D layout and add a single channel dimension.
    net = tf.reshape(x, shape=[-1, rows, cols, 1])

    # (input channels, output channels) of the three 3x3 convolution stages.
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 64)):
        kernel = weight_variable([3, 3, in_channels, out_channels])
        offset = bias_variable([out_channels])
        activated = tf.nn.relu(tf.nn.bias_add(conv2d(net, kernel), offset))
        # Pool, then apply dropout.
        net = tf.nn.dropout(max_pool_2x2(activated), keep_prob)

    # Fully connected layer over the flattened feature map (64 channels).
    flat_size = int(net.shape[1]) * int(net.shape[2]) * 64
    w_fc = weight_variable([flat_size, 1024])
    b_fc = bias_variable([1024])
    flattened = tf.reshape(net, [-1, flat_size])
    hidden = tf.nn.relu(tf.add(tf.matmul(flattened, w_fc), b_fc))
    hidden = tf.nn.dropout(hidden, keep_prob)

    # Output layer: one slot per (character position, character class) pair.
    n_outputs = len(captcha_list) * captcha_len
    w_out = weight_variable([1024, n_outputs])
    b_out = bias_variable([n_outputs])
    return tf.add(tf.matmul(hidden, w_out), b_out)

In [48]:
def optimize_graph(y, y_conv):
    '''
    Build the training op: Adam minimising the mean sigmoid cross-entropy.

    y      -- target labels
    y_conv -- raw network outputs (logits); the sigmoid is applied inside the
              loss function, so they must not be activated beforehand.

    Sigmoid cross-entropy treats every output slot as an independent yes/no
    decision (classes independent but not mutually exclusive); softmax
    cross-entropy is the variant for mutually exclusive classes.
    '''
    per_slot_loss = tf.nn.sigmoid_cross_entropy_with_logits(logits=y_conv, labels=y)
    loss = tf.reduce_mean(per_slot_loss)
    adam = tf.train.AdamOptimizer(learning_rate=0.001)
    return adam.minimize(loss)


def accuracy_graph(y, y_conv, width=len(CAPTCHA_LIST), height=CAPTCHA_LEN):
    '''
    Build the accuracy op.

    Predictions and labels are both reshaped to [-1, height, width] and the
    arg-max over the last axis is taken as the character class of each
    position. The result is the fraction of matching positions, i.e. a
    per-character accuracy, not a whole-captcha accuracy.

    The comparison is case-sensitive, although captchas are usually checked
    case-insensitively.
    '''
    per_char_shape = [-1, height, width]
    # Predicted class index for each character position.
    predicted_idx = tf.argmax(tf.reshape(y_conv, per_char_shape), 2)
    # True class index for each character position.
    label_idx = tf.argmax(tf.reshape(y, per_char_shape), 2)
    hits = tf.cast(tf.equal(predicted_idx, label_idx), tf.float32)
    return tf.reduce_mean(hits)

In [None]:
def train(height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, y_size=len(CAPTCHA_LIST)*CAPTCHA_LEN, train_path=TRAIN_PATH, test_path=TEST_PATH,
          train_num=TRAIN_NUM, test_num=TEST_NUM, acc_rate=0.1, model_path=None):
    '''
    Train the CNN until the evaluation accuracy exceeds acc_rate.

    height, width -- image size, used for the input placeholder and forwarded
                     to the image loader
    y_size        -- length of one label vector
    train_path, train_num -- directory and row count of the training images
    test_path, test_num   -- directory and row count of the evaluation images
    acc_rate      -- training stops at the first evaluation whose accuracy is
                     strictly greater than this value
    model_path    -- if given, the variables are written to this checkpoint
                     path with tf.train.Saver once training stops; None (the
                     default) saves nothing

    Every step feeds the whole training set as a single batch; accuracy is
    evaluated and printed on every second step. Returns None.
    '''
    # Placeholders sized after the flattened image and the label vector.
    x = tf.placeholder(tf.float32, [None, height * width])
    y = tf.placeholder(tf.float32, [None, y_size])
    # Dropout keep probability: 0.75 while training, 1.0 while evaluating.
    keep_prob = tf.placeholder(tf.float32)
    y_conv = cnn_graph(x, keep_prob, (height, width))
    optimizer = optimize_graph(y, y_conv)
    accuracy = accuracy_graph(y, y_conv)
    saver = tf.train.Saver()

    # The image directories do not change between steps, so read them from
    # disk once instead of on every loop iteration.
    train_x, train_y = next_batch(train_path, train_num, width=width, height=height)
    eval_x, eval_y = next_batch(test_path, test_num, width=width, height=height)

    # The context manager closes the session even if a step raises.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        step = 0
        while True:
            sess.run(optimizer, feed_dict={x: train_x, y: train_y, keep_prob: 0.75})
            # Evaluate on every second step.
            if step % 2 == 0:
                acc = sess.run(accuracy, feed_dict={x: eval_x, y: eval_y, keep_prob: 1.0})
                print(datetime.now().strftime('%c'), ' step:', step, ' accuracy:', acc)
                # Stop as soon as the accuracy target is reached.
                if acc > acc_rate:
                    break
            step += 1
        if model_path is not None:
            saver.save(sess, model_path)

# Run training with the default configuration whenever this cell executes.
train()

In [None]:
def captcha2text(path, num, height=CAPTCHA_HEIGHT, width=CAPTCHA_WIDTH, model_path=None):
    '''
    Predict the text of every captcha image in a directory.

    path          -- directory containing the images
    num           -- number of images to load (rows passed to the network)
    height, width -- image size, used for the input placeholder and forwarded
                     to the image loader
    model_path    -- checkpoint to restore the network variables from. With
                     None (the default) the variables are freshly initialised
                     with random values, so the predictions come from an
                     untrained network.

    Returns a list of num strings; each string holds the CAPTCHA_LEN
    characters of CAPTCHA_LIST selected by the arg-max of the network output
    at each character position.
    '''
    image_list = test_img(path, num, width=width, height=height)

    # Build the network in its own graph so that repeated calls do not keep
    # adding duplicate nodes to the default graph and so that the Saver below
    # only covers this network's variables.
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, [None, height * width])
        keep_prob = tf.placeholder(tf.float32)
        y_conv = cnn_graph(x, keep_prob, (height, width))
        # Class index with the highest score for each character position.
        predict = tf.argmax(tf.reshape(y_conv, [-1, CAPTCHA_LEN, len(CAPTCHA_LIST)]), 2)
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()

    # The context manager closes the session even if prediction raises.
    with tf.Session(graph=graph) as sess:
        if model_path is None:
            sess.run(init)
        else:
            # NOTE(review): the variables carry auto-generated names, so this
            # assumes the checkpoint was written from a graph in which
            # cnn_graph created its variables first - confirm before use.
            saver.restore(sess, model_path)
        # keep_prob of 1 disables dropout for inference.
        vector_list = sess.run(predict, feed_dict={x: image_list, keep_prob: 1})

    # Map each row of class indices back to its characters.
    return [''.join(CAPTCHA_LIST[idx] for idx in vector) for vector in vector_list.tolist()]

In [None]:
# Predict the text of the images under 'test' and pair each prediction with a
# 1-based id, giving a two-column array of (id, text).
n_images = 20000
text_list = np.array(captcha2text('test', n_images)).reshape(n_images, 1)
ID = np.arange(1, n_images + 1).reshape(n_images, 1)
result = np.concatenate((ID, text_list), axis=1)
print(result)

In [None]:
#将结果按照固定格式生成CSV文件
# Write the (id, prediction) pairs to a comma-separated file with an
# 'id,y' header row and no index column.
result_df = pd.DataFrame(result, columns=['id', 'y'])
result_df.to_csv('sample_test_target.csv', index=False, sep=',')

# Show only the first rows; the complete table is in the CSV file.
result_df.head()