# DENSENET + CTC

中文数据集: https://github.com/senlinuc/caffe_ocr

Synthetic Chinese String Dataset
```
--images/ 图片目录
--char_std_5990.txt 字符表(标签编号对应的字符,每行一个)
--train.txt 训练集合
--test.txt 测试集合
```

使用前需要修改 char_std_5990.txt:
1. 另存为不带 BOM 的 UTF-8 编码格式
2. 将 blank 从第一行(caffe 格式)移到最后一行(tensorflow 格式:CTC 以最后一个类别作为 blank)

In [1]:
# Root directory of the Synthetic Chinese String Dataset. The trailing slash is
# required because every path below is built by plain string concatenation.
dataset = '/home/ubuntu/文档/Synthetic Chinese String Dataset/'

# Character table: all lines of the dictionary file glued together in order.
# tf's CTC: the last class is reserved to the blank label.
with open(dataset + 'char_std_5990.txt', encoding='utf-8') as f:
    char = ''.join(line.strip('\r\n') for line in f.readlines())

# Replace the final five characters with a single '卍' so that blank shows up
# as one visible symbol when predictions are printed.
# (presumably the file's last line is the literal word "blank" — TODO confirm)
char = char[:len(char) - 5] + '卍'
nclass = len(char)
print('nclass:', nclass)

# class index -> character, used to decode predictions
id_to_char = dict(enumerate(char))

nclass: 5990


In [2]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

import os
import numpy as np
from PIL import Image
from matplotlib import pyplot as plt

from keras import backend as K
from keras.models import Model
from keras.layers import Input
from keras.layers.core import Lambda
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from keras.utils import multi_gpu_model
import tensorflow as tf

from imp import reload 
import densenet
reload(densenet)  # force a re-import so edits to densenet's source are picked up


G = 4  # number of GPUs
# Limit this process to 60% of each GPU's memory and allow TF to fall back to
# another device when an op cannot be placed on the requested one.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6)
session = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, allow_soft_placement=True))
K.set_session(session)  # make Keras use the session configured above


maxlabellength = 20  # width of the padded label array fed to the model
img_h = 32  # image height in pixels
img_w = 280  # image width in pixels
nclass = len(char)  # number of output classes; char is the character table built in the first cell
batch_size = 64 * G  # global batch size: 64 samples per GPU


class random_data():
    """
    Endless supplier of shuffled sample indices.

    Keeps a random permutation of ``range(total)`` and hands it out in
    consecutive slices. When the permutation is exhausted it is reshuffled in
    place and reading restarts from the beginning, so one batch may contain
    the tail of one permutation followed by the head of the next.
    """
    def __init__(self, total):
        self.index = 0          # read position inside self.range
        self.total = total      # number of samples being indexed
        self.range = [i for i in range(total)]
        np.random.shuffle(self.range)

    def get(self, batchsize):
        """
        Return the next ``batchsize`` indices as a list.

        The request is filled across as many reshuffles as necessary, so the
        result always has exactly ``batchsize`` entries even when ``batchsize``
        is larger than ``total`` (previously such a request came back short and
        left ``self.index`` pointing past the end of the permutation).

        Returns an empty list when there is nothing to index (``total <= 0``)
        or when ``batchsize <= 0``.
        """
        batch = []
        if not self.range:
            # Nothing to draw from; looping below would never terminate.
            return batch

        remaining = batchsize
        while remaining > 0:
            if self.index >= self.total:
                # Current permutation is used up: start a new one.
                np.random.shuffle(self.range)
                self.index = 0
            chunk = self.range[self.index:self.index + remaining]
            batch.extend(chunk)
            self.index += len(chunk)
            remaining -= len(chunk)
        return batch

def read_data(filename):
    """
    Load an annotation file into a dict.

    Every non-blank line is split on single spaces: the first field becomes the
    key and the remaining fields (kept as strings) become the value list.
    If a key occurs more than once, the last occurrence wins.

    Blank or whitespace-only lines (for example a trailing empty line) are
    skipped; otherwise they would create a bogus '' entry.

    :param filename: path of a UTF-8 encoded text file
    :return: dict mapping first field -> list of the remaining fields
    """
    dic = {}
    with open(filename, encoding='utf-8') as f:
        for line in f:
            line = line.strip('\r\n')
            if not line.strip():
                continue
            fields = line.split(' ')
            dic[fields[0]] = fields[1:]
    return dic

def gen(datafile, batchsize=64, maxlabellength=10, imagesize=(32,280)):
    """
    Endless batch generator for CTC training.

    Reads the annotation file once with ``read_data``, then repeatedly draws
    ``batchsize`` random image names, loads each image from
    ``dataset + 'images/'`` as grayscale, rescales pixels to [-0.5, 0.5] and
    packs images and labels into arrays.

    :param datafile: annotation file; each line is an image name followed by
        its label ids
    :param batchsize: number of samples per yielded batch
    :param maxlabellength: width of the padded label array; longer labels
        raise on assignment
    :param imagesize: (height, width); images are expected to already have
        this size
    :return: generator yielding ``(inputs, outputs)`` where ``inputs`` has the
        keys 'the_input', 'the_labels', 'input_length', 'label_length' and
        ``outputs`` is ``{'ctc': zeros(batchsize)}`` (a dummy target)
    """
    image_label = read_data(datafile)
    imagefile = list(image_label)
    print('图片总量:', len(imagefile))
    r_n = random_data(len(imagefile))
    imagefile = np.array(imagefile)

    while 1:
        # Allocate fresh buffers for every batch. Re-yielding the same arrays
        # lets a consumer that queues batches (e.g. fit_generator's prefetch
        # queue) see them overwritten by the next iteration, and would leave
        # label padding from an earlier batch in place.
        # np.float64 replaces the np.float alias, which newer NumPy removed.
        x = np.zeros((batchsize, imagesize[0], imagesize[1], 1), dtype=np.float64)
        labels = np.ones([batchsize, maxlabellength]) * 10000  # 10000 = padding value
        input_length = np.zeros([batchsize, 1])
        label_length = np.zeros([batchsize, 1])

        shufimagefile = imagefile[r_n.get(batchsize)]
        for i, j in enumerate(shufimagefile):
            img = Image.open(dataset + 'images/' + j).convert('L')
            img = np.array(img).astype(np.float32) / 255.0 - 0.5
            x[i] = np.expand_dims(img, axis=2)

            label_ids = image_label[j]
            # Ids in the file are shifted down by one
            # (presumably because the file reserves 0 for blank — TODO confirm).
            labels[i, :len(label_ids)] = [int(code) - 1 for code in label_ids]
            # presumably the network shrinks the width by a factor of 8 — TODO confirm
            input_length[i] = imagesize[1] // 8
            label_length[i] = len(label_ids)
        inputs = {'the_input': x,
                'the_labels': labels,
                'input_length': input_length,
                'label_length': label_length,
                 }
        outputs = {'ctc': np.zeros([batchsize])}
        yield (inputs, outputs)


def ctc_lambda_func(args):
    """
    CTC loss wrapper for use inside a Keras ``Lambda`` layer.

    :param args: 4-item sequence ``(y_pred, labels, input_length, label_length)``
    :return: result of ``K.ctc_batch_cost`` for the batch
    """
    predictions, targets, prediction_len, target_len = args
    ctc_loss = K.ctc_batch_cost(targets, predictions, prediction_len, target_len)
    return ctc_loss


####################
# Recognition network: one-channel image of height img_h and variable width
# -> output of densenet.dense_cnn over nclass classes.
# NOTE: `input` shadows the builtin; the name is kept in case later cells use it.
input = Input(shape=(img_h,None,1), name='the_input')
y_pred= densenet.dense_cnn(input, nclass)

# Extra inputs needed only to compute the CTC loss inside the graph.
labels = Input(name='the_labels', shape=[maxlabellength], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')
loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length]) 

model = Model(inputs=[input, labels, input_length, label_length], outputs=loss_out)
model.summary()
# Replicate on exactly G GPUs so that each replica receives batch_size / G = 64
# samples; without `gpus` Keras uses however many GPUs happen to be visible.
model = multi_gpu_model(model, gpus=G)
model.summary()

adam = Adam()
# The 'ctc' output already is the loss, so the Keras loss just passes it through.
# NOTE(review): 'accuracy' compares the dummy target with the loss value and is
# not a meaningful recognition metric here.
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=adam, metrics=['accuracy'])
####################

# NOTE(review): the training cell runs only 3 epochs, so patience=10 can never trigger.
earlystop = EarlyStopping(patience=10)
# checkpoint = ModelCheckpoint('./model/weights-densent-{epoch:02d}.h5', save_weights_only=True)
tensorboard = TensorBoard('./model/tflog-densenet', write_graph=True)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
the_input (InputLayer)          (None, 32, None, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 16, None, 64) 1600        the_input[0][0]                  
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 16, None, 64) 256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 16, None, 64) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
the_input (InputLayer)          (None, 32, None, 1)  0                                            
__________________________________________________________________________________________________
the_labels (InputLayer)         (None, 20)           0                                            
__________________________________________________________________________________________________
input_length (InputLayer)       (None, 1)            0                                            
__________________________________________________________________________________________________
label_length (InputLayer)       (None, 1)            0                                            
__________________________________________________________________________________________________
lambda_1 (

In [3]:
# Endless batch generators for the training and validation splits; both use
# the same batch geometry.
gen_kwargs = dict(batchsize=batch_size, maxlabellength=maxlabellength, imagesize=(img_h, img_w))
cc1 = gen(dataset + 'train.txt', **gen_kwargs)  # training batches
cc2 = gen(dataset + 'test.txt', **gen_kwargs)   # validation batches

In [4]:
# Number of batches that make up one pass over each split.
# NOTE(review): the training generator prints 3279606 images when it starts,
# while 3279601 is hard-coded here — confirm which count is intended.
train_steps = 3279601 // batch_size
val_steps = 364400 // batch_size

# Alternative callback list with checkpointing:
#   [earlystop, checkpoint, tensorboard]
model.fit_generator(
    cc1,
    steps_per_epoch=train_steps,
    epochs=3,
    validation_data=cc2,
    validation_steps=val_steps,
    callbacks=[earlystop, tensorboard],
)

Epoch 1/3
图片总量: 364400
图片总量: 3279606
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7f5405a66668>

In [5]:
# Pick the second-to-last layer of the multi-GPU wrapper and save its weights.
# NOTE(review): presumably this layer is the single-GPU model that
# multi_gpu_model wrapped; the lookup depends on layer order — confirm, or keep
# a reference to the model before wrapping it.
basemodel = model.layers[-2]
basemodel.save_weights('./model/densenet-model.h5')

In [6]:
# Print the layer table of the extracted model to check what was saved.
basemodel.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
the_input (InputLayer)          (None, 32, None, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 16, None, 64) 1600        the_input[0][0]                  
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 16, None, 64) 256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 16, None, 64) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (