In [17]:
# model
from tensorflow.python.keras.optimizers import RMSprop
from tensorflow.keras import layers, Model

# Input geometry (rows, columns) and the optimizer learning rate.
height, width = 60, 34
LR = 0.001

# Single-channel image input of shape (height, width, 1).
img_input = layers.Input((height, width, 1))

# Three identical stages of 3x3 convolution + 2x2 max-pooling; only the
# number of convolution filters changes from stage to stage.
x = img_input
for _n_filters in (16, 32, 64):
    x = layers.Conv2D(_n_filters, 3, activation='relu')(x)
    x = layers.MaxPool2D(2)(x)

# Classifier head: flatten, one hidden dense layer, dropout for regularization.
x = layers.Flatten()(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)

# 36-way softmax output layer.
output = layers.Dense(36, activation='softmax', name='output')(x)
# Indexing with [-1] drops the leading batch axis, so this prints the
# per-sample output shape.
print(output[-1].shape)

model = Model(img_input, output)
model.compile(
    optimizer=RMSprop(lr=LR),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

(36,)


In [8]:
# Data pipeline
import tensorflow as tf
import pathlib
from string import digits, ascii_lowercase
import numpy as np
import os
# Alphabet of recognized characters: the digits followed by the lowercase
# ASCII letters. A character's position in this string is used as its class
# index when labels are one-hot encoded.
CHAPTER_LIST = digits + ascii_lowercase


def _get_file_name(path):
    return os.path.basename(path).split('_')[0].split('.')[0]


def _preprocess_image(image):
    """Decode encoded image bytes into a normalized single-channel tensor.

    The image is decoded with one channel, resized to 60x34 and scaled by
    1/255 so that pixel values fall in the [0, 1] range.
    """
    # NOTE(review): get_dataset globs '*.png' but the bytes are decoded with
    # decode_jpeg -- confirm this op accepts PNG input in the TF version used.
    decoded = tf.image.decode_jpeg(image, channels=1)
    resized = tf.image.resize_images(decoded, [60, 34])
    return resized / 255.0


def _load_and_preprocess_from_path_label(path, label):
    """Dataset map function: turn (file path, label) into (image, flat label).

    Reads the file at ``path``, runs it through ``_preprocess_image`` and
    reshapes ``label`` with ``_flatten_labels``.
    """
    # Debug trace of the two arguments handed over by Dataset.map.
    print(path, label)
    raw_bytes = tf.read_file(path)
    processed_image = _preprocess_image(raw_bytes)
    flat_label = _flatten_labels(label)
    return processed_image, flat_label

def _str2idxlist(text):
    """
    Convert text into an array of one-hot rows, one row per character.

    Each character is lower-cased and looked up in CHAPTER_LIST; a character
    that is not in CHAPTER_LIST raises ValueError.
    """
    one_hot_rows = []
    for char in text:
        position = CHAPTER_LIST.index(char.lower())
        one_hot_rows.append(_idx2onehot(position))
    return np.array(one_hot_rows)


def _idx2onehot(idx):
    """Return a one-hot vector of length len(CHAPTER_LIST) with position idx set to 1."""
    vocab_size = len(CHAPTER_LIST)
    one_hot = np.zeros(vocab_size)
    one_hot[idx] = 1
    return one_hot

def _flatten_labels(array):
    """Reshape a label tensor into a flat vector with one entry per class.

    The target length is derived from CHAPTER_LIST, the same source that
    _idx2onehot uses to size its vectors, so the two stay in agreement if
    the alphabet changes.
    """
    return tf.reshape(array, (len(CHAPTER_LIST), ))


def get_dataset(data_dir, batch_size=50):
    """Build an endlessly repeating, shuffled, batched dataset of (image, label).

    Args:
        data_dir: directory that directly contains the ``*.png`` images; the
            label of each image is taken from its file name via
            ``_get_file_name``.
        batch_size: number of samples per emitted batch.

    Returns:
        A ``tf.data.Dataset`` yielding batches of preprocessed images and
        flattened one-hot labels.

    Raises:
        ValueError: if ``data_dir`` contains no ``*.png`` files.
    """
    root_path = pathlib.Path(data_dir)
    all_image_paths = [str(i) for i in root_path.glob('*.png')]
    if not all_image_paths:
        # Fail early with a clear message instead of a shape error raised
        # later from inside the map function.
        raise ValueError('no *.png images found in %r' % str(root_path))
    all_image_labels = np.array([_str2idxlist(_get_file_name(i)) for i in all_image_paths])

    path_ds = tf.data.Dataset.from_tensor_slices((all_image_paths, all_image_labels))
    # Shuffle the lightweight (path, label) pairs before decoding, with a
    # buffer that covers the whole dataset. File names start with the label,
    # so a small buffer over a name-grouped listing would produce batches
    # dominated by a few classes.
    path_ds = path_ds.shuffle(buffer_size=len(all_image_paths))
    image_label_ds = path_ds.map(_load_and_preprocess_from_path_label)

    ds = image_label_ds.repeat()
    ds = ds.batch(batch_size)
    return ds
    
# Smoke test of the input pipeline: build the training dataset and inspect
# the shapes of one batch of images and labels.
# NOTE(review): absolute, machine-specific path -- consider a configurable
# data directory so the notebook runs elsewhere.
dataset = get_dataset('/Users/zhangzhichao/github/ContentSecurity-Analyzer/ml/captcha/splited/train/')

# One-shot iterator over the dataset; get_next() returns the (images, labels)
# tensors for a batch.
iteror = dataset.make_one_shot_iterator()
imgs, labels = iteror.get_next()
print(imgs.shape)
print(labels.shape)

Tensor("arg0:0", shape=(), dtype=string) Tensor("arg1:0", shape=(1, 36), dtype=float64)
(?, 60, 34, 1)
(?, 36)


In [18]:
# train
import datasets.base as input_data
import os

# Root folder holding the 'train' and 'test' image sub-folders.
# NOTE(review): absolute, machine-specific path -- consider making it configurable.
data_dir = '/Users/zhangzhichao/github/ContentSecurity-Analyzer/ml/captcha/splited'
train_dir = os.path.join(data_dir, 'train')
test_dir = os.path.join(data_dir, 'test')

batch_size = 50

# Pass batch_size explicitly so that editing the value above actually changes
# the training batch size instead of silently falling back to the default.
train_dataset = get_dataset(train_dir, batch_size)

# Validation uses larger batches of 2000 samples.
test_dataset = get_dataset(test_dir, 2000)

# Both datasets repeat indefinitely, so the number of batches per epoch and
# per validation pass is bounded by the *_steps arguments.
history = model.fit(
    train_dataset,
    steps_per_epoch=1000,
    epochs=30,
    validation_data=test_dataset,
    validation_steps=50,
    verbose=2,
)

# Per-epoch training and validation metrics recorded by model.fit.
acc = history.history['acc']
val_acc = history.history['val_acc']

loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# NOTE(review): `plt` is not imported in the visible cells -- presumably
# matplotlib.pyplot is imported elsewhere in the notebook; confirm.

# Each metric gets its own figure. Drawing both on the same axes would mix
# accuracy and loss curves and let the second title overwrite the first.
plt.figure()
plt.plot(epochs, acc, label='train')
plt.plot(epochs, val_acc, label='validation')
plt.title('Accuracy')
plt.xlabel('epoch')
plt.legend()

plt.figure()
plt.plot(epochs, loss, label='train')
plt.plot(epochs, val_loss, label='validation')
plt.title('Loss')
plt.xlabel('epoch')
plt.legend()

Tensor("arg0:0", shape=(), dtype=string) Tensor("arg1:0", shape=(1, 36), dtype=float64)
Tensor("arg0:0", shape=(), dtype=string) Tensor("arg1:0", shape=(1, 36), dtype=float64)
Instructions for updating:
Use tf.cast instead.


Instructions for updating:
Use tf.cast instead.


Epoch 1/30


KeyboardInterrupt: 