In [54]:
import tensorflow
import tensorflow.keras as keras
import tensorflow.keras.utils
from tensorflow.keras.layers import LSTM, Input, Dense, TimeDistributed, Masking, Bidirectional
from tensorflow.keras.models import Model

from sklearn.utils import class_weight
import numpy as np

In [55]:
# Display the installed TensorFlow version so the run's environment is recorded with the notebook.
tensorflow.__version__

'2.3.0'

In [56]:
# Report whether TensorFlow can see at least one GPU.
# `tensorflow.test.is_gpu_available()` is deprecated; the documented replacement is
# `tensorflow.config.list_physical_devices('GPU')`, which returns a (possibly empty) list.
# Wrapping it in bool() keeps the cell output a plain True/False.
bool(tensorflow.config.list_physical_devices('GPU'))

True

# General

In [57]:
# Character set: digits, lowercase letters, uppercase letters, then the punctuation ' . , - ( ) /
# (the characters % : ; & # are not part of the alphabet).
# Index 0 holds the NUL character '\x00'.
# NOTE(review): presumably index 0 doubles as the label of zero-padded timesteps - confirm.
alphabet = [chr(0), *"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'.,-()/"]

# Model

In [100]:
# --- Model hyper-parameters -------------------------------------------------
timesteps = None        # time dimension left unspecified in the Input shape
input_features = 3      # number of values per timestep of the stroke input
lstm_output = 512       # units of each LSTM (per direction of the Bidirectional wrapper)

# Widths of the per-timestep Dense layers before / after the recurrent stack.
input_layer_size = [256]
output_layer_size = [256]

# Sizes of the three output heads.
alphabet_output_size = len(alphabet)
eoc_output_size = 1
eow_output_size = 1
# Backward-compatible alias: the original name was misspelled ("isze") and is
# still referenced by the model-building cell.
eow_output_isze = eow_output_size

# Number of stacked Bidirectional LSTM layers.
stack_height = 4

In [101]:
# Entry point of the network: stroke sequences with `input_features` values per timestep.
model_input = Input(shape=(timesteps, input_features), name='stroke_input')

# Mask timesteps whose features all equal 0. (the value used for padding).
masking_layer = Masking(mask_value=0., input_shape=(timesteps, input_features))(model_input)

# Per-timestep Dense layers applied before the recurrent stack.
input_layer = masking_layer
for dense_units in input_layer_size:
    input_layer = TimeDistributed(Dense(dense_units, activation='relu'))(input_layer)

# `stack_height` Bidirectional LSTMs, each returning the full output sequence.
lstm_layer = input_layer
for _ in range(stack_height):
    recurrent_cell = LSTM(lstm_output, return_sequences=True, activation="tanh")
    lstm_layer = Bidirectional(recurrent_cell)(lstm_layer)

# Per-timestep Dense layers applied after the recurrent stack.
output_layer = lstm_layer
for dense_units in output_layer_size:
    output_layer = TimeDistributed(Dense(dense_units, activation='relu'))(output_layer)

# Three heads that share `output_layer`:
#   alphabet_output - softmax over the alphabet entries
#   eoc_output      - single sigmoid unit
#   eow_output      - single sigmoid unit
alphabet_output = TimeDistributed(
    Dense(alphabet_output_size, activation='softmax'),
    name='alphabet_output',
)(output_layer)
eoc_output = TimeDistributed(
    Dense(eoc_output_size, activation='sigmoid'),
    name='eoc_output',
)(output_layer)
eow_output = TimeDistributed(
    Dense(eow_output_isze, activation='sigmoid'),
    name='eow_output',
)(output_layer)

In [102]:
# Loss per output head, keyed by the head's layer name.
losses = dict(
    alphabet_output='sparse_categorical_crossentropy',
    eoc_output='mean_squared_error',
    eow_output='mean_squared_error',
)

In [103]:
# Wire the single stroke input to the three output heads.
model = Model(inputs=[model_input], outputs=[alphabet_output, eoc_output, eow_output])

# The alphabet loss is weighted 10x relative to each of the other two heads.
output_loss_weights = {
    'alphabet_output': 10.0,
    'eoc_output': 1.0,
    'eow_output': 1.0,
}

model.compile(
    optimizer='adam',
    loss=losses,
    loss_weights=output_loss_weights,
    metrics=['accuracy'],
)

In [104]:
# Print the layer-by-layer architecture (output shapes, parameter counts, connections).
model.summary()

Model: "functional_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
stroke_input (InputLayer)       [(None, None, 3)]    0                                            
__________________________________________________________________________________________________
masking_4 (Masking)             (None, None, 3)      0           stroke_input[0][0]               
__________________________________________________________________________________________________
time_distributed_8 (TimeDistrib (None, None, 256)    1024        masking_4[0][0]                  
__________________________________________________________________________________________________
bidirectional_16 (Bidirectional (None, None, 1024)   3149824     time_distributed_8[0][0]         
_______________________________________________________________________________________

# Preprocessing

In [105]:
# Switch between environments: True loads the datasets from a mounted Google Drive,
# False loads them from relative paths on the local machine.
is_colab = False
# Show the current working directory, against which the relative dataset paths are resolved.
!pwd

/home/martin/Documents/code/deepwriting-module/deepreading/baseline_stacked


In [106]:
# Pick the dataset locations for the current environment.
if is_colab:
    # On Colab the datasets live on Google Drive, which has to be mounted first.
    from google.colab import drive
    drive.mount('/content/drive')
    training_path = '/content/drive/My Drive/deepwriting/deepwriting_training.npz'
    validation_path = '/content/drive/My Drive/deepwriting/deepwriting_validation.npz'
else:
    training_path = '../../data/dataset/deepwriting_training.npz'
    validation_path = '../../data/dataset/deepwriting_validation.npz'

# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects -
# only load archives from a trusted source.
training_dataset = np.load(training_path, allow_pickle=True)
validation_dataset = np.load(validation_path, allow_pickle=True)

In [107]:
def pad_to_length(sequences, max_len, two_dimensional=True):
    """Zero-pad every sequence along its first axis to ``max_len`` and stack the results.

    Parameters
    ----------
    sequences : iterable of array-like
        The sequences to pad. With ``two_dimensional=True`` each one must be
        2-D (only the first axis is padded); otherwise each one must be 1-D.
    max_len : int
        Target length of the first axis of every padded sequence.
    two_dimensional : bool, default True
        If False, the 1-D sequences are padded and then given a trailing axis
        of size 1, so each padded sequence has shape ``(max_len, 1)``.

    Returns
    -------
    numpy.ndarray
        The padded sequences stacked along a new leading axis.

    Raises
    ------
    ValueError
        If any sequence is longer than ``max_len``.
    """
    padded_sequences = []
    for index, seq in enumerate(sequences):
        pad_len = max_len - len(seq)
        if pad_len < 0:
            # np.pad would also raise ValueError here, but with a message about
            # "negative values" that does not point at the offending sequence.
            raise ValueError(
                "sequence {} has length {}, which exceeds max_len={}".format(
                    index, len(seq), max_len
                )
            )
        if two_dimensional:
            # Pad only at the end of the first axis; leave the feature axis untouched.
            padded_seq = np.pad(seq, [(0, pad_len), (0, 0)], mode='constant', constant_values=0)
        else:
            padded_seq = np.pad(seq, (0, pad_len), mode='constant', constant_values=0)
            padded_seq = np.expand_dims(padded_seq, axis=1)
        padded_sequences.append(padded_seq)
    # Sanity check: every padded sequence now has exactly max_len entries.
    assert all(len(padded_seq) == max_len for padded_seq in padded_sequences)
    return np.array(padded_sequences)

In [108]:
def check_lengths(seq, max_len):
    """Assert that the longest element of ``seq`` has exactly ``max_len`` items."""
    longest = max(map(len, seq))
    assert longest == max_len

## Pad Training input

In [109]:
# Length of the longest stroke sequence in each split; used as the padding target.
max_len_training = max(map(len, training_dataset['strokes']))
max_len_validation = max(map(len, validation_dataset['strokes']))

In [110]:
# Zero-pad the stroke sequences of each split to that split's longest sequence.
training_input_padded = pad_to_length(
    sequences=training_dataset['strokes'],
    max_len=max_len_training,
)
validation_input_padded = pad_to_length(
    sequences=validation_dataset['strokes'],
    max_len=max_len_validation,
)

## Pad char labels

In [111]:
# The longest character-label sequence must be as long as the longest stroke sequence.
check_lengths(seq=training_dataset['char_labels'], max_len=max_len_training)
check_lengths(seq=validation_dataset['char_labels'], max_len=max_len_validation)

In [112]:
# Character labels are 1-D per sample, so pad them and append a trailing axis of size 1.
training_char_labels_padded = pad_to_length(
    sequences=training_dataset['char_labels'],
    max_len=max_len_training,
    two_dimensional=False,
)
validation_char_labels_padded = pad_to_length(
    sequences=validation_dataset['char_labels'],
    max_len=max_len_validation,
    two_dimensional=False,
)

In [113]:
# Number of entries in the alphabet stored alongside the training data.
# NOTE(review): the model's output size comes from the notebook-level `alphabet`
# list instead; presumably both have the same length - confirm.
classes = len(training_dataset['alphabet'])
# One-hot encoding is left disabled: the alphabet head is trained with
# 'sparse_categorical_crossentropy', which consumes integer class labels.
#char_labels_padded = tensorflow.keras.utils.to_categorical(char_labels_padded, num_classes=classes)

## Pad EOC labels

In [114]:
# The longest EOC-label sequence must be as long as the longest stroke sequence.
check_lengths(seq=training_dataset['eoc_labels'], max_len=max_len_training)
check_lengths(seq=validation_dataset['eoc_labels'], max_len=max_len_validation)

In [115]:
# EOC labels are 1-D per sample, so pad them and append a trailing axis of size 1.
training_eoc_labels_padded = pad_to_length(
    sequences=training_dataset['eoc_labels'],
    max_len=max_len_training,
    two_dimensional=False,
)
validation_eoc_labels_padded = pad_to_length(
    sequences=validation_dataset['eoc_labels'],
    max_len=max_len_validation,
    two_dimensional=False,
)

In [116]:
# The longest EOW-label sequence must be as long as the longest stroke sequence.
check_lengths(seq=training_dataset['eow_labels'], max_len=max_len_training)
check_lengths(seq=validation_dataset['eow_labels'], max_len=max_len_validation)

In [117]:
# EOW labels are 1-D per sample, so pad them and append a trailing axis of size 1.
training_eow_labels_padded = pad_to_length(
    sequences=training_dataset['eow_labels'],
    max_len=max_len_training,
    two_dimensional=False,
)
validation_eow_labels_padded = pad_to_length(
    sequences=validation_dataset['eow_labels'],
    max_len=max_len_validation,
    two_dimensional=False,
)

# Training

In [118]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [119]:
# All callbacks track the validation accuracy of the alphabet head; higher is better.
monitor = 'val_alphabet_output_accuracy'
mode = 'max'

# Keep only the best-scoring model on disk.
# `save_best_only` must be a real boolean: the previous string 'True' only worked
# because any non-empty string (including 'False') is truthy.
modelCheckpoint = ModelCheckpoint('./deepwriting_model.hdf5', monitor=monitor, save_best_only=True, mode=mode)
# Stop training after 10 epochs without improvement of the monitored metric.
earlyStopping = EarlyStopping(monitor=monitor, patience=10, mode=mode)
# Multiply the learning rate by 0.1 after 4 epochs without improvement.
reduceLROnPlateau = ReduceLROnPlateau(monitor=monitor, factor=0.1, patience=4, mode=mode)

In [None]:
# Inputs and targets are keyed by the names of the model's input / output layers.
training_inputs = {'stroke_input': training_input_padded}
training_targets = {
    'alphabet_output': training_char_labels_padded,
    'eoc_output': training_eoc_labels_padded,
    'eow_output': training_eow_labels_padded,
}
validation_inputs = {'stroke_input': validation_input_padded}
validation_targets = {
    'alphabet_output': validation_char_labels_padded,
    'eoc_output': validation_eoc_labels_padded,
    'eow_output': validation_eow_labels_padded,
}

# NOTE(review): batch_size=1 means one optimizer step per sample; the progress log
# below shows an ETA of almost two hours for the first epoch.
model.fit(
    x=training_inputs,
    y=training_targets,
    batch_size=1,
    epochs=100,
    callbacks=[modelCheckpoint, earlyStopping, reduceLROnPlateau],
    validation_data=(validation_inputs, validation_targets),
)

Epoch 1/100
  258/34577 [..............................] - ETA: 1:50:35 - loss: 18.2590 - alphabet_output_loss: 1.8219 - eoc_output_loss: 0.0224 - eow_output_loss: 0.0173 - alphabet_output_accuracy: 0.0797 - eoc_output_accuracy: 0.9594 - eow_output_accuracy: 0.9552

In [None]:
# Ad-hoc inspection of the raw (unpadded) stroke sequences of the training split.
# NOTE(review): this displays the whole array; consider showing a small slice instead.
training_dataset['strokes']

In [None]:
# Copy the saved checkpoint into the Drive folder that also holds the datasets.
# NOTE(review): this runs regardless of `is_colab`, and the relative ./drive path only
# matches the '/content/drive' mount point when the working directory is /content - confirm.
!cp deepwriting_model.hdf5 ./drive/My\ Drive/deepwriting/