In [2]:
#!/usr/bin/env python3
# 53907afe-531b-11ea-a595-00505601122b
# b7ea974c-d389-11e8-a4be-00505601122b

# 1. Setup

## 1.1. FS/OS Requirements

In [1]:
# Copy the dataset helper module and the three TFRecord splits (train/test/dev) from the
# Kaggle input directory into the working directory.
# NOTE(review): presumably needed so that `homr_dataset` is importable and finds the
# records next to it — confirm against homr_dataset.py.
!cp /kaggle/input/homr-competition/homr_dataset.py /kaggle/working/homr_dataset.py
!cp /kaggle/input/homr-competition/homr.train.tfrecord /kaggle/working/homr.train.tfrecord
!cp /kaggle/input/homr-competition/homr.test.tfrecord /kaggle/working/homr.test.tfrecord
!cp /kaggle/input/homr-competition/homr.dev.tfrecord /kaggle/working/homr.dev.tfrecord

In [3]:
# Pinned package set kept for reference; the install itself is disabled (commented out).
#!pip install -U tensorflow-gpu==2.8 tensorflow-addons==0.16.1 tensorflow-probability==0.16.0 tensorflow-hub==0.12.0 scipy
# List the TensorFlow-related packages that are actually installed in this environment.
!pip freeze | grep tensorflow

## 1.2. Python imports

In [2]:
import argparse
import datetime
import functools
import gc
import os
import re

os.environ.setdefault("TF_CPP_MIN_LOG_LEVEL", "2")  # Report only TF errors by default

import numpy as np
import tensorflow as tf
import tensorflow_addons as tfa

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

from homr_dataset import HOMRDataset

## 1.3. Args

In [3]:
# Command-line style configuration. Inside a notebook (no `__file__` global) the explicit
# list passed to `parse_args` below is used; when run as a script, `sys.argv` is parsed.
parser = argparse.ArgumentParser()
parser.add_argument("--batch_size", default=None, type=int, help="Batch size.")
parser.add_argument("--epochs", default=None, type=int, help="Number of epochs.")
parser.add_argument("--seed", default=42, type=int, help="Random seed.")
parser.add_argument("--threads", default=1, type=int, help="Maximum number of threads to use.")
parser.add_argument("--buffer_size", default=None, type=int, help="Dataset buffer size to load into memory. By default load the whole dataset.")
parser.add_argument("--checkpoints_period", default=None, type=int, help="Checkpoint callback period.")
parser.add_argument("--stopping_patience", default=None, type=int, help="Early stopping epochs patience.")
parser.add_argument("--label_smoothing", default=None, type=float, help="Label smoothing factor; leave unset (None) to disable.")
parser.add_argument("--densenet_filters", default=32, type=int, help="Number of filters used by the DenseNet convolutional body.")
parser.add_argument("--densenet_block_sizes", nargs="+", type=int, default=[6, 12, 24, 16], help="Individual dense block sizes")
parser.add_argument("--learning_rate", default=0.01, type=float, help="Initial model learning rate.")

args = parser.parse_args([
    '--threads=4',
    '--batch_size=4',
    '--epochs=10',
    '--checkpoints_period=3',
    '--stopping_patience=3',
    '--learning_rate=0.01',
    #'--label_smoothing=0.1',
    '--densenet_filters=32',
    '--buffer_size=128',
    '--densenet_block_sizes', '8', '10', #'10', #'8',
] if "__file__" not in globals() else None)

# Create logdir name: logs/<script name or "notebook">-<timestamp>-<abbreviated args>.
# The regex abbreviates every argument name to the initials of its underscore-separated
# parts (e.g. `batch_size` -> `bs`).
args.logdir = os.path.join(
    "logs",
    "{}-{}-{}".format(
        os.path.basename(globals().get("__file__", "notebook")),
        datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S"),
        ",".join(
            (
                "{}={}".format(re.sub("(.)[^_]*_?", r"\1", k), v)
                for k, v in sorted(vars(args).items())
            )
        ),
    ),
)

# Environment note from the author: developed on TF 2.6 because of GPU issues on TF 2.8.
# Seed TensorFlow's global random generator and cap the thread pools.
tf.random.set_seed(args.seed)
tf.config.threading.set_inter_op_parallelism_threads(args.threads)
tf.config.threading.set_intra_op_parallelism_threads(args.threads)

# Display the parsed configuration as the cell output.
args

In [13]:
# These derived settings need the dataset object `homr`, which is only created further
# down in section 2 ("Data"), where the same values are computed right after loading.
# Guard on `homr` being defined so that Restart & Run All does not stop here with a
# NameError; re-running this cell later still refreshes the values.
if "homr" in globals():
    if args.buffer_size is None:
        args.buffer_size = len(homr.train)
    args.decay_steps = int(args.epochs * len(homr.train) / args.batch_size)

# 2. Data

In [4]:
# Load the HOMR dataset object exposing the `train`/`dev`/`test` splits and the `MARKS` list.
homr = HOMRDataset()

# Index one past the last entry of `homr.MARKS`.
NULL_CHAR = len(homr.MARKS)

# Without an explicit buffer size, fall back to the size of the whole training split.
args.buffer_size = len(homr.train) if args.buffer_size is None else args.buffer_size

# Total number of training batches over all epochs; used as the length of the cosine
# learning-rate decay when the model is compiled.
args.decay_steps = int(args.epochs * len(homr.train) / args.batch_size)

print("First 10 classes: ", homr.MARKS[:10], "...")
print("Total num of classes: ", len(homr.MARKS))
print("Total train dataset cardinality: ", homr.train.cardinality())
print("Total dev dataset cardinality: ", homr.dev.cardinality())

In [7]:
# Peek at the image shapes of the first ten training examples.
first_examples = homr.train.take(10)
for example in first_examples.as_numpy_iterator():
    print(example['image'].shape)

In [8]:
# Peek at the shapes of the mark sequences of the first ten training examples.
first_examples = homr.train.take(10)
for example in first_examples.as_numpy_iterator():
    print(example['marks'].shape)

In [5]:
_N = 3000  # number of leading training examples inspected


def _upper_3sigma(values):
    """Return mean + 3 * standard deviation of `values`, rounded up to an integer."""
    return int(np.ceil(np.mean(values) + 3 * np.std(values)))


# Single pass over the first `_N` training examples, collecting the image sizes,
# all individual marks and the length of every mark sequence.
heights, widths, labels, labels_len = [], [], [], []
for sample in homr.train.take(_N).as_numpy_iterator():
    img_height, img_width = sample['image'].shape[:2]
    heights.append(img_height)
    widths.append(img_width)
    labels.extend(sample['marks'])
    labels_len.append(sample['marks'].shape[0])

heights, widths, labels, labels_len = map(np.array, (heights, widths, labels, labels_len))

# Mean, standard deviation and three standard deviations of the heights, then of the widths.
print(np.mean(heights), np.std(heights), 3 * np.std(heights))
print(np.mean(widths), np.std(widths), 3 * np.std(widths))

# Target sizes are chosen as the mean plus three standard deviations of the sample.
TARGET_HEIGHT = _upper_3sigma(heights)
TARGET_WIDTH = _upper_3sigma(widths)
TARGET_LABEL_LEN = _upper_3sigma(labels_len)

print("Target height: ", TARGET_HEIGHT)
print("Target width: ", TARGET_WIDTH)
print("Target label_len: ", TARGET_LABEL_LEN)

In [9]:
# Histogram (with KDE) of the image heights collected from the inspected training examples.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(heights, kde=True, discrete=True, ax=ax)
ax.set_title("Train images heights distribution of the first {} samples".format(_N))

In [10]:
# Histogram (with KDE) of the image widths collected from the inspected training examples.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(widths, kde=True, discrete=True, ax=ax)
ax.set_title("Train images widths distribution of the first {} samples".format(_N))

In [11]:
# Histogram (with KDE) of the individual mark ids seen in the inspected training examples.
fig, ax = plt.subplots(figsize=(14, 7))
sns.histplot(labels, kde=True, discrete=True, ax=ax)
ax.set_title("Train marks distribution of the first {} samples".format(_N))

In [None]:
# Drop two of the exploration arrays; neither is referenced by any later visible cell
# (the TARGET_* constants derived from them were computed earlier).
del labels, heights

In [6]:
def create_dataset(name):
    """Build the batched input pipeline for one split of the HOMR dataset.

    Args:
        name: Name of the attribute of `homr` holding the split, e.g. "train", "dev"
            or "test". Only the "train" split is shuffled.

    Returns:
        A `tf.data.Dataset` yielding ragged `(image, marks)` batches of `args.batch_size`
        examples, with images cast to float32 and marks cast to int32.

    Raises:
        ValueError: If `args.label_smoothing` is set. The previous smoothing branch read
            a non-existent "labels" feature and an undefined `modelnet` object, and the
            CTC loss used by the model needs integer mark sequences, not one-hot targets.
    """
    if args.label_smoothing is not None:
        raise ValueError(
            "label_smoothing is not supported: the CTC loss requires integer mark sequences."
        )

    def prepare_example(example):
        label = tf.cast(example["marks"], tf.int32)
        image = tf.cast(example["image"], tf.float32)
        # Crop or pad only the height to TARGET_HEIGHT; the width is passed through
        # unchanged, so images keep their individual widths.
        image = tf.image.resize_with_crop_or_pad(image, TARGET_HEIGHT, tf.shape(image)[1])
        return (image, label)

    dataset = getattr(homr, name).map(prepare_example)
    if name == "train":
        dataset = dataset.shuffle(args.buffer_size, seed=args.seed)
    # Images and mark sequences have different lengths, hence ragged batches.
    dataset = dataset.apply(tf.data.experimental.dense_to_ragged_batch(args.batch_size))
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

# Build one input pipeline per split and show their element specifications.
train = create_dataset("train")
dev = create_dataset("dev")
test = create_dataset("test")
print(train, '\n', dev, '\n', test)

# 3. Model

In [10]:
class Model(tf.keras.Model):
    """Mark-sequence recognizer: DenseNet-style CNN -> recurrent layers -> CTC logits.

    The model takes ragged batches of single-channel images of height `TARGET_HEIGHT`
    and variable width, and outputs ragged logits with `1 + len(homr.MARKS)` classes per
    time step; the extra last class is used as the CTC blank.
    """

    class DenseBlockPart(tf.keras.layers.Layer):
        """Pre-activation unit: batch normalization -> activation -> 2D convolution."""

        def __init__(self, filters, kernel, strides, padding = "same", activation: str = "relu", *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)

            self.activation, self.filters, self.kernel, self.strides, self.padding = (
                activation, filters, kernel, strides, padding
            )

            self.layers = {
                "batchnorm": tf.keras.layers.BatchNormalization(),
                "activation": tf.keras.layers.Activation(self.activation),
                "conv": tf.keras.layers.Conv2D(self.filters, self.kernel, self.strides, self.padding)
            }

        def get_config(self):
            # Extend (rather than replace) the base config so name/dtype/trainable survive.
            config = super().get_config()
            config.update({
                "activation": self.activation,
                "filters": self.filters,
                "kernel": self.kernel,
                "strides": self.strides,
                "padding": self.padding
            })
            return config

        def call(self, inputs, mask=None):
            x = self.layers["batchnorm"](inputs)
            x = self.layers["activation"](x)
            x = self.layers["conv"](x)
            return x

    class DenseBlock(tf.keras.layers.Layer):
        """Dense block of `size` units, each adding `filters` channels to its input.

        Every unit is a 1x1 bottleneck (4 * filters channels) followed by a 3x3 convolution
        (filters channels), whose output is concatenated with the unit's input.
        """

        def __init__(self, size, filters, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)

            self.size, self.filters = size, filters

            self.layers = list()
            for _ in range(self.size):
                self.layers.append({
                    "1x1_part": Model.DenseBlockPart(4 * self.filters, 1, 1, "same", "relu"),
                    "3x3_part": Model.DenseBlockPart(self.filters, 3, 1, "same", "relu")
                })

        def get_config(self):
            config = super().get_config()
            config.update({
                "size": self.size,
                "filters": self.filters
            })
            return config

        def call(self, inputs, mask=None):
            x = inputs
            for layer in self.layers:
                y = layer["1x1_part"](x)
                y = layer["3x3_part"](y)
                # Concatenate along channels; a plain op avoids creating a new Keras layer
                # object on every call.
                x = tf.concat([y, x], axis=-1)
            return x

    class TransitionLayer(tf.keras.layers.Layer):
        """Transition between dense blocks: 1x1 convolution unit, then 2x2 max pooling."""

        def __init__(self, filters, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.filters = filters

            self.layers = {
                "denseblock_part": Model.DenseBlockPart(filters, 1, 1, "same", "relu"),
                "pooling": tf.keras.layers.MaxPool2D(2, 2, padding="same")
            }

        def get_config(self):
            config = super().get_config()
            config.update({
                "filters": self.filters
            })
            return config

        def call(self, inputs, mask=None):
            hidden = self.layers["denseblock_part"](inputs)
            hidden = self.layers["pooling"](hidden)
            return hidden

    class DenseNet(tf.keras.layers.Layer):
        """Convolutional body: initial 7x7 convolution and pooling, then one dense block
        plus transition layer per entry of `block_sizes`."""

        def __init__(self, filters, block_sizes, *args, **kwargs) -> None:
            super().__init__(*args, **kwargs)
            self.filters, self.block_sizes = filters, block_sizes
            self.layers = {
                "init_conv": tf.keras.layers.Conv2D(64, 7, 2, "same"),
                "init_pool": tf.keras.layers.MaxPool2D(3, 2),
                "body": list()
            }

            for block_size in self.block_sizes:
                self.layers["body"].append({
                    # Keyword arguments on purpose: the positional order is (size, filters),
                    # and passing (filters, block_size) positionally silently swapped them.
                    "denseblock": Model.DenseBlock(size=block_size, filters=self.filters),
                    "transition": Model.TransitionLayer(self.filters // 2)
                })

        def get_config(self):
            config = super().get_config()
            config.update({
                "filters": self.filters,
                "block_sizes": self.block_sizes
            })
            return config

        def call(self, inputs, mask=None):
            x = self.layers["init_conv"](inputs)
            x = self.layers["init_pool"](x)

            for layer in self.layers["body"]:
                x = layer["denseblock"](x)
                x = layer["transition"](x)
            return x

    def __init__(self, args: argparse.Namespace) -> None:
        """Build the functional graph, compile the model and prepare the training callbacks.

        Args:
            args: Parsed configuration; reads `densenet_filters`, `densenet_block_sizes`,
                `learning_rate`, `decay_steps`, `logdir`, `checkpoints_period` and
                `stopping_patience`.
        """
        inputs = tf.keras.layers.Input(shape=[TARGET_HEIGHT, None, 1], dtype=tf.float32, ragged=True)

        hidden = self.DenseNet(args.densenet_filters, args.densenet_block_sizes)(inputs.to_tensor())

        # Turn the feature map into a sequence over image columns. The width axis must be
        # moved in front of the height axis first: reshaping [height, width, channels]
        # directly to [-1, height * channels] would mix values from different columns
        # into a single time step.
        height, channels = hidden.shape[1], hidden.shape[-1]
        hidden = tf.keras.layers.Permute((2, 1, 3))(hidden)
        hidden = tf.keras.layers.Reshape([-1, height * channels])(hidden)
        # NOTE(review): `from_tensor` without lengths makes every row span the full padded
        # width, so the padded columns are treated as real time steps — confirm acceptable.
        hidden = tf.RaggedTensor.from_tensor(hidden)

        hidden = tf.keras.layers.Bidirectional(
            tf.keras.layers.RNN(tfa.rnn.LayerNormLSTMCell(32, recurrent_dropout=0.2, dropout=0.05),return_sequences=True, return_state=False),
            merge_mode="sum"
        )(hidden)

        hidden = tf.keras.layers.RNN(tfa.rnn.LayerNormLSTMCell(32, recurrent_dropout=0.2, dropout=0.05), return_sequences=True, return_state=False)(hidden)
        hidden = tf.keras.layers.RNN(tfa.rnn.LayerNormLSTMCell(32, recurrent_dropout=0.2, dropout=0.05), return_sequences=True, return_state=False)(hidden)

        # One output per mark plus one extra class used as the CTC blank.
        logits = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(1 + len(homr.MARKS), activation=None))(hidden)

        super().__init__(inputs=inputs, outputs=logits)

        self.compile(optimizer=tf.optimizers.Adam(learning_rate=tf.keras.optimizers.schedules.CosineDecay(args.learning_rate, args.decay_steps)),
                     loss=self.ctc_loss,
                     metrics=[homr.EditDistanceMetric()])

        # Callbacks meant to be passed to `fit`: TensorBoard always, checkpoints and early
        # stopping only when the corresponding argument is set.
        self.callbacks = list()
        self.tb_callback = tf.keras.callbacks.TensorBoard(args.logdir)
        self.callbacks.append(self.tb_callback)
        if args.checkpoints_period:
            self.checkpoints = tf.keras.callbacks.ModelCheckpoint('weights{epoch:08d}.h5', save_weights_only=True, period=args.checkpoints_period)
            self.callbacks.append(self.checkpoints)
        if args.stopping_patience:
            self.early_stopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=args.stopping_patience)
            self.callbacks.append(self.early_stopping)

    def ctc_loss(self, gold_labels: tf.RaggedTensor, logits: tf.RaggedTensor) -> tf.Tensor:
        """Return the mean CTC loss of a batch.

        Args:
            gold_labels: Ragged integer mark sequences.
            logits: Ragged per-time-step logits, batch-major.

        Returns:
            Scalar tensor with the loss averaged over the batch.
        """
        assert isinstance(gold_labels, tf.RaggedTensor), "Gold labels given to CTC loss must be RaggedTensors"
        assert isinstance(logits, tf.RaggedTensor), "Logits given to CTC loss must be RaggedTensors"

        return tf.reduce_mean(
            tf.nn.ctc_loss(
                gold_labels.to_sparse(),
                # `tf.nn.ctc_loss` expects time-major logits by default.
                tf.transpose(logits.to_tensor(), perm=[1, 0, 2]),
                label_length=None,
                logit_length=tf.cast(logits.row_lengths(), tf.int32),
                # -1 selects the last class as blank, i.e. the extra output unit added on
                # top of `len(homr.MARKS)`; per the TF documentation the beam search decoder
                # used in `ctc_decode` also takes the last class as blank.
                blank_index=-1
            ),
            axis=0
        )

    def ctc_decode(self, logits: tf.RaggedTensor) -> tf.RaggedTensor:
        """Decode ragged logits with CTC beam search and return the best path per example."""
        assert isinstance(logits, tf.RaggedTensor), "Logits given to CTC predict must be RaggedTensors"

        predictions, _ = tf.nn.ctc_beam_search_decoder(
            tf.transpose(logits.to_tensor(), perm=[1, 0, 2]),
            tf.cast(logits.row_lengths(), tf.int32),
        )
        # The decoder returns a list of sparse tensors; the first one is the top path.
        predictions = tf.RaggedTensor.from_sparse(predictions[0])

        assert isinstance(predictions, tf.RaggedTensor), "CTC predictions must be RaggedTensors"
        return predictions

    def train_step(self, data):
        """Run one optimization step and report only the loss.

        The compiled metrics are not updated here, so no CTC decoding happens in training.
        """
        x, y = data
        with tf.GradientTape() as tape:
            y_pred = self(x, training=True)
            loss = self.compute_loss(x, y, y_pred) # TF2.6. COMPATIBILITY
        self.optimizer.minimize(loss, self.trainable_variables, tape=tape)
        return {"loss": metric.result() for metric in self.metrics if metric.name == "loss"}

    def predict_step(self, data):
        """Return the CTC-decoded mark sequences for a batch of inputs."""
        data = data[0] if isinstance(data, tuple) else data
        y_pred = self(data, training=False)
        y_pred = self.ctc_decode(y_pred)
        return y_pred

    def test_step(self, data):
        """Evaluate a batch: compute the loss on the logits, the metrics on decoded output."""
        x, y = data
        y_pred = self(x, training=False)
        # Return value unused; called so that the compiled loss sees this batch.
        self.compute_loss(x, y, y_pred)
        y_pred = self.ctc_decode(y_pred)
        return self.compute_metrics(x, y, y_pred, None)

    def compute_loss(self, x=None, y=None, y_pred=None, sample_weight=None):
        """Evaluate the compiled loss (helper provided here for TF 2.6 compatibility)."""
        del x  # The default implementation does not use `x`.
        return self.compiled_loss(
            y, y_pred, sample_weight, regularization_losses=self.losses)

    def compute_metrics(self, x, y, y_pred, sample_weight):
        """Update the compiled metrics and return the current value of every metric."""
        del x  # The default implementation does not use `x`.
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
        # Collect metrics to return
        return_metrics = {}
        for metric in self.metrics:
            result = metric.result()
            if isinstance(result, dict):
                return_metrics.update(result)
            else:
                return_metrics[metric.name] = result
        return return_metrics
    
# Build and compile the model from the parsed configuration, then print its layer summary.
model = Model(args)
model.summary()

In [11]:
# Train on the training pipeline and validate on the dev pipeline after every epoch.
# `callbacks` is given as a flat list of callback instances (the documented form) instead
# of a list nested inside another list.
logs = model.fit(
    train,
    epochs=args.epochs,
    validation_data=dev,
    shuffle=False,
    callbacks=list(model.callbacks),
)

In [None]:
# Release the training and validation pipelines, which no later cell uses. `model` and
# `test` are intentionally kept: the following cells still plot the model and run
# predictions on the test pipeline.
del train, dev
gc.collect()

In [None]:
# Render the model graph with the public `tf.keras` utility, matching the `tf.keras` model
# built above (no import from the standalone `keras` package's internal module path).
tf.keras.utils.plot_model(model)

# 4. Predictions

In [12]:
# Make sure the run directory exists, then write one line per test example containing
# the space-separated names of the predicted marks.
os.makedirs(args.logdir, exist_ok=True)
submission_path = os.path.join(args.logdir, "homr_competition.txt")
with open(submission_path, "w", encoding="utf-8") as predictions_file:
    # Decoded mark-id sequences for the whole test pipeline.
    predictions = model.predict(test)

    for sequence in predictions:
        predictions_file.write(" ".join(homr.MARKS[mark] for mark in sequence) + "\n")

In [None]:
# Display the run directory that holds the TensorBoard logs and the predictions file.
args.logdir