In [1]:
import numpy as np
import json
import pandas as ps
# Seed NumPy's legacy global RNG so NumPy-based randomness repeats across runs.
# NOTE(review): only NumPy is seeded in this cell; any other random number
# generator used by the model code (e.g. the deep-learning framework's own)
# is not seeded here — confirm if full run-to-run reproducibility is expected.
np.random.seed(6)

In [2]:
from model.utils.data_generator import DataGenerator
from model.img2seq import Img2SeqModel
from model.utils.lr_schedule import LRSchedule
from model.utils.general import Config
from model.utils.text import Vocab
from model.utils.image import greyscale

In [3]:
import os
# Expose only GPU index 0 to CUDA-based libraries running in this process.
# NOTE(review): this is set after the `model.*` imports in the previous cell;
# if any of those imports initialises CUDA, the restriction may come too
# late — consider moving this line ahead of them (confirm).
os.environ['CUDA_VISIBLE_DEVICES']="0"

In [4]:
def main(data, vocab, training, model, output):
    """Train an Img2Seq model described by a set of configuration sources.

    The four configuration sources are combined into one ``Config``, which is
    saved to ``output``. Training and validation ``DataGenerator`` objects, a
    learning-rate schedule and the model are then built from that config, and
    training is run.

    Args:
        data: Data configuration source (this notebook passes a JSON path).
        vocab: Vocabulary configuration source (JSON path in this notebook).
        training: Training configuration source (JSON path in this notebook).
        model: Model configuration source (JSON path in this notebook).
        output: Output directory handed to ``Config.save`` and to
            ``Img2SeqModel``. The resume snippet below appends
            ``"model.weights/"`` directly, so a trailing slash is presumably
            expected — verify against the model code.

    Returns:
        None.
    """
    # Merge the configuration sources and save the merged config to `output`.
    config = Config([data, vocab, training, model])
    config.save(output)
    vocabulary = Vocab(config)

    # Both splits share preprocessing and length limits; only the paths and
    # the bucketing flag differ between them.
    shared_kwargs = dict(
        img_prepro=greyscale,
        max_iter=config.max_iter,
        max_len=config.max_length_formula,
        form_prepro=vocabulary.form_prepro,
    )
    train_set = DataGenerator(
        path_formulas=config.path_formulas_train,
        dir_images=config.dir_images_train,
        bucket=config.bucket_train,
        path_index=config.path_index_train,
        **shared_kwargs)
    val_set = DataGenerator(
        path_formulas=config.path_formulas_val,
        dir_images=config.dir_images_val,
        bucket=config.bucket_val,
        path_index=config.path_index_val,
        **shared_kwargs)

    # The schedule boundaries are configured in epochs; convert them to batch
    # counts. The division rounds up so a final partial batch still counts.
    n_train = len(train_set)
    batch_size = config.batch_size
    steps_per_epoch = (n_train + batch_size - 1) // batch_size
    lr_schedule = LRSchedule(
        lr_init=config.lr_init,
        start_decay=config.start_decay * steps_per_epoch,
        end_decay=config.end_decay * steps_per_epoch,
        end_warm=config.end_warm * steps_per_epoch,
        lr_warm=config.lr_warm,
        lr_min=config.lr_min)

    # Build the model in training mode.
    img2seq = Img2SeqModel(config, output, vocabulary)
    img2seq.build_train(config)

    # To resume from previously saved weights, uncomment:
    # img2seq.restore_session(output + "model.weights/")

    # Run the training loop, evaluating on the validation set.
    img2seq.train(config, train_set, val_set, lr_schedule)

In [11]:
if __name__ == "__main__":
    # Launch training for the HDval-50K experiment with the JSON configs
    # below; `main` is called with `output` set to results/HDval-50K/.
    main(
        data='configs/data_HDval-50K.json',
        vocab='configs/vocab.json',
        training='configs/training.json',
        model='configs/model.json',
        output='results/HDval-50K/',
    )

configs/data_HDval-50K.json
configs/vocab.json
configs/training.json
configs/model.json
configs/data_HDval-50K.json
configs/vocab.json
configs/training.json
configs/model.json
Loaded 36366 formulas from Z:/FYP/data/synthetic/size_tests/50K/train.formulas.txt
Loaded 413 formulas from Z:/FYP/data/hand-drawn/hand-drawn-val/val.formulas.txt
First call to len(dataset) - may take a while.


Building model...


- done.
Device mapping:
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:01:00.0, compute capability: 8.6



- done.
Epoch 1/30


[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 17681269241321425917
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 7787773952
locality {
  bus_id: 1
  links {
  }
}
incarnation: 13682798707936466465
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 3080, pci bus id: 0000:01:00.0, compute capability: 8.6"
xla_global_id: 416903419
]


- Training:  - 596s - loss: 0.8800 - perplexity: 2.4430 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 29.62 - EM 0.00 - Edit 45.90 - perplexity -2.91
- New best score (-2.91)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 603.42, lr: 0.00010
Epoch 2/30




- Training:  - 598s - loss: 0.7332 - perplexity: 2.0827 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 26.74 - EM 0.00 - Edit 44.04 - perplexity -2.72
- New best score (-2.72)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 605.78, lr: 0.00010
Epoch 3/30




- Training:  - 601s - loss: 0.7000 - perplexity: 2.0142 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 26.81 - EM 0.00 - Edit 44.62 - perplexity -2.65
- New best score (-2.65)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 608.97, lr: 0.00010
Epoch 4/30




- Training:  - 595s - loss: 0.6868 - perplexity: 1.9878 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 31.67 - EM 0.00 - Edit 45.77 - perplexity -2.63
- New best score (-2.63)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 602.39, lr: 0.00010
Epoch 5/30




- Training:  - 592s - loss: 0.6800 - perplexity: 1.9742 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 30.28 - EM 0.00 - Edit 45.55 - perplexity -2.59
- New best score (-2.59)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 599.05, lr: 0.00010
Epoch 6/30




- Training:  - 590s - loss: 0.6759 - perplexity: 1.9661 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 30.28 - EM 0.00 - Edit 45.55 - perplexity -2.54
- New best score (-2.54)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 598.44, lr: 0.00010
Epoch 7/30




- Training:  - 591s - loss: 0.6715 - perplexity: 1.9574 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 31.05 - EM 0.00 - Edit 46.53 - perplexity -2.53
- New best score (-2.53)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 598.27, lr: 0.00010
Epoch 8/30




- Training:  - 589s - loss: 0.6684 - perplexity: 1.9513 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 31.12 - EM 0.00 - Edit 46.62 - perplexity -2.52
- New best score (-2.52)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 596.05, lr: 0.00010
Epoch 9/30




- Training:  - 590s - loss: 0.6661 - perplexity: 1.9468 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 32.47 - EM 0.00 - Edit 48.19 - perplexity -2.50
- New best score (-2.50)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 596.90, lr: 0.00010
Epoch 10/30




- Training:  - 590s - loss: 0.6639 - perplexity: 1.9427 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 34.60 - EM 0.00 - Edit 46.89 - perplexity -2.47
- New best score (-2.47)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 596.90, lr: 0.00010
Epoch 11/30




- Training:  - 589s - loss: 0.6615 - perplexity: 1.9380 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 35.39 - EM 0.00 - Edit 47.26 - perplexity -2.46
- New best score (-2.46)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 596.26, lr: 0.00010
Epoch 12/30




- Training:  - 588s - loss: 0.6597 - perplexity: 1.9345 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 30.78 - EM 0.00 - Edit 46.86 - perplexity -2.46
- Elapsed time: 593.92, lr: 0.00010
Epoch 13/30




- Training:  - 586s - loss: 0.6574 - perplexity: 1.9301 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 31.55 - EM 0.00 - Edit 47.98 - perplexity -2.48
- Elapsed time: 592.20, lr: 0.00010
Epoch 14/30




- Training:  - 590s - loss: 0.6553 - perplexity: 1.9259 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 29.65 - EM 0.00 - Edit 46.18 - perplexity -2.49
- Elapsed time: 595.60, lr: 0.00010
Epoch 15/30




- Training:  - 588s - loss: 0.6530 - perplexity: 1.9215 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 27.59 - EM 0.00 - Edit 45.96 - perplexity -2.49
- Elapsed time: 594.26, lr: 0.00010
Epoch 16/30




- Training:  - 588s - loss: 0.6507 - perplexity: 1.9171 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 29.66 - EM 0.00 - Edit 46.91 - perplexity -2.50
- Elapsed time: 594.08, lr: 0.00010
Epoch 17/30




- Training:  - 588s - loss: 0.6478 - perplexity: 1.9116 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 27.26 - EM 0.00 - Edit 44.47 - perplexity -2.48
- Elapsed time: 593.98, lr: 0.00010
Epoch 18/30




- Training:  - 590s - loss: 0.6448 - perplexity: 1.9059 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 26.59 - EM 0.00 - Edit 43.30 - perplexity -2.48
- Elapsed time: 596.17, lr: 0.00010
Epoch 19/30




- Training:  - 590s - loss: 0.6414 - perplexity: 1.8994 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 33.47 - EM 0.00 - Edit 46.42 - perplexity -2.49
- Elapsed time: 595.44, lr: 0.00010
Epoch 20/30




- Training:  - 590s - loss: 0.6377 - perplexity: 1.8924 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 29.64 - EM 0.00 - Edit 46.54 - perplexity -2.51
- Elapsed time: 595.71, lr: 0.00010
Epoch 21/30




- Training:  - 590s - loss: 0.6265 - perplexity: 1.8714 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 27.50 - EM 0.00 - Edit 44.88 - perplexity -2.45
- New best score (-2.45)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 598.07, lr: 0.00010
Epoch 22/30




- Training:  - 593s - loss: 0.6025 - perplexity: 1.8271 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 29.50 - EM 0.00 - Edit 48.35 - perplexity -2.38
- New best score (-2.38)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 600.83, lr: 0.00010
Epoch 23/30




- Training:  - 594s - loss: 0.5725 - perplexity: 1.7731 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 31.67 - EM 0.00 - Edit 49.86 - perplexity -2.29
- New best score (-2.29)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 601.95, lr: 0.00010
Epoch 24/30




- Training:  - 594s - loss: 0.5248 - perplexity: 1.6908 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 35.78 - EM 0.00 - Edit 53.59 - perplexity -2.21
- New best score (-2.21)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 601.75, lr: 0.00010
Epoch 25/30




- Training:  - 594s - loss: 0.4764 - perplexity: 1.6109 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 40.77 - EM 0.24 - Edit 57.02 - perplexity -2.13
- New best score (-2.13)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 601.72, lr: 0.00010
Epoch 26/30




- Training:  - 593s - loss: 0.4322 - perplexity: 1.5413 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 44.27 - EM 0.24 - Edit 58.51 - perplexity -2.08
- New best score (-2.08)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 600.79, lr: 0.00010
Epoch 27/30




- Training:  - 594s - loss: 0.3946 - perplexity: 1.4845 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 47.77 - EM 0.24 - Edit 59.91 - perplexity -2.08
- New best score (-2.08)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 600.93, lr: 0.00010
Epoch 28/30




- Training:  - 595s - loss: 0.3634 - perplexity: 1.4389 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 50.89 - EM 0.24 - Edit 60.73 - perplexity -2.06
- New best score (-2.06)!


==SAVING WEIGHTS===
- Saving model...

- Saved model in results/HDval-50K/model.weights/
- Elapsed time: 602.49, lr: 0.00010
Epoch 29/30




- Training:  - 595s - loss: 0.3358 - perplexity: 1.3997 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 51.21 - EM 0.73 - Edit 60.88 - perplexity -2.13
- Elapsed time: 600.77, lr: 0.00010
Epoch 30/30




- Training:  - 596s - loss: 0.3121 - perplexity: 1.3670 - lr: 0.0001


- Evaluating...Loaded 413 formulas from results/HDval-50K/formulas_val/ref.txt
Loaded 413 formulas from results/HDval-50K/formulas_val/hyp_0.txt


- Eval: BLEU-4 52.78 - EM 0.24 - Edit 62.06 - perplexity -2.08
- Elapsed time: 601.68, lr: 0.00010
