In [None]:
import tensorflow as tf
from tensorflow import keras
import os
import time
import argparse
import utils
import math
from model import LPRNet
import evaluate
import numpy as np
from tensorflow.keras import backend as K
import os
import os
import sys
sys.path.insert(1, '/kaggle/input/your-dataset-name')

# Tell TensorFlow/XLA where the CUDA toolkit lives.
# NOTE(review): these are set after `import tensorflow`; confirm they are
# picked up in the target environment.
os.environ['CUDA_DIR'] = '/usr/lib/cuda'
os.environ['XLA_FLAGS'] = '--xla_gpu_cuda_data_dir=/usr/lib/cuda'

# --- Training configuration (replaces the disabled argparse interface) ---
train_epochs_var = 100           # number of passes of the outer training loop
batch_size_var = 8               # training batch size
val_batch_size_var = 4           # validation batch size
train_dir_var = "./train"        # directory holding the training images
val_dir_var = "./valid"          # directory holding the validation images
# No pretrained checkpoint is configured; weight loading is disabled in train().
lr_var = 1e-3                    # initial learning rate
decay_steps_var = 500            # `decay_steps` of the exponential LR schedule
decay_rate_var = 0.995           # `decay_rate` of the exponential LR schedule
# The schedule's `staircase` argument is a boolean. The previous value, the
# string "smooth", is truthy and therefore selected *stepped* decay instead of
# the smooth decay it was meant to describe.
staircase_var = False            # False = smooth decay, True = stepped decay
saved_dir_var = "./saved_models" # directory that receives the weight files


def train():
    """Train the LPRNet model with a hand-written CTC loop and save its weights.

    Configuration is read from the module-level ``*_var`` settings; the
    ``CHARS`` and ``NUM_CLASS`` globals must exist before this is called.

    Each epoch runs ``ceil(train_len / batch_size_var)`` batches drawn from
    ``utils.DataIterator`` and minimises the mean CTC loss with Adam under an
    exponentially decaying learning rate. On every 25th epoch (epoch 0
    excluded) the evaluator runs, and the weights are written to
    ``saved_dir_var`` whenever the validation loss improves. A final save is
    made after the last epoch.

    Raises:
        ValueError: If ``train_dir_var`` contains no files, since no batch
            could be drawn and the mean epoch loss would be undefined.
    """
    # Initiate the Neural Network
    net = LPRNet(NUM_CLASS)

    # Batch sizes come from the module-level configuration
    print("batch size is {}".format(batch_size_var))

    # Initialize the custom data generators
    train_gen = utils.DataIterator(img_dir=train_dir_var, batch_size=batch_size_var)
    val_gen = utils.DataIterator(img_dir=val_dir_var, batch_size=val_batch_size_var)

    # Dataset sizes = number of files directly inside each directory
    # (sub-directories are not descended into).
    train_len = len(next(os.walk(train_dir_var))[2])
    val_len = len(next(os.walk(val_dir_var))[2])
    print("Train Len is", train_len)
    if train_len == 0:
        raise ValueError(
            "No training files found in {!r}".format(train_dir_var)
        )

    # Calculate batches per epoch (the last, partial batch counts as one)
    BATCH_PER_EPOCH = int(math.ceil(train_len / batch_size_var))
    print("batch size is ---- {}".format(batch_size_var))

    # Initialize TensorBoard
    tensorboard = keras.callbacks.TensorBoard(
        log_dir='tmp/my_tf_logs',
        histogram_freq=0,
        write_graph=True
    )

    # Only whole validation batches are evaluated, hence floor()
    val_batch_len = int(math.floor(val_len / val_batch_size_var))
    evaluator = evaluate.Evaluator(val_gen, net, CHARS, val_batch_len, val_batch_size_var)
    best_val_loss = float("inf")

    # Loading pretrained weights is currently disabled:
    # if pretrained:
    #     net.load_weights(pretrained)

    model = net.model
    tensorboard.set_model(model)

    # `staircase` must be a real boolean: any non-empty string (including
    # "smooth") is truthy and would silently switch stepped decay on.
    if isinstance(staircase_var, bool):
        use_staircase = staircase_var
    else:
        use_staircase = str(staircase_var).strip().lower() == "staircase"

    # Exponentially decaying learning-rate schedule, starting at lr_var
    learning_rate = keras.optimizers.schedules.ExponentialDecay(
        lr_var,
        decay_steps=decay_steps_var,
        decay_rate=decay_rate_var,
        staircase=use_staircase
    )

    # The optimizer must receive the schedule itself; passing the bare float
    # would leave the decay configured above unused.
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    # Make sure checkpoints have somewhere to go before spending hours training
    os.makedirs(saved_dir_var, exist_ok=True)

    print('Training ...')

    # Starting the training loop
    for epoch in range(train_epochs_var):
        print("Start of epoch {} / {}".format(epoch, train_epochs_var))

        # Reset the running loss and the timer at the start of every epoch
        train_loss = 0
        start_time = time.time()

        for _ in range(BATCH_PER_EPOCH):
            # Get a batch of images/labels; the third element is not needed here
            train_inputs, train_targets, _ = train_gen.next_batch()
            train_inputs = train_inputs.astype('float32')
            # The iterator yields the (indices, values, dense_shape) triple
            train_targets = tf.SparseTensor(train_targets[0], train_targets[1], train_targets[2])

            # Open a GradientTape to record the operations run during the forward pass
            with tf.GradientTape() as tape:
                # Get model outputs
                logits = model(train_inputs, training=True)

                # Collapse axis 1 so the result is (batch, time, classes)
                # NOTE(review): axis 1 is presumably the height axis - confirm
                # against the model definition.
                logits = tf.reduce_mean(logits, axis=1)
                logits_shape = tf.shape(logits)
                cur_batch_size = logits_shape[0]
                timesteps = logits_shape[1]
                # Every sample uses the full time dimension
                seq_len = tf.fill([cur_batch_size], timesteps)
                # tf.nn.ctc_loss expects time-major logits by default
                logits = tf.transpose(logits, (1, 0, 2))

                # Calculate CTC loss. With SparseTensor labels the per-sample
                # label lengths are carried by the tensor itself, so
                # label_length is None.
                ctc_loss = tf.nn.ctc_loss(
                    labels=train_targets,
                    logits=logits,
                    logit_length=seq_len,
                    label_length=None,
                    blank_index=NUM_CLASS - 1  # the blank symbol is the last class
                )
                loss_value = tf.reduce_mean(ctc_loss)

            # Differentiate the scalar mean loss (the value that is reported),
            # not the per-sample vector, which would be implicitly summed.
            grads = tape.gradient(loss_value, model.trainable_weights)
            optimizer.apply_gradients(zip(grads, model.trainable_weights))
            train_loss += float(loss_value)

        tim = time.time() - start_time
        print("Train loss {}, time {} \n".format(float(train_loss / BATCH_PER_EPOCH), tim))

        # Run a validation loop every 25 epochs (epoch 0 is skipped)
        if epoch != 0 and epoch % 25 == 0:
            val_loss = evaluator.evaluate()
            # Keep only the weights with the best validation loss seen so far
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                net.save_weights(os.path.join(saved_dir_var, "new_out_model_best.weights.h5"))
                print("Weights updated in {}/{}".format(saved_dir_var, "new_out_model_best.weights.h5"))
            else:
                print("Validation loss is greater than best_val_loss")

    # Save the final model
    # NOTE(review): this calls net.save() while the best checkpoint uses
    # net.save_weights(); the file name and message both say "weights" -
    # confirm which one LPRNet is meant to use here.
    net.save(os.path.join(saved_dir_var, "new_out_model_last.weights.h5"))
    print("Final Weights saved in {}/{}".format(saved_dir_var, "new_out_model_last.weights.h5"))
    tensorboard.on_train_end(None)


"""


def parser_args():
    
    parser = argparse.ArgumentParser()

    parser.add_argument("--train_dir", default="./train", help="path to the train directory")
    parser.add_argument("--val_dir", default="./valid", help="path to the validation directory")

    parser.add_argument("--train_epochs", type=int, help="number of training epochs", default=151)
    parser.add_argument("--batch_size", type=int, default=8, help="batch size (train)")
    parser.add_argument("--val_batch_size", type=int, default=4, help="Validation batch size")
    parser.add_argument("--lr", type=float, default=1e-3, help="initial learning rate")
    parser.add_argument("--decay_steps", type=float, default=500, help="learning rate decay steps")
    parser.add_argument("--decay_rate", type=float, default=0.995, help="learning rate decay rate")
    parser.add_argument("--staircase", action="store_true", help="learning rate decay on step (default:smooth)")

    parser.add_argument("--pretrained", help="pretrained model location")
    parser.add_argument("--saved_dir", default="saved_models", help="folder for saving models")

    args = vars(parser.parse_args())
    return args
"""


# Alphabet recognised by the network. train() reads CHARS and NUM_CLASS as
# globals, so they are defined at module level: that way train() also works
# when this module is imported rather than executed as a script.
CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
# One extra class for the CTC blank symbol (train() uses index NUM_CLASS - 1).
NUM_CLASS = len(CHARS) + 1


if __name__ == "__main__":
    # Command-line parsing is disabled; settings come from the module-level
    # *_var configuration values.
    # Eager execution is already the default in TensorFlow 2; the call is kept
    # as an explicit safeguard for the custom GradientTape loop.
    tf.compat.v1.enable_eager_execution()
    train()

batch size is 8
Train Len is 1373
batch size is ---- 8
Training ...
Start of epoch 0 / 100


I0000 00:00:1737558647.172018   47625 cuda_dnn.cc:529] Loaded cuDNN version 90600


Train loss 232.25452148082644, time 43.66825866699219 

Start of epoch 1 / 100
Train loss 186.00751193734104, time 41.39356708526611 

Start of epoch 2 / 100
