In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import cv2

import os

import tensorflow as tf
import string
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers, models

from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# from tensorflow import keras
from tensorflow.keras import callbacks
# from tensorflow.keras import layers

In [2]:

# Root folder of the Kaggle handwriting-recognition dataset.
DATA_ROOT = '/kaggle/input/handwriting-recognitionocr'

# Rows taken from the top of each CSV. Kept small for fast iteration.
N_SAMPLES = 1000

train_image_dir = f'{DATA_ROOT}/train_v2/train'
valid_image_dir = f'{DATA_ROOT}/validation_v2/validation'
test_image_dir = f'{DATA_ROOT}/test_v2/test'


# `nrows` stops parsing after N_SAMPLES rows instead of loading the whole file
# and slicing it afterwards. It also returns a frame that owns its data, so the
# later in-place column assignment does not write to a slice of a bigger frame.

# Train CSV
train_csv = pd.read_csv(f'{DATA_ROOT}/CSV/written_name_train.csv', nrows=N_SAMPLES)

# Validation CSV
valid_csv = pd.read_csv(f'{DATA_ROOT}/CSV/written_name_validation.csv', nrows=N_SAMPLES)

# Test CSV
test_csv = pd.read_csv(f'{DATA_ROOT}/CSV/written_name_test.csv', nrows=N_SAMPLES)

In [3]:
# Confirm the size of the loaded training subset, then preview its first rows.
print(train_csv.shape)
train_csv.head()

(1000, 2)


Unnamed: 0,FILENAME,IDENTITY
0,TRAIN_00001.jpg,BALTHAZAR
1,TRAIN_00002.jpg,SIMON
2,TRAIN_00003.jpg,BENES
3,TRAIN_00004.jpg,LA LOVE
4,TRAIN_00005.jpg,DAPHNE


In [4]:
# Turn the bare file names into full image paths.
# `os.path.basename` strips any directory that is already present, so running
# this cell more than once does not keep prepending the image directory.
for frame, image_dir in (
    (train_csv, train_image_dir),
    (valid_csv, valid_image_dir),
    (test_csv, test_image_dir),
):
    frame['FILENAME'] = [
        f"{image_dir}/{os.path.basename(filename)}" for filename in frame['FILENAME']
    ]
train_csv.head()

Unnamed: 0,FILENAME,IDENTITY
0,/kaggle/input/handwriting-recognitionocr/train...,BALTHAZAR
1,/kaggle/input/handwriting-recognitionocr/train...,SIMON
2,/kaggle/input/handwriting-recognitionocr/train...,BENES
3,/kaggle/input/handwriting-recognitionocr/train...,LA LOVE
4,/kaggle/input/handwriting-recognitionocr/train...,DAPHNE


In [5]:
def preprocess_image(image_path, target_size=(128, 32)):
    """Read an image as grayscale and resize it to ``target_size``.

    Args:
        image_path: Path of the image file to read.
        target_size: Output size passed to ``cv2.resize``, i.e. ``(width, height)``.

    Returns:
        The resized single-channel image as returned by ``cv2.resize``.

    Raises:
        FileNotFoundError: If the file is missing or cannot be decoded.
    """
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread signals failure by returning None instead of raising; without
    # this check the problem only shows up as an opaque error inside cv2.resize.
    if img is None:
        raise FileNotFoundError(
            f"Could not read image (missing or not decodable): {image_path}"
        )

    return cv2.resize(img, target_size, interpolation=cv2.INTER_AREA)

def normalize_image(img):
    """Scale pixel values by 1/255 so that 0..255 intensities map to 0.0..1.0.

    The division produces a new value; ``img`` itself is left unchanged.
    """
    return img / 255.0


In [6]:
def load_and_preprocess_images(csv_file, image_dir, target_size=(128, 32)):
    """Load, resize and normalise every image listed in ``csv_file``.

    Args:
        csv_file: DataFrame with a ``FILENAME`` column (path handed to
            ``preprocess_image``) and an ``IDENTITY`` column (the label).
        image_dir: Accepted for call-site compatibility; not used here because
            the path is taken from ``FILENAME`` as-is.
        target_size: Forwarded to ``preprocess_image``.

    Returns:
        ``(images, labels)`` as two NumPy arrays in the frame's row order.
    """
    pixel_arrays = []
    names = []

    # Walk the two columns in lockstep; row order is preserved.
    for image_path, name in zip(csv_file['FILENAME'], csv_file['IDENTITY']):
        resized = preprocess_image(image_path, target_size)
        pixel_arrays.append(normalize_image(resized))
        names.append(name)

    return np.array(pixel_arrays), np.array(names)

In [7]:
# Decode each split into an (images, labels) pair of arrays.
train_images, train_labels = load_and_preprocess_images(csv_file=train_csv, image_dir=train_image_dir)
valid_images, valid_labels = load_and_preprocess_images(csv_file=valid_csv, image_dir=valid_image_dir)
test_images, test_labels = load_and_preprocess_images(csv_file=test_csv, image_dir=test_image_dir)

# Print the first ten labels of every split, in train / validation / test order.
for split_labels in (train_labels, valid_labels, test_labels):
    print(split_labels[:10])

['BALTHAZAR' 'SIMON' 'BENES' 'LA LOVE' 'DAPHNE' 'LUCIE' 'NASSIM'
 'ASSRAOUI' 'LAVIAN' 'MAEVA']
['BILEL' 'LAUMIONIER' 'LEA' 'JEAN-ROCH' 'RUPP' 'UNREADABLE' 'PICHON'
 'DANIEL' 'JEREMY' 'JEAN-MICHEL']
['KEVIN' 'CLOTAIRE' 'LENA' 'JULES' 'CHERPIN' 'MARTIN' 'VALENTINE' 'LORAS'
 'THIBAULT' 'AZABI']


In [8]:
# Characters the recogniser can emit: ASCII letters, space, hyphen, apostrophe.
# The hyphen is included because labels such as 'JEAN-ROCH' and 'JEAN-MICHEL'
# contain it, and encode_label silently drops any character missing from this set.
char_set = string.ascii_letters + " -'"

# Character codes start at 1 so that 0 stays free for the padding value.
char_to_num = {char: idx for idx, char in enumerate(char_set, 1)}
num_to_char = {idx: char for char, idx in char_to_num.items()}

def encode_label(name, char_to_num):
    """Map each character of ``name`` to its integer code.

    Characters that are not keys of ``char_to_num`` are skipped, so the result
    can be shorter than ``name``.
    """
    codes = []
    for symbol in name:
        if symbol in char_to_num:
            codes.append(char_to_num[symbol])
    return codes

def encode_and_pad_labels(labels, char_to_num, max_length):
    """Encode every label and pad the results to a common width.

    Args:
        labels: Iterable of label strings.
        char_to_num: Character-to-code mapping handed to ``encode_label``.
        max_length: Width of the returned array (``maxlen`` of ``pad_sequences``).

    Returns:
        The array produced by ``pad_sequences`` with ``padding='post'``, i.e.
        shorter sequences are filled up at the end.
    """
    sequences = list(map(lambda label: encode_label(label, char_to_num), labels))
    return pad_sequences(sequences, maxlen=max_length, padding='post')

# Use ONE padded width for all three splits. Previously the value was
# recomputed and overwritten per split, so `max_name_length` ended up holding
# the test-set maximum and the three label arrays had different widths.
max_name_length = max(
    len(name)
    for split_labels in (train_labels, valid_labels, test_labels)
    for name in split_labels
)

# Train data
train_padded_labels = encode_and_pad_labels(train_labels, char_to_num, max_name_length)

# Validation data
valid_padded_labels = encode_and_pad_labels(valid_labels, char_to_num, max_name_length)

# Test data
test_padded_labels = encode_and_pad_labels(test_labels, char_to_num, max_name_length)


In [9]:
def create_tf_dataset(images, labels, batch_size=32, shuffle=True):
    """Wrap ``(images, labels)`` in a batched ``tf.data.Dataset``.

    Args:
        images: Image array; sliced along its first axis.
        labels: Label array; sliced along its first axis.
        batch_size: Number of examples per batch.
        shuffle: When true, shuffle with a buffer covering all ``len(images)``
            examples before batching.

    Returns:
        The batched dataset of ``(image, label)`` pairs.
    """
    pairs = tf.data.Dataset.from_tensor_slices((images, labels))

    if not shuffle:
        return pairs.batch(batch_size)

    return pairs.shuffle(buffer_size=len(images)).batch(batch_size)

# Create TensorFlow datasets. Only the training split is shuffled, so
# validation and test batches keep the CSV row order.
# NOTE(review): these datasets yield (image, label) pairs, while the model
# returned by build_ocr_model takes four inputs (image, labels, input_length,
# label_length) - confirm how the two extra inputs are supplied before fit().
train_dataset = create_tf_dataset(train_images, train_padded_labels, batch_size=32)
valid_dataset = create_tf_dataset(valid_images, valid_padded_labels, batch_size=32, shuffle=False)
test_dataset = create_tf_dataset(test_images, test_padded_labels, batch_size=32, shuffle=False)




In [10]:
def ctc_lambda_func(args):
    """Compute the batch CTC cost inside a Keras ``Lambda`` layer.

    Args:
        args: Sequence ``(y_pred, labels, input_length, label_length)``. Note
            that the prediction comes first here, whereas ``ctc_batch_cost``
            expects the labels first.

    Returns:
        Whatever ``tf.keras.backend.ctc_batch_cost`` returns for the batch.
    """
    predictions, targets, prediction_lengths, target_lengths = args
    ctc_cost = tf.keras.backend.ctc_batch_cost(
        targets, predictions, prediction_lengths, target_lengths
    )
    return ctc_cost

def build_ocr_model(input_shape, num_classes):
    """Build the CNN + BiLSTM training model whose single output is the CTC loss.

    Args:
        input_shape: ``(height, width, channels)`` of one input image. The
            height must be known (not ``None``).
        num_classes: Size of the per-time-step softmax, including the CTC blank.

    Returns:
        A Keras model with inputs ``image_input``, ``label_input``,
        ``input_length`` and ``label_length`` and one output, the ``ctc`` layer.

    Raises:
        ValueError: If the feature-map height is not statically known.
    """
    # Input layer for the images
    input_img = layers.Input(shape=input_shape, name='image_input')

    # CNN layers for feature extraction.
    # Height is halved three times; width only once. The width becomes the CTC
    # time axis, so a 128-px-wide input still gives 64 time steps.
    x = layers.Conv2D(32, (3, 3), padding='same', activation='relu')(input_img)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Conv2D(64, (3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 1))(x)
    x = layers.Conv2D(128, (3, 3), padding='same', activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=(2, 1))(x)

    # Turn the (height, width, channels) feature map into a left-to-right
    # sequence: one time step per image column, with that column's rows and
    # channels concatenated as its feature vector. Reshaping directly to
    # (-1, channels) would walk the map row by row, so the "time" axis would
    # sweep across the image several times - which breaks CTC's assumption of
    # a single monotonic left-to-right alignment.
    feat_height = x.shape[1]
    feat_channels = x.shape[-1]
    if feat_height is None:
        raise ValueError("build_ocr_model needs a fixed input height in input_shape")
    x = layers.Permute((2, 1, 3))(x)  # (height, width, ch) -> (width, height, ch)
    x = layers.Reshape(target_shape=(-1, feat_height * feat_channels))(x)

    # RNN layers for sequence modeling
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)
    x = layers.Bidirectional(layers.LSTM(128, return_sequences=True))(x)

    # Dense layer to predict a character distribution at every time step
    y_pred = layers.Dense(num_classes, activation='softmax', name='softmax_output')(x)

    # CTC input layers
    labels = layers.Input(name='label_input', shape=[None], dtype='float32')  # ground truth labels
    input_length = layers.Input(name='input_length', shape=[1], dtype='int64')  # lengths of input sequences
    label_length = layers.Input(name='label_length', shape=[1], dtype='int64')  # lengths of the actual labels

    # CTC loss computed inside the graph (no trainable parameters here)
    ctc_loss_output = layers.Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length]
    )

    # Model to train using CTC loss
    model = tf.keras.models.Model(
        inputs=[input_img, labels, input_length, label_length],
        outputs=ctc_loss_output,
    )

    return model

In [11]:
# def ctc_lambda_func(args):
#     y_pred, labels, input_length, label_length = args
#     return tf.keras.backend.ctc_batch_cost(labels, y_pred, input_length, label_length)

In [12]:
# Number of softmax classes.
# Label index 0 is the padding value and indices 1..len(char_set) are the real
# characters (see char_to_num). CTC reserves the LAST class (num_classes - 1)
# for its blank symbol, so the blank needs a slot of its own: with
# `len(char_set) + 1` the final character of char_set shared its index with
# the blank.
num_classes = len(char_set) + 2

# Build the model
input_shape = (32, 128, 1)  # (height, width, channels) of the preprocessed images
ocr_model = build_ocr_model(input_shape, num_classes)


def ctc_passthrough_loss(y_true, y_pred):
    """Return the model output unchanged: the 'ctc' layer already holds the loss."""
    return y_pred


# The model's only output IS the per-sample CTC loss, but compile() still needs
# a loss function so the optimizer has something to minimise. The pass-through
# loss ignores y_true, so fit() must be given a dummy target of shape
# (batch, 1) whose values do not matter.
ocr_model.compile(optimizer='adam', loss=ctc_passthrough_loss)

# Display the model architecture
ocr_model.summary()
