Code to load CK's extracted characters

In [None]:
import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder

def load_images_from_folder(folder_path, target_size, color_mode):
    """Load every image stored directly inside ``folder_path``.

    The label of an image is the part of its file name (extension removed)
    that comes before the first ``-``.

    Args:
        folder_path: Directory that contains the image files.
        target_size: Size tuple forwarded unchanged to ``cv2.resize``.
        color_mode: ``'grayscale'`` reads a single channel; any other value
            reads a colour image and converts it from BGR to RGB.

    Returns:
        ``(images, labels)`` as NumPy arrays. Pixel values are ``float32``
        scaled to ``[0, 1]``; grayscale images get a trailing channel axis.
        Files that cannot be read or processed are reported and skipped.
    """
    images = []
    labels = []
    # Compared against the lower-cased file name, so '.PNG', '.Jpg', ... match too.
    valid_extensions = ('.jpg', '.jpeg', '.png')
    channels = 1 if color_mode == 'grayscale' else 3

    # Sorted so the sample order is reproducible across machines and runs
    # (os.listdir returns entries in arbitrary order).
    for filename in sorted(os.listdir(folder_path)):
        if not filename.lower().endswith(valid_extensions):
            continue

        file_path = os.path.join(folder_path, filename)
        try:
            # Read image
            if color_mode == 'grayscale':
                image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
            else:
                image = cv2.imread(file_path)

            # cv2.imread signals failure by returning None instead of raising.
            if image is None:
                raise ValueError("image could not be read or decoded")

            if color_mode != 'grayscale':
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            # Resize and normalize
            image = cv2.resize(image, target_size)
            image = image.astype('float32') / 255.0

            # Add channel dimension if grayscale
            if channels == 1:
                image = np.expand_dims(image, axis=-1)

            # Store image and label
            images.append(image)
            labels.append(os.path.splitext(filename)[0].split('-')[0])

        except Exception as e:
            # Best effort: one broken file must not abort the whole load.
            print(f"Error processing {filename}: {str(e)}")
            continue

    return np.array(images), np.array(labels)

def prepare_fixed_split(train_dir, test_dir, target_size=(28, 28), color_mode='grayscale'):
    """Load a predefined train/test split and integer-encode its labels.

    Args:
        train_dir: Folder passed to ``load_images_from_folder`` for training data.
        test_dir: Folder passed to ``load_images_from_folder`` for test data.
        target_size: Image size forwarded to the loader.
        color_mode: Colour mode forwarded to the loader.

    Returns:
        ``(X_train, X_test, y_train, y_test, label_encoder)`` where the ``y``
        arrays are the encoded labels and ``label_encoder`` is the fitted
        ``LabelEncoder``.
    """
    # Training folder is read first, then the test folder.
    (X_train, train_labels), (X_test, test_labels) = [
        load_images_from_folder(directory, target_size, color_mode)
        for directory in (train_dir, test_dir)
    ]

    # One encoder fitted on the union of labels keeps the integer ids
    # consistent between the two splits.
    label_encoder = LabelEncoder()
    label_encoder.fit(np.concatenate([train_labels, test_labels]))

    y_train, y_test = [
        label_encoder.transform(split_labels)
        for split_labels in (train_labels, test_labels)
    ]

    return X_train, X_test, y_train, y_test, label_encoder

if __name__ == "__main__":
    # Folders holding CK's extracted character images.
    train_dir = "train_extracted"
    test_dir = "test_extracted"
    target_size = (28, 28)
    color_mode = 'grayscale'

    # Load and prepare data
    X_train, X_test, Y_train, Y_test, label_encoder = prepare_fixed_split(
        train_dir, test_dir, target_size, color_mode
    )

    # Verify shapes and classes.
    # The labels are bound to Y_train / Y_test (capital Y) above; printing
    # the lower-case names raised NameError on a fresh kernel.
    print(f"Training data shape: {X_train.shape}")
    print(f"Training labels shape: {Y_train.shape}")
    print(f"Test data shape: {X_test.shape}")
    print(f"Test labels shape: {Y_test.shape}")
    print(f"Number of classes: {len(label_encoder.classes_)}")
    print(f"Class names: {label_encoder.classes_}")

Training data shape: (39426, 28, 28, 1)
Training labels shape: (39426,)
Test data shape: (9988, 28, 28, 1)
Test labels shape: (9988,)
Number of classes: 36
Class names: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h'
 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']


Code to load Pooja's extracted characters

In [2]:
# Code to load Pooja's extracted characters

import os
import cv2
import numpy as np
from sklearn.preprocessing import LabelEncoder

def load_images_from_folder(folder_path, target_size, color_mode):
    """Load images arranged as one sub-folder per class.

    Every directory directly inside ``folder_path`` is treated as a class;
    its name becomes the label of every image found inside it.

    Args:
        folder_path: Root directory that contains the class sub-folders.
        target_size: Size tuple forwarded unchanged to ``cv2.resize``.
        color_mode: ``'grayscale'`` reads a single channel; any other value
            reads a colour image and converts it from BGR to RGB.

    Returns:
        ``(images, labels)`` as NumPy arrays. Pixel values are ``float32``
        scaled to ``[0, 1]``; grayscale images get a trailing channel axis.
        Files that cannot be read or processed are reported and skipped.
    """
    images = []
    labels = []
    # Compared against the lower-cased file name, so '.PNG', '.Jpg', ... match too.
    valid_extensions = ('.jpg', '.jpeg', '.png')
    channels = 1 if color_mode == 'grayscale' else 3

    # Sorted so the sample order is reproducible (os.listdir order is arbitrary).
    for label in sorted(os.listdir(folder_path)):
        sub_folder_path = os.path.join(folder_path, label)
        # Stray files next to the class folders (e.g. .DS_Store) would make
        # os.listdir raise NotADirectoryError, so skip anything that is not a folder.
        if not os.path.isdir(sub_folder_path):
            continue

        for filename in sorted(os.listdir(sub_folder_path)):
            if not filename.lower().endswith(valid_extensions):
                continue

            file_path = os.path.join(sub_folder_path, filename)
            try:
                # Read image
                if color_mode == 'grayscale':
                    image = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
                else:
                    image = cv2.imread(file_path)

                # cv2.imread signals failure by returning None instead of raising.
                if image is None:
                    raise ValueError("image could not be read or decoded")

                if color_mode != 'grayscale':
                    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

                # Resize and normalize
                image = cv2.resize(image, target_size)
                image = image.astype('float32') / 255.0

                # Add channel dimension if grayscale
                if channels == 1:
                    image = np.expand_dims(image, axis=-1)

                # Store image and label
                images.append(image)
                labels.append(label)

            except Exception as e:
                # Best effort: one broken file must not abort the whole load.
                print(f"Error processing {filename}: {str(e)}")
                continue

    return np.array(images), np.array(labels)

def prepare_fixed_split(train_dir, test_dir, target_size=(28, 28), color_mode='grayscale'):
    """Load a predefined train/test split and integer-encode its labels.

    Args:
        train_dir: Folder passed to ``load_images_from_folder`` for training data.
        test_dir: Folder passed to ``load_images_from_folder`` for test data.
        target_size: Image size forwarded to the loader.
        color_mode: Colour mode forwarded to the loader.

    Returns:
        ``(X_train, X_test, y_train, y_test, label_encoder)`` where the ``y``
        arrays are the encoded labels and ``label_encoder`` is the fitted
        ``LabelEncoder``.
    """
    # Training folder is read first, then the test folder.
    X_train, train_labels = load_images_from_folder(
        folder_path=train_dir, target_size=target_size, color_mode=color_mode
    )
    X_test, test_labels = load_images_from_folder(
        folder_path=test_dir, target_size=target_size, color_mode=color_mode
    )

    # Fit a single encoder on the labels of both splits so that a given
    # character maps to the same integer in train and test.
    label_encoder = LabelEncoder()
    label_encoder.fit(np.concatenate([train_labels, test_labels]))

    encoded_train = label_encoder.transform(train_labels)
    encoded_test = label_encoder.transform(test_labels)

    return X_train, X_test, encoded_train, encoded_test, label_encoder

if __name__ == "__main__":
    # Folders holding Pooja's extracted characters (one sub-folder per class).
    train_dir, test_dir = "output-train-by-char", "output-test"
    target_size = (28, 28)
    color_mode = 'grayscale'

    # Load both splits and encode their labels with a shared encoder.
    X_train, X_test, Y_train, Y_test, label_encoder = prepare_fixed_split(
        train_dir=train_dir,
        test_dir=test_dir,
        target_size=target_size,
        color_mode=color_mode,
    )

    # Sanity-check array shapes and the discovered classes.
    print(
        f"Training data shape: {X_train.shape}",
        f"Training labels shape: {Y_train.shape}",
        f"Test data shape: {X_test.shape}",
        f"Test labels shape: {Y_test.shape}",
        f"Number of classes: {len(label_encoder.classes_)}",
        f"Class names: {label_encoder.classes_}",
        sep="\n",
    )

Training data shape: (42136, 28, 28, 1)
Training labels shape: (42136,)
Test data shape: (9021, 28, 28, 1)
Test labels shape: (9021,)
Number of classes: 36
Class names: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h'
 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']


Check loaded data:

In [3]:
# Summarise whichever dataset is currently loaded in the kernel
# (shapes of both splits plus the classes known to the label encoder).
print(
    f"Training data shape: {X_train.shape}",
    f"Training labels shape: {Y_train.shape}",
    f"Test data shape: {X_test.shape}",
    f"Test labels shape: {Y_test.shape}",
    f"Number of classes: {len(label_encoder.classes_)}",
    f"Class names: {label_encoder.classes_}",
    sep="\n",
)

Training data shape: (42136, 28, 28, 1)
Training labels shape: (42136,)
Test data shape: (9021, 28, 28, 1)
Test labels shape: (9021,)
Number of classes: 36
Class names: ['0' '1' '2' '3' '4' '5' '6' '7' '8' '9' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h'
 'i' 'j' 'k' 'l' 'm' 'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z']


Baseline Model

In [None]:
# TODO: Create new conda env for this. Default one having issue with keras

from tf_keras.models import Sequential
from tf_keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tf_keras.preprocessing.image import ImageDataGenerator
from tf_keras.callbacks import EarlyStopping, ReduceLROnPlateau
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import shuffle
import seaborn as sns

# Shuffle before splitting: the validation subset is taken by position, so
# without shuffling whole classes could be missing from one side of the split.
X_train, Y_train = shuffle(X_train, Y_train, random_state=42)

VALIDATION_SPLIT = 0.2

# Augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rotation_range=15,      # ±15 degree rotation
    width_shift_range=0.1,  # 10% horizontal shift
    height_shift_range=0.1, # 10% vertical shift
    zoom_range=0.1,         # 10% zoom
    shear_range=0.1,        # 10% shear
    validation_split=VALIDATION_SPLIT,
)

# Validation images must not be randomly distorted, otherwise val_loss /
# val_accuracy are measured on a different distribution than the test set.
# Using the same validation_split on the same arrays is expected to select
# the same samples as the training generator's split (see Keras docs).
val_datagen = ImageDataGenerator(validation_split=VALIDATION_SPLIT)

# In-memory arrays are fed with flow(); subset picks the side of the split.
train_generator = train_datagen.flow(
    X_train, Y_train,
    batch_size=32,
    subset="training"
)

val_generator = val_datagen.flow(
    X_train, Y_train,
    batch_size=32,
    subset="validation"
)

def create_model(input_shape, num_classes):
    """Build and compile the baseline CNN classifier.

    Architecture: two Conv2D + MaxPooling2D stages (32 and 64 filters)
    followed by a 128-unit dense layer and a softmax output.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, sparse categorical
        cross-entropy loss, accuracy metric).
    """
    network = Sequential()

    # Feature extractor
    network.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    network.add(MaxPooling2D((2, 2)))
    network.add(Conv2D(64, (3, 3), activation='relu'))
    network.add(MaxPooling2D((2, 2)))

    # Classifier
    network.add(Flatten())
    network.add(Dense(128, activation='relu'))
    network.add(Dense(num_classes, activation='softmax'))

    # Sparse loss: the labels are integer class ids, not one-hot vectors.
    network.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


# Build the baseline network sized for the loaded dataset.
num_classes = len(label_encoder.classes_)
input_shape = X_train.shape[1:]
model = create_model(input_shape=input_shape, num_classes=num_classes)
model.summary()

# Stop early and lower the learning rate when validation stops improving.
callbacks = [
    EarlyStopping(patience=5, restore_best_weights=True),
    ReduceLROnPlateau(factor=0.1, patience=3)
]

# The callbacks above were previously created but never handed to fit(),
# so neither early stopping nor LR reduction was active.
# batch_size is not passed here: the generators already yield batches of 32
# and Keras documents that it must not be specified for generator input.
history = model.fit(train_generator,
                    epochs=50,
                    validation_data=val_generator,
                    callbacks=callbacks)

# --- Test-set metrics -------------------------------------------------------
test_loss, test_acc = model.evaluate(X_test, Y_test, verbose=2)
print(f"\nTest accuracy: {test_acc:.4f}")
print(f"Test loss: {test_loss:.4f}")

# Class probabilities -> predicted class id (index of the largest probability).
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# --- Per-class precision / recall / F1 --------------------------------------
print("\nClassification Report:")
report = classification_report(
    Y_test,
    y_pred_classes,
    target_names=label_encoder.classes_,
)
print(report)

# --- Confusion matrix -------------------------------------------------------
plt.figure(figsize=(10,8))
cm = confusion_matrix(Y_test, y_pred_classes)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=label_encoder.classes_,
    yticklabels=label_encoder.classes_,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# --- Training curves: accuracy on the left, loss on the right ---------------
plt.figure(figsize=(12, 4))
curve_panels = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss'),
)
for position, (train_key, val_key, train_label, val_label, y_label) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
plt.show()

# --- Persist the trained model ----------------------------------------------
model.save('character_recognition_model.keras')


CNN model pretrained on EMNIST, then fine-tuned on our dataset

In [None]:
# TODO: Create new conda env for this. Default one having issue with keras
from tf_keras.models import Sequential
from tf_keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tf_keras.preprocessing.image import ImageDataGenerator
from tf_keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tf_keras.utils import to_categorical
from tf_keras.optimizers import Adam
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.utils import shuffle
import seaborn as sns

def create_model(input_shape, num_classes):
    """Build and compile the CNN used for EMNIST pretraining and fine-tuning.

    Architecture: Conv2D(32) + pool, Conv2D(64) + pool, Conv2D(128), then a
    128-unit dense layer and a softmax output.

    Args:
        input_shape: Shape of one input sample, passed to the first Conv2D.
        num_classes: Number of units in the softmax output layer.

    Returns:
        The compiled ``Sequential`` model (Adam with learning rate 1e-3,
        sparse categorical cross-entropy loss, accuracy metric).
    """
    network = Sequential()

    # Layers 0-3: the two conv/pool stages.
    network.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
    network.add(MaxPooling2D((2, 2)))
    network.add(Conv2D(64, (3, 3), activation='relu'))
    network.add(MaxPooling2D((2, 2)))

    # Layer 4 onwards: last conv layer and the classifier head.
    network.add(Conv2D(128, (3, 3), activation='relu'))
    network.add(Flatten())
    network.add(Dense(128, activation='relu'))
    network.add(Dense(num_classes, activation='softmax'))

    # Sparse loss: the labels are integer class ids, not one-hot vectors.
    network.compile(
        optimizer=Adam(learning_rate=1e-3),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return network


# Size the network from the character dataset already loaded in the kernel:
# one output unit per encoded class, input shape taken from a single sample.
input_shape = X_train.shape[1:]
num_classes = len(label_encoder.classes_)
model = create_model(input_shape, num_classes)
model.summary()

# Callbacks for the fine-tuning run further down.
callbacks = [
    EarlyStopping(restore_best_weights=True, patience=5),
    ReduceLROnPlateau(patience=3, factor=0.1),
]



import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def load_emnist_from_csv(csv_path):
    """Read a header-less CSV whose first column is the label.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        ``(images, labels)``: ``labels`` is the first column as a 1-D array,
        ``images`` holds all remaining columns as a 2-D array (one row per
        sample).
    """
    frame = pd.read_csv(csv_path, header=None)
    label_column = frame.iloc[:, 0]
    pixel_columns = frame.iloc[:, 1:]
    return pixel_columns.values, label_column.values

def preprocess_emnist(images, labels):
    """Turn flat EMNIST pixel rows into normalised 28x28x1 arrays.

    Args:
        images: Array with 784 pixel values per sample.
        labels: Labels belonging to ``images``; returned unchanged.

    Returns:
        ``(images, labels)`` where ``images`` is ``float32`` in ``[0, 1]``
        with shape ``(n_samples, 28, 28, 1)``.
    """
    # Rows and columns are swapped because EMNIST stores its images transposed.
    # No additional left/right flip is applied.
    pixels = np.transpose(images.reshape((-1, 28, 28)), axes=(0, 2, 1))

    # Scale 0..255 intensities to 0..1.
    pixels = pixels.astype('float32') / 255.0

    # Trailing channel axis expected by the Conv2D input.
    return pixels[..., np.newaxis], labels

def create_label_mapping():
    """Return the character for every un-merged label id.

    The list has 62 entries: indices 0-9 are the digits ``'0'``-``'9'``,
    10-35 are ``'A'``-``'Z'`` and 36-61 are ``'a'``-``'z'``.
    (Presumably the EMNIST ByClass label order; confirm against the dataset.)
    """
    digits = [str(value) for value in range(10)]
    uppercase = [chr(ord('A') + offset) for offset in range(26)]
    lowercase = [chr(ord('a') + offset) for offset in range(26)]
    return digits + uppercase + lowercase


# Lookup table from numeric label id to its character (all 62 ids).
label_mapping = create_label_mapping()

# Translate numeric EMNIST labels into the characters they stand for.
def convert_labels(y_numerical):
    """Map each numeric label to its character via ``label_mapping``.

    Args:
        y_numerical: Iterable of integer label ids.

    Returns:
        List with one character string per input label.
    """
    characters = []
    for label_id in y_numerical:
        characters.append(label_mapping[label_id])
    return characters

def merge_case_labels(y_original):
    """Give the upper- and lowercase form of a letter the same label id.

    Uppercase ids (10-35) are shifted by 26 onto the ids of their lowercase
    counterparts (36-61). Digits (0-9) and lowercase ids are left unchanged,
    so the result only contains ids 0-9 and 36-61.

    Args:
        y_original: Array-like of integer label ids; it is not modified.

    Returns:
        New NumPy array with the merged label ids.
    """
    # np.array copies, so the caller's data stays untouched, and it lets the
    # function accept plain lists as well as arrays.
    y_new = np.array(y_original)

    # Uppercase letters A-Z occupy ids 10-35.
    upper_case = (y_new >= 10) & (y_new <= 35)

    # Move them onto the lowercase ids: 'A' (10) -> 'a' (36), ..., 'Z' (35) -> 'z' (61).
    y_new[upper_case] += 26

    return y_new


# --- EMNIST data ------------------------------------------------------------
# Read each CSV and reshape / normalise its images (train first, then test).
train_images, train_labels = preprocess_emnist(
    *load_emnist_from_csv('emnist-byclass-train.csv')
)
test_images, test_labels = preprocess_emnist(
    *load_emnist_from_csv('emnist-byclass-test.csv')
)

# Label pipeline: numeric id -> case-merged id -> character -> id of the
# label encoder fitted on the character dataset, so EMNIST and the custom
# data share one class index space.
train_labels = np.array(convert_labels(np.array(merge_case_labels(train_labels))))
test_labels = np.array(convert_labels(np.array(merge_case_labels(test_labels))))
train_labels, test_labels = [
    label_encoder.transform(characters)
    for characters in (train_labels, test_labels)
]

# After merging there are 36 classes (digits 0-9 plus 26 letters).
num_classes = 36

# --- Pretraining ------------------------------------------------------------
# Train the already-built model on EMNIST; the EMNIST test split serves as
# validation data.
model.fit(
    x=train_images,
    y=train_labels,
    validation_data=(test_images, test_labels),
    epochs=20,
    batch_size=128,
)

# --- How well does the EMNIST-only model do on the custom test set? ---------
test_loss, test_acc = model.evaluate(X_test, Y_test, verbose=2)
print(f"\nTest accuracy: {test_acc:.4f}")
print(f"Test loss: {test_loss:.4f}")

# Class probabilities -> predicted class id.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

print("\nClassification Report:")
print(classification_report(
    Y_test,
    y_pred_classes,
    target_names=label_encoder.classes_,
))



# Shuffle before splitting: the validation subset is taken by position, so
# without shuffling whole classes could be missing from one side of the split.
X_train, Y_train = shuffle(X_train, Y_train, random_state=42)

VALIDATION_SPLIT = 0.2

# Augmentation is applied to the training subset only.
train_datagen = ImageDataGenerator(
    rotation_range=15,      # ±15 degree rotation
    width_shift_range=0.1,  # 10% horizontal shift
    height_shift_range=0.1, # 10% vertical shift
    zoom_range=0.1,         # 10% zoom
    shear_range=0.1,        # 10% shear
    validation_split=VALIDATION_SPLIT,
)

# Validation images must not be randomly distorted, otherwise val_loss (which
# drives EarlyStopping / ReduceLROnPlateau) is measured on a different
# distribution than the test set. Using the same validation_split on the same
# arrays is expected to select the same samples as the training generator's
# split (see Keras docs).
val_datagen = ImageDataGenerator(validation_split=VALIDATION_SPLIT)

# In-memory arrays are fed with flow(); subset picks the side of the split.
train_generator = train_datagen.flow(
    X_train, Y_train,
    batch_size=32,
    subset="training"
)

val_generator = val_datagen.flow(
    X_train, Y_train,
    batch_size=32,
    subset="validation"
)

# 1. Freeze the convolutional base.
# Layers 0-3 are the first two conv/pool stages and keep their pretrained
# weights; the last conv layer and the dense layers (index 4 onwards) are
# fine-tuned.
for index, layer in enumerate(model.layers):
    layer.trainable = index >= 4

# 2. (Disabled) Optionally replace the classification head.
# # Remove original classification head (last 2 layers)
# model.pop()  # Remove output layer
# model.pop()  # Remove dense layer
#
# # Add new dense layers for custom dataset
# model.add(Dense(64, activation='relu', name='new_dense'))
# model.add(Dense(num_classes, activation='softmax', name='new_output'))

# 3. Recompile with a lower learning rate. Recompiling after changing
# `trainable` is what makes the freeze take effect for training.
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# 4. Display trainable layers
for i, layer in enumerate(model.layers):
    print(f"Layer {i}: {layer.name} - Trainable: {layer.trainable}")

# 5. Fine-tune on the custom data.
# batch_size is not passed here: the generators already yield batches of 32
# and Keras documents that it must not be specified for generator input.
history = model.fit(
    train_generator,
    epochs=100,
    validation_data=val_generator,
    callbacks=callbacks
)

# --- Test-set metrics of the fine-tuned model -------------------------------
test_loss, test_acc = model.evaluate(X_test, Y_test, verbose=2)
print(f"\nTest accuracy: {test_acc:.4f}")
print(f"Test loss: {test_loss:.4f}")

# Class probabilities -> predicted class id (index of the largest probability).
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# --- Per-class precision / recall / F1 --------------------------------------
print("\nClassification Report:")
report = classification_report(
    Y_test,
    y_pred_classes,
    target_names=label_encoder.classes_,
)
print(report)

# --- Confusion matrix -------------------------------------------------------
plt.figure(figsize=(10,8))
cm = confusion_matrix(Y_test, y_pred_classes)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=label_encoder.classes_,
    yticklabels=label_encoder.classes_,
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# --- Fine-tuning curves: accuracy on the left, loss on the right ------------
plt.figure(figsize=(12, 4))
curve_panels = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss'),
)
for position, (train_key, val_key, train_label, val_label, y_label) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()
plt.show()

# --- Persist the fine-tuned model -------------------------------------------
model.save('pretrained_character_recognition_model.keras')


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 26, 26, 32)        320       
                                                                 
 max_pooling2d_6 (MaxPoolin  (None, 13, 13, 32)        0         
 g2D)                                                            
                                                                 
 conv2d_10 (Conv2D)          (None, 11, 11, 64)        18496     
                                                                 
 max_pooling2d_7 (MaxPoolin  (None, 5, 5, 64)          0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 3, 3, 128)         73856     
                                                                 
 flatten_3 (Flatten)         (None, 1152)             