In [1]:
import os
import cv2
import numpy as np
import random
from tqdm import tqdm
import tensorflow as tf
from keras import layers, models, losses, metrics, regularizers, optimizers
from sklearn.model_selection import train_test_split
from PIL import Image

2025-04-16 00:55:09.671897: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-16 00:55:10.152471: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744736110.417540  532609 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744736110.487795  532609 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744736110.995896  532609 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [2]:
# Global constant for the character set
CHARSET = "0123456789abcdefghijklmnopqrstuvwxyz"

In [3]:
def resize_and_pad_image(img, target_size):
    height, width = img.shape[0], img.shape[1]

    # Calculate scale to fit the longer side into target_size
    scale = target_size / max(height, width)
    new_height = int(height * scale)
    new_width = int(width * scale)

    # Resize with aspect ratio
    resized = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

    # Pad the image to target_size
    pad_height = target_size - new_height
    pad_width = target_size - new_width
    top = pad_height // 2
    bottom = pad_height - top
    left = pad_width // 2
    right = pad_width - left

    padded = cv2.copyMakeBorder(resized, top, bottom, left, right, cv2.BORDER_CONSTANT, value=0)

    return padded

## Original dataset

In [4]:
# -----------------------------------------------------------------------------
# 1) Load Segmented Images
# -----------------------------------------------------------------------------
def load_segmented_images(folder_path, test_size=0.1):
    X_list, Y_list = [], []
    test_dict = dict()
    captcha_names = os.listdir(folder_path)
    train_names, test_names = train_test_split(captcha_names, test_size=test_size, random_state=42)
    for captcha_name in captcha_names:
        captcha_dir = os.path.join(folder_path, captcha_name)
        if not os.path.isdir(captcha_dir) or len(captcha_name) == 0:
            continue
        image_files = [fname for fname in os.listdir(captcha_dir)
                if fname.lower().endswith(".png") and fname.startswith("char_")]
        for filename in image_files:
            try:
                char_index = int(filename.split("_")[1].split(".")[0])
            except Exception:
                continue
            if char_index >= len(captcha_name):
                continue
            label_char = captcha_name[char_index]
            if label_char not in CHARSET:
                continue
            label_idx = CHARSET.index(label_char)
            img_path = os.path.join(captcha_dir, filename)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                continue
            
            img = resize_and_pad_image(img, 32)
            img = img.astype(np.float32) / 255.0
            img = np.expand_dims(img, axis=-1)
            label = np.zeros(len(CHARSET), dtype=np.float32)
            label[label_idx] = 1.0

            if captcha_name in train_names:
                X_list.append(img)
                Y_list.append(label)

            else:
                test_dict.setdefault(captcha_name, ([], []))
                test_dict[captcha_name][0].append(img)
                test_dict[captcha_name][1].append(label)
    return X_list, Y_list, test_dict

In [None]:
# Set seeds for reproducibility
np.random.seed(42)
random.seed(42)
tf.random.set_seed(42)

# Load and split segmented data
segment_folder = "segmented_new"
X_list, Y_list, test_dict = load_segmented_images(segment_folder)

X_train, X_val, Y_train, Y_val = train_test_split(X_list, Y_list, test_size=0.1, random_state=42)

X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_val = np.array(X_val)
Y_val = np.array(Y_val)

print(f"Loaded {len(X_train)} training characters, {len(X_val)} validation characters and {len(test_dict)} test strings.")

In [None]:
display(Image.fromarray(X_val[1000].reshape(32, 32) * 255).convert("L"))

In [5]:
# -----------------------------------------------------------------------------
# 2) Enhanced Model Architecture (without TensorFlow Addons)
# -----------------------------------------------------------------------------
def build_model(num_classes=36):
    inputs = layers.Input(shape=(32, 32, 1))
    
    # Augmentation layers
    data_augmentation = tf.keras.Sequential([
        layers.RandomRotation(0.1),         
        layers.RandomZoom(0.1),           
        layers.RandomTranslation(0.1, 0.1),   
        layers.RandomContrast(0.2),        
        layers.GaussianNoise(0.1)          
    ])

    x = data_augmentation(inputs)
    
    # Stem block  
    x = layers.Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv2D(128, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)
    
    x = layers.Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv2D(256, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv2D(512, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
    x = layers.Dropout(0.2)(x)

    x = layers.Conv2D(1024, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)

    x = layers.Conv2D(1024, kernel_size=3, strides=1, padding='same', activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D(pool_size=2)(x)
    x = layers.Dropout(0.2)(x)
    
    # Head
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Dense(128, activation='relu', kernel_regularizer=regularizers.l2(1e-5))(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    
    model = models.Model(inputs=inputs, outputs=outputs)
    
    # Custom learning rate schedule
    # lr_schedule = optimizers.schedules.ExponentialDecay(
    #     initial_learning_rate=0.001,
    #     decay_steps=10000,
    #     decay_rate=0.9)
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3, weight_decay=1e-6)
    
    model.compile(
        optimizer=optimizer,
        loss=losses.CategoricalCrossentropy(label_smoothing=0.1),
        metrics=[metrics.CategoricalAccuracy()])
    
    return model

In [None]:
# Build model
model = build_model(num_classes=len(CHARSET))
model.summary()

# Enhanced callbacks
callbacks = [
tf.keras.callbacks.ModelCheckpoint(
    'best_model.h5', save_best_only=True, monitor='val_categorical_accuracy'),
tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6),
tf.keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy', patience=10, 
    restore_best_weights=True, mode='max', baseline=0.4),
tf.keras.callbacks.TensorBoard(log_dir='./logs')
]

# Train with class weights
class_counts = np.sum(Y_train, axis=0)
class_weights = {i: 1.0 / (count + 1e-5) for i, count in enumerate(class_counts)}
class_weights = {k: v / sum(class_weights.values()) * len(CHARSET) for k, v in class_weights.items()}

In [None]:
history = model.fit(
X_train, Y_train,
epochs=100,
batch_size=128,
validation_data=(X_val, Y_val),
callbacks=callbacks,
class_weight=class_weights)

In [6]:
# -----------------------------------------------------------------------------
# 3) Evaluation Function
# -----------------------------------------------------------------------------
def evaluate_captcha(model, test_dict):
    total_chars = 0
    correct_chars = 0
    total_strings = len(test_dict)
    correct_strings = 0
    tqdm_bar = tqdm(total=total_strings, desc="Evaluating CAPTCHA strings")

    for captcha_name, (X_data, Y_data) in test_dict.items():
        total_chars += len(X_data)
        
        pred = model.predict(np.array(X_data), verbose=0)
        pred_labels = np.argmax(pred, axis=1)
        true_labels = np.argmax(np.array(Y_data), axis=1)
        
        is_string_correct = True
        for i in range(len(pred_labels)):
            if pred_labels[i] == true_labels[i]:
                correct_chars += 1
            else:
                is_string_correct = False
        if is_string_correct:
            correct_strings += 1
        tqdm_bar.update(1)
    
    char_accuracy = correct_chars / total_chars * 100
    string_accuracy = correct_strings / total_strings * 100
    print(f"\nCharacter Accuracy: {char_accuracy:.2f}%")
    print(f"String Accuracy: {string_accuracy:.2f}%")

## Results (model trained on original dataset)

In [12]:
# Evaluation
print("Final Evaluation:")
evaluate_captcha(model, test_dict)


Final Evaluation:


Evaluating CAPTCHA strings: 100%|█████████████| 722/722 [00:35<00:00, 20.08it/s]


Character Accuracy: 92.43%
String Accuracy: 67.45%





In [16]:
### Evaluate on generated images

print("Evaluating on GAN images:")
_, _, test_dict_generated = load_segmented_images("segmented_generated", test_size=0.999)
evaluate_captcha(model, test_dict_generated)

Loaded 4917 test strings.


Evaluating CAPTCHA strings: 100%|███████████| 4917/4917 [04:01<00:00, 20.36it/s]


Character Accuracy: 89.78%
String Accuracy: 57.96%





## Training with GAN generated images

In [27]:
X_list_ori, Y_list_ori, test_dict_ori = load_segmented_images("segmented_new")
X_list_gan, Y_list_gan, test_dict_gan = load_segmented_images("segmented_generated")

X_list_ori.extend(X_list_gan)
Y_list_ori.extend(Y_list_gan)

X_train, X_val, Y_train, Y_val = train_test_split(X_list_ori, Y_list_ori, test_size=0.1, random_state=42)

X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_val = np.array(X_val)
Y_val = np.array(Y_val)

print(f"Loaded {len(X_train)} training characters from original and generated, \
{len(X_val)} validation characters from original and generated; \
{len(test_dict_ori)} test strings from original, \
{len(test_dict_gan)} test strings from generated.")

Loaded 58499 training characters, 6500 validation characters; 722 test strings from original, 493 test strings from GAN.


In [30]:
# Build model
model = build_model(num_classes=len(CHARSET))

# Enhanced callbacks
callbacks = [
tf.keras.callbacks.ModelCheckpoint(
    'best_model_on_combined_data.h5', save_best_only=True, monitor='val_categorical_accuracy'),
tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6),
tf.keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy', patience=10, 
    restore_best_weights=True, mode='max', baseline=0.4),
tf.keras.callbacks.TensorBoard(log_dir='./logs')
]

# Train with class weights
class_counts = np.sum(Y_train, axis=0)
class_weights = {i: 1.0 / (count + 1e-5) for i, count in enumerate(class_counts)}
class_weights = {k: v / sum(class_weights.values()) * len(CHARSET) for k, v in class_weights.items()}

In [31]:
history = model.fit(
X_train, Y_train,
epochs=100,
batch_size=128,
validation_data=(X_val, Y_val),
callbacks=callbacks,
class_weight=class_weights)

Epoch 1/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step - categorical_accuracy: 0.3903 - loss: 2.5682



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 34ms/step - categorical_accuracy: 0.3907 - loss: 2.5669 - val_categorical_accuracy: 0.3857 - val_loss: 2.8725 - learning_rate: 0.0010
Epoch 2/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - categorical_accuracy: 0.6615 - loss: 1.6855



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 31ms/step - categorical_accuracy: 0.6617 - loss: 1.6847 - val_categorical_accuracy: 0.8091 - val_loss: 1.3063 - learning_rate: 0.0010
Epoch 3/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - categorical_accuracy: 0.7336 - loss: 1.4877



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.7338 - loss: 1.4873 - val_categorical_accuracy: 0.8394 - val_loss: 1.2253 - learning_rate: 0.0010
Epoch 4/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.7851 - loss: 1.3663



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.7852 - loss: 1.3661 - val_categorical_accuracy: 0.8694 - val_loss: 1.1434 - learning_rate: 0.0010
Epoch 5/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.8273 - loss: 1.2575



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 31ms/step - categorical_accuracy: 0.8273 - loss: 1.2574 - val_categorical_accuracy: 0.8785 - val_loss: 1.1183 - learning_rate: 0.0010
Epoch 6/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.8359 - loss: 1.2406



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.8359 - loss: 1.2405 - val_categorical_accuracy: 0.8809 - val_loss: 1.1177 - learning_rate: 0.0010
Epoch 7/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.8512 - loss: 1.1940



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.8513 - loss: 1.1940 - val_categorical_accuracy: 0.8852 - val_loss: 1.1041 - learning_rate: 0.0010
Epoch 8/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 29ms/step - categorical_accuracy: 0.8542 - loss: 1.1962 - val_categorical_accuracy: 0.8738 - val_loss: 1.1519 - learning_rate: 0.0010
Epoch 9/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.8609 - loss: 1.1844



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.8609 - loss: 1.1844 - val_categorical_accuracy: 0.8923 - val_loss: 1.1030 - learning_rate: 0.0010
Epoch 10/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.8656 - loss: 1.1855 - val_categorical_accuracy: 0.8834 - val_loss: 1.1430 - learning_rate: 0.0010
Epoch 11/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 29ms/step - categorical_accuracy: 0.8508 - loss: 1.2382 - val_categorical_accuracy: 0.8842 - val_loss: 1.1605 - learning_rate: 0.0010
Epoch 12/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.8833 - loss: 1.1587



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.8833 - loss: 1.1587 - val_categorical_accuracy: 0.9132 - val_loss: 1.0713 - learning_rate: 5.0000e-04
Epoch 13/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 29ms/step - categorical_accuracy: 0.8934 - loss: 1.1228 - val_categorical_accuracy: 0.9125 - val_loss: 1.0676 - learning_rate: 5.0000e-04
Epoch 14/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 29ms/step - categorical_accuracy: 0.8940 - loss: 1.1216 - val_categorical_accuracy: 0.9129 - val_loss: 1.0696 - learning_rate: 5.0000e-04
Epoch 15/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.8958 - loss: 1.1168 - val_categorical_accuracy: 0.9117 - val_loss: 1.0755 - learning_rate: 5.0000e-04
Epoch 16/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9079 - loss: 1.0811



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.9080 - loss: 1.0810 - val_categorical_accuracy: 0.9243 - val_loss: 1.0384 - learning_rate: 2.5000e-04
Epoch 17/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - categorical_accuracy: 0.9131 - loss: 1.0612



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9131 - loss: 1.0612 - val_categorical_accuracy: 0.9246 - val_loss: 1.0327 - learning_rate: 2.5000e-04
Epoch 18/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.9124 - loss: 1.0604 - val_categorical_accuracy: 0.9203 - val_loss: 1.0358 - learning_rate: 2.5000e-04
Epoch 19/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9146 - loss: 1.0459 - val_categorical_accuracy: 0.9246 - val_loss: 1.0230 - learning_rate: 2.5000e-04
Epoch 20/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9196 - loss: 1.0363



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.9196 - loss: 1.0363 - val_categorical_accuracy: 0.9258 - val_loss: 1.0158 - learning_rate: 2.5000e-04
Epoch 21/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9198 - loss: 1.0286



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 31ms/step - categorical_accuracy: 0.9198 - loss: 1.0286 - val_categorical_accuracy: 0.9278 - val_loss: 1.0131 - learning_rate: 2.5000e-04
Epoch 22/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9241 - loss: 1.0201



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 31ms/step - categorical_accuracy: 0.9241 - loss: 1.0201 - val_categorical_accuracy: 0.9292 - val_loss: 1.0050 - learning_rate: 2.5000e-04
Epoch 23/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9213 - loss: 1.0235



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9213 - loss: 1.0234 - val_categorical_accuracy: 0.9303 - val_loss: 1.0043 - learning_rate: 2.5000e-04
Epoch 24/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9241 - loss: 1.0152 - val_categorical_accuracy: 0.9275 - val_loss: 1.0049 - learning_rate: 2.5000e-04
Epoch 25/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9274 - loss: 1.0053



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9274 - loss: 1.0052 - val_categorical_accuracy: 0.9306 - val_loss: 1.0048 - learning_rate: 2.5000e-04
Epoch 26/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - categorical_accuracy: 0.9306 - loss: 0.9942



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9306 - loss: 0.9942 - val_categorical_accuracy: 0.9329 - val_loss: 0.9900 - learning_rate: 1.2500e-04
Epoch 27/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9340 - loss: 0.9871 - val_categorical_accuracy: 0.9322 - val_loss: 0.9877 - learning_rate: 1.2500e-04
Epoch 28/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 29ms/step - categorical_accuracy: 0.9391 - loss: 0.9680 - val_categorical_accuracy: 0.9317 - val_loss: 0.9862 - learning_rate: 1.2500e-04
Epoch 29/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9407 - loss: 0.9644



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.9407 - loss: 0.9644 - val_categorical_accuracy: 0.9332 - val_loss: 0.9822 - learning_rate: 1.2500e-04
Epoch 30/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9421 - loss: 0.9583



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 31ms/step - categorical_accuracy: 0.9421 - loss: 0.9583 - val_categorical_accuracy: 0.9342 - val_loss: 0.9781 - learning_rate: 1.2500e-04
Epoch 31/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9431 - loss: 0.9540



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9431 - loss: 0.9540 - val_categorical_accuracy: 0.9360 - val_loss: 0.9775 - learning_rate: 1.2500e-04
Epoch 32/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - categorical_accuracy: 0.9427 - loss: 0.9511



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 31ms/step - categorical_accuracy: 0.9427 - loss: 0.9511 - val_categorical_accuracy: 0.9372 - val_loss: 0.9735 - learning_rate: 1.2500e-04
Epoch 33/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9436 - loss: 0.9487 - val_categorical_accuracy: 0.9348 - val_loss: 0.9767 - learning_rate: 1.2500e-04
Epoch 34/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 29ms/step - categorical_accuracy: 0.9457 - loss: 0.9434 - val_categorical_accuracy: 0.9365 - val_loss: 0.9739 - learning_rate: 1.2500e-04
Epoch 35/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9476 - loss: 0.9352



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9476 - loss: 0.9352 - val_categorical_accuracy: 0.9400 - val_loss: 0.9617 - learning_rate: 6.2500e-05
Epoch 36/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.9497 - loss: 0.9259 - val_categorical_accuracy: 0.9391 - val_loss: 0.9613 - learning_rate: 6.2500e-05
Epoch 37/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9506 - loss: 0.9245



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.9506 - loss: 0.9244 - val_categorical_accuracy: 0.9402 - val_loss: 0.9569 - learning_rate: 6.2500e-05
Epoch 38/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9523 - loss: 0.9219 - val_categorical_accuracy: 0.9375 - val_loss: 0.9630 - learning_rate: 6.2500e-05
Epoch 39/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.9535 - loss: 0.9171 - val_categorical_accuracy: 0.9388 - val_loss: 0.9585 - learning_rate: 6.2500e-05
Epoch 40/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9536 - loss: 0.9121



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.9536 - loss: 0.9121 - val_categorical_accuracy: 0.9406 - val_loss: 0.9544 - learning_rate: 3.1250e-05
Epoch 41/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 29ms/step - categorical_accuracy: 0.9540 - loss: 0.9107 - val_categorical_accuracy: 0.9402 - val_loss: 0.9522 - learning_rate: 3.1250e-05
Epoch 42/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 29ms/step - categorical_accuracy: 0.9571 - loss: 0.9046 - val_categorical_accuracy: 0.9400 - val_loss: 0.9529 - learning_rate: 3.1250e-05
Epoch 43/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9569 - loss: 0.9035



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9569 - loss: 0.9035 - val_categorical_accuracy: 0.9411 - val_loss: 0.9541 - learning_rate: 3.1250e-05
Epoch 44/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9566 - loss: 0.9030 - val_categorical_accuracy: 0.9409 - val_loss: 0.9532 - learning_rate: 1.5625e-05
Epoch 45/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - categorical_accuracy: 0.9577 - loss: 0.9025



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 31ms/step - categorical_accuracy: 0.9577 - loss: 0.9025 - val_categorical_accuracy: 0.9418 - val_loss: 0.9507 - learning_rate: 1.5625e-05
Epoch 46/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.9586 - loss: 0.8995 - val_categorical_accuracy: 0.9406 - val_loss: 0.9512 - learning_rate: 1.5625e-05
Epoch 47/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9592 - loss: 0.8975 - val_categorical_accuracy: 0.9411 - val_loss: 0.9524 - learning_rate: 1.5625e-05
Epoch 48/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.9600 - loss: 0.8952 - val_categorical_accuracy: 0.9405 - val_loss: 0.9513 - learning_rate: 7.8125e-06
Epoch 49/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9604 - loss: 0.8953



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 31ms/step - categorical_accuracy: 0.9603 - loss: 0.8953 - val_categorical_accuracy: 0.9423 - val_loss: 0.9503 - learning_rate: 7.8125e-06
Epoch 50/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 28ms/step - categorical_accuracy: 0.9614 - loss: 0.8934



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 31ms/step - categorical_accuracy: 0.9614 - loss: 0.8934 - val_categorical_accuracy: 0.9429 - val_loss: 0.9499 - learning_rate: 7.8125e-06
Epoch 51/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 29ms/step - categorical_accuracy: 0.9598 - loss: 0.8942 - val_categorical_accuracy: 0.9426 - val_loss: 0.9489 - learning_rate: 7.8125e-06
Epoch 52/100
[1m457/458[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 29ms/step - categorical_accuracy: 0.9605 - loss: 0.8940



[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 32ms/step - categorical_accuracy: 0.9605 - loss: 0.8940 - val_categorical_accuracy: 0.9431 - val_loss: 0.9496 - learning_rate: 7.8125e-06
Epoch 53/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 30ms/step - categorical_accuracy: 0.9611 - loss: 0.8946 - val_categorical_accuracy: 0.9420 - val_loss: 0.9492 - learning_rate: 7.8125e-06
Epoch 54/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.9606 - loss: 0.8931 - val_categorical_accuracy: 0.9415 - val_loss: 0.9489 - learning_rate: 3.9063e-06
Epoch 55/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 29ms/step - categorical_accuracy: 0.9595 - loss: 0.8932 - val_categorical_accuracy: 0.9425 - val_loss: 0.9482 - learning_rate: 3.9063e-06
Epoch 56/100
[1m458/458[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 30ms/step - categorical_accuracy: 0.9612 - loss: 0.8907 - val

In [32]:
### Trained on combined evaluate on original

evaluate_captcha(model, test_dict_ori)

Evaluating CAPTCHA strings: 100%|█████████████| 722/722 [00:36<00:00, 19.68it/s]


Character Accuracy: 92.20%
String Accuracy: 67.45%





In [33]:
### Trained on combined evaluate on combined

test_dict_combined = test_dict_ori | test_dict_gan
evaluate_captcha(model, test_dict_combined)

Evaluating CAPTCHA strings: 100%|███████████| 1215/1215 [01:00<00:00, 20.23it/s]


Character Accuracy: 94.00%
String Accuracy: 74.07%





## Train with GAN-modified original images

In [20]:
def load_segmented_images_with_GAN_modified(folder_path, gan_modified_folder_path, test_size=0.1):
    X_train_list, X_val_list, Y_train_list, Y_val_list = [], [], [], []
    test_dict = dict()
    captcha_names = os.listdir(folder_path)
    train_names, test_names = train_test_split(captcha_names, test_size=test_size, random_state=42)
    train_names, val_names = train_test_split(train_names, test_size=test_size, random_state=42)
    for captcha_name in captcha_names:
        for path in [folder_path, gan_modified_folder_path]:
            captcha_dir = os.path.join(path, captcha_name)
            if not os.path.isdir(captcha_dir) or len(captcha_name) == 0:
                continue
            image_files = [fname for fname in os.listdir(captcha_dir)
                    if fname.lower().endswith(".png") and fname.startswith("char_")]
            for filename in image_files:
                try:
                    char_index = int(filename.split("_")[1].split(".")[0])
                except Exception:
                    continue
                if char_index >= len(captcha_name):
                    continue
                label_char = captcha_name[char_index]
                if label_char not in CHARSET:
                    continue
                label_idx = CHARSET.index(label_char)
                img_path = os.path.join(captcha_dir, filename)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    continue
                
                img = resize_and_pad_image(img, 32)
                img = img.astype(np.float32) / 255.0
                img = np.expand_dims(img, axis=-1)
                label = np.zeros(len(CHARSET), dtype=np.float32)
                label[label_idx] = 1.0
    
                if captcha_name in train_names:
                    X_train_list.append(img)
                    Y_train_list.append(label)

                # val set should only have original image
                elif captcha_name in val_names and path == folder_path:
                    X_val_list.append(img)
                    Y_val_list.append(label)

                # test set should only have original image
                elif captcha_name in test_names and path == folder_path:
                    test_dict.setdefault(captcha_name, ([], []))
                    test_dict[captcha_name][0].append(img)
                    test_dict[captcha_name][1].append(label)
    return np.array(X_train_list), np.array(X_val_list), np.array(Y_train_list), np.array(Y_val_list), test_dict

In [21]:
X_train, X_val, Y_train, Y_val, test_dict = load_segmented_images_with_GAN_modified("segmented_new", "segmented_ori_modified")

print(f"Loaded {len(X_train)} training characters from original and modified, \
{len(X_val)} validation characters from original only; \
{len(test_dict)} test strings from original only")

Loaded 64517 training characters from original and modified, 3837 validation characters from original only; 722 test strings from original only


In [27]:
# Build model
model = build_model(num_classes=len(CHARSET))

# Enhanced callbacks
callbacks = [
tf.keras.callbacks.ModelCheckpoint(
    'best_model_on_combined_data.h5', save_best_only=True, monitor='val_categorical_accuracy'),
tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss', factor=0.5, patience=2, min_lr=1e-6),
tf.keras.callbacks.EarlyStopping(
    monitor='val_categorical_accuracy', patience=10, 
    restore_best_weights=True, mode='max', baseline=0.4),
tf.keras.callbacks.TensorBoard(log_dir='./logs')
]

# Train with class weights
class_counts = np.sum(Y_train, axis=0)
class_weights = {i: 1.0 / (count + 1e-5) for i, count in enumerate(class_counts)}
class_weights = {k: v / sum(class_weights.values()) * len(CHARSET) for k, v in class_weights.items()}

In [28]:
history = model.fit(
X_train, Y_train,
epochs=100,
batch_size=64,
validation_data=(X_val, Y_val),
callbacks=callbacks,
class_weight=class_weights)

Epoch 1/100


E0000 00:00:1744740839.647553  532609 meta_optimizer.cc:967] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inStatefulPartitionedCall/functional_9_1/dropout_24_1/stateless_dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


[1m1008/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.3955 - loss: 2.5751



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 21ms/step - categorical_accuracy: 0.3958 - loss: 2.5740 - val_categorical_accuracy: 0.7967 - val_loss: 1.3542 - learning_rate: 0.0010
Epoch 2/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.7289 - loss: 1.5078 - val_categorical_accuracy: 0.7824 - val_loss: 1.3714 - learning_rate: 0.0010
Epoch 3/100
[1m1007/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.7621 - loss: 1.4307



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.7622 - loss: 1.4306 - val_categorical_accuracy: 0.8327 - val_loss: 1.2343 - learning_rate: 0.0010
Epoch 4/100
[1m1007/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.7926 - loss: 1.3730



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.7927 - loss: 1.3730 - val_categorical_accuracy: 0.8348 - val_loss: 1.2611 - learning_rate: 0.0010
Epoch 5/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.7964 - loss: 1.3933 - val_categorical_accuracy: 0.8285 - val_loss: 1.3275 - learning_rate: 0.0010
Epoch 6/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.8291 - loss: 1.3166



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.8291 - loss: 1.3166 - val_categorical_accuracy: 0.8791 - val_loss: 1.1751 - learning_rate: 5.0000e-04
Epoch 7/100
[1m1007/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.8446 - loss: 1.2777



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.8446 - loss: 1.2777 - val_categorical_accuracy: 0.8804 - val_loss: 1.1727 - learning_rate: 5.0000e-04
Epoch 8/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.8542 - loss: 1.2593



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.8542 - loss: 1.2593 - val_categorical_accuracy: 0.8890 - val_loss: 1.1602 - learning_rate: 5.0000e-04
Epoch 9/100
[1m1008/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.8582 - loss: 1.2555



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.8582 - loss: 1.2555 - val_categorical_accuracy: 0.8991 - val_loss: 1.1579 - learning_rate: 5.0000e-04
Epoch 10/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - categorical_accuracy: 0.8590 - loss: 1.2582 - val_categorical_accuracy: 0.8947 - val_loss: 1.1624 - learning_rate: 5.0000e-04
Epoch 11/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.8591 - loss: 1.2707 - val_categorical_accuracy: 0.8952 - val_loss: 1.1735 - learning_rate: 5.0000e-04
Epoch 12/100
[1m1008/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.8800 - loss: 1.2170



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.8800 - loss: 1.2169 - val_categorical_accuracy: 0.9090 - val_loss: 1.1257 - learning_rate: 2.5000e-04
Epoch 13/100
[1m1008/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.8911 - loss: 1.1782



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.8911 - loss: 1.1782 - val_categorical_accuracy: 0.9098 - val_loss: 1.1176 - learning_rate: 2.5000e-04
Epoch 14/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.8958 - loss: 1.1589



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 21ms/step - categorical_accuracy: 0.8958 - loss: 1.1589 - val_categorical_accuracy: 0.9137 - val_loss: 1.1020 - learning_rate: 2.5000e-04
Epoch 15/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.8987 - loss: 1.1387 - val_categorical_accuracy: 0.9127 - val_loss: 1.0944 - learning_rate: 2.5000e-04
Epoch 16/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9037 - loss: 1.1272 - val_categorical_accuracy: 0.9127 - val_loss: 1.1007 - learning_rate: 2.5000e-04
Epoch 17/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.8985 - loss: 1.1294 - val_categorical_accuracy: 0.9088 - val_loss: 1.0913 - learning_rate: 2.5000e-04
Epoch 18/100
[1m1007/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9019 - loss: 1.1



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9019 - loss: 1.1215 - val_categorical_accuracy: 0.9150 - val_loss: 1.0867 - learning_rate: 2.5000e-04
Epoch 19/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9050 - loss: 1.1135



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.9050 - loss: 1.1135 - val_categorical_accuracy: 0.9171 - val_loss: 1.0797 - learning_rate: 2.5000e-04
Epoch 20/100
[1m1008/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9063 - loss: 1.1059



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.9063 - loss: 1.1059 - val_categorical_accuracy: 0.9187 - val_loss: 1.0772 - learning_rate: 2.5000e-04
Epoch 21/100
[1m1007/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9059 - loss: 1.1091



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9059 - loss: 1.1091 - val_categorical_accuracy: 0.9189 - val_loss: 1.0697 - learning_rate: 2.5000e-04
Epoch 22/100
[1m1008/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9071 - loss: 1.1012



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.9071 - loss: 1.1012 - val_categorical_accuracy: 0.9260 - val_loss: 1.0528 - learning_rate: 2.5000e-04
Epoch 23/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - categorical_accuracy: 0.9104 - loss: 1.0889 - val_categorical_accuracy: 0.9208 - val_loss: 1.0691 - learning_rate: 2.5000e-04
Epoch 24/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9119 - loss: 1.0829 - val_categorical_accuracy: 0.9166 - val_loss: 1.0749 - learning_rate: 2.5000e-04
Epoch 25/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9195 - loss: 1.0634 - val_categorical_accuracy: 0.9213 - val_loss: 1.0540 - learning_rate: 1.2500e-04
Epoch 26/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - categorical_accuracy: 0.9260 - loss: 1.



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9300 - loss: 1.0286 - val_categorical_accuracy: 0.9302 - val_loss: 1.0343 - learning_rate: 1.2500e-04
Epoch 29/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9275 - loss: 1.0276 - val_categorical_accuracy: 0.9260 - val_loss: 1.0299 - learning_rate: 1.2500e-04
Epoch 30/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - categorical_accuracy: 0.9296 - loss: 1.0204 - val_categorical_accuracy: 0.9275 - val_loss: 1.0387 - learning_rate: 1.2500e-04
Epoch 31/100
[1m1007/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9296 - loss: 1.0203



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9296 - loss: 1.0203 - val_categorical_accuracy: 0.9309 - val_loss: 1.0265 - learning_rate: 1.2500e-04
Epoch 32/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9311 - loss: 1.0127 - val_categorical_accuracy: 0.9268 - val_loss: 1.0299 - learning_rate: 1.2500e-04
Epoch 33/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 20ms/step - categorical_accuracy: 0.9314 - loss: 1.0100 - val_categorical_accuracy: 0.9273 - val_loss: 1.0279 - learning_rate: 1.2500e-04
Epoch 34/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9378 - loss: 0.9926



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9378 - loss: 0.9926 - val_categorical_accuracy: 0.9330 - val_loss: 1.0171 - learning_rate: 6.2500e-05
Epoch 35/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - categorical_accuracy: 0.9412 - loss: 0.9813 - val_categorical_accuracy: 0.9317 - val_loss: 1.0173 - learning_rate: 6.2500e-05
Epoch 36/100
[1m1008/1009[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9423 - loss: 0.9789



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9423 - loss: 0.9789 - val_categorical_accuracy: 0.9338 - val_loss: 1.0149 - learning_rate: 6.2500e-05
Epoch 37/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9440 - loss: 0.9759



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 21ms/step - categorical_accuracy: 0.9439 - loss: 0.9759 - val_categorical_accuracy: 0.9346 - val_loss: 1.0066 - learning_rate: 6.2500e-05
Epoch 38/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - categorical_accuracy: 0.9427 - loss: 0.9725 - val_categorical_accuracy: 0.9309 - val_loss: 1.0117 - learning_rate: 6.2500e-05
Epoch 39/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9432 - loss: 0.9697 - val_categorical_accuracy: 0.9315 - val_loss: 1.0156 - learning_rate: 6.2500e-05
Epoch 40/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9466 - loss: 0.9607 - val_categorical_accuracy: 0.9315 - val_loss: 1.0080 - learning_rate: 3.1250e-05
Epoch 41/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9483 - loss: 0.



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9495 - loss: 0.9490 - val_categorical_accuracy: 0.9351 - val_loss: 1.0042 - learning_rate: 1.5625e-05
Epoch 44/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - categorical_accuracy: 0.9510 - loss: 0.9480 - val_categorical_accuracy: 0.9333 - val_loss: 1.0046 - learning_rate: 1.5625e-05
Epoch 45/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step - categorical_accuracy: 0.9508 - loss: 0.9467



[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - categorical_accuracy: 0.9508 - loss: 0.9467 - val_categorical_accuracy: 0.9354 - val_loss: 1.0015 - learning_rate: 1.5625e-05
Epoch 46/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 20ms/step - categorical_accuracy: 0.9509 - loss: 0.9474 - val_categorical_accuracy: 0.9346 - val_loss: 0.9996 - learning_rate: 1.5625e-05
Epoch 47/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9519 - loss: 0.9436 - val_categorical_accuracy: 0.9346 - val_loss: 1.0005 - learning_rate: 1.5625e-05
Epoch 48/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9517 - loss: 0.9434 - val_categorical_accuracy: 0.9343 - val_loss: 0.9980 - learning_rate: 1.5625e-05
Epoch 49/100
[1m1009/1009[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - categorical_accuracy: 0.9524 - loss: 0.

In [29]:
### Evaluate on hold-out original
evaluate_captcha(model, test_dict)

Evaluating CAPTCHA strings: 100%|█████████████| 722/722 [00:34<00:00, 20.72it/s]


Character Accuracy: 92.50%
String Accuracy: 68.01%



