In [10]:
import os
import cv2
import numpy as np
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from sklearn.model_selection import train_test_split
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from tqdm import tqdm



image_dir = 'APS360_Project_Dataset/dataset5/img'


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
yolo_model = torch.hub.load('yolov5', 'custom', path='yolov5/runs/train/card_number_detector6/weights/best.pt', source = 'local').to(device)


class DigitCNN(nn.Module):
    def __init__(self):
        super(DigitCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(128 * 8 * 8, 256)
        self.fc2 = nn.Linear(256, 10)  # 10 classes for digits 0-9

        # Dropout layers
        self.dropout1 = nn.Dropout(0.1)  # After first FC layer

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv3(x))
        x = F.max_pool2d(x, 2)
        x = x.view(-1, 128 * 8 * 8)
        x = F.relu(self.fc1(x))
        x = self.dropout1(x)
        x = self.fc2(x)
        return x

digit_cnn = DigitCNN()
digit_cnn.load_state_dict(torch.load('model_checkpoints/model_epoch_30.pth', map_location=device))
digit_cnn.to(device)

class HybridOCR(nn.Module):
    def __init__(self, num_classes=10, rnn_hidden_size=256):
        super(HybridOCR, self).__init__()
        
        # CNN backbone with smaller initial channels and gradual increase
        self.cnn = nn.Sequential(
            # First conv block
            nn.Conv2d(1, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # 32 x 64
            
            # Second conv block
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2, 2),  # 16 x 32
            
            # Third conv block
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            
            # Fourth conv block without pooling to maintain width
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            
            # Dropout for regularization
            nn.Dropout(0.2),
        )
        
        # Calculate the feature dimensions
        self.rnn_hidden_size = rnn_hidden_size
        
        # Bidirectional GRU (might work better than LSTM for this case)
        self.rnn = nn.GRU(input_size=256 * 16,  # height after CNN
                         hidden_size=rnn_hidden_size,
                         num_layers=2,
                         bidirectional=True,
                         dropout=0.2,
                         batch_first=True)
        
        # Final classification layer with proper initialization
        self.fc = nn.Linear(rnn_hidden_size * 2, num_classes + 1)  # +1 for CTC blank
        
        # Initialize weights
        self._initialize_weights()
        
    def _initialize_weights(self):
        """Initialize model weights for better training"""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.constant_(m.bias, 0)
    
    def forward(self, x):
        # Input shape: (batch, channels=1, height=32, width=128)
        batch_size = x.size(0)
        
        # CNN feature extraction
        conv_features = self.cnn(x)
        
        # Prepare for RNN
        # Change shape from (batch, channels, height, width) to (batch, width, channels * height)
        conv_features = conv_features.permute(0, 3, 1, 2)
        conv_features = conv_features.contiguous().view(batch_size, -1, 256 * 16)
        
        # RNN
        rnn_output, _ = self.rnn(conv_features)
        
        # Final classification
        output = self.fc(rnn_output)
        
        # Apply log softmax over character predictions
        return F.log_softmax(output, dim=2)

def train_one_epoch(model, train_loader, criterion, optimizer, device, epoch):
    model.train()
    total_loss = 0
    total_char_acc = 0
    total_seq_acc = 0
    batch_count = 0
    
    # Progress bar
    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}')
    
    for batch_idx, (inputs, labels) in enumerate(pbar):
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        # Forward pass
        outputs = model(inputs)
        batch_size = outputs.size(0)
        
        # Prepare CTC inputs
        input_lengths = torch.full(size=(batch_size,),
                                 fill_value=outputs.size(1),
                                 dtype=torch.long,
                                 device=device)
        
        target_lengths = []
        target_labels = []
        
        for label in labels:
            valid_label = label[label != 0]
            target_lengths.append(len(valid_label))
            target_labels.extend(valid_label.tolist())
        
        target_lengths = torch.tensor(target_lengths, dtype=torch.long, device=device)
        target_labels = torch.tensor(target_labels, dtype=torch.long, device=device)
        
        # CTC loss calculation
        outputs_for_loss = outputs.permute(1, 0, 2)
        loss = criterion(outputs_for_loss, target_labels, input_lengths, target_lengths)
        
        # Backward pass
        loss.backward()
        
        # Gradient clipping
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        
        optimizer.step()
        
        # Calculate accuracy
        predictions = decode_predictions(outputs)
        char_acc, seq_acc = calculate_accuracy(predictions, labels)
        
        # Update metrics
        total_loss += loss.item()
        total_char_acc += char_acc
        total_seq_acc += seq_acc
        batch_count += 1
        
        # Update progress bar
        pbar.set_postfix({
            'loss': f'{loss.item():.4f}',
            'char_acc': f'{char_acc:.4f}',
            'seq_acc': f'{seq_acc:.4f}'
        })
    
    # Calculate epoch metrics
    avg_loss = total_loss / batch_count
    avg_char_acc = total_char_acc / batch_count
    avg_seq_acc = total_seq_acc / batch_count
    
    return avg_loss, avg_char_acc, avg_seq_acc

def validate(model, val_loader, criterion, device):
    model.eval()
    total_loss = 0
    total_char_acc = 0
    total_seq_acc = 0
    batch_count = 0
    
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outputs = model(inputs)
            batch_size = outputs.size(0)
            
            # Prepare CTC inputs
            input_lengths = torch.full(size=(batch_size,),
                                     fill_value=outputs.size(1),
                                     dtype=torch.long,
                                     device=device)
            
            target_lengths = []
            target_labels = []
            
            for label in labels:
                valid_label = label[label != 0]
                target_lengths.append(len(valid_label))
                target_labels.extend(valid_label.tolist())
            
            target_lengths = torch.tensor(target_lengths, dtype=torch.long, device=device)
            target_labels = torch.tensor(target_labels, dtype=torch.long, device=device)
            
            # CTC loss calculation
            outputs_for_loss = outputs.permute(1, 0, 2)
            loss = criterion(outputs_for_loss, target_labels, input_lengths, target_lengths)
            
            # Calculate accuracy
            predictions = decode_predictions(outputs)
            char_acc, seq_acc = calculate_accuracy(predictions, labels)
            
            # Update metrics
            total_loss += loss.item()
            total_char_acc += char_acc
            total_seq_acc += seq_acc
            batch_count += 1
    
    # Calculate validation metrics
    avg_loss = total_loss / batch_count
    avg_char_acc = total_char_acc / batch_count
    avg_seq_acc = total_seq_acc / batch_count
    
    return avg_loss, avg_char_acc, avg_seq_acc

# Main training loop
def train_model(model, train_loader, val_loader, num_epochs=50, device='cuda'):
    # Initialize criterion and optimizer
    criterion = nn.CTCLoss(blank=0, reduction='mean')
    optimizer = optim.AdamW(model.parameters(), lr=0.0003, weight_decay=0.01)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=5, 
                                 verbose=True, min_lr=1e-6)
    
    # Training history
    history = {
        'train_loss': [], 'train_char_acc': [], 'train_seq_acc': [],
        'val_loss': [], 'val_char_acc': [], 'val_seq_acc': []
    }
    
    best_val_loss = float('inf')
    patience = 10
    patience_counter = 0
    
    print("Starting training...")
    print(f"Training device: {device}")
    print(f"Initial learning rate: {optimizer.param_groups[0]['lr']}")
    
    for epoch in range(num_epochs):
        print(f"\nEpoch {epoch+1}/{num_epochs}")
        
        # Training phase
        train_loss, train_char_acc, train_seq_acc = train_one_epoch(
            model, train_loader, criterion, optimizer, device, epoch
        )
        
        # Validation phase
        val_loss, val_char_acc, val_seq_acc = validate(
            model, val_loader, criterion, device
        )
        
        # Update learning rate
        scheduler.step(val_loss)
        
        # Update history
        history['train_loss'].append(train_loss)
        history['train_char_acc'].append(train_char_acc)
        history['train_seq_acc'].append(train_seq_acc)
        history['val_loss'].append(val_loss)
        history['val_char_acc'].append(val_char_acc)
        history['val_seq_acc'].append(val_seq_acc)
        
        # Print epoch results
        print(f"\nEpoch {epoch+1} Results:")
        print(f"Train Loss: {train_loss:.4f}, Char Acc: {train_char_acc:.4f}, Seq Acc: {train_seq_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Char Acc: {val_char_acc:.4f}, Seq Acc: {val_seq_acc:.4f}")
        print(f"Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")
        
        # Save best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            patience_counter = 0
            torch.save({
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'best_val_loss': best_val_loss,
                'history': history
            }, 'best_hybrid_model.pth')
            print("New best model saved!")
        else:
            patience_counter += 1
        
        # Early stopping
        if patience_counter >= patience:
            print("\nEarly stopping triggered!")
            break
    
    return history

# Helper functions for decoding predictions remain the same
import pandas as pd

# Load the labels from the Excel file
labels_df = pd.read_excel('APS360_Project_Dataset/dataset5/labels.xlsx')

# Assuming your Excel has a single column with labels (1, 2, 3, ...)
# Create a dictionary mapping from numeric labels to corresponding image filenames
image_labels = {f'img_{int(row[0])}': int(row['card_num']) for _, row in labels_df.iterrows()}

class CreditCardDataset(Dataset):
    def __init__(self, image_dir, labels_dict, yolo_model, transform=None):
        """
        Args:
            image_dir (str): Directory with all the images
            labels_dict (dict): Dictionary mapping image names to their labels
            yolo_model: Loaded YOLO model for credit card number detection
            transform: Optional transform to be applied on the cropped image
        """
        self.image_dir = image_dir
        self.labels_dict = labels_dict
        self.transform = transform
        self.yolo_model = yolo_model
        self.image_filenames = list(labels_dict.keys())
        
    def preprocess_image(self, image_path):
        """
        Load image and use YOLO to crop the credit card number region
        """
        # Read image using cv2
        original_image = cv2.imread(image_path)
        if original_image is None:
            raise ValueError(f"Could not load image at {image_path}")
            
        # Convert BGR to RGB
        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
        
        # Get YOLO predictions
        results = self.yolo_model(original_image)
        
        # Extract bounding box coordinates
        if len(results.xyxy[0]) > 0:
            # Get the detection with highest confidence
            detection = results.xyxy[0][0]  # [x1, y1, x2, y2, confidence, class]
            x1, y1, x2, y2 = map(int, detection[:4])
            
            # Crop the image
            cropped_image = original_image[y1:y2, x1:x2]
            
            # Convert to grayscale
            gray_image = cv2.cvtColor(cropped_image, cv2.COLOR_RGB2GRAY)
            
            # Convert to PIL Image
            pil_image = Image.fromarray(gray_image)
            
            return pil_image
        else:
            # If no detection, return the original image converted to grayscale
            print(f"No detection found for {image_path}, using full image")
            gray_image = cv2.cvtColor(original_image, cv2.COLOR_RGB2GRAY)
            pil_image = Image.fromarray(gray_image)
            return pil_image

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """
        Returns a tuple (image, label) where label is a list of integers
        """
        try:
            # Get image filename
            img_name = self.image_filenames[idx]
            image_path = os.path.join(self.image_dir, img_name + '.png')
            
            # Process image with YOLO and get cropped region
            image = self.preprocess_image(image_path)
            
            # Apply transforms if any
            if self.transform:
                image = self.transform(image)
            
            # Convert label from integer to list of integers
            label = [int(digit) for digit in str(self.labels_dict[img_name])]
            
            return image, torch.tensor(label, dtype=torch.long)
            
        except Exception as e:
            print(f"Error processing image {img_name}: {str(e)}")
            raise e

# Optional: Function to visualize the preprocessing
def visualize_preprocessing(dataset, index):
    """
    Visualize the original image, YOLO detection, and final processed image
    """
    import matplotlib.pyplot as plt
    
    # Get original image path
    img_name = dataset.image_filenames[index]
    image_path = os.path.join(dataset.image_dir, img_name + '.png')
    
    # Read original image
    original_image = cv2.imread(image_path)
    original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
    
    # Get YOLO predictions
    results = dataset.yolo_model(original_image)
    
    # Create a copy for drawing
    detection_image = original_image.copy()
    
    # Draw detection box
    if len(results.xyxy[0]) > 0:
        detection = results.xyxy[0][0]
        x1, y1, x2, y2 = map(int, detection[:4])
        cv2.rectangle(detection_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    
    # Get processed image
    processed_image, label = dataset[index]
    
    # Plot
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    
    axes[0].imshow(original_image)
    axes[0].set_title('Original Image')
    axes[0].axis('off')
    
    axes[1].imshow(detection_image)
    axes[1].set_title('YOLO Detection')
    axes[1].axis('off')
    
    # Convert tensor to numpy for visualization
    processed_np = processed_image.squeeze().numpy()
    axes[2].imshow(processed_np, cmap='gray')
    axes[2].set_title('Processed Image')
    axes[2].axis('off')
    
    plt.tight_layout()
    plt.show()
    
    print(f"Label: {''.join(map(str, label.tolist()))}")

# Define any transformations (if needed)
transform = transforms.Compose([
    transforms.Resize((32, 128)),  # Resize to your model's expected input size
    transforms.ToTensor(),
])

# Split dataset into training, validation, and test sets
train_files, test_files = train_test_split(list(image_labels.keys()), test_size=0.2, random_state=42)
train_files, val_files = train_test_split(train_files, test_size=0.25, random_state=42)  # 60% train, 20% val

# Create datasets
train_labels_dict = {filename: image_labels[filename] for filename in train_files}
val_labels_dict = {filename: image_labels[filename] for filename in val_files}
test_labels_dict = {filename: image_labels[filename] for filename in test_files}

train_dataset = CreditCardDataset(image_dir, train_labels_dict, yolo_model, transform=transform)
val_dataset = CreditCardDataset(image_dir, val_labels_dict, yolo_model, transform=transform)
test_dataset = CreditCardDataset(image_dir, test_labels_dict, yolo_model, transform=transform)

# Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

from torch.nn.utils.rnn import pad_sequence

def custom_collate_fn(batch):
    images, labels = zip(*batch)
    
    # Pad labels to the maximum length in the batch and convert to tensor
    padded_labels = pad_sequence([torch.tensor(label, dtype=torch.long) for label in labels], 
                                 batch_first=True, padding_value=0)
    
    # Stack images into a single tensor
    images = torch.stack(images, dim=0)
    
    return images, padded_labels

# Update the DataLoader to use the custom collate function
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, collate_fn=custom_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)


# Initialize the hybrid model
img_height = 32  # Based on the transform resize height
hybrid_model = HybridOCR().to(device)

# Initialize CTC loss
criterion = nn.CTCLoss(blank=0, reduction='mean')

# Initialize optimizer with learning rate scheduling
initial_lr = 0.001
optimizer = optim.Adam(filter(lambda p: p.requires_grad, hybrid_model.parameters()), 
                      lr=initial_lr)
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3, 
                             verbose=True, min_lr=1e-6)


# Define image transformations
transform = transforms.Compose([
    transforms.Resize((32, 128)),  # Resize to CRNN input size
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])  # Normalize grayscale image
])

# Create datasets with YOLO model
train_dataset = CreditCardDataset(
    image_dir=image_dir,
    labels_dict=train_labels_dict,
    yolo_model=yolo_model,
    transform=transform
)

val_dataset = CreditCardDataset(
    image_dir=image_dir,
    labels_dict=val_labels_dict,
    yolo_model=yolo_model,
    transform=transform
)

test_dataset = CreditCardDataset(
    image_dir=image_dir,
    labels_dict=test_labels_dict,
    yolo_model=yolo_model,
    transform=transform
)

# Create DataLoaders with the custom collate function
train_loader = DataLoader(
    train_dataset, 
    batch_size=32, 
    shuffle=True, 
    collate_fn=custom_collate_fn
)

val_loader = DataLoader(
    val_dataset, 
    batch_size=32, 
    shuffle=False, 
    collate_fn=custom_collate_fn
)

test_loader = DataLoader(
    test_dataset, 
    batch_size=32, 
    shuffle=False, 
    collate_fn=custom_collate_fn
)

def decode_predictions(outputs):
    """
    Decode the model outputs into digit sequences
    Args:
        outputs: Model outputs after log_softmax (T, B, C)
    Returns:
        List of predicted number sequences
    """
    # Convert to probabilities and get best indices
    _, max_indices = torch.max(outputs.transpose(0, 1), 2)
    
    # Convert to numpy for processing
    indices = max_indices.cpu().numpy()
    
    # Process each sequence in the batch
    decoded_sequences = []
    for sequence in indices:
        # Remove duplicates and blanks (0)
        current_digit = -1
        current_sequence = []
        
        for digit in sequence:
            if digit != 0 and digit != current_digit:  # Exclude blanks and duplicates
                current_sequence.append(digit)
                current_digit = digit
        
        decoded_sequences.append(current_sequence)
    
    return decoded_sequences

def calculate_accuracy(predictions, targets):
    """
    Calculate character and sequence level accuracy
    Args:
        predictions: List of predicted sequences
        targets: Tensor of target sequences (batch_size, max_length)
    Returns:
        (character_accuracy, sequence_accuracy)
    """
    correct_chars = 0
    total_chars = 0
    correct_sequences = 0
    total_sequences = 0
    
    for pred, target in zip(predictions, targets):
        # Convert target tensor to list, removing padding
        target_seq = [int(x) for x in target if x != 0]
        
        # Character level accuracy
        min_len = min(len(pred), len(target_seq))
        correct_chars += sum(1 for i in range(min_len) if pred[i] == target_seq[i])
        total_chars += max(len(pred), len(target_seq))
        
        # Sequence level accuracy
        if len(pred) == len(target_seq) and all(p == t for p, t in zip(pred, target_seq)):
            correct_sequences += 1
        total_sequences += 1
    
    char_accuracy = correct_chars / total_chars if total_chars > 0 else 0
    seq_accuracy = correct_sequences / total_sequences if total_sequences > 0 else 0
    
    return char_accuracy, seq_accuracy

# Training configuration
num_epochs = 50
best_val_accuracy = 0.0

print("Starting training...")
print(f"Training device: {device}")
print(f"Initial learning rate: {initial_lr}")
print(f"Number of epochs: {num_epochs}")

# Start training
history = train_model(
    model=hybrid_model,
    train_loader=train_loader,
    val_loader=val_loader,
    num_epochs=num_epochs,
    device=device,
)

# Save the final model
torch.save({
    'model_state_dict': hybrid_model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler_state_dict': scheduler.state_dict(),
    'history': history,
    'final_ensemble_weights': hybrid_model.get_ensemble_weights()
}, 'final_hybrid_model.pth')

print("Training completed!")
print(f"Final ensemble weights: {hybrid_model.get_ensemble_weights()}")

# Plot training history
import matplotlib.pyplot as plt

def plot_training_history(history):
    fig, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, figsize=(15, 10))
    
    # Plot losses
    ax1.plot(history['train_loss'], label='Train')
    ax1.plot(history['val_loss'], label='Validation')
    ax1.set_title('Loss Over Time')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    
    # Plot character accuracy
    ax2.plot(history['train_char_acc'], label='Train')
    ax2.plot(history['val_char_acc'], label='Validation')
    ax2.set_title('Character Accuracy Over Time')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.legend()
    
    # Plot sequence accuracy
    ax3.plot(history['train_seq_acc'], label='Train')
    ax3.plot(history['val_seq_acc'], label='Validation')
    ax3.set_title('Sequence Accuracy Over Time')
    ax3.set_xlabel('Epoch')
    ax3.set_ylabel('Accuracy')
    ax3.legend()
    
    # Plot ensemble weights
    weights = np.array(history['ensemble_weights'])
    ax4.plot(weights[:, 0], label='CRNN Weight')
    ax4.plot(weights[:, 1], label='DigitCNN Weight')
    ax4.set_title('Ensemble Weights Over Time')
    ax4.set_xlabel('Epoch')
    ax4.set_ylabel('Weight')
    ax4.legend()
    
    plt.tight_layout()
    plt.show()

# Plot the training history
plot_training_history(history)


YOLOv5 🚀 2024-11-1 Python-3.9.13 torch-2.2.2 CPU

Fusing layers... 
YOLOv5s summary: 157 layers, 7012822 parameters, 0 gradients, 15.8 GFLOPs
Adding AutoShape... 


Starting training...
Training device: cpu
Initial learning rate: 0.001
Number of epochs: 50
Starting training...
Training device: cpu
Initial learning rate: 0.0003

Epoch 1/50


Epoch 1:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 1:  50%|▌| 1/2 [00:06<00:06,  6.17s/it, loss=inf, char_acc=0.0490, seq_acc

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image


Epoch 1: 100%|█| 2/2 [00:08<00:00,  4.49s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 1 Results:
Train Loss: nan, Char Acc: 0.0245, Seq Acc: 0.0000
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000300

Epoch 2/50


Epoch 2:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image
No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 2: 100%|█| 2/2 [00:07<00:00,  3.99s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 2 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0312
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000300

Epoch 3/50


Epoch 3:  50%|▌| 1/2 [00:05<00:05,  5.41s/it, loss=nan, char_acc=0.0000, seq_acc

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image
No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 3: 100%|█| 2/2 [00:08<00:00,  4.07s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 3 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0312
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000300

Epoch 4/50


Epoch 4:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 4:  50%|▌| 1/2 [00:05<00:05,  5.45s/it, loss=nan, char_acc=0.0000, seq_acc

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image


Epoch 4: 100%|█| 2/2 [00:08<00:00,  4.06s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 4 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000300

Epoch 5/50


Epoch 5:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image
No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image


Epoch 5: 100%|█| 2/2 [00:07<00:00,  3.95s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 5 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0312
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000300

Epoch 6/50


Epoch 6:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image


Epoch 6:  50%|▌| 1/2 [00:05<00:05,  5.46s/it, loss=nan, char_acc=0.0000, seq_acc

No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 6: 100%|█| 2/2 [00:08<00:00,  4.13s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 6 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000150

Epoch 7/50


Epoch 7:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image
No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image


Epoch 7: 100%|█| 2/2 [00:08<00:00,  4.16s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 7 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000150

Epoch 8/50


Epoch 8:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 8:  50%|▌| 1/2 [00:05<00:05,  5.61s/it, loss=nan, char_acc=0.0000, seq_acc

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image


Epoch 8: 100%|█| 2/2 [00:08<00:00,  4.27s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 8 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000150

Epoch 9/50


Epoch 9:   0%|                                            | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image


Epoch 9:  50%|▌| 1/2 [00:05<00:05,  5.70s/it, loss=nan, char_acc=0.0000, seq_acc

No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 9: 100%|█| 2/2 [00:08<00:00,  4.44s/it, loss=nan, char_acc=0.0000, seq_acc


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 9 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0312
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000150

Epoch 10/50


Epoch 10:   0%|                                           | 0/2 [00:00<?, ?it/s]

No detection found for APS360_Project_Dataset/dataset5/img/img_9.png, using full image
No detection found for APS360_Project_Dataset/dataset5/img/img_57.png, using full image


Epoch 10: 100%|█| 2/2 [00:14<00:00,  7.12s/it, loss=nan, char_acc=0.0000, seq_ac


No detection found for APS360_Project_Dataset/dataset5/img/img_74.png, using full image

Epoch 10 Results:
Train Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0312
Val Loss: nan, Char Acc: 0.0000, Seq Acc: 0.0000
Learning Rate: 0.000150

Early stopping triggered!


AttributeError: 'HybridOCR' object has no attribute 'get_ensemble_weights'