<a href="https://colab.research.google.com/github/umair986/salah_detection/blob/main/training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install required libraries if necessary
!pip install torch torchvision pillow

# Import libraries
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
from PIL import Image
from enum import Enum




In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Define the enum for prayer poses
class PrayerPose(Enum):
    QIYAM = "Qiyam"
    RUKU = "Ruku"
    SUJUD = "Sujud"
    JALSA = "Jalsa"
    TASHAHHUD = "Tashahhud"
    UNKNOWN = "Unknown"


In [5]:
# Define the custom dataset class
class NamazPoseDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = [pose.value for pose in PrayerPose if pose != PrayerPose.UNKNOWN]
        self.image_paths = []
        self.labels = []

        # Load dataset
        for class_idx, class_name in enumerate(self.classes):
            class_dir = os.path.join(root_dir, class_name)
            if not os.path.exists(class_dir):
                print(f"Warning: Directory not found for {class_name}")
                continue

            print(f"Loading images for {class_name}...")
            for img_name in os.listdir(class_dir):
                if img_name.endswith(('.jpg', '.jpeg', '.png')):
                    self.image_paths.append(os.path.join(class_dir, img_name))
                    self.labels.append(class_idx)

            print(f"Found {len([x for x in self.labels if x == class_idx])} images for {class_name}")

    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        image = Image.open(img_path).convert('RGB')
        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


In [6]:
# Define the CNN model for prayer pose classification
class NamazPoseCNN(nn.Module):
    def __init__(self, num_classes):
        super(NamazPoseCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512 * 14 * 14, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(4096, 1024),
            nn.ReLU(inplace=True),
            nn.Linear(1024, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x


In [7]:
# Replace this with your dataset directory path
dataset_directory = "/content/drive/MyDrive/dataset_directory"  # Update path for Colab

# Set up data transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Create dataset and dataloader
dataset = NamazPoseDataset(dataset_directory, transform=transform)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

# Check if dataset is loaded
if len(dataset) == 0:
    print("No images found in the dataset! Check your data directory structure.")
else:
    print(f"Dataset loaded successfully with {len(dataset)} images.")


Loading images for Qiyam...
Found 307 images for Qiyam
Loading images for Ruku...
Found 226 images for Ruku
Loading images for Sujud...
Found 191 images for Sujud
Loading images for Jalsa...
Found 57 images for Jalsa
Loading images for Tashahhud...
Found 172 images for Tashahhud
Dataset loaded successfully with 953 images.


In [8]:
torch.cuda.empty_cache()


In [9]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torch.cuda.amp import GradScaler

# Set up the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    print("Using CPU. Ensure GPU is enabled in Colab settings.")

# Initialize the model
num_classes = len([pose for pose in PrayerPose if pose != PrayerPose.UNKNOWN])
model = NamazPoseCNN(num_classes).to(device)

# Define the optimizer and loss criterion
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Set up GradScaler for mixed precision training
scaler = GradScaler()

# Optimized DataLoader
batch_size = 32  # Adjust based on GPU memory capacity
num_workers = 4  # Use multi-threaded data loading
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, persistent_workers=True)

# Training loop
num_epochs = 25
print("Starting training...")

model.train()
for epoch in range(num_epochs):
    epoch_start_time = torch.cuda.Event(enable_timing=True)
    epoch_end_time = torch.cuda.Event(enable_timing=True)
    epoch_start_time.record()

    running_loss = 0.0
    correct = 0
    total = 0

    for batch_idx, (inputs, labels) in enumerate(dataloader):
        batch_start_time = torch.cuda.Event(enable_timing=True)
        batch_end_time = torch.cuda.Event(enable_timing=True)
        batch_start_time.record()

        try:
            # Move inputs and labels to the device
            inputs, labels = inputs.to(device, non_blocking=True), labels.to(device, non_blocking=True)

            optimizer.zero_grad()

            # Forward and backward passes
            with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Update metrics
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

        except Exception as e:
            print(f"Error in batch {batch_idx+1}: {e}")
            continue

        batch_end_time.record()
        torch.cuda.synchronize()
        print(f"Batch {batch_idx+1} processed in {batch_start_time.elapsed_time(batch_end_time):.2f} ms")

    # Calculate and print epoch metrics
    epoch_loss = running_loss / len(dataloader)
    accuracy = 100. * correct / total
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {accuracy:.2f}%')

    # Save checkpoints less frequently
    if (epoch + 1) % 5 == 0:
        checkpoint = {
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': epoch_loss,
        }
        torch.save(checkpoint, f'model_checkpoint_epoch_{epoch+1}.pth')
        print(f"Checkpoint saved after epoch {epoch+1}")

    epoch_end_time.record()
    torch.cuda.synchronize()
    print(f"Epoch {epoch+1} completed in {epoch_start_time.elapsed_time(epoch_end_time) / 1000:.2f} seconds")

print("Training completed!")


Using GPU: Tesla T4
Starting training...


  scaler = GradScaler()
  with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):


Batch 1 processed in 2107.80 ms
Batch 2 processed in 157.09 ms
Batch 3 processed in 147.87 ms
Batch 4 processed in 115.33 ms
Batch 5 processed in 165.31 ms
Batch 6 processed in 284.86 ms
Batch 7 processed in 251.30 ms
Batch 8 processed in 240.64 ms
Batch 9 processed in 289.58 ms
Batch 10 processed in 290.88 ms
Batch 11 processed in 241.55 ms
Batch 12 processed in 241.71 ms
Batch 13 processed in 289.19 ms
Batch 14 processed in 244.04 ms
Batch 15 processed in 240.48 ms
Batch 16 processed in 255.51 ms
Batch 17 processed in 289.90 ms
Batch 18 processed in 241.04 ms
Batch 19 processed in 240.44 ms
Batch 20 processed in 241.10 ms
Batch 21 processed in 289.91 ms
Batch 22 processed in 243.25 ms
Batch 23 processed in 240.55 ms
Batch 24 processed in 240.90 ms
Batch 25 processed in 286.85 ms
Batch 26 processed in 247.02 ms
Batch 27 processed in 240.76 ms
Batch 28 processed in 241.47 ms
Batch 29 processed in 289.69 ms
Batch 30 processed in 229.54 ms
Epoch [1/25], Loss: 3.6761, Accuracy: 34.42%
Epo