In [70]:
import torch
import torchvision
import torchaudio
import librosa
print("Everything loaded fine ✅")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

Everything loaded fine ✅
Using device: cuda


In [71]:
from torchvision import transforms, datasets
from torch.utils.data import DataLoader, random_split

def get_dataloaders(data_dir, batch_size=32):
    train_transform = transforms.Compose([
        transforms.Resize((180, 180)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.2),
        transforms.RandomRotation(degrees=30),  # increase rotation
        transforms.RandomPerspective(distortion_scale=0.3, p=0.5),  # more distortion
        transforms.RandomAffine(degrees=20, translate=(0.15, 0.15), scale=(0.8, 1.2)),  # wider range
        transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        transforms.ToTensor()
    ])

    test_transform = transforms.Compose([
        transforms.Resize((180, 180)),
        transforms.ToTensor()
    ])

    full_dataset = datasets.ImageFolder(root=data_dir)
    total_length = len(full_dataset)
    train_length = int(0.7 * total_length)
    val_length = int(0.2 * total_length)
    test_length = total_length - train_length - val_length
    train_set, val_set, test_set = random_split(full_dataset, [train_length, val_length, test_length])

    # Apply transform separately for each set
    train_set.dataset.transform = train_transform
    val_set.dataset.transform = test_transform
    test_set.dataset.transform = test_transform

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, test_loader

In [72]:
data_path = "./Data/images_original"

train_loader, val_loader, test_loader = get_dataloaders(data_path)

for images, labels in train_loader:
    print("Batch of images shape:", images.shape)  # (batch_size, channels, height, width)
    print("Batch of labels shape:", labels.shape)  # (batch_size,)
    print("image dtype:", images.dtype)  # Should be torch.float32
    break  # Just show the first batch

Batch of images shape: torch.Size([32, 3, 180, 180])
Batch of labels shape: torch.Size([32])
image dtype: torch.float32


In [73]:
def train(model, train_loader, val_loader, optimizer, loss_fn, epochs=50):
    for epoch in range(epochs):
        model.train()
        total_loss = 0

        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss:.4f}")

In [74]:
def evaluate_accuracy(model, data_loader):
    model.eval()
    total_correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            total_correct += (preds == labels).sum().item()
            total += labels.size(0)
    return total_correct / total

# Net1


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net1(nn.Module):
    def __init__(self):
        super(Net1, self).__init__()
        self.fc1 = nn.Linear(3*180*180, 512)  # Input layer
        self.fc2 = nn.Linear(512, 128)  # Hidden layer
        self.output = nn.Linear(128, 10)  # Output layer (10 classes)

    def forward(self, x):
        x = x.view(x.size(0), -1) # Flatten the input
        x = F.relu(self.fc1(x)) # Activation function for the first layer
        x = F.relu(self.fc2(x)) # Activation function for the second layer
        x = self.output(x)
        return x

In [None]:
model = Net1().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001) 
loss_fn = nn.CrossEntropyLoss()

In [None]:
train (model, train_loader, val_loader, optimizer, loss_fn, epochs=50)
torch.save(model.state_dict(), "net1_fully_connected.pth")

In [None]:
# how to load the model from path
model = Net1().to(device)  # Reinitialize the model
model.load_state_dict(torch.load("net1_fully_connected.pth"))
model.eval()  # Set the model to evaluation mode

In [None]:
train_acc = evaluate_accuracy(model, train_loader)
val_acc = evaluate_accuracy(model, val_loader)
test_acc = evaluate_accuracy(model, test_loader)

print(f"✅ Train Acc: {train_acc:.2%}")
print(f"✅ Val Acc: {val_acc:.2%}")
print(f"✅ Test Acc: {test_acc:.2%}")

# Net2


In [78]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(32, 32, kernel_size=3, padding=1)

        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.5)
        
        # self.gap = nn.AdaptiveAvgPool2d((1, 1))  # Global Average Pooling
        self.fc1 = nn.Linear(32 * 45 * 45, 256)
        self.fc2 = nn.Linear(256, 64)
        self.out = nn.Linear(64, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool(x)

        # x = self.gap(x)  # Now shape is [B, 32, 1, 1]
        x = x.view(x.size(0), -1)  # Flatten to [B, 32]
        x = self.dropout1(F.relu(self.fc1(x)))
        x = self.dropout2(F.relu(self.fc2(x)))
        x = self.out(x)
        return x

In [None]:
model = Net2().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=0.003)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5)
loss_fn = nn.CrossEntropyLoss()
train(model, train_loader, val_loader, optimizer, loss_fn, epochs=50)
torch.save(model.state_dict(), "net2_custom_cnn.pth")

Epoch 1/50, Loss: 50.8332
Epoch 2/50, Loss: 50.7156
Epoch 3/50, Loss: 50.6947
Epoch 4/50, Loss: 50.7031
Epoch 5/50, Loss: 50.7741
Epoch 6/50, Loss: 50.6649
Epoch 7/50, Loss: 50.6931
Epoch 8/50, Loss: 50.6770
Epoch 9/50, Loss: 50.6633
Epoch 10/50, Loss: 50.6431
Epoch 11/50, Loss: 50.6453
Epoch 12/50, Loss: 50.6375
Epoch 13/50, Loss: 50.6237
Epoch 14/50, Loss: 50.6237
Epoch 15/50, Loss: 50.6123
Epoch 16/50, Loss: 50.6509
Epoch 17/50, Loss: 50.6272


KeyboardInterrupt: 

In [82]:
train_acc = evaluate_accuracy(model, train_loader)
val_acc = evaluate_accuracy(model, val_loader)
test_acc = evaluate_accuracy(model, test_loader)

print(f"✅ Train Acc: {train_acc:.2%}")
print(f"✅ Val Acc: {val_acc:.2%}")
print(f"✅ Test Acc: {test_acc:.2%}")

✅ Train Acc: 83.55%
✅ Val Acc: 39.20%
✅ Test Acc: 45.54%


# Net3


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net3(nn.Module):
    def __init__(self):
        super(Net3, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm2d(16)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(16)
        self.pool = nn.MaxPool2d(2, 2)

        self.conv3 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm2d(32)
        self.conv4 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(32)

        self.fc1 = nn.Linear(32 * 44 * 44, 256)
        self.bn_fc1 = nn.BatchNorm1d(256)
        self.fc2 = nn.Linear(256, 64)
        self.bn_fc2 = nn.BatchNorm1d(64)
        self.out = nn.Linear(64, 10)
        self.dropout = nn.Dropout(0.3)

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.pool(x)

        x = F.relu(self.bn3(self.conv3(x)))
        x = F.relu(self.bn4(self.conv4(x)))
        x = self.pool(x)

        x = x.view(x.size(0), -1)
        x = self.dropout(F.relu(self.bn_fc1(self.fc1(x))))
        x = self.dropout(F.relu(self.bn_fc2(self.fc2(x))))
        x = self.out(x)
        return x



In [None]:
model = Net3().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
loss_fn = nn.CrossEntropyLoss()
train(model, train_loader, val_loader, optimizer, loss_fn, epochs=50)
torch.save(model.state_dict(), "net3_cnn_bn.pth")

In [None]:
train_acc = evaluate_accuracy(model, train_loader)
val_acc = evaluate_accuracy(model, val_loader)
test_acc = evaluate_accuracy(model, test_loader)

print(f"✅ Train Acc: {train_acc:.2%}")
print(f"✅ Val Acc: {val_acc:.2%}")
print(f"✅ Test Acc: {test_acc:.2%}")

# Net4


In [None]:
model = Net3()  # CNN with 4 conv layers + BatchNorm
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
train(model, train_loader, val_loader, optimizer, loss_fn, epochs=50)
torch.save(model.state_dict(), "net4_cnn_bn_rmsprop.pth")