In [2]:
import torch
import torch.nn as nn

# Two-logit image classifier: four conv stages followed by a globally pooled MLP head.
# Spatial sizes in the comments assume a 256x256 RGB input.
stage_channels = [3, 32, 64, 128, 256]

feature_layers = []
for in_ch, out_ch in zip(stage_channels[:-1], stage_channels[1:]):
    feature_layers.extend([
        nn.Conv2d(in_ch, out_ch, kernel_size=3, padding=1),  # 3x3 conv, padding=1 keeps H x W
        nn.BatchNorm2d(out_ch),
        nn.ReLU(),
        nn.MaxPool2d(2),  # halves H and W: 256 -> 128 -> 64 -> 32 -> 16 over the four stages
    ])

classifier_layers = [
    # Global average pooling
    nn.AdaptiveAvgPool2d((1, 1)),  # 256x1x1
    nn.Flatten(),                  # 256
    nn.Linear(256, 128),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(128, 2),
]

# Unpack into a single flat Sequential so the layers keep the same order and indices (0..21).
model = nn.Sequential(*feature_layers, *classifier_layers)

# Smoke test: push one random 256x256 RGB image through the network and print the
# output shape (expected: torch.Size([1, 2])).
# The check runs in eval mode under no_grad so that it does not update the BatchNorm
# running statistics with random noise, apply Dropout, or build an autograd graph.
x = torch.randn(1, 3, 256, 256)
was_training = model.training
model.eval()
with torch.no_grad():
    print(model(x).shape)
model.train(was_training)  # restore whatever mode the model was in before the check


torch.Size([1, 2])


In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Per-channel normalization constants shared by the training and validation pipelines.
channel_mean = [0.5, 0.5, 0.5]
channel_std = [0.5, 0.5, 0.5]

# Random augmentation is applied to training images only.
augmentation_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
]

# Training: augment, convert to a tensor, then normalize.
train_transform = transforms.Compose(
    augmentation_steps
    + [transforms.ToTensor(), transforms.Normalize(channel_mean, channel_std)]
)

# Validation: no augmentation, same tensor conversion and normalization as training.
val_transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize(channel_mean, channel_std)]
)

# Datasets are read from class-per-subfolder directories under data/train and data/val.
train_dataset = datasets.ImageFolder("data/train", transform=train_transform)
val_dataset = datasets.ImageFolder("data/val", transform=val_transform)

# Loader settings are named once so both loaders always agree.
batch_size = 32
num_workers = 4
# Pinned (page-locked) host memory speeds up host-to-GPU copies; it is only useful
# when CUDA is actually available.
pin_memory = torch.cuda.is_available()

# Only the training data is shuffled; validation order stays fixed.
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
print("Number of training samples: ", len(train_dataset))

Number of training samples:  71071


In [4]:
# Report whether PyTorch can see a CUDA device. The device name is queried only when
# CUDA is available, because torch.cuda.get_device_name raises on CPU-only machines.
cuda_available = torch.cuda.is_available()
print("CUDA available: ", cuda_available)
if cuda_available:
    print("GPU name: ", torch.cuda.get_device_name(0))
else:
    print("GPU name: ", "none (CPU only)")

CUDA available:  True
GPU name:  NVIDIA GeForce RTX 4050 Laptop GPU


In [5]:
import torch
import torch.nn as nn

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model = model.to(device)

# Adam over all model parameters with learning rate 1e-4, and a cross-entropy loss.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.CrossEntropyLoss()


In [6]:
# Mini-batch size.
# NOTE(review): the DataLoaders are constructed in an earlier cell, so assigning this
# here does not change their batch size, and the training cell below does not read
# this name — confirm whether a later cell uses it.
batch_size = 32


In [7]:
from tqdm import tqdm

num_epochs = 20

# One pass per epoch: train on train_loader, then evaluate on val_loader and print
# the mean loss and accuracy for both splits.
for epoch in range(num_epochs):
    # ----- Training -----
    model.train()
    running_loss = 0   # sum of the per-batch mean losses for this epoch
    num_batches = 0
    correct = 0
    total = 0

    train_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

    for images, labels in train_bar:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar once; each .item() call forces a device-to-host sync.
        batch_loss = loss.item()
        running_loss += batch_loss
        num_batches += 1
        _, predicted = torch.max(outputs, 1)  # index of the largest logit per sample
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        train_bar.set_postfix({
            "Loss": f"{batch_loss:.4f}",
            "Acc": f"{100*correct/total:.2f}%"
        })

    # NaN (instead of ZeroDivisionError) marks an empty loader.
    train_loss = running_loss / num_batches if num_batches else float("nan")
    train_acc = 100 * correct / total if total else float("nan")

    # ----- Validation -----
    model.eval()
    val_running_loss = 0
    val_batches = 0
    val_correct = 0
    val_total = 0

    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            val_running_loss += criterion(outputs, labels).item()
            val_batches += 1
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

    val_loss = val_running_loss / val_batches if val_batches else float("nan")
    val_acc = 100 * val_correct / val_total if val_total else float("nan")

    print(f"\n📊 Epoch {epoch+1} Summary:")
    print(f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f}")
    print(f"Train Acc: {train_acc:.2f}% | Val Acc: {val_acc:.2f}%\n")


Epoch 1/20:   5%|â–Œ         | 119/2221 [00:13<03:53,  9.01it/s, Loss=0.6608, Acc=57.35%]


KeyboardInterrupt: 