In [13]:
# final project
import os
from PIL import Image
import torch
from torch.utils.data import Dataset

class JHUCrowdDataset(Dataset):
    """Dataset of (image, count) pairs read from a directory on disk.

    The directory is expected to contain an ``images`` sub-directory and an
    ``image_labels.txt`` annotation file. Each non-blank annotation line is
    comma-separated: the first field is the image id (the file name without
    its ``.jpg`` extension) and the second field is the count, parsed as a
    float. Any further fields on the line are ignored.

    Args:
        data_dir: Directory holding ``images/`` and ``image_labels.txt``.
        transform: Optional callable applied to the RGB PIL image before it
            is returned from ``__getitem__``.

    Attributes:
        data: List of ``(image_file_name, count)`` tuples, in file order.
    """

    def __init__(self, data_dir, transform=None):
        self.image_dir = os.path.join(data_dir, "images")
        self.annotation_file = os.path.join(data_dir, "image_labels.txt")  # adjust if needed
        self.transform = transform

        self.data = []
        with open(self.annotation_file, "r") as f:
            for line in f:
                line = line.strip()
                if not line:
                    # Blank lines (e.g. a trailing newline at the end of the
                    # file) carry no annotation; skipping them avoids an
                    # IndexError on parts[1].
                    continue
                parts = line.split(",")
                # Strip the fields so "0001 , 273" still maps to "0001.jpg".
                image_id = parts[0].strip()
                count = float(parts[1].strip())
                self.data.append((image_id + ".jpg", count))

    def __len__(self):
        """Return the number of annotated images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            A tuple ``(image, count)`` where ``image`` is the RGB image (after
            ``transform`` if one was given) and ``count`` is a float32 tensor
            of shape ``(1,)``.
        """
        image_name, count = self.data[idx]
        image_path = os.path.join(self.image_dir, image_name)

        # convert() returns a new in-memory image, so the file handle can be
        # released as soon as the with-block exits.
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)

        return image, torch.tensor([count], dtype=torch.float32)

In [14]:
import torch.nn as nn

class CrowdCNN(nn.Module):
    """Small convolutional regressor that maps an image batch to one value each.

    The network is a single ``nn.Sequential`` stored in ``self.net``:
    two 3x3 conv+ReLU pairs (32 channels) and a 2x2 max-pool, two more
    conv+ReLU pairs (64 channels) and a second max-pool, one conv+ReLU
    (128 channels), global average pooling to 1x1, flatten, and a final
    ``Linear(128, 1)``. All convolutions use ``padding=1``, so only the
    pooling layers change the spatial size.
    """

    def __init__(self):
        super().__init__()

        def conv_relu(in_channels, out_channels):
            # 3x3 convolution that preserves height/width, followed by ReLU.
            return [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
            ]

        # Layers are created in the same order they appear in the Sequential,
        # so parameter names (net.0, net.2, ...) and initialisation order
        # stay fixed.
        layers = [
            # Stage 1: 3 -> 32 -> 32 channels, then halve the resolution.
            *conv_relu(3, 32),
            *conv_relu(32, 32),
            nn.MaxPool2d(2),
            # Stage 2: 32 -> 64 -> 64 channels, then halve the resolution.
            *conv_relu(32, 64),
            *conv_relu(64, 64),
            nn.MaxPool2d(2),
            # Stage 3: 64 -> 128 channels, collapsed to one value per channel.
            *conv_relu(64, 128),
            nn.AdaptiveAvgPool2d((1, 1)),
            # Regression head: (N, 128, 1, 1) -> (N, 128) -> (N, 1).
            nn.Flatten(),
            nn.Linear(128, 1),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return the result."""
        output = self.net(x)
        return output


In [15]:
import os
from PIL import Image
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm


# Device selection: use the GPU when one is visible, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Preprocessing: fixed 256x256 resize, tensor conversion, then per-channel
# normalisation with the mean/std values below.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]
transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ]
)

# Training data: shuffled mini-batches of 16 samples.
train_dir = "jhu_crowd_v2.0/train"
train_dataset = JHUCrowdDataset(train_dir, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Network, mean-squared-error objective on the counts, and Adam optimiser.
model = CrowdCNN().to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Train for a fixed number of epochs, showing a tqdm bar per epoch.
epochs = 10
for epoch in range(epochs):
    model.train()
    total_loss = 0  # sum of per-batch mean losses for this epoch

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}")
    for images, counts in progress_bar:
        images, counts = images.to(device), counts.to(device)

        # Clear gradients left over from the previous step before the new
        # forward/backward pass.
        optimizer.zero_grad()
        preds = model(images)
        loss = criterion(preds, counts)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the running sum and the bar.
        batch_loss = loss.item()
        total_loss += batch_loss
        progress_bar.set_postfix(loss=batch_loss)

    # Average over the number of batches, not the number of samples.
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{epochs} - Avg Loss: {avg_loss:.4f}")

Epoch 1/10: 100%|██████████| 142/142 [05:02<00:00,  2.13s/it, loss=5.92e+4]


Epoch 1/10 - Avg Loss: 1358652.7805


Epoch 2/10:   6%|▌         | 8/142 [00:19<05:21,  2.40s/it, loss=9.14e+4]


KeyboardInterrupt: 