### Transfer Model

In [1]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

# Start from EfficientNet-B0 initialised with its default pre-trained weights.
weights = EfficientNet_B0_Weights.DEFAULT
model = efficientnet_b0(weights=weights)

# Turn the network into a regressor: the final classifier layer is swapped for
# a head that emits one scalar per image. The head stays wrapped in
# nn.Sequential so an activation (e.g. nn.ReLU()) can be appended later.
head_in_features = model.classifier[1].in_features
model.classifier[1] = nn.Sequential(
    nn.Linear(head_in_features, 1),
)

# Optional: keep the convolutional feature extractor frozen (at first), so only
# the new regression head has trainable parameters.
for frozen_param in model.features.parameters():
    frozen_param.requires_grad = False



### Preprocess

In [2]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

class CrowdDataset(Dataset):
    """Image / crowd-count pairs read from one dataset split directory.

    The split directory must contain an ``images`` sub-directory and an
    ``image_labels.txt`` file. Each label line is comma-separated; the first
    field is the image id (file name without the ``.jpg`` extension) and the
    second field is the crowd count. Any further fields are ignored, as are
    lines with fewer than two fields (e.g. blank lines).

    Args:
        data_dir: Path to the split directory.
        transform: Optional callable applied to each RGB PIL image.
        max_count: Samples whose count is strictly greater than this value
            are skipped. Defaults to 1000.

    Raises:
        OSError: If the label file cannot be opened.
        ValueError: If the second field of a label line is not a number.
    """

    def __init__(self, data_dir, transform=None, max_count=1000):
        self.images_dir = os.path.join(data_dir, 'images')
        self.label_path = os.path.join(data_dir, 'image_labels.txt')
        self.transform = transform
        self.max_count = max_count

        # List of (filename, count) tuples that survived the count filter.
        self.samples = []
        with open(self.label_path, 'r') as f:
            for line in f:
                parts = line.strip().split(',')
                if len(parts) < 2:
                    continue  # blank or malformed line
                image_id = parts[0].strip()
                count = float(parts[1])
                if count > max_count:
                    continue  # skip this image
                self.samples.append((image_id + '.jpg', count))

        # Sanity check that data loaded correctly. Report an entry that is
        # actually in the dataset instead of the loop's leftover locals, which
        # are undefined for an empty file and may belong to a skipped line.
        if self.samples:
            print("Sample:", *self.samples[-1])
        else:
            print("No samples loaded from", self.label_path)

    def __len__(self):
        """Return the number of samples kept after filtering."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(image, count)`` for the sample at position ``idx``.

        ``image`` is the RGB image after ``transform`` (when one was given);
        ``count`` is a scalar ``torch.float32`` tensor.
        """
        filename, count = self.samples[idx]
        image_path = os.path.join(self.images_dir, filename)

        # Image.open is lazy and keeps the file open; the context manager
        # closes it as soon as convert() has produced an in-memory copy.
        with Image.open(image_path) as img:
            image = img.convert("RGB")
        if self.transform:
            image = self.transform(image)

        count = torch.tensor(count, dtype=torch.float32)

        return image, count
    
# Preprocessing applied to every image before it reaches the network.
# The backbone is loaded with ImageNet pre-trained weights, so inputs stay
# 3-channel RGB and are normalised with the per-channel ImageNet statistics
# rather than a single 0.5 / 0.5 value broadcast over all channels.
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# Root of the dataset and the directory of each data split beneath it.
base_dir = './jhu_crowd_v2.0'
splits = {
    split_name: os.path.join(base_dir, split_name)
    for split_name in ('train', 'val', 'test')
}

### Training Setup

In [3]:
# Loss function: mean squared error between predicted and true counts.
# nn.L1Loss() (mean absolute error) is an alternative worth trying.
criterion = nn.MSELoss(reduction="mean")

In [4]:
# Optimizer: Adam over the model's parameters with a fixed learning rate.
LEARNING_RATE = 1e-4
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

In [5]:
from torch.utils.data import DataLoader

BATCH_SIZE = 32

# Build one dataset per split (train, val, test — in that order), all sharing
# the same preprocessing pipeline.
train_dataset, val_dataset, test_dataset = (
    CrowdDataset(splits[split_name], transform=transform)
    for split_name in ('train', 'val', 'test')
)

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(eval_dataset, batch_size=BATCH_SIZE)
    for eval_dataset in (val_dataset, test_dataset)
)

print("Total samples:", len(train_dataset))

Sample: 4380.jpg 153.0
Sample: 4377.jpg 92.0
Sample: 4378.jpg 112.0
Total samples: 2102


### Forward Pass Loop

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()  # inference-only sanity check; use model.train() when training

# Nothing is back-propagated in this loop, so autograd is switched off to
# avoid building a computation graph for every batch.
with torch.no_grad():
    for images, labels in train_loader:
        images = images.to(device)  # move the batch to the model's device
        labels = labels.float().to(device)  # make sure labels are float

        outputs = model(images)  # Shape: [batch_size, 1]
        outputs = outputs.squeeze(1)  # Shape: [batch_size]

        # Round to the nearest whole count for display; a bare .int() would
        # truncate toward zero and disagree with the rounding used for the loss.
        int_counts = outputs.round().int()  # predictions as ints
        true_counts = labels.int()  # actual values as ints

        # Keep the loss itself as a float tensor: an integer tensor carries no
        # gradient, so overwriting `loss` with a rounded int would make
        # loss.backward() impossible. Only the displayed copy is rounded.
        loss = criterion(outputs, labels)
        loss_display = loss.round().int()

        print("Prediction: ", int_counts)
        print("Value: ", true_counts)
        print("Loss: ", loss_display.item())

        # If training: drop the torch.no_grad() context, call model.train()
        # instead of model.eval(), and run:
        # optimizer.zero_grad()
        # loss.backward()
        # optimizer.step()

Prediction:  tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.int32)
Value:  tensor([263, 345,  48,  19, 100,  25,  14, 204,  34, 167, 357,  59, 155,  18,
         56, 168,  97, 304, 110, 173,  42,   8,  25, 360, 245, 135, 123,   3,
         21,  16,  14, 173], dtype=torch.int32)
Loss:  26824
Prediction:  tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.int32)
Value:  tensor([ 76,  44,  15, 236, 559,  45,  19, 106,  66, 107, 190,  44, 237,   3,
        107,  55,  49, 200, 147, 103,  82, 326, 123, 527, 128, 106, 938, 222,
         13, 191,  69, 230], dtype=torch.int32)
Loss:  63931
Prediction:  tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0], dtype=torch.int32)
Value:  tensor([ 14, 488, 173, 203,  30, 152, 392,  18,  20, 229, 150, 280,  14,  39,
        204,  19,  85,  11, 2

KeyboardInterrupt: 