In [211]:
import torch.nn as nn
import torch
from torchvision import models
from utils import save_net,load_net


In [212]:
# this is with CBAM attention

class ChannelAttention(nn.Module):
    """Channel-attention gate of a CBAM block.

    The input is squeezed to 1x1 per channel with both global average pooling
    and global max pooling. Each pooled descriptor goes through the same
    two-layer bottleneck of 1x1 convolutions, the two results are summed, and
    a sigmoid turns the sum into one weight per channel.

    Args:
        in_planes: Number of channels of the incoming feature map.
        ratio: Reduction factor of the bottleneck. The hidden width is
            ``in_planes // ratio`` but never less than one channel.

    Returns (from ``forward``):
        A tensor of sigmoid weights with spatial size 1x1 and ``in_planes``
        channels. It is not multiplied into ``x`` here; the caller does that.
    """

    def __init__(self, in_planes, ratio=16):
        super(ChannelAttention, self).__init__()
        # Global average / max pooling compress every feature map to 1x1.
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # Clamp the bottleneck width: with in_planes < ratio the integer
        # division would yield 0 hidden channels and a gate that cannot learn.
        hidden_planes = max(1, in_planes // ratio)

        # Shared MLP, written as two bias-free 1x1 convolutions.
        self.fc = nn.Sequential(
            nn.Conv2d(in_planes, hidden_planes, 1, bias=False),
            nn.ReLU(),
            nn.Conv2d(hidden_planes, in_planes, 1, bias=False)
        )

        self.sigmoid = nn.Sigmoid()  # maps the summed scores to per-channel weights

    def forward(self, x):
        """Return the per-channel attention weights for feature map ``x``."""
        avg_out = self.fc(self.avg_pool(x))
        max_out = self.fc(self.max_pool(x))
        return self.sigmoid(avg_out + max_out)
    


class SpatialAttention(nn.Module):
    """Spatial-attention gate of a CBAM block.

    The channel dimension is collapsed twice (mean and max), the two
    single-channel maps are stacked, and one bias-free convolution followed
    by a sigmoid turns them into a one-channel weight map.

    Args:
        kernel_size: Side length of the square convolution kernel (7 by
            default). Padding is ``kernel_size // 2``, which keeps the
            spatial size unchanged for odd kernel sizes.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        half_kernel = kernel_size // 2
        # Two input channels: the channel-wise mean map and the channel-wise max map.
        self.conv = nn.Conv2d(
            in_channels=2,
            out_channels=1,
            kernel_size=kernel_size,
            padding=half_kernel,
            bias=False,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the one-channel spatial attention map for ``x``."""
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True)[0]
        pooled = torch.cat((channel_mean, channel_max), dim=1)
        scores = self.conv(pooled)
        return self.sigmoid(scores)

class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel gate, then spatial gate.

    Args:
        in_planes: Channel count of the feature map, passed to ChannelAttention.
        ratio: Bottleneck reduction factor, passed to ChannelAttention.
        kernel_size: Convolution kernel size, passed to SpatialAttention.
    """

    def __init__(self, in_planes, ratio=16, kernel_size=7):
        super().__init__()
        self.ca = ChannelAttention(in_planes, ratio)
        self.sa = SpatialAttention(kernel_size)

    def forward(self, x):
        """Scale ``x`` by its channel weights, then scale that result by its spatial weights."""
        channel_refined = x * self.ca(x)
        # The spatial gate is computed from the channel-refined map, not from the raw input.
        spatial_weights = self.sa(channel_refined)
        return channel_refined * spatial_weights

    
# Build a sequence of convolution and pooling layers from a config list.
def make_layers(cfg, in_channels=3, batch_norm=False, dilation=False):
    """Turn a layer configuration list into an ``nn.Sequential``.

    Args:
        cfg: Iterable of entries. ``'M'`` adds a 2x2 max-pool with stride 2;
            any other entry is used as the output channel count of a 3x3
            convolution followed by an in-place ReLU.
        in_channels: Channel count fed to the first convolution.
        batch_norm: If true, insert ``nn.BatchNorm2d`` between each
            convolution and its ReLU.
        dilation: If true, every convolution uses dilation 2 (and padding 2);
            otherwise dilation 1 (and padding 1).

    Returns:
        ``nn.Sequential`` holding the layers in configuration order.
    """
    rate = 1
    if dilation:
        rate = 2

    modules = []
    channels = in_channels
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        # Padding equals the dilation rate, so a 3x3 kernel keeps the spatial size.
        modules.append(nn.Conv2d(channels, entry, kernel_size=3, padding=rate, dilation=rate))
        if batch_norm:
            modules.append(nn.BatchNorm2d(entry))
        modules.append(nn.ReLU(inplace=True))
        channels = entry

    return nn.Sequential(*modules)

class CSRNet_CBAM(nn.Module):
    """CSRNet-style density-map network with a CBAM block after the frontend.

    Pipeline: ``frontend`` (ten 3x3 convolutions with three 2x2 max-pools)
    -> ``cbam`` (channel + spatial attention on the 512-channel features)
    -> ``backend`` (six dilated 3x3 convolutions) -> 1x1 ``output_layer``
    producing a single-channel map.

    Args:
        load_weights: If False (the default), all convolutions are
            initialised with N(0, 0.01) weights and zero biases, and the
            frontend is then overwritten with the matching tensors of
            torchvision's ImageNet-pretrained VGG-16. If True, the
            constructor does none of this and the caller is expected to load
            a state dict afterwards.
    """

    def __init__(self, load_weights=False):
        super(CSRNet_CBAM, self).__init__()
        # Counter initialised to 0 and never updated in this class;
        # presumably maintained by the training script - TODO confirm.
        self.seen = 0
        # 'M' entries become max-pool layers, integers become conv output widths.
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat = [512, 512, 512, 256, 128, 64]

        self.frontend = make_layers(self.frontend_feat)
        self.cbam = CBAM(512)  # attention applied to the frontend output
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

        if not load_weights:
            # `pretrained=True` is deprecated in torchvision; the explicit
            # weights enum selects the same ImageNet checkpoint.
            mod = models.vgg16(weights=models.VGG16_Weights.IMAGENET1K_V1)
            self._initialize_weights()
            vgg_state = mod.features.state_dict()
            frontend_state = self.frontend.state_dict()
            # Keep only VGG tensors whose name and shape both match a frontend
            # tensor; VGG layers with no frontend counterpart are dropped.
            matched_weights = {k: v for k, v in vgg_state.items() if k in frontend_state and v.size() == frontend_state[k].size()}
            frontend_state.update(matched_weights)
            self.frontend.load_state_dict(frontend_state)

    def forward(self, x):
        """Run ``x`` through frontend, CBAM, backend and the 1x1 output convolution."""
        x = self.frontend(x)
        x = self.cbam(x)        # re-weight frontend features before the backend
        x = self.backend(x)
        x = self.output_layer(x)
        return x

    def _initialize_weights(self):
        """Initialise every ``nn.Conv2d`` in the model: weights ~ N(0, 0.01), biases 0."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, std=0.01)
                # The CBAM convolutions are created with bias=False, so bias may be None.
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

In [191]:
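## Original CSRNet (without CBAM), kept for reference inside a string literal so that it is not executed.
## NOTE: `nn.init.normal_(m.weight, 0.01)` below passes 0.01 as the mean (std stays at its default), unlike `std=0.01` used in CSRNet_CBAM.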
"""
class CSRNet(nn.Module):
    def __init__(self, load_weights=False):
        super(CSRNet, self).__init__()
        self.frontend_feat = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512]
        self.backend_feat  = [512, 512, 512, 256, 128, 64]

        self.frontend = make_layers(self.frontend_feat)
        self.backend = make_layers(self.backend_feat, in_channels=512, dilation=True)
        self.output_layer = nn.Conv2d(64, 1, kernel_size=1)

        if not load_weights:
            mod = models.vgg16(pretrained=True)
            self._initialize_weights()
            frontend_items = list(self.frontend.state_dict().items())
            mod_items = list(mod.state_dict().items())
            for i in range(len(frontend_items)):
                frontend_items[i][1].data[:] = mod_items[i][1].data[:]

    def forward(self,x):
        x = self.frontend(x)
        x = self.backend(x)
        x = self.output_layer(x)
        return x
    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.normal_(m.weight, 0.01)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

def make_layers(cfg, in_channels = 3,batch_norm=False,dilation = False):
    if dilation:
        d_rate = 2
    else:
        d_rate = 1
    layers = []
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels, v, kernel_size=3, padding=d_rate,dilation = d_rate)
            if batch_norm:
                layers += [conv2d, nn.BatchNorm2d(v), nn.ReLU(inplace=True)]
            else:
                layers += [conv2d, nn.ReLU(inplace=True)]
            in_channels = v
    return nn.Sequential(*layers)"""


In [135]:
# This part trains the original CSRNet with only 20 images.

In [192]:
# Smoke test: build the network and push one random 3x255x255 image through it
# to check the output shape.
# NOTE(review): in this notebook `CSRNet` only appears inside a string literal,
# so this cell presumably relies on a definition already present in the kernel
# (e.g. imported from model.py) - confirm it survives Restart & Run All.
model = CSRNet()
x = torch.rand((1,3,255,255))
model(x).shape

torch.Size([1, 1, 31, 31])

In [195]:
!python train.py plain_train.json plain_val.json 0 0

epoch 0, processed 0 samples, lr 0.0000100000
📁 Epoch 0
✅ Total training images: 80
📦 Batch size: 1
🔁 Total batches per epoch: 80
🚀 Learning rate: 0.0000100000
Epoch: [0][0/80]	Time 10.476 (10.476)	Data 0.023 (0.023)	Loss 209.2738 (209.2738)	
Epoch: [0][30/80]	Time 1.799 (7.705)	Data 0.005 (0.012)	Loss 211.3284 (886.5945)	
Epoch: [0][60/80]	Time 9.808 (7.868)	Data 0.012 (0.012)	Loss 440.2245 (1063.1822)	
begin test
 * MAE 355.681 
 * best MAE 355.681 
epoch 1, processed 80 samples, lr 0.0000100000
📁 Epoch 1
✅ Total training images: 80
📦 Batch size: 1
🔁 Total batches per epoch: 80
🚀 Learning rate: 0.0000100000
Epoch: [1][0/80]	Time 6.601 (6.601)	Data 0.056 (0.056)	Loss 2679.7339 (2679.7339)	
Epoch: [1][30/80]	Time 1.798 (7.533)	Data 0.005 (0.013)	Loss 200.2495 (728.4380)	
Epoch: [1][60/80]	Time 10.377 (7.649)	Data 0.013 (0.012)	Loss 792.8362 (1128.2477)	
begin test
 * MAE 157.011 
 * best MAE 157.011 


In [None]:
# This part is for training after making changes to the data preprocessing. The PRETRAINED run lives in another file.

In [183]:
# Build the CBAM variant. With the default load_weights=False the constructor
# fetches torchvision's pretrained VGG-16 and copies it into the frontend.
model = CSRNet_CBAM()

In [184]:
# Dummy batch: one random 3-channel 255x255 input for a shape check.
x = torch.rand((1,3,255,255))

In [185]:
# Forward pass; the displayed value is the shape of the predicted map.
model(x).shape

torch.Size([1, 1, 31, 31])

In [189]:
!python train.py plain_train.json plain_val.json 0 0

epoch 0, processed 0 samples, lr 0.0000100000
📁 Epoch 0
✅ Total training images: 80
📦 Batch size: 1
🔁 Total batches per epoch: 80
🚀 Learning rate: 0.0000100000
Epoch: [0][0/80]	Time 5.228 (5.228)	Data 0.019 (0.019)	Loss 332.6791 (332.6791)	
Epoch: [0][30/80]	Time 7.246 (8.567)	Data 0.012 (0.013)	Loss 26.2318 (958.7766)	
Epoch: [0][60/80]	Time 3.111 (8.493)	Data 0.006 (0.013)	Loss 276.2756 (1013.9602)	
begin test
 * MAE 1136.261 
 * best MAE 1136.261 
epoch 1, processed 80 samples, lr 0.0000100000
📁 Epoch 1
✅ Total training images: 80
📦 Batch size: 1
🔁 Total batches per epoch: 80
🚀 Learning rate: 0.0000100000
Epoch: [1][0/80]	Time 9.437 (9.437)	Data 0.018 (0.018)	Loss 1500.4268 (1500.4268)	
Epoch: [1][30/80]	Time 3.436 (7.995)	Data 0.007 (0.013)	Loss 6039.8164 (1029.2448)	
Epoch: [1][60/80]	Time 10.980 (7.690)	Data 0.016 (0.012)	Loss 497.2900 (956.6684)	
begin test
 * MAE 147.844 
 * best MAE 147.844 


In [170]:
!python train.py Atest1_train.json Atest1_val.json 0 0

epoch 0, processed 0 samples, lr 0.0000001000
Epoch: [0][0/1540]	Time 11.481 (11.481)	Data 0.028 (0.028)	Loss 1240.0499 (1240.0499)	
Epoch: [0][30/1540]	Time 7.477 (8.316)	Data 0.018 (0.013)	Loss 406.8138 (1519.9125)	
^C


In [196]:
## Here we try training on Part A with CBAM,
# freezing the frontend with the pretrained weights,
# so only the CBAM block and the backend are trained.

In [218]:
# Fresh CBAM model for the frozen-frontend experiment, followed by a shape
# check on one random 3x255x255 input.
# With the default load_weights=False the constructor first loads torchvision's
# pretrained VGG-16 into the frontend.
model = CSRNet_CBAM()
x = torch.rand((1,3,255,255))
model(x).shape

torch.Size([1, 1, 31, 31])

In [219]:
# ===== LOAD PRETRAINED FRONTEND WEIGHTS =====
checkpoint = torch.load('PartAmodel_best.pth.tar', map_location='cpu', weights_only=False)
pretrained_state = checkpoint['state_dict']
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_state.items() if k in model_dict and 'frontend' in k}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)

# ===== FREEZE FRONTEND =====
for param in model.frontend.parameters():
    param.requires_grad = False

# ===== OPTIMIZER (only trainable params) =====
optimizer = torch.optim.Adam(
    filter(lambda p: p.requires_grad, model.parameters()),
    lr=1e-5
)

# ===== OPTIONAL: print trainable parameters =====
print("Trainable parameters:")
for name, param in model.named_parameters():
    print(f"{'✅' if param.requires_grad else '❌'} {name}")


Trainable parameters:
❌ frontend.0.weight
❌ frontend.0.bias
❌ frontend.2.weight
❌ frontend.2.bias
❌ frontend.5.weight
❌ frontend.5.bias
❌ frontend.7.weight
❌ frontend.7.bias
❌ frontend.10.weight
❌ frontend.10.bias
❌ frontend.12.weight
❌ frontend.12.bias
❌ frontend.14.weight
❌ frontend.14.bias
❌ frontend.17.weight
❌ frontend.17.bias
❌ frontend.19.weight
❌ frontend.19.bias
❌ frontend.21.weight
❌ frontend.21.bias
✅ cbam.ca.fc.0.weight
✅ cbam.ca.fc.2.weight
✅ cbam.sa.conv.weight
✅ backend.0.weight
✅ backend.0.bias
✅ backend.2.weight
✅ backend.2.bias
✅ backend.4.weight
✅ backend.4.bias
✅ backend.6.weight
✅ backend.6.bias
✅ backend.8.weight
✅ backend.8.bias
✅ backend.10.weight
✅ backend.10.bias
✅ output_layer.weight
✅ output_layer.bias


In [224]:
!python train.py Atest1_train.json Atest1_val.json 0 0

epoch 0, processed 0 samples, lr 0.0000100000
📁 Epoch 0
✅ Total training images: 770
📦 Batch size: 1
🔁 Total batches per epoch: 770
🚀 Learning rate: 0.0000100000
Epoch: [0][0/770]	Time 4.439 (4.439)	Data 0.019 (0.019)	Loss 950.9924 (950.9924)	
Epoch: [0][30/770]	Time 1.542 (7.000)	Data 0.004 (0.012)	Loss 366.3684 (1034.0290)	
Epoch: [0][60/770]	Time 1.888 (7.644)	Data 0.005 (0.012)	Loss 561.8679 (777.5237)	
Epoch: [0][90/770]	Time 2.836 (7.704)	Data 0.007 (0.012)	Loss 6534.6318 (987.4860)	
Epoch: [0][120/770]	Time 7.676 (7.801)	Data 0.013 (0.012)	Loss 181.0604 (1053.1652)	
Epoch: [0][150/770]	Time 3.728 (7.735)	Data 0.008 (0.012)	Loss 260.7239 (1165.3106)	
Epoch: [0][180/770]	Time 2.780 (7.876)	Data 0.006 (0.012)	Loss 1698.8020 (1154.3618)	
Epoch: [0][210/770]	Time 2.522 (7.656)	Data 0.006 (0.012)	Loss 90.7918 (1169.0286)	
Epoch: [0][240/770]	Time 10.022 (7.632)	Data 0.014 (0.012)	Loss 1207.0767 (1180.6211)	
Epoch: [0][270/770]	Time 10.036 (7.722)	Data 0.013 (0.012)	Loss 3088.7578 (116

In [210]:
import importlib
import model

# 🔁 Re-execute model.py so that edits made on disk are visible in this kernel.
# reload() returns the refreshed module, so rebinding the name here replaces
# the second `import model` statement.
# NOTE: after this cell the name `model` refers to the module, not to a network
# instance; any network stored under that name must be rebuilt afterwards.
model = importlib.reload(model)
print(dir(model))

['CBAM', 'CSRNet', 'CSRNet_CBAM', 'ChannelAttention', 'SpatialAttention', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__spec__', 'load_net', 'make_layers', 'models', 'nn', 'save_net', 'torch']
