In [1]:
import sys
sys.path.append('/Users/pc/Documents/stfu/classes/_ee292d/CrowdCounting-P2PNet')
import os
import random
import torch
from collections import OrderedDict
from torch.utils.data import Dataset, Subset, DataLoader
from easydict import EasyDict
from PIL import Image
from torchvision import transforms
from models.backbone import BackboneBase_VGG
from models.matcher import build_matcher_crowd
from models.p2pnet import P2PNet, SetCriterion_Crowd
import models.vgg_ as vgg
from crowd_datasets.SHHA.SHHA import random_crop
from engine import train_one_epoch, evaluate_crowd_no_overlap, vis
import util.misc as utils
from tqdm import tqdm

SEED = 10541 #0x292D
VIS_DIR = 'vis'

In [2]:
def seed_all(seed=SEED):
    random.seed(seed)
    # np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True

In [3]:
# need to load pretraining from elsewhere
class CustomVGGBackboneWrapper(BackboneBase_VGG):
    def __init__(self, name, return_interm_layers=True):
        if name == 'vgg16_bn':
            backbone = vgg.vgg16_bn()
        elif name == 'vgg16':
            backbone = vgg.vgg16()
        num_channels = 256
        super().__init__(backbone, num_channels, name, return_interm_layers)

In [5]:
def load_new_model(model_args, device, verbose=False,
                   pretrain_layers=['backbone', 'fpn', 'classification', 'regression'],
                   freeze_layers=[]):
    num_classes = 1

    backbone = CustomVGGBackboneWrapper(model_args.backbone)
    model = P2PNet(backbone, model_args.row, model_args.line)
    # load pretrained weights
    if len(pretrain_layers) > 0:
        checkpoint = torch.load('../CrowdCounting-P2PNet/weights/SHTechA.pth', map_location='cpu')
        valid = lambda s: s[:s.find('.')] in pretrain_layers
        valid_state = OrderedDict([(k,v) for k,v in checkpoint['model'].items() if valid(k)])
        missing, unexpected = model.load_state_dict(valid_state, strict=False)
        if verbose:
            print(f"pretrained weights loaded without the following keys:\n{'\n'.join(missing)}")
            if len(unexpected) > 0:
                print(f"WARNING: received unexpected keys:\n{'\n'.join(unexpected)}")
    for layer in freeze_layers:
        for param in tqdm(getattr(model, layer).parameters(), desc=layer):
            param.requires_grad = False


    weight_dict = {'loss_ce': 1, 'loss_points': model_args.point_loss_coef}
    losses = ['labels', 'points']
    matcher = build_matcher_crowd(model_args)
    criterion = SetCriterion_Crowd(num_classes, \
                                matcher=matcher, weight_dict=weight_dict, \
                                eos_coef=model_args.eos_coef, losses=losses)
    return model.to(device), criterion.to(device)

In [6]:
DEFAULT_TRANSFORM = transforms.Compose([
        transforms.ToTensor(), 
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
class GrapeDataset(Dataset):
    def __init__(self, root_path='labeled/', img_ext='.jpg', lbl_ext='_lbl.txt', \
                 transform=DEFAULT_TRANSFORM, train=False, patch=False, flip=False):
        self.root = root_path + ('' if root_path[-1] == '/' else '/')
        self.instances = [f[:-len(lbl_ext)] for f in os.listdir(self.root)
                          if os.path.isfile(self.root+f) and f.endswith(lbl_ext)]
        self.img_ext = img_ext
        self.lbl_ext = lbl_ext
        self.trf = transform
        self.train = train
        self.patch = patch
        self.flip = flip
    
    def __len__(self):
        return len(self.instances)
    
    def _get_points(self, name):
        label_path = self.root + name + self.lbl_ext
        with open(label_path, 'r') as f:
            coords_list = f.readlines()
        coords = torch.tensor(tuple(tuple(map(float, line.split())) for line in coords_list))
        return coords
    
    def _load_image(self, name):
        img_path = self.root + name + self.img_ext
        img = Image.open(img_path)
        return img
    
    def __getitem__(self, i):
        # only support single-instance indexing
        img = self._load_image(self.instances[i]) # PIL image
        point = self._get_points(self.instances[i]) # tensor
        # perform val transforms on Image object
        if not (self.train and self.patch): # if not cropping, handle resizing
            # fix size of tensors (need to fix this)
            # HACK: only batch one example at a time
            width, height = img.size
            new_width = width // 128 * 128
            new_height = height // 128 * 128
            img = img.resize((new_width, new_height), Image.LANCZOS)
            point[:,0] *= new_width/width
            point[:,1] *= new_height/height
        # perform standard transforms
        if self.trf is not None:
            img = self.trf(img)
        # additional transformation if training
        if self.train:
            # data augmentation -> random scale
            scale_range = [0.7, 1.3]
            min_size = min(img.shape[1:])
            scale = random.uniform(*scale_range)
            # scale the image and points
            if scale * min_size > 128:
                img = torch.nn.functional.upsample_bilinear(img.unsqueeze(0), scale_factor=scale).squeeze(0)
                point *= scale
            # random crop augumentation
            if self.patch:
                # generates a batch of images -> we just want one
                imgs, points = random_crop(img, point, num_patch=1)
                img, point = torch.tensor(imgs[0]), points[0]
            # random flipping
            if random.random() > 0.5 and self.flip:
                # random flip
                img = transforms.functional.hflip(img)
                for i, _ in enumerate(point):
                    point[:, 0] = 128 - point[:, 0]
            
        img = img.type(torch.float32)
        # pack up related infos
        img_id = torch.tensor(i, dtype=torch.int32, requires_grad=False)
        target = {
            'point': point,
            'image_id': img_id,
            'labels': torch.ones([point.shape[0]]).long()
        }
        return img, target

In [7]:
dl_kwargs = {
    # 'batch_size': 128,
    'num_workers': 0,
    'pin_memory': True,
    'collate_fn': utils.collate_fn
}
def get_subset_dataloaders(dset, subset_idx, k, bsz=128, dl_kwargs=dl_kwargs):
    val_idx = subset_idx[k]
    val_subset = Subset(dset, val_idx)
    val_dl = DataLoader(val_subset, batch_size=1, **dl_kwargs) # HACK
    train_idx = torch.cat((subset_idx[:k].flatten(), subset_idx[k+1:].flatten()))
    trn_subset = Subset(dset, train_idx)
    train_dl = DataLoader(trn_subset, shuffle=True, batch_size=bsz, **dl_kwargs)
    trn_evaldl = DataLoader(trn_subset, batch_size=1, **dl_kwargs)
    return train_dl, trn_evaldl, val_dl

In [8]:
hparams = EasyDict(
    lr_gen = 1e-4, # orig: 1e-4
    lr_backbone = 1e-5, # orig: 1e-5
    weight_decay = 1e-4, #orig: 1e-4
    epochs = 200,
    lr_drop = 50,
    bsz = 64, # orig: 32
    clip_max_norm = 0.5, #orig: 0.1
    val_every = 10,
    pretrain_layers = ['backbone', 'fpn'],
    freeze_layers = ['backbone', 'fpn']
)
model_args = EasyDict(
    # required to build inference model
    backbone = 'vgg16_bn', # name of the convolutional backbone to use
    pretrained = False,
    row = 1, # row number of anchor points
    line = 1, # line number of anchor points
    # required for model training
    point_loss_coef = 2e-4, # orig: 2e-4
    eos_coef = 0.5, #orig: 0.5 # Relative classification weight of the no-object class
    set_cost_class = 2, #orig: 1 # for matcher -- Class coefficient in the matching cost
    set_cost_point = .05 #orig: .05 # for matcher -- L1 point coefficient in the matching cost
)
device = torch.device('mps' if torch.backends.mps.is_available() else 'cpu')
base_dset = GrapeDataset(train=False, patch=False, flip=False)
K = 5
print(len(base_dset))
assert len(base_dset) % K == 0 # kfold splits must be exactly even

200


In [9]:
seed_all()
folds = torch.randperm(len(base_dset)).reshape(K, -1)
# wipe vis directory for val outputs
for f in os.listdir(VIS_DIR):
    if f.endswith('jpg'):
        os.remove(f"{VIS_DIR}/{f}")
for k in range(5):
    print(f">>> FOLD {k+1}/{K} <<<")
    train_dl, trn_evaldl, val_dl = get_subset_dataloaders(base_dset, folds, k, bsz=hparams.bsz, dl_kwargs=dl_kwargs)
    model, criterion = load_new_model(model_args, device, pretrain_layers=hparams.pretrain_layers, freeze_layers=hparams.freeze_layers)
    opt_params = [
        {
            "params": [p for n, p in model.named_parameters() if "backbone" not in n and p.requires_grad],
            "lr": hparams.lr_gen
        },
        {
            "params": [p for n, p in model.named_parameters() if "backbone" in n and p.requires_grad],
            "lr": hparams.lr_backbone,
        },
    ]
    optimizer = torch.optim.Adam(opt_params)
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, hparams.lr_drop) # basically does nothing
    for attr in ['train', 'patch', 'flip']:
        base_dset.__setattr__(attr, True)
    for epoch in range(hparams.epochs):
        print(f"EPOCH {epoch+1}/{hparams.epochs}")
        stats = train_one_epoch(model, criterion, train_dl, optimizer, device, \
                                epoch, hparams.clip_max_norm)
        if epoch and (epoch+1) % hparams.val_every == 0:
            # HACK: disable and then re-enable training-time args for the dataset
            for attr in ['train', 'patch', 'flip']:
                base_dset.__setattr__(attr, False)
            # visualize final results only
            vis_dir = VIS_DIR if hparams.val_every + epoch >= hparams.epochs else None
            t_mae, t_mse = evaluate_crowd_no_overlap(model, trn_evaldl, device, vis_dir=vis_dir)
            print(f"trn: mae={t_mae}, mse={t_mse}")
            v_mae, v_mse = evaluate_crowd_no_overlap(model, val_dl, device, vis_dir=vis_dir)
            print(f"val: mae={v_mae}, mse={v_mse}")
            # reset stuff for training
            for attr in ['train', 'patch', 'flip']:
                base_dset.__setattr__(attr, True)
    print("val idxs:", sorted(folds[k].tolist()))


>>> FOLD 1/5 <<<


backbone: 52it [00:00, 150312.76it/s]
fpn: 12it [00:00, 51306.47it/s]
  img_id = torch.tensor(i, dtype=torch.int32, requires_grad=False)


EPOCH 1/200
Averaged stats: lr: 0.000100  loss: 0.6292 (0.6331)  loss_ce: 0.6292 (0.6331)  loss_ce_unscaled: 0.6292 (0.6331)  loss_point_unscaled: 12.6869 (12.6807)
EPOCH 2/200
Averaged stats: lr: 0.000100  loss: 0.4722 (0.4732)  loss_ce: 0.4722 (0.4732)  loss_ce_unscaled: 0.4722 (0.4732)  loss_point_unscaled: 13.1796 (13.2649)
EPOCH 3/200
Averaged stats: lr: 0.000100  loss: 0.3760 (0.3709)  loss_ce: 0.3760 (0.3709)  loss_ce_unscaled: 0.3760 (0.3709)  loss_point_unscaled: 14.4270 (14.2936)
EPOCH 4/200
Averaged stats: lr: 0.000100  loss: 0.3407 (0.3524)  loss_ce: 0.3407 (0.3524)  loss_ce_unscaled: 0.3407 (0.3524)  loss_point_unscaled: 14.9141 (14.9367)
EPOCH 5/200
Averaged stats: lr: 0.000100  loss: 0.3863 (0.3872)  loss_ce: 0.3863 (0.3872)  loss_ce_unscaled: 0.3863 (0.3872)  loss_point_unscaled: 13.8066 (13.8743)
EPOCH 6/200
Averaged stats: lr: 0.000100  loss: 0.3692 (0.3646)  loss_ce: 0.3692 (0.3646)  loss_ce_unscaled: 0.3692 (0.3646)  loss_point_unscaled: 14.4614 (14.5013)
EPOCH 7/20

backbone: 52it [00:00, 388085.07it/s]
fpn: 12it [00:00, 299593.14it/s]


EPOCH 1/200
Averaged stats: lr: 0.000100  loss: 0.6338 (0.6378)  loss_ce: 0.6338 (0.6378)  loss_ce_unscaled: 0.6338 (0.6378)  loss_point_unscaled: 13.2210 (13.1681)
EPOCH 2/200
Averaged stats: lr: 0.000100  loss: 0.4890 (0.4847)  loss_ce: 0.4890 (0.4847)  loss_ce_unscaled: 0.4890 (0.4847)  loss_point_unscaled: 14.1897 (14.4813)
EPOCH 3/200
Averaged stats: lr: 0.000100  loss: 0.3790 (0.3767)  loss_ce: 0.3790 (0.3767)  loss_ce_unscaled: 0.3790 (0.3767)  loss_point_unscaled: 15.2427 (15.4820)
EPOCH 4/200
Averaged stats: lr: 0.000100  loss: 0.3831 (0.3707)  loss_ce: 0.3831 (0.3707)  loss_ce_unscaled: 0.3831 (0.3707)  loss_point_unscaled: 14.8713 (14.9989)
EPOCH 5/200
Averaged stats: lr: 0.000100  loss: 0.3818 (0.3805)  loss_ce: 0.3818 (0.3805)  loss_ce_unscaled: 0.3818 (0.3805)  loss_point_unscaled: 15.1571 (15.3787)
EPOCH 6/200
Averaged stats: lr: 0.000100  loss: 0.3751 (0.3708)  loss_ce: 0.3751 (0.3708)  loss_ce_unscaled: 0.3751 (0.3708)  loss_point_unscaled: 15.1506 (15.1228)
EPOCH 7/20

backbone: 52it [00:00, 323116.75it/s]
fpn: 12it [00:00, 267721.53it/s]


EPOCH 1/200
Averaged stats: lr: 0.000100  loss: 0.6337 (0.6304)  loss_ce: 0.6337 (0.6304)  loss_ce_unscaled: 0.6337 (0.6304)  loss_point_unscaled: 12.2479 (12.3018)
EPOCH 2/200
Averaged stats: lr: 0.000100  loss: 0.4689 (0.4725)  loss_ce: 0.4689 (0.4725)  loss_ce_unscaled: 0.4689 (0.4725)  loss_point_unscaled: 13.3589 (13.0830)
EPOCH 3/200
Averaged stats: lr: 0.000100  loss: 0.3400 (0.3592)  loss_ce: 0.3400 (0.3592)  loss_ce_unscaled: 0.3400 (0.3592)  loss_point_unscaled: 13.5512 (13.7464)
EPOCH 4/200
Averaged stats: lr: 0.000100  loss: 0.3685 (0.3707)  loss_ce: 0.3685 (0.3707)  loss_ce_unscaled: 0.3685 (0.3707)  loss_point_unscaled: 13.8287 (13.7925)
EPOCH 5/200
Averaged stats: lr: 0.000100  loss: 0.3814 (0.3867)  loss_ce: 0.3814 (0.3867)  loss_ce_unscaled: 0.3814 (0.3867)  loss_point_unscaled: 13.9487 (13.8654)
EPOCH 6/200
Averaged stats: lr: 0.000100  loss: 0.3649 (0.3605)  loss_ce: 0.3649 (0.3605)  loss_ce_unscaled: 0.3649 (0.3605)  loss_point_unscaled: 14.0396 (13.9769)
EPOCH 7/20

backbone: 52it [00:00, 371556.74it/s]
fpn: 12it [00:00, 221725.32it/s]


EPOCH 1/200
Averaged stats: lr: 0.000100  loss: 0.6471 (0.6459)  loss_ce: 0.6471 (0.6459)  loss_ce_unscaled: 0.6471 (0.6459)  loss_point_unscaled: 12.4841 (12.5001)
EPOCH 2/200
Averaged stats: lr: 0.000100  loss: 0.4610 (0.4695)  loss_ce: 0.4610 (0.4695)  loss_ce_unscaled: 0.4610 (0.4695)  loss_point_unscaled: 13.1691 (13.2673)
EPOCH 3/200
Averaged stats: lr: 0.000100  loss: 0.3650 (0.3678)  loss_ce: 0.3650 (0.3678)  loss_ce_unscaled: 0.3650 (0.3678)  loss_point_unscaled: 14.5098 (14.9999)
EPOCH 4/200
Averaged stats: lr: 0.000100  loss: 0.3546 (0.3649)  loss_ce: 0.3546 (0.3649)  loss_ce_unscaled: 0.3546 (0.3649)  loss_point_unscaled: 14.6327 (14.6358)
EPOCH 5/200
Averaged stats: lr: 0.000100  loss: 0.3886 (0.3838)  loss_ce: 0.3886 (0.3838)  loss_ce_unscaled: 0.3886 (0.3838)  loss_point_unscaled: 14.8585 (14.6362)
EPOCH 6/200
Averaged stats: lr: 0.000100  loss: 0.3645 (0.3644)  loss_ce: 0.3645 (0.3644)  loss_ce_unscaled: 0.3645 (0.3644)  loss_point_unscaled: 14.5700 (14.5257)
EPOCH 7/20

backbone: 52it [00:00, 317472.79it/s]
fpn: 12it [00:00, 272062.96it/s]


EPOCH 1/200
Averaged stats: lr: 0.000100  loss: 0.6002 (0.5969)  loss_ce: 0.6002 (0.5969)  loss_ce_unscaled: 0.6002 (0.5969)  loss_point_unscaled: 12.0072 (12.0210)
EPOCH 2/200
Averaged stats: lr: 0.000100  loss: 0.4217 (0.4260)  loss_ce: 0.4217 (0.4260)  loss_ce_unscaled: 0.4217 (0.4260)  loss_point_unscaled: 12.9522 (13.0183)
EPOCH 3/200
Averaged stats: lr: 0.000100  loss: 0.3546 (0.3561)  loss_ce: 0.3546 (0.3561)  loss_ce_unscaled: 0.3546 (0.3561)  loss_point_unscaled: 13.2816 (13.5116)
EPOCH 4/200
Averaged stats: lr: 0.000100  loss: 0.3862 (0.3649)  loss_ce: 0.3862 (0.3649)  loss_ce_unscaled: 0.3862 (0.3649)  loss_point_unscaled: 13.6949 (13.3574)
EPOCH 5/200
Averaged stats: lr: 0.000100  loss: 0.3670 (0.3662)  loss_ce: 0.3670 (0.3662)  loss_ce_unscaled: 0.3670 (0.3662)  loss_point_unscaled: 13.6486 (13.4649)
EPOCH 6/200
Averaged stats: lr: 0.000100  loss: 0.3231 (0.3339)  loss_ce: 0.3231 (0.3339)  loss_ce_unscaled: 0.3231 (0.3339)  loss_point_unscaled: 13.6353 (13.6465)
EPOCH 7/20

In [49]:
torch.save(model.state_dict(), 'best-weights.pth')

In [50]:
model.state_dict().keys()

odict_keys(['backbone.body1.0.weight', 'backbone.body1.0.bias', 'backbone.body1.1.weight', 'backbone.body1.1.bias', 'backbone.body1.1.running_mean', 'backbone.body1.1.running_var', 'backbone.body1.1.num_batches_tracked', 'backbone.body1.3.weight', 'backbone.body1.3.bias', 'backbone.body1.4.weight', 'backbone.body1.4.bias', 'backbone.body1.4.running_mean', 'backbone.body1.4.running_var', 'backbone.body1.4.num_batches_tracked', 'backbone.body1.7.weight', 'backbone.body1.7.bias', 'backbone.body1.8.weight', 'backbone.body1.8.bias', 'backbone.body1.8.running_mean', 'backbone.body1.8.running_var', 'backbone.body1.8.num_batches_tracked', 'backbone.body1.10.weight', 'backbone.body1.10.bias', 'backbone.body1.11.weight', 'backbone.body1.11.bias', 'backbone.body1.11.running_mean', 'backbone.body1.11.running_var', 'backbone.body1.11.num_batches_tracked', 'backbone.body2.1.weight', 'backbone.body2.1.bias', 'backbone.body2.2.weight', 'backbone.body2.2.bias', 'backbone.body2.2.running_mean', 'backbon

In [64]:
model.backbone.body1[0].weight

Parameter containing:
tensor([[[[ 7.7970e-02,  2.3312e-02,  7.3356e-02],
          [ 4.4073e-02, -3.8663e-02,  1.4990e-02],
          [ 7.4774e-02,  7.1020e-02,  7.8159e-02]],

         [[-4.7257e-02, -1.8041e-01, -5.0482e-02],
          [-1.1559e-01, -2.8187e-01, -1.3983e-01],
          [-1.2540e-02, -6.9186e-02,  1.0502e-02]],

         [[ 2.4865e-02, -6.9761e-02,  4.1845e-02],
          [-3.2288e-02, -1.5699e-01, -3.9417e-02],
          [ 4.6226e-02,  9.0818e-03,  8.0102e-02]]],


        [[[-8.7527e-03,  3.4041e-02, -2.1575e-02],
          [-5.8559e-03,  1.2914e-01,  4.4241e-03],
          [-1.7512e-02,  1.6148e-03, -2.8583e-02]],

         [[-3.8385e-02,  1.6052e-02, -5.0427e-02],
          [ 2.0274e-02,  1.8862e-01,  2.6346e-02],
          [-1.1766e-02,  1.7792e-02, -1.6455e-02]],

         [[-6.6149e-02,  5.8199e-02, -5.2075e-02],
          [ 3.8872e-02,  2.7189e-01,  6.3307e-02],
          [-3.9280e-02,  3.8197e-02, -2.6360e-02]]],


        [[[ 1.4962e-06, -1.1430e-06,  1.2536

In [51]:
old_ckpt = torch.load('../CrowdCounting-P2PNet/weights/SHTechA.pth', map_location='cpu')

In [59]:
old_ckpt['model']['backbone.body1.0.weight'].shape

torch.Size([64, 3, 3, 3])

In [60]:
model.state_dict()['backbone.body1.0.weight'].shape

torch.Size([64, 3, 3, 3])

In [37]:
n_points = []
for samples, targets in trn_evaldl:
    n_points.append(targets[0]['point'].shape[0])
print(sum(n_points)/len(n_points))

  img_id = torch.tensor(i, dtype=torch.int32, requires_grad=False)


16.56875


In [None]:
n_points = []
for samples, targets in trn_evaldl:
    n_points.append(targets[0]['point'].shape[0])
print(sum(n_points)/len(n_points))

  img_id = torch.tensor(i, dtype=torch.int32, requires_grad=False)


16.56875


# fixing crops

In [38]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

In [39]:
path = 'cropped/'
images = [f for f in os.listdir(path)
          if os.path.isfile(path+f) and f.endswith('.jpg')]
THRESH = 5 # need more than THRESH nonzero values to be considered valid
BLACK_THRESH = 100
MIN_IMG_DIM = 128

# finds the first index of 1D array x that surpasses the threshold
find_first = lambda x: (x > THRESH).argmax()
for img_name in images:
    img = cv2.imread(path+img_name)
    nonblack_mask = (img > BLACK_THRESH)
    img_1ch = nonblack_mask.any(axis=2).astype(int) # flatten channels
    first_good_x = find_first(img_1ch.sum(axis=0))
    first_good_y = find_first(img_1ch.sum(axis=1))
    # crop
    cropped_img = img
    if first_good_x:
        cropped_img = cropped_img[:, first_good_x:-first_good_x]
    if first_good_y:
        cropped_img = cropped_img[first_good_y:-first_good_y]
    roi = cropped_img
    min_dim = min(roi.shape[0], roi.shape[1])
    scale = max(1, MIN_IMG_DIM/min_dim)
    scale_up = lambda l: int(np.ceil(scale*l))
    roi_out = cv2.resize(roi, (scale_up(roi.shape[1]), scale_up(roi.shape[0])))
    # fix coordinates
    # label_path = path+img_name[:-len("anno.jpg")]+"lbl.txt"
    # with open(label_path, 'r') as f:
    #     coords_list = f.readlines()
    #     coords = [tuple(map(int, line.split())) for line in coords_list]
    #     fixed_coords = [(scale_up(x-first_good_x), scale_up(y-first_good_y)) for (x,y) in coords]
    # label_str = '\n'.join([f"{x} {y}" for (x,y) in fixed_coords])
    # overwrite files
    cv2.imwrite(path+img_name, roi_out)
    # with open(label_path, 'w') as f:
    #     f.write(label_str)