In [1]:
import sys
import os

import warnings

from model import CSRNet

from utils import save_checkpoint

import torch
import torch.nn as nn
from torch.autograd import Variable
from torchvision import datasets, transforms

import numpy as np
import argparse
import json
import cv2
import dataset
import time

  from ._conv import register_converters as _register_converters


In [2]:
def train(train_list, model, criterion, optimizer, epoch):
    """Run one training epoch over ``train_list``.

    A fresh ``DataLoader`` is built around ``dataset.listDataset``; for every
    batch the model is run forward, ``criterion`` is evaluated against the
    target, and one optimizer step is taken. Running loss and timing
    statistics are printed every ``print_freq`` iterations.

    Reads the module-level settings ``batch_size``, ``workers`` and
    ``print_freq``. Requires CUDA: inputs and targets are moved with
    ``.cuda()``.

    Args:
        train_list: sample list handed to ``dataset.listDataset``.
        model: network to optimise; its ``seen`` attribute is forwarded to
            the dataset.
        criterion: callable ``criterion(output, target)`` returning the loss.
        optimizer: optimizer that is stepped once per batch.
        epoch: epoch index, used only for logging.
    """
    losses = AverageMeter()
    batch_time = AverageMeter()
    data_time = AverageMeter()

    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(train_list,
                            shuffle=True,
                            transform=preprocess,
                            train=True,
                            seen=model.seen,
                            batch_size=batch_size,
                            num_workers=workers),
        batch_size=batch_size)

    # Log the rate the optimizer will really use. adjust_learning_rate() only
    # writes to optimizer.param_groups, so the module-level `lr` is never
    # updated and would report a stale value once the schedule changes it.
    current_lr = optimizer.param_groups[0]['lr']
    print('epoch %d, processed %d samples, lr %.10f' % (epoch, epoch * len(train_loader.dataset), current_lr))

    model.train()
    end = time.time()

    for i, (img, target) in enumerate(train_loader):
        # Time spent waiting for the loader to deliver this batch.
        data_time.update(time.time() - end)

        # Tensors carry autograd state themselves since PyTorch 0.4, so the
        # old Variable(...) wrapper is a no-op and has been dropped.
        img = img.cuda()
        output = model(img)

        # NOTE(review): unsqueeze(0) prepends one axis to the collated target;
        # this presumably lines up with the model output only while
        # batch_size == 1 - confirm against dataset.listDataset before
        # raising batch_size.
        target = target.type(torch.FloatTensor).unsqueeze(0).cuda()

        loss = criterion(output, target)

        losses.update(loss.item(), img.size(0))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Full iteration time (data loading + forward + backward + step).
        batch_time.update(time.time() - end)
        end = time.time()

        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  .format(
                   epoch, i, len(train_loader), batch_time=batch_time,
                   data_time=data_time, loss=losses))

In [3]:
def validate(val_list, model, criterion):
    """Evaluate ``model`` on ``val_list`` and return the mean absolute error.

    For every batch the sum of the model output is compared with the sum of
    the target; the absolute differences are accumulated and divided by the
    number of batches. Reads the module-level ``batch_size``. Requires CUDA.

    The model is switched to evaluation mode and is left in that mode on
    return; ``train()`` switches it back with ``model.train()``.

    Args:
        val_list: sample list handed to ``dataset.listDataset``.
        model: network to evaluate.
        criterion: unused; kept so the signature stays unchanged.

    Returns:
        The mean absolute error as a zero-dimensional CUDA tensor.
    """
    print('begin test')
    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    test_loader = torch.utils.data.DataLoader(
        dataset.listDataset(val_list,
                            shuffle=False,
                            transform=preprocess,
                            train=False),
        batch_size=batch_size)

    # torch.no_grad() only disables gradient tracking; layers such as dropout
    # or batch norm still need eval() to behave in inference mode.
    model.eval()

    mae = 0
    with torch.no_grad():
        for img, target in test_loader:
            output = model(img.cuda())
            # Compare the summed prediction with the summed target.
            mae += abs(output.sum() - target.sum().type(torch.FloatTensor).cuda())

    mae = mae/len(test_loader)
    print(' * MAE {mae:.3f} '
              .format(mae=mae))

    return mae
        

In [4]:
def adjust_learning_rate(optimizer, epoch):
    """Apply the step schedule for ``epoch`` to every parameter group.

    Starting from the module-level ``original_lr``, the rate is multiplied by
    ``scales[k]`` for each boundary ``steps[k]`` that ``epoch`` has reached,
    walking the boundaries in order and stopping at the first one that lies
    in the future (or that equals ``epoch``). A boundary with no matching
    entry in ``scales`` uses a factor of 1.

    Args:
        optimizer: object exposing ``param_groups``, a list of dicts whose
            ``'lr'`` entry is overwritten.
        epoch: current epoch index.
    """
    rate = original_lr

    for idx, boundary in enumerate(steps):
        if epoch < boundary:
            # Boundary not reached yet; later ones are not examined.
            break
        factor = scales[idx] if idx < len(scales) else 1
        rate = rate * factor
        if epoch == boundary:
            break

    for group in optimizer.param_groups:
        group['lr'] = rate

In [5]:
class AverageMeter(object):
    """Running statistics tracker: latest value, weighted sum, count and mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero every tracked statistic."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as the latest value, weighted by ``n`` observations."""
        self.val = val
        weighted = val * n
        self.sum += weighted
        self.count += n
        # Mean over everything seen since the last reset.
        self.avg = self.sum / self.count

In [6]:
# Training configuration: module-level settings read by the functions
# defined in this notebook.

# Optimiser settings.
original_lr = 1e-7        # starting point for adjust_learning_rate()
lr = 1e-7                 # rate handed to the optimizer when it is created
momentum = 0.95
decay = 5*1e-4            # weight decay
batch_size = 1

# Epoch range and learning-rate schedule.
start_epoch = 0
epochs = 4
steps = [-1, 1, 100, 150]   # epoch boundaries of the schedule
scales = [1, 1, 1, 1]       # multiplier applied at the matching boundary

# Runtime / logging.
workers = 4
print_freq = 30           # print training statistics every N iterations
# NOTE(review): a wall-clock seed differs on every run, so results are not
# reproducible; pin a constant here if repeatability matters.
seed = time.time()

# Best (lowest) validation score so far; starts high so any result improves it.
best_prec1 = 1e6

In [7]:
def training(train_json, test_json, pre, gpu, task):
    """Train a ``CSRNet`` model and checkpoint it after every epoch.

    Loads the train/validation sample lists from JSON, builds the model, a
    summed MSE loss and an SGD optimizer from the module-level settings
    (``lr``, ``momentum``, ``decay``, ``seed``, ``start_epoch``, ``epochs``),
    then alternates ``train`` and ``validate`` for each epoch. The best
    (lowest) validation score is kept in the global ``best_prec1``.

    Args:
        train_json: path to a JSON file holding the training sample list.
        test_json: path to a JSON file holding the validation sample list.
        pre: currently only recorded under the ``'arch'`` key of each
            checkpoint (the resume logic below is disabled).
        gpu: string assigned to the ``CUDA_VISIBLE_DEVICES`` environment
            variable.
        task: forwarded as the third argument of ``save_checkpoint``.
    """
    global best_prec1

    with open(train_json, 'r') as infile:
        train_list = json.load(infile)
    with open(test_json, 'r') as infile:
        val_list = json.load(infile)

    os.environ['CUDA_VISIBLE_DEVICES'] = gpu
    torch.cuda.manual_seed(seed)

    model = CSRNet()

    model = model.cuda()

    # reduction='sum' is the supported spelling of the deprecated
    # size_average=False: the squared errors are summed, not averaged.
    criterion = nn.MSELoss(reduction='sum').cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum,
                                weight_decay=decay)

    # Resuming from a checkpoint is disabled. NOTE: this block cannot simply
    # be uncommented - it assigns start_epoch, which would make that name
    # local to this function and break the loop below whenever no checkpoint
    # is loaded; declare it global (or use a local default) first.
#     if pre != '0':
#         if os.path.isfile(pre):
#             print("=> loading checkpoint '{}'".format(pre))
#             checkpoint = torch.load(pre)
#             start_epoch = checkpoint['epoch']
#             best_prec1 = checkpoint['best_prec1']
#             model.load_state_dict(checkpoint['state_dict'])
#             optimizer.load_state_dict(checkpoint['optimizer'])
#             print("=> loaded checkpoint '{}' (epoch {})"
#                   .format(pre, checkpoint['epoch']))
#         else:
#             print("=> no checkpoint found at '{}'".format(pre))

    for epoch in range(start_epoch, epochs):

        adjust_learning_rate(optimizer, epoch)

        train(train_list, model, criterion, optimizer, epoch)
        # validate() hands back a CUDA tensor; keep a plain float so that
        # best_prec1 and the saved checkpoint do not hold a GPU tensor.
        prec1 = float(validate(val_list, model, criterion))

        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MAE {mae:.3f} '
              .format(mae=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer' : optimizer.state_dict(),
        }, is_best, task)

In [8]:
# Kick off training with the Part A train/validation sample lists.
training(
    train_json='part_A_train.json',
    test_json='part_A_val.json',
    pre='0',
    gpu='0',
    task='0',
)



epoch 0, processed 0 samples, lr 0.0000001000
Epoch: [0][0/1928]	Time 1.220 (1.220)	Data 0.082 (0.082)	Loss 339.6719 (339.6719)	
Epoch: [0][30/1928]	Time 1.121 (1.056)	Data 0.051 (0.081)	Loss 74.6359 (511.1239)	
Epoch: [0][60/1928]	Time 0.848 (0.965)	Data 0.013 (0.059)	Loss 522.4189 (425.4228)	
Epoch: [0][90/1928]	Time 0.607 (1.032)	Data 0.044 (0.055)	Loss 57.6476 (391.9592)	
Epoch: [0][120/1928]	Time 1.196 (1.032)	Data 0.046 (0.052)	Loss 3407.2883 (395.8359)	
Epoch: [0][150/1928]	Time 1.059 (1.045)	Data 0.015 (0.050)	Loss 21.8681 (380.3613)	
Epoch: [0][180/1928]	Time 0.892 (1.039)	Data 0.025 (0.052)	Loss 17.0262 (378.5682)	
Epoch: [0][210/1928]	Time 1.186 (1.025)	Data 0.041 (0.050)	Loss 414.2641 (361.9537)	
Epoch: [0][240/1928]	Time 0.535 (1.026)	Data 0.037 (0.048)	Loss 130.3495 (370.6297)	
Epoch: [0][270/1928]	Time 1.691 (1.037)	Data 0.075 (0.048)	Loss 78.1689 (356.0024)	
Epoch: [0][300/1928]	Time 1.472 (1.037)	Data 0.046 (0.047)	Loss 272.8267 (373.4571)	
Epoch: [0][330/1928]	Time 0.

Epoch: [1][870/1928]	Time 1.164 (1.310)	Data 0.015 (0.041)	Loss 30.3526 (313.3175)	
Epoch: [1][900/1928]	Time 0.819 (1.309)	Data 0.050 (0.041)	Loss 488.0224 (316.0266)	
Epoch: [1][930/1928]	Time 1.039 (1.307)	Data 0.079 (0.041)	Loss 4868.5723 (323.0330)	
Epoch: [1][960/1928]	Time 1.048 (1.305)	Data 0.052 (0.041)	Loss 334.1432 (325.5874)	
Epoch: [1][990/1928]	Time 1.539 (1.298)	Data 0.047 (0.041)	Loss 26.5695 (328.7887)	
Epoch: [1][1020/1928]	Time 1.073 (1.301)	Data 0.050 (0.041)	Loss 292.6115 (324.2220)	
Epoch: [1][1050/1928]	Time 1.102 (1.295)	Data 0.016 (0.041)	Loss 29.0900 (323.2706)	
Epoch: [1][1080/1928]	Time 1.494 (1.298)	Data 0.050 (0.041)	Loss 396.4064 (323.0063)	
Epoch: [1][1110/1928]	Time 1.439 (1.293)	Data 0.046 (0.041)	Loss 94.8621 (329.0997)	
Epoch: [1][1140/1928]	Time 0.820 (1.294)	Data 0.052 (0.041)	Loss 299.4645 (325.6952)	
Epoch: [1][1170/1928]	Time 1.199 (1.289)	Data 0.016 (0.041)	Loss 59.6647 (323.2660)	
Epoch: [1][1200/1928]	Time 1.102 (1.292)	Data 0.017 (0.041)	Los

Epoch: [2][1770/1928]	Time 1.254 (1.312)	Data 0.031 (0.040)	Loss 1715.2294 (293.9072)	
Epoch: [2][1800/1928]	Time 1.822 (1.314)	Data 0.035 (0.040)	Loss 118.1933 (294.6539)	
Epoch: [2][1830/1928]	Time 0.966 (1.312)	Data 0.075 (0.041)	Loss 81.0025 (295.1875)	
Epoch: [2][1860/1928]	Time 0.946 (1.317)	Data 0.040 (0.041)	Loss 1564.6898 (296.1458)	
Epoch: [2][1890/1928]	Time 1.691 (1.317)	Data 0.051 (0.041)	Loss 78.3446 (299.5637)	
Epoch: [2][1920/1928]	Time 1.180 (1.318)	Data 0.026 (0.041)	Loss 118.9031 (298.1508)	
begin test
 * MAE 233.536 
 * best MAE 233.536 
epoch 3, processed 5784 samples, lr 0.0000001000
Epoch: [3][0/1928]	Time 0.212 (0.212)	Data 0.025 (0.025)	Loss 141.7271 (141.7271)	
Epoch: [3][30/1928]	Time 0.985 (1.446)	Data 0.045 (0.041)	Loss 128.8209 (206.4163)	
Epoch: [3][60/1928]	Time 2.000 (1.402)	Data 0.048 (0.042)	Loss 18.3737 (266.6182)	
Epoch: [3][90/1928]	Time 1.088 (1.425)	Data 0.061 (0.042)	Loss 160.2893 (252.2751)	
Epoch: [3][120/1928]	Time 0.999 (1.411)	Data 0.025 (0