In [11]:
import os

import torch
from tqdm import tqdm

from option.option import parse
from data import create_dataset, create_dataloader
from model import define_C
from solver import create_solver
from util.metric import MetricMeter

In [18]:
# Load the experiment options (datasets, solver, network, paths, logging)
# from the JSON configuration file.
opt = parse('./option/train/train_dense161.json')

# Restrict this process to GPU 0. The previous `export CUDA_VISIBLE_DEVICES=0`
# was shell syntax and raised a SyntaxError inside a Python cell. The variable
# only takes effect if it is set before CUDA is first initialised, so keep
# this cell ahead of anything that touches the GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'


In [19]:
# Build one dataset/dataloader pair for every phase listed in the options.
# Only the 'train' and 'val' phases are supported; anything else is rejected
# before any dataset is constructed for it.
for phase_name, phase_opt in opt['datasets'].items():
    if phase_name not in ('train', 'val'):
        raise NotImplementedError('Dataset phase %s is not implemented!' % (phase_name))

    phase_set = create_dataset(phase_opt)
    phase_loader = create_dataloader(phase_set, phase_opt)

    if phase_name == 'train':
        train_set, train_loader = phase_set, phase_loader
    else:
        val_set, val_loader = phase_set, phase_loader

In [21]:
# When sample balancing is enabled, derive one positive-class weight per class
# as (#samples without the class) / (#samples with the class) and store it in
# the solver options under 'pos_weight'.
if opt['solver']['balance_sample']:
    num_classes = 20  # same class count that is passed to MetricMeter below

    # Count how many training samples carry each class.
    # presumably info[1] holds the class index/indices of a sample -- TODO confirm
    class_counts = torch.zeros(num_classes, dtype=torch.float32)
    for info in train_set.info_list:
        class_counts[info[1]] += 1

    # Clamp the divisor so a class with no positive samples yields a large but
    # finite weight instead of a division by zero (which produces `inf`).
    pos_weight = (len(train_set) - class_counts) / class_counts.clamp(min=1)
    opt['solver']['pos_weight'] = pos_weight

    print('===> Using sample balance, weights are')
    print(pos_weight)

===> Using sample balance, weights are
tensor([ 2.0275, 15.4964,  8.8093, 30.4253,  7.1323, 25.0112, 27.0040, 16.0639,
        19.1304, 23.6277, 21.7705,  9.1535, 21.7705, 17.9754, 14.3991,  7.5424,
        15.3797, 19.4265, 14.4677, 18.5634])


In [15]:
# Display the per-class balance weights. `pos_weight` is only assigned when
# opt['solver']['balance_sample'] is truthy, so this cell raises NameError
# otherwise.
pos_weight

tensor([ 0.7130, 25.4694, 14.5100, 50.0660, 12.1864, 41.6220, 44.2971, 27.4895,
        31.5105, 39.5468, 36.0753, 15.3535, 35.5743, 29.1560, 22.9513, 12.6692,
        25.5343, 32.0061, 24.0602, 30.8412])

In [4]:
# Optionally create a tensorboardX writer. The import is kept inside the
# branch so tensorboardX is only required when logging is switched on.
if opt['use_tb_logger']:
    from tensorboardX import SummaryWriter

    tb_log_dir = opt['path']['tb_logger']
    tb_logger = SummaryWriter(log_dir=tb_log_dir)
    print('===> tensorboardX logger created, log to %s' % (tb_log_dir))

===> tensorboardX logger created, log to /home/tsinghuaee01/projects/Object-Classification/tb_logger/densenet161_object_classification


In [22]:
# Instantiate the solver from the parsed options.
solver = create_solver(opt)

# Upper-cased network identifier, printed in the training banner.
network_opt = opt['network_C']
model_name = network_opt['which_model_C'].upper()

===> Network Summary

Network structure: [DataParallel - densenet161], with parameters: [26,516,180]
===> Solver Initialized : [OCSolver] || Use GPU : [True]
optimizer_C:  Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    eps: 1e-08
    initial_lr: 0.0001
    lr: 0.0001
    weight_decay: 0.0001
)
lr_scheduler milestones: [10, 15, 18]   gamma: 0.500000


In [7]:

# Announce the run, then restore the epoch/step counters from the solver's
# current log so training continues from wherever that log says it stopped.
print('===> Start Train')
print("==================================================")

solver_log = solver.get_current_log()

NUM_EPOCH = int(opt['solver']['epoch'])
start_epoch, current_step = solver_log['epoch'], solver_log['step']

banner = "Method: %s || Epoch Range: (%d ~ %d) || Start Step: %d"
print(banner % (model_name, start_epoch, NUM_EPOCH, current_step))

===> Start Train
Method: DENSENET161 || Epoch Range: (1 ~ 20)


In [None]:
    # Main train/validate loop.
    #
    # For every epoch from `start_epoch` to `NUM_EPOCH` (inclusive) this:
    #   1. runs one pass over `train_loader`, stepping the solver per batch;
    #   2. runs one pass over `val_loader`, accumulating losses and metrics;
    #   3. appends the epoch results to `solver_log['records']`;
    #   4. tracks the best mAP, saves a checkpoint and the log, and updates
    #      the learning rate.
    #
    # Relies on names created by earlier cells: `opt`, `solver`, `solver_log`,
    # `train_loader`, `val_loader`, `val_set`, `start_epoch`, `NUM_EPOCH`,
    # `current_step`, and `tb_logger` when opt['use_tb_logger'] is truthy.
    metric_meter = MetricMeter(class_num=20)
    for epoch in range(start_epoch, NUM_EPOCH + 1):
        metric_meter.reset()
        print('\n===> Training Epoch: [%d/%d]...  Learning Rate: %f' %
              (epoch, NUM_EPOCH, solver.get_current_learning_rate()))

        # Initialization
        solver_log['epoch'] = epoch

        # Per-epoch accumulators keyed by loss name. The names are recovered
        # from the record keys by stripping the 'train_' / 'val_' prefix.
        train_loss_dict = {}
        val_loss_dict = {}
        for k in solver_log['records'].keys():
            if k.startswith('train'):  # 'train_loss_pixel'
                train_loss_dict[k[6:]] = []
            elif k.startswith('val'):
                val_loss_dict[k[4:]] = []

        # ---- Train ----
        num_train_samples = 0
        with tqdm(
                total=len(train_loader),
                desc='Epoch: [%d/%d]' % (epoch, NUM_EPOCH),
                miniters=1) as t:
            for batch in train_loader:
                current_step += 1
                solver.feed_data(batch)
                iter_loss = solver.train_step()
                batch_size = batch['img'].size(0)
                num_train_samples += batch_size
                # Weight each batch loss by its size so the epoch value is a
                # per-sample mean even when the last batch is smaller.
                for k, v in iter_loss.items():
                    train_loss_dict[k].append(v * batch_size)
                if opt['use_tb_logger']:
                    if current_step % opt['logger']['print_freq'] == 0:
                        for k, v in iter_loss.items():
                            tb_logger.add_scalar('train_' + k, v, current_step)

                t.set_postfix_str("Batch Loss: %.4f" % iter_loss['loss_total'])
                t.update()

        # The accumulated values are batch_loss * batch_size, so divide by the
        # number of samples seen. Dividing by the number of batches (as was
        # done before) inflated the reported loss by roughly the batch size.
        for k, v in train_loss_dict.items():
            solver_log['records']['train_' + k].append(
                sum(v) / max(num_train_samples, 1))
        solver_log['records']['lr'].append(solver.get_current_learning_rate())

        print(
            '\nEpoch: [%d/%d]   Avg Train Loss: %.6f' %
            (epoch, NUM_EPOCH, solver_log['records']['train_loss_total'][-1]))

        # ---- Validate ----
        print('===> Validating...', )

        for batch in val_loader:
            solver.feed_data(batch)
            iter_loss = solver.test()
            for k, v in iter_loss.items():
                val_loss_dict[k].append(v)
            metric_meter.add(solver.predict, solver.target)

        # Validation losses are averaged over batches (unweighted).
        for k, v in val_loss_dict.items():
            solver_log['records']['val_' + k].append(sum(v) / len(v))
        metric_value = metric_meter.value()
        for k, v in metric_value.items():
            solver_log['records'][k].append(v)

        if opt['use_tb_logger']:
            # Log the epoch-averaged validation losses; the previous code
            # logged `iter_loss`, i.e. only the last validation batch.
            for k in val_loss_dict:
                tb_logger.add_scalar(
                    'val_' + k, solver_log['records']['val_' + k][-1],
                    current_step)
            for k, v in metric_value.items():
                tb_logger.add_scalar('val_' + k, v, current_step)

        # record the best epoch
        epoch_is_best = False
        if solver_log['best_mAP'] < metric_value['mAP']:
            solver_log['best_mAP'] = metric_value['mAP']
            epoch_is_best = True
            solver_log['best_epoch'] = epoch

        print(
            "[%s] mAP: %.2f   mAcc: %.4f   wAcc: %.4f Loss: %.6f   Best mAP: %.2f in Epoch: [%d]"
            % (val_set.name(), metric_value['mAP'],
               metric_value['mAcc'], metric_value['wAcc'],
               solver_log['records']['val_loss_total'][-1],
               solver_log['best_mAP'], solver_log['best_epoch']))

        solver.set_current_log(solver_log)
        solver.save_checkpoint(epoch, epoch_is_best)
        solver.save_current_log()

        # update lr
        solver.update_learning_rate(epoch)

    print('===> Finished !')

In [20]:
# Smoke test: feed a single batch to the solver and run one training step.
# Note that this updates the model weights.
#
# The original cell read from an iterator named `it` that no visible cell
# defines, so it failed with NameError on a fresh kernel. A batch is now drawn
# directly from `train_loader` (presumably what `it` wrapped -- TODO confirm).
# A generator expression is used rather than `iter(...)` so the cell still
# works if the name `iter` has been rebound in the notebook namespace.
sample = next(batch for batch in train_loader)
solver.feed_data(sample)
solver.train_step()

{'loss_c': tensor(0.3805, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>)}

In [8]:
# Scratch value for the str.startswith check in the next cell; not used by
# the training code.
s = 'afas'

In [9]:
# Scratch check of str.startswith, the same prefix test the training loop
# applies to the solver log record keys.
s.startswith('a')

True