# Set up environment

__Define paths & import libraries__

In [7]:
import os
import sys
from pathlib import Path

# The relative paths below are resolved against the current working
# directory, so print it to make a wrong launch location easy to spot.
print(os.getcwd())

# Root of the cloned TBPS-CLIP repository and root of the image datasets.
ROOT_PATH = Path('../../paper_clones/TBPS-CLIP').resolve()
IMAGE_PATH = Path('../../DATASET').resolve()

# Register both directories on sys.path exactly once: notebook cells get
# re-run often, and an unconditional append would pile up duplicates.
# NOTE(review): IMAGE_PATH is kept on sys.path only to preserve the original
# behaviour; a dataset directory presumably does not need to be importable.
for _path in (ROOT_PATH, IMAGE_PATH):
    if str(_path) not in sys.path:
        sys.path.append(str(_path))

/home/jovyan/workspace/BA-PRE_THESIS/REPORT/training


In [8]:
import os
from torchinfo import summary
import random
import wandb
import time, datetime
from pathlib import Path
import torch
from misc.build import load_checkpoint, cosine_scheduler, build_optimizer
from misc.data import build_pedes_data
from misc.eval import test
from misc.utils import parse_config, init_distributed_mode, set_seed, is_master, is_using_distributed, AverageMeter
from model.tbps_model import clip_vitb
from options import get_args
# Fetch the NLTK 'stopwords' corpus; this is a no-op when the package is
# already present locally.
# NOTE(review): presumably required by the project's text-augmentation code
# -- confirm against misc.data.
import nltk; nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /home/jovyan/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

__Load config__

In [9]:
# Locate the YAML configuration inside the cloned repository and parse it.
config_path = ROOT_PATH.joinpath('config/config.yaml')
config = parse_config(config_path)

# Set training config

In [10]:
# set_seed runs first, before any of the notebook-specific overrides below.
set_seed(config)

# --- Top-level settings: dataset locations and compute device -------------
config['anno_dir'] = ROOT_PATH.joinpath('annotation/CUHK-PEDES')
config['image_dir'] = IMAGE_PATH.joinpath('CUHK-PEDES/imgs')
config['device'] = 'cuda'

# --- Logging and data loading ----------------------------------------------
config['log']['print_period'] = 1  # print training status on every iteration
config['data']['batch_size'] = 16

# --- Model: pretrained weights, gather flag, checkpoint output directory ---
config['model']['checkpoint'] = ROOT_PATH.joinpath('checkpoint/ViT-B-16.pt')
config['model']['use_gather'] = False
config['model']['saved_path'] = ROOT_PATH.joinpath("checkpoint")

# Last expression of the cell: display the resulting configuration.
config

{'device': 'cuda',
 'misc': {'seed': 1},
 'experiment': {'input_resolution': [224, 224],
  'simclr_mlp': [512, 128, 512],
  'simclr_temperature': 0.1,
  'dropout': 0.05,
  'eda_alpha': 0.05,
  'back_trans': True,
  'backtrans_p': 0.1,
  'text_length': 77,
  'mixgen': False,
  'mixgen_type': 'cat',
  'mixgen_p': 0.1,
  'mixgen_ratio': 0.1,
  'mvs_image': True,
  'nitc_ratio': 1.0,
  'ss': True,
  'ss_ratio': 0.4,
  'ritc': True,
  'ritc_eps': 0.01,
  'ritc_ratio': 1.0,
  'mlm': False,
  'mlm_ratio': 1.0,
  'cmt_depth': 4,
  'citc': True,
  'citc_lambda1': 0.25,
  'citc_lambda2': 0.25,
  'citc_ratio': 0.1,
  'id': False,
  'id_ratio': 1.0},
 'schedule': {'lr': 0.0001,
  'epoch': 5,
  'epoch_warmup': 1,
  'lr_start': 1e-06,
  'lr_end': 5e-06,
  'weight_decay': 0.02,
  'betas': [0.9, 0.98],
  'eps': 1e-08},
 'model': {'ckpt_type': 'original_clip',
  'saved_path': PosixPath('/home/jovyan/workspace/BA-PRE_THESIS/paper_clones/TBPS-CLIP/checkpoint'),
  'checkpoint': PosixPath('/home/jovyan/wor

In [11]:
# One AverageMeter per tracked loss term: the total loss plus each component.
# The training loop resets them at the start of every epoch.
meters = {
    name: AverageMeter()
    for name in (
        "loss",
        "nitc_loss",
        "ss_loss",
        "citc_loss",
        "ritc_loss",
        "mlm_loss",
        "id_loss",
    )
}

# Best Rank-1 score seen so far and the (0-based) epoch that produced it.
best_rank_1, best_epoch = 0.0, 0

__Build dataloader__

In [12]:
# Build the loaders from the config and pull out the two used below.
dataloader = build_pedes_data(config)
train_loader, test_loader = dataloader['train_loader'], dataloader['test_loader']

# Size of the training set's person2text mapping; passed to the model
# constructor as the number of classes.
num_classes = len(train_loader.dataset.person2text)

__Build model__

In [13]:
# Instantiate the model and move it to the device chosen in the config
# (instead of a second hard-coded 'cuda'), so the device is defined in
# exactly one place and matches what the evaluation call receives.
model = clip_vitb(config, num_classes).to(config['device'])

# Load the weights referenced by the config. `load_result` is whatever
# load_checkpoint reports back; it is kept for manual inspection.
model, load_result = load_checkpoint(model, config)

__Build optimizer, learning rate scheduler, gradient scaler__

In [14]:
# Record the number of iterations per epoch in the config before building
# the schedule.
# NOTE(review): presumably cosine_scheduler reads this value to size the
# per-iteration learning-rate array -- confirm in misc.build.
config.schedule.niter_per_ep = len(train_loader)
# Learning-rate schedule; the training loop indexes it by global iteration.
lr_schedule = cosine_scheduler(config)
optimizer = build_optimizer(config, model)
# Gradient scaler used together with torch.autocast in the training loop.
scaler = torch.cuda.amp.GradScaler()
# Last expression of the cell: show the concrete types as a sanity check.
type(lr_schedule), type(optimizer), type(scaler)

(numpy.ndarray, torch.optim.adamw.AdamW, torch.cuda.amp.grad_scaler.GradScaler)

# Set up the wandb logger

In [15]:
# Set the notebook name wandb reads from the environment, then log in.
os.environ['WANDB_NOTEBOOK_NAME'] = 'TBPS-CLIP_training.ipynb'
wandb.login()

[34m[1mwandb[0m: Currently logged in as: [33mdung-vo20csehcmut[0m ([33mtuandung[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [16]:
# Close any run still open from a previous execution of this cell, then
# start a fresh run that records the full training configuration.
wandb.finish()
run = wandb.init(project="TBPS-CLIP_experiment_9_11", name="training_" + '1', config=config)

# Training loop

In [None]:
# Training driver: for every epoch, run one pass over the training loader
# with mixed precision, evaluate on the test loader, log to wandb, and save
# a checkpoint whenever Rank-1 improves on the best value seen so far.
it = 0  # global iteration counter; indexes the per-iteration LR schedule
logger = run
num_batches = len(train_loader)

# `.module` only exists on wrapped models (e.g. DistributedDataParallel).
# No such wrapping is visible in this notebook, so fall back to the model
# itself instead of failing with AttributeError at evaluation/save time.
bare_model = getattr(model, 'module', model)

# Every meter except the total is fed from the model's output dict.
component_names = [name for name in meters if name != 'loss']

for epoch in range(config.schedule.epoch):
    start_time = time.time()
    for meter in meters.values():
        meter.reset()
    model.train()

    for i, batch in enumerate(train_loader):
        # Apply the scheduled learning rate, scaled per parameter group.
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr_schedule[it] * param_group['ratio']

        # Ramp alpha linearly from 0 towards softlabel_ratio over the first
        # epoch; use the full ratio afterwards.
        if epoch == 0:
            alpha = config.model.softlabel_ratio * min(1.0, i / num_batches)
        else:
            alpha = config.model.softlabel_ratio

        with torch.autocast(device_type='cuda'):
            ret = model(batch, alpha)
            # Total loss is the sum of every output whose key mentions "loss".
            loss = sum(v for k, v in ret.items() if "loss" in k)

        # Feed the meters plain Python floats so that neither the meters nor
        # wandb hold on to tensors (which may carry autograd history).
        batch_size = batch['image'].shape[0]
        meters['loss'].update(loss.item(), batch_size)
        for name in component_names:
            meters[name].update(float(ret.get(name, 0)), batch_size)
        logger.log({
            'epoch': epoch,
            'step': i,
            **{k: v.avg for k, v in meters.items()}
        })

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        model.zero_grad()
        optimizer.zero_grad()
        it += 1

        if (i + 1) % config.log.print_period == 0:
            info_str = f"Epoch[{epoch + 1}] Iteration[{i + 1}/{num_batches}]"
            # Only report loss terms that are active (non-zero).
            for k, v in meters.items():
                if v.val != 0:
                    info_str += f", {k}: {v.val:.4f}"
            # Report the LR of the last parameter group, read explicitly
            # rather than through the leaked loop variable.
            info_str += f", Base Lr: {optimizer.param_groups[-1]['lr']:.2e}"
            print(info_str)

    end_time = time.time()
    epoch_seconds = end_time - start_time
    # max(..., 1) keeps this well-defined even for an empty loader, where
    # the loop index would never have been assigned.
    time_per_batch = epoch_seconds / max(num_batches, 1)
    print("Epoch {} done. Time per batch: {:.3f}[s] Speed: {:.1f}[samples/s]"
          .format(epoch + 1, time_per_batch, train_loader.batch_size / time_per_batch))

    eval_result = test(
        bare_model, dataloader['test_loader'], 77, config.device)
    # `mean_ap` rather than `map`, which would shadow the builtin.
    rank_1, rank_5, rank_10, mean_ap = (
        eval_result['r1'], eval_result['r5'], eval_result['r10'], eval_result['mAP'])
    logger.log({
        'epoch': epoch,
        'rank_1': rank_1,
        'rank_5': rank_5,
        'rank_10': rank_10,
        'mAP': mean_ap,
        # 'epoch_time' now holds the wall-clock duration of the whole epoch;
        # the per-batch figure it used to carry is logged under its own key.
        'epoch_time': epoch_seconds,
        'time_per_batch': time_per_batch,
    })

    print('Acc@1 {top1:.5f} Acc@5 {top5:.5f} Acc@10 {top10:.5f} mAP {mAP:.5f}'.format(top1=rank_1, top5=rank_5,
                                                                                      top10=rank_10, mAP=mean_ap))
    torch.cuda.empty_cache()

    # Keep only the checkpoint with the best Rank-1 accuracy.
    if best_rank_1 < rank_1:
        best_rank_1 = rank_1
        best_epoch = epoch

        save_obj = {
            'model': bare_model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'config': config,
        }
        # Make sure the target directory exists before writing.
        os.makedirs(config.model.saved_path, exist_ok=True)
        torch.save(save_obj, os.path.join(
            config.model.saved_path, 'checkpoint_best_9_11.pth'))

print(f"best Acc@1: {best_rank_1} at epoch {best_epoch + 1}")

Epoch[1] Iteration[1/4257], loss: 4.5276, nitc_loss: 1.7859, ss_loss: 1.2375, citc_loss: 0.0013, ritc_loss: 1.5028, Base Lr: 1.00e-06
Epoch[1] Iteration[2/4257], loss: 5.1933, nitc_loss: 1.9361, ss_loss: 1.2482, citc_loss: 0.0008, ritc_loss: 2.0082, Base Lr: 1.02e-06
Epoch[1] Iteration[3/4257], loss: 4.8473, nitc_loss: 1.9373, ss_loss: 1.2707, citc_loss: 0.0005, ritc_loss: 1.6388, Base Lr: 1.05e-06
Epoch[1] Iteration[4/4257], loss: 6.1631, nitc_loss: 2.7113, ss_loss: 1.2266, citc_loss: 0.0009, ritc_loss: 2.2244, Base Lr: 1.07e-06
Epoch[1] Iteration[5/4257], loss: 6.0129, nitc_loss: 2.5648, ss_loss: 1.2692, citc_loss: 0.0006, ritc_loss: 2.1783, Base Lr: 1.09e-06
Epoch[1] Iteration[6/4257], loss: 5.2094, nitc_loss: 2.0971, ss_loss: 1.2224, citc_loss: 0.0007, ritc_loss: 1.8891, Base Lr: 1.12e-06
Epoch[1] Iteration[7/4257], loss: 5.5480, nitc_loss: 2.1795, ss_loss: 1.2441, citc_loss: 0.0013, ritc_loss: 2.1231, Base Lr: 1.14e-06
Epoch[1] Iteration[8/4257], loss: 4.7426, nitc_loss: 1.8080, s