In [1]:
import os

import os.path as op
import torch
import numpy as np
import random
import time

from datasets import build_dataloader
from processor.processor import do_train
from utils.checkpoint import Checkpointer
from utils.iotools import save_train_configs
from utils.logger import setup_logger
from solver import build_optimizer, build_lr_scheduler
from model import build_model
from utils.metrics import Evaluator
from utils.options import get_args
from utils.comm import get_rank, synchronize

In [2]:
def set_seed(seed=0):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


In [3]:
import argparse

def get_temp_args():
    parser = argparse.ArgumentParser(description="IRRA Args")
    ######################## general settings ########################
    parser.add_argument("--local_rank", default=0, type=int)
    parser.add_argument("--name", default="baseline", help="experiment name to save")
    parser.add_argument("--output_dir", default="logs")
    parser.add_argument("--log_period", default=100)
    parser.add_argument("--eval_period", default=1)
    parser.add_argument("--val_dataset", default="test") # use val set when evaluate, if test use test set
    parser.add_argument("--resume", default=False, action='store_true')
    parser.add_argument("--resume_ckpt_file", default="", help='resume from ...')

    ######################## model general settings ########################
    parser.add_argument("--pretrain_choice", default='ViT-B/16') # whether use pretrained model
    parser.add_argument("--temperature", type=float, default=0.02, help="initial temperature value, if 0, don't use temperature")
    parser.add_argument("--img_aug", default=False, action='store_true')

    ## cross modal transfomer setting
    parser.add_argument("--cmt_depth", type=int, default=4, help="cross modal transformer self attn layers")
    parser.add_argument("--masked_token_rate", type=float, default=0.8, help="masked token rate for mlm task")
    parser.add_argument("--masked_token_unchanged_rate", type=float, default=0.1, help="masked token unchanged rate")
    parser.add_argument("--lr_factor", type=float, default=5.0, help="lr factor for random init self implement module")
    parser.add_argument("--MLM", default=False, action='store_true', help="whether to use Mask Language Modeling dataset")

    ######################## loss settings ########################
    parser.add_argument("--loss_names", default='sdm+id+mlm', help="which loss to use ['mlm', 'cmpm', 'id', 'itc', 'sdm']")
    parser.add_argument("--mlm_loss_weight", type=float, default=1.0, help="mlm loss weight")
    parser.add_argument("--id_loss_weight", type=float, default=1.0, help="id loss weight")
    
    ######################## vison trainsformer settings ########################
    parser.add_argument("--img_size", type=tuple, default=(384, 128))
    parser.add_argument("--stride_size", type=int, default=16)

    ######################## text transformer settings ########################
    parser.add_argument("--text_length", type=int, default=77)
    parser.add_argument("--vocab_size", type=int, default=49408)

    ######################## solver ########################
    parser.add_argument("--optimizer", type=str, default="Adam", help="[SGD, Adam, Adamw]")
    parser.add_argument("--lr", type=float, default=1e-5)
    parser.add_argument("--bias_lr_factor", type=float, default=2.)
    parser.add_argument("--momentum", type=float, default=0.9)
    parser.add_argument("--weight_decay", type=float, default=4e-5)
    parser.add_argument("--weight_decay_bias", type=float, default=0.)
    parser.add_argument("--alpha", type=float, default=0.9)
    parser.add_argument("--beta", type=float, default=0.999)
    
    ######################## scheduler ########################
    parser.add_argument("--num_epoch", type=int, default=60)
    parser.add_argument("--milestones", type=int, nargs='+', default=(20, 50))
    parser.add_argument("--gamma", type=float, default=0.1)
    parser.add_argument("--warmup_factor", type=float, default=0.1)
    parser.add_argument("--warmup_epochs", type=int, default=5)
    parser.add_argument("--warmup_method", type=str, default="linear")
    parser.add_argument("--lrscheduler", type=str, default="cosine")
    parser.add_argument("--target_lr", type=float, default=0)
    parser.add_argument("--power", type=float, default=0.9)

    ######################## dataset ########################
    parser.add_argument("--dataset_name", default="CUHK-PEDES", help="[CUHK-PEDES, ICFG-PEDES, RSTPReid]")
    parser.add_argument("--sampler", default="random", help="choose sampler from [idtentity, random]")
    parser.add_argument("--num_instance", type=int, default=4)
    parser.add_argument("--root_dir", default="./data")
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--test_batch_size", type=int, default=512)
    parser.add_argument("--num_workers", type=int, default=8)
    parser.add_argument("--test", dest='training', default=True, action='store_false')

    return parser

In [4]:
parser = get_temp_args()

In [5]:
args = parser.parse_args(args=[
    "--name","irra",
    "--img_aug","--itx+proj+fusion",
    "--batch_size","6",
    "--loss_names","itc",
    "--dataset_name","AGTBPR",
    "--root_dir",r"F:\Datasets\AG-ReID.v1",
    "--num_epoch","60",
])

usage: ipykernel_launcher.py [-h] [--local_rank LOCAL_RANK] [--name NAME] [--output_dir OUTPUT_DIR]
                             [--log_period LOG_PERIOD] [--eval_period EVAL_PERIOD] [--val_dataset VAL_DATASET]
                             [--resume] [--resume_ckpt_file RESUME_CKPT_FILE] [--pretrain_choice PRETRAIN_CHOICE]
                             [--temperature TEMPERATURE] [--img_aug] [--cmt_depth CMT_DEPTH]
                             [--masked_token_rate MASKED_TOKEN_RATE]
                             [--masked_token_unchanged_rate MASKED_TOKEN_UNCHANGED_RATE] [--lr_factor LR_FACTOR]
                             [--MLM] [--loss_names LOSS_NAMES] [--mlm_loss_weight MLM_LOSS_WEIGHT]
                             [--id_loss_weight ID_LOSS_WEIGHT] [--img_size IMG_SIZE] [--stride_size STRIDE_SIZE]
                             [--text_length TEXT_LENGTH] [--vocab_size VOCAB_SIZE] [--optimizer OPTIMIZER] [--lr LR]
                             [--bias_lr_factor BIAS_LR_FACTOR] [--momen

SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [6]:
train_loader, val_img_loader, val_txt_loader, num_classes = build_dataloader(args)

In [9]:
for i in train_loader:
    break

i

{'mlm_ids': tensor([[49406,   518, 49405,  ...,     0,     0,     0],
         [49406,   518, 18256,  ...,     0,     0,     0],
         [49406,   518, 18256,  ...,     0,     0,     0],
         ...,
         [49406,   518, 18256,  ...,     0,     0,     0],
         [49406,   518, 18256,  ...,     0,     0,     0],
         [49406, 40792, 18256,  ...,     0,     0,     0]]),
 'mlm_labels': tensor([[    0,     0, 18256,  ...,     0,     0,     0],
         [    0,     0,     0,  ...,     0,     0,     0],
         [    0,     0,     0,  ...,     0,     0,     0],
         ...,
         [    0,     0,     0,  ...,     0,     0,     0],
         [    0,     0,     0,  ...,     0,     0,     0],
         [    0,   518,     0,  ...,     0,     0,     0]]),
 'caption_ids': tensor([[49406,   518, 49405,  ...,     0,     0,     0],
         [49406,   518, 18256,  ...,     0,     0,     0],
         [49406,   518, 18256,  ...,     0,     0,     0],
         ...,
         [49406,   518, 18256

In [10]:
for i in val_img_loader:
    break

In [11]:
i

[tensor([146, 146, 146, 146, 146, 146,  30,  30,  30,  30,  30,  30, 111, 111,
         111, 111]),
 tensor([[[[ 0.2661,  0.2807,  0.3099,  ...,  0.7041,  0.7333,  0.8063],
           [ 0.2369,  0.2807,  0.3099,  ...,  0.6603,  0.6019,  0.6603],
           [ 0.2807,  0.2953,  0.2807,  ...,  0.7041,  0.6603,  0.6603],
           ...,
           [ 0.4997,  0.4559,  0.3391,  ...,  0.4121,  0.3537,  0.2807],
           [ 0.3099,  0.2953,  0.2661,  ...,  0.5289,  0.4851,  0.4851],
           [ 0.1785,  0.1785,  0.1931,  ...,  0.6165,  0.6165,  0.6165]],
 
          [[ 0.3490,  0.3640,  0.3940,  ...,  0.6942,  0.7242,  0.7992],
           [ 0.3190,  0.3640,  0.3940,  ...,  0.6491,  0.5891,  0.6491],
           [ 0.3640,  0.3790,  0.3640,  ...,  0.6942,  0.6491,  0.6491],
           ...,
           [ 0.5141,  0.4691,  0.3490,  ...,  0.5741,  0.5141,  0.4390],
           [ 0.3190,  0.3040,  0.2740,  ...,  0.6942,  0.6491,  0.6491],
           [ 0.1839,  0.1839,  0.1989,  ...,  0.8142,  0.8142,

In [7]:
len(train_loader.dataset.dataset)

8154

In [8]:
train_loader.dataset.dataset[0]

(8,
 0,
 'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0001T04041A0C0F121.jpg',
 'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0001T04041A0C3F10021.jpg',
 'The pedestrian in the image appears to be a male with long hair, wearing a dark-colored hoodie with some text on it. He is also wearing blue jeans and white shoes. He seems to be carrying a white bag or purse in his right hand.')

# Debug Dataset in detail

In [10]:
from datasets.build import __factory

In [11]:
import glob
import re
import mat4py
import pandas as pd
import torch
import logging

import json

import os.path as osp

from prettytable import PrettyTable


In [12]:
dataset_dir = 'AG-ReID'

In [13]:
args.root_dir

'F:\\Datasets\\AG-ReID.v1'

In [14]:
dataset_dir = osp.join(args.root_dir, dataset_dir)

In [15]:
train_dir = osp.join(dataset_dir, 'bounding_box_train')

In [16]:
query_dir = osp.join(dataset_dir, 'query_all_c0')
gallery_dir = osp.join(dataset_dir, 'bounding_box_test_all_c3')

In [14]:
dataset_dir

'F:\\Datasets\\AG-ReID.v1\\AG-ReID'

In [17]:
with open(osp.join(dataset_dir,"agtbpr_text.json"),"r") as f:
    data = json.load(f)

text_captions = {list(d.keys())[0]:list(d.values())[0] for d in data}

In [18]:
cid_range = [0] # 0 means the pic is getted by Aerials; 3 means Cameras.

pid_list = []
pid_container = set()    
image_id = 0
dataset = []
image_pids = []
image_paths = []
caption_pids = []
captions = []
camera_ids = []
gnd_img_paths = []

In [25]:
is_train=True

In [26]:
if is_train:
    dir_path = train_dir
else:
    dir_path = [query_dir,gallery_dir]

In [27]:
if is_train:
    img_paths = glob.glob(osp.join(dir_path, '*.jpg'))
else:
    img_paths = glob.glob(osp.join(dir_path[0], '*.jpg')) + glob.glob(osp.join(dir_path[1], '*.jpg'))

pattern_pid = re.compile(r'P([-\d]+)T([-\d]+)A([-\d]+)')
pattern_camid = re.compile(r'C([-\d]+)F([-\d]+)')

In [28]:
# rerange pids
for img_path in img_paths:
    fname = osp.split(img_path)[-1]
    pid_part1, pid_part2, pid_part3 = pattern_pid.search(fname).groups()
    pid = int(pid_part1 + pid_part2 + pid_part3)
    pid_list.append(pid)

In [29]:
# set Aerial-camera pair
pid_set = set(pid_list)
pid_key = {pid:key for key,pid in enumerate(pid_set)}
pid_dict = {key:[] for key in range(len(pid_key))}
pid_dict_ptr = {key:0 for key in range(len(pid_key))}
for img_path in img_paths:
    fname = osp.split(img_path)[-1]
    pid_part1, pid_part2, pid_part3 = pattern_pid.search(fname).groups()
    pid = int(pid_part1 + pid_part2 + pid_part3)
    pid = pid_key[pid]
    camid, frameid = pattern_camid.search(fname).groups()
    camid = int(camid)
    # purn the Camera images
    if camid not in cid_range:
        pid_dict[pid].append(img_path)

In [30]:
# process data
for img_path in img_paths:
    fname = osp.split(img_path)[-1]
    camid, frameid = pattern_camid.search(fname).groups()
    camid = int(camid)
    if camid not in cid_range:
        continue

    pid_part1, pid_part2, pid_part3 = pattern_pid.search(fname).groups()
    pid = int(pid_part1 + pid_part2 + pid_part3)
    pid = pid_key[pid]

    pid_container.add(pid)

    dir_path_tmp, fname_path_tmp = osp.split(img_path)
    fname_path_tmp = osp.splitext(fname_path_tmp)[0]
    _, dir_path_tmp = osp.split(dir_path_tmp)
    key = dir_path_tmp+"_"+fname_path_tmp

    gnd_img_name = pid_dict[pid][pid_dict_ptr[pid]]
    pid_dict_ptr[pid] = (pid_dict_ptr[pid]+1) % len(pid_dict[pid])
    if is_train:
        dataset.append((pid, image_id, img_path, gnd_img_name, text_captions[key]))
        image_id += 1

In [32]:
dataset[0]

TypeError: 'AG_ReID' object is not subscriptable

In [31]:
dataset = __factory[args.dataset_name](root=args.root_dir,name = args.dataset_name)

In [23]:
len(pid_dict)

{0: ['F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F2281.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F2311.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F2341.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F2371.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F2401.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F2461.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F2521.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F3151.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F3181.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F3211.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0271T04051A2C3F3241.jpg',
  'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P027

In [34]:
dataset.test

{'image_pids': [146,
  146,
  146,
  146,
  146,
  146,
  30,
  30,
  30,
  30,
  30,
  30,
  111,
  111,
  111,
  111,
  111,
  111,
  125,
  125,
  125,
  125,
  125,
  125,
  7,
  7,
  7,
  7,
  7,
  7,
  92,
  92,
  92,
  92,
  92,
  92,
  120,
  120,
  120,
  120,
  120,
  120,
  98,
  98,
  98,
  98,
  98,
  98,
  121,
  121,
  121,
  121,
  121,
  121,
  126,
  126,
  126,
  126,
  126,
  126,
  175,
  175,
  175,
  175,
  175,
  175,
  59,
  59,
  59,
  59,
  59,
  59,
  27,
  27,
  27,
  27,
  27,
  27,
  49,
  49,
  49,
  49,
  49,
  49,
  85,
  85,
  85,
  85,
  85,
  85,
  187,
  187,
  187,
  187,
  187,
  187,
  145,
  145,
  145,
  145,
  145,
  145,
  23,
  23,
  23,
  23,
  23,
  23,
  24,
  24,
  24,
  24,
  24,
  24,
  31,
  31,
  31,
  31,
  31,
  31,
  93,
  93,
  93,
  93,
  93,
  93,
  47,
  47,
  47,
  47,
  47,
  47,
  70,
  70,
  70,
  70,
  70,
  70,
  127,
  127,
  127,
  127,
  127,
  127,
  25,
  25,
  25,
  25,
  25,
  25,
  9,
  9,
  9,
  9,
  9,
  9,
  

In [19]:
dataset.train[0]

(8,
 0,
 'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0001T04041A0C0F121.jpg',
 'The pedestrian in the image appears to be a male with long hair, wearing a dark-colored hoodie with some text on it. He is also wearing blue jeans and white shoes. He seems to be carrying a white bag or purse in his right hand.')

In [20]:
pid_dict = {key:[] for key in list(set([i[0] for i in dataset.train]))}

In [21]:
for item in dataset.train:
    pid_dict[item]

{0: [],
 1: [],
 2: [],
 3: [],
 4: [],
 5: [],
 6: [],
 7: [],
 8: [],
 9: [],
 10: [],
 11: [],
 12: [],
 13: [],
 14: [],
 15: [],
 16: [],
 17: [],
 18: [],
 19: [],
 20: [],
 21: [],
 22: [],
 23: [],
 24: [],
 25: [],
 26: [],
 27: [],
 28: [],
 29: [],
 30: [],
 31: [],
 32: [],
 33: [],
 34: [],
 35: [],
 36: [],
 37: [],
 38: [],
 39: [],
 40: [],
 41: [],
 42: [],
 43: [],
 44: [],
 45: [],
 46: [],
 47: [],
 48: [],
 49: [],
 50: [],
 51: [],
 52: [],
 53: [],
 54: [],
 55: [],
 56: [],
 57: [],
 58: [],
 59: [],
 60: [],
 61: [],
 62: [],
 63: [],
 64: [],
 65: [],
 66: [],
 67: [],
 68: [],
 69: [],
 70: [],
 71: [],
 72: [],
 73: [],
 74: [],
 75: [],
 76: [],
 77: [],
 78: [],
 79: [],
 80: [],
 81: [],
 82: [],
 83: [],
 84: [],
 85: [],
 86: [],
 87: [],
 88: [],
 89: [],
 90: [],
 91: [],
 92: [],
 93: [],
 94: [],
 95: [],
 96: [],
 97: [],
 98: [],
 99: [],
 100: [],
 101: [],
 102: [],
 103: [],
 104: [],
 105: [],
 106: [],
 107: [],
 108: [],
 109: [],
 110: [],
