In [1]:
import os

import os.path as op
import torch
import numpy as np
import random
import time

from datasets import build_dataloader
from processor.processor import do_train
from utils.checkpoint import Checkpointer
from utils.iotools import save_train_configs
from utils.logger import setup_logger
from solver import build_optimizer, build_lr_scheduler
from model import build_model
from utils.metrics import Evaluator
from utils.options import get_args
from utils.comm import get_rank, synchronize


In [2]:
def set_seed(seed=0):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = True


In [3]:
import argparse

def get_temp_args():
    parser = argparse.ArgumentParser(description="IRRA Args")
    ######################## general settings ########################
    parser.add_argument("--local_rank", default=0, type=int)
    parser.add_argument("--name", default="baseline", help="experiment name to save")
    parser.add_argument("--output_dir", default="logs")
    parser.add_argument("--log_period", default=100)
    parser.add_argument("--eval_period", default=1)
    parser.add_argument("--val_dataset", default="test") # use val set when evaluate, if test use test set
    parser.add_argument("--resume", default=False, action='store_true')
    parser.add_argument("--resume_ckpt_file", default="", help='resume from ...')

    ######################## model general settings ########################
    parser.add_argument("--pretrain_choice", default='ViT-B/16') # whether use pretrained model
    parser.add_argument("--temperature", type=float, default=0.02, help="initial temperature value, if 0, don't use temperature")
    parser.add_argument("--img_aug", default=False, action='store_true')

    ## cross modal transfomer setting
    parser.add_argument("--cmt_depth", type=int, default=4, help="cross modal transformer self attn layers")
    parser.add_argument("--masked_token_rate", type=float, default=0.8, help="masked token rate for mlm task")
    parser.add_argument("--masked_token_unchanged_rate", type=float, default=0.1, help="masked token unchanged rate")
    parser.add_argument("--lr_factor", type=float, default=5.0, help="lr factor for random init self implement module")
    parser.add_argument("--MLM", default=False, action='store_true', help="whether to use Mask Language Modeling dataset")

    ######################## loss settings ########################
    parser.add_argument("--loss_names", default='sdm+id+mlm', help="which loss to use ['mlm', 'cmpm', 'id', 'itc', 'sdm']")
    parser.add_argument("--mlm_loss_weight", type=float, default=1.0, help="mlm loss weight")
    parser.add_argument("--id_loss_weight", type=float, default=1.0, help="id loss weight")
    
    ######################## vison trainsformer settings ########################
    parser.add_argument("--img_size", type=tuple, default=(384, 128))
    parser.add_argument("--stride_size", type=int, default=16)

    ######################## text transformer settings ########################
    parser.add_argument("--text_length", type=int, default=77)
    parser.add_argument("--vocab_size", type=int, default=49408)

    ######################## solver ########################
    parser.add_argument("--optimizer", type=str, default="Adam", help="[SGD, Adam, Adamw]")
    parser.add_argument("--lr", type=float, default=1e-5)
    parser.add_argument("--bias_lr_factor", type=float, default=2.)
    parser.add_argument("--momentum", type=float, default=0.9)
    parser.add_argument("--weight_decay", type=float, default=4e-5)
    parser.add_argument("--weight_decay_bias", type=float, default=0.)
    parser.add_argument("--alpha", type=float, default=0.9)
    parser.add_argument("--beta", type=float, default=0.999)
    
    ######################## scheduler ########################
    parser.add_argument("--num_epoch", type=int, default=60)
    parser.add_argument("--milestones", type=int, nargs='+', default=(20, 50))
    parser.add_argument("--gamma", type=float, default=0.1)
    parser.add_argument("--warmup_factor", type=float, default=0.1)
    parser.add_argument("--warmup_epochs", type=int, default=5)
    parser.add_argument("--warmup_method", type=str, default="linear")
    parser.add_argument("--lrscheduler", type=str, default="cosine")
    parser.add_argument("--target_lr", type=float, default=0)
    parser.add_argument("--power", type=float, default=0.9)

    ######################## dataset ########################
    parser.add_argument("--dataset_name", default="CUHK-PEDES", help="[CUHK-PEDES, ICFG-PEDES, RSTPReid]")
    parser.add_argument("--sampler", default="random", help="choose sampler from [idtentity, random]")
    parser.add_argument("--num_instance", type=int, default=4)
    parser.add_argument("--root_dir", default="./data")
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--test_batch_size", type=int, default=512)
    parser.add_argument("--num_workers", type=int, default=8)
    parser.add_argument("--test", dest='training', default=True, action='store_false')

    return parser

In [4]:
parser = get_temp_args()

In [5]:
args = parser.parse_args(args=[
    "--name","irra",
    "--img_aug","--MLM",
    "--batch_size","16",
    "--loss_names","sdm+mlm+id",
    "--dataset_name","AGTBPR-g",
    "--root_dir",r"F:\Datasets\AG-ReID.v1",
    "--num_epoch","60",
])

In [6]:
1

1

In [7]:
train_loader, val_img_loader, val_txt_loader, num_classes = build_dataloader(args)

In [9]:
train_loader.dataset.dataset[0]

(8,
 0,
 'F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train\\P0001T04041A0C3F10021.jpg',
 'The pedestrian in the image appears to be a male with short, dark hair. He is wearing a dark-colored shirt, blue jeans, and white shoes. He seems to be in motion, possibly walking or running, and is holding something in his hands, though the exact object is not clearly discernible.')

In [10]:
import os

In [11]:
os.path.split(train_loader.dataset.dataset[0][2])

('F:\\Datasets\\AG-ReID.v1\\AG-ReID\\bounding_box_train',
 'P0001T04041A0C3F10021.jpg')

In [12]:
os.pathsep

';'

In [13]:
os.path.split(os.path.split(train_loader.dataset.dataset[0][2])[0])

('F:\\Datasets\\AG-ReID.v1\\AG-ReID', 'bounding_box_train')

In [10]:
val_txt_loader.dataset.captions[0]

'The pedestrian in the image appears to be a woman with long, dark hair. She is wearing a floral dress with a pattern of various flowers. She has a mask covering her face, and she is carrying a black bag on her right side. Her posture suggests she is walking with a purpose, possibly heading towards or away from something.'

In [17]:
import json

In [21]:
with open(os.path.join(args.root_dir,"AG-ReID","agtbpr_text.json"),"r") as f:
    data = json.load(f)

In [22]:
data

[{'bounding_box_train_P0050T04051A0C0F8221': 'The pedestrian in the image appears to be a woman with long, dark hair. She is wearing a white top and black pants. She has a backpack on her back. Her posture suggests she is walking with a purpose, possibly heading towards or away from the camera.'},
 {'bounding_box_train_P0219T02210A2C3F2221': 'The pedestrian in the image appears to be an older male with a white beard and mustache. He is wearing a dark-colored hat, a dark-colored vest, and dark shorts. He is also carrying a dark-colored bag or pouch in his right hand. His posture is upright, and he seems to be walking forward with a purposeful stride.'},
 {'bounding_box_train_P0148T02210A1C0F24331': 'The pedestrian in the image appears to be a male with long, dark hair. He is wearing a dark-colored jacket or coat, and he seems to be in a seated or crouched position. He is holding a mobile device in his hands and has a bag or backpack beside him.'},
 {'bounding_box_train_P0288T04041A2C0F1

In [29]:
list(data[0].keys())[0]

'bounding_box_train_P0050T04051A0C0F8221'

In [23]:
text_dict = {}

In [30]:
text_dict = {list(d.keys())[0]:list(d.values())[0] for d in data}

In [32]:
text_dict[val_txt_loader.dataset.captions[0]]

'The pedestrian in the image appears to be a woman with long, dark hair. She is wearing a floral dress with a pattern of various flowers. She has a mask covering her face, and she is carrying a black bag on her right side. Her posture suggests she is walking with a purpose, possibly heading towards or away from something.'