# Experimenting with OadTR

This notebook trains and evaluates the OadTR model on the METEOR dataset.

## imports and device specification

In [1]:
import argparse
import datetime
import json
import os
import random
import sys
import time
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
from torchsummary import summary

import transformer_models
import util as utl
import utils
from custom_dataset import METEORDataLayer
from test import test_one_epoch
from train import train_one_epoch, evaluate

# use the GPU when CUDA is usable, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'selected device is {device}')

ModuleNotFoundError: No module named 'util'

## training

### set up

In [None]:
# set up args --> parser will collapse to default values

def str2bool(string):
    """Return True only when *string* spells 'true' (any casing), else False."""
    return string.lower() == 'true'


def get_args_parser():
    """Build the argument parser that holds every training/evaluation option.

    The parser is created with ``add_help=False``. The default of ``--device``
    is read from the module-level ``device`` variable at the time this
    function is called, and ``--resize_feature`` is converted with
    ``str2bool``.

    Returns:
        argparse.ArgumentParser: parser with all options registered.
    """
    parser = argparse.ArgumentParser('Set IDU Online Detector', add_help=False)

    # * Optimisation / run control
    parser.add_argument('--lr', default=1e-4, type=float)     # 1e-4
    parser.add_argument('--batch_size', default=128, type=int)
    parser.add_argument('--weight_decay', default=1e-4, type=float)
    parser.add_argument('--epochs', default=5, type=int)
    parser.add_argument('--resize_feature', default=False, type=str2bool, help='run resize prepare_data or not')
    parser.add_argument('--lr_drop', default=1, type=int)
    parser.add_argument('--clip_max_norm', default=1., type=float,
                        help='gradient clipping max norm')
    parser.add_argument('--dataparallel', action='store_true', help='multi-gpus for training')
    parser.add_argument('--removelog', action='store_true', help='remove old log')

    # * Network
    parser.add_argument('--version', default='v3', type=str,
                        help="model version (e.g. 'v3')")
    # decoder
    parser.add_argument('--query_num', default=8, type=int,
                        help="Number of query_num (prediction)")
    parser.add_argument('--decoder_layers', default=5, type=int,
                        help="Number of decoder_layers")
    parser.add_argument('--decoder_embedding_dim', default=1024, type=int,   # 1024
                        help="decoder_embedding_dim")
    parser.add_argument('--decoder_embedding_dim_out', default=1024, type=int,  # 256 512 1024
                        help="decoder_embedding_dim_out")
    parser.add_argument('--decoder_attn_dropout_rate', default=0.1, type=float,  # 0.1=0.2
                        help="rate of decoder_attn_dropout_rate")
    parser.add_argument('--decoder_num_heads', default=4, type=int,  # 8 4
                        help="decoder_num_heads")
    parser.add_argument('--classification_pred_loss_coef', default=0.5, type=float)  # 0.5

    # encoder
    parser.add_argument('--enc_layers', default=64, type=int,
                        help="Number of enc_layers")
    parser.add_argument('--lr_backbone', default=1e-4, type=float,    # 2e-4
                        help="lr_backbone")
    parser.add_argument('--feature', default='ResNet50', type=str,
                        help="feature type")
    parser.add_argument('--dim_feature', default=2048, type=int,
                        help="input feature dims")
    parser.add_argument('--patch_dim', default=1, type=int,
                        help="patch dimension")
    parser.add_argument('--embedding_dim', default=1024, type=int,  # 1024
                        help="embedding dimension")
    parser.add_argument('--num_heads', default=4, type=int,
                        help="number of attention heads")
    parser.add_argument('--num_layers', default=3, type=int,
                        help="number of layers")
    parser.add_argument('--attn_dropout_rate', default=0.1, type=float,
                        help="attn dropout")
    parser.add_argument('--positional_encoding_type', default='learned', type=str,
                        help="fixed or learned")  # learned  fixed

    parser.add_argument('--hidden_dim', default=1024, type=int,  # 512 1024
                        help="Size of the embeddings")
    parser.add_argument('--dropout_rate', default=0.1, type=float,
                        help="Dropout applied ")

    parser.add_argument('--numclass', default=7, type=int,
                        help="Number of class")

    # * Loss coefficients
    parser.add_argument('--classification_x_loss_coef', default=0.3, type=float)
    parser.add_argument('--classification_h_loss_coef', default=1, type=float)
    parser.add_argument('--similar_loss_coef', default=0.1, type=float)   # 0.3
    parser.add_argument('--margin', default=1., type=float)

    # dataset parameters
    parser.add_argument('--dataset_file', type=str, default='../../pvc-meteor/features/data_info_new.json')
    parser.add_argument('--frozen_weights', type=str, default=None)
    # Disabled THUMOS-specific options, kept for reference:
    # parser.add_argument('--thumos_data_path', type=str, default='/home/dancer/mycode/Temporal.Online.Detection/'
    #                                                             'Online.TRN.Pytorch/preprocess/')
    # parser.add_argument('--thumos_anno_path', type=str, default='data/thumos_{}_anno.pickle')
    parser.add_argument('--remove_difficult', action='store_true')
    parser.add_argument('--device', default=device,
                        help='device to use for training / testing')

    parser.add_argument('--output_dir', default='models',
                        help='path where to save, empty for no saving')
    parser.add_argument('--seed', default=20, type=int)
    parser.add_argument('--resume', default='', help='resume from checkpoint')
    parser.add_argument('--start_epoch', default=1, type=int, metavar='N',
                        help='start epoch')

    parser.add_argument('--eval', action='store_true')
    parser.add_argument('--num_workers', default=8, type=int)

    # distributed training parameters
    parser.add_argument('--world_size', default=1, type=int,
                        help='number of distributed processes')
    parser.add_argument('--dist_url', default='tcp://127.0.0.1:12342', help='url used to set up distributed training')
    # 'env://'
    return parser

# Parse an explicit empty argument list so every option takes its default.
# Inside a notebook, sys.argv carries the kernel's own launch arguments; a bare
# parse_args() would read those, fail on the unrecognized options and exit.
args = get_args_parser().parse_args(args=[])

In [None]:
# build the v3 vision transformer from the parsed args and place it on the
# selected device (Module.to returns the module itself)
model = transformer_models.VisionTransformer_v3(
    args=args,
    img_dim=args.enc_layers,
    patch_dim=args.patch_dim,
    out_dim=args.numclass,
    embedding_dim=args.embedding_dim,
    num_heads=args.num_heads,
    num_layers=args.num_layers,
    hidden_dim=args.hidden_dim,
    dropout_rate=args.dropout_rate,
    attn_dropout_rate=args.attn_dropout_rate,
    num_channels=args.dim_feature,
    positional_encoding_type=args.positional_encoding_type,
).to(device)

# print an overview of the model
summary(model)

In [None]:
# names of the loss terms handed to the criterion (encoder / decoder labels)
loss_need = ['labels_encoder', 'labels_decoder']
criterion = utl.SetCriterion(
    num_classes=args.numclass,
    losses=loss_need,
    args=args,
).to(device)

# define optimizer and lr_scheduler
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=args.lr,
    weight_decay=args.weight_decay,
)
# the step size of the LR decay is args.lr_drop scheduler steps
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=args.lr_drop)

In [None]:
# load training data
dataset_train = METEORDataLayer(phase='train', args=args)

# define train sampler: draws the dataset indices in random order
sampler_train = torch.utils.data.RandomSampler(dataset_train)

# group the sampled indices into mini-batches of args.batch_size; the final
# partial batch is dropped so every training batch has the same size
batch_sampler_train = torch.utils.data.BatchSampler(
    sampler_train, args.batch_size, drop_last=True)

# define data loader; DataLoader is reached through torch.utils.data because
# the bare name is not imported in this notebook
data_loader_train = torch.utils.data.DataLoader(
    dataset_train,
    batch_sampler=batch_sampler_train,
    pin_memory=True,
    num_workers=args.num_workers,
)

### training loop

In [None]:
# set up logger
# The log file is written into the output directory; the launching command
# line is passed along to the logger setup.
output_dir = Path(args.output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
command = 'python ' + ' '.join(sys.argv)
logger = utl.setup_logger(os.path.join(args.output_dir, 'log_dist.txt'), command=command)

# The argument parser defines no `distributed` option, so a missing attribute
# is treated as a single-process run.
is_distributed = getattr(args, 'distributed', False)

# Checkpoints store the bare model: unwrap it when it sits inside a wrapper
# that exposes `.module`, otherwise use the model itself.
model_without_ddp = getattr(model, 'module', model)

# save args
for arg in vars(args):
    logger.output_print("{}:{}".format(arg, getattr(args, arg)))

# trainable parameters in model
n_parameters = sum(p.numel() for p in model.parameters() if p.requires_grad)
logger.output_print('number of params: {}'.format(n_parameters))

# NOTE(review): `data_loader_val` is used below but is not defined in the
# cells shown above; it has to be created before this cell is run.
start_time = time.time()
# iterates over epochs args.start_epoch .. args.epochs - 1
for epoch in range(args.start_epoch, args.epochs):
    if is_distributed:
        sampler_train.set_epoch(epoch)
    train_stats = train_one_epoch(
        model, criterion, data_loader_train, optimizer, device, epoch,
        args.clip_max_norm)

    lr_scheduler.step()
    if args.output_dir:
        # 'checkpoint.pth' is overwritten every epoch
        checkpoint_paths = [output_dir / 'checkpoint.pth']
        # extra checkpoint before LR drop and every 100 epochs
        if (epoch + 1) % args.lr_drop == 0 or (epoch + 1) % 100 == 0:
            checkpoint_paths.append(output_dir / f'checkpoint{epoch:04}.pth')
        for checkpoint_path in checkpoint_paths:
            utils.save_on_master({
                'model': model_without_ddp.state_dict(),
                'optimizer': optimizer.state_dict(),
                'lr_scheduler': lr_scheduler.state_dict(),
                'epoch': epoch,
                'args': args,
            }, checkpoint_path)

    test_stats = evaluate(
        model, criterion, data_loader_val, device, logger, args, epoch, nprocs=utils.get_world_size()
    )

    # one JSON record per epoch, train/test stats told apart by key prefix
    log_stats = {**{f'train_{k}': v for k, v in train_stats.items()},
                 **{f'test_{k}': v for k, v in test_stats.items()},
                 'epoch': epoch,
                 'n_parameters': n_parameters}

    if args.output_dir and utils.is_main_process():
        # append so earlier epochs' records are kept
        with (output_dir / "log_train&test.txt").open("a") as f:
            f.write(json.dumps(log_stats) + "\n")

total_time = time.time() - start_time
total_time_str = str(datetime.timedelta(seconds=int(total_time)))
print('Training time {}'.format(total_time_str))


## evaluation

### load model