In [1]:
import argparse
import collections
import random

import numpy as np
import torch

import model.loss as module_loss
import model.metric as module_metric
from parse_config import ConfigParser
from utils.util import create_model, create_dataloader, create_trainer


In [2]:
# Fix random seeds for reproducibility.
# Every RNG source the run may touch is seeded from the same constant:
# Python's built-in `random`, NumPy's legacy global RNG, and PyTorch.
SEED = 125
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
# Ask cuDNN for deterministic kernels and disable its auto-tuner, which
# may otherwise pick different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Command-line interface for the training run.
# NOTE: despite its name, `args` is the ArgumentParser itself, not parsed
# arguments; it is handed to ConfigParser.from_args in a later cell.
# Help strings use argparse's %(default)s so the advertised default always
# matches the real one.
args = argparse.ArgumentParser(description='Emotion Reasoning in Daily Life')
args.add_argument('-c', '--config', default='train.json', type=str,
                  help='config file path (default: %(default)s)')
args.add_argument('-r', '--resume', default=None, type=str,
                  help='path to latest checkpoint (default: %(default)s)')
args.add_argument('-d', '--device', default="0", type=str,
                  help='indices of GPUs to enable (default: %(default)s)')

# Custom CLI options that override values from the JSON config file.
# `flags` are the option strings, `type` the value type, and `target` a
# ';'-separated key path into the config.
CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')
options = [
    CustomArgs(['--lr', '--learning_rate'], type=float, target='optimizer;args;lr'),
    CustomArgs(['--bs', '--batch_size'], type=int, target='data_loader;args;batch_size')
]

In [4]:
# Build the run configuration from the argument parser and the custom
# override options defined above.
# NOTE(review): ConfigParser.from_args lives in parse_config (not shown here);
# presumably it parses sys.argv, which inside a Jupyter kernel carries the
# kernel's own arguments -- confirm it copes with that.
config = ConfigParser.from_args(args, options)

jupyter


In [5]:
# Tag the experiment name so this run is distinguishable from the baseline.
# The guard keeps the cell idempotent: re-running it must not append the
# suffix a second time.
# NOTE(review): this mutates the private `_config` mapping after the config
# object was built -- confirm anything derived from the name at construction
# time picks up the change.
if not config._config['name'].endswith("_add_text"):
    config._config['name'] += "_add_text"

In [6]:
# Dump the resolved configuration (read straight from the private `_config`
# mapping) to verify the name suffix and the loaded settings.
print(config._config)

OrderedDict([('name', 'Train_9_2022_add_text'), ('message', 'MEmoR Baseline'), ('anno_file', 'data/anno.json'), ('data_file', 'data/data.json'), ('id_file', 'data/train_id.txt'), ('emo_type', 'primary'), ('speakers', ['sheldon', 'leonard', 'howard', 'rajesh', 'penny', 'bernadette', 'amy', 'others']), ('audio', OrderedDict([('feature_file', 'data/features/audio_features.json'), ('feature_dim', 6373)])), ('text', OrderedDict([('bert_model_name', 'bert-large-uncased'), ('feature_file', 'data/features/text_features.json'), ('feature_dim', 1024)])), ('visual', OrderedDict([('frame_path', 'data/clips_frames'), ('faces_feature_dir', 'data/features/visual_features/face_features'), ('faces_names_dir', 'data/features/visual_features/face_names'), ('obj_feature_dir', 'data/features/visual_features/object_features'), ('env_feature_dir', 'data/features/visual_features/environment_features'), ('dim_env', 2048), ('dim_obj', 1230), ('dim_face', 1024)])), ('personality', OrderedDict([('used', True), ('

In [7]:
# Obtain a logger from the config object; 'train' is the argument passed to
# get_logger (presumably the logger name -- confirm in parse_config).
logger = config.get_logger('train')

In [8]:
from data_loader_add.data_loaders_add import AddMEmoRDataLoader

In [9]:
# Set up the data loader instances.
# The training loader is built from the config; the recorded output shows it
# initialising the visual/audio/text/personality/add-text feature extractors
# and then a "vectorize features" pass over 6829 items (about one minute).
data_loader = AddMEmoRDataLoader(config)
# The validation loader is derived from the training loader, so it must be
# created after it. (Split details live in the loader class, not shown here.)
valid_data_loader = data_loader.split_validation()

Initializing VisualFeatureExtractor...
Initializing AudioFeatureExtracor...
Initializing TextFeatureExtractor...
Initializing PersonalityFeatureExtractor...
Initializing Add_TextFeatureExtractor...


  0%|▏                                                                                                                       | 11/6829 [00:00<01:03, 107.57it/s]

vectorize features.....


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 6829/6829 [00:57<00:00, 119.51it/s]


In [10]:
from model_add.model_add import AddAMER

In [11]:
# Build the model architecture from the config.
# (Nothing is printed by this cell; the model is only constructed.)
model = AddAMER(config)

In [12]:
# Resolve the loss and the metric callables by name: the config stores
# attribute names that are looked up on the loss / metric modules.
criterion = getattr(module_loss, config['loss'])
metrics = [
    getattr(module_metric, metric_name)
    for metric_name in config['metrics']
]

In [13]:
from trainer_add import AddMEmoRTrainer

In [14]:
# Wire the model, loss, metrics and both data loaders into the trainer.
trainer = AddMEmoRTrainer(
    model,
    criterion,
    metrics,
    config=config,
    data_loader=data_loader,
    valid_data_loader=valid_data_loader,
)

In [15]:
# Sanity check: report whether the first model parameter lives on a CUDA
# device (the recorded output is True).
print(next(trainer.model.parameters()).is_cuda)

True


In [16]:
# Run training. The recorded log shows, per epoch, train and validation loss,
# accuracy, macro-F1 and weighted-F1, and a "Saving current best:
# model_best.pth" message whenever val_accuracy improves.
trainer.train()

    epoch          : 1
    loss           : 1.8144117669841444
    accuracy       : 0.33779274004683846
    macro_f1       : 0.24278287533285045
    weighted_f1    : 0.311579343113301
    val_loss       : 1.9760978187036793
    val_accuracy   : 0.2893772893772894
    val_macro_f1   : 0.23269435081561374
    val_weighted_f1: 0.2914119916370119
Saving current best: model_best.pth ...
Current best val_accuracy: 0.2893772893772894 at epoch 1, dir: 0320_222803
    epoch          : 2
    loss           : 1.1889612833321512
    accuracy       : 0.5986241217798595
    macro_f1       : 0.48688120993908823
    weighted_f1    : 0.5881880552767551
    val_loss       : 1.9866141586275825
    val_accuracy   : 0.33186813186813185
    val_macro_f1   : 0.2694636287353511
    val_weighted_f1: 0.3362851253293503
Saving current best: model_best.pth ...
Current best val_accuracy: 0.33186813186813185 at epoch 2, dir: 0320_222803
    epoch          : 3
    loss           : 0.8892597515982263
    accuracy    

In [17]:
# NOTE(review): in a Jupyter kernel, exit() asks the kernel to shut down.
# The cells below still use `data_loader` from an earlier cell, so with this
# call in place a top-to-bottom "Run All" presumably cannot reach them --
# confirm whether this is intentional or leftover scratch code.
exit()

In [18]:
# Pull a single batch from the training loader for inspection; the loader
# yields 13 fields, unpacked positionally.
(target,
 U_v, U_a, U_t, U_p,
 M_v, M_a, M_t,
 target_loc, umask, seg_len, n_c,
 U_t_add) = next(iter(data_loader))

In [19]:
# Inspect the shape of U_t (recorded output: torch.Size([8, 45, 1024])).
# Presumably the text features: the last dimension matches the text
# feature_dim of 1024 in the printed config -- confirm against the loader.
U_t.shape

torch.Size([8, 45, 1024])

In [20]:
# Display the raw U_t values; in the recorded output many rows are all zeros
# (presumably padded or absent entries -- confirm against the loader).
U_t

tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [-0.4034, -0.0759, -0.4692,  ..., -0.3531, -0.2087,  0.2544],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [-0.2038, -0.2655, -0.0366,  ..., -0.4638, -0.0036,  0.0883],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0

In [21]:
# Inspect the shape of the additional feature tensor U_t_add
# (recorded output: torch.Size([8, 45, 24])).
U_t_add.shape

torch.Size([8, 45, 24])

In [22]:
import torch.nn as nn

In [23]:
# Two-layer MLP encoder for the additional features: 24 -> 512 -> 64.
# The input width of 24 matches the last dimension of U_t_add shown above;
# the hidden and output widths are expressed relative to D_e = 128.
enc_t_add = nn.Sequential(
    nn.Linear(in_features=24, out_features=512),   # 512 == 4 * D_e
    nn.ReLU(),
    nn.Linear(in_features=512, out_features=64),   # 64 == D_e // 2
)