In [1]:
import argparse
import collections
import torch
import numpy as np
import model.loss as module_loss
import model.metric as module_metric
from parse_config import ConfigParser
from utils.util import create_model, create_dataloader, create_trainer


In [2]:
# Pin every random number generator used here to one seed so runs are repeatable.
SEED = 125
np.random.seed(SEED)
torch.manual_seed(SEED)
# cuDNN: turn off the benchmark/autotune mode and request deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [3]:
# Command-line interface for the training run. Note that `args` is the parser
# object itself (not parsed arguments); it is handed to ConfigParser.from_args.
args = argparse.ArgumentParser(description='Emotion Reasoning in Daily Life')
# The help text states the real default ('train.json'); it previously claimed None.
args.add_argument('-c', '--config', default='train.json', type=str,
                  help='config file path (default: train.json)')
args.add_argument('-r', '--resume', default=None, type=str,
                  help='path to latest checkpoint (default: None)')
args.add_argument('-d', '--device', default=None, type=str,
                  help='indices of GPUs to enable (default: all)')

# custom cli options to modify configuration from default values given in json file.
# Each entry carries the CLI flags, the value type, and a `target` string
# (presumably a ';'-separated key path into the JSON config — confirm in ConfigParser).
CustomArgs = collections.namedtuple('CustomArgs', 'flags type target')
options = [
    CustomArgs(['--lr', '--learning_rate'], type=float, target='optimizer;args;lr'),
    CustomArgs(['--bs', '--batch_size'], type=int, target='data_loader;args;batch_size')
]

In [4]:
# Create the run configuration object from the CLI parser and the custom override options.
# NOTE(review): argparse normally reads sys.argv, which inside a Jupyter kernel holds the
# kernel's own arguments — confirm that ConfigParser.from_args copes with that.
config = ConfigParser.from_args(args, options)

jupyter


In [5]:
# Dump the underlying configuration mapping for inspection (reads the private `_config`
# attribute; the recorded output is an OrderedDict).
print(config._config)

OrderedDict([('name', 'Train_9_2022'), ('message', 'MEmoR Baseline'), ('anno_file', 'data/anno.json'), ('data_file', 'data/data.json'), ('id_file', 'data/train_id.txt'), ('emo_type', 'primary'), ('speakers', ['sheldon', 'leonard', 'howard', 'rajesh', 'penny', 'bernadette', 'amy', 'others']), ('audio', OrderedDict([('feature_file', 'data/features/audio_features.json'), ('feature_dim', 6373)])), ('text', OrderedDict([('bert_model_name', 'bert-large-uncased'), ('feature_file', 'data/features/text_features.json'), ('feature_dim', 1024)])), ('visual', OrderedDict([('frame_path', 'data/clips_frames'), ('faces_feature_dir', 'data/features/visual_features/face_features'), ('faces_names_dir', 'data/features/visual_features/face_names'), ('obj_feature_dir', 'data/features/visual_features/object_features'), ('env_feature_dir', 'data/features/visual_features/environment_features'), ('dim_env', 2048), ('dim_obj', 1230), ('dim_face', 1024)])), ('personality', OrderedDict([('used', True), ('anno_file

In [5]:
# Logger named 'train' obtained from the config; it is not referenced again in the
# cells visible here.
logger = config.get_logger('train')

# setup data_loader instances
# The recorded output shows the feature extractors being initialised and a progress bar
# over 6829 items while this runs (about a minute in the recorded run).
data_loader = create_dataloader(config)
# Validation loader obtained from the training loader's split_validation().
valid_data_loader = data_loader.split_validation()

Initializing VisualFeatureExtractor...
Initializing AudioFeatureExtracor...
Initializing TextFeatureExtractor...


  0%|▏                                                                              | 12/6829 [00:00<01:06, 102.97it/s]

Initializing PersonalityFeatureExtractor...
vectorize features.....


100%|█████████████████████████████████████████████████████████████████████████████| 6829/6829 [01:00<00:00, 113.10it/s]


dataset [MEmoRDataLoader] was created


In [6]:
# build model architecture from the config; the recorded output shows create_model
# reporting "model [AMER] was created" (nothing else is printed by this cell)
model = create_model(config)

model [AMER] was created


In [6]:
# NOTE(review): the recorded output of this cell is a NameError (`model` was undefined
# when it ran), and `annos` is read from `dataset` later in the notebook — this was
# probably meant to be dataset.annos[0]; confirm or remove the cell.
model.annos[0]

NameError: name 'model' is not defined

In [7]:
# Keep a handle on the dataset behind the loader so individual samples can be inspected.
dataset = data_loader.dataset

In [16]:
# Shape of the personality table; the recorded output is torch.Size([8, 118]).
# 8 equals the number of configured speakers — presumably one row per speaker (confirm).
dataset.personality_list.shape

torch.Size([8, 118])

In [18]:
# Use the first annotation record as the worked example for the cells below.
anno = dataset.annos[0]

In [20]:
# Pull out the fields of the example annotation that the sequence-building loop needs.
clip = anno['clip']                      # key used to index dataset.data
target_character = anno['character']     # compared against entries of on_characters below
target_moment = anno['moment']           # compared against seg_start / seg_end below
# NOTE: this is a reference to the list stored inside dataset.data, not a copy.
on_characters = dataset.data[clip]['on_character']

In [21]:
# Character indices listed for this clip, before the target character is added.
on_characters

[0, 1]

In [22]:
# Make sure the target character is in the list, then sort it.
# `on_characters` aliases the list held in dataset.data[clip]['on_character'], so an
# in-place append would silently modify the dataset; build a new list instead.
if target_character not in on_characters:
    on_characters = on_characters + [target_character]
on_characters = sorted(on_characters)

In [23]:
# The list after ensuring the target character is present and sorting
# (unchanged in the recorded output: [0, 1]).
on_characters

[0, 1]

In [26]:
# Alias the dataset as `self` so the loop below, written against `self.data` and
# `self.personality_list`, runs unchanged in the notebook (presumably pasted from the
# dataset class — confirm).
self = dataset

In [27]:
# Accumulators filled by the loop below, one entry per (character, segment) pair:
# one-hot speaker rows, segment indices, 0/1 target flags and personality rows.
# (`charcaters_seq` is misspelled but the same spelling is used in later cells.)
charcaters_seq, time_seq, target_loc, personality_seq = [], [], [], []

In [28]:
# Build one entry per (character, segment) pair for the example clip.
# The accumulators are reset here so that re-running this cell rebuilds them instead of
# appending a second copy of every row. (`charcaters_seq` keeps its original spelling
# because other cells refer to it by that name.)
charcaters_seq, time_seq, target_loc, personality_seq = [], [], [], []

# Loop-invariant lookups, hoisted out of the nested loops.
seg_starts = self.data[clip]['seg_start']
seg_ends = self.data[clip]['seg_end']
n_speakers = len(config['speakers'])

for character in on_characters:
    for ii in range(len(seg_starts)):
        # One-hot row over the configured speakers marking the current character.
        charcaters_seq.append([1 if i == character else 0 for i in range(n_speakers)])
        time_seq.append(ii)
        personality_seq.append(self.personality_list[character])
        # Flag the single position where the target character meets the segment whose
        # half-open interval [seg_start, seg_end) contains the target moment.
        is_target = character == target_character and seg_starts[ii] <= target_moment < seg_ends[ii]
        target_loc.append(1 if is_target else 0)

In [29]:
# Inspect the hand-built target flags; the recorded output contains a single 1.
target_loc

[0, 0, 1, 0, 0, 0]

In [44]:
# Shape of the text features of sample 0; recorded as torch.Size([6, 1024]).
# 1024 matches the text feature_dim in the config; 6 presumably corresponds to
# 2 characters x 3 segments (confirm).
dataset.text_features[0].shape

torch.Size([6, 1024])

In [41]:
# 0/1 flags stored alongside the text features of sample 0. In the recorded outputs the
# rows flagged 0 are the all-zero rows of dataset.text_features[0].
dataset.text_valids[0]

tensor([0, 1, 0, 1, 0, 1], dtype=torch.int8)

In [33]:
# Inspect the one-hot speaker rows: in the recorded output the first three rows select
# index 0 and the last three select index 1.
charcaters_seq

[[1, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0],
 [1, 0, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0],
 [0, 1, 0, 0, 0, 0, 0, 0]]

In [45]:
# 0/1 flags stored alongside the visual features of sample 0 (presumably marking which
# rows hold real features — confirm against the dataset code).
dataset.visual_valids[0]

tensor([1, 0, 1, 1, 1, 1], dtype=torch.int8)

In [47]:
# Shape of the visual features of sample 0; recorded as torch.Size([6, 4302]).
# 4302 equals dim_env + dim_obj + dim_face from the config (2048 + 1230 + 1024),
# so the three are presumably concatenated (confirm).
dataset.visual_features[0].shape

torch.Size([6, 4302])

In [48]:
# Target flags stored on the dataset for sample 0; the recorded values match the
# hand-built `target_loc` list shown earlier.
dataset.target_loc[0]

tensor([0, 0, 1, 0, 0, 0], dtype=torch.int8)

In [49]:
# Character count stored for sample 0; recorded as 2, the length of on_characters above.
dataset.n_character[0]

2

In [50]:
# Segment count stored for sample 0; recorded as 3.
dataset.seg_len[0]

3

In [52]:
# Pull a single batch from the loader to look at what it yields.
data = next(iter(data_loader)) 

In [54]:
# Unpack the twelve items of the batch directly; iterating `data` through an
# intermediate list comprehension only made a redundant copy.
# NOTE: this rebinds `target_loc`, replacing the hand-built list from the earlier cell.
target, U_v, U_a, U_t, U_p, M_v, M_a, M_t, target_loc, umask, seg_len, n_c = data

In [61]:
# Show the text feature values of sample 0; in the recorded output rows 0, 2 and 4 are all zeros.
dataset.text_features[0]

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.4642, -0.1232, -0.5972,  ..., -0.4113, -0.2036,  0.3357],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.4539, -0.2744, -0.1105,  ..., -0.0808,  0.0839,  0.0051],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.2883,  0.3014, -0.3497,  ..., -0.2069,  0.0710, -0.2053]])

In [9]:
from features import AudioFeatureExtractor, TextFeatureExtractor, VisualFeatureExtractor, PersonalityFeatureExtractor

In [10]:
# Stand-alone text feature extractor built from the same config, used for the
# comparison with AddTextFeatureExtractor below.
tfe = TextFeatureExtractor(config)

Initializing TextFeatureExtractor...


In [9]:
#tfe.features['S01E01_000+2']

--------------------------------------------

In [6]:
from features_add import AddTextFeatureExtractor

In [7]:
# Additional text feature extractor (from features_add) built from the same config.
a_tfe = AddTextFeatureExtractor(config)

Initializing Add_TextFeatureExtractor...


In [13]:
# Shape of the first element returned by the base extractor for clip "S01E01_000" with
# second argument 2; recorded as torch.Size([9, 1024]).
tfe.get_feature("S01E01_000", 2)[0].shape

torch.Size([9, 1024])

In [12]:
# Same query against the additional extractor; recorded as torch.Size([9, 24]) — the same
# 9 rows as the base extractor, with 24 columns instead of 1024.
a_tfe.get_feature("S01E01_000", 2)[0].shape

torch.Size([9, 24])

In [14]:
#a_tfe.data.keys()