# Emotion Predictor

In [1]:
import time
import numpy as np
import tensorflow as tf
import h5py
from os import mkdir
from os.path import exists
from pytorch_transformers import RobertaTokenizer

from optimize import *
from model_utils import *
from model_emo_pred import EmotionPredictor, EmotionPredictorPlus, loss_function
from datasets import *


# Encoder architecture hyper-parameters (passed to the model constructor below).
num_layers, num_heads = 12, 12
d_model = 768
dff = 4 * d_model  # four times d_model
hidden_act = 'gelu'  # Use 'gelu' or 'relu'

# Regularisation / normalisation settings.
dropout_rate, layer_norm_eps = 0.1, 1e-5

# Embedding table sizes.
max_position_embed = 102
type_vocab_size = 2  # Segments

# Load the pretrained 'roberta-base' tokenizer; the model's vocabulary size is
# taken from it rather than hard-coded.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
vocab_size = tokenizer.vocab_size

# Sizes handed to the model constructor (num_comms is only used by the meed2+ variant).
num_emotions, num_comms = 41, 7
max_length = 100  # Maximum number of tokens

2021-11-17 17:37:24.131255: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1


In [2]:
# Define the model. Two variants exist; keep exactly one of them active.
#
# meed2 variant (no num_comms argument):
# emotion_predictor = EmotionPredictor(num_layers, d_model, num_heads, dff, hidden_act,
#     dropout_rate, layer_norm_eps, max_position_embed, type_vocab_size, vocab_size, num_emotions)

# meed2+ variant (same arguments plus num_comms):
emotion_predictor = EmotionPredictorPlus(
    num_layers,
    d_model,
    num_heads,
    dff,
    hidden_act,
    dropout_rate,
    layer_norm_eps,
    max_position_embed,
    type_vocab_size,
    vocab_size,
    num_emotions,
    num_comms,
)

2021-11-17 17:37:26.936822: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcuda.so.1
2021-11-17 17:37:27.107386: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 0 with properties: 
pciBusID: 0000:04:00.0 name: TITAN X (Pascal) computeCapability: 6.1
coreClock: 1.531GHz coreCount: 28 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 447.48GiB/s
2021-11-17 17:37:27.109899: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 1 with properties: 
pciBusID: 0000:05:00.0 name: TITAN X (Pascal) computeCapability: 6.1
coreClock: 1.531GHz coreCount: 28 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 447.48GiB/s
2021-11-17 17:37:27.112396: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1716] Found device 2 with properties: 
pciBusID: 0000:84:00.0 name: TITAN X (Pascal) computeCapability: 6.1
coreClock: 1.531GHz coreCount: 28 deviceMemorySize: 11.91GiB deviceMemoryBandwidth: 447.48GiB/s
2021-11-17

In [3]:
# Build the meed2+ model with the project helper (brought in by one of the star imports).
# NOTE(review): presumably this creates the Keras variables so that save_weights has
# something to write -- confirm against the helper's definition.
build_emo_pred_plus_model(emotion_predictor, max_length, vocab_size)

2021-11-17 17:37:30.849086: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcublas.so.10


In [4]:
# Create (or truncate) the two empty HDF5 files that the weights are saved into.
# The target directory is created first so a fresh checkout does not fail, and
# each handle is closed right away so no file opened in "w" mode is left dangling
# when save_weights later writes to the same paths.
if not exists('weights'):
    mkdir('weights')

for weights_path in (
    "weights/roberta2emo_pred_embedder_ebp_red+.h5",
    "weights/roberta2emo_pred_encoder_red+.h5",
):
    with h5py.File(weights_path, "w"):
        pass

print('It works!')

It works!


In [5]:
# Dump the freshly built embedder and encoder sub-models to their HDF5 files.
for sub_model, weights_path in (
    (emotion_predictor.embedder, 'weights/roberta2emo_pred_embedder_ebp_red+.h5'),
    (emotion_predictor.encoder, 'weights/roberta2emo_pred_encoder_red+.h5'),
):
    sub_model.save_weights(weights_path)

# Load the Pre-trained RoBERTa Model

In [6]:
import os
import h5py
import torch
import numpy as np

In [7]:
from pytorch_transformers import RobertaTokenizer, RobertaModel

In [8]:
# Load the pretrained 'roberta-base' PyTorch model whose weights are copied below.
roberta = RobertaModel.from_pretrained('roberta-base')

In [9]:
# Show the module hierarchy of the loaded model.
print(roberta)

RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(50265, 768, padding_idx=0)
    (position_embeddings): Embedding(514, 768)
    (token_type_embeddings): Embedding(1, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
    

In [10]:
# Print model's state_dict: one line per parameter with its name and tensor size.
# The state_dict is fetched once and iterated with .items() instead of calling
# roberta.state_dict() again for every parameter.
print("Model's state_dict:")
for param_name, param_value in roberta.state_dict().items():
    print(param_name, '\t', param_value.size())

Model's state_dict:
embeddings.word_embeddings.weight 	 torch.Size([50265, 768])
embeddings.position_embeddings.weight 	 torch.Size([514, 768])
embeddings.token_type_embeddings.weight 	 torch.Size([1, 768])
embeddings.LayerNorm.weight 	 torch.Size([768])
embeddings.LayerNorm.bias 	 torch.Size([768])
encoder.layer.0.attention.self.query.weight 	 torch.Size([768, 768])
encoder.layer.0.attention.self.query.bias 	 torch.Size([768])
encoder.layer.0.attention.self.key.weight 	 torch.Size([768, 768])
encoder.layer.0.attention.self.key.bias 	 torch.Size([768])
encoder.layer.0.attention.self.value.weight 	 torch.Size([768, 768])
encoder.layer.0.attention.self.value.bias 	 torch.Size([768])
encoder.layer.0.attention.output.dense.weight 	 torch.Size([768, 768])
encoder.layer.0.attention.output.dense.bias 	 torch.Size([768])
encoder.layer.0.attention.output.LayerNorm.weight 	 torch.Size([768])
encoder.layer.0.attention.output.LayerNorm.bias 	 torch.Size([768])
encoder.layer.0.intermediate.dense.we

In [11]:
# Inspect the NumPy dtype of one RoBERTa parameter after conversion from a torch tensor.
roberta.state_dict()['encoder.layer.0.attention.self.query.bias'].numpy().dtype

dtype('float32')

In [12]:
# Load the 'roberta-base' tokenizer in this section as well and print its vocabulary size.
tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
print(tokenizer.vocab_size)

50265


In [13]:
# Look up the token id the tokenizer produces for the '</s>' special token.
tokenizer.encode('</s>')

[2]

In [14]:
# Look up the token id the tokenizer produces for the '<s>' special token.
tokenizer.encode('<s>')

[0]

In [15]:
# Encode a sentence pair with the special tokens written out by hand in the string.
tokenizer.encode('<s> How are you doing? </s> </s> I am fine. </s>')

[0, 1336, 32, 47, 608, 116, 2, 2, 38, 524, 2051, 4, 2]

In [16]:
# Encode the same two sentences as a pair, letting the tokenizer add the special
# tokens itself (add_special_tokens=True) for comparison with the manual version.
tokenizer.encode('How are you doing?', 'I am fine.', add_special_tokens = True)

[0, 1336, 32, 47, 608, 116, 2, 2, 38, 524, 2051, 4, 2]

In [17]:
# Encode each sentence alone and then both joined, without special tokens.
for sample_text in (
    'How are you doing?',
    'I am fine.',
    'How are you doing? I am fine.',
):
    print(tokenizer.encode(sample_text))

[1336, 32, 47, 608, 116]
[38, 524, 2051, 4]
[1336, 32, 47, 608, 116, 38, 524, 2051, 4]


# Initialize Encoder Weights

### Load the Emotion Predictor Encoder Weights in .h5 Format

In [18]:
# Open the saved encoder weights in read/write mode ('r+') so that existing
# datasets can be overwritten in place further down.
f = h5py.File('weights/roberta2emo_pred_encoder_red+.h5', 'r+')

In [19]:
trainable_params = []

def find_params(name):
    """Append `name` to `trainable_params` when it refers to an HDF5 dataset.

    Intended as the callback for `f.visit`, which passes the path of each object
    in the open file `f`; paths that are not datasets are ignored. Nothing is
    returned.
    """
    # h5py.Dataset is the public alias of the dataset class, so the private
    # h5py._hl module path does not need to be spelled out.
    if isinstance(f[name], h5py.Dataset):
        trainable_params.append(name)

f.visit(find_params)

In [20]:
# List every dataset found in the encoder file together with its shape.
for dataset_path in trainable_params:
    dataset_shape = f[dataset_path].shape
    print(dataset_path, dataset_shape)

encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_1/beta:0 (768,)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_1/gamma:0 (768,)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_2/beta:0 (768,)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_2/gamma:0 (768,)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/multi_head_attention/key/bias:0 (768,)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/multi_head_attention/key/kernel:0 (768, 768)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/multi_head_attention/mha_output/bias:0 (768,)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/multi_head_attention/mha_output/kernel:0 (768, 768)
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/multi_head_attention/query/bias:0 (768,)
encoder_layer_00/emotion_predict

### Build the Encoder Mapping Dictionary

In [21]:
# Map each TF/Keras HDF5 dataset path (key) to the PyTorch state_dict name (value)
# whose tensor should be written into it.

# beware of the folder name: emotion_predictor_plus if using meed2+ else emotion_predictor
LONG_PREFIX_FMT = 'encoder_layer_{:02d}/emotion_predictor_plus/plain_encoder/encoder_layer_{:02d}/'
SHORT_PREFIX_FMT = 'encoder_layer_{:02d}/'

# (tf suffix, pytorch suffix) pairs addressed through the short per-layer prefix.
SHORT_PREFIX_PAIRS = (
    ('ff_hidden/bias:0', 'intermediate.dense.bias'),
    ('ff_hidden/kernel:0', 'intermediate.dense.weight'),
    ('ff_output/bias:0', 'output.dense.bias'),
    ('ff_output/kernel:0', 'output.dense.weight'),
)

# (tf suffix, pytorch suffix) pairs addressed through the long, nested prefix.
LONG_PREFIX_PAIRS = (
    ('layernorm_1/beta:0', 'attention.output.LayerNorm.bias'),
    ('layernorm_1/gamma:0', 'attention.output.LayerNorm.weight'),
    ('layernorm_2/beta:0', 'output.LayerNorm.bias'),
    ('layernorm_2/gamma:0', 'output.LayerNorm.weight'),
    ('multi_head_attention/key/bias:0', 'attention.self.key.bias'),
    ('multi_head_attention/key/kernel:0', 'attention.self.key.weight'),
    ('multi_head_attention/mha_output/bias:0', 'attention.output.dense.bias'),
    ('multi_head_attention/mha_output/kernel:0', 'attention.output.dense.weight'),
    ('multi_head_attention/query/bias:0', 'attention.self.query.bias'),
    ('multi_head_attention/query/kernel:0', 'attention.self.query.weight'),
    ('multi_head_attention/value/bias:0', 'attention.self.value.bias'),
    ('multi_head_attention/value/kernel:0', 'attention.self.value.weight'),
)

encoder_mapper = {}  # From tf to pytorch
for pt_name in roberta.state_dict():
    if not pt_name.startswith('encoder.layer'):
        continue
    # Names look like 'encoder.layer.<n>.<...>'; the third component is the layer index.
    pt_parts = pt_name.split('.')
    layer_idx = int(pt_parts[2])
    pt_layer_prefix = '.'.join(pt_parts[:3]) + '.'
    tf_short_prefix = SHORT_PREFIX_FMT.format(layer_idx)
    tf_long_prefix = LONG_PREFIX_FMT.format(layer_idx, layer_idx)
    # Every parameter of a layer regenerates that layer's full set of entries;
    # re-assigning identical keys leaves both contents and ordering unchanged.
    for tf_suffix, pt_suffix in SHORT_PREFIX_PAIRS:
        encoder_mapper[tf_short_prefix + tf_suffix] = pt_layer_prefix + pt_suffix
    for tf_suffix, pt_suffix in LONG_PREFIX_PAIRS:
        encoder_mapper[tf_long_prefix + tf_suffix] = pt_layer_prefix + pt_suffix

In [22]:
# Show every TF dataset path followed by the PyTorch parameter it maps to.
for tf_path, pt_name in encoder_mapper.items():
    print(tf_path, '  -> ' + pt_name, sep='\n')

encoder_layer_00/ff_hidden/bias:0
  -> encoder.layer.0.intermediate.dense.bias
encoder_layer_00/ff_hidden/kernel:0
  -> encoder.layer.0.intermediate.dense.weight
encoder_layer_00/ff_output/bias:0
  -> encoder.layer.0.output.dense.bias
encoder_layer_00/ff_output/kernel:0
  -> encoder.layer.0.output.dense.weight
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_1/beta:0
  -> encoder.layer.0.attention.output.LayerNorm.bias
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_1/gamma:0
  -> encoder.layer.0.attention.output.LayerNorm.weight
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_2/beta:0
  -> encoder.layer.0.output.LayerNorm.bias
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/layernorm_2/gamma:0
  -> encoder.layer.0.output.LayerNorm.weight
encoder_layer_00/emotion_predictor_plus/plain_encoder/encoder_layer_00/multi_head_attention/key/bias:0
  -> encoder.layer.0.attent

### Write to the Emotion Predictor Encoder .h5 File

In [23]:
# Show the top-level groups of the open encoder weights file.
f.keys()

<KeysViewHDF5 ['encoder_layer_00', 'encoder_layer_01', 'encoder_layer_02', 'encoder_layer_03', 'encoder_layer_04', 'encoder_layer_05', 'encoder_layer_06', 'encoder_layer_07', 'encoder_layer_08', 'encoder_layer_09', 'encoder_layer_10', 'encoder_layer_11']>

In [24]:
# Overwrite every mapped HDF5 dataset with the corresponding RoBERTa tensor.
# The state_dict is fetched once up front instead of on every loop iteration.
roberta_state = roberta.state_dict()
for k, v in encoder_mapper.items():
    weights = roberta_state[v].numpy()
    # Rank-1 datasets (biases, layer-norm parameters) are copied as-is; datasets
    # of rank >= 2 (the kernels) receive the transposed tensor.
    f[k][()] = weights if len(f[k].shape) < 2 else weights.T

In [25]:
# Close the encoder weights file now that the RoBERTa tensors have been written.
f.close()

### Check Equivalence

In [26]:
# Reopen the encoder weights file read-only to verify what was written.
f = h5py.File('weights/roberta2emo_pred_encoder_red+.h5', 'r')

In [27]:
# Verify that every mapped dataset in the file equals its RoBERTa source tensor
# (transposed for datasets of rank >= 2, mirroring how the values were written).
roberta_state = roberta.state_dict()  # fetched once; it does not change inside the loop
mismatched = []
for k, v in encoder_mapper.items():
    expected = roberta_state[v].numpy()
    if len(f[k].shape) >= 2:
        expected = expected.T
    if not np.allclose(f[k][()], expected):
        mismatched.append(k)

all_close = not mismatched
print(all_close)
# On failure, name the offending datasets so the problem can be located.
for bad_key in mismatched:
    print('mismatch:', bad_key)

True


In [28]:
# Close the read-only handle used for the encoder check.
f.close()

# Initialize Embedder Weights

### Load the Emotion Predictor Embedder Weights in .h5 Format

In [29]:
# Open the saved embedder weights in read/write mode ('r+') so that existing
# datasets can be overwritten in place further down.
f = h5py.File('weights/roberta2emo_pred_embedder_ebp_red+.h5', 'r+')

In [30]:
# Show the top-level groups of the open embedder weights file.
f.keys()

<KeysViewHDF5 ['comm_embed', 'dropout_embed', 'emot_embed', 'layernorm_embed', 'pos_embed', 'seg_embed', 'word_embed']>

In [31]:
# Start from an empty list: this collects the dataset paths of the embedder file.
trainable_params = []

def find_params(name):
    """Append `name` to `trainable_params` when it refers to an HDF5 dataset.

    Intended as the callback for `f.visit`, which passes the path of each object
    in the open file `f`; paths that are not datasets are ignored. Nothing is
    returned.
    """
    # h5py.Dataset is the public alias of the dataset class, so the private
    # h5py._hl module path does not need to be spelled out.
    if isinstance(f[name], h5py.Dataset):
        trainable_params.append(name)

f.visit(find_params)

In [32]:
# List every dataset found in the embedder file together with its shape.
for dataset_path in trainable_params:
    dataset_shape = f[dataset_path].shape
    print(dataset_path, dataset_shape)

comm_embed/emotion_predictor_plus/meed_embedder/comm_embed/embeddings:0 (7, 768)
emot_embed/emotion_predictor_plus/meed_embedder/emot_embed/embeddings:0 (41, 768)
layernorm_embed/emotion_predictor_plus/meed_embedder/layernorm_embed/beta:0 (768,)
layernorm_embed/emotion_predictor_plus/meed_embedder/layernorm_embed/gamma:0 (768,)
pos_embed/emotion_predictor_plus/meed_embedder/pos_embed/embeddings:0 (102, 768)
seg_embed/emotion_predictor_plus/meed_embedder/seg_embed/embeddings:0 (2, 768)
word_embed/emotion_predictor_plus/meed_embedder/word_embed/embeddings:0 (50265, 768)


### Build the Embedder Mapping Dictionary

In [33]:
# TF/Keras dataset path -> PyTorch state_dict name for the embedder weights that
# are copied without any reshaping. Every path repeats the layer name at both ends.
_EMBED_PATH = '{layer}/emotion_predictor_plus/meed_embedder/{layer}/{weight}:0'

embedder_mapper = dict([
    (_EMBED_PATH.format(layer='layernorm_embed', weight='beta'), 'embeddings.LayerNorm.bias'),
    (_EMBED_PATH.format(layer='layernorm_embed', weight='gamma'), 'embeddings.LayerNorm.weight'),
    (_EMBED_PATH.format(layer='word_embed', weight='embeddings'), 'embeddings.word_embeddings.weight'),
])

### Write to the Emotion Predictor Embedder .h5 File

In [34]:
# Copy each mapped RoBERTa tensor into its HDF5 dataset unchanged (no transpose).
for h5_path, pt_name in embedder_mapper.items():
    source_array = roberta.state_dict()[pt_name].numpy()
    f[h5_path][()] = source_array

In [35]:
# Copy the leading rows of RoBERTa's position-embedding table into the smaller
# TF table. The row count is read from the target dataset instead of being
# hard-coded, so the copy stays correct if max_position_embed changes.
# NOTE(review): RoBERTa implementations typically start position ids at
# padding_idx + 1, so the first rows may be special -- confirm that the TF
# embedder indexes pos_embed with the same offset.
k = 'pos_embed/emotion_predictor_plus/meed_embedder/pos_embed/embeddings:0'
v = 'embeddings.position_embeddings.weight'
num_positions = f[k].shape[0]
f[k][()] = roberta.state_dict()[v].numpy()[:num_positions, :]

In [36]:
# Close the embedder weights file now that the RoBERTa tensors have been written.
f.close()

### Check Equivalence

In [37]:
# Reopen the embedder weights file read-only to verify what was written.
f = h5py.File('weights/roberta2emo_pred_embedder_ebp_red+.h5', 'r')

In [38]:
# Verify that the embedder datasets in the file match their RoBERTa sources.
roberta_state = roberta.state_dict()  # fetched once; it does not change inside the loop
all_close = True
for k, v in embedder_mapper.items():
    if not np.allclose(f[k][()], roberta_state[v].numpy()):
        all_close = False

# The position embeddings are not part of embedder_mapper (only the leading rows
# of the RoBERTa table are copied), so they are compared separately here.
pos_key = 'pos_embed/emotion_predictor_plus/meed_embedder/pos_embed/embeddings:0'
pos_rows = f[pos_key].shape[0]
pos_expected = roberta_state['embeddings.position_embeddings.weight'].numpy()[:pos_rows, :]
if not np.allclose(f[pos_key][()], pos_expected):
    all_close = False

print(all_close)

True


In [39]:
# Close the read-only handle used for the embedder check.
f.close()