In [None]:
import csv
import emoji
import warnings
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow.keras.backend as K

from laserembeddings import Laser
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, LSTM, Dropout, Input,Concatenate, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model, Sequential

warnings.simplefilter("ignore", UserWarning)

In [None]:
# Functions for preparing data

def emo_reading(column):
    """Convert emotion names into integer class ids.

    Args:
        column: Iterable of emotion names. Recognised values are 'anger',
            'disgust', 'fear', 'joy', 'sadness', 'surprise' and 'others'.

    Returns:
        list: One integer id (0-6) per input element, in input order.

    Raises:
        ValueError: If an element is not one of the recognised names.
            Dropping such an element would make the result shorter than
            the input and shift every following label onto the wrong row.
    """
    emotion_ids = {
        'anger': 0,
        'disgust': 1,
        'fear': 2,
        'joy': 3,
        'sadness': 4,
        'surprise': 5,
        'others': 6,
    }
    labels = []
    for position, name in enumerate(column):
        # The isinstance guard keeps NaN / non-string cells out of the lookup.
        if not isinstance(name, str) or name not in emotion_ids:
            raise ValueError(
                'Unknown emotion label %r at position %d; expected one of %s'
                % (name, position, sorted(emotion_ids))
            )
        labels.append(emotion_ids[name])
    return labels

def off_reading(column):
    """Encode the offensive flag of every element as 0 or 1.

    Args:
        column: Iterable of offensiveness tags.

    Returns:
        list: 1 where the element equals 'OFF', 0 for anything else,
        in input order.
    """
    return [1 if tag == 'OFF' else 0 for tag in column]

def event_reading(column):
    """Convert event names into integer ids.

    Args:
        column: Iterable of event names. Recognised values are
            'ChampionsLeague', 'GameOfThrones', 'GretaThunberg', 'LaLiga',
            'NotreDame', 'SpainElection', 'Venezuela' and 'WorldBookDay'.

    Returns:
        list: One integer id (0-7) per input element, in input order.

    Raises:
        ValueError: If an element is not one of the recognised names.
            Dropping such an element would make the result shorter than
            the input and shift every following id onto the wrong row.
    """
    event_ids = {
        'ChampionsLeague': 0,
        'GameOfThrones': 1,
        'GretaThunberg': 2,
        'LaLiga': 3,
        'NotreDame': 4,
        'SpainElection': 5,
        'Venezuela': 6,
        'WorldBookDay': 7,
    }
    events = []
    for position, name in enumerate(column):
        # The isinstance guard keeps NaN / non-string cells out of the lookup.
        if not isinstance(name, str) or name not in event_ids:
            raise ValueError(
                'Unknown event %r at position %d; expected one of %s'
                % (name, position, sorted(event_ids))
            )
        events.append(event_ids[name])
    return events

In [None]:
# Loading datasets
#
# Every split is a tab-separated file. The string columns are converted to
# integer ids with the *_reading helpers and stored next to the originals.

train = pd.read_csv('train.tsv', sep='\t')
train['emo_labels'] = emo_reading(train['emotion'])
train['off_labels'] = off_reading(train['offensive']) # NO - 0, OFF - 1
train['event_labels'] = event_reading(train['event'])

dev = pd.read_csv('dev.tsv', sep='\t')
dev['emo_labels'] = emo_reading(dev['emotion'])
dev['off_labels'] = off_reading(dev['offensive']) # NO - 0, OFF - 1
dev['event_labels'] = event_reading(dev['event'])

# `error_bad_lines` was deprecated in pandas 1.3 and removed in 2.0.
# `on_bad_lines='warn'` keeps the same behaviour: malformed rows are skipped
# and reported instead of aborting the read.
test = pd.read_csv('test.tsv', quoting=csv.QUOTE_NONE, on_bad_lines='warn', sep='\t')
test['off_labels'] = off_reading(test['offensive']) # NO - 0, OFF - 1
test['event_labels'] = event_reading(test['event'])

# The emotion labels for the test tweets come from a separate gold file.
test_gold = pd.read_csv('test_gold.tsv', sep='\t')
test_gold['emo_labels'] = emo_reading(test_gold['emotion'])

# Test features and gold labels are paired by position later on, so a row
# skipped from test.tsv would silently misalign them. Fail fast instead.
# NOTE(review): equal length does not prove identical row order - confirm
# against the data release.
if len(test) != len(test_gold):
    raise ValueError(
        'test.tsv has %d rows but test_gold.tsv has %d; they must match'
        % (len(test), len(test_gold))
    )

In [None]:
# Shell command: run the laserembeddings "download-models" step so the model
# files are available before Laser() is instantiated below.
# NOTE(review): presumably needs network access and only has to run once per
# environment - confirm.
!python -m laserembeddings download-models

In [None]:
# Extracting embeddings

laser = Laser()


def _embed_spanish_tweets(tweets):
    """Embed tweets with LASER after replacing emoji by Spanish text names."""
    demojized = [emoji.demojize(text, language='es') for text in tweets]
    return laser.embed_sentences(demojized, lang='es')


train_embeddings = _embed_spanish_tweets(train['tweet'])
dev_embeddings = _embed_spanish_tweets(dev['tweet'])
test_embeddings = _embed_spanish_tweets(test['tweet'])

In [None]:
# Reshaping datasets to tensors
#
# Every feature and target is reshaped to (samples, 1, width): one time step
# per sample, matching the Input(shape=(1, width)) layers of the model.

TRANSFORMER_DIM = 1024 #laser


def _single_step(values, width):
    """Reshape `values` to a tensor of shape (-1, 1, width)."""
    return tf.reshape(values, [-1, 1, width])


def _one_hot_single_step(labels, n_classes):
    """One-hot encode integer labels and reshape to (-1, 1, n_classes)."""
    return _single_step(to_categorical(labels, n_classes), n_classes)


# - - - - - TRAIN FEATURES - - - - -
X1_text = _single_step(train_embeddings, TRANSFORMER_DIM)
X1_event = _one_hot_single_step(train['event_labels'], 8)
X1_hate = _one_hot_single_step(train['off_labels'], 2)

Y1 = to_categorical(train['emo_labels'], 7)
Y1_reshaped = _single_step(Y1, 7)

print('Train data shapes:', X1_text.shape, X1_event.shape, X1_hate.shape, Y1_reshaped.shape)

# - - - - - DEV FEATURES - - - - -
X2_text = _single_step(dev_embeddings, TRANSFORMER_DIM)
X2_event = _one_hot_single_step(dev['event_labels'], 8)
X2_hate = _one_hot_single_step(dev['off_labels'], 2)

Y2 = to_categorical(dev['emo_labels'], 7)
Y2_reshaped = _single_step(Y2, 7)

print('Dev data shapes:', X2_text.shape, X2_event.shape, X2_hate.shape, Y2_reshaped.shape)


# - - - - - TEST FEATURES - - - - -
X3_text = _single_step(test_embeddings, TRANSFORMER_DIM)
X3_event = _one_hot_single_step(test['event_labels'], 8)
X3_hate = _one_hot_single_step(test['off_labels'], 2)

# Test targets come from the separate gold-label frame.
Y3 = to_categorical(test_gold['emo_labels'], 7)
Y3_reshaped = _single_step(Y3, 7)

print('Test data shapes:', X3_text.shape, X3_event.shape, X3_hate.shape, Y3_reshaped.shape)

In [None]:
# Defining the model architecture.
#
# Three input branches (sentence embedding, one-hot event, one-hot offensive
# flag) are processed separately, concatenated, batch-normalised and fed to a
# 7-way softmax over the emotion classes.

inputA = Input(shape=(1, TRANSFORMER_DIM, ))
inputB = Input(shape=(1, 8, ))
inputC = Input(shape=(1, 2, ))

# the first branch operates on the sentence embeddings
x = LSTM(TRANSFORMER_DIM, input_shape=(1, TRANSFORMER_DIM), return_sequences=True, dropout=0.1, recurrent_dropout=0.1)(inputA)
x = Dense(TRANSFORMER_DIM,activation='relu')(x)
x = Dense(256, activation="sigmoid")(x)
x = Dropout(0.5)(x)
x = Dense(128,activation='sigmoid')(x)
x_model = Model(inputs=inputA, outputs=x)

# the second branch operates on the event (topic) features
y = Dense(128, activation="relu")(inputB)
y = Dense(24, activation = "relu")(y)
y_model = Model(inputs = inputB, outputs = y)

# the third branch operates on the offensive-flag features
z = Dense(128, activation="relu")(inputC)
z = Dense(24, activation = "relu")(z)
z_model = Model(inputs = inputC, outputs = z)

# combine the output of the three branches
combined = Concatenate()([x_model.output, y_model.output, z_model.output])

# normalise the combined features, then classify them with a 7-way softmax
# (`z` is reused here; the third-branch output stays reachable as z_model.output)
z = BatchNormalization()(combined)
z1 = Dense(7, activation="softmax")(z)

# The full model takes the three branch inputs and outputs a probability
# distribution over the 7 emotion classes. The inputs are passed as one flat
# list: `x_model.inputs` etc. are already lists, so wrapping them in another
# list would build a list of lists that no longer matches the flat
# [text, event, hate] list given to fit().
model = Model(inputs=[inputA, inputB, inputC], outputs=z1)

In [None]:
def f1(y_true, y_pred):
    """Soft F1 (Dice-style) score between targets and predictions.

    Both tensors are flattened and the score is computed over all elements as
    ``2 * sum(y_true * y_pred) / (sum(y_true) + sum(y_pred))``. The raw values
    of ``y_pred`` are used (no argmax or thresholding), so with softmax
    outputs this is a smooth approximation rather than the F1 of the hard
    class decisions.

    Args:
        y_true: Target tensor.
        y_pred: Prediction tensor with the same number of elements as y_true.

    Returns:
        Scalar tensor holding the score.
    """
    y_true = K.flatten(y_true)
    y_pred = K.flatten(y_pred)
    overlap = K.sum(y_true * y_pred)
    total = K.sum(y_true) + K.sum(y_pred)
    # The same epsilon goes on both sides of the fraction. Doubling it in the
    # numerator only would make all-zero input score 2.0 instead of 1.0.
    return (2 * overlap + K.epsilon()) / (total + K.epsilon())

In [None]:
# Results of dev data classification
#
# Train on the training split and report loss, accuracy and f1 on the dev
# split after every epoch.

model.summary()
model.compile(
    loss='categorical_crossentropy',
    optimizer=Adam(0.001),
    metrics=[tf.keras.metrics.CategoricalAccuracy(name='accuracy'), f1],
)

model.fit(
    x=[X1_text, X1_event, X1_hate],
    y=Y1_reshaped,
    validation_data=([X2_text, X2_event, X2_hate], Y2_reshaped),
    batch_size=100,
    epochs=20,
)

In [None]:
# Results of testing data classification
#
# Score the model trained in the previous cell on the test split.
# compile() does not reset weights, so calling fit() again here would keep
# training the already fitted model and would change the result every time
# the cell is re-run. evaluate() leaves the weights untouched, so the numbers
# below describe exactly the model whose dev results are reported above.

test_scores = model.evaluate([X3_text, X3_event, X3_hate], Y3_reshaped, batch_size=100)
print('Test [loss, accuracy, f1]:', test_scores)