# Comparison between w2v and ELMo on a sentiment task

------------------
We try to reproduce the results obtained in the paper [Evaluation of sentence embeddings in downstream
and linguistic probing tasks](https://arxiv.org/pdf/1806.06259.pdf). The goal is to compare the performance of different sentence embeddings.

More specifically we will compare:
* Average of word2vec vectors
* SIF (Smooth Inverse Frequency) with w2v vectors
* ELMo

The classification is done with a one-hidden-layer perceptron with 50 neurons (as in the article). If time permits, the classification will also be performed with a logistic regression.

--------------------

In [2]:
! conda list | grep tensorflow

tensorflow                1.8.0                         0  
tensorflow-base           1.8.0            py36h1a1b453_0  


In [2]:
import pandas as pd
import keras
from keras.initializers import RandomNormal
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import adam

from keras import backend as K
import keras.layers as layers
from keras.models import Model, load_model
from keras.engine import Layer

import tensorflow as tf
import tensorflow_hub as hub

import numpy as np

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Read file

In [3]:
def read_file(data_file):
    """Load a labelled sentence file into a DataFrame.

    Each line is split into its first character, which is parsed as the
    integer label, and the remainder of the line, which is kept as the
    sentence text.

    Parameters
    ----------
    data_file : str
        Path of the file to read.

    Returns
    -------
    pandas.DataFrame
        Column ``X`` holds the sentences (everything after the first
        character, so a separator following the label is preserved) and
        column ``Y`` holds the integer labels.
    """
    # Read the file that was actually requested. The path used to be
    # hard-coded to the training file, so every call returned the training
    # data regardless of the argument.
    df = pd.read_table(data_file, header=None)
    lines = df[0]

    df_out = pd.DataFrame()
    df_out['X'] = [line[1:] for line in lines]
    df_out['Y'] = [int(line[:1]) for line in lines]
    return df_out

In [4]:
# Build the train and test frames by passing each split's path to read_file.
df_train, df_test = (
    read_file(split_path)
    for split_path in ('data/stsa.binary.train', 'data/stsa.binary.test')
)

## w2v + MCP

---------------------


In [10]:
class MCP:
    """Perceptron classifier with a single 50-unit hidden layer (Keras).

    Parameters
    ----------
    input_size : int
        Dimension of the input feature vectors.
    output_size : int
        Number of units in the softmax output layer.
    """

    def __init__(self, input_size, output_size):
        self.input_size = input_size
        self.output_size = output_size
        self.learning_rate = 0.0001
        self.batch_size = 32
        # The builder is the private `_build_model`; calling the
        # non-existent `build_model` raised AttributeError.
        self.model = self._build_model()

    def _build_model(self):
        """Create and compile the network.

        Returns
        -------
        keras.models.Sequential
            Dense(50, relu) -> Dense(output_size, softmax), compiled with
            the Adam optimizer and an MSE loss.
        """
        init = RandomNormal(mean=0.0, stddev=0.01, seed=None)

        model = Sequential()
        model.add(Dense(units=50, activation='relu', input_dim=self.input_size, kernel_initializer=init))
        model.add(Dense(units=self.output_size, activation='softmax', kernel_initializer=init))

        # NOTE(review): MSE on a softmax output is unusual for classification;
        # kept as-is to preserve the experiment, but cross-entropy may be
        # what was intended -- confirm against the reference paper.
        model.compile(loss='mse', optimizer=adam(lr=self.learning_rate), metrics=['accuracy'])
        return model

    def train(self, X, Y, n_epochs):
        """Fit the model and return the per-epoch training loss.

        Parameters
        ----------
        X : array-like
            Training inputs, forwarded to ``model.fit``.
        Y : array-like
            Training targets, forwarded to ``model.fit``.
        n_epochs : int
            Number of epochs to train for.

        Returns
        -------
        list
            The ``'loss'`` entry of the history returned by ``model.fit``.
        """
        # Use the `n_epochs` parameter; the misspelt `n_epoches` raised
        # NameError on every call.
        scores = self.model.fit(X, Y, epochs=n_epochs, verbose=1, batch_size=self.batch_size)
        return scores.history['loss']

## Trainable elmo

-----------------------------------

### Simple test on elmo

In [5]:
# Smoke test: load the ELMo v2 module from TF Hub and embed three raw sentences.
elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
# The "default" signature is fed whole sentence strings; as_dict=True makes the
# call return a dict of outputs, from which only the "elmo" entry is kept.
embeddings = elmo(
["the cat is on the mat", "dogs are in the fog", "pascal jauffret is in the house tonight"],
signature="default",
as_dict=True)["elmo"]

with tf.Session() as sess:
    # Initialize the graph's variables before evaluating any tensor.
    # NOTE(review): TF Hub text-module examples usually also run
    # tf.tables_initializer() -- confirm it is not required here.
    init = tf.global_variables_initializer()
    sess.run(init)
    # Evaluate only the embedding of the first sentence.
    test=embeddings[0].eval()

INFO:tensorflow:Using C:\Users\ac40448\AppData\Local\Temp\tfhub_modules to cache modules.
INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [27]:
# Same module, this time driven through the "tokens" signature with
# pre-tokenized input.
elmo = hub.Module("https://tfhub.dev/google/elmo/2", trainable=True)
# Both rows have six entries; the second sentence has five real tokens and is
# padded with an empty string.
tokens_input = [["the", "cat", "is", "on", "the", "mat"],
["dogs", "are", "in", "the", "fog", ""]]
# Number of real (non-padding) tokens in each row of tokens_input.
tokens_length = [6, 5]
# as_dict=True keeps every output of the module in a dict keyed by name.
embeddings = elmo(
inputs={
"tokens": tokens_input,
"sequence_len": tokens_length
},
signature="tokens",
as_dict=True)


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [28]:
# Display the 'word_emb' tensor from the module's output dict.
# (The stray trailing '.' made this cell a syntax error.)
embeddings['word_emb']

<tf.Tensor 'module_2_apply_tokens/bilm/Reshape_1:0' shape=(2, 6, 512) dtype=float32>

### Trainable layer

In [64]:
class ElmoEmbeddingLayer(Layer):
    """Keras layer that wraps the TF Hub ELMo v2 module.

    The layer is fed a string tensor with a singleton axis 1 (that axis is
    squeezed away in ``call``) and returns the module's ``'default'`` output,
    declared as ``(batch, 1024)`` by ``compute_output_shape``.
    NOTE(review): per the TF Hub module page, ``'default'`` is presumably the
    pooled sentence vector -- confirm.
    """

    def __init__(self, **kwargs):
        # Both attributes are set before the base constructor runs.
        self.dimensions = 1024
        self.trainable = True
        super().__init__(**kwargs)

    def build(self, input_shape):
        """Instantiate the hub module and register its trainable variables."""
        module_name = "{}_module".format(self.name)
        self.elmo = hub.Module(
            'https://tfhub.dev/google/elmo/2',
            trainable=self.trainable,
            name=module_name,
        )

        # Collect the trainable variables whose scope matches the module's
        # name and append them to this layer's trainable weights.
        scope_pattern = "^{}_module/.*".format(self.name)
        self.trainable_weights += K.tf.trainable_variables(scope=scope_pattern)
        super().build(input_shape)

    def call(self, x, mask=None):
        """Cast the input to strings, drop axis 1 and run the ELMo module."""
        sentences = K.squeeze(K.cast(x, tf.string), axis=1)
        outputs = self.elmo(sentences, as_dict=True, signature='default')
        return outputs['default']

    def compute_mask(self, inputs, mask=None):
        """Return a boolean tensor that is False where the input equals '--PAD--'."""
        return K.not_equal(inputs, '--PAD--')

    def compute_output_shape(self, input_shape):
        """Return (batch size, embedding dimension)."""
        batch_size = input_shape[0]
        return (batch_size, self.dimensions)

(7, 1024)