In [32]:
import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential, Model
from keras.layers import Flatten
from keras.layers import Dense, Embedding, LSTM, Dropout, Activation
from keras import backend as K
from deepexplain.tensorflow import DeepExplain
import pandas as pd
from keras.models import load_model
from keras.models import model_from_json

# Artifact paths, resolved relative to the notebook's working directory.
# Model architecture as JSON (written by save_lstm_model via model.to_json()).
LSTM_MODEL_JSON = '../saved_model/model_lstm.json'
# Weights file handed to model.save_weights() / load_weights().
LSTM_MODEL_WEIGHTS = '../saved_model/model_lstm.h5'
# Not referenced by any visible cell; presumably intended for the training
# history -- TODO confirm or remove.
HISTORY_FILE = '../saved_model/history_lstm.json'

def save_lstm_model(model):
    """Persist a Keras model as an architecture file plus a weights file.

    The JSON string returned by ``model.to_json()`` is written to
    ``LSTM_MODEL_JSON`` and the weights are stored at ``LSTM_MODEL_WEIGHTS``.

    Args:
        model: Object exposing ``to_json()`` and ``save_weights(path)``.
    """
    # serialize the architecture to JSON
    architecture = model.to_json()
    with open(LSTM_MODEL_JSON, 'w') as out_file:
        out_file.write(architecture)
    # serialize weights to HDF5
    model.save_weights(LSTM_MODEL_WEIGHTS)

def load_lstm_model(model=None):
    """Rebuild the saved model from its architecture JSON and weights files.

    Args:
        model: Unused. Kept (and now optional) so existing calls that pass a
            model keep working.

    Returns:
        The model reconstructed from the JSON stored at ``LSTM_MODEL_JSON``
        with the weights from ``LSTM_MODEL_WEIGHTS`` loaded into it. It is
        returned as built by ``model_from_json``; compile it before calling
        ``evaluate``.
    """
    # model_from_json expects the JSON document itself, not a path to it,
    # so the file has to be read first.
    with open(LSTM_MODEL_JSON, 'r') as jsonfile:
        loaded_model = model_from_json(jsonfile.read())
    # load weights into new model
    loaded_model.load_weights(LSTM_MODEL_WEIGHTS)
    return loaded_model

In [35]:
# Load the dataset. Later cells tokenize its 'clean_sentiment' column and
# one-hot encode its 'sentiment' column as the labels.
df = pd.read_csv('../data/final_data_less.csv')

In [36]:
# Build the word index from the text column.
t=Tokenizer()
t.fit_on_texts(df['clean_sentiment'])

In [37]:
# Turn every document into a list of word indices.
encoded_docs = t.texts_to_sequences(df['clean_sentiment'])
# Size handed to the Embedding layer: one slot per indexed word plus one extra
# for index 0, which pad_sequences uses as its default filler value.
vocab_size = 1 + len(t.word_index)
print(vocab_size)

7051


In [38]:
# One-hot encode the sentiment column: one indicator column per distinct value.
labels = pd.get_dummies(df['sentiment'].values)
print(labels.shape)

(1000, 2)


In [39]:
# Length of the longest encoded document; used as the padded sequence length.
max_length = max(map(len, encoded_docs))
print(max_length)

580


In [40]:
# Pad every encoded document at the end (padding='post') up to max_length so
# that all rows have the same length.
padded_docs = pad_sequences(encoded_docs, maxlen=max_length, padding='post')

In [41]:
# Positional hold-out: the first 80% of the rows train the model, the rest is
# the test set.
# NOTE(review): rows are not shuffled before splitting -- confirm the CSV is
# not ordered by label.
split_fraction = 0.8
split_idx = int(split_fraction * len(padded_docs))
print(split_idx)

X_train = padded_docs[:split_idx]
X_test = padded_docs[split_idx:]
y_train = labels[:split_idx]
y_test = labels[split_idx:]

print("Training Shape: ", X_train.shape, "== Train Lables: ", y_train.shape)
print("Test Shape: ", X_test.shape, "== Test Lables: ", y_test.shape)

800
Training Shape:  (800, 580) == Train Lables:  (800, 2)
Test Shape:  (200, 580) == Test Lables:  (200, 2)


In [47]:
# Grab the session from the Keras backend; it is handed to DeepExplain and is
# also used to evaluate the embedding output in the training/explanation cell.
current_session = K.get_session()

In [46]:
# Build, train and explain the classifier inside a single DeepExplain context.
# NOTE(review): DeepExplain's usage notes ask for the graph to be created inside
# the context, so keep model construction within this `with` block.
with DeepExplain(session=current_session) as de:  # <-- init DeepExplain context
    model = Sequential()
    model.add(Embedding(vocab_size, 128, input_length=max_length))
    model.add(Flatten())
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(0.5))
    # The class scores get their own linear layer, separate from the softmax,
    # so model.layers[-2] exposes pre-softmax outputs for the attribution
    # target below.
    model.add(Dense(2, activation='linear'))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would emit an extra "None" line).
    model.summary()
    # The test split doubles as validation data here; it is only monitored
    # during training, not used to select a model.
    model.fit(X_train, y_train,
              batch_size=10,
              epochs=5,
              validation_data=(X_test, y_test),
              verbose=1,
              shuffle=True)

    # predict on test data
    y_pred = model.predict(np.array(X_test))
    y_test = np.array(y_test)

    # Evaluate the embedding tensor on the model input (in other words, perform the lookup)
    embedding_tensor = model.layers[0].output
    input_tensor = model.inputs[0]
    embedding_out = current_session.run(embedding_tensor, {input_tensor: X_test})

    xs = X_test
    ys = y_test
    # Run DeepExplain with the embedding as input: the target is the
    # pre-softmax output masked by the one-hot labels, and attributions are
    # taken with respect to the Flatten layer's input (the embedding output).
    attributions = de.explain('elrp', model.layers[-2].output * ys, model.layers[1].input, embedding_out)
    print("attributions shape --- {}".format(attributions.shape))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_11 (Embedding)     (None, 580, 128)          902528    
_________________________________________________________________
flatten_11 (Flatten)         (None, 74240)             0         
_________________________________________________________________
dense_21 (Dense)             (None, 100)               7424100   
_________________________________________________________________
dropout_11 (Dropout)         (None, 100)               0         
_________________________________________________________________
dense_22 (Dense)             (None, 2)                 202       
_________________________________________________________________
activation_7 (Activation)    (None, 2)                 0         
Total params: 8,326,830
Trainable params: 8,326,830
Non-trainable params: 0
_________________________________________________________________


In [48]:
# Write the trained model's architecture and weights to the paths defined by
# LSTM_MODEL_JSON and LSTM_MODEL_WEIGHTS.
save_lstm_model(model)

In [49]:
# Final evaluation of the model on the test split (the same split that was
# passed as validation_data during fit).
# The model was compiled with metrics=['accuracy'], so scores[0] is the loss
# and scores[1] the accuracy.
scores = model.evaluate(X_test, y_test, verbose=0)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 75.50%


In [51]:
# Attributions have the shape of the embedding output they were computed for:
# (test samples, max_length, embedding size).
attributions.shape

(200, 580, 128)