# Neural Affect Style Transfer

In [None]:
from numpy import zeros, concatenate, asarray, ones, amax, argmax
from IPython.display import display, HTML

In [None]:
def browser_alert(message):
    """Pop up a JavaScript ``alert`` dialog in the browser showing ``message``.

    Args:
        message: Text to show. It is converted with ``str`` and escaped, so
            quotes, backslashes, newlines or ``</script>`` inside it can
            neither break the generated script nor inject code.
    """
    import json  # local import keeps this cell independent of other cells

    # json.dumps yields a quoted, fully escaped string literal that JavaScript
    # accepts; "</" is escaped as well so the text cannot close the <script> tag.
    js_message = json.dumps(str(message)).replace("</", "<\\/")
    display(HTML('<script type="text/javascript">alert(' + js_message + ');</script>'))
    
def browser_notify(message):
    """Raise a browser desktop notification titled "Jupyter Notification".

    Args:
        message: Notification body text. It is converted with ``str`` and
            escaped, so quotes, backslashes, newlines or ``</script>`` inside
            it can neither break the generated script nor inject code.
    """
    import json  # local import keeps this cell independent of other cells

    # json.dumps yields a quoted, fully escaped string literal that JavaScript
    # accepts; "</" is escaped as well so the text cannot close the <script> tag.
    js_body = json.dumps(str(message)).replace("</", "<\\/")
    display(HTML('<script type="text/javascript">var notification=new Notification("' +
                 'Jupyter Notification",{icon:"http://blog.jupyter.org/content/' +
                 'images/2015/02/jupyter-sq-text.png",body:' + js_body +
                 '});</script>'))

In [None]:
# Smoke test: fire one notification to check that the helper works in this browser.
browser_notify("test")

## Read Data

In [None]:
# Tab-separated dataset file read by the loading cell below.
# NOTE(review): hard-coded absolute path on one user's machine — adjust before running elsewhere.
dataset_path = "/home/v2john/attr-reviews-dataset/dev.txt"

In [None]:
# Collect the text column (fourth tab-separated field) of every dataset line.
all_texts = list()
# Explicit encoding so the result does not depend on the machine's locale.
# NOTE(review): assumes the dataset is UTF-8 encoded — confirm.
with open(dataset_path, encoding="utf-8") as dataset_file:
    for line in dataset_file:
        # Drop the line terminator; otherwise it stays attached to the text
        # whenever the text is the last field on the line.
        line = line.rstrip('\n')
        if not line:
            # A blank line (e.g. at the end of the file) has no fields to read.
            continue
        # Lines with fewer than four fields still raise IndexError on purpose,
        # so malformed data is not silently dropped.
        text = line.split('\t')[3]
        all_texts.append(text)

In [None]:
# Number of texts loaded from the dataset file.
len(all_texts)

### Tokenize and build embeddings

In [None]:
from keras.preprocessing.text import Tokenizer

In [None]:
# Word-level tokenizer limited by num_words=1000; this is the same number as the
# VOCAB_SIZE constant defined in the model section, so keep the two in sync.
# NOTE(review): per the Keras docs only the num_words-1 most frequent words are kept — confirm.
keras_tokenizer = Tokenizer(num_words=1000)

In [None]:
# Build the tokenizer's vocabulary from all loaded texts.
keras_tokenizer.fit_on_texts(all_texts)

In [None]:
# Size of the tokenizer's word -> index mapping.
# NOTE(review): word_index appears to hold every distinct token seen, not only the top num_words — confirm.
len(keras_tokenizer.word_index)

In [None]:
# Convert each text into its list of integer word indices.
text_sequences = keras_tokenizer.texts_to_sequences(all_texts)

In [None]:
# Spot-check one encoded text (requires at least 1002 texts to be loaded).
text_sequences[1001]

## Keras Model

In [None]:
from keras.layers import Input, Dense, RepeatVector, LSTM, Conv1D, Masking, Embedding
from keras.layers.wrappers import TimeDistributed, Bidirectional
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences

In [None]:
# Width of the word embeddings; also used as the unit count of each LSTM below.
EMBEDDING_DIM = 100
# Every sequence is padded or truncated to exactly this many tokens.
MAX_SEQUENCE_LENGTH = 20
# Number of output classes and embedding rows; same value as the tokenizer's num_words.
VOCAB_SIZE = 1000

In [None]:
# Bring every sequence to MAX_SEQUENCE_LENGTH: pad with 0 at the end and cut
# overlong sequences at the end.
x_train = pad_sequences(text_sequences, maxlen=MAX_SEQUENCE_LENGTH, padding='post', 
                        truncating='post', value=0)

In [None]:
# Keep only the first 10,000 padded sequences for training.
x_train = x_train[:10000]

In [None]:
# Inspect the shape of the training input.
x_train.shape

In [None]:
# One-hot encode every token id of x_train so the targets line up with the
# per-timestep softmax output: x_train_rev[sample, position, token_id] == 1.
# The array is preallocated and filled in place instead of being assembled from
# one temporary vector per token, which avoids holding a second full-size copy
# of the data while the final array is built.
x_train_rev = zeros(x_train.shape + (VOCAB_SIZE,))
for sample_index, x_vector in enumerate(x_train):
    for position, index in enumerate(x_vector):
        x_train_rev[sample_index, position, index] = 1

In [None]:
# Inspect the shape of the one-hot training targets.
x_train_rev.shape

In [None]:
# Sequence autoencoder graph: token ids -> embedding -> bidirectional LSTM
# encoder -> repeated summary vector -> bidirectional LSTM decoder ->
# per-timestep softmax over the vocabulary. Each tensor is printed for inspection.

# One padded sequence of token ids per sample.
# NOTE(review): the input holds integer ids but is declared float32 — confirm the Embedding layer handles the cast.
main_input = Input(shape=x_train.shape[1:], dtype='float32', name='main_input')
# mask_zero=True: index 0, the pad value used when building x_train, is treated as padding.
embed_1 = Embedding(input_dim=VOCAB_SIZE, output_dim=EMBEDDING_DIM, 
                    mask_zero=True, input_length=MAX_SEQUENCE_LENGTH) (main_input)
print(embed_1)
# lstm_1 = Bidirectional(LSTM(EMBEDDING_DIM, return_sequences=True, name='lstm_1'))(embed_1)
# print(lstm_1)
# Encoder: no return_sequences, so the whole sequence is reduced to one vector.
lstm_2 = Bidirectional(LSTM(EMBEDDING_DIM, name='lstm_2'))(embed_1)
print(lstm_2)
# Feed the same encoded vector to the decoder at each of the MAX_SEQUENCE_LENGTH steps.
repeat_1 = RepeatVector(MAX_SEQUENCE_LENGTH, name='repeat_1')(lstm_2)
print(repeat_1)
# Decoder: return_sequences=True keeps one output per timestep.
lstm_3 = Bidirectional(LSTM(EMBEDDING_DIM, return_sequences=True, name='lstm_3'))(repeat_1)
print(lstm_3)
# lstm_4 = LSTM(EMBEDDING_DIM, return_sequences=True, name='lstm_4')(lstm_3)
# print(lstm_4)
# Distribution over the VOCAB_SIZE token ids at every timestep.
softmax_1 = TimeDistributed(Dense(VOCAB_SIZE, activation='softmax'))(lstm_3)
print(softmax_1)

In [None]:
# Wrap the graph into a trainable model mapping token ids to per-timestep
# word distributions; categorical cross-entropy matches the one-hot targets.
model = Model(main_input, softmax_1)
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Train the model to reproduce its input: token ids in, one-hot encoding of the
# same tokens out (despite the "_rev" name, the targets are built in the same order).
model.fit(x_train, x_train_rev, batch_size=32, epochs=50, verbose=1)

In [None]:
# Reconstruct the training sentences themselves (no held-out data is used here).
predictions = model.predict(x_train)

In [None]:
# Reverse lookup (index -> word) for every tokenizer entry whose index does not
# exceed VOCAB_SIZE; used to turn predicted indices back into words.
word_dict = {
    rank: word
    for word, rank in keras_tokenizer.word_index.items()
    if rank <= VOCAB_SIZE
}

In [None]:
# Minimum winning probability a timestep needs for its word to be emitted
# (despite the name, this acts as a lower threshold).
max_prob = 0.3


def sequence_to_str(sequence, index_to_word=None, min_prob=None):
    """Decode per-timestep class probabilities into a list of words.

    Args:
        sequence: Iterable of probability vectors, one per timestep, where
            position ``k`` of a vector is the probability of token id ``k``.
        index_to_word: Mapping from token id to word. Defaults to the
            module-level ``word_dict``.
        min_prob: Timesteps whose highest probability is below this value are
            skipped. Defaults to the module-level ``max_prob``.

    Returns:
        The decoded words, in timestep order, as a list of strings.

    Raises:
        KeyError: If a predicted token id has no entry in ``index_to_word``.
    """
    # Resolve defaults at call time so later changes to the globals are honoured.
    if index_to_word is None:
        index_to_word = word_dict
    if min_prob is None:
        min_prob = max_prob

    word_list = list()
    for element in sequence:
        if amax(element) < min_prob:
            continue
        # The training targets are one-hot at the token id itself, so the
        # argmax already is the token id; adding 1 would shift every word.
        index = int(argmax(element))
        if index == 0:
            # Id 0 is the padding value, not a word.
            continue
        word_list.append(index_to_word[index])

    return word_list

In [None]:
# Print every source sentence next to the model's reconstruction, with the
# reconstruction cut to the source's whitespace-token count.
for i, prediction in enumerate(predictions):
    decoded_words = sequence_to_str(prediction)
    source_text = all_texts[i]
    word_limit = len(source_text.split())
    print("Actual: " + source_text)
    print("Generated: " + " ".join(decoded_words[:word_limit]) + " - " + "\n")

In [None]:
# Signal in the browser that the generation loop above has finished.
browser_notify("Sentences generated")

In [None]:
# Number of rows in the first weight array of the model's second layer.
# NOTE(review): layers[1] is presumably the Embedding layer (layers[0] being the input) — confirm.
len(model.layers[1].get_weights()[0])

In [None]:
# Row 1 of that weight array.
# NOTE(review): presumably the learned embedding of token id 1 — confirm.
model.layers[1].get_weights()[0][1]