In [None]:
import codecs
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Model
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing import sequence
from tqdm import tqdm
import import_ipynb
from my_layers import Average, WeightedSum, WeightedAspectEmb, MaxMargin, Attention
from reader import get_data
from w2vEmbReader import W2VEmbReader as EmbReader
from model import create_model

In [None]:
# Run configuration shared by the optimizer factory, the model builder and the
# training loop. The original literal listed 'neg_size' twice; the duplicate
# entry (same value) has been removed so each key appears exactly once.
args = {
    'ortho_reg': 0.1,        # NOTE(review): presumably read by create_model -- confirm
    'neg_size': 20,          # negatives per sentence (passed to negative_batch_generator)
    'emb_dim': 200,          # NOTE(review): presumably the embedding width -- confirm
    'aspect_size': 14,       # NOTE(review): presumably the number of aspects -- confirm
    'emb_path': '../w2v_embedding',
    'algorithm': 'adam',     # NOTE(review): presumably selects the optimizer in get_optimizer -- confirm
    'batch_size': 32,        # rows per training batch
    'epochs': 15,            # number of passes of the outer training loop
}

In [None]:
# Directory that receives the training artifacts. The training loop appends
# '/aspect.log' and '/model_param' to this string, so the resulting paths
# contain a doubled slash ('..//aspect.log').
out_dir = '../'

In [None]:
# Build the optimizer from the run configuration. `get_optimizer` comes from
# the project-local `optimizers` module and receives the whole `args` dict.
# NOTE(review): presumably it dispatches on args['algorithm'] -- confirm.
# NOTE(review): this import sits outside the notebook's import cell.
from optimizers import get_optimizer
optimizer = get_optimizer(args)

In [None]:
# Load the corpus through the project-local `reader.get_data`. The three
# returned values are unpacked as the vocabulary (used below as a
# word -> index mapping), the training sequences, and the maximum length
# later passed to pad_sequences.
# NOTE(review): 9000 is presumably a vocabulary-size cap -- confirm in reader.
vocab, train_x, overall_maxlen = get_data('clean_review', 9000)

In [None]:
# Pad/truncate every training sequence to `overall_maxlen`; `train_x` is
# rebound to the padded result.
train_x = sequence.pad_sequences(train_x, maxlen=overall_maxlen)
# Disabled: no `test_x` is loaded in this notebook.
# test_x = sequence.pad_sequences(test_x, maxlen=overall_maxlen)

In [None]:
# Number of entries in the vocabulary mapping.
vocab_size = len(vocab)
# NOTE(review): `batches_per_epoch` and `min_loss` are assigned again in a
# later cell (100 and +inf) before the training loop runs, so the value 1000
# set here is not the one the loop uses.
batches_per_epoch = 1000
min_loss = float('inf')

In [None]:
def sentence_batch_generator(data, batch_size):
    """Yield consecutive batches of ``data`` forever, reshuffling each epoch.

    Args:
        data: Sequence supporting ``len()``, slicing and
            ``np.random.shuffle`` (e.g. a NumPy array). It is shuffled
            IN PLACE, so the caller's object is reordered as well.
        batch_size: Number of rows per batch.

    Yields:
        ``data[k*batch_size:(k+1)*batch_size]`` for successive ``k``. Rows
        that do not fill a whole batch at the end of an epoch are skipped
        for that epoch. If ``data`` has fewer rows than ``batch_size``, the
        whole (reshuffled) data is yielded as a single short batch each time.
    """
    # Floor division: with true division the count was a float, so the
    # equality test below never fired unless len(data) was an exact multiple
    # of batch_size, and the generator ran past the end of `data`, yielding a
    # short batch followed by empty batches forever.
    n_batch = len(data) // batch_size
    batch_count = 0
    np.random.shuffle(data)

    while True:
        # `>=` (not `==`) also handles n_batch == 0 without running off the end.
        if batch_count >= n_batch:
            np.random.shuffle(data)
            batch_count = 0

        batch = data[batch_count*batch_size: (batch_count+1)*batch_size]
        batch_count += 1
        yield batch

def negative_batch_generator(data, batch_size, neg_size):
    """Endlessly yield randomly sampled rows of ``data`` as negative examples.

    Args:
        data: Array with at least two axes; rows are sampled along axis 0.
        batch_size: Number of groups of negatives per yielded array.
        neg_size: Number of sampled rows in each group.

    Yields:
        Array of shape ``(batch_size, neg_size, data.shape[1])`` whose rows
        are drawn uniformly with replacement from ``data``.
    """
    n_rows, row_width = data.shape[0], data.shape[1]
    draws_per_batch = batch_size * neg_size
    out_shape = (batch_size, neg_size, row_width)

    while True:
        # np.random.choice samples with replacement by default.
        picked = np.random.choice(n_rows, draws_per_batch)
        yield data[picked].reshape(out_shape)

In [None]:
def max_margin_loss(y_true, y_pred):
    """Loss function with the Keras ``(y_true, y_pred)`` signature.

    Returns the mean of ``y_pred`` computed with the Keras backend.
    ``y_true`` is accepted only to satisfy the signature and is never read.
    """
    mean_output = K.mean(y_pred)
    return mean_output

In [None]:
# Build the network with the project-local `model.create_model`, passing the
# run configuration, the padded sequence length and the vocabulary.
model = create_model(args, overall_maxlen, vocab)

In [None]:
# Freeze the layer named 'word_emb' before compiling so its weights are not
# updated during training.
model.get_layer('word_emb').trainable=False
# The same function is used both as the training loss and as the reported
# metric, so train_on_batch returns two values per step.
model.compile(optimizer=optimizer, loss=max_margin_loss, metrics=[max_margin_loss])

In [None]:
# Reverse lookup (index -> word) used when writing the top words per aspect.
vocab_inv = {ind: w for w, ind in vocab.items()}

In [None]:
# Infinite batch sources for training; both read from `train_x`.
# Note: sentence_batch_generator shuffles `train_x` in place.
sen_gen = sentence_batch_generator(train_x, args['batch_size'])
# Yields arrays of shape (batch_size, neg_size, train_x.shape[1]).
neg_gen = negative_batch_generator(train_x, args['batch_size'], args['neg_size'])

In [None]:
# tf.keras.utils.plot_model(
#     model,
#     to_file="model.png",
#     show_shapes=False,
#     show_layer_names=True,
#     rankdir="TB",
#     expand_nested=True,
#     dpi=96,
# ) 

In [None]:
# Values actually used by the training loop; these replace the earlier
# assignments of `batches_per_epoch` (1000) and `min_loss`.
batches_per_epoch = 100
# Best (lowest) epoch loss seen so far; checkpoints are written when it improves.
min_loss = float('inf')

In [None]:
# Training loop: `batches_per_epoch` optimizer steps per epoch. Whenever the
# epoch's average loss improves on `min_loss`, the model weights are saved and
# the top-20 words closest to each aspect embedding are written to aspect.log.
for i in range(args['epochs']):
    # Per-epoch running averages. The metric accumulator must NOT be called
    # `max_margin_loss`: that would rebind the loss function of the same name
    # to a float and break any later re-run of the compile cell.
    loss, epoch_max_margin_loss = 0., 0.

    for b in tqdm(range(batches_per_epoch)):
        sen_input = next(sen_gen)
        neg_input = next(neg_gen)

        # train_on_batch performs the gradient update itself, so no
        # tf.GradientTape is needed around it. The all-ones targets are
        # placeholders: max_margin_loss ignores y_true.
        batch_loss, batch_max_margin_loss = model.train_on_batch(
            [sen_input, neg_input], np.ones((args['batch_size'], 1)))

        loss += batch_loss / batches_per_epoch
        epoch_max_margin_loss += batch_max_margin_loss / batches_per_epoch

    if loss < min_loss:
        min_loss = loss
        model.save_weights(out_dir+'/model_param')

        # get_weights() returns a list of arrays; dividing that list by the
        # norm turns it into one array with an extra leading axis, which is
        # why the matrices are taken with [0] below.
        # NOTE(review): assumes each of these layers holds a single weight
        # array -- confirm against my_layers / create_model.
        word_emb = model.get_layer('word_emb').get_weights()
        aspect_emb = model.get_layer('aspect_emb').get_weights()
        word_emb = word_emb / np.linalg.norm(word_emb, axis=-1, keepdims=True)
        aspect_emb = aspect_emb / np.linalg.norm(aspect_emb, axis=-1, keepdims=True)
        word_matrix = word_emb[0]
        aspect_matrix = aspect_emb[0]

        # Context manager guarantees the log is flushed and closed; the
        # original left the handle open.
        with codecs.open(out_dir+'/aspect.log', 'w', 'utf-8') as aspect_file:
            for ind in range(len(aspect_matrix)):
                desc = aspect_matrix[ind]
                # Dot product of unit-normalised rows, i.e. cosine similarity.
                sims = word_matrix.dot(desc.T)
                ordered_words = np.argsort(sims)[::-1]

                desc_list = [vocab_inv[w] for w in ordered_words[:20]]
                aspect_file.write('Aspect %d:\n' % ind)
                aspect_file.write(' '.join(desc_list) + '\n\n')
