In [2]:
import numpy as np

X_train = np.load('X_train_novel.npy')
X_test = np.load('X_test_novel.npy')
y_train = np.load('y_train_novel.npy')
y_test = np.load('y_test_novel.npy')
X_train_likes = np.load('X_train_likes_novel.npy')
X_test_likes = np.load('X_test_likes_novel.npy')

X_test_likes = X_test_likes.reshape(-1, 1)
X_train_likes = X_train_likes.reshape(-1, 1)

In [3]:
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, LSTM, Flatten, Embedding, Dropout, Input, Concatenate, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [4]:
vocab_size = 28645 - 13227 + 2 # total_cnt - rare_cnt + 2

input_A = Input(shape=X_train.shape[1:], name='review_input')
input_B = Input(shape=X_train_likes.shape[1:], name='like_input')

emb = Embedding(vocab_size, 100)(input_A)
rnn1 = Bidirectional(LSTM(50, dropout=0.2, return_sequences=True))(emb)
rnn2 = LSTM(50)(rnn1)
flatten_A = Flatten()(rnn2)

flatten_B = Flatten()(input_B)
hidden1 = Dense(256, activation='elu', kernel_initializer='lecun_normal')(flatten_B)
hidden2 = Dense(128, activation='tanh', kernel_initializer='lecun_normal')(hidden1)
hidden3 = Dense(64, activation='selu', kernel_initializer='lecun_normal')(hidden2)

concat = Concatenate()([flatten_A, hidden3])

output = Dense(4, activation='softmax')(concat)
model = Model(inputs=[input_A, input_B], outputs=[output])

In [7]:
model.compile(
    optimizer='rmsprop',
    loss='sparse_categorical_crossentropy',
    metrics=['acc']
)


es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)
mc = ModelCheckpoint("best_model.h5", monitor='val_acc', mode='max', verbose=1, save_best_only=True)

history = model.fit((X_train, X_train_likes),
                     y_train,
                     epochs=30,
                     callbacks=[es, mc],
                     batch_size=128,
                     validation_split=0.2
                    )

Train on 46284 samples, validate on 11572 samples
Epoch 1/30
Epoch 00001: val_acc improved from -inf to 0.66773, saving model to best_model.h5
Epoch 2/30
Epoch 00002: val_acc did not improve from 0.66773
Epoch 3/30
Epoch 00003: val_acc did not improve from 0.66773
Epoch 4/30
Epoch 00004: val_acc did not improve from 0.66773
Epoch 5/30
Epoch 00005: val_acc did not improve from 0.66773
Epoch 00005: early stopping


In [9]:
ce_test = model.evaluate((X_test, X_test_likes), y_test)
y_pred = model.predict((X_test, X_test_likes))



In [53]:
print(f'test loss: {ce_test[0]:.3f}, test acc: {ce_test[1]:.3f}')

test loss: 1.011, test acc: 0.660


In [51]:
import pandas as pd
prd = pd.DataFrame(np.argmax(y_pred, axis=1), columns=['Predcition']).T
target = pd.DataFrame(y_test, columns=['Target']).T
pd.concat([prd, target]).iloc[:, 50:65]

Unnamed: 0,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64
Predcition,3.0,0.0,3.0,3.0,1.0,2.0,3.0,0.0,2.0,3.0,1.0,1.0,2.0,2.0,3.0
Target,3.0,0.0,3.0,3.0,1.0,2.0,3.0,0.0,0.0,1.0,3.0,1.0,2.0,2.0,3.0
