In [1]:
import pickle
import numpy as np
import tensorflow as tf
from preppy import UserPreppy
from user import UserModel
from seq2seq import Seq2SeqModel
from tensorflow.contrib.seq2seq import *
from tensorflow.python.layers.core import Dense

# Print arrays in full instead of summarising long ones with "...".
# np.nan is rejected as a threshold by newer NumPy releases (ValueError);
# an infinite threshold disables summarisation without that problem.
np.set_printoptions(threshold=np.inf)

In [2]:
# Reset the global default graph so the cells below build their ops into a
# fresh graph.
tf.reset_default_graph()

In [3]:
# Hyper-parameters and output locations for the user model.
params = dict(
    # Model sizes
    embedding_size=50,   # user embedding
    num_users=101,
    hidden_size=64,      # hidden dense layer

    # Training schedule
    epochs=1,
    batch_size=32,

    # Optimisation
    grad_clip=5.0,
    learning_rate=0.001,

    # Where the checkpoint and the pickled user embedding are written
    save_path='./Model/User/model.ckpt',
    embedding_path='./Model/User/user_embedding.pkl',
)


Build the training and validation datasets by reading the TFRecord files.

In [4]:
def expand(x):
    """Add a leading axis of size 1 to the 'label' and 'user' entries of x.

    Each of the two entries is converted to a tensor and wrapped with
    tf.expand_dims(..., 0). The dict is updated in place and returned;
    every other key is left untouched.
    """
    for key in ('label', 'user'):
        as_tensor = tf.convert_to_tensor(x[key])
        x[key] = tf.expand_dims(as_tensor, 0)
    return x

def deflate(x):
    """Drop the trailing size-1 axis from the 'label' and 'user' entries of x.

    Only the last axis is squeezed. A bare tf.squeeze removes every size-1
    dimension, which would also collapse the batch dimension whenever a
    batch holds a single example.

    The dict is updated in place and returned; other keys are untouched.
    """
    for key in ('label', 'user'):
        x[key] = tf.squeeze(x[key], axis=-1)
    return x

def tokenizer(sentence):
    """Split a sentence into tokens on runs of whitespace."""
    tokens = sentence.split()
    return tokens

def save_params(params, path='./Model/User/params.pkl'):
    """Pickle the params object to path, overwriting any existing file."""
    with open(path, mode='wb') as sink:
        pickle.dump(params, sink)

def load_params(path='./Model/User/params.pkl'):
    """Read back and return the object pickled at path.

    Unpickling can execute arbitrary code, so only point this at files
    you trust.
    """
    with open(path, mode='rb') as source:
        restored = pickle.load(source)
    return restored

In [5]:
# Load the pickled preprocessing object; its `parse` method decodes TFRecord
# examples and its `vocab` is used later to size the Seq2Seq vocabulary.
# The file is opened in a `with` block so the handle is closed right after
# loading instead of being left to the garbage collector.
# NOTE: unpickling executes arbitrary code; only load a trusted file.
with open('./data/user/preppy.pkl', 'rb') as preppy_file:
    preppy = pickle.load(preppy_file)

# Training and validation examples, parsed one record at a time.
dataset_train = tf.data.TFRecordDataset(['./data/user/train.tfrecord']).map(preppy.parse)
dataset_val = tf.data.TFRecordDataset(['./data/user/val.tfrecord']).map(preppy.parse)

In [6]:
# Inspect the per-example (unbatched) component shapes of the parsed dataset.
dataset_train.output_shapes

{'sentence': TensorShape([Dimension(None)]),
 'user': TensorShape([]),
 'label': TensorShape([])}

In [7]:
# Single session shared by every cell below. Soft placement is enabled and
# device-placement logging is switched off.
session_config = tf.ConfigProto(
    allow_soft_placement=True,
    log_device_placement=False,
)
sess = tf.Session(config=session_config)

In [8]:
def _batch_examples(dataset, batch_size):
    """Turn a dataset of parsed examples into padded, fixed-size batches.

    'label' and 'user' are scalars per example, so each is first expanded
    to shape [1] (`expand`), padded-batched together with the
    variable-length 'sentence', and then squeezed back (`deflate`).
    Incomplete final batches are dropped so every batch has `batch_size`
    rows.
    """
    padded_shapes = {
        "sentence": tf.TensorShape([None]),
        "label": 1,
        "user": 1,
    }
    return (dataset
            .map(expand)
            .padded_batch(batch_size, padded_shapes=padded_shapes, drop_remainder=True)
            .map(deflate))


# Use the configured batch size rather than a second hard-coded copy of it.
batched_train = _batch_examples(dataset_train, params['batch_size'])
batched_val = _batch_examples(dataset_val, params['batch_size'])

# Feedable iterator: the string handle fed at run time selects whether
# `next_item` is drawn from the training or the validation iterator.
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(
    handle, batched_train.output_types, batched_train.output_shapes)

next_item = iterator.get_next()

In [9]:
# Inspect the component shapes after batching.
batched_train.output_shapes

{'sentence': TensorShape([Dimension(32), Dimension(None)]),
 'user': TensorShape([Dimension(32)]),
 'label': TensorShape([Dimension(32)])}

In [10]:
# One re-initialisable iterator per split.
iterator_train = batched_train.make_initializable_iterator()
iterator_val = batched_val.make_initializable_iterator()

# Resolve both string handles; they are fed into the `handle` placeholder to
# choose which iterator supplies `next_item`.
handle_train, handle_val = sess.run(
    [iterator_train.string_handle(), iterator_val.string_handle()])

In [11]:
# Load the hyper-parameters pickled for the Seq2Seq model and set its
# vocabulary size from the vocabulary of the loaded preppy object.
seqParams = load_params('./Model/Seq2seq/params.pkl')
seqParams["vocab_size"] = len(preppy.vocab)

# Build the Seq2Seq graph on the shared iterator output, then restore its
# weights from the checkpoint path stored in its params.
# NOTE(review): this Saver is created before UserModel is built, so it
# presumably covers only the variables that exist at this point - confirm
# before reusing `saver` with any other checkpoint.
Seq = Seq2SeqModel(next_item, seqParams)
saver = tf.train.Saver()
saver.restore(sess, seqParams["save_path"])

# The user model's sentence input size is taken from the Seq2Seq hidden size.
params["sentence_size"] = seqParams["hidden_size"]

INFO:tensorflow:Restoring parameters from ./Model/Seq2seq/model.ckpt


In [12]:
# Remember which variables already exist (the restored Seq2Seq weights) so
# that only the variables UserModel creates get initialised below.
# tf.global_variables_initializer() would re-run the initialiser of every
# variable and overwrite the weights restored from the Seq2Seq checkpoint.
_existing_var_names = {v.name for v in tf.global_variables()}

M = UserModel(params)

_user_model_vars = [v for v in tf.global_variables()
                    if v.name not in _existing_var_names]
sess.run(tf.variables_initializer(_user_model_vars))

In [16]:
# Reload weights from a previous run of this notebook, if there is one.
# The checkpoint is only written by the training cell below, so on a fresh
# run it does not exist yet and an unguarded restore would raise.
# NOTE(review): `saver` was created before UserModel was built, so this
# presumably restores only the Seq2Seq variables, not the user model's -
# confirm that is the intent.
if tf.train.checkpoint_exists(params["save_path"]):
    saver.restore(sess, params["save_path"])
else:
    print("No checkpoint found at %s; skipping restore." % params["save_path"])

INFO:tensorflow:Restoring parameters from ./Model/User/model.ckpt


In [17]:
def _next_encoded_batch(iterator_handle):
    """Pull the next batch from the iterator behind `iterator_handle` and encode it.

    Returns a `(sentence, label, user)` tuple of evaluated arrays, where
    `sentence` is element 1 of the Seq2Seq encoder state.
    Raises tf.errors.OutOfRangeError once the selected iterator is exhausted.
    """
    encoder_state, label, user = sess.run(
        [Seq.encoder_state, next_item["label"], next_item["user"]],
        feed_dict={handle: iterator_handle})
    # NOTE(review): index 1 is presumably the hidden-state half of an LSTM
    # state tuple - confirm against Seq2SeqModel.
    return encoder_state[1], label, user


loss_train = []
loss_val = []
for epoch in range(params["epochs"]):
    print("Epoch: %d"%(epoch))
    sess.run(iterator_train.initializer)
    print("Training")
    while True:
        try:
            sentence, label, user = _next_encoded_batch(handle_train)
            _, loss = sess.run([M.train_op, M.loss],feed_dict={
                handle: handle_train,
                M.lr: params["learning_rate"],
                M.sentence : sentence,
                M.label : label,
                M.user : user
            })
            loss_train.append(loss)
            print(loss)
        except tf.errors.OutOfRangeError:
            # Normal end of the epoch.
            break
        except tf.errors.DataLossError:
            # Best effort: a corrupt record ends the epoch early, but say so.
            print("Warning: DataLossError while reading training data; ending epoch early.")
            break
    print("Validation")
    sess.run(iterator_val.initializer)
    val_start = len(loss_val)
    while True:
        try:
            # Read from the validation iterator. Feeding the training handle
            # here would hit the already exhausted training iterator and end
            # validation before a single batch is evaluated.
            sentence, label, user = _next_encoded_batch(handle_val)
            # Fetch the scalar loss (not a one-element list) so loss_val has
            # the same layout as loss_train.
            loss = sess.run(M.loss,feed_dict={
                handle: handle_val,
                M.sentence : sentence,
                M.label : label,
                M.user : user
            })
            loss_val.append(loss)
        except tf.errors.OutOfRangeError:
            break
        except tf.errors.DataLossError:
            print("Warning: DataLossError while reading validation data; ending validation early.")
            break
    if len(loss_val) > val_start:
        print("Mean validation loss: %f" % np.mean(loss_val[val_start:]))
print("Training and Validation Finish")

# Save Model
# A fresh Saver is created here so that it covers every variable now in the
# graph, including the ones created by UserModel.
saver = tf.train.Saver()
saver.save(sess, params["save_path"])

# Save Params
save_params(params)

# Save User Embedding
# Written to the configured path instead of a second hard-coded copy of it.
embedding_val = sess.run(M.embedding)
with open(params["embedding_path"], 'wb') as file_:
    pickle.dump(embedding_val, file_)
print("User embedding saved.")

print('Model Trained and Saved')

Epoch: 0
Training
0.6964846
0.68993723
0.6978911
0.6847792
0.69020134
0.6864337
0.69083273
0.6749105
0.66989493
0.69407606
0.69295365
0.69564354
0.684717
0.69610405
0.7015598
0.68744725
0.6923918
0.690138
0.6992079
0.69927347
0.67309225
0.6925549
0.6954669
0.68072695
0.692428
0.674317
0.68860185
0.68521947
0.6873394
0.70906407
0.6868935
0.6875931
0.68293715
0.6780328
0.68645287
0.68429923
0.7124741
0.69358677
0.7050556
0.6914098
0.70712733
0.71576077
0.6896998
0.67892253
0.6819953
0.70642835
0.70389366
0.688598
0.67295206
0.6987164
0.6884563
0.69931674
0.6824068
0.6873734
0.68728375
0.69111335
0.6775852
0.6884354
0.71058095
0.66827023
0.7029172
0.6976316
0.69959474
0.67035306
0.701221
0.6783539
0.6995835
0.6871792
0.691428
0.6871319
0.69992876
0.6631992
0.6918665
0.7049333
0.6962968
0.71169615
0.70991254
0.6528042
0.71186745
0.69307643
0.6847131
0.7218623
0.71742296
0.6912377
0.67516947
0.6803793
0.6923623
0.68002915
0.7019937
0.6854713
0.70147276
0.6933782
0.6906024
0.68401515
0.69729