In [0]:
# Mount Google Drive inside the Colab VM so the project files stored there are reachable.
from google.colab import drive
drive.mount('/content/gdrive')
import os
# Work from the project's `main` directory: later cells open files through
# relative paths such as '../vocab/...', '../data/...' and '../model/...'.
os.chdir('/content/gdrive/My Drive/finch/tensorflow1/recommender/movielens/main')

In [2]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [3]:
import tensorflow as tf
import pprint
import logging

from pathlib import Path

# Report the TensorFlow version and whether a GPU is visible to this runtime.
print("TensorFlow Version", tf.__version__)
print('GPU Enabled:', tf.test.is_gpu_available())

TensorFlow Version 1.15.2
GPU Enabled: True


In [0]:
# stream data from text files
def gen_fn(f_path):
  """Stream `(features, score)` examples from a tab-separated text file.

  The movie-type vocabulary is read from '../vocab/movie_types.txt' (one type
  per line; the line number is the type's index) every time the generator is
  started.

  Each non-blank line of `f_path` must hold exactly eight tab-separated fields,
  in this order:
    user_id, user_gender, user_age, user_job,
    movie_id, movie_types, movie_title, score

  Args:
    f_path: path of the data file to read.

  Yields:
    `((user_id, user_age, user_job, user_gender, movie_id, movie_types_,
    movie_title), score)` where
      * the five id-like fields and `score` are the raw strings from the file
        (note that gender moves from 2nd position in the file to 4th here),
      * `movie_types_` is a multi-hot list of 0/1 ints, one slot per vocabulary
        entry,
      * `movie_title` is the title split on whitespace into a list of tokens.

  Raises:
    ValueError: if a non-blank line does not have exactly eight fields.
    KeyError: if a movie type is not present in the vocabulary file.
  """
  with open('../vocab/movie_types.txt') as f:
    movietype2idx = {line.rstrip(): i for i, line in enumerate(f)}
  num_types = len(movietype2idx)

  with open(f_path) as f:
    print('Reading', f_path)
    for line in f:
      line = line.rstrip()
      if not line:
        # A blank line (typically a trailing newline at the end of the file)
        # carries no record; unpacking it below would raise ValueError.
        continue
      (user_id, user_gender, user_age, user_job,
       movie_id, movie_types, movie_title, score) = line.split('\t')

      # Multi-hot encode the whitespace-separated movie types.
      movie_types_ = [0] * num_types
      for movie_type in movie_types.split():
        movie_types_[movietype2idx[movie_type]] = 1

      movie_title = movie_title.split()
      yield (user_id, user_age, user_job, user_gender, movie_id, movie_types_, movie_title), score


def dataset(is_training, params):
  """Build the batched `tf.data` input pipeline on top of `gen_fn`.

  Args:
    is_training: when truthy, read `params['train_path']` and shuffle with a
      buffer of `params['buffer_size']` examples; otherwise read
      `params['test_path']` in file order.
    params: dict providing 'train_path', 'test_path', 'batch_size' and (for
      training) 'buffer_size'.

  Returns:
    A `tf.data.Dataset` of padded batches of `(features, score)` with
    prefetching enabled.
  """
  # Per-example structure: five scalar strings, an 18-slot movie-type vector,
  # a variable-length list of title tokens, and a scalar score.
  example_shapes = (([], [], [], [], [], [18], [None]), [])
  example_types = ((tf.string,) * 5 + (tf.int32, tf.string), tf.float32)
  pad_values = (('-1',) * 5 + (-1, '<pad>'), 0.)

  # The path is looked up inside the lambda, i.e. when the generator starts.
  path_key = 'train_path' if is_training else 'test_path'
  ds = tf.data.Dataset.from_generator(
    lambda: gen_fn(params[path_key]),
    output_types=example_types,
    output_shapes=example_shapes)

  if is_training:
    ds = ds.shuffle(params['buffer_size'])

  ds = ds.padded_batch(params['batch_size'], example_shapes, pad_values)
  return ds.prefetch(tf.data.experimental.AUTOTUNE)

In [0]:
def clr(step,
        initial_learning_rate,
        maximal_learning_rate,
        step_size,
        scale_fn,
        scale_mode,):
  """Cyclical (triangular) learning-rate schedule.

  With
    cycle = floor(1 + step / (2 * step_size))
    x     = |step / step_size - 2 * cycle + 1|
  the returned rate is
    initial + (maximal - initial) * max(0, 1 - x) * scale_fn(mode_step)
  so the rate climbs from `initial_learning_rate` to the (scaled) maximum over
  `step_size` steps and falls back over the next `step_size` steps.

  Args:
    step: current training step (tensor or number).
    initial_learning_rate: lower bound of the schedule; its dtype determines
      the dtype of the whole computation.
    maximal_learning_rate: upper bound of the schedule before scaling.
    step_size: number of steps in half a cycle.
    scale_fn: callable applied to `mode_step`; its result multiplies the
      amplitude of the triangle.
    scale_mode: 'cycle' feeds the cycle index to `scale_fn`; any other value
      feeds the step.

  Returns:
    A scalar tensor holding the learning rate for `step`.
  """
  initial_learning_rate = tf.convert_to_tensor(
    initial_learning_rate, name='initial_learning_rate')
  dtype = initial_learning_rate.dtype
  maximal_learning_rate = tf.cast(maximal_learning_rate, dtype)
  step_size = tf.cast(step_size, dtype)
  # Cast the step to the learning-rate dtype (not a fixed float32) so that the
  # arithmetic below never mixes dtypes, e.g. with a float64 learning rate.
  step = tf.cast(step, dtype)

  cycle = tf.floor(1 + step / (2 * step_size))
  x = tf.abs(step / step_size - 2 * cycle + 1)

  mode_step = cycle if scale_mode == 'cycle' else step

  return initial_learning_rate + (
    maximal_learning_rate - initial_learning_rate) * tf.maximum(
      tf.cast(0, dtype), (1 - x)) * scale_fn(mode_step)

In [0]:
def model_fn(features, labels, mode, params):
  """Estimator `model_fn`: two-tower rating model with a sigmoid output.

  A user tower (id, age, job, gender embeddings) and a movie tower (id
  embedding, multi-hot movie types, max-pooled 1-D convolution over the title
  tokens) are each projected to `hidden_dim`. Their absolute difference,
  product and raw values are concatenated and passed through an MLP that
  outputs one logit per example; the prediction is `5 * sigmoid(logit)`.

  Args:
    features: tuple `(user_id, user_age, user_job, user_gender, movie_id,
      movie_types, movie_title)` in exactly that order.
    labels: float ratings, or None when no labels are supplied.
    mode: a `tf.estimator.ModeKeys` value.
    params: dict with the vocabulary sizes ('*_size'), 'small_embed_dim',
      'large_embed_dim', 'hidden_dim', 'activation', 'kernel_size',
      'dropout_rate' and 'batch_size'.

  Returns:
    A `tf.estimator.EstimatorSpec` for TRAIN, EVAL or PREDICT.
  """
  # Receive inputs
  user_id, user_age, user_job, user_gender, movie_id, movie_types, movie_title = features


  # Flag for Dropout / Batch Norm
  is_training = (mode == tf.estimator.ModeKeys.TRAIN)


  # Word Indexing: every table maps unknown strings to one extra OOV bucket
  lookup_user_id = tf.contrib.lookup.index_table_from_file(
    '../vocab/user_id.txt', num_oov_buckets=1)

  lookup_user_age = tf.contrib.lookup.index_table_from_file(
    '../vocab/user_age.txt', num_oov_buckets=1)

  lookup_user_job = tf.contrib.lookup.index_table_from_file(
    '../vocab/user_job.txt', num_oov_buckets=1)

  lookup_user_gender = tf.contrib.lookup.index_table_from_file(
    '../vocab/user_gender.txt', num_oov_buckets=1)

  lookup_movie_id = tf.contrib.lookup.index_table_from_file(
    '../vocab/movie_id.txt', num_oov_buckets=1)

  lookup_movie_title = tf.contrib.lookup.index_table_from_file(
    '../vocab/movie_title.txt', num_oov_buckets=1)

  user_id = lookup_user_id.lookup(user_id)

  user_age = lookup_user_age.lookup(user_age)

  user_job = lookup_user_job.lookup(user_job)

  user_gender = lookup_user_gender.lookup(user_gender)

  movie_id = lookup_movie_id.lookup(movie_id)

  movie_title = lookup_movie_title.lookup(movie_title)


  # Embedding: `+ 1` reserves a row for the OOV bucket of the lookup table
  user_id = tf.contrib.layers.embed_sequence(
    ids = user_id,
    vocab_size = params['user_id_size'] + 1,
    embed_dim = params['large_embed_dim'],
    scope='user_id')

  user_age = tf.contrib.layers.embed_sequence(
    ids = user_age,
    vocab_size = params['user_age_size'] + 1,
    embed_dim = params['small_embed_dim'],
    scope='user_age')

  user_job = tf.contrib.layers.embed_sequence(
    ids = user_job,
    vocab_size = params['user_job_size'] + 1,
    embed_dim = params['small_embed_dim'],
    scope='user_job')

  # NOTE(review): unlike the other embeddings this one has no `+ 1` row for the
  # OOV bucket, so an unknown gender string would index out of range. It is
  # left as-is because changing the size alters the shape of a checkpointed
  # variable; confirm the data can only contain in-vocabulary genders.
  user_gender = tf.contrib.layers.embed_sequence(
    ids = user_gender,
    vocab_size = params['user_gender_size'],
    embed_dim = params['small_embed_dim'],
    scope='user_gender')

  movie_id = tf.contrib.layers.embed_sequence(
    ids = movie_id,
    vocab_size = params['movie_id_size'] + 1,
    embed_dim = params['large_embed_dim'],
    scope='movie_id')

  movie_types = tf.to_float(movie_types)

  movie_title = tf.contrib.layers.embed_sequence(
    ids = movie_title,
    vocab_size = params['movie_title_size'] + 1,
    embed_dim = params['large_embed_dim'],
    scope='movie_title')


  # User Network
  user_feature = tf.concat((user_id, user_age, user_job, user_gender), -1)

  user_feature = tf.layers.dropout(user_feature, params['dropout_rate'], training=is_training)

  user_feature = tf.layers.dense(user_feature,
                                 params['hidden_dim'],
                                 params['activation'],
                                 name='user_feature/fc')


  # Movie Network
  movie_title = tf.layers.dropout(movie_title, params['dropout_rate'], training=is_training)

  # Convolve over the token axis, then max-pool over it (axis=1) to obtain one
  # fixed-size vector per title.
  movie_title = tf.reduce_max(tf.layers.conv1d(movie_title,
                                               filters=params['large_embed_dim'],
                                               kernel_size=params['kernel_size'],
                                               activation=params['activation'],
                                               name='movie_feature/conv1d'), axis=1)

  movie_feature = tf.concat((movie_id, movie_types, movie_title), -1)

  movie_feature = tf.layers.dropout(movie_feature, params['dropout_rate'], training=is_training)

  movie_feature = tf.layers.dense(movie_feature,
                                  params['hidden_dim'],
                                  params['activation'],
                                  name='movie_feature/fc')


  # Aggregation
  scores = tf.concat([tf.abs(user_feature - movie_feature),
                      user_feature * movie_feature,
                      user_feature,
                      movie_feature], -1)
  scores = tf.layers.dropout(scores, params['dropout_rate'], training=is_training)
  scores = tf.layers.dense(scores, params['hidden_dim'], params['activation'])
  scores = tf.layers.dropout(scores, params['dropout_rate'], training=is_training)
  scores = tf.layers.dense(scores, params['hidden_dim'], params['activation'])
  scores = tf.layers.dense(scores, 1)
  scores = tf.squeeze(scores, -1)
  # The sigmoid squashes the logit into (0, 1); scale it to the 0-5 range.
  predictions = 5. * tf.sigmoid(scores)


  # Inference needs neither labels nor a loss; without this branch the
  # function would fall through and return None in PREDICT mode.
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)


  if labels is not None:
    # Ratings are rescaled to [0, 1] and used as soft targets for the logit.
    loss_op = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(
      labels=labels/5., logits=scores))


  if mode == tf.estimator.ModeKeys.TRAIN:
    tf.logging.info('\n'+pprint.pformat(tf.trainable_variables()))

    global_step = tf.train.get_or_create_global_step()
    # NOTE(review): 900228 is presumably the number of training examples, which
    # would make half a cycle roughly two epochs -- confirm against the data.
    decay_lr = clr(
      step = global_step,
      initial_learning_rate = 1e-4,
      maximal_learning_rate = 8e-4,
      step_size = 2 * 900228 // params['batch_size'],
      scale_fn = lambda x: 1 / (2.0 ** (x - 1)),
      scale_mode = 'cycle',)

    optim = tf.train.AdamOptimizer(decay_lr)
    train_op = optim.minimize(loss_op, global_step=global_step)

    hook = tf.train.LoggingTensorHook({'lr': decay_lr}, every_n_iter=100)

    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss_op,
                                      train_op=train_op,
                                      training_hooks=[hook],)


  if mode == tf.estimator.ModeKeys.EVAL:
    mae_op = tf.metrics.mean_absolute_error(labels=labels,
                                            predictions=predictions)
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss_op,
                                      eval_metric_ops={'mae': mae_op})

In [0]:
# Run configuration shared by the input pipeline, the model and the training loop.
params = {
    # Output locations and input data files (relative to the working directory)
    'log_path': '../log/dnn_sigmoid_clr.txt',
    'model_dir': '../model/dnn_sigmoid_clr',
    'train_path': '../data/train.txt',
    'test_path': '../data/test.txt',
    # Vocabulary sizes used as embedding-table sizes in model_fn
    'user_id_size': 6040,
    'user_age_size': 7,
    'user_job_size': 21,
    'user_gender_size': 2,
    'movie_id_size': 3691,
    'movie_title_size': 3702,
    # Network dimensions
    'small_embed_dim': 30,
    'large_embed_dim': 200,
    'hidden_dim': 200,
    'activation': tf.nn.elu,
    # Width of the 1-D convolution applied to the title tokens
    'kernel_size': 3,
    'dropout_rate': 0.2,
    # Training stops after this many consecutive evaluations without a better MAE
    'num_patience': 7,
    # Shuffle buffer size (training only) and batch size of the input pipeline
    'buffer_size': 200000,
    'batch_size': 256,
}

In [8]:
# Create directory if not exist
Path(os.path.dirname(params['log_path'])).mkdir(exist_ok=True, parents=True)
Path(params['model_dir']).mkdir(exist_ok=True, parents=True)

# Logging: mirror TensorFlow's log records into the log file.
logger = logging.getLogger('tensorflow')
logger.setLevel(logging.INFO)
# Reuse a handler already attached for this file so that re-running the cell
# does not write every record to the log several times.
log_file = os.path.abspath(params['log_path'])
fh = next((h for h in logger.handlers
           if isinstance(h, logging.FileHandler) and h.baseFilename == log_file),
          None)
if fh is None:
  fh = logging.FileHandler(params['log_path'])
  logger.addHandler(fh)

# Create an estimator
config = tf.estimator.RunConfig(
  save_checkpoints_steps=900228//params['batch_size']+1,
  keep_checkpoint_max=3)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  model_dir=params['model_dir'],
  config=config,
  params=params)

best_mae = 10000.
count = 0
# NOTE(review): the Estimator builds its own graph for train/evaluate, so it is
# unclear whether eager mode is required here -- confirm before removing.
tf.enable_eager_execution()

# Alternate one pass over the training data with one evaluation; stop once the
# MAE has failed to improve `num_patience` evaluations in a row.
while True:
  estimator.train(input_fn=lambda: dataset(is_training=True, params=params))

  mae = estimator.evaluate(input_fn=lambda: dataset(is_training=False, params=params))['mae']
  logger.info("MAE: {:.3f}".format(mae))

  if mae < best_mae:
    best_mae = mae
    count = 0
  else:
    count += 1
  logger.info("Best MAE: {:.3f}".format(best_mae))

  if count == params['num_patience']:
    print(params['num_patience'], "times not improve the best result, therefore stop training")
    break

INFO:tensorflow:Using config: {'_model_dir': '../model/dnn_sigmoid_clr', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 3517, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 3, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fca17070710>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized 