In [1]:
!pip install annoy

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting annoy
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/a1/5b/1c22129f608b3f438713b91cd880dc681d747a860afe3e8e0af86e921942/annoy-1.17.0.tar.gz (646 kB)
[K     |████████████████████████████████| 646 kB 1.5 MB/s eta 0:00:01
[?25hBuilding wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... [?25ldone
[?25h  Created wheel for annoy: filename=annoy-1.17.0-cp37-cp37m-linux_x86_64.whl size=387317 sha256=a03f5982bab55fc7cad99ed49036c600114677e40bdbe63c069609ce47cad5fe
  Stored in directory: /home/yeyuel/.cache/pip/wheels/b2/fe/b6/e0c12ce95e0c7b8a1727ed7465d54b3ae2d6ffa29976ff229b
Successfully built annoy
Installing collected packages: annoy
Successfully installed annoy-1.17.0


In [3]:
import math
import os
import pandas as pd
import numpy as np
from datetime import datetime

import tensorflow as tf
from tensorflow import data

print ("Tensorflow: {}".format(tf.__version__))

SEED = 19831060

Tensorflow: 1.15.4


In [6]:
DATA_DIR='./data/recommend'
#! wget http://files.grouplens.org/datasets/movielens/ml-latest-small.zip -P data/recommend
! unzip data/recommend/ml-latest-small.zip -d data/recommend
TRAIN_DATA_FILE = os.path.join(DATA_DIR, 'ml-latest-small/ratings.csv')

Archive:  data/recommend/ml-latest-small.zip
   creating: data/recommend/ml-latest-small/
  inflating: data/recommend/ml-latest-small/links.csv  
  inflating: data/recommend/ml-latest-small/tags.csv  
  inflating: data/recommend/ml-latest-small/ratings.csv  
  inflating: data/recommend/ml-latest-small/README.txt  
  inflating: data/recommend/ml-latest-small/movies.csv  


In [8]:
ratings_data = pd.read_csv(TRAIN_DATA_FILE)
ratings_data.describe()

Unnamed: 0,userId,movieId,rating,timestamp
count,100836.0,100836.0,100836.0,100836.0
mean,326.127564,19435.295718,3.501557,1205946000.0
std,182.618491,35530.987199,1.042529,216261000.0
min,1.0,1.0,0.5,828124600.0
25%,177.0,1199.0,3.0,1019124000.0
50%,325.0,2991.0,3.5,1186087000.0
75%,477.0,8122.0,4.0,1435994000.0
max,610.0,193609.0,5.0,1537799000.0


In [10]:
ratings_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [11]:
movies_data = pd.read_csv(os.path.join(DATA_DIR, 'ml-latest-small/movies.csv'))
movies_data.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [12]:
HEADER = ['userId', 'movieId', 'rating', 'timestamp']
HEADER_DEAFAULTS = [0, 0, 0.0, 0]
TARGET_NAME = 'rating'
num_users = ratings_data.userId.max()
num_movies = movies_data.movieId.max()
num_users, num_movies

(610, 193609)

In [13]:
def make_input_fn(file_pattern, 
                 batch_size,
                 num_epochs,
                 mode=tf.estimator.ModeKeys.EVAL):
    def _input_fn():
        dataset = tf.data.experimental.make_csv_dataset(
            file_pattern=file_pattern,
            batch_size=batch_size,
            column_names=HEADER,
            column_defaults=HEADER_DEAFAULTS,
            label_name=TARGET_NAME,
            field_delim=',',
            use_quote_delim=True,
            header=True,
            num_epochs=num_epochs,
            shuffle=(mode == tf.estimator.ModeKeys.TRAIN)
        )
        return dataset
    return _input_fn

In [14]:
def create_feature_columns(embedding_size):
    feature_columns = []
    feature_columns.append(
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(
                'userId', num_buckets=num_users + 1
            ),
            embedding_size
        )
    )
    feature_columns.append(
        tf.feature_column.embedding_column(
            tf.feature_column.categorical_column_with_identity(
                'movieId', num_buckets=num_movies + 1
            ),
            embedding_size
        )
    )
    return feature_columns

In [67]:
def model_fn(features, labels, mode, params):
    feature_columns = create_feature_columns(params.embedding_size)
    user_layer = tf.feature_column.input_layer(
        features={'userId': features['userId']},
        feature_columns=[feature_columns[0]]
    )     
    predictions = None
    export_outputs = None
    loss = None
    train_op = None
    
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'user_embedding': user_layer}
        export_outputs = {'predictions': tf.estimator.export.PredictOutput(predictions)}
    else:
        movie_layer = tf.feature_column.input_layer(
            features={'movieId': features['movieId']},
            feature_columns=[feature_columns[1]]
        )
        dot_product = tf.keras.layers.Dot(axes=1)([user_layer, movie_layer])
        logits = tf.clip_by_value(clip_value_min=0, clip_value_max=5, t=dot_product)
        loss = tf.losses.mean_squared_error(labels, tf.squeeze(logits))
        train_op = tf.train.FtrlOptimizer(params.learning_rate).minimize(
            loss=loss,
            global_step=tf.train.get_global_step()
        )
        loss = tf.losses.mean_squared_error(labels, tf.squeeze(logits))
    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        export_outputs=export_outputs,
        loss=loss,
        train_op=train_op
    )

In [68]:
def create_estimator(params, run_config):
    estimator = tf.estimator.Estimator(
        model_fn,
        params=params,
        config=run_config
    )
    return estimator

In [69]:
def train_and_evaluate_experiment(params, run_config):
    train_input_fn = make_input_fn(
        TRAIN_DATA_FILE,
        batch_size=params.batch_size,
        num_epochs=None,
        mode=tf.estimator.ModeKeys.TRAIN
    )
    train_spec = tf.estimator.TrainSpec(
        input_fn=train_input_fn,
        max_steps=params.training_steps
    )
    
    eval_input_fn = make_input_fn(
        TRAIN_DATA_FILE,
        num_epochs=1,
        batch_size=params.batch_size,
    )

    eval_spec = tf.estimator.EvalSpec(
        name=datetime.utcnow().strftime("%H%M%S"),
        input_fn = eval_input_fn,
        steps=None,
        start_delay_secs=0,
        throttle_secs=params.eval_throttle_secs
    )
    
    tf.logging.set_verbosity(tf.logging.INFO)
    
    if tf.gfile.Exists(run_config.model_dir):
        print("Removing previous artefacts...")
        tf.gfile.DeleteRecursively(run_config.model_dir)
    
    print ('')
    estimator = create_estimator(params, run_config)
    print ('')
    
    
    time_start = datetime.utcnow() 
    print("Experiment started at {}".format(time_start.strftime("%H:%M:%S")))
    print(".......................................") 
    
    tf.estimator.train_and_evaluate(
        estimator=estimator,
        train_spec=train_spec, 
        eval_spec=eval_spec
    )
    
    time_end = datetime.utcnow() 
    print(".......................................")
    print("Experiment finished at {}".format(time_end.strftime("%H:%M:%S")))
    print("")
    time_elapsed = time_end - time_start
    print("Experiment elapsed time: {} seconds".format(time_elapsed.total_seconds()))
    
    return estimator

In [107]:
MODELS_LOCATION = 'models/movieles'
MODEL_NAME = 'recommender_01'
model_dir = os.path.join(MODELS_LOCATION, MODEL_NAME)

params = tf.contrib.training.HParams(
    batch_size=265,
    training_steps=100000,
    learning_rate=0.1,
    embedding_size=16,
    eval_throttle_secs=0,
)

run_config = tf.estimator.RunConfig(
    tf_random_seed=SEED,
    save_checkpoints_steps=100000,
    keep_checkpoint_max=3,
    model_dir=model_dir,
)

estimator = train_and_evaluate_experiment(params, run_config)

Removing previous artefacts...

INFO:tensorflow:Using config: {'_model_dir': 'models/movieles/recommender_01', '_tf_random_seed': 19831060, '_save_summary_steps': 100, '_save_checkpoints_steps': 100000, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 3, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f0080c3eed0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}

Experiment started at 11:56:31
.........

INFO:tensorflow:global_step/sec: 773.228
INFO:tensorflow:loss = 1.3287405, step = 6700 (0.129 sec)
INFO:tensorflow:global_step/sec: 753.996
INFO:tensorflow:loss = 1.8312188, step = 6800 (0.133 sec)
INFO:tensorflow:global_step/sec: 778.587
INFO:tensorflow:loss = 1.8006757, step = 6900 (0.128 sec)
INFO:tensorflow:global_step/sec: 768.392
INFO:tensorflow:loss = 1.1418326, step = 7000 (0.130 sec)
INFO:tensorflow:global_step/sec: 749.107
INFO:tensorflow:loss = 1.3865831, step = 7100 (0.133 sec)
INFO:tensorflow:global_step/sec: 765.237
INFO:tensorflow:loss = 1.4066986, step = 7200 (0.131 sec)
INFO:tensorflow:global_step/sec: 750.535
INFO:tensorflow:loss = 1.5605676, step = 7300 (0.133 sec)
INFO:tensorflow:global_step/sec: 726.87
INFO:tensorflow:loss = 1.3462518, step = 7400 (0.138 sec)
INFO:tensorflow:global_step/sec: 751.902
INFO:tensorflow:loss = 1.0402344, step = 7500 (0.133 sec)
INFO:tensorflow:global_step/sec: 762.703
INFO:tensorflow:loss = 1.6805042, step = 7600 (0.131 sec)
INFO:tensor

INFO:tensorflow:loss = 1.0134714, step = 14900 (0.135 sec)
INFO:tensorflow:global_step/sec: 715.465
INFO:tensorflow:loss = 0.6237663, step = 15000 (0.140 sec)
INFO:tensorflow:global_step/sec: 730.2
INFO:tensorflow:loss = 0.7413354, step = 15100 (0.137 sec)
INFO:tensorflow:global_step/sec: 749.337
INFO:tensorflow:loss = 0.8588084, step = 15200 (0.133 sec)
INFO:tensorflow:global_step/sec: 716.091
INFO:tensorflow:loss = 0.6840558, step = 15300 (0.140 sec)
INFO:tensorflow:global_step/sec: 746.511
INFO:tensorflow:loss = 0.9049974, step = 15400 (0.134 sec)
INFO:tensorflow:global_step/sec: 732.892
INFO:tensorflow:loss = 0.666028, step = 15500 (0.136 sec)
INFO:tensorflow:global_step/sec: 755.144
INFO:tensorflow:loss = 0.7345227, step = 15600 (0.132 sec)
INFO:tensorflow:global_step/sec: 725.81
INFO:tensorflow:loss = 0.74484664, step = 15700 (0.138 sec)
INFO:tensorflow:global_step/sec: 748.289
INFO:tensorflow:loss = 0.68073946, step = 15800 (0.134 sec)
INFO:tensorflow:global_step/sec: 734.424
IN

INFO:tensorflow:loss = 0.53736526, step = 23100 (0.143 sec)
INFO:tensorflow:global_step/sec: 729.569
INFO:tensorflow:loss = 0.7214883, step = 23200 (0.137 sec)
INFO:tensorflow:global_step/sec: 689.639
INFO:tensorflow:loss = 0.6468163, step = 23300 (0.145 sec)
INFO:tensorflow:global_step/sec: 714.057
INFO:tensorflow:loss = 0.60288936, step = 23400 (0.140 sec)
INFO:tensorflow:global_step/sec: 657.696
INFO:tensorflow:loss = 0.71493524, step = 23500 (0.152 sec)
INFO:tensorflow:global_step/sec: 679.149
INFO:tensorflow:loss = 0.75575703, step = 23600 (0.148 sec)
INFO:tensorflow:global_step/sec: 699.11
INFO:tensorflow:loss = 0.7926037, step = 23700 (0.143 sec)
INFO:tensorflow:global_step/sec: 738.479
INFO:tensorflow:loss = 0.6277408, step = 23800 (0.135 sec)
INFO:tensorflow:global_step/sec: 715.432
INFO:tensorflow:loss = 0.728066, step = 23900 (0.140 sec)
INFO:tensorflow:global_step/sec: 743.883
INFO:tensorflow:loss = 0.876035, step = 24000 (0.135 sec)
INFO:tensorflow:global_step/sec: 681.249

INFO:tensorflow:loss = 0.50514305, step = 31300 (0.154 sec)
INFO:tensorflow:global_step/sec: 766.217
INFO:tensorflow:loss = 0.43296418, step = 31400 (0.130 sec)
INFO:tensorflow:global_step/sec: 649.987
INFO:tensorflow:loss = 0.8560535, step = 31500 (0.154 sec)
INFO:tensorflow:global_step/sec: 703.625
INFO:tensorflow:loss = 0.96248794, step = 31600 (0.142 sec)
INFO:tensorflow:global_step/sec: 699.708
INFO:tensorflow:loss = 0.4305267, step = 31700 (0.143 sec)
INFO:tensorflow:global_step/sec: 800.828
INFO:tensorflow:loss = 0.45553523, step = 31800 (0.125 sec)
INFO:tensorflow:global_step/sec: 658.266
INFO:tensorflow:loss = 0.5008366, step = 31900 (0.152 sec)
INFO:tensorflow:global_step/sec: 660.523
INFO:tensorflow:loss = 0.7079783, step = 32000 (0.151 sec)
INFO:tensorflow:global_step/sec: 683.965
INFO:tensorflow:loss = 0.44266185, step = 32100 (0.146 sec)
INFO:tensorflow:global_step/sec: 694.544
INFO:tensorflow:loss = 0.31117368, step = 32200 (0.144 sec)
INFO:tensorflow:global_step/sec: 69

INFO:tensorflow:global_step/sec: 674.267
INFO:tensorflow:loss = 0.6245045, step = 39500 (0.148 sec)
INFO:tensorflow:global_step/sec: 724.788
INFO:tensorflow:loss = 0.48116472, step = 39600 (0.138 sec)
INFO:tensorflow:global_step/sec: 681.282
INFO:tensorflow:loss = 0.50174755, step = 39700 (0.147 sec)
INFO:tensorflow:global_step/sec: 790.048
INFO:tensorflow:loss = 0.32571065, step = 39800 (0.126 sec)
INFO:tensorflow:global_step/sec: 650.089
INFO:tensorflow:loss = 0.58401656, step = 39900 (0.154 sec)
INFO:tensorflow:global_step/sec: 708.307
INFO:tensorflow:loss = 0.68404424, step = 40000 (0.141 sec)
INFO:tensorflow:global_step/sec: 665.895
INFO:tensorflow:loss = 0.43906334, step = 40100 (0.150 sec)
INFO:tensorflow:global_step/sec: 762.317
INFO:tensorflow:loss = 0.25446472, step = 40200 (0.131 sec)
INFO:tensorflow:global_step/sec: 640.052
INFO:tensorflow:loss = 0.7641988, step = 40300 (0.156 sec)
INFO:tensorflow:global_step/sec: 749.55
INFO:tensorflow:loss = 0.51636136, step = 40400 (0.13

INFO:tensorflow:global_step/sec: 628.997
INFO:tensorflow:loss = 0.46266508, step = 47700 (0.159 sec)
INFO:tensorflow:global_step/sec: 756.974
INFO:tensorflow:loss = 0.4266801, step = 47800 (0.132 sec)
INFO:tensorflow:global_step/sec: 614.193
INFO:tensorflow:loss = 0.495845, step = 47900 (0.163 sec)
INFO:tensorflow:global_step/sec: 770.13
INFO:tensorflow:loss = 0.5976812, step = 48000 (0.130 sec)
INFO:tensorflow:global_step/sec: 650.175
INFO:tensorflow:loss = 0.39223373, step = 48100 (0.154 sec)
INFO:tensorflow:global_step/sec: 757.826
INFO:tensorflow:loss = 0.2753255, step = 48200 (0.132 sec)
INFO:tensorflow:global_step/sec: 642.889
INFO:tensorflow:loss = 0.7562758, step = 48300 (0.155 sec)
INFO:tensorflow:global_step/sec: 768.32
INFO:tensorflow:loss = 0.3522199, step = 48400 (0.130 sec)
INFO:tensorflow:global_step/sec: 644.698
INFO:tensorflow:loss = 0.5085943, step = 48500 (0.155 sec)
INFO:tensorflow:global_step/sec: 784.249
INFO:tensorflow:loss = 0.4475243, step = 48600 (0.128 sec)
I

INFO:tensorflow:global_step/sec: 633.52
INFO:tensorflow:loss = 0.19611292, step = 55900 (0.158 sec)
INFO:tensorflow:global_step/sec: 757.048
INFO:tensorflow:loss = 0.47511148, step = 56000 (0.132 sec)
INFO:tensorflow:global_step/sec: 643.053
INFO:tensorflow:loss = 0.34870827, step = 56100 (0.156 sec)
INFO:tensorflow:global_step/sec: 769.278
INFO:tensorflow:loss = 0.35224602, step = 56200 (0.130 sec)
INFO:tensorflow:global_step/sec: 638.406
INFO:tensorflow:loss = 0.34983614, step = 56300 (0.157 sec)
INFO:tensorflow:global_step/sec: 764.219
INFO:tensorflow:loss = 0.45463932, step = 56400 (0.131 sec)
INFO:tensorflow:global_step/sec: 633.955
INFO:tensorflow:loss = 0.3523127, step = 56500 (0.158 sec)
INFO:tensorflow:global_step/sec: 753.803
INFO:tensorflow:loss = 0.4872736, step = 56600 (0.133 sec)
INFO:tensorflow:global_step/sec: 640.22
INFO:tensorflow:loss = 0.42141065, step = 56700 (0.156 sec)
INFO:tensorflow:global_step/sec: 773.834
INFO:tensorflow:loss = 0.42671037, step = 56800 (0.130

INFO:tensorflow:global_step/sec: 637.732
INFO:tensorflow:loss = 0.26961726, step = 64100 (0.157 sec)
INFO:tensorflow:global_step/sec: 770.853
INFO:tensorflow:loss = 0.23456658, step = 64200 (0.130 sec)
INFO:tensorflow:global_step/sec: 660.629
INFO:tensorflow:loss = 0.4754884, step = 64300 (0.151 sec)
INFO:tensorflow:global_step/sec: 793.053
INFO:tensorflow:loss = 0.3027902, step = 64400 (0.126 sec)
INFO:tensorflow:global_step/sec: 612.497
INFO:tensorflow:loss = 0.4249696, step = 64500 (0.163 sec)
INFO:tensorflow:global_step/sec: 787.379
INFO:tensorflow:loss = 0.48185986, step = 64600 (0.127 sec)
INFO:tensorflow:global_step/sec: 625.201
INFO:tensorflow:loss = 0.43649566, step = 64700 (0.160 sec)
INFO:tensorflow:global_step/sec: 785.98
INFO:tensorflow:loss = 0.4505611, step = 64800 (0.127 sec)
INFO:tensorflow:global_step/sec: 613.154
INFO:tensorflow:loss = 0.20058887, step = 64900 (0.163 sec)
INFO:tensorflow:global_step/sec: 810.198
INFO:tensorflow:loss = 0.43700698, step = 65000 (0.123 

INFO:tensorflow:global_step/sec: 646.702
INFO:tensorflow:loss = 0.760399, step = 72300 (0.155 sec)
INFO:tensorflow:global_step/sec: 777.493
INFO:tensorflow:loss = 0.32826796, step = 72400 (0.129 sec)
INFO:tensorflow:global_step/sec: 613.409
INFO:tensorflow:loss = 0.3076093, step = 72500 (0.163 sec)
INFO:tensorflow:global_step/sec: 780.739
INFO:tensorflow:loss = 0.6039967, step = 72600 (0.128 sec)
INFO:tensorflow:global_step/sec: 634.572
INFO:tensorflow:loss = 0.4387877, step = 72700 (0.157 sec)
INFO:tensorflow:global_step/sec: 771.085
INFO:tensorflow:loss = 0.2334765, step = 72800 (0.130 sec)
INFO:tensorflow:global_step/sec: 614.627
INFO:tensorflow:loss = 0.35649255, step = 72900 (0.163 sec)
INFO:tensorflow:global_step/sec: 760.444
INFO:tensorflow:loss = 0.40526325, step = 73000 (0.132 sec)
INFO:tensorflow:global_step/sec: 630.613
INFO:tensorflow:loss = 0.61742514, step = 73100 (0.159 sec)
INFO:tensorflow:global_step/sec: 790.315
INFO:tensorflow:loss = 0.2526043, step = 73200 (0.127 se

INFO:tensorflow:global_step/sec: 597.06
INFO:tensorflow:loss = 0.22125956, step = 80500 (0.168 sec)
INFO:tensorflow:global_step/sec: 773.413
INFO:tensorflow:loss = 0.35549408, step = 80600 (0.129 sec)
INFO:tensorflow:global_step/sec: 655.308
INFO:tensorflow:loss = 0.23310737, step = 80700 (0.153 sec)
INFO:tensorflow:global_step/sec: 799.486
INFO:tensorflow:loss = 0.27189326, step = 80800 (0.125 sec)
INFO:tensorflow:global_step/sec: 592.221
INFO:tensorflow:loss = 0.33839518, step = 80900 (0.169 sec)
INFO:tensorflow:global_step/sec: 778.086
INFO:tensorflow:loss = 0.3959865, step = 81000 (0.128 sec)
INFO:tensorflow:global_step/sec: 624.77
INFO:tensorflow:loss = 0.37598902, step = 81100 (0.160 sec)
INFO:tensorflow:global_step/sec: 751.117
INFO:tensorflow:loss = 0.36086288, step = 81200 (0.133 sec)
INFO:tensorflow:global_step/sec: 596.527
INFO:tensorflow:loss = 0.25391975, step = 81300 (0.168 sec)
INFO:tensorflow:global_step/sec: 651.442
INFO:tensorflow:loss = 0.32287136, step = 81400 (0.15

INFO:tensorflow:loss = 0.36221075, step = 88600 (0.132 sec)
INFO:tensorflow:global_step/sec: 661.538
INFO:tensorflow:loss = 0.49298275, step = 88700 (0.151 sec)
INFO:tensorflow:global_step/sec: 757.668
INFO:tensorflow:loss = 0.24224779, step = 88800 (0.132 sec)
INFO:tensorflow:global_step/sec: 581.234
INFO:tensorflow:loss = 0.4027469, step = 88900 (0.172 sec)
INFO:tensorflow:global_step/sec: 659.73
INFO:tensorflow:loss = 0.24811909, step = 89000 (0.152 sec)
INFO:tensorflow:global_step/sec: 768.169
INFO:tensorflow:loss = 0.35074973, step = 89100 (0.130 sec)
INFO:tensorflow:global_step/sec: 751.317
INFO:tensorflow:loss = 0.258212, step = 89200 (0.133 sec)
INFO:tensorflow:global_step/sec: 590.182
INFO:tensorflow:loss = 0.19636938, step = 89300 (0.169 sec)
INFO:tensorflow:global_step/sec: 673.292
INFO:tensorflow:loss = 0.2702322, step = 89400 (0.149 sec)
INFO:tensorflow:global_step/sec: 764.193
INFO:tensorflow:loss = 0.27010238, step = 89500 (0.131 sec)
INFO:tensorflow:global_step/sec: 750

INFO:tensorflow:global_step/sec: 776.431
INFO:tensorflow:loss = 0.1839214, step = 96800 (0.129 sec)
INFO:tensorflow:global_step/sec: 559.788
INFO:tensorflow:loss = 0.2030171, step = 96900 (0.179 sec)
INFO:tensorflow:global_step/sec: 681.058
INFO:tensorflow:loss = 0.1968327, step = 97000 (0.147 sec)
INFO:tensorflow:global_step/sec: 799.049
INFO:tensorflow:loss = 0.21693608, step = 97100 (0.125 sec)
INFO:tensorflow:global_step/sec: 770.666
INFO:tensorflow:loss = 0.2463315, step = 97200 (0.130 sec)
INFO:tensorflow:global_step/sec: 557.686
INFO:tensorflow:loss = 0.23984145, step = 97300 (0.179 sec)
INFO:tensorflow:global_step/sec: 712.692
INFO:tensorflow:loss = 0.23568447, step = 97400 (0.140 sec)
INFO:tensorflow:global_step/sec: 749.696
INFO:tensorflow:loss = 0.33719847, step = 97500 (0.133 sec)
INFO:tensorflow:global_step/sec: 756.824
INFO:tensorflow:loss = 0.209382, step = 97600 (0.132 sec)
INFO:tensorflow:global_step/sec: 565.063
INFO:tensorflow:loss = 0.2799213, step = 97700 (0.177 se

In [108]:
def find_embedding_tensor():
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(os.path.join(model_dir, 'model.ckpt-100000.meta'))
        saver.restore(sess, os.path.join(model_dir, 'model.ckpt-100000'))
        graph = tf.get_default_graph()
        trainable_tensors = map(str, graph.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))
        for tensor in set(trainable_tensors):
            print(tensor)
find_embedding_tensor()

INFO:tensorflow:Restoring parameters from models/movieles/recommender_01/model.ckpt-100000
<tf.Variable 'input_layer/userId_embedding/embedding_weights:0' shape=(611, 16) dtype=float32_ref>
<tf.Variable 'input_layer_1/movieId_embedding/embedding_weights:0' shape=(193610, 16) dtype=float32_ref>


In [110]:
def extract_embeddings():
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(os.path.join(model_dir, 'model.ckpt-100000.meta'))
        saver.restore(sess, os.path.join(model_dir, 'model.ckpt-100000'))
        graph = tf.get_default_graph()
        weights_tensor = graph.get_tensor_by_name('input_layer_1/movieId_embedding/embedding_weights:0')
        weights = np.array(sess.run(weights_tensor))
    embeddings = {}
    for i in range(weights.shape[0]):
        embeddings[i] = weights[i]
    return embeddings
embeddings = extract_embeddings()
embeddings[0], len(embeddings)

INFO:tensorflow:Restoring parameters from models/movieles/recommender_01/model.ckpt-100000


(array([-0.3186064 , -0.32052556,  0.00386797, -0.07156315, -0.09930257,
         0.26019013,  0.25462744,  0.0400505 ,  0.2191054 , -0.443145  ,
        -0.13772446,  0.10723513, -0.18739784, -0.3282234 ,  0.4154505 ,
         0.0642556 ], dtype=float32),
 193610)

In [111]:
from annoy import AnnoyIndex

def build_embeddings_index(num_trees):
    total_items = 0
    annoy_index = AnnoyIndex(params.embedding_size, metric='angular')
    for item_id in embeddings.keys():
        annoy_index.add_item(item_id, embeddings[item_id])
        total_items += 1
    print ("{} items where added to the index".format(total_items))
    annoy_index.build(n_trees=num_trees)
    print("Index is built")
    return annoy_index

index = build_embeddings_index(100)

193610 items where added to the index
Index is built


In [112]:
frequent_movie_ids = list(ratings_data.movieId.value_counts().index[:15])
frequent_movie_ids

[356, 318, 296, 593, 2571, 260, 480, 110, 589, 527, 2959, 1, 1196, 2858, 50]

In [113]:
movies_data[movies_data['movieId'].isin(frequent_movie_ids)]

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
46,50,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
97,110,Braveheart (1995),Action|Drama|War
224,260,Star Wars: Episode IV - A New Hope (1977),Action|Adventure|Sci-Fi
257,296,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
277,318,"Shawshank Redemption, The (1994)",Crime|Drama
314,356,Forrest Gump (1994),Comedy|Drama|Romance|War
418,480,Jurassic Park (1993),Action|Adventure|Sci-Fi|Thriller
461,527,Schindler's List (1993),Drama|War
507,589,Terminator 2: Judgment Day (1991),Action|Sci-Fi


In [114]:
def get_similar_movies(movie_id, num_matches=5):
    similar_movie_ids = index.get_nns_by_item(
        movie_id,
        num_matches,
        search_k=1,
        include_distances=False
    )
    similar_movies = movies_data[movies_data['movieId'].isin(similar_movie_ids)].title
    return similar_movies

for movie_id in frequent_movie_ids:
    movie_title = movies_data[movies_data['movieId'] == movie_id].title.values[0]
    print ("Movie: {}".format(movie_title))
    similar_movies = get_similar_movies(movie_id)
    print ("Similar Movies:")
    print (similar_movies)
    print ("--------------------------------------")

Movie: Forrest Gump (1994)
Similar Movies:
55              Mr. Holland's Opus (1995)
314                   Forrest Gump (1994)
1486    Back to the Future Part II (1989)
2103                           Big (1988)
2342      River Runs Through It, A (1992)
Name: title, dtype: object
--------------------------------------
Movie: Shawshank Redemption, The (1994)
Similar Movies:
277                      Shawshank Redemption, The (1994)
995                                    Real Genius (1985)
2515                             American Graffiti (1973)
5714                                    Layer Cake (2004)
6165    Lives of Others, The (Das leben der Anderen) (...
Name: title, dtype: object
--------------------------------------
Movie: Pulp Fiction (1994)
Similar Movies:
257    Pulp Fiction (1994)
Name: title, dtype: object
--------------------------------------
Movie: Silence of the Lambs, The (1991)
Similar Movies:
510                      Silence of the Lambs, The (1991)
900     Raiders of 

In [115]:
def make_serving_input_receiver_fn():
    return tf.estimator.export.build_raw_serving_input_receiver_fn(
        {'userId': tf.placeholder(shape=[None], dtype=tf.int32)}
    )
export_dir = os.path.join(model_dir, 'export')

if tf.gfile.Exists(export_dir):
    tf.gfile.DeleteRecursively(export_dir)

estimator.export_saved_model(
    export_dir_base=export_dir,
    serving_input_receiver_fn=make_serving_input_receiver_fn()
)

INFO:tensorflow:Calling model_fn.
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Signatures INCLUDED in export for Classify: None
INFO:tensorflow:Signatures INCLUDED in export for Regress: None
INFO:tensorflow:Signatures INCLUDED in export for Predict: ['predictions', 'serving_default']
INFO:tensorflow:Signatures INCLUDED in export for Train: None
INFO:tensorflow:Signatures INCLUDED in export for Eval: None
INFO:tensorflow:Restoring parameters from models/movieles/recommender_01/model.ckpt-100000
INFO:tensorflow:Assets added to graph.
INFO:tensorflow:No assets to write.
INFO:tensorflow:SavedModel written to: models/movieles/recommender_01/export/temp-b'1644148775'/saved_model.pb


b'models/movieles/recommender_01/export/1644148775'

In [119]:
import os
import time

export_dir = os.path.join(model_dir, 'export')
saved_model_dir = os.path.join(
    export_dir, [f for f in os.listdir(export_dir) if f.isdigit()][0]
)
print(saved_model_dir)

predictor_fn = tf.contrib.predictor.from_saved_model(
    export_dir=saved_model_dir
)
start = time.time()
output = predictor_fn({"userId": [1]})
print('Elapse:{}'.format(time.time() - start))
print(output)

models/movieles/recommender_01/export/1644148775
INFO:tensorflow:Restoring parameters from models/movieles/recommender_01/export/1644148775/variables/variables
Elapse:0.018871545791625977
{'user_embedding': array([[-1.8726412 , -1.865848  , -0.86219114, -0.09486696, -0.25576988,
         1.2030656 ,  1.2501827 , -1.8678687 , -0.90552235, -0.87550503,
         0.8407979 ,  1.788189  , -1.4882287 ,  1.5865356 ,  0.30127403,
        -0.34822515]], dtype=float32)}


In [117]:
def recommend_new_movies(userId, num_recommendations=5):
    watched_movie_ids = list(ratings_data[ratings_data['userId'] == userId]['movieId'])
    user_embedding = predictor_fn({'userId': [userId]})['user_embedding'][0]
    similar_movie_ids = index.get_nns_by_vector(
        user_embedding, num_recommendations + len(watched_movie_ids), search_k = -1, include_distances=False
    )
    recommended_movie_ids = set(similar_movie_ids) - set(watched_movie_ids)
    similar_movies = movies_data[movies_data['movieId'].isin(recommended_movie_ids)].title
    return similar_movies

frequent_user_ids = list((ratings_data.userId.value_counts().index[-350:]))[:5] 
for user_id in frequent_user_ids:
    print ("User: {}".format(user_id))
    recommended = recommend_new_movies(user_id)
    print ("Recommend movies: {}".format(len(recommended)))
    print (recommended)
    print ("--------------------------------------")

User: 418
Recommend movies: 62
52                      Postman, The (Postino, Il) (1994)
83                                 Beautiful Girls (1996)
199                                        Exotica (1994)
344                        Low Down Dirty Shame, A (1994)
520                                          Fargo (1996)
                              ...                        
6922                                   Gran Torino (2008)
6941                                      Defiance (2008)
7263                             Ghost Writer, The (2010)
7351                             Letters to Juliet (2010)
8452    Birdman: Or (The Unexpected Virtue of Ignoranc...
Name: title, Length: 62, dtype: object
--------------------------------------
User: 244
Recommend movies: 98
208                       Farinelli: il castrato (1994)
2323                                       42 Up (1998)
4324    My Father the Hero (Mon père, ce héros.) (1991)
5591                          My Father the Hero (1994