In [1]:
import os
import math
import random
import pickle
import argparse
import matplotlib
import numpy as np
import pandas as pd
import tensorflow as tf
tf.compat.v1.enable_eager_execution()
print(tf.__version__)

from sklearn.preprocessing import Normalizer

1.15.0


In [2]:
#Making results reproduceable
RANDOM_SEED=42
np.random.seed(RANDOM_SEED)

In [3]:
path =  "../data/"
dataset = "cb12/"

raw_path = path + dataset + "raw/" 
interim_path = path + dataset + "interim/"
processed_path = path + dataset + "processed/"

# Step 1: Prepare job data

In [4]:
from utils import serialize, deserialize

In [5]:
def load_job_metadata_and_embeddings(input_job_metadata_and_embeddings_path):
    print('Loading job label encoders, job metadata and embeddings from {}'.format(input_job_metadata_and_embeddings_path))
    job_label_encoders, job_metadata_df, job_content_embeddings = deserialize(input_job_metadata_and_embeddings_path)
    return job_label_encoders, job_metadata_df, job_content_embeddings

job_label_encoders, job_metadata_df, job_content_embeddings = load_job_metadata_and_embeddings('../data/cb12/pickles/job_14d_30_metadata_and_embeddings_d2v.pickle')

Loading job label encoders, job metadata and embeddings from ../data/cb12/pickles/job_14d_30_metadata_and_embeddings_d2v.pickle


In [6]:
job_metadata_df = job_metadata_df.drop(columns=['JobCity','JobState', 'JobCountry'])
job_metadata_df.rename(columns={"JobCity_encoded": "JobCity", "JobState_encoded": "JobState", "JobCountry_encoded": "JobCountry"}, inplace = True)
print(job_metadata_df.shape)

(207972, 20)


In [7]:
#Apply l2-norm by sample
l2_normalizer_by_sample = Normalizer(norm='l2')
job_content_embeddings = l2_normalizer_by_sample.fit_transform(job_content_embeddings)

In [8]:
#Rescaling content features  
content_embedding_scale_factor = 1.0
job_content_embeddings = job_content_embeddings * content_embedding_scale_factor

In [9]:
from feature_helper import get_job_features_config

def process_job_metadata(job_metadata_df, job_features_config):
    job_metadata = {}
    for feature_name in job_features_config:
        job_metadata[feature_name] = job_metadata_df[feature_name].values
        #Appending a row in the first position to correspond to the <PAD> article #
        # (so that it correspond to content_article_embeddings_matrix.shape[0])
        job_metadata[feature_name] = np.hstack([[0], job_metadata[feature_name]])
    return job_metadata


job_features_config = get_job_features_config(job_label_encoders)
job_metadata = process_job_metadata(job_metadata_df, job_features_config)

Job Features: {'JobID': {'type': 'categorical', 'dtype': 'int', 'cardinality': 207973}, 'StartDate': {'type': 'numerical', 'dtype': 'int'}, 'EndDate': {'type': 'numerical', 'dtype': 'int'}, 'JobCity': {'type': 'categorical', 'dtype': 'int', 'cardinality': 8227}, 'JobState': {'type': 'categorical', 'dtype': 'int', 'cardinality': 123}, 'JobCountry': {'type': 'categorical', 'dtype': 'int', 'cardinality': 34}}


# Step 2: Prepare user data

In [10]:
def load_user_metadata(input_user_metadata_path):
    print('Loading user label encoders and user metadata from {}'.format(input_user_metadata_path))
    user_label_encoders, user_metadata_df = deserialize(input_user_metadata_path)
    return user_label_encoders, user_metadata_df


def process_user_metadata(user_metadata_df, user_features_config):
    user_metadata = {}
    for feature_name in user_features_config:
        user_metadata[feature_name] = user_metadata_df[feature_name + '_encoded'].values
        #Appending a row in the first position to correspond to the <PAD> article #
        # (so that it correspond to content_article_embeddings_matrix.shape[0])
        user_metadata[feature_name] = np.hstack([[0], user_metadata[feature_name]])
    return user_metadata


user_label_encoders, user_metadata_df = load_user_metadata('../data/cb12/pickles/user_14d_30_metadata.pickle')
user_metadata = process_user_metadata(user_metadata_df, user_label_encoders)

Loading user label encoders and user metadata from ../data/cb12/pickles/user_14d_30_metadata.pickle


In [11]:
from feature_helper import get_user_features_config

user_features_config = get_user_features_config(user_label_encoders)

User Features: {'UserID': {'type': 'categorical', 'dtype': 'int', 'cardinality': 111786}, 'UserCity': {'type': 'categorical', 'dtype': 'int', 'cardinality': 8227}, 'UserState': {'type': 'categorical', 'dtype': 'int', 'cardinality': 123}, 'UserCountry': {'type': 'categorical', 'dtype': 'int', 'cardinality': 34}, 'UserDegree': {'type': 'categorical', 'dtype': 'int', 'cardinality': 8}, 'UserMajor': {'type': 'categorical', 'dtype': 'int', 'cardinality': 21225}}


# Step 3: Prepare session config

In [12]:
from feature_helper import get_session_features_config

In [13]:
session_features_config = get_session_features_config(job_label_encoders, user_label_encoders)

Session Features: {'single_features': {'SessionID': {'type': 'numerical', 'dtype': 'int'}, 'UserID': {'type': 'numerical', 'dtype': 'int'}, 'SessionSize': {'type': 'numerical', 'dtype': 'int'}, 'SessionStart': {'type': 'numerical', 'dtype': 'int'}}, 'sequence_features': {'ApplicationDate': {'type': 'numerical', 'dtype': 'int'}, 'Job_clicked': {'type': 'categorical', 'dtype': 'int'}, 'JobCity': {'type': 'categorical', 'dtype': 'int', 'cardinality': 8227}, 'JobState': {'type': 'categorical', 'dtype': 'int', 'cardinality': 123}, 'JobCountry': {'type': 'categorical', 'dtype': 'int', 'cardinality': 34}, 'UserCity': {'type': 'categorical', 'dtype': 'int', 'cardinality': 8227}, 'UserState': {'type': 'categorical', 'dtype': 'int', 'cardinality': 123}, 'UserCountry': {'type': 'categorical', 'dtype': 'int', 'cardinality': 34}, 'UserDegree': {'type': 'categorical', 'dtype': 'int', 'cardinality': 8}, 'UserMajor': {'type': 'categorical', 'dtype': 'int', 'cardinality': 21225}}}


# Step 4: Build model

### Model

In [14]:
from PAN_model_improved_sampling import PAN

1.15.0
True


### ClickedJobsState

In [15]:
from clicked_job_state import ClickedJobsState

### Hook

In [16]:
from job_state_hook import JobsStateUpdaterHook

# Step 5: Train model

In [17]:
#from data_helper import prepare_dataset_iterator
import multiprocessing
from utils import merge_two_dicts, get_tf_dtype

In [18]:
#from data_helper import prepare_dataset_iterator
import multiprocessing
from utils import merge_two_dicts, get_tf_dtype


def expand_single_features(x, features_to_expand):
    """
    Hack. Because padded_batch doesn't play nice with scalars, so we expand the scalar to a vector of length 1
    """
    for feature_key in features_to_expand:
        x[feature_key] = tf.expand_dims(tf.convert_to_tensor(x[feature_key]), 0)
    return x


def parse_sequence_example(example, features_config, truncate_sequence_length=20):
    # Define how to parse the example
    
    context_features = {}
    features_config_single = features_config['single_features']
    for feature_name in features_config_single:        
        context_features[feature_name] = tf.FixedLenFeature([], dtype=get_tf_dtype(features_config_single[feature_name]['dtype']))
    
    sequence_features = {}
    features_config_sequence = features_config['sequence_features']
    for feature_name in features_config_sequence: 
        sequence_features[feature_name] = tf.FixedLenSequenceFeature(shape=[], dtype=get_tf_dtype(features_config_sequence[feature_name]['dtype']))
        
    context_parsed, sequence_parsed = tf.parse_single_sequence_example(
        example, 
        sequence_features=sequence_features,
        context_features=context_features,
        example_name="example"
    )
    
    #Truncate long sessions to a limit
    context_parsed['SessionSize'] = tf.minimum(context_parsed['SessionSize'], truncate_sequence_length)
    
    for feature_name in sequence_parsed:
        sequence_parsed[feature_name] = sequence_parsed[feature_name][:truncate_sequence_length] 
    
    #Ignoring first click from labels
    sequence_parsed['label_next_job'] = sequence_parsed['Job_clicked'][1:]    
    #Making it easy to retrieve the last label
    sequence_parsed['label_last_job'] = sequence_parsed['Job_clicked'][-1:]
    
    #Ignoring last clicked item from input    
    for feature_key in sequence_features:
        if feature_key not in ['label_next_job', 'label_last_job']:
            sequence_parsed[feature_key] = sequence_parsed[feature_key][:-1]
    
    merged_features = merge_two_dicts(context_parsed, sequence_parsed)
    
    #In order the pad the dataset, I had to use this hack to expand scalars to vectors.
    merged_expanded_features = expand_single_features(merged_features, features_to_expand=list(features_config['single_features'].keys()))
    return merged_expanded_features


def expand_to_vector_if_scalar(tensor):
    return tf.cond(tf.logical_and(tf.equal(tf.size(tensor), tf.constant(1)),tf.equal(tf.rank(tensor), tf.constant(0))), lambda: tf.expand_dims(tensor, 0), lambda: tensor)



def deflate_single_features(x, expanded_features):
    """
    Undo Hack. We undo the expansion we did in expand and make sure that vector has rank 2 (adds one dimension if this batch size == 1)
    """
    for feature_key in expanded_features:
        if feature_key != 'UserID':
            x[feature_key] = expand_to_vector_if_scalar(tf.squeeze(x[feature_key]))
    return x

    
def deflate_and_split_features_label(x, expanded_features): 
    #Undo that hack required for padding 
    x = deflate_single_features(x, expanded_features)
    labels = {
        'label_next_job': x['label_next_job'],
        'label_last_job': x['label_last_job']
    }
    del x['label_next_job']
    del x['label_last_job']
    
    #Returning features and label separatelly
    return(x, labels)

def make_dataset(path, features_config, batch_size=128, num_map_threads=None, truncate_sequence_length=20):
    def get_features_shapes(features_config):
        features_shapes = {}
        
        for feature_name in features_config['single_features']:        
            features_shapes[feature_name] = 1
        
        for feature_name in features_config['sequence_features']:        
            features_shapes[feature_name] = tf.TensorShape([None])
        
        features_shapes['label_next_job'] = tf.TensorShape([None])
        features_shapes['label_last_job'] = tf.TensorShape([None])
        
        return features_shapes
    
    if not num_map_threads:
        num_map_threads = multiprocessing.cpu_count()
        print('Using {} threads for parallel map'.format(num_map_threads))
    
    # Read a tf record file. This makes a dataset of raw TFRecords
    dataset = tf.data.TFRecordDataset(path, compression_type='GZIP')
    # Apply/map the parse function to every record. Now the dataset is a bunch of dictionaries of Tensors
    dataset =  dataset.map(lambda x: parse_sequence_example(x, features_config, truncate_sequence_length=truncate_sequence_length), num_parallel_calls=num_map_threads)
    
    #Batch the dataset so that we get batch_size examples in each batch.
    #Remember each item in the dataset is a dict of tensors, we need to specify padding for each tensor separately    
    features_shapes = get_features_shapes(features_config)
    dataset = dataset.padded_batch(batch_size, padded_shapes=features_shapes)
    
    #Splitting features and label
    expanded_features=list(features_config['single_features'].keys())
    dataset = dataset.map(lambda x: deflate_and_split_features_label(x, expanded_features), num_parallel_calls=num_map_threads)
    
    #Pre-fetches one batch ahead
    dataset = dataset.prefetch(1)
    return dataset

    
    
def prepare_dataset_iterator(path, features_config, batch_size=128, truncate_session_length=10):
    with tf.device('/cpu:0'):
        # Make a dataset 
        ds = make_dataset(path, features_config, batch_size=batch_size, truncate_sequence_length=truncate_session_length)    
        # Define an abstract iterator that has the shape and type of our datasets
        iterator = ds.make_one_shot_iterator()
        
        # This is an op that gets the next element from the iterator
        next_element = iterator.get_next()
        
        return next_element

In [19]:
### Training parameters
batch_size = 256
learning_rate = 0.001
dropout_keep_prob = 0.8
reg_l2 = 0.0001
softmax_temperature = 1.0

pretrained_job_embedding_size = 300
max_cardinality_for_ohe = 10
truncate_session_length = 10

### Recent applications buffer
# Maximum size of recent clicks buffer
recent_clicks_buffer_max_size = 1000
recent_clicks_for_normalization = 1000

### Negative samples
train_total_negative_samples = 15
# Training Negative samples from recent clicks buffer
train_negative_samples_from_buffer = 100

eval_total_negative_samples = 50
# Eval. Negative samples from recent clicks buffer
eval_negative_samples_from_buffer = 200

### RNN parameters
rnn_num_layers = 1
rnn_units = 256

### Evaluation
eval_metrics_top_n = 5

### Others
save_histograms = False

eval_metrics_by_session_position = False
eval_negative_sample_relevance = 1.0
save_eval_sessions_negative_samples = True
save_eval_sessions_recommendations = True

In [20]:
#Global vars updated by the Estimator Hook
clicked_jobs_state = None
eval_sessions_metrics_log = [] 
sessions_negative_jobs_log = [] if save_eval_sessions_negative_samples else None
sessions_model_recommendations_log = [] if save_eval_sessions_recommendations else None
global_eval_hour_id = 0

clicked_jobs_state = ClickedJobsState(recent_clicks_buffer_max_size, 
                                      recent_clicks_for_normalization, 
                                      job_content_embeddings.shape[0])   

In [21]:
def get_internal_enabled_features_config():
    VALID_INTERNAL_FEATURES = ['job_content_embeddings','job_clicked_embeddings']
    internal_features_config = {}
    enabled_features = set(VALID_INTERNAL_FEATURES)
    for feature in VALID_INTERNAL_FEATURES:
        internal_features_config[feature] = (feature in enabled_features)
    tf.logging.info('Enabled internal features: {}'.format(enabled_features))
    return internal_features_config


def module_model_fn(features, labels, mode, params):
    print('Mode', mode)
    if mode == tf.estimator.ModeKeys.TRAIN:
        negative_samples = params['train_total_negative_samples']
        negative_sample_from_buffer = params['train_negative_samples_from_buffer']
    
    elif mode == tf.estimator.ModeKeys.EVAL:
        negative_samples = params['eval_total_negative_samples']
        negative_sample_from_buffer = params['eval_negative_samples_from_buffer']
    
    elif mode == tf.estimator.ModeKeys.PREDICT:
        print('###PREDICT###')
        negative_samples = params['eval_total_negative_samples']
        negative_sample_from_buffer = params['eval_negative_samples_from_buffer']
     
        
    dropout_keep_prob = params['dropout_keep_prob'] if mode == tf.estimator.ModeKeys.TRAIN else 1.0
    internal_features_config = get_internal_enabled_features_config()
    
    eval_metrics_top_n = params['eval_metrics_top_n']
    
    model = PAN(mode,
                features, 
                labels,
                session_features_config=params['session_features_config'],
                job_features_config=params['job_features_config'],
                user_features_config=params['user_features_config'],
                batch_size=params['batch_size'], 
                lr=params['lr'],
                reg_l2_rate=params['reg_l2_rate'], 
                dropout_keep_prob=dropout_keep_prob,
                softmax_temperature=params['softmax_temperature'], 
                pretrained_job_content_embeddings=params['pretrained_job_content_embeddings'],
                pretrained_job_embedding_size=params['pretrained_job_embedding_size'],
                job_metadata=params['job_metadata'],
                user_metadata =params['user_metadata'],
                negative_samples=negative_samples,
                negative_sample_from_buffer=negative_sample_from_buffer,
                rnn_num_layers=params['rnn_num_layers'],
                rnn_units=params['rnn_units'],
                recent_clicks_buffer_max_size=params['recent_clicks_buffer_max_size'],
                recent_clicks_for_normalization=params['recent_clicks_for_normalization'],
                metrics_top_n=eval_metrics_top_n,
                max_cardinality_for_ohe=params['max_cardinality_for_ohe'],
                internal_features_config=internal_features_config,
                plot_histograms=params['save_histograms']
               )
    
    
    #Using these variables as global so that they persist across different train and eval
    global clicked_jobs_state, eval_sessions_metrics_log, sessions_negative_jobs_log
    
    eval_baseline_classifiers = []

    attention_log = []
    trained_session_embeddings = []
    
    
    hooks = [JobsStateUpdaterHook(mode, 
                                  model, 
                                  eval_metrics_top_n=eval_metrics_top_n,
                                  clicked_jobs_state=clicked_jobs_state, 
                                  eval_sessions_metrics_log=eval_sessions_metrics_log,
                                  sessions_negative_jobs_log=sessions_negative_jobs_log,
                                  sessions_model_recommendations_log=sessions_model_recommendations_log,
                                  pretrained_job_content_embeddings = params['pretrained_job_content_embeddings'],
                                  job_metadata=params['job_metadata'],
                                  user_metadata=params['user_metadata'],
                                  eval_baseline_classifiers=eval_baseline_classifiers,
                                  attention_log = attention_log,
                                  trained_session_embeddings = trained_session_embeddings
                                 )] 
    
    if mode == tf.estimator.ModeKeys.TRAIN:        
        return tf.estimator.EstimatorSpec(mode, loss=model.total_loss, train_op=model.train, training_chief_hooks=hooks)
    
    elif mode == tf.estimator.ModeKeys.EVAL:
        eval_metrics = {
            'hitrate_at_n': (model.recall_at_n, model.recall_at_n_update_op),
            'mrr_at_n': (model.mrr, model.mrr_update_op),   
            'ndcg_at_n': (model.ndcg_at_n_mean, model.ndcg_at_n_mean_update_op),                 
        }
        print(eval_metrics)
        return tf.estimator.EstimatorSpec(mode, loss=model.total_loss, eval_metric_ops=eval_metrics, evaluation_hooks=hooks) 

    
    else:
        predictions = {
            'predicted_job_ids': model.predicted_job_ids,
            'predicted_job_probs': model.predicted_job_probs,
            'att': model.attention,
            'job_clicked': model.job_clicked
        }
        return tf.estimator.EstimatorSpec(mode, predictions = predictions) 



def build_estimator(model_dir, job_content_embeddings, job_metadata, user_metadata, job_features_config, session_features_config):
    #Disabling GPU (memory issues on local machine)
    #config_proto = tf.ConfigProto(device_count={'GPU': 0})    
    run_config = tf.estimator.RunConfig(tf_random_seed=RANDOM_SEED,
                                        keep_checkpoint_max=1, 
                                        save_checkpoints_secs=1200, 
                                        save_summary_steps=100,
                                        log_step_count_steps=100,
                                        #session_config=config_proto
                                        )
    
    estimator = tf.estimator.Estimator(
        config=run_config,
        model_dir=model_dir,
        model_fn=module_model_fn,    
        params={
            'batch_size': batch_size,
            'lr': learning_rate,
            'reg_l2_rate': reg_l2,
            'dropout_keep_prob': dropout_keep_prob,
            'softmax_temperature': softmax_temperature,
            'train_total_negative_samples': train_total_negative_samples,
            'train_negative_samples_from_buffer': train_negative_samples_from_buffer,
            'eval_total_negative_samples': eval_total_negative_samples,
            'eval_negative_samples_from_buffer': eval_negative_samples_from_buffer,
            'rnn_num_layers': rnn_num_layers,
            'rnn_units': rnn_units,
            'recent_clicks_buffer_max_size': recent_clicks_buffer_max_size,
            'recent_clicks_for_normalization': recent_clicks_for_normalization,
            'eval_metrics_top_n': eval_metrics_top_n,
            'max_cardinality_for_ohe': max_cardinality_for_ohe,
            'save_histograms': save_histograms,
             #From pre-processing
            'session_features_config': session_features_config,
            'job_features_config': job_features_config,
            'user_features_config': user_features_config,
            'job_metadata': job_metadata, 
            'user_metadata': user_metadata,
            'pretrained_job_content_embeddings': job_content_embeddings,
            'pretrained_job_embedding_size': pretrained_job_embedding_size
            
            })
    return estimator
   

predict_input_fn = lambda: prepare_dataset_iterator('../data/cb12/sessions_tf/test_14d_30_sessions',session_features_config, batch_size=batch_size, truncate_session_length=truncate_session_length)

global eval_sessions_metrics_log, clicked_jobs_statejobs_state, sessions_negative_jobs_log, sessions_model_recommendations_log
eval_sessions_metrics_log = []

import itertools
model_output_dir = './tmp/30_improved_sampling/PAN_30' + '_' + str(batch_size) + '_' + str(train_total_negative_samples) + '_' + str(eval_total_negative_samples) + '_' + str(learning_rate) + '_' + str(reg_l2) + '_' + str(recent_clicks_buffer_max_size) + '_' + str(train_negative_samples_from_buffer) + '_' + str(eval_negative_samples_from_buffer) 
#model_output_dir = './tmp/30/LSTM_30' + '_' + str(batch_size) + '_' + str(train_total_negative_samples) + '_' + str(eval_total_negative_samples) + '_' + str(learning_rate) + '_' + str(reg_l2) + '_' + str(recent_clicks_buffer_max_size) + '_' + str(train_negative_samples_from_buffer) + '_' + str(eval_negative_samples_from_buffer) 

print(model_output_dir)
model = build_estimator(model_output_dir, job_content_embeddings, job_metadata, user_metadata, job_features_config, session_features_config) 
 
predict_input_fn = prepare_dataset_iterator('../data/cb12/sessions_tf/test_14d_30_sessions',session_features_config, batch_size=batch_size, truncate_session_length=truncate_session_length)


for epoch in range(1):
    model.train(input_fn=lambda: prepare_dataset_iterator('../data/cb12/sessions_tf/train_14d_30_sessions', session_features_config, batch_size=batch_size, truncate_session_length=truncate_session_length))
    model.evaluate(input_fn=lambda: prepare_dataset_iterator('../data/cb12/sessions_tf/test_14d_30_sessions', session_features_config, batch_size=batch_size, truncate_session_length=truncate_session_length))
    #predictions = list(itertools.islice(model.predict(input_fn=predict_input_fn),10))
    #predictions = list(itertools.islice(model.predict(input_fn=predict_input_fn), 10))
    

./tmp/30_improved_sampling/PAN_30_256_15_50_0.001_0.0001_1000_100_200
INFO:tensorflow:Using config: {'_model_dir': './tmp/30_improved_sampling/PAN_30_256_15_50_0.001_0.0001_1000_100_200', '_tf_random_seed': 42, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 1200, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 1, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fddcd554790>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0,

2022-09-20 15:41:46.586203: I tensorflow/core/platform/cpu_feature_guard.cc:142] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA
2022-09-20 15:41:46.661635: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fddccd5bb20 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2022-09-20 15:41:46.661654: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version


Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Using 8 threads for parallel map
INFO:tensorflow:Calling model_fn.
Mode train

INFO:tensorflow:Enabled internal features: {'job_clicked_embeddings', 'job_content_embeddings'}
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
This op will be removed after the deprecation date. Please switch to tf.sets.difference().
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.

# Step 6: Save eval metrics

In [22]:
#After each number of train/eval loops
from utils import save_eval_baseline_metrics_csv, save_sessions_negative_jobs, save_sessions_model_recommendations_log

print('Saving eval metrics')
save_eval_baseline_metrics_csv(eval_sessions_metrics_log, model_output_dir)
if save_eval_sessions_negative_samples:
    #Flushing to disk the negative samples used to evaluate each sessions, so that benchmarks metrics outside the framework (eg. Matrix Factorization) can be comparable
    save_sessions_negative_jobs(model_output_dir, sessions_negative_jobs_log)
    sessions_negative_jobs_log = []

if save_eval_sessions_recommendations:  
    #Flushing to disk the recommended items to test re-ranking approaches (e.g. MMR)
    save_sessions_model_recommendations_log(model_output_dir, sessions_model_recommendations_log)
    sessions_model_recommendations_log = [] 

tf.logging.info('Saved eval metrics')#After each number of train/eval loops
from utils import save_eval_baseline_metrics_csv, save_sessions_negative_jobs, save_sessions_model_recommendations_log

print('Saving eval metrics')
save_eval_baseline_metrics_csv(eval_sessions_metrics_log, model_output_dir)
if save_eval_sessions_negative_samples:
    #Flushing to disk the negative samples used to evaluate each sessions, so that benchmarks metrics outside the framework (eg. Matrix Factorization) can be comparable
    save_sessions_negative_jobs(model_output_dir, sessions_negative_jobs_log)
    sessions_negative_jobs_log = []

if save_eval_sessions_recommendations:  
    #Flushing to disk the recommended items to test re-ranking approaches (e.g. MMR)
    save_sessions_model_recommendations_log(model_output_dir, sessions_model_recommendations_log)
    sessions_model_recommendations_log = [] 

tf.logging.info('Saved eval metrics')

Saving eval metrics
INFO:tensorflow:Saved eval metrics
Saving eval metrics
INFO:tensorflow:Saved eval metrics
