In [1]:
import os
import tempfile

%matplotlib inline
import matplotlib.pyplot as plt

import numpy as np
import tensorflow as tf
import tensorflow_datasets as tfds

import tensorflow_recommenders as tfrs

In [2]:
import pandas as pd
import pprint

In [3]:
def load_data_file_cold(file, stats):
    print('loading file:' + file)
    training_df = pd.read_csv(
        file,
        skiprows=[0],
        names=["viewer","broadcaster","viewer_age","viewer_gender","viewer_longitude","viewer_latitude","viewer_lang","viewer_country","broadcaster_age","broadcaster_gender","broadcaster_longitude","broadcaster_latitude","broadcaster_lang","broadcaster_country","duration", "viewer_network", "broadcaster_network", "count"], dtype={
            'viewer': np.unicode,
            'broadcaster': np.unicode,
            'viewer_age': np.single,
            'viewer_gender': np.unicode,
            'viewer_longitude': np.single,
            'viewer_latitude': np.single,
            'viewer_lang': np.unicode,
            'viewer_country': np.unicode,
            'broadcaster_age': np.single,
            'broadcaster_longitude': np.single,
            'broadcaster_latitude': np.single,
            'broadcaster_lang': np.unicode,
            'broadcaster_country': np.unicode,
            'viewer_network': np.unicode,
            'broadcaster_network': np.unicode,
            'count': np.int
        })

    values = {
        'viewer': 'unknown',
        'broadcaster': 'unknown',
        'viewer_age': 30,
        'viewer_gender': 'unknown',
        'viewer_longitude': 0,
        'viewer_latitude': 0,
        'viewer_lang': 'unknown',
        'viewer_country': 'unknown',
        'broadcaster_age': 30,
        'broadcaster_longitude': 0,
        'broadcaster_latitude': 0,
        'broadcaster_lang': 'unknown',
        'broadcaster_country': 'unknown',
        'duration': 0,
        'viewer_network': 'unknown',
        'broadcaster_network': 'unknown',
        'count': 0
    }
    training_df.fillna(value=values, inplace=True)
#     print(training_df.head(10))
#     print(training_df.iloc[-10:])
#     stats.send_stats('data-size', len(training_df.index))

    sampled_df = training_df.sample(frac=0.1)
    print(sampled_df.head(10))
    print(sampled_df.iloc[-10:])
    return sampled_df

def load_training_data_cold(file, stats):
    ratings_df = load_data_file_cold(file, stats)
    print('creating data set')
    training_ds = (
        tf.data.Dataset.from_tensor_slices(
            ({
                "viewer": tf.cast(
                    ratings_df['viewer'].values,
                    tf.string),
                "viewer_gender": tf.cast(
                    ratings_df['viewer_gender'].values,
                    tf.string),
                "viewer_lang": tf.cast(
                    ratings_df['viewer_lang'].values,
                    tf.string),
                "viewer_country": tf.cast(
                    ratings_df['viewer_country'].values,
                    tf.string),
                "viewer_age": tf.cast(
                    ratings_df['viewer_age'].values,
                    tf.int32),
                "viewer_longitude": tf.cast(
                    ratings_df['viewer_longitude'].values,
                    tf.float16),
                "viewer_latitude": tf.cast(
                    ratings_df['viewer_latitude'].values,
                    tf.float16),
                "broadcaster": tf.cast(
                    ratings_df['broadcaster'].values,
                    tf.string),
                "viewer_network": tf.cast(
                    ratings_df['viewer_network'].values,
                    tf.string),
                "broadcaster_network": tf.cast(
                    ratings_df['broadcaster_network'].values,
                    tf.string),
                "duration": tf.cast(
                    ratings_df['duration'].values,
                    tf.float16),
                "count": tf.cast(
                    ratings_df['count'].values,
                    tf.int16),
            })))

    return training_ds

In [4]:
ratings = load_training_data_cold(file="a3d86f3b-eb45-4641-b05d-30dff7423e6b.csv", stats="")

for x in ratings.take(1).as_numpy_iterator():
    pprint.pprint(x)

loading file:a3d86f3b-eb45-4641-b05d-30dff7423e6b.csv
                   viewer       broadcaster  viewer_age viewer_gender  \
4813132  meetme:309389222  meetme:311254050        29.0        female   
5284874  meetme:318036570  meetme:315043902        21.0          male   
1584655     pof:109638959   skout:182621750        37.0          male   
4054891     pof:308773777  meetme:269923127        42.0          male   
2518692     pof:316637226     pof:332075606        25.0        female   
2135182  meetme:260903230  meetme:317205757        32.0        female   
3109824  meetme:314778866  meetme:316023909        37.0          male   
2918063  meetme:302878501     pof:128654566        33.0          male   
3386332   skout:173550429     pof:290741519        39.0          male   
2014638  meetme:183463090     pof:331777479        38.0        female   

         viewer_longitude  viewer_latitude viewer_lang viewer_country  \
4813132        -46.639599       -23.555799          pt             BR

### Model definition

In [6]:
def get_list(training_data, key):
    return training_data.batch(1_000_000).map(lambda x: x[key], num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)


def get_unique_list(data):
    return np.unique(np.concatenate(list(data)))

In [9]:
user_genders = get_list(ratings, 'viewer_gender')
for x in user_genders.batch(1).take(1):
    pprint.pprint(x[0:10])

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
<tf.Tensor: shape=(1, 538393), dtype=string, numpy=
array([[b'female', b'male', b'male', ..., b'female', b'male', b'male']],
      dtype=object)>


In [11]:
user_langs = get_list(ratings, 'viewer_lang')
for x in user_langs.batch(1).take(1):
    pprint.pprint(x[0, :10])

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
<tf.Tensor: shape=(10,), dtype=string, numpy=
array([b'pt', b'en', b'en', b'en', b'en', b'en', b'fr', b'en', b'en',
       b'en'], dtype=object)>


In [13]:
user_countries = get_list(ratings, 'viewer_country')
for x in user_countries.batch(1).take(1).as_numpy_iterator():
    pprint.pprint(x[0, :10])

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
array([b'BR', b'US', b'US', b'US', b'US', b'US', b'FR', b'US', b'CA',
       b'US'], dtype=object)


In [15]:
viewer_age = get_list(ratings, 'viewer_age')
for x in viewer_age.batch(1).take(1).as_numpy_iterator():
    pprint.pprint(x[0, :10])

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
array([29, 21, 37, 42, 25, 32, 37, 33, 39, 38], dtype=int32)


In [17]:
user_networks = get_list(ratings, 'viewer_network')
for x in user_networks.batch(1).take(1).as_numpy_iterator():
    pprint.pprint(x[0, :10])

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
array([b'meetme', b'meetme', b'pof', b'pof', b'pof', b'meetme', b'meetme',
       b'meetme', b'skout', b'meetme'], dtype=object)


In [18]:
unique_user_genders = get_unique_list(user_genders)
print(unique_user_genders)

[b'female' b'male']


In [19]:
unique_user_langs = get_unique_list(user_langs)
print(unique_user_langs)

[b'af' b'ar' b'az' b'bg' b'bn' b'bs' b'ca' b'cs' b'da' b'de' b'el' b'en'
 b'es' b'et' b'fa' b'fi' b'fr' b'he' b'hi' b'hr' b'hu' b'id' b'in' b'it'
 b'iw' b'ja' b'ko' b'lo' b'lv' b'ml' b'ms' b'nb' b'ne' b'nl' b'pa' b'pl'
 b'ps' b'pt' b'ro' b'ru' b'si' b'sk' b'sl' b'sq' b'sr' b'sv' b'ta' b'te'
 b'th' b'to' b'tr' b'ur' b'vi' b'zh']


In [20]:
unique_user_countries = get_unique_list(user_countries)
print(unique_user_countries)

[b'419' b'AD' b'AE' b'AF' b'AG' b'AL' b'AO' b'AR' b'AS' b'AT' b'AU' b'AW'
 b'AX' b'AZ' b'BA' b'BD' b'BE' b'BG' b'BH' b'BN' b'BO' b'BR' b'BS' b'BT'
 b'BW' b'BZ' b'CA' b'CH' b'CI' b'CL' b'CN' b'CO' b'CP' b'CR' b'CU' b'CV'
 b'CY' b'CZ' b'DE' b'DK' b'DO' b'DZ' b'EA' b'EC' b'EE' b'EG' b'EN' b'ES'
 b'FI' b'FR' b'GB' b'GE' b'GF' b'GH' b'GL' b'GM' b'GR' b'GT' b'GU' b'GW'
 b'HK' b'HN' b'HR' b'HT' b'HU' b'ID' b'IE' b'IL' b'IM' b'IN' b'IQ' b'IR'
 b'IS' b'IT' b'JM' b'JO' b'JP' b'KE' b'KH' b'KR' b'KW' b'KY' b'KZ' b'LA'
 b'LB' b'LC' b'LK' b'LT' b'LU' b'LV' b'LY' b'MA' b'ME' b'MH' b'MK' b'MM'
 b'MO' b'MR' b'MU' b'MX' b'MY' b'NG' b'NI' b'NL' b'NO' b'NP' b'NZ' b'OM'
 b'PA' b'PE' b'PH' b'PK' b'PL' b'PR' b'PS' b'PT' b'PY' b'QA' b'RO' b'RS'
 b'RU' b'SA' b'SD' b'SE' b'SG' b'SI' b'SJ' b'SK' b'SM' b'SN' b'SV' b'SX'
 b'SY' b'TC' b'TH' b'TN' b'TR' b'TT' b'TW' b'TZ' b'UM' b'US' b'UY' b'VE'
 b'VG' b'VI' b'VN' b'WF' b'XK' b'XL' b'YE' b'ZA' b'ZG' b'ZM' b'unknown']


In [21]:
unique_user_networks = get_unique_list(user_networks)
print(unique_user_networks)

[b'meetme' b'pof' b'skout' b'zoosk']


In [76]:
class UserModel(tf.keras.Model):

    def __init__(self, unique_genders, unique_langs, unique_countries, unique_networks, viewer_age):
        super().__init__()

        self.gender_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_genders, mask_token=None),
            tf.keras.layers.Embedding(len(unique_genders) + 1, 4),
        ])
        
        self.lang_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_langs, mask_token=None),
            tf.keras.layers.Embedding(len(unique_langs) + 1, 11),
        ])

        self.country_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_countries, mask_token=None),
            tf.keras.layers.Embedding(len(unique_countries) + 1, 11),
        ])

        self.network_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_networks, mask_token=None),
            tf.keras.layers.Embedding(len(unique_networks) + 1, 5),
        ])

        self.normalized_ages = tf.keras.layers.experimental.preprocessing.Normalization(axis=None)
        self.normalized_ages.adapt(viewer_age)


    def call(self, inputs):
        return tf.concat([
            self.gender_embedding(inputs["viewer_gender"]),
            self.lang_embedding(inputs["viewer_lang"]),
            self.country_embedding(inputs["viewer_country"]),
            self.network_embedding(inputs["viewer_network"]),
            self.normalized_ages(inputs["viewer_age"]),
        ], axis=1)

In [63]:
# gender embedding
# user_gender_lookup = tf.keras.Sequential([
#             tf.keras.layers.experimental.preprocessing.StringLookup(
#                 vocabulary=unique_user_genders, mask_token=None),
#             tf.keras.layers.Embedding(len(unique_user_genders) + 1, 4),
#         ])

In [77]:
user_model = UserModel(unique_user_genders, unique_user_langs, unique_user_countries, unique_user_networks, viewer_age)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


In [79]:
for v in ratings.take(1).map(lambda x: x["viewer_gender"]).batch(1).as_numpy_iterator():
    print(user_model.gender_embedding(x))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
tf.Tensor(
[[[-0.02569976 -0.03433777  0.00771824 -0.01395029]
  [-0.02569976 -0.03433777  0.00771824 -0.01395029]
  [-0.02569976 -0.03433777  0.00771824 -0.01395029]
  ...
  [-0.02569976 -0.03433777  0.00771824 -0.01395029]
  [-0.02569976 -0.03433777  0.00771824 -0.01395029]
  [-0.02569976 -0.03433777  0.00771824 -0.01395029]]], shape=(1, 538393, 4), dtype=float32)


In [80]:
for v in ratings.take(1).map(lambda x: x["viewer_lang"]).batch(1).as_numpy_iterator():
    print(user_model.lang_embedding(x))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
tf.Tensor(
[[[-0.03237829 -0.00885711 -0.0400753  ... -0.00581033 -0.0038252
   -0.03017133]
  [-0.03237829 -0.00885711 -0.0400753  ... -0.00581033 -0.0038252
   -0.03017133]
  [-0.03237829 -0.00885711 -0.0400753  ... -0.00581033 -0.0038252
   -0.03017133]
  ...
  [-0.03237829 -0.00885711 -0.0400753  ... -0.00581033 -0.0038252
   -0.03017133]
  [-0.03237829 -0.00885711 -0.0400753  ... -0.00581033 -0.0038252
   -0.03017133]
  [-0.03237829 -0.00885711 -0.0400753  ... -0.00581033 -0.0038252
   -0.03017133]]], shape=(1, 538393, 11), dtype=float32)


In [81]:
for v in ratings.take(1).map(lambda x: x["viewer_country"]).batch(1).as_numpy_iterator():
    print(user_model.country_embedding(x))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
tf.Tensor(
[[[ 0.00980501 -0.01751616 -0.00616052 ...  0.02341903  0.00029063
    0.02393849]
  [ 0.00980501 -0.01751616 -0.00616052 ...  0.02341903  0.00029063
    0.02393849]
  [ 0.00980501 -0.01751616 -0.00616052 ...  0.02341903  0.00029063
    0.02393849]
  ...
  [ 0.00980501 -0.01751616 -0.00616052 ...  0.02341903  0.00029063
    0.02393849]
  [ 0.00980501 -0.01751616 -0.00616052 ...  0.02341903  0.00029063
    0.02393849]
  [ 0.00980501 -0.01751616 -0.00616052 ...  0.02341903  0.00029063
    0.02393849]]], shape=(1, 538393, 11), dtype=float32)


In [83]:
for v in ratings.take(1).map(lambda x: x["viewer_network"]).batch(1).as_numpy_iterator():
    print(user_model.network_embedding(x))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
tf.Tensor(
[[[ 0.02678044  0.01921315  0.03588673 -0.02111312 -0.0322271 ]
  [ 0.02678044  0.01921315  0.03588673 -0.02111312 -0.0322271 ]
  [ 0.00291411 -0.00511903  0.01702353  0.00565339  0.02216182]
  ...
  [-0.02370275  0.04277806 -0.03249246  0.03193189  0.02103117]
  [ 0.00291411 -0.00511903  0.01702353  0.00565339  0.02216182]
  [ 0.00291411 -0.00511903  0.01702353  0.00565339  0.02216182]]], shape=(1, 538393, 5), dtype=float32)


In [85]:
for v in ratings.take(1).map(lambda x: x["viewer_age"]).batch(1).as_numpy_iterator():
    print(user_model.normalized_ages(v))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
tf.Tensor([-0.5254036], shape=(1,), dtype=float32)


### Candidate Models

In [178]:
broadcaster_ids = get_list(ratings, 'broadcaster')

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'


In [87]:
unique_broadcasters = get_unique_list(broadcaster_ids)

In [90]:
print(unique_broadcasters)

[b'meetme:100081867' b'meetme:100104254' b'meetme:100114731' ...
 b'zoosk:faa44d7ec2ee93a7acfb6bd4fb051767'
 b'zoosk:fd0ea6de03386f5c0d31b3d462d47e02'
 b'zoosk:fd4dfc9e6f9990d95f3ae136b403368e']


In [89]:
class BroadcasterModel(tf.keras.Model):

    def __init__(self, unique_movie_titles, dims):
        super().__init__()

        self.broadcaster_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_movie_titles, mask_token=None),
            tf.keras.layers.Embedding(len(unique_movie_titles) + 1, dims)
        ])

    def call(self, broadcaster):
        return tf.concat([
            self.broadcaster_embedding(broadcaster),
        ], axis=1)

In [91]:
broadcaster_embedding_dimension = 32
broadcaster_model = BroadcasterModel(unique_broadcasters, broadcaster_embedding_dimension)

In [92]:
for v in ratings.take(1).map(lambda x: x["broadcaster"]).batch(1).as_numpy_iterator():
    print(broadcaster_model.broadcaster_embedding(v))

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
tf.Tensor(
[[ 0.04173012 -0.03631787 -0.01227497  0.01042102  0.02145941  0.01807603
   0.00790545 -0.00627408  0.01011752 -0.03060862 -0.00436055 -0.01595862
   0.04211653  0.02934315 -0.00160182 -0.04359237  0.00257302 -0.01767635
  -0.01203647  0.01430636  0.02566769  0.02506789 -0.04853121 -0.0469017
   0.00725818  0.01072966 -0.03149523  0.01092996  0.03255302 -0.04807407
   0.01524818 -0.026949  ]], shape=(1, 32), dtype=float32)


### Combined model

In [97]:
def get_broadcaster_data_set(train_ds):
    broadcasters = train_ds.cache().map(lambda x: x["broadcaster"], num_parallel_calls=tf.data.AUTOTUNE, deterministic=False)
    broadcasters_ds = tf.data.Dataset.from_tensor_slices(
        np.unique(list(broadcasters.as_numpy_iterator())))
    return broadcasters_ds

In [98]:
broadcasters_data_set = get_broadcaster_data_set(ratings)

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'


In [107]:
# Define your objectives.
task = tfrs.tasks.Retrieval(
    metrics=tfrs.metrics.FactorizedTopK(broadcasters_data_set.batch(128).map(broadcaster_model))
)

In [161]:
class FinalModel(tfrs.models.Model) :

	def __init__(self, unique_genders, unique_langs, unique_countries, unique_networks, unique_movie_titles, dims) :
		super().__init__()
		self.user_model = tf.keras.Sequential([
			UserModel(unique_genders, unique_langs, unique_countries, unique_networks),
			tf.keras.layers.Dense(32)
		])
        
		self.candidate_model = tf.keras.Sequential([
			BroadcasterModel(unique_movie_titles, dims),
			tf.keras.layers.Dense(32)
		])
        
		self.task = tfrs.tasks.Retrieval(
			metrics = tfrs.metrics.FactorizedTopK(
				candidates=broadcasters_data_set.batch(128).map(self.candidate_model),
			),
		)

	def compute_loss(self, features, training = False) :
		user_model = self.user_model({
				"viewer_gender": features["viewer_gender"],
 				"viewer_lang": features["viewer_lang"],
				"viewer_country": features["viewer_country"],
# 				"viewer_age": features["viewer_age"],
				"viewer_network": features["viewer_network"],
			})
		broadcaster_embeddings = self.candidate_model(features["broadcaster"], 32)

		return self.task(user_model, broadcaster_embeddings)

In [125]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

cached_train = train.shuffle(100_000).batch(2048)
cached_test = test.batch(4096).cache()

In [179]:
class UserModel(tf.keras.Model):

    def __init__(self, unique_genders, unique_langs, unique_countries, unique_networks, viewer_age):
        super().__init__()

        self.gender_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_genders, mask_token=None),
            tf.keras.layers.Embedding(len(unique_genders) + 1, 4),
        ])
        
        self.lang_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_langs, mask_token=None),
            tf.keras.layers.Embedding(len(unique_langs) + 1, 11),
        ])
        
        self.country_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_countries, mask_token=None),
            tf.keras.layers.Embedding(len(unique_countries) + 1, 11),
        ])
        
        self.network_embedding = tf.keras.Sequential([
            tf.keras.layers.experimental.preprocessing.StringLookup(
                vocabulary=unique_networks, mask_token=None),
            tf.keras.layers.Embedding(len(unique_networks) + 1, 5),
        ])
        
        self.normalized_ages = tf.keras.layers.experimental.preprocessing.Normalization()
        self.normalized_ages.adapt(viewer_age)

    def call(self, inputs):
        return tf.concat([
            self.gender_embedding(inputs["viewer_gender"]),
            self.lang_embedding(inputs["viewer_lang"]),
            self.country_embedding(inputs["viewer_country"]),
            self.network_embedding(inputs["viewer_network"]),
            self.normalized_ages(inputs["viewer_age"]),
        ], axis=1)

In [180]:
class FinalModel(tfrs.models.Model) :

	def __init__(self, unique_genders, unique_langs, unique_countries, unique_networks, viewer_age, unique_movie_titles, dims) :
		super().__init__()
		self.user_model = tf.keras.Sequential([
			UserModel(unique_genders, unique_langs, unique_countries, unique_networks, viewer_age),
			tf.keras.layers.Dense(32)
		])
        
		self.candidate_model = tf.keras.Sequential([
			BroadcasterModel(unique_movie_titles, dims),
			tf.keras.layers.Dense(32)
		])
        
		self.task = tfrs.tasks.Retrieval(
			metrics = tfrs.metrics.FactorizedTopK(
				candidates=broadcasters_data_set.batch(128).map(self.candidate_model),
			),
		)

	def compute_loss(self, features, training = False) :
		user_embeddings = self.user_model({
				"viewer_gender": features["viewer_gender"],
				"viewer_lang": features["viewer_lang"],
				"viewer_country": features["viewer_country"],
				"viewer_network": features["viewer_network"],
				"viewer_age": features["viewer_age"],
			})
		broadcaster_embeddings = self.candidate_model(features["broadcaster"], 32)

		return self.task(user_embeddings, broadcaster_embeddings)

In [181]:
viewer_age = get_list(ratings, 'viewer_age')

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Constant'


In [182]:
model = FinalModel(unique_user_genders, unique_user_langs, unique_user_countries,  unique_user_networks, viewer_age, unique_broadcasters, broadcaster_embedding_dimension)
model.compile(optimizer=tf.keras.optimizers.Adagrad(0.1))

model.fit(cached_train, epochs=1)

train_accuracy = model.evaluate(
    cached_train, return_dict=True)["factorized_top_k/top_100_categorical_accuracy"]
test_accuracy = model.evaluate(
    cached_test, return_dict=True)["factorized_top_k/top_100_categorical_accuracy"]

print(f"Top-100 accuracy (train): {train_accuracy:.2f}.")
print(f"Top-100 accuracy (test): {test_accuracy:.2f}.")

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: 'arguments' object has no attribute 'posonlyargs'


ValueError: All `axis` values to be kept must have known shape. Got axis: (-1,), input shape: [None], with unknown axis at index: 0

In [64]:
class TwoTowers(tf.keras.Model):

    def __init__(self, broadcaster_model, user_model, task):
        super().__init__()
        self.broadcaster_model: tf.keras.Model = broadcaster_model
        self.embedding_model = user_model
        self.task: tf.keras.layers.Layer = task

    def train_step(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:

        # Set up a gradient tape to record gradients.
        with tf.GradientTape() as tape:

            # Loss computation.

            user_embeddings = self.embedding_model({
                "viewer_gender": features["viewer_gender"],
                "viewer_lang": features["viewer_lang"],
                "viewer_country": features["viewer_country"],
                "viewer_age": features["viewer_age"],
                "viewer_network": features["viewer_network"],
                "viewer_latitude": features["viewer_latitude"],
                "viewer_longitude": features["viewer_longitude"],
            })
            positive_movie_embeddings = self.broadcaster_model(
                features["broadcaster"])
            loss = self.task(user_embeddings, positive_movie_embeddings)

            # Handle regularization losses as well.
            regularization_loss = sum(self.losses)

            total_loss = loss + regularization_loss

        gradients = tape.gradient(total_loss, self.trainable_variables)
        self.optimizer.apply_gradients(
            zip(gradients, self.trainable_variables))

        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss

        return metrics

    def test_step(self, features: Dict[Text, tf.Tensor]) -> tf.Tensor:

        # Loss computation.

        user_embeddings = self.embedding_model({
            "viewer": features["viewer"],
        })
        positive_movie_embeddings = self.broadcaster_model(
            features["broadcaster"])
        loss = self.task(user_embeddings, positive_movie_embeddings)

        # Handle regularization losses as well.
        regularization_loss = sum(self.losses)

        total_loss = loss + regularization_loss

        metrics = {metric.name: metric.result() for metric in self.metrics}
        metrics["loss"] = loss
        metrics["regularization_loss"] = regularization_loss
        metrics["total_loss"] = total_loss
        return metrics


In [65]:
model = TwoTowers(broadcaster_model, user_model, task)

In [70]:
learning_rate = 0.05
batch_size = 16384
epochs = 2
top_k = 1999

In [71]:
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=learning_rate))

In [74]:
train_ds = train.batch(batch_size).cache()
# train_ds = train_ds.prefetch(tf.data.experimental.AUTOTUNE)

In [81]:
ratings = load_training_data_cold(file="a3d86f3b-eb45-4641-b05d-30dff7423e6b.csv", stats="")

loading file:a3d86f3b-eb45-4641-b05d-30dff7423e6b.csv
                   viewer       broadcaster  viewer_age viewer_gender  \
20127    meetme:270873951  meetme:173297788        50.0          male   
4658043     pof:312987597  meetme:284812954        38.0          male   
3776639  meetme:317538802  meetme:272758486        40.0          male   
3885853   skout:183423249   skout:183634455        42.0          male   
1580865  meetme:266907949  meetme:266115204        30.0          male   
2008130  meetme:268556395  meetme:254201989        29.0          male   
2004183   skout:120969259  meetme:292551747        38.0          male   
2246152   skout:152167511   skout:182824110        28.0          male   
1569695  meetme:214623362     pof:323790889        29.0        female   
1040276     pof:171129060   skout:159743514        27.0          male   

         viewer_longitude  viewer_latitude viewer_lang viewer_country  \
20127         -118.239304        34.063400          en             US

In [82]:
tf.random.set_seed(42)
shuffled = ratings.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)

cached_train = train.shuffle(100_000).batch(2048)
cached_test = test.batch(4096).cache()

In [83]:
len(cached_train)

40

In [84]:
model.fit(cached_train, epochs=1)

TypeError: 'NoneType' object is not callable

In [48]:
for x in train.take(2).as_numpy_iterator():
    pprint.pprint(x)

{'broadcaster': b'skout:168895853',
 'broadcaster_network': b'skout',
 'viewer': b'skout:151948757',
 'viewer_age': 28,
 'viewer_country': b'US',
 'viewer_gender': b'male',
 'viewer_lang': b'en',
 'viewer_latitude': 22.3,
 'viewer_longitude': 114.2,
 'viewer_network': b'skout'}
{'broadcaster': b'meetme:193371002',
 'broadcaster_network': b'meetme',
 'viewer': b'meetme:290274324',
 'viewer_age': 29,
 'viewer_country': b'US',
 'viewer_gender': b'male',
 'viewer_lang': b'en',
 'viewer_latitude': 8.234,
 'viewer_longitude': 124.25,
 'viewer_network': b'meetme'}
