In [None]:
import pandas as pd

In [None]:
%%time
# Load Parquet
train = pd.read_parquet("../data/train.parquet")
test = pd.read_parquet("../data/test.parquet")

print(f"Train {train.shape[0]:,d} rows\nTest {test.shape[0]:,d} rows")

In [None]:
%%time
# Create simple [User x Item] interaction dataset
train_pairs = pd.concat([train,test])[['session','aid']]
train_pairs.head(2)

In [None]:
# Free up memory
del train, test

In [None]:
%%time
# Create window of 2-len with context `aid` and next `aid`
train_pairs['aid_next'] = train_pairs.groupby('session')['aid'].shift(-1)
# Drop NaN values at the end of each session and then drop `session`
train_pairs = train_pairs[['aid', 'aid_next']].dropna().reset_index(drop=True)

In [None]:
# Correct type conversion
train_pairs['aid'] = train_pairs['aid'].astype('int64')
train_pairs['aid_next'] = train_pairs['aid_next'].astype('int64')

In [None]:
%%time
# Create train & validation dataset
train_pairs.to_parquet("../data/train_pairs.parquet")
train_pairs[-10_000_000:].to_parquet("../data/valid_pairs.parquet")

In [None]:
# How many items are there?
cardinality_aids = max(train_pairs['aid'].max(), train_pairs['aid_next'].max())
print(f"Cardinality of items is {cardinality_aids:,d}")

In [None]:
# %%time
# from merlin.loader.tensorflow import Loader
# from merlin.io import Dataset
#
# # Load data with Merlin for GPU batch management
# train_ds = Dataset('../data/train_pairs.parquet', engine='parquet')
# train_dl_merlin = Loader(train_ds, 65536, True)

In [1]:
# Check that we're using GPU
import tensorflow as tf

physical_devices = tf.config.list_physical_devices('GPU')
if physical_devices:
    print("TensorFlow is using the following GPUs:", physical_devices)
else:
    print("TensorFlow is not using any GPUs.")

TensorFlow is using the following GPUs: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [64]:
# Create Factorization Model
import tensorflow as tf

class MatrixFactorization(tf.keras.Model):
    def __init__(self, n_aids, n_factors):
        super().__init__()
        self.aid_factors = tf.keras.layers.Embedding(n_aids, n_factors)

    def call(self, features):
        # Unpack the columns of the input tensor
        aid1, aid2 = features

        aid1 = self.aid_factors(aid1)
        aid2 = self.aid_factors(aid2)

        return tf.reduce_sum(aid1 * aid2, axis=1)

In [3]:
import pandas as pd

# Load Training Data
train_pairs = pd.read_parquet("../data/train_pairs.parquet")
cardinality_aids = max(train_pairs['aid'].max(), train_pairs['aid_next'].max())

In [74]:
import numpy as np

tf_train_pairs = tf.data.Dataset.from_tensor_slices(([1,2,3], [4,5,6])).batch(3, drop_remainder=True)

In [70]:
tf_train_pairs

<BatchDataset element_spec=(TensorSpec(shape=(3,), dtype=tf.int32, name=None), TensorSpec(shape=(3,), dtype=tf.int32, name=None))>

In [75]:
# Construct Model
model = MatrixFactorization(cardinality_aids+1, 32)
model.compile(optimizer='adam', loss=tf.keras.losses.BinaryCrossentropy(from_logits=True))

In [76]:
model.fit(tf_train_pairs, epochs=1, verbose=1)

OperatorNotAllowedInGraphError: in user code:

    File "C:\Users\17082\anaconda3\envs\multi-objective-recsys\lib\site-packages\keras\engine\training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "C:\Users\17082\anaconda3\envs\multi-objective-recsys\lib\site-packages\keras\engine\training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\17082\anaconda3\envs\multi-objective-recsys\lib\site-packages\keras\engine\training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "C:\Users\17082\anaconda3\envs\multi-objective-recsys\lib\site-packages\keras\engine\training.py", line 993, in train_step
        y_pred = self(x, training=True)
    File "C:\Users\17082\anaconda3\envs\multi-objective-recsys\lib\site-packages\keras\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None

    OperatorNotAllowedInGraphError: Exception encountered when calling layer "matrix_factorization_11" "                 f"(type MatrixFactorization).
    
    in user code:
    
        File "C:\Users\17082\AppData\Local\Temp\ipykernel_2876\998960492.py", line 11, in call  *
            aid1, aid2 = features
    
        OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
    
    
    Call arguments received by layer "matrix_factorization_11" "                 f"(type MatrixFactorization):
      • features=tf.Tensor(shape=(3,), dtype=int32)
