In [None]:
import sys
sys.path.append('../')

import tensorflow as tf
import numpy as np
import pandas as pd

from src.models import Node2Vec, PlainEmbeddingModel, LSTMEmbeddingModel, AttentionDCN, RetrievalModel

In [None]:
# All preprocessed artifacts sit under one directory; every path below is built from it.
processed_dir = '../data/processed'

# Graph inputs and the train/test splits.
adjacency_matrix_path = processed_dir + '/adjacency_matrix'
node2vec_dataset_path = processed_dir + '/node2vec_dataset'
train_dataset_path = processed_dir + '/train'
test_dataset_path = processed_dir + '/test'

# Vocabulary files (.npy), one per categorical feature.
users_vocab_path = processed_dir + '/visitorid.npy'
items_vocab_path = processed_dir + '/itemid.npy'
events_vocab_path = processed_dir + '/event.npy'

# Declared value type of each vocabulary.
users_feature_type = "int"
items_feature_type = "int"
events_feature_type = "str"

# Training hyperparameters.
learning_rate = 1e-3
embedding_dim = 64
batch_size = 1024

In [None]:
def construct_adjacency_matrix(path: str, shape: int) -> tf.SparseTensor:
    """Load serialized index pairs and wrap them in a square sparse matrix.

    The file at ``path`` is parsed as a serialized int64 tensor and used
    directly as the ``indices`` of the result, so it has to hold one
    ``(row, col)`` pair per row. Every listed position gets the value 1.

    Args:
        path: Location of the serialized index tensor.
        shape: Side length of the square matrix.

    Returns:
        A ``tf.SparseTensor`` with dense shape ``(shape, shape)`` and an
        int32 value of 1 at each listed position.
    """
    serialized = tf.io.read_file(path)
    edge_indices = tf.io.parse_tensor(serialized, out_type=tf.int64)
    # One unit value per index row. Built as a tensor rather than a Python
    # list so no per-edge Python objects are created, and pinned to int32
    # (the dtype a list of Python ints converts to) even when there are no rows.
    edge_values = tf.ones_like(edge_indices[:, 0], dtype=tf.int32)
    return tf.sparse.SparseTensor(
        indices=edge_indices,
        values=edge_values,
        dense_shape=(shape, shape),
    )

In [None]:
# Vocabulary arrays for each categorical feature.
items_vocab = np.load(items_vocab_path)
users_vocab = np.load(users_vocab_path)
# NOTE(review): allow_pickle=True is presumably needed because the event names are
# stored as a Python-object array; only load this file from a trusted source.
events_vocab = np.load(events_vocab_path, allow_pickle=True)

# The matrix has one more row/column than there are items.
# NOTE(review): the extra slot is presumably reserved for an out-of-vocabulary / padding id — confirm.
num_nodes = len(items_vocab) + 1
adjacency_matrix = construct_adjacency_matrix(adjacency_matrix_path, shape=num_nodes)

In [None]:
# Load the saved Node2Vec dataset and batch it in a single pipeline expression.
# drop_remainder=True discards the final short batch so every batch has exactly
# `batch_size` elements.
node2vec_dataset = (
    tf.data.experimental.load(node2vec_dataset_path)
    .batch(batch_size, drop_remainder=True)
)

In [None]:
# The model's vocabulary is the item vocabulary, so the feature type is taken from
# the matching item setting in the config cell rather than a repeated literal.
node2vec_model = Node2Vec(
    embedding_dim=embedding_dim,
    target_feature='target',
    context_feature='context',
    feature_type=items_feature_type,
    feature_vocab=items_vocab,
)

In [None]:
# Adam at the configured learning rate; binary cross-entropy computed on raw
# logits (from_logits=True).
adam = tf.keras.optimizers.Adam(learning_rate)
bce_on_logits = tf.keras.losses.BinaryCrossentropy(from_logits=True)

node2vec_model.compile(optimizer=adam, loss=bce_on_logits)
node2vec_model.fit(node2vec_dataset, epochs=25)

In [None]:
# Re-read the raw int64 index tensor stored in the adjacency-matrix file for ad-hoc
# inspection. The path comes from the config cell so there is a single source of
# truth for it. Despite the name, the value is a dense tensor at this point.
sparse_tensor = tf.io.parse_tensor(tf.io.read_file(adjacency_matrix_path), out_type=tf.int64)

In [None]:
# For row `node_id` of the adjacency matrix, list the column ids it is paired with.
# The indices are read from `adjacency_matrix` (a SparseTensor built from the same
# file), which is available on a fresh top-to-bottom run.
node_id = 1
edge_indices = adjacency_matrix.indices
tf.boolean_mask(edge_indices[:, 1], edge_indices[:, 0] == node_id)

In [None]:
# Wrap the parsed (row, col) index pairs in a SparseTensor with one unit value per
# pair. The number of values is derived from the index tensor itself, so it always
# matches the number of index rows.
# NOTE(review): 189173 is presumably len(items_vocab) + 1, the size used for
# `adjacency_matrix` — confirm before replacing the literal.
sparse_tensor = tf.sparse.SparseTensor(
    indices=sparse_tensor,
    values=tf.ones_like(sparse_tensor[:, 0], dtype=tf.int32),
    dense_shape=(189173, 189173),
)

In [None]:
# Display the item vocabulary that was already loaded above from `items_vocab_path`;
# no second read of the same .npy file is needed.
items_vocab