<a href="https://colab.research.google.com/github/upriyam-cmu/EDGE-Rec/blob/main/execute.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install project code

In [None]:
!pip uninstall -y edge-rec
!pip install -e.

# Train model

In [None]:
from edge_rec.datasets import MovieLensDataHolder, RatingsTransform, FeatureTransform

from edge_rec.model import GraphReconstructionModel, GraphTransformer
from edge_rec.model.embed import MovieLensFeatureEmbedder, SinusoidalPositionalEmbedding

from edge_rec.diffusion import GaussianDiffusion
from edge_rec.exec import Trainer, compute_metrics

In [None]:
# Transform applied to the ratings.
# NOTE(review): presumably rescales raw ratings into the range expected by the
# Gaussian diffusion model -- confirm against RatingsTransform.ToGaussian.
ratings_transform = RatingsTransform.ToGaussian()

# Degree 2 --> 2 rating-count features per node; this width has to agree with
# the `*_rating_counts_dims` values passed to the feature embedder below.
rating_counts_transform = FeatureTransform.LogPolynomial(2)

data_holder = MovieLensDataHolder(
    augmentations={
        "ratings": ratings_transform,
        "rating_counts": rating_counts_transform,
    },
)

In [None]:
# Feature embedder. Both rating-count widths are 2, matching the degree-2
# LogPolynomial transform configured on the data holder.
embed = MovieLensFeatureEmbedder(user_rating_counts_dims=2, movie_rating_counts_dims=2)

# Inputs for the transformer core: the embedder's reported output sizes and a
# sinusoidal timestep embedding constructed with argument 16.
embed_output_sizes = embed.output_sizes
time_embedder = SinusoidalPositionalEmbedding(16)

core = GraphTransformer(
    n_blocks=1,
    n_channels=1,
    n_features=embed_output_sizes,
    time_embedder=time_embedder,
    # Optional attention overrides, currently disabled:
    # attn_kwargs=dict(heads=1, dim_head=8, num_mem_kv=0)
)

# Combine embedder and core into the model handed to the diffusion wrapper.
model = GraphReconstructionModel(embed, core, feature_dim_size=None)

In [None]:
# Wrap the model for Gaussian diffusion; the "image" size is set to the
# (n_users, n_movies) dimensions reported by the data holder.
diffusion_model = GaussianDiffusion(
    model,
    image_size=(data_holder.n_users, data_holder.n_movies),
)

# Train and test datasets share the same sampling options.
# NOTE(review): subgraph_size=None / target_density=None presumably means
# "use the whole graph, no density targeting" -- confirm in get_dataset.
dataset_options = dict(subgraph_size=None, target_density=None)
train_dataset = data_holder.get_dataset(train=True, **dataset_options)
test_dataset = data_holder.get_dataset(train=False, **dataset_options)

trainer = Trainer(
    # --- model ---
    diffusion_model=diffusion_model,
    # --- datasets ---
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    # --- training schedule ---
    batch_size=1,
    gradient_accumulate_every=1,
    force_batch_size=True,
    train_num_steps=10_000,
    train_mask_unknown_ratings=True,
    # --- optimizer ---
    train_lr=1e-4,
    adam_betas=(0.9, 0.99),
    max_grad_norm=1.0,
    # --- logging / checkpoints / EMA ---
    results_folder="./results",
    ema_update_every=10,
    ema_decay=0.995,
    save_and_sample_every=200,
    # --- accelerator ---
    amp=False,
    mixed_precision_type="fp16",
    split_batches=True,
)

# Report which device the trainer selected.
print("Using device:", trainer.device)

In [None]:
# Run training with the Trainer configured in the previous cell
# (train_num_steps=1e4 steps at batch_size=1).
# NOTE(review): presumably writes checkpoints/samples under `results_folder`
# every `save_and_sample_every` steps -- confirm in Trainer.train.
trainer.train()

# Sample ratings

In [None]:
# Disabled alternative: fetch a 50-node subgraph (target density 0.7) instead
# of building the full feature graph.
# d70_subgraph, train_ratings, test_ratings = trainer.ds.dataset.get_subgraph(
#     subgraph_size=50,
#     target_density=0.7,
#     include_separate_train_test_ratings=True
# )

# Build the full feature graph together with the separate train/test ratings
# that the metrics cell consumes.
full_graph, train_ratings, test_ratings = trainer.ds.build_feat_graph(
    include_separate_train_test_ratings=True,
)

# NOTE(review): `full_graph[:-1]` drops the last entry along the first axis --
# presumably the ground-truth rating channel; confirm against build_feat_graph.
# NOTE(review): `milestone=2200` is hard-coded and must name a checkpoint that
# training actually saved (train_num_steps=1e4, save_and_sample_every=200) --
# verify how Trainer numbers its milestones.
eval_output = trainer.eval(
    full_graph=full_graph[:-1],
    milestone=2200,
    subgraph_size=50,
)

# Keep the first item / first channel and move it to CPU, detached from autograd.
normalized_ratings = eval_output[0, 0, :, :].cpu().detach()

# NOTE(review): `* 2.5 + 3` presumably inverts RatingsTransform.ToGaussian to
# get back to the original rating scale -- confirm the constants match.
sampled_ratings = normalized_ratings * 2.5 + 3

# Evaluate metrics

In [None]:
from edge_rec.evaluate import get_metrics
import torch


def _to_edge_list(ratings):
    """Convert a dense ratings tensor into a stacked COO edge list.

    The tensor is cast to integers and converted to sparse COO format, so only
    its non-zero entries are kept. The returned tensor stacks the COO index
    rows on top of a single row holding the corresponding values.
    NOTE(review): for a 2-D ratings matrix this gives shape (3, nnz) -- confirm
    the dimensionality of the inputs.
    """
    sparse = ratings.long().to_sparse_coo()
    value_row = sparse.values().unsqueeze(dim=0)
    return torch.cat([sparse.indices(), value_row], dim=0)


train_edges = _to_edge_list(train_ratings)
test_edges = _to_edge_list(test_ratings)

# Score the sampled ratings against the known train/test edges; the bare
# expression on the last line renders the result as the cell output.
test_metrics = get_metrics(train_edges, test_edges, sampled_ratings)
test_metrics

# Display sampled ratings distribution

In [None]:
import matplotlib.pyplot as plt

# Histogram over every sampled rating value (flattened to 1-D), in 5 bins.
fig, ax = plt.subplots()
ax.hist(sampled_ratings.numpy().ravel(), bins=5)
plt.show()