In [3]:
# https://github.com/twitter/the-algorithm-ml/tree/main/projects/home/recap
# https://github.com/alibaba/EasyRec/blob/master/easy_rec/python/layers/keras/mask_net.py
# https://arxiv.org/pdf/2102.07619.pdf

In [3]:
import torch
from torch import nn
import torch.nn.functional as F
import pandas as pd

In [26]:
class MyMaskBlock(nn.Module):
    def __init__(self, feature_count, input_dim, aggregation_size ):
        super().__init__()
        self.ln_emb =  nn.LayerNorm(input_dim) # Layernorm for embedding layer


        self.aggregation_layer = nn.Linear(input_dim, aggregation_size)
        self.projection_layer = nn.Linear(aggregation_size, input_dim)
        self.activation_layer = nn.ReLU()
        self.instance_guided_mask = nn.Sequential(self.aggregation_layer, self.activation_layer, self.projection_layer)

        self.hidden_layer = nn.Linear(input_dim, input_dim)
        self.ln_hidden = nn.LayerNorm(input_dim)
        
        
    def forward(self, inp):
        x = self.ln_emb(inp) * self.instance_guided_mask(inp)
        x = self.ln_hidden(self.hidden_layer(x))
        return x

class MyParallelMaskNet(nn.Module):
    def __init__(self, feature_count, embedding_dim, embedding_sizes, num_mask_blocks, aggregation_size):
        super().__init__()
        self.feature_count = feature_count
        self.num_mask_blocks = num_mask_blocks
        self.feature_count = feature_count
        self.embedding_dim = embedding_dim
        
        self.emb_layer = nn.ModuleList() # Embedding layer
        for i in range(feature_count):
            self.emb_layer.append(nn.Embedding(embedding_sizes[i], embedding_dim))

        self.mask_blocks = nn.ModuleList()
        for i in range(num_mask_blocks):
            self.mask_blocks.append(MyMaskBlock(feature_count, feature_count * embedding_dim, aggregation_size))

        self.hidden_layer = nn.Linear(feature_count * embedding_dim * num_mask_blocks, feature_count * embedding_dim)
        self.activation_layer = nn.ReLU()
        self.output_layer = nn.Linear(feature_count * embedding_dim, 1)

    def forward(self, features):
        feature_embs = []
        for i in range(self.feature_count):
            feature_embs.append(self.emb_layer[i](features[:,i]))

        inp = torch.concat(feature_embs, dim=1)
        
        inp = inp.view(-1,self.feature_count * self.embedding_dim)
        #print(inp.shape)

        maskblock_outp = []
        for i in range(self.num_mask_blocks):
            maskblock_outp.append(self.mask_blocks[i](inp))

        outp = torch.concat(maskblock_outp, dim=1)
        #print(outp.shape)

        outp = self.activation_layer(self.hidden_layer(outp))

        outp = self.output_layer(outp)

        return outp
            
        

In [5]:
model = MyParallelMaskNet(feature_count=2, embedding_dim=128, embedding_sizes=[3,5], num_mask_blocks=2, aggregation_size=512)

In [6]:
inp = torch.randint(3,(3,2))
inp

tensor([[2, 1],
        [1, 1],
        [1, 2]])

In [7]:
model(inp)

torch.Size([3, 256])
torch.Size([3, 512])


tensor([[-0.0274],
        [-0.1949],
        [-0.3761]], grad_fn=<AddmmBackward0>)

In [8]:
#!wget https://files.grouplens.org/datasets/movielens/ml-20m.zip
#!wget https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
#!unzip ml-20m.zip
#!unzip ml-latest-small.zip

In [9]:
df_rating = pd.read_csv("./ml-latest-small/ratings.csv")

In [10]:
df_rating.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [11]:
df_movies = pd.read_csv("./ml-latest-small/movies.csv")
df_movies.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [13]:
df = df_rating.merge(df_movies, on = "movieId")
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy


In [14]:
df["movieCat"] = df.movieId.astype("category").cat.codes
df["userCat"] = df.userId.astype("category").cat.codes
df["genresCat"] = df.genres.astype("category").cat.codes

df["timestamp_norm"] = (df.timestamp - df.timestamp.mean())/df.timestamp.std()
df["rating_norm"] = df.rating
df.head()

Unnamed: 0,userId,movieId,rating,timestamp,title,genres,movieCat,userCat,genresCat,timestamp_norm,rating_norm
0,1,1,4.0,964982703,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,0,351,-1.114225,4.0
1,5,1,4.0,847434962,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,4,351,-1.65777,4.0
2,7,1,4.5,1106635946,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,6,351,-0.459214,4.5
3,15,1,2.5,1510577970,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,14,351,1.40863,2.5
4,17,1,4.5,1305696483,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,16,351,0.46125,4.5


In [15]:
from torch.utils.data import Dataset, DataLoader
class MovieLensDataset(Dataset):
    def __init__(self, df):
        super().__init__()
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        timestamp_data = float(self.df["timestamp_norm"].iloc[idx])  # float is expected by linear layer. avoid "RuntimeError: mat1 and mat2 must have the same dtype, but got Double and Float"
        movie_data = int(self.df["movieCat"].iloc[idx])
        user_data = int(self.df["userCat"].iloc[idx])
        genres_data = int(self.df["genresCat"].iloc[idx])
        rating_data = float(self.df["rating_norm"].iloc[idx])

        dense_data = torch.tensor([timestamp_data])
        sparse_data = torch.tensor([user_data, movie_data, genres_data])
        target_data = torch.tensor(rating_data)

        return dense_data, sparse_data, target_data

In [16]:
# random split
df_train=df.sample(frac=0.8,random_state=200)
df_test=df.drop(df_train.index)
movielensDataset_train = MovieLensDataset(df_train)
movielensDataset_test = MovieLensDataset(df_test)

movielensDataset_test[0]

(tensor([1.4086]), tensor([ 14,   0, 351]), tensor(2.5000))

In [18]:
movielensDataloader_train = DataLoader(movielensDataset_train, batch_size=512, shuffle=True)
movielensDataloader_test = DataLoader(movielensDataset_test, batch_size=512, shuffle=True)

dense_data, sparse_data, target_data = next(iter(movielensDataloader_test))
dense_data.shape, sparse_data.shape, target_data.shape

(torch.Size([512, 1]), torch.Size([512, 3]), torch.Size([512]))

In [47]:
device = "cuda"
model = MyParallelMaskNet(feature_count=3, embedding_dim=8, embedding_sizes=[len(df.userCat.unique()),len(df.movieCat.unique()), len(df.genresCat.unique())], num_mask_blocks=2, aggregation_size=64).to(device)

In [48]:
lossFunc = F.mse_loss
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

def train_loop():
    model.train()
    
    step = 0
    losses = 0
    for dense_data, sparse_data, target_data in movielensDataloader_train:
        outp = model(sparse_data.to(device))
        #use the mean of the output
        outp = outp.mean(dim = 1, keepdim=True)
        loss = lossFunc(outp, target_data[...,None].to(device))
        loss.backward()
    
        with torch.no_grad():
            optimizer.step()
            optimizer.zero_grad()
            
        step+=1
        losses += loss.item()
        if step % 100 == 0:
            print(loss)
    print(f"train epoch loss: {losses/step}")

@torch.no_grad()
def eval_loop():
    model.eval()
    step = 0
    losses = 0
    for dense_data, sparse_data, target_data in movielensDataloader_test:
        outp = model(sparse_data.to(device))
        #use the mean of the output
        outp = outp.mean(dim = 1, keepdim=True)
        loss = lossFunc(outp, target_data[...,None].to(device))
            
        step+=1
        losses += loss.item()
        if step % 100 == 0:
            print(loss)
    print(f"eval epoch loss: {losses/step}")
    
for i in range(10):
    print(f'epoch #{i}')
    train_loop()
    eval_loop()

epoch #0
tensor(1.0877, device='cuda:0', grad_fn=<MseLossBackward0>)
train epoch loss: 2.53808427875555
eval epoch loss: 1.0417838975787164
epoch #1
tensor(0.9994, device='cuda:0', grad_fn=<MseLossBackward0>)
train epoch loss: 1.0064091965367523
eval epoch loss: 0.9949704185128212
epoch #2
tensor(0.9994, device='cuda:0', grad_fn=<MseLossBackward0>)
train epoch loss: 0.9538810754878612
eval epoch loss: 0.95332021266222
epoch #3
tensor(0.9190, device='cuda:0', grad_fn=<MseLossBackward0>)
train epoch loss: 0.9012755555442616
eval epoch loss: 0.9208311676979065
epoch #4
tensor(0.8875, device='cuda:0', grad_fn=<MseLossBackward0>)
train epoch loss: 0.8554310941997962
eval epoch loss: 0.8948577970266343
epoch #5
tensor(0.8903, device='cuda:0', grad_fn=<MseLossBackward0>)
train epoch loss: 0.8180632157416283
eval epoch loss: 0.8796842291951179
epoch #6
tensor(0.6947, device='cuda:0', grad_fn=<MseLossBackward0>)
train epoch loss: 0.7881526317023024
eval epoch loss: 0.8676399469375611
epoch #7
t

In [49]:
movielensDataloader_test = DataLoader(movielensDataset_test, batch_size=64, shuffle=True)

dense_data, sparse_data, target_data = next(iter(movielensDataloader_test))

outp=model(sparse_data.to(device))
outp = outp.mean(dim = 1, keepdim=True)

#print(outp.squeeze())
#print(target_data)
loss = lossFunc(outp, target_data[...,None].to(device))
print(loss)

[(outp[i].item(), target_data[i].item()) for i in range(outp.shape[0])]

tensor(0.7439, device='cuda:0', grad_fn=<MseLossBackward0>)


[(2.154715061187744, 1.0),
 (3.025806427001953, 2.0),
 (4.176898956298828, 5.0),
 (3.1975975036621094, 3.0),
 (3.3168246746063232, 3.5),
 (2.6703145503997803, 4.0),
 (3.6318793296813965, 3.0),
 (3.802694320678711, 4.5),
 (2.154567003250122, 2.0),
 (3.624084949493408, 4.0),
 (3.0368292331695557, 2.0),
 (2.995434045791626, 2.0),
 (3.488330364227295, 3.0),
 (3.8117871284484863, 4.0),
 (3.92950439453125, 4.5),
 (3.3929388523101807, 3.0),
 (2.498929500579834, 2.0),
 (3.503556251525879, 5.0),
 (4.095623970031738, 3.0),
 (2.9982118606567383, 5.0),
 (2.6374692916870117, 1.5),
 (3.7243540287017822, 4.0),
 (4.872452735900879, 5.0),
 (2.816082239151001, 3.0),
 (4.045129776000977, 4.0),
 (3.403834581375122, 4.0),
 (3.3828320503234863, 3.5),
 (3.0333738327026367, 1.0),
 (2.8127565383911133, 1.5),
 (2.121614456176758, 2.5),
 (3.1574935913085938, 2.0),
 (3.504464864730835, 5.0),
 (3.266648292541504, 4.0),
 (3.5117990970611572, 3.5),
 (2.574347496032715, 3.0),
 (2.825565814971924, 3.0),
 (3.9126844406