In [2]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

import torch
import torch.nn as nn
import torch.nn.functional as F

import argparse
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm

from preprocessing import *
from dataset import *
from metrics import *
from model import *
from utils import bert2dict

In [3]:
# Default number of encoder time steps the decoder's attention layer scores
# (it is the output width of `gru4recF_decoder.attn`).
MAX_LENGTH = 200
# class gru4recFC_decoder(nn.Module):
#     def __init__(self, hidden_size, output_size, dropout_p=0.1, max_length=MAX_LENGTH):
#         super(gru4recFC_decoder, self).__init__()
#         self.hidden_size = hidden_size
#         self.output_size = output_size
#         self.dropout_p = dropout_p
#         self.max_length = max_length

#         self.embedding = nn.Embedding(self.output_size, self.hidden_size)
#         self.attn = nn.Linear(self.hidden_size * 2, self.max_length)
#         self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)
#         self.dropout = nn.Dropout(self.dropout_p)
#         self.gru = nn.GRU(self.hidden_size, self.hidden_size)
#         self.out = nn.Linear(self.hidden_size, self.output_size)

#     def forward(self, input, hidden, encoder_outputs):
#         embedded = self.embedding(input).view(1, 1, -1)
#         embedded = self.dropout(embedded)

#         attn_weights = F.softmax(
#             self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
#         attn_applied = torch.bmm(attn_weights.unsqueeze(0),
#                                  encoder_outputs.unsqueeze(0))

#         output = torch.cat((embedded[0], attn_applied[0]), 1)
#         output = self.attn_combine(output).unsqueeze(0)

#         output = F.relu(output)
#         output, hidden = self.gru(output, hidden)

#         output = F.log_softmax(self.out(output[0]), dim=1)
#         return output, hidden, attn_weights

#     def initHidden(self):
#         return torch.zeros(1, 1, self.hidden_size, device=device)

# # class EncoderRNN(nn.Module):
# #     def __init__(self, input_size, hidden_size):
# #         super(EncoderRNN, self).__init__()
# #         self.hidden_size = hidden_size

# #         self.embedding = nn.Embedding(input_size, hidden_size)
# #         self.gru = nn.GRU(hidden_size, hidden_size)

# #     def forward(self, input, hidden):
# #         embedded = self.embedding(input).view(1, 1, -1)
# #         output = embedded
# #         output, hidden = self.gru(output, hidden)
# #         return output, hidden

# #     def initHidden(self):
# #         return torch.zeros(1, 1, self.hidden_size, device=device)

class gru4recF_decoder(nn.Module):
    """One-step GRU decoder that attends over a fixed-length block of encoder outputs.

    Each call to ``forward`` consumes one token per batch element, scores
    ``max_length`` encoder positions from the token embedding and the current
    hidden state, mixes the attended encoder output back into the embedding,
    advances the GRU by one step and projects the result to ``output_dim``
    item scores.

    Args:
        hidden_dim: width of the token embedding, the GRU state and the
            encoder outputs that are attended over.
        output_dim: number of item scores produced; the embedding table has
            ``output_dim + 1`` rows.
        dropout: dropout probability applied to the token embedding.
        max_length: number of encoder positions the attention layer scores.
    """

    def __init__(self, hidden_dim, output_dim, dropout=0, max_length=MAX_LENGTH):
        super(gru4recF_decoder, self).__init__()
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.max_length = max_length

        self.embedding = nn.Embedding(self.output_dim+1, self.hidden_dim)
        # Attention scores are computed from [embedding ; hidden], hence 2 * hidden_dim inputs.
        self.attn = nn.Linear(self.hidden_dim * 2, self.max_length)
        # Merges [embedding ; attended encoder output] back down to hidden_dim.
        self.attn_combine = nn.Linear(self.hidden_dim * 2, self.hidden_dim)
        self.dropout = nn.Dropout(dropout)
        self.gru = nn.GRU(self.hidden_dim, self.hidden_dim)
        self.out = nn.Linear(self.hidden_dim, self.output_dim)

    def forward(self, input, hidden, encoder_outputs):
        """Run a single decoding step.

        Args:
            input: integer token tensor; it is reshaped with
                ``view(1, input.size()[1], -1)`` after embedding, so a
                ``(1, batch)`` layout is expected.
            hidden: GRU state of shape ``(1, batch, hidden_dim)``.
            encoder_outputs: tensor batch-multiplied with the attention
                weights, i.e. ``(batch, max_length, hidden_dim)``.

        Returns:
            ``(output, hidden, attn_weights)`` where ``output`` holds the raw
            (un-normalised) scores from the output layer with shape
            ``(batch, output_dim)``, ``hidden`` is the updated GRU state and
            ``attn_weights`` is the softmax over encoder positions with shape
            ``(batch, max_length)``.
        """
        embedded = self.embedding(input).view(1, input.size()[1], -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)

        # (batch, 1, max_length) x (batch, max_length, hidden_dim) -> (batch, hidden_dim)
        attn_applied = torch.bmm(attn_weights.unsqueeze(1),
                                 encoder_outputs).squeeze(1)

        output = torch.cat((embedded[0], attn_applied), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = self.out(output[0])

        return output, hidden, attn_weights

    def initHidden(self, batch_size=1, device=None):
        """Return an all-zero initial GRU state of shape ``(1, batch_size, hidden_dim)``.

        Args:
            batch_size: number of sequences decoded in parallel (default 1).
            device: device for the returned tensor; defaults to the device
                that holds this module's embedding weights, so the method no
                longer depends on a notebook-level ``device`` variable.
        """
        if device is None:
            device = self.embedding.weight.device
        # The attribute is `hidden_dim`; `hidden_size` was never set on this class.
        return torch.zeros(1, batch_size, self.hidden_dim, device=device)

In [None]:
# class gru4recF_attention(nn.Module):
#         def __init__(self,embedding_dim,
#                  hidden_dim,
#                  output_dim,
#                  genre_dim=0,
#                  batch_first=True,
#                  max_length=200,
#                  pad_token=0,
#                  pad_genre_token=0,
#                  bert_dim=0,
#                  dropout=0,
#                  tied=False):
#             self.bert_dim = bert_dim
#             self.genre_dim = genre_dim
            
#             self.model = gru4recF_encoder(embedding_dim=embedding_dim,
#              hidden_dim=hidden_dim,
#              output_dim=output_dim,
#              genre_dim=genre_dim,
#              batch_first=True,
#              max_length=max_length,
#              bert_dim=bert_dim,
#              tied = tied,
#              dropout=dropout)

#             self.modelD = gru4recF_decoder(hidden_dim=hidden_dim, output_dim=output_dim, dropout=0, max_length=max_length)
            
#         def forward(self,x,x_lens,labels,x_genre=None,pack=True):
#             encoder_outputs, hidden_states = self.model(x=inputs.to(device),x_lens=x_lens.squeeze().tolist())
#             decoder_inputs = inputs[:,0].view(1,-1).to(device)
#             decoder_hidden = hidden_states
            
#             outputs = torch.zeros(inputs.size()[0],max_length,output_dim, device=device)
            
#             for i in range(max_length):
#                 decoder_outputs, decoder_hidden, decoder_attention = self.modelD(decoder_inputs, decoder_hidden, encoder_outputs)
#                 outputs[:,i,:] = decoder_outputs
#                 decoder_inputs = labels[:,i].view(1,-1)

In [4]:
class gru4recF_attention(nn.Module):
    """GRU session encoder followed by causal dot-product attention over its own states.

    Item ids are embedded (optionally summed with a projected plot embedding
    and summed genre embeddings), encoded by a single-layer GRU, and every
    time step ``t`` then attends over the encoder states of steps ``0..t``
    using raw dot-product scores (no softmax). The resulting context vector,
    optionally concatenated with the state at ``t``, is projected to item
    scores.

    Args:
        embedding_dim: width of the item embedding, of the projected plot
            embedding, of the genre embedding and of the GRU input.
        hidden_dim: width of the GRU hidden state. Replaced by
            ``embedding_dim`` when ``tied`` is true.
        output_dim: number of items. The embedding tables get
            ``output_dim + 1`` rows so that one extra id can serve as padding.
        genre_dim: number of genres; ``0`` disables the genre embedding.
            Otherwise the genre table has ``genre_dim + 1`` rows.
        batch_first: forwarded to the GRU and to ``pad_packed_sequence``.
            ``forward`` indexes dimension 0 as the batch and packs with
            ``batch_first=True``, so only ``True`` is exercised here.
        max_length: length every sequence is re-padded to after the GRU.
        pad_token: ``padding_idx`` of the item and plot embedding tables.
        pad_genre_token: ``padding_idx`` of the genre embedding table.
        bert_dim: width of the plot embedding table; ``0`` disables it.
        dropout: forwarded to ``nn.GRU(dropout=...)``.
        tied: share the output layer's weight matrix with the item embedding
            (the output then has ``output_dim + 1`` columns). Requires
            ``cat=False`` because the shared matrix is ``embedding_dim`` wide.
        cat: concatenate each hidden state with its context vector before the
            output layer (input width ``2 * hidden_dim``) instead of using the
            context vector alone.

    Raises:
        ValueError: if ``tied`` and ``cat`` are both true.
    """
    def __init__(self,embedding_dim,
                 hidden_dim,
                 output_dim,
                 genre_dim=0,
                 batch_first=True,
                 max_length=200,
                 pad_token=0,
                 pad_genre_token=0,
                 bert_dim=0,
                 dropout=0,
                 tied=False,
                 cat=True):

        super(gru4recF_attention,self).__init__()

        if tied and cat:
            # The tied weight is (output_dim + 1, embedding_dim) while cat feeds
            # 2 * embedding_dim features, so forward() could never succeed.
            raise ValueError("tied=True shares the (output_dim+1, embedding_dim) embedding matrix with the "
                             "output layer and therefore requires cat=False")

        self.batch_first =batch_first

        self.embedding_dim = embedding_dim
        self.hidden_dim =hidden_dim
        self.output_dim =output_dim
        self.genre_dim = genre_dim
        self.bert_dim = bert_dim

        self.max_length = max_length
        self.pad_token = pad_token
        self.pad_genre_token = pad_genre_token

        self.tied = tied
        self.dropout = dropout
        self.cat = cat

        # weight tying needs the GRU state to be as wide as the item embedding
        if self.tied:
            self.hidden_dim = embedding_dim

        # initialize item-id lookup table
        # add 1 to output dimension because we have to add a pad token
        self.movie_embedding = nn.Embedding(output_dim+1,embedding_dim,padding_idx=pad_token)

        # initialize plot lookup table
        # add 1 to output dimension because we have to add a pad token
        if bert_dim != 0:
            self.plot_embedding = nn.Embedding(output_dim+1,bert_dim,padding_idx=pad_token)

            # project plot embedding to same dimensionality as movie embedding
            self.plot_projection = nn.Linear(bert_dim,embedding_dim)

        if genre_dim != 0:
            self.genre_embedding = nn.Embedding(genre_dim+1,embedding_dim,padding_idx=pad_genre_token)

        self.encoder_layer = nn.GRU(embedding_dim,self.hidden_dim,batch_first=self.batch_first,dropout=self.dropout)

        # width of what forward() hands to the output layer; derived from
        # self.hidden_dim so that it follows the tied override above
        context_dim = self.hidden_dim * 2 if self.cat else self.hidden_dim

        if self.tied:
            # one extra output column for the pad row of the shared embedding
            self.output_layer = nn.Linear(context_dim,output_dim+1)
            self.output_layer.weight = self.movie_embedding.weight
        else:
            self.output_layer = nn.Linear(context_dim,output_dim)

    def forward(self,x,x_lens,x_genre=None,pack=True):
        """Score every item at every time step of a batch of sessions.

        Args:
            x: integer item ids, indexed as ``(batch, time)``.
            x_lens: true (unpadded) length of each session; only used when
                ``pack`` is true.
            x_genre: genre ids, required when ``genre_dim != 0``; the genre
                embeddings are summed over dimension 2.
            pack: pack the padded batch before the GRU and re-pad the result
                to ``max_length``. When false the GRU runs on the padded
                batch as is.

        Returns:
            Tensor of shape ``(batch, time, output_dim)``
            (``output_dim + 1`` when ``tied``), where ``time`` is
            ``max_length`` if ``pack`` is true.
        """
        item_ids = x

        # sum the enabled embeddings: item id (+ projected plot) (+ genres)
        x = self.movie_embedding(item_ids)
        if self.bert_dim != 0:
            x = x + self.plot_projection(F.leaky_relu(self.plot_embedding(item_ids)))
        if self.genre_dim != 0:
            x = x + self.genre_embedding(x_genre).sum(2)

        if pack:
            packed = nn.utils.rnn.pack_padded_sequence(x,x_lens,batch_first=True,enforce_sorted=False)
            output_packed,_ = self.encoder_layer(packed)
            # Padded time steps are filled with 0.0. pad_token is an item id,
            # not a hidden-state value, so it is not used as the fill here.
            encoder_states, _ = nn.utils.rnn.pad_packed_sequence(output_packed,
                                                                 batch_first=self.batch_first,
                                                                 total_length=self.max_length,
                                                                 padding_value=0.0)
        else:
            encoder_states,_ = self.encoder_layer(x)

        # scores[b, t, s] = <h_t, h_s>                          BS x MS x MS
        scores = torch.bmm(encoder_states,encoder_states.transpose(1,2))
        # causal mask: step t may only look at steps s <= t
        causal_scores = torch.tril(scores)
        # context[b, t] = sum_{s <= t} <h_t, h_s> * h_s         BS x MS x HS
        context_vectors = torch.bmm(causal_scores,encoder_states)

        if self.cat:
            # CCs = BS x MS x 2HS
            combined_contexts = torch.cat((encoder_states,context_vectors),dim=2)
        else:
            # CCs = BS x MS x HS
            combined_contexts = context_vectors

        # O = BS x MS x V
        return self.output_layer(combined_contexts)

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [6]:
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 23 08:39:11 2021

@author: lpott
"""
import argparse
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm

from preprocessing import *
from dataset import *
from metrics import *
from model import *
from utils import bert2dict


In [25]:
# variables

# --- input files ---
# Join the path components with os.path.join so the ratings file resolves on
# POSIX systems as well as on Windows (the literal backslash only worked on Windows).
read_filename = os.path.join("ml-1m", "ratings.dat")
read_bert_filename = "bert_sequence_20m.txt"
# an empty string disables the genre features (see the id-remapping cell)
read_movie_filename = ""#"movies-1m.csv"
size = "1m"

# --- optimisation ---
num_epochs = 100
lr = 1e-2
batch_size = 64
reg = 1e-5
# "normal" uses a single optimizer; the training cell also handles "interleave" and "alternate"
train_method = "normal"
# NOTE(review): for "BPR"/"BPR_MAX" the training cell calls
# loss_fn(outputs, labels, x_lens, uid), while the loss cell of this notebook
# builds nn.CrossEntropyLoss, which only accepts (input, target). Confirm that a
# pairwise loss is assigned to loss_fn before training with this setting.
loss_type = "BPR"
num_neg_samples = 10


# --- model sizes ---
hidden_dim = 256
embedding_dim = 256
# set to 0 to disable the plot (BERT) embedding
bert_dim= 768
window = 0

freeze_plot = False
tied = False
dropout= 0

# --- evaluation / sequence handling ---
k = 10
max_length = 200
min_len = 10


# nextitnet options...
hidden_layers = 3
dilations = [1,2,2,4]

model_type = "attention"

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [26]:
# Ask PyTorch's CUDA caching allocator to release cached blocks it is not
# currently using, so memory left over from earlier runs in this kernel is freed
# before the data and model are (re)built.
torch.cuda.empty_cache()

In [27]:
# ------------------Data Initialization----------------------#

# Read the ratings file into a dataframe with create_df (described by the
# author as a time-sorted pandas dataframe) and pass it straight through
# filter_df with item_min=min_len, which the author describes as removing
# users whose sessions are shorter than min_len.
ml_1m = filter_df(create_df(read_filename,size=size),item_min=min_len)

user_id        6040
item_id        3706
rating            5
timestamp    458455
dtype: int64
(1000209, 4)
Minimum Session Length: 20
Maximum Session Length: 2314
Average Session Length: 165.60
user_id        6040
item_id        3706
rating            5
timestamp    458455
dtype: int64
(1000209, 4)
Minimum Session Length: 20
Maximum Session Length: 2314
Average Session Length: 165.60


In [28]:
# ------------------Data Initialization----------------------#
if read_movie_filename == "":
    # No movie metadata file configured: only remap the ratings frame.
    reset_object = reset_df()

    # map all user ids and item ids to range 0 - Number of Users/Items
    # i.e. [1,7,5] -> [0,2,1]
    ml_1m = reset_object.fit_transform(ml_1m)

    # genre features are switched off
    pad_genre_token = None
    ml_movie_df = None
    genre_dim = 0

else:
    # Load the movie metadata and convert its genre column.
    ml_movie_df = convert_genres(create_movie_df(read_movie_filename,size=size))

    reset_object = reset_df()

    # map all user ids, item ids, and genres to range 0 - number of users/items/genres
    ml_1m,ml_movie_df = reset_object.fit_transform(ml_1m,ml_movie_df)

    # value that padded genre tokens shall take
    pad_genre_token = reset_object.genre_enc.transform(["NULL"]).item()

    # number of distinct genre values minus one
    # (presumably excluding the "NULL" pad genre; verify against reset_df)
    genre_dim = len(np.unique(np.concatenate(ml_movie_df.genre))) - 1



In [29]:
# ------------------Data Initialization----------------------#
# number of distinct users, items, ratings and timestamps in the ratings frame
n_users,n_items,n_ratings,n_timestamp = ml_1m.nunique()

# The pad token and the width of the output (softmax) layer are both n_items:
# padding uses the first id past the remapped item range.
pad_token = output_dim = n_items

# item id -> BERT plot embedding dictionary, only needed when plot features are enabled
if bert_dim != 0:
    feature_embed = bert2dict(bert_filename=read_bert_filename)



In [30]:
# create a dictionary of every user's session (history)
# i.e. {user: [user clicks]}
if size == "1m":
    user_history = create_user_history(ml_1m)

elif size == "20m":
    import pickle
    # NOTE(security): pickle.load can execute arbitrary code while
    # deserialising; only load a userhistory.pickle you produced yourself.
    with open('userhistory.pickle', 'rb') as handle:
        user_history = pickle.load(handle)

else:
    # Without this branch user_history would simply stay undefined and the
    # failure would only surface as a NameError in a later cell.
    raise ValueError("Unsupported size {!r}; expected '1m' or '20m'".format(size))
# create a dictionary of all items a user has not clicked
# i.e. {user: [items not clicked by user]}
# user_noclicks = create_user_noclick(user_history,ml_1m,n_items)

  1%|▊                                                                              | 65/6040 [00:00<00:09, 643.57it/s]



100%|█████████████████████████████████████████████████████████████████████████████| 6040/6040 [00:08<00:00, 689.73it/s]


In [31]:
# Leave-one-out split of every user's history into three windows of at most
# max_length items (as described by the author):
#   train: the window that ends two items before the end of the session
#   val:   the window that ends one item before the end of the session
#   test:  the final window
# i.e. if max_length = 4, [1,2,3,4,5,6] -> [1,2,3,4] , [2,3,4,5] , [3,4,5,6]
train_history,val_history,test_history = train_val_test_split(
    user_history,
    max_length=max_length,
)

# PyTorch datasets for the three splits.
# In 'eval' mode every position except the last token to predict is padded.
train_dataset = GRUDataset(
    train_history,
    genre_df=ml_movie_df,
    mode='train',
    max_length=max_length,
    pad_token=pad_token,
    pad_genre_token=pad_genre_token,
)
val_dataset = GRUDataset(
    val_history,
    genre_df=ml_movie_df,
    mode='eval',
    max_length=max_length,
    pad_token=pad_token,
    pad_genre_token=pad_genre_token,
)
test_dataset = GRUDataset(
    test_history,
    genre_df=ml_movie_df,
    mode='eval',
    max_length=max_length,
    pad_token=pad_token,
    pad_genre_token=pad_genre_token,
)

# Dataloaders: only the training loader shuffles; evaluation uses a fixed batch size of 64.
train_dl = DataLoader(train_dataset,batch_size = batch_size,shuffle=True)
val_dl = DataLoader(val_dataset,batch_size=64)
test_dl = DataLoader(test_dataset,batch_size=64)

100%|██████████████████████████████████████████████████████████████████████████| 6040/6040 [00:00<00:00, 177646.23it/s]






In [32]:
# Echo the feature dimensions and pad tokens the model will be built with,
# one per line, emitted through a single print call.
print("\n".join([
    "Bert dim: {:d}".format(bert_dim),
    "Genre dim: {:d}".format(genre_dim),
    "Pad Token: {}".format(pad_token),
    "Pad Genre Token: {}".format(pad_genre_token),
]))

Bert dim: 768
Genre dim: 0
Pad Token: 3706
Pad Genre Token: None


In [33]:
# ------------------Model Initialization----------------------#

# Build exactly one model, selected by model_type, from the configuration
# values defined earlier. The branches are mutually exclusive, and an unknown
# model_type fails here instead of leaving `model` undefined for later cells.
if model_type == "feature_add":
    model = gru4recF(embedding_dim=embedding_dim,
             hidden_dim=hidden_dim,
             output_dim=output_dim,
             genre_dim=genre_dim,
             batch_first=True,
             max_length=max_length,
             pad_token=pad_token,
             pad_genre_token=pad_genre_token,
             bert_dim=bert_dim,
             tied = tied,
             dropout=dropout)

elif model_type == "feature_concat":
    model = gru4recFC(embedding_dim=embedding_dim,
             hidden_dim=hidden_dim,
             output_dim=output_dim,
             genre_dim=genre_dim,
             batch_first=True,
             max_length=max_length,
             pad_token=pad_token,
             pad_genre_token=pad_genre_token,
             bert_dim=bert_dim,
             tied = tied,
             dropout=dropout)

elif model_type == "vanilla":
    model = gru4rec_vanilla(hidden_dim=hidden_dim,
                            output_dim=output_dim,
                            batch_first=True,
                            max_length=max_length,
                            pad_token=pad_token,
                            tied=tied,
                            embedding_dim=embedding_dim)

elif model_type =="feature_only":
    model = gru4rec_feature(hidden_dim=hidden_dim,
                            output_dim=output_dim,
                            batch_first=True,
                            max_length=max_length,
                            pad_token=pad_token,
                            bert_dim=bert_dim)

elif model_type == "conv":
    # Use the configured max_length / pad_token / dropout like every other
    # branch: the datasets pad with pad_token (= n_items), so the previously
    # hard-coded pad_token=0 pointed at a real item id instead of the padding.
    # window stays at 3 because the config value is 0
    # (NOTE(review): confirm which window the conv model should use).
    model = gru4rec_conv(embedding_dim,
                 hidden_dim,
                 output_dim,
                 batch_first=True,
                 max_length=max_length,
                 pad_token=pad_token,
                 dropout=dropout,
                 window=3,
                 tied=tied)

elif model_type == "nextitnet":
    model = NextItNet(embedding_dim=embedding_dim,
                      output_dim=output_dim,
                      hidden_layers=hidden_layers,
                      dilations=dilations,
                      pad_token=n_items,
                      max_len=max_length)

elif model_type == "attention":
    model = gru4recF_attention(embedding_dim=embedding_dim,
             hidden_dim=hidden_dim,
             output_dim=output_dim,
             genre_dim=genre_dim,
             batch_first=True,
             max_length=max_length,
             pad_token=pad_token,
             pad_genre_token=pad_genre_token,
             bert_dim=bert_dim,
             tied = tied,
             dropout=dropout)
#     modelD = gru4recF_decoder(hidden_dim=hidden_dim, output_dim=output_dim, dropout=0, max_length=max_length)
#     modelD = modelD.to(device)

else:
    raise ValueError("Unknown model_type: {!r}".format(model_type))

In [34]:
# When plot features are enabled, let the model initialise itself from the
# item-id -> BERT embedding dictionary (feature_embed) together with the id
# remapping object.
# NOTE(review): the gru4recF_attention class defined in this notebook has no
# init_weight method; this call only works if the class actually in scope
# provides one (e.g. a same-named class brought in by `from model import *`) —
# confirm which definition is used when model_type == "attention".
if bert_dim != 0:
    model.init_weight(reset_object,feature_embed)
    
# move all parameters to the configured device
model = model.to(device)

In [35]:
# Names of the parameters selected by the same filter the "features" optimizer
# uses: everything except parameters whose name contains "movie" but neither
# "plot_embedding" nor "genre".
[
    param_name
    for param_name, _param in model.named_parameters()
    if not (
        "movie" in param_name
        and "plot_embedding" not in param_name
        and "genre" not in param_name
    )
]

['plot_embedding.weight',
 'plot_projection.weight',
 'plot_projection.bias',
 'encoder_layer.weight_ih_l0',
 'encoder_layer.weight_hh_l0',
 'encoder_layer.bias_ih_l0',
 'encoder_layer.bias_hh_l0',
 'output_layer.weight',
 'output_layer.bias']

In [36]:
# Names of the parameters selected by the same filter the "ids" optimizer uses:
# every parameter whose name mentions neither "plot" nor "genre".
[
    param_name
    for param_name, _param in model.named_parameters()
    if not any(tag in param_name for tag in ("plot", "genre"))
]

['movie_embedding.weight',
 'encoder_layer.weight_ih_l0',
 'encoder_layer.weight_hh_l0',
 'encoder_layer.bias_ih_l0',
 'encoder_layer.bias_hh_l0',
 'output_layer.weight',
 'output_layer.bias']

In [20]:
# Adam optimizer(s) over the model parameters.
if train_method == "normal":
    # a single optimizer updates every parameter
    optimizer = torch.optim.Adam(model.parameters(),lr=lr,weight_decay=reg)
    decoder_optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=reg)

else:
    # Two optimizers over overlapping parameter groups:
    #  - "features": everything except the pure item-id ("movie") parameters, at a tenth of the learning rate
    #  - "ids": everything that is neither a plot nor a genre parameter, at the full learning rate
    optimizer_features = torch.optim.Adam(
        [
            weights
            for param_name, weights in model.named_parameters()
            if not ("movie" in param_name
                    and "plot_embedding" not in param_name
                    and "genre" not in param_name)
        ],
        lr=lr/10,
        weight_decay=reg,
    )

    optimizer_ids = torch.optim.Adam(
        [
            weights
            for param_name, weights in model.named_parameters()
            if "plot" not in param_name and "genre" not in param_name
        ],
        lr=lr,
        weight_decay=reg,
    )

# optionally keep the plot embedding table fixed during training
if freeze_plot and bert_dim !=0:
    model.plot_embedding.weight.requires_grad = False

In [21]:
# Cross-entropy over item scores; targets equal to n_items (the pad token used
# by the datasets) are ignored.
# NOTE(review): the training cell calls loss_fn(outputs, labels, x_lens, uid)
# when loss_type is "BPR" or "BPR_MAX"; nn.CrossEntropyLoss only accepts
# (input, target), so this loss is only usable with loss_type == "XE".
loss_fn = nn.CrossEntropyLoss(ignore_index=n_items)
#Recall_Object = Recall_E_prob(ml_1m,user_history,n_users,n_items,k=k)
#Recall_Object = Recall_E_Noprob(ml_1m,user_history,n_users,n_items,k=k)

In [22]:
# Evaluation callable. The training cell invokes it as
# Recall_Object(model, dataloader, split_name) and unpacks a (hit, ndcg) pair
# from the result.
Recall_Object = Recall_E_prob(ml_1m,user_history,n_users,n_items,k=k,device=device)



In [24]:
# ------------------Training Initialization----------------------#
# Fail fast on configurations the loop below cannot execute, instead of
# silently training nothing or crashing on the first batch.
if train_method not in ("normal", "interleave", "alternate"):
    raise ValueError("Unknown train_method: {!r}".format(train_method))
if loss_type not in ("XE", "BPR", "BPR_MAX"):
    raise ValueError("Unknown loss_type: {!r}".format(loss_type))
if loss_type != "XE" and isinstance(loss_fn, nn.CrossEntropyLoss):
    # BPR-style losses are called as loss_fn(outputs, labels, x_lens, uid);
    # nn.CrossEntropyLoss only accepts (input, target).
    raise ValueError("loss_type={!r} needs a loss callable as loss_fn(outputs, labels, x_lens, uid), "
                     "but loss_fn is nn.CrossEntropyLoss; use loss_type='XE' or assign a "
                     "pairwise loss to loss_fn".format(loss_type))

# Metrics of the epoch with the best validation ndcg (first tuple entry).
# Each tuple holds three values, printed below as @10, @5 and @1.
max_train_hit = (0,0,0)
max_val_hit = (0,0,0)
max_test_hit = (0,0,0)

max_train_ndcg = (0,0,0)
max_val_ndcg = (0,0,0)
max_test_ndcg = (0,0,0)

for epoch in range(num_epochs):
    print("="*20,"Epoch {}".format(epoch+1),"="*20)

    model.train()

    running_loss = 0

    for j,data in enumerate(tqdm(train_dl,position=0,leave=True)):

        if train_method == "normal":
            optimizer.zero_grad()
        else:
            optimizer_features.zero_grad()
            optimizer_ids.zero_grad()

        # reshape(-1) keeps x_lens a list even for a batch of one;
        # squeeze() would collapse it to a scalar and tolist() to a bare int
        if genre_dim != 0:
            inputs,genre_inputs,labels,x_lens,uid = data
            outputs = model(x=inputs.to(device),x_lens=x_lens.reshape(-1).tolist(),x_genre=genre_inputs.to(device))
        else:
            inputs,labels,x_lens,uid = data
            outputs = model(x=inputs.to(device),x_lens=x_lens.reshape(-1).tolist())

        labels = labels.to(device)
        if loss_type == "XE":
            # tied models emit one extra column for the pad token; drop it before the cross entropy
            logits = outputs[:,:,:-1] if tied else outputs
            loss = loss_fn(logits.reshape(-1,logits.size(-1)),labels.reshape(-1))
        else:
            # "BPR" / "BPR_MAX"
            loss = loss_fn(outputs,labels,x_lens,uid)

        loss.backward()

        if train_method == "normal":
            optimizer.step()
        elif train_method == "interleave":
            # switch optimizer on every batch
            if (j+1) % 2 == 0:
                optimizer_features.step()
            else:
                optimizer_ids.step()
        else:
            # "alternate": switch optimizer on every epoch
            if (epoch+1) % 2 == 0:
                optimizer_features.step()
            else:
                optimizer_ids.step()

        running_loss += loss.item()

    # drop the last batch's activations before evaluation to free GPU memory
    del outputs
    torch.cuda.empty_cache()
    training_hit,training_ndcg = Recall_Object(model,train_dl,"train")
    validation_hit,validation_ndcg = Recall_Object(model,val_dl,"validation")
    testing_hit,testing_ndcg = Recall_Object(model,test_dl,"test")

    # model selection on the first validation ndcg value
    if max_val_ndcg[0] < validation_ndcg[0]:
        max_val_hit = validation_hit
        max_test_hit = testing_hit
        max_train_hit = training_hit

        max_train_ndcg = training_ndcg
        max_val_ndcg = validation_ndcg
        max_test_ndcg = testing_ndcg

    torch.cuda.empty_cache()
    print("Training Loss: {:.5f}".format(running_loss/len(train_dl)))

    print("Train Hits \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*training_hit))
    print("Train ndcg \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*training_ndcg))

    print("Valid Hits \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*validation_hit))
    print("Valid ndcg \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*validation_ndcg))

    print("Test Hits \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*testing_hit))
    print("Test ndcg \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*testing_ndcg))

print("="*100)
print("Maximum Training Hit \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*max_train_hit))
print("Maximum Validation Hit \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*max_val_hit))
print("Maximum Testing Hit \t @10: {:.5f} \t @5 : {:.5f} \t @1 : {:.5f}".format(*max_test_hit))

  0%|                                                                                           | 0/95 [00:00<?, ?it/s]






NameError: name 'loss_type' is not defined

In [1]:
# max_*_hit are 3-tuples (printed elsewhere in this notebook as @10, @5, @1),
# so "{:.2f}" cannot format them directly; report the first entry.
# NOTE(review): this assumes the first entry is the Hit@k value for the
# configured k (k = 10 in the config cell) — confirm against Recall_E_prob.
print("="*100)
print("Maximum Training Hit@{:d}: {:.2f}".format(k,max_train_hit[0]))
print("Maximum Validation Hit@{:d}: {:.2f}".format(k,max_val_hit[0]))
print("Maximum Testing Hit@{:d}: {:.2f}".format(k,max_test_hit[0]))



NameError: name 'k' is not defined

In [30]:
# Toy example of a session and its next-item targets: the label sequence is the
# input shifted left by one position, followed by the next item.
x, y = [5,3,10,11], [3,10,11,13]
print(torch.cuda.is_available())
# slicing past the end of a list is safe and simply returns the whole list
print(y[:5])
# notes:
#  - the gru layer stores the hidden layer when using sequence input
#  - use the final hidden state from the packed output
#  - cross entropy loss is used


True
[3, 10, 11, 13]
