In [1]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from model.sasrec import SASRecModel
from trainers import Trainer
from utils import EarlyStopping, check_path, set_seed, set_logger
from dataset import get_seq_dic, get_dataloder, get_rating_matrix
import pandas as pd

# Set up arguments
class Args:
    data_dir = "./data/"
    output_dir = "output/"
    data_name = "input_test-Copy1"
    do_eval = False
    load_model = None
    train_name = "sasrec_model"
    num_items = 10
    num_users = 10
    lr = 0.001
    batch_size = 256
    epochs = 10
    no_cuda = False
    log_freq = 1
    patience = 2
    num_workers = 0  # Set num_workers to 0 to avoid BrokenPipeError on Windows
    seed = 42
    weight_decay = 0.0
    adam_beta1 = 0.9
    adam_beta2 = 0.999
    gpu_id = "0"
    variance = 5
    # model_type = 'bert4rec'
    model_type = 'sasrec_model'
    max_seq_length = 15
    hidden_size = 64
    num_hidden_layers = 2
    hidden_act = "gelu"
    num_attention_heads = 2
    attention_probs_dropout_prob = 0.5
    hidden_dropout_prob = 0.5
    initializer_range = 0.02
    item_size = 10

args = Args()

In [2]:
if __name__ == "__main__":
    # Initialize logger
    log_path = os.path.join(args.output_dir, args.train_name + '.log')
    logger = set_logger(log_path)

    # Set seed for reproducibility
    set_seed(args.seed)

    # Create output directory if not exists
    check_path(args.output_dir)

    # Set CUDA environment
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    args.cuda_condition = torch.cuda.is_available() and not args.no_cuda

    # Load data
    seq_dic, max_item, num_users = get_seq_dic(args)
    args.item_size = max_item + 1
    args.num_users = num_users + 1

    # Prepare checkpoint paths
    args.checkpoint_path = os.path.join(args.output_dir, args.train_name + '.pt')
    args.same_target_path = os.path.join(args.data_dir, args.data_name+'_same_target.npy')

    # Load dataloaders
    train_dataloader, eval_dataloader, test_dataloader = get_dataloder(args, seq_dic)

    # Initialize and log model
    logger.info(str(args))
    model = SASRecModel(args=args)
    logger.info(model)

    # Initialize trainer
    trainer = Trainer(model, train_dataloader, eval_dataloader, test_dataloader, args, logger)

    # Generate rating matrices for evaluation
    args.valid_rating_matrix, args.test_rating_matrix = get_rating_matrix(args.data_name, seq_dic, max_item)

    # Training and evaluation
    if args.do_eval:
        if args.load_model is None:
            logger.info(f"No model input!")
            exit(0)
        else:
            args.checkpoint_path = os.path.join(args.output_dir, args.load_model + '.pt')
            trainer.load(args.checkpoint_path)
            logger.info(f"Load model from {args.checkpoint_path} for test!")
            scores, result_info = trainer.test(0)
    else:
        early_stopping = EarlyStopping(args.checkpoint_path, logger=logger, patience=args.patience, verbose=True)
        for epoch in range(args.epochs):
            trainer.train(epoch)
            scores, _ = trainer.valid(epoch)
            # evaluate on MRR
            early_stopping(np.array(scores[-1:]), trainer.model)
            if early_stopping.early_stop:
                logger.info("Early stopping")
                break

        logger.info("---------------Test Score---------------")
        trainer.model.load_state_dict(torch.load(args.checkpoint_path))
        scores, result_info = trainer.test(0)

    logger.info(args.train_name)
    logger.info(result_info)

2024-07-29 19:01:32,815 - <__main__.Args object at 0x1490212e0>
2024-07-29 19:01:32,892 - SASRecModel(
  (item_embeddings): Embedding(33, 64, padding_idx=0)
  (position_embeddings): Embedding(15, 64)
  (LayerNorm): LayerNorm()
  (dropout): Dropout(p=0.5, inplace=False)
  (item_encoder): TransformerEncoder(
    (blocks): ModuleList(
      (0-1): 2 x TransformerBlock(
        (layer): MultiHeadAttention(
          (query): Linear(in_features=64, out_features=64, bias=True)
          (key): Linear(in_features=64, out_features=64, bias=True)
          (value): Linear(in_features=64, out_features=64, bias=True)
          (softmax): Softmax(dim=-1)
          (attn_dropout): Dropout(p=0.5, inplace=False)
          (dense): Linear(in_features=64, out_features=64, bias=True)
          (LayerNorm): LayerNorm((64,), eps=1e-12, elementwise_affine=True)
          (out_dropout): Dropout(p=0.5, inplace=False)
        )
        (feed_forward): FeedForward(
          (dense_1): Linear(in_features=64, o

In [2]:
# Standalone predict function
def predict(model, input_ids, device):
    model.eval()
    input_ids = torch.tensor(input_ids, dtype=torch.long).to(device)
    with torch.no_grad():
        recommend_output = model.forward(input_ids, all_sequence_output=False)
        recommend_output = recommend_output[:, -1, :]  # Last item in the sequence

        test_item_emb = model.item_embeddings.weight
        rating_pred = torch.matmul(recommend_output, test_item_emb.transpose(0, 1))
        rating_pred = rating_pred.cpu().data.numpy().copy()

        top20_indices = np.argpartition(rating_pred, -20)[:, -20:]
        arr_ind = rating_pred[np.arange(len(rating_pred))[:, None], top20_indices]
        arr_ind_argsort = np.argsort(arr_ind)[np.arange(len(rating_pred)), ::-1]
        top20_indices = top20_indices[np.arange(len(rating_pred))[:, None], arr_ind_argsort]

    return top20_indices

if __name__ == "__main__":
    # Set CUDA environment
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_id
    args.cuda_condition = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if args.cuda_condition else "cpu")

    # Load the checkpoint to get num_items
    model_checkpoint_path = os.path.join(args.output_dir, args.train_name + '.pt')
    checkpoint = torch.load(model_checkpoint_path, map_location=device)

    args.item_size = checkpoint['item_embeddings.weight'].size(0)
    
    # Initialize and load model
    model = SASRecModel(args=args)
    model.to(device)
    model.load_state_dict(checkpoint)

    # Load the input data file
    input_file_path = './data/input_test-Copy1.txt'
    with open(input_file_path, 'r') as f:
        input_data = f.readlines()

    # Prepare input for prediction
    input_ids = []
    for line in input_data:
        items = list(map(int, line.strip().split()))
        pad_len = args.max_seq_length - len(items)
        input_ids.append([0] * pad_len + items)

    # Predict top 20 items for each row
    predictions = predict(model, input_ids, device)

    data = {'Input': [line.strip() for line in input_data], 'Top 20 Predictions': [pred.tolist() for pred in predictions]}
    df = pd.DataFrame(data)

    output_file_path = './output/predictions.csv'
    df.to_csv(output_file_path, index=False)

    # Output predictions
    for i, pred in enumerate(predictions):
        print(f"Input {i}: {input_data[i].strip()} -> Top 20 Predictions: {pred.tolist()}")

Input 0: 0 12 13 14 -> Top 20 Predictions: [14, 9, 5, 4, 12, 0, 30, 23, 19, 3, 10, 11, 25, 31, 1, 16, 29, 2, 28, 18]
Input 1: 1 31 2 2 -> Top 20 Predictions: [2, 3, 9, 10, 29, 1, 5, 12, 24, 13, 32, 19, 31, 28, 14, 18, 23, 11, 25, 0]
Input 2: 2 18 31 32 -> Top 20 Predictions: [32, 9, 3, 2, 11, 0, 30, 29, 23, 31, 14, 19, 10, 12, 28, 13, 5, 17, 26, 24]
