# data 준비하기

In [None]:
import time
import os
import csv

import operator
import pickle
import datetime
import math

import pandas as pd

## download data

In [None]:
!gdown 1Yx7C701EnMs9phbJr2cnzg_yx8Tr0t--

Downloading...
From: https://drive.google.com/uc?id=1Yx7C701EnMs9phbJr2cnzg_yx8Tr0t--
To: /content/sr_gnn_sample.csv
  0% 0.00/395k [00:00<?, ?B/s]100% 395k/395k [00:00<00:00, 111MB/s]


In [None]:
# The file is ';'-delimited (the csv.DictReader in the preprocessing cell reads it
# with delimiter=';'), so the separator must be given explicitly; with the default
# ',' every row collapses into a single "session_id;user_id;..." column.
sample_raw = pd.read_csv('./sr_gnn_sample.csv', sep=';')
sample_raw.head(20)

Unnamed: 0,session_id;user_id;item_id;timeframe;eventdate
0,1;NA;81766;526309;2016-05-09
1,1;NA;31331;1031018;2016-05-09
2,1;NA;32118;243569;2016-05-09
3,1;NA;9654;75848;2016-05-09
4,1;NA;32627;1112408;2016-05-09
5,1;NA;33043;173912;2016-05-09
6,1;NA;12352;329870;2016-05-09
7,1;NA;35077;390072;2016-05-09
8,1;NA;36118;487369;2016-05-09
9,1;NA;129055;991416;2016-05-09


## preprocess data

In [None]:
dataset = './sr_gnn_sample.csv'

# Read the ';'-delimited click log. For every session we collect its
# (item_id, timeframe) clicks and remember the event date of the session as a
# timestamp (time.mktime interprets the date in the machine's local time zone).
print("-- Starting @ %ss" % datetime.datetime.now())
with open(dataset, "r") as f:
    reader = csv.DictReader(f, delimiter=';')
    sess_clicks = {}    # session id -> list of clicks
    sess_date = {}      # session id -> timestamp of the session's event date
    ctr = 0             # number of rows read
    curid = -1
    curdate = None
    for data in reader:
        sessid = data['session_id']
        # A new session id means the previous session is complete: store the
        # date seen on its last row.
        if curdate and curid != sessid:
            sess_date[curid] = time.mktime(time.strptime(curdate, '%Y-%m-%d'))
        curid = sessid
        item = (data['item_id'], int(data['timeframe']))
        curdate = data['eventdate']
        sess_clicks.setdefault(sessid, []).append(item)
        ctr += 1

    # The last session in the file is never followed by an id change, so its
    # date is recorded here.
    date = time.mktime(time.strptime(curdate, '%Y-%m-%d'))

    # Order the clicks of each session by timeframe and keep only the item ids.
    for i, clicks in list(sess_clicks.items()):
        ordered = sorted(clicks, key=lambda click: click[1])
        sess_clicks[i] = [item_id for item_id, _ in ordered]
    sess_date[curid] = date
print("-- Reading data @ %ss" % datetime.datetime.now())

# Drop sessions that consist of a single click
for sid in [s for s, clicks in sess_clicks.items() if len(clicks) == 1]:
    del sess_clicks[sid]
    del sess_date[sid]

# Count how many times each item occurs in the remaining sessions
iid_counts = {}
for seq in sess_clicks.values():
    for iid in seq:
        iid_counts[iid] = iid_counts.get(iid, 0) + 1

# (item, count) pairs ordered from rarest to most frequent
sorted_counts = sorted(iid_counts.items(), key=operator.itemgetter(1))

length = len(sess_clicks)
# Remove items seen fewer than 5 times; a session that is left with fewer than
# two clicks is discarded together with its date.
for sid in list(sess_clicks):
    kept = [iid for iid in sess_clicks[sid] if iid_counts[iid] >= 5]
    if len(kept) >= 2:
        sess_clicks[sid] = kept
    else:
        del sess_clicks[sid]
        del sess_date[sid]

# Split sessions into train / test according to their date
dates = list(sess_date.items())     # [(session_id, timestamp), ...]

# Find the most recent session timestamp
maxdate = dates[0][1]
for _, stamp in dates[1:]:
    if stamp > maxdate:
        maxdate = stamp

# The final 7 days (86400 seconds each) form the test period
splitdate = maxdate - 86400 * 7

print('Splitting date', splitdate)
# NOTE: both comparisons are strict, so a session stamped exactly at
# `splitdate` ends up in neither the training nor the test set.
# Each list is ordered by timestamp: [(session_id, timestamp), ...]
tra_sess = sorted((entry for entry in dates if entry[1] < splitdate),
                  key=operator.itemgetter(1))
tes_sess = sorted((entry for entry in dates if entry[1] > splitdate),
                  key=operator.itemgetter(1))
print(len(tra_sess))    # author's reference counts (presumably from other datasets): 186670, 7966257
print(len(tes_sess))    # author's reference counts (presumably from other datasets): 15979, 15324
print(tra_sess[:3])
print(tes_sess[:3])


-- Starting @ 2023-11-30 10:57:38.295614s
-- Reading data @ 2023-11-30 10:57:38.387347s
Splitting date 1464134400.0
469
47
[('2671', 1451952000.0), ('1211', 1452384000.0), ('3780', 1452384000.0)]
[('1864', 1464220800.0), ('1867', 1464220800.0), ('1868', 1464220800.0)]


In [None]:

# Choosing item count >=5 gives approximately the same number of items as reported in paper
# Raw item id -> consecutive integer id (starting at 1); filled by obtian_tra().
item_dict = {}


def obtian_tra():
    """Turn the training sessions into sequences of renumbered item ids.

    Reads the module-level ``tra_sess`` and ``sess_clicks`` and assigns every
    previously unseen item the next free integer id (starting at 1), recording
    the mapping in the module-level ``item_dict``. Prints the next unused id
    once all sessions are processed.

    Returns:
        (train_ids, train_dates, train_seqs): three parallel lists holding the
        session id, the session timestamp and the renumbered item sequence.
    """
    train_ids, train_dates, train_seqs = [], [], []
    item_ctr = 1
    for s, date in tra_sess:
        outseq = []
        for raw_item in sess_clicks[s]:
            mapped = item_dict.get(raw_item)
            if mapped is None:
                # First occurrence of this item: hand out the next id.
                mapped = item_ctr
                item_dict[raw_item] = mapped
                item_ctr += 1
            outseq.append(mapped)
        if len(outseq) >= 2:  # shorter sequences are skipped (doesn't occur)
            train_ids.append(s)
            train_dates.append(date)
            train_seqs.append(outseq)
    print(item_ctr)     # 43098, 37484
    return train_ids, train_dates, train_seqs


# Convert test sessions to sequences, ignoring items that do not appear in training set
def obtian_tes():
    """Turn the test sessions into sequences of renumbered item ids.

    Reads the module-level ``tes_sess``, ``sess_clicks`` and ``item_dict``.
    Items without an entry in ``item_dict`` are dropped, and a session that
    keeps fewer than two items is skipped.

    Returns:
        (test_ids, test_dates, test_seqs): three parallel lists holding the
        session id, the session timestamp and the renumbered item sequence.
    """
    test_ids, test_dates, test_seqs = [], [], []
    for s, date in tes_sess:
        outseq = [item_dict[raw_item] for raw_item in sess_clicks[s] if raw_item in item_dict]
        if len(outseq) >= 2:
            test_ids.append(s)
            test_dates.append(date)
            test_seqs.append(outseq)
    return test_ids, test_dates, test_seqs


# Order matters: obtian_tra() fills item_dict, which obtian_tes() then uses to
# drop items that never appear in a training session.
tra_ids, tra_dates, tra_seqs = obtian_tra()
tes_ids, tes_dates, tes_seqs = obtian_tes()


def process_seqs(iseqs, idates):
    """Expand every sequence into (prefix, next-item) training examples.

    For a sequence ``[a, b, c]`` this produces the prefix ``[a, b]`` with label
    ``c`` and then the prefix ``[a]`` with label ``b`` (longest prefix first).

    Args:
        iseqs: list of item sequences.
        idates: list of dates, parallel to ``iseqs``.

    Returns:
        (out_seqs, out_dates, labs, ids): parallel lists with the prefix, the
        date of the source sequence, the label and the position of the source
        sequence inside ``iseqs``.
    """
    out_seqs, out_dates, labs, ids = [], [], [], []
    for seq_index, (seq, date) in enumerate(zip(iseqs, idates)):
        for cut in range(1, len(seq)):
            labs.append(seq[-cut])
            out_seqs.append(seq[:-cut])
            out_dates.append(date)
            ids.append(seq_index)
    return out_seqs, out_dates, labs, ids


# Expand the sessions into (prefix, label) examples for training and testing.
tr_seqs, tr_dates, tr_labs, tr_ids = process_seqs(tra_seqs, tra_dates)
te_seqs, te_dates, te_labs, te_ids = process_seqs(tes_seqs, tes_dates)
tra = (tr_seqs, tr_labs)
tes = (te_seqs, te_labs)
print(len(tr_seqs))
print(len(te_seqs))
print(tr_seqs[:3], tr_dates[:3], tr_labs[:3])
print(te_seqs[:3], te_dates[:3], te_labs[:3])

# Average length of the original (un-expanded) sessions. The total is kept in
# its own name so that the builtin `all` is not shadowed for later cells.
total_clicks = sum(len(seq) for seq in tra_seqs) + sum(len(seq) for seq in tes_seqs)
print('avg length: ', total_clicks / (len(tra_seqs) + len(tes_seqs) * 1.0))

# exist_ok avoids the check-then-create race of os.path.exists + os.makedirs.
os.makedirs('sample', exist_ok=True)

# Write each pickle through a context manager so the file is flushed and
# closed before any later cell reads it back.
for out_path, payload in (
    ('sample/train.txt', tra),
    ('sample/test.txt', tes),
    ('sample/all_train_seq.txt', tra_seqs),
):
    with open(out_path, 'wb') as out_file:
        pickle.dump(payload, out_file)


310
1205
99
[[1, 2], [1], [4]] [1451952000.0, 1451952000.0, 1452384000.0] [3, 2, 5]
[[282], [281, 308], [281]] [1464220800.0, 1464220800.0, 1464220800.0] [282, 281, 308]
avg length:  3.5669291338582676


# util functions

In [None]:
def data_masks(all_usr_pois, item_tail):
    """Pad every sequence to the length of the longest one and build masks.

    Args:
        all_usr_pois: list of item sequences (lists).
        item_tail: one-element list holding the padding value, e.g. ``[0]``.

    Returns:
        (us_pois, us_msks, len_max): the padded sequences, the matching 1/0
        masks (1 for a real item, 0 for padding) and the padded length.
    """
    us_lens = [len(upois) for upois in all_usr_pois]
    len_max = max(us_lens)
    us_pois, us_msks = [], []
    for upois, le in zip(all_usr_pois, us_lens):
        missing = len_max - le
        us_pois.append(upois + item_tail * missing)
        us_msks.append([1] * le + [0] * missing)
    return us_pois, us_msks, len_max


def split_validation(train_set, valid_portion):
    """Randomly split a ``(inputs, labels)`` pair into training and validation parts.

    The sample indices are shuffled with ``np.random.shuffle``; the first
    ``round(n * (1 - valid_portion))`` shuffled samples become the training
    part and the rest the validation part.

    Returns:
        ((train_x, train_y), (valid_x, valid_y))
    """
    train_set_x, train_set_y = train_set
    n_samples = len(train_set_x)

    sidx = np.arange(n_samples, dtype='int32')
    np.random.shuffle(sidx)

    n_train = int(np.round(n_samples * (1. - valid_portion)))
    train_idx, valid_idx = sidx[:n_train], sidx[n_train:]

    def take(values, indices):
        return [values[k] for k in indices]

    valid_part = (take(train_set_x, valid_idx), take(train_set_y, valid_idx))
    train_part = (take(train_set_x, train_idx), take(train_set_y, train_idx))
    return train_part, valid_part

def trans_to_cuda(variable):
    """Return ``variable.cuda()`` when CUDA is available, otherwise ``variable`` unchanged."""
    return variable.cuda() if torch.cuda.is_available() else variable

def trans_to_cpu(variable):
    """Return ``variable.cpu()`` when CUDA is available, otherwise ``variable`` unchanged."""
    return variable.cpu() if torch.cuda.is_available() else variable

# Data class

In [None]:
class Data():
    """Padded session dataset that produces batch index slices and session graphs."""

    def __init__(self, data, shuffle=False):
        """Pad the sequences in ``data[0]`` with 0 and store them with the labels ``data[1]``.

        Example: ``[[4, 5, 7, 8], [1, 3, 7]]`` becomes
        ``[[4, 5, 7, 8], [1, 3, 7, 0]]`` with mask ``[[1, 1, 1, 1], [1, 1, 1, 0]]``;
        every sequence is padded to the length of the longest one.
        """
        raw_inputs = data[0]
        padded, mask, len_max = data_masks(raw_inputs, [0])

        self.inputs = np.asarray(padded)
        self.mask = np.asarray(mask)
        self.len_max = len_max
        self.targets = np.asarray(data[1])
        self.length = len(padded)
        self.shuffle = shuffle  # bool: reshuffle the samples on every generate_batch() call

    def generate_batch(self, batch_size):
        """Return a list of index arrays, one per batch.

        When ``self.shuffle`` is set, inputs, masks and targets are first
        permuted in place with one common random order. With ``length = 10``
        and ``batch_size = 4`` the result is ``[[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]``.
        """
        if self.shuffle:
            order = np.arange(self.length)
            np.random.shuffle(order)
            self.inputs = self.inputs[order]
            self.mask = self.mask[order]
            self.targets = self.targets[order]

        # Number of batches, counting a final partial batch if there is one.
        n_batch = int(self.length / batch_size) + (self.length % batch_size != 0)

        # Build equally sized index blocks, then cut the last block down to the
        # number of samples that are really left.
        slices = np.split(np.arange(n_batch * batch_size), n_batch)
        last_size = self.length - batch_size * (n_batch - 1)
        slices[-1] = slices[-1][:last_size]
        return slices

    def get_slice(self, slice):
        """Build the graph inputs for the samples selected by ``slice``.

        Args:
            slice: indices of the samples in the batch, e.g. ``[0, 1, 2, 3]``.

        Returns:
            (alias_inputs, A, items, mask, targets) where, per sample,
            ``items`` is the sorted list of unique values of the padded
            sequence (padded with 0 to the largest node count in the batch),
            ``alias_inputs`` gives for every sequence position the index of its
            item inside ``items``, and ``A`` is an ``(n, 2n)`` matrix holding
            the two normalised adjacency matrices side by side.
        """
        inputs = self.inputs[slice]
        mask = self.mask[slice]
        targets = self.targets[slice]

        # Unique values of every padded sequence. The largest count in the
        # batch fixes the size all graphs are padded to.
        nodes_per_seq = [np.unique(u_input) for u_input in inputs]
        n_node = [len(node) for node in nodes_per_seq]
        max_n_node = np.max(n_node)

        items, A, alias_inputs = [], [], []
        for u_input, node in zip(inputs, nodes_per_seq):
            # Node list of this sequence (not the sequence itself), 0-padded.
            items.append(node.tolist() + (max_n_node - len(node)) * [0])

            # Position of every sequence element inside `node`.
            alias = [np.where(node == item)[0][0] for item in u_input]

            # Mark an edge u -> v for every pair of consecutive items,
            # stopping as soon as the next element is padding (0).
            u_A = np.zeros((max_n_node, max_n_node))
            for pos in range(len(u_input) - 1):
                if u_input[pos + 1] == 0:
                    break
                u_A[alias[pos], alias[pos + 1]] = 1

            # Normalise by column sums and by row sums (empty sums are set to
            # 1 to avoid dividing by zero), then place both results side by side.
            u_sum_in = np.sum(u_A, 0)
            u_sum_in[u_sum_in == 0] = 1
            u_A_in = np.divide(u_A, u_sum_in)
            u_sum_out = np.sum(u_A, 1)
            u_sum_out[u_sum_out == 0] = 1
            u_A_out = np.divide(u_A.transpose(), u_sum_out)
            A.append(np.concatenate([u_A_in, u_A_out]).transpose())

            alias_inputs.append(alias)

        return alias_inputs, A, items, mask, targets

## return 값 구경하기

In [None]:
import numpy as np

# Load the training examples written by the preprocessing cell. The context
# manager closes the file handle, which a bare open() call would leave open.
with open('./sample/train.txt', 'rb') as train_file:
    train_data = pickle.load(train_file)
train_data = Data(train_data, shuffle=False)

print('inputs\n', train_data.inputs)
print('target\n', train_data.targets)

# One hand-picked batch of sample indices to look at.
slices = [[43, 44, 45, 46]]
print('\n\ntrain_data\n', train_data.inputs[slices[0]])
print('target_data\n', train_data.targets[slices[0]])


# Show everything get_slice() returns for each batch.
for i in slices:
    alias_inputs, A, items, mask, targets = train_data.get_slice(i)
    print('\n\ni\n', i)
    print('\n\nalias_inputs\n', alias_inputs)
    print('\n\nA\n', A)
    print('\n\nitems\n', items)
    print('\n\nmask\n', mask)
    print('\n\ntargets\n', targets)

inputs
 [[  1   2   0 ...   0   0   0]
 [  1   0   0 ...   0   0   0]
 [  4   0   0 ...   0   0   0]
 ...
 [272 287 287 ...   0   0   0]
 [272 287   0 ...   0   0   0]
 [272   0   0 ...   0   0   0]]
target
 [  3   2   5 ... 287 287 287]


train_data
 [[12 13 12 13 35 35 12 13  0  0  0  0  0  0  0  0]
 [12 13 12 13 35 35 12  0  0  0  0  0  0  0  0  0]
 [12 13 12 13 35 35  0  0  0  0  0  0  0  0  0  0]
 [12 13 12 13 35  0  0  0  0  0  0  0  0  0  0  0]]
target_data
 [12 13 12 35]


i
 [43, 44, 45, 46]


alias_inputs
 [[1, 2, 1, 2, 3, 3, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0], [1, 2, 1, 2, 3, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 2, 1, 2, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 2, 1, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]


A
 [array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0.5, 0.5, 0. , 0. , 1. , 0. ],
       [0. , 1. , 0. , 0. , 0. , 0.5, 0. , 0.5],
       [0. , 0. , 0.5, 0.5, 0. , 0.5, 0. , 0.5]]), array([[0. , 0. , 0. , 0. , 0. , 0. , 0. , 0. ],
       [0. , 0. , 0.5, 0

# model

In [None]:
import torch
from torch import nn
from torch.nn import Module, Parameter
import torch.nn.functional as F

In [None]:
class GNN(Module):
    """Gated graph layer applied ``step`` times to the node states of a batch of graphs.

    The raw ``Parameter`` tensors are allocated with ``torch.Tensor(...)`` and
    are not given values here; in this file ``SessionGraph.reset_parameters()``
    fills them.
    """

    def __init__(self, hidden_size, step=1):
        super().__init__()
        self.step = step
        self.hidden_size = hidden_size
        self.input_size = 2 * hidden_size   # incoming + outgoing messages, concatenated
        self.gate_size = 3 * hidden_size    # reset, update and candidate parts stacked

        # Gate weights and biases.
        self.w_ih = Parameter(torch.Tensor(self.gate_size, self.input_size))
        self.w_hh = Parameter(torch.Tensor(self.gate_size, self.hidden_size))
        self.b_ih = Parameter(torch.Tensor(self.gate_size))
        self.b_hh = Parameter(torch.Tensor(self.gate_size))
        # Biases added to the aggregated incoming / outgoing messages.
        self.b_iah = Parameter(torch.Tensor(self.hidden_size))
        self.b_oah = Parameter(torch.Tensor(self.hidden_size))

        # Linear maps applied to the node states before aggregation.
        self.linear_edge_in = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
        self.linear_edge_out = nn.Linear(self.hidden_size, self.hidden_size, bias=True)

    def GNNCell(self, A, hidden):
        """Run one propagation step and return the updated node states.

        Args:
            A: tensor whose last dimension holds two adjacency blocks of width
                ``A.shape[1]`` each (first block, then second block).
            hidden: node states; the last dimension is the hidden size.
        """
        n = A.shape[1]
        adj_in = A[:, :, :n]
        adj_out = A[:, :, n: 2 * n]

        # Aggregate the transformed neighbour states along both adjacency blocks.
        input_in = torch.matmul(adj_in, self.linear_edge_in(hidden)) + self.b_iah
        input_out = torch.matmul(adj_out, self.linear_edge_out(hidden)) + self.b_oah
        inputs = torch.cat((input_in, input_out), dim=2)

        gi = F.linear(inputs, self.w_ih, self.b_ih)
        gh = F.linear(hidden, self.w_hh, self.b_hh)
        i_r, i_i, i_n = gi.chunk(3, dim=2)
        h_r, h_i, h_n = gh.chunk(3, dim=2)

        resetgate = torch.sigmoid(i_r + h_r)            # reset gate
        inputgate = torch.sigmoid(i_i + h_i)            # update gate
        newgate = torch.tanh(i_n + resetgate * h_n)     # candidate state

        # Blend the candidate with the previous state, weighted by the update gate.
        hy = newgate + inputgate * (hidden - newgate)
        return hy

    def forward(self, A, hidden):
        """Apply ``GNNCell`` ``self.step`` times and return the final node states."""
        for _ in range(self.step):
            hidden = self.GNNCell(A, hidden)
        return hidden


class SessionGraph(Module):
    """Session-graph recommender: item embedding, GNN propagation and attention read-out.

    The instance also owns its loss function, optimizer and learning-rate
    scheduler, which the training loop accesses as attributes.
    """

    def __init__(self,
                 n_node,
                 hiddenSize,
                 batchSize,
                 nonhybrid,
                 step,
                 lr,
                 l2,
                 lr_dc_step,
                 lr_dc
                 ):
        """Create the layers first, then the optimizer, scheduler and initial weights.

        Args:
            n_node: number of rows of the embedding table (index 0 is the padding item).
            hiddenSize: embedding / hidden state size.
            batchSize: batch size, stored for the training loop.
            nonhybrid: if true, the session vector is the attention-weighted sum
                only; otherwise it is combined with the last item's state.
            step: number of GNN propagation steps.
            lr: Adam learning rate.
            l2: Adam weight decay.
            lr_dc_step: ``step_size`` of the StepLR scheduler.
            lr_dc: ``gamma`` of the StepLR scheduler.
        """
        super(SessionGraph, self).__init__()
        self.n_node = n_node
        self.hidden_size = hiddenSize
        self.batch_size = batchSize
        self.nonhybrid = nonhybrid
        self.embedding = nn.Embedding(self.n_node, self.hidden_size)
        self.gnn = GNN(self.hidden_size, step=step)

        # Attention and read-out layers. They MUST be created before the
        # optimizer and before reset_parameters(): both iterate over
        # self.parameters() at call time, so a layer registered afterwards
        # would never be updated by the optimizer nor re-initialised.
        self.linear_one = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
        self.linear_two = nn.Linear(self.hidden_size, self.hidden_size, bias=True)
        self.linear_three = nn.Linear(self.hidden_size, 1, bias=False)
        self.linear_transform = nn.Linear(self.hidden_size * 2, self.hidden_size, bias=True)

        self.loss_function = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr, weight_decay=l2)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=lr_dc_step, gamma=lr_dc)
        self.reset_parameters()

    def reset_parameters(self):
        """Draw every parameter uniformly from ``[-1/sqrt(hidden_size), 1/sqrt(hidden_size)]``."""
        stdv = 1.0 / math.sqrt(self.hidden_size)
        for weight in self.parameters():
            weight.data.uniform_(-stdv, stdv)

    def compute_scores(self, hidden, mask):
        """Score every non-padding item for each session in the batch.

        Args:
            hidden: per-position hidden states, batch_size x seq_length x latent_size.
            mask: batch_size x seq_length, 1 for real positions and 0 for padding.

        Returns:
            batch_size x (n_node - 1) score matrix; column ``k`` belongs to the
            embedding row ``k + 1`` because row 0 is skipped.
        """
        # State of the last real position of every session.
        ht = hidden[torch.arange(mask.shape[0]).long(), torch.sum(mask, 1) - 1]  # batch_size x latent_size

        q1 = self.linear_one(ht).view(ht.shape[0], 1, ht.shape[1])  # batch_size x 1 x latent_size
        q2 = self.linear_two(hidden)  # batch_size x seq_length x latent_size
        alpha = self.linear_three(torch.sigmoid(q1 + q2))
        # Attention-weighted sum over the real (unmasked) positions.
        a = torch.sum(alpha * hidden * mask.view(mask.shape[0], -1, 1).float(), 1)

        if not self.nonhybrid:
            # Hybrid mode: combine the weighted sum with the last item's state.
            a = self.linear_transform(torch.cat([a, ht], 1))

        b = self.embedding.weight[1:]  # n_nodes x latent_size

        scores = torch.matmul(a, b.transpose(1, 0))
        return scores

    def forward(self, inputs, A):
        """Embed the node ids in ``inputs`` and propagate them through the GNN with ``A``."""
        hidden = self.embedding(inputs)
        hidden = self.gnn(A, hidden)
        return hidden



def forward(model, i, data):
    """Run ``model`` on one batch and return ``(targets, scores)``.

    Args:
        model: SessionGraph instance.
        i: indices of the samples that form the batch.
        data: Data instance the batch is taken from.

    Returns:
        targets: the labels of the batch exactly as ``data.get_slice`` returns them.
        scores: the output of ``model.compute_scores`` for the batch.
    """
    # Build the per-batch graph inputs (see Data.get_slice()).
    alias_inputs, A, items, mask, targets = data.get_slice(i)

    # Go through one contiguous ndarray per input: creating a tensor from a
    # list of ndarrays is very slow (PyTorch warns about it), and the previous
    # torch.Tensor(...).long() round-trip passed integer ids through float32.
    alias_inputs = trans_to_cuda(torch.from_numpy(np.asarray(alias_inputs)).long())
    items = trans_to_cuda(torch.from_numpy(np.asarray(items)).long())
    A = trans_to_cuda(torch.from_numpy(np.asarray(A)).float())
    mask = trans_to_cuda(torch.from_numpy(np.asarray(mask)).long())

    hidden = model(items, A)

    # Map node states back to sequence positions: for sample b and position t
    # pick hidden[b, alias_inputs[b, t]] (same result as stacking per-sample lookups).
    batch_index = torch.arange(alias_inputs.shape[0], device=alias_inputs.device).unsqueeze(1)
    seq_hidden = hidden[batch_index, alias_inputs]
    return targets, model.compute_scores(seq_hidden, mask)


def train_test(model, train_data, test_data):
    """Train ``model`` for one epoch on ``train_data`` and evaluate it on ``test_data``.

    Args:
        model: SessionGraph instance (provides optimizer, scheduler, loss_function, batch_size).
        train_data: Data instance used for training.
        test_data: Data instance used for evaluation.

    Returns:
        (hit, mrr): hit rate and mean reciprocal rank over the top-20
        predictions, both as percentages.
    """
    print('start training: ', datetime.datetime.now())
    model.train()
    total_loss = 0.0
    slices = train_data.generate_batch(model.batch_size)  # e.g. [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    report_every = int(len(slices) / 5 + 1)
    for j, i in enumerate(slices):
        model.optimizer.zero_grad()
        targets, scores = forward(model, i, train_data)
        targets = trans_to_cuda(torch.Tensor(targets).long())
        # Score column k belongs to item id k + 1, hence the shift by one.
        loss = model.loss_function(scores, targets - 1)
        loss.backward()
        model.optimizer.step()
        # Accumulate a plain float; adding the tensor itself would keep every
        # batch's autograd graph alive until the end of the epoch.
        total_loss += loss.item()
        if j % report_every == 0:
            print('[%d/%d] Loss: %.4f' % (j, len(slices), loss.item()))
    print('\tLoss:\t%.3f' % total_loss)
    # Advance the learning-rate schedule only after this epoch's optimizer
    # steps; stepping the scheduler first skips the first value of the schedule.
    model.scheduler.step()

    print('start predicting: ', datetime.datetime.now())
    model.eval()
    hit, mrr = [], []
    slices = test_data.generate_batch(model.batch_size)
    with torch.no_grad():  # no gradients are needed for evaluation
        for i in slices:
            targets, scores = forward(model, i, test_data)
            # Indices of the best-scored items (at most 20, fewer if there are fewer candidates).
            top_k = min(20, scores.shape[1])
            sub_scores = trans_to_cpu(scores.topk(top_k)[1]).detach().numpy()
            for score, target in zip(sub_scores, targets):
                hit.append(np.isin(target - 1, score))
                rank = np.where(score == target - 1)[0]
                if len(rank) == 0:
                    mrr.append(0)
                else:
                    mrr.append(1 / (rank[0] + 1))
    hit = np.mean(hit) * 100
    mrr = np.mean(mrr) * 100
    return hit, mrr

# train!

## hyperparameters

In [None]:
DATASET = 'sample' # directory (relative to the working dir) that holds train.txt / test.txt
BATCH_SIZE = 100 # number of samples per batch
HIDDEN_SIZE = 400 # embedding / hidden state size
EPOCH = 10 # maximum number of epochs to train for
LR = 0.001 # Adam learning rate; values tried: [0.001, 0.0005, 0.0001]
LR_DC = 0.1 # factor the learning rate is multiplied by at each decay (StepLR gamma)
LR_DC_STEP = 3 # number of scheduler steps between two decays (StepLR step_size)
L2 = 1e-5 # Adam weight decay (L2 penalty); values tried: [0.001, 0.0005, 0.0001, 0.00005, 0.00001]
STEP = 1 # number of GNN propagation steps
PATIENCE = 10 # number of epochs without a new best result before training stops early
NO_NHYBRID = False # passed as `nonhybrid`: True = use only the attention-weighted session vector; False = also combine it with the last item's state
VALIDATION = False # True = hold out part of the training set and evaluate on it instead of test.txt
VALID_PORTION = 0.1 # fraction of the training set held out when VALIDATION is True

## main function

In [None]:
def _max_item_id(dataset):
    """Return the largest item id in a ``(sequences, labels)`` pair (0 if it is empty)."""
    sequences, labels = dataset
    largest = max(labels, default=0)
    for seq in sequences:
        if len(seq) > 0:
            largest = max(largest, max(seq))
    return largest


def main():
    """Load the pickled datasets, train a SessionGraph model and return it.

    Reads ``./<DATASET>/train.txt`` (and ``test.txt`` unless VALIDATION is set,
    in which case a VALID_PORTION share of the training set is held out and
    used for evaluation). Trains for at most EPOCH epochs and stops early once
    PATIENCE epochs in total have brought no new best Recall@20 or MRR@20.
    """
    # NOTE: pickle must only be used on trusted files; these are written by the
    # preprocessing cell of this notebook.
    with open('./' + DATASET + '/train.txt', 'rb') as train_file:
        train_data = pickle.load(train_file)
    if VALIDATION:
        train_data, valid_data = split_validation(train_data, VALID_PORTION)
        test_data = valid_data
    else:
        with open('./' + DATASET + '/test.txt', 'rb') as test_file:
            test_data = pickle.load(test_file)

    # Size of the embedding table: ids start at 1 and index 0 is the padding
    # item, so the table needs (largest id + 1) rows. Deriving it from the data
    # replaces the former hard-coded 310, which only matched the sample dataset.
    n_node = max(_max_item_id(train_data), _max_item_id(test_data)) + 1

    train_data = Data(train_data, shuffle=True)
    test_data = Data(test_data, shuffle=False)

    model = trans_to_cuda(SessionGraph(n_node,
                                       HIDDEN_SIZE,
                                       BATCH_SIZE,
                                       NO_NHYBRID,
                                       STEP,
                                       LR,
                                       L2,
                                       LR_DC_STEP,
                                       LR_DC))

    start = time.time()
    best_result = [0, 0]    # best [Recall@20, MRR@20] so far
    best_epoch = [0, 0]     # epochs in which those best values were reached
    bad_counter = 0         # number of epochs that improved neither metric
    for epoch in range(EPOCH):
        print('-------------------------------------------------------')
        print('epoch: ', epoch)
        hit, mrr = train_test(model, train_data, test_data)
        flag = 0
        if hit >= best_result[0]:
            best_result[0] = hit
            best_epoch[0] = epoch
            flag = 1
        if mrr >= best_result[1]:
            best_result[1] = mrr
            best_epoch[1] = epoch
            flag = 1
        print('Best Result:')
        print('\tRecall@20:\t%.4f\tMMR@20:\t%.4f\tEpoch:\t%d,\t%d' % (best_result[0], best_result[1], best_epoch[0], best_epoch[1]))
        bad_counter += 1 - flag
        if bad_counter >= PATIENCE:
            break
    print('-------------------------------------------------------')
    end = time.time()
    print("Run time: %f s" % (end - start))

    return model

In [None]:
# Train the model and keep it in a module-level name so that later cells can query it.
model = main()

-------------------------------------------------------
epoch:  0
start training:  2023-11-30 10:57:43.235038


  A = trans_to_cuda(torch.Tensor(A).float())


[0/13] Loss: 5.7342
[3/13] Loss: 5.7312
[6/13] Loss: 5.7170
[9/13] Loss: 5.7201
[12/13] Loss: 5.6853
	Loss:	74.352
start predicting:  2023-11-30 10:57:46.490246
Best Result:
	Recall@20:	2.0202	MMR@20:	0.3367	Epoch:	0,	0
-------------------------------------------------------
epoch:  1
start training:  2023-11-30 10:57:46.577874
[0/13] Loss: 5.6580
[3/13] Loss: 5.5956
[6/13] Loss: 5.5351
[9/13] Loss: 5.5909
[12/13] Loss: 5.4351
	Loss:	71.988
start predicting:  2023-11-30 10:57:49.408983
Best Result:
	Recall@20:	5.0505	MMR@20:	0.5656	Epoch:	1,	1
-------------------------------------------------------
epoch:  2
start training:  2023-11-30 10:57:49.484624
[0/13] Loss: 5.2638
[3/13] Loss: 5.4098
[6/13] Loss: 5.3033
[9/13] Loss: 5.3198
[12/13] Loss: 5.7981
	Loss:	69.846
start predicting:  2023-11-30 10:57:52.472775
Best Result:
	Recall@20:	5.0505	MMR@20:	0.6241	Epoch:	2,	2
-------------------------------------------------------
epoch:  3
start training:  2023-11-30 10:57:52.591413
[0/13] Los

In [None]:
RANDOM_SEQ = 43

# get_slice() returns the unique node list of the sample as its third value
# (sorted, and containing 0 whenever the padded sequence has padding).
_, _, seq, _, _ = train_data.get_slice([RANDOM_SEQ])
print('sequence of item ids', seq)

# Bring the scores to host memory before converting them: Tensor.numpy() only
# works on CPU tensors, so the bare .detach().numpy() failed when the model
# runs on CUDA.
scores = trans_to_cpu(forward(model, [RANDOM_SEQ], train_data)[1]).detach().numpy()
result = list(scores)
# argsort is ascending, so the best-scored entries come last. The printed
# values are score columns: column k corresponds to item id k + 1.
print('result : ', np.argsort(result))
print('true target : ', train_data.targets[RANDOM_SEQ])

sequence of item ids [[0, 12, 13, 35]]
result :  [[ 79 298 236  44 112 100 297 229  50  82 288  88 159  75  38 166 134  63
   99  98 216   0 283  31 256 164 271 155 306 209  14 213 114 154  19  40
  143 190 121 272 251  87  57 211 188 130 169  20 245  25 238 175 255 220
  300 101  35 285 244 262  83 219  45  41 167 237 264 162 243  70 123 109
  144 280  37  89  90  91 205  78 214 257 147 126  80  73 265  48 108   8
  249 276 137 122 118 199 152 228 181 294 202 274 182  16 185  59  81 110
  161  36  21  15 270 153 232 241 148 128 290  39 165  29 173 273 192 135
   71 218 116  61 102 132 248 291 156 124  56   7 193 207 168  10 204 296
   95 189  74 179 303 269  58   1 307 176  49 197  96 195 145  66  32 278
  284  47  94 171 212  13 250   9 186 191   5 275  86 146 234 210  64  24
    4  18 131  46 138  97 282 299 113 292  17 286 174  53  92 125 129 177
  223   2 231 261 157 217  85 230 158 301 117  28  68 163   3  54  67 127
  119 170 263 106  27 246 208 107  93 133 240 105 136 235 305  