In [1]:
import math
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.init import xavier_normal_, xavier_uniform_
import argparse
import numpy as np
import time
import sys
from os.path import abspath
import random
import collections  
from collections import defaultdict
import scipy.sparse as sp
from itertools import product
from random import shuffle,randint,choice,sample
import csv 

from util.conf import OptionConf
import torch
import torch.nn as nn 
import torch.nn.functional as F
from scipy.sparse import coo_matrix
from scipy.sparse.linalg import eigs
from util.loss_torch import bpr_loss, l2_reg_loss, EmbLoss, contrastLoss
from util.init import *
from base.torch_interface import TorchGraphInterface
import os
import numpy as np 
import time 
from torch.optim.lr_scheduler import ReduceLROnPlateau

from util.conf import ModelConf
from base.recommender import Recommender
from util.algorithm import find_k_largest
from time import strftime, localtime
from data.loader import FileIO
from util.evaluation import ranking_evaluation
from util.evaluation import early_stopping

from data.data import Data
from data.graph import Graph

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

## Graph Recommender

In [3]:
class GraphRecommender(Recommender):
    def __init__(self, conf, data, data_kg, knowledge_set, **kwargs):
        super(GraphRecommender, self).__init__(conf, data, data_kg, knowledge_set,**kwargs)
        self.data = data
        self.data_kg = data_kg
        self.bestPerformance = []
        top = self.ranking['-topN'].split(',')
        self.topN = [int(num) for num in top]
        self.max_N = max(self.topN)
        
        self.output_path = kwargs['output_path']
        if not os.path.exists(self.output_path):
            os.makedirs(self.output_path)
            
    def print_model_info(self):
        super(GraphRecommender, self).print_model_info()
        # # print dataset statistics
        print('Training Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.training_size()))
        print('Test Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.test_size()))
        print('=' * 80)

    def build(self):
        pass

    def train(self):
        pass

    def predict(self, u):
        pass

    def test(self, user_emb, item_emb):
        def process_bar(num, total):
            rate = float(num) / total
            ratenum = int(50 * rate)
            r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum*2)
            sys.stdout.write(r)
            sys.stdout.flush()

        # predict
        rec_list = {}
        user_count = len(self.data.test_set)
        lst_users =  list(self.data_kg.userent.keys())
        lst_items =  list(self.data_kg.itement.keys())
        
        for i, user in enumerate(self.data.test_set):
            user_id  = lst_users.index(user)
            score = torch.matmul(user_emb[user_id], item_emb.transpose(0, 1))
            candidates = score.cpu().numpy()
            
            # e_find_candidates = time.time()
            # print("Calculate candidates time: %f s" % (e_find_candidates - s_find_candidates))
            # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
            rated_list, li = self.data.user_rated(user)
            for item in rated_list:
                candidates[lst_items.index(item)] = -10e8
            # s_find_k_largest = time.time()
            ids, scores = find_k_largest(self.max_N, candidates)
            # e_find_k_largest = time.time()
            # print("Find k largest candidates: %f s" % (e_find_k_largest - s_find_k_largest))
            item_names = [lst_items[iid] for iid in ids]
            rec_list[user] = list(zip(item_names, scores))
            if i % 1000 == 0:
                process_bar(i, user_count)
        process_bar(user_count, user_count)
        print('')
        return rec_list
    
    def evaluate(self, rec_list):
        self.recOutput.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is hit.\n')
        for user in self.data.test_set:
            line = str(user) + ':'
            for item in rec_list[user]:
                line += ' (' + str(item[0]) + ',' + str(item[1]) + ')'
                if item[0] in self.data.test_set[user]:
                    line += '*'
            line += '\n'
            self.recOutput.append(line)
        current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time.time()))
        # output prediction result
        out_dir = self.output_path
        file_name = self.config['model.name'] + '@' + current_time + '-top-' + str(self.max_N) + 'items' + '.txt'
        FileIO.write_file(out_dir, file_name, self.recOutput)
        print('The result has been output to ', abspath(out_dir), '.')
        file_name = self.config['model.name'] + '@' + current_time + '-performance' + '.txt'
        self.result = ranking_evaluation(self.data.test_set, rec_list, self.topN)
        self.model_log.add('###Evaluation Results###')
        self.model_log.add(self.result)
        FileIO.write_file(out_dir, file_name, self.result)
        print('The result of %s:\n%s' % (self.model_name, ''.join(self.result)))

    def fast_evaluation(self, model, epoch, user_embed, item_embed, kwargs=None):
        print('Evaluating the model...')
        s_test = time.time()
        rec_list = self.test(user_embed, item_embed)
        e_test = time.time() 
        print("Test time: %f s" % (e_test - s_test))
        
        s_measure = time.time()
        measure = ranking_evaluation(self.data.test_set, rec_list, [self.max_N])
        e_measure = time.time()
        print("Measure time: %f s" % (e_measure - s_measure))
        
        if len(self.bestPerformance) > 0:
            count = 0
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            for k in self.bestPerformance[1]:
                if self.bestPerformance[1][k] > performance[k]:
                    count += 1
                else:
                    count -= 1
            if count < 0:
                self.bestPerformance[1] = performance
                self.bestPerformance[0] = epoch + 1
                # try:
                #     self.save(kwargs)
                # except:
                self.save(model)
        else:
            self.bestPerformance.append(epoch + 1)
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            self.bestPerformance.append(performance)
            # try:
            #     self.save(kwargs)
            # except:
            self.save(model)
        print('-' * 120)
        print('Real-Time Ranking Performance ' + ' (Top-' + str(self.max_N) + ' Item Recommendation)')
        measure = [m.strip() for m in measure[1:]]
        print('*Current Performance*')
        print('Epoch:', str(epoch + 1) + ',', '  |  '.join(measure))
        bp = ''
        # for k in self.bestPerformance[1]:
        #     bp+=k+':'+str(self.bestPerformance[1][k])+' | '
        bp += 'Hit Ratio' + ':' + str(self.bestPerformance[1]['Hit Ratio']) + '  |  '
        bp += 'Precision' + ':' + str(self.bestPerformance[1]['Precision']) + '  |  '
        bp += 'Recall' + ':' + str(self.bestPerformance[1]['Recall']) + '  |  '
        # bp += 'F1' + ':' + str(self.bestPerformance[1]['F1']) + ' | '
        bp += 'NDCG' + ':' + str(self.bestPerformance[1]['NDCG'])
        print('*Best Performance* ')
        print('Epoch:fast_evaluation', str(self.bestPerformance[0]) + ',', bp)
        print('-' * 120)
        return measure
    
    def save(self, model):
        with torch.no_grad():
            ego_emb =  model.calc_cf_embeddings()
            user_emb = ego_emb[list(rec.data_kg.userent.keys())]
            item_emb = ego_emb[list(rec.data_kg.itement.keys())]
            self.best_user_emb, self.best_item_emb = user_emb, item_emb
        self.save_model(model)
    
    def save_model(self, model):
        # save model 
        current_time = strftime("%Y-%m-%d", localtime(time.time()))
        out_dir = self.output_path
        file_name =  self.config['model.name'] + '@' + current_time + '-weight' + '.pth'
        weight_file = out_dir + '/' + file_name 
        torch.save(model.state_dict(), weight_file)


    def save_performance_row(self, ep, data_ep):
        # opening the csv file in 'w' mode
        csv_path = self.output_path + 'train_performance.csv'
        
        # 'Hit Ratio:0.00328', 'Precision:0.00202', 'Recall:0.00337', 'NDCG:0.00292
        hit = float(data_ep[0].split(':')[1])
        precision = float(data_ep[1].split(':')[1])
        recall = float(data_ep[2].split(':')[1])
        ndcg = float(data_ep[3].split(':')[1])
        
        with open(csv_path, 'a+', newline = '') as f:
            header = ['ep', 'hit@20', 'prec@20', 'recall@20', 'ndcg@20']
            writer = csv.DictWriter(f, fieldnames = header)
            # writer.writeheader()
            writer.writerow({
                 'ep' : ep,
                 'hit@20': hit,
                 'prec@20': precision,
                 'recall@20': recall,
                 'ndcg@20': ndcg,
            })
            
    def save_loss_row(self, data_ep):
        csv_path = self.output_path + 'loss.csv'
        with open(csv_path, 'a+', newline ='') as f:
            header = ['ep', 'train_loss', 'cf_loss', 'kg_loss']
            writer = csv.DictWriter(f, fieldnames = header)
            # writer.writeheader()
            writer.writerow({
                'ep' : data_ep[0],
                'train_loss': data_ep[1],
                 'cf_loss': data_ep[2],
                 'kg_loss': data_ep[3]
            })

    def save_loss(self, train_losses, rec_losses, kg_losses):
        df_train_loss = pd.DataFrame(train_losses, columns = ['ep', 'loss'])
        df_rec_loss = pd.DataFrame(rec_losses, columns = ['ep', 'loss'])
        df_kg_loss = pd.DataFrame(kg_losses, columns = ['ep', 'loss'])
        df_train_loss.to_csv(self.output_path + '/train_loss.csv')
        df_rec_loss.to_csv(self.output_path + '/rec_loss.csv')
        df_kg_loss.to_csv(self.output_path + '/kg_loss.csv')
    
    def save_perfomance_training(self, log_train):
        df_train_log = pd.DataFrame(log_train)
        df_train_log.to_csv(self.output_path + '/train_performance.csv')

## Interaction

In [4]:
class Interaction(Data, Graph):
    def __init__(self, conf, training, test):
        self.conf = conf 
        Graph.__init__(self)
        Data.__init__(self,conf,training,test)

        self.user = {}
        self.item = {}
        self.id2user = {}
        self.id2item = {}
        self.training_set_u = defaultdict(dict)
        self.training_set_i = defaultdict(dict)
        self.test_set = defaultdict(dict)
        self.user_history_dict = defaultdict(dict)

        self.test_set_item = set()
        self.__generate_set()

        self.n_users = len(self.training_set_u)
        self.n_items = len(self.training_set_i) 

        self.n_cf_train = len(self.training_data)
        self.n_cf_test = len(self.test_data)

        # self.ui_adj = self.__create_sparse_bipartite_adjacency()
        # self.norm_adj = self.normalize_graph_mat(self.ui_adj)
        # self.interaction_mat, self.inv_interaction_mat = self.__create_sparse_interaction_matrix()
        
    def __generate_set(self):
        for entry in self.training_data:
            user, item, rating = entry
            user, item = int(user), int(item)
            if user not in self.user:
                self.user[user] = len(self.user)
                self.id2user[self.user[user]] = user
            if item not in self.item:
                self.item[item] = len(self.item)
                self.id2item[self.item[item]] = item
                # userList.append
            # construct user_history_dict 
            if rating == 1.0:
                if user not in self.user_history_dict:
                    self.user_history_dict[user] = []
                self.user_history_dict[user].append(item)
            
            self.training_set_u[user][item] = rating
            self.training_set_i[item][user] = rating
        
        for entry in self.test_data:
            user, item, rating = entry
            if user not in self.user:
                continue
            self.test_set[user][item] = rating
            self.test_set_item.add(item)

    def __create_sparse_bipartite_adjacency(self, self_connection=False):
        '''
        return a sparse adjacency matrix with the shape (user number + item number, user number + item number)
        '''
        n_nodes = self.n_users + self.n_items
        row_idx = [int(pair[0]) for pair in self.training_data]
        col_idx = [int(pair[1]) for pair in self.training_data]
        user_np = np.array(row_idx)
        item_np = np.array(col_idx)
        ratings = np.ones_like(user_np, dtype=np.float32)
        tmp_adj = sp.csr_matrix((ratings, (user_np, item_np + self.n_users)), shape=(n_nodes, n_nodes),dtype=np.float32)
        adj_mat = tmp_adj + tmp_adj.T
        if self_connection:
            adj_mat += sp.eye(n_nodes)
        return adj_mat
    
    def __create_sparse_interaction_matrix(self):
        """
            return a sparse adjacency matrix with the shape (user number, item number)
        """
        row, col, entries = [], [], []
        for pair in self.training_data:
            row += [int(pair[0])]
            col += [int(pair[1])]
            entries += [1.0]
        interaction_mat = sp.csr_matrix((entries, (row, col)), shape=(self.n_users,self.n_items),dtype=np.float32)
        inv_interaction_mat = sp.csr_matrix((entries, (col, row)), shape=(self.n_items, self.n_users), dtype=np.float32)
        return interaction_mat, inv_interaction_mat
            
    def get_user_id(self, u):
        if u in self.user:
            return self.user[u]

    def get_item_id(self, i):
        if i in self.item:
            return self.item[i]

    def training_size(self):
        return len(self.user), len(self.item), len(self.training_data)

    def test_size(self):
        return len(self.test_set), len(self.test_set_item), len(self.test_data)

    def contain(self, u, i):
        'whether user u rated item i'
        if u in self.user and i in self.training_set_u[u]:
            return True
        else:
            return False

    def contain_user(self, u):
        'whether user is in training set'
        if u in self.user:
            return True
        else:
            return False

    def contain_item(self, i):
        """whether item is in training set"""
        if i in self.item:
            return True
        else:
            return False

    def user_rated(self, u):
        return list(self.training_set_u[u].keys()), list(self.training_set_u[u].values())

    def item_rated(self, i):
        return list(self.training_set_i[i].keys()), list(self.training_set_i[i].values())


## Knowledge

In [5]:
class Knowledge(Interaction):
    def __init__(self, conf, training, test, knowledge):
        super().__init__(conf, training, test)
        self.conf = conf 
        self.kg_data = knowledge

        self.entity = {}
        self.id2ent = {}

        self.userent = {}
        self.itement = {}
        
        self.u2id = {}
        self.id2u = {}
        
        self.i2id = {}
        self.id2i = {}
        
        self.relation = {}
        self.id2rel = {}

        self.cf_train_data = np.array(training)
        self.training_set_e = defaultdict(dict)

        self.construct_data()
        
        self.laplacian_type = 'random-walk'
        self.create_adjacency_dict()
        self.create_laplacian_dict()
        
        self.kg_interaction_mat = self.__create_sparse_knowledge_interaction_matrix()
    
    def construct_data(self):
        kg_data = self.kg_data
        n_relations = max(kg_data['r']) + 1
        inverse_kg_data = kg_data.copy()
        inverse_kg_data = inverse_kg_data.rename({'h': 't', 't': 'h'}, axis='columns')
        inverse_kg_data['r'] += n_relations

        kg_data = pd.concat([kg_data, inverse_kg_data], axis=0, ignore_index=True, sort=False)

        # remap user_id 
        kg_data['r'] += 2
        
        kg_train_data = pd.concat([kg_data, inverse_kg_data], axis=0, ignore_index=True, sort=False)
        self.n_entities = max(max(kg_train_data['h']), max(kg_train_data['t'])) + 1
        self.n_relations = max(kg_train_data['r']) + 1

        # add interactions to kg data
        cf2kg_train_data = pd.DataFrame(np.zeros((self.n_cf_train, 3), dtype=np.int32), columns=['h', 'r', 't'])
        cf2kg_train_data['h'] = self.cf_train_data[:,0]
        cf2kg_train_data['t'] = self.cf_train_data[:,1]

        inverse_cf2kg_train_data = pd.DataFrame(np.ones((self.n_cf_train, 3), dtype=np.int32), columns=['h', 'r', 't'])
        inverse_cf2kg_train_data['h'] = self.cf_train_data[:,1]
        inverse_cf2kg_train_data['t'] = self.cf_train_data[:,0]

        self.kg_train_data = pd.concat([kg_train_data, cf2kg_train_data, inverse_cf2kg_train_data], ignore_index=True)
        self.n_kg_train = len(self.kg_train_data)

        self.n_users_entities = int(max(max(self.kg_train_data['h']), max(self.kg_train_data['t'])) + 1)

        # construct kg dict
        h_list = []
        t_list = []
        r_list = []

        self.train_kg_dict = collections.defaultdict(list)
        self.train_relation_dict = collections.defaultdict(list)

        for idx, row in self.kg_train_data.iterrows():
            h, r, t = int(row['h']), int(row['r']), int(row['t'])
            h_list.append(h)
            t_list.append(t)
            r_list.append(r)

            if h not in self.entity:
                self.entity[h] = len(self.entity)
                self.id2ent[self.entity[h]] = h
                # check h co phai user hay item k
                if h in self.user:
                    self.userent[h] = len(self.userent)
                #     # self.id2userent[self.userent[h]] = h
                if h in self.item:
                    self.itement[h] = len(self.itement)
                #     # self.id2itement[self.itement[h]] = h

            if t not in self.entity:
                self.entity[t] = len(self.entity)
                self.id2ent[self.entity[t]] = t 
                # check h co phai user hay item k 
                if t in self.user:
                    self.userent[t] = len(self.userent)
                #     # self.id2userent[self.userent[t]] = t
                if t in self.item:
                    self.itement[t] = len(self.itement)
                #     # self.id2itement[self.itement[t]] = t
            if r not in self.relation:
                self.relation[r] = len(self.relation)
                self.id2rel[self.relation[r]] = r 
            
            self.training_set_e[t][h] = r
            self.train_kg_dict[h].append((t, r))
            self.train_relation_dict[r].append((h, t))
        
        self.h_list = torch.LongTensor(h_list).to(device)
        self.t_list = torch.LongTensor(t_list).to(device)
        self.r_list = torch.LongTensor(r_list).to(device)
        
        lst_user_entities = list(self.userent.keys())
        lst_item_entities = list(self.itement.keys())

        for idx, u in enumerate(lst_user_entities):
            self.u2id[u] = idx
            self.id2u[idx] = u
        for idx, i in enumerate(lst_item_entities):
            self.i2id[i] = idx
            self.id2i[idx] = i
        
    def get_entity_id(self, e):
        if e in self.entity:
            return self.entity[e]
    
    def __create_sparse_knowledge_interaction_matrix(self):
        """
            return a sparse adjacency matrix with the shape (entity number, entity number)
        """
        row, col, entries = [], [], []
        for idx, pair in self.kg_train_data.iterrows():
            head, tail = int(pair['h']), int(pair['t'])
            row += [head]
            col += [tail]
            entries += [1.0]
        interaction_mat = sp.csr_matrix((entries, (row, col)), shape=(self.n_users_entities, self.n_users_entities),dtype=np.float32)
        return interaction_mat
    
    def convert_coo2tensor(self, coo):
        values = coo.data
        indices = np.vstack((coo.row, coo.col))

        i = torch.LongTensor(indices)
        v = torch.FloatTensor(values)
        shape = coo.shape
        return torch.sparse.FloatTensor(i, v, torch.Size(shape))
    
    def create_adjacency_dict(self):
        self.adjacency_dict = {}
        for r, ht_list in self.train_relation_dict.items():
            rows = [e[0] for e in ht_list]
            cols = [e[1] for e in ht_list]
            vals = [1] * len(rows)
            adj = sp.coo_matrix((vals, (rows, cols)), shape=(self.n_users_entities, self.n_users_entities))
            self.adjacency_dict[r] = adj
    
    def create_laplacian_dict(self):
        def symmetric_norm_lap(adj):
            rowsum = np.array(adj.sum(axis=1))

            d_inv_sqrt = np.power(rowsum, -0.5).flatten()
            d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0
            d_mat_inv_sqrt = sp.diags(d_inv_sqrt)

            norm_adj = d_mat_inv_sqrt.dot(adj).dot(d_mat_inv_sqrt)
            return norm_adj.tocoo()

        def random_walk_norm_lap(adj):
            rowsum = np.array(adj.sum(axis=1))

            d_inv = np.power(rowsum, -1.0).flatten()
            d_inv[np.isinf(d_inv)] = 0
            d_mat_inv = sp.diags(d_inv)

            norm_adj = d_mat_inv.dot(adj)
            return norm_adj.tocoo()

        if self.laplacian_type == 'symmetric':
            norm_lap_func = symmetric_norm_lap
        elif self.laplacian_type == 'random-walk':
            norm_lap_func = random_walk_norm_lap
        
        self.laplacian_dict = {}
        for r, adj in self.adjacency_dict.items():
            self.laplacian_dict[r] = norm_lap_func(adj)

        A_in = sum(self.laplacian_dict.values())
        self.A_in = self.convert_coo2tensor(A_in.tocoo())

## Model

In [6]:
class Aggregator(nn.Module):

    def __init__(self, in_dim, out_dim, dropout, aggregator_type):
        super(Aggregator, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.dropout = dropout
        self.aggregator_type = aggregator_type

        self.message_dropout = nn.Dropout(dropout)
        self.activation = nn.LeakyReLU()

        if self.aggregator_type == 'gcn':
            self.linear = nn.Linear(self.in_dim, self.out_dim)       # W in Equation (6)
            # nn.init.xavier_uniform_(self.linear.weight)

        elif self.aggregator_type == 'graphsage':
            self.linear = nn.Linear(self.in_dim * 2, self.out_dim)   # W in Equation (7)
            # nn.init.xavier_uniform_(self.linear.weight)

        elif self.aggregator_type == 'bi-interaction':
            
            self.linear1 = nn.Linear(self.in_dim, self.out_dim)      # W1 in Equation (8)
            self.linear2 = nn.Linear(self.in_dim, self.out_dim)      # W2 in Equation (8)
            # nn.init.xavier_uniform_(self.linear1.weight)
            # nn.init.xavier_uniform_(self.linear2.weight)
        else:
            raise NotImplementedError
        self.ln1 = nn.LayerNorm(self.out_dim)
        self.ln2 = nn.LayerNorm(self.out_dim)
        
    def forward(self, ego_embeddings, A_in):
        """
        ego_embeddings:  (n_users + n_entities, in_dim)
        A_in:            (n_users + n_entities, n_users + n_entities), torch.sparse.FloatTensor
        """
        # Equation (3)
        side_embeddings = torch.matmul(A_in, ego_embeddings)

        if self.aggregator_type == 'gcn':
            # Equation (6) & (9)
            embeddings = ego_embeddings + side_embeddings
            embeddings = self.ln1(self.activation(self.linear(embeddings)))
            
        elif self.aggregator_type == 'graphsage':
            # Equation (7) & (9)
            embeddings = torch.cat([ego_embeddings, side_embeddings], dim=1)
            embeddings = self.ln1(self.activation(self.linear(embeddings)))

        elif self.aggregator_type == 'bi-interaction':
            # Equation (8) & (9)
            sum_embeddings = self.ln1(self.activation(self.linear1(ego_embeddings + side_embeddings)))
            bi_embeddings = self.ln2(self.activation(self.linear2(ego_embeddings * side_embeddings)))
            embeddings = bi_embeddings + sum_embeddings

        embeddings = self.message_dropout(embeddings)           # (n_users + n_entities, out_dim)
        return embeddings

In [7]:
class KGAT(nn.Module):

    def __init__(self, args, rec, A_in=None, user_pre_embed=None, item_pre_embed=None):

        super(KGAT, self).__init__()
        
        self.user_indices = torch.LongTensor(list(rec.data_kg.userent.keys())).to(device)
        self.item_indices =  torch.LongTensor(list(rec.data_kg.itement.keys())).to(device)
        
        self.use_pretrain = args['use_pretrain']
        
        self.n_users = rec.data_kg.n_users
        self.n_entities = rec.data_kg.n_entities
        self.n_relations = rec.data_kg.n_relations
        self.n_users_entities = rec.data_kg.n_users_entities
        self.embed_dim = args['embed_dim']
        self.relation_dim = args['relation_dim']

        self.aggregation_type = args['aggregation_type']
        self.conv_dim_list = [args['embed_dim']] + eval(args['conv_dim_list'])
        self.mess_dropout = eval(args['mess_dropout'])
        self.n_layers = len(eval(args['conv_dim_list']))

        self.kg_l2loss_lambda = args['kg_l2loss_lambda']
        self.cf_l2loss_lambda = args['cf_l2loss_lambda']

        self.entity_user_embed = nn.Embedding(self.n_users_entities, self.embed_dim).to(device)
        self.relation_embed = nn.Embedding(self.n_relations, self.relation_dim).to(device)
        self.trans_M = nn.Parameter(torch.Tensor(self.n_relations, self.embed_dim, self.relation_dim)).to(device)

        self.all_user_idx = list(rec.data_kg.userent.keys())
        self.all_item_idx =  list(rec.data_kg.itement.keys())
        
        if (self.use_pretrain == 1) and (user_pre_embed is not None) and (item_pre_embed is not None):
            other_entity_embed = nn.Parameter(torch.Tensor(self.n_entities - item_pre_embed.shape[0], self.embed_dim))
            nn.init.xavier_uniform_(other_entity_embed)
            entity_user_embed = torch.cat([item_pre_embed, other_entity_embed, user_pre_embed], dim=0)
            self.entity_user_embed.weight = nn.Parameter(entity_user_embed)
        else:
            nn.init.xavier_uniform_(self.entity_user_embed.weight)

        nn.init.xavier_uniform_(self.relation_embed.weight)
        nn.init.xavier_uniform_(self.trans_M)

        self.aggregator_layers = nn.ModuleList()
        for k in range(self.n_layers):
            self.aggregator_layers.append(Aggregator(self.conv_dim_list[k], self.conv_dim_list[k + 1], self.mess_dropout[k], self.aggregation_type).to(device))

        self.A_in = nn.Parameter(torch.sparse.FloatTensor(self.n_users_entities, self.n_users_entities))
        if A_in is not None:
            self.A_in.data = A_in 
        self.A_in.requires_grad = False

    def calc_cf_embeddings(self):
        ego_embed = self.entity_user_embed.weight
        all_embed = [ego_embed]

        for idx, layer in enumerate(self.aggregator_layers):
            ego_embed = layer(ego_embed, self.A_in.to(device))
            norm_embed = F.normalize(ego_embed, p=2, dim=1)
            all_embed.append(norm_embed)

        # Equation (11)
        all_embed = torch.cat(all_embed, dim=1)         # (n_users + n_entities, concat_dim)
        return all_embed

    def calc_cf_loss(self, user_embed, item_pos_embed, item_neg_embed):
        """
        user_ids:       (cf_batch_size)
        item_pos_ids:   (cf_batch_size)
        item_neg_ids:   (cf_batch_size)
        """
        # Equation (12)
        pos_score = torch.sum(user_embed * item_pos_embed, dim=1)   # (cf_batch_size)
        neg_score = torch.sum(user_embed * item_neg_embed, dim=1)   # (cf_batch_size)

        # Equation (13)
        # cf_loss = F.softplus(neg_score - pos_score)
        cf_loss = (-1.0) * F.logsigmoid(pos_score - neg_score)
        cf_loss = torch.mean(cf_loss)

        l2_loss = _L2_loss_mean(user_embed) + _L2_loss_mean(item_pos_embed) + _L2_loss_mean(item_neg_embed)
        loss = cf_loss + self.cf_l2loss_lambda * l2_loss
        return loss

    def calc_kg_loss(self, h, r, pos_t, neg_t):
        """
        h:      (kg_batch_size)
        r:      (kg_batch_size)
        pos_t:  (kg_batch_size)
        neg_t:  (kg_batch_size)
        """
        r_embed = self.relation_embed(r)                                                # (kg_batch_size, relation_dim)
        W_r = self.trans_M[r]                                                           # (kg_batch_size, embed_dim, relation_dim)

        h_embed = self.entity_user_embed(h)                                             # (kg_batch_size, embed_dim)
        pos_t_embed = self.entity_user_embed(pos_t)                                     # (kg_batch_size, embed_dim)
        neg_t_embed = self.entity_user_embed(neg_t)                                     # (kg_batch_size, embed_dim)

        r_mul_h = torch.bmm(h_embed.unsqueeze(1), W_r).squeeze(1)                       # (kg_batch_size, relation_dim)
        r_mul_pos_t = torch.bmm(pos_t_embed.unsqueeze(1), W_r).squeeze(1)               # (kg_batch_size, relation_dim)
        r_mul_neg_t = torch.bmm(neg_t_embed.unsqueeze(1), W_r).squeeze(1)               # (kg_batch_size, relation_dim)

        # Equation (1)
        pos_score = torch.sum(torch.pow(r_mul_h + r_embed - r_mul_pos_t, 2), dim=1)     # (kg_batch_size)
        neg_score = torch.sum(torch.pow(r_mul_h + r_embed - r_mul_neg_t, 2), dim=1)     # (kg_batch_size)

        # Equation (2)
        # kg_loss = F.softplus(pos_score - neg_score)
        kg_loss = (-1.0) * F.logsigmoid(neg_score - pos_score)
        kg_loss = torch.mean(kg_loss)

        l2_loss = _L2_loss_mean(r_mul_h) + _L2_loss_mean(r_embed) + _L2_loss_mean(r_mul_pos_t) + _L2_loss_mean(r_mul_neg_t)
        loss = kg_loss + self.kg_l2loss_lambda * l2_loss
        return loss

    def update_attention_batch(self, h_list, t_list, r_idx):
        r_embed = self.relation_embed.weight[r_idx]
        W_r = self.trans_M[r_idx]

        h_embed = self.entity_user_embed.weight[h_list]
        t_embed = self.entity_user_embed.weight[t_list]

        # Equation (4)
        r_mul_h = torch.matmul(h_embed, W_r)
        r_mul_t = torch.matmul(t_embed, W_r)
        v_list = torch.sum(r_mul_t * torch.tanh(r_mul_h + r_embed), dim=1)
        return v_list

    def update_attention(self, h_list, t_list, r_list, relations):
        device = self.A_in.device

        rows = []
        cols = []
        values = []

        for r_idx in relations:
            index_list = torch.where(r_list == r_idx)
            batch_h_list = h_list[index_list]
            batch_t_list = t_list[index_list]

            batch_v_list = self.update_attention_batch(batch_h_list, batch_t_list, r_idx)
            rows.append(batch_h_list)
            cols.append(batch_t_list)
            values.append(batch_v_list)

        rows = torch.cat(rows)
        cols = torch.cat(cols)
        values = torch.cat(values)

        indices = torch.stack([rows, cols])
        shape = self.A_in.shape
        A_in = torch.sparse.FloatTensor(indices, values, torch.Size(shape))

        # Equation (5)
        A_in = torch.sparse.softmax(A_in.cpu(), dim=1)
        self.A_in.data = A_in.to(device)

    def calc_score(self, user_ids, item_ids):
        """
        user_ids:  (n_users)
        item_ids:  (n_items)
        """
        all_embed = self.calc_cf_embeddings()           # (n_users + n_entities, concat_dim)
        user_embed = all_embed[user_ids]                # (n_users, concat_dim)
        item_embed = all_embed[item_ids]                # (n_items, concat_dim)

        # Equation (12)
        cf_score = torch.matmul(user_embed, item_embed.transpose(0, 1))    # (n_users, n_items)
        return cf_score 
    

## Sampler

In [8]:
import datetime 
def next_batch_pairwise(rec, batch_size, n_negs=1):
    training_data = rec.data.training_data
    shuffle(training_data)
    ptr = 0
    data_size = len(training_data)
    while ptr < data_size:
        if ptr + batch_size < data_size:
            batch_end = ptr + batch_size
        else:   
            batch_end = data_size
        users = [training_data[idx][0] for idx in range(ptr, batch_end)]
        items = [training_data[idx][1] for idx in range(ptr, batch_end)]
        ptr = batch_end
        u_idx, i_idx, j_idx = [], [], []
        item_list = list(data.item.keys())
        for i, user in enumerate(users):
            i_idx.append(items[i])
            u_idx.append(user)
            for m in range(n_negs):
                neg_item = choice(item_list)
                while neg_item in data.training_set_u[user]:
                    neg_item = choice(item_list)
                j_idx.append(neg_item)

        u_idx  = torch.LongTensor(u_idx).to(device)
        i_idx  = torch.LongTensor(i_idx).to(device)
        j_idx  = torch.LongTensor(j_idx).to(device)
        yield u_idx, i_idx, j_idx
    
def next_batch_kg(kg_data, kg_dict, batch_size, n_negs=1):
    ptr = 0
    exist_heads= kg_dict.keys()
    h_list = list(exist_heads)
    h_dict = {value: idx for idx, value in enumerate(h_list)}
    all_tails = list(set(kg_data[:,2]))
    data_size = len(kg_data)
    # Pre-compute positive tail sets and negative tails for each head
    pos_tail_sets = {head: set([it[0] for it in tails]) for head, tails in kg_dict.items()}
    # neg_tail_sets = {head: np.random.choice(list(all_tails - pos_tails), size=n_negs) for head, pos_tails in pos_tail_sets.items()}
    
    while ptr < data_size:
        if ptr + batch_size < data_size:
            batch_end = ptr + batch_size
        else:   
            batch_end = data_size
        
        heads, relations, tails = kg_data[ptr:batch_end, 0], kg_data[ptr:batch_end, 1], kg_data[ptr:batch_end, 2]
        
        ptr = batch_end
        h_idx, r_idx, pos_t_idx, neg_t_idx = [], [], [], []
        # time1 = datetime.datetime.now()
        h_idx = [h_dict[head] for head in heads]
        
        r_idx.extend([int(rel) for rel in relations])
        pos_t_idx.extend([int(tail) for tail in tails])
        for head in heads:
            neg_t = random.choice(all_tails)
            while neg_t in pos_tail_sets[head]:
                neg_t = random.choice(all_tails)
            neg_t_idx.append(int(h_dict[neg_t]))
                
        # time2 = datetime.datetime.now()
        # time_difference = time2 - time1
        # print(time_difference)

        h_idx  = torch.LongTensor(h_idx).to(device)
        r_idx  = torch.LongTensor(r_idx).to(device)
        pos_t_idx  = torch.LongTensor(pos_t_idx).to(device)
        neg_t_idx  = torch.LongTensor(neg_t_idx).to(device)
        yield h_idx, r_idx, pos_t_idx, neg_t_idx
            

## Util

In [9]:
def _L2_loss_mean(x):
    return torch.mean(torch.sum(torch.pow(x, 2), dim=1, keepdim=False) / 2.)

## Train

In [10]:
def train(train_model, rec, args):
    # seed
    random.seed(args['seed'])
    np.random.seed(args['seed'])
    torch.manual_seed(args['seed'])
    torch.cuda.manual_seed_all(args['seed'])
    
    lst_train_losses = []
    lst_rec_losses = []
    lst_kg_losses = []
    lst_performances = []
    recall_list = []

    reg = args['reg']
    reg_kg = args['reg_kg']
    alpha = args['alpha']
    
    
    cf_optimizer  = torch.optim.Adam(train_model.parameters(), lr=lRate)
    kg_optimizer = torch.optim.Adam(train_model.parameters(), lr=lRateKG)

    kg_data = rec.data_kg.kg_train_data.to_numpy()
    kg_dict = rec.data_kg.train_kg_dict
        
    for ep in range(maxEpoch):
        train_model.train()
        
        train_losses = []
        cf_losses = []
        kg_losses = []
        
        cf_total_loss = 0
        kg_total_loss = 0
        
        n_cf_batch = int(rec.data.n_cf_train // batchSize + 1)
        n_kg_batch = int(rec.data_kg.n_kg_train // batchSizeKG + 1)
        
        shuffle(kg_data)
        
        # Learn cf graph
        for n, batch in enumerate(next_batch_pairwise(rec, batchSize)):
            user_idx, pos_idx, neg_idx = batch
            entity_emb = train_model.calc_cf_embeddings()
            
            user_emb = entity_emb[user_idx]
            pos_item_emb = entity_emb[pos_idx]
            neg_item_emb = entity_emb[neg_idx]
            
            cf_batch_loss = train_model.calc_cf_loss(user_emb, pos_item_emb, neg_item_emb)
            if np.isnan(cf_batch_loss.cpu().detach().numpy()):
                print('ERROR (CF Training): Epoch {:04d} Iter {:04d} / {:04d} Loss is nan.'.format(ep, n, n_cf_batch))

            cf_batch_loss.backward()
            cf_optimizer.step()
            cf_optimizer.zero_grad()
            cf_total_loss += cf_batch_loss.item()
            
            cf_losses.append(cf_batch_loss.item())
            if (n % 20) == 0:
                print('CF Training: Epoch {:04d} Iter {:04d} / {:04d} | Iter Loss {:.4f} | Iter Mean Loss {:.4f}'.format(ep, n, n_cf_batch,  cf_batch_loss.item(), cf_total_loss / (n+1)))
        
        # Learn knowledge grap
        for n, batch in enumerate(next_batch_kg(kg_data, kg_dict, batchSizeKG)):
            kg_batch_head, kg_batch_relation, kg_batch_pos_tail, kg_batch_neg_tail = batch
            
            kg_batch_loss = train_model.calc_kg_loss(kg_batch_head, kg_batch_relation, kg_batch_pos_tail, kg_batch_neg_tail)
            if np.isnan(kg_batch_loss.cpu().detach().numpy()):
                print('ERROR (KG Training): Epoch {:04d} Iter {:04d} / {:04d} Loss is nan.'.format(ep, n, n_kg_batch))
            kg_batch_loss.backward()
            kg_optimizer.step()
            kg_optimizer.zero_grad()
            kg_total_loss += kg_batch_loss.item()
            
            kg_losses.append(kg_batch_loss.item())
            if (n % 10) == 0:
                print('KG Training: Epoch {:04d} Iter {:04d} / {:04d} | Iter Loss {:.4f} | Iter Mean Loss {:.4f}'.format(ep, n, n_kg_batch,  kg_batch_loss.item(), kg_total_loss / (n+1)))

        # Learn attention 
        h_list = rec.data_kg.h_list.to(device)
        t_list = rec.data_kg.t_list.to(device)
        r_list = rec.data_kg.r_list.to(device)
        relations = list(rec.data_kg.laplacian_dict.keys())
        train_model.update_attention(h_list, t_list, r_list, relations)
        train_model.eval()

        cf_loss = np.mean(cf_losses)
        kg_loss = np.mean(kg_losses)
        train_loss = cf_loss + kg_loss

        with torch.no_grad():
            entity_emb = train_model.calc_cf_embeddings()
            user_emb = entity_emb[train_model.user_indices]
            item_emb = entity_emb[train_model.item_indices]
            data_ep = rec.fast_evaluation(train_model, ep, user_emb, item_emb)

            cur_recall =  float(data_ep[2].split(':')[1])
            recall_list.append(cur_recall)
            best_recall, should_stop = early_stopping(recall_list, 100)
            if should_stop:
                break
                
        rec.save_performance_row(ep, data_ep)
        rec.save_loss_row([ep, train_loss, cf_loss, kg_loss])
    
        lst_performances.append(data_ep)
        lst_train_losses.append([ep, train_loss]) 
        lst_rec_losses.append([ep, cf_loss])
        lst_kg_losses.append([ep, kg_loss])
    rec.save_loss(lst_train_losses, lst_rec_losses, lst_kg_losses)
    rec.save_perfomance_training(lst_performances)
    user_emb, item_emb = rec.best_user_emb, rec.best_item_emb
    return user_emb, item_emb

## Test

In [11]:
def test(rec, user_emb, item_emb):
    def process_bar(num, total):
        rate = float(num) / total
        ratenum = int(50 * rate)
        r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum*2)
        sys.stdout.write(r)
        sys.stdout.flush()

    # predict
    rec_list = {}
    user_count = len(rec.data.test_set)
    for i, user in enumerate(rec.data.test_set):
        user_id = rec.data_kg.u2id[user]
        score = torch.matmul(user_emb[user_id], item_emb.transpose(0, 1))
        candidates = score.cpu().numpy()
        
        rated_list, li = rec.data.user_rated(user)
        for item in rated_list:
            candidates[rec.data_kg.i2id[item]] = -10e8
        # s_find_k_largest = time.time()
        ids, scores = find_k_largest(rec.max_N, candidates)

        item_names = [rec.data_kg.id2i[iid] for iid in ids]
        rec_list[user] = list(zip(item_names, scores))
        if i % 1000 == 0:
            process_bar(i, user_count)
    process_bar(user_count, user_count)
    print('')
    rec.evaluate(rec_list)

## Main

In [12]:
model = 'KGAT'
config = ModelConf('./conf/' + model + '.conf')
lRates = [0.001]
lRateKGs = [0.001]
lrDecays = [0.7]
maxEpochs = [500]
batchSizes = [2048]
batchSizeKGs = [8192]
nLayers = [2]
regs = [1e-5]
regkgs = [ 1e-5]
embeddingSizes = [128]
datasets = ['lastfm', 'ml-1m']

In [None]:
hyperparameters = [lRates, lRateKGs, lrDecays, maxEpochs, batchSizes, batchSizeKGs, nLayers, regs, regkgs, embeddingSizes, datasets]
for params in product(*hyperparameters):
    lRate, lRateKG, lrDecay, maxEpoch, batchSize, batchSizeKG, nLayer, reg, reg_kg, embeddingSize, dataset = params
    args = {
        'lr': lRate,
        'lr_kg': lRateKG,
        'max_epoch': maxEpoch,
        'batch_size': batchSize, 
        'lr_decay': lrDecay,
        'dataset': dataset,
        'n_layers': nLayer,
        'use_pretrain': 0,
        'input_dim': 32,
        'embed_dim': embeddingSize,
        'relation_dim': 32,
        'reg': 1e-5,
        'reg_kg': 1e-5,
        'aggregation_type': 'bi-interaction',
        'mess_dropout': '[0.1, 0.1, 0.1]',
        'conv_dim_list': '[64, 32, 16]',
        'kg_l2loss_lambda': reg_kg,
        'cf_l2loss_lambda': reg,
        'seed': 123,
        'alpha': 0.1
    }
    args['output_path'] =  f"./results/KGAT/{dataset}/@KGAT-inp_emb:{args['input_dim']}-emb:{args['embed_dim']}-bs:{args['batch_size']}-lr:{args['lr']}-lr_kg:{args['lr_kg']}-n_layers:{args['n_layers']}/"
    if not os.path.exists(args['output_path']):
        os.makedirs(args['output_path'])

    current_time = strftime("%Y-%m-%d", localtime(time.time()))
    file_name =  config['model.name'] + '@' + current_time + '-weight' + '.pth'
    weight_path = args['output_path'] + file_name 
    # data
    training_data = FileIO.load_data_set('./dataset/' + dataset + '/' +config['training.set'], config['model.type'])
    test_data = FileIO.load_data_set('./dataset/' + dataset + '/'  +config['test.set'], config['model.type'])
    knowledge_set = FileIO.load_kg_data('./dataset/' + dataset +'/'+ dataset +'.kg')
    data = Interaction(config, training_data, test_data)
    data_kg = Knowledge(config, training_data, test_data, knowledge_set)
    # rec 
    rec = GraphRecommender(config, data, data_kg, knowledge_set, **args)
    
    A_in = TorchGraphInterface.convert_sparse_mat_to_tensor(rec.data_kg.kg_interaction_mat).to(device)
    
    train_model = KGAT(args, rec, A_in=A_in)
    if os.path.exists(weight_path):
        train_model.load_state_dict(torch.load(weight_path))
    user_emb, item_emb = train(train_model, rec, args)   
    test(rec, user_emb, item_emb)

  d_inv = np.power(rowsum, -1.0).flatten()


parameter ss_rate is not found in the configuration file!


  i = torch.LongTensor([coo.row, coo.col])


CF Training: Epoch 0000 Iter 0000 / 0034 | Iter Loss 0.1263 | Iter Mean Loss 0.1263
CF Training: Epoch 0000 Iter 0020 / 0034 | Iter Loss 0.1303 | Iter Mean Loss 0.1268
KG Training: Epoch 0000 Iter 0000 / 0263 | Iter Loss 0.3733 | Iter Mean Loss 0.3733
KG Training: Epoch 0000 Iter 0010 / 0263 | Iter Loss 0.5468 | Iter Mean Loss 0.4650
KG Training: Epoch 0000 Iter 0020 / 0263 | Iter Loss 0.5403 | Iter Mean Loss 0.5040
KG Training: Epoch 0000 Iter 0030 / 0263 | Iter Loss 0.5068 | Iter Mean Loss 0.5084
KG Training: Epoch 0000 Iter 0040 / 0263 | Iter Loss 0.4655 | Iter Mean Loss 0.5015
KG Training: Epoch 0000 Iter 0050 / 0263 | Iter Loss 0.4271 | Iter Mean Loss 0.4904
KG Training: Epoch 0000 Iter 0060 / 0263 | Iter Loss 0.3964 | Iter Mean Loss 0.4777
KG Training: Epoch 0000 Iter 0070 / 0263 | Iter Loss 0.3814 | Iter Mean Loss 0.4647
KG Training: Epoch 0000 Iter 0080 / 0263 | Iter Loss 0.3664 | Iter Mean Loss 0.4537
KG Training: Epoch 0000 Iter 0090 / 0263 | Iter Loss 0.3851 | Iter Mean Loss