In [1]:
import math
import pandas as pd 
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.init import xavier_normal_, xavier_uniform_
import argparse
import numpy as np
import time
import sys
from os.path import abspath
import random
import collections  
from collections import defaultdict
import scipy.sparse as sp

# from util.sampler import  next_batch_pairwise
from util.conf import OptionConf
import torch
import torch.nn as nn 
import torch.nn.functional as F
from scipy.sparse import coo_matrix
from util.loss_torch import bpr_loss, l2_reg_loss, EmbLoss, contrastLoss
from util.init import *
from base.torch_interface import TorchGraphInterface
import os
import numpy as np 
import time 
from torch.optim.lr_scheduler import ReduceLROnPlateau

from util.conf import ModelConf
from base.recommender import Recommender
# from data.ui_graph import Interaction
# from data.knowledge import Knowledge
from util.algorithm import find_k_largest
from time import strftime, localtime
from data.loader import FileIO
from util.evaluation import ranking_evaluation
# from util.sampler import * 
from data.ui_graph import Interaction
from itertools import product


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')

## Base Recommender

In [17]:
class GraphRecommender(Recommender):
    def __init__(self, conf, data, data_kg, knowledge_set, **kwargs):
        super(GraphRecommender, self).__init__(conf, data, data_kg, knowledge_set,**kwargs)
        # self.data = Interaction(conf, training_set, test_set)
        # self.data_kg = Knowledge(conf, training_set, test_set, knowledge_set)
        self.data = data
        self.data_kg = data_kg
        self.bestPerformance = []
        top = self.ranking['-topN'].split(',')
        self.topN = [int(num) for num in top]
        self.max_N = max(self.topN)
        
        self.output_path = kwargs['output_path']
        if not os.path.exists(self.output_path):
            os.makedirs(self.output_path)
            
    def print_model_info(self):
        super(GraphRecommender, self).print_model_info()
        # # print dataset statistics
        print('Training Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.training_size()))
        print('Test Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.test_size()))
        print('=' * 80)

    def build(self):
        pass

    def train(self):
        pass

    def predict(self, u):
        pass

    def test(self, user_emb, item_emb):
        def process_bar(num, total):
            rate = float(num) / total
            ratenum = int(50 * rate)
            r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum*2)
            sys.stdout.write(r)
            sys.stdout.flush()

        # predict
        rec_list = {}
        user_count = len(self.data.test_set)
        
        
        lst_users =  list(self.data_kg.userent.keys())
        lst_items =  list(self.data_kg.itement.keys())
        
        for i, user in enumerate(self.data.test_set):
            # s_find_candidates = time.time()
            # candidates = predict(user)
            user_id  = lst_users.index(user)
            score = torch.matmul(user_emb[user_id], item_emb.transpose(0, 1))
            candidates = score.cpu().numpy()
            
            # e_find_candidates = time.time()
            # print("Calculate candidates time: %f s" % (e_find_candidates - s_find_candidates))
            # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
            rated_list, li = self.data.user_rated(user)
            for item in rated_list:
                candidates[lst_items.index(item)] = -10e8
            # s_find_k_largest = time.time()
            ids, scores = find_k_largest(self.max_N, candidates)
            # e_find_k_largest = time.time()
            # print("Find k largest candidates: %f s" % (e_find_k_largest - s_find_k_largest))
            item_names = [lst_items[iid] for iid in ids]
            rec_list[user] = list(zip(item_names, scores))
            if i % 1000 == 0:
                process_bar(i, user_count)
        process_bar(user_count, user_count)
        print('')
        return rec_list

    def evaluate(self, rec_list):
        self.recOutput.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is hit.\n')
        for user in self.data.test_set:
            line = str(user) + ':'
            for item in rec_list[user]:
                line += ' (' + str(item[0]) + ',' + str(item[1]) + ')'
                if item[0] in self.data.test_set[user]:
                    line += '*'
            line += '\n'
            self.recOutput.append(line)
        current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time.time()))
        # output prediction result
        out_dir = self.output_path
        file_name = self.config['model.name'] + '@' + current_time + '-top-' + str(self.max_N) + 'items' + '.txt'
        FileIO.write_file(out_dir, file_name, self.recOutput)
        print('The result has been output to ', abspath(out_dir), '.')
        file_name = self.config['model.name'] + '@' + current_time + '-performance' + '.txt'
        self.result = ranking_evaluation(self.data.test_set, rec_list, self.topN)
        self.model_log.add('###Evaluation Results###')
        self.model_log.add(self.result)
        FileIO.write_file(out_dir, file_name, self.result)
        print('The result of %s:\n%s' % (self.model_name, ''.join(self.result)))

    def fast_evaluation(self, model, epoch, user_embed, item_embed, kwargs=None):
        print('Evaluating the model...')
        s_test = time.time()
        rec_list = self.test(user_embed, item_embed)
        e_test = time.time() 
        print("Test time: %f s" % (e_test - s_test))
        
        s_measure = time.time()
        measure = ranking_evaluation(self.data.test_set, rec_list, [self.max_N])
        e_measure = time.time()
        print("Measure time: %f s" % (e_measure - s_measure))
        
        if len(self.bestPerformance) > 0:
            count = 0
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            for k in self.bestPerformance[1]:
                if self.bestPerformance[1][k] > performance[k]:
                    count += 1
                else:
                    count -= 1
            if count < 0:
                self.bestPerformance[1] = performance
                self.bestPerformance[0] = epoch + 1
                # try:
                #     self.save(kwargs)
                # except:
                self.save(model)
        else:
            self.bestPerformance.append(epoch + 1)
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            self.bestPerformance.append(performance)
            # try:
            #     self.save(kwargs)
            # except:
            self.save(model)
        print('-' * 120)
        print('Real-Time Ranking Performance ' + ' (Top-' + str(self.max_N) + ' Item Recommendation)')
        measure = [m.strip() for m in measure[1:]]
        print('*Current Performance*')
        print('Epoch:', str(epoch + 1) + ',', '  |  '.join(measure))
        bp = ''
        # for k in self.bestPerformance[1]:
        #     bp+=k+':'+str(self.bestPerformance[1][k])+' | '
        bp += 'Hit Ratio' + ':' + str(self.bestPerformance[1]['Hit Ratio']) + '  |  '
        bp += 'Precision' + ':' + str(self.bestPerformance[1]['Precision']) + '  |  '
        bp += 'Recall' + ':' + str(self.bestPerformance[1]['Recall']) + '  |  '
        # bp += 'F1' + ':' + str(self.bestPerformance[1]['F1']) + ' | '
        bp += 'NDCG' + ':' + str(self.bestPerformance[1]['NDCG'])
        print('*Best Performance* ')
        print('Epoch:fast_evaluation', str(self.bestPerformance[0]) + ',', bp)
        print('-' * 120)
        return measure
    
    def save(self, model):
        with torch.no_grad():
            ego_emb =  model()
            user_emb = ego_emb[list(rec.data_kg.userent.keys())]
            item_emb = ego_emb[list(rec.data_kg.itement.keys())]
            self.best_user_emb, self.best_item_emb = user_emb, item_emb
        self.save_model(model)
    
    def save_model(self, model):
        # save model 
        current_time = strftime("%Y-%m-%d", localtime(time.time()))
        out_dir = self.output_path
        file_name =  self.config['model.name'] + '@' + current_time + '-weight' + '.pth'
        weight_file = out_dir + '/' + file_name 
        torch.save(model.state_dict(), weight_file)

    def save_loss(self, train_losses, rec_losses, kg_losses):
        df_train_loss = pd.DataFrame(train_losses, columns = ['ep', 'loss'])
        df_rec_loss = pd.DataFrame(rec_losses, columns = ['ep', 'loss'])
        df_kg_loss = pd.DataFrame(kg_losses, columns = ['ep', 'loss'])
        df_train_loss.to_csv(self.output_path + '/train_loss.csv')
        df_rec_loss.to_csv(self.output_path + '/rec_loss.csv')
        df_kg_loss.to_csv(self.output_path + '/kg_loss.csv')

    def save_perfomance_training(self, log_train):
        df_train_log = pd.DataFrame(log_train)
        df_train_log.to_csv(self.output_path + '/train_performance.csv')

In [18]:
class GraphRecommender(Recommender):
    def __init__(self, conf, data, data_kg, knowledge_set, **kwargs):
        super(GraphRecommender, self).__init__(conf, data, data_kg, knowledge_set,**kwargs)
        # self.data = Interaction(conf, training_set, test_set)
        # self.data_kg = Knowledge(conf, training_set, test_set, knowledge_set)
        self.data = data
        self.data_kg = data_kg
        self.bestPerformance = []
        top = self.ranking['-topN'].split(',')
        self.topN = [int(num) for num in top]
        self.max_N = max(self.topN)
        
        self.output_path = kwargs['output_path']
        if not os.path.exists(self.output_path):
            os.makedirs(self.output_path)
            
    def print_model_info(self):
        super(GraphRecommender, self).print_model_info()
        # # print dataset statistics
        print('Training Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.training_size()))
        print('Test Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.test_size()))
        print('=' * 80)

    def build(self):
        pass

    def train(self):
        pass

    def predict(self, u):
        pass

    def test(self, user_emb, item_emb):
        def process_bar(num, total):
            rate = float(num) / total
            ratenum = int(50 * rate)
            r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum*2)
            sys.stdout.write(r)
            sys.stdout.flush()

        # predict
        rec_list = {}
        user_count = len(self.data.test_set)
        
        
        lst_users =  list(self.data_kg.userent.keys())
        lst_items =  list(self.data_kg.itement.keys())
        
        for i, user in enumerate(self.data.test_set):
            # s_find_candidates = time.time()
            # candidates = predict(user)
            user_id  = lst_users.index(user)
            score = torch.matmul(user_emb[user_id], item_emb.transpose(0, 1))
            candidates = score.cpu().numpy()
            
            # e_find_candidates = time.time()
            # print("Calculate candidates time: %f s" % (e_find_candidates - s_find_candidates))
            # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
            rated_list, li = self.data.user_rated(user)
            for item in rated_list:
                candidates[lst_items.index(item)] = -10e8
            # s_find_k_largest = time.time()
            ids, scores = find_k_largest(self.max_N, candidates)
            # e_find_k_largest = time.time()
            # print("Find k largest candidates: %f s" % (e_find_k_largest - s_find_k_largest))
            item_names = [lst_items[iid] for iid in ids]
            rec_list[user] = list(zip(item_names, scores))
            if i % 1000 == 0:
                process_bar(i, user_count)
        process_bar(user_count, user_count)
        print('')
        return rec_list

    def evaluate(self, rec_list):
        self.recOutput.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is hit.\n')
        for user in self.data.test_set:
            line = str(user) + ':'
            for item in rec_list[user]:
                line += ' (' + str(item[0]) + ',' + str(item[1]) + ')'
                if item[0] in self.data.test_set[user]:
                    line += '*'
            line += '\n'
            self.recOutput.append(line)
        current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time.time()))
        # output prediction result
        out_dir = self.output_path
        file_name = self.config['model.name'] + '@' + current_time + '-top-' + str(self.max_N) + 'items' + '.txt'
        FileIO.write_file(out_dir, file_name, self.recOutput)
        print('The result has been output to ', abspath(out_dir), '.')
        file_name = self.config['model.name'] + '@' + current_time + '-performance' + '.txt'
        self.result = ranking_evaluation(self.data.test_set, rec_list, self.topN)
        self.model_log.add('###Evaluation Results###')
        self.model_log.add(self.result)
        FileIO.write_file(out_dir, file_name, self.result)
        print('The result of %s:\n%s' % (self.model_name, ''.join(self.result)))

    def fast_evaluation(self, model, epoch, user_embed, item_embed, kwargs=None):
        print('Evaluating the model...')
        s_test = time.time()
        rec_list = self.test(user_embed, item_embed)
        e_test = time.time() 
        print("Test time: %f s" % (e_test - s_test))
        
        s_measure = time.time()
        measure = ranking_evaluation(self.data.test_set, rec_list, [self.max_N])
        e_measure = time.time()
        print("Measure time: %f s" % (e_measure - s_measure))
        
        if len(self.bestPerformance) > 0:
            count = 0
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            for k in self.bestPerformance[1]:
                if self.bestPerformance[1][k] > performance[k]:
                    count += 1
                else:
                    count -= 1
            if count < 0:
                self.bestPerformance[1] = performance
                self.bestPerformance[0] = epoch + 1
                # try:
                #     self.save(kwargs)
                # except:
                self.save(model)
        else:
            self.bestPerformance.append(epoch + 1)
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            self.bestPerformance.append(performance)
            # try:
            #     self.save(kwargs)
            # except:
            self.save(model)
        print('-' * 120)
        print('Real-Time Ranking Performance ' + ' (Top-' + str(self.max_N) + ' Item Recommendation)')
        measure = [m.strip() for m in measure[1:]]
        print('*Current Performance*')
        print('Epoch:', str(epoch + 1) + ',', '  |  '.join(measure))
        bp = ''
        # for k in self.bestPerformance[1]:
        #     bp+=k+':'+str(self.bestPerformance[1][k])+' | '
        bp += 'Hit Ratio' + ':' + str(self.bestPerformance[1]['Hit Ratio']) + '  |  '
        bp += 'Precision' + ':' + str(self.bestPerformance[1]['Precision']) + '  |  '
        bp += 'Recall' + ':' + str(self.bestPerformance[1]['Recall']) + '  |  '
        # bp += 'F1' + ':' + str(self.bestPerformance[1]['F1']) + ' | '
        bp += 'NDCG' + ':' + str(self.bestPerformance[1]['NDCG'])
        print('*Best Performance* ')
        print('Epoch:fast_evaluation', str(self.bestPerformance[0]) + ',', bp)
        print('-' * 120)
        return measure
    
    def save(self, model):
        with torch.no_grad():
            ego_emb =  model()
            user_emb = ego_emb[list(rec.data_kg.userent.keys())]
            item_emb = ego_emb[list(rec.data_kg.itement.keys())]
            self.best_user_emb, self.best_item_emb = user_emb, item_emb
        self.save_model(model)
    
    def save_model(self, model):
        # save model 
        current_time = strftime("%Y-%m-%d", localtime(time.time()))
        out_dir = self.output_path
        file_name =  self.config['model.name'] + '@' + current_time + '-weight' + '.pth'
        weight_file = out_dir + '/' + file_name 
        torch.save(model.state_dict(), weight_file)

    def save_loss(self, train_losses, rec_losses, kg_losses):
        df_train_loss = pd.DataFrame(train_losses, columns = ['ep', 'loss'])
        df_rec_loss = pd.DataFrame(rec_losses, columns = ['ep', 'loss'])
        df_kg_loss = pd.DataFrame(kg_losses, columns = ['ep', 'loss'])
        df_train_loss.to_csv(self.output_path + '/train_loss.csv')
        df_rec_loss.to_csv(self.output_path + '/rec_loss.csv')
        df_kg_loss.to_csv(self.output_path + '/kg_loss.csv')

    def save_perfomance_training(self, log_train):
        df_train_log = pd.DataFrame(log_train)
        df_train_log.to_csv(self.output_path + '/train_performance.csv')

In [19]:
class GraphRecommender(Recommender):
    def __init__(self, conf, data, data_kg, knowledge_set, **kwargs):
        super(GraphRecommender, self).__init__(conf, data, data_kg, knowledge_set,**kwargs)
        # self.data = Interaction(conf, training_set, test_set)
        # self.data_kg = Knowledge(conf, training_set, test_set, knowledge_set)
        self.data = data
        self.data_kg = data_kg
        self.bestPerformance = []
        top = self.ranking['-topN'].split(',')
        self.topN = [int(num) for num in top]
        self.max_N = max(self.topN)
        
        self.output_path = kwargs['output_path']
        if not os.path.exists(self.output_path):
            os.makedirs(self.output_path)
            
    def print_model_info(self):
        super(GraphRecommender, self).print_model_info()
        # # print dataset statistics
        print('Training Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.training_size()))
        print('Test Set Size: (user number: %d, item number %d, interaction number: %d)' % (self.data.test_size()))
        print('=' * 80)

    def build(self):
        pass

    def train(self):
        pass

    def predict(self, u):
        pass

    def test(self, user_emb, item_emb):
        def process_bar(num, total):
            rate = float(num) / total
            ratenum = int(50 * rate)
            r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum*2)
            sys.stdout.write(r)
            sys.stdout.flush()

        # predict
        rec_list = {}
        user_count = len(self.data.test_set)
        
        
        lst_users =  list(self.data_kg.userent.keys())
        lst_items =  list(self.data_kg.itement.keys())
        
        for i, user in enumerate(self.data.test_set):
            # s_find_candidates = time.time()
            # candidates = predict(user)
            user_id  = lst_users.index(user)
            score = torch.matmul(user_emb[user_id], item_emb.transpose(0, 1))
            candidates = score.cpu().numpy()
            
            # e_find_candidates = time.time()
            # print("Calculate candidates time: %f s" % (e_find_candidates - s_find_candidates))
            # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
            rated_list, li = self.data.user_rated(user)
            for item in rated_list:
                candidates[lst_items.index(item)] = -10e8
            # s_find_k_largest = time.time()
            ids, scores = find_k_largest(self.max_N, candidates)
            # e_find_k_largest = time.time()
            # print("Find k largest candidates: %f s" % (e_find_k_largest - s_find_k_largest))
            item_names = [lst_items[iid] for iid in ids]
            rec_list[user] = list(zip(item_names, scores))
            if i % 1000 == 0:
                process_bar(i, user_count)
        process_bar(user_count, user_count)
        print('')
        return rec_list

    def evaluate(self, rec_list):
        self.recOutput.append('userId: recommendations in (itemId, ranking score) pairs, * means the item is hit.\n')
        for user in self.data.test_set:
            line = str(user) + ':'
            for item in rec_list[user]:
                line += ' (' + str(item[0]) + ',' + str(item[1]) + ')'
                if item[0] in self.data.test_set[user]:
                    line += '*'
            line += '\n'
            self.recOutput.append(line)
        current_time = strftime("%Y-%m-%d %H-%M-%S", localtime(time.time()))
        # output prediction result
        out_dir = self.output_path
        file_name = self.config['model.name'] + '@' + current_time + '-top-' + str(self.max_N) + 'items' + '.txt'
        FileIO.write_file(out_dir, file_name, self.recOutput)
        print('The result has been output to ', abspath(out_dir), '.')
        file_name = self.config['model.name'] + '@' + current_time + '-performance' + '.txt'
        self.result = ranking_evaluation(self.data.test_set, rec_list, self.topN)
        self.model_log.add('###Evaluation Results###')
        self.model_log.add(self.result)
        FileIO.write_file(out_dir, file_name, self.result)
        print('The result of %s:\n%s' % (self.model_name, ''.join(self.result)))

    def fast_evaluation(self, model, epoch, user_embed, item_embed, kwargs=None):
        print('Evaluating the model...')
        s_test = time.time()
        rec_list = self.test(user_embed, item_embed)
        e_test = time.time() 
        print("Test time: %f s" % (e_test - s_test))
        
        s_measure = time.time()
        measure = ranking_evaluation(self.data.test_set, rec_list, [self.max_N])
        e_measure = time.time()
        print("Measure time: %f s" % (e_measure - s_measure))
        
        if len(self.bestPerformance) > 0:
            count = 0
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            for k in self.bestPerformance[1]:
                if self.bestPerformance[1][k] > performance[k]:
                    count += 1
                else:
                    count -= 1
            if count < 0:
                self.bestPerformance[1] = performance
                self.bestPerformance[0] = epoch + 1
                # try:
                #     self.save(kwargs)
                # except:
                self.save(model)
        else:
            self.bestPerformance.append(epoch + 1)
            performance = {}
            for m in measure[1:]:
                k, v = m.strip().split(':')
                performance[k] = float(v)
            self.bestPerformance.append(performance)
            # try:
            #     self.save(kwargs)
            # except:
            self.save(model)
        print('-' * 120)
        print('Real-Time Ranking Performance ' + ' (Top-' + str(self.max_N) + ' Item Recommendation)')
        measure = [m.strip() for m in measure[1:]]
        print('*Current Performance*')
        print('Epoch:', str(epoch + 1) + ',', '  |  '.join(measure))
        bp = ''
        # for k in self.bestPerformance[1]:
        #     bp+=k+':'+str(self.bestPerformance[1][k])+' | '
        bp += 'Hit Ratio' + ':' + str(self.bestPerformance[1]['Hit Ratio']) + '  |  '
        bp += 'Precision' + ':' + str(self.bestPerformance[1]['Precision']) + '  |  '
        bp += 'Recall' + ':' + str(self.bestPerformance[1]['Recall']) + '  |  '
        # bp += 'F1' + ':' + str(self.bestPerformance[1]['F1']) + ' | '
        bp += 'NDCG' + ':' + str(self.bestPerformance[1]['NDCG'])
        print('*Best Performance* ')
        print('Epoch:fast_evaluation', str(self.bestPerformance[0]) + ',', bp)
        print('-' * 120)
        return measure
    
    def save(self, model):
        with torch.no_grad():
            ego_emb =  model()
            user_emb = ego_emb[list(rec.data_kg.userent.keys())]
            item_emb = ego_emb[list(rec.data_kg.itement.keys())]
            self.best_user_emb, self.best_item_emb = user_emb, item_emb
        self.save_model(model)
    
    def save_model(self, model):
        # save model 
        current_time = strftime("%Y-%m-%d", localtime(time.time()))
        out_dir = self.output_path
        file_name =  self.config['model.name'] + '@' + current_time + '-weight' + '.pth'
        weight_file = out_dir + '/' + file_name 
        torch.save(model.state_dict(), weight_file)

    def save_loss(self, train_losses, rec_losses, kg_losses):
        df_train_loss = pd.DataFrame(train_losses, columns = ['ep', 'loss'])
        df_rec_loss = pd.DataFrame(rec_losses, columns = ['ep', 'loss'])
        df_kg_loss = pd.DataFrame(kg_losses, columns = ['ep', 'loss'])
        df_train_loss.to_csv(self.output_path + '/train_loss.csv')
        df_rec_loss.to_csv(self.output_path + '/rec_loss.csv')
        df_kg_loss.to_csv(self.output_path + '/kg_loss.csv')

    def save_perfomance_training(self, log_train):
        df_train_log = pd.DataFrame(log_train)
        df_train_log.to_csv(self.output_path + '/train_performance.csv')

## Knowledge

In [20]:
import numpy as np
import pandas as pd 
from data.loader import FileIO
import collections  
from collections import defaultdict
import torch
import scipy.sparse as sp

from data.ui_graph import Interaction 

class Knowledge(Interaction):
    def __init__(self, conf,  training, test, knowledge):
        super().__init__(conf,  training, test)
        self.conf = conf    
        self.kg_data = knowledge 

        self.entity = {}
        self.id2ent = {}

        self.userent = {}
        self.itement = {}

        self.u2id = {}
        self.i2id = {}
        self.id2u = {}
        self.id2i = {}

        self.relation = {}
        self.id2rel = {}

        self.cf_train_data = np.array(training)

        self.training_set_e = defaultdict(dict)

        self.construct_data()
        self.kg_interaction_mat = self.__create_sparse_knowledge_interaction_matrix()

    def construct_data(self):
        kg_data = self.kg_data
        n_relations = max(kg_data['r']) + 1
        inverse_kg_data = kg_data.copy()
        inverse_kg_data = inverse_kg_data.rename({'h': 't', 't': 'h'}, axis='columns')
        inverse_kg_data['r'] += n_relations

        kg_data = pd.concat([kg_data, inverse_kg_data], axis=0, ignore_index=True, sort=False)

        # remap user_id 
        kg_data['r'] += 2
        
        kg_train_data = pd.concat([kg_data, inverse_kg_data], axis=0, ignore_index=True, sort=False)
        self.n_entities = max(max(kg_train_data['h']), max(kg_train_data['t'])) + 1
        self.n_relations = max(kg_train_data['r']) + 1

        # add interactions to kg data
        cf2kg_train_data = pd.DataFrame(np.zeros((self.n_cf_train, 3), dtype=np.int32), columns=['h', 'r', 't'])
        cf2kg_train_data['h'] = self.cf_train_data[:,0]
        cf2kg_train_data['t'] = self.cf_train_data[:,1]

        inverse_cf2kg_train_data = pd.DataFrame(np.ones((self.n_cf_train, 3), dtype=np.int32), columns=['h', 'r', 't'])
        inverse_cf2kg_train_data['h'] = self.cf_train_data[:,1]
        inverse_cf2kg_train_data['t'] = self.cf_train_data[:,0]

        self.kg_train_data = pd.concat([kg_train_data, cf2kg_train_data, inverse_cf2kg_train_data], ignore_index=True)
        self.n_kg_train = len(self.kg_train_data)

        self.n_users_entities = int(max(max(self.kg_train_data['h']), max(self.kg_train_data['t'])) + 1)

        # construct kg dict
        h_list = []
        t_list = []
        r_list = []

        self.train_kg_dict = collections.defaultdict(list)
        self.train_relation_dict = collections.defaultdict(list)

        for idx, row in self.kg_train_data.iterrows():
            h, r, t = int(row['h']), int(row['r']), int(row['t'])
            h_list.append(h)
            t_list.append(t)
            r_list.append(r)

            if h not in self.entity:
                self.entity[h] = len(self.entity)
                self.id2ent[self.entity[h]] = h
                # check h co phai user hay item k
                if h in self.user:
                    self.userent[h] = len(self.userent)
                    # self.id2userent[self.userent[h]] = h
                if h in self.item:
                    self.itement[h] = len(self.itement)
                    # self.id2itement[self.itement[h]] = h

            if t not in self.entity:
                self.entity[t] = len(self.entity)
                self.id2ent[self.entity[t]] = t 
                # check h co phai user hay item k 
                if t in self.user:
                    self.userent[t] = len(self.userent)
                    # self.id2userent[self.userent[t]] = t
                if t in self.item:
                    self.itement[t] = len(self.itement)
                    # self.id2itement[self.itement[t]] = t
            if r not in self.relation:
                self.relation[r] = len(self.relation)
                self.id2rel[self.relation[r]] = r 
            
            self.training_set_e[t][h] = r
            self.train_kg_dict[h].append((t, r))
            self.train_relation_dict[r].append((h, t))
        
        lst_user_entities = list(self.userent.keys())
        lst_item_entities = list(self.itement.keys())

        for idx, u in enumerate(lst_user_entities):
            self.u2id[u] = idx 
            self.id2u[idx] = u

        for idx, i in enumerate(lst_item_entities):
            self.i2id[i] = idx
            self.id2i[idx] = i 

    def get_entity_id(self, e):
        if e in self.entity:
            return self.entity[e]

    def __create_sparse_knowledge_interaction_matrix(self):
        """
            return a sparse adjacency matrix with the shape (entity number, entity number)
        """
        row, col, entries = [], [], []
        for idx, pair in self.kg_train_data.iterrows():
            head, tail = int(pair['h']), int(pair['t'])
            row += [self.entity[head]]
            col += [self.entity[tail]]
            entries += [1.0]
        interaction_mat = sp.csr_matrix((entries, (row, col)), shape=(self.n_users_entities, self.n_users_entities),dtype=np.float32)
        return interaction_mat

## Model

In [21]:
class Aggregator(nn.Module):

    def __init__(self, in_dim, out_dim, dropout, aggregator_type):
        super(Aggregator, self).__init__()
        self.in_dim = in_dim
        self.out_dim = out_dim
        self.dropout = dropout
        self.aggregator_type = aggregator_type

        self.message_dropout = nn.Dropout(dropout)
        self.activation = nn.LeakyReLU()

        if self.aggregator_type == 'gcn':
            self.linear = nn.Linear(self.in_dim, self.out_dim)       # W in Equation (6)
            nn.init.xavier_uniform_(self.linear.weight)

        elif self.aggregator_type == 'graphsage':
            self.linear = nn.Linear(self.in_dim * 2, self.out_dim)   # W in Equation (7)
            nn.init.xavier_uniform_(self.linear.weight)

        elif self.aggregator_type == 'bi-interaction':
            self.linear1 = nn.Linear(self.in_dim, self.out_dim)      # W1 in Equation (8)
            self.linear2 = nn.Linear(self.in_dim, self.out_dim)      # W2 in Equation (8)
            nn.init.xavier_uniform_(self.linear1.weight)
            nn.init.xavier_uniform_(self.linear2.weight)

        else:
            raise NotImplementedError


    def forward(self, ego_embeddings, A_in):
        """
        ego_embeddings:  (n_users + n_entities, in_dim)
        A_in:            (n_users + n_entities, n_users + n_entities), torch.sparse.FloatTensor
        """
        # Equation (3)
        side_embeddings = torch.matmul(A_in, ego_embeddings)

        if self.aggregator_type == 'gcn':
            # Equation (6) & (9)
            embeddings = ego_embeddings + side_embeddings
            embeddings = self.activation(self.linear(embeddings))

        elif self.aggregator_type == 'graphsage':
            # Equation (7) & (9)
            embeddings = torch.cat([ego_embeddings, side_embeddings], dim=1)
            embeddings = self.activation(self.linear(embeddings))

        elif self.aggregator_type == 'bi-interaction':
            # Equation (8) & (9)
            sum_embeddings = self.activation(self.linear1(ego_embeddings + side_embeddings))
            bi_embeddings = self.activation(self.linear2(ego_embeddings * side_embeddings))
            embeddings = bi_embeddings + sum_embeddings

        embeddings = self.message_dropout(embeddings)           # (n_users + n_entities, out_dim)
        return embeddings

In [22]:
class KGAT(nn.Module):

    def __init__(self, args, rec, A_in=None, user_pre_embed=None, item_pre_embed=None):

        super(KGAT, self).__init__()
        self.use_pretrain = args['use_pretrain']
        
        self.n_users = rec.data_kg.n_users
        self.n_entities = rec.data_kg.n_entities
        self.n_relations = rec.data_kg.n_relations
        self.n_users_entities = rec.data_kg.n_users_entities
        self.embed_dim = args['embed_dim']
        self.relation_dim = args['relation_dim']

        self.aggregation_type = args['aggregation_type']
        self.conv_dim_list = [args['embed_dim']] + eval(args['conv_dim_list'])
        self.mess_dropout = eval(args['mess_dropout'])
        self.n_layers = len(eval(args['conv_dim_list']))

        self.kg_l2loss_lambda = args['kg_l2loss_lambda']
        self.cf_l2loss_lambda = args['cf_l2loss_lambda']

        self.entity_user_embed = nn.Embedding(self.n_users_entities, self.embed_dim).to(device)
        self.relation_embed = nn.Embedding(self.n_relations, self.relation_dim).to(device)
        self.trans_M = nn.Parameter(torch.Tensor(self.n_relations, self.embed_dim, self.relation_dim)).to(device)

        self.all_user_idx = list(rec.data_kg.userent.keys())
        self.all_item_idx =  list(rec.data_kg.itement.keys())
        
        if (self.use_pretrain == 1) and (user_pre_embed is not None) and (item_pre_embed is not None):
            other_entity_embed = nn.Parameter(torch.Tensor(self.n_entities - item_pre_embed.shape[0], self.embed_dim))
            nn.init.xavier_uniform_(other_entity_embed)
            entity_user_embed = torch.cat([item_pre_embed, other_entity_embed, user_pre_embed], dim=0)
            self.entity_user_embed.weight = nn.Parameter(entity_user_embed)
        else:
            nn.init.xavier_uniform_(self.entity_user_embed.weight)

        nn.init.xavier_uniform_(self.relation_embed.weight)
        nn.init.xavier_uniform_(self.trans_M)

        self.aggregator_layers = nn.ModuleList()
        for k in range(self.n_layers):
            self.aggregator_layers.append(Aggregator(self.conv_dim_list[k], self.conv_dim_list[k + 1], self.mess_dropout[k], self.aggregation_type).to(device))

        self.A_in = nn.Parameter(torch.sparse.FloatTensor(self.n_users_entities, self.n_users_entities))
        if A_in is not None:
            self.A_in.data = A_in
        self.A_in.requires_grad = False

    def calc_cf_embeddings(self):
        ego_embed = self.entity_user_embed.weight
        all_embed = [ego_embed]

        for idx, layer in enumerate(self.aggregator_layers):
            ego_embed = layer(ego_embed, self.A_in.to(device))
            norm_embed = F.normalize(ego_embed, p=2, dim=1)
            all_embed.append(norm_embed)

        # Equation (11)
        all_embed = torch.cat(all_embed, dim=1)         # (n_users + n_entities, concat_dim)
        return all_embed

    def calc_cf_loss(self, user_ids, item_pos_ids, item_neg_ids):
        """
        user_ids:       (cf_batch_size)
        item_pos_ids:   (cf_batch_size)
        item_neg_ids:   (cf_batch_size)
        """
        all_embed = self.calc_cf_embeddings()                       # (n_users + n_entities, concat_dim)
        user_embed = all_embed[user_ids]                            # (cf_batch_size, concat_dim)
        item_pos_embed = all_embed[item_pos_ids]                    # (cf_batch_size, concat_dim)
        item_neg_embed = all_embed[item_neg_ids]                    # (cf_batch_size, concat_dim)

        # Equation (12)
        pos_score = torch.sum(user_embed * item_pos_embed, dim=1)   # (cf_batch_size)
        neg_score = torch.sum(user_embed * item_neg_embed, dim=1)   # (cf_batch_size)

        # Equation (13)
        # cf_loss = F.softplus(neg_score - pos_score)
        cf_loss = (-1.0) * F.logsigmoid(pos_score - neg_score)
        cf_loss = torch.mean(cf_loss)

        l2_loss = _L2_loss_mean(user_embed) + _L2_loss_mean(item_pos_embed) + _L2_loss_mean(item_neg_embed)
        loss = cf_loss + self.cf_l2loss_lambda * l2_loss
        return loss

    def calc_kg_loss(self, h, r, pos_t, neg_t):
        """
        h:      (kg_batch_size)
        r:      (kg_batch_size)
        pos_t:  (kg_batch_size)
        neg_t:  (kg_batch_size)
        """
        r_embed = self.relation_embed(r)                                                # (kg_batch_size, relation_dim)
        W_r = self.trans_M[r]                                                           # (kg_batch_size, embed_dim, relation_dim)

        h_embed = self.entity_user_embed(h)                                             # (kg_batch_size, embed_dim)
        pos_t_embed = self.entity_user_embed(pos_t)                                     # (kg_batch_size, embed_dim)
        neg_t_embed = self.entity_user_embed(neg_t)                                     # (kg_batch_size, embed_dim)

        r_mul_h = torch.bmm(h_embed.unsqueeze(1), W_r).squeeze(1)                       # (kg_batch_size, relation_dim)
        r_mul_pos_t = torch.bmm(pos_t_embed.unsqueeze(1), W_r).squeeze(1)               # (kg_batch_size, relation_dim)
        r_mul_neg_t = torch.bmm(neg_t_embed.unsqueeze(1), W_r).squeeze(1)               # (kg_batch_size, relation_dim)

        # Equation (1)
        pos_score = torch.sum(torch.pow(r_mul_h + r_embed - r_mul_pos_t, 2), dim=1)     # (kg_batch_size)
        neg_score = torch.sum(torch.pow(r_mul_h + r_embed - r_mul_neg_t, 2), dim=1)     # (kg_batch_size)

        # Equation (2)
        # kg_loss = F.softplus(pos_score - neg_score)
        kg_loss = (-1.0) * F.logsigmoid(neg_score - pos_score)
        kg_loss = torch.mean(kg_loss)

        l2_loss = _L2_loss_mean(r_mul_h) + _L2_loss_mean(r_embed) + _L2_loss_mean(r_mul_pos_t) + _L2_loss_mean(r_mul_neg_t)
        loss = kg_loss + self.kg_l2loss_lambda * l2_loss
        return loss

    def update_attention_batch(self, h_list, t_list, r_idx):
        r_embed = self.relation_embed.weight[r_idx]
        W_r = self.trans_M[r_idx]

        h_embed = self.entity_user_embed.weight[h_list]
        t_embed = self.entity_user_embed.weight[t_list]

        # Equation (4)
        r_mul_h = torch.matmul(h_embed, W_r)
        r_mul_t = torch.matmul(t_embed, W_r)
        v_list = torch.sum(r_mul_t * torch.tanh(r_mul_h + r_embed), dim=1)
        return v_list

    def update_attention(self, h_list, t_list, r_list, relations):
        device = self.A_in.device

        rows = []
        cols = []
        values = []

        for r_idx in relations:
            index_list = torch.where(r_list == r_idx)
            batch_h_list = h_list[index_list]
            batch_t_list = t_list[index_list]

            batch_v_list = self.update_attention_batch(batch_h_list, batch_t_list, r_idx)
            rows.append(batch_h_list)
            cols.append(batch_t_list)
            values.append(batch_v_list)

        rows = torch.cat(rows)
        cols = torch.cat(cols)
        values = torch.cat(values)

        indices = torch.stack([rows, cols])
        shape = self.A_in.shape
        A_in = torch.sparse.FloatTensor(indices, values, torch.Size(shape))

        # Equation (5)
        A_in = torch.sparse.softmax(A_in.cpu(), dim=1)
        self.A_in.data = A_in.to(device)

    def calc_score(self, user_ids, item_ids):
        """
        user_ids:  (n_users)
        item_ids:  (n_items)
        """
        all_embed = self.calc_cf_embeddings()           # (n_users + n_entities, concat_dim)
        user_embed = all_embed[user_ids]                # (n_users, concat_dim)
        item_embed = all_embed[item_ids]                # (n_items, concat_dim)

        # Equation (12)
        cf_score = torch.matmul(user_embed, item_embed.transpose(0, 1))    # (n_users, n_items)
        return cf_score 
    
    def forward(self):
        return self.calc_cf_embeddings()
    

## Util

In [23]:
def calculate_loss(anchor_emb, pos_emb, neg_emb, batch_size):
    calc_reg_loss = EmbLoss()
    rec_loss = bpr_loss(anchor_emb, pos_emb, neg_emb)
    reg_loss = reg * calc_reg_loss(anchor_emb, pos_emb, neg_emb) / batch_size
    return rec_loss, reg_loss
def _L2_loss_mean(x):
    return torch.mean(torch.sum(torch.pow(x, 2), dim=1, keepdim=False) / 2.)

In [24]:
def predict(self, u):
    user_id  = self.data.get_user_id(u)
    score = torch.matmul(self.user_emb[user_id], self.item_emb.transpose(0, 1))
    return score.cpu().numpy()  

## Train

In [25]:
def next_batch_pairwise(rec, batch_size):
    def sample_pos_items_for_u(user_dict, user_id, n_sample_pos_items):
        pos_items = user_dict[user_id]
        n_pos_items = len(pos_items)
        sample_pos_items = []
        while True:
            if len(sample_pos_items) == n_sample_pos_items:
                break

            pos_item_idx = np.random.randint(low=0, high=n_pos_items, size=1)[0]
            pos_item_id = pos_items[pos_item_idx]
            if pos_item_id not in sample_pos_items:
                sample_pos_items.append(pos_item_id)
        return sample_pos_items
    
    def sample_neg_items_for_u(user_dict, user_id, n_sample_neg_items):
        pos_items = user_dict[user_id]
        sample_neg_items = []
        while True:
            if len(sample_neg_items) == n_sample_neg_items:
                break

            neg_item_id = np.random.randint(low=0, high=rec.data.n_items, size=1)[0]
            if neg_item_id not in pos_items and neg_item_id not in sample_neg_items:
                sample_neg_items.append(neg_item_id)
        return sample_neg_items
    exist_users = list(rec.data.user_history_dict.keys())
    user_dict = rec.data.user_history_dict

    if batch_size <= len(exist_users):
        batch_user = random.sample(exist_users, batch_size)
    else:
        batch_user = [random.choice(exist_users) for _ in range(batch_size)]
    batch_pos_item, batch_neg_item = [], []
    for u in batch_user:
        batch_pos_item += sample_pos_items_for_u(user_dict, u, 1)
        batch_neg_item += sample_neg_items_for_u(user_dict, u, 1)
    
    batch_user = torch.LongTensor(batch_user).to(device)
    batch_pos_item = torch.LongTensor(batch_pos_item).to(device)
    batch_neg_item = torch.LongTensor(batch_neg_item).to(device)
    return batch_user, batch_pos_item, batch_neg_item
    
def next_batch_pairwise_kg(rec, batch_size):
    def sample_pos_triples_for_h( kg_dict, head, n_sample_pos_triples):
        pos_triples = kg_dict[head]
        n_pos_triples = len(pos_triples)

        sample_relations, sample_pos_tails = [], []
        while True:
            if len(sample_relations) == n_sample_pos_triples:
                break

            pos_triple_idx = np.random.randint(low=0, high=n_pos_triples, size=1)[0]
            tail = pos_triples[pos_triple_idx][0]
            relation = pos_triples[pos_triple_idx][1]

            if relation not in sample_relations and tail not in sample_pos_tails:
                sample_relations.append(relation)
                sample_pos_tails.append(tail)
        return sample_relations, sample_pos_tails


    def sample_neg_triples_for_h( kg_dict, head, relation, n_sample_neg_triples, highest_neg_idx):
        pos_triples = kg_dict[head]

        sample_neg_tails = []
        while True:
            if len(sample_neg_tails) == n_sample_neg_triples:
                break

            tail = np.random.randint(low=0, high=highest_neg_idx, size=1)[0]
            if (tail, relation) not in pos_triples and tail not in sample_neg_tails:
                sample_neg_tails.append(tail)
        return sample_neg_tails

    kg_dict = rec.data_kg.train_kg_dict
    exist_heads= kg_dict.keys()
    
    if batch_size <= len(exist_heads):
        batch_head = random.sample(exist_heads, batch_size)
    else:
        batch_head = [random.choice(exist_heads) for _ in range(batch_size)]

    batch_relation, batch_pos_tail, batch_neg_tail = [], [], []
    
    for h in batch_head:
        relation, pos_tail = sample_pos_triples_for_h(kg_dict, h, 1)
        batch_relation += relation
        batch_pos_tail += pos_tail

        neg_tail = sample_neg_triples_for_h(kg_dict, h, relation[0], 1, rec.data_kg.n_users_entities)
        batch_neg_tail += neg_tail
    
    batch_head = torch.LongTensor(batch_head).to(device)
    batch_relation = torch.LongTensor(batch_relation).to(device)
    batch_pos_tail = torch.LongTensor(batch_pos_tail).to(device)
    batch_neg_tail = torch.LongTensor(batch_neg_tail).to(device)
    
    return batch_head, batch_relation, batch_pos_tail, batch_neg_tail

In [12]:
def  train(train_model, rec, args):
    lst_train_losses = []
    lst_rec_losses = []
    lst_kg_losses = []
    lst_performances = []
    
    for ep in range(maxEpoch):
        train_losses = []
        cf_losses = []
        kg_losses = []

        train_model.train()

        cf_total_loss = 0

        n_cf_batch = int(rec.data.n_cf_train // batchSize + 1)
        n_kg_batch = int(rec.data_kg.n_kg_train // batchSizeKG + 1)
        
        for n in range(1,n_cf_batch+1):
            user_idx, pos_idx, neg_idx = next_batch_pairwise(rec, batchSize)
            cf_batch_loss = train_model.calc_cf_loss(user_idx, pos_idx, neg_idx)
            cf_batch_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            cf_total_loss  += cf_batch_loss.item()
            
            cf_losses.append(cf_batch_loss.item())

            if (n % 20) == 0:
                print('CF Training: Epoch {:04d} Iter {:04d} / {:04d} | Iter Loss {:.4f} | Iter Mean Loss {:.4f}'.format(ep, n, n_cf_batch, cf_batch_loss.item(), cf_total_loss / (n+1)))

        kg_total_loss = 0 

        for n in range(1, n_kg_batch+1):
            kg_batch_head, kg_batch_relation, kg_batch_pos_tail, kg_batch_neg_tail = next_batch_pairwise_kg(rec, batchSizeKG)
            # entity_emb = train_model(mode='kg')
            kg_batch_loss =  train_model.calc_kg_loss(kg_batch_head, kg_batch_relation, kg_batch_pos_tail, kg_batch_neg_tail)
            kg_batch_loss.backward()
            kg_optimizer.step()
            kg_optimizer.zero_grad()
            kg_total_loss += kg_batch_loss.item()
            
            kg_losses.append(kg_batch_loss.item())

            if (n % 20) == 0:
                print('KG Training: Epoch {:04d} Iter {:04d} / {:04d} | Iter Mean Loss {:.4f}'.format(ep, n, n_kg_batch, kg_batch_loss.item(), kg_total_loss / (n+1)))
        
        train_loss = np.mean(cf_losses) +  np.mean(kg_losses)
        cf_loss = np.mean(cf_losses)
        kg_loss = np.mean(kg_losses)
        
        scheduler_cf.step(cf_loss)
        scheduler_kg.step(kg_loss)
        
        lst_train_losses.append([ep, train_loss])
        lst_rec_losses.append([ep,cf_loss])
        lst_kg_losses.append([ep, kg_loss])

        # Evaluation
        train_model.eval()
        with torch.no_grad():
            ego_emb  = train_model()
            user_emb = ego_emb[list(rec.data_kg.userent.keys())]
            item_emb = ego_emb[list(rec.data_kg.itement.keys())]
            data_ep = rec.fast_evaluation(train_model, ep, user_emb, item_emb)
        lst_performances.append(data_ep)
        
    rec.save_loss(lst_train_losses, lst_rec_losses, lst_kg_losses)
    rec.save_perfomance_training(lst_performances)
    user_emb, item_emb = rec.best_user_emb, rec.best_item_emb
    return user_emb, item_emb

## Test

In [26]:
def test(rec, user_emb, item_emb):
    def process_bar(num, total):
        rate = float(num) / total
        ratenum = int(50 * rate)
        r = '\rProgress: [{}{}]{}%'.format('+' * ratenum, ' ' * (50 - ratenum), ratenum*2)
        sys.stdout.write(r)
        sys.stdout.flush()

    # predict
    rec_list = {}
    user_count = len(rec.data.test_set)
    for i, user in enumerate(rec.data.test_set):
        user_id  = rec.data_kg.u2id[user]
        score = torch.matmul(user_emb[user_id], item_emb.transpose(0, 1))
        candidates = score.cpu().numpy()

        # e_find_candidates = time.time()
        # print("Calculate candidates time: %f s" % (e_find_candidates - s_find_candidates))
        # predictedItems = denormalize(predictedItems, self.data.rScale[-1], self.data.rScale[0])
        rated_list, li = rec.data.user_rated(user)
        for item in rated_list:
            candidates[rec.data_kg.i2id[item]] = -10e8
        # s_find_k_largest = time.time()
        ids, scores = find_k_largest(rec.max_N, candidates)

        item_names = [rec.data_kg.id2i[iid] for iid in ids]
        rec_list[user] = list(zip(item_names, scores))
        if i % 1000 == 0:
            process_bar(i, user_count)
    process_bar(user_count, user_count)
    print('')
    rec.evaluate(rec_list)

## Main

In [27]:
model = 'KGAT'

config = ModelConf('./conf/' + model + '.conf')
lRates = [0.1, 0.01]
lRateKGs = [0.1, 0.01]
lrDecays = [0.9]
maxEpochs = [1000]
batchSizes = [2048]
batchSizeKGs = [10000]
nLayers = [1,2]
regs = [ 1e-5, 1e-3, 0.1]
regkgs = [ 1e-5, 1e-3, 0.1]
embeddingSizes = [128]
datasets = ['lastfm']
dataset = datasets[0]

In [15]:
training_data = FileIO.load_data_set('./dataset/' + dataset + '/' +config['training.set'], config['model.type'])
test_data = FileIO.load_data_set('./dataset/' + dataset + '/'  +config['test.set'], config['model.type'])
knowledge_set = FileIO.load_kg_data('./dataset/' + dataset +'/'+ dataset +'.kg')
data = Interaction(config, training_data, test_data)
data_kg = Knowledge(config, training_data, test_data, knowledge_set)

In [None]:
hyperparameters = [lRates, lRateKGs, lrDecays, maxEpochs, batchSizes, batchSizeKGs, nLayers, regs, regkgs, embeddingSizes, datasets]

for params in product(*hyperparameters):
    lRate, lRateKG, lrDecay, maxEpoch, batchSize, batchSizeKG, nLayer, reg, reg_kg, embeddingSize, dataset = params
    args = {
        'lr': lRate,
        'lr_kg': lRateKG,
        'max_epoch': maxEpoch,
        'batch_size': batchSize, 
        'lr_decay': lrDecay,
        'dataset': dataset,
        'n_layers': nLayer,
        'use_pretrain': 0,
        'input_dim': 32,
        'embed_dim': embeddingSize,
        'relation_dim': 32,
        'aggregation_type': 'bi-interaction',
        'mess_dropout': '[0.1, 0.1, 0.1]',
        'conv_dim_list': '[64, 32, 16]',
        'kg_l2loss_lambda': reg_kg,
        'cf_l2loss_lambda': reg
    }
    args['output_path'] =  f"./results/KGAT/{dataset}/@KGAT-inp_emb:{args['input_dim']}-emb:{args['embed_dim']}-bs:{args['batch_size']}-lr:{args['lr']}-n_layers:{args['n_layers']}/"
    rec = GraphRecommender(config, data, data_kg, knowledge_set, **args)
    train_model = KGAT(args, rec)
    optimizer  = torch.optim.Adam(train_model.parameters(), lr=lRate)
    kg_optimizer = torch.optim.Adam(train_model.parameters(), lr=lRateKG)
    scheduler_cf = ReduceLROnPlateau(optimizer, 'min', factor=lrDecay, patience=10)
    scheduler_kg = ReduceLROnPlateau(kg_optimizer, 'min', factor=lrDecay, patience=10)
    user_emb, item_emb = train(train_model, rec, args)   
    test(rec, user_emb, item_emb)

parameter ss_rate is not found in the configuration file!
CF Training: Epoch 0000 Iter 0020 / 0034 | Iter Loss 0.9084 | Iter Mean Loss 0.8627


since Python 3.9 and will be removed in a subsequent version.
  batch_head = random.sample(exist_heads, batch_size)


KG Training: Epoch 0000 Iter 0020 / 0216 | Iter Mean Loss 0.0308
KG Training: Epoch 0000 Iter 0040 / 0216 | Iter Mean Loss 0.0201
KG Training: Epoch 0000 Iter 0060 / 0216 | Iter Mean Loss 0.0176
KG Training: Epoch 0000 Iter 0080 / 0216 | Iter Mean Loss 0.0197
KG Training: Epoch 0000 Iter 0100 / 0216 | Iter Mean Loss 0.0201
KG Training: Epoch 0000 Iter 0120 / 0216 | Iter Mean Loss 0.0161
KG Training: Epoch 0000 Iter 0140 / 0216 | Iter Mean Loss 0.0164
KG Training: Epoch 0000 Iter 0160 / 0216 | Iter Mean Loss 0.0177
KG Training: Epoch 0000 Iter 0180 / 0216 | Iter Mean Loss 0.0170
KG Training: Epoch 0000 Iter 0200 / 0216 | Iter Mean Loss 0.0183
Evaluating the model...
Progress: [++++++++++++++++++++++++++++++++++++++++++++++++++]100%
Test time: 6.456246 s
Measure time: 0.014898 s
------------------------------------------------------------------------------------------------------------------------
Real-Time Ranking Performance  (Top-20 Item Recommendation)
*Current Performance*
Epoch: 1,

since Python 3.9 and will be removed in a subsequent version.
  batch_head = random.sample(exist_heads, batch_size)


KG Training: Epoch 0001 Iter 0020 / 0216 | Iter Mean Loss 0.0204
KG Training: Epoch 0001 Iter 0040 / 0216 | Iter Mean Loss 0.0205
KG Training: Epoch 0001 Iter 0060 / 0216 | Iter Mean Loss 0.0197
KG Training: Epoch 0001 Iter 0080 / 0216 | Iter Mean Loss 0.0243
KG Training: Epoch 0001 Iter 0100 / 0216 | Iter Mean Loss 0.0246
KG Training: Epoch 0001 Iter 0120 / 0216 | Iter Mean Loss 0.0247
KG Training: Epoch 0001 Iter 0140 / 0216 | Iter Mean Loss 0.0118
KG Training: Epoch 0001 Iter 0160 / 0216 | Iter Mean Loss 0.0145
KG Training: Epoch 0001 Iter 0180 / 0216 | Iter Mean Loss 0.0157
KG Training: Epoch 0001 Iter 0200 / 0216 | Iter Mean Loss 0.0244
Evaluating the model...
Progress: [++++++++++++++++++++++++++++++++++++++++++++++++++]100%
Test time: 3.390411 s
Measure time: 0.014851 s
------------------------------------------------------------------------------------------------------------------------
Real-Time Ranking Performance  (Top-20 Item Recommendation)
*Current Performance*
Epoch: 2,