In [1]:
import sys
import os
import random
import numpy as np
from collections import Counter

class MetaPathGenerator:
    """Build a user/game/item network from text files and write meta-path walks.

    Two walk schemes are available: user-game-item-game-user (UGIGU) and
    user-item-game-item-user (UIGIU). Each walk is written as one line of
    space-separated node names taken from the id -> name dictionaries.
    """

    def __init__(self):
        # id -> name and name -> id dictionaries for each node type.
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        # Adjacency lists read straight from the edge files.
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        # Purchase-weighted lists rebuilt by the walk generators.
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        # Kept for interface compatibility; not populated by this class.
        self.user_itemlist = dict()
        self.item_userlist = dict()

    @staticmethod
    def _read_id_names(path, id_to_name, name_to_id):
        """Fill both dictionaries from a two-column, tab-separated id/name file."""
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue  # rows without exactly two fields are ignored
                name = toks[1].replace(" ", "")
                id_to_name[toks[0]] = name
                # Key the reverse map by the same space-free name that is
                # written into the walks, so a later name -> id lookup succeeds.
                name_to_id[name] = toks[0].replace(" ", "")

    @staticmethod
    def _read_edges(path, left_to_right, right_to_left):
        """Fill both adjacency dictionaries from a two-column, tab-separated file."""
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue
                left, right = toks
                left_to_right.setdefault(left, []).append(right)
                right_to_left.setdefault(right, []).append(left)

    @staticmethod
    def _print_sample(mapping, limit=20):
        """Print the first `limit` entries of `mapping` followed by a blank gap."""
        sample = {}
        for key in mapping:
            if len(sample) >= limit:
                break
            sample[key] = mapping[key]
        print(sample)
        print("\n")

    def read_data(self, dirpath):
        """Load the id dictionaries and the edge lists found in `dirpath`.

        Files read: id_user.txt, id_game.txt, id_item.txt, game_user.txt,
        item_user.txt and item_game.txt. Each is tab-separated with two
        columns; rows with any other number of fields are skipped.

        Raises:
            OSError: if one of the files cannot be opened.
        """
        self._read_id_names(os.path.join(dirpath, "id_user.txt"), self.id_user, self.user_id)
        print("#users", len(self.id_user))

        self._read_id_names(os.path.join(dirpath, "id_game.txt"), self.id_game, self.game_id)
        print("#games", len(self.id_game))

        self._read_id_names(os.path.join(dirpath, "id_item.txt"), self.id_item, self.item_id)
        print("#items", len(self.id_item))

        # game -> users and user -> games
        self._read_edges(os.path.join(dirpath, "game_user.txt"), self.game_user, self.user_game)
        self._print_sample(self.user_game)

        # item -> users and user -> items
        self._read_edges(os.path.join(dirpath, "item_user.txt"), self.item_user, self.user_item)
        self._print_sample(self.user_item)

        # item -> games and game -> items
        self._read_edges(os.path.join(dirpath, "item_game.txt"), self.item_game, self.game_item)
        self._print_sample(self.item_game)

    def _build_purchase_lists(self):
        """Rebuild the purchase-weighted game/item/user lists from scratch.

        For every game with users, each (item in that game, user of that
        item) pair adds one entry to the four lists, so sampling uniformly
        from a list is weighted by the number of such pairs. The lists are
        cleared first so repeated calls do not accumulate duplicates.
        """
        self.game_itemlist.clear()
        self.game_userlist.clear()
        self.item_gamelist.clear()
        self.user_gamelist.clear()
        for game in self.game_user:
            game_items = self.game_itemlist[game] = []
            game_users = self.game_userlist[game] = []
            # A game may have users but no listed items.
            for item in self.game_item.get(game, []):
                for user in self.item_user.get(item, []):
                    game_items.append(item)
                    game_users.append(user)
                    self.item_gamelist.setdefault(item, []).append(game)
                    self.user_gamelist.setdefault(user, []).append(game)

    @staticmethod
    def _pick(candidates):
        """Return a uniformly random element, or None when there is none."""
        if not candidates:
            return None
        return candidates[random.randrange(len(candidates))]

    def _random_walk(self, user0, walklength, steps):
        """Return the node names of one walk that starts at `user0`.

        `steps` is a sequence of (neighbour lists, id -> name dictionary)
        pairs applied in order, `walklength` times. The walk stops early if
        the current node has no neighbours for the next step.
        """
        path = [self.id_user[user0]]
        node = user0  # every walk restarts from its own start user
        for _ in range(walklength):
            for neighbours, names in steps:
                node = self._pick(neighbours.get(node))
                if node is None:
                    return path
                path.append(names[node])
        return path

    def _write_walks(self, outfilename, start_users, numwalks, walklength, steps):
        """Write `numwalks` walks per start user to `outfilename`, one per line."""
        with open(outfilename, 'w') as outfile:
            for user0 in start_users:
                for _ in range(numwalks):
                    walk = self._random_walk(user0, walklength, steps)
                    outfile.write(" ".join(walk) + "\n")

    def generate_random_ugigu(self, outfilename, numwalks, walklength):
        """Write user-game-item-game-user walks for every user in `user_game`.

        Args:
            outfilename: path of the text file to (over)write.
            numwalks: number of walks started from each user.
            walklength: number of U-G-I-G-U repetitions per walk.

        A walk that reaches a node without neighbours for the next hop is
        written truncated at that node.

        Raises:
            KeyError: if a visited node id is missing from its id -> name
                dictionary.
        """
        self._build_purchase_lists()
        print("Number of users in consideration set:", len(self.user_game))
        steps = (
            (self.user_game, self.id_game),      # a game the user plays
            (self.game_itemlist, self.id_item),  # an item bought in that game
            (self.item_gamelist, self.id_game),  # a game offering that item
            (self.game_user, self.id_user),      # a user of that game
        )
        self._write_walks(outfilename, self.user_game, numwalks, walklength, steps)

    def generate_random_uigiu(self, outfilename, numwalks, walklength):
        """Write user-item-game-item-user walks for every user in `user_item`.

        Args:
            outfilename: path of the text file to (over)write.
            numwalks: number of walks started from each user.
            walklength: number of U-I-G-I-U repetitions per walk.

        A walk that reaches a node without neighbours for the next hop is
        written truncated at that node.

        Raises:
            KeyError: if a visited node id is missing from its id -> name
                dictionary.
        """
        self._build_purchase_lists()
        steps = (
            (self.user_item, self.id_item),      # an item the user bought
            (self.item_gamelist, self.id_game),  # a game offering that item
            (self.game_itemlist, self.id_item),  # an item bought in that game
            (self.item_user, self.id_user),      # a user who bought that item
        )
        self._write_walks(outfilename, self.user_item, numwalks, walklength, steps)
        
                                
# Example command lines for a script form that takes its settings as arguments
# (walks per node, walk length, input folder, output file):
#python py4genMetaPaths.py 1000 100 net_aminer output.aminer.w1000.l100.txt
#python py4genMetaPaths.py 1000 100 net_dbis   output.dbis.w1000.l100.txt

# Command-line parsing is disabled; the fixed values further down are used instead.
#numwalks = int(sys.argv[1])
#walklength = int(sys.argv[2])
#dirpath = sys.argv[3]
#outfilename = sys.argv[4]

# Number of walks started from each user.
numwalks = 50
# Number of meta-path repetitions per walk.
walklength = 20
# Folder containing the tab-separated input files opened by read_data().
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
# Output files for the UGIGU and UIGIU walks respectively.
outfilename1 = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Latent Vector Representation/Gaming_input_metapath_UGIGU_w50_l20.txt'
outfilename2 = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Latent Vector Representation/Gaming_input_metapath_UIGIU_w50_l20.txt'



def main():
    """Load the network data; the walk-writing calls are currently commented out."""
    generator = MetaPathGenerator()
    generator.read_data(dirpath)
    # Uncomment to write the UGIGU / UIGIU walk files.
    #generator.generate_random_ugigu(outfilename1, numwalks, walklength)
    #generator.generate_random_uigiu(outfilename2, numwalks, walklength)
    print("\nSuccess")


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
{'42465': ['33300'], '42535': ['33300'], '48889': ['33300'], '50613': ['33300', '33306'], '50643': ['33300', '33323'], '54583': ['33300'], '58043': ['33300'], '71509': ['33300', '33321'], '76127': ['33300'], '80377': ['33300'], '84133': ['33300'], '89961': ['33300', '33305', '33332'], '94437': ['33300', '33318'], '94577': ['33300', '33321', '33323'], '94629': ['33300', '33305'], '96461': ['33300'], '98003': ['33300'], '101481': ['33300', '33321', '33323'], '102363': ['33300', '33302', '33304'], '102763': ['33300']}


{'43639': ['89', '9001'], '44765': ['lolvn001', 'lolvn002'], '45121': ['MSTAR001', 'MSTAR004', 'MSTAR006'], '45693': ['tw_18_5'], '47495': ['loltw004'], '47503': ['145', '151', '2', '239', '43', '57', '9000', '9001', '9002'], '52609': ['lolvn003'], '54583': ['package_1', 'package_2', 'package_4'], '61621': ['tw_18_5'], '61647': ['RP001', 'RP002', 'RP003'], '64165': ['tw_18_1', 'tw_18_5'], '66201': ['15325'], '67707': ['lolvn002', 'lolvn

In [2]:
import sys
import os
import random
import numpy as np
from collections import Counter

class RecommendVirtualItems:
    """Recommend items to selected users from learned node vectors.

    The class loads the user/game/item network, the purchases made during
    three test windows (2, 4 and 10 days), a list of selected users and a
    file of node vectors. Three recommendation strategies fill
    `user_recitems`; `get_accuracy_score` prints the share of selected users
    for whom at least one recommended item appears in each test window.
    """

    def __init__(self):
        # id -> name and name -> id dictionaries for each node type.
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        # Adjacency lists read from the edge files.
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        # Kept for interface compatibility; not populated by this class.
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        self.user_itemlist = dict()
        self.item_userlist = dict()
        # Purchases observed during each test window.
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        # user id -> recommended item ids, overwritten by each strategy.
        self.user_recitems = dict()
        self.nuser_recitems = dict()
        # User ids read from selected_user.txt.
        self.userlist = []
        self.selected_user_items = dict()
        # id -> vector (list of string tokens); filled by read_latent_output().
        # Created here so the strategies can run before any vectors are loaded.
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()

    @staticmethod
    def _read_id_names(path, id_to_name, name_to_id):
        """Fill both dictionaries from a two-column, tab-separated id/name file."""
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue  # rows without exactly two fields are ignored
                name = toks[1].replace(" ", "")
                id_to_name[toks[0]] = name
                # read_latent_output() looks nodes up by space-free tokens, so
                # the reverse map must use the same space-free name as its key.
                name_to_id[name] = toks[0].replace(" ", "")

    @staticmethod
    def _read_edges(path, left_to_right, right_to_left):
        """Fill both adjacency dictionaries from a two-column, tab-separated file."""
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue
                left, right = toks
                left_to_right.setdefault(left, []).append(right)
                right_to_left.setdefault(right, []).append(left)

    @staticmethod
    def _print_sample(mapping, limit=20):
        """Print the first `limit` entries of `mapping` followed by a blank gap."""
        sample = {}
        for key in mapping:
            if len(sample) >= limit:
                break
            sample[key] = mapping[key]
        print(sample)
        print("\n")

    def read_data(self, dirpath):
        """Load dictionaries, edges, test-window purchases and selected users.

        Files read from `dirpath`: id_user.txt, id_game.txt, id_item.txt,
        game_user.txt, item_user.txt, item_game.txt,
        test2days_item_user.txt, test4days_item_user.txt,
        test10days_item_user.txt (all tab-separated, two columns; other rows
        are skipped) and selected_user.txt (one user id per line).

        Raises:
            OSError: if one of the files cannot be opened.
        """
        self._read_id_names(os.path.join(dirpath, "id_user.txt"), self.id_user, self.user_id)
        print("#users", len(self.id_user))

        self._read_id_names(os.path.join(dirpath, "id_game.txt"), self.id_game, self.game_id)
        print("#games", len(self.id_game))

        self._read_id_names(os.path.join(dirpath, "id_item.txt"), self.id_item, self.item_id)
        print("#items", len(self.id_item))

        # game -> users and user -> games
        self._read_edges(os.path.join(dirpath, "game_user.txt"), self.game_user, self.user_game)
        self._print_sample(self.user_game)

        # item -> users and user -> items
        self._read_edges(os.path.join(dirpath, "item_user.txt"), self.item_user, self.user_item)
        self._print_sample(self.user_item)

        # item -> games and game -> items
        self._read_edges(os.path.join(dirpath, "item_game.txt"), self.item_game, self.game_item)
        self._print_sample(self.item_game)

        test_windows = (
            ("2", self.test2days_item_user, self.test2days_user_item),
            ("4", self.test4days_item_user, self.test4days_user_item),
            ("10", self.test10days_item_user, self.test10days_user_item),
        )
        for days, item_to_users, user_to_items in test_windows:
            path = os.path.join(dirpath, "test" + days + "days_item_user.txt")
            self._read_edges(path, item_to_users, user_to_items)
            print("No of users who bought items in first " + days + " days of test period:",
                  len(user_to_items))

        with open(os.path.join(dirpath, "selected_user.txt")) as sufile:
            for line in sufile:
                user = line.strip()
                if user:  # a blank line is not a user id
                    self.userlist.append(user)

    def read_latent_output(self, outputfilename, vector_size=128):
        """Load node vectors from a space-separated text file.

        Only lines with exactly `vector_size + 1` tokens are used: a node
        name followed by the vector components. The first character of the
        name selects the node type ('u' user, 'g' game, 'i' item); other
        lines are ignored. Vectors are stored as lists of string tokens,
        keyed by the id found through the name -> id dictionaries.

        Args:
            outputfilename: path of the vector file.
            vector_size: number of components per vector (default 128).
        """
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()
        targets = {
            'u': (self.user_id, self.user_vec),
            'g': (self.game_id, self.game_vec),
            'i': (self.item_id, self.item_vec),
        }
        unknown = 0
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) != vector_size + 1:
                    continue
                node = toks[0]
                target = targets.get(node[:1])
                if target is None:
                    continue
                name_to_id, vectors = target
                node_id = name_to_id.get(node)
                if node_id is None:
                    unknown += 1  # name not present in the loaded dictionaries
                    continue
                vectors[node_id] = toks[1:]
        if unknown:
            print("Skipped", unknown, "vectors whose node name is not in the id dictionaries")
        self._print_sample(self.user_vec, 2)
        self._print_sample(self.game_vec, 2)
        self._print_sample(self.item_vec, 2)

    def get_cosine_similarity(self, vec1, vec2):
        """Return the cosine similarity of two equal-length numeric sequences.

        Components may be numbers or numeric strings. When either vector has
        zero length the similarity is undefined and 0.0 is returned instead
        of NaN, so that results stay sortable.
        """
        a = np.asarray(vec1, dtype=float)
        b = np.asarray(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    @staticmethod
    def _float_vectors(vectors):
        """Return a copy of `vectors` with every value parsed to a float array."""
        return {key: np.array(vec, dtype=float) for key, vec in vectors.items()}

    @staticmethod
    def _top_keys(scores, count=5):
        """Return the `count` highest-scoring keys; ties keep insertion order."""
        ranked = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        return [key for key, _ in ranked[:count]]

    def make_item_recommendations_selected_users(self):
        """Recommend to each selected user the five items closest to the user.

        Closeness is the cosine similarity (rounded to 3 decimals) between
        the user's vector and each item vector. Users without a vector get
        no entry in `user_recitems`.
        """
        self.user_recitems = dict()
        item_vectors = self._float_vectors(self.item_vec)  # parse once, reuse for all users
        for user in self.userlist:
            uservec = self.user_vec.get(user)
            if uservec is None or len(uservec) == 0:
                continue
            user_array = np.array(uservec, dtype=float)
            item_simscore = {
                item: round(self.get_cosine_similarity(user_array, itemvec), 3)
                for item, itemvec in item_vectors.items()
            }
            self.user_recitems[user] = self._top_keys(item_simscore)
        self._print_sample(self.user_recitems)

    def make_first_alternate_item_recommendations_selected_users(self):
        """Recommend the five items most similar to any item the user bought.

        Each candidate item is scored by its highest cosine similarity
        (rounded to 3 decimals) to one of the user's purchased items, never
        comparing an item with itself. Purchased items without a vector are
        skipped. Users with no purchases get no entry in `user_recitems`.
        """
        self.user_recitems = dict()
        print("\n First Alternate Approach\n")
        print("#Number of users randomly selected:", len(self.userlist))
        print("\n")
        item_vectors = self._float_vectors(self.item_vec)  # parse once, reuse for all users
        for user in self.userlist:
            items = self.user_item.get(user, [])
            if len(items) == 0:
                continue
            item_simscore = {}
            # dict.fromkeys drops repeated purchases while keeping their order.
            for item1 in dict.fromkeys(items):
                item1vec = item_vectors.get(item1)
                if item1vec is None:
                    continue
                for item2, item2vec in item_vectors.items():
                    if item1 == item2:
                        continue
                    score = round(self.get_cosine_similarity(item1vec, item2vec), 3)
                    # Keep the best score over all purchases; a plain assignment
                    # would let the last purchased item overwrite the others.
                    if item2 not in item_simscore or score > item_simscore[item2]:
                        item_simscore[item2] = score
            self.user_recitems[user] = self._top_keys(item_simscore)
        self._print_sample(self.user_recitems)

    def make_second_alternate_item_recommendations_selected_users(self):
        """Recommend items bought by the five users most similar to each user.

        Similarity is the cosine similarity (rounded to 3 decimals) between
        user vectors, computed against every other user in `user_item` that
        has a vector. The recommendation is the union of those five users'
        purchases minus the items the selected user already bought. Selected
        users without a vector get no entry in `user_recitems`.
        """
        self.user_recitems = dict()
        print("\n Second Alternate Approach\n")
        print("#Number of users randomly selected:", len(self.userlist))
        print("\n")
        for user1 in self.userlist:
            user1vec = self.user_vec.get(user1)
            if user1vec is None or len(user1vec) == 0:
                continue
            user1_array = np.array(user1vec, dtype=float)
            owned = set(self.user_item.get(user1, []))
            user_simscore = {}
            for user2 in self.user_item:
                if user2 == user1:
                    continue
                user2vec = self.user_vec.get(user2)
                if user2vec is None or len(user2vec) == 0:
                    continue
                user_simscore[user2] = round(
                    self.get_cosine_similarity(user1_array, user2vec), 3)
            # Only the five nearest users contribute, so collect their items here
            # rather than building a candidate list for every compared user.
            recommended = set()
            for neighbour in self._top_keys(user_simscore):
                recommended.update(
                    item for item in self.user_item[neighbour] if item not in owned)
            self.user_recitems[user1] = list(recommended)
        self._print_sample(self.user_recitems)

    def get_accuracy_score(self):
        """Print, per test window, the share of selected users with a hit.

        A user counts as a hit for a window when at least one item in
        `user_recitems` for that user was bought by them in that window. The
        denominator is the number of selected users (`len(self.userlist)`);
        0.0 is printed when there are none.
        """
        windows = (
            ("2", self.test2days_user_item),
            ("4", self.test4days_user_item),
            ("10", self.test10days_user_item),
        )
        hits = {days: 0 for days, _ in windows}
        for user, recommended in self.user_recitems.items():
            recommended_items = set(recommended)
            for days, purchases in windows:
                if recommended_items & set(purchases.get(user, [])):
                    hits[days] += 1
        total_users = len(self.userlist)
        print("Accuracy for UGIGU Metapath based Recommender System")
        for days, _ in windows:
            accuracy = hits[days] / total_users if total_users else 0.0
            print("For a " + days + "-day testing period:", accuracy)
                            
                
# Folder containing the tab-separated input files opened by read_data().
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
# Node-vector file passed to read_latent_output().
outputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Latent Vector Representation/Gaming_output_metapath_UGIGU_w50_l20.txt'


def main():
    """Load data and vectors, then run and score each recommendation strategy in turn."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_latent_output(outputfilename)
    strategies = (
        recommender.make_item_recommendations_selected_users,
        recommender.make_first_alternate_item_recommendations_selected_users,
        recommender.make_second_alternate_item_recommendations_selected_users,
    )
    # Each strategy overwrites user_recitems, so score it right after it runs.
    for recommend in strategies:
        recommend()
        recommender.get_accuracy_score()

if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
{'42465': ['33300'], '42535': ['33300'], '48889': ['33300'], '50613': ['33300', '33306'], '50643': ['33300', '33323'], '54583': ['33300'], '58043': ['33300'], '71509': ['33300', '33321'], '76127': ['33300'], '80377': ['33300'], '84133': ['33300'], '89961': ['33300', '33305', '33332'], '94437': ['33300', '33318'], '94577': ['33300', '33321', '33323'], '94629': ['33300', '33305'], '96461': ['33300'], '98003': ['33300'], '101481': ['33300', '33321', '33323'], '102363': ['33300', '33302', '33304'], '102763': ['33300']}


{'43639': ['89', '9001'], '44765': ['lolvn001', 'lolvn002'], '45121': ['MSTAR001', 'MSTAR004', 'MSTAR006'], '45693': ['tw_18_5'], '47495': ['loltw004'], '47503': ['145', '151', '2', '239', '43', '57', '9000', '9001', '9002'], '52609': ['lolvn003'], '54583': ['package_1', 'package_2', 'package_4'], '61621': ['tw_18_5'], '61647': ['RP001', 'RP002', 'RP003'], '64165': ['tw_18_1', 'tw_18_5'], '66201': ['15325'], '67707': ['lolvn002', 'lolvn

{'11493857': ['15421', '16689', '15158', '15097', '16530'], '10842607': ['tw_19_1', 'tw_16_4', 'tw_16_1', '14992', 'tw_19_4'], '6540767': ['CISLUCK11', 'package_10', 'CISLUCK5', 'package_9', 'fo3id004'], '11722909': ['tw_16_1', 'tw_18_1', 'tw_16_3', 'tw_16_4', 'tw_18_4'], '33909557': ['tw_19_1', 'tw_19_4', 'tw_19_5', '15197', 'tw_19_2'], '135957': ['15534', '15197', '15313', '16504', '16646'], '11047793': ['15197', '15189', '15214', '16578', '15365'], '43444903': ['12772', '13155', '13723', '13101', '13538'], '16912887': ['RP007', 'RP008', '63', '69', '22'], '37896045': ['13636', '12237', '13695', '14131', '14082'], '14309347': ['lolth007', 'lolth005', 'lolth006', 'lolth002', 'lolth003'], '23027837': ['tw_15_2', '15544', '15158', '15261', '15455'], '6387099': ['15453', '15543', '15022', '15522', '15012'], '7990961': ['elph006', 'RP007', 'elph007', 'elph004', 'elph005'], '31866147': ['15522', '15543', 'tw_15_2', '15008', '15565'], '33673571': ['RP007', 'RP008', '69', '22', '63'], '38494

In [3]:
import sys
import os
import random
import numpy as np
from collections import Counter

class RecommendVirtualItems:
    def __init__(self):
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        self.user_itemlist = dict()
        self.item_userlist = dict()
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        self.user_recitems = dict()
        self.nuser_recitems = dict()
        self.userlist = []
        self.selected_user_items = dict()

    def read_data(self, dirpath):
        with open(dirpath + "/id_user.txt") as udictfile:
            for line in udictfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    self.id_user[toks[0]] = toks[1].replace(" ", "")    # {uid1: User1, uid2: User2....}
                    self.user_id[toks[1]] = toks[0].replace(" ", "")    # {User1: uid1, User2: uid2....}

        print("#users", len(self.id_user))

        with open(dirpath + "/id_game.txt") as gdictfile:
            for line in gdictfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    self.id_game[toks[0]] = toks[1].replace(" ", "")    # {33302: LoLsg, 33305: LoLmy....}
                    self.game_id[toks[1]] = toks[0].replace(" ", "")    # {LoLsg: 33302, LoLmy: 33305....}

        print("#games", len(self.id_game))
        
        with open(dirpath + "/id_item.txt") as idictfile:
            for line in idictfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    self.id_item[toks[0]] = toks[1].replace(" ", "")    # {itemid1: Item1, itemid2: Item2....}
                    self.item_id[toks[1]] = toks[0].replace(" ", "")    # {Item1: itemid1, Item2: itemid2....}

        print("#items", len(self.id_item)) # comprehensive list of items

        with open(dirpath + "/game_user.txt") as gufile:  # data pulled from user game activity for 20 days
            for line in gufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    g, u = toks[0], toks[1]
                    if g not in self.game_user:
                        self.game_user[g] = []
                    self.game_user[g].append(u)   # {gameid1: [uid1,uid2,...], gameid2: [uid2,uid3,....]} = dict of list of users playing games
                    if u not in self.user_game:
                        self.user_game[u] = []
                    self.user_game[u].append(g)   # {uid1: [gameid1,gameid2,...], uid2: [gameid2,gameid3,....]} = dict of list of games played by each user
        sample_items = {k: self.user_game[k] for k in list(self.user_game)[:20]}
        print(sample_items)
        print("\n")
        
        
        with open(dirpath + "/item_user.txt") as iufile:  # list of all items purchased by users
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.item_user:
                        self.item_user[i] = []
                    self.item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.user_item:
                        self.user_item[u] = []
                    self.user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        sample_items = {k: self.user_item[k] for k in list(self.user_item)[:20]}
        print(sample_items)
        print("\n")
        
        
        with open(dirpath + "/item_game.txt") as igfile:  # list of all in-game items
            for line in igfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, g = toks[0], toks[1]
                    if i not in self.item_game:
                        self.item_game[i] = []  
                    self.item_game[i].append(g) # {itemid1: [gameid1,gameid2,...], itemid2: [gameid2,gameid3,....]} = list of items that got recommended. Every item can be in multiple games
                    if g not in self.game_item:
                        self.game_item[g] = []
                    self.game_item[g].append(i) # {gameid1: [itemid1,itemid2,...], gameid2: [itemd2,itemd3,....]} = list of items available in-game for purchase
        sample_items = {k: self.item_game[k] for k in list(self.item_game)[:20]}
        print(sample_items)
        print("\n")
        
        with open(dirpath + "/test2days_item_user.txt") as iufile:  # list of all items purchased by users in 2 days of testing period
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.test2days_item_user:
                        self.test2days_item_user[i] = []
                    self.test2days_item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.test2days_user_item:
                        self.test2days_user_item[u] = []
                    self.test2days_user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        print("No of users who bought items in first 2 days of test period:", len(self.test2days_user_item))
        
        with open(dirpath + "/test4days_item_user.txt") as iufile:  # list of all items purchased by users in 4 days of testing period
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.test4days_item_user:
                        self.test4days_item_user[i] = []
                    self.test4days_item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.test4days_user_item:
                        self.test4days_user_item[u] = []
                    self.test4days_user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        print("No of users who bought items in first 4 days of test period:", len(self.test4days_user_item))
        
        with open(dirpath + "/test10days_item_user.txt") as iufile:  # list of all items purchased by users in 10 days of testing period
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.test10days_item_user:
                        self.test10days_item_user[i] = []
                    self.test10days_item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.test10days_user_item:
                        self.test10days_user_item[u] = []
                    self.test10days_user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        print("No of users who bought items in first 10 days of test period:", len(self.test10days_user_item))
        
        with open(dirpath + "/selected_user.txt") as sufile:  # pull the randomly selected users
            for user in sufile:
                self.userlist.append(user.strip())
                    
           
    def read_latent_output(self, outputfilename):
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()
        with open(outputfilename) as nvfile:             
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) == 129:
                    node = toks[0]
                    del toks[0]
                    if node[0] == 'u': 
                        self.user_vec[self.user_id[node]] = toks    # {uid1: [user_vector_values], uid2: [user_vector_values]..}
                    elif node[0] == 'g':
                        self.game_vec[self.game_id[node]] = toks    # {gameid1: [game_vector_values], gameid2: [game_vector_values]..}
                    elif node[0] == 'i':
                        self.item_vec[self.item_id[node]] = toks    # {itemid1: [item_vector_values], itemid2: [item_vector_values]..}
        sample_items = {k: self.user_vec[k] for k in list(self.user_vec)[:2]}
        print(sample_items)
        print("\n")
        sample_items = {k: self.game_vec[k] for k in list(self.game_vec)[:2]}
        print(sample_items)
        print("\n")
        sample_items = {k: self.item_vec[k] for k in list(self.item_vec)[:2]}
        print(sample_items)
        print("\n")
        
    def get_cosine_similarity(self, vec1, vec2):
        a = np.array(vec1, dtype=float)
        b = np.array(vec2, dtype=float)
        dot_product = np.dot(a, b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        return dot_product / (norm_a * norm_b)
        
    def make_item_recommendations_selected_users(self):
        self.user_recitems = dict()
        for user in self.userlist:                                 # iterating through 10,000 randomly selected users
            uservec = self.user_vec.get(user,[])                   # getting the latent vector representation
            if (uservec != []): 
                item_simscore = {}
                for item in self.item_vec:                         # iterating through all items in item_vec
                    itemvec = self.item_vec.get(item, [])
                    cosine_sim_score = round(self.get_cosine_similarity(uservec, itemvec), 3) #getting similarity score between a user and a virtual item, as users associate their social status on the gaming platform through purchase of items with high social status
                    item_simscore[item] = cosine_sim_score         # storing similarity scores of each item with the iterated user
                l = sorted(item_simscore.items(), key=lambda x: x[1], reverse = True)  # sorting through the dictionary
                top_five_items = [item for item,_ in l[0:5]]       # getting the top 5 items
                self.user_recitems[user] = top_five_items
        sample_items = {k: self.user_recitems[k] for k in list(self.user_recitems)[:20]}
        print(sample_items)
        print("\n")
        
        
    def make_first_alternate_item_recommendations_selected_users(self):
        self.user_recitems = dict()
        print("\n First Alternate Approach\n")
        print("#Number of users randomly selected:", len(self.userlist))
        print("\n")
        for user in self.userlist:                               # iterating through 10,000 randomly selected users
            items = self.user_item.get(user,[])                  # getting the items purchased by these users during the training period
            if len(items) > 0:
                item_simscore = {}
                for item1 in items:                              # iterating through the purchased items
                    item1vec = self.item_vec[item1]              # getting the vector representation  
                    for item2 in self.item_vec:                  # iterating through all available items in item_vec
                        if (item1 == item2): continue            # we don't want to recommend the item already purchased
                        item2vec = self.item_vec[item2]          # getting the vector representation
                        cosine_sim_score = round(self.get_cosine_similarity(item1vec, item2vec), 3)
                        item_simscore[item2] = cosine_sim_score  # storing similarity scores of each item
                l = sorted(item_simscore.items(), key=lambda x: x[1], reverse = True)
                top_five_items = [item for item,_ in l[0:5]]     # getting the top 5 items
                self.user_recitems[user] = top_five_items
        sample_items = {k: self.user_recitems[k] for k in list(self.user_recitems)[:20]}
        print(sample_items)
        print("\n")
        
    def make_second_alternate_item_recommendations_selected_users(self):
        self.user_recitems = dict()
        print("\n Second Alternate Approach\n")
        print("#Number of users randomly selected:", len(self.userlist))
        print("\n")
        for user1 in self.userlist:                             # iterating through 10,000 randomly selected users
            user1vec = self.user_vec.get(user1,[])              # getting the latent vector representation
            if (user1vec != []):
                user_simscore = {}
                user_rec = {}
                items1 = self.user_item.get(user1,[])           # getting the items purchased
                for user2 in self.user_item:                    # iterating through other users in the gaming network
                    user2vec = self.user_vec.get(user2,[])      # getting the latent vector representation
                    if (user2vec != []):
                        items2 = self.user_item[user2]          # getting the items purchased by these other users
                        if (user1 == user2): continue
                        cosine_sim_score = round(self.get_cosine_similarity(user1vec, user2vec), 3)  # getting user-user cosine similarity
                        user_simscore[user2] = cosine_sim_score # storing similarity values 
                        rec_items = [item for item in items2 if item not in items1]  # getting recommended items via each of these other users
                        user_rec[user2] = rec_items             # storing recommended items 
                l = sorted(user_simscore.items(), key=lambda x: x[1], reverse = True)
                top_five_users = [user for user,_ in l[0:5]]    # getting the top 5 most similar users to the focal user
                top_five_users_items = set()                    # now we fetch the recommended items for these top users
                for user in top_five_users:
                    items = user_rec[user]
                    for item in items:
                        top_five_users_items.add(item)
                self.user_recitems[user1] = list(top_five_users_items)
        sample_items = {k: self.user_recitems[k] for k in list(self.user_recitems)[:20]}
        print(sample_items)
        print("\n")
        
    def get_accuracy_score(self):
        count_2days = count_4days = count_10days = 0
        for user in self.user_recitems:
            recommended_items = set(self.user_recitems.get(user,[]))
            new_items_2days = set(self.test2days_user_item.get(user,[]))
            new_items_4days = set(self.test4days_user_item.get(user,[]))
            new_items_10days = set(self.test10days_user_item.get(user,[]))
            if len(recommended_items & new_items_2days) > 0:
                count_2days += 1
            if len(recommended_items & new_items_4days) > 0:
                count_4days += 1
            if len(recommended_items & new_items_10days) > 0:
                count_10days += 1
        print("Accuracy for UIGIU Metapath based Recommender System")
        print("For a 2-day testing period:", count_2days/10000)
        print("For a 4-day testing period:", count_4days/10000)
        print("For a 10-day testing period:", count_10days/10000)
                            

# Directory holding the id maps, interaction tables and test-period purchase files.
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
# Latent-vector file passed to read_latent_output().
outputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Latent Vector Representation/Gaming_output_metapath_UIGIU_w50_l20.txt'

def main():
    """Load the data and embeddings, then run and score the three recommenders in turn."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_latent_output(outputfilename)
    strategies = (
        recommender.make_item_recommendations_selected_users,
        recommender.make_first_alternate_item_recommendations_selected_users,
        recommender.make_second_alternate_item_recommendations_selected_users,
    )
    for build_recommendations in strategies:
        build_recommendations()               # refills recommender.user_recitems
        recommender.get_accuracy_score()      # reports accuracy for that strategy


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
{'42465': ['33300'], '42535': ['33300'], '48889': ['33300'], '50613': ['33300', '33306'], '50643': ['33300', '33323'], '54583': ['33300'], '58043': ['33300'], '71509': ['33300', '33321'], '76127': ['33300'], '80377': ['33300'], '84133': ['33300'], '89961': ['33300', '33305', '33332'], '94437': ['33300', '33318'], '94577': ['33300', '33321', '33323'], '94629': ['33300', '33305'], '96461': ['33300'], '98003': ['33300'], '101481': ['33300', '33321', '33323'], '102363': ['33300', '33302', '33304'], '102763': ['33300']}


{'43639': ['89', '9001'], '44765': ['lolvn001', 'lolvn002'], '45121': ['MSTAR001', 'MSTAR004', 'MSTAR006'], '45693': ['tw_18_5'], '47495': ['loltw004'], '47503': ['145', '151', '2', '239', '43', '57', '9000', '9001', '9002'], '52609': ['lolvn003'], '54583': ['package_1', 'package_2', 'package_4'], '61621': ['tw_18_5'], '61647': ['RP001', 'RP002', 'RP003'], '64165': ['tw_18_1', 'tw_18_5'], '66201': ['15325'], '67707': ['lolvn002', 'lolvn

{'16912887': ['RP007', 'RP008', 'fo3id007', 'fo3id002', 'fo3id004'], '38738735': ['12824', '12844', '13887', '13206', '13717'], '14563799': ['187', '178', '72', '2', '50'], '17428065': ['15397', '14981', '15182', '16646', '15567'], '15231853': ['13895', '14118', '13918', '12216', '12831'], '7958955': ['13332', '14446', '14064', '13891', '13548'], '24233593': ['15397', '15182', '15376', '15396', '15199'], '39286035': ['15397', '15165', '16711', '15376', '15567'], '21432899': ['13783', '13404', '14145', '12893', '13080'], '12193291': ['RP007', '162', 'RP008', 'RP006', 'RP005'], '35164759': ['12015', '13428', '13825', '13001', '13029'], '11480619': ['14064', '13191', '13548', '13039', '13080'], '6036353': ['14439', '13695', '13009', '14190', '13332'], '14173775': ['RP007', 'RP006', 'RP008', 'fo3id002', 'fo3id007'], '14430819': ['14381', '12824', '14093', '14064', '13218'], '15507579': ['13282', '12664', '13717', '12819', '13010'], '38551377': ['15397', '15567', '15396', '16766', '16513'],

In [1]:
import sys
import os
import random
import numpy as np
from collections import Counter

class RecommendVirtualItems:
    def __init__(self):
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        self.user_itemlist = dict()
        self.item_userlist = dict()
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        self.userlist = []
        self.selected_user_items = dict()

    def read_data(self, dirpath):
        with open(dirpath + "/id_user.txt") as udictfile:
            for line in udictfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    self.id_user[toks[0]] = toks[1].replace(" ", "")    # {uid1: User1, uid2: User2....}
                    self.user_id[toks[1]] = toks[0].replace(" ", "")    # {User1: uid1, User2: uid2....}

        print("#users", len(self.id_user))

        with open(dirpath + "/id_game.txt") as gdictfile:
            for line in gdictfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    self.id_game[toks[0]] = toks[1].replace(" ", "")    # {33302: LoLsg, 33305: LoLmy....}
                    self.game_id[toks[1]] = toks[0].replace(" ", "")    # {LoLsg: 33302, LoLmy: 33305....}

        print("#games", len(self.id_game))
        
        with open(dirpath + "/id_item.txt") as idictfile:
            for line in idictfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    self.id_item[toks[0]] = toks[1].replace(" ", "")    # {itemid1: Item1, itemid2: Item2....}
                    self.item_id[toks[1]] = toks[0].replace(" ", "")    # {Item1: itemid1, Item2: itemid2....}

        print("#items", len(self.id_item)) # comprehensive list of items

        with open(dirpath + "/game_user.txt") as gufile:  # data pulled from user game activity for 20 days
            for line in gufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    g, u = toks[0], toks[1]
                    if g not in self.game_user:
                        self.game_user[g] = []
                    self.game_user[g].append(u)   # {gameid1: [uid1,uid2,...], gameid2: [uid2,uid3,....]} = dict of list of users playing games
                    if u not in self.user_game:
                        self.user_game[u] = []
                    self.user_game[u].append(g)   # {uid1: [gameid1,gameid2,...], uid2: [gameid2,gameid3,....]} = dict of list of games played by each user
        
        
        with open(dirpath + "/item_user.txt") as iufile:  # list of all items purchased by users
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.item_user:
                        self.item_user[i] = []
                    self.item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.user_item:
                        self.user_item[u] = []
                    self.user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        
        
        with open(dirpath + "/item_game.txt") as igfile:  # list of all in-game items
            for line in igfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, g = toks[0], toks[1]
                    if i not in self.item_game:
                        self.item_game[i] = []  
                    self.item_game[i].append(g) # {itemid1: [gameid1,gameid2,...], itemid2: [gameid2,gameid3,....]} = list of items that got recommended. Every item can be in multiple games
                    if g not in self.game_item:
                        self.game_item[g] = []
                    self.game_item[g].append(i) # {gameid1: [itemid1,itemid2,...], gameid2: [itemd2,itemd3,....]} = list of items available in-game for purchase
        
        with open(dirpath + "/test2days_item_user.txt") as iufile:  # list of all items purchased by users in 2 days of testing period
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.test2days_item_user:
                        self.test2days_item_user[i] = []
                    self.test2days_item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.test2days_user_item:
                        self.test2days_user_item[u] = []
                    self.test2days_user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        print("No of users who bought items in first 2 days of test period:", len(self.test2days_user_item))
        
        with open(dirpath + "/test4days_item_user.txt") as iufile:  # list of all items purchased by users in 4 days of testing period
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.test4days_item_user:
                        self.test4days_item_user[i] = []
                    self.test4days_item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.test4days_user_item:
                        self.test4days_user_item[u] = []
                    self.test4days_user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        print("No of users who bought items in first 4 days of test period:", len(self.test4days_user_item))
        
        with open(dirpath + "/test10days_item_user.txt") as iufile:  # list of all items purchased by users in 10 days of testing period
            for line in iufile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    i, u = toks[0], toks[1]
                    if i not in self.test10days_item_user:
                        self.test10days_item_user[i] = []
                    self.test10days_item_user[i].append(u)   # {itemid1: [uid1,uid2,...], itemid2: [uid2,uid3,....]} = dict of list of users purchasing each item 
                    if u not in self.test10days_user_item:
                        self.test10days_user_item[u] = []
                    self.test10days_user_item[u].append(i)   # {uid1: [itemid1,itemid2,...], uid2: [itemd2,itemd3,....]} = dict of list of items purchased by each user
        print("No of users who bought items in first 10 days of test period:", len(self.test10days_user_item))
        
        with open(dirpath + "/selected_user.txt") as sufile:  # pull the randomly selected users
            for user in sufile:
                self.userlist.append(user.strip())
                            
    def read_latent_output(self, outputfilename):
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()
        with open(outputfilename) as nvfile:             
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) == 129:
                    node = toks[0]
                    del toks[0]
                    if node[0] == 'u': 
                        self.user_vec[self.user_id[node]] = toks    # {uid1: [user_vector_values], uid2: [user_vector_values]..}
                    elif node[0] == 'g':
                        self.game_vec[self.game_id[node]] = toks    # {gameid1: [game_vector_values], gameid2: [game_vector_values]..}
                    elif node[0] == 'i':
                        self.item_vec[self.item_id[node]] = toks    # {itemid1: [item_vector_values], itemid2: [item_vector_values]..}
        
    def get_cosine_similarity(self, vec1, vec2):
        a = np.array(vec1, dtype=float)
        b = np.array(vec2, dtype=float)
        dot_product = np.dot(a, b)
        norm_a = np.linalg.norm(a)
        norm_b = np.linalg.norm(b)
        return dot_product / (norm_a * norm_b)
        
    def make_item_recommendations_selected_users(self):
        top_5_user_recitems = dict()
        top_10_user_recitems = dict()
        top_15_user_recitems = dict()
        print("\nMaking Recommendations\n")
        print("#Number of users randomly selected:", len(self.userlist))
        for user1 in self.userlist:                             # iterating through 10,000 randomly selected users
            user1vec = self.user_vec.get(user1,[])              # getting the latent vector representation
            if (user1vec != []):
                self.user_simscore = {}
                self.user_rec = {}
                items1 = self.user_item.get(user1,[])           # getting the items purchased
                for user2 in self.user_item:                    # iterating through other users in the gaming network
                    user2vec = self.user_vec.get(user2,[])      # getting the latent vector representation
                    if (user2vec != []):
                        items2 = self.user_item[user2]          # getting the items purchased by these other users
                        if (user1 == user2): continue
                        cosine_sim_score = round(self.get_cosine_similarity(user1vec, user2vec), 3)  # getting user-user cosine similarity
                        self.user_simscore[user2] = cosine_sim_score                                 # storing similarity values of each of iterated users with focal user
                        rec_items = [item for item in items2 if item not in items1]                  # getting recommended items via each of these other users
                        self.user_rec[user2] = rec_items                                             # storing recommended items 
                self.sorted_user_simscore = sorted(self.user_simscore.items(), key=lambda x: x[1], reverse = True)
                top_5_user_recitems[user1] = self.get_top_k_user_items(k=5)
                top_10_user_recitems[user1] = self.get_top_k_user_items(k=10)
                top_15_user_recitems[user1] = self.get_top_k_user_items(k=15)
        sample_items = {k: top_5_user_recitems[k] for k in list(top_5_user_recitems)[:20]}
        print("\n")
        print("Accuracy for UGIGU Metapath Recommender System based on Top-5 similar users")
        self.get_accuracy_score(user_recitems = top_5_user_recitems)
        sample_items = {k: top_10_user_recitems[k] for k in list(top_10_user_recitems)[:20]}
        print("\n")
        print("Accuracy for UGIGU Metapath Recommender System based on Top-10 similar users")
        self.get_accuracy_score(user_recitems = top_10_user_recitems)
        sample_items = {k: top_15_user_recitems[k] for k in list(top_15_user_recitems)[:20]}
        print("\n")
        print("Accuracy for UGIGU Metapath Recommender System based on Top-15 similar users")
        self.get_accuracy_score(user_recitems = top_15_user_recitems)
        
    def get_top_k_user_items(self, k):
        top_k_users = [user for user,_ in self.sorted_user_simscore[0:k]]   # getting the top k most similar users to the focal user
        top_k_users_items = set()                                           # now we fetch the recommended items for these top users
        for user in top_k_users:
            items = self.user_rec[user]
            for item in items:
                top_k_users_items.add(item)
        return list(top_k_users_items)      
        
    def get_accuracy_score(self, user_recitems):
        count_2days = count_4days = count_10days = 0
        for user in user_recitems:
            recommended_items = set(user_recitems.get(user,[]))
            new_items_2days = set(self.test2days_user_item.get(user,[]))
            new_items_4days = set(self.test4days_user_item.get(user,[]))
            new_items_10days = set(self.test10days_user_item.get(user,[]))
            if len(recommended_items & new_items_2days) > 0:
                count_2days += 1
            if len(recommended_items & new_items_4days) > 0:
                count_4days += 1
            if len(recommended_items & new_items_10days) > 0:
                count_10days += 1
        print("For a 2-day testing period:", count_2days/10000)
        print("For a 4-day testing period:", count_4days/10000)
        print("For a 10-day testing period:", count_10days/10000)
                                            
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
outputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Latent Vector Representation/Gaming_output_metapath_UGIGU_w50_l20.txt'


def main():
    """Load the data files and the embeddings, then run
    make_item_recommendations_selected_users() on them."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_latent_output(outputfilename)
    recommender.make_item_recommendations_selected_users()


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302

Making Recommendations

#Number of users randomly selected: 10000




Accuracy for UGIGU Metapath Recommender System based on Top-5 similar users
For a 2-day testing period: 0.0067
For a 4-day testing period: 0.0142
For a 10-day testing period: 0.0303


Accuracy for UGIGU Metapath Recommender System based on Top-10 similar users
For a 2-day testing period: 0.0083
For a 4-day testing period: 0.0168
For a 10-day testing period: 0.0355


Accuracy for UGIGU Metapath Recommender System based on Top-15 similar users
For a 2-day testing period: 0.0087
For a 4-day testing period: 0.0176
For a 10-day testing period: 0.0372


In [2]:
import sys
import os
import random
import numpy as np
from collections import Counter

class RecommendVirtualItems:
    """Item-centric evaluation of recommendations derived from node embeddings.

    For every item of a cluster, the users in ``self.userlist`` are ranked by
    their best cosine similarity to any earlier buyer of that item. The
    top-ranked users are then checked against the purchases recorded for the
    first 2, 4 and 10 days of the test period.
    """

    def __init__(self):
        # id -> name and name -> id dictionaries for users, games and items
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        # adjacency lists filled by read_data()
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        # kept for compatibility; no method of this class fills them
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        self.user_itemlist = dict()
        self.item_userlist = dict()
        # purchases seen in the first 2 / 4 / 10 days of the test period
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        self.userlist = []      # users read from selected_user.txt
        self.star_items = []    # ranked items within the star cutoff
        self.mid_items = []     # kept for compatibility; never filled here
        self.tail_items = []    # all other ranked items
        # directory last given to read_data(); reused for the ranking file
        self._data_dir = None

    @staticmethod
    def _load_id_name_map(path, id_to_name, name_to_id):
        """Fill both directions of an ``id<TAB>name`` dictionary file.

        Spaces are removed from the name. The reverse map is keyed by that
        same space-free name, so names that are later read from the
        space-separated embedding file can be resolved.
        """
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue
                name = toks[1].replace(" ", "")
                id_to_name[toks[0]] = name
                name_to_id[name] = toks[0].replace(" ", "")

    @staticmethod
    def _load_edges(path, left_to_right, right_to_left):
        """Append every ``left<TAB>right`` pair of a file to both adjacency dicts."""
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue
                left, right = toks
                left_to_right.setdefault(left, []).append(right)
                right_to_left.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load the dictionaries, edge lists, test purchases and selected users.

        Args:
            dirpath: directory containing ``id_user.txt``, ``id_game.txt``,
                ``id_item.txt``, ``game_user.txt``, ``item_user.txt``,
                ``item_game.txt``, ``test{2,4,10}days_item_user.txt`` and
                ``selected_user.txt``. Lines that do not split into exactly
                two tab-separated fields are ignored.

        Raises:
            OSError: if one of the files cannot be opened.
        """
        self._data_dir = dirpath

        self._load_id_name_map(dirpath + "/id_user.txt", self.id_user, self.user_id)
        print("#users", len(self.id_user))

        self._load_id_name_map(dirpath + "/id_game.txt", self.id_game, self.game_id)
        print("#games", len(self.id_game))

        self._load_id_name_map(dirpath + "/id_item.txt", self.id_item, self.item_id)
        print("#items", len(self.id_item))

        # first column of each file is the left side of the relation
        self._load_edges(dirpath + "/game_user.txt", self.game_user, self.user_game)
        self._load_edges(dirpath + "/item_user.txt", self.item_user, self.user_item)
        self._load_edges(dirpath + "/item_game.txt", self.item_game, self.game_item)

        test_splits = (
            ("2", self.test2days_item_user, self.test2days_user_item),
            ("4", self.test4days_item_user, self.test4days_user_item),
            ("10", self.test10days_item_user, self.test10days_user_item),
        )
        for days, item_to_user, user_to_item in test_splits:
            self._load_edges(
                dirpath + "/test" + days + "days_item_user.txt",
                item_to_user,
                user_to_item,
            )
            print(
                "No of users who bought items in first {} days of test period:".format(days),
                len(user_to_item),
            )

        with open(dirpath + "/selected_user.txt") as sufile:
            self.userlist.extend(line.strip() for line in sufile)

    def read_latent_output(self, outputfilename, dim=128):
        """Read the node embeddings into ``user_vec``, ``game_vec`` and ``item_vec``.

        Each accepted line holds a node name followed by ``dim`` values, all
        separated by single spaces. The first character of the name selects
        the node type (``u``, ``g`` or ``i``); other lines are skipped. The
        values are stored as the list of strings read from the file, keyed by
        the node's id.

        Args:
            outputfilename: path of the embedding file.
            dim: number of values per node (default 128, i.e. 129 tokens).

        Raises:
            KeyError: if a ``u``/``g``/``i`` node name is missing from the
                corresponding name -> id dictionary.
        """
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()
        targets = {
            'u': (self.user_id, self.user_vec),
            'g': (self.game_id, self.game_vec),
            'i': (self.item_id, self.item_vec),
        }
        expected_tokens = dim + 1
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) != expected_tokens:
                    continue
                node, values = toks[0], toks[1:]
                target = targets.get(node[:1])
                if target is None:
                    continue
                name_to_id, vectors = target
                vectors[name_to_id[node]] = values

    def get_cosine_similarity(self, vec1, vec2):
        """Return the cosine similarity of two vectors.

        The inputs may be sequences of numbers or of numeric strings. When
        either vector has zero length the similarity is undefined; 0.0 is
        returned instead of dividing by zero.
        """
        a = np.array(vec1, dtype=float)
        b = np.array(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def make_user_recommendations_selected_cluster(self, cluster_items):
        """Rank the selected users for every item and print the accuracies.

        For each item, every selected user that has an embedding and has not
        bought the item is scored with the highest (3-decimal) cosine
        similarity to any earlier buyer of the item. The top 5/10/15/20 users
        per item are passed to get_accuracy_score().

        Args:
            cluster_items: iterable of item ids to evaluate.
        """
        cutoffs = (5, 10, 15, 20)
        item_recusers = {k: dict() for k in cutoffs}

        # Parse each selected user's vector and purchases once instead of
        # once per (item, buyer) pair.
        candidates = []
        for user in self.userlist:
            raw = self.user_vec.get(user, [])
            if raw:
                candidates.append(
                    (user, np.array(raw, dtype=float), set(self.user_item.get(user, [])))
                )

        for item in cluster_items:
            best_score = {}
            # dict.fromkeys drops repeated buyers but keeps their order
            for buyer in dict.fromkeys(self.item_user.get(item, [])):
                raw = self.user_vec.get(buyer, [])
                if not raw:
                    continue
                buyer_vec = np.array(raw, dtype=float)
                for user, user_vec, owned in candidates:
                    if user == buyer or item in owned:
                        continue
                    score = round(self.get_cosine_similarity(buyer_vec, user_vec), 3)
                    if user not in best_score or score > best_score[user]:
                        best_score[user] = score
            ranked = sorted(best_score.items(), key=lambda pair: pair[1], reverse=True)
            for k in cutoffs:
                item_recusers[k][item] = [user for user, _ in ranked[:k]]

        for k in cutoffs:
            print(
                "Accuracy for UGIGU Metapath Recommender System based on "
                "Top-{} similar users".format(k)
            )
            self.get_accuracy_score(item_recusers=item_recusers[k], cluster_items=cluster_items)

    def get_accuracy_score(self, item_recusers, cluster_items):
        """Print and return the share of items bought by a recommended user.

        An item is a hit for a test window when at least one of the users
        recommended for it has that item among their purchases in the window.

        Args:
            item_recusers: dict mapping an item to its recommended users.
            cluster_items: the items to score; its length is the denominator.

        Returns:
            Tuple ``(rate_2days, rate_4days, rate_10days)``; all 0.0 when
            ``cluster_items`` is empty.
        """
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        total = len(cluster_items)
        rates = []
        for label, purchases in windows:
            hits = sum(
                1
                for item in cluster_items
                if any(item in purchases.get(user, []) for user in item_recusers.get(item, []))
            )
            rate = hits / total if total else 0.0
            print("For a {} testing period:".format(label), rate)
            rates.append(rate)
        return tuple(rates)

    def make_item_recommendations(self, data_dir=None, star_cutoff=40):
        """Split the ranked items into star/tail clusters and evaluate both.

        Reads ``rank_item_quantity_price_sales.txt``; only lines with exactly
        five tab-separated fields are used, where field 0 is an integer rank
        and field 1 the item id. The cluster lists are rebuilt on every call.

        Args:
            data_dir: directory of the ranking file. Defaults to the directory
                given to read_data(), then to the module-level ``dirpath``.
            star_cutoff: highest rank that still counts as a star item.
        """
        if data_dir is None:
            data_dir = getattr(self, "_data_dir", None)
        if data_dir is None:
            data_dir = dirpath
        self.star_items = []
        self.tail_items = []
        with open(data_dir + "/rank_item_quantity_price_sales.txt") as itemfile:
            for line in itemfile:
                toks = line.strip().split("\t")
                if len(toks) != 5:
                    continue
                if int(toks[0]) <= star_cutoff:
                    self.star_items.append(toks[1])
                else:
                    self.tail_items.append(toks[1])
        print(self.star_items)
        print(self.tail_items)
        print("\nMaking Recommendations\n")
        print("\nAccuracy for Star Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.star_items)
        print("\nAccuracy for Long Tail Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.tail_items)
                                            
# Folder handed to read_data() by main().
dirpath = "C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files"
# Embedding file handed to read_latent_output() by main().
outputfilename = (
    "C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/"
    "Latent Vector Representation/"
    "Gaming_output_metapath_UGIGU_w50_l20.txt"
)


def main():
    """Load the data files and the embeddings, then run
    make_item_recommendations() on them."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_latent_output(outputfilename)
    recommender.make_item_recommendations()


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302
['lolvn002', 'lolvn001', 'lolvn003', 'loltw001', 'RP003', 'loltw003', 'RP002', 'loltw004', 'lolvn004', 'RP001', 'package_20', 'loltw005', 'tw_18_3', '89', 'loltw002', 'tw_18_2', 'tw_18_5', 'lolsam001', '9002', 'lolsam003', 'fo3vn003', 'RP004', 'FO3TH003', 'fo3vn004', 'tw_18_4', 'package_4', '9001', 'package_2', 'fo3vn002', 'lolsam004', 'tw_19_3', 'package_18', 'package_19', 'package_1', 'tw_19_2', '166', '145', 'fo3vn005', '9000', '33']
['43', '31', 'RP005', '237', 'HONLUCK1', '57', '2', 'lolsam006', 'lolsam002', '239', 'FO3TH004', 'FO3TH006', 'tw_19_5', 'lolvn005', '157', 'pbth001', '159', '205', 'lolth004', '228', '280', 'tw_19_4', 'lolth001', 'lolsam005', '138', 'lolth003', 'tw_18_1', 'tw_15_5', '34', 'tw_15_4', '188', '25', '192', '200', 'FO3TH005',

In [3]:
import sys
import os
import random
import numpy as np
from collections import Counter

class RecommendVirtualItems:
    """User-centric evaluation of recommendations derived from node embeddings.

    For every selected user, all purchasers with an embedding are ranked by
    cosine similarity. The cluster items bought by the most similar purchasers
    (and not yet bought by the user) become the recommendations, which are
    checked against the purchases of the first 2, 4 and 10 test days.
    """

    def __init__(self):
        # id -> name and name -> id dictionaries for users, games and items
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        # adjacency lists filled by read_data()
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        # kept for compatibility; no method of this class fills them
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        self.user_itemlist = dict()
        self.item_userlist = dict()
        # purchases seen in the first 2 / 4 / 10 days of the test period
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        self.userlist = []      # users read from selected_user.txt
        self.star_items = []    # ranked items within the star cutoff
        self.tail_items = []    # all other ranked items
        self.all_items = []     # every ranked item
        # directory last given to read_data(); reused for the ranking file
        self._data_dir = None

    @staticmethod
    def _load_id_name_map(path, id_to_name, name_to_id):
        """Fill both directions of an ``id<TAB>name`` dictionary file.

        Spaces are removed from the name. The reverse map is keyed by that
        same space-free name, so names that are later read from the
        space-separated embedding file can be resolved.
        """
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue
                name = toks[1].replace(" ", "")
                id_to_name[toks[0]] = name
                name_to_id[name] = toks[0].replace(" ", "")

    @staticmethod
    def _load_edges(path, left_to_right, right_to_left):
        """Append every ``left<TAB>right`` pair of a file to both adjacency dicts."""
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue
                left, right = toks
                left_to_right.setdefault(left, []).append(right)
                right_to_left.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load the dictionaries, edge lists, test purchases and selected users.

        Args:
            dirpath: directory containing ``id_user.txt``, ``id_game.txt``,
                ``id_item.txt``, ``game_user.txt``, ``item_user.txt``,
                ``item_game.txt``, ``test{2,4,10}days_item_user.txt`` and
                ``selected_user.txt``. Lines that do not split into exactly
                two tab-separated fields are ignored.

        Raises:
            OSError: if one of the files cannot be opened.
        """
        self._data_dir = dirpath

        self._load_id_name_map(dirpath + "/id_user.txt", self.id_user, self.user_id)
        print("#users", len(self.id_user))

        self._load_id_name_map(dirpath + "/id_game.txt", self.id_game, self.game_id)
        print("#games", len(self.id_game))

        self._load_id_name_map(dirpath + "/id_item.txt", self.id_item, self.item_id)
        print("#items", len(self.id_item))

        # first column of each file is the left side of the relation
        self._load_edges(dirpath + "/game_user.txt", self.game_user, self.user_game)
        self._load_edges(dirpath + "/item_user.txt", self.item_user, self.user_item)
        self._load_edges(dirpath + "/item_game.txt", self.item_game, self.game_item)

        test_splits = (
            ("2", self.test2days_item_user, self.test2days_user_item),
            ("4", self.test4days_item_user, self.test4days_user_item),
            ("10", self.test10days_item_user, self.test10days_user_item),
        )
        for days, item_to_user, user_to_item in test_splits:
            self._load_edges(
                dirpath + "/test" + days + "days_item_user.txt",
                item_to_user,
                user_to_item,
            )
            print(
                "No of users who bought items in first {} days of test period:".format(days),
                len(user_to_item),
            )

        with open(dirpath + "/selected_user.txt") as sufile:
            self.userlist.extend(line.strip() for line in sufile)

    def read_latent_output(self, outputfilename, dim=128):
        """Read the node embeddings into ``user_vec``, ``game_vec`` and ``item_vec``.

        Each accepted line holds a node name followed by ``dim`` values, all
        separated by single spaces. The first character of the name selects
        the node type (``u``, ``g`` or ``i``); other lines are skipped. The
        values are stored as the list of strings read from the file, keyed by
        the node's id.

        Args:
            outputfilename: path of the embedding file.
            dim: number of values per node (default 128, i.e. 129 tokens).

        Raises:
            KeyError: if a ``u``/``g``/``i`` node name is missing from the
                corresponding name -> id dictionary.
        """
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()
        targets = {
            'u': (self.user_id, self.user_vec),
            'g': (self.game_id, self.game_vec),
            'i': (self.item_id, self.item_vec),
        }
        expected_tokens = dim + 1
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) != expected_tokens:
                    continue
                node, values = toks[0], toks[1:]
                target = targets.get(node[:1])
                if target is None:
                    continue
                name_to_id, vectors = target
                vectors[name_to_id[node]] = values

    def get_cosine_similarity(self, vec1, vec2):
        """Return the cosine similarity of two vectors.

        The inputs may be sequences of numbers or of numeric strings. When
        either vector has zero length the similarity is undefined; 0.0 is
        returned instead of dividing by zero.
        """
        a = np.array(vec1, dtype=float)
        b = np.array(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def make_user_recommendations_selected_cluster(self, cluster_items):
        """Build per-user recommendations from similar purchasers and score them.

        For each selected user with an embedding, every other user of
        ``self.user_item`` that has an embedding is scored by (3-decimal)
        cosine similarity. The items of ``cluster_items`` that such a
        neighbour bought and the selected user did not are that neighbour's
        candidate items. The union of the candidates of the top 5/10/15/20
        neighbours is passed to get_accuracy_score().

        ``self.user_simscore``, ``self.user_rec`` and
        ``self.sorted_user_simscore`` are overwritten for every selected user
        and hold the values of the last processed one afterwards.

        Args:
            cluster_items: iterable of item ids that may be recommended.
        """
        cutoffs = (5, 10, 15, 20)
        user_recitems = {k: dict() for k in cutoffs}
        print("\nMaking Recommendations")
        print("#Number of users randomly selected:", len(self.userlist))

        # Parse every purchaser's vector and items once instead of once per
        # selected user.
        neighbours = []
        for user, items in self.user_item.items():
            raw = self.user_vec.get(user, [])
            if raw:
                neighbours.append((user, np.array(raw, dtype=float), set(items)))

        for focal in self.userlist:
            focal_raw = self.user_vec.get(focal, [])
            if not focal_raw:
                continue
            focal_vec = np.array(focal_raw, dtype=float)
            owned = set(self.user_item.get(focal, []))
            # does not depend on the neighbour, so filter once per focal user
            wanted = [item for item in cluster_items if item not in owned]
            self.user_simscore = {}
            self.user_rec = {}
            for other, other_vec, other_items in neighbours:
                if other == focal:
                    continue
                self.user_simscore[other] = round(
                    self.get_cosine_similarity(focal_vec, other_vec), 3
                )
                self.user_rec[other] = [item for item in wanted if item in other_items]
            self.sorted_user_simscore = sorted(
                self.user_simscore.items(), key=lambda pair: pair[1], reverse=True
            )
            for k in cutoffs:
                user_recitems[k][focal] = self.get_top_k_user_items(k=k)

        for k in cutoffs:
            print("\n")
            print(
                "Accuracy for UGIGU Metapath Recommender System based on "
                "Top-{} similar users".format(k)
            )
            self.get_accuracy_score(user_recitems=user_recitems[k])
        print("\n")

    def get_top_k_user_items(self, k):
        """Return the distinct candidate items of the ``k`` most similar users.

        Uses ``self.sorted_user_simscore`` and ``self.user_rec`` as left by
        make_user_recommendations_selected_cluster(). The result is built
        from a set, so its order carries no meaning.
        """
        merged = set()
        for neighbour, _score in self.sorted_user_simscore[:k]:
            merged.update(self.user_rec[neighbour])
        return list(merged)

    def get_accuracy_score(self, user_recitems, total_users=None):
        """Print and return the share of users who bought a recommended item.

        A user is a hit for a test window when at least one recommended item
        is among the user's purchases in that window.

        Args:
            user_recitems: dict mapping a user to the recommended items.
            total_users: denominator of the rate. Defaults to the number of
                selected users (``len(self.userlist)``), replacing the
                previously hard-coded 10000.

        Returns:
            Tuple ``(rate_2days, rate_4days, rate_10days)``; all 0.0 when the
            denominator is zero.
        """
        if total_users is None:
            total_users = len(self.userlist)
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        rates = []
        for label, purchases in windows:
            hits = sum(
                1
                for user, items in user_recitems.items()
                # not disjoint <=> at least one recommended item was bought
                if not set(items).isdisjoint(purchases.get(user, []))
            )
            rate = hits / total_users if total_users else 0.0
            print("For a {} testing period:".format(label), rate)
            rates.append(rate)
        return tuple(rates)

    def make_item_recommendations(self, data_dir=None, star_cutoff=40):
        """Build the star/tail/all item clusters and evaluate each of them.

        Reads ``rank_item_quantity_price_sales.txt``; only lines with exactly
        five tab-separated fields are used, where field 0 is an integer rank
        and field 1 the item id. The cluster lists are rebuilt on every call.

        Args:
            data_dir: directory of the ranking file. Defaults to the directory
                given to read_data(), then to the module-level ``dirpath``.
            star_cutoff: highest rank that still counts as a star item.
        """
        if data_dir is None:
            data_dir = getattr(self, "_data_dir", None)
        if data_dir is None:
            data_dir = dirpath
        self.star_items = []
        self.tail_items = []
        self.all_items = []
        with open(data_dir + "/rank_item_quantity_price_sales.txt") as itemfile:
            for line in itemfile:
                toks = line.strip().split("\t")
                if len(toks) != 5:
                    continue
                self.all_items.append(toks[1])
                if int(toks[0]) <= star_cutoff:
                    self.star_items.append(toks[1])
                else:
                    self.tail_items.append(toks[1])
        print("\nMaking Recommendations\n")
        print("\nAccuracy for Star Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.star_items)
        print("\nAccuracy for Long Tail Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.tail_items)
        print("\nAccuracy for All Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.all_items)
                                            
# Folder handed to read_data() by main().
dirpath = "C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files"
# Embedding file handed to read_latent_output() by main().
outputfilename = (
    "C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/"
    "Latent Vector Representation/"
    "Gaming_output_metapath_UGIGU_w50_l20.txt"
)


def main():
    """Load the data files and the embeddings, then run
    make_item_recommendations() on them."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_latent_output(outputfilename)
    recommender.make_item_recommendations()


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302

Making Recommendations


Accuracy for Star Items


Making Recommendations
#Number of users randomly selected: 10000


Accuracy for UGIGU Metapath Recommender System based on Top-5 similar users
For a 2-day testing period: 0.0064
For a 4-day testing period: 0.0138
For a 10-day testing period: 0.0287


Accuracy for UGIGU Metapath Recommender System based on Top-10 similar users
For a 2-day testing period: 0.0075
For a 4-day testing period: 0.0158
For a 10-day testing period: 0.0329


Accuracy for UGIGU Metapath Recommender System based on Top-15 similar users
For a 2-day testing period: 0.0078
For a 4-day testing period: 0.0162
For a 10-day testing period: 0.0342


Accuracy for UGIGU Metapath Recommender System based on Top-20 similar users
For a 2-day t