In [1]:
import sys
import os
import random
from collections import Counter

class LineInputGenerator:
    """Load the gaming data set and write a weighted user-user edge list.

    Every relation is stored as a plain ``dict`` mapping an id to the list of
    related ids, in the order the pairs appear in the input files.
    """

    def __init__(self):
        self.id_user = dict()    # {uid1: User1, uid2: User2....}
        self.id_game = dict()    # {33302: LoLsg, 33305: LoLmy....}
        self.id_item = dict()    # {itemid1: Item1, itemid2: Item2....}
        self.item_user = dict()  # {itemid: [uid, ...]} users purchasing each item
        self.user_item = dict()  # {uid: [itemid, ...]} items purchased by each user
        self.game_user = dict()  # {gameid: [uid, ...]} users playing each game
        self.user_game = dict()  # {uid: [gameid, ...]} games played by each user
        self.game_item = dict()  # {gameid: [itemid, ...]} items listed for each game
        self.item_game = dict()  # {itemid: [gameid, ...]} games each item is listed for

    @staticmethod
    def _iter_pairs(path):
        """Yield ``(first, second)`` for each line of ``path`` with exactly two tab-separated fields.

        The line is stripped before splitting; lines with any other number of
        fields are skipped silently.
        """
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    yield toks[0], toks[1]

    def _load_names(self, path, names):
        """Fill ``names`` with ``{id: name}`` read from ``path``; spaces are removed from names."""
        for key, name in self._iter_pairs(path):
            names[key] = name.replace(" ", "")

    def _load_relation(self, path, first_to_second, second_to_first):
        """Read ``first<TAB>second`` pairs from ``path`` into both direction dicts."""
        for first, second in self._iter_pairs(path):
            first_to_second.setdefault(first, []).append(second)
            second_to_first.setdefault(second, []).append(first)

    def read_data(self, dirpath):
        """Populate every dictionary of the instance from the files in ``dirpath``.

        Args:
            dirpath: directory holding id_user.txt, id_game.txt, id_item.txt,
                game_user.txt, item_user.txt and item_game.txt.

        Raises:
            OSError: if one of the files cannot be opened.
        """
        def in_dir(filename):
            # os.path.join replaces the former dirpath + "\id_user.txt" style:
            # "\i" / "\g" are invalid escape sequences and a literal backslash
            # is only a path separator on Windows.
            return os.path.join(dirpath, filename)

        self._load_names(in_dir("id_user.txt"), self.id_user)
        print("#users", len(self.id_user))

        self._load_names(in_dir("id_game.txt"), self.id_game)
        print("#games", len(self.id_game))

        self._load_names(in_dir("id_item.txt"), self.id_item)
        print("#items", len(self.id_item)) # comprehensive list of items

        # data pulled from user game activity for 20 days
        self._load_relation(in_dir("game_user.txt"), self.game_user, self.user_game)
        # list of all items purchased by users
        self._load_relation(in_dir("item_user.txt"), self.item_user, self.user_item)
        # list of all in-game items; every item can be in multiple games
        self._load_relation(in_dir("item_game.txt"), self.item_game, self.game_item)

    def generate_LINE_input(self, outfilename):
        """Write one weighted edge per direction for every pair of users sharing games or items.

        Only users present in ``user_game`` are considered. The weight of a
        pair is the number of distinct shared games plus the number of distinct
        shared items; pairs with weight 0 are not written. Each line has the
        form ``"<user_a> <user_b> <weight>"``.

        The pair loop is quadratic in the number of users.

        Args:
            outfilename: path of the text file to (over)write.
        """
        users = list(self.user_game)
        # Build every user's sets once instead of once per compared pair.
        games_of = [set(self.user_game[user]) for user in users]
        items_of = [set(self.user_item.get(user, [])) for user in users]

        # The context manager closes the file even if writing fails part-way.
        with open(outfilename, 'w') as outfile:
            print("#Number of users in consideration set", len(users))
            for i in range(len(users) - 1):
                user1, games1, items1 = users[i], games_of[i], items_of[i]
                for j in range(i + 1, len(users)):
                    weight = len(games1 & games_of[j]) + len(items1 & items_of[j])
                    if weight > 0:
                        user2 = users[j]
                        outline1 = user1 + " " + user2 + " " + str(weight)
                        outline2 = user2 + " " + user1 + " " + str(weight)
                        outfile.write(outline1 + "\n" + outline2 + "\n")
        


# Input directory handed to LineInputGenerator.read_data, and the path that
# generate_LINE_input would write the edge list to.
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
outfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/LINE/LINE_network_input_file.txt'


def main():
    """Load the data set from ``dirpath`` and report success.

    Writing the edge list to ``outfilename`` is left disabled, exactly as in
    the original cell.
    """
    generator = LineInputGenerator()
    generator.read_data(dirpath)
    # generator.generate_LINE_input(outfilename)
    print("Success")


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
Success


In [2]:
import sys
import os
import random
from collections import Counter
import numpy as np

class RecommendVirtualItems:
    """Recommend items to selected users from their most similar users.

    Similarity is the cosine similarity between per-user vectors read by
    ``read_LINE_output``. Recommendations are evaluated against purchases made
    during 2-, 4- and 10-day test periods, using the 5, 10 and 15 most similar
    users.
    """

    def __init__(self):
        self.id_user = dict()               # {uid1: User1, uid2: User2....}
        self.id_game = dict()               # {33302: LoLsg, 33305: LoLmy....}
        self.id_item = dict()               # {itemid1: Item1, itemid2: Item2....}
        self.item_user = dict()             # {itemid: [uid, ...]}
        self.user_item = dict()             # {uid: [itemid, ...]}
        self.game_user = dict()             # {gameid: [uid, ...]}
        self.user_game = dict()             # {uid: [gameid, ...]}
        self.game_item = dict()             # {gameid: [itemid, ...]}
        self.item_game = dict()             # {itemid: [gameid, ...]}
        self.test2days_item_user = dict()   # purchases in the 2-day test period
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()   # purchases in the 4-day test period
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()  # purchases in the 10-day test period
        self.test10days_user_item = dict()
        self.userlist = []                  # selected users, one per line of selected_user.txt
        self.selected_user_items = dict()
        # Created here so the methods below fail gracefully (empty results)
        # rather than with AttributeError when called before the data is loaded.
        self.user_vec = dict()              # {uid: [vector component strings]}
        self.user_simscore = {}             # scratch: similarity to the current focal user
        self.user_rec = {}                  # scratch: candidate items per similar user
        self.sorted_user_simscore = []      # scratch: (uid, score), most similar first

    @staticmethod
    def _iter_pairs(path):
        """Yield ``(first, second)`` for each line of ``path`` with exactly two tab-separated fields.

        The line is stripped before splitting; other lines are skipped silently.
        """
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    yield toks[0], toks[1]

    def _load_names(self, path, names):
        """Fill ``names`` with ``{id: name}`` read from ``path``; spaces are removed from names."""
        for key, name in self._iter_pairs(path):
            names[key] = name.replace(" ", "")

    def _load_relation(self, path, first_to_second, second_to_first):
        """Read ``first<TAB>second`` pairs from ``path`` into both direction dicts."""
        for first, second in self._iter_pairs(path):
            first_to_second.setdefault(first, []).append(second)
            second_to_first.setdefault(second, []).append(first)

    def read_data(self, dirpath):
        """Load the id tables, the relations, the test-period purchases and the selected users.

        Args:
            dirpath: directory containing the input text files.

        Raises:
            OSError: if one of the files cannot be opened.
        """
        def in_dir(filename):
            # One portable join for every file; the former code mixed "/" with
            # "\id_user.txt"-style literals (invalid escapes, Windows-only).
            return os.path.join(dirpath, filename)

        self._load_names(in_dir("id_user.txt"), self.id_user)
        print("#users", len(self.id_user))

        self._load_names(in_dir("id_game.txt"), self.id_game)
        print("#games", len(self.id_game))

        self._load_names(in_dir("id_item.txt"), self.id_item)
        print("#items", len(self.id_item)) # comprehensive list of items

        # data pulled from user game activity for 20 days
        self._load_relation(in_dir("game_user.txt"), self.game_user, self.user_game)
        # list of all items purchased by users
        self._load_relation(in_dir("item_user.txt"), self.item_user, self.user_item)
        # list of all in-game items; every item can be in multiple games
        self._load_relation(in_dir("item_game.txt"), self.item_game, self.game_item)

        # items purchased by users during the 2 / 4 / 10 day testing periods
        self._load_relation(in_dir("test2days_item_user.txt"),
                            self.test2days_item_user, self.test2days_user_item)
        print("\nNo of users who bought items in first 2 days of test period:", len(self.test2days_user_item))

        self._load_relation(in_dir("test4days_item_user.txt"),
                            self.test4days_item_user, self.test4days_user_item)
        print("No of users who bought items in first 4 days of test period:", len(self.test4days_user_item))

        self._load_relation(in_dir("test10days_item_user.txt"),
                            self.test10days_item_user, self.test10days_user_item)
        print("No of users who bought items in first 10 days of test period:", len(self.test10days_user_item))

        with open(in_dir("selected_user.txt")) as sufile:  # pull the randomly selected users
            for user in sufile:
                self.userlist.append(user.strip())

    def read_LINE_output(self, outputfilename, dim=128):
        """Read per-node vectors into ``self.user_vec``.

        Each line is stripped and split on single spaces. Only lines with
        exactly ``dim + 1`` tokens are kept: the first token is the node id and
        the remaining ``dim`` tokens are stored, still as strings.

        Args:
            outputfilename: path of the vector file.
            dim: number of vector components per line (default 128).
        """
        self.user_vec = dict()
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) == dim + 1:
                    self.user_vec[toks[0]] = toks[1:]    # {uid1: [user_vector_values], ...}

    def get_cosine_similarity(self, vec1, vec2):
        """Return the cosine similarity of two equally long numeric (or numeric-string) sequences.

        Returns 0.0 when either vector has zero norm, instead of dividing by
        zero and propagating NaN into the similarity ranking.
        """
        a = np.asarray(vec1, dtype=float)
        b = np.asarray(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def _embedded_purchasers(self):
        """Return ``[(uid, float vector, purchased items)]`` for purchasers that have a vector."""
        purchasers = []
        for user, items in self.user_item.items():
            vec = self.user_vec.get(user, [])
            if vec != []:
                purchasers.append((user, np.array(vec, dtype=float), items))
        return purchasers

    def make_item_recommendations_selected_users(self):
        """Build top-5/10/15 neighbour recommendations for the selected users and print accuracies.

        For each selected user that has a vector, every other purchaser with a
        vector is scored by cosine similarity (rounded to 3 decimals); the
        items of the k most similar users that the focal user has not bought
        become the recommendations.
        """
        top_k_recitems = {k: dict() for k in (5, 10, 15)}
        print("\nMaking Recommendations\n")
        print("#Number of users randomly selected:", len(self.userlist))

        # Parse the candidate vectors once, not once per selected user.
        purchasers = self._embedded_purchasers()

        for user1 in self.userlist:
            user1vec = self.user_vec.get(user1, [])
            if user1vec == []:
                continue
            vec1 = np.array(user1vec, dtype=float)
            items1 = set(self.user_item.get(user1, []))   # set: O(1) membership tests below
            self.user_simscore = {}
            self.user_rec = {}
            for user2, vec2, items2 in purchasers:
                if user1 == user2:
                    continue
                self.user_simscore[user2] = round(self.get_cosine_similarity(vec1, vec2), 3)
                self.user_rec[user2] = [item for item in items2 if item not in items1]
            self.sorted_user_simscore = sorted(self.user_simscore.items(), key=lambda x: x[1], reverse=True)
            for k, recitems in top_k_recitems.items():
                recitems[user1] = self.get_top_k_user_items(k=k)

        for k, recitems in top_k_recitems.items():
            print("\n")
            print("Accuracy for LINE Recommender System based on Top-%d similar users" % k)
            self.get_accuracy_score(user_recitems=recitems)

    def get_top_k_user_items(self, k):
        """Return the distinct candidate items of the ``k`` most similar users.

        Reads ``self.sorted_user_simscore`` and ``self.user_rec``, which
        ``make_item_recommendations_selected_users`` sets for the current focal user.
        """
        top_k_users_items = set()
        for user, _ in self.sorted_user_simscore[0:k]:
            top_k_users_items.update(self.user_rec[user])
        return list(top_k_users_items)

    def get_accuracy_score(self, user_recitems, n_users=None):
        """Print and return the share of users with at least one recommended item bought.

        Args:
            user_recitems: ``{uid: [recommended item ids]}``.
            n_users: denominator of the accuracy; defaults to the number of
                selected users (``len(self.userlist)``) rather than a fixed 10000.

        Returns:
            ``(accuracy_2days, accuracy_4days, accuracy_10days)``; all 0.0 when
            the denominator is 0.
        """
        if n_users is None:
            n_users = len(self.userlist)
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        hits = [0] * len(windows)
        for user, items in user_recitems.items():
            recommended_items = set(items)
            for idx, (_, purchases) in enumerate(windows):
                if recommended_items & set(purchases.get(user, [])):
                    hits[idx] += 1
        rates = tuple(count / n_users if n_users else 0.0 for count in hits)
        for (label, _), rate in zip(windows, rates):
            print("For a %s testing period:" % label, rate)
        return rates

# Input directory for RecommendVirtualItems.read_data and the vector file
# passed to read_LINE_output.
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
outputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/LINE/LINE_vec_2nd_wo_norm.txt'


def main():
    """Load the data and the vectors, then build and score the recommendations."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_LINE_output(outputfilename)
    recommender.make_item_recommendations_selected_users()


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825

No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302

Making Recommendations

#Number of users randomly selected: 10000


Accuracy for LINE Recommender System based on Top-5 similar users
For a 2-day testing period: 0.0065
For a 4-day testing period: 0.0135
For a 10-day testing period: 0.0278


Accuracy for LINE Recommender System based on Top-10 similar users
For a 2-day testing period: 0.0077
For a 4-day testing period: 0.0154
For a 10-day testing period: 0.032


Accuracy for LINE Recommender System based on Top-15 similar users
For a 2-day testing period: 0.008
For a 4-day testing period: 0.016
For a 10-day testing period: 0.0335


In [3]:
import sys
import os
import random
from collections import Counter
import numpy as np

class RecommendVirtualItems:
    """Recommend items to selected users from their 5 most similar users.

    Similarity is the cosine similarity between per-user vectors read by
    ``read_LINE_output``. Recommendations are stored in ``self.user_recitems``
    and evaluated against purchases made during 2-, 4- and 10-day test periods.
    """

    def __init__(self):
        self.id_user = dict()               # {uid1: User1, uid2: User2....}
        self.id_game = dict()               # {33302: LoLsg, 33305: LoLmy....}
        self.id_item = dict()               # {itemid1: Item1, itemid2: Item2....}
        self.item_user = dict()             # {itemid: [uid, ...]}
        self.user_item = dict()             # {uid: [itemid, ...]}
        self.game_user = dict()             # {gameid: [uid, ...]}
        self.user_game = dict()             # {uid: [gameid, ...]}
        self.game_item = dict()             # {gameid: [itemid, ...]}
        self.item_game = dict()             # {itemid: [gameid, ...]}
        self.test2days_item_user = dict()   # purchases in the 2-day test period
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()   # purchases in the 4-day test period
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()  # purchases in the 10-day test period
        self.test10days_user_item = dict()
        self.user_recitems = dict()         # {uid: [recommended item ids]}
        self.nuser_recitems = dict()
        self.userlist = []                  # selected users, one per line of selected_user.txt
        self.selected_user_items = dict()
        # Created here so recommending before read_LINE_output yields empty
        # results instead of an AttributeError.
        self.user_vec = dict()              # {uid: [vector component strings]}

    @staticmethod
    def _iter_pairs(path):
        """Yield ``(first, second)`` for each line of ``path`` with exactly two tab-separated fields.

        The line is stripped before splitting; other lines are skipped silently.
        """
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    yield toks[0], toks[1]

    def _load_names(self, path, names):
        """Fill ``names`` with ``{id: name}`` read from ``path``; spaces are removed from names."""
        for key, name in self._iter_pairs(path):
            names[key] = name.replace(" ", "")

    def _load_relation(self, path, first_to_second, second_to_first):
        """Read ``first<TAB>second`` pairs from ``path`` into both direction dicts."""
        for first, second in self._iter_pairs(path):
            first_to_second.setdefault(first, []).append(second)
            second_to_first.setdefault(second, []).append(first)

    def read_data(self, dirpath):
        """Load the id tables, the relations, the test-period purchases and the selected users.

        Args:
            dirpath: directory containing the input text files.

        Raises:
            OSError: if one of the files cannot be opened.
        """
        def in_dir(filename):
            # One portable join for every file; the former code mixed "/" with
            # "\id_user.txt"-style literals (invalid escapes, Windows-only).
            return os.path.join(dirpath, filename)

        self._load_names(in_dir("id_user.txt"), self.id_user)
        print("#users", len(self.id_user))

        self._load_names(in_dir("id_game.txt"), self.id_game)
        print("#games", len(self.id_game))

        self._load_names(in_dir("id_item.txt"), self.id_item)
        print("#items", len(self.id_item)) # comprehensive list of items

        # data pulled from user game activity for 20 days
        self._load_relation(in_dir("game_user.txt"), self.game_user, self.user_game)
        # list of all items purchased by users
        self._load_relation(in_dir("item_user.txt"), self.item_user, self.user_item)
        # list of all in-game items; every item can be in multiple games
        self._load_relation(in_dir("item_game.txt"), self.item_game, self.game_item)

        # items purchased by users during the 2 / 4 / 10 day testing periods
        self._load_relation(in_dir("test2days_item_user.txt"),
                            self.test2days_item_user, self.test2days_user_item)
        print("No of users who bought items in first 2 days of test period:", len(self.test2days_user_item))

        self._load_relation(in_dir("test4days_item_user.txt"),
                            self.test4days_item_user, self.test4days_user_item)
        print("No of users who bought items in first 4 days of test period:", len(self.test4days_user_item))

        self._load_relation(in_dir("test10days_item_user.txt"),
                            self.test10days_item_user, self.test10days_user_item)
        print("No of users who bought items in first 10 days of test period:", len(self.test10days_user_item))

        with open(in_dir("selected_user.txt")) as sufile:  # pull the randomly selected users
            for user in sufile:
                self.userlist.append(user.strip())

    def read_LINE_output(self, outputfilename, dim=128):
        """Read per-node vectors into ``self.user_vec`` and print the first two entries.

        Each line is stripped and split on single spaces. Only lines with
        exactly ``dim + 1`` tokens are kept: the first token is the node id and
        the remaining ``dim`` tokens are stored, still as strings.

        Args:
            outputfilename: path of the vector file.
            dim: number of vector components per line (default 128).
        """
        self.user_vec = dict()
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) == dim + 1:
                    self.user_vec[toks[0]] = toks[1:]    # {uid1: [user_vector_values], ...}
        sample_items = {k: self.user_vec[k] for k in list(self.user_vec)[:2]}
        print(sample_items)
        print("\n")

    def get_cosine_similarity(self, vec1, vec2):
        """Return the cosine similarity of two equally long numeric (or numeric-string) sequences.

        Returns 0.0 when either vector has zero norm, instead of dividing by
        zero and propagating NaN into the similarity ranking.
        """
        a = np.asarray(vec1, dtype=float)
        b = np.asarray(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def _embedded_purchasers(self):
        """Return ``[(uid, float vector)]`` for purchasers that have a vector."""
        purchasers = []
        for user in self.user_item:
            vec = self.user_vec.get(user, [])
            if vec != []:
                purchasers.append((user, np.array(vec, dtype=float)))
        return purchasers

    def make_item_recommendations_selected_users(self):
        """Fill ``self.user_recitems`` with top-5 neighbour recommendations and print a sample.

        For each selected user that has a vector, every other purchaser with a
        vector is scored by cosine similarity (rounded to 3 decimals). The
        items bought by the 5 most similar users, minus the items the focal
        user already bought, become the recommendations. The first 20 entries
        are printed.
        """
        self.user_recitems = dict()
        print("#Number of users randomly selected:", len(self.userlist))
        print("\n")

        # Parse the candidate vectors once, not once per selected user.
        purchasers = self._embedded_purchasers()

        for user1 in self.userlist:
            user1vec = self.user_vec.get(user1, [])
            if user1vec == []:
                continue
            vec1 = np.array(user1vec, dtype=float)
            user_simscore = {}
            for user2, vec2 in purchasers:
                if user1 == user2:
                    continue
                user_simscore[user2] = round(self.get_cosine_similarity(vec1, vec2), 3)
            ranked = sorted(user_simscore.items(), key=lambda x: x[1], reverse=True)

            # Candidate items are only needed for the 5 best neighbours, so they
            # are filtered here rather than for every scored user.
            items1 = set(self.user_item.get(user1, []))
            top_5_users_items = set()
            for user, _ in ranked[0:5]:
                top_5_users_items.update(
                    item for item in self.user_item[user] if item not in items1
                )
            self.user_recitems[user1] = list(top_5_users_items)

        sample_items = {k: self.user_recitems[k] for k in list(self.user_recitems)[:20]}
        print(sample_items)
        print("\n")

    def get_accuracy_score(self, n_users=None):
        """Print and return the share of users with at least one recommended item bought.

        Args:
            n_users: denominator of the accuracy; defaults to the number of
                selected users (``len(self.userlist)``) rather than a fixed 10000.

        Returns:
            ``(accuracy_2days, accuracy_4days, accuracy_10days)``; all 0.0 when
            the denominator is 0.
        """
        if n_users is None:
            n_users = len(self.userlist)
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        hits = [0] * len(windows)
        for user, items in self.user_recitems.items():
            recommended_items = set(items)
            for idx, (_, purchases) in enumerate(windows):
                if recommended_items & set(purchases.get(user, [])):
                    hits[idx] += 1
        rates = tuple(count / n_users if n_users else 0.0 for count in hits)
        print("Accuracy for LINE based Recommender System")
        for (label, _), rate in zip(windows, rates):
            print("For a %s testing period:" % label, rate)
        return rates

# Input directory for RecommendVirtualItems.read_data and the vector file
# passed to read_LINE_output.
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
outputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/LINE/LINE_vec_1st_wo_norm.txt'


def main():
    """Load the data and the vectors, build the recommendations, then print their accuracy."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_LINE_output(outputfilename)
    recommender.make_item_recommendations_selected_users()
    recommender.get_accuracy_score()


if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302
{'42465': ['-0.044180', '-0.161388', '-0.102636', '-0.038985', '-0.043256', '-0.056325', '0.010789', '0.007799', '0.234772', '0.074731', '0.052724', '-0.072020', '0.109656', '0.006553', '0.016900', '0.022804', '-0.001478', '-0.163160', '-0.079894', '-0.081053', '0.093849', '0.032760', '0.204234', '0.038684', '-0.101360', '-0.144031', '-0.110028', '-0.115857', '-0.017826', '0.057286', '0.141551', '0.076690', '-0.060286', '0.076739', '-0.027286', '0.070308', '-0.125035', '-0.055693', '0.074181', '0.141232', '0.106361', '0.044840', '0.056446', '-0.119791', '0.040124', '-0.047916', '0.020764', '0.021422', '0.039043', '0.080577', '0.022586', '0.107153', '0.030985', '0.011960', '-0.026155', '-0.027500', '-0.026658', '0.030637', '-0.053504', '0.149140', '0.073

In [1]:
import sys
import os
import random
import numpy as np
from collections import Counter

class RecommendVirtualItems:
    """Recommend *users* for virtual items using LINE user embeddings.

    For every item of a cluster ("star" or "long tail") the selected users who
    have not bought the item are ranked by their highest cosine similarity to
    any past buyer of that item. Accuracy is the share of items for which at
    least one of the top-k recommended users bought the item during the
    2/4/10-day test windows.
    """

    # Sizes of the "top-k most similar users" lists that get evaluated.
    TOP_K_VALUES = (5, 10, 15, 20)

    def __init__(self):
        self.data_dir = None            # directory given to read_data(); reused for the rank file
        self.user_vec = dict()          # {uid: [vector components as strings]}
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        self.user_itemlist = dict()
        self.item_userlist = dict()
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        self.userlist = []
        self.star_items = []
        self.mid_items = []
        self.tail_items = []

    @staticmethod
    def _iter_pairs(path):
        """Yield (first, second) for each line of `path` holding exactly two tab-separated fields."""
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    yield toks[0], toks[1]

    def _load_id_map(self, path, forward, reverse):
        """Fill `forward` ({id: name}) and `reverse` ({name: id}) from an id/name file."""
        for key, name in self._iter_pairs(path):
            forward[key] = name.replace(" ", "")
            # NOTE(review): the reverse map is keyed by the raw name (spaces kept)
            # while the forward map stores the name without spaces - kept as in the
            # original; confirm the asymmetry is intended.
            reverse[name] = key.replace(" ", "")

    def _load_edges(self, path, left_to_right, right_to_left):
        """Fill both adjacency dicts ({left: [right, ...]} and its inverse) from an edge file."""
        for left, right in self._iter_pairs(path):
            left_to_right.setdefault(left, []).append(right)
            right_to_left.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load every input table found in `dirpath` into the instance dictionaries.

        Lines that do not split into exactly two tab-separated fields are
        ignored. `dirpath` is remembered so that make_item_recommendations()
        reads the rank file from the same directory.
        """
        self.data_dir = dirpath

        def path_of(filename):
            return os.path.join(dirpath, filename)

        self._load_id_map(path_of("id_user.txt"), self.id_user, self.user_id)   # {uid1: User1, ...} / {User1: uid1, ...}
        print("#users", len(self.id_user))

        self._load_id_map(path_of("id_game.txt"), self.id_game, self.game_id)   # {33302: LoLsg, ...} / {LoLsg: 33302, ...}
        print("#games", len(self.id_game))

        self._load_id_map(path_of("id_item.txt"), self.id_item, self.item_id)   # {itemid1: Item1, ...} / {Item1: itemid1, ...}
        print("#items", len(self.id_item)) # comprehensive list of items

        # {gameid: [uid, ...]} and {uid: [gameid, ...]}
        self._load_edges(path_of("game_user.txt"), self.game_user, self.user_game)
        # {itemid: [uid, ...]} and {uid: [itemid, ...]} - purchases
        self._load_edges(path_of("item_user.txt"), self.item_user, self.user_item)
        # {itemid: [gameid, ...]} and {gameid: [itemid, ...]}
        self._load_edges(path_of("item_game.txt"), self.item_game, self.game_item)

        # Purchases made during the 2/4/10-day test windows.
        self._load_edges(path_of("test2days_item_user.txt"), self.test2days_item_user, self.test2days_user_item)
        print("No of users who bought items in first 2 days of test period:", len(self.test2days_user_item))

        self._load_edges(path_of("test4days_item_user.txt"), self.test4days_item_user, self.test4days_user_item)
        print("No of users who bought items in first 4 days of test period:", len(self.test4days_user_item))

        self._load_edges(path_of("test10days_item_user.txt"), self.test10days_item_user, self.test10days_user_item)
        print("No of users who bought items in first 10 days of test period:", len(self.test10days_user_item))

        with open(path_of("selected_user.txt")) as sufile:  # pull the randomly selected users
            for user in sufile:
                self.userlist.append(user.strip())

    def read_LINE_output(self, outputfilename, dim=128):
        """Read node vectors from a space-separated embedding file.

        Only lines with exactly ``dim + 1`` tokens (node id followed by `dim`
        components) are kept; every other line is skipped. Components are
        stored as the strings found in the file.
        """
        self.user_vec = dict()
        expected_tokens = dim + 1
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) == expected_tokens:
                    self.user_vec[toks[0]] = toks[1:]    # {uid1: [user_vector_values], ...}

    def get_cosine_similarity(self, vec1, vec2):
        """Return the cosine similarity of two equal-length numeric sequences.

        Returns 0.0 when either vector has zero norm: the cosine is undefined
        there and the NaN produced by a plain division would corrupt the
        max()/sorted() calls that consume this value.
        """
        a = np.array(vec1, dtype=float)
        b = np.array(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def _rank_users_for_item(self, item, candidate_vecs, owned):
        """Return the candidate users for `item`, most similar to a past buyer first."""
        # Parse each buyer's vector once per item instead of once per user pair.
        buyers = []
        for buyer in dict.fromkeys(self.item_user.get(item, [])):
            raw = self.user_vec.get(buyer, [])
            if len(raw) > 0:
                buyers.append((buyer, np.array(raw, dtype=float)))

        best_score = {}
        for user, cand_vec in candidate_vecs.items():
            if item in owned[user]:
                continue  # never recommend an item the user already bought
            scores = [
                round(self.get_cosine_similarity(buyer_vec, cand_vec), 3)
                for buyer, buyer_vec in buyers
                if buyer != user
            ]
            if scores:
                best_score[user] = max(scores)
        # sorted() is stable, so ties keep the order of self.userlist.
        ordered = sorted(best_score.items(), key=lambda pair: pair[1], reverse=True)
        return [user for user, _ in ordered]

    def make_user_recommendations_selected_cluster(self, cluster_items):
        """Rank selected users for every item in `cluster_items` and print accuracies.

        Returns {k: {item: [top-k users]}} for every k in TOP_K_VALUES.
        """
        # Selected users that have an embedding, in file order and without repeats;
        # their vectors and purchase sets are prepared once for the whole cluster.
        candidate_vecs = {}
        for user in self.userlist:
            if user not in candidate_vecs:
                raw = self.user_vec.get(user, [])
                if len(raw) > 0:
                    candidate_vecs[user] = np.array(raw, dtype=float)
        owned = {user: set(self.user_item.get(user, [])) for user in candidate_vecs}

        ranked = {}
        for item in cluster_items:
            ranked[item] = self._rank_users_for_item(item, candidate_vecs, owned)

        recommendations = {}
        for k in self.TOP_K_VALUES:
            recommendations[k] = {item: users[:k] for item, users in ranked.items()}
            print("Accuracy for LINE Recommender System based on Top-%d similar users" % k)
            self.get_accuracy_score(item_recusers = recommendations[k], cluster_items = cluster_items)
        return recommendations

    def get_accuracy_score(self, item_recusers, cluster_items):
        """Print and return the hit rate of `item_recusers` over `cluster_items`.

        An item is a hit for a test window when at least one of its recommended
        users bought it in that window. Returns a tuple
        (2-day, 4-day, 10-day); all zeros when `cluster_items` is empty.
        """
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        total = len(cluster_items)
        accuracies = []
        for label, purchases in windows:
            hits = sum(
                1
                for item in cluster_items
                if any(item in purchases.get(user, []) for user in item_recusers.get(item, []))
            )
            accuracy = hits / total if total else 0.0  # empty cluster: avoid ZeroDivisionError
            print("For a %s testing period:" % label, accuracy)
            accuracies.append(accuracy)
        return tuple(accuracies)

    def make_item_recommendations(self, data_dir=None, star_cutoff=40):
        """Split the ranked items into star/tail clusters and evaluate both.

        The rank file is read from `data_dir`, defaulting to the directory
        passed to read_data(). Rows need five tab-separated fields: the rank in
        the first and the item id in the second (the remaining three are
        presumably quantity, price and sales, going by the file name). Items
        ranked `star_cutoff` or better are "star" items, the rest "tail".
        """
        if data_dir is None:
            data_dir = getattr(self, "data_dir", None)
        if data_dir is None:
            raise ValueError("no data directory: call read_data() first or pass data_dir")

        # Start from empty clusters so that a repeated call does not duplicate items.
        self.star_items = []
        self.tail_items = []
        with open(os.path.join(data_dir, "rank_item_quantity_price_sales.txt")) as itemfile:  # pull the ranked items, and create clusters
            for line in itemfile:
                toks = line.strip().split("\t")
                if len(toks) == 5:
                    if int(toks[0]) <= star_cutoff:
                        self.star_items.append(toks[1])
                    else:
                        self.tail_items.append(toks[1])
        print(self.star_items)
        print(self.tail_items)
        print("\nMaking Recommendations\n")
        print("\nAccuracy for Star Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items = self.star_items)
        print("\nAccuracy for Long Tail Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items = self.tail_items)
                                            
# Input locations. The defaults are the original author's local Windows paths;
# set GAMING_DATA_DIR / GAMING_LINE_DIR to run the notebook on another machine
# without editing the code.
dirpath = os.environ.get('GAMING_DATA_DIR', 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files')
# This cell evaluates the second-order LINE embeddings.
outputfilename = os.environ.get('GAMING_LINE_DIR', 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/LINE') + '/LINE_vec_2nd_wo_norm.txt'


def main():
    """Load the data and LINE vectors, then evaluate the star and long-tail clusters."""
    recommender = RecommendVirtualItems()
    # Both paths come from the module-level configuration defined above.
    recommender.read_data(dirpath)
    recommender.read_LINE_output(outputfilename)
    recommender.make_item_recommendations()


# Run the pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302
['lolvn002', 'lolvn001', 'lolvn003', 'loltw001', 'RP003', 'loltw003', 'RP002', 'loltw004', 'lolvn004', 'RP001', 'package_20', 'loltw005', 'tw_18_3', '89', 'loltw002', 'tw_18_2', 'tw_18_5', 'lolsam001', '9002', 'lolsam003', 'fo3vn003', 'RP004', 'FO3TH003', 'fo3vn004', 'tw_18_4', 'package_4', '9001', 'package_2', 'fo3vn002', 'lolsam004', 'tw_19_3', 'package_18', 'package_19', 'package_1', 'tw_19_2', '166', '145', 'fo3vn005', '9000', '33']
['43', '31', 'RP005', '237', 'HONLUCK1', '57', '2', 'lolsam006', 'lolsam002', '239', 'FO3TH004', 'FO3TH006', 'tw_19_5', 'lolvn005', '157', 'pbth001', '159', '205', 'lolth004', '228', '280', 'tw_19_4', 'lolth001', 'lolsam005', '138', 'lolth003', 'tw_18_1', 'tw_15_5', '34', 'tw_15_4', '188', '25', '192', '200', 'FO3TH005',

In [1]:
import sys
import os
import random
from collections import Counter
import numpy as np

class RecommendVirtualItems:
    """Recommend virtual *items* to selected users using LINE user embeddings.

    For each selected user the purchasers in the network are ranked by cosine
    similarity; the items of a cluster ("star" or "long tail") bought by the
    top-k most similar purchasers and not yet owned by the user are
    recommended. Accuracy is the share of selected users who bought at least
    one recommended item during the 2/4/10-day test windows.
    """

    # Sizes of the "top-k most similar users" neighbourhoods that get evaluated.
    TOP_K_VALUES = (5, 10, 15, 20)

    def __init__(self):
        self.data_dir = None            # directory given to read_data(); reused for the rank file
        self.user_vec = dict()          # {uid: [vector components as strings]}
        self.id_user = dict()
        self.id_game = dict()
        self.id_item = dict()
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        self.userlist = []
        self.selected_user_items = dict()
        self.star_items = []
        self.tail_items = []
        self.all_items = []

    @staticmethod
    def _iter_pairs(path):
        """Yield (first, second) for each line of `path` holding exactly two tab-separated fields."""
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    yield toks[0], toks[1]

    def _load_names(self, path, names):
        """Fill `names` ({id: name with spaces removed}) from an id/name file."""
        for key, name in self._iter_pairs(path):
            names[key] = name.replace(" ", "")

    def _load_edges(self, path, left_to_right, right_to_left):
        """Fill both adjacency dicts ({left: [right, ...]} and its inverse) from an edge file."""
        for left, right in self._iter_pairs(path):
            left_to_right.setdefault(left, []).append(right)
            right_to_left.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load every input table found in `dirpath` into the instance dictionaries.

        Paths are built with os.path.join: the previous "\\id_user.txt"-style
        literals relied on invalid escape sequences and only resolved on
        Windows. Lines that do not split into exactly two tab-separated fields
        are ignored. `dirpath` is remembered so that
        make_item_recommendations() reads the rank file from the same place.
        """
        self.data_dir = dirpath

        def path_of(filename):
            return os.path.join(dirpath, filename)

        self._load_names(path_of("id_user.txt"), self.id_user)    # {uid1: User1, uid2: User2....}
        print("#users", len(self.id_user))

        self._load_names(path_of("id_game.txt"), self.id_game)    # {33302: LoLsg, 33305: LoLmy....}
        print("#games", len(self.id_game))

        self._load_names(path_of("id_item.txt"), self.id_item)    # {itemid1: Item1, itemid2: Item2....}
        print("#items", len(self.id_item)) # comprehensive list of items

        # {gameid: [uid, ...]} and {uid: [gameid, ...]}
        self._load_edges(path_of("game_user.txt"), self.game_user, self.user_game)
        # {itemid: [uid, ...]} and {uid: [itemid, ...]} - purchases
        self._load_edges(path_of("item_user.txt"), self.item_user, self.user_item)
        # {itemid: [gameid, ...]} and {gameid: [itemid, ...]}
        self._load_edges(path_of("item_game.txt"), self.item_game, self.game_item)

        # Purchases made during the 2/4/10-day test windows.
        self._load_edges(path_of("test2days_item_user.txt"), self.test2days_item_user, self.test2days_user_item)
        print("\nNo of users who bought items in first 2 days of test period:", len(self.test2days_user_item))

        self._load_edges(path_of("test4days_item_user.txt"), self.test4days_item_user, self.test4days_user_item)
        print("No of users who bought items in first 4 days of test period:", len(self.test4days_user_item))

        self._load_edges(path_of("test10days_item_user.txt"), self.test10days_item_user, self.test10days_user_item)
        print("No of users who bought items in first 10 days of test period:", len(self.test10days_user_item))

        with open(path_of("selected_user.txt")) as sufile:  # pull the randomly selected users
            for user in sufile:
                self.userlist.append(user.strip())

    def read_LINE_output(self, outputfilename, dim=128):
        """Read node vectors from a space-separated embedding file.

        Only lines with exactly ``dim + 1`` tokens (node id followed by `dim`
        components) are kept; every other line is skipped. Components are
        stored as the strings found in the file.
        """
        self.user_vec = dict()
        expected_tokens = dim + 1
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) == expected_tokens:
                    self.user_vec[toks[0]] = toks[1:]    # {uid1: [user_vector_values], ...}

    def get_cosine_similarity(self, vec1, vec2):
        """Return the cosine similarity of two equal-length numeric sequences.

        Returns 0.0 when either vector has zero norm: the cosine is undefined
        there and the NaN produced by a plain division would corrupt the
        sorted() call that consumes this value.
        """
        a = np.array(vec1, dtype=float)
        b = np.array(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def make_user_recommendations_selected_cluster(self, cluster_items):
        """Recommend items of `cluster_items` to every selected user and print accuracies.

        Returns {k: {user: [recommended items]}} for every k in TOP_K_VALUES.
        As a side effect `self.user_simscore`, `self.user_rec` and
        `self.sorted_user_simscore` hold the scratch data of the last selected
        user processed (they feed get_top_k_user_items()).
        """
        recommendations = {k: dict() for k in self.TOP_K_VALUES}
        print("\nMaking Recommendations")
        print("#Number of users randomly selected:", len(self.userlist))

        # Position of each item inside the cluster, used to keep cluster order.
        cluster_rank = {}
        for position, item in enumerate(cluster_items):
            cluster_rank.setdefault(item, position)

        # Loop-invariant work hoisted out of the user-pair loop: every purchaser's
        # vector is parsed once and their purchases are pre-filtered to the
        # cluster (each cluster item is listed at most once per purchaser).
        neighbours = []
        for user2, items2 in self.user_item.items():    # other users in the gaming network
            raw = self.user_vec.get(user2, [])
            if len(raw) > 0:
                in_cluster = sorted(cluster_rank.keys() & set(items2), key=cluster_rank.__getitem__)
                neighbours.append((user2, np.array(raw, dtype=float), in_cluster))

        for user1 in self.userlist:                     # iterating through the randomly selected users
            raw = self.user_vec.get(user1, [])          # getting the latent vector representation
            if len(raw) == 0:
                continue
            user1vec = np.array(raw, dtype=float)
            owned = set(self.user_item.get(user1, []))  # items the focal user already purchased
            self.user_simscore = {}
            self.user_rec = {}
            for user2, user2vec, in_cluster in neighbours:
                if user1 == user2:
                    continue
                # user-user cosine similarity, rounded as before
                self.user_simscore[user2] = round(self.get_cosine_similarity(user1vec, user2vec), 3)
                # cluster items bought by user2 that user1 does not own yet
                self.user_rec[user2] = [item for item in in_cluster if item not in owned]
            self.sorted_user_simscore = sorted(self.user_simscore.items(), key=lambda x: x[1], reverse = True)
            for k in self.TOP_K_VALUES:
                recommendations[k][user1] = self.get_top_k_user_items(k=k)

        for k in self.TOP_K_VALUES:
            print("\n")
            print("Accuracy for LINE Recommender System based on Top-%d similar users" % k)
            self.get_accuracy_score(user_recitems = recommendations[k])
        print("\n")
        return recommendations

    def get_top_k_user_items(self, k):
        """Return the distinct items recommended by the k most similar users.

        Reads `self.sorted_user_simscore` and `self.user_rec` as left by
        make_user_recommendations_selected_cluster(); the order of the returned
        list is unspecified (it comes from a set).
        """
        top_k_users = [user for user,_ in self.sorted_user_simscore[0:k]]   # the top k most similar users to the focal user
        top_k_users_items = set()
        for user in top_k_users:
            top_k_users_items.update(self.user_rec[user])
        return list(top_k_users_items)

    def get_accuracy_score(self, user_recitems, total_users=None):
        """Print and return the hit rate of `user_recitems`.

        A user is a hit for a test window when at least one recommended item
        was bought by that user in the window. The hit count is divided by
        `total_users`, which defaults to the number of selected users
        (len(self.userlist)) instead of the former hard-coded 10000. Returns a
        tuple (2-day, 4-day, 10-day); all zeros when `total_users` is 0.
        """
        if total_users is None:
            total_users = len(self.userlist)
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        accuracies = []
        for label, purchases in windows:
            hits = sum(
                1
                for user, items in user_recitems.items()
                if not set(items).isdisjoint(purchases.get(user, []))
            )
            accuracy = hits / total_users if total_users else 0.0  # no users: avoid ZeroDivisionError
            print("For a %s testing period:" % label, accuracy)
            accuracies.append(accuracy)
        return tuple(accuracies)

    def make_item_recommendations(self, data_dir=None, star_cutoff=40):
        """Split the ranked items into star/tail clusters and evaluate both.

        The rank file is read from `data_dir`, defaulting to the directory
        passed to read_data(). Rows need five tab-separated fields: the rank in
        the first and the item id in the second (the remaining three are
        presumably quantity, price and sales, going by the file name). Items
        ranked `star_cutoff` or better are "star" items, the rest "tail".
        """
        if data_dir is None:
            data_dir = getattr(self, "data_dir", None)
        if data_dir is None:
            raise ValueError("no data directory: call read_data() first or pass data_dir")

        # Start from empty clusters so that a repeated call does not duplicate items.
        self.all_items = []
        self.star_items = []
        self.tail_items = []
        with open(os.path.join(data_dir, "rank_item_quantity_price_sales.txt")) as itemfile:  # pull the ranked items, and create clusters
            for line in itemfile:
                toks = line.strip().split("\t")
                if len(toks) == 5:
                    self.all_items.append(toks[1])
                    if int(toks[0]) <= star_cutoff:
                        self.star_items.append(toks[1])
                    else:
                        self.tail_items.append(toks[1])
        print("\nMaking Recommendations\n")
        print("\nAccuracy for Star Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items = self.star_items)
        print("\nAccuracy for Long Tail Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items = self.tail_items)

# Input locations. The defaults are the original author's local Windows paths;
# set GAMING_DATA_DIR / GAMING_LINE_DIR to run the notebook on another machine
# without editing the code.
dirpath = os.environ.get('GAMING_DATA_DIR', 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files')
# This cell evaluates the second-order LINE embeddings.
outputfilename = os.environ.get('GAMING_LINE_DIR', 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/LINE') + '/LINE_vec_2nd_wo_norm.txt'

def main():
    """Load the data and LINE vectors, then evaluate the star and long-tail clusters."""
    recommender = RecommendVirtualItems()
    # Both paths come from the module-level configuration defined above.
    recommender.read_data(dirpath)
    recommender.read_LINE_output(outputfilename)
    recommender.make_item_recommendations()


# Run the pipeline when this cell/script is executed directly.
if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825

No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302

Making Recommendations


Accuracy for Star Items


Making Recommendations
#Number of users randomly selected: 10000


Accuracy for LINE Recommender System based on Top-5 similar users
For a 2-day testing period: 0.0058
For a 4-day testing period: 0.0127
For a 10-day testing period: 0.0264


Accuracy for LINE Recommender System based on Top-10 similar users
For a 2-day testing period: 0.007
For a 4-day testing period: 0.0147
For a 10-day testing period: 0.0303


Accuracy for LINE Recommender System based on Top-15 similar users
For a 2-day testing period: 0.0072
For a 4-day testing period: 0.0149
For a 10-day testing period: 0.0316


Accuracy for LINE Recommender System based on Top-20 similar users
For a 2-day testing period: 0.0074
For a 4-day testin