In [1]:
import sys
import os
import random
from collections import Counter
import pandas as pd

class CommunityNetworkGenerator:
    """Recommend items to users based on what their group mates purchased.

    The class loads purchase, group-membership and test-period files,
    derives (or reads) user-user linkages, builds per-user recommendation
    lists and reports how many evaluated users bought at least one
    recommended item in each test window.
    """

    def __init__(self):
        # group id -> [user ids] and the inverse user id -> [group ids]
        self.group_user = dict()
        self.user_group = dict()
        # training purchases: item id -> [user ids] and user id -> [item ids]
        self.item_user = dict()
        self.user_item = dict()
        # purchases observed in the 2/4/10-day test files, indexed both ways
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        # user id -> [user ids linked to it in the community input file]
        self.user_groupmember = dict()
        # recommendations for every linked user / for the selected users
        self.user_recitems = dict()
        self.nuser_recitems = dict()
        # user ids read from selected_user.txt
        self.userlist = []
        self.selected_user_items = dict()

    @staticmethod
    def _load_pairs(path, forward, reverse):
        """Index a tab-separated two-column file in both directions.

        For every line holding exactly two tab-separated tokens
        ``left, right`` this appends ``right`` to ``forward[left]`` and
        ``left`` to ``reverse[right]``. Lines with any other token count
        are ignored.
        """
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    left, right = toks
                    forward.setdefault(left, []).append(right)
                    reverse.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load every input file found under ``dirpath``.

        Reads ``item_user.txt``, ``test{2,4,10}days_item_user.txt``,
        ``group_user.txt`` (all tab-separated pairs) and
        ``selected_user.txt`` (one user id per line), and prints the number
        of distinct users seen in each test file.

        Raises:
            OSError: if any of the files cannot be opened.
        """
        self._load_pairs(dirpath + "/item_user.txt",
                         self.item_user, self.user_item)

        test_windows = (
            (2, self.test2days_item_user, self.test2days_user_item),
            (4, self.test4days_item_user, self.test4days_user_item),
            (10, self.test10days_item_user, self.test10days_user_item),
        )
        for days, item_user, user_item in test_windows:
            self._load_pairs(dirpath + "/test%ddays_item_user.txt" % days,
                             item_user, user_item)
            print("No of users who bought items in first %d days of test period:" % days,
                  len(user_item))

        self._load_pairs(dirpath + "/group_user.txt",
                         self.group_user, self.user_group)

        with open(dirpath + "/selected_user.txt") as sufile:
            for line in sufile:
                user = line.strip()
                # A blank (e.g. trailing) line is not a user; keeping it
                # would inflate the accuracy denominator.
                if user:
                    self.userlist.append(user)

    def generate_community_input(self, inputfilename):
        """Write one ``user1 user2 shared_group_count`` line per linked pair.

        Every unordered pair of users from ``self.user_group`` that shares
        at least one group is written exactly once, space-separated.
        The comparison is pairwise over all users, i.e. quadratic in the
        number of users.
        """
        users = list(self.user_group)
        # Build each user's group set once instead of once per comparison.
        group_sets = [set(self.user_group[user]) for user in users]
        # ``with`` guarantees the file is closed even if writing fails.
        with open(inputfilename, 'w') as outfile:
            print("#Number of users in consideration set:", len(users))
            for i, user1 in enumerate(users):
                groups1 = group_sets[i]
                for j in range(i + 1, len(users)):
                    shared_groups = len(groups1 & group_sets[j])
                    if shared_groups > 0:
                        outfile.write(user1 + " " + users[j] + " "
                                      + str(shared_groups) + "\n")

    def read_community_input(self, inputfilename):
        """Load user-user linkages from a space-separated three-column file.

        For each ``u g w`` line, ``g`` is appended to
        ``self.user_groupmember[u]``; the third column is not used.

        NOTE(review): only the first-column user receives the second-column
        user as a group mate. ``generate_community_input`` writes each pair
        once, so a user that only ever appears in the second column gets no
        entry here -- confirm whether the linkage should be symmetric.
        """
        with open(inputfilename) as ugwfile:
            for line in ugwfile:
                toks = line.strip().split(" ")
                if len(toks) == 3:
                    user, mate = toks[0], toks[1]
                    self.user_groupmember.setdefault(user, []).append(mate)

    def get_community_item_recommendations(self):
        """Fill ``self.user_recitems`` for every user with group mates.

        A user is recommended each item bought by a linked member that the
        user has not bought, without duplicates and in first-seen order.
        Users who end up with no recommendation get no entry. Prints a
        sample of up to 20 entries and the number of users covered.
        """
        print("#Number of users in consideration set:", len(self.user_groupmember))
        print("\n")
        for user, members in self.user_groupmember.items():
            # Sets make the "already bought / already recommended" checks
            # O(1); the list in user_recitems preserves first-seen order.
            owned = set(self.user_item.get(user, []))
            seen = set(self.user_recitems.get(user, []))
            for member in members:
                for item in self.user_item.get(member, []):
                    if item in owned or item in seen:
                        continue
                    self.user_recitems.setdefault(user, []).append(item)
                    seen.add(item)
        sample_items = {k: self.user_recitems[k] for k in list(self.user_recitems)[:20]}
        print(sample_items)
        print("\n")
        print("#Number of users in training set for whom items are recommended:", len(self.user_recitems))
        print("\n")

    def get_item_recommendations_selected_users(self):
        """Copy the recommendations of the selected users.

        Every user in ``self.userlist`` gets an entry in
        ``self.selected_user_items`` (an empty list when nothing was
        recommended). Prints a sample of up to 20 entries and the number of
        selected users with at least one recommendation.
        """
        for user in self.userlist:
            self.selected_user_items[user] = self.user_recitems.get(user, [])
        selected_user_items = {k: self.selected_user_items[k] for k in list(self.selected_user_items)[:20]}
        print(selected_user_items)
        print("\n")
        users_with_items = [user for user, items in self.selected_user_items.items() if len(items) > 0]
        print("#Users in the random set for whom items are recommended based on training data:", len(users_with_items))
        print("\n")

    def _report_accuracy(self, user_recitems):
        """Print and return the hit rate for the 2/4/10-day test windows.

        A user counts as a hit for a window when at least one recommended
        item appears among that user's purchases in the window. The rate is
        hits divided by the number of users in ``user_recitems`` (0.0 when
        that mapping is empty) rather than a fixed constant, so the figure
        stays correct for any sample size.

        Returns:
            tuple of three floats: (2-day, 4-day, 10-day) hit rates.
        """
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        hits = [0] * len(windows)
        for user, items in user_recitems.items():
            recommended = set(items)
            for idx, (_, purchases) in enumerate(windows):
                if recommended.intersection(purchases.get(user, [])):
                    hits[idx] += 1
        total = len(user_recitems)
        rates = tuple((count / total) if total else 0.0 for count in hits)
        print("Accuracy for Community-based Recommender System")
        for (label, _), rate in zip(windows, rates):
            print("For a " + label + " testing period:", rate)
        return rates

    def get_accuracy_score(self):
        """Report hit rates for ``self.selected_user_items``.

        Returns:
            tuple of three floats: (2-day, 4-day, 10-day) hit rates.
        """
        return self._report_accuracy(self.selected_user_items)

    def get_alternate_selected_user_item_recommendations(self):
        """Fill ``self.nuser_recitems`` directly for the selected users.

        For each user in ``self.userlist`` the items bought by any linked
        member are pooled, and those the user has not bought are appended
        to the user's list. Every selected user gets an entry, possibly
        empty. Prints a sample of up to 20 entries and the number of users
        with at least one recommendation.
        """
        print("\nAlternate Approach\n")
        print("#Number of users randomly selected:", len(self.userlist))
        print("\n")
        for user in self.userlist:
            member_items = set()
            for member in self.user_groupmember.get(user, []):
                member_items.update(self.user_item.get(member, []))
            owned = set(self.user_item.get(user, []))
            self.nuser_recitems.setdefault(user, []).extend(
                item for item in member_items if item not in owned
            )
        nsample_items = {k: self.nuser_recitems[k] for k in list(self.nuser_recitems)[:20]}
        print(nsample_items)
        print("\n")
        users_with_items = [user for user, items in self.nuser_recitems.items() if len(items) > 0]
        print("#Users in the random set for whom items are recommended based on training data:", len(users_with_items))
        print("\n")

    def get_alternate_accuracy_score(self):
        """Report hit rates for ``self.nuser_recitems``.

        Returns:
            tuple of three floats: (2-day, 4-day, 10-day) hit rates.
        """
        return self._report_accuracy(self.nuser_recitems)
        

# Directory holding the input files opened by read_data() (item_user.txt,
# test*days_item_user.txt, group_user.txt, selected_user.txt).
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
# User-user linkage file: written by generate_community_input() and read back
# by read_community_input().
inputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Community Based Recommendation/community_input_file.txt'


def main():
    """Run the community-based recommendation pipeline end to end.

    Loads the data files, reads the previously generated user-user linkage
    file, then builds and scores recommendations with both approaches.
    """
    generator = CommunityNetworkGenerator()
    generator.read_data(dirpath)
    # The linkage file is read from disk; the step that writes it is
    # disabled here:
    # generator.generate_community_input(inputfilename)
    generator.read_community_input(inputfilename)

    # The remaining steps take no arguments and must run in this order,
    # because each one consumes state produced by the previous one.
    pipeline = (
        generator.get_community_item_recommendations,
        generator.get_item_recommendations_selected_users,
        generator.get_accuracy_score,
        generator.get_alternate_selected_user_item_recommendations,
        generator.get_alternate_accuracy_score,
    )
    for step in pipeline:
        step()
    print("\nSuccess")


# Run the pipeline only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()

No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302
#Number of users in consideration set: 165873


{'1496154': ['loltw001'], '1441473': ['loltw001'], '10660742': ['166', '2'], '2976249': ['166', '2', 'lolsam007'], '9153571': ['2', '226', '237'], '201775287': ['166', '2'], '8268906': ['166', '2'], '69144531': ['166', '2'], '74013761': ['166', '2'], '3692771': ['HONLUCK1', 'package_20', 'lolvn001'], '2121038': ['package_20', 'lolvn001'], '86841579': ['package_20'], '4326528': ['package_20', 'HONLUCK1', 'lolvn001'], '9974489': ['RP001'], '7026125': ['lolvn001'], '9839792': ['lolvn001'], '131839995': ['lolvn002', 'lolvn001', '13036'], '7044029': ['package_20', 'lolvn001', '145', '222', '237', '31', '76', '89'], '27825837': ['lolvn001'], '86285373': ['package_20', 'lolvn001']}


#Number of users in training set for whom items are recommended: 1

In [1]:
import sys
import os
import random
from collections import Counter
import pandas as pd

class CommunityNetworkGenerator:
    """Recommend items from the most similar users sharing a group.

    For each selected user, every other member of the user's groups is
    scored by the Jaccard similarity of their group sets; the items bought
    by the top-k most similar members (and not by the user) are recommended
    and scored against the 2/4/10-day test purchases.
    """

    def __init__(self):
        # group id -> [user ids] and the inverse user id -> [group ids]
        self.group_user = dict()
        self.user_group = dict()
        # training purchases: item id -> [user ids] and user id -> [item ids]
        self.item_user = dict()
        self.user_item = dict()
        # purchases observed in the 2/4/10-day test files, indexed both ways
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        # user id -> [user ids linked to it in the community input file]
        self.user_groupmember = dict()
        # user ids read from selected_user.txt
        self.userlist = []
        # Per-focal-user scratch state shared between
        # get_community_item_recommendations() and get_top_k_user_items();
        # initialised here so the latter is safe to call at any time.
        self.user_simscore = {}
        self.user_rec = {}
        self.sorted_user_simscore = []

    @staticmethod
    def _load_pairs(path, forward, reverse):
        """Index a tab-separated two-column file in both directions.

        For every line holding exactly two tab-separated tokens
        ``left, right`` this appends ``right`` to ``forward[left]`` and
        ``left`` to ``reverse[right]``. Lines with any other token count
        are ignored.
        """
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    left, right = toks
                    forward.setdefault(left, []).append(right)
                    reverse.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load every input file found under ``dirpath``.

        Reads ``item_user.txt``, ``test{2,4,10}days_item_user.txt``,
        ``group_user.txt`` (all tab-separated pairs) and
        ``selected_user.txt`` (one user id per line), and prints the number
        of distinct users seen in each test file.

        Raises:
            OSError: if any of the files cannot be opened.
        """
        self._load_pairs(dirpath + "/item_user.txt",
                         self.item_user, self.user_item)

        test_windows = (
            (2, self.test2days_item_user, self.test2days_user_item),
            (4, self.test4days_item_user, self.test4days_user_item),
            (10, self.test10days_item_user, self.test10days_user_item),
        )
        for days, item_user, user_item in test_windows:
            self._load_pairs(dirpath + "/test%ddays_item_user.txt" % days,
                             item_user, user_item)
            print("No of users who bought items in first %d days of test period:" % days,
                  len(user_item))

        self._load_pairs(dirpath + "/group_user.txt",
                         self.group_user, self.user_group)

        with open(dirpath + "/selected_user.txt") as sufile:
            for line in sufile:
                user = line.strip()
                # A blank (e.g. trailing) line is not a user; keeping it
                # would inflate the accuracy denominator.
                if user:
                    self.userlist.append(user)

    def generate_community_input(self, inputfilename):
        """Write one ``user1 user2 shared_group_count`` line per linked pair.

        Every unordered pair of users from ``self.user_group`` that shares
        at least one group is written exactly once, space-separated.
        The comparison is pairwise over all users, i.e. quadratic in the
        number of users.
        """
        users = list(self.user_group)
        # Build each user's group set once instead of once per comparison.
        group_sets = [set(self.user_group[user]) for user in users]
        # ``with`` guarantees the file is closed even if writing fails.
        with open(inputfilename, 'w') as outfile:
            print("#Number of users in consideration set:", len(users))
            for i, user1 in enumerate(users):
                groups1 = group_sets[i]
                for j in range(i + 1, len(users)):
                    shared_groups = len(groups1 & group_sets[j])
                    if shared_groups > 0:
                        outfile.write(user1 + " " + users[j] + " "
                                      + str(shared_groups) + "\n")

    def read_community_input(self, inputfilename):
        """Load user-user linkages from a space-separated three-column file.

        For each ``u g w`` line, ``g`` is appended to
        ``self.user_groupmember[u]``; the third column is not used.
        """
        with open(inputfilename) as ugwfile:
            for line in ugwfile:
                toks = line.strip().split(" ")
                if len(toks) == 3:
                    user, mate = toks[0], toks[1]
                    self.user_groupmember.setdefault(user, []).append(mate)

    def get_community_item_recommendations(self, ks=(5, 10, 15, 20)):
        """Build top-k recommendations for the selected users and score them.

        For each user in ``self.userlist`` that belongs to at least one
        group, every other member of those groups who bought something is
        scored with ``|shared groups| / |union of groups|``. For each ``k``
        in ``ks`` the items of the ``k`` highest-scoring members that the
        user has not bought are recommended, and the hit rates are printed
        via ``get_accuracy_score``.

        Args:
            ks: iterable of integer top-k cut-offs to evaluate; the default
                reproduces the original Top-5/10/15/20 report.

        Returns:
            dict mapping each ``k`` to ``{user id: [recommended item ids]}``.
        """
        ks = tuple(ks)
        top_k_user_recitems = {k: dict() for k in ks}
        print("\nMaking Recommendations\n")
        print("#Number of users randomly selected:", len(self.userlist))
        for user1 in self.userlist:
            self.user_simscore = {}
            self.user_rec = {}
            groups1 = set(self.user_group.get(user1, []))
            if not groups1:
                continue
            # Set lookup instead of scanning the purchase list per item.
            owned = set(self.user_item.get(user1, []))
            for group in groups1:
                for user2 in self.group_user.get(group, []):
                    # A member met through several groups gets the same
                    # score each time, so it is computed only once.
                    if user2 == user1 or user2 in self.user_simscore:
                        continue
                    items2 = self.user_item.get(user2, [])
                    if not items2:
                        continue
                    groups2 = set(self.user_group.get(user2, []))
                    # groups1 is non-empty, so the union is never empty.
                    self.user_simscore[user2] = len(groups1 & groups2) / len(groups1 | groups2)
                    self.user_rec[user2] = [item for item in items2 if item not in owned]
            self.sorted_user_simscore = sorted(
                self.user_simscore.items(), key=lambda pair: pair[1], reverse=True
            )
            for k in ks:
                top_k_user_recitems[k][user1] = self.get_top_k_user_items(k=k)
        for k in ks:
            print("\n")
            print("Accuracy for Community-based Recommender System based on Top-%d similar users" % k)
            self.get_accuracy_score(user_recitems=top_k_user_recitems[k])
        return top_k_user_recitems

    def get_top_k_user_items(self, k):
        """Return the distinct items recommended by the top ``k`` users.

        Reads ``self.sorted_user_simscore`` (descending similarity) and
        ``self.user_rec`` as left by the most recent focal user processed in
        ``get_community_item_recommendations``. The order of the returned
        list is unspecified because it is built from a set.
        """
        top_k_users_items = set()
        for user, _ in self.sorted_user_simscore[0:k]:
            top_k_users_items.update(self.user_rec[user])
        return list(top_k_users_items)

    def get_accuracy_score(self, user_recitems):
        """Print and return the hit rate for the 2/4/10-day test windows.

        A user in ``self.userlist`` counts as a hit for a window when at
        least one of its recommended items appears among its purchases in
        that window. The rate is hits divided by ``len(self.userlist)``
        (0.0 when the list is empty) rather than a fixed constant, so the
        figure stays correct for any sample size.

        Args:
            user_recitems: mapping of user id to recommended item ids;
                users missing from it are treated as having none.

        Returns:
            tuple of three floats: (2-day, 4-day, 10-day) hit rates.
        """
        windows = (
            ("2-day", self.test2days_user_item),
            ("4-day", self.test4days_user_item),
            ("10-day", self.test10days_user_item),
        )
        hits = [0] * len(windows)
        for user in self.userlist:
            recommended = set(user_recitems.get(user, []))
            for idx, (_, purchases) in enumerate(windows):
                if recommended.intersection(purchases.get(user, [])):
                    hits[idx] += 1
        total = len(self.userlist)
        rates = tuple((count / total) if total else 0.0 for count in hits)
        for (label, _), rate in zip(windows, rates):
            print("For a " + label + " testing period:", rate)
        return rates

# Directory holding the input files opened by read_data() (item_user.txt,
# test*days_item_user.txt, group_user.txt, selected_user.txt).
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
# User-user linkage file path used by generate_community_input() and
# read_community_input(); main() in this cell does not call either.
inputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Community Based Recommendation/community_input_file.txt'


def main():
    """Load the data files, then build and score top-k recommendations."""
    recommender = CommunityNetworkGenerator()
    recommender.read_data(dirpath)
    recommender.get_community_item_recommendations()
    print("\nSuccess")


# Run the pipeline only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    main()

No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302

Making Recommendations

#Number of users randomly selected: 10000


Accuracy for Community-based Recommender System based on Top-5 similar users
For a 2-day testing period: 0.0002
For a 4-day testing period: 0.0005
For a 10-day testing period: 0.0005


Accuracy for Community-based Recommender System based on Top-10 similar users
For a 2-day testing period: 0.0002
For a 4-day testing period: 0.0005
For a 10-day testing period: 0.0005


Accuracy for Community-based Recommender System based on Top-15 similar users
For a 2-day testing period: 0.0002
For a 4-day testing period: 0.0005
For a 10-day testing period: 0.0005


Accuracy for Community-based Recommender System based on Top-20 similar users
For a 2-day testing period: 0.0002
For a 4-day testing period: 0.0005
For a 10-day testing perio

In [2]:
import sys
import os
import random
from collections import Counter
import pandas as pd

class CommunityNetworkGenerator:
    """Evaluate item-to-user recommendations driven by shared in-game groups.

    For every item of a cluster, the selected users who do not own the item
    are ranked by the Jaccard similarity between their group memberships and
    those of the item's buyers.  The top-N ranked users are the "recommended
    users" of the item, and a recommendation counts as a hit when at least
    one of them bought the item during the 2-, 4- or 10-day test period.
    """

    def __init__(self):
        # group id -> list of member user ids, and the inverse index
        self.group_user = dict()
        self.user_group = dict()
        # item id -> list of buyer user ids, and the inverse index
        self.item_user = dict()
        self.user_item = dict()
        # Same two indexes restricted to the 2/4/10-day test periods
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        # Not populated or read by any method of this class.
        self.user_groupmember = dict()
        # Randomly selected users that are candidates for recommendations
        self.userlist = []
        # Item clusters filled by make_item_recommendations()
        self.star_items = []
        self.mid_items = []
        self.tail_items = []

    @staticmethod
    def _load_pairs(path, left_to_right, right_to_left):
        """Read a two-column, tab-separated file into two inverse indexes.

        Every line ``left<TAB>right`` appends ``right`` to
        ``left_to_right[left]`` and ``left`` to ``right_to_left[right]``.
        Lines that do not split into exactly two fields are ignored.
        """
        with open(path) as infile:
            for line in infile:
                toks = line.strip().split("\t")
                if len(toks) != 2:
                    continue
                left, right = toks
                left_to_right.setdefault(left, []).append(right)
                right_to_left.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load purchases, test-period purchases, groups and selected users.

        Args:
            dirpath: directory containing ``item_user.txt``,
                ``test{2,4,10}days_item_user.txt``, ``group_user.txt`` and
                ``selected_user.txt``.

        Prints the number of distinct buyers found in each test period.
        """
        # All purchases: {item: [users]} and {user: [items]}
        self._load_pairs(os.path.join(dirpath, "item_user.txt"),
                         self.item_user, self.user_item)

        # Purchases made during the first 2, 4 and 10 days of the test period
        test_periods = (
            (2, self.test2days_item_user, self.test2days_user_item),
            (4, self.test4days_item_user, self.test4days_user_item),
            (10, self.test10days_item_user, self.test10days_user_item),
        )
        for days, item_user, user_item in test_periods:
            filename = "test{}days_item_user.txt".format(days)
            self._load_pairs(os.path.join(dirpath, filename), item_user, user_item)
            print("No of users who bought items in first {} days of test period:".format(days),
                  len(user_item))

        # In-game groups: {group: [members]} and {user: [groups]}
        self._load_pairs(os.path.join(dirpath, "group_user.txt"),
                         self.group_user, self.user_group)

        # Randomly selected users, one id per line
        with open(os.path.join(dirpath, "selected_user.txt")) as sufile:
            for line in sufile:
                user = line.strip()
                # A blank line would otherwise register "" as a candidate user.
                if user:
                    self.userlist.append(user)

    def _rank_candidates(self, item, candidates, group_cache):
        """Return the candidate users for ``item``, most similar first.

        Args:
            item: item id to recommend.
            candidates: list of ``(user, group_set, item_set)`` tuples built
                from ``self.userlist``.
            group_cache: dict reused across items to memoise the group set of
                each buyer.

        A candidate's score is the highest Jaccard similarity between its
        groups and the groups of any buyer of ``item``.  Candidates that
        already own the item are excluded, and buyers without any group are
        ignored.
        """
        eligible = [(user, groups) for user, groups, owned in candidates
                    if item not in owned]
        best = {}
        # dict.fromkeys drops repeated buyers while keeping first-seen order,
        # so ties keep the same relative order in the stable sort below.
        for buyer in dict.fromkeys(self.item_user.get(item, [])):
            buyer_groups = group_cache.get(buyer)
            if buyer_groups is None:
                buyer_groups = group_cache[buyer] = set(self.user_group.get(buyer, []))
            if not buyer_groups:
                continue
            for user, groups in eligible:
                if user == buyer:
                    continue
                # The union is never empty because buyer_groups is non-empty.
                score = len(buyer_groups & groups) / len(buyer_groups | groups)
                if user not in best or score > best[user]:
                    best[user] = score
        ranked = sorted(best.items(), key=lambda pair: pair[1], reverse=True)
        return [user for user, _ in ranked]

    def make_user_recommendations_selected_cluster(self, cluster_items, top_ns=(5, 10, 15, 20)):
        """Recommend users for each item of a cluster and print the accuracy.

        Args:
            cluster_items: sequence of item ids forming the cluster.
            top_ns: cut-offs N for the "top-N similar users" evaluations.

        Returns:
            ``{N: {item: [recommended users]}}`` for every N in ``top_ns``.
        """
        # Build each candidate's group and item sets once instead of once per
        # (item, buyer, candidate) combination.
        candidates = [
            (user, set(self.user_group.get(user, [])), set(self.user_item.get(user, [])))
            for user in self.userlist
        ]
        group_cache = {}
        recusers_by_n = {n: {} for n in top_ns}
        for item in cluster_items:
            ranked = self._rank_candidates(item, candidates, group_cache)
            for n in top_ns:
                recusers_by_n[n][item] = ranked[:n]

        for n in top_ns:
            print("Accuracy for Community-based Recommender System based on Top-{} similar users".format(n))
            self.get_accuracy_score(item_recusers=recusers_by_n[n], cluster_items=cluster_items)
        return recusers_by_n

    def get_accuracy_score(self, item_recusers, cluster_items):
        """Print and return the hit rate of the recommendations per test period.

        An item is a hit for a period when at least one of its recommended
        users bought it during that period.

        Args:
            item_recusers: ``{item: [recommended users]}``.
            cluster_items: sequence of the item ids that were evaluated.

        Returns:
            ``(rate_2days, rate_4days, rate_10days)``; every rate is ``0.0``
            when ``cluster_items`` is empty.
        """
        period_purchases = (
            self.test2days_user_item,
            self.test4days_user_item,
            self.test10days_user_item,
        )
        hits = [0, 0, 0]
        for item in cluster_items:
            recommended_users = item_recusers.get(item, [])
            for idx, purchases in enumerate(period_purchases):
                if any(item in purchases.get(user, []) for user in recommended_users):
                    hits[idx] += 1

        total = len(cluster_items)
        # An empty cluster has no recommendation to score; report 0.0 rather
        # than dividing by zero.
        rates = tuple(count / total if total else 0.0 for count in hits)
        print("For a 2-day testing period:", rates[0])
        print("For a 4-day testing period:", rates[1])
        print("For a 10-day testing period:", rates[2])
        return rates

    def make_item_recommendations(self, data_dir=None, star_max_rank=40, mid_max_rank=98):
        """Split the ranked items into clusters and evaluate each cluster.

        Args:
            data_dir: directory containing
                ``rank_item_quantity_price_sales.txt``; defaults to the
                module-level ``dirpath``.
            star_max_rank: highest rank that is still a "star" item.
            mid_max_rank: highest rank that is still a "mid" item; items
                ranked below it form the long tail.
        """
        if data_dir is None:
            # Backward-compatible default: the module-level data directory.
            data_dir = dirpath

        # Start from empty clusters so that a repeated call does not
        # duplicate the items.
        self.star_items = []
        self.mid_items = []
        self.tail_items = []
        # Pull the ranked items and create the clusters
        with open(os.path.join(data_dir, "rank_item_quantity_price_sales.txt")) as itemfile:
            for line in itemfile:
                toks = line.strip().split("\t")
                if len(toks) != 5:
                    continue
                rank = int(toks[0])
                if rank <= star_max_rank:
                    self.star_items.append(toks[1])
                elif rank <= mid_max_rank:
                    self.mid_items.append(toks[1])
                else:
                    self.tail_items.append(toks[1])
        print(self.star_items)
        print(self.mid_items)
        print(self.tail_items)
        print("\nMaking Recommendations\n")
        print("\nAccuracy for Star Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.star_items)
        print("\nAccuracy for Mid Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.mid_items)
        print("\nAccuracy for Long Tail Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items=self.tail_items)

# Directory holding the tab-separated input files; main() passes it to read_data()
# and make_item_recommendations() reads the ranked-item file from it.
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
# NOTE(review): not referenced by the code visible in this cell — presumably an input
# file used by another part of the project; confirm before removing.
inputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Community Based Recommendation/community_input_file.txt'  


def main():
    """Load the data files from ``dirpath`` and evaluate the per-cluster recommendations."""
    generator = CommunityNetworkGenerator()
    # The purchase/group indexes must be loaded before the clusters are evaluated.
    generator.read_data(dirpath)
    generator.make_item_recommendations()
    print("\nSuccess")


# Run the pipeline only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()

No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302
['lolvn002', 'lolvn001', 'lolvn003', 'loltw001', 'RP003', 'loltw003', 'RP002', 'loltw004', 'lolvn004', 'RP001', 'package_20', 'loltw005', 'tw_18_3', '89', 'loltw002', 'tw_18_2', 'tw_18_5', 'lolsam001', '9002', 'lolsam003', 'fo3vn003', 'RP004', 'FO3TH003', 'fo3vn004', 'tw_18_4', 'package_4', '9001', 'package_2', 'fo3vn002', 'lolsam004', 'tw_19_3', 'package_18', 'package_19', 'package_1', 'tw_19_2', '166', '145', 'fo3vn005', '9000', '33']
['43', '31', 'RP005', '237', 'HONLUCK1', '57', '2', 'lolsam006', 'lolsam002', '239', 'FO3TH004', 'FO3TH006', 'tw_19_5', 'lolvn005', '157', 'pbth001', '159', '205', 'lolth004', '228', '280', 'tw_19_4', 'lolth001', 'lolsam005', '138', 'lolth003', 'tw_18_1', 'tw_15_5', '34', 'tw_15_4', '188', '25', '192', '200', 'FO3TH005', 'FO3TH001', '230', 'elph003', 'lolth