In [10]:
# Creating the network
import networkx as nx

def create_Gaming_network(data_dir='C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files',
                          include_friends=False):
    """Build the undirected user/game/item graph from tab-separated edge files.

    Node names are the raw ids prefixed with their type: ``'u'`` for users,
    ``'g'`` for games and ``'i'`` for items.

    Parameters
    ----------
    data_dir : str
        Directory holding ``game_user.txt``, ``item_game.txt``,
        ``item_user.txt`` and (optionally) ``user_friend.txt``.  Defaults to
        the path the notebook was originally written against.
    include_friends : bool
        When True, user-user edges from ``user_friend.txt`` are added as well.
        The default (False) matches the previous behaviour, where those edges
        were parsed but never added to the graph; the file is now simply not
        opened in that case.

    Returns
    -------
    networkx.Graph
        Graph containing user-game, game-item and user-item edges.
    """

    def read_edges(path, left_prefix, left_col, right_prefix, right_col):
        """Return ``(left, right)`` node-name pairs parsed from one edge file."""
        edges = []
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                # Blank or single-column lines used to raise IndexError; skip them.
                if len(toks) < 2:
                    continue
                edges.append((left_prefix + toks[left_col],
                              right_prefix + toks[right_col]))
        return edges

    G = nx.Graph()

    # User-Game connections (file columns: game, user)
    G.add_edges_from(read_edges(data_dir + '/game_user.txt', 'u', 1, 'g', 0))

    # Game-Item connections (file columns: item, game)
    G.add_edges_from(read_edges(data_dir + '/item_game.txt', 'g', 1, 'i', 0))

    # User-Item connections (file columns: item, user)
    G.add_edges_from(read_edges(data_dir + '/item_user.txt', 'u', 1, 'i', 0))

    # User-Friend connections (file columns: user, friend) -- opt-in only
    if include_friends:
        G.add_edges_from(read_edges(data_dir + '/user_friend.txt', 'u', 0, 'u', 1))

    return G

In [11]:
# Setup of Matrices

from networkx import to_numpy_matrix, to_scipy_sparse_matrix
import scipy.sparse as sp
import networkx as nx
import numpy as np
from scipy.linalg import fractional_matrix_power
import torch


def load_data():
    """Build the graph and the symmetrically normalised adjacency matrix.

    Computes ``A_new = D^-1/2 (A + I) D^-1/2`` where ``A`` is the adjacency
    matrix over the sorted node list and ``D`` is the row-sum (degree) of
    ``A + I``.

    The previous implementation converted the factors to torch sparse tensors
    and combined them with ``torch.mul``, which is an element-wise product.
    Multiplying element-wise by a diagonal matrix keeps only the diagonal, so
    every neighbour entry was lost.  The products are now done as real matrix
    products in scipy before the single conversion to torch.

    Returns
    -------
    tuple
        ``(I, G, A_new)``: the identity matrix as a torch sparse tensor,
        the networkx graph, and the normalised adjacency as a torch sparse
        tensor.
    """
    G = create_Gaming_network()
    order = sorted(list(G.nodes()))
    A = to_scipy_sparse_matrix(G, nodelist=order)
    I = sp.eye(A.shape[0])
    A_hat = A + I  # Adding Self-loops

    degrees = np.array(A_hat.sum(1)).flatten()  # row sums of A + I
    d_inv_sqrt = np.power(degrees, -0.5)
    d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0        # guard against zero-degree rows
    D_inv = sp.diags(d_inv_sqrt)

    # Matrix (not element-wise) products: D^-1/2 * A_hat * D^-1/2
    A_norm = D_inv.dot(A_hat).dot(D_inv)

    I = sparse_mx_to_torch_sparse_tensor(I)
    A_new = sparse_mx_to_torch_sparse_tensor(A_norm)
    return I, G, A_new

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor of float64.

    Parameters
    ----------
    sparse_mx : scipy.sparse matrix
        Any scipy sparse format; it is converted to COO and cast to float64.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor with the same shape and stored entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float64)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor and its name
    # is misleading here because the values are float64; sparse_coo_tensor
    # takes the dtype from `values`.
    return torch.sparse_coo_tensor(indices, values, shape)

In [12]:
# Create GCN Layers and GCN Model Step

import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn as nn
import torch.nn.functional as F



class GraphConvolution(Module):
    """Single graph-convolution layer: ``adj @ (input @ weight) + bias``.

    Parameters
    ----------
    in_features : int
        Number of input features per node.
    out_features : int
        Number of output features per node.
    bias : bool
        When True (default) a learnable bias of size ``out_features`` is
        created and added to the output.  When False the ``bias`` attribute is
        registered as None.

    Weights and bias are float64 and initialised uniformly in
    ``[-1/sqrt(out_features), 1/sqrt(out_features)]``.
    """

    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        # Contents are overwritten by reset_parameters(); only shape/dtype matter here.
        self.weight = Parameter(torch.empty(in_features, out_features, dtype=torch.float64))
        if bias:
            self.bias = Parameter(torch.empty(out_features, dtype=torch.float64))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw weight (and bias, if present) from the uniform init range."""
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, input, adj):
        """Return ``adj @ (input @ weight)``, plus the bias when one exists."""
        support = torch.mm(input, self.weight)
        output = torch.mm(adj, support)
        # The bias used to be created and initialised but never applied.
        if self.bias is not None:
            output = output + self.bias
        return output

    
class GCN(nn.Module):
    """Stack of three ``GraphConvolution`` layers ending in a row-wise log-softmax.

    The first two layers are each followed by ReLU and dropout; the third
    layer feeds ``log_softmax`` directly.  ``dropout`` is the drop probability
    handed to ``F.dropout`` and is only active while the module is in
    training mode.
    """

    def __init__(self, in_features, out_features, hidden_layer_one_units, hidden_layer_two_units, dropout):
        super(GCN, self).__init__()

        # Layers are created in input -> output order.
        self.gc1 = GraphConvolution(in_features, hidden_layer_one_units)
        self.gc2 = GraphConvolution(hidden_layer_one_units, hidden_layer_two_units)
        self.gc3 = GraphConvolution(hidden_layer_two_units, out_features)
        self.dropout = dropout

    def forward(self, x, adj):
        """Propagate features ``x`` through the three layers using ``adj``."""
        hidden = x
        for layer in (self.gc1, self.gc2):
            activated = F.relu(layer(hidden, adj))
            hidden = F.dropout(activated, self.dropout, training=self.training)
        scores = self.gc3(hidden, adj)
        return F.log_softmax(scores, dim=1)

In [None]:
# Loss -> Optimizer -> Train

from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np
import networkx as nx
import scipy.sparse

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

def get_P_distribution(G):
    """Return normalised shortest-path lengths over all ordered node pairs.

    Nodes are taken in sorted order; for every ordered pair ``(i, j)`` with
    ``i != j`` the hop distance is collected row by row, and the resulting
    vector is divided by its sum.

    The distances are now obtained with one breadth-first search per source
    node instead of one shortest-path query per pair, and the node list is
    sorted once instead of once per outer iteration.  The values and their
    order are unchanged.

    Parameters
    ----------
    G : networkx.Graph

    Returns
    -------
    torch.Tensor
        Sparse tensor of shape ``(1, n*(n-1))`` with ``requires_grad=True``
        (kept for compatibility with existing callers).

    Raises
    ------
    networkx.NetworkXNoPath
        If some pair of nodes is not connected.
    """
    print("Getting P Distribution...")
    nodes = sorted(G.nodes)
    distances = []
    for source in nodes:
        # dict(...) also copes with networkx versions that return an iterator of pairs.
        lengths = dict(nx.single_source_shortest_path_length(G, source))
        for target in nodes:
            if target == source:
                continue
            if target not in lengths:
                raise nx.NetworkXNoPath(
                    "No path between %s and %s." % (source, target))
            distances.append(lengths[target])

    mx = scipy.sparse.csr_matrix(distances)
    mx = sparse_mx_to_torch_sparse_tensor(mx)
    total = torch.sparse.sum(mx)  # renamed: no longer shadows builtin sum
    distances_norm = torch.div(mx, total)
    distances_norm = Variable(distances_norm, requires_grad = True)
    return distances_norm

def get_Q_distribution(output):
    """Return normalised pairwise Euclidean distances between rows of ``output``.

    For every ordered pair of distinct rows ``(i, j)`` (row-major order) the
    Euclidean distance is taken, exact zeros are replaced by ``0.000001``, and
    the vector is divided by its sum.

    The previous implementation moved every distance through ``.tolist()``
    and scipy, which detached the result from the autograd graph, so a loss
    built on it could not send gradients back to the model.  The computation
    now stays in torch end to end.

    Parameters
    ----------
    output : torch.Tensor
        2-D tensor, one embedding per row.

    Returns
    -------
    torch.Tensor
        Sparse float64 tensor of shape ``(1, n*(n-1))``; differentiable with
        respect to ``output``.
    """
    print("Getting Q Distribution...")
    n = output.shape[0]
    embeddings = output.to(torch.float64)
    # Direct (non-matmul) mode so identical rows give an exact 0 distance.
    pairwise = torch.cdist(embeddings, embeddings, p=2,
                           compute_mode='donot_use_mm_for_euclid_dist')
    off_diagonal = torch.eye(n, device=output.device) == 0
    distances = pairwise[off_diagonal]  # row-major, i != j
    distances = torch.where(distances == 0,
                            torch.full_like(distances, 0.000001),
                            distances)
    distances_norm = distances / distances.sum()
    # Sparse (1, N) layout so existing callers can keep using .coalesce().values().
    return distances_norm.unsqueeze(0).to_sparse()

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor of float64.

    Parameters
    ----------
    sparse_mx : scipy.sparse matrix
        Any scipy sparse format; it is converted to COO and cast to float64.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor with the same shape and stored entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float64)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor and its name
    # is misleading here because the values are float64; sparse_coo_tensor
    # takes the dtype from `values`.
    return torch.sparse_coo_tensor(indices, values, shape)


# Load data
# I: identity features, G: the graph, A_new: normalised adjacency.
I, G, A_new = load_data()
P = get_P_distribution(G)

# Model, Loss and optimizer
model = GCN(
    in_features=I.shape[1],
    out_features=128,
    hidden_layer_one_units=64,
    hidden_layer_two_units=32,
    dropout=0.5,
)
loss_fn = nn.KLDivLoss(reduction='sum')
optimizer = optim.Adam(model.parameters(), lr=0.01)


def get_old_Q_distribution(output):
    """Dense, row-normalised matrix of pairwise Euclidean distances.

    Entry ``(r, c)`` is the distance between rows ``r`` and ``c`` of
    ``output`` divided by the sum of row ``r``'s distances.  The diagonal
    (distance of a row to itself) is included.  Returns a 2-D torch tensor.
    """
    pairwise = [
        [torch.dist(first, second).tolist() for second in output]
        for first in output
    ]
    dist_matrix = np.asarray(pairwise)
    row_totals = dist_matrix.sum(axis=1, keepdims = True)
    normalised = np.matrix(dist_matrix / row_totals)
    return torch.from_numpy(normalised)
    
def train(epoch):
    """Run one optimisation step on the module-level ``model`` and return its output.

    Uses the notebook globals ``model``, ``optimizer``, ``loss_fn``, ``I``,
    ``A_new`` and ``P``.  The loss is ``loss_fn(log(P), Q)`` where ``Q`` is
    computed from the current model output.

    Parameters
    ----------
    epoch : int
        Epoch index; accepted for the caller's loop but not used here.

    Returns
    -------
    torch.Tensor
        The model output for this step.
    """
    # Forward Propagation
    model.train()
    optimizer.zero_grad()
    output = model(I, A_new)  # call the module (not .forward) so hooks run
    Q = get_Q_distribution(output)
    loss = loss_fn(P.coalesce().values().log(), Q.coalesce().values())

    # Back Propagation
    # The loss must NOT be re-wrapped in Variable(..., requires_grad=True):
    # that creates a fresh leaf tensor cut off from the graph, so backward()
    # would stop there and never reach anything upstream of the loss.
    print("loss=", loss)
    loss.backward()
    optimizer.step()
    return output

# Train model
t_total = time.time()
for epoch in range(2):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Train model
# Second pass: reload the inputs, run two more epochs, then write the embeddings.
t_total = time.time()
I, G, A_new = load_data()
for epoch in range(2):
    output = train(epoch)
nodes = sorted(list(G.nodes))
output_embedding = output.tolist()
filename = "C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/GCN/GCN_Gaming_Embeddings.txt"
result = list(zip(nodes, output_embedding))
with open(filename, 'w') as f:
    # One line per node: "<node> <v1> <v2> ... <vN> " -- every field, including
    # the last, is followed by a single space.
    f.writelines(
        "".join([str(node) + " "] + [str(value) + " " for value in vector]) + "\n"
        for node, vector in result
    )
print("Optimization Finished successfully!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

Getting P Distribution...


In [5]:
# Creating the network
import networkx as nx

def create_Gaming_network(data_dir='C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files',
                          include_friends=False):
    """Build the undirected user/game/item graph from tab-separated edge files.

    Node names are the raw ids prefixed with their type: ``'u'`` for users,
    ``'g'`` for games and ``'i'`` for items.

    Parameters
    ----------
    data_dir : str
        Directory holding ``game_user.txt``, ``item_game.txt``,
        ``item_user.txt`` and (optionally) ``user_friend.txt``.  Defaults to
        the path the notebook was originally written against.
    include_friends : bool
        When True, user-user edges from ``user_friend.txt`` are added as well.
        The default (False) matches the previous behaviour, where those edges
        were parsed but never added to the graph; the file is now simply not
        opened in that case.

    Returns
    -------
    networkx.Graph
        Graph containing user-game, game-item and user-item edges.
    """

    def read_edges(path, left_prefix, left_col, right_prefix, right_col):
        """Return ``(left, right)`` node-name pairs parsed from one edge file."""
        edges = []
        with open(path) as handle:
            for line in handle:
                toks = line.strip().split("\t")
                # Blank or single-column lines used to raise IndexError; skip them.
                if len(toks) < 2:
                    continue
                edges.append((left_prefix + toks[left_col],
                              right_prefix + toks[right_col]))
        return edges

    G = nx.Graph()

    # User-Game connections (file columns: game, user)
    G.add_edges_from(read_edges(data_dir + '/game_user.txt', 'u', 1, 'g', 0))

    # Game-Item connections (file columns: item, game)
    G.add_edges_from(read_edges(data_dir + '/item_game.txt', 'g', 1, 'i', 0))

    # User-Item connections (file columns: item, user)
    G.add_edges_from(read_edges(data_dir + '/item_user.txt', 'u', 1, 'i', 0))

    # User-Friend connections (file columns: user, friend) -- opt-in only
    if include_friends:
        G.add_edges_from(read_edges(data_dir + '/user_friend.txt', 'u', 0, 'u', 1))

    return G

In [6]:
# Setup of Matrices

from networkx import to_numpy_matrix, to_scipy_sparse_matrix
import scipy.sparse as sp
import networkx as nx
import numpy as np
from scipy.linalg import fractional_matrix_power
import torch


def load_data():
    """Build the graph and the symmetrically normalised adjacency matrix.

    Computes ``A_new = D^-1/2 (A + I) D^-1/2`` with scipy sparse matrix
    products, where ``A`` is the adjacency matrix over the sorted node list
    and ``D`` is the row-sum (degree) of ``A + I``.

    The torch conversions of ``A``, ``A_hat`` and ``D_inv`` were never used
    and have been removed; only the two returned matrices are converted.

    Returns
    -------
    tuple
        ``(I, G, A_new)``: the identity matrix as a torch sparse tensor,
        the networkx graph, and the normalised adjacency as a torch sparse
        tensor.
    """
    G = create_Gaming_network()
    order = sorted(list(G.nodes()))
    A = to_scipy_sparse_matrix(G, nodelist=order)
    I = sp.eye(A.shape[0])
    A_hat = A + I  # Adding Self-loops
    D = np.array(A_hat.sum(1))  # getting degree of all nodes
    D_inv = np.power(D, -0.5).flatten()
    D_inv[np.isinf(D_inv)] = 0  # guard against zero-degree rows
    D_inv = sp.diags(D_inv)
    new_mx = D_inv.dot(A_hat).dot(D_inv)
    I = sparse_mx_to_torch_sparse_tensor(I)
    A_new = sparse_mx_to_torch_sparse_tensor(new_mx)
    return I, G, A_new

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor of float64.

    Parameters
    ----------
    sparse_mx : scipy.sparse matrix
        Any scipy sparse format; it is converted to COO and cast to float64.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor with the same shape and stored entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float64)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor and its name
    # is misleading here because the values are float64; sparse_coo_tensor
    # takes the dtype from `values`.
    return torch.sparse_coo_tensor(indices, values, shape)

In [7]:
# Create GCN Layers and GCN Model Step

import math
import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module
import torch.nn as nn
import torch.nn.functional as F

class GCN(nn.Module):
    """Three graph-convolution layers held as raw weight matrices (no bias, no dropout).

    Each layer computes ``relu(adj @ (x @ W))`` with ``W`` cast to float64 at
    call time; the final activations go through a row-wise ``log_softmax``.
    """

    def __init__(self, in_features, out_features, hidden_layer_one_units, hidden_layer_two_units):
        super(GCN, self).__init__()

        self.in_features = in_features
        self.out_features = out_features
        self.hidden_layer_one_units = hidden_layer_one_units
        self.hidden_layer_two_units = hidden_layer_two_units

        # (attribute name, rows, cols) for each layer, in input -> output order.
        layer_shapes = (
            ('weight_one', in_features, hidden_layer_one_units),
            ('weight_two', hidden_layer_one_units, hidden_layer_two_units),
            ('weight_three', hidden_layer_two_units, out_features),
        )
        for attr_name, n_rows, n_cols in layer_shapes:
            setattr(self, attr_name,
                    Parameter(torch.rand(n_rows, n_cols, requires_grad=True)))
        self.reset_parameters()

    def reset_parameters(self):
        """Re-draw every weight uniformly in ``[-1/sqrt(cols), 1/sqrt(cols)]``."""
        for weight in (self.weight_one, self.weight_two, self.weight_three):
            bound = 1. / math.sqrt(weight.size(1))
            weight.data.uniform_(-bound, bound)

    def gcn_layer_output(self, input, adj, weight):
        """Return ``adj @ (input @ weight)`` with ``weight`` cast to float64."""
        projected = torch.mm(input, weight.double())
        return torch.mm(adj, projected)

    def forward(self, x, adj):
        """Apply the three ReLU-activated layers, then ``log_softmax`` over dim 1."""
        hidden = x
        for weight in (self.weight_one, self.weight_two, self.weight_three):
            hidden = F.relu(self.gcn_layer_output(hidden, adj, weight))
        return F.log_softmax(hidden, dim=1)

In [8]:
# Loss -> Optimizer -> Train

from __future__ import division
from __future__ import print_function

import time
import argparse
import numpy as np
import networkx as nx
import scipy.sparse

import torch
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

def get_P_distribution(G):
    """Return normalised shortest-path lengths over all ordered node pairs.

    Nodes are taken in sorted order; for every ordered pair ``(i, j)`` with
    ``i != j`` the hop distance is collected row by row, and the resulting
    vector is divided by its sum.

    The distances are now obtained with one breadth-first search per source
    node instead of one shortest-path query per pair, and the node list is
    sorted once instead of once per outer iteration.  The values and their
    order are unchanged.

    Parameters
    ----------
    G : networkx.Graph

    Returns
    -------
    torch.Tensor
        Sparse tensor of shape ``(1, n*(n-1))`` with ``requires_grad=True``
        (kept for compatibility with existing callers).

    Raises
    ------
    networkx.NetworkXNoPath
        If some pair of nodes is not connected.
    """
    nodes = sorted(G.nodes)
    distances = []
    for source in nodes:
        # dict(...) also copes with networkx versions that return an iterator of pairs.
        lengths = dict(nx.single_source_shortest_path_length(G, source))
        for target in nodes:
            if target == source:
                continue
            if target not in lengths:
                raise nx.NetworkXNoPath(
                    "No path between %s and %s." % (source, target))
            distances.append(lengths[target])

    mx = scipy.sparse.csr_matrix(distances)
    mx = sparse_mx_to_torch_sparse_tensor(mx)
    total = torch.sparse.sum(mx)  # renamed: no longer shadows builtin sum
    distances_norm = torch.div(mx, total)
    distances_norm = Variable(distances_norm, requires_grad = True)
    return distances_norm

def get_Q_distribution(output):
    """Return normalised pairwise Euclidean distances between rows of ``output``.

    For every ordered pair of distinct rows ``(i, j)`` (row-major order) the
    Euclidean distance is taken, exact zeros are replaced by ``0.000001``, and
    the vector is divided by its sum.

    The previous implementation moved every distance through ``.tolist()``
    and scipy, which detached the result from the autograd graph, so a loss
    built on it could not send gradients back to the model.  The computation
    now stays in torch end to end.

    Parameters
    ----------
    output : torch.Tensor
        2-D tensor, one embedding per row.

    Returns
    -------
    torch.Tensor
        Sparse float64 tensor of shape ``(1, n*(n-1))``; differentiable with
        respect to ``output``.
    """
    n = output.shape[0]
    embeddings = output.to(torch.float64)
    # Direct (non-matmul) mode so identical rows give an exact 0 distance.
    pairwise = torch.cdist(embeddings, embeddings, p=2,
                           compute_mode='donot_use_mm_for_euclid_dist')
    off_diagonal = torch.eye(n, device=output.device) == 0
    distances = pairwise[off_diagonal]  # row-major, i != j
    distances = torch.where(distances == 0,
                            torch.full_like(distances, 0.000001),
                            distances)
    distances_norm = distances / distances.sum()
    # Sparse (1, N) layout so existing callers can keep using .coalesce().values().
    return distances_norm.unsqueeze(0).to_sparse()

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor of float64.

    Parameters
    ----------
    sparse_mx : scipy.sparse matrix
        Any scipy sparse format; it is converted to COO and cast to float64.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor with the same shape and stored entries.
    """
    sparse_mx = sparse_mx.tocoo().astype(np.float64)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor and its name
    # is misleading here because the values are float64; sparse_coo_tensor
    # takes the dtype from `values`.
    return torch.sparse_coo_tensor(indices, values, shape)


# Load data
# I: identity features, G: the graph, A_new: normalised adjacency.
I, G, A_new = load_data()

# Model, Loss and optimizer
model = GCN(
    in_features=I.shape[1],
    out_features=128,
    hidden_layer_one_units=128,
    hidden_layer_two_units=128,
)
optimizer = optim.Adam(model.parameters(), lr=0.01)


def get_old_Q_distribution(output):
    """Dense, row-normalised matrix of pairwise Euclidean distances.

    Entry ``(r, c)`` is the distance between rows ``r`` and ``c`` of
    ``output`` divided by the sum of row ``r``'s distances.  The diagonal
    (distance of a row to itself) is included.  Returns a 2-D torch tensor.
    """
    pairwise = [
        [torch.dist(first, second).tolist() for second in output]
        for first in output
    ]
    dist_matrix = np.asarray(pairwise)
    row_totals = dist_matrix.sum(axis=1, keepdims = True)
    normalised = np.matrix(dist_matrix / row_totals)
    return torch.from_numpy(normalised)
    
def train(epoch):
    """Run one forward pass of the module-level ``model`` and return its output.

    Uses the notebook globals ``model``, ``optimizer``, ``I`` and ``A_new``.

    NOTE(review): no loss is computed and ``backward()`` is never called in
    this function, so ``optimizer.step()`` has no freshly computed gradients
    to apply.  As written, the returned output comes from whatever weights the
    model already had.  Add a loss and a backward pass here if training is
    actually intended.

    Parameters
    ----------
    epoch : int
        Epoch index; accepted for the caller's loop but not used here.

    Returns
    -------
    torch.Tensor
        The model output for this step.
    """
    # Forward Propagation
    model.train()
    optimizer.zero_grad()
    output = model(I, A_new)  # call the module (not .forward) so hooks run
    optimizer.step()
    return output

# Train model
# Reload the inputs, run ten epochs, then write one embedding line per node.
t_total = time.time()
I, G, A_new = load_data()
for epoch in range(10):
    output = train(epoch)
nodes = sorted(list(G.nodes()))
output_embedding = output.tolist()
filename = "C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/GCN/GCN_Gaming_Base_Network_Embeddings.txt"
result = list(zip(nodes, output_embedding))
with open(filename, 'w') as f:
    # One line per node: "<node> <v1> <v2> ... <vN> " -- every field, including
    # the last, is followed by a single space.
    f.writelines(
        "".join([str(node) + " "] + [str(value) + " " for value in vector]) + "\n"
        for node, vector in result
    )
print("Optimization Finished successfully!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

Optimization Finished successfully!
Total time elapsed: 658.2307s


In [9]:
import sys
import os
import random
import numpy as np
from collections import Counter

class RecommendVirtualItems:
    """User-based item recommender driven by node embeddings read from a file.

    Typical use: ``read_data(dirpath)`` -> ``read_gcn_output(path)`` ->
    ``make_item_recommendations()``.  For each selected user, the most
    cosine-similar other users are found and the items they bought (which the
    selected user has not) are recommended.  A recommendation counts as a hit
    when at least one recommended item appears in the user's purchases during
    the 2-, 4- or 10-day test period.
    """

    def __init__(self):
        self.id_user = dict()
        self.user_id = dict()
        self.id_game = dict()
        self.game_id = dict()
        self.id_item = dict()
        self.item_id = dict()
        self.item_user = dict()
        self.user_item = dict()
        self.game_user = dict()
        self.user_game = dict()
        self.game_item = dict()
        self.item_game = dict()
        self.game_userlist = dict()
        self.user_gamelist = dict()
        self.game_itemlist = dict()
        self.item_gamelist = dict()
        self.user_itemlist = dict()
        self.item_userlist = dict()
        self.test2days_item_user = dict()
        self.test2days_user_item = dict()
        self.test4days_item_user = dict()
        self.test4days_user_item = dict()
        self.test10days_item_user = dict()
        self.test10days_user_item = dict()
        self.userlist = []
        self.star_items = []
        self.tail_items = []
        self.all_items = []
        # Embeddings, filled by read_gcn_output(); created here so the
        # attributes always exist.
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()
        # Data directory remembered by read_data() for make_item_recommendations().
        self.dirpath = None

    @staticmethod
    def _read_id_map(path, id_to_name, name_to_id):
        """Fill both lookup dicts from a two-column ``id<TAB>name`` file."""
        with open(path) as dictfile:
            for line in dictfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    id_to_name[toks[0]] = toks[1].replace(" ", "")
                    name_to_id[toks[1]] = toks[0].replace(" ", "")

    @staticmethod
    def _read_pairs(path, left_to_rights, right_to_lefts):
        """Fill both adjacency dicts from a two-column ``left<TAB>right`` file.

        ``left_to_rights[left]`` collects every ``right`` seen with it and
        vice versa; lines that do not have exactly two columns are ignored.
        """
        with open(path) as pairfile:
            for line in pairfile:
                toks = line.strip().split("\t")
                if len(toks) == 2:
                    left, right = toks
                    left_to_rights.setdefault(left, []).append(right)
                    right_to_lefts.setdefault(right, []).append(left)

    def read_data(self, dirpath):
        """Load id maps, interaction files, test-period purchases and selected users.

        ``dirpath`` is remembered on the instance so later steps can read
        further files from the same directory.
        """
        self.dirpath = dirpath

        self._read_id_map(dirpath + "/id_user.txt", self.id_user, self.user_id)    # {uid: User} / {User: uid}
        print("#users", len(self.id_user))

        self._read_id_map(dirpath + "/id_game.txt", self.id_game, self.game_id)    # {gameid: Game} / {Game: gameid}
        print("#games", len(self.id_game))

        self._read_id_map(dirpath + "/id_item.txt", self.id_item, self.item_id)    # {itemid: Item} / {Item: itemid}
        print("#items", len(self.id_item)) # comprehensive list of items

        # data pulled from user game activity: {gameid: [uid, ...]} / {uid: [gameid, ...]}
        self._read_pairs(dirpath + "/game_user.txt", self.game_user, self.user_game)
        # items purchased by users: {itemid: [uid, ...]} / {uid: [itemid, ...]}
        self._read_pairs(dirpath + "/item_user.txt", self.item_user, self.user_item)
        # in-game items: {itemid: [gameid, ...]} / {gameid: [itemid, ...]}
        self._read_pairs(dirpath + "/item_game.txt", self.item_game, self.game_item)

        # purchases made during the first 2 / 4 / 10 days of the test period
        test_periods = (
            (2, self.test2days_item_user, self.test2days_user_item),
            (4, self.test4days_item_user, self.test4days_user_item),
            (10, self.test10days_item_user, self.test10days_user_item),
        )
        for days, item_to_users, user_to_items in test_periods:
            self._read_pairs(dirpath + "/test%ddays_item_user.txt" % days,
                             item_to_users, user_to_items)
            print("No of users who bought items in first %d days of test period:" % days,
                  len(user_to_items))

        with open(dirpath + "/selected_user.txt") as sufile:  # pull the randomly selected users
            for user in sufile:
                self.userlist.append(user.strip())

    def read_gcn_output(self, outputfilename, dim=128):
        """Read space-separated embeddings: ``<prefix><id> v1 ... v<dim>`` per line.

        The first character of the node name selects the table (``u`` user,
        ``g`` game, ``i`` item); the rest of the name is the key.  Vectors are
        stored as lists of strings.  Lines whose token count is not
        ``dim + 1`` are ignored.

        Parameters
        ----------
        outputfilename : str
        dim : int
            Expected embedding width; defaults to 128 as before.
        """
        self.user_vec = dict()
        self.game_vec = dict()
        self.item_vec = dict()
        table_for_prefix = {'u': self.user_vec, 'g': self.game_vec, 'i': self.item_vec}
        with open(outputfilename) as nvfile:
            for line in nvfile:
                toks = line.strip().split(" ")
                if len(toks) != dim + 1:
                    continue
                node = toks[0]
                table = table_for_prefix.get(node[:1])
                if table is not None:
                    table[node[1:]] = toks[1:]    # {id: [vector_values]}

    def get_cosine_similarity(self, vec1, vec2):
        """Cosine similarity of two vectors (numbers or numeric strings).

        Returns 0.0 when either vector has zero norm instead of dividing by zero.
        """
        a = np.asarray(vec1, dtype=float)
        b = np.asarray(vec2, dtype=float)
        denominator = np.linalg.norm(a) * np.linalg.norm(b)
        if denominator == 0:
            return 0.0
        return np.dot(a, b) / denominator

    def make_user_recommendations_selected_cluster(self, cluster_items):
        """Recommend items from ``cluster_items`` to every selected user and print accuracy.

        For each selected user that has an embedding, every other user with
        both purchases and an embedding is scored by cosine similarity
        (rounded to 3 decimals).  That neighbour contributes the cluster items
        it bought that the selected user has not.  Accuracy is printed for the
        top 5, 10, 15 and 20 neighbours.
        """
        k_values = (5, 10, 15, 20)
        top_k_user_recitems = {k: dict() for k in k_values}
        print("\nMaking Recommendations")
        print("#Number of users randomly selected:", len(self.userlist))

        # Hoisted out of the user x user loop: set views of the purchases and
        # numeric embeddings of all candidate neighbours (same order as user_item).
        purchased = {user: set(items) for user, items in self.user_item.items()}
        neighbour_vecs = {}
        for user in self.user_item:
            vec = self.user_vec.get(user, [])
            if vec != []:
                neighbour_vecs[user] = np.array(vec, dtype=float)

        for user1 in self.userlist:                             # iterating through the randomly selected users
            user1vec = self.user_vec.get(user1, [])             # getting the latent vector representation
            if user1vec == []:
                continue
            user1arr = np.array(user1vec, dtype=float)
            already_owned = purchased.get(user1, frozenset())
            candidates = [item for item in cluster_items if item not in already_owned]
            self.user_simscore = {}
            self.user_rec = {}
            for user2, user2arr in neighbour_vecs.items():      # other users in the gaming network
                if user1 == user2:
                    continue
                # user-user cosine similarity with the focal user
                self.user_simscore[user2] = round(self.get_cosine_similarity(user1arr, user2arr), 3)
                items2 = purchased[user2]
                self.user_rec[user2] = [item for item in candidates if item in items2]
            self.sorted_user_simscore = sorted(self.user_simscore.items(), key=lambda x: x[1], reverse = True)
            for k in k_values:
                top_k_user_recitems[k][user1] = self.get_top_k_user_items(k=k)

        for k in k_values:
            print("\n")
            print("Accuracy for GCN Recommender System based on Top-%d similar users" % k)
            self.get_accuracy_score(user_recitems = top_k_user_recitems[k])
        print("\n")

    def get_top_k_user_items(self, k):
        """Union of the items recommended via the ``k`` most similar users.

        Reads ``self.sorted_user_simscore`` and ``self.user_rec`` as left by
        ``make_user_recommendations_selected_cluster``.  The order of the
        returned list is unspecified.
        """
        top_k_users = [user for user, _ in self.sorted_user_simscore[0:k]]
        top_k_users_items = set()
        for user in top_k_users:
            top_k_users_items.update(self.user_rec[user])
        return list(top_k_users_items)

    def get_accuracy_score(self, user_recitems, total_users=None):
        """Print (and return) the hit rate for the 2-, 4- and 10-day test periods.

        A user is a hit for a period when at least one recommended item is
        among that user's purchases in the period.

        Parameters
        ----------
        user_recitems : dict
            ``{user: [recommended items]}``.
        total_users : int, optional
            Denominator of the hit rate.  Defaults to the number of selected
            users (``len(self.userlist)``) rather than a hard-coded 10000;
            for a 10000-user selection the printed numbers are unchanged.

        Returns
        -------
        tuple of float
            ``(rate_2days, rate_4days, rate_10days)``; all 0.0 when the
            denominator is zero.
        """
        if total_users is None:
            total_users = len(self.userlist)
        count_2days = count_4days = count_10days = 0
        for user in user_recitems:
            recommended_items = set(user_recitems.get(user,[]))
            if recommended_items & set(self.test2days_user_item.get(user,[])):
                count_2days += 1
            if recommended_items & set(self.test4days_user_item.get(user,[])):
                count_4days += 1
            if recommended_items & set(self.test10days_user_item.get(user,[])):
                count_10days += 1
        if total_users:
            rates = (count_2days/total_users, count_4days/total_users, count_10days/total_users)
        else:
            rates = (0.0, 0.0, 0.0)
        print("For a 2-day testing period:", rates[0])
        print("For a 4-day testing period:", rates[1])
        print("For a 10-day testing period:", rates[2])
        return rates

    def make_item_recommendations(self, dirpath=None, star_rank_cutoff=40):
        """Split ranked items into clusters and evaluate recommendations for each.

        Reads ``rank_item_quantity_price_sales.txt`` (5 tab-separated columns,
        rank first, item id second).  Items with rank ``<= star_rank_cutoff``
        are "star" items, the rest "tail" items.  The cluster lists are
        rebuilt from scratch on every call.

        Parameters
        ----------
        dirpath : str, optional
            Directory holding the rank file.  Defaults to the directory passed
            to ``read_data`` (this used to rely on a module-level global).
        star_rank_cutoff : int
            Highest rank still counted as a star item; defaults to 40.

        Raises
        ------
        ValueError
            If no directory was given and ``read_data`` has not been called.
        """
        if dirpath is None:
            dirpath = self.dirpath
        if dirpath is None:
            raise ValueError("No data directory: call read_data() first or pass dirpath")

        self.star_items = []
        self.tail_items = []
        self.all_items = []
        with open(dirpath + "/rank_item_quantity_price_sales.txt") as itemfile:  # pull the ranked items, and create clusters
            for line in itemfile:
                toks = line.strip().split("\t")
                if len(toks) == 5:
                    self.all_items.append(toks[1])
                    rank = int(toks[0])
                    if rank <= star_rank_cutoff:
                        self.star_items.append(toks[1])
                    else:
                        self.tail_items.append(toks[1])
        print("\nMaking Recommendations\n")
        print("\nAccuracy for Star Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items = self.star_items)
        print("\nAccuracy for Long Tail Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items = self.tail_items)
        print("\nAccuracy for All Items\n")
        self.make_user_recommendations_selected_cluster(cluster_items = self.all_items)
                                            
# Input directory with the interaction files, and the embedding file to evaluate.
dirpath = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/Files'
outputfilename = 'C:/Vidit/PhD/RA Work/KZ - RA/Gaming Project/data/GCN/GCN_Gaming_Base_Network_Embeddings.txt'


def main():
    """Load the data and embeddings, then print recommendation accuracy."""
    recommender = RecommendVirtualItems()
    recommender.read_data(dirpath)
    recommender.read_gcn_output(outputfilename)
    recommender.make_item_recommendations()

if __name__ == "__main__":
    main()

#users 1496096
#games 22
#items 3825
No of users who bought items in first 2 days of test period: 2700
No of users who bought items in first 4 days of test period: 4999
No of users who bought items in first 10 days of test period: 10302

Making Recommendations


Accuracy for Star Items


Making Recommendations
#Number of users randomly selected: 10000


Accuracy for GCN Recommender System based on Top-5 similar users
For a 2-day testing period: 0.0019
For a 4-day testing period: 0.0048
For a 10-day testing period: 0.0131


Accuracy for GCN Recommender System based on Top-10 similar users
For a 2-day testing period: 0.0036
For a 4-day testing period: 0.0095
For a 10-day testing period: 0.0221


Accuracy for GCN Recommender System based on Top-15 similar users
For a 2-day testing period: 0.0036
For a 4-day testing period: 0.0095
For a 10-day testing period: 0.0221


Accuracy for GCN Recommender System based on Top-20 similar users
For a 2-day testing period: 0.0036
For a 4-day testing pe