In [1]:
import pickle
import torch
import numpy as np
import MF
import math
from sklearn import cluster
from operator import itemgetter
from math import log
import torch.nn as nn
import torch.nn.functional as F
import time
import argparse
import pandas as pd
import utility
from scipy.sparse import csr_matrix, rand as sprand
from tqdm import tqdm
from MF import MF
import VAE
import os


np.random.seed(0)
torch.manual_seed(0)

<torch._C.Generator at 0x152bdfb2270>

In [2]:
### Unpickling user and item matrices

test_like = np.load("./data/user_vali_like.npy", allow_pickle=True)
train_like = np.load("./data/user_train_like.npy", allow_pickle=True)

### mtx_item[I]: the list of users that like item I
### mtx_user[U]: the list of items that user U like

with open("./data/item", "rb") as f: 
    mtx_item = pickle.load(f)

with open("./data/user", "rb") as f:
    mtx_user = pickle.load(f)
    
### Read the trained model
model = torch.load("./data/save.pt")
# Item matrix
Q = model.item_factors.weight.H.tolist()
# User matrix
P = model.user_factors.weight.H.tolist()

P = np.array(P)
Q = np.array(Q)

### Predicted matrix
Rec = np.matmul(P.T, Q)


### The test_like and train_like lists, but for item
### test_like_item[I]: the users who like item I in the test set
### train_like_item[I]: the users who like item I in the train set
test_like_item = []
for t in range(Rec.shape[1]):
    test_like_item.append([])
i = 0
for t in test_like:
    for tt in t:
        test_like_item[tt].append(i)
    i += 1

train_like_item = []
for t in range(Rec.shape[0]):
    train_like_item.append([])
i = 0
for t in train_like:
    for tt in t:
        train_like_item[tt].append(i)
    i += 1

    



In [3]:
###
Rec_train = []
for t in range(Rec.shape[0]):
    row = [0]*Rec.shape[1]
    for item in train_like[t]:
        row[item] = 1
    Rec_train.append(row)
Rec_train = np.array(Rec_train)

J_similar = Rec_train.dot(Rec_train.T)
for t in range(J_similar.shape[0]):
    J_similar[t][t] = 0
J_similar = J_similar.astype("float32")
train_like_len = []
for t in train_like:
    train_like_len.append(len(t))
    
for i in range(Rec_train.shape[0]):
    for j in range(i):
        if J_similar[i][j] != 0:
            J_similar[i][j] /= (train_like_len[i] + train_like_len[j])

Rec_train_item = Rec_train.T
J_similar_item = Rec_train_item.dot(Rec_train_item.T)
for t in range(J_similar_item.shape[0]):
    J_similar_item[t][t] = 0
J_similar_item = J_similar_item.astype("float32")
train_like_item_len = []
for t in train_like_item:
    train_like_item_len.append(len(t))
            
for i in range(J_similar_item.shape[0]):
    for j in range(i):
        if J_similar_item[i][j] != 0:
            J_similar_item[i][j] /= (train_like_item_len[i] + train_like_item_len[j])

In [4]:
### Functions for metrices
def get_activeness(user):
    return len(train_like[user])

def get_popularity(item):
    return len(train_like_item[item])

def get_user_mainstreamness(user):
    ### mainstreamness == jacarrd similarity
    return(sum(J_similar[user])/(J_similar.shape[0]-1))

def get_item_mainstreamness(item):
    ### mainstreamness == jacarrd similarity
    return(sum(J_similar_item[item])/(J_similar_item.shape[1]-1))



In [5]:
### Calculate the metrices

activeness = []
user_mainstreamness = []
for t in range(Rec.shape[0]):
    activeness.append(get_activeness(t))
    user_mainstreamness.append((get_user_mainstreamness(t)))
                               
popularity = []
item_mainstreamness = []
for t in range(Rec.shape[1]):
    popularity.append(get_popularity(t))
    item_mainstreamness.append((get_item_mainstreamness(t)))

In [6]:
### _sort[0] = values
### _sort[1] = indexs

act_sort = (np.sort(np.array(activeness)), np.argsort(np.array(activeness)))
usrmain_sort = (np.sort(np.array(user_mainstreamness)), np.argsort(np.array(user_mainstreamness)))
pop_sort = (np.sort(np.array(popularity)), np.argsort(np.array(popularity)))
itmmain_sort = (np.sort(np.array(item_mainstreamness)), np.argsort(np.array(item_mainstreamness)))

In [10]:
np.save("./data/activeness.npy", activeness)
np.save("./data/user_mainstreamness.npy", user_mainstreamness)
np.save("./data/popularity.npy", popularity)
np.save("./data/item_mainstreamness.npy", item_mainstreamness)

np.save("./data/act_sort.npy", act_sort)
np.save("./data/usrmain_sort.npy", usrmain_sort)
np.save("./data/pop_sort.npy", pop_sort)
np.save("./data/itmmain_sort.npy", itmmain_sort)

In [14]:
### Load the metrics from disk
act_sort = np.load("./data/act_sort.npy")
pop_sort = np.load("./data/pop_sort.npy")
usrmain_sort = np.load("./data/usrmain_sort.npy")
itmmain_sort = np.load("./data/itmmain_sort.npy")

In [11]:
### Functions for calculating NDCG@K, performance, and average rank

num_u = Rec.shape[0]
like = train_like
for i in range(num_u):
    Rec[i, like[i]] = -100000.0

### No train set in Rec_sort!!!
### Pretty quick even we sort all users and items
Rec_sort = []
for t in Rec:
    num = t.tolist().count(-100000.0)
    tt = np.argsort(t)[::-1]
    tt = tt[0:(len(tt)-num)]
    Rec_sort.append(tt)

def NDCG_at_k(predicted_list, ground_truth, k):
    dcg_value = [(v / log(i + 1 + 1, 2)) for i, v in enumerate(predicted_list[:k])]
    dcg = np.sum(dcg_value)
    if len(ground_truth) < k:
        ground_truth += [0 for i in range(k - len(ground_truth))]
    idcg_value = [(v / log(i + 1 + 1, 2)) for i, v in enumerate(ground_truth[:k])]
    idcg = np.sum(idcg_value)
    return dcg / idcg

def get_ndcg_ave(user):
    k_set = [1,5,10,20]
    u_pred = Rec[user, :]
    top15_item_idx_no_train = np.argpartition(u_pred, -k_set[-1])[-k_set[-1]:]
    top15 = (np.array([top15_item_idx_no_train, u_pred[top15_item_idx_no_train]])).T
    top15 = sorted(top15, key=itemgetter(1), reverse=True)
    new_user_prediction = top15
    test = test_like[user]

    dcg_list = []

    # compute the number of true positive items at top k
    rank_sum = 0
    count = 0
    for i in range(k_set[3]):
        if new_user_prediction[i][0] in test:
            rank_sum += (i+1)
            count += 1
            dcg_list.append(1)
        else:
            dcg_list.append(0)

    # calculate NDCG@k
    idcg_list = [1 for i in range(len(test))]
    ndcg_tmp_15 = NDCG_at_k(dcg_list, idcg_list, k_set[3])
    ### Calculate average rank of test items
    ### Notice: only test items in the top 20 count
    if rank_sum!= 0:
        ave_rank = rank_sum/count
    else:
        ave_rank = 100000
    
    return ndcg_tmp_15, ave_rank



def get_performance_ave(item):
### !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
### get performance@20 after 20 is 0
    k = 20
    test = test_like_item[item]
    ### Number of the users who likes this item
    hit_usr = len(test)
    ### Top of the fraction
    a = 0
    
    rank_sum = 0
    count = 0
    for t in range(k):
        if t < hit_usr:
            ranklist = Rec_sort[test[t]]
            rank = ranklist.tolist().index(item)+1
            rank_sum += rank
            count += 1
            a += 1/math.log(rank+2, 2)
        else:
            a += 0
    if count != 0:
        performance = a/min(hit_usr, k) ### a/min(hit_usr, 20) or a/20?
        ave_rank = rank_sum/count
    else:
        performance = 0
        ave_rank = 100000
    
    return performance, ave_rank


In [12]:
def evaluate_user():
    user_num = Rec.shape[0]
    NDCG = []
    Ave_rank = []
    for t in range(user_num):
        ndcg_, ave_rank_ = get_ndcg_ave(t)
        NDCG.append(ndcg_)
        Ave_rank.append(ave_rank_)
    return NDCG, Ave_rank

def evaluate_item():
    item_num = Rec.shape[1]
    Performance = []
    Ave_rank = []
    for t in range(item_num):
        performance_, ave_rank_ = get_performance_ave(t)
        Performance.append(performance_)
        Ave_rank.append(ave_rank_)
    return Performance, Ave_rank


In [13]:
NDCG, Ave_rank_user = evaluate_user()
Performance, Ave_rank_item = evaluate_item()
NDCG = np.array(NDCG)
Ave_rank_user = np.array(Ave_rank_user)
Performance = np.array(Performance)
Ave_rank_item = np.array(Ave_rank_item)

  return dcg / idcg


In [25]:
### K is the number of groups
k = 5
results = {"usrmain":{"NDCG":[], "Ave_rank":[], "Ave_mtc":[], "Metric":[]}, "activeness":{"NDCG":[], "Ave_rank":[], "Ave_mtc":[], "Metric":[]}, 
           "itmmain":{"Performance":[], "Ave_rank":[], "Ave_mtc":[], "Metric":[]}, "popularity":{"Performance":[], "Ave_rank":[], "Ave_mtc":[], "Metric":[]}}
num_user = Rec.shape[0]
bt_user = int(num_user/k)
num_item = Rec.shape[1]
bt_item = int(num_item/k)
for i in range(k):
    if i == k-1:
        idxs = usrmain_sort[1][i*bt_user::]
        idxs2 = act_sort[1][i*bt_user::]
        idxs3 = itmmain_sort[1][i*bt_item::]
        idxs4 = pop_sort[1][i*bt_item::]
    else:
        idxs = usrmain_sort[1][i*bt_user:(i+1)*bt_user]
        idxs2 = act_sort[1][i*bt_user:(i+1)*bt_user]
        idxs3 = itmmain_sort[1][i*bt_item:(i+1)*bt_item]
        idxs4 = pop_sort[1][i*bt_item:(i+1)*bt_item]
    results["usrmain"]["NDCG"].append(NDCG[idxs])
    results["usrmain"]["Ave_rank"].append(Ave_rank_user[idxs])
    results["usrmain"]["Ave_mtc"].append(np.mean(usrmain_sort[0][idxs]))
    results["usrmain"]["Metric"].append(usrmain_sort[0][idxs])
    results["activeness"]["NDCG"].append(NDCG[idxs2])
    results["activeness"]["Ave_rank"].append(Ave_rank_user[idxs2])
    results["activeness"]["Ave_mtc"].append(np.mean(act_sort[0][idxs2]))
    results["activeness"]["Metric"].append(act_sort[0][idxs2])
    results["itmmain"]["Performance"].append(Performance[idxs3])
    results["itmmain"]["Ave_rank"].append(Ave_rank_item[idxs3])
    results["itmmain"]["Ave_mtc"].append(np.mean(itmmain_sort[0][idxs3]))
    results["itmmain"]["Metric"].append(itmmain_sort[0][idxs3])
    results["popularity"]["Performance"].append(Performance[idxs4])
    results["popularity"]["Ave_rank"].append(Ave_rank_item[idxs4])
    results["popularity"]["Ave_mtc"].append(np.mean(pop_sort[0][idxs4]))
    results["popularity"]["Metric"].append(pop_sort[0][idxs4])
    


        
    

In [30]:
### Convert results to dataframe
### Save the dataframes
df = {"NDCG": [], "Ave_rank":[], "metric": [], "group": [], "ave_metric": []}
for group in range(k):
    for t in results["usrmain"]["NDCG"][group]:
        df["NDCG"].append(t)
    for t in results["usrmain"]["Ave_rank"][group]:
        df["Ave_rank"].append(t)
    ave_metric_ = results["usrmain"]["Ave_mtc"][group]
    for t in results["usrmain"]["Metric"][group]:
        df["metric"].append(t)
        df["group"].append(group)
        df["ave_metric"].append(ave_metric_)
df = pd.DataFrame(df)
df.to_csv("./data/user main.csv")

df = {"NDCG": [], "Ave_rank":[], "metric": [], "group": [], "ave_metric": []}
for group in range(k):
    for t in results["activeness"]["NDCG"][group]:
        df["NDCG"].append(t)
    for t in results["activeness"]["Ave_rank"][group]:
        df["Ave_rank"].append(t)
    ave_metric_ = results["activeness"]["Ave_mtc"][group]
    for t in results["activeness"]["Metric"][group]:
        df["metric"].append(t)
        df["group"].append(group)
        df["ave_metric"].append(ave_metric_)
df = pd.DataFrame(df)
df.to_csv("./data/user act.csv")

df = {"Performance": [], "Ave_rank":[], "metric": [], "group": [], "ave_metric": []}
for group in range(k):
    for t in results["itmmain"]["Performance"][group]:
        df["Performance"].append(t)
    for t in results["itmmain"]["Ave_rank"][group]:
        df["Ave_rank"].append(t)
    ave_metric_ = results["itmmain"]["Ave_mtc"][group]
    for t in results["itmmain"]["Metric"][group]:
        df["metric"].append(t)
        df["group"].append(group)
        df["ave_metric"].append(ave_metric_)
df = pd.DataFrame(df)
df.to_csv("./data/item main.csv")

df = {"Performance": [], "Ave_rank":[], "metric": [], "group": [], "ave_metric": []}
for group in range(k):
    for t in results["popularity"]["Performance"][group]:
        df["Performance"].append(t)
    for t in results["popularity"]["Ave_rank"][group]:
        df["Ave_rank"].append(t)
    ave_metric_ = results["popularity"]["Ave_mtc"][group]
    for t in results["popularity"]["Metric"][group]:
        df["metric"].append(t)
        df["group"].append(group)
        df["ave_metric"].append(ave_metric_)
df = pd.DataFrame(df)
df.to_csv("./data/item pop.csv")
