In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import xgboost as xgb
import joblib
from copy import deepcopy
import training_function_real_data as tp
import build_new_data as bnd
# import assortment as at
import warnings
import torch
from tqdm import tqdm
from sklearn.model_selection import cross_val_score

## Split the original data into training, validation, and test sets

In [2]:
# Load the raw data, min-max scale the continuous columns, and split the orders
# 80/10/10 into train/validation/test. All rows of one order stay in the same split.
SPLIT_SEED = 0  # fixed seed so the split (and every metric below) is reproducible

data = pd.read_csv('data.csv')
for column in ['stayDurationMinutes', 'totalPrice','totalTripDurationMinutes', 'dtd', 'nAirlines', 'nFlights','outDepTime_sin', 'outDepTime_cos','outArrTime_sin', 'outArrTime_cos']:
    min1 = data[column].min()
    max1 = data[column].max()
    span = max1 - min1
    if span != 0:
        data[column] = (data[column] - min1) / span
    else:
        # A constant column would divide by zero and become NaN; map it to 0 instead.
        data[column] = 0.0

# Unique order ids in order of first appearance (deterministic, unlike iterating a set).
total_order_list = data['orderid'].unique().tolist()
total_num = len(total_order_list)

# Map each order id to the row numbers of its alternatives.
# One pass over the column instead of one .loc lookup per row.
total_dic = {}
for rows, temp_id in enumerate(data['orderid'].to_numpy()):
    total_dic.setdefault(temp_id, []).append(rows)

# Shuffle the order ids with a seeded generator and cut at 80% / 90%.
rng = np.random.default_rng(SPLIT_SEED)
order_list_random = rng.permutation(total_order_list)
train_end = int(0.8 * total_num)
valid_end = int(0.9 * total_num)
train_order = order_list_random[:train_end]
valid_order = order_list_random[train_end:valid_end]
test_order = order_list_random[valid_end:]

# Expand each split's order ids back into row numbers, keeping an order's rows contiguous.
train_list = [row for orderid in train_order for row in total_dic[orderid]]
valid_list = [row for orderid in valid_order for row in total_dic[orderid]]
test_list = [row for orderid in test_order for row in total_dic[orderid]]

train_data = data.loc[train_list].reset_index(drop=True)
valid_data = data.loc[valid_list].reset_index(drop=True)
test_data = data.loc[test_list].reset_index(drop=True)

## Record assortment information

In [7]:
def _rows_by_order(frame):
    """Return {orderid: [row positions]} for a frame with a 0..n-1 index.

    Keys appear in order of first occurrence; each row list is ascending.
    """
    grouped = {}
    for row, order_id in enumerate(frame['orderid'].to_numpy()):
        grouped.setdefault(order_id, []).append(row)
    return grouped


train_dic = _rows_by_order(train_data)
valid_dic = _rows_by_order(valid_data)
test_dic = _rows_by_order(test_data)

# np.save stores a dict as a pickled 0-d object array; it is read back later
# with allow_pickle=True followed by .item().
np.save('train_dic.npy', train_dic)
np.save('valid_dic.npy', valid_dic)
np.save('test_dic.npy', test_dic)

In [8]:
warnings.filterwarnings('ignore')

label = train_data['orderlabel']

# Every column except the identifiers and the target is a model feature.
del_feature = ['orderid','alternative','orderlabel']
features = [name for name in train_data.columns if name not in del_feature]

# np.array(...) makes a copy, so the in-place writes below never touch train_data.
data_np = np.array(train_data[features])
data_np_cate = np.zeros(shape = data_np.shape)

# One entry per feature column. Columns whose entry is not 1 have their values
# moved into data_np_cate and are replaced by the constant 1 in data_np.
# (presumably the entries are per-field cardinalities, with 1 marking a continuous field - confirm)
field_sizes = [11, 7, 97, 63, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1]
moved_cols = [pos for pos, size in enumerate(field_sizes) if size != 1]
data_np_cate[:, moved_cols] = data_np[:, moved_cols]
data_np[:, moved_cols] = 1

# Stack the two planes into one array of shape (2, rows, features).
total_train_data = np.array([data_np_cate,data_np])

In [9]:
# Persist the stacked training array and its labels for the "load data" cell.
np.save(file='data_ztn.npy', arr=total_train_data)
np.save(file='label_ztn.npy', arr=label)

In [10]:
label = valid_data['orderlabel']

# Preprocess the validation split the same way as the training split.
del_feature = ['orderid','alternative','orderlabel']
features = [name for name in valid_data.columns if name not in del_feature]

# np.array(...) makes a copy, so the in-place writes below never touch valid_data.
data_np = np.array(valid_data[features])
data_np_cate = np.zeros(shape = data_np.shape)

# Columns whose entry is not 1 have their values moved into data_np_cate and
# are replaced by the constant 1 in data_np.
field_sizes = [11, 7, 97, 63, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1]
moved_cols = [pos for pos, size in enumerate(field_sizes) if size != 1]
data_np_cate[:, moved_cols] = data_np[:, moved_cols]
data_np[:, moved_cols] = 1

total_valid_data = np.array([data_np_cate,data_np])
total_valid_data.shape

(2, 106296, 17)

In [11]:
# Persist the stacked validation array and its labels for the "load data" cell.
np.save(file='valid_data_ztn.npy', arr=total_valid_data)
np.save(file='valid_label_ztn.npy', arr=label)

In [12]:
label = test_data['orderlabel']

# Preprocess the test split the same way as the training split.
del_feature = ['orderid','alternative','orderlabel']
features = [name for name in test_data.columns if name not in del_feature]

# np.array(...) makes a copy, so the in-place writes below never touch test_data.
data_np = np.array(test_data[features])
data_np_cate = np.zeros(shape = data_np.shape)

# Columns whose entry is not 1 have their values moved into data_np_cate and
# are replaced by the constant 1 in data_np.
field_sizes = [11, 7, 97, 63, 2, 1, 1, 1, 1, 1, 1, 2, 2, 1, 1, 1, 1]
moved_cols = [pos for pos, size in enumerate(field_sizes) if size != 1]
data_np_cate[:, moved_cols] = data_np[:, moved_cols]
data_np[:, moved_cols] = 1

total_test_data = np.array([data_np_cate,data_np])

In [13]:
# Persist the stacked test array and its labels for the "load data" cell.
np.save(file='test_data_ztn.npy', arr=total_test_data)
np.save(file='test_label_ztn.npy', arr=label)

## load data

In [14]:
# Reload the arrays written by the preprocessing cells. From here on
# train_data / valid_data / test_data are the stacked numpy arrays, not DataFrames.
train_data, valid_data, test_data = (
    np.load(name) for name in ('data_ztn.npy', 'valid_data_ztn.npy', 'test_data_ztn.npy')
)
train_label, valid_label, test_label = (
    np.load(name) for name in ('label_ztn.npy', 'valid_label_ztn.npy', 'test_label_ztn.npy')
)
# The order -> rows dicts were saved as pickled objects, hence allow_pickle and .item().
train_dic, valid_dic, test_dic = (
    np.load(name, allow_pickle=True).item()
    for name in ('train_dic.npy', 'valid_dic.npy', 'test_dic.npy')
)

In [15]:
# Generate data for DeepFM-a
data = deepcopy(train_data)
train_data_assort = np.array([np.concatenate([data[0],np.zeros((train_data.shape[1], 9)),np.zeros((train_data.shape[1], 2))],axis = 1),np.concatenate([data[1],np.ones((train_data.shape[1], 9)),np.ones((train_data.shape[1], 2))],axis = 1)])
for i in train_dic.values():
    data_temp = np.zeros((len(i), 9))
    data_cate_temp = np.zeros((len(i), 2))
    min1 = np.min(data[1,i,5])
    max1 = np.max(data[1,i,5])
    mean1 = np.mean(data[1,i,5])
    min2 = np.min(data[1,i,6])
    max2 = np.max(data[1,i,6])
    mean2 = np.mean(data[1,i,6])
    min3 = np.min(data[1,i,7])
    max3 = np.max(data[1,i,7])
    mean3 = np.mean(data[1,i,7])
    data_temp[:,0] = min1
    data_temp[:,1] = max1
    data_temp[:,2] = mean1
    data_temp[:,3] = min2
    data_temp[:,4] = max2
    data_temp[:,5] = mean2
    data_temp[:,6] = min3
    data_temp[:,7] = max3
    data_temp[:,8] = mean3 
    for idx,j in enumerate(i):
        if data[1,j,6] == np.min(data[1,i,6]):
            data_cate_temp[idx,0] = 1
        if data[1,j,6] == np.max(data[1,i,6]):
            data_cate_temp[idx,1] = 1
    train_data_assort[0,i,-2:] = data_cate_temp
    train_data_assort[1,i,17:26] = data_temp

In [16]:
# Build the DeepFM-a input for the validation split.
data = deepcopy(valid_data)
n_rows = data.shape[1]
# Append 9 + 2 extra columns to both planes: zeros on plane 0, ones on plane 1.
valid_data_assort = np.array([
    np.concatenate([data[0], np.zeros((n_rows, 9)), np.zeros((n_rows, 2))], axis = 1),
    np.concatenate([data[1], np.ones((n_rows, 9)), np.ones((n_rows, 2))], axis = 1),
])
for order_rows in valid_dic.values():
    # min / max / mean of columns 5, 6 and 7 over the order, repeated on each of its rows.
    data_temp = np.zeros((len(order_rows), 9))
    for slot, col in enumerate((5, 6, 7)):
        col_values = data[1, order_rows, col]
        data_temp[:, 3 * slot] = np.min(col_values)
        data_temp[:, 3 * slot + 1] = np.max(col_values)
        data_temp[:, 3 * slot + 2] = np.mean(col_values)
    # Flags marking rows whose column-6 value equals the order's minimum / maximum.
    col6 = data[1, order_rows, 6]
    data_cate_temp = np.zeros((len(order_rows), 2))
    data_cate_temp[:, 0] = col6 == np.min(col6)
    data_cate_temp[:, 1] = col6 == np.max(col6)
    valid_data_assort[0, order_rows, -2:] = data_cate_temp
    valid_data_assort[1, order_rows, 17:26] = data_temp

In [17]:
# Build the DeepFM-a input for the test split.
data = deepcopy(test_data)
n_rows = data.shape[1]
# Append 9 + 2 extra columns to both planes: zeros on plane 0, ones on plane 1.
test_data_assort = np.array([
    np.concatenate([data[0], np.zeros((n_rows, 9)), np.zeros((n_rows, 2))], axis = 1),
    np.concatenate([data[1], np.ones((n_rows, 9)), np.ones((n_rows, 2))], axis = 1),
])
for order_rows in test_dic.values():
    # min / max / mean of columns 5, 6 and 7 over the order, repeated on each of its rows.
    data_temp = np.zeros((len(order_rows), 9))
    for slot, col in enumerate((5, 6, 7)):
        col_values = data[1, order_rows, col]
        data_temp[:, 3 * slot] = np.min(col_values)
        data_temp[:, 3 * slot + 1] = np.max(col_values)
        data_temp[:, 3 * slot + 2] = np.mean(col_values)
    # Flags marking rows whose column-6 value equals the order's minimum / maximum.
    col6 = data[1, order_rows, 6]
    data_cate_temp = np.zeros((len(order_rows), 2))
    data_cate_temp[:, 0] = col6 == np.min(col6)
    data_cate_temp[:, 1] = col6 == np.max(col6)
    test_data_assort[0, order_rows, -2:] = data_cate_temp
    test_data_assort[1, order_rows, 17:26] = data_temp

## 1 MNL

### 1.1 Train the MNL

In [18]:
# MNL is fitted on train + validation together: concatenate along the row axis
# and shift the validation row numbers so they point into the merged array.
train_data_MNL = np.concatenate((train_data, valid_data), axis = 1)
train_label_MNL = np.concatenate((train_label, valid_label), axis = 0)
row_offset = train_data.shape[1]
train_dic_MNL = deepcopy(train_dic)
for order_id, valid_rows in valid_dic.items():
    if order_id not in train_dic:
        train_dic_MNL[order_id] = list(np.array(valid_rows) + row_offset)
    else:
        # An order id present in both splits is only reported, not merged.
        print('error')

In [None]:
# Fit the MNL model on the merged train+validation data via the project helper tp.train_data.
# NOTE(review): the path literal uses a backslash separator, so it only names a file
# inside trained_model_ztn on Windows.
# presumably tp.train_data writes the fitted model to `path` - confirm in training_function_real_data.
PATH = "trained_model_ztn\\MNL_parameters_ztn.pt"
model_MNL = tp.train_data(train_data_MNL,train_label_MNL, train_dic_MNL, NUM_EPOCHS = 100,BATCH_SIZE = 32,path = PATH)

In [7]:
# Reload the MNL model from disk, replacing whatever the training cell returned.
# NOTE(review): torch.load unpickles the file - only load checkpoints you created yourself.
# NOTE(review): the execution count restarts here, so this cell was run in a later session;
# the import and "load data" cells must be run first on a fresh kernel.
model_MNL = torch.load("trained_model_ztn\\MNL_parameters_ztn.pt")

### 1.2 Prediction Results

In [8]:
from sklearn.metrics import log_loss
from sklearn.metrics import roc_auc_score

# Score every test alternative with the MNL model. Inference only, so no autograd graph.
X = torch.from_numpy(test_data[0]).to(torch.long)
weight = torch.from_numpy(test_data[1]).to(torch.float)
with torch.no_grad():
    utility_MNL = model_MNL([X,weight])
utility_MNL = utility_MNL.detach().numpy()

# Softmax within each order turns utilities into choice probabilities (in place).
# Subtracting the per-order maximum is mathematically a no-op but keeps np.exp
# from overflowing to inf, which would otherwise produce NaN probabilities.
for order_rows in test_dic.values():
    exp_shifted = np.exp(utility_MNL[order_rows] - np.max(utility_MNL[order_rows]))
    utility_MNL[order_rows] = exp_shifted / np.sum(exp_shifted)

# Top-n hit rate (is the chosen alternative among the n most probable?) and
# RMSE between probabilities and labels over all test rows.
total = 0
count = 0
top_n = 3
rmse = []
denominator = 0
for order_rows in test_dic.values():
    order_prob = utility_MNL[order_rows]
    order_label = test_label[order_rows]
    denominator += len(order_rows)
    rmse.append(np.sum((order_prob - order_label) ** 2))
    temp_prob_max = np.argsort(-order_prob)[0:top_n]
    total += 1
    # np.argmax returns the first maximum, i.e. the position of the chosen alternative.
    if int(np.argmax(order_label)) in temp_prob_max:
        count += 1
print("RMSE: ",np.sqrt(sum(rmse)/denominator))
print(count/total)
print(total)

# Mean binary log-loss per row. Probabilities are clipped away from exactly 0 and 1
# so that a saturated softmax cannot turn the metric into inf.
eps = np.finfo(utility_MNL.dtype).eps
prob_clipped = np.clip(np.ravel(utility_MNL), eps, 1 - eps).astype(np.float64)
row_loss = np.where(
    test_label == 1,
    -test_label * np.log(prob_clipped),
    -(1 - test_label) * np.log(1 - prob_clipped),
)
logloss = np.sum(row_loss) / len(utility_MNL)
print(logloss)
print(roc_auc_score(test_label, utility_MNL))

## 2 MMNL

### 2.1 training the model

In [None]:
# MMNL is fitted on train + validation together: concatenate along the row axis
# and shift the validation row numbers so they point into the merged array.
train_data_MNL = np.concatenate((train_data, valid_data), axis = 1)
train_label_MNL = np.concatenate((train_label, valid_label), axis = 0)
row_offset = train_data.shape[1]
train_dic_MNL = deepcopy(train_dic)
for order_id, valid_rows in valid_dic.items():
    if order_id not in train_dic:
        train_dic_MNL[order_id] = list(np.array(valid_rows) + row_offset)
    else:
        # An order id present in both splits is only reported, not merged.
        print('error')

# No output path is passed here (a PATH assignment used to sit at this spot, commented out).
#PATH = "trained_model_ztn\\MMNL_parameters_ztn.pt"
# tp.CG_algo returns the list of component models and their mixing weights.
model_MMNL_list, alpha = tp.CG_algo(train_data_MNL, train_label_MNL, train_dic_MNL, NUM_EPOCHS = 100, BATCH_SIZE = 32)


### 2.2 Prediction results

In [None]:
# Load the fitted mixture: a list of MNL component models and their mixing weights.
# NOTE(review): allow_pickle=True unpickles arbitrary objects - only load files you created.
# NOTE(review): nothing visible in this notebook writes these two files; presumably
# tp.CG_algo does - confirm.
model_MMNL_list = np.load("trained_model_ztn\\MMNL.npy",allow_pickle=True)
alpha = np.load("trained_model_ztn\\alpha.npy")

# The model inputs are identical for every component, so build them once.
X = torch.from_numpy(test_data[0]).to(torch.long)
weight = torch.from_numpy(test_data[1]).to(torch.float)

pro_temp = []
# The loop variable is deliberately not called model_MNL, so the MNL model
# loaded earlier is not silently replaced by the last mixture component.
for component_model in model_MMNL_list:
    with torch.no_grad():
        component_prob = component_model([X,weight])
    component_prob = component_prob.detach().numpy()
    # Per-order softmax. The max-shift prevents np.exp from overflowing to inf / NaN.
    for order_rows in test_dic.values():
        exp_shifted = np.exp(component_prob[order_rows] - np.max(component_prob[order_rows]))
        component_prob[order_rows] = exp_shifted / np.sum(exp_shifted)
    pro_temp.append(component_prob)

# Mixture probability = alpha-weighted sum of the component probabilities.
pro = 0
for k, component_prob in enumerate(pro_temp):
    pro = alpha[k]*np.array(component_prob) + pro

# Top-n hit rate and RMSE over all test rows.
total = 0
count = 0
top_n = 3
rmse = []
denominator = 0
for order_rows in test_dic.values():
    order_prob = pro[order_rows]
    order_label = test_label[order_rows]
    denominator += len(order_rows)
    rmse.append(np.sum((order_prob - order_label) ** 2))
    temp_prob_max = np.argsort(-order_prob)[0:top_n]
    total += 1
    # np.argmax returns the first maximum, i.e. the position of the chosen alternative.
    if int(np.argmax(order_label)) in temp_prob_max:
        count += 1
print("RMSE: ",np.sqrt(sum(rmse)/denominator))
print(count/total)

## 3 DeepFM

### 3.1 train the model

In [15]:
# Train DeepFM on the training split, passing the validation split and its order -> rows
# dict to the project helper tp.train_DeepFM.
# NOTE(review): the path literal uses a backslash separator (Windows only).
# NOTE(review): the meaning of weight1 = 31 is not visible here - presumably a class weight; confirm.
PATH = "trained_model_ztn\\DeepFM_parameters_ztn.pt"
model_DeepFM = tp.train_DeepFM(train_data,train_label,valid_data,valid_label,valid_dic, NUM_EPOCHS = 100,BATCH_SIZE = 256,LR = 0.05,weight1 = 31,path = PATH)

---------------- Epoch: 01 ----------
loss 1.17276,acc 0.19798
---------------- Epoch: 02 ----------
loss 0.992891,acc 0.190024
---------------- Epoch: 03 ----------
loss 1.00676,acc 0.19951
---------------- Epoch: 04 ----------
loss 0.955415,acc 0.200734
---------------- Epoch: 05 ----------
loss 0.984403,acc 0.199816
---------------- Epoch: 06 ----------
loss 0.967779,acc 0.20716
---------------- Epoch: 07 ----------
loss 0.946068,acc 0.205018
---------------- Epoch: 08 ----------
loss 0.953947,acc 0.205936
EarlyStopping counter: 1 out of 10
---------------- Epoch: 09 ----------
loss 0.944871,acc 0.2041
---------------- Epoch: 10 ----------
loss 0.950531,acc 0.20563
EarlyStopping counter: 1 out of 10
---------------- Epoch: 11 ----------
loss 0.942306,acc 0.206854
---------------- Epoch: 12 ----------
loss 0.959235,acc 0.206242
EarlyStopping counter: 1 out of 10
---------------- Epoch: 13 ----------
loss 0.940268,acc 0.20869
---------------- Epoch: 14 ----------
loss 0.971805,acc 0.2

### 3.2 Prediction results

In [11]:
# Reload the DeepFM model from disk.
# NOTE(review): torch.load unpickles the file - only load checkpoints you created yourself.
model_DeepFM = torch.load("trained_model_ztn\\DeepFM_parameters_ztn.pt")

In [12]:
# Run DeepFM on the test split and turn its raw outputs into probabilities.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
cat = torch.from_numpy(test_data[0]).to(torch.long).to(device)
weight = torch.from_numpy(test_data[1]).to(torch.float).to(device)
# NOTE(review): the inputs are moved to `device`, the model is not - assumes the
# checkpoint loads onto the same device; confirm.
with torch.no_grad():  # inference only, no autograd graph needed
    logits = model_DeepFM([cat, weight])
outputs = logits.cpu().detach().numpy()
# torch.sigmoid is the same function as exp(x) / (exp(x) + 1), but it does not
# evaluate inf / inf (= NaN) when a logit is large enough for exp to overflow.
pro = torch.sigmoid(logits).cpu().detach().numpy()

In [16]:
# Top-n hit rate and RMSE for DeepFM. The scores are renormalised to sum to 1
# within each order before being compared with the labels.
total = 0
count = 0
top_n = 3
rmse = []
denominator = 0
for order_rows in test_dic.values():
    order_score = pro[order_rows]
    temp_prob_max = np.argsort(-order_score)[0:top_n]
    denominator += len(order_rows)
    rmse.append(np.sum((order_score / np.sum(order_score) - test_label[order_rows]) ** 2))
    temp_test_y = list(test_label[order_rows])
    total += 1
    # Position of the first maximum label = the chosen alternative.
    if temp_test_y.index(max(temp_test_y)) in temp_prob_max:
        count += 1
print(count/total)
print("RMSE: ",np.sqrt(sum(rmse)/denominator))

0.45724338381520574
RMSE:  0.16464937482856498


## 3 DeepFM-a

### 3.1 Train the model

In [12]:
# Train DeepFM-a on the assortment-augmented arrays via the project helper tp.train_DeepFMa.
# NOTE(review): the result is bound to model_DeepFM, replacing the plain DeepFM model
# held under that name.
# NOTE(review): the path literal uses a backslash separator (Windows only).
PATH = "trained_model_ztn\\DeepFM-a_parameters_ztn.pt"
model_DeepFM = tp.train_DeepFMa(train_data_assort,train_label,valid_data_assort,valid_label,valid_dic, NUM_EPOCHS = 100,BATCH_SIZE = 256,LR = 0.05,weight1 = 31,path = PATH)

---------------- Epoch: 01 ----------
loss 1.05344,acc 0.214504
---------------- Epoch: 02 ----------
loss 1.00086,acc 0.214198
---------------- Epoch: 03 ----------
loss 1.3856,acc 0.212362
---------------- Epoch: 04 ----------
loss 0.950384,acc 0.212668
---------------- Epoch: 05 ----------
loss 1.1036,acc 0.215116
---------------- Epoch: 06 ----------
loss 1.09849,acc 0.213586
---------------- Epoch: 07 ----------
loss 1.40586,acc 0.205018
EarlyStopping counter: 1 out of 10
---------------- Epoch: 08 ----------
loss 1.00352,acc 0.214198
---------------- Epoch: 09 ----------
loss 0.923876,acc 0.211444
---------------- Epoch: 10 ----------
loss 1.00327,acc 0.21175
EarlyStopping counter: 1 out of 10
---------------- Epoch: 11 ----------
loss 0.970573,acc 0.211138
EarlyStopping counter: 2 out of 10
---------------- Epoch: 12 ----------
loss 1.0309,acc 0.21481
EarlyStopping counter: 3 out of 10
---------------- Epoch: 13 ----------
loss 1.85013,acc 0.2041
EarlyStopping counter: 4 out of 

### 3.2 Prediction Results

In [17]:
# Reload the DeepFM-a model from disk (still under the name model_DeepFM).
# NOTE(review): torch.load unpickles the file - only load checkpoints you created yourself.
model_DeepFM = torch.load("trained_model_ztn\\DeepFM-a_parameters_ztn.pt")

In [18]:
# Run DeepFM-a on the augmented test split and turn its raw outputs into probabilities.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
cat = torch.from_numpy(test_data_assort[0]).to(torch.long).to(device)
weight = torch.from_numpy(test_data_assort[1]).to(torch.float).to(device)
# NOTE(review): the inputs are moved to `device`, the model is not - assumes the
# checkpoint loads onto the same device; confirm.
with torch.no_grad():  # inference only, no autograd graph needed
    logits = model_DeepFM([cat, weight])
outputs = logits.cpu().detach().numpy()
# torch.sigmoid is the same function as exp(x) / (exp(x) + 1), but it does not
# evaluate inf / inf (= NaN) when a logit is large enough for exp to overflow.
pro = torch.sigmoid(logits).cpu().detach().numpy()

In [20]:
# Top-n hit rate and RMSE for DeepFM-a. The scores are renormalised to sum to 1
# within each order before being compared with the labels.
# NOTE(review): top_n is 5 here but 3 in the other evaluation cells of this notebook,
# so this hit rate is not directly comparable with theirs - confirm this is intended.
total = 0
count = 0
top_n = 5
rmse = []
denominator = 0
for order_rows in test_dic.values():
    order_score = pro[order_rows]
    temp_prob_max = np.argsort(-order_score)[0:top_n]
    denominator += len(order_rows)
    rmse.append(np.sum((order_score / np.sum(order_score) - test_label[order_rows]) ** 2))
    temp_test_y = list(test_label[order_rows])
    total += 1
    # Position of the first maximum label = the chosen alternative.
    if temp_test_y.index(max(temp_test_y)) in temp_prob_max:
        count += 1
print(count/total)
print("RMSE: ",np.sqrt(sum(rmse)/denominator))

0.6100657794095151
RMSE:  0.16467736989501122


## 4 Exponential Choice Model

### 4.1 train the model

In [3]:
# The EXP model is fitted on train + validation together: concatenate along the row
# axis and shift the validation row numbers so they point into the merged array.
train_data_exp = np.concatenate((train_data, valid_data), axis = 1)
train_label_exp = np.concatenate((train_label, valid_label), axis = 0)
row_offset = train_data.shape[1]
train_dic_exp = deepcopy(train_dic)
for order_id, valid_rows in valid_dic.items():
    if order_id not in train_dic:
        train_dic_exp[order_id] = list(np.array(valid_rows) + row_offset)
    else:
        # An order id present in both splits is only reported, not merged.
        print('error')

In [5]:
# Fit the EXP choice model on the merged train+validation data with the same helper
# used for MNL, selected through model_type = 'EXP'.
# The result is bound to model_exp, the name the following cells use. Binding it to
# model_MNL would replace the MNL model with an EXP model.
# NOTE(review): the path literal uses a backslash separator (Windows only).
PATH = "trained_model_ztn\\EXP_parameters_ztn.pt"
model_exp = tp.train_data(train_data_exp,train_label_exp, train_dic_exp, NUM_EPOCHS = 100,BATCH_SIZE = 32,path = PATH, model_type = 'EXP')

---------------- Epoch: 01 ----------
loss 2.81093
---------------- Epoch: 02 ----------
loss 2.70854
---------------- Epoch: 03 ----------
loss 2.73404
---------------- Epoch: 04 ----------
loss 2.94712
---------------- Epoch: 05 ----------
loss 2.68275
---------------- Epoch: 06 ----------
loss 3.18568
EarlyStopping counter: 1 out of 10
---------------- Epoch: 07 ----------
loss 2.70411
EarlyStopping counter: 2 out of 10
---------------- Epoch: 08 ----------
loss 2.75742
EarlyStopping counter: 3 out of 10
---------------- Epoch: 09 ----------
loss 2.77556
EarlyStopping counter: 4 out of 10
---------------- Epoch: 10 ----------
loss 2.73669
EarlyStopping counter: 5 out of 10
---------------- Epoch: 11 ----------
loss 2.71328
EarlyStopping counter: 6 out of 10
---------------- Epoch: 12 ----------
loss 2.996
EarlyStopping counter: 7 out of 10
---------------- Epoch: 13 ----------
loss 2.95275
EarlyStopping counter: 8 out of 10
---------------- Epoch: 14 ----------
loss 2.97581
EarlySto

In [6]:
# Reload the EXP model from disk.
# NOTE(review): torch.load unpickles the file - only load checkpoints you created yourself.
PATH = "trained_model_ztn\\EXP_parameters_ztn.pt"
model_exp = torch.load(PATH)

### 4.2 Prediction Results

In [7]:
# Raw EXP-model utilities for every test alternative, as a numpy array.
X = torch.from_numpy(test_data[0]).long()
weight = torch.from_numpy(test_data[1]).float()
utility_exp = model_exp([X, weight]).detach().numpy()

In [8]:
# Convert the EXP-model utilities into per-order choice probabilities (pro_exp).
# NOTE(review): this looks like the closed-form choice probability of the exponomial
# choice model - confirm against the reference the model is taken from.
pro_exp = np.zeros_like(utility_exp)
for item in test_dic:
    temp_lst = test_dic[item]
    utility_temp = utility_exp[test_dic[item]]
    # Ascending sort of this order's utilities (np.sort default order).
    utility_temp = np.sort(utility_temp)
    for i in temp_lst:
        # Position of row i's utility in the sorted array; tied utilities all
        # resolve to the first matching position.
        index = np.where(utility_temp == utility_exp[i])[0][0]
        # Q1: term built from the utilities at or above position `index`.
        Q1 = np.exp(-np.sum(utility_temp[index:] - utility_temp[index]))/(utility_temp.shape[0]-index)
        if index == 0:
            pro_exp[i] = Q1
        else:
            # Q2: correction summed over every position j below `index`.
            # j <= index - 1 <= n - 2, so (n - j - 1) is never zero.
            # The sum is recomputed for every row, so the cost grows quickly with order size.
            Q2 = 0
            for j in range(index):
                Q2 += np.exp(-np.sum(utility_temp[j:] - utility_temp[j]))/((utility_temp.shape[0]-j-1)*(utility_temp.shape[0]-j))
            pro_exp[i] = Q1-Q2

In [11]:
# Top-n hit rate and RMSE for the EXP model over all test rows.
total = 0
count = 0
top_n = 3
rmse = []
denominator = 0
for order_rows in test_dic.values():
    order_prob = pro_exp[order_rows]
    order_label = test_label[order_rows]
    denominator += len(order_rows)
    rmse.append(np.sum((order_prob - order_label) ** 2))
    temp_prob_max = np.argsort(-order_prob)[0:top_n]
    temp_test_y = list(order_label)
    total += 1
    # Position of the first maximum label = the chosen alternative.
    if temp_test_y.index(max(temp_test_y)) in temp_prob_max:
        count += 1
print("RMSE: ", np.sqrt(sum(rmse)/denominator))
print(count/total)
print(total)
from sklearn.metrics import log_loss
from sklearn.metrics import roc_auc_score
# Mean binary log-loss per row: -log(p) for the chosen row, -log(1 - p) otherwise.
# log(v*5000) - log(5000) is how log(v) is evaluated here.
logloss = 0
for y_true, p_hat in zip(test_label, pro_exp):
    if y_true == 1:
        logloss -= y_true * (np.log(p_hat*5000)-np.log(5000))
    else:
        logloss -= (1-y_true) * (np.log((1-p_hat)*5000)-np.log(5000))
logloss = logloss/len(pro_exp)
print(logloss)
print(roc_auc_score(test_label, pro_exp))

RMSE:  0.1661375263459456
0.40767936362245677
6537
0.11219007450757641
0.839770912228547
