In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys

if './src' not in sys.path:
    sys.path.append('./src')

if './' not in sys.path:
    sys.path.append('./')

import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
from os.path import join
from src.data import Data

from src.trainer import Trainer
from src.list import ModelList
from src.testers import *
import datetime
import time

import argparse
from src import param
import csv

from analysisUtil import *

def get_model_identifier(whichmodel, ver):
    """Build a run identifier of the form ``<model>[_v<ver>]_<MMDD>``.

    @param whichmodel: object whose ``.value`` attribute is the model name
    @param ver: version number; the ``_v<ver>`` part is added only when
        ``ver != 0``
    @return: the identifier string, suffixed with today's zero-padded
        month and day
    """
    version_tag = '_v' + str(ver) if ver != 0 else ''
    # '%m%d' gives a two-digit month followed by a two-digit day.
    month_day = datetime.datetime.now().strftime('%m%d')
    return whichmodel.value + version_tag + '_' + month_day

# NOTE(review): `parser` and `args` are never created in this cell; they are
# presumably supplied by one of the star imports above -- confirm before
# relying on a fresh-kernel run.
parser.add_argument('--start', type=int, default=10, help="Starting epoch")
parser.add_argument('--to', type=int, default=800, help="Ending epoch")
parser.add_argument('--step', type=int, default=10, help="Epoch step size")

# regularizer coefficient (lambda)
# The help text previously advertised 0.0005 while the real default is 0.005;
# the text now matches the value argparse actually applies.
parser.add_argument('--reg_scale', type=float, default=0.005,
                    help="The scale for regularizer (lambda). Default 0.005")

# Run configuration written onto the shared `param` module.
param.verbose = False
param.whichdata = "ppi5k"  # nl27k, cn15k
param.ver = 0   # only for VAE distmult
param.whichmodel = ModelList(args.model)  # DistMult_VAE
param.batch_size = 512  # 256, 512
param.dim = 256  # 256, 512
param.neg_per_pos = 10
# Set independently of the --reg_scale argument declared above.
param.reg_scale = 0.0005
param.resume_model_path = args.resume_model_path
param.n_psl = 0

# NOTE(review): the validation loop at the bottom of this cell iterates over
# args.start / args.to / args.step, not over these three values.
start = 100
to = 1000
step = 40

param.data_surfix = ''

if '_threshold_' in param.whichdata:
    # Keep the 14-character '_threshold_0.x' tag as the output-file suffix.
    # The offset must be computed on the same string that is sliced below
    # (it used to be looked up in args.data).
    t = param.whichdata.index('_threshold_')
    param.data_surfix = param.whichdata[t: t + 14]


def save_loss(losses, filename, columns):
    """Print the newest metric rows and append the latest one to a CSV file.

    @param losses: list of rows (one per validated epoch), oldest first
    @param filename: path of the CSV file to append to
    @param columns: column names used to label ``losses`` when printing
    @return: None

    Only the last row is written, without header or index, so calling this
    once per new row grows the file by exactly one line each time.
    """
    history = pd.DataFrame(losses, columns=columns)
    print(history.tail(5))
    newest_row = history.tail(1)
    newest_row.to_csv(filename, mode='a', index=False, header=False)



# input files
data_dir = join('./data', param.whichdata)
file_train = join(data_dir, 'train.tsv')  # training data
file_val = join(data_dir, 'val.tsv')  # validation data
file_test = join(data_dir, 'test.tsv')  # test data
file_psl = join(data_dir, 'softlogic.tsv')  # probabilistic soft logic

print('Read train.tsv from', data_dir)

# Metric histories. val() appends to losses_mean_ndcg, losses_mse and
# mean_ranks; losses_classify_triple_testing is not written to in this cell.
losses_classify_triple_testing = []
losses_mean_ndcg = []
losses_mse = []
mean_ranks = []


val_mean_rank_save_path = os.path.join(param.resume_model_path, 'val%s_mean_rank_accurate.csv'%(param.data_surfix,))
val_ndcg_save_path = os.path.join(param.resume_model_path, 'val%s_loss_accurate.csv'%(param.data_surfix,))

# Start every run with empty summary files: opening in 'w' mode already
# truncates, and the context manager guarantees the handle is closed.
for _summary_path in (val_mean_rank_save_path, val_ndcg_save_path):
    with open(_summary_path, 'w'):
        pass

def val(n):
    """Evaluate the checkpoint saved at epoch/step ``n`` on the validation file.

    Polls once a minute until ``model.bin-<n>.meta`` exists under
    ``param.resume_model_path``, then builds a tester from that checkpoint and
    computes MSE, nDCG, mean rank and hit@K. New rows are appended to the
    module-level histories ``losses_mean_ndcg``, ``losses_mse`` and
    ``mean_ranks``; the newest row of the latter two is appended to the
    summary CSVs through ``save_loss``; the rows returned by the tester as
    ``all_ndcg`` are written to ``val<suffix>_detail_<n>.csv``.

    @param n: the epoch/step to validate
    @return: the string 'done'
    """
    print('val', n)

    # Wait for the checkpoint's .meta file to show up on disk.
    meta_file = os.path.join(param.resume_model_path, 'model.bin-%d.meta' % n)
    while True:
        if os.path.exists(meta_file):
            break
        print('[error]', meta_file, 'not found. sleeping...')
        time.sleep(60)

    # Build a trainer only to obtain its matching tester, then drop the trainer.
    dataset = Data()
    dataset.load_data(file_train=file_train, file_val=file_val,
                      file_test=file_test, file_psl=file_psl)
    trainer = Trainer()
    use_psl = param.n_psl > 0
    trainer.build(dataset, '', psl=use_psl)
    tester = trainer.tester
    del trainer

    tester.build_by_file(file_val, param.resume_model_path,
                         model_filename='model.bin-%d' % n)
    tester.load_hr_map(param.data_dir(), 'val.tsv',
                       ['train.tsv', 'val.tsv', 'test.tsv'])

    KG_THRES = 0

    # Keep a (head, relation) entry when it has more than one tail, or when
    # the first value of its tail mapping exceeds KG_THRES.
    hr_map = {}
    for head in tester.hr_map:
        for rel in tester.hr_map[head]:
            tails = tester.hr_map[head][rel]
            keep = len(tails) > 1 or list(tails.values())[0] > KG_THRES
            if keep:
                hr_map.setdefault(head, {})[rel] = tails

    # First-axis sizes of tester.vec_r / tester.vec_c; not used further below.
    r_count = tester.vec_r.shape[0]
    c_count = tester.vec_c.shape[0]

    # metrics: mse (the negative-sample MSE is requested twice and both
    # values are recorded)
    mse = tester.get_mse(epoch=n, toprint=True)
    neg_mse_first = tester.get_mse_neg(10)
    neg_mse_second = tester.get_mse_neg(10)

    ndcg_linear, ndcg_exp, ndcg_r, _count_r, ndcg_rows = tester.mean_ndcg(
        hr_map, accurate_mode=False)
    losses_mean_ndcg.append(np.insert(ndcg_r, 0, n))
    losses_mse.append(
        [n, mse, neg_mse_first, neg_mse_second, ndcg_linear, ndcg_exp])

    detail_name = 'val%s_detail_%d.csv' % (param.data_surfix, n)
    with open(os.path.join(param.resume_model_path, detail_name), 'w') as detail_file:
        csv.writer(detail_file).writerows(ndcg_rows)

    # Positions 0-2 are the unweighted hit@10/20/40, positions 3-5 the
    # weighted ones, following the order of the K and `weighted` lists.
    hit_at_k, _ = tester.mean_hitAtK(
        hr_map, [10, 20, 40, 10, 20, 40],
        weighted=[False, False, False, True, True, True],
        accurate_mode=False, verbose=False)

    rank_plain, _ = tester.mean_rank(hr_map, accurate_mode=False)
    rank_weighted, _ = tester.mean_rank(hr_map, weighted=True, accurate_mode=False)

    mean_ranks.append([
        n,
        rank_plain, hit_at_k[0], hit_at_k[1], hit_at_k[2],
        rank_weighted, hit_at_k[3], hit_at_k[4], hit_at_k[5],
    ])

    rank_columns = ['val_epoch', 'mean_rank', 'mean_hit@10', 'mean_hit@20', 'mean_hit@40', 'mean_rank_weighted', 'mean_hit@10_weighted', 'mean_hit@20_weighted', 'mean_hit@40_weighted']
    save_loss(mean_ranks, val_mean_rank_save_path, columns=rank_columns)

    mse_columns = ['val_epoch', 'mse', 'mse_neg', 'mse_neg(second)', 'ndcg(linear)', 'ndcg(exp)']
    save_loss(losses_mse, val_ndcg_save_path, columns=mse_columns)

    return 'done'
    
# Validate every checkpoint epoch in range(args.start, args.to, args.step).
epochs_to_validate = range(args.start, args.to, args.step)
for n in epochs_to_validate:
    status = val(n)
    if status != "existed":
        continue
    print("val existed")














InvalidArgumentError: Invalid device ordinal value (3). Valid range is [0, 2].
	while setting up XLA_GPU_JIT device number 3

In [None]:

for i in :