In [None]:
import os
import pandas as pd
import numpy as np

from core import DATA_CLEAN_PATH, RES_PATH
from utils.evaluation import write_results_to_excel
from utils.utils_params_helper import ABLATION_PARAMS_DICT, get_train_eval_time

import warnings
# Ignore every warning category for the rest of the kernel session
# (presumably to keep the cell output of the model runs readable).
# NOTE(review): this also hides DeprecationWarning/FutureWarning raised by
# numpy/pandas calls below — consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Time Measurements

In [None]:
# Measure the training and evaluation time of every baseline model and write
# the resulting table to the shared results workbook.
MODEL_NAMES = [
    'Popularity',
    'UserKNN', 'ItemKNN',
    'iALS', 'BPR',
    'NCF', 'BiVAE', 'LightGCN',
    'LightFM', 'PinSage', 'Tensor',
]

# One (train_time, eval_time) pair per model, in MODEL_NAMES order.
timings = [get_train_eval_time(name) for name in MODEL_NAMES]
train_time_ls = [train_time for train_time, _ in timings]
eval_time_ls = [eval_time for _, eval_time in timings]

time_columns = {
    'model': MODEL_NAMES,
    'train time': train_time_ls,
    'evaluation time': eval_time_ls,
}
time_df = pd.DataFrame(time_columns)

# 'time' is passed as the third argument (presumably the sheet name — confirm
# against write_results_to_excel).
results_workbook = os.path.join(RES_PATH, 'test_results.xlsx')
write_results_to_excel(time_df, results_workbook, 'time')

# Hybrid Models

In [None]:
# LightFM ablation: train and evaluate every feature variant `n_run` times,
# drawing a fresh random seed for each run, then report the mean training
# time, mean evaluation time and element-wise mean of the test results.

MODEL_NAMES = [
    'LightFM_allfeature', 'LightFM_nofeature', 'LightFM_notext', 'LightFM_onlytext',
]

n_run = 10
results_path = os.path.join(RES_PATH, 'ablation_results_lightfm.txt')

# Mode 'a' appends to an existing file and creates a missing one, so no
# existence check is needed. The `with` block closes (and flushes) the file
# even if one of the runs raises.
with open(results_path, 'a') as f:
    for model in MODEL_NAMES:
        train_time_arr = np.zeros((n_run, 1))
        eval_time_arr = np.zeros((n_run, 1))
        # Running sum of the per-run result tables.
        # NOTE(review): assumes `res.values` is 6x3 (or broadcastable to it) —
        # confirm against get_train_eval_time.
        test_res = np.zeros((6, 3))

        for i in range(n_run):
            if model.split("_")[0] == 'LightFM':
                # Re-draw the seed on every run so the averaged runs are not
                # all trained with the same seed. randint's upper bound is
                # exclusive, so this samples from [1, 3000] — the same range
                # as the deprecated np.random.random_integers(3000).
                ABLATION_PARAMS_DICT[model]['seed'] = np.random.randint(1, 3001)

            train_time, eval_time, res = get_train_eval_time(
                model, params_dict=ABLATION_PARAMS_DICT, verbose=False
            )
            train_time_arr[i] = train_time
            eval_time_arr[i] = eval_time
            test_res += res.values

        train_time = np.mean(train_time_arr)
        eval_time = np.mean(eval_time_arr)
        test_res = test_res / n_run

        # Build the report once and emit it to both the results file and the
        # cell output; the trailing empty entry yields the blank separator line.
        report = "\n".join([
            model,
            f'train_time = {train_time}, eval_time = {eval_time}',
            'test results:',
            str(test_res),
            '',
        ])
        print(report, file=f)
        print(report)

In [None]:
# PinSage ablation: train and evaluate every feature variant `n_run` times and
# report the mean training time, mean evaluation time and element-wise mean
# of the test results.
MODEL_NAMES = [
    'PinSage_allfeature', 'PinSage_nofeature', 'PinSage_notext', 'PinSage_onlytext',
    # 'Tensor_allfeature', 'Tensor_notext', 'Tensor_onlytext',
]

n_run = 10
results_path = os.path.join(RES_PATH, 'ablation_results.txt')

# Mode 'a' appends to an existing file and creates a missing one, so no
# existence check is needed. The `with` block closes (and flushes) the file
# even if one of the runs raises.
with open(results_path, 'a') as f:
    for model in MODEL_NAMES:
        train_time_arr = np.zeros((n_run, 1))
        eval_time_arr = np.zeros((n_run, 1))
        # Running sum of the per-run result tables.
        # NOTE(review): assumes `res.values` is 6x3 (or broadcastable to it) —
        # confirm against get_train_eval_time.
        test_res = np.zeros((6, 3))

        for i in range(n_run):
            train_time, eval_time, res = get_train_eval_time(
                model, params_dict=ABLATION_PARAMS_DICT, verbose=False
            )
            train_time_arr[i] = train_time
            eval_time_arr[i] = eval_time
            test_res += res.values

        train_time = np.mean(train_time_arr)
        eval_time = np.mean(eval_time_arr)
        test_res = test_res / n_run

        # Build the report once and emit it to both the results file and the
        # cell output; the trailing empty entry yields the blank separator line.
        report = "\n".join([
            model,
            f'train_time = {train_time}, eval_time = {eval_time}',
            'test results:',
            str(test_res),
            '',
        ])
        print(report, file=f)
        print(report)