In [1]:
import os
import numpy as np
from core.tensor_factorization_methods.tensor_factorization_model import TensorRecommender
from core import MAIN_DIRECTORY, DATA_CLEAN_PATH, RES_PATH, SEED, TEST_K
from utils.evaluation import get_test_results, write_results_to_excel

# Test

In [2]:
data = 'adobe_core5'

# Build the recommender once per feature configuration and print the shape of
# its item-feature matrix, so the effect of each flag combination is visible.
# Each row: (label, use_text_feature, use_no_feature, use_only_text).
feature_configs = [
    ('all feature', True, False, False),
    ('no text', False, False, False),
    ('only text', True, False, True),
    ('no feature', False, True, False),
]

for config_label, with_text, without_features, text_only in feature_configs:
    print(config_label)
    rec = TensorRecommender(
        data,
        DATA_CLEAN_PATH,
        use_text_feature=with_text,
        use_no_feature=without_features,
        use_only_text=text_only,
    )
    print(rec.item_feature.shape)

all feature
(2899, 10576)
no text
(2899, 2981)
only text
(2899, 10494)
no feature
(2899, 2899)


# Tune hyperparameters with hyperopt

In [7]:
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, STATUS_FAIL

# Hyperopt search space read by `tune_tensor` (MovieLens-sized batch options).
# Every hyperopt label equals its dictionary key, so a sampled point can be
# unpacked directly into `rec.train_model(**params)`.
space = dict(
    # Two rank-like sizes, sampled in steps of 1 between 20 and 200.
    user_item_k=hp.quniform('user_item_k', 20, 200, 1),
    item_time_k=hp.quniform('item_time_k', 20, 200, 1),
    # Categorical: hyperopt records the *index* of the chosen option.
    batch_size=hp.choice('batch_size', [64, 128, 256, 512]),
    # Log-uniform ranges: learning rate in [1e-5, 0.1], lambda_c in [1e-5, 10].
    lr=hp.loguniform('lr', np.log(1e-5), np.log(0.1)),
    lambda_c=hp.loguniform('lambda_c', np.log(1e-5), np.log(10)),
    # lambda_r in steps of 0.1 between 0.1 and 1.5.
    lambda_r=hp.quniform('lambda_r', 0.1, 1.5, 0.1),
    # n_neg in steps of 1 between 4 and 8.
    n_neg=hp.quniform('n_neg', 4, 8, 1),
)

def tune_tensor(data,
    use_text_feature=True,
    use_no_feature=False,
    use_only_text=False,
    tune_res_path=os.path.join(RES_PATH, 'tensor_recommender_tuning_results.txt'),
    test_res_path=os.path.join(RES_PATH, 'test_results_TensorRecommender.xlsx'),
    verbose=True,
    max_evals=50,
):
    """Tune a TensorRecommender with hyperopt, retrain it, and record the results.

    Steps:
      1. Build a ``TensorRecommender`` for ``data`` with the given feature flags.
      2. Run ``fmin`` (TPE) over the module-level ``space``, minimising the
         negated ``rec.params['best_eval_score']`` of each early-stopped run.
      3. Pick the best *successful* trial, decode its hyperparameters and use
         its ``best_epoch`` as ``n_epochs``.
      4. Retrain without early stopping, then append the tuning summary and the
         test metrics to ``tune_res_path`` and write the test metrics to the
         Excel workbook at ``test_res_path``.

    Args:
        data: Dataset name passed to ``TensorRecommender``; also used as the
            base of the Excel sheet name.
        use_text_feature: Forwarded to ``TensorRecommender``.
        use_no_feature: Forwarded to ``TensorRecommender``.
        use_only_text: Forwarded to ``TensorRecommender``.
        tune_res_path: Text file the tuning log is appended to (created if
            missing).
        test_res_path: Excel file passed to ``write_results_to_excel``.
        verbose: Forwarded to ``train_model``; when true, failed trials are
            also reported on stdout.
        max_evals: Number of hyperopt evaluations (default 50).

    Returns:
        None. Results are written to ``tune_res_path`` and ``test_res_path``.

    Raises:
        RuntimeError: If no trial finished successfully with a finite loss.
    """
    # Imported locally because the notebook's hyperopt import line does not
    # include space_eval.
    from hyperopt import space_eval

    rec = TensorRecommender(data, DATA_CLEAN_PATH,
                            use_text_feature=use_text_feature,
                            use_no_feature=use_no_feature,
                            use_only_text=use_only_text)
    print(rec.item_feature.shape)

    def objective(params):
        """Train with early stopping and report the negated validation score."""
        try:
            rec.train_model(**params, early_stopping=True, verbose=verbose)
            return {
                'loss': -rec.params['best_eval_score'],
                'status': STATUS_OK,
                'best_epoch': rec.params['best_epoch'],
            }
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
            # abort the search, and surface the reason instead of hiding it.
            if verbose:
                print(f'Trial failed for params {params}: {exc!r}')
            return {
                'loss': 0,
                'status': STATUS_FAIL,
                'best_epoch': 0,
            }

    print('Tuning hyperparameters of Tensor Recommender on dataset {}...'.format(data))
    print(f'use_text_feature={use_text_feature}, use_no_feature={use_no_feature}, use_only_text={use_only_text}')
    trials = Trials()
    # NOTE(review): this sets a private attribute on Trials; it is presumably
    # not what seeds fmin (the documented way is fmin's `rstate` argument, whose
    # expected type depends on the hyperopt version) -- confirm and switch.
    trials._random_state = np.random.RandomState(SEED)
    fmin(objective, space, algo=tpe.suggest, max_evals=max_evals, trials=trials)

    # Only successful trials with a finite loss are eligible: a failed trial
    # reports loss 0 / best_epoch 0 and must never be selected.
    successful = [
        trial for trial in trials.trials
        if trial['result']['status'] == STATUS_OK
        and np.isfinite(trial['result']['loss'])
    ]
    if not successful:
        raise RuntimeError('No hyperopt trial finished successfully; nothing to retrain.')
    best_trial = min(successful, key=lambda trial: trial['result']['loss'])

    # Decode the recorded assignment through the search space itself, so the
    # hp.choice index for batch_size maps to the option list actually searched
    # rather than to a second hard-coded copy of that list.
    assignment = {
        label: values[0]
        for label, values in best_trial['misc']['vals'].items()
        if values
    }
    # NOTE(review): quniform values are presumably floats (e.g. 57.0); they are
    # forwarded unchanged, exactly as they were during tuning.
    optimal_params = dict(space_eval(space, assignment))
    optimal_params['n_epochs'] = best_trial['result']['best_epoch']

    # The loss is the negated validation score, so undo the sign for reporting.
    best_score = -best_trial['result']['loss']

    # Mode 'a' creates the file when missing; the context manager guarantees
    # the handle is closed even if retraining or evaluation raises.
    with open(tune_res_path, 'a') as f:
        print(f'Tensor Recommender tuning results on dataset {data}', file=f)
        print(f'with use_text_feature={use_text_feature} and use_no_feature={use_no_feature} and use_only_text={use_only_text}...',
              file=f)
        print(
            'The optimal hyperparamters are: \n user_item_k={}, item_time_k={}, batch_size={}, lr={}, lambda_c={}, lambda_r={}, n_neg={}, n_epochs={}'.format(
                optimal_params['user_item_k'], optimal_params['item_time_k'],
                optimal_params['batch_size'], optimal_params['lr'],
                optimal_params['lambda_c'], optimal_params['lambda_r'],
                optimal_params['n_neg'], optimal_params['n_epochs'],
            ),
            file=f
        )
        print('Best validation NDCG@10 = {}'.format(best_score), file=f)

        # Retrain for exactly the best epoch count found during tuning.
        print('Retraining model using the optimal params...', file=f)
        rec.train_model(**optimal_params, early_stopping=False, verbose=verbose)
        print(f'Validation NDCG@10 of the retrained model = {rec.get_validation_ndcg()}', file=f)

        print('Test results of the retrained model:', file=f)
        test_res = get_test_results(rec, TEST_K)
        print(test_res, file=f)

    # One sheet per (dataset, feature configuration).
    if use_no_feature:
        sheet_name = data+'_nofeature'
    elif use_text_feature:
        if use_only_text:
            sheet_name = data+'_onlytext'
        else:
            sheet_name = data
    else:
        sheet_name = data+'_notext'

    write_results_to_excel(test_res, test_res_path, sheet_name)

In [None]:
# MovieLens-100k runs, in order: all features, then no features.
movielens_runs = [
    # all feature
    dict(use_text_feature=True, use_no_feature=False, use_only_text=False),
    # no feature
    dict(use_text_feature=False, use_no_feature=True, use_only_text=False),
]

for feature_flags in movielens_runs:
    tune_tensor('movielens_100k', **feature_flags)

## adobe_core5

In [3]:
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, STATUS_FAIL

# Hyperopt search space read by `tune_tensor` for adobe_core5 (larger batch
# options than the MovieLens space). Every hyperopt label equals its dictionary
# key, so a sampled point can be unpacked into `rec.train_model(**params)`.
space = dict(
    # Two rank-like sizes, sampled in steps of 1 between 20 and 200.
    user_item_k=hp.quniform('user_item_k', 20, 200, 1),
    item_time_k=hp.quniform('item_time_k', 20, 200, 1),
    # Categorical: hyperopt records the *index* of the chosen option.
    batch_size=hp.choice('batch_size', [256, 512, 1024, 2048]),
    # Log-uniform ranges: learning rate in [1e-5, 0.1], lambda_c in [1e-5, 10].
    lr=hp.loguniform('lr', np.log(1e-5), np.log(0.1)),
    lambda_c=hp.loguniform('lambda_c', np.log(1e-5), np.log(10)),
    # lambda_r in steps of 0.1 between 0.1 and 1.5.
    lambda_r=hp.quniform('lambda_r', 0.1, 1.5, 0.1),
    # n_neg in steps of 1 between 4 and 8.
    n_neg=hp.quniform('n_neg', 4, 8, 1),
)

def tune_tensor(data,
    use_text_feature=True,
    use_no_feature=False,
    use_only_text=False,
    tune_res_path=os.path.join(RES_PATH, 'tensor_recommender_tuning_results.txt'),
    test_res_path=os.path.join(RES_PATH, 'test_results_TensorRecommender.xlsx'),
    verbose=True,
    max_evals=50,
):
    """Tune a TensorRecommender with hyperopt, retrain it, and record the results.

    Steps:
      1. Build a ``TensorRecommender`` for ``data`` with the given feature flags.
      2. Run ``fmin`` (TPE) over the module-level ``space``, minimising the
         negated ``rec.params['best_eval_score']`` of each early-stopped run.
      3. Pick the best *successful* trial, decode its hyperparameters and use
         its ``best_epoch`` as ``n_epochs``.
      4. Retrain without early stopping, then append the tuning summary and the
         test metrics to ``tune_res_path`` and write the test metrics to the
         Excel workbook at ``test_res_path``.

    Args:
        data: Dataset name passed to ``TensorRecommender``; also used as the
            base of the Excel sheet name.
        use_text_feature: Forwarded to ``TensorRecommender``.
        use_no_feature: Forwarded to ``TensorRecommender``.
        use_only_text: Forwarded to ``TensorRecommender``.
        tune_res_path: Text file the tuning log is appended to (created if
            missing).
        test_res_path: Excel file passed to ``write_results_to_excel``.
        verbose: Forwarded to ``train_model``; when true, failed trials are
            also reported on stdout.
        max_evals: Number of hyperopt evaluations (default 50).

    Returns:
        None. Results are written to ``tune_res_path`` and ``test_res_path``.

    Raises:
        RuntimeError: If no trial finished successfully with a finite loss.
    """
    # Imported locally because the notebook's hyperopt import line does not
    # include space_eval.
    from hyperopt import space_eval

    rec = TensorRecommender(data, DATA_CLEAN_PATH,
                            use_text_feature=use_text_feature,
                            use_no_feature=use_no_feature,
                            use_only_text=use_only_text)
    print(rec.item_feature.shape)

    def objective(params):
        """Train with early stopping and report the negated validation score."""
        try:
            rec.train_model(**params, early_stopping=True, verbose=verbose)
            return {
                'loss': -rec.params['best_eval_score'],
                'status': STATUS_OK,
                'best_epoch': rec.params['best_epoch'],
            }
        except Exception as exc:
            # Catch Exception (not a bare except) so Ctrl-C / SystemExit still
            # abort the search, and surface the reason instead of hiding it.
            if verbose:
                print(f'Trial failed for params {params}: {exc!r}')
            return {
                'loss': 0,
                'status': STATUS_FAIL,
                'best_epoch': 0,
            }

    print('Tuning hyperparameters of Tensor Recommender on dataset {}...'.format(data))
    print(f'use_text_feature={use_text_feature}, use_no_feature={use_no_feature}, use_only_text={use_only_text}')
    trials = Trials()
    # NOTE(review): this sets a private attribute on Trials; it is presumably
    # not what seeds fmin (the documented way is fmin's `rstate` argument, whose
    # expected type depends on the hyperopt version) -- confirm and switch.
    trials._random_state = np.random.RandomState(SEED)
    fmin(objective, space, algo=tpe.suggest, max_evals=max_evals, trials=trials)

    # Only successful trials with a finite loss are eligible: a failed trial
    # reports loss 0 / best_epoch 0 and must never be selected.
    successful = [
        trial for trial in trials.trials
        if trial['result']['status'] == STATUS_OK
        and np.isfinite(trial['result']['loss'])
    ]
    if not successful:
        raise RuntimeError('No hyperopt trial finished successfully; nothing to retrain.')
    best_trial = min(successful, key=lambda trial: trial['result']['loss'])

    # Decode the recorded assignment through the search space itself, so the
    # hp.choice index for batch_size maps to the option list actually searched
    # rather than to a second hard-coded copy of that list.
    assignment = {
        label: values[0]
        for label, values in best_trial['misc']['vals'].items()
        if values
    }
    # NOTE(review): quniform values are presumably floats (e.g. 57.0); they are
    # forwarded unchanged, exactly as they were during tuning.
    optimal_params = dict(space_eval(space, assignment))
    optimal_params['n_epochs'] = best_trial['result']['best_epoch']

    # The loss is the negated validation score, so undo the sign for reporting.
    best_score = -best_trial['result']['loss']

    # Mode 'a' creates the file when missing; the context manager guarantees
    # the handle is closed even if retraining or evaluation raises.
    with open(tune_res_path, 'a') as f:
        print(f'Tensor Recommender tuning results on dataset {data}', file=f)
        print(f'with use_text_feature={use_text_feature} and use_no_feature={use_no_feature} and use_only_text={use_only_text}...',
              file=f)
        print(
            'The optimal hyperparamters are: \n user_item_k={}, item_time_k={}, batch_size={}, lr={}, lambda_c={}, lambda_r={}, n_neg={}, n_epochs={}'.format(
                optimal_params['user_item_k'], optimal_params['item_time_k'],
                optimal_params['batch_size'], optimal_params['lr'],
                optimal_params['lambda_c'], optimal_params['lambda_r'],
                optimal_params['n_neg'], optimal_params['n_epochs'],
            ),
            file=f
        )
        print('Best validation NDCG@10 = {}'.format(best_score), file=f)

        # Retrain for exactly the best epoch count found during tuning.
        print('Retraining model using the optimal params...', file=f)
        rec.train_model(**optimal_params, early_stopping=False, verbose=verbose)
        print(f'Validation NDCG@10 of the retrained model = {rec.get_validation_ndcg()}', file=f)

        print('Test results of the retrained model:', file=f)
        test_res = get_test_results(rec, TEST_K)
        print(test_res, file=f)

    # One sheet per (dataset, feature configuration).
    if use_no_feature:
        sheet_name = data+'_nofeature'
    elif use_text_feature:
        if use_only_text:
            sheet_name = data+'_onlytext'
        else:
            sheet_name = data
    else:
        sheet_name = data+'_notext'

    write_results_to_excel(test_res, test_res_path, sheet_name)

In [None]:
# adobe_core5 runs, in order: no text, all features, only text, no features.
adobe_runs = [
    # no text
    dict(use_text_feature=False, use_no_feature=False, use_only_text=False),
    # all feature
    dict(use_text_feature=True, use_no_feature=False, use_only_text=False),
    # only text
    dict(use_text_feature=True, use_no_feature=False, use_only_text=True),
    # no feature
    dict(use_text_feature=False, use_no_feature=True, use_only_text=False),
]

for feature_flags in adobe_runs:
    tune_tensor('adobe_core5', **feature_flags)