In [None]:
# Silence log records of severity WARNING and below (DEBUG, INFO and WARNING) for
# every logger in the process; only ERROR and CRITICAL records still get through.
import logging
logging.disable(logging.WARNING)

# Additionally ignore every warning raised through the `warnings` module from here on.
import warnings
warnings.filterwarnings("ignore")

import os
from core import DATA_CLEAN_PATH, SEED, RES_PATH, TEST_K
from core.neural_based_methods.two_tower.two_tower_model import TwoTowerRecommender
from utils.evaluation import get_test_results, write_results_to_excel

# Test

In [None]:
# Smoke test: construct the recommender for one dataset in the "no feature"
# configuration (use_no_feature=True, both text switches off) and show
# `rec.train.schema` as the cell output.
data = 'movielens_100k'
rec = TwoTowerRecommender(
    data,
    DATA_CLEAN_PATH,
    use_text_feature=False,
    use_no_feature=True,
    use_only_text=False,
)
rec.train.schema

# Parameter tuning

In [None]:
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK, STATUS_FAIL
import numpy as np

# Hyperopt search space shared by every tuning run.
#   batch_size    - categorical choice among four fixed sizes
#   learning_rate - log-uniform, bounds given as log(1e-5) and log(0.1)
#   *_dim         - quantised uniform on [20, 200] with step 1, same prior for all three
space = {
    'batch_size': hp.choice('batch_size', [64, 128, 256, 512]),
    'learning_rate': hp.loguniform('learning_rate', np.log(1e-5), np.log(0.1)),
    **{
        dim_name: hp.quniform(dim_name, 20, 200, 1)
        for dim_name in ('tower_dim', 'user_dim', 'item_dim')
    },
}

def tune_two_tower(data,
    use_text_feature=True,
    use_no_feature=False,
    use_only_text=False,
    tune_res_path=os.path.join(RES_PATH, 'two_tower_recommender_tuning_results.txt'),
    test_res_path=os.path.join(RES_PATH, 'test_results_TwoTowerRecommender.xlsx'),
    max_evals=50,
    max_valid_score=0.8,
):
    """Tune, retrain and evaluate a ``TwoTowerRecommender`` on one dataset.

    Runs a hyperopt TPE search over the module-level ``space``, retrains the
    recommender with the best hyperparameters found (early stopping disabled,
    number of epochs fixed to the best epoch of the winning trial), appends a
    textual report to ``tune_res_path`` and writes the test metrics to a sheet
    of the Excel workbook at ``test_res_path``.

    Parameters
    ----------
    data : str
        Dataset name; forwarded to ``TwoTowerRecommender`` and used as the base
        of the Excel sheet name.
    use_text_feature, use_no_feature, use_only_text : bool
        Feature switches forwarded unchanged to ``TwoTowerRecommender``. They
        also pick the sheet-name suffix: ``_nofeature``, ``_notext``,
        ``_onlytext`` or no suffix.
    tune_res_path : str
        Text file the report is appended to (created if it does not exist).
    test_res_path : str
        Excel file handed to ``write_results_to_excel``.
    max_evals : int
        Number of hyperopt evaluations (default 50).
    max_valid_score : float
        A trial whose best validation score is not strictly below this value
        is treated as abnormal and marked as failed (default 0.8).

    Raises
    ------
    RuntimeError
        If no trial finished with ``STATUS_OK``.
    """
    rec = TwoTowerRecommender(
        data,
        DATA_CLEAN_PATH,
        use_text_feature=use_text_feature,
        use_no_feature=use_no_feature,
        use_only_text=use_only_text,
    )

    # Placeholder result reported to hyperopt for crashed or rejected trials.
    failed_result = {'loss': 0, 'status': STATUS_FAIL, 'best_epoch': 0}

    def objective(params):
        """Train once with ``params``; the loss is the negated best validation score."""
        try:
            rec.train_model(**params, early_stopping=True, verbose=True)
            best_score = rec.params['best_eval_score']
            best_epoch = rec.params['best_epoch']
        except Exception as exc:
            # Only ordinary errors mark the trial as failed; KeyboardInterrupt and
            # SystemExit propagate so a running search can still be aborted.
            print(f'Trial failed for params {params}: {exc!r}')
            return dict(failed_result)

        # `not (score < limit)` rather than `score >= limit` so that a NaN score
        # is rejected as well.
        if not best_score < max_valid_score:  # abnormally high validation score
            return dict(failed_result)

        return {'loss': -best_score, 'status': STATUS_OK, 'best_epoch': best_epoch}

    print('Tuning hyperparameters of Two Tower Recommender on dataset {}...'.format(data))
    print(f'use_text_feature={use_text_feature}, use_no_feature={use_no_feature}, use_only_text={use_only_text}')
    trials = Trials()
    # NOTE(review): hyperopt's fmin takes its random source through the `rstate`
    # argument; this private attribute is presumably not consulted by fmin, so the
    # search may not actually be seeded. Confirm, and pass `rstate=` in the form the
    # installed hyperopt version expects.
    trials._random_state = np.random.RandomState(SEED)
    fmin(objective, space, algo=tpe.suggest, max_evals=max_evals, trials=trials)

    # Pick the best trial among the successful ones only: failed trials carry a
    # placeholder loss of 0 that must not take part in the comparison.
    ok_trials = [t for t in trials.trials if t['result'].get('status') == STATUS_OK]
    if not ok_trials:
        raise RuntimeError(
            'No successful trial for Two Tower Recommender on dataset {}'.format(data)
        )
    best_trial = min(ok_trials, key=lambda t: t['result']['loss'])
    sampled = best_trial['misc']['vals']

    optimal_params = {
        # hp.choice records the index of the selected option, not the option itself
        'batch_size': [64, 128, 256, 512][sampled['batch_size'][0]],
        'learning_rate': sampled['learning_rate'][0],
        'tower_dim': sampled['tower_dim'][0],
        'user_dim': sampled['user_dim'][0],
        'item_dim': sampled['item_dim'][0],
        # epochs
        'n_epochs': best_trial['result']['best_epoch'],
    }

    # save result ('a' creates the file when missing; `with` closes it even if
    # retraining or evaluation raises)
    with open(tune_res_path, 'a') as f:
        print(f'Two Tower Recommender tuning results on dataset {data} \nwith use_text_feature={use_text_feature} and use_no_feature={use_no_feature} and use_only_text={use_only_text}...',
              file=f)
        print(
            'The optimal hyperparamters are: \nbatch_size={batch_size}, learning_rate={learning_rate}, \ntower_dim={tower_dim}, user_dim={user_dim}, item_dim={item_dim}, n_epochs={n_epochs}'.format(
                **optimal_params
            ),
            file=f,
        )
        # The stored loss is the negated validation score; undo the negation for the report.
        best_score = -best_trial['result']['loss']
        print('Best validation NDCG@10 = {}'.format(best_score), file=f)

        # retrain the model
        print('Retraining model using the optimal params...', file=f)
        rec.train_model(**optimal_params, early_stopping=False, verbose=False)
        print(f'Validation NDCG@10 of the retrained model = {rec.get_validation_ndcg()}', file=f)

        # print test metrics
        print('Test results of the retrained model:', file=f)
        test_res = get_test_results(rec, TEST_K)
        print(test_res, file=f)

    # One Excel sheet per (dataset, feature configuration).
    if use_no_feature:
        sheet_name = data + '_nofeature'
    elif not use_text_feature:
        sheet_name = data + '_notext'
    elif use_only_text:
        sheet_name = data + '_onlytext'
    else:
        sheet_name = data

    write_results_to_excel(test_res, test_res_path, sheet_name)

In [3]:
# tune_two_tower('movielens_100k', use_text_feature=True, use_no_feature=False,
#                           use_only_text=False)
# # all feature

# tune_two_tower('movielens_100k', use_text_feature=False, use_no_feature=True,
#                           use_only_text=False)
# # no feature

In [None]:
# tune_two_tower('adobe_core5', use_text_feature=True,
#                             use_no_feature=False,
#                             use_only_text=False)
# # all feature

# tune_two_tower('adobe_core5', use_text_feature=False,
#                             use_no_feature=True,
#                             use_only_text=False)
# # no feature