In [9]:
import os
import numpy as np

from core import SEED, TEST_K, DATA_CLEAN_PATH, RES_PATH
from core.neural_based_methods.pinsage.pinsage_recommender import PinSageRecommender
from utils.evaluation import write_results_to_excel, get_test_results

# Test

In [10]:
# Hand-picked PinSage settings for a quick manual run on CPU.
params = dict(
    random_walk_length=2,
    random_walk_restart_prob=0.5,
    num_random_walks=5,
    num_neighbors=3,
    num_layers=2,
    hidden_dims=32,
    lr=0.001,
    batch_size=128,
    device='cpu',
)

# Recommender for the adobe_core5 dataset, with the text-feature and only-text flags switched on.
rec = PinSageRecommender(
    'adobe_core5',
    DATA_CLEAN_PATH,
    use_text_feature=True,
    use_no_feature=False,
    use_only_text=True,
)

# Tuning hyperparameters using hyperopt

In [11]:
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK

# Hyperopt search space for PinSage.
# The first six entries are quantised-uniform draws given as (label, low, high, step);
# the learning rate is log-uniform and the batch size is picked from a fixed list.
space = {
    label: hp.quniform(label, low, high, step)
    for label, low, high, step in (
        ('random_walk_length', 2, 5, 1),
        ('random_walk_restart_prob', 0.3, 0.7, 0.1),
        ('num_random_walks', 5, 15, 1),
        ('num_neighbors', 3, 10, 1),
        ('num_layers', 2, 4, 1),
        ('hidden_dims', 16, 256, 1),
    )
}
space['lr'] = hp.loguniform('lr', np.log(0.00001), np.log(0.1))
space['batch_size'] = hp.choice('batch_size', [64, 128, 256, 512])

In [12]:
def _pinsage_sheet_name(data, use_text_feature, use_no_feature, use_only_text):
    """Return the Excel sheet name that encodes the dataset and the feature configuration."""
    if use_no_feature:
        return data + '_nofeature'
    if not use_text_feature:
        return data + '_notext'
    if use_only_text:
        return data + '_onlytext'
    return data


def _as_int_hyperparams(params):
    """Return a copy of ``params`` with the integer-valued PinSage hyperparameters cast to int.

    ``hp.quniform`` produces floats (e.g. ``2.0``) even when the step is 1, so the
    counts and sizes below are converted before they reach the model or the log.
    """
    int_keys = ('random_walk_length', 'num_random_walks', 'num_neighbors', 'num_layers', 'hidden_dims')
    converted = dict(params)
    for key in int_keys:
        if key in converted:
            converted[key] = int(round(converted[key]))
    return converted


def tune_pinsage(
    data,
    use_text_feature=True,
    use_no_feature=False,
    use_only_text=False, 
    tune_res_path=os.path.join(RES_PATH, 'pinsage_recommender_tuning_results.txt'),
    test_res_path=os.path.join(RES_PATH, 'test_results_PinSageRecommender.xlsx'),
    device='cpu',
):
    """Tune PinSage with hyperopt, retrain with the best setting, and record the results.

    Runs 50 TPE trials over the module-level ``space``, each trial training with early
    stopping and minimising the negated ``best_eval_score``. The best trial's
    hyperparameters (plus its ``best_epoch`` as ``epochs``) are then used to retrain
    without early stopping. Tuning and test results are appended to ``tune_res_path``
    and the test metrics are written to a sheet of ``test_res_path``.

    Args:
        data: Dataset name; passed to ``PinSageRecommender`` and used in the sheet name.
        use_text_feature: Forwarded to ``PinSageRecommender``.
        use_no_feature: Forwarded to ``PinSageRecommender``.
        use_only_text: Forwarded to ``PinSageRecommender``.
        tune_res_path: Text file the tuning log is appended to (created if missing).
        test_res_path: Excel file receiving the test results.
        device: Device passed to ``train_model`` for both the trials and the retraining.
    """
    rec = PinSageRecommender(data, DATA_CLEAN_PATH, 
                             use_text_feature=use_text_feature, use_no_feature=use_no_feature,
                             use_only_text=use_only_text)
    
    def objective(params):
        rec.train_model(**_as_int_hyperparams(params), device=device, early_stopping=True, verbose=True)
        return {
            # hyperopt minimises, so the validation score is negated
            'loss': -rec.params['best_eval_score'],
            'status': STATUS_OK,
            'best_epoch': rec.params['best_epoch']
        }
    
    print('Tuning hyperparameters of PinSage Recommender on dataset {}...'.format(data))
    print(f'use_text_feature={use_text_feature}, use_no_feature={use_no_feature}, use_only_text={use_only_text}')
    trials = Trials()
    # NOTE(review): fmin receives its random state through its `rstate` argument; confirm that
    # setting this private attribute actually seeds the search for the installed hyperopt version.
    trials._random_state = np.random.RandomState(SEED)
    fmin(objective, space, algo=tpe.suggest, max_evals=50, trials=trials)

    # get the best trial information
    best_trial_idx = np.argmin([trial_info['result']['loss'] for trial_info in trials.trials])
    best_trial = trials.trials[best_trial_idx]
    vals = best_trial['misc']['vals']
    optimal_params = _as_int_hyperparams({
        'random_walk_length': vals['random_walk_length'][0],
        'random_walk_restart_prob': vals['random_walk_restart_prob'][0],
        'num_random_walks': vals['num_random_walks'][0],
        'num_neighbors': vals['num_neighbors'][0],
        'num_layers': vals['num_layers'][0],
        'hidden_dims': vals['hidden_dims'][0],
        'lr': vals['lr'][0],
        # hp.choice records the index of the chosen option, not the option itself
        'batch_size': [64, 128, 256, 512][vals['batch_size'][0]],
        'epochs': best_trial['result']['best_epoch']
    })
    # the stored loss is the negated score; flip it back before reporting
    best_score = -best_trial['result']['loss']

    # save result; mode 'a' creates the file when it does not exist, and the context
    # manager closes it even if retraining or evaluation raises
    with open(tune_res_path, 'a') as f:
        print(f'PinSage Recommender tuning results on dataset {data} with use_text_feature={use_text_feature} and use_no_feature={use_no_feature} and use_only_text={use_only_text}...', 
            file=f)

        print('The optimal hyperparamters are: \nrandom_walk_length={}, random_walk_restart_prob={}, num_random_walks={}, num_neighbors={}, num_layers={}, hidden_dims={}, lr={}, batch_size={}, epochs={}'.format(
                optimal_params['random_walk_length'], 
                optimal_params['random_walk_restart_prob'], 
                optimal_params['num_random_walks'], 
                optimal_params['num_neighbors'], 
                optimal_params['num_layers'], 
                optimal_params['hidden_dims'], 
                optimal_params['lr'],
                optimal_params['batch_size'],
                optimal_params['epochs']
            ), file=f
        )

        print('Best validation NDCG@10 = {}'.format(best_score), file=f)

        # retrain the model on the same device that was used for tuning
        print('Retraining model using the optimal params...', file=f)
        rec.train_model(**optimal_params, device=device, early_stopping=False, verbose=False)
        print(f'Validation NDCG@10 of the retrained model = {rec.get_validation_ndcg()}', file=f)

        # print test metrics
        print('Test results of the retrained model:', file=f)
        test_res = get_test_results(rec, TEST_K)
        print(test_res, file=f)

    sheet_name = _pinsage_sheet_name(data, use_text_feature, use_no_feature, use_only_text)
    write_results_to_excel(test_res, test_res_path, sheet_name)

In [4]:
# tune_pinsage('movielens_100k')
# tune_pinsage('adobe_core5')
# all features

In [5]:
# "No text" run: with use_text_feature=False and use_no_feature=False the results go to
# the 'adobe_core5_notext' sheet. Each call runs 50 hyperopt trials and then retrains.
tune_pinsage('adobe_core5', use_text_feature=False, use_no_feature=False, use_only_text=False)
# "No feature" run: use_no_feature=True sends the results to the 'adobe_core5_nofeature' sheet.
tune_pinsage('adobe_core5', use_text_feature=False, use_no_feature=True, use_only_text=False)

Tuning hyperparameters of PinSage Recommender on dataset adobe_core5...
use_text_feature=False, use_no_feature=False
  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




epoch 1 [11.5s]:  training loss=0.7161015868186951    
epoch 2 [11.47s]:  training loss=0.9833914041519165   
epoch 3 [11.48s]:  training loss=1.1394459009170532   
epoch 4 [11.94s]:  training loss=1.20106041431427     
epoch 5 [13.24s]: training loss=1.3142961263656616  validation ndcg@10=0.013442241983496864 [0.45s]
epoch 6 [12.22s]:  training loss=1.3186707496643066   
epoch 7 [12.91s]:  training loss=1.384917974472046    
epoch 8 [14.2s]:  training loss=1.4378061294555664    
epoch 9 [14.22s]:  training loss=1.4524741172790527   
epoch 10 [13.2s]: training loss=1.443228840827942  validation ndcg@10=0.016084149841091747 [0.41s]
epoch 11 [14.26s]:  training loss=1.5368672609329224  
epoch 12 [13.68s]:  training loss=1.4835575819015503  
epoch 13 [14.59s]:  training loss=1.564459204673767   
epoch 14 [13.49s]:  training loss=1.5095664262771606  
epoch 15 [14.15s]: training loss=1.5592113733291626  validation ndcg@10=0.017449398631320823 [0.43s]
epoch 16 [13.31s]:  training loss=1.6797




epoch 1 [35.96s]:  training loss=0.9053891897201538   
epoch 2 [39.11s]:  training loss=1.3746825456619263   
epoch 3 [42.26s]:  training loss=1.510263204574585    
epoch 4 [37.91s]:  training loss=1.553107738494873    
epoch 5 [38.75s]: training loss=1.6357768774032593  validation ndcg@10=0.01221994761789056 [0.9s]
epoch 6 [38.63s]:  training loss=1.7369637489318848   
epoch 7 [39.29s]:  training loss=1.733157753944397    
epoch 8 [38.35s]:  training loss=1.762195110321045    
epoch 9 [39.48s]:  training loss=1.8380528688430786   
epoch 10 [38.11s]: training loss=1.8715980052947998  validation ndcg@10=0.014503616012785053 [0.93s]
epoch 11 [38.93s]:  training loss=1.8492015600204468  
epoch 12 [40.49s]:  training loss=2.0136160850524902  
epoch 13 [38.91s]:  training loss=2.0256567001342773  
epoch 14 [39.81s]:  training loss=2.0235257148742676  
epoch 15 [38.09s]: training loss=2.0244855880737305  validation ndcg@10=0.015503290323800316 [0.86s]
epoch 16 [39.6s]:  training loss=1.99154




epoch 1 [7.55s]:  training loss=0.7324574589729309    
epoch 2 [6.56s]:  training loss=0.481343150138855     
epoch 3 [6.44s]:  training loss=0.4459838271141052    
epoch 4 [5.9s]:  training loss=0.42334625124931335    
epoch 5 [5.49s]: training loss=0.4106220602989197  validation ndcg@10=0.0682104118087298 [0.08s]
epoch 6 [5.3s]:  training loss=0.4085623025894165     
epoch 7 [5.2s]:  training loss=0.4007148742675781     
epoch 8 [5.12s]:  training loss=0.3933311998844147    
epoch 9 [5.26s]:  training loss=0.39009302854537964   
epoch 10 [5.12s]: training loss=0.378045916557312  validation ndcg@10=0.0641736374414028 [0.06s]
epoch 11 [5.14s]:  training loss=0.3722127676010132   
epoch 12 [5.11s]:  training loss=0.36945798993110657  
epoch 13 [5.18s]:  training loss=0.3600902557373047   
epoch 14 [5.12s]:  training loss=0.3576946258544922   
epoch 15 [5.15s]: training loss=0.35976409912109375  validation ndcg@10=0.05883538586857692 [0.09s]
epoch 16 [5.26s]:  training loss=0.36174365878

In [13]:
# "No feature" run on movielens_100k: use_no_feature=True sends the results to the
# 'movielens_100k_nofeature' sheet.
tune_pinsage('movielens_100k', use_text_feature=False, use_no_feature=True, use_only_text=False)

Tuning hyperparameters of PinSage Recommender on dataset movielens_100k...
use_text_feature=False, use_no_feature=True, use_only_text=False
  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




epoch 1 [33.72s]:  training loss=0.6936063766479492   
epoch 2 [34.12s]:  training loss=0.5093884468078613   
epoch 3 [33.39s]:  training loss=0.47025322914123535  
epoch 4 [32.77s]:  training loss=0.4237317740917206   
epoch 5 [32.42s]: training loss=0.3964024782180786  validation ndcg@10=0.05698711055927033 [0.5s]
epoch 6 [33.14s]:  training loss=0.37654680013656616  
epoch 7 [33.18s]:  training loss=0.3620908260345459   
epoch 8 [33.4s]:  training loss=0.34853512048721313   
epoch 9 [33.34s]:  training loss=0.35513243079185486  
epoch 10 [33.24s]: training loss=0.33690837025642395  validation ndcg@10=0.05058978595118428 [0.53s]
epoch 11 [33.35s]:  training loss=0.33975034952163696 
epoch 12 [33.23s]:  training loss=0.333355188369751   
epoch 13 [35.14s]:  training loss=0.3320879340171814  
epoch 14 [32.72s]:  training loss=0.33134105801582336 
epoch 15 [33.29s]: training loss=0.3234368860721588  validation ndcg@10=0.04733395675244561 [0.49s]
epoch 16 [33.13s]:  training loss=0.31937

In [14]:
# "Only text" run on adobe_core5: use_text_feature=True with use_only_text=True sends the
# results to the 'adobe_core5_onlytext' sheet.
tune_pinsage('adobe_core5', use_text_feature=True, use_no_feature=False, use_only_text=True)

Tuning hyperparameters of PinSage Recommender on dataset adobe_core5...
use_text_feature=True, use_no_feature=False, use_only_text=True
  0%|          | 0/50 [00:00<?, ?trial/s, best loss=?]




epoch 1 [23.62s]:  training loss=0.3486691415309906   
epoch 2 [23.87s]:  training loss=0.22067128121852875  
epoch 3 [24.69s]:  training loss=0.20616216957569122  
epoch 4 [24.33s]:  training loss=0.19862930476665497  
epoch 5 [24.59s]: training loss=0.1990644931793213  validation ndcg@10=0.019722343761656696 [0.7s]
epoch 6 [24.49s]:  training loss=0.20327425003051758  
epoch 7 [24.36s]:  training loss=0.19845826923847198  
epoch 8 [23.78s]:  training loss=0.2015356868505478   
epoch 9 [24.06s]:  training loss=0.19544020295143127  
epoch 10 [24.21s]: training loss=0.19767020642757416  validation ndcg@10=0.015980039889260098 [0.75s]
epoch 11 [23.93s]:  training loss=0.19936113059520721 
epoch 12 [24.59s]:  training loss=0.20640616118907928 
epoch 13 [25.29s]:  training loss=0.20818683505058289 
epoch 14 [23.39s]:  training loss=0.20791779458522797 
epoch 15 [24.47s]: training loss=0.20599789917469025  validation ndcg@10=0.015415416165420434 [0.7s]
epoch 16 [23.82s]:  training loss=0.20