In [1]:
from helpers import *
from baseline import *
from baseline_helpers import *
from surprise_helpers import *
from spotlight_helpers import *
from pyfm_helpers import *
import scipy.optimize as sco


In [2]:
def load_models():
    """Build the registry of models to run, grouped by model family.

    Prints a short summary of the families that were loaded.

    Returns:
        dict: ``{family_name: {model_name: model}}``. ``predict_and_save``
        looks each ``family_name`` up in ``load_algos()``, so the family
        names here must match the keys there. Baseline entries are function
        references (not called here); the other families hold freshly
        constructed, untrained model objects.
    """
    print("Loading models...")
    models_dict = {
        # Baseline parameters: (train, test)
        "baseline": {
            "global_mean": baseline_global_mean,
            "global_median": baseline_global_median,
            "user_mean": baseline_user_mean,
            "user_median": baseline_user_median,
            "movie_mean": baseline_movie_mean,
            "movie_median": baseline_movie_median,
            "movie_mean_user_std": movie_mean_user_standardize,
            "movie_median_user_std": movie_median_user_standardize,
            "movie_mean_user_habit_std": movie_mean_user_habit_standardize,
            "movie_median_user_habit_std": movie_median_user_habit_standardize,
            "movie_mean_user_habit": movie_mean_user_habit,
            "movie_median_user_habit": movie_median_user_habit,
        },
        # surprise
        "surprise": {
            "surprise_svd": SVD(n_factors=50, n_epochs=200, lr_bu=1e-9, lr_qi=1e-5, reg_all=0.01),
            "surprise_svd_pp": SVDpp(n_factors=50, n_epochs=200, lr_bu=1e-9, lr_qi=1e-5, reg_all=0.01),
            "surprise_knn": KNNBaseline(k=100, sim_options={'name': 'pearson_baseline', 'user_based': False}),
        },
        # spotlight
        "spotlight": {
            "spotlight": ExplicitFactorizationModel(
                loss='regression',
                embedding_dim=150,  # latent dimensionality
                n_iter=50,  # number of epochs of training
                batch_size=256,  # minibatch size
                l2=1e-5,  # strength of L2 regularization
                learning_rate=0.0001,
                use_cuda=torch.cuda.is_available(),  # use the GPU only when one is present
            ),
        },
        # als (not added yet)

        # pyfm
        "pyfm": {
            "pyfm": pylibfm.FM(num_factors=42, num_iter=200, verbose=True,
                               task="regression", initial_learning_rate=0.01,
                               learning_rate_schedule="optimal"),
        },
        # keras (not added yet)
        # MF (not added yet)
    }

    # Summary line: "<n> model families loaded:\n name1; name2; ... "
    family_names = list(models_dict)
    model_msg = "{} model families loaded:\n ".format(len(family_names))
    model_msg += "".join("{}; ".format(name) for name in family_names)
    print(model_msg)
    return models_dict
    

In [3]:
# Build the model registry once and keep it in `models`; load_models() prints
# the names of the families it loaded.
models = load_models()

Loading models...
4 model families loaded:
 baseline; surprise; spotlight; pyfm; 


In [4]:
def load_algos():
    """Return the dispatch table from model-family name to its runner function.

    Returns:
        dict: ``{family_name: callable}``. Each callable is invoked by
        ``predict_and_save`` as ``algo(train, test, model)``.
    """
    return {
        "baseline": baseline_algo,    # baseline_algo(train, test, model)
        "surprise": surprise_algo,    # surprise_algo(train, test, algo, verbose=True, training=False)
        "spotlight": spotlight_algo,  # spotlight_algo(train, test, model, verbose=True)
        "pyfm": pyfm_algo,
    }
# Display the family -> runner mapping (the result is not stored).
load_algos()

{'baseline': <function baseline_helpers.baseline_algo(train, test, model, training=False)>,
 'surprise': <function surprise_helpers.surprise_algo(train, test, algo, verbose=True, training=False)>,
 'spotlight': <function spotlight_helpers.spotlight_algo(train, test, model, verbose=True)>,
 'pyfm': <function pyfm_helpers.pyfm_algo(train_df, test_df, model)>}

In [5]:
def predict_and_save(folder = "./predictions/"):
    """Run every registered model on the train/test data and save each result as CSV.

    For each family returned by ``load_models()``, the matching runner from
    ``load_algos()`` is called as ``algo(train_df, test_df, model)`` and the
    result is written to ``<folder>/<model_name>_predictions(<t.now()>).csv``.

    Args:
        folder: Output directory; created if it does not exist.

    Returns:
        dict: ``{model_name: prediction}`` for every model that was run.

    Notes:
        ``train_dataset`` and ``test_dataset`` are not defined in this
        notebook's cells; presumably they come from the star-imported helper
        modules -- TODO confirm. Any exception raised by a runner propagates
        and aborts the remaining models.
    """
    # exist_ok avoids the check-then-create race of a separate os.path.exists() test
    os.makedirs(folder, exist_ok=True)

    # load csv
    train_df = load_dataset(train_dataset)
    test_df = load_dataset(test_dataset)

    # registries: models grouped by family, and the runner for each family
    models_dict = load_models()
    algo_dict = load_algos()

    # dictionary of the predictions, keyed by model name
    predictions = dict()

    t = Timer()
    t.start()
    for model_family_name, model_family in models_dict.items():
        algo = algo_dict[model_family_name]
        print("Predicting using algo: {}, model: {}...".format(algo, model_family_name))

        for model_name, model in model_family.items():
            prediction = algo(train_df, test_df, model)
            print("Saving results of {}...".format(model_name))
            # Put the model name in the file name so each model gets its own,
            # identifiable file; join with the folder so the path does not
            # depend on `folder` ending in a separator.
            file_name = "{}_predictions({}).csv".format(model_name, t.now())
            prediction.to_csv(os.path.join(folder, file_name))
            predictions[model_name] = prediction

    return predictions
        

In [6]:
# Run all models with the default output folder ("./predictions/"). The
# returned dict is not bound to a name here.
predict_and_save()

Loading models...
4 model families loaded:
 baseline; surprise; spotlight; pyfm; 
Predicting using algo: <function baseline_algo at 0x7f7ddbba2620>, model: baseline...
Saving results of global_mean...
Saving results of global_median...
Saving results of user_mean...
Saving results of user_median...
Saving results of movie_mean...
Saving results of movie_median...


NameError: name 'user_standardize' is not defined