In [None]:
!pip install scikit-surprise
!pip install optuna

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 4.0MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1617603 sha256=fac166ba920b021c51f598562bb471f72b23b706dbc578b78d3fd058709df0db
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1
Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/1a/18/b49ca91cf592747e19f2d333c2a86cd7c81895b922a5a09adf6335471576/optuna-2.8.0-py3-none-any.whl (301kB)
[K     |█████████████████

In [None]:
# Mount Google Drive and set data paths.
import os
from google.colab import drive
from google.colab import files

# Make Google Drive reachable from the Colab runtime under /content/gdrive.
drive.mount('/content/gdrive')

# Root folder of the project data on the mounted drive; both CSV paths are derived from it.
DATA_PATH = "/content/gdrive/My Drive/ETH/Computational Intelligence Lab/CIL-Project/data"
TRAIN_DATA_PATH, TEST_DATA_PATH = (
    os.path.join(DATA_PATH, csv_name) for csv_name in ("data_train.csv", "data_test.csv")
)

Mounted at /content/gdrive


In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset
from sklearn.model_selection import train_test_split
import surprise
from surprise import accuracy
from surprise.model_selection import cross_validate
import optuna

In [None]:
def load_data(file_path: str, full_dataset: bool, train_val_split: bool, random_seed: int = 0, train_size: float = 0,
              subset_size: int = 10000):
    """Read the CSV at ``file_path`` and optionally truncate and/or split it.

    Args:
        file_path: Path of the CSV file, read with ``pd.read_csv``.
        full_dataset: If False, only the first ``subset_size`` rows are kept (quick test runs).
        train_val_split: If True, the rows are split into a training and a validation frame.
        random_seed: Passed as ``random_state`` to the split; unused when no split is made.
        train_size: Passed as ``train_size`` to ``train_test_split``; only used when
            ``train_val_split`` is True. The default of 0 is not a usable split size, so
            callers requesting a split must pass a positive value.
        subset_size: Number of leading rows kept when ``full_dataset`` is False.

    Returns:
        ``(train_pd, val_pd)`` when ``train_val_split`` is True, otherwise the single DataFrame.

    Raises:
        ValueError: If a split is requested with a non-positive ``train_size``.
    """
    data_pd = pd.read_csv(file_path)

    # Reduce the dataset for quick test runs.
    if not full_dataset:
        data_pd = data_pd.head(subset_size)

    if not train_val_split:
        return data_pd

    # A non-positive train_size cannot yield a split; fail with a message that names the argument.
    if train_size is not None and train_size <= 0:
        raise ValueError(
            f"train_size must be positive when train_val_split is True, got {train_size!r}"
        )

    train_pd, val_pd = train_test_split(data_pd, train_size=train_size, random_state=random_seed)
    return train_pd, val_pd

def __extract_users_items_ratings(data_pd: pd.DataFrame):
    users, movies = \
        [np.squeeze(arr) for arr in np.split(data_pd.Id.str.extract('r(\d+)_c(\d+)').values.astype(int) - 1, 2, axis=-1)]
    ratings = data_pd.Prediction.values
    return users, movies, ratings

def create_surprise_data(data_pd):
    """Wrap the ratings in ``data_pd`` as a Surprise dataset with a 1-5 rating scale.

    The user and movie indices and the ratings are obtained from
    ``__extract_users_items_ratings`` and handed to Surprise as the columns
    ``users``, ``movies`` and ``ratings`` (in that order).
    """
    column_order = ['users', 'movies', 'ratings']
    extracted = __extract_users_items_ratings(data_pd)

    frame = pd.DataFrame(dict(zip(column_order, extracted)))
    rating_reader = surprise.Reader(rating_scale=(1, 5))
    return surprise.Dataset.load_from_df(frame[column_order], reader=rating_reader)

def __get_tensors_from_dataframe(data_pd: pd.DataFrame):
    """Return the users, movies and ratings of ``data_pd`` as three ``int64`` tensors."""
    users_t, movies_t, ratings_t = (
        torch.tensor(values, dtype=torch.int64)
        for values in __extract_users_items_ratings(data_pd)
    )
    return users_t, movies_t, ratings_t


def create_dataset(data_pd: pd.DataFrame, test_dataset: bool = False):
    """Build a ``TensorDataset`` from ``data_pd``.

    Returns:
        For a regular dataset, a ``TensorDataset`` of (users, movies, ratings).
        For a test dataset, the pair ``(ids, dataset)`` where ``ids`` is the ``Id``
        column of ``data_pd`` and ``dataset`` holds only (users, movies).
    """
    user_t, movie_t, rating_t = __get_tensors_from_dataframe(data_pd)

    if test_dataset:
        # Test data: drop the ratings tensor and return the Ids alongside the dataset.
        return data_pd.Id, TensorDataset(user_t, movie_t)

    return TensorDataset(user_t, movie_t, rating_t)

In [None]:
# Run-level switches: the seed for random number generation and whether to use all rows.
random_seed = 42
full_dataset = True  # False makes load_data keep only the leading rows of the CSV

# Seed NumPy's global random generator.
np.random.seed(random_seed)

# Load the training ratings as one DataFrame; no train/validation split is made here.
train_pd = load_data(TRAIN_DATA_PATH, full_dataset=full_dataset, train_val_split=False)

In [None]:
# Convert the ratings frame into a Surprise dataset and hold out 20% of the ratings.
train_data = create_surprise_data(train_pd)
# Passing random_state explicitly makes the split independent of how much of NumPy's
# global random stream has already been consumed, so re-running this cell gives the same split.
trainset, testset = surprise.model_selection.train_test_split(
    train_data, test_size=0.2, random_state=random_seed
)

In [None]:
from surprise import SVD

def objective(trial):
    """Optuna objective: fit an unbiased Surprise SVD with sampled hyperparameters.

    The model is fitted on the module-level ``trainset`` and evaluated on ``testset``.

    Args:
        trial: Optuna trial used to sample the hyperparameters.

    Returns:
        RMSE of the model's predictions on ``testset``.
    """
    # NOTE(review): in the recorded run, several trials with a non-zero init_mean report the
    # identical RMSE 1.5971405493085367 -- worth checking whether that range is useful.
    hyperparams = {
        "n_factors": trial.suggest_int("n_factors", 2, 500),  # default 100
        "n_epochs": trial.suggest_int("n_epochs", 10, 30),  # default 20
        "init_mean": trial.suggest_int("init_mean", 0, 5),  # default 0
        "init_std_dev": trial.suggest_float("init_std_dev", 0, 1),  # default 0.1
        "lr_all": trial.suggest_float("lr_all", 0, 1e-2),  # default 0.005
        "reg_all": trial.suggest_float("reg_all", 0, 1e-1),  # default 0.02
    }

    algo = SVD(biased=False, **hyperparams)
    algo.fit(trainset)
    predictions = algo.test(testset)

    # verbose=False: Optuna already logs every trial's value, so the extra "RMSE: ..."
    # line printed by Surprise only duplicates it in the cell output.
    return accuracy.rmse(predictions, verbose=False)

In [None]:
# Seed the sampler so that its random draws are reproducible across runs of the notebook.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=random_seed),
)

[32m[I 2021-06-08 19:27:34,580][0m A new study created in memory with name: no-name-6d72f57b-01f3-44ee-9e68-78d5cd27a39e[0m


In [None]:
# Queue one trial that uses the default values noted next to each parameter in `objective`.
default_svd_params = {
    "n_factors": 100,
    "n_epochs": 20,
    "init_mean": 0,
    "init_std_dev": 0.1,
    "lr_all": 0.005,
    "reg_all": 0.02,
}
study.enqueue_trial(default_svd_params)


enqueue_trial is experimental (supported from v1.2.0). The interface can change in the future.


create_trial is experimental (supported from v2.0.0). The interface can change in the future.


add_trial is experimental (supported from v2.0.0). The interface can change in the future.



In [None]:
# Run 100 trials of `objective`. In the recorded log the enqueued default configuration
# is evaluated first (trial 0), and the whole run took well over an hour.
study.optimize(objective, n_trials=100)

[32m[I 2021-06-08 19:28:34,514][0m Trial 0 finished with value: 1.0327349604267961 and parameters: {'n_factors': 100, 'n_epochs': 20, 'init_mean': 0, 'init_std_dev': 0.1, 'lr_all': 0.005, 'reg_all': 0.02}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.0327


[32m[I 2021-06-08 19:30:11,828][0m Trial 1 finished with value: 1.1726615088368082 and parameters: {'n_factors': 292, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.22602245437744117, 'lr_all': 0.003304524580820677, 'reg_all': 0.003220847700665708}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.1727


[32m[I 2021-06-08 19:31:00,087][0m Trial 2 finished with value: 1.5971405493085367 and parameters: {'n_factors': 150, 'n_epochs': 12, 'init_mean': 4, 'init_std_dev': 0.9729636789560008, 'lr_all': 0.0011966039763301117, 'reg_all': 0.04133579844443397}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.5971


[32m[I 2021-06-08 19:32:31,635][0m Trial 3 finished with value: 1.5971405493085367 and parameters: {'n_factors': 183, 'n_epochs': 20, 'init_mean': 5, 'init_std_dev': 0.1467600871463801, 'lr_all': 0.005111403426667759, 'reg_all': 0.0076551287731434605}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.5971


[32m[I 2021-06-08 19:34:18,757][0m Trial 4 finished with value: 1.0897006996306178 and parameters: {'n_factors': 262, 'n_epochs': 17, 'init_mean': 0, 'init_std_dev': 0.3840956621793141, 'lr_all': 0.002070488927811165, 'reg_all': 0.05506468886860675}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.0897


[32m[I 2021-06-08 19:35:11,470][0m Trial 5 finished with value: 1.6409533022305658 and parameters: {'n_factors': 81, 'n_epochs': 20, 'init_mean': 1, 'init_std_dev': 0.28243607845678165, 'lr_all': 0.0075979939552400354, 'reg_all': 0.0483260361691987}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.6410


[32m[I 2021-06-08 19:38:18,412][0m Trial 6 finished with value: 1.5971405493085367 and parameters: {'n_factors': 293, 'n_epochs': 25, 'init_mean': 2, 'init_std_dev': 0.7827252462437955, 'lr_all': 0.008451943707629379, 'reg_all': 0.0018434830117968872}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.5971


[32m[I 2021-06-08 19:39:40,206][0m Trial 7 finished with value: 1.5971405493085367 and parameters: {'n_factors': 194, 'n_epochs': 17, 'init_mean': 4, 'init_std_dev': 0.7015123323793748, 'lr_all': 0.006259876694897816, 'reg_all': 0.009504246827011998}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.5971


[32m[I 2021-06-08 19:41:43,366][0m Trial 8 finished with value: 1.5971405493085367 and parameters: {'n_factors': 169, 'n_epochs': 29, 'init_mean': 2, 'init_std_dev': 0.8014814732098676, 'lr_all': 0.0025643682513787312, 'reg_all': 0.08899614051417205}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.5971


[32m[I 2021-06-08 19:45:06,203][0m Trial 9 finished with value: 1.3312558647868977 and parameters: {'n_factors': 317, 'n_epochs': 24, 'init_mean': 0, 'init_std_dev': 0.7222405887214748, 'lr_all': 0.009969350454587243, 'reg_all': 0.017303180546481924}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.3313


[32m[I 2021-06-08 19:52:13,717][0m Trial 10 finished with value: 1.5971405493085367 and parameters: {'n_factors': 475, 'n_epochs': 29, 'init_mean': 1, 'init_std_dev': 0.0030534003241793894, 'lr_all': 0.004340046874235349, 'reg_all': 0.026235650591891084}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.5971


[32m[I 2021-06-08 19:52:33,206][0m Trial 11 finished with value: 3.068597545794488 and parameters: {'n_factors': 7, 'n_epochs': 16, 'init_mean': 0, 'init_std_dev': 0.42448146151549215, 'lr_all': 6.80776568774224e-05, 'reg_all': 0.07439577339875816}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 3.0686


[32m[I 2021-06-08 19:56:07,716][0m Trial 12 finished with value: 1.4841765149283037 and parameters: {'n_factors': 419, 'n_epochs': 17, 'init_mean': 1, 'init_std_dev': 0.0009786199914926796, 'lr_all': 0.0021755786086811967, 'reg_all': 0.06955556885016784}. Best is trial 0 with value: 1.0327349604267961.[0m


RMSE: 1.4842


[32m[I 2021-06-08 19:56:33,525][0m Trial 13 finished with value: 1.0160328874279687 and parameters: {'n_factors': 3, 'n_epochs': 24, 'init_mean': 0, 'init_std_dev': 0.4694077151940101, 'lr_all': 0.005456735095645565, 'reg_all': 0.03387754943797488}. Best is trial 13 with value: 1.0160328874279687.[0m


RMSE: 1.0160


[32m[I 2021-06-08 19:56:59,105][0m Trial 14 finished with value: 1.3261157713859453 and parameters: {'n_factors': 2, 'n_epochs': 24, 'init_mean': 3, 'init_std_dev': 0.5572191221926552, 'lr_all': 0.005907195494898096, 'reg_all': 0.031959830063097536}. Best is trial 13 with value: 1.0160328874279687.[0m


RMSE: 1.3261


[32m[I 2021-06-08 19:57:50,871][0m Trial 15 finished with value: 1.5057119794203417 and parameters: {'n_factors': 62, 'n_epochs': 23, 'init_mean': 1, 'init_std_dev': 0.09952581298014723, 'lr_all': 0.007290357944875803, 'reg_all': 0.027906574376523816}. Best is trial 13 with value: 1.0160328874279687.[0m


RMSE: 1.5057


[32m[I 2021-06-08 19:59:02,127][0m Trial 16 finished with value: 1.0618712578652694 and parameters: {'n_factors': 83, 'n_epochs': 27, 'init_mean': 0, 'init_std_dev': 0.5765486385516823, 'lr_all': 0.004107668780094368, 'reg_all': 0.03866588819708372}. Best is trial 13 with value: 1.0160328874279687.[0m


RMSE: 1.0619


[32m[I 2021-06-08 19:59:38,264][0m Trial 17 finished with value: 1.6437408022749882 and parameters: {'n_factors': 30, 'n_epochs': 22, 'init_mean': 2, 'init_std_dev': 0.9565403622335646, 'lr_all': 0.005144116999269936, 'reg_all': 0.019174815901624606}. Best is trial 13 with value: 1.0160328874279687.[0m


RMSE: 1.6437


[32m[I 2021-06-08 20:01:09,667][0m Trial 18 finished with value: 1.6697559839025957 and parameters: {'n_factors': 122, 'n_epochs': 27, 'init_mean': 1, 'init_std_dev': 0.33735784670805646, 'lr_all': 0.006483990149782114, 'reg_all': 0.05698673338689264}. Best is trial 13 with value: 1.0160328874279687.[0m


RMSE: 1.6698


[32m[I 2021-06-08 20:02:20,478][0m Trial 19 finished with value: 1.5971405493085367 and parameters: {'n_factors': 118, 'n_epochs': 21, 'init_mean': 3, 'init_std_dev': 0.46932531828290336, 'lr_all': 0.009381380886433387, 'reg_all': 0.015217216109645944}. Best is trial 13 with value: 1.0160328874279687.[0m


RMSE: 1.5971


[32m[I 2021-06-08 20:02:50,508][0m Trial 20 finished with value: 1.0102935565235391 and parameters: {'n_factors': 45, 'n_epochs': 15, 'init_mean': 0, 'init_std_dev': 0.13634468946133171, 'lr_all': 0.003886399814982175, 'reg_all': 0.03725676019741283}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0103


[32m[I 2021-06-08 20:03:17,852][0m Trial 21 finished with value: 1.0104749402505047 and parameters: {'n_factors': 41, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.1546181078026862, 'lr_all': 0.0034274707473192934, 'reg_all': 0.036869498515294974}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0105


[32m[I 2021-06-08 20:03:36,576][0m Trial 22 finished with value: 1.0126518384377523 and parameters: {'n_factors': 33, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.21137642626091607, 'lr_all': 0.003498355237125342, 'reg_all': 0.04041227442094919}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0127


[32m[I 2021-06-08 20:04:03,715][0m Trial 23 finished with value: 1.39506567875439 and parameters: {'n_factors': 41, 'n_epochs': 14, 'init_mean': 1, 'init_std_dev': 0.2180149708806567, 'lr_all': 0.003485878608261093, 'reg_all': 0.046366976756949674}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.3951


[32m[I 2021-06-08 20:04:58,712][0m Trial 24 finished with value: 1.0235492993306108 and parameters: {'n_factors': 222, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.0494555701008101, 'lr_all': 0.000901972547529847, 'reg_all': 0.06197789376174835}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0235


[32m[I 2021-06-08 20:05:19,075][0m Trial 25 finished with value: 1.352399813700765 and parameters: {'n_factors': 42, 'n_epochs': 10, 'init_mean': 1, 'init_std_dev': 0.19318664418655443, 'lr_all': 0.003208939146111625, 'reg_all': 0.04206051474944527}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.3524


[32m[I 2021-06-08 20:06:15,364][0m Trial 26 finished with value: 1.0488499876169455 and parameters: {'n_factors': 137, 'n_epochs': 15, 'init_mean': 0, 'init_std_dev': 0.2918669820914376, 'lr_all': 0.004155056755021781, 'reg_all': 0.03542758479287529}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0488


[32m[I 2021-06-08 20:08:13,179][0m Trial 27 finished with value: 1.8211935078701813 and parameters: {'n_factors': 353, 'n_epochs': 12, 'init_mean': 2, 'init_std_dev': 0.1458852642529591, 'lr_all': 0.0013817279688149156, 'reg_all': 0.05187693406517579}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.8212


[32m[I 2021-06-08 20:08:40,508][0m Trial 28 finished with value: 1.4579865611868448 and parameters: {'n_factors': 67, 'n_epochs': 11, 'init_mean': 1, 'init_std_dev': 0.07349437082812212, 'lr_all': 0.002802944025088598, 'reg_all': 0.0644800714242346}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.4580


[32m[I 2021-06-08 20:09:36,314][0m Trial 29 finished with value: 1.056255379487439 and parameters: {'n_factors': 103, 'n_epochs': 18, 'init_mean': 0, 'init_std_dev': 0.2683550045051828, 'lr_all': 0.004188968595995954, 'reg_all': 0.024813217185343343}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0563


[32m[I 2021-06-08 20:09:53,289][0m Trial 30 finished with value: 1.0114265694657771 and parameters: {'n_factors': 6, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.14854477986454984, 'lr_all': 0.004629624501851361, 'reg_all': 0.04426718777642041}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0114


[32m[I 2021-06-08 20:10:10,233][0m Trial 31 finished with value: 1.0122724654930435 and parameters: {'n_factors': 5, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.16459159209559682, 'lr_all': 0.004622831132820009, 'reg_all': 0.047820490622684245}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0123


[32m[I 2021-06-08 20:10:27,330][0m Trial 32 finished with value: 1.011675326049026 and parameters: {'n_factors': 4, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.14181590298167582, 'lr_all': 0.004511707875986098, 'reg_all': 0.046747244699397915}. Best is trial 20 with value: 1.0102935565235391.[0m


RMSE: 1.0117


[32m[I 2021-06-08 20:10:55,157][0m Trial 33 finished with value: 1.0088706125748041 and parameters: {'n_factors': 51, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.04828644211224341, 'lr_all': 0.0035900559397986837, 'reg_all': 0.04383330886168267}. Best is trial 33 with value: 1.0088706125748041.[0m


RMSE: 1.0089


[32m[I 2021-06-08 20:11:22,609][0m Trial 34 finished with value: 1.3911612979937655 and parameters: {'n_factors': 58, 'n_epochs': 12, 'init_mean': 1, 'init_std_dev': 0.039671350519974447, 'lr_all': 0.0037622125072381708, 'reg_all': 0.029769345519039203}. Best is trial 33 with value: 1.0088706125748041.[0m


RMSE: 1.3912


[32m[I 2021-06-08 20:12:02,882][0m Trial 35 finished with value: 1.0086532500191185 and parameters: {'n_factors': 101, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.10039696498839037, 'lr_all': 0.0028629837974267426, 'reg_all': 0.05794734652521989}. Best is trial 35 with value: 1.0086532500191185.[0m


RMSE: 1.0087


[32m[I 2021-06-08 20:12:39,672][0m Trial 36 finished with value: 1.5971405493085367 and parameters: {'n_factors': 98, 'n_epochs': 12, 'init_mean': 5, 'init_std_dev': 0.007569411546926863, 'lr_all': 0.0015324855602455688, 'reg_all': 0.07630882037348138}. Best is trial 35 with value: 1.0086532500191185.[0m


RMSE: 1.5971


[32m[I 2021-06-08 20:13:56,506][0m Trial 37 finished with value: 1.4337952326613668 and parameters: {'n_factors': 155, 'n_epochs': 19, 'init_mean': 1, 'init_std_dev': 0.09620138012890043, 'lr_all': 0.0030351771347581504, 'reg_all': 0.059824043970238175}. Best is trial 35 with value: 1.0086532500191185.[0m


RMSE: 1.4338


[32m[I 2021-06-08 20:15:19,185][0m Trial 38 finished with value: 1.0643879520156336 and parameters: {'n_factors': 212, 'n_epochs': 16, 'init_mean': 0, 'init_std_dev': 0.3476071615062749, 'lr_all': 0.0021784950630902257, 'reg_all': 0.054320215704871444}. Best is trial 35 with value: 1.0086532500191185.[0m


RMSE: 1.0644


[32m[I 2021-06-08 20:15:55,867][0m Trial 39 finished with value: 1.6607624057811377 and parameters: {'n_factors': 87, 'n_epochs': 13, 'init_mean': 4, 'init_std_dev': 0.24910247464167848, 'lr_all': 0.00044071939734879276, 'reg_all': 0.06698124707808134}. Best is trial 35 with value: 1.0086532500191185.[0m


RMSE: 1.6608


[32m[I 2021-06-08 20:16:52,274][0m Trial 40 finished with value: 1.0110869974658818 and parameters: {'n_factors': 139, 'n_epochs': 15, 'init_mean': 0, 'init_std_dev': 0.0547255971370653, 'lr_all': 0.002700165000925076, 'reg_all': 0.08458320409213929}. Best is trial 35 with value: 1.0086532500191185.[0m


RMSE: 1.0111


[32m[I 2021-06-08 20:17:46,787][0m Trial 41 finished with value: 1.0108164725105155 and parameters: {'n_factors': 131, 'n_epochs': 15, 'init_mean': 0, 'init_std_dev': 0.05491917768024849, 'lr_all': 0.002617381993253549, 'reg_all': 0.08457766453031379}. Best is trial 35 with value: 1.0086532500191185.[0m


RMSE: 1.0108


[32m[I 2021-06-08 20:18:29,852][0m Trial 42 finished with value: 1.0076500443714709 and parameters: {'n_factors': 113, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.11093050731811957, 'lr_all': 0.001915128297980739, 'reg_all': 0.036600105989613135}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.0077


[32m[I 2021-06-08 20:19:19,634][0m Trial 43 finished with value: 1.0108985801466195 and parameters: {'n_factors': 173, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.11651613228318894, 'lr_all': 0.001957299070665232, 'reg_all': 0.03681823844365399}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.0109


[32m[I 2021-06-08 20:20:01,311][0m Trial 44 finished with value: 1.2304992673051955 and parameters: {'n_factors': 107, 'n_epochs': 13, 'init_mean': 1, 'init_std_dev': 0.18498963830936632, 'lr_all': 0.0018520873244705794, 'reg_all': 0.02328948345484285}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.2305


[32m[I 2021-06-08 20:20:40,254][0m Trial 45 finished with value: 1.0224706113499904 and parameters: {'n_factors': 65, 'n_epochs': 16, 'init_mean': 0, 'init_std_dev': 0.3070991776515806, 'lr_all': 0.003738982057093861, 'reg_all': 0.05136653684562742}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.0225


[32m[I 2021-06-08 20:21:01,560][0m Trial 46 finished with value: 1.0102654430803948 and parameters: {'n_factors': 24, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.1095028971623736, 'lr_all': 0.0009150385044389699, 'reg_all': 0.04263723601730861}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.0103


[32m[I 2021-06-08 20:21:30,717][0m Trial 47 finished with value: 1.34671850800035 and parameters: {'n_factors': 75, 'n_epochs': 11, 'init_mean': 1, 'init_std_dev': 0.007655422892861252, 'lr_all': 0.00019059867082683488, 'reg_all': 0.042481726807959655}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.3467


[32m[I 2021-06-08 20:21:50,960][0m Trial 48 finished with value: 1.0116900694990278 and parameters: {'n_factors': 20, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.09596271855675934, 'lr_all': 0.0009304549041462865, 'reg_all': 0.056692686896316624}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.0117


[32m[I 2021-06-08 20:23:39,245][0m Trial 49 finished with value: 1.313566750395784 and parameters: {'n_factors': 263, 'n_epochs': 17, 'init_mean': 1, 'init_std_dev': 0.24336902229983579, 'lr_all': 0.002369788485413278, 'reg_all': 0.02926017093757136}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.3136


[32m[I 2021-06-08 20:24:32,496][0m Trial 50 finished with value: 1.342034236942891 and parameters: {'n_factors': 154, 'n_epochs': 13, 'init_mean': 2, 'init_std_dev': 0.034678273284182506, 'lr_all': 0.0007740969658444195, 'reg_all': 0.013286985825175835}. Best is trial 42 with value: 1.0076500443714709.[0m


RMSE: 1.3420


[32m[I 2021-06-08 20:24:59,718][0m Trial 51 finished with value: 1.0054483292926464 and parameters: {'n_factors': 36, 'n_epochs': 15, 'init_mean': 0, 'init_std_dev': 0.11962223342030279, 'lr_all': 0.0015658243688355018, 'reg_all': 0.03319129727035947}. Best is trial 51 with value: 1.0054483292926464.[0m


RMSE: 1.0054


[32m[I 2021-06-08 20:25:26,500][0m Trial 52 finished with value: 1.0061052252028335 and parameters: {'n_factors': 52, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.1083957287540716, 'lr_all': 0.0018033087809419292, 'reg_all': 0.032228208049801046}. Best is trial 51 with value: 1.0054483292926464.[0m


RMSE: 1.0061


[32m[I 2021-06-08 20:25:45,579][0m Trial 53 finished with value: 1.0052730075786034 and parameters: {'n_factors': 22, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.09351544623303326, 'lr_all': 0.0015329782540204858, 'reg_all': 0.03263680352616632}. Best is trial 53 with value: 1.0052730075786034.[0m


RMSE: 1.0053


[32m[I 2021-06-08 20:26:18,222][0m Trial 54 finished with value: 1.0053810862589383 and parameters: {'n_factors': 93, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.012584978174517578, 'lr_all': 0.0015253415838943204, 'reg_all': 0.03147953866092263}. Best is trial 53 with value: 1.0052730075786034.[0m


RMSE: 1.0054


[32m[I 2021-06-08 20:26:50,452][0m Trial 55 finished with value: 1.0057125666183284 and parameters: {'n_factors': 91, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.0828888959164392, 'lr_all': 0.0016588208942588327, 'reg_all': 0.020994331530031246}. Best is trial 53 with value: 1.0052730075786034.[0m


RMSE: 1.0057


[32m[I 2021-06-08 20:27:21,492][0m Trial 56 finished with value: 1.416267821415327 and parameters: {'n_factors': 84, 'n_epochs': 11, 'init_mean': 1, 'init_std_dev': 0.001177456186006301, 'lr_all': 0.001652466038054639, 'reg_all': 0.021801732396605704}. Best is trial 53 with value: 1.0052730075786034.[0m


RMSE: 1.4163


[32m[I 2021-06-08 20:27:55,994][0m Trial 57 finished with value: 1.1834504345698253 and parameters: {'n_factors': 117, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.1995169810747574, 'lr_all': 0.0004747829646360325, 'reg_all': 0.03190988675323142}. Best is trial 53 with value: 1.0052730075786034.[0m


RMSE: 1.1835


[32m[I 2021-06-08 20:28:55,238][0m Trial 58 finished with value: 1.0044486821715974 and parameters: {'n_factors': 198, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.0016365360300690246, 'lr_all': 0.0013202828781224253, 'reg_all': 0.009160160961232948}. Best is trial 58 with value: 1.0044486821715974.[0m


RMSE: 1.0044


[32m[I 2021-06-08 20:30:28,195][0m Trial 59 finished with value: 1.5532895339005863 and parameters: {'n_factors': 314, 'n_epochs': 11, 'init_mean': 1, 'init_std_dev': 0.0003209578330493068, 'lr_all': 0.0011812847636275855, 'reg_all': 0.007139327746522543}. Best is trial 58 with value: 1.0044486821715974.[0m


RMSE: 1.5533


[32m[I 2021-06-08 20:31:26,599][0m Trial 60 finished with value: 1.5971405493085367 and parameters: {'n_factors': 195, 'n_epochs': 12, 'init_mean': 3, 'init_std_dev': 0.07303065857113844, 'lr_all': 0.0013562798994970933, 'reg_all': 0.0005131450111848096}. Best is trial 58 with value: 1.0044486821715974.[0m


RMSE: 1.5971


[32m[I 2021-06-08 20:31:53,654][0m Trial 61 finished with value: 1.017537614379097 and parameters: {'n_factors': 78, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.1763781689763168, 'lr_all': 0.0018161388389404736, 'reg_all': 0.019115873708026587}. Best is trial 58 with value: 1.0044486821715974.[0m


RMSE: 1.0175


[32m[I 2021-06-08 20:32:11,779][0m Trial 62 finished with value: 1.0040474358380413 and parameters: {'n_factors': 23, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.0302527716586039, 'lr_all': 0.002156247491445139, 'reg_all': 0.004995176082915636}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0040


[32m[I 2021-06-08 20:32:29,242][0m Trial 63 finished with value: 3.0692695212316456 and parameters: {'n_factors': 20, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.016225705378233455, 'lr_all': 1.5270048709208495e-05, 'reg_all': 0.006121112740187587}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 3.0693


[32m[I 2021-06-08 20:33:39,466][0m Trial 64 finished with value: 1.007438494731597 and parameters: {'n_factors': 247, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.07290705822489763, 'lr_all': 0.0023659291486358433, 'reg_all': 0.013532027504879487}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0074


[32m[I 2021-06-08 20:34:02,078][0m Trial 65 finished with value: 1.0078181106491644 and parameters: {'n_factors': 54, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.031931744472835644, 'lr_all': 0.0011849628995226993, 'reg_all': 0.010717012127207404}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0078


[32m[I 2021-06-08 20:34:22,568][0m Trial 66 finished with value: 1.1275498838732172 and parameters: {'n_factors': 27, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.9409834163858244, 'lr_all': 0.0005914695814279736, 'reg_all': 0.0032685899697953514}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.1275


[32m[I 2021-06-08 20:34:43,194][0m Trial 67 finished with value: 1.2953595284655337 and parameters: {'n_factors': 36, 'n_epochs': 11, 'init_mean': 1, 'init_std_dev': 0.5735001418319481, 'lr_all': 0.001499955719792416, 'reg_all': 0.02658580350850251}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.2954


[32m[I 2021-06-08 20:35:12,719][0m Trial 68 finished with value: 1.0060991342027403 and parameters: {'n_factors': 91, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.07904779478234328, 'lr_all': 0.002235891291433657, 'reg_all': 0.016758029547542696}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0061


[32m[I 2021-06-08 20:37:12,205][0m Trial 69 finished with value: 1.0105266833111044 and parameters: {'n_factors': 408, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.07786168461506666, 'lr_all': 0.0022629011166740887, 'reg_all': 0.015744985338277923}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0105


[32m[I 2021-06-08 20:37:41,844][0m Trial 70 finished with value: 1.4873739043280738 and parameters: {'n_factors': 90, 'n_epochs': 10, 'init_mean': 1, 'init_std_dev': 0.6330411466963866, 'lr_all': 0.0010397431902004555, 'reg_all': 0.020233885861977708}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.4874


[32m[I 2021-06-08 20:38:08,993][0m Trial 71 finished with value: 1.0089633687199737 and parameters: {'n_factors': 68, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.13596793295900067, 'lr_all': 0.0015970936935187606, 'reg_all': 0.03209255150523449}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0090


[32m[I 2021-06-08 20:38:33,884][0m Trial 72 finished with value: 1.0092569908911804 and parameters: {'n_factors': 49, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.12988836835837703, 'lr_all': 0.0020663212172496084, 'reg_all': 0.010487996286993333}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0093


[32m[I 2021-06-08 20:38:49,892][0m Trial 73 finished with value: 1.0047565232998492 and parameters: {'n_factors': 16, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.02057177987470496, 'lr_all': 0.0024806650586159406, 'reg_all': 0.004640986376064455}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0048


[32m[I 2021-06-08 20:39:06,292][0m Trial 74 finished with value: 1.0061389280666868 and parameters: {'n_factors': 16, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.02170380226542126, 'lr_all': 0.003009171519396136, 'reg_all': 0.005521732999457021}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0061


[32m[I 2021-06-08 20:39:19,130][0m Trial 75 finished with value: 1.0048200370573572 and parameters: {'n_factors': 7, 'n_epochs': 10, 'init_mean': 0, 'init_std_dev': 0.07472307179093442, 'lr_all': 0.0023706480718729155, 'reg_all': 0.0014973282948727382}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0048


[32m[I 2021-06-08 20:39:36,202][0m Trial 76 finished with value: 1.0066581256010598 and parameters: {'n_factors': 8, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.00262122328892793, 'lr_all': 0.0032205973072175296, 'reg_all': 0.0027224642312266715}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0067


[32m[I 2021-06-08 20:39:56,896][0m Trial 77 finished with value: 1.0053131804248268 and parameters: {'n_factors': 36, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.050615677864292524, 'lr_all': 0.0025734965100482456, 'reg_all': 6.28240815143703e-05}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0053


[32m[I 2021-06-08 20:40:19,107][0m Trial 78 finished with value: 1.0047404384639835 and parameters: {'n_factors': 35, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.05060266272909668, 'lr_all': 0.0025521978024147913, 'reg_all': 0.001026550851914247}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0047


[32m[I 2021-06-08 20:40:31,457][0m Trial 79 finished with value: 1.0071147172888633 and parameters: {'n_factors': 4, 'n_epochs': 10, 'init_mean': 1, 'init_std_dev': 0.04526762608007277, 'lr_all': 0.0025407347343476853, 'reg_all': 0.001409516388143882}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0071


[32m[I 2021-06-08 20:40:49,106][0m Trial 80 finished with value: 1.0060036443236158 and parameters: {'n_factors': 18, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.027316702936261556, 'lr_all': 0.002964098361972704, 'reg_all': 0.00018512576103626023}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0060


[32m[I 2021-06-08 20:41:12,817][0m Trial 81 finished with value: 1.0048239393399436 and parameters: {'n_factors': 43, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.055372016927198536, 'lr_all': 0.0025579165026934, 'reg_all': 0.008768074710881469}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0048


[32m[I 2021-06-08 20:41:34,361][0m Trial 82 finished with value: 1.013583091319093 and parameters: {'n_factors': 42, 'n_epochs': 11, 'init_mean': 0, 'init_std_dev': 0.16157990727434154, 'lr_all': 0.0025017133955693195, 'reg_all': 0.008726508572199723}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0136


[32m[I 2021-06-08 20:41:48,332][0m Trial 83 finished with value: 1.0052005810042621 and parameters: {'n_factors': 2, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.05925219767590817, 'lr_all': 0.0026748915143055114, 'reg_all': 0.0039422887625551895}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0052


[32m[I 2021-06-08 20:42:02,453][0m Trial 84 finished with value: 1.007007041368912 and parameters: {'n_factors': 4, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.05946397469349139, 'lr_all': 0.003346867846060382, 'reg_all': 0.004479490290278955}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0070


[32m[I 2021-06-08 20:42:26,186][0m Trial 85 finished with value: 1.004806339232225 and parameters: {'n_factors': 31, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.05019916773626695, 'lr_all': 0.0026439812146667094, 'reg_all': 0.012250828222686434}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0048


[32m[I 2021-06-08 20:42:48,884][0m Trial 86 finished with value: 1.0056522307772922 and parameters: {'n_factors': 26, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.0002790288425177506, 'lr_all': 0.002830436508343042, 'reg_all': 0.012385662417894332}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0057


[32m[I 2021-06-08 20:43:06,796][0m Trial 87 finished with value: 1.0043925611484412 and parameters: {'n_factors': 14, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.09363327198247129, 'lr_all': 0.002068691287496781, 'reg_all': 0.007706874648451244}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0044


[32m[I 2021-06-08 20:46:16,798][0m Trial 88 finished with value: 1.008068069621735 and parameters: {'n_factors': 496, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.06474380030406807, 'lr_all': 0.0021164051419712802, 'reg_all': 0.008638298256553729}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0081


[32m[I 2021-06-08 20:46:34,327][0m Trial 89 finished with value: 1.0754154632837634 and parameters: {'n_factors': 10, 'n_epochs': 14, 'init_mean': 1, 'init_std_dev': 0.42439215266920305, 'lr_all': 0.0027297106607091564, 'reg_all': 0.0035419820023629817}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0754


[32m[I 2021-06-08 20:47:03,386][0m Trial 90 finished with value: 1.0052586408020852 and parameters: {'n_factors': 58, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.02849865442810412, 'lr_all': 0.003082318383611086, 'reg_all': 0.007351034931442689}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0053


[32m[I 2021-06-08 20:47:32,727][0m Trial 91 finished with value: 1.0053946586455138 and parameters: {'n_factors': 60, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 0.03506196072578823, 'lr_all': 0.003175921697517797, 'reg_all': 0.006253483521909368}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0054


[32m[I 2021-06-08 20:48:04,507][0m Trial 92 finished with value: 1.008718008702808 and parameters: {'n_factors': 69, 'n_epochs': 13, 'init_mean': 0, 'init_std_dev': 6.82872414606954e-05, 'lr_all': 0.003997564098716887, 'reg_all': 0.011945813764196594}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0087


[32m[I 2021-06-08 20:48:29,052][0m Trial 93 finished with value: 1.009994394071025 and parameters: {'n_factors': 48, 'n_epochs': 12, 'init_mean': 0, 'init_std_dev': 0.12543676593008252, 'lr_all': 0.0023718274046828883, 'reg_all': 0.008218524195524268}. Best is trial 62 with value: 1.0040474358380413.[0m


RMSE: 1.0100


[32m[I 2021-06-08 20:48:44,494][0m Trial 94 finished with value: 1.0038693786106014 and parameters: {'n_factors': 2, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.03788221465311516, 'lr_all': 0.0020584712697595004, 'reg_all': 0.014562382454470047}. Best is trial 94 with value: 1.0038693786106014.[0m


RMSE: 1.0039


[32m[I 2021-06-08 20:49:07,827][0m Trial 95 finished with value: 1.0038025058306475 and parameters: {'n_factors': 30, 'n_epochs': 14, 'init_mean': 0, 'init_std_dev': 0.052150520280646796, 'lr_all': 0.00202934716100709, 'reg_all': 0.014927795931791398}. Best is trial 95 with value: 1.0038025058306475.[0m


RMSE: 1.0038


[32m[I 2021-06-08 20:49:35,054][0m Trial 96 finished with value: 1.0043978168185215 and parameters: {'n_factors': 32, 'n_epochs': 16, 'init_mean': 0, 'init_std_dev': 0.09172483174821101, 'lr_all': 0.0019507327301615458, 'reg_all': 0.015002056951906961}. Best is trial 95 with value: 1.0038025058306475.[0m


RMSE: 1.0044


[32m[I 2021-06-08 20:49:55,493][0m Trial 97 finished with value: 1.2950053240848938 and parameters: {'n_factors': 15, 'n_epochs': 15, 'init_mean': 5, 'init_std_dev': 0.08982026579776198, 'lr_all': 0.001921589977321658, 'reg_all': 0.017363181522037162}. Best is trial 95 with value: 1.0038025058306475.[0m


RMSE: 1.2950


[32m[I 2021-06-08 20:50:22,499][0m Trial 98 finished with value: 1.0068513496189553 and parameters: {'n_factors': 29, 'n_epochs': 16, 'init_mean': 0, 'init_std_dev': 0.15867181186752954, 'lr_all': 0.0012924029197044107, 'reg_all': 0.013754723284689864}. Best is trial 95 with value: 1.0038025058306475.[0m


RMSE: 1.0069


[32m[I 2021-06-08 20:53:44,940][0m Trial 99 finished with value: 1.0260055320411121 and parameters: {'n_factors': 403, 'n_epochs': 18, 'init_mean': 0, 'init_std_dev': 0.10646622297116701, 'lr_all': 0.00207936833314455, 'reg_all': 0.015159933774924939}. Best is trial 95 with value: 1.0038025058306475.[0m


RMSE: 1.0260


In [None]:
# NOTE: Trials are never pruned because surprise does not support partial fit,
# so the pruned count is reported only as a sanity check.
all_trials = study.trials
trial_state = optuna.trial.TrialState
pruned_trials = list(filter(lambda t: t.state == trial_state.PRUNED, all_trials))
complete_trials = list(filter(lambda t: t.state == trial_state.COMPLETE, all_trials))

# Summary of how many trials ended in each state.
print("Study statistics: ")
for label, trials in (
    ("  Number of finished trials: ", all_trials),
    ("  Number of pruned trials: ", pruned_trials),
    ("  Number of complete trials: ", complete_trials),
):
    print(label, len(trials))

# Report the objective value and hyperparameters of the best trial.
trial = study.best_trial
print("Best trial:")
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  0
  Number of complete trials:  100
Best trial:
  Value:  1.0038025058306475
  Params: 
    n_factors: 30
    n_epochs: 14
    init_mean: 0
    init_std_dev: 0.052150520280646796
    lr_all: 0.00202934716100709
    reg_all: 0.014927795931791398


In [None]:
# Fit the final unbiased SVD on the full training set, predict every
# (user, movie) pair of the test set and download the result as a CSV file.
from surprise import SVD

# Fixed seed for the factor initialisation so that re-running this cell
# reproduces the same submission file.
FINAL_MODEL_SEED = 0

test_pd = load_data(
    file_path=TEST_DATA_PATH,
    full_dataset=full_dataset,
    train_val_split=False
)

# The final model is trained on all of train_pd (no validation hold-out).
train_data = create_surprise_data(train_pd).build_full_trainset()
test_ids, test_data = create_dataset(test_pd, test_dataset=True)
test_ids = test_ids.to_numpy()

# Hyperparameters of the best trial reported by the Optuna study
# (objective value / RMSE 1.0038). They are hard-coded so that this cell does
# not require re-running the hyperparameter search.
n_factors = 30
n_epochs = 14
init_mean = 0
init_std_dev = 0.052150520280646796
lr_all = 0.00202934716100709
reg_all = 0.014927795931791398

algo = SVD(
    biased=False,
    n_factors=n_factors,
    n_epochs=n_epochs,
    init_mean=init_mean,
    init_std_dev=init_std_dev,
    lr_all=lr_all,
    reg_all=reg_all,
    random_state=FINAL_MODEL_SEED,
)

algo.fit(train_data)

# Each test sample unpacks into a (user, movie) pair; .item() turns each
# element into a plain Python scalar before it is passed to surprise.
predictions = [
    algo.predict(user.item(), movie.item()).est
    for user, movie in test_data
]

# Build the frame column by column so that ids and predictions keep their own
# dtypes instead of being coerced into one common-dtype array.
output = pd.DataFrame({"Id": test_ids, "Prediction": predictions})

output.to_csv("svd_unbiased_output.csv", index=False)
files.download("svd_unbiased_output.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>