In [None]:
!pip install scikit-surprise
!pip install optuna

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 294kB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1617671 sha256=f6eba63dd6b2cc40c7f10704d9447cbdd296c1995251350d379b72bf940b2b72
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1
Collecting optuna
[?25l  Downloading https://files.pythonhosted.org/packages/1a/18/b49ca91cf592747e19f2d333c2a86cd7c81895b922a5a09adf6335471576/optuna-2.8.0-py3-none-any.whl (301kB)
[K     |█████████████████

In [None]:
# Mount Google Drive and set data paths.
import os
from google.colab import drive
from google.colab import files

# Expose the Drive contents under /content/gdrive before any path is used.
drive.mount('/content/gdrive')

# Folder on Drive holding the project CSV files; both file paths derive from it.
DATA_PATH = "/content/gdrive/My Drive/ETH/Computational Intelligence Lab/CIL-Project/data"
TRAIN_DATA_PATH, TEST_DATA_PATH = (
    os.path.join(DATA_PATH, csv_name)
    for csv_name in ("data_train.csv", "data_test.csv")
)

Mounted at /content/gdrive


In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import TensorDataset
from sklearn.model_selection import train_test_split
import surprise
from surprise import accuracy
from surprise.model_selection import cross_validate
import optuna

In [None]:
def load_data(file_path: str, full_dataset: bool, train_val_split: bool, random_seed: int = 0, train_size: float = 0):
    """Read a ratings CSV and optionally split it into train and validation parts.

    Args:
        file_path: Path of the CSV file, passed to ``pd.read_csv``.
        full_dataset: If False, only the first 10000 rows are kept (quick test runs).
        train_val_split: If True, the rows are split with ``train_test_split``.
        random_seed: Forwarded to the split as ``random_state``.
        train_size: Forwarded to the split as ``train_size``. The default ``0`` is
            only a placeholder and must be overridden when ``train_val_split`` is True.

    Returns:
        ``(train_pd, val_pd)`` when ``train_val_split`` is True, otherwise the
        single loaded DataFrame.

    Raises:
        ValueError: If ``train_val_split`` is True and ``train_size`` is not positive.
    """
    # Fail fast with a clear message: the placeholder default of 0 is not a
    # usable split size and would otherwise only fail inside the splitter.
    if train_val_split and train_size is not None and train_size <= 0:
        raise ValueError(
            "train_size must be positive when train_val_split is True, got {!r}".format(train_size)
        )

    data_pd = pd.read_csv(file_path)

    # Reduce dataset for testing.
    if not full_dataset:
        data_pd = data_pd.head(10000)

    if not train_val_split:
        return data_pd

    train_pd, val_pd = train_test_split(data_pd, train_size=train_size, random_state=random_seed)
    return train_pd, val_pd

def __extract_users_items_ratings(data_pd: pd.DataFrame):
    """Split the ``Id`` column into zero-based user and movie indices.

    Each ``Id`` is matched against ``r<digits>_c<digits>``; the first number
    becomes the user index and the second the movie index, each reduced by one.

    Args:
        data_pd: DataFrame with an ``Id`` column of such strings and a
            ``Prediction`` column.

    Returns:
        Tuple ``(users, movies, ratings)`` of 1-D arrays with one entry per row;
        ``ratings`` is the ``Prediction`` column as-is.
    """
    # Raw string: `\d` inside a plain literal is an invalid escape sequence.
    ids = data_pd.Id.str.extract(r'r(\d+)_c(\d+)').values.astype(int) - 1
    # Column slicing keeps the result 1-D even for a single-row frame, where
    # squeezing the split columns would collapse them to 0-d scalars.
    users, movies = ids[:, 0], ids[:, 1]
    ratings = data_pd.Prediction.values
    return users, movies, ratings

def create_surprise_data(data_pd):
    """Wrap the ratings of ``data_pd`` in a Surprise dataset with a 1-5 rating scale.

    The frame is reduced to the columns ``users``, ``movies`` and ``ratings``
    (in that order) before it is handed to ``surprise.Dataset.load_from_df``.
    """
    user_ids, movie_ids, rating_values = __extract_users_items_ratings(data_pd)

    column_order = ['users', 'movies', 'ratings']
    ratings_frame = pd.DataFrame(
        dict(zip(column_order, (user_ids, movie_ids, rating_values)))
    )

    rating_reader = surprise.Reader(rating_scale=(1, 5))
    return surprise.Dataset.load_from_df(ratings_frame[column_order], reader=rating_reader)

def __get_tensors_from_dataframe(data_pd: pd.DataFrame):
    """Return the users, movies and ratings of ``data_pd`` as three int64 tensors."""
    extracted_columns = __extract_users_items_ratings(data_pd)
    users_torch, movies_torch, ratings_torch = (
        torch.tensor(column, dtype=torch.int64) for column in extracted_columns
    )
    return users_torch, movies_torch, ratings_torch


def create_dataset(data_pd: pd.DataFrame, test_dataset: bool = False):
    """Build a ``TensorDataset`` from a ratings DataFrame.

    Args:
        data_pd: DataFrame with ``Id`` and ``Prediction`` columns.
        test_dataset: Selects the test layout described under Returns.

    Returns:
        A ``TensorDataset`` of (user, movie, rating) when ``test_dataset`` is
        False; otherwise the tuple ``(Id column, TensorDataset of (user, movie))``.
    """
    user_t, movie_t, rating_t = __get_tensors_from_dataframe(data_pd)

    if test_dataset:
        # Test layout: ratings are left out and the Ids are returned alongside.
        return data_pd.Id, TensorDataset(user_t, movie_t)
    return TensorDataset(user_t, movie_t, rating_t)

In [None]:
# Run configuration.
random_seed = 42
full_dataset = True  # False keeps only the first rows of each CSV (see load_data)

# Seed NumPy's global random number generator.
np.random.seed(random_seed)

# Load every training rating as one DataFrame (no train/validation split here).
train_pd = load_data(TRAIN_DATA_PATH, full_dataset, train_val_split=False)

In [None]:
# Wrap the ratings for Surprise and hold out 20% of them to score each trial.
train_data = create_surprise_data(train_pd)
# Pass the seed explicitly: with the default random_state the split draws from
# NumPy's global RNG, so re-running this cell alone would give a different hold-out set.
trainset, testset = surprise.model_selection.train_test_split(
    train_data, test_size=0.2, random_state=random_seed
)

In [None]:
from surprise import NMF

def objective(trial):
    """Optuna objective: fit NMF on ``trainset`` and return its RMSE on ``testset``.

    Args:
        trial: Optuna trial used to sample ``biased``, ``n_factors``,
            ``n_epochs``, ``init_low`` and ``init_high``.

    Returns:
        RMSE of the fitted model's predictions on the hold-out ``testset``.
    """
    biased = trial.suggest_categorical("biased", [False, True])
    n_factors = trial.suggest_int("n_factors", 2, 100)
    n_epochs = trial.suggest_int("n_epochs", 10, 100)
    # NOTE(review): the two bounds are sampled independently, so a trial can
    # end up with init_low >= init_high -- confirm this search space is intended.
    init_low = trial.suggest_int("init_low", 0, 5)
    init_high = trial.suggest_int("init_high", 0, 5)

    # Fixed random_state: every trial starts from the same RNG seed, so a
    # trial's value is repeatable and differences come from the hyperparameters.
    algo = NMF(
        biased=biased,
        n_factors=n_factors,
        n_epochs=n_epochs,
        init_low=init_low,
        init_high=init_high,
        random_state=random_seed,
    )

    algo.fit(trainset)
    predictions = algo.test(testset)
    # verbose=False: Optuna already logs each trial's value, no need to print it twice.
    return accuracy.rmse(predictions, verbose=False)

In [None]:
# Seed the sampler explicitly so the sequence of suggested hyperparameters is
# repeatable; seeding NumPy's global RNG does not seed Optuna's sampler.
study = optuna.create_study(
    direction="minimize",
    sampler=optuna.samplers.TPESampler(seed=random_seed),
)

[32m[I 2021-06-08 19:04:01,053][0m A new study created in memory with name: no-name-4a5c1cdc-844c-46e1-80e6-95c7f4065b17[0m


In [None]:
# Queue one trial with the default parameters so the search starts from that baseline.
study.enqueue_trial(
    params={"biased": False, "n_factors": 15, "n_epochs": 50, "init_low": 0, "init_high": 1}
)


enqueue_trial is experimental (supported from v1.2.0). The interface can change in the future.


create_trial is experimental (supported from v2.0.0). The interface can change in the future.


add_trial is experimental (supported from v2.0.0). The interface can change in the future.



In [None]:
# Evaluate `objective` for 80 trials; the study keeps track of the lowest value seen.
study.optimize(objective, n_trials=80)

[32m[I 2021-06-08 19:05:11,125][0m Trial 0 finished with value: 1.0090560166643434 and parameters: {'biased': False, 'n_factors': 15, 'n_epochs': 50, 'init_low': 0, 'init_high': 1}. Best is trial 0 with value: 1.0090560166643434.[0m


RMSE: 1.0091


[32m[I 2021-06-08 19:08:13,184][0m Trial 1 finished with value: 1.0036458570903077 and parameters: {'biased': True, 'n_factors': 33, 'n_epochs': 98, 'init_low': 4, 'init_high': 4}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0036


[32m[I 2021-06-08 19:10:03,895][0m Trial 2 finished with value: 1.0814862694986065 and parameters: {'biased': True, 'n_factors': 85, 'n_epochs': 33, 'init_low': 0, 'init_high': 4}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0815


[32m[I 2021-06-08 19:15:44,190][0m Trial 3 finished with value: 1.1046080131509843 and parameters: {'biased': True, 'n_factors': 88, 'n_epochs': 98, 'init_low': 3, 'init_high': 1}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1046


[32m[I 2021-06-08 19:17:23,479][0m Trial 4 finished with value: 1.2076023909307116 and parameters: {'biased': True, 'n_factors': 58, 'n_epochs': 38, 'init_low': 0, 'init_high': 2}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.2076


[32m[I 2021-06-08 19:19:54,395][0m Trial 5 finished with value: 1.1949295997146336 and parameters: {'biased': True, 'n_factors': 39, 'n_epochs': 72, 'init_low': 0, 'init_high': 5}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1949


[32m[I 2021-06-08 19:20:35,792][0m Trial 6 finished with value: 1.4324113770348108 and parameters: {'biased': True, 'n_factors': 58, 'n_epochs': 15, 'init_low': 5, 'init_high': 5}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.4324


[32m[I 2021-06-08 19:25:56,152][0m Trial 7 finished with value: 1.0575800794804895 and parameters: {'biased': True, 'n_factors': 87, 'n_epochs': 92, 'init_low': 5, 'init_high': 3}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0576


[32m[I 2021-06-08 19:26:56,346][0m Trial 8 finished with value: 1.1391994677641315 and parameters: {'biased': True, 'n_factors': 45, 'n_epochs': 25, 'init_low': 2, 'init_high': 5}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1392


[32m[I 2021-06-08 19:29:37,536][0m Trial 9 finished with value: 1.129266929461411 and parameters: {'biased': True, 'n_factors': 70, 'n_epochs': 53, 'init_low': 3, 'init_high': 4}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1293


[32m[I 2021-06-08 19:31:06,022][0m Trial 10 finished with value: 1.115524911165342 and parameters: {'biased': False, 'n_factors': 6, 'n_epochs': 80, 'init_low': 4, 'init_high': 3}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1155


[32m[I 2021-06-08 19:32:25,756][0m Trial 11 finished with value: 1.1511433404914173 and parameters: {'biased': False, 'n_factors': 16, 'n_epochs': 56, 'init_low': 2, 'init_high': 0}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1511


[32m[I 2021-06-08 19:34:02,256][0m Trial 12 finished with value: 1.3154385926037828 and parameters: {'biased': False, 'n_factors': 26, 'n_epochs': 56, 'init_low': 4, 'init_high': 0}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.3154


[32m[I 2021-06-08 19:36:07,274][0m Trial 13 finished with value: 1.1876452985269745 and parameters: {'biased': False, 'n_factors': 28, 'n_epochs': 71, 'init_low': 1, 'init_high': 1}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1876


[32m[I 2021-06-08 19:36:55,534][0m Trial 14 finished with value: 1.6489080464018155 and parameters: {'biased': False, 'n_factors': 4, 'n_epochs': 45, 'init_low': 4, 'init_high': 2}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.6489


[32m[I 2021-06-08 19:38:33,557][0m Trial 15 finished with value: 1.1800473328889065 and parameters: {'biased': False, 'n_factors': 18, 'n_epochs': 67, 'init_low': 1, 'init_high': 1}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1800


[32m[I 2021-06-08 19:41:26,588][0m Trial 16 finished with value: 1.0842559374747054 and parameters: {'biased': False, 'n_factors': 38, 'n_epochs': 86, 'init_low': 3, 'init_high': 4}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0843


[32m[I 2021-06-08 19:41:42,541][0m Trial 17 finished with value: 1.5971405493085367 and parameters: {'biased': False, 'n_factors': 15, 'n_epochs': 10, 'init_low': 5, 'init_high': 2}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.5971


[32m[I 2021-06-08 19:43:31,808][0m Trial 18 finished with value: 1.0313210799905268 and parameters: {'biased': True, 'n_factors': 28, 'n_epochs': 62, 'init_low': 1, 'init_high': 3}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0313


[32m[I 2021-06-08 19:44:24,840][0m Trial 19 finished with value: 1.5265915506555 and parameters: {'biased': False, 'n_factors': 8, 'n_epochs': 45, 'init_low': 4, 'init_high': 0}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.5266


[32m[I 2021-06-08 19:45:13,135][0m Trial 20 finished with value: 1.5969387822039764 and parameters: {'biased': False, 'n_factors': 35, 'n_epochs': 24, 'init_low': 2, 'init_high': 4}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.5969


[32m[I 2021-06-08 19:46:59,945][0m Trial 21 finished with value: 1.0169490844860345 and parameters: {'biased': True, 'n_factors': 24, 'n_epochs': 63, 'init_low': 1, 'init_high': 3}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0169


[32m[I 2021-06-08 19:49:06,692][0m Trial 22 finished with value: 1.0188579388533174 and parameters: {'biased': True, 'n_factors': 21, 'n_epochs': 81, 'init_low': 1, 'init_high': 3}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0189


[32m[I 2021-06-08 19:50:35,831][0m Trial 23 finished with value: 1.0743189253670882 and parameters: {'biased': True, 'n_factors': 32, 'n_epochs': 46, 'init_low': 0, 'init_high': 4}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0743


[32m[I 2021-06-08 19:54:22,469][0m Trial 24 finished with value: 1.107472510720291 and parameters: {'biased': True, 'n_factors': 47, 'n_epochs': 99, 'init_low': 1, 'init_high': 2}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.1075


[32m[I 2021-06-08 19:55:44,054][0m Trial 25 finished with value: 1.014963662997939 and parameters: {'biased': True, 'n_factors': 11, 'n_epochs': 63, 'init_low': 0, 'init_high': 3}. Best is trial 1 with value: 1.0036458570903077.[0m


RMSE: 1.0150


[32m[I 2021-06-08 19:57:21,157][0m Trial 26 finished with value: 1.0011956014445267 and parameters: {'biased': True, 'n_factors': 11, 'n_epochs': 76, 'init_low': 0, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0012


[32m[I 2021-06-08 19:58:52,578][0m Trial 27 finished with value: 1.0043393498531548 and parameters: {'biased': True, 'n_factors': 3, 'n_epochs': 91, 'init_low': 2, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0043


[32m[I 2021-06-08 20:00:20,806][0m Trial 28 finished with value: 1.003662126256191 and parameters: {'biased': True, 'n_factors': 2, 'n_epochs': 91, 'init_low': 3, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0037


[32m[I 2021-06-08 20:01:59,857][0m Trial 29 finished with value: 1.0073386562048856 and parameters: {'biased': True, 'n_factors': 11, 'n_epochs': 78, 'init_low': 3, 'init_high': 0}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0073


[32m[I 2021-06-08 20:05:48,308][0m Trial 30 finished with value: 1.100057197046043 and parameters: {'biased': True, 'n_factors': 54, 'n_epochs': 93, 'init_low': 4, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.1001


[32m[I 2021-06-08 20:07:17,965][0m Trial 31 finished with value: 1.0032878057315486 and parameters: {'biased': True, 'n_factors': 2, 'n_epochs': 90, 'init_low': 3, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0033


[32m[I 2021-06-08 20:08:49,074][0m Trial 32 finished with value: 1.0060645887858277 and parameters: {'biased': True, 'n_factors': 4, 'n_epochs': 86, 'init_low': 3, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0061


[32m[I 2021-06-08 20:10:29,466][0m Trial 33 finished with value: 1.003925704121278 and parameters: {'biased': True, 'n_factors': 2, 'n_epochs': 100, 'init_low': 3, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0039


[32m[I 2021-06-08 20:12:36,797][0m Trial 34 finished with value: 1.0265488164897083 and parameters: {'biased': True, 'n_factors': 13, 'n_epochs': 95, 'init_low': 4, 'init_high': 0}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0265


[32m[I 2021-06-08 20:16:58,764][0m Trial 35 finished with value: 1.214142279514267 and parameters: {'biased': True, 'n_factors': 74, 'n_epochs': 86, 'init_low': 3, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.2141


[32m[I 2021-06-08 20:18:52,417][0m Trial 36 finished with value: 1.0170083891975894 and parameters: {'biased': True, 'n_factors': 20, 'n_epochs': 75, 'init_low': 4, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0170


[32m[I 2021-06-08 20:21:01,611][0m Trial 37 finished with value: 1.0104121805223976 and parameters: {'biased': True, 'n_factors': 11, 'n_epochs': 100, 'init_low': 2, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0104


[32m[I 2021-06-08 20:24:16,183][0m Trial 38 finished with value: 1.027339417727558 and parameters: {'biased': True, 'n_factors': 45, 'n_epochs': 87, 'init_low': 3, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0273


[32m[I 2021-06-08 20:25:49,195][0m Trial 39 finished with value: 1.0031072819226596 and parameters: {'biased': True, 'n_factors': 2, 'n_epochs': 96, 'init_low': 5, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0031


[32m[I 2021-06-08 20:31:37,262][0m Trial 40 finished with value: 1.0068518231862362 and parameters: {'biased': True, 'n_factors': 97, 'n_epochs': 97, 'init_low': 5, 'init_high': 5}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0069


[32m[I 2021-06-08 20:33:16,497][0m Trial 41 finished with value: 1.0062867448844626 and parameters: {'biased': True, 'n_factors': 7, 'n_epochs': 90, 'init_low': 5, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0063


[32m[I 2021-06-08 20:34:39,100][0m Trial 42 finished with value: 1.0046315324053534 and parameters: {'biased': True, 'n_factors': 3, 'n_epochs': 83, 'init_low': 5, 'init_high': 0}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0046


[32m[I 2021-06-08 20:38:52,846][0m Trial 43 finished with value: 1.0355451023712994 and parameters: {'biased': True, 'n_factors': 63, 'n_epochs': 94, 'init_low': 4, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0355


[32m[I 2021-06-08 20:40:19,159][0m Trial 44 finished with value: 1.0118629089457047 and parameters: {'biased': True, 'n_factors': 8, 'n_epochs': 76, 'init_low': 3, 'init_high': 0}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0119


[32m[I 2021-06-08 20:41:56,769][0m Trial 45 finished with value: 2.541440950739879 and parameters: {'biased': True, 'n_factors': 2, 'n_epochs': 100, 'init_low': 4, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 2.5414


[32m[I 2021-06-08 20:44:19,127][0m Trial 46 finished with value: 1.0248904973601993 and parameters: {'biased': True, 'n_factors': 23, 'n_epochs': 90, 'init_low': 4, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0249


[32m[I 2021-06-08 20:46:10,611][0m Trial 47 finished with value: 1.0132509292488308 and parameters: {'biased': True, 'n_factors': 15, 'n_epochs': 82, 'init_low': 5, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0133


[32m[I 2021-06-08 20:48:19,105][0m Trial 48 finished with value: 1.0248793670249259 and parameters: {'biased': True, 'n_factors': 31, 'n_epochs': 71, 'init_low': 3, 'init_high': 0}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0249


[32m[I 2021-06-08 20:51:30,312][0m Trial 49 finished with value: 1.097063614893864 and parameters: {'biased': True, 'n_factors': 39, 'n_epochs': 94, 'init_low': 2, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0971


[32m[I 2021-06-08 20:53:13,347][0m Trial 50 finished with value: 1.0144726706294225 and parameters: {'biased': True, 'n_factors': 9, 'n_epochs': 89, 'init_low': 4, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0145


[32m[I 2021-06-08 20:54:46,694][0m Trial 51 finished with value: 1.0045788057188487 and parameters: {'biased': True, 'n_factors': 2, 'n_epochs': 97, 'init_low': 3, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0046


[32m[I 2021-06-08 20:57:11,432][0m Trial 52 finished with value: 1.015183110549794 and parameters: {'biased': True, 'n_factors': 19, 'n_epochs': 99, 'init_low': 3, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0152


[32m[I 2021-06-08 20:58:32,730][0m Trial 53 finished with value: 1.0035178568290242 and parameters: {'biased': True, 'n_factors': 2, 'n_epochs': 84, 'init_low': 3, 'init_high': 1}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0035


[32m[I 2021-06-08 21:00:02,998][0m Trial 54 finished with value: 1.0030470475507896 and parameters: {'biased': True, 'n_factors': 6, 'n_epochs': 83, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0030


[32m[I 2021-06-08 21:01:52,908][0m Trial 55 finished with value: 1.0070594395190784 and parameters: {'biased': True, 'n_factors': 16, 'n_epochs': 79, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0071


[32m[I 2021-06-08 21:03:25,368][0m Trial 56 finished with value: 1.006078212398182 and parameters: {'biased': True, 'n_factors': 6, 'n_epochs': 85, 'init_low': 5, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0061


[32m[I 2021-06-08 21:05:14,800][0m Trial 57 finished with value: 1.008196878464303 and parameters: {'biased': True, 'n_factors': 12, 'n_epochs': 83, 'init_low': 0, 'init_high': 3}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0082


[32m[I 2021-06-08 21:07:17,481][0m Trial 58 finished with value: 1.002601777819685 and parameters: {'biased': True, 'n_factors': 26, 'n_epochs': 74, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0026


[32m[I 2021-06-08 21:08:30,014][0m Trial 59 finished with value: 1.002560430051827 and parameters: {'biased': True, 'n_factors': 6, 'n_epochs': 67, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0026


[32m[I 2021-06-08 21:10:24,899][0m Trial 60 finished with value: 1.004018574015128 and parameters: {'biased': True, 'n_factors': 27, 'n_epochs': 68, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0040


[32m[I 2021-06-08 21:11:45,454][0m Trial 61 finished with value: 1.0048675869763726 and parameters: {'biased': True, 'n_factors': 6, 'n_epochs': 75, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0049


[32m[I 2021-06-08 21:13:18,022][0m Trial 62 finished with value: 1.0249576611320983 and parameters: {'biased': True, 'n_factors': 16, 'n_epochs': 67, 'init_low': 1, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0250


[32m[I 2021-06-08 21:14:41,289][0m Trial 63 finished with value: 1.0033260190369366 and parameters: {'biased': True, 'n_factors': 9, 'n_epochs': 72, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0033


[32m[I 2021-06-08 21:15:51,872][0m Trial 64 finished with value: 1.0099643689305786 and parameters: {'biased': True, 'n_factors': 9, 'n_epochs': 61, 'init_low': 2, 'init_high': 3}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0100


[32m[I 2021-06-08 21:17:22,792][0m Trial 65 finished with value: 1.0166126596053118 and parameters: {'biased': True, 'n_factors': 13, 'n_epochs': 70, 'init_low': 1, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0166


[32m[I 2021-06-08 21:18:41,287][0m Trial 66 finished with value: 1.0032318474940356 and parameters: {'biased': True, 'n_factors': 6, 'n_epochs': 73, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0032


[32m[I 2021-06-08 21:19:43,659][0m Trial 67 finished with value: 1.0122812650048678 and parameters: {'biased': True, 'n_factors': 5, 'n_epochs': 59, 'init_low': 2, 'init_high': 3}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0123


[32m[I 2021-06-08 21:21:26,112][0m Trial 68 finished with value: 1.0219017004099364 and parameters: {'biased': True, 'n_factors': 23, 'n_epochs': 65, 'init_low': 1, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0219


[32m[I 2021-06-08 21:23:14,986][0m Trial 69 finished with value: 1.0070594395190784 and parameters: {'biased': True, 'n_factors': 16, 'n_epochs': 79, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0071


[32m[I 2021-06-08 21:24:54,703][0m Trial 70 finished with value: 1.1542235690749865 and parameters: {'biased': False, 'n_factors': 13, 'n_epochs': 77, 'init_low': 1, 'init_high': 3}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.1542


[32m[I 2021-06-08 21:26:23,021][0m Trial 71 finished with value: 1.0029467691737486 and parameters: {'biased': True, 'n_factors': 9, 'n_epochs': 76, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0029


[32m[I 2021-06-08 21:27:45,779][0m Trial 72 finished with value: 1.003060303243588 and parameters: {'biased': True, 'n_factors': 7, 'n_epochs': 75, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0031


[32m[I 2021-06-08 21:29:04,157][0m Trial 73 finished with value: 1.0032318474940356 and parameters: {'biased': True, 'n_factors': 6, 'n_epochs': 73, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0032


[32m[I 2021-06-08 21:30:42,199][0m Trial 74 finished with value: 1.002961561595438 and parameters: {'biased': True, 'n_factors': 18, 'n_epochs': 68, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0030


[32m[I 2021-06-08 21:32:24,250][0m Trial 75 finished with value: 1.071526312796275 and parameters: {'biased': True, 'n_factors': 20, 'n_epochs': 68, 'init_low': 2, 'init_high': 3}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0715


[32m[I 2021-06-08 21:33:58,801][0m Trial 76 finished with value: 1.0029885652301067 and parameters: {'biased': True, 'n_factors': 18, 'n_epochs': 65, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0030


[32m[I 2021-06-08 21:35:21,910][0m Trial 77 finished with value: 1.0032963897399954 and parameters: {'biased': True, 'n_factors': 18, 'n_epochs': 56, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0033


[32m[I 2021-06-08 21:37:09,267][0m Trial 78 finished with value: 1.0027021843415784 and parameters: {'biased': True, 'n_factors': 25, 'n_epochs': 65, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0027


[32m[I 2021-06-08 21:39:13,274][0m Trial 79 finished with value: 1.0034867543017707 and parameters: {'biased': True, 'n_factors': 34, 'n_epochs': 65, 'init_low': 2, 'init_high': 2}. Best is trial 26 with value: 1.0011956014445267.[0m


RMSE: 1.0035


In [None]:
# NOTE: No trial is pruned in this study since surprise does not support partial fit.
def _trials_in_state(state):
    """Return the study's trials whose state equals ``state``."""
    return [t for t in study.trials if t.state == state]


pruned_trials = _trials_in_state(optuna.trial.TrialState.PRUNED)
complete_trials = _trials_in_state(optuna.trial.TrialState.COMPLETE)

# Overall counts per trial state.
print("Study statistics: ")
print("  Number of finished trials: ", len(study.trials))
print("  Number of pruned trials: ", len(pruned_trials))
print("  Number of complete trials: ", len(complete_trials))

# Best trial: its objective value followed by one line per hyperparameter.
print("Best trial:")
trial = study.best_trial
print("  Value: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))

Study statistics: 
  Number of finished trials:  80
  Number of pruned trials:  0
  Number of complete trials:  80
Best trial:
  Value:  1.0011956014445267
  Params: 
    biased: True
    n_factors: 11
    n_epochs: 76
    init_low: 0
    init_high: 1


In [None]:
from surprise import NMF

test_pd = load_data(
    file_path=TEST_DATA_PATH,
    full_dataset=full_dataset,
    train_val_split=False
)

# Use a separate name for the full trainset: rebinding `train_data` (a Surprise
# Dataset) to a Trainset would break re-running the train/test split cell.
full_trainset = create_surprise_data(train_pd).build_full_trainset()
test_ids, test_data = create_dataset(test_pd, test_dataset=True)
test_ids = test_ids.to_numpy()

# Hyperparameters of the best trial reported by the study, copied here so this
# cell can be run without repeating the search.
biased = True
n_factors = 11
n_epochs = 76
init_low = 0
init_high = 1

# random_state makes the final fit, and therefore the submission file, repeatable.
algo = NMF(
    biased=biased,
    n_factors=n_factors,
    n_epochs=n_epochs,
    init_low=init_low,
    init_high=init_high,
    random_state=random_seed,
)

algo.fit(full_trainset)

# One estimated rating per (user, movie) pair of the test set, in file order.
predictions = [
    algo.predict(user.item(), movie.item()).est
    for user, movie in test_data
]

# Build the frame column by column so Ids stay strings and predictions stay
# floats, instead of stacking both into a single mixed-type array.
submission = pd.DataFrame({"Id": test_ids, "Prediction": predictions})
submission.to_csv("nmf_output.csv", index=False)
files.download("nmf_output.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>