# Recommender Systems 2022/23

## Import libraries

In [None]:
! pip install bayesian-optimization

In [None]:
__author__ = 'Shalby Hazem and Giuseppe Urso'
__status__ = 'Dev'

import os
from typing import Tuple, Callable, Dict, Optional, List

import numpy as np
import pandas as pd
import scipy.sparse as sp
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split
from bayes_opt import BayesianOptimization

import matplotlib.pyplot as pyplot
%matplotlib inline  


## Dataset Loading with pandas

interactions_and_impressions.csv : Contains the training set, describing implicit preferences expressed by the users.
- user_id : identifier of the user
- item_id : identifier of the item (TV series)
- impression_list : string containing the items that were present on the screen when the user interacted with the item in column item_id. Not all interactions have a corresponding impressions list.
- data : "0" if the user watched the item, "1" if the user opened the item details page.

In [None]:
# Clone the entire repo.
!git clone -l -s https://github.com/MaurizioFD/RecSys_Course_AT_PoliMi.git RecSys_Course_AT_PoliMi

In [None]:
%cd /kaggle/working/RecSys_Course_AT_PoliMi
!python run_compile_all_cython.py

In [None]:
def load_data(path="./starting_kit/interactions_and_impressions.csv"):
    """Load the interaction log into a DataFrame.

    The CSV's own header row is discarded (``header=0``) and the four columns
    are renamed, by position, to ``UserID``, ``ItemID``, ``Impressions`` and
    ``Data``.

    Args:
        path: Location of the CSV file. Defaults to the starting-kit file,
            resolved relative to the current working directory.

    Returns:
        A ``pandas.DataFrame`` with one row per CSV record.
    """
    return pd.read_csv(
        path,
        sep=",",
        names=["UserID", "ItemID", "Impressions", "Data"],
        header=0,
    )

In [None]:
def load_target_list(path="./starting_kit/data_target_users_test.csv"):
    """Load the list of users for which recommendations must be produced.

    The CSV's own header row is discarded (``header=0``) and its single column
    is renamed to ``UserID``.

    Args:
        path: Location of the CSV file. Defaults to the starting-kit file,
            resolved relative to the current working directory.

    Returns:
        A ``pandas.DataFrame`` with a single ``UserID`` column.
    """
    return pd.read_csv(
        path,
        sep=",",
        names=["UserID"],
        header=0,
    )

In [None]:
def load_icm(path="./starting_kit/data_ICM_type.csv"):
    """Load the item-feature table used to build the ICM.

    The CSV's own header row is discarded (``header=0``) and the three columns
    are renamed, by position, to ``ItemID``, ``FeatureID`` and ``Data``.

    Args:
        path: Location of the CSV file. Defaults to the starting-kit file,
            resolved relative to the current working directory.

    Returns:
        A ``pandas.DataFrame`` with one row per (item, feature) record.
    """
    return pd.read_csv(
        path,
        sep=",",
        names=["ItemID", "FeatureID", "Data"],
        header=0,
    )

In [None]:
%cd /kaggle/working/RecSys
# Read the three competition files into DataFrames. The loaders use relative
# paths, so they resolve against the working directory set by the %cd above.
ratings = load_data()
targets = load_target_list()
icm_dataframe = load_icm()

In [None]:
ratings

In [None]:
targets

In [None]:
icm_dataframe

In [None]:
ratings['UserID'].nunique()

In [None]:
# Keep only the columns used to build the URM; the Impressions column is dropped.
ratings = ratings[['UserID','ItemID','Data']]
ratings.head(20)

In [None]:
# Overwrite Data with the constant 1: every logged interaction, whatever its
# original code, is treated as the same positive implicit signal.
ratings = ratings.assign(Data = 1)

In [None]:
ratings

In [None]:
# Distinct user and item identifiers that appear in the interactions.
userID_unique = ratings.UserID.unique()
itemID_unique = ratings["ItemID"].unique()

In [None]:
# Number of distinct users/items and number of interaction rows.
# NOTE(review): n_interactions is taken before the drop_duplicates call in a
# later cell, so it still counts repeated rows.
n_users = len(userID_unique)
n_items = len(itemID_unique)
n_interactions = len(ratings)

# If the max IDs printed below are not exactly (count - 1), the IDs are not a
# dense 0..n-1 range and the counts cannot be used as matrix dimensions.
print ("Number of items\t {}, Number of users\t {}".format(n_items, n_users))
print ("Max ID items\t {}, Max Id users\t {}\n".format(max(itemID_unique), max(userID_unique)))

In [None]:
# Remove fully identical rows and renumber the index. With Data already set to
# a constant, this leaves at most one row per (UserID, ItemID) pair.
ratings = ratings.drop_duplicates(ignore_index=True)
ratings

In [None]:
# Build the user-item matrix. Rows/columns are indexed directly by UserID/ItemID,
# so each dimension must be (largest ID + 1). Using the number of distinct IDs is
# only equivalent when the IDs form a dense 0..n-1 range; otherwise coo_matrix
# rejects the out-of-range indices.
URM_all = sp.coo_matrix(
    (ratings["Data"].values, (ratings["UserID"].values, ratings["ItemID"].values)),
    shape=(ratings["UserID"].max() + 1, ratings["ItemID"].max() + 1),
)
URM_all

# Item-feature matrix. No shape is given, so scipy infers it from the largest
# ItemID / FeatureID present in the ICM file.
# NOTE(review): its row count may therefore differ from URM_all's column count.
ICM_all = sp.coo_matrix((icm_dataframe["Data"].values,(icm_dataframe["ItemID"].values, icm_dataframe["FeatureID"].values)))

In [None]:
%cd /kaggle/working/RecSys_Course_AT_PoliMi

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample

In [None]:
# Hold-out split of the interactions with train_percentage = 0.85; the
# validation part feeds an evaluator that scores recommendation lists at cutoff 10.
URM_train, URM_validation = split_train_in_two_percentage_global_sample(URM_all, train_percentage = 0.85)
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])


# NOTE(review): the same split is applied to the item-feature matrix and an
# evaluator is built on ICM_validation. ICM_train, ICM_validation and
# evaluator_test are not used anywhere else in the visible notebook, and an
# evaluator over item features (rather than user interactions) looks
# unintended -- confirm before relying on evaluator_test.
ICM_train, ICM_validation = split_train_in_two_percentage_global_sample(ICM_all, train_percentage = 0.85)
evaluator_test = EvaluatorHoldout(ICM_validation, cutoff_list=[10])

In [None]:
# Replace the matrices computed above with previously saved .npz versions.
# Presumably this keeps the train/validation split identical across sessions,
# since the split performed in the notebook is random -- TODO confirm.
from scipy import sparse
URM_train = sparse.load_npz("/kaggle/input/matrices/URM_train.npz")
URM_validation = sparse.load_npz("/kaggle/input/matrices/URM_validation.npz")
URM_all = sparse.load_npz("/kaggle/input/matrices/URM_all.npz")

In [None]:
%cd /kaggle/working/RecSys_Course_AT_PoliMi

from Evaluation.Evaluator import EvaluatorHoldout
from Data_manager.split_functions.split_train_validation_random_holdout import split_train_in_two_percentage_global_sample
# Rebuild the evaluator so that it refers to the current URM_validation
# (the one loaded from disk when the previous cell has been run).
evaluator_validation = EvaluatorHoldout(URM_validation, cutoff_list=[10])

## METRICS

In [None]:
def recall(recommendations: np.ndarray, relevant_items: np.ndarray) -> float:
    """Return the fraction of relevant items that appear in the recommendations.

    Args:
        recommendations: 1-D array of recommended item IDs, without duplicates.
        relevant_items: 1-D array of ground-truth item IDs, without duplicates.

    Returns:
        ``hits / len(relevant_items)``, or ``0.0`` when there are no relevant
        items (instead of a NaN from a zero division).
    """
    if relevant_items.shape[0] == 0:
        return 0.0

    # np.isin supersedes the deprecated np.in1d; both inputs are expected to be
    # duplicate-free, hence assume_unique=True.
    is_relevant = np.isin(recommendations, relevant_items, assume_unique=True)
    return np.sum(is_relevant) / relevant_items.shape[0]
    
def precision(recommendations: np.ndarray, relevant_items: np.ndarray) -> float:
    """Return the fraction of recommended items that are relevant.

    Args:
        recommendations: 1-D array of recommended item IDs, without duplicates.
        relevant_items: 1-D array of ground-truth item IDs, without duplicates.

    Returns:
        ``hits / len(recommendations)``, or ``0.0`` when the recommendation
        list is empty (instead of a NaN from a zero division).
    """
    if recommendations.shape[0] == 0:
        return 0.0

    # np.isin supersedes the deprecated np.in1d; both inputs are expected to be
    # duplicate-free, hence assume_unique=True.
    is_relevant = np.isin(recommendations, relevant_items, assume_unique=True)
    return np.sum(is_relevant) / recommendations.shape[0]

def mean_average_precision(recommendations: np.ndarray, relevant_items: np.ndarray) -> float:
    """Return the average precision of one ranked recommendation list.

    Precision is accumulated at every rank that holds a relevant item and the
    sum is divided by ``min(len(relevant_items), len(recommendations))``.

    Args:
        recommendations: 1-D array of recommended item IDs in rank order,
            without duplicates.
        relevant_items: 1-D array of ground-truth item IDs, without duplicates.

    Returns:
        The average precision, or ``0.0`` when either array is empty (instead
        of a NaN from a zero division).
    """
    normaliser = min(relevant_items.shape[0], recommendations.shape[0])
    if normaliser == 0:
        return 0.0

    # np.isin supersedes the deprecated np.in1d; both inputs are expected to be
    # duplicate-free, hence assume_unique=True.
    is_relevant = np.isin(recommendations, relevant_items, assume_unique=True)

    # cumsum counts the hits so far; multiplying by is_relevant keeps the
    # precision value only at the ranks where a hit occurs.
    ranks = 1 + np.arange(is_relevant.shape[0])
    precision_at_k = is_relevant * np.cumsum(is_relevant, dtype=np.float32) / ranks
    return np.sum(precision_at_k) / normaliser

In [None]:
def evaluator(recommender: object, urm_train: sp.csr_matrix, urm_test: sp.csr_matrix):
    """Average precision, recall and MAP at cutoff 10 over the test users.

    For every row index of ``urm_train`` the relevant items are read from the
    matching row of ``urm_test`` (through its ``indptr``/``indices`` arrays).
    Users without test items are skipped; for the others a list of 10 items is
    requested from ``recommender.recommend`` and scored.

    Returns:
        ``(precision, recall, map, num_users_evaluated, num_users_skipped)``,
        where the three metrics are means over the evaluated users (0 when no
        user could be evaluated).
    """
    cutoff = 10
    precision_total = 0
    recall_total = 0
    map_total = 0
    evaluated = 0
    skipped = 0

    for user_id in tqdm(range(urm_train.shape[0])):
        row_start = urm_test.indptr[user_id]
        row_end = urm_test.indptr[user_id + 1]
        relevant_items = urm_test.indices[row_start:row_end]

        # Nothing to score for users with an empty test profile.
        if relevant_items.size == 0:
            skipped += 1
            continue

        recommendations = np.asarray(
            recommender.recommend(user_id_array=user_id, cutoff=cutoff)
        )

        precision_total += precision(recommendations, relevant_items)
        recall_total += recall(recommendations, relevant_items)
        map_total += mean_average_precision(recommendations, relevant_items)
        evaluated += 1

    # Guard against a division by zero when every user was skipped.
    denominator = max(evaluated, 1)
    return (
        precision_total / denominator,
        recall_total / denominator,
        map_total / denominator,
        evaluated,
        skipped,
    )

## SLIM ELASTIC NET

In [None]:
!pip install shared-memory38

In [None]:
from shared_memory import *

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
@author: Massimo Quadrana, Cesare Bernardis
"""


import numpy as np
import scipy.sparse as sps
from Recommenders.Recommender_utils import check_matrix
from sklearn.linear_model import ElasticNet
from Recommenders.BaseSimilarityMatrixRecommender import BaseItemSimilarityMatrixRecommender
from Recommenders.Similarity.Compute_Similarity_Python import Incremental_Similarity_Builder
from Utils.seconds_to_biggest_unit import seconds_to_biggest_unit
import time, sys
from tqdm import tqdm
from sklearn.utils._testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning

# os.environ["PYTHONWARNINGS"] = ('ignore::exceptions.ConvergenceWarning:sklearn.linear_model')
# os.environ["PYTHONWARNINGS"] = ('ignore:Objective did not converge:ConvergenceWarning:')

class SLIMElasticNetRecommender(BaseItemSimilarityMatrixRecommender):
    """
    Train a Sparse Linear Methods (SLIM) item similarity model.

    One ElasticNet regression is fitted per item, one item after the other:
    the item's URM column is the target and the URM with that column zeroed
    is the feature matrix. The learned coefficients become one column of the
    item-item weight matrix ``W_sparse``.

    NOTE (from the original authors, not verifiable from this file): ElasticNet
          solver is parallel, a single instance of SLIM_ElasticNet will
          make use of half the cores available
    See:
        Efficient Top-N Recommendation by Linear Regression,
        M. Levy and K. Jack, LSRS workshop at RecSys 2013.
        SLIM: Sparse linear methods for top-n recommender systems,
        X. Ning and G. Karypis, ICDM 2011.
        http://glaros.dtc.umn.edu/gkhome/fetch/papers/SLIM2011icdm.pdf
    """

    RECOMMENDER_NAME = "SLIMElasticNetRecommender"

    def __init__(self, URM_train, verbose = True):
        super(SLIMElasticNetRecommender, self).__init__(URM_train, verbose = verbose)

    @ignore_warnings(category=ConvergenceWarning)
    def fit(self, l1_ratio=0.1, alpha = 1.0, positive_only=True, topK = 100):
        """Fit one ElasticNet model per item and store the weights in ``self.W_sparse``.

        :param l1_ratio: ElasticNet mixing parameter, must lie in [0, 1].
        :param alpha: ElasticNet regularization strength.
        :param positive_only: if True, coefficients are constrained to be non-negative.
        :param topK: maximum number of coefficients kept per item; when a model
            has more non-zero coefficients, the ``topK`` largest in absolute value are kept.
        """

        assert l1_ratio>= 0 and l1_ratio<=1, "{}: l1_ratio must be between 0 and 1, provided value was {}".format(self.RECOMMENDER_NAME, l1_ratio)

        # alpha is stored as well, consistently with the other hyperparameters
        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK


        # initialize the ElasticNet model
        self.model = ElasticNet(alpha=self.alpha,
                                l1_ratio=self.l1_ratio,
                                positive=self.positive_only,
                                fit_intercept=False,
                                copy_X=False,
                                precompute=True,
                                selection='random',
                                max_iter=100,
                                tol=1e-4)

        # CSC layout: the values of one item (column) are contiguous in .data
        URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)

        n_items = URM_train.shape[1]

        similarity_builder = Incremental_Similarity_Builder(self.n_items, initial_data_block=self.n_items*self.topK, dtype = np.float32)

        start_time = time.time()
        start_time_printBatch = start_time

        # fit each item's factors sequentially (not in parallel)
        for currentItem in range(n_items):

            # get the target column
            y = URM_train[:, currentItem].toarray()

            # set the j-th column of X to zero, so that an item cannot be used to predict itself
            start_pos = URM_train.indptr[currentItem]
            end_pos = URM_train.indptr[currentItem + 1]

            current_item_data_backup = URM_train.data[start_pos: end_pos].copy()
            URM_train.data[start_pos: end_pos] = 0.0

            try:
                # fit one ElasticNet model per column
                self.model.fit(URM_train, y)
            finally:
                # always put the original values of the j-th column back, even if the
                # solver raises, so the matrix is never left with a zeroed column
                URM_train.data[start_pos:end_pos] = current_item_data_backup

            # self.model.coef_ contains the coefficient of the ElasticNet model
            # let's keep only the non-zero values (sparse_coef_ is read once and reused)
            sparse_coef = self.model.sparse_coef_
            nonzero_model_coef_index = sparse_coef.indices
            nonzero_model_coef_value = sparse_coef.data

            # Check if there are more data points than topK, if so, extract the set of K best values
            if len(nonzero_model_coef_value) > self.topK:
                # Partition the data because this operation does not require to fully sort the data
                relevant_items_partition = np.argpartition(-np.abs(nonzero_model_coef_value), self.topK-1, axis=0)[0:self.topK]
                nonzero_model_coef_index = nonzero_model_coef_index[relevant_items_partition]
                nonzero_model_coef_value = nonzero_model_coef_value[relevant_items_partition]

            similarity_builder.add_data_lists(row_list_to_add=nonzero_model_coef_index,
                                              col_list_to_add=np.ones(len(nonzero_model_coef_index), dtype = int) * currentItem,
                                              data_list_to_add=nonzero_model_coef_value)

            elapsed_time = time.time() - start_time
            new_time_value, new_time_unit = seconds_to_biggest_unit(elapsed_time)


            # report progress every 300 seconds and once more after the last item
            if time.time() - start_time_printBatch > 300 or currentItem == n_items-1:
                # currentItem+1 items are done at this point; the max() avoids a
                # division by zero when the clock has not advanced yet
                self._print("Processed {} ({:4.1f}%) in {:.2f} {}. Items per second: {:.2f}".format(
                    currentItem+1,
                    100.0* float(currentItem+1)/n_items,
                    new_time_value,
                    new_time_unit,
                    float(currentItem+1)/max(elapsed_time, 1e-9)))

                sys.stdout.flush()
                sys.stderr.flush()

                start_time_printBatch = time.time()

        self.W_sparse = similarity_builder.get_SparseMatrix()



from multiprocessing import Pool, cpu_count
from functools import partial


def create_shared_memory(a):
    """Copy array ``a`` into a newly created shared-memory segment.

    The segment is sized to ``a.nbytes`` and filled through a NumPy array that
    wraps its buffer. The segment object is returned; closing and unlinking it
    is left to the caller.
    """
    segment = shared_memory.SharedMemory(size=a.nbytes, create=True)
    # Temporary array backed by the segment's buffer, used only for the copy.
    np.ndarray(a.shape, a.dtype, segment.buf)[:] = a[:]
    return segment


@ignore_warnings(category=ConvergenceWarning)
def _partial_fit(items, topK, alpha, l1_ratio, urm_shape, positive_only=True, shm_names=None, shm_shapes=None, shm_dtypes=None):
    """Fit the ElasticNet models of a chunk of items (worker-side function).

    The CSC arrays of the URM are read from three existing shared-memory
    segments, given in the order (indptr, indices, data). ``data`` is copied,
    because each item's column is zeroed while that item is fitted; ``indices``
    and ``indptr`` are used straight from the shared buffers.

    :param items: iterable of item (column) indices to fit.
    :param topK: maximum number of coefficients kept per item (largest absolute value).
    :param alpha: ElasticNet regularization strength.
    :param l1_ratio: ElasticNet mixing parameter.
    :param urm_shape: shape of the URM described by the shared arrays.
    :param positive_only: if True, coefficients are constrained to be non-negative.
    :param shm_names: names of the three shared-memory segments.
    :param shm_shapes: shapes of the three arrays stored in the segments.
    :param shm_dtypes: dtypes of the three arrays stored in the segments.
    :return: ``(values, rows, cols)`` lists holding the kept coefficients, the
        index of the item each coefficient refers to, and the fitted item.
    """

    model = ElasticNet(
        alpha=alpha,
        l1_ratio=l1_ratio,
        positive=positive_only,
        fit_intercept=False,
        copy_X=False,
        precompute=True,
        selection='random',
        max_iter=100,
        tol=1e-4
    )

    values, rows, cols = [], [], []
    shm_handles = []

    try:
        # attach to the segments; whatever was attached is closed in the finally
        # clause, also when the fit raises
        for shm_name in shm_names:
            shm_handles.append(shared_memory.SharedMemory(name=shm_name, create=False))
        indptr_shm, indices_shm, data_shm = shm_handles

        X_j = sps.csc_matrix((
                np.ndarray(shm_shapes[2], dtype=shm_dtypes[2], buffer=data_shm.buf).copy(),
                np.ndarray(shm_shapes[1], dtype=shm_dtypes[1], buffer=indices_shm.buf),
                np.ndarray(shm_shapes[0], dtype=shm_dtypes[0], buffer=indptr_shm.buf),
            ), shape=urm_shape)

        for currentItem in items:

            start_pos = X_j.indptr[currentItem]
            end_pos = X_j.indptr[currentItem + 1]

            # target column
            y = X_j[:, currentItem].toarray()

            # A basic slice is a view on X_j.data: without .copy() the backup is
            # zeroed together with the column and the "restore" below writes
            # zeros back, hiding this item from every later item of the chunk.
            backup = X_j.data[start_pos:end_pos].copy()
            X_j.data[start_pos:end_pos] = 0.0

            model.fit(X_j, y)

            sparse_coef = model.sparse_coef_
            nonzero_model_coef_index = sparse_coef.indices
            nonzero_model_coef_value = sparse_coef.data

            # Check if there are more data points than topK, if so, extract the set of K best values
            if len(nonzero_model_coef_value) > topK:
                # Partition the data because this operation does not require to fully sort the data
                relevant_items_partition = np.argpartition(-np.abs(nonzero_model_coef_value), topK-1, axis=0)[0:topK]
                nonzero_model_coef_index = nonzero_model_coef_index[relevant_items_partition]
                nonzero_model_coef_value = nonzero_model_coef_value[relevant_items_partition]

            values.extend(nonzero_model_coef_value)
            rows.extend(nonzero_model_coef_index)
            cols.extend([currentItem] * len(nonzero_model_coef_index))

            # put the original values of the column back for the next items
            X_j.data[start_pos:end_pos] = backup
    finally:
        for shm_handle in shm_handles:
            shm_handle.close()

    return values, rows, cols




class MultiThreadSLIM_SLIMElasticNetRecommender(SLIMElasticNetRecommender):
    """SLIM ElasticNet trained with a pool of worker processes.

    The CSC arrays of the URM are copied into shared memory, the items are
    split into small chunks and every chunk is fitted by ``_partial_fit`` in a
    ``multiprocessing.Pool`` worker.
    """

    def fit(self, alpha=1.0, l1_ratio=0.1, positive_only=True, topK=100,
            verbose=True, workers=int(cpu_count()*0.3)):
        """Fit the model and store the item-item weights in ``self.W_sparse``.

        :param alpha: ElasticNet regularization strength.
        :param l1_ratio: ElasticNet mixing parameter, must lie in [0, 1].
        :param positive_only: if True, coefficients are constrained to be non-negative.
        :param topK: maximum number of coefficients kept per item.
        :param verbose: if True, show a progress bar.
        :param workers: number of worker processes; values below 1 (the default
            evaluates to 0 on machines with three cores or fewer) are raised to 1.
        """

        assert l1_ratio>= 0 and l1_ratio<=1, \
            "ElasticNet: l1_ratio must be between 0 and 1, provided value was {}".format(l1_ratio)

        self.alpha = alpha
        self.l1_ratio = l1_ratio
        self.positive_only = positive_only
        self.topK = topK

        # Pool rejects a process count below 1
        self.workers = max(1, workers)

        self.URM_train = check_matrix(self.URM_train, 'csc', dtype=np.float32)

        pool_chunksize = 4
        item_chunksize = 8

        values, rows, cols = [], [], []
        shm_segments = []
        pbar = None

        try:
            # order matters: _partial_fit expects (indptr, indices, data)
            for urm_array in (self.URM_train.indptr, self.URM_train.indices, self.URM_train.data):
                shm_segments.append(create_shared_memory(urm_array))
            indptr_shm, indices_shm, data_shm = shm_segments

            _pfit = partial(_partial_fit, topK=self.topK, alpha=self.alpha, urm_shape=self.URM_train.shape,
                            l1_ratio=self.l1_ratio, positive_only=self.positive_only,
                            shm_names=[indptr_shm.name, indices_shm.name, data_shm.name],
                            shm_shapes=[self.URM_train.indptr.shape, self.URM_train.indices.shape, self.URM_train.data.shape],
                            shm_dtypes=[self.URM_train.indptr.dtype, self.URM_train.indices.dtype, self.URM_train.data.dtype])

            # array_split needs at least one section, also with fewer than
            # item_chunksize items
            n_chunks = max(1, self.n_items // item_chunksize)
            itemchunks = np.array_split(np.arange(self.n_items), n_chunks)

            if verbose:
                pbar = tqdm(total=self.n_items)

            with Pool(processes=self.workers) as pool:
                # every result is a (values, rows, cols) tuple for one chunk of items
                for values_, rows_, cols_ in pool.imap_unordered(_pfit, itemchunks, pool_chunksize):
                    values.extend(values_)
                    rows.extend(rows_)
                    cols.extend(cols_)
                    if pbar is not None:
                        pbar.update(item_chunksize)
        finally:
            if pbar is not None:
                pbar.close()

            # release the segments also when a worker fails, otherwise they
            # outlive the process
            for shm_segment in shm_segments:
                shm_segment.close()
                shm_segment.unlink()

            # the recommender keeps its URM in CSR format outside of fit
            self.URM_train = self.URM_train.tocsr()

        # generate the sparse weight matrix
        self.W_sparse = sps.csr_matrix((values, (rows, cols)), shape=(self.n_items, self.n_items), dtype=np.float32)

In [None]:
# Train SLIM ElasticNet on the training split with fixed hyperparameters
# (presumably the best point of an earlier tuning run -- TODO confirm),
# then print its metrics on the validation split.
slim_elastic_net = MultiThreadSLIM_SLIMElasticNetRecommender(URM_train)
slim_elastic_net.fit(alpha=0.00037, l1_ratio=0.265, topK=485, workers=4, verbose=False)
results_run_dict, results_run_string = evaluator_validation.evaluateRecommender(slim_elastic_net)
print(results_run_string)

In [None]:
from bayes_opt import BayesianOptimization
def black_box_function(topK_v, l1_ratio, alpha):
    """Objective for the SLIM ElasticNet hyperparameter search.

    Trains a fresh model on ``URM_train`` with the proposed hyperparameters
    (``topK_v`` is truncated to an integer), prints the validation report and
    returns the validation MAP.
    """
    top_k = int(topK_v)

    candidate = MultiThreadSLIM_SLIMElasticNetRecommender(URM_train)
    candidate.fit(alpha=alpha, l1_ratio=l1_ratio, topK=top_k, verbose=False, workers=4)

    metrics, report = evaluator_validation.evaluateRecommender(candidate)
    print(report)

    return metrics['MAP'].iloc[0]
    
    
# Bayesian optimization of the SLIM ElasticNet hyperparameters.
# pbounds gives the [lower, upper] search range of each black_box_function argument.
pbounds = {"topK_v": [250, 500],"l1_ratio":[0.1,0.5],"alpha":[0.0001,0.001]}
# Create a BayesianOptimization optimizer,
# and optimize the given black_box_function.
optimizer = BayesianOptimization(f = black_box_function,pbounds = pbounds, verbose = 2, random_state = 102030)
# init_points random probes first, then n_iter guided steps (per the bayes_opt docs).
optimizer.maximize(init_points = 5, n_iter = 10 )
print("Best result: {}; f(x) = {}.".format(optimizer.max["params"], optimizer.max["target"]))

In [None]:
# Retrain SLIM ElasticNet on all the interactions (URM_all) with the same
# hyperparameters used for the validated model above.
slim_elastic_net_final = MultiThreadSLIM_SLIMElasticNetRecommender(URM_all.tocsr())
slim_elastic_net_final.fit(alpha=0.00037, l1_ratio=0.265, topK=485, workers=4, verbose=False)

## RP3Beta Recommender

In [None]:
%cd /kaggle/working/RecSys_Course_AT_PoliMi
from Recommenders.GraphBased.RP3betaRecommender import *

In [None]:
from bayes_opt import BayesianOptimization
def black_box_function(topK_v, alpha_v, beta_v):
    """Objective for the RP3beta hyperparameter search.

    Trains a fresh RP3beta model on ``URM_train`` with the proposed
    hyperparameters (``topK_v`` is truncated to an integer), prints the
    validation report and returns the validation MAP.
    """
    top_k = int(topK_v)

    candidate = RP3betaRecommender(URM_train)
    candidate.fit(topK=top_k, alpha=alpha_v, beta=beta_v, implicit=True)

    metrics, report = evaluator_validation.evaluateRecommender(candidate)
    print(report)

    return metrics['MAP'].iloc[0]
    
    
# Bayesian optimization of the RP3beta hyperparameters.
# pbounds gives the [lower, upper] search range of each black_box_function argument.
pbounds = {"topK_v": [50, 150],"alpha_v":[0.75,1],"beta_v":[0.3,0.5]}
# Create a BayesianOptimization optimizer,
# and optimize the given black_box_function.
optimizer = BayesianOptimization(f = black_box_function,pbounds = pbounds, verbose = 2, random_state = 2210)
# init_points random probes first, then n_iter guided steps (per the bayes_opt docs).
optimizer.maximize(init_points = 10, n_iter = 100 )
print("Best result: {}; f(x) = {}.".format(optimizer.max["params"], optimizer.max["target"]))

In [None]:
# Train RP3beta on the training split with fixed hyperparameters
# (presumably taken from the search above -- TODO confirm).
rp3beta_recommender = RP3betaRecommender(URM_train)
rp3beta_recommender.fit(implicit=True, topK=59, alpha=0.845, beta=0.3365)
# Alternative evaluation with the notebook's own `evaluator` function (disabled):
#ev_precision, ev_recall, ev_map, _, _ = evaluator(rp3beta_recommender, URM_train, URM_validation)
#ev_precision, ev_recall, ev_map

In [None]:
results_run_dict, results_run_string = evaluator_validation.evaluateRecommender(rp3beta_recommender)
print(results_run_string)

In [None]:
# Retrain RP3beta on all the interactions (URM_all) with the same
# hyperparameters used for the validated model.
rp3beta_recommender_final = RP3betaRecommender(URM_all.tocsr())
rp3beta_recommender_final.fit(implicit=True, topK=59, alpha=0.845, beta=0.3365)

## EASE_R

In [None]:
from Recommenders.EASE_R.EASE_R_Recommender import *

In [None]:
import gc
gc.collect()

In [None]:
# Train EASE_R on the training split and print its validation metrics.
ease_r_recommender = EASE_R_Recommender(URM_train)
# Explicit garbage collection before the fit; presumably the fit is
# memory-hungry -- TODO confirm.
gc.collect()
ease_r_recommender.fit(l2_norm=152, topK=485)
results_run_dict, results_run_string = evaluator_validation.evaluateRecommender(ease_r_recommender)
print(results_run_string)

In [None]:
# Retrain EASE_R on all the interactions (URM_all) with the same
# hyperparameters used for the validated model.
ease_r_recommender_final = EASE_R_Recommender(URM_all.tocsr())
ease_r_recommender_final.fit(l2_norm=152, topK=485)

## Hybrid SLIM EN, RP3Beta and EASE_R

In [None]:
%cd /kaggle/working/RecSys_Course_AT_PoliMi
from Recommenders.KNN.ItemKNNCustomSimilarityRecommender import *

In [None]:
# Hybrid by weighted sum of the three item-item weight matrices:
#   alpha1 weights EASE_R (res2), alpha2 weights RP3beta (res3) and the
#   remainder 1 - alpha1 - alpha2 weights SLIM ElasticNet (res1).
alpha1 = 0.27
alpha2 = 0.44
res1 = slim_elastic_net
res2 = ease_r_recommender
res3 = rp3beta_recommender

new_similarity = sp.csr_matrix((1 - alpha1 - alpha2) * res1.W_sparse + alpha1 * res2.W_sparse + alpha2 * res3.W_sparse)
# Use the combined matrix as a custom item-item similarity and print the
# validation metrics of the resulting recommender.
final_hybrid_recommender = ItemKNNCustomSimilarityRecommender(URM_train)
final_hybrid_recommender.fit(new_similarity)
results_run_dict, results_run_string = evaluator_validation.evaluateRecommender(final_hybrid_recommender)
print(results_run_string)

In [None]:
from bayes_opt import BayesianOptimization
def black_box_function(alpha1, alpha2):
    """Objective for the search of the hybrid weights.

    Combines the weight matrices of the already trained models as
    ``(1 - alpha1 - alpha2) * SLIM + alpha1 * EASE_R + alpha2 * RP3beta``,
    evaluates the resulting item-similarity recommender on the validation
    split, prints the report and returns the validation MAP.
    """
    slim_model = slim_elastic_net
    ease_model = ease_r_recommender
    rp3_model = rp3beta_recommender

    blended = sp.csr_matrix((1 - alpha1 - alpha2) * slim_model.W_sparse + alpha1 * ease_model.W_sparse + alpha2 * rp3_model.W_sparse)

    hybrid = ItemKNNCustomSimilarityRecommender(URM_train)
    hybrid.fit(blended)

    metrics, report = evaluator_validation.evaluateRecommender(hybrid)
    print(report)

    return metrics['MAP'].iloc[0]
    
    
# Bayesian optimization of the two hybrid weights.
# NOTE(review): both ranges are [0, 1], so alpha1 + alpha2 may exceed 1 and the
# SLIM weight (1 - alpha1 - alpha2) may become negative -- confirm this is intended.
pbounds = {"alpha1":[0,1], "alpha2":[0,1]}
# Create a BayesianOptimization optimizer,
# and optimize the given black_box_function.
optimizer = BayesianOptimization(f = black_box_function,pbounds = pbounds, verbose = 2, random_state = 201219)
# init_points random probes first, then n_iter guided steps (per the bayes_opt docs).
optimizer.maximize(init_points = 10, n_iter = 100 )
print("Best result: {}; f(x) = {}.".format(optimizer.max["params"], optimizer.max["target"]))

In [None]:
# Final hybrid: same weights as the validated hybrid, but built from the models
# trained on all the interactions (the *_final recommenders).
#   alpha1 weights EASE_R (res2), alpha2 weights RP3beta (res3) and the
#   remainder 1 - alpha1 - alpha2 weights SLIM ElasticNet (res1).
alpha1 = 0.27
alpha2 = 0.44
res1 = slim_elastic_net_final
res2 = ease_r_recommender_final
res3 = rp3beta_recommender_final

new_similarity = sp.csr_matrix((1 - alpha1 - alpha2) * res1.W_sparse + alpha1 * res2.W_sparse + alpha2 * res3.W_sparse)
final_hybrid_recommender_all = ItemKNNCustomSimilarityRecommender(URM_all.tocsr())
final_hybrid_recommender_all.fit(new_similarity)

## Submission to competition

This step mirrors what you will do when preparing a submission to the competition, especially once you have chosen and trained your recommender.

For this step the best suggestion is to select the most-performing configuration obtained in the hyperparameter tuning step and to train the recommender using both the `train` and `validation` set. Remember that in the competition you *do not* have access to the test set.

We simulated the users to generate recommendations by randomly selecting 100 users from the original identifiers. Do consider that in the competition you are most likely to be provided with the list of users to generate recommendations. 

Another consideration is that, due to easier and faster calculations, we replaced the user/item identifiers with new ones in the preprocessing step. For the competition, you are required to generate recommendations using the dataset's original identifiers. Due to this, this step also reverts back the newer identifiers with the ones originally found in the dataset.

Last, this step creates a function that writes the recommendations for each user in the same file in a tabular format following this format: 
```csv
<user_id>,<item_id_1> <item_id_2> <item_id_3> <item_id_4> <item_id_5> <item_id_6> <item_id_7> <item_id_8> <item_id_9> <item_id_10>
```

Always verify the competition's submission file format, as it might differ from the one presented here.

In [None]:
# The recommender used for the submission: the hybrid trained on all the data.
best_recommender = final_hybrid_recommender_all
# Show the string form of the object; the bare `__str__` attribute only
# referenced the bound method without ever calling it.
print(best_recommender)

In [None]:
# Users that need a recommendation list: the UserID column of the target file.
users_to_recommend = targets['UserID'].tolist()
#users_to_recommend

In [None]:
from tqdm.notebook import tqdm
def prepare_submission(users_to_recommend: np.array, recommender: object):
    """Collect the top-10 recommendations of every requested user.

    Users are processed in the given order, with a progress bar.

    Returns:
        A list of ``(user_id, recommendations)`` tuples, where
        ``recommendations`` is whatever ``recommender.recommend`` returns for
        that user with ``cutoff=10``.
    """
    return [
        (user_id, recommender.recommend(user_id, cutoff=10))
        for user_id in tqdm(users_to_recommend)
    ]

In [None]:
submission = prepare_submission(users_to_recommend, best_recommender)

In [None]:
submission

In [None]:
def write_submission(submissions, path="/kaggle/working/submission_giuseppe_14_01_1.csv"):
    """Write the recommendations to a CSV file in the competition format.

    The file starts with the header ``user_id,item_list``, followed by one line
    per user: the user ID, a comma, and the recommended item IDs separated by
    single spaces. No newline is written after the last line.

    Args:
        submissions: iterable of ``(user_id, items)`` pairs.
        path: destination file; it is overwritten if it exists. Defaults to the
            file name originally hard-coded in the notebook.
    """
    lines = ["user_id,item_list"]
    lines.extend(
        f"{user_id},{' '.join(str(item) for item in items)}"
        for user_id, items in submissions
    )

    with open(path, "w") as f:
        f.write("\n".join(lines))
    

In [None]:
write_submission(submission)