# DeepFM Recommender System - Expedia Hotel dataset

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import tensorflow as tf
import warnings
import math
from math import sqrt
import sys
import holidays
import datetime

from sklearn.metrics import roc_curve, auc,roc_auc_score
from sklearn import metrics
from sklearn.metrics import mean_squared_error,mean_absolute_error
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from deepctr.inputs import build_input_features, get_linear_logit, input_from_feature_columns, combined_dnn_input
from deepctr.layers.core import PredictionLayer, DNN
from deepctr.layers.utils import add_func
from deepctr.models import WDL, DeepFM
from deepctr.inputs import SparseFeat,get_feature_names


from tensorflow.python.keras.models import Model
from tensorflow.python.keras.layers import Dense

import scipy.sparse as sparse
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
import implicit

  import pandas.util.testing as tm


In [2]:
df = pd.read_csv('../data/hotel_data/train.csv', sep=',', nrows=1500000)
destinations = pd.read_csv('../data/hotel_data/destinations.csv', sep=',')
df.shape

(1500000, 24)

In [3]:
#merge only top 10 most correlated columns with rating column
df = pd.merge(df,destinations[['srch_destination_id','d33', 'd64', 'd52', 'd120', 'd72', 'd136', 'd7', 'd59', 'd50', 'd30']],on='srch_destination_id')

In [4]:
# rename 2 columns
df = df.rename(columns={'hotel_cluster': 'item_id', 'is_booking': 'rating'})
df = df.dropna()

## Feature Engineering

In [5]:
# sort values
from pandas.tseries.offsets import Week
df = df.sort_values("date_time").reset_index()
df.drop('index',axis=1,inplace=True)

In [6]:
df["date_time"] =  pd.to_datetime(df["date_time"], infer_datetime_format=True)
df["date_time"] = df.date_time.dt.strftime('%Y-%m-%d')
#df["date_time_timestamp"] =  pd.to_datetime(df["date_time"], infer_datetime_format=True)

In [7]:
d = datetime.timedelta(days=14)
df['lagged_date_time'] = df["date_time"].apply(lambda x: datetime.datetime.strptime(x,"%Y-%m-%d") + d)

def extract_week(feature,week,lag):
    df[feature] =  pd.to_datetime(df[feature], infer_datetime_format=True)
    df[feature] = df.date_time.dt.strftime('%Y-%m-%d')
    if lag == True:
        d = datetime.timedelta(days=14)
        df['lag_date_time'] = df[feature].apply(lambda x: datetime.datetime.strptime(x,"%Y-%m-%d") + d)
        df['week'] = pd.DatetimeIndex(df['lag_date_time']).week
        df['year']=pd.DatetimeIndex(df['lag_date_time']).year
        
        # countinue week numbers for the next year
        df[week] = df['week'].where(df['year'] ==2013 , df['week']+52)
extract_week('date_time','click_week',lag=True)

# extract month from date_time
df['click_month'] = pd.DatetimeIndex(df['date_time']).month

In [8]:
df['checkin_month'] = pd.DatetimeIndex(df['srch_ci']).month
df['checkout_month'] = pd.DatetimeIndex(df['srch_co']).month

df['checkin_year'] = pd.DatetimeIndex(df['srch_ci']).year
df['checkout_year'] = pd.DatetimeIndex(df['srch_co']).year

In [9]:
# Define holidays in some countries
ca_holidays = holidays.Canada()
us_holidays = holidays.UnitedStates()

# check if checkin or checkout date is in holiday of different countries

df['north_am_ci'] = df['srch_ci'].apply(lambda x: 1 if x in (us_holidays or ca_holidays)  else 0)
df['north_am_co'] = df['srch_co'].apply(lambda x: 1 if x in (us_holidays or ca_holidays)  else 0)

In [10]:
df= df.drop(['date_time'],axis=1)
df= df.drop(['week'],axis=1)
df= df.drop(['year'],axis=1)
df= df.drop(['srch_ci'],axis=1)
df= df.drop(['srch_co'],axis=1)
df= df.drop(['lag_date_time'],axis=1)
#df= df.drop(['date_time_timestamp'],axis=1)
df= df.drop(['lagged_date_time'],axis=1)
#df= df.drop(['num_visit'],axis=1)

In [11]:
#Note that we add 1 to the raw count to prevent the logarithm from
# exploding into negative infinity in case the count is zero.
df['log_orig_destination_distance'] = np.log10(df['orig_destination_distance'] + 1)

df= df.drop(['orig_destination_distance'],axis=1)

In [12]:
from sklearn.cluster import KMeans
def create_cluster(feature):
    y = df[feature]
    X = df.drop(feature,axis=1)
    wcss=[]
    for i in range(1,11):
        kmeans = KMeans(n_clusters= i, init='k-means++', random_state=0)
        kmeans.fit(X)
        wcss.append(kmeans.inertia_)
    plt.plot(range(1,11), wcss)
    plt.title('The Elbow Method')
    plt.xlabel('number of clusters')
    plt.ylabel('wcss')
    plt.show()

In [13]:
X = df.drop("user_location_region",axis=1)
kmeansmodel = KMeans(n_clusters= 3, init='k-means++', random_state=0)
y_kmeans= kmeansmodel.fit_predict(X)
df['kmeans_user_location_region']=y_kmeans
df= df.drop(['user_location_region'],axis=1)

In [14]:
X = df.drop("user_location_city",axis=1)
kmeansmodel = KMeans(n_clusters= 3, init='k-means++', random_state=0)
y_kmeans= kmeansmodel.fit_predict(X)
df['kmeans_user_location_city']=y_kmeans
df= df.drop(['user_location_city'],axis=1)

In [15]:
condlist = [(df['srch_adults_cnt']==0) & (df['srch_children_cnt']==0),
            (df['srch_adults_cnt']==2) & (df['srch_children_cnt']==0),
            (df['srch_adults_cnt']==2) & (df['srch_children_cnt']==1),
            (df['srch_adults_cnt']==2) & (df['srch_children_cnt']==2),
           (df['srch_adults_cnt']==1) & (df['srch_children_cnt']==0),
            (df['srch_adults_cnt']>1) & (df['srch_children_cnt']>0),
           (df['srch_adults_cnt']==1) & (df['srch_children_cnt'] > 0),
           (df['srch_adults_cnt']>2) & (df['srch_children_cnt'] == 0),
           (df['srch_adults_cnt']==0) & (df['srch_children_cnt'] > 0)]

choicelist = ['empty_room',
                'couple_with_no_children',
                'couple_with_one_child',
                'couple_with_two_children',
                'single',
                'big_family',
                'single_parent',
                'friends',
                'unsupervised_children']

df['family_status'] = np.select(condlist,choicelist)

In [16]:
#Convert the family_status into dummy variables
dummies = pd.get_dummies(df['family_status'],drop_first=True)
df= pd.concat( [df.drop('family_status',axis=1),dummies],axis=1)

if "unsupervised_children" in df.columns:
    df= df.drop("unsupervised_children",axis=1)
if "empty_room" in df.columns:
    df= df.drop("empty_room",axis=1)

In [17]:
df['cnt'] = (df['cnt'] - df['cnt'].mean())/df['cnt'].std()

# Define features

In [18]:
# categ_sparse / conti_dense
sparse_features = ["site_name", #ID of the Expedia point of sale (i.e. Expedia.com, Expedia.co.uk, Expedia.co.jp, …)
"posa_continent", #ID of continent associated with site_name
"user_location_country", #The ID of the country the customer is located
"kmeans_user_location_region", #The ID of the region the customer is located clustered in 2 groups
"kmeans_user_location_city", #The ID of the city the customer is located clustered in 2 groups
"user_id", #ID of user
"is_mobile", #1 when a user connected from a mobile device, 0 otherwise
"is_package", #1 if the click/booking was generated as a part of a package (i.e. combined with a flight), 0 otherwise
"channel", #ID of a marketing channel
"cnt", #Numer of similar events in the context of the same user session
"srch_destination_id", #ID of the destination where the hotel search was performed'
"srch_destination_type_id", #Type of destination
"hotel_continent", #'Hotel continent',
"hotel_country", #Hotel country
"item_id", #(hotel_cluster)ID of a hotel cluster
"north_am_ci", # 1 if check-in date it's a holiday in north America
"north_am_co",# 1 if check-out date it's a holiday in north America
'hotel_market', #Hotel market
'couple_with_no_children','couple_with_one_child','couple_with_two_children',"friends","single","single_parent",
#hotel search latent attributes highly correlated with rating:
'd33', 'd64','d52','d120', 'd72', 'd136', 'd7', 'd59', 'd50', 'd30'] 

dense_features = ["srch_adults_cnt", #The number of adults specified in the hotel room
"srch_children_cnt", #The number of (extra occupancy) children specified in the hotel room
"srch_rm_cnt", #The number of hotel rooms specified in the search
'log_orig_destination_distance', # Log transformed physical distance between a hotel and a customer at the time of search
"click_week",
"click_month",
"checkin_month",
"checkout_month",
"checkin_year",
"checkout_year"]
target = ['rating']

### Simple preprocessing

In [19]:
# Label Encoding for sparse features,and normalization for dense numerical features
for feat in sparse_features:
    lbe = LabelEncoder()
    df[feat] = lbe.fit_transform(df[feat])

In [20]:
mms = MinMaxScaler(feature_range=(0,1))
df[dense_features] = mms.fit_transform(df[dense_features])

### Generate feature columns
For sparse features, we transform them into dense vectors by embedding techniques. For dense numerical features, we concatenate them to the input tensors of fully connected layer.

In [21]:
# count #unique features for each sparse field
fixlen_feature_columns = [SparseFeat(feat, df[feat].nunique(),embedding_dim=4)
                          for feat in sparse_features]
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

### Generate the training samples and train the model

In [22]:
# generate input data for model
train, test = train_test_split(df, test_size=0.3)
train_model_input = {name:train[name].values for name in feature_names}
test_model_input = {name:test[name].values for name in feature_names}

In [23]:
train.shape, test.shape

((658258, 45), (282111, 45))

# Best DeepFM Model after hyper-parameter tuning

In [24]:
model = DeepFM(linear_feature_columns, dnn_feature_columns, dnn_hidden_units=(128,128)
            , init_std=0.0001, seed=1024, dnn_dropout=0.5, dnn_activation='relu',task='binary',
               fm_group=['default_group'],dnn_use_bn=False)

model.compile("adam", "mse", metrics=['mse'])

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
keep_dims is deprecated, use keepdims instead


In [25]:
history = model.fit(train_model_input, train[target].values,
                        batch_size=256, epochs=10, verbose=2, validation_split=0.2, )

Train on 526606 samples, validate on 131652 samples
Epoch 1/10
526606/526606 - 38s - loss: 0.0754 - mean_squared_error: 0.0736 - val_loss: 0.0745 - val_mean_squared_error: 0.0720
Epoch 2/10
526606/526606 - 37s - loss: 0.0725 - mean_squared_error: 0.0696 - val_loss: 0.0750 - val_mean_squared_error: 0.0722
Epoch 3/10
526606/526606 - 34s - loss: 0.0718 - mean_squared_error: 0.0686 - val_loss: 0.0757 - val_mean_squared_error: 0.0728
Epoch 4/10
526606/526606 - 28s - loss: 0.0713 - mean_squared_error: 0.0682 - val_loss: 0.0756 - val_mean_squared_error: 0.0726
Epoch 5/10
526606/526606 - 28s - loss: 0.0710 - mean_squared_error: 0.0678 - val_loss: 0.0756 - val_mean_squared_error: 0.0726
Epoch 6/10
526606/526606 - 28s - loss: 0.0707 - mean_squared_error: 0.0675 - val_loss: 0.0761 - val_mean_squared_error: 0.0730
Epoch 7/10
526606/526606 - 28s - loss: 0.0706 - mean_squared_error: 0.0672 - val_loss: 0.0760 - val_mean_squared_error: 0.0728
Epoch 8/10
526606/526606 - 28s - loss: 0.0705 - mean_square

In [26]:
pred_ans = model.predict(test_model_input, batch_size=256)

In [27]:
auc = roc_auc_score(test[target].values, pred_ans)
print("RMSE:\t%f" % np.round(math.sqrt(mean_squared_error(test[target].values, pred_ans)),3),
      "MAE:\t%f" % np.round(mean_absolute_error(test[target].values, pred_ans),3),
      "MSE:\t%f" % np.round(mean_squared_error(test[target].values, pred_ans),3),
      "AUC:\t%f" % np.round(auc,3),
      sep='\n')

RMSE:	0.270000
MAE:	0.134000
MSE:	0.073000
AUC:	0.784000


In [28]:
warnings.filterwarnings("ignore")
new_df = test[['rating','item_id','user_id']]

#replace the rating with algorithm generated output
new_df['rating']=pred_ans

In [29]:
new_df.head()

Unnamed: 0,rating,item_id,user_id
213374,0.142968,39,27471
149222,2.5e-05,73,12867
27514,0.252081,95,4537
559850,0.062676,56,13237
684066,0.189638,97,7674


### Create dataframe to store clusters

In [30]:
hotel_df = pd.DataFrame(columns=['item_id','hotel_type'])
hotel_df['item_id']=list(range(100))

cluster = {"apartment":[5, 11, 22, 28,41, 56, 73],
          'business_hotels':[ 64,69, 70, 97],
          "condo":[3,8,36, 37, 55],
          "private_vacation_homes":[ 4, 9, 21, 49, 75, 77],
          "motel":[2,25,27, 95, 98],
          "beach_resort":[0, 17, 26, 31, 34, 80, 84, 92],
          "casino_hotel":[1, 19, 45, 54, 79,89, 93],
          "hotel_resort":[52, 65, 66, 87, 96],
          "bed_n_breakfast":[23, 39, 50, 51, 76],
          "hosetel":[12, 20, 38, 53, 57, 60, 61, 85, 86]}

### Store it on a dataframe

In [31]:
warnings.filterwarnings("ignore")
for i in cluster.keys():
    hotel_df['hotel_type'][cluster[i]]= i
hotel_df.head(n=10)
hotel_df = hotel_df.dropna().reset_index().drop('index',axis=1)

In [32]:
#csr_matrix((data, (row, col))
sparse_item_user = sparse.csr_matrix((new_df['rating'].astype(float),(new_df['item_id'], new_df['user_id'])))
sparse_user_item = sparse.csr_matrix((new_df['rating'].astype(float),(new_df['user_id'], new_df['item_id'])))


model = implicit.als.AlternatingLeastSquares(factors=20,regularization=0.1,iterations=20)
alpha_val = 15
data_conf = (sparse_item_user * alpha_val).astype('double')
model.fit(data_conf)



HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))




## Find similar clusters based on the results of DeepFM

We can compare that if the results are similar with the defined following clusters 

In [33]:
hotel_df['item_id'][:1]


0    0
Name: item_id, dtype: int64

In [34]:
def find_similar_clusters(item_id,n_similar):
    similar = model.similar_items(item_id,n_similar)
    return similar

In [35]:
for i,x in enumerate(hotel_df['item_id'][:1]):
    a = find_similar_clusters(x,10)
    a_list=[]
    for i in range(len(a)):
        if a[i][0] not in hotel_df['item_id'].unique():
            a_list.append(a[i][0])

In [36]:
a_list

[]

In [37]:
a = find_similar_clusters(5,10)
a

[(5, 0.9986916),
 (83, 0.6613542),
 (11, 0.61122227),
 (25, 0.5856495),
 (90, 0.45089),
 (47, 0.41762224),
 (29, 0.3743322),
 (93, 0.3367008),
 (73, 0.3354691),
 (75, 0.32569543)]

In [38]:
b = find_similar_clusters(0,10)
b

[(0, 0.73556024),
 (96, 0.698922),
 (34, 0.67736465),
 (26, 0.6660772),
 (87, 0.6574121),
 (66, 0.65617114),
 (65, 0.65330756),
 (52, 0.65208405),
 (31, 0.6378538),
 (84, 0.6052376)]

In [39]:
tt = pd.DataFrame(a, columns =['item_id', 'Score'])

In [40]:
csv_df = pd.DataFrame(columns= ['item_id','sim1','sim2','sim3'],
                      index=range(len(hotel_df['item_id'].unique())))

#find 25 similar clusters for each item_id in hotel_df
for i,x in enumerate(hotel_df['item_id'].unique()):
    a= find_similar_clusters(x,25)
    #store them in a dataframe
    tt = pd.DataFrame(a, columns =['item_id', 'Score'])
    # keep only clusters that have different type with the current cluster
    for j in range(len(tt)):
        if tt['item_id'][j] in cluster[hotel_df['hotel_type'][i]]:
            tt=tt.drop([j])
    bb = tt.copy()
    # keep the top 5
    bb=bb.reset_index(drop=True)
    # keep only clusters that are available in hotel_df
    for k in range(len(bb)):
        if bb['item_id'][k] not in hotel_df['item_id']:
            bb=bb.drop([k])
    cc = bb.copy()
    cc= cc.reset_index(drop=True)
    csv_df["item_id"][i]=x
    csv_df["sim1"][i]=cc['item_id'][0]
    csv_df["sim2"][i]=cc['item_id'][1]
    csv_df["sim3"][i]=cc['item_id'][2]

In [41]:
csv_df

Unnamed: 0,item_id,sim1,sim2,sim3
0,0,52,44,21
1,1,24,49,51
2,2,47,9,11
3,3,53,30,35
4,4,13,19,11
...,...,...,...,...
56,93,3,27,44
57,95,18,5,19
58,96,0,34,26
59,97,58,38,46


In [42]:
csv_df.head(n=6)

Unnamed: 0,item_id,sim1,sim2,sim3
0,0,52,44,21
1,1,24,49,51
2,2,47,9,11
3,3,53,30,35
4,4,13,19,11
5,5,25,47,29


In [43]:
csv_df.to_csv("output.csv", encoding='utf-8', index=False)

### 1) apartment :[5, 11, 22, 28,41, 56, 73]

In [44]:
a = find_similar_clusters(5,10)
a_list=[]
b_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['apartment']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        b_list.append(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

In [45]:
a_list

[83, 25, 90, 47, 29, 93, 75]

In [46]:
b_list[:3]

[array(['motel'], dtype=object),
 array(['casino_hotel'], dtype=object),
 array(['private_vacation_homes'], dtype=object)]

### 2) business_hotels:[ 64,69, 70, 97]

In [47]:
b= find_similar_clusters(64,10)
b_list=[]
for i in range(len(a)):
    if b[i][0] not in cluster['business_hotels']:
        b_list.append(b[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['motel']
['casino_hotel']
['private_vacation_homes']


### 3) "condo":[3,8,36, 37, 55]

In [48]:
a = find_similar_clusters(3,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['condo']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['hosetel']
['hosetel']
['casino_hotel']
['hosetel']
['hosetel']
['hosetel']


### 4) "private_vacation_homes":[ 4, 9, 21, 49, 75, 77]

In [49]:
a = find_similar_clusters(3,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['private_vacation_homes']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['condo']
['hosetel']
['hosetel']
['casino_hotel']
['hosetel']
['hosetel']
['hosetel']


### 5)"motel":[2,25,27, 95, 98]

In [50]:
a = find_similar_clusters(2,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['motel']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['private_vacation_homes']
['apartment']
['bed_n_breakfast']


### 6) "beach_resort":[0, 17, 26, 31, 34, 80, 84, 92],
         

In [51]:
a = find_similar_clusters(0,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['beach_resort']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['hotel_resort']
['hotel_resort']
['hotel_resort']
['hotel_resort']
['hotel_resort']


### 7) "casino_hotel":[1, 19, 45, 54, 79,89, 93],

In [52]:
a = find_similar_clusters(1,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['casino_hotel']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['private_vacation_homes']
['bed_n_breakfast']


### 8) "hotel_resort":[52, 65, 66, 87, 96],


In [53]:
a = find_similar_clusters(52,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['hotel_resort']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['beach_resort']
['beach_resort']
['beach_resort']
['apartment']
['beach_resort']


### 9)"bed_n_breakfast":[23, 39, 50, 51, 76],


In [54]:
a = find_similar_clusters(23,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['bed_n_breakfast']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['private_vacation_homes']
['hosetel']


### 10) "hosetel":[12, 20, 38, 53, 57, 60, 61, 85, 86]

In [55]:
a = find_similar_clusters(12,10)
a_list=[]
for i in range(len(a)):
    if a[i][0] not in cluster['hosetel']:
        a_list.append(a[i][0])
for x in a_list:
    if x in hotel_df['item_id'].values:
        print(hotel_df[hotel_df['item_id']==x]['hotel_type'].values)

['condo']
['condo']


### 65 is in hotel_resort and the results have 4 out of 5 clusters in common

In [56]:
a = find_similar_clusters(65,10)

### 70 is in business_hotels and the results have 2 out of 5 clusters in common

In [57]:
find_similar_clusters(70,6)

[(70, 0.9828824),
 (56, 0.8358953),
 (98, 0.7379527),
 (41, 0.52802414),
 (69, 0.4185779),
 (37, 0.26818994)]

### 4 is in private_vacation_homes and the results have 2 out of 5 clusters in common

In [58]:
find_similar_clusters(4,6)

[(4, 0.903138),
 (83, 0.43959123),
 (13, 0.37228698),
 (19, 0.30119917),
 (49, 0.2873956),
 (11, 0.26796383)]

## Recommend 5 hotel clusters to a user

In [59]:
#Get Recommendations
user_id =   800
recommended = model.recommend(user_id, sparse_user_item)
recommended

[(40, 0.11947691),
 (39, 0.114687555),
 (65, 0.096853465),
 (77, 0.09677903),
 (26, 0.08109836),
 (51, 0.080432214),
 (0, 0.08021465),
 (76, 0.07629116),
 (34, 0.07572769),
 (96, 0.07480145)]

In [60]:
recommended_df = pd.DataFrame(columns=['user_id','rec1','rec2','rec3','rec4','rec5'],
                              index=range(len(new_df['user_id'][:5].unique())))


In [61]:
for i,x in enumerate(new_df['user_id'][:5].unique()):
    recommended = model.recommend(x, sparse_user_item)
    recommended_df['user_id'].iloc[i]=x
    recommended_df['rec1'].iloc[i]=recommended[0][0]
    recommended_df['rec2'].iloc[i]=recommended[1][0]
    recommended_df['rec3'].iloc[i]=recommended[2][0]
    recommended_df['rec4'].iloc[i]=recommended[3][0]
    recommended_df['rec5'].iloc[i]=recommended[4][0]

In [62]:
recommended_df

Unnamed: 0,user_id,rec1,rec2,rec3,rec4,rec5
0,27471,40,76,41,9,23
1,12867,77,42,4,0,41
2,4537,18,6,50,83,5
3,13237,13,21,17,69,77
4,7674,46,22,67,41,86
