In [1]:
# Source: https://github.com/aayushmnit/cookbook/blob/master/recsys.py
# https://towardsdatascience.com/solving-business-usecases-by-recommender-system-using-lightfm-4ba7b3ac8e62

In [2]:
import sys
import os
from recsys import *
from generic_preprocessing import *
from lightfm.data import Dataset



In [None]:
def feature_colon_value(my_list, columns=('age', 'sex', 'occupation', 'zip_code')):
    """
    Prefix each value in ``my_list`` with the name of the column it belongs to.

    Values are paired with ``columns`` by position and rendered as
    ``'<column>:<value>'`` strings.
    For example: if my_list = [24, 'M', 'technician', '85711'],
    resultant output = ['age:24', 'sex:M', 'occupation:technician', 'zip_code:85711']

    Parameters
    ----------
    my_list : iterable
        Raw feature values for one row, in the same order as ``columns``.
    columns : iterable of str, optional
        Column names used as prefixes. Defaults to the four user columns
        this notebook works with.

    Returns
    -------
    list of str
        One ``'<column>:<value>'`` string per paired value. If ``my_list`` and
        ``columns`` differ in length, pairing stops at the shorter of the two.
    """
    return [str(name) + ":" + str(value) for name, value in zip(columns, my_list)]

In [4]:
# Load the three MovieLens 100k tables. None of the files is read with a header
# row; column names are supplied through `names`.
# NOTE(review): `pd` is not imported explicitly in this notebook; it is presumably
# re-exported by one of the star imports in the import cell -- TODO confirm.

# User demographics (pipe-separated).
users_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
# zip_code is read as text up front so codes are never interpreted as numbers.
# The former parse_dates=True was dropped: no index column is parsed from this
# file, so the flag had nothing to act on.
users = pd.read_csv('ml-100k/u.user', sep='|', names=users_cols, dtype={'zip_code': str})

# Ratings (tab-separated).
rating_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv('ml-100k/u.data', sep='\t', names=rating_cols)

# Movie metadata (pipe-separated, latin-1 encoded); only the first five fields are loaded.
movie_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url']
movies = pd.read_csv('ml-100k/u.item', sep='|', names=movie_cols, usecols=range(5), encoding='latin-1')

In [5]:
# Attach each rating to the metadata of the movie it refers to.
# movie_id is the only column the two frames share, so it is the join key.
movie_ratings = movies.merge(ratings, on='movie_id')
# Attach the rater's demographics; user_id is the only shared column here.
df = movie_ratings.merge(users, on='user_id')

In [6]:
# Cast zip_code to str so every value in the column is text before feature names are built from it.
users['zip_code'] =  users['zip_code'].astype(str)

In [53]:
# User x movie rating matrix: one row per user_id, one column per movie_id.
# Cells hold the rating (pivot_table's default aggregation) and 0 where no rating exists.
# NOTE(review): reset_index(drop=True) discards the user_id labels, so from here on
# rows are addressed by position 0..n-1 rather than by the original user_id.
interactions = (
    ratings
    .pivot_table(index=['user_id'], columns=['movie_id'], values='rating')
    .reset_index(drop=True)
    .fillna(0)
)


In [54]:
# Preview the first rows of the pivoted user x movie matrix.
interactions.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
0,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [55]:
# Build a user x movie matrix with the project helper (index keeps user_id).
# threshold is passed as a number: the previous string '3' cannot be compared
# with numeric ratings.
# NOTE(review): the preview of this matrix shows raw ratings, so the helper
# presumably ignores threshold unless normalisation is requested -- TODO confirm.
# NOTE(review): `interaction` (singular) is only previewed afterwards; model
# training and recommendations use `interactions`.
interaction = create_interaction_matrix(
    df=ratings,
    user_col='user_id',
    item_col='movie_id',
    rating_col='rating',
    threshold=3,
)

In [56]:
# Preview the helper-built matrix; unlike `interactions`, its rows are labelled by user_id.
interaction.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,1673,1674,1675,1676,1677,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,5.0,3.0,4.0,3.0,3.0,5.0,4.0,1.0,5.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [17]:
# Build the user lookup from the pivoted matrix with the project helper.
# presumably maps each row label of `interactions` to its row position -- verify against the helper
user_dict = create_user_dict(interactions=interactions)

In [18]:
# Build the item lookup from the movies table, keyed on movie_id with the title as the name.
movies_dict = create_item_dict(df=movies, id_col='movie_id', name_col='title')

In [20]:
# Vocabulary of user-feature names in '<column>:<value>' form: one entry per
# distinct value of each demographic column, grouped column by column in the
# order listed below. Driving everything from a single column list avoids
# keeping two hand-built parallel lists in sync.
feature_columns = ['age', 'sex', 'occupation', 'zip_code']
uf = [
    column + ":" + str(value)
    for column in feature_columns
    for value in users[column].unique()
]

In [21]:
# Register every user id, every movie id and the user-feature vocabulary
# with a LightFM Dataset so it can build its internal id mappings.
dataset1 = Dataset()
dataset1.fit(
    users=users.user_id.unique(),
    items=movies.movie_id.unique(),
    user_features=uf,
)

In [22]:
# Number of distinct user ids in the users table.
len(users.user_id.unique())

943

In [23]:
# Demographic columns used as user features, in the same order as the
# prefixes applied by feature_colon_value.
ll = ['age', 'sex', 'occupation', 'zip_code']
ad_subset = users[ll]
# One plain Python list of raw values per user row.
ad_list = list(map(list, ad_subset.values))

In [24]:
# Turn each user's raw values into '<column>:<value>' feature names.
feature_list = [feature_colon_value(row_values) for row_values in ad_list]

In [25]:
# Pair each user_id with that user's list of feature names.
# Despite the name, this is a list of (user_id, features) tuples.
user_tuple = list(zip(users.user_id, feature_list))

In [26]:
# Build the sparse user-feature matrix from the (user_id, features) pairs.
# normalize=False: per the LightFM docs, feature weights are not rescaled to sum to 1 per row.
user_features = dataset1.build_user_features(user_tuple, normalize= False)
# Densify only to display the matrix in the notebook.
user_features.todense()

matrix([[1., 0., 0., ..., 0., 0., 0.],
        [0., 1., 0., ..., 0., 0., 0.],
        [0., 0., 1., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 1., 0., 0.],
        [0., 0., 0., ..., 0., 1., 0.],
        [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)

In [27]:
# Row count of the interaction matrix, to compare with the row count of user_features.
len(interactions)

943

In [28]:
# (rows, columns) of the user-feature matrix.
user_features.shape

(943, 1822)

In [31]:
# Train the factorisation model through the project helper, passing the
# interaction matrix together with the user-feature matrix.
# NOTE(review): no random seed is passed here, so results may differ between
# runs -- TODO confirm whether runMF fixes one internally.
mf_model = runMF(
    interactions=interactions,
    n_components=30,
    loss='warp',
    k=15,
    epoch=30,
    n_jobs=4,
    user_features=user_features,
)

In [34]:
# Print known likes and 10 recommended items for one user.
# NOTE(review): the row labels of `interactions` were reset to 0..n-1, so
# user_id here is presumably a row label rather than a MovieLens user_id -- TODO confirm.
# threshold presumably is the rating cut-off for "known likes" -- verify against the helper.
rec_list = sample_recommendation_user(
    model=mf_model,
    interactions=interactions,
    user_id=15,
    user_dict=user_dict,
    item_dict=movies_dict,
    threshold=4,
    nrec_items=10,
)

Known Likes:
1- Fox and the Hound, The (1981)
2- Dave (1993)
3- Singin' in the Rain (1952)
4- In the Line of Fire (1993)
5- Manchurian Candidate, The (1962)
6- Stand by Me (1986)
7- Chinatown (1974)
8- Grifters, The (1990)
9- Rear Window (1954)
10- American in Paris, An (1951)

 Recommended Items:
1- Sleepless in Seattle (1993)
2- Mrs. Doubtfire (1993)
3- Grease (1978)
4- Dances with Wolves (1990)
5- Indiana Jones and the Last Crusade (1989)
6- While You Were Sleeping (1995)
7- Psycho (1960)
8- Princess Bride, The (1987)
9- Blues Brothers, The (1980)
10- Four Weddings and a Funeral (1994)


In [48]:

# Ask the helper for 15 users to whom item 1 could be recommended.
list_of_users = sample_recommendation_item(
    model=mf_model,
    interactions=interactions,
    item_id=1,
    user_dict=user_dict,
    item_dict=movies_dict,
    number_of_user=15,
)

In [49]:
# Show the user identifiers returned for item 1.
list_of_users

[799, 878, 894, 358, 102, 230, 92, 758, 237, 437, 548, 66, 119, 673, 512]

In [47]:
# Print known likes and 10 recommended items for user 878, one of the users
# returned for item 1. The return value is intentionally not stored.
# NOTE(review): user_id is presumably a row label of `interactions` -- TODO confirm.
sample_recommendation_user(
    model=mf_model,
    interactions=interactions,
    user_id=878,
    user_dict=user_dict,
    item_dict=movies_dict,
    threshold=4,
    nrec_items=10,
)

Known Likes:
1- Michael (1996)
2- Happy Gilmore (1996)
3- Godfather, The (1972)
4- Phenomenon (1996)

 Recommended Items:
1- Twister (1996)
2- Independence Day (ID4) (1996)
3- Mission: Impossible (1996)
4- Liar Liar (1997)
5- Mr. Holland's Opus (1995)
6- Rock, The (1996)
7- Grumpier Old Men (1995)
8- Toy Story (1995)
9- Dante's Peak (1997)
10- Ransom (1996)


In [45]:
# Print known likes and 10 recommended items for user 799, the first user
# returned for item 1.
# NOTE(review): user_id is presumably a row label of `interactions` -- TODO confirm.
rec_list = sample_recommendation_user(
    model=mf_model,
    interactions=interactions,
    user_id=799,
    user_dict=user_dict,
    item_dict=movies_dict,
    threshold=4,
    nrec_items=10,
)

Known Likes:
1- Rosewood (1997)
2- Sling Blade (1996)

 Recommended Items:
1- Liar Liar (1997)
2- Mr. Holland's Opus (1995)
3- Air Force One (1997)
4- Dante's Peak (1997)
5- English Patient, The (1996)
6- Jungle2Jungle (1997)
7- Murder at 1600 (1997)
8- Saint, The (1997)
9- Contact (1997)
10- Jerry Maguire (1996)


In [36]:
# Derive the item-to-item matrix from the trained model's item embeddings
# via the project helper.
item_item_dist = create_item_embedding_distance_matrix(
    model=mf_model,
    interactions=interactions,
)

In [44]:

# Print the 10 items most similar to item 7 according to the item-item matrix.
rec_list = item_item_recommendation(
    item_embedding_distance_matrix=item_item_dist,
    item_id=7,
    item_dict=movies_dict,
    n_items=10,
)

Item of interest :Twelve Monkeys (1995)
Item similar to the above item:
1- Fargo (1996)
2- Independence Day (ID4) (1996)
3- Toy Story (1995)
4- Star Trek: First Contact (1996)
5- Heat (1995)
6- Star Wars (1977)
7- Arrival, The (1996)
8- Rock, The (1996)
9- Mars Attacks! (1996)
10- Long Kiss Goodnight, The (1996)


In [34]:
# Display the item-item matrix.
# NOTE(review): the saved output for this cell is labelled `movieId` with ids up to 193609,
# while the ml-100k matrix in this notebook has `movie_id` columns 1-1682; the output
# looks stale from a different run -- re-run the cell to refresh it.
item_item_dist

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.000000,0.685415,0.376671,0.070300,0.390729,0.492168,0.258641,0.232002,0.341891,0.707808,...,-0.587525,-0.710694,-0.540378,-0.507455,-0.528808,-0.517201,-0.568411,-0.534294,-0.505178,-0.574958
2,0.685415,1.000000,0.531024,0.060557,0.544537,0.479063,0.335959,0.287156,0.206342,0.774636,...,-0.604303,-0.710902,-0.560751,-0.554280,-0.548169,-0.549564,-0.558431,-0.610248,-0.606481,-0.508038
3,0.376671,0.531024,1.000000,0.444430,0.581024,0.399764,0.663089,0.449790,0.445221,0.586137,...,-0.497700,-0.563245,-0.587954,-0.504504,-0.571322,-0.415541,-0.471689,-0.578754,-0.564530,-0.490232
4,0.070300,0.060557,0.444430,1.000000,0.571677,0.041805,0.654426,0.501068,0.359884,0.198039,...,0.061032,-0.048778,-0.013043,0.044382,-0.053773,0.108240,0.017557,0.028752,0.018055,-0.062271
5,0.390729,0.544537,0.581024,0.571677,1.000000,0.172447,0.638122,0.409900,0.370303,0.467759,...,-0.163891,-0.249522,-0.149966,-0.154211,-0.163822,-0.072113,-0.120023,-0.191747,-0.163329,-0.226064
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,-0.517201,-0.549564,-0.415541,0.108240,-0.072113,-0.542651,-0.252462,-0.068887,-0.104291,-0.589592,...,0.933080,0.929667,0.929815,0.956851,0.914449,1.000000,0.963489,0.928506,0.958628,0.555825
193583,-0.568411,-0.558431,-0.471689,0.017557,-0.120023,-0.502524,-0.320727,-0.081031,-0.036005,-0.583178,...,0.945526,0.932422,0.958786,0.956567,0.961523,0.963489,1.000000,0.944145,0.970327,0.611128
193585,-0.534294,-0.610248,-0.578754,0.028752,-0.191747,-0.553692,-0.340490,-0.137744,-0.155284,-0.656588,...,0.935510,0.919403,0.960033,0.970913,0.924770,0.928506,0.944145,1.000000,0.964682,0.596296
193587,-0.505178,-0.606481,-0.564530,0.018055,-0.163329,-0.525788,-0.393446,-0.135215,-0.110009,-0.634498,...,0.945878,0.927634,0.973316,0.961604,0.965037,0.958628,0.970327,0.964682,1.000000,0.615658
