In [54]:
import numpy as np
import pandas as pd
import requests
from io import StringIO
from lightfm import LightFM
from scipy.sparse import coo_matrix

# Fetch the MovieLens 100k files from GroupLens and load them into DataFrames.
movies_url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.item'
ratings_url = 'http://files.grouplens.org/datasets/movielens/ml-100k/u.data'

# Seconds to wait on the server before giving up, so a stalled download
# cannot hang the kernel indefinitely.
REQUEST_TIMEOUT_S = 30

# Load the movie metadata (pipe-separated, no header row).
response = requests.get(movies_url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()  # fail loudly instead of parsing an HTTP error page as CSV
# The bytes -> text decoding happens in `response.text`, so the encoding has to be
# set on the response itself. Passing `encoding=` to `read_csv` does nothing for a
# StringIO, whose content is already decoded.
response.encoding = 'latin-1'
movies_df = pd.read_csv(StringIO(response.text), sep='|', header=None)
movies_df.columns = [
    'movie_id', 'title', 'release_date', 'video_release_date', 'IMDb_URL',
    'unknown', 'Action', 'Adventure', 'Animation', 'Children', 'Comedy',
    'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
    'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western',
]

# Dictionary mapping movie IDs to titles; used later to turn item indices into names.
movie_id_to_title = pd.Series(movies_df.title.values, index=movies_df.movie_id).to_dict()

# Download and load the ratings (tab-separated, no header row).
response = requests.get(ratings_url, timeout=REQUEST_TIMEOUT_S)
response.raise_for_status()
ratings_df = pd.read_csv(StringIO(response.text), sep='\t', header=None)
ratings_df.columns = ['user_id', 'movie_id', 'rating', 'timestamp']

# Build the sparse user x item matrix handed to LightFM.
# Raw user/movie IDs are used directly as row/column indices, so each dimension
# is sized `max_id + 1`.
# NOTE(review): if the IDs are 1-based (as they appear to be for MovieLens — confirm),
# row 0 and column 0 stay empty.
n_users = ratings_df['user_id'].max() + 1
n_items = ratings_df['movie_id'].max() + 1

ratings_matrix = coo_matrix(
    (ratings_df.rating, (ratings_df.user_id, ratings_df.movie_id)),
    shape=(n_users, n_items),
)

# Create and train the LightFM model with the WARP ranking loss.
# A fixed seed makes the random initialisation repeatable between runs.
# NOTE(review): with num_threads > 1 training may still not be bit-for-bit
# reproducible — confirm against the LightFM documentation.
# NOTE(review): the matrix holds raw 1-5 ratings; presumably every stored rating
# is treated as a positive interaction by WARP — confirm this is intended.
SEED = 42
model = LightFM(loss='warp', random_state=SEED)
model.fit(ratings_matrix, epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x14003f1a0>

In [55]:
# generates the recommendation function
def sample_recommendation(model, ratings_matrix, user_ids, n_top=3, id_to_title=None):
    """Print known interactions and top-ranked predictions for each user.

    Parameters
    ----------
    model
        Object exposing ``predict(user_id, item_ids)`` that returns one score
        per item id (higher means ranked earlier).
    ratings_matrix
        Sparse user x item matrix (anything with ``.shape`` and ``.tocsr()``).
        The stored entries of a user's row are reported as "known positives".
    user_ids
        Iterable of row indices into ``ratings_matrix`` to report on.
    n_top : int, default 3
        How many known positives and how many recommendations to print per user.
    id_to_title : mapping, optional
        Maps an item index to a display title. Defaults to the notebook-level
        ``movie_id_to_title``. Indices missing from the mapping are printed as
        their number.

    Returns
    -------
    None
        Results are written to stdout.
    """
    if id_to_title is None:
        # Fall back to the lookup defined at notebook level.
        id_to_title = movie_id_to_title

    n_items = ratings_matrix.shape[1]
    item_ids = np.arange(n_items)
    # Convert once, outside the loop: the conversion does not depend on the user.
    interactions_csr = ratings_matrix.tocsr()

    for user_id in user_ids:
        # Column indices of the entries stored in this user's row.
        known_positives_indices = interactions_csr[user_id].indices
        known_positives_titles = [id_to_title.get(i, str(i)) for i in known_positives_indices]

        # Score every item for this user.
        scores = model.predict(user_id, item_ids)

        # Negate so that argsort yields highest score first.
        top_items_indices = np.argsort(-scores)
        top_items_titles = [id_to_title.get(i, str(i)) for i in top_items_indices[:n_top]]

        print(f"User {user_id}")
        print("     Known positives:")
        for title in known_positives_titles[:n_top]:
            print(f"        {title}")

        print("     Recommended:")
        for title in top_items_titles:
            print(f"        {title}")



In [57]:
# Example: print known positives and top recommendations for three users.
sample_recommendation(
    model=model,
    ratings_matrix=ratings_matrix,
    user_ids=[3, 25, 450],
)

User 3
     Known positives:
        Return of the Jedi (1983)
        Devil's Own, The (1997)
        Contact (1997)
     Recommended:
        Air Force One (1997)
        L.A. Confidential (1997)
        Game, The (1997)
User 25
     Known positives:
        Toy Story (1995)
        Twelve Monkeys (1995)
        Babe (1995)
     Recommended:
        Star Wars (1977)
        Raiders of the Lost Ark (1981)
        Return of the Jedi (1983)
User 450
     Known positives:
        Toy Story (1995)
        GoldenEye (1995)
        Four Rooms (1995)
     Recommended:
        Forrest Gump (1994)
        Dances with Wolves (1990)
        When Harry Met Sally... (1989)
