Load and Explore the Data

In [42]:
import pandas as pd

# Read the four CSV files under /content into separate DataFrames.
movies = pd.read_csv('/content/movies.csv')
ratings = pd.read_csv('/content/ratings.csv')
tags = pd.read_csv('/content/tags.csv')
links = pd.read_csv('/content/links.csv')

# Preview the first five rows of each frame. sep="\n" keeps each label on its
# own line above the table, exactly as two consecutive print() calls would.
print("Movies DataFrame:", movies.head(), sep="\n")
print("\nRatings DataFrame:", ratings.head(), sep="\n")
print("\nTags DataFrame:", tags.head(), sep="\n")
print("\nLinks DataFrame:", links.head(), sep="\n")


Movies DataFrame:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  

Ratings DataFrame:
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931

Tags DataFrame:
   userId  movieId              tag   timestamp
0       2    60756            funny  1445714994
1       2    60756  Highly

Preprocess the Data

In [43]:
# Reshape the long-format ratings into a wide matrix: one row per userId, one
# column per movieId, cell = rating. Movies a user has not rated are set to 0.
user_item_matrix = (
    ratings
    .pivot(index='userId', columns='movieId', values='rating')
    .fillna(0)
)

print(user_item_matrix.head())

movieId  1       2       3       4       5       6       7       8       \
userId                                                                    
1           4.0     0.0     4.0     0.0     0.0     4.0     0.0     0.0   
2           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   
5           4.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0   

movieId  9       10      ...  193565  193567  193571  193573  193579  193581  \
userId                   ...                                                   
1           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
2           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
3           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0.0   
4           0.0     0.0  ...     0.0     0.0     0.0     0.0     0.0     0

Build the Recommendation System

In [45]:
import pandas as pd
from scipy.sparse.linalg import svds
import numpy as np

# Centre each user's row on that user's mean rating.
# NOTE(review): the mean is taken over every movie column, so the 0 placeholders
# for unrated movies are included in it - confirm this is the intended baseline.
user_ratings_mean = user_item_matrix.mean(axis='columns')
user_item_matrix_normalized = user_item_matrix.subtract(user_ratings_mean, axis='index')

# svds is given a plain NumPy array rather than the DataFrame.
user_item_matrix_normalized_array = user_item_matrix_normalized.to_numpy()

# Truncated SVD keeping the 50 largest singular values/vectors.
U, sigma, Vt = svds(user_item_matrix_normalized_array, k=50)

# svds returns the singular values as a 1-D array; expand them to a diagonal
# matrix so the factors can be multiplied back together.
sigma = np.diag(sigma)

# Low-rank reconstruction, then add each user's mean back onto that user's row.
all_user_predicted_ratings = U @ sigma @ Vt + user_ratings_mean.to_numpy()[:, np.newaxis]
predicted_ratings = pd.DataFrame(
    data=all_user_predicted_ratings,
    columns=user_item_matrix.columns,
)

print(predicted_ratings.head())

movieId    1         2         3         4         5         6         7       \
0        2.167328  0.402751  0.840184 -0.076281 -0.551337  2.504091 -0.890114   
1        0.211459  0.006658  0.033455  0.017419  0.183430 -0.062473  0.083037   
2        0.003588  0.030518  0.046393  0.008176 -0.006247  0.107328 -0.012416   
3        2.051549 -0.387104 -0.252199  0.087562  0.130465  0.270210  0.477835   
4        1.344738  0.778511  0.065749  0.111744  0.273144  0.584426  0.254930   

movieId    8         9         10      ...    193565    193567    193571  \
0       -0.026443  0.196974  1.593259  ... -0.023453 -0.019967 -0.026939   
1        0.024158  0.049330 -0.152530  ...  0.019498  0.016777  0.022219   
2        0.003779  0.007297 -0.059362  ...  0.005909  0.006209  0.005610   
3        0.040313  0.025858 -0.017365  ...  0.004836  0.004172  0.005500   
4        0.128788 -0.085541  1.023455  ... -0.008042 -0.007419 -0.008664   

movieId    193573    193579    193581    193583    19358

Recommend Movies

In [46]:
def recommend_movies(predictions_df, user_id, movies_df, original_ratings_df, num_recommendations=10):
    """Return a user's rated movies and the best-predicted movies they have not rated.

    Rows of ``predictions_df`` are addressed by position and are expected to be
    in ascending ``userId`` order, one row per distinct user in
    ``original_ratings_df`` (the order a ``pivot`` on ``userId`` produces).
    The row for ``user_id`` is located through that ordering rather than by
    ``user_id - 1``, so gaps in the user ids or ids that do not start at 1 are
    handled correctly.

    Parameters
    ----------
    predictions_df : pandas.DataFrame
        Predicted ratings; one row per user, one column per movieId.
    user_id : int
        The ``userId`` to produce recommendations for.
    movies_df : pandas.DataFrame
        Movie metadata; must contain a ``movieId`` column.
    original_ratings_df : pandas.DataFrame
        Observed ratings; must contain ``userId``, ``movieId`` and ``rating``.
    num_recommendations : int, default 10
        Maximum number of recommendations to return.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``user_full``: the user's ratings joined with ``movies_df``, sorted by
        rating in descending order.
        ``recommendations``: rows of ``movies_df`` the user has not rated,
        ordered by predicted rating (highest first) and truncated to
        ``num_recommendations``; the columns are those of ``movies_df``.

    Raises
    ------
    ValueError
        If ``user_id`` has no ratings in ``original_ratings_df``, or if the
        number of distinct users does not match the number of prediction rows.
    """
    # Translate the user id into a row position via the sorted distinct ids.
    known_user_ids = pd.Index(original_ratings_df['userId'].unique()).sort_values()
    if len(known_user_ids) != len(predictions_df):
        raise ValueError(
            'predictions_df has {0} rows but original_ratings_df contains {1} distinct users.'
            .format(len(predictions_df), len(known_user_ids))
        )
    user_row_number = known_user_ids.get_indexer([user_id])[0]  # -1 when absent
    if user_row_number < 0:
        raise ValueError('User {0} has no ratings in original_ratings_df.'.format(user_id))

    # Name the prediction column and its movieId key explicitly so the merge
    # below does not depend on the frame's row labels or column-axis name.
    user_predictions = (predictions_df.iloc[user_row_number]
                        .rename('PredictedRating')
                        .rename_axis('movieId')
                        .reset_index())

    # Get the user's data and merge with the movies dataframe
    user_data = original_ratings_df[original_ratings_df['userId'] == user_id]
    user_full = (user_data.merge(movies_df, how='left', on='movieId')
                 .sort_values(['rating'], ascending=False))

    print('User {0} has already rated {1} movies.'.format(user_id, user_full.shape[0]))
    print('Recommending the highest {0} predicted rating movies not already rated.'.format(num_recommendations))

    # Recommend the highest predicted rating movies that the user hasn't seen yet.
    # Movies with no prediction get NaN from the left merge and sort last.
    unseen_movies = movies_df[~movies_df['movieId'].isin(user_full['movieId'])]
    recommendations = (unseen_movies.merge(user_predictions, how='left', on='movieId')
                       .sort_values('PredictedRating', ascending=False)
                       .iloc[:num_recommendations]
                       .drop(columns='PredictedRating'))

    return user_full, recommendations

# Produce the rated-movie history and the top 10 recommendations for user 1.
already_rated, predictions = recommend_movies(
    predictions_df=predicted_ratings,
    user_id=1,
    movies_df=movies,
    original_ratings_df=ratings,
    num_recommendations=10,
)

# sep="\n" prints each heading on its own line above the table.
print("\nMovies already rated by the user:", already_rated, sep="\n")
print("\nTop 10 movie recommendations for the user:", predictions, sep="\n")


User 1 has already rated 232 movies.
Recommending the highest 10 predicted rating movies not already rated.

Movies already rated by the user:
     userId  movieId  rating  timestamp  \
231       1     5060     5.0  964984002   
185       1     2872     5.0  964981680   
89        1     1291     5.0  964981909   
90        1     1298     5.0  964984086   
190       1     2948     5.0  964982191   
..      ...      ...     ...        ...   
170       1     2617     2.0  964982588   
143       1     2253     2.0  964981775   
148       1     2338     2.0  964983546   
152       1     2389     2.0  964983094   
205       1     3176     1.0  964983504   

                                            title  \
231                  M*A*S*H (a.k.a. MASH) (1970)   
185                              Excalibur (1981)   
89      Indiana Jones and the Last Crusade (1989)   
90                    Pink Floyd: The Wall (1982)   
190                  From Russia with Love (1963)   
..                    