In [3]:
import os
import urllib.request
import zipfile

# Create a directory for the dataset
dataset_dir = 'ml-100k'
if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir)

# Download the dataset
url = 'http://files.grouplens.org/datasets/movielens/ml-100k.zip'
zip_path = os.path.join(dataset_dir, 'ml-100k.zip')
urllib.request.urlretrieve(url, zip_path)

# Unzip the dataset
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(dataset_dir)

# List the files to confirm
os.listdir(dataset_dir)


['ml-100k.zip', 'ml-100k']

In [7]:
import pandas as pd

# Define file paths
ratings_path = os.path.join(dataset_dir, 'ml-100k', 'u.data')
movies_path = os.path.join(dataset_dir, 'ml-100k', 'u.item')

# Load the dataset
ratings = pd.read_csv(ratings_path, sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])
movies = pd.read_csv(movies_path, sep='|', encoding='latin-1', names=['item_id', 'title'], usecols=[0, 1])

# Display the first few rows of each dataset
print(ratings.head())
print(movies.head())

# Merge ratings with movies
ratings = pd.merge(ratings, movies, on='item_id')

# Create user-item interaction matrix
user_movie_matrix = ratings.pivot_table(index='user_id', columns='title', values='rating')

# Fill NaN values with 0
user_movie_matrix_filled = user_movie_matrix.fillna(0)

# Compute cosine similarity between users
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

user_similarity = cosine_similarity(user_movie_matrix_filled)

# Convert to DataFrame for better readability
user_similarity_df = pd.DataFrame(user_similarity, index=user_movie_matrix.index, columns=user_movie_matrix.index)

# Function to get recommendations
def get_recommendations(user_id, num_recommendations=5):
    # Get similarity scores for the user
    user_similarity_scores = user_similarity_df[user_id]

    # Get ratings given by the user
    user_ratings = user_movie_matrix.loc[user_id]

    # Compute the weighted sum of ratings of similar users
    weighted_ratings = user_similarity_scores.dot(user_movie_matrix_filled) / user_similarity_scores.sum()

    # Create a DataFrame with the weighted ratings
    recommendations = pd.DataFrame(weighted_ratings, index=user_movie_matrix.columns, columns=['rating'])

    # Remove movies already rated by the user
    recommendations = recommendations[~recommendations.index.isin(user_ratings.dropna().index)]

    # Sort the recommendations
    recommendations = recommendations.sort_values(by='rating', ascending=False)

    return recommendations.head(num_recommendations)

# Get recommendations for a user
user_id = 1
recommendations = get_recommendations(user_id)
print(recommendations)





   user_id  item_id  rating  timestamp
0      196      242       3  881250949
1      186      302       3  891717742
2       22      377       1  878887116
3      244       51       2  880606923
4      166      346       1  886397596
   item_id              title
0        1   Toy Story (1995)
1        2   GoldenEye (1995)
2        3  Four Rooms (1995)
3        4  Get Shorty (1995)
4        5     Copycat (1995)
                                            rating
title                                             
'Til There Was You (1997)                      NaN
1-900 (1994)                                   NaN
187 (1997)                                     NaN
2 Days in the Valley (1996)                    NaN
3 Ninjas: High Noon At Mega Mountain (1998)    NaN
