In [2]:
# 🧠 Task 4: Movie Recommendation System using SVD (MovieLens 100k)

# 🛠️ Fix NumPy version compatibility for scikit-surprise
!pip install numpy==1.23.5
!pip install scikit-surprise --no-binary :all:

Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m54.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
imbalanced-learn 0.13.0 requires numpy<3,>=1.24.3, but you have numpy 1.23.5 which is incompatible.
jaxlib 0.5.1 requires numpy>=1.25, but you have numpy 1.23.5 which is incompatible.
pymc 5.22.0 requires numpy>=1.25.0, but you have numpy 1.23.5 which is incompatible.
xarray 2025.3.1 re



In [1]:
# 🔄 Restart runtime manually after running the above 2 cells
# Then run the rest of this code ⬇️

In [2]:
# ✅ Imports (run after restart)
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split, cross_validate
from surprise import accuracy
import pandas as pd
from collections import defaultdict

In [3]:
# Load built-in MovieLens 100k dataset
data = Dataset.load_builtin('ml-100k')

Dataset ml-100k could not be found. Do you want to download it? [Y/n] y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k


In [4]:
# Train-test split
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

In [5]:
# Use SVD algorithm
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7a4bf5ed7910>

In [6]:
# Predict on test data
predictions = model.test(testset)

In [7]:
# Evaluate model
rmse = accuracy.rmse(predictions)
mae = accuracy.mae(predictions)

RMSE: 0.9372
MAE:  0.7393


In [8]:
# Cross-validation scores
cross_validate(model, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9427  0.9354  0.9343  0.9371  0.9280  0.9355  0.0047  
MAE (testset)     0.7426  0.7390  0.7342  0.7371  0.7315  0.7369  0.0038  
Fit time          1.53    1.52    1.78    2.17    1.55    1.71    0.25    
Test time         0.26    0.12    0.36    0.12    0.13    0.20    0.10    


{'test_rmse': array([0.94265655, 0.93542835, 0.93434459, 0.93705541, 0.92802017]),
 'test_mae': array([0.74260867, 0.73899939, 0.73421786, 0.73711741, 0.73151919]),
 'fit_time': (1.5287423133850098,
  1.5239768028259277,
  1.7805545330047607,
  2.17036771774292,
  1.5543088912963867),
 'test_time': (0.26389551162719727,
  0.12084269523620605,
  0.3582899570465088,
  0.12327790260314941,
  0.13286232948303223)}

In [9]:
# Recommend top 5 movies for a given user
def get_top_n(predictions, n=5):
    from collections import defaultdict
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n

top_n_recommendations = get_top_n(predictions, n=5)

In [10]:
# Print top 5 movie IDs recommended for user '196'
print("Top 5 movie IDs recommended for user 196:")
for movie_id, rating in top_n_recommendations['196']:
    print(f"Movie ID: {movie_id}, Predicted Rating: {rating:.2f}")

Top 5 movie IDs recommended for user 196:
Movie ID: 306, Predicted Rating: 4.11
Movie ID: 173, Predicted Rating: 4.09
Movie ID: 116, Predicted Rating: 3.86
Movie ID: 153, Predicted Rating: 3.67
Movie ID: 70, Predicted Rating: 3.66
