# **Install required libraries**

In [1]:
# Step A: Clean install of numpy and surprise
!pip uninstall -y numpy
!pip uninstall -y scikit-surprise
!pip install numpy==1.23.5
!pip install scikit-surprise


Found existing installation: numpy 2.3.0
Uninstalling numpy-2.3.0:
  Successfully uninstalled numpy-2.3.0
Found existing installation: scikit-surprise 1.1.4
Uninstalling scikit-surprise-1.1.4:
  Successfully uninstalled scikit-surprise-1.1.4
Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m88.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jax 0.5.2 requires numpy>=1.25, but you have numpy 1.23.5 which is incompatible.
chex 0.1.89 requires numpy>=1.24.1, but you have numpy 1.23.5 which is incompatible.
imbalanced-learn 0.13.0 req

Collecting scikit-surprise
  Using cached scikit_surprise-1.1.4-cp311-cp311-linux_x86_64.whl
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.4


# **Import libraries**

In [1]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
import pandas as pd

# **Load sample dataset**

In [2]:
# Load the built-in MovieLens 100k ratings. prompt=False downloads the archive
# without asking, so the cell does not block on input during "Run All".
data = Dataset.load_builtin('ml-100k', prompt=False)

# Hold out 20% of the ratings for evaluation. The fixed random_state makes the
# shuffle, and therefore every metric computed from this split, repeatable.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)


Dataset ml-100k could not be found. Do you want to download it? [Y/n] y
Trying to download dataset from https://files.grouplens.org/datasets/movielens/ml-100k.zip...
Done! Dataset ml-100k has been saved to /root/.surprise_data/ml-100k


# **Build and Train SVD Model (Matrix Factorization)**

In [3]:
# Seed the algorithm's random number generator so that re-running the notebook
# trains the same model and reproduces the same predictions.
model = SVD(random_state=42)
model.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7cf12eef0710>

# **Make Predictions**

In [4]:
# Score the held-out test set with the fitted model. The resulting predictions
# feed both the accuracy metrics and the top-N ranking further down.
predictions = model.test(testset)

# **Evaluate the Model**

In [5]:
# Each accuracy call prints its metric and also returns the value; RMSE is
# evaluated first, then MAE, so the printed order is unchanged.
rmse, mae = accuracy.rmse(predictions), accuracy.mae(predictions)

RMSE: 0.9434
MAE:  0.7466


# **Predict a Rating for a Single User–Item Pair**

In [6]:
# Estimate the rating user 196 would give item 302 (sample pair).
# The ids are passed as strings, exactly as the original str(...) calls produced.
uid = "196"
iid = "302"

pred = model.predict(uid, iid)
print(f"Predicted rating for user {uid} on item {iid} is: {pred.est:.2f}")

Predicted rating for user 196 on item 302 is: 4.06


# **Show Top-N Recommendations**

In [7]:
from collections import defaultdict

def get_top_n(predictions, n=5):
    """Return the n highest-estimated items for every user in `predictions`.

    Args:
        predictions: Iterable of 5-element records that unpack as
            (uid, iid, true_rating, estimate, details). Only the user id,
            item id and estimate are used.
        n: Maximum number of items to keep per user. Must be >= 0.

    Returns:
        A defaultdict(list) mapping each uid to a list of (iid, estimate)
        pairs sorted by estimate, highest first, with at most n entries.
        Because it is a defaultdict, looking up an unknown uid yields [].

    Raises:
        ValueError: If n is negative. A negative n would otherwise slice off
            the lowest-rated items instead of selecting the top ones.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")

    top_n = defaultdict(list)
    for uid, iid, _true_r, est, _details in predictions:
        top_n[uid].append((iid, est))

    # Only existing keys are reassigned here, so the dict size does not change
    # while it is being iterated.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda pair: pair[1], reverse=True)
        top_n[uid] = user_ratings[:n]
    return top_n

# `predictions` comes from model.test(testset), so this ranks only the items
# that appear in the held-out test set for each user.
top_n = get_top_n(predictions, n=5)
top_n["196"]  # Five highest-estimated test-set items for user 196

[('173', 4.152154235132657),
 ('8', 4.093820105360976),
 ('257', 3.78778541720714),
 ('286', 3.667495889885389),
 ('13', 3.6019429028003067)]