In [11]:
# 1. Import Libraries
import pandas as pd
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from surprise import accuracy
from sklearn.metrics import precision_score, recall_score

# 2. Dataset locations (raw strings keep the Windows backslashes literal)
ratings_file = r"C:\Users\Admin\Downloads\ml-latest-small\ml-latest-small\ratings.csv"
movies_file = r"C:\Users\Admin\Downloads\ml-latest-small\ml-latest-small\movies.csv"

# 3. Load both tables: ratings first, then movie metadata
df, movies_df = (pd.read_csv(csv_path) for csv_path in (ratings_file, movies_file))

# Optional: preview the first rows of each table under its heading
for heading, frame in (("Ratings Data:", df), ("\nMovies Data:", movies_df)):
    print(heading)
    print(frame.head())

# 4. Prepare data for Surprise
# One fixed seed for both the split and the model, so the metrics and the
# recommendations below are reproducible from run to run.
RANDOM_STATE = 42

reader = Reader(rating_scale=(0.5, 5))
data = Dataset.load_from_df(df[['userId', 'movieId', 'rating']], reader)
trainset, testset = train_test_split(data, test_size=0.2, random_state=RANDOM_STATE)

# 5. Train a model using SVD
model = SVD(random_state=RANDOM_STATE)
model.fit(trainset)

# 6. Predict on test set (the held-out 20% of ratings)
predictions = model.test(testset)

# 7. Evaluate with RMSE
# accuracy.rmse prints "RMSE: ..." on its own unless verbose=False; silence it
# so the score is reported exactly once, by the print below.
rmse = accuracy.rmse(predictions, verbose=False)
print(f"RMSE: {rmse:.4f}")

# 8. Precision & Recall Calculation
def calculate_precision_recall(predictions, threshold=3.5):
    """Return (precision, recall) for predictions viewed as a binary classifier.

    Each prediction is a 5-tuple ``(uid, iid, true_r, est, details)``. A rating
    counts as positive when it is at least ``threshold``; the true rating
    supplies the label and the estimated rating supplies the predicted label.
    Both scores are computed over all predictions pooled together, and an
    undefined score (no positive predictions / no positive labels) is reported
    as 0.
    """
    rating_pairs = [(true_r, est) for _uid, _iid, true_r, est, _details in predictions]
    y_true = [1 if actual >= threshold else 0 for actual, _ in rating_pairs]
    y_pred = [1 if estimate >= threshold else 0 for _, estimate in rating_pairs]

    return (
        precision_score(y_true, y_pred, zero_division=0),
        recall_score(y_true, y_pred, zero_division=0),
    )

# Score the held-out predictions at the default threshold and report both
# metrics, one per line.
precision, recall = calculate_precision_recall(predictions)
print(f"Precision: {precision:.4f}", f"Recall: {recall:.4f}", sep="\n")

# 9. Coverage Calculation
def calculate_coverage(predictions, threshold=3.5, total_items=None):
    """Return the fraction of the catalogue that gets recommended.

    An item counts as recommended when at least one prediction for it has an
    estimated rating of ``threshold`` or more.

    Args:
        predictions: Iterable of 5-tuples ``(uid, iid, true_r, est, details)``.
        threshold: Minimum estimated rating for an item to count as
            recommended. Defaults to 3.5, the same cut-off used by
            ``calculate_precision_recall``.
        total_items: Size of the catalogue used as the denominator. When
            omitted, the number of distinct ``movieId`` values in the
            module-level ratings frame ``df`` is used.

    Returns:
        The number of distinct recommended items divided by ``total_items``,
        or 0.0 when the catalogue is empty.
    """
    if total_items is None:
        total_items = df['movieId'].nunique()
    if not total_items:
        # Empty catalogue: nothing can be covered, and dividing would raise.
        return 0.0

    recommended_items = {iid for (_, iid, _, est, _) in predictions if est >= threshold}
    return len(recommended_items) / total_items

# Share of the rated catalogue with at least one high predicted rating.
coverage = calculate_coverage(predictions)
coverage_report = f"Coverage: {coverage:.4f}"
print(coverage_report)

# 10. Top-N Recommendations for a user
def get_top_n(predictions, n=10):
    """Group predictions by user and keep each user's ``n`` best-scored items.

    Args:
        predictions: Iterable of 5-tuples ``(uid, iid, true_r, est, details)``.
        n: Maximum number of items to keep per user.

    Returns:
        A dict mapping each user id to a list of ``(iid, est)`` pairs ordered
        by estimated rating, highest first. Items with equal estimates keep
        the order in which they appeared in ``predictions``.
    """
    scored_by_user = {}
    for uid, iid, _true_r, est, _details in predictions:
        if uid not in scored_by_user:
            scored_by_user[uid] = []
        scored_by_user[uid].append((iid, est))

    return {
        uid: sorted(scored, key=lambda pair: pair[1], reverse=True)[:n]
        for uid, scored in scored_by_user.items()
    }

top_n = get_top_n(predictions, n=10)

# 11. Resolve User 1's top items to titles once; the same rows feed both the
# console listing and the CSV export. A user with no predictions yields an
# empty list.
movie_titles = dict(zip(movies_df['movieId'], movies_df['title']))
user_1_items = top_n.get(1, [])
recommendations = [
    [movie_titles.get(movie_id, 'Unknown Movie'), score]
    for movie_id, score in user_1_items
]

print("\nTop 10 movie recommendations for User 1:")
for title, score in recommendations:
    print(f"{title} - Predicted Rating: {score:.2f}")

# 12. Save recommendations for User 1 to CSV
recommendations_df = pd.DataFrame(recommendations, columns=["Movie", "Predicted Rating"])
recommendations_df.to_csv("user_1_top_10_recommendations.csv", index=False)


Ratings Data:
   userId  movieId  rating  timestamp
0       1        1     4.0  964982703
1       1        3     4.0  964981247
2       1        6     4.0  964982224
3       1       47     5.0  964983815
4       1       50     5.0  964982931

Movies Data:
   movieId                               title  \
0        1                    Toy Story (1995)   
1        2                      Jumanji (1995)   
2        3             Grumpier Old Men (1995)   
3        4            Waiting to Exhale (1995)   
4        5  Father of the Bride Part II (1995)   

                                        genres  
0  Adventure|Animation|Children|Comedy|Fantasy  
1                   Adventure|Children|Fantasy  
2                               Comedy|Romance  
3                         Comedy|Drama|Romance  
4                                       Comedy  
RMSE: 0.8711
RMSE: 0.8711
Precision: 0.7928
Recall: 0.6913
Coverage: 0.3021

Top 10 movie recommendations for User 1:
Monty Python and the Holy Grail