# <b>Item-Based Collaborative Filtering (CF)</b>

In [23]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def performItemBasedCF(ratingMatrix, targetUserIndex, N, K):
    """Recommend the top-N unrated items for one user with item-based CF.

    Parameters
    ----------
    ratingMatrix : pandas.DataFrame
        User-item matrix (one row per user, one column per item) in which
        a value of 0 marks an item the user has not rated.
    targetUserIndex : int
        Positional (0-based) row of the user to recommend for.
    N : int
        Maximum number of recommendations to return.
    K : int
        Number of most-similar items used as neighbours for each prediction.

    Returns
    -------
    list of (column label, float)
        At most N ``(item, predicted rating)`` pairs, sorted by predicted
        rating in descending order. Only items the target user has not
        rated are included.

    Notes
    -----
    The prediction is the similarity-weighted average of the user's raw
    ratings on the K neighbours. Neighbours the user has not rated count
    as a rating of 0, so they lower the prediction. An item whose K
    neighbours all have zero similarity gets a predicted rating of 0.0
    instead of NaN.
    """
    # calculate cosine similarity between items (rows of the transpose are items)
    itemSimilarityMatrix = cosine_similarity(ratingMatrix.T)
    # zero the diagonal so an item is never chosen as its own neighbour
    np.fill_diagonal(itemSimilarityMatrix, 0)

    # read the target user's row once from the matrix that was passed in,
    # not from a notebook-level global
    targetUserRatings = ratingMatrix.iloc[targetUserIndex].to_numpy()

    # find recommended movies
    recommendations = []
    for movieIndex, movieTitle in enumerate(ratingMatrix.columns):
        # exclude already watched movies
        if targetUserRatings[movieIndex] != 0:
            continue

        # select the top-K items most similar to the target item
        lstSimilarity = itemSimilarityMatrix[movieIndex]
        lstSimilarItems = np.argsort(-lstSimilarity)[:K]

        # calculate the predicted rating by weighted average
        weights = lstSimilarity[lstSimilarItems]
        weightSum = weights.sum()
        if weightSum == 0:
            # no neighbour carries any weight: avoid 0/0 -> NaN, which would
            # make the ordering produced by the sort below undefined
            predictedRating = 0.0
        else:
            valueSum = np.dot(weights, targetUserRatings[lstSimilarItems])
            predictedRating = float(valueSum / weightSum)

        recommendations.append((movieTitle, predictedRating))

    # sort the recommendations by rating in descending order
    recommendations.sort(key=lambda x: x[1], reverse=True)
    top_n_recommendations = recommendations[:N]

    return top_n_recommendations


# Load the ratings and the movie metadata, then attach a title to every rating
rawRatings = pd.read_csv('ratings.csv')
rawMovies = pd.read_csv('movies.csv')
rawData = rawRatings.merge(rawMovies, on='movieId')

# userId x title matrix; user/movie pairs without a rating become 0
userMovieRating = rawData.pivot_table(values='rating', index='userId', columns='title').fillna(0)

targetUserIndex = 80  # positional row index, 0 ~ 609
N = 10  # length of the recommendation list
K = 5  # neighbours per prediction

top_n_recommendations = performItemBasedCF(userMovieRating, targetUserIndex, N, K)

# display the list of recommended movies for the user
header = f"Top {N} movie recommendations for User {targetUserIndex + 1}:"
print(header)
for movie_title, predicted_rating in top_n_recommendations:
    line = f"{movie_title} (Predicted Rating: {predicted_rating:.2f})"
    print(line)

Top 10 movie recommendations for User 81:
Silence of the Lambs, The (1991) (Predicted Rating: 2.99)
Babe (1995) (Predicted Rating: 2.99)
Maverick (1994) (Predicted Rating: 2.60)
Dances with Wolves (1990) (Predicted Rating: 2.40)
Interview with the Vampire: The Vampire Chronicles (1994) (Predicted Rating: 2.20)
Demolition Man (1993) (Predicted Rating: 1.96)
In the Line of Fire (1993) (Predicted Rating: 1.93)
Usual Suspects, The (1995) (Predicted Rating: 1.85)
Clear and Present Danger (1994) (Predicted Rating: 1.82)
Mrs. Doubtfire (1993) (Predicted Rating: 1.77)


## Step 1 - Preparing Data

In [24]:
# Read the two CSV files and join them on movieId so each rating carries its title
rawRatings = pd.read_csv('ratings.csv')
rawMovies = pd.read_csv('movies.csv')
rawData = rawRatings.merge(rawMovies, on='movieId')

# Pivot to a userId x title matrix and store 0 where no rating exists
userMovieRating = rawData.pivot_table(values='rating', index='userId', columns='title').fillna(0)
userMovieRating.head()

title,'71 (2014),'Hellboy': The Seeds of Creation (2004),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),'Tis the Season for Love (2015),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),*batteries not included (1987),...,Zulu (2013),[REC] (2007),[REC]² (2009),[REC]³ 3 Génesis (2012),anohana: The Flower We Saw That Day - The Movie (2013),eXistenZ (1999),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931)
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Step 2 - Calculate Cosine Similarity Between Items

In [25]:
# Transposed view: one row per movie title, one column per userId
userMovieRating.T.head()

userId,1,2,3,4,5,6,7,8,9,10,...,601,602,603,604,605,606,607,608,609,610
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
'71 (2014),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Salem's Lot (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Til There Was You (1997),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
# Item-item cosine similarity: after transposing, each row is one movie's rating vector
itemSimilarityMatrix = cosine_similarity(userMovieRating.transpose())
# zero the diagonal so a movie never counts as its own most-similar neighbour
np.fill_diagonal(itemSimilarityMatrix, val=0)

print(itemSimilarityMatrix)

[[0.         0.         0.         ... 0.32732684 0.         0.        ]
 [0.         0.         0.70710678 ... 0.         0.         0.        ]
 [0.         0.70710678 0.         ... 0.         0.         0.        ]
 ...
 [0.32732684 0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]]


## Step 3 - Find Neighbors & Predict Ratings for Unobserved Movies from Raw Ratings

In [27]:
targetUserIndex = 80  # positional row index into userMovieRating (0 ~ 609)
N = 10  # number of recommendations kept in the final list
K = 5  # number of most-similar movies used as neighbours per prediction

In [28]:
# Score every movie the target user has not rated yet
recommendations = []
for movieIndex, movieTitle in enumerate(userMovieRating.columns):
    # a non-zero entry means the user already rated this movie: nothing to predict
    if userMovieRating.iat[targetUserIndex, movieIndex] != 0:
        continue

    # similarities of this movie to every other movie; keep the K largest
    lstSimilarity = itemSimilarityMatrix[movieIndex]
    lstSimilarItems = np.argsort(-lstSimilarity)[:K]

    # similarity-weighted average of the user's raw ratings on those neighbours
    valueSum = 0
    weightSum = 0
    for neighbor in lstSimilarItems:
        similarity = lstSimilarity[neighbor]
        valueSum += similarity * userMovieRating.iat[targetUserIndex, neighbor]
        weightSum += similarity

    recommendations.append((movieTitle, valueSum / weightSum))

In [45]:
# Inspect the similarity row left in scope by the last loop iteration that assigned it
lstSimilarity

array([0., 0., 0., ..., 0., 0., 0.])

In [47]:
# Inspect the top-K neighbour column indices left in scope by the last loop iteration that assigned them
lstSimilarItems

array([5990, 8255, 3144, 3044, 1922], dtype=int64)

In [31]:
# (title, predicted rating) pairs for every movie the target user has not rated, in column order
recommendations

[("'71 (2014)", 0.0),
 ("'Hellboy': The Seeds of Creation (2004)", 0.0),
 ("'Round Midnight (1986)", 0.0),
 ("'Salem's Lot (2004)", 0.0),
 ("'Til There Was You (1997)", 0.0),
 ("'Tis the Season for Love (2015)", 0.0),
 ("'burbs, The (1989)", 0.0),
 ("'night Mother (1986)", 0.0),
 ('(500) Days of Summer (2009)', 0.0),
 ('*batteries not included (1987)', 0.0),
 ('...All the Marbles (1981)', 0.0),
 ('...And Justice for All (1979)', 0.0),
 ('00 Schneider - Jagd auf Nihil Baxter (1994)', 0.0),
 ('1-900 (06) (1994)', 0.0),
 ('10 (1979)', 0.0),
 ('10 Cent Pistol (2015)', 0.0),
 ('10 Cloverfield Lane (2016)', 0.0),
 ('10 Items or Less (2006)', 0.0),
 ('10 Things I Hate About You (1999)', 0.0),
 ('10 Years (2011)', 0.0),
 ('10,000 BC (2008)', 0.0),
 ('100 Girls (2000)', 0.0),
 ('100 Streets (2016)', 0.0),
 ('101 Dalmatians (1996)', 0.0),
 ('101 Dalmatians (One Hundred and One Dalmatians) (1961)', 0.0),
 ("101 Dalmatians II: Patch's London Adventure (2003)", 0.0),
 ('101 Reykjavik (101 Reykjavík

## Step 4 - Display the List of Recommended Movies for the User

In [32]:
# Rank the candidates from highest to lowest predicted rating (the list is sorted in place)
recommendations.sort(reverse=True, key=lambda entry: entry[1])
# keep only the N best-scored (title, predicted rating) pairs
top_n_recommendations = recommendations[0:N]

top_n_recommendations

[('Silence of the Lambs, The (1991)', 2.985992418767609),
 ('Babe (1995)', 2.9852955338368403),
 ('Maverick (1994)', 2.602360579324617),
 ('Dances with Wolves (1990)', 2.398415308035809),
 ('Interview with the Vampire: The Vampire Chronicles (1994)',
  2.2004657320922547),
 ('Demolition Man (1993)', 1.9649256967135196),
 ('In the Line of Fire (1993)', 1.9336576044198672),
 ('Usual Suspects, The (1995)', 1.8456256056403149),
 ('Clear and Present Danger (1994)', 1.8204893948779006),
 ('Mrs. Doubtfire (1993)', 1.7671966167066917)]

In [33]:
# display the list of recommended movies for the user
print(f"Top {N} movie recommendations for User {targetUserIndex + 1}:")
for movie_title, predicted_rating in top_n_recommendations:
    print(f"{movie_title} (Predicted Rating: {predicted_rating:.2f})")

Top 10 movie recommendations for User 81:
Silence of the Lambs, The (1991) (Predicted Rating: 2.99)
Babe (1995) (Predicted Rating: 2.99)
Maverick (1994) (Predicted Rating: 2.60)
Dances with Wolves (1990) (Predicted Rating: 2.40)
Interview with the Vampire: The Vampire Chronicles (1994) (Predicted Rating: 2.20)
Demolition Man (1993) (Predicted Rating: 1.96)
In the Line of Fire (1993) (Predicted Rating: 1.93)
Usual Suspects, The (1995) (Predicted Rating: 1.85)
Clear and Present Danger (1994) (Predicted Rating: 1.82)
Mrs. Doubtfire (1993) (Predicted Rating: 1.77)
