In [1]:
import pandas as pd

import numpy as np

In [2]:

ratings = pd.read_csv('./Data/Data_final/ratings.csv')

movies = pd.read_csv('./Data/Data_final/movies.csv', encoding='latin-1')

users = pd.read_csv('./Data/Data_final/users.csv', encoding='latin-1')



In [3]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy import sparse

# Chuyển đổi ratings thành numpy array
Y_data = ratings[['UserID', 'MovieID', 'Rating']].to_numpy()

# Lớp CF như đã sửa đổi ở trên
class CF(object):
    def __init__(self, Y_data, k, dist_func=cosine_similarity, uuCF=1):
        self.uuCF = uuCF
        self.Y_data = Y_data if uuCF else Y_data[:, [1, 0, 2]]
        self.k = k
        self.dist_func = dist_func
        self.Ybar_data = None
        self.n_users = int(np.max(self.Y_data[:, 0])) + 1
        self.n_items = int(np.max(self.Y_data[:, 1])) + 1

    def add(self, new_data):
        self.Y_data = np.concatenate((self.Y_data, new_data), axis=0)

    def normalize_Y(self):
        users = self.Y_data[:, 0]
        self.Ybar_data = self.Y_data.copy()
        self.mu = np.zeros((self.n_users,))
        for n in range(self.n_users):
            ids = np.where(users == n)[0].astype(np.int32)
            item_ids = self.Y_data[ids, 1]
            ratings = self.Y_data[ids, 2]
            m = np.mean(ratings)
            if np.isnan(m):
                m = 0
            self.mu[n] = m
            self.Ybar_data[ids, 2] = ratings - self.mu[n]

        self.Ybar = sparse.coo_matrix((self.Ybar_data[:, 2],
                                       (self.Ybar_data[:, 1], self.Ybar_data[:, 0])),
                                      (self.n_items, self.n_users))
        self.Ybar = self.Ybar.tocsr()

    def similarity(self):
        self.S = self.dist_func(self.Ybar.T, self.Ybar.T)

    def refresh(self):
        self.normalize_Y()
        self.similarity()

    def fit(self):
        self.refresh()

    def __pred(self, u, i, normalized=1):
        ids = np.where(self.Y_data[:, 1] == i)[0].astype(np.int32)
        users_rated_i = (self.Y_data[ids, 0]).astype(np.int32)
        sim = self.S[u, users_rated_i]
        a = np.argsort(sim)[-self.k:]
        nearest_s = sim[a]
        r = self.Ybar[i, users_rated_i[a]]
        if normalized:
            return (r * nearest_s).sum() / (np.abs(nearest_s).sum() + 1e-8)
        return (r * nearest_s).sum() / (np.abs(nearest_s).sum() + 1e-8) + self.mu[u]

    def pred(self, u, i, normalized=1):
        if self.uuCF:
            return self.__pred(u, i, normalized)
        return self.__pred(i, u, normalized)

    def recommend(self, u, normalized=1):
        ids = np.where(self.Y_data[:, 0] == u)[0]
        items_rated_by_u = self.Y_data[ids, 1].tolist()
        recommended_items = []
        for i in range(self.n_items):
            if i not in items_rated_by_u:
                rating = self.__pred(u, i)
                if rating > 0:
                    recommended_items.append(i)
        return recommended_items

    def recommend_for_user(self, user_id):
        recommended_items = self.recommend(user_id)[:5]  # Lấy tối đa 5 phim
        movie_names = [movies.loc[movies['MovieID'] == item, 'Title'].values[0] for item in recommended_items]
        return movie_names

# Khởi tạo và huấn luyện mô hình cho phương pháp user-user
rs_user_user = CF(Y_data, k=2, uuCF=1)
rs_user_user.fit()

# Khởi tạo và huấn luyện mô hình cho phương pháp item-item
rs_item_item = CF(Y_data, k=2, uuCF=0)
rs_item_item.fit()

# Gợi ý phim cho user ID 1 theo phương pháp user-user
user_id = 1
recommended_movies_user_user = rs_user_user.recommend_for_user(user_id)
print(f'User-user CF recommendations for user ID {user_id}:')
for movie in recommended_movies_user_user:
    print(movie)

# Gợi ý phim cho user ID 1 theo phương pháp item-item
recommended_movies_item_item = rs_item_item.recommend_for_user(user_id)
print(f'Item-item CF recommendations for user ID {user_id}:')
for movie in recommended_movies_item_item:
    print(movie)


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


User-user CF recommendations for user ID 1:
Sign of the Lion
The Whip and the Body
Frankenhooker
I Lost My Body
Flickering Lights
Item-item CF recommendations for user ID 1:
Macross Plus Movie Edition
The Great McGinty
The Whip and the Body
The Lower Depths
Pale Rider
