In [13]:
import pandas as pd

# Load the dataset
ratings_data = pd.read_csv('u.data', sep='\t', names=['user_id', 'item_id', 'rating', 'timestamp'])
item_data = pd.read_csv('u.item', sep='|', encoding='latin-1', header=None, names=['movie_id', 'movie_title', 'release_date', 'video_release_date', 'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western'])


In [15]:
# Merge user ratings and item data
merged_dataset = pd.merge(ratings_data, item_data, left_on='item_id', right_on='movie_id')

# Define user profile aggregation function
def aggregate_user_profile(group):
    film_features = ' '.join(group['movie_title'])
    return film_features

# Aggregate film features and ratings for each user
user_profiles = merged_dataset.groupby('user_id').apply(aggregate_user_profile).reset_index(name='film_features')


In [16]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial import KDTree


# Create a count vectorizer
vectorizer = CountVectorizer()

# Fit and transform the film features
film_features_matrix = vectorizer.fit_transform(user_profiles['film_features'])
kdtree = KDTree(film_features_matrix.toarray())
def get_similar_users(user_id, kdtree, k=5):
    target_features = film_features_matrix[user_id - 1].toarray()
    _, indices = kdtree.query(target_features, k=k+1)
    similar_users = indices[0][1:].tolist()
    return similar_users

def get_recommended_films(user_id, similar_users, ratings_data, item_data, top_n=5):
    user_ratings = ratings_data[ratings_data['user_id'] == user_id]
    user_films = set(user_ratings['item_id'])
    
    similar_users_ratings = ratings_data[ratings_data['user_id'].isin(similar_users)]
    similar_users_films = set(similar_users_ratings['item_id'])
    
    recommended_films = similar_users_films - user_films
    
    recommended_films_data = item_data[item_data['movie_id'].isin(recommended_films)]
    recommended_films_data = recommended_films_data[['movie_id', 'movie_title']]
    
    return recommended_films_data.head(top_n)

In [17]:
target_user_id = 210
similar_users = get_similar_users(target_user_id, kdtree, k=5)
recommended_films = get_recommended_films(target_user_id, similar_users, ratings_data, item_data, top_n=5)

print("Recommended Films:")
print(recommended_films[['movie_id', 'movie_title']])



Recommended Films:
   movie_id              movie_title
1         2         GoldenEye (1995)
2         3        Four Rooms (1995)
6         7    Twelve Monkeys (1995)
7         8              Babe (1995)
8         9  Dead Man Walking (1995)
