Skip to content

Create santhipriya #38146

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions santhipriya
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Sample movie catalogue, one (movieId, title, genre) row per film.
movies = pd.DataFrame(
    [
        (1, 'Inception', 'Sci-Fi'),
        (2, 'Interstellar', 'Sci-Fi'),
        (3, 'The Dark Knight', 'Action'),
        (4, 'Parasite', 'Thriller'),
        (5, 'Joker', 'Drama'),
        (6, 'The Shawshank Redemption', 'Drama'),
        (7, 'The Godfather', 'Crime'),
        (8, 'Pulp Fiction', 'Crime'),
        (9, 'Forrest Gump', 'Drama'),
        (10, 'The Lord of the Rings: The Return of the King', 'Fantasy'),
    ],
    columns=['movieId', 'title', 'genre'],
)

# Sample ratings, one (userId, movieId, rating) row per rating event.
# The (2, 2, 4) row appears twice on purpose: it is present twice in the
# original sample data and is kept so the dataset stays identical.
ratings = pd.DataFrame(
    [
        (1, 1, 5),
        (2, 2, 4),
        (1, 3, 4),
        (2, 4, 5),
        (3, 5, 3),
        (1, 6, 5),
        (3, 7, 4),
        (2, 8, 5),
        (1, 9, 4),
        (3, 1, 3),
        (2, 2, 4),
        (3, 3, 5),
    ],
    columns=['userId', 'movieId', 'rating'],
)

# 1. Collaborative Filtering (Matrix Factorization)
# NOTE(review): Reader, Dataset, train_test_split, SVD and accuracy are not
# imported anywhere in the visible part of this file, so this section raises
# NameError as written. They look like scikit-surprise names (`surprise` and
# `surprise.model_selection`) — confirm and add the imports at the top of the file.
# Ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
# Hand the model the userId / movieId / rating columns, in that order.
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)
# Hold out 25% of the ratings for evaluation. No seed is passed here, so the
# split (and the RMSE printed below) presumably varies between runs — TODO confirm.
trainset, testset = train_test_split(data, test_size=0.25) # 25% held out for evaluation

# `model` is read later by get_recommendations() to score unrated movies.
model = SVD()
model.fit(trainset)

# Evaluate the model (optional): score the held-out ratings and report RMSE.
predictions = model.test(testset)
print(f"Collaborative Filtering RMSE: {accuracy.rmse(predictions)}")


# 2. Content-Based Filtering
# Vectorise each movie's genre string with TF-IDF, then compare every movie
# with every other one. Row/column i of `cosine_sim` corresponds to row i of
# `movies`, because the matrix is built from movies["genre"] in row order.
tfidf = TfidfVectorizer(stop_words="english")
movie_matrix = tfidf.fit_transform(movies["genre"])
# Square movie-by-movie similarity matrix; read by recommend_content().
cosine_sim = cosine_similarity(movie_matrix)


# 3. Hybrid Recommendation System
def get_recommendations(user_id, top_n=5):
    """
    Gets movie recommendations for a user using a hybrid approach.

    Every movie the user has not rated yet is scored with the collaborative
    filtering model. The content-based neighbours of the user's highest-rated
    movie are then used as a tie-breaker, so collaborative filtering keeps
    priority and content similarity decides between equally scored movies.

    Args:
        user_id (int): The ID of the user.
        top_n (int, optional): The number of recommendations to return. Defaults to 5.

    Returns:
        list: Up to ``top_n`` movie titles, best first. Empty when ``top_n``
        is not positive or the user has already rated every movie.
    """
    if top_n <= 0:
        return []

    # Only movies the user has not rated yet are eligible; title is carried
    # along so the final ranking can return real titles instead of NaN.
    user_ratings = ratings[ratings['userId'] == user_id]
    already_rated = movies['movieId'].isin(user_ratings['movieId'])
    candidates = movies.loc[~already_rated, ['movieId', 'title']]
    if candidates.empty:
        return []

    # 1. Collaborative filtering: predicted rating for every candidate.
    predicted_ratings = [
        model.predict(user_id, movie_id).est for movie_id in candidates['movieId']
    ]

    # 2. Content-based: neighbours of the user's highest-rated movie.
    # A user without any ratings has no seed movie, so this step is skipped
    # rather than letting idxmax() fail on an empty selection.
    cb_recs_titles = []
    if not user_ratings.empty:
        highest_rated_movie_id = user_ratings.loc[user_ratings['rating'].idxmax(), 'movieId']
        seed_titles = movies.loc[movies['movieId'] == highest_rated_movie_id, 'title']
        if not seed_titles.empty:  # the rated movieId may be missing from `movies`
            cb_recs_titles = recommend_content(seed_titles.iloc[0])

    # 3. Combine and rank: predicted rating first (CF is prioritised), with a
    # content-based match breaking ties between equally scored movies.
    scored = candidates.assign(
        predicted_rating=predicted_ratings,
        content_match=candidates['title'].isin(cb_recs_titles),
    )
    final_recs = scored.sort_values(
        by=['predicted_rating', 'content_match'], ascending=[False, False]
    ).head(top_n)

    return final_recs['title'].tolist()


# Content-Based Recommendation Function
def recommend_content(movie_title, top_k=3):
    """
    Returns the titles most similar to a movie according to `cosine_sim`.

    Args:
        movie_title (str): Exact title of the movie to find neighbours for.
        top_k (int, optional): Number of similar titles to return. Defaults to 3.

    Returns:
        list: Up to ``top_k`` titles, most similar first. The query movie
        itself is never included.

    Raises:
        IndexError: If ``movie_title`` is not present in ``movies``.
    """
    # Use the row *position*, not the DataFrame index label, because
    # cosine_sim is indexed positionally in the row order of `movies`.
    position = next(
        (pos for pos, title in enumerate(movies["title"]) if title == movie_title),
        None,
    )
    if position is None:
        raise IndexError(f"movie title not found: {movie_title!r}")

    scores = cosine_sim[position]
    # Drop the query movie explicitly. Slicing off the first sorted entry is
    # not safe: movies sharing a genre tie at the same similarity, so the
    # query movie is not guaranteed to sort first.
    others = [pos for pos in range(len(scores)) if pos != position]
    others.sort(key=lambda pos: scores[pos], reverse=True)  # stable: ties keep catalogue order
    return [movies.iloc[pos]["title"] for pos in others[:max(top_k, 0)]]


# Example Usage
user_id = 1
recommendations = get_recommendations(user_id, top_n=5)
print(f"Movie recommendations for user {user_id}: {recommendations}")
Loading
Oops, something went wrong.