# In [1]:
import streamlit as st
import pandas as pd
import numpy as np

# Function to compute cosine similarity
def compute_cosine_similarity(matrix):
    """Return the cosine similarity between every pair of columns of ``matrix``.

    Missing entries (NaN) are treated as 0 both when computing the column
    norms and when computing the dot products.

    Args:
        matrix: 2-D numeric table whose columns are the vectors to compare.
            A pandas DataFrame or anything ``np.asarray`` can convert to a
            float array is accepted.

    Returns:
        A square float ``numpy.ndarray`` where entry ``[i, j]`` is the cosine
        similarity of column ``i`` and column ``j``. Entries that involve a
        column with zero norm are NaN, because the similarity is undefined
        there.
    """
    values = np.asarray(matrix, dtype=float)
    # Fill the gaps once and reuse the result for norms and dot products.
    filled = np.where(np.isnan(values), 0.0, values)

    norms = np.sqrt((filled ** 2).sum(axis=0))
    dot_products = filled.T @ filled
    norm_products = np.outer(norms, norms)

    # Divide only where the denominator is non-zero; everything else keeps
    # the NaN it was initialised with, without triggering a 0/0 warning.
    similarity = np.full(dot_products.shape, np.nan)
    np.divide(dot_products, norm_products, out=similarity, where=norm_products != 0)
    return similarity

# Function to filter top K similarities
def filter_top_k(similarity_matrix, k=30):
    """Keep the ``k`` largest entries of each row and zero out the rest.

    Args:
        similarity_matrix: 2-D NumPy array of similarity scores.
        k: Number of highest-valued entries to retain in every row.

    Returns:
        A new array with the same shape and dtype as ``similarity_matrix``;
        the input is left untouched.
    """
    pruned = np.zeros_like(similarity_matrix)
    # Iterating a 2-D array yields row views, so writes to ``pruned_row``
    # land directly in ``pruned``.
    for source_row, pruned_row in zip(similarity_matrix, pruned):
        # Ascending sort of the negated row ranks the values descending.
        strongest = np.argsort(-source_row)[:k]
        pruned_row[strongest] = source_row[strongest]
    return pruned

# Function for generating recommendations
def myIBCF(new_user_ratings, top_k_similarity_matrix, k=10, movie_ids=None):
    """Predict item ratings for one user and return the ``k`` best items.

    For item ``i`` the prediction is the similarity-weighted average of the
    user's own ratings::

        sum_j S[j, i] * w[j] / sum_j |S[j, i]|

    where ``j`` runs over the items the user rated (``w[j]`` is not NaN) and
    for which ``S[j, i]`` is a finite, non-zero similarity. Dividing by the
    sum of the weights (rather than by the number of neighbours) keeps the
    prediction on the same scale as the ratings when the weights are
    non-negative.

    Items for which no rated neighbour exists have no prediction and are
    never returned, so the result may hold fewer than ``k`` rows. Items the
    user already rated are not removed; filter them in the caller if needed.

    Args:
        new_user_ratings: 1-D array-like with one entry per row of
            ``top_k_similarity_matrix``; NaN marks an item without a rating.
        top_k_similarity_matrix: 2-D array-like of item similarities; zero or
            NaN entries mean "not a neighbour".
        k: Maximum number of recommendations to return.
        movie_ids: Optional sequence giving the MovieID of each column of
            ``top_k_similarity_matrix``. When omitted, column position + 1 is
            reported as the MovieID.

    Returns:
        A ``pandas.DataFrame`` with columns ``MovieID`` and
        ``PredictedRating``, ordered by descending predicted rating.

    Raises:
        ValueError: If the ratings vector does not match the number of rows
            of the similarity matrix, or ``movie_ids`` does not match the
            number of columns.
    """
    ratings = np.asarray(new_user_ratings, dtype=float)
    similarity = np.asarray(top_k_similarity_matrix, dtype=float)
    if similarity.ndim != 2 or ratings.shape != (similarity.shape[0],):
        raise ValueError(
            "new_user_ratings must be 1-D with one entry per row of "
            "top_k_similarity_matrix"
        )

    # A neighbour counts only when the user rated it AND its similarity is a
    # usable weight; the same mask feeds numerator and denominator so the two
    # can never disagree.
    rated = ~np.isnan(ratings)
    weights = np.where(np.isfinite(similarity) & rated[:, None], similarity, 0.0)

    weighted_sum = weights.T @ np.where(rated, ratings, 0.0)
    normalization = np.abs(weights).sum(axis=0)

    # Leave NaN where there is nothing to divide by instead of computing 0/0.
    predicted_ratings = np.full(normalization.shape, np.nan)
    np.divide(weighted_sum, normalization, out=predicted_ratings, where=normalization > 0)

    # Get top k recommended movies (stable sort: ties keep column order)
    ranked = np.argsort(-predicted_ratings, kind="stable")
    recommended_indices = ranked[np.isfinite(predicted_ratings[ranked])][:k]

    if movie_ids is None:
        recommended_ids = recommended_indices + 1  # Adjust index to MovieID
    else:
        ids = np.asarray(movie_ids)
        if ids.shape != (similarity.shape[1],):
            raise ValueError(
                "movie_ids must have one entry per column of "
                "top_k_similarity_matrix"
            )
        recommended_ids = ids[recommended_indices]

    recommendations = pd.DataFrame({
        'MovieID': recommended_ids,
        'PredictedRating': predicted_ratings[recommended_indices],
    })
    return recommendations

# Load datasets
movies_path = 'movies.dat'
ratings_path = 'ratings.dat'


# Streamlit re-executes this script from the top on every widget interaction.
# Caching keeps the file parsing and the similarity computation from being
# repeated on each rerun. The cached objects are shared between reruns, so
# the rest of the script must treat them as read-only.
@st.cache_resource(show_spinner="Computing similarity matrix, please wait...")
def _load_recommender_data(movies_file, ratings_file):
    """Read the '::'-delimited data files and build the similarity matrices.

    Args:
        movies_file: Path of the movies file (MovieID::Title::Genres).
        ratings_file: Path of the ratings file
            (UserID::MovieID::Rating::Timestamp).

    Returns:
        A tuple ``(movies, ratings, matrix, centered, similarity, top_k)``:
        the two raw tables, the UserID x MovieID rating matrix, the same
        matrix with each user's mean rating subtracted, the full
        movie-by-movie cosine similarity, and that similarity after
        ``filter_top_k``.
    """
    # Load movies and ratings data
    movies = pd.read_csv(
        movies_file, sep='::', engine='python', header=None,
        names=['MovieID', 'Title', 'Genres'], encoding='latin-1',
    )
    ratings = pd.read_csv(
        ratings_file, sep='::', engine='python', header=None,
        names=['UserID', 'MovieID', 'Rating', 'Timestamp'], encoding='latin-1',
    )

    # Create and normalize the rating matrix (subtract each user's mean)
    matrix = ratings.pivot(index='UserID', columns='MovieID', values='Rating')
    centered = matrix.sub(matrix.mean(axis=1), axis=0)

    # Similarity between movies (columns), then keep the strongest per row
    similarity = compute_cosine_similarity(centered)
    top_k = filter_top_k(similarity)
    return movies, ratings, matrix, centered, similarity, top_k


(
    movies_df,
    ratings_df,
    rating_matrix,
    rating_matrix_centered,
    similarity_matrix,
    top_k_similarity_matrix,
) = _load_recommender_data(movies_path, ratings_path)

# Movie IDs to display (subset for simplicity)
# Only movies that appear in the ratings data have a column in rating_matrix
# (and therefore a row in the similarity matrix), so sample from those.
rated_catalog = movies_df[movies_df['MovieID'].isin(rating_matrix.columns)]
# Streamlit reruns the script on every widget interaction; keeping the sample
# in session state stops the sliders from being replaced under the user.
if 'sample_movies' not in st.session_state:
    st.session_state['sample_movies'] = rated_catalog.sample(
        min(10, len(rated_catalog))  # Display 10 random movies
    )
sample_movies = st.session_state['sample_movies']

# Streamlit App
st.title("Movie Recommendation System")
st.write("Rate the following movies to get personalized recommendations!")

# Rating inputs
user_ratings = []
for movie_id, title in zip(sample_movies['MovieID'], sample_movies['Title']):
    # Key by MovieID so two movies with the same title do not collide.
    rating = st.slider(f"Rate '{title}'", 0, 5, 0, key=f"rating_{movie_id}")  # Default rating is 0
    user_ratings.append(rating)

# Convert user ratings to numpy array
# NaN marks "not rated"; a slider left at 0 is treated as not rated.
new_user_ratings = np.full(top_k_similarity_matrix.shape[0], np.nan)
# The similarity matrix follows the column order of rating_matrix, so look up
# each MovieID's column position instead of assuming ids are 1..N.
sample_positions = rating_matrix.columns.get_indexer(sample_movies['MovieID'])
for position, rating in zip(sample_positions, user_ratings):
    if position >= 0 and rating > 0:
        new_user_ratings[position] = rating

# Generate recommendations
if st.button("Get Recommendations"):
    rated_mask = ~np.isnan(new_user_ratings)
    if not rated_mask.any():
        st.warning("Please rate at least one movie to get recommendations.")
    else:
        # Ask for extra candidates so 10 remain after removing rated movies.
        recommendations = myIBCF(
            new_user_ratings, top_k_similarity_matrix, k=10 + int(rated_mask.sum())
        )
        recommendations = recommendations.dropna(subset=['PredictedRating'])

        # myIBCF reports column position + 1 as MovieID; convert it back to a
        # position, drop movies the user already rated, then map to real ids.
        positions = recommendations['MovieID'].to_numpy() - 1
        unseen = ~rated_mask[positions]
        recommendations = recommendations[unseen].head(10).copy()
        recommendations['MovieID'] = rating_matrix.columns.to_numpy()[positions[unseen][:10]]

        recommendations = recommendations.merge(movies_df[['MovieID', 'Title']], on='MovieID')
        st.write("Top 10 Movie Recommendations:")
        st.table(recommendations[['Title', 'PredictedRating']])


# 2024-12-11 15:33:04.311 
#   command:
#
#     streamlit run /Users/yuwakayama/opt/anaconda3/lib/python3.11/site-packages/ipykernel_launcher.py [ARGUMENTS]
