In [None]:
!conda install -c conda-forge scikit-surprise -y

In [None]:
# Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import re
import string

from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, mean_squared_error
from sklearn.model_selection import train_test_split

from scipy.sparse import hstack, csr_matrix
from collections import Counter, defaultdict
from ast import literal_eval  

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

from surprise import Dataset, Reader, SVD, accuracy
from surprise.model_selection import GridSearchCV
#from surprise.model_selection import train_test_split
from collections import defaultdict
from ast import literal_eval 
from surprise import accuracy


In [None]:
# Download the NLTK data packages used by the text-preprocessing cells below.
nltk.download('punkt')      # tokenizer data (presumably what word_tokenize needs)
nltk.download('wordnet')    # WordNet corpus (presumably backing WordNetLemmatizer)
nltk.download('punkt_tab')  # NOTE(review): looks like the tokenizer tables newer NLTK releases expect — confirm
nltk.download('stopwords')  # stop-word lists read via stopwords.words('english')

In [None]:
# Read the three raw tables: credits, title metadata and user interactions.
# NOTE(review): the files carry an .xls extension but are parsed as CSV — confirm the on-disk format.
credit, title, user = (
    pd.read_csv(source_file)
    for source_file in ("credits.xls", "titles.xls", "user_interactions.xls")
)

# Preview the first and last rows of the credits table
print("credit data", credit.head().to_string(), credit.tail().to_string(), sep="\n")


In [None]:
# Preview the first and last rows of the titles table
print("title data", title.head().to_string(), title.tail().to_string(), sep="\n")

In [None]:
# Preview the first and last rows of the user-interaction table
print("User data", user.head().to_string(), user.tail().to_string(), sep="\n")

In [None]:
# Collapse the credits table to a single row per title id, collecting every
# credit attribute into a list, so the joins below do not multiply title rows.
credit_agg = (
    credit
    .groupby("id")
    .agg(dict.fromkeys(("person_id", "name", "character", "role"), list))
    .reset_index()
)

# Attach the aggregated credits to the title metadata (titles without credits are kept)
combine = pd.merge(title, credit_agg, how="left", on="id")

# Attach title + credit information to every user interaction
final_data = pd.merge(user, combine, how="left", left_on="id", right_on="id")

# Show the head of the fully merged table
print("\nFinal Merged Dataset:", final_data.head().to_string(), sep="\n")

In [None]:
# Report (rows, columns) of the merged table
print(f"Number of Rows and Columns in Merged Data: {final_data.shape}")

In [None]:
# Show column details
print("\nColumns in final DataFrame:")
print(final_data.columns)

print("\nDataset Information:")
# DataFrame.info() writes its report itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
final_data.info()

In [None]:
# Count missing entries once and reuse the counts for both reports
missing_counts = final_data.isnull().sum()

print("\nMissing Values in Each Column:")
print(missing_counts)

# Same counts expressed as a percentage of all rows (displayed as the cell result)
print("\nMissing Values Percentage in Each Column:")
(missing_counts / len(final_data) * 100).round(2)


In [None]:
# Descriptive statistics of the numeric columns
print("\nSummary Statistics:", final_data.describe(), sep="\n")

In [None]:
# Overlay the IMDb and TMDb score distributions on a single figure
plt.figure(figsize=(12, 5))

score_series = (
    ('imdb_score', 'blue', 'IMDb Score'),
    ('tmdb_score', 'red', 'TMDb Score'),
)
for score_column, bar_colour, legend_label in score_series:
    sns.histplot(final_data[score_column], bins=30, kde=True, color=bar_colour, label=legend_label)

plt.title("Distribution of IMDb & TMDb Scores")
plt.xlabel("Score")
plt.ylabel("Frequency")
plt.legend()
plt.show()


In [None]:
# Show the most popular genres
def _split_genres(raw_genres):
    """Turn one raw ``genres`` cell into a list of clean genre names.

    The rest of the notebook parses this column with ``literal_eval`` (it holds
    stringified lists such as "['drama', 'comedy']"), so a plain ``split(',')``
    would count fragments like "['drama'" as genres. Text that is not a valid
    Python literal falls back to comma splitting.
    """
    if isinstance(raw_genres, (list, tuple, set)):
        parsed = raw_genres
    else:
        try:
            parsed = literal_eval(raw_genres)
        except (ValueError, SyntaxError):
            parsed = str(raw_genres).split(',')
    if isinstance(parsed, str):
        parsed = [parsed]
    cleaned = (str(genre).strip() for genre in parsed)
    return [genre for genre in cleaned if genre]


# Flatten every row's genre list into one long list of genre names
all_genres = [
    genre
    for raw_genres in final_data['genres'].dropna()
    for genre in _split_genres(raw_genres)
]

# Count occurrences
genre_counts = Counter(all_genres)

# Convert to DataFrame for visualization (most frequent genre first)
genre_combine = pd.DataFrame(genre_counts.items(), columns=['Genre', 'Count']).sort_values(by='Count', ascending=False)

plt.figure(figsize=(12, 5))
sns.barplot(x=genre_combine['Genre'], y=genre_combine['Count'], palette='viridis')
plt.xticks(rotation=45, ha='right')  # Rotate labels for better readability
plt.xlabel("Genre")
plt.ylabel("Count")
plt.title("Most Common Movie/TV Show Genres")

plt.show()


In [None]:
# Correlation heatmap of the numeric columns (used to spot redundant features;
# this cell only plots, it does not drop anything)
numeric_cols = final_data.select_dtypes(include=['number'])
correlation_matrix = numeric_cols.corr()

plt.figure(figsize=(10, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap="coolwarm",
    fmt=".2f",
    linewidths=0.5,
)

plt.title("🔗 Feature Correlation Heatmap")
plt.yticks(rotation=0)
plt.xticks(rotation=45, ha='right')

plt.show()


In [None]:
# Fixed placeholders for descriptive / categorical columns
placeholder_fills = {
    'title': 'Unknown Title',
    'description': 'No description available',
    'age_certification': 'Not Rated',
    'seasons': 0,
    'imdb_id': 'Unknown',
}
for column_name, placeholder in placeholder_fills.items():
    final_data[column_name] = final_data[column_name].fillna(placeholder)

# Scores are imputed with the column mean, vote/popularity counts with the median
for column_name in ('imdb_score', 'tmdb_score'):
    final_data[column_name] = final_data[column_name].fillna(final_data[column_name].mean())
for column_name in ('imdb_votes', 'tmdb_popularity'):
    final_data[column_name] = final_data[column_name].fillna(final_data[column_name].median())

# Titles without any credit rows get an 'Unknown' marker in the credit columns
credit_columns = ['person_id', 'name', 'character', 'role']
final_data[credit_columns] = final_data[credit_columns].fillna('Unknown')

# User-side gaps: placeholder user id, median rating (ratings assumed numeric)
final_data['user_id'] = final_data['user_id'].fillna('Unknown User')
final_data['rating'] = final_data['rating'].fillna(final_data['rating'].median())

# Report what is still missing, followed by the resulting dtypes
print(final_data.isna().sum())
print(final_data.dtypes)


In [None]:
# Numeric columns inspected for outliers
num_features = ['runtime', 'imdb_score', 'imdb_votes', 'tmdb_popularity', 'tmdb_score', 'rating']

# One box per feature on a shared axis
plt.figure(figsize=(12, 6))
final_data.loc[:, num_features].boxplot()
plt.xticks(rotation=45)
plt.title("Boxplot of Numerical Features (Outlier Detection)")
plt.show()


In [None]:
# Hold out 20% of the merged rows as the test split; random_state=42 makes the split repeatable.
# NOTE(review): a later cell re-imports `train_test_split` from surprise, so re-running this
# cell after that one would call the Surprise version instead — confirm execution order.
train, test = train_test_split(final_data, test_size=0.2, random_state=42)

# Outlier detection using IQR
def detect_outliers_iqr(data, column):
    """Return the rows of ``data`` whose ``column`` value lies outside the IQR fences.

    The fences sit 1.5 interquartile ranges below the first quartile and above
    the third quartile; rows strictly beyond either fence are returned with
    their original index.
    """
    values = data[column]
    first_quartile, third_quartile = values.quantile(0.25), values.quantile(0.75)
    spread = third_quartile - first_quartile
    lower_bound = first_quartile - 1.5 * spread
    upper_bound = third_quartile + 1.5 * spread
    is_outlier = (values < lower_bound) | (values > upper_bound)
    return data[is_outlier]

# Outlier rows are dropped from the training split only; the test split stays untouched.
# Columns are processed one after another, so each pass sees the already-reduced frame.
num_features = ['imdb_score', 'imdb_votes', 'tmdb_score', 'tmdb_popularity', 'rating']

for col in num_features:
    train = train.drop(detect_outliers_iqr(train, col).index)

print(f"Training dataset shape after outlier removal: {train.shape}")

In [None]:
# Columns to rescale into the [0, 1] range
num_features = ['imdb_score', 'imdb_votes', 'tmdb_score', 'tmdb_popularity']

# Learn the min/max on the training split only, then apply it to both splits
scaler = MinMaxScaler()
scaler.fit(train[num_features])
train.loc[:, num_features] = scaler.transform(train[num_features])
test.loc[:, num_features] = scaler.transform(test[num_features])

In [None]:
# Shared NLP helpers read by preprocess_text: English stop-word set and lemmatizer
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

#  text preprocessing
def preprocess_text(text):
    """Normalise a piece of free text for vectorisation.

    The text is lower-cased, stripped of everything except letters and
    whitespace, tokenised, filtered against ``stop_words`` and lemmatised.
    Returns the surviving tokens joined by single spaces, or the literal
    "unknown" when the input is missing or nothing survives the cleaning.
    """
    # Missing values get a fixed placeholder
    if pd.isnull(text):
        return "unknown"

    # Keep only lower-case letters and whitespace (digits and punctuation are dropped)
    letters_only = re.sub(r'[^a-z\s]', '', text.lower())

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token in stop_words:
            continue
        kept_tokens.append(lemmatizer.lemmatize(token))

    # Never return an empty string: fall back to the placeholder
    return ' '.join(kept_tokens) if kept_tokens else "unknown"
    
# Clean the free-text columns of both splits with preprocess_text
for text_column in ('description', 'title'):
    train[text_column] = train[text_column].apply(preprocess_text)
    test[text_column] = test[text_column].apply(preprocess_text)

# `character` holds lists (or a placeholder), so each cell is stringified first
train['character'] = train['character'].map(lambda cast: preprocess_text(str(cast)))
test['character'] = test['character'].map(lambda cast: preprocess_text(str(cast)))

preview_columns = ['title', 'description', 'character']
print(train[preview_columns].head())
print(test[preview_columns].head())

# Build one text blob per row (genres, production countries, cleaned description)
# that the TF-IDF vectoriser consumes; missing pieces count as empty strings.
for split_frame in (train, test):
    split_frame['text_features'] = (
        split_frame['genres'].fillna('')
        + " "
        + split_frame['production_countries'].fillna('')
        + " "
        + split_frame['description'].fillna('')
    )

# Drop rows whose combined text is blank before vectorising
train = train.loc[train['text_features'].str.strip() != '']
test = test.loc[test['text_features'].str.strip() != '']

# TF-IDF vocabulary is learned on the training split only
# (raise min_df for more rigorous text filtering)
tfidf = TfidfVectorizer(stop_words='english', max_features=5000, min_df=1)
tfidf_matrix_train = tfidf.fit_transform(train['text_features'])
tfidf_matrix_test = tfidf.transform(test['text_features'])

print(f"TF-IDF Matrix Train Shape: {tfidf_matrix_train.shape}")
print(f"TF-IDF Matrix Test Shape: {tfidf_matrix_test.shape}")

# Select and scale numeric features.
# Missing release years in BOTH splits are imputed with the training mean, so the
# test split is treated exactly like the training split and no test statistics
# leak into the preprocessing. (Previously only the training split was imputed;
# test NaNs passed through the scaler and were later replaced by 0.)
release_year_mean = train[['release_year']].mean()
numeric_features = train[['release_year']].fillna(release_year_mean)
numeric_features_test = test[['release_year']].fillna(release_year_mean)

# NOTE: this rebinds `scaler`, replacing the scaler fitted on the score columns earlier
scaler = MinMaxScaler()
numeric_scaled_train = scaler.fit_transform(numeric_features)
numeric_scaled_test = scaler.transform(numeric_features_test)

# Convert numeric features to sparse matrix format so they can be stacked with TF-IDF
numeric_scaled_train = csr_matrix(numeric_scaled_train)
numeric_scaled_test = csr_matrix(numeric_scaled_test)

# Combine text and numeric features into sparse matrices (TF-IDF columns first)
combined_features_train = hstack([tfidf_matrix_train, numeric_scaled_train]).tocsr()
combined_features_test = hstack([tfidf_matrix_test, numeric_scaled_test]).tocsr()

# Work on copies and zero out any NaN that survived into the stored sparse values
combined_features_train = combined_features_train.copy()
combined_features_test = combined_features_test.copy()
for stacked_matrix in (combined_features_train, combined_features_test):
    stacked_matrix.data[np.isnan(stacked_matrix.data)] = 0

# Pairwise cosine similarity inside each split (train vs train, test vs test)
cosine_sim_train = cosine_similarity(combined_features_train, combined_features_train)
cosine_sim_test = cosine_similarity(combined_features_test, combined_features_test)

# Label rows and columns with the original row index of each split
similarity_df_train = pd.DataFrame(data=cosine_sim_train, index=train.index, columns=train.index)
similarity_df_test = pd.DataFrame(data=cosine_sim_test, index=test.index, columns=test.index)

print(similarity_df_train.head(), similarity_df_test.head(), sep="\n")

In [None]:
#content-based

def get_recommendations(user_type=None, user_release_year=None, user_genres=None, user_countries=None, n=10):
    """Recommend up to ``n`` titles from the ``test`` split resembling the titles that match the filters.

    The module-level ``test`` frame is narrowed by the optional filters. The
    similarity rows of the surviving titles (from ``similarity_df_test``) are
    summed into one score per candidate row, the ``n`` best-scoring candidates
    are kept, and those are finally ordered by rating for display.

    Previously the similarity ranking was never truncated before the final
    sort by rating, so the result was simply the ``n`` best-rated rows of the
    whole test split regardless of similarity.

    Args:
        user_type: substring matched case-insensitively against ``type``.
        user_release_year: exact ``release_year`` to keep.
        user_genres: one genre name; kept when present in the row's genre list.
        user_countries: substring matched case-insensitively against ``production_countries``.
        n: maximum number of recommendations.

    Returns:
        A DataFrame with columns ``title``, ``genres``, ``rating`` (at most ``n``
        rows), or a message string when nothing matches the filters or no
        similarity information is available.
    """
    output_columns = ['title', 'genres', 'rating']
    filtered_data = test.copy()

    if user_type:
        filtered_data = filtered_data[filtered_data['type'].str.contains(user_type, case=False, na=False)]
    if user_release_year:
        filtered_data = filtered_data[filtered_data['release_year'] == user_release_year]
    if user_genres:
        wanted_genre = user_genres.lower()
        filtered_data = filtered_data[filtered_data['genres'].apply(
            lambda x: wanted_genre in [g.lower() for g in literal_eval(x)] if isinstance(x, str) else False
        )]
    if user_countries:
        filtered_data = filtered_data[filtered_data['production_countries'].str.contains(user_countries, case=False, na=False)]

    if filtered_data.empty:
        return "No matching movies found!"

    filtered_data = filtered_data.drop_duplicates(subset='title')

    # The similarity matrix is built in an earlier cell; bail out if it is missing
    if 'similarity_df_test' not in globals():
        return "No similar movies found in the dataset!"
    seed_index = filtered_data.index.intersection(similarity_df_test.index)
    if seed_index.empty:
        return "No similar movies found in the dataset!"

    # One aggregated similarity score per candidate row (vectorised sum over the seed rows)
    similar_movies = similarity_df_test.loc[seed_index].sum(axis=0).sort_values(ascending=False)

    # Keep only the n most similar candidates before ordering them by rating
    recommended_movies = test.loc[similar_movies.index, output_columns].drop_duplicates().head(n)

    # Top up from the best-rated filtered titles when fewer than n candidates exist
    if len(recommended_movies) < n:
        additional_recommendations = (
            filtered_data[output_columns]
            .sort_values(by='rating', ascending=False)
            .head(n - len(recommended_movies))
        )
        recommended_movies = pd.concat([recommended_movies, additional_recommendations])

    # Sort final recommendations by rating in descending order
    return recommended_movies.sort_values(by='rating', ascending=False).head(n)

# Collect the optional filters; an empty answer means "no filter"
user_type = input("Enter movie type (Movie/Show) or press Enter to skip: ").strip() or None
release_year_text = input("Enter release year or press Enter to skip: ").strip()
user_release_year = int(release_year_text) if release_year_text else None
user_genres = input("Enter genres or press Enter to skip: ").strip() or None
user_countries = input("Enter production countries or press Enter to skip: ").strip() or None

recommended_movies = get_recommendations(
    user_type,
    user_release_year,
    user_genres,
    user_countries,
    n=10,
)

print("\nTop 10 recommended Movies for you:")
if not isinstance(recommended_movies, str):
    # A DataFrame came back: print one line per recommended row
    for _, row in recommended_movies.iterrows():
        print(f"Movie Title: {row['title']}, Genres: {row['genres']}, Rating: {row.get('rating', 'N/A')}")
else:
    # A string result is a status message from get_recommendations
    print(recommended_movies)

def evaluate_recommendations(y_true, y_pred):
    """Score predicted ratings against true ratings.

    Ratings of at least 3 are treated as "relevant" for precision, recall and
    F1 (undefined cases are scored as 1 via ``zero_division=1``); RMSE is
    computed on the raw ratings. Returns a dict with the keys "RMSE",
    "Precision", "F1-score" and "Recall".
    """
    threshold = 3

    def _binarise(ratings):
        # 1 for ratings at or above the relevance threshold, else 0
        return [int(rating >= threshold) for rating in ratings]

    y_true_bin = _binarise(y_true)
    y_pred_bin = _binarise(y_pred)

    precision = precision_score(y_true_bin, y_pred_bin, zero_division=1)
    recall = recall_score(y_true_bin, y_pred_bin, zero_division=1)
    f1 = f1_score(y_true_bin, y_pred_bin, zero_division=1)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    return {"RMSE": rmse, "Precision": precision, "F1-score": f1, "Recall": recall}

if not isinstance(recommended_movies, str):
    mean_test_rating = test['rating'].mean()

    # NOTE(review): y_true covers every row of `test`, while y_pred holds the ratings of the
    # recommended rows padded with the mean rating — the two lists are not aligned row by row.
    y_true = test['rating'].tolist()
    y_pred = recommended_movies['rating'].fillna(mean_test_rating).tolist()  # mean rating for missing values

    # Truncate / pad the predictions so both lists have the same length
    y_pred = y_pred[:len(y_true)] + [mean_test_rating] * (len(y_true) - len(y_pred))

    eval_metrics = evaluate_recommendations(y_true, y_pred)

    # Print the metrics from the largest value to the smallest
    print("\nEvaluation Metrics:")
    ranked_metrics = sorted(eval_metrics.items(), key=lambda item: item[1], reverse=True)
    for metric, value in ranked_metrics:
        print(f"{metric}: {value:.4f}")
else:
    print("No recommendations to evaluate.")


In [None]:
#collaborative-filtering
!pip install -U setuptools wheel
!pip install scikit-surprise

In [None]:
import pandas as pd
import numpy as np
from surprise import SVD, Dataset, Reader, accuracy
from surprise.model_selection import train_test_split, GridSearchCV
from collections import defaultdict

# The merged ratings table must have been built by the earlier cells
if 'final_data' not in globals():
    raise ValueError("Error: 'final_data' dataset is not defined.")

# Seed Python's and NumPy's global RNGs so the split, the CV folds and the SVD
# initialisation give the same numbers on every Restart & Run All.
import random
CF_SEED = 42
random.seed(CF_SEED)
np.random.seed(CF_SEED)

# Create user-item matrix (rows: users, columns: title ids, values: ratings)
user_item_matrix = final_data.pivot_table(index='user_id', columns='id', values='rating')

# Fill missing values with each title's median rating
# NOTE(review): this filled matrix is not referenced again in the visible notebook
user_item_matrix_filled = user_item_matrix.apply(lambda x: x.fillna(x.median()), axis=0)

# Convert to long format (one row per rating)
long_format = final_data[['user_id', 'id', 'rating']].dropna(subset=['rating'])

# Prepare dataset for Surprise
reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(long_format[['user_id', 'id', 'rating']], reader)

# Split data (80-20) with a fixed seed
trainset, testset = train_test_split(dataset, test_size=0.2, random_state=CF_SEED)

# Hyperparameter tuning
# NOTE(review): the grid search cross-validates on the full dataset, which includes the
# rows held out in `testset`, so the RMSE reported on `testset` below is optimistic.
param_grid = {
    'n_factors': [50, 100, 150],
    'lr_all': [0.002, 0.005, 0.01],
    'reg_all': [0.02, 0.1, 0.2]
}
grid_search = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=5)
grid_search.fit(dataset)

print("Best Parameters:", grid_search.best_params['rmse'])
print("Best RMSE Score:", grid_search.best_score['rmse'])

# Train best model on the training split only
best_svd = grid_search.best_estimator['rmse']
best_svd.fit(trainset)

# Evaluate model on the held-out split
predictions = best_svd.test(testset)
rmse = accuracy.rmse(predictions, verbose=False)
print(f"RMSE: {rmse:.4f}")

# Evaluation metric calculation
def precision_recall_at_k(predictions, k=5, threshold=3.0):
    """Average per-user precision@k and recall@k, plus the F1 of those two averages.

    ``predictions`` is an iterable of 5-tuples (user id, item id, true rating,
    estimated rating, details). For each user the items are ranked by estimate;
    an item is "relevant" when its true rating reaches ``threshold`` and
    "recommended" when it is in the top ``k`` and its estimate reaches
    ``threshold``. Users with no recommended (resp. relevant) items contribute
    a precision (resp. recall) of 0.

    Returns:
        (mean precision, mean recall, F1 computed from the two means)
    """
    ratings_by_user = defaultdict(list)
    for uid, _iid, true_r, est, _details in predictions:
        ratings_by_user[uid].append((est, true_r))

    precisions, recalls = [], []
    for ranked in ratings_by_user.values():
        # Highest estimate first
        ranked = sorted(ranked, key=lambda pair: pair[0], reverse=True)
        top_k = ranked[:k]

        n_rel = sum(1 for _, true_r in ranked if true_r >= threshold)
        n_rec_k = sum(1 for est, _ in top_k if est >= threshold)
        n_rel_and_rec_k = sum(1 for est, true_r in top_k if true_r >= threshold and est >= threshold)

        precisions.append(n_rel_and_rec_k / n_rec_k if n_rec_k else 0)
        recalls.append(n_rel_and_rec_k / n_rel if n_rel else 0)

    precision_avg = np.mean(precisions)
    recall_avg = np.mean(recalls)
    if precision_avg + recall_avg:
        f1_score_avg = 2 * precision_avg * recall_avg / (precision_avg + recall_avg)
    else:
        f1_score_avg = 0

    return precision_avg, recall_avg, f1_score_avg

# Use cf_-prefixed names: binding the result to `f1_score` would overwrite sklearn's
# f1_score function imported at the top, breaking evaluate_recommendations on re-run.
cf_precision, cf_recall, cf_f1 = precision_recall_at_k(predictions, k=5, threshold=3.0)
print(f"Precision: {cf_precision:.4f}, Recall: {cf_recall:.4f}, F1-score: {cf_f1:.4f}")

# Recommendation function
def recommend_for_user(user_id, n_recommendations=5):
    """Return the top unrated titles for ``user_id`` according to ``best_svd``.

    Args:
        user_id: id as it appears in the index of ``user_item_matrix``.
        n_recommendations: maximum number of titles to return.

    Returns:
        A list of ``(item id, title, predicted rating rounded to 1 decimal)``
        tuples ordered by predicted rating (best first). When the user is
        unknown or has rated everything, a single placeholder tuple
        ``(message, None, None)`` is returned instead.
    """
    if user_id not in user_item_matrix.index:
        return [("User not found in dataset", None, None)]

    # Candidate items are all known ids the user has not rated yet
    rated_items = set(user_item_matrix.loc[user_id].dropna().index)
    items_to_predict = [item for item in final_data['id'].unique() if item not in rated_items]

    if not items_to_predict:
        return [("No new recommendations available", None, None)]

    ranked_predictions = sorted(
        (best_svd.predict(user_id, item_id) for item_id in items_to_predict),
        key=lambda pred: pred.est,
        reverse=True,
    )
    top_n_recommendations = ranked_predictions[:n_recommendations]

    # Resolve all titles with one pass over final_data (first occurrence per id)
    # instead of scanning the whole frame twice for every recommendation.
    top_ids = [pred.iid for pred in top_n_recommendations]
    title_by_id = (
        final_data.loc[final_data['id'].isin(top_ids)]
        .drop_duplicates(subset='id')
        .set_index('id')['title']
        .to_dict()
    )

    return [
        (pred.iid, title_by_id.get(pred.iid, "Unknown Title"), round(pred.est, 1))
        for pred in top_n_recommendations
    ]


# Ask for a user id and print that user's collaborative-filtering recommendations
try:
    raw_user_id = input("Enter your user ID: ").strip()
    user_id_input = int(raw_user_id)
    top_recommendations = recommend_for_user(user_id_input, n_recommendations=10)

    print("\nTop 10 recommended items:")

    for recommendation in top_recommendations:
        item_id, movie_title, predicted_rating = recommendation
        print(f"Item ID: {item_id}, Movie Title: {movie_title}, Predicted Rating: {predicted_rating}")

except ValueError:
    # Raised by int() when the entered id is not numeric
    print("Invalid input! Please enter a valid numeric user ID.")

In [None]:
# Weighted hybrid system

# The merged ratings table must have been built by the earlier cells
if 'final_data' not in globals():
    raise ValueError("Error: 'final_data' dataset is not defined.")

# Prepare data for Surprise
reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(final_data[['user_id', 'id', 'rating']], reader)

# Train-Test Split (80-20), seeded so the evaluation below is repeatable.
# NOTE(review): this relies on Surprise's train_test_split imported by the
# collaborative-filtering cell, not the sklearn one imported at the top.
trainset, testset = train_test_split(dataset, test_size=0.2, random_state=42)

# Train the collaborative filtering model (SVD) with fixed hyperparameters and seed
svd = SVD(n_factors=100, lr_all=0.005, reg_all=0.1, random_state=42)
svd.fit(trainset)

# Content-Based Filtering: precompute parsed genre / country lists.
# One row per title id is enough (compute_content_similarity only reads the first
# match), and .copy() avoids assigning into a slice of final_data.
content_features = (
    final_data[['id', 'title', 'genres', 'production_countries', 'release_year']]
    .drop_duplicates(subset='id')
    .copy()
)
content_features['genres'] = content_features['genres'].apply(lambda x: literal_eval(x) if isinstance(x, str) else [])
content_features['production_countries'] = content_features['production_countries'].apply(lambda x: literal_eval(x) if isinstance(x, str) else [])

def compute_content_similarity(movie_id, user_genres, user_countries):
    """Score how well a title matches a user's preferred genres and countries.

    The score is 0.7 * (share of the user's genres the title has) +
    0.3 * (share of the user's countries the title has); an empty preference
    list contributes 0. Titles missing from ``content_features`` score 0.
    """
    matching_rows = content_features[content_features['id'] == movie_id]
    if matching_rows.empty:
        return 0  # unknown title: no similarity

    movie_row = matching_rows.iloc[0]

    def _overlap_share(item_values, preferred_values):
        # Fraction of the preferred values that the item carries
        if not preferred_values:
            return 0
        preferred = set(preferred_values)
        return len(set(item_values) & preferred) / len(preferred)

    genre_similarity = _overlap_share(movie_row['genres'], user_genres)
    country_similarity = _overlap_share(movie_row['production_countries'], user_countries)

    # Weighted similarity: genres count more than countries
    return 0.7 * genre_similarity + 0.3 * country_similarity

def _parse_list_field(value):
    """Return ``value`` as a list: real sequences pass through, stringified lists are
    parsed with ``literal_eval``, anything else (NaN, malformed text) becomes []."""
    if isinstance(value, (list, tuple, set)):
        return list(value)
    if not isinstance(value, str):
        return []
    try:
        parsed = literal_eval(value)
    except (ValueError, SyntaxError):
        return []
    return list(parsed) if isinstance(parsed, (list, tuple, set)) else []


def hybrid_recommend_for_user(user_id, n_recommendations=10, weight_cf=0.6, weight_cbf=0.4, genre_filter=None, year_filter=None):
    """Rank titles by a weighted mix of the SVD estimate and a content-similarity score.

    Args:
        user_id: id looked up in ``final_data['user_id']``; unknown users get an
            empty preference profile (content score 0) instead of an error.
        n_recommendations: maximum number of titles to return.
        weight_cf: weight of the collaborative-filtering (``svd``) estimate.
        weight_cbf: weight of ``compute_content_similarity``.
        genre_filter: optional substring matched case-insensitively against each genre.
        year_filter: optional exact release year.

    A filter that matches nothing is ignored (as the printed message announces)
    rather than emptying the candidate list.

    Returns:
        A list of ``(item id, title)`` tuples, best score first, or
        ``[("No recommendations available", None)]`` when there are no titles at all.
    """
    # One metadata row per title id (first occurrence), indexed for cheap lookups
    catalog = final_data.drop_duplicates(subset='id').set_index('id')
    # A plain list, so the emptiness checks below are well defined
    all_items = list(catalog.index)

    # Preference profile; stays empty for users that are not in the dataset
    user_genres, user_countries = [], []
    user_movies = final_data[final_data['user_id'] == user_id]
    if user_movies.empty:
        print(" User not found in dataset. Recommending top movies instead.")
    else:
        user_genres = list({
            genre
            for cell in user_movies['genres']
            for genre in _parse_list_field(cell)
        })
        user_countries = list({
            country
            for cell in user_movies['production_countries']
            for country in _parse_list_field(cell)
        })

    # Apply genre filter
    if genre_filter:
        wanted_genre = genre_filter.lower()
        genre_matches = [
            item for item in all_items
            if any(wanted_genre in str(g).lower() for g in _parse_list_field(catalog.at[item, 'genres']))
        ]
        if genre_matches:
            all_items = genre_matches
        else:
            print("No movies match the selected genre. Displaying top recommendations instead.")

    # Apply year filter
    if year_filter:
        year_matches = [
            item for item in all_items
            if catalog.at[item, 'release_year'] == year_filter
        ]
        if year_matches:
            all_items = year_matches
        else:
            print(" No movies match the selected year. Displaying top recommendations instead.")

    # Nothing to rank at all (empty dataset)
    if not all_items:
        return [("No recommendations available", None)]

    predictions = []
    for item_id in all_items:
        cf_score = svd.predict(user_id, item_id).est  # CF score
        cbf_score = compute_content_similarity(item_id, user_genres, user_countries)  # CBF score
        predictions.append((item_id, (weight_cf * cf_score) + (weight_cbf * cbf_score)))  # Hybrid score

    predictions.sort(key=lambda scored: scored[1], reverse=True)  # Sort by highest score

    return [
        (item_id, catalog.at[item_id, 'title'])
        for item_id, _score in predictions[:n_recommendations]
    ]


# User input
# NOTE(review): the except below also catches any ValueError raised inside
# hybrid_recommend_for_user, which is then reported as an invalid user id.
try:
    user_id_input = int(input("Enter your user ID: ").strip())

    genre_input = input("Enter a movie genre to filter (or press Enter to skip): ").strip().lower()
    genre_filter = genre_input if genre_input else None

    year_input = input("Enter a release year to filter (or press Enter to skip): ").strip()
    year_filter = int(year_input) if year_input.isdigit() else None

    top_recommendations = hybrid_recommend_for_user(user_id_input, n_recommendations=10, genre_filter=genre_filter, year_filter=year_filter)

    print("\nTop 10 recommended movie:")

    for item_id, movie_title in top_recommendations:
        if movie_title is None:
            # Placeholder row: the first field is a status message, not a title id,
            # so it must not be passed to svd.predict or printed as a movie
            print(f" {item_id}")
            continue
        predicted_rating = svd.predict(user_id_input, item_id).est  # Get predicted rating
        print(f" Movie Title: {movie_title}(Predicted Rating: {predicted_rating:.2f})")

except ValueError:
    print("Invalid input! Please enter a valid numeric user ID.")

# Evaluation
# NOTE(review): these metrics score the SVD estimates on `testset`; the content-based
# part of the hybrid score is not included.
test_predictions = svd.test(testset)

# Define threshold for relevant recommendations
threshold = 3.5
k = 10  # Top-K recommendations

# Group predictions by user as (estimate, true rating) pairs
user_est_true = defaultdict(list)
for uid, _, true_r, est, _ in test_predictions:
    user_est_true[uid].append((est, true_r))

# Compute RMSE and MAE once (each call also prints its value)
rmse = accuracy.rmse(test_predictions)
mae = accuracy.mae(test_predictions)

# Compute per-user Precision and Recall
precision_list = []
recall_list = []

for uid, ratings in user_est_true.items():
    ratings.sort(key=lambda x: x[0], reverse=True)  # Sort by predicted rating (desc)
    top_k = ratings[:k]  # Get Top-K items

    relevant_items = sum((true_r >= threshold) for (_, true_r) in ratings)  # Actual good items
    recommended_relevant = sum((true_r >= threshold) for (_, true_r) in top_k)  # Recommended good items

    # Per-user values get their own names so they are not confused with the averages below
    user_precision = recommended_relevant / k if k > 0 else 0
    user_recall = recommended_relevant / relevant_items if relevant_items > 0 else 0

    precision_list.append(user_precision)
    recall_list.append(user_recall)

# Average Precision and Recall
precision = sum(precision_list) / len(precision_list) if precision_list else 0
recall = sum(recall_list) / len(recall_list) if recall_list else 0
# Stored as `f1` (not `f1_score`) so sklearn's f1_score function imported at the top
# is not overwritten by a float
f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0

# Summary of all metrics
print("\nModel Evaluation Metrics:")
print(f"RMSE: {rmse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

