In [80]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score

# Step 1: Load and preprocess data
movies = pd.read_csv('modelfile.csv')
# Work on an explicit copy of the selected columns. Adding a column to a frame
# that was produced by column selection is the pattern that triggers pandas'
# SettingWithCopyWarning; the copy makes ownership of the data unambiguous.
movies = movies[['id', 'title', 'overview', 'genres']].copy()

# Fill missing values (applies to every kept column, `id` and `title` included)
movies = movies.fillna('')

# Combine features: one free-text field per movie, fed to the vectorizer below
movies['tags'] = movies['overview'] + ' ' + movies['genres']
new_data = movies.drop(columns=['overview', 'genres'])

# Step 2: Vectorization with TF-IDF
tfidf = TfidfVectorizer(max_features=10000, stop_words='english')
# NOTE(review): .toarray() turns the sparse TF-IDF result into a dense array.
# It is kept so `vector` stays an ndarray for any later code; if nothing else
# needs the dense form, the sparse matrix could be passed to
# cosine_similarity directly - confirm before changing.
vector = tfidf.fit_transform(new_data['tags'].values.astype('U')).toarray()

# Step 3: Similarity Matrix
# Row/column i of `similarity` corresponds to row position i of `new_data`.
similarity = cosine_similarity(vector)

# Step 4: Train-Test Split
# NOTE(review): the vectorizer and the similarity matrix above are built from
# all of `new_data` before this split, so `test_data` is a 20% sample of rows
# the model has already seen rather than held-out data.
train_data, test_data = train_test_split(new_data, test_size=0.2, random_state=42)

# Step 5: Recommendation Function
def recommend(movie_title, data, similarity_matrix, top_k=10):
    """Return the titles of the movies most similar to ``movie_title``.

    Args:
        movie_title: Title to look up. The first row of ``data`` whose
            ``title`` equals it is used as the query.
        data: DataFrame with a ``title`` column. Row *positions* (not index
            labels) are used to address ``similarity_matrix``, so the frame
            may carry any index.
        similarity_matrix: Row-indexable matrix (e.g. a 2-D array or a list
            of lists) in which ``similarity_matrix[i][j]`` is the score
            between the rows at positions ``i`` and ``j`` of ``data``.
        top_k: Maximum number of titles to return. Values below 1 yield an
            empty list.

    Returns:
        Up to ``top_k`` titles ordered by descending score; rows with equal
        scores keep their original row order. The query row itself is never
        part of the result. An empty list is returned when ``movie_title``
        does not occur in ``data``.

    Raises:
        IndexError: If ``similarity_matrix`` does not line up with ``data``
            (fewer rows than the query position, or a row longer than
            ``data``).
    """
    titles = data['title'].tolist()
    try:
        query_pos = titles.index(movie_title)
    except ValueError:
        # Unknown title: nothing to recommend.
        return []

    scores = similarity_matrix[query_pos]
    # Drop the query row by position instead of assuming it sorts first:
    # another row with an identical score (duplicate or empty text) can
    # outrank it, which would otherwise return the query as its own match.
    candidates = (pos for pos in range(len(scores)) if pos != query_pos)
    # sorted() is stable, also with reverse=True, so ties keep row order.
    ranked = sorted(candidates, key=lambda pos: scores[pos], reverse=True)
    return [titles[pos] for pos in ranked[:max(top_k, 0)]]

# Step 6: Evaluate Precision@10 and Recall@10
# What is counted: a "hit" is a test movie whose own title appears in the list
# recommended for it. Precision divides the hits by the total number of titles
# recommended; recall divides the hits by the number of test movies processed.
# NOTE(review): no ground-truth relevance labels are involved, so these are
# not precision/recall in the usual retrieval sense - confirm the intended
# definition of "relevant" before relying on the printed numbers.
true_positives = all_recommendations = relevant_recommendations = 0

for query_title in test_data['title']:
    try:
        recommendations = recommend(query_title, new_data, similarity, top_k=10)
    except IndexError:
        # Skip this movie entirely; none of the counters are touched.
        continue
    relevant_recommendations += 1  # every processed test movie counts once
    true_positives += int(query_title in recommendations)
    all_recommendations += len(recommendations)

# Guard both ratios against an empty denominator.
if all_recommendations > 0:
    precision = true_positives / all_recommendations
else:
    precision = 0

if relevant_recommendations > 0:
    recall = true_positives / relevant_recommendations
else:
    recall = 0

print(f"Precision@10: {precision:.2f}")
print(f"Recall@10: {recall:.2f}")

# Step 7: Example Recommendations
print("\nRecommendations for 'Iron Man':")
print(recommend("Iron Man", new_data, similarity, top_k=10))

Precision@10: 0.00
Recall@10: 0.04

Recommendations for 'Iron Man':
['Iron Man 2', 'Visiting Van Gogh', 'The Brother from Space', 'Der Tag des Jorun', 'Am nächsten Morgen kehrte der Minister nicht an seinen Arbeitsplatz zurück', 'Die Letzte Droge', 'Xaver und sein außerirdischer Freund', 'The Invincible Iron Man', 'The Dark Half', 'Strawberry in the Supermarket']
