<a href="https://colab.research.google.com/github/udaycodespace/ToDoApp/blob/main/Unsupervised_Anime_Recommendation_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rapidfuzz


Collecting rapidfuzz
  Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading rapidfuzz-3.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m17.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz
Successfully installed rapidfuzz-3.13.0


In [None]:
# 📦 Import packages
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MultiLabelBinarizer, MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import hstack, csr_matrix
from rapidfuzz import process

In [None]:
# 📁 Read the raw anime CSV into a DataFrame
df = pd.read_csv(
    '/content/anime_2025.csv',
    low_memory=False,
)

In [None]:
# 📌 Keep only the columns the recommender uses, as an independent copy of `df`
anime = df.loc[:, [
    'anime_id',
    'name',
    'genres',
    'studios',
    'synopsis',
    'score',
    'popularity',
]].copy()

In [None]:
# 🧹 Drop rows with missing key data
# The index is reset afterwards so that index labels equal row positions.
# Later cells build feature matrices (and the cosine-similarity matrix) whose
# rows are positional, and the title lookup stores `anime.index`; without the
# reset, dropped rows leave gaps and labels point at the wrong matrix rows.
anime = (
    anime
    .dropna(subset=['synopsis', 'genres', 'score', 'popularity'])
    .reset_index(drop=True)
)

In [None]:
# 🧬 Convert genre and studio strings to lists
def _split_labels(value):
    """Split a comma-separated label string into a list of stripped labels.

    Missing values (NaN/None) yield an empty list. `studios` is not part of
    the dropna subset, so it can be NaN here; calling str() on NaN would
    otherwise produce a bogus 'nan' label shared by every such row.
    """
    if pd.isna(value):
        return []
    return [label.strip() for label in str(value).split(',') if label.strip()]


anime['genre_list'] = anime['genres'].apply(_split_labels)
anime['studio_list'] = anime['studios'].apply(_split_labels)


In [None]:
# 🧠 1. Turn each synopsis into a TF-IDF vector
# (English stop words removed, vocabulary capped at 5000 terms)
tfidf = TfidfVectorizer(
    max_features=5000,
    stop_words='english',
)
synopsis_matrix = tfidf.fit_transform(anime.loc[:, 'synopsis'])

In [None]:
# 🎭 2. Multi-hot encode the genre and studio lists (one binarizer per column)
mlb_genres, mlb_studios = MultiLabelBinarizer(), MultiLabelBinarizer()
genre_matrix = mlb_genres.fit_transform(anime.loc[:, 'genre_list'])
studio_matrix = mlb_studios.fit_transform(anime.loc[:, 'studio_list'])

In [None]:
# 🔢 3. Min-max scale the numeric columns (score, popularity) and store the
# result as a sparse matrix so it can be stacked with the other features
scaler = MinMaxScaler()
numeric_features = scaler.fit_transform(
    anime.loc[:, ['score', 'popularity']]
)
numeric_sparse = csr_matrix(numeric_features)

In [None]:
# 🧩 4. Combine all feature matrices
# Every block has one row per row of `anime`, in the same (positional) order,
# so row i of `feature_matrix` describes `anime.iloc[i]`.
feature_matrix = hstack([
    synopsis_matrix,
    csr_matrix(genre_matrix),
    csr_matrix(studio_matrix),
    numeric_sparse
], format='csr')

# 📐 Precompute cosine similarity matrix
# cosine_sim[i, j] is the similarity between anime.iloc[i] and anime.iloc[j].
cosine_sim = cosine_similarity(feature_matrix, feature_matrix)

# 🔍 Create title-to-index mapping for fast lookup
# The values must be row POSITIONS (0..n-1), because they are used to index
# `cosine_sim`. `anime.index` holds labels, which stop matching positions as
# soon as rows have been dropped, so positions are generated explicitly.
indices = pd.Series(np.arange(len(anime)), index=anime['name'].str.lower())
# Keep the first occurrence of each title so a lookup always yields a single
# position rather than a Series of several.
indices = indices[~indices.index.duplicated(keep='first')]

# 💡 Anime Recommendation Function
def recommend_anime(title, top_n=5, score_cutoff=75):
    """
    Recommend top_n similar anime based on content features using cosine similarity.
    Uses case-insensitive fuzzy string matching to handle typos or case mismatches.

    Relies on the notebook-level objects `anime` (DataFrame with a 'name'
    column) and `cosine_sim` (square similarity matrix whose row/column i
    corresponds to `anime.iloc[i]`).

    Parameters:
        title (str): Title of the anime to search. Non-string or blank input
            falls back to "Death Note" after printing a warning.
        top_n (int): Number of recommendations to return (values below 0 are
            treated as 0).
        score_cutoff (int): Minimum fuzzy score to consider a valid match.

    Returns:
        pd.DataFrame: Top recommendations indexed by 'name' with a
        'similarity' column, most similar first. An empty DataFrame is
        returned when no title reaches `score_cutoff`.
    """

    # 🧾 Handle invalid or empty input
    if not isinstance(title, str) or not title.strip():
        print("⚠️ Please enter a valid anime name. Showing best starters Anime.")
        title = "Death Note"

    # 🔍 Fuzzy match title to closest anime
    # rapidfuzz does not normalise case by default, so both the query and the
    # candidates are lower-cased. Missing names become '' so every candidate
    # is a string and list positions stay aligned with the rows of `anime`.
    names = anime['name']
    candidates = names.fillna('').astype(str).str.lower().tolist()
    match = process.extractOne(
        title.strip().lower(), candidates, score_cutoff=score_cutoff
    )

    if not match:
        print(f"❌ No close match found for '{title}'. Please try a different name.")
        return pd.DataFrame()

    # For a list of choices, extractOne returns (choice, score, list position).
    # The list position is the row position in `anime`, which is exactly the
    # row of `cosine_sim` we need. No label-based lookup is involved, so gaps
    # in `anime.index` or duplicate titles cannot select the wrong row.
    idx = match[2]

    # 🔗 Get similarity scores from cosine matrix
    scores = np.asarray(cosine_sim[idx], dtype=float).ravel()

    # 📈 Top N similar anime (excluding itself)
    # Stable descending sort, then drop the query row explicitly rather than
    # assuming it is ranked first (ties at similarity 1.0 can outrank it).
    ranked = np.argsort(-scores, kind='stable')
    ranked = ranked[ranked != idx][:max(int(top_n), 0)]
    results = [(names.iloc[i], scores[i]) for i in ranked]

    return pd.DataFrame(results, columns=['name', 'similarity']).set_index('name')

In [None]:
# ▶️ Example usage
# Prompts for a title, echoes it, then prints the recommendation table.
# The header is printed BEFORE recommend_anime() is called, so any warning
# that function prints appears after the header; keep this statement order.
if __name__ == '__main__':
    user_input = input("Enter anime title (e.g., 'Naruto'): ").strip()
    print(f"\n📌 Recommendations for: '{user_input}'")
    print(recommend_anime(user_input, top_n=5))

Enter anime title (e.g., 'Naruto'): One Punch Man

📌 Recommendations for: 'One Punch Man'
                                similarity
name                                      
Fugou Keiji: Balance:Unlimited    0.624117
Boku dake ga Inai Machi           0.571262
Shadows House 2nd Season          0.567497
Yuukoku no Moriarty Part 2        0.565551
Shadows House                     0.563962


In [None]:
# ▶️ Example usage (second interactive run, same code as the previous usage cell)
# Prompts for a title, echoes it, then prints the recommendation table.
# The header is printed BEFORE recommend_anime() is called, so any warning
# that function prints appears after the header; keep this statement order.
if __name__ == '__main__':
    user_input = input("Enter anime title (e.g., 'Naruto'): ").strip()
    print(f"\n📌 Recommendations for: '{user_input}'")
    print(recommend_anime(user_input, top_n=5))

Enter anime title (e.g., 'Naruto'): One Piece

📌 Recommendations for: 'One Piece'
                                                   similarity
name                                                         
Kingdom 3rd Season                                   0.788998
Kingdom 4th Season                                   0.788536
Kingdom                                              0.653244
Kingdom 2nd Season                                   0.641910
Naruto: Shippuuden - Shippuu! "Konoha Gakuen" Den    0.610272
