In [12]:
import re

def clean_title(title):
    """Remove every character that is not an ASCII letter, digit or space.

    Parameters
    ----------
    title : str
        Raw title text.

    Returns
    -------
    str
        ``title`` with all other characters deleted. A non-string value
        (``None``, or the float ``NaN`` that marks a missing cell) is
        returned unchanged so that a later ``dropna`` can still detect it.
    """
    # re.sub raises TypeError on non-strings; pass missing values through.
    if not isinstance(title, str):
        return title
    return re.sub("[^a-zA-Z0-9 ]", "", title)

# Store a cleaned copy of each English title in a new column.
# NOTE(review): `anime` is only loaded by a later cell (In [14]); on a fresh
# kernel this cell cannot run before that one.
anime['clean_title'] = anime['English name'].apply(clean_title)

# Retain only the rows in which all four fields are present.
anime_cleaned = anime[anime[['Name', 'clean_title', 'Score', 'id']].notna().all(axis=1)]

# Show the original names next to their cleaned English titles.
print(anime_cleaned.loc[:, ['Name', 'clean_title']].head())

                              Name             clean_title
0                     Cowboy Bebop            Cowboy Bebop
1  Cowboy Bebop: Tengoku no Tobira  Cowboy Bebop The Movie
2                           Trigun                  Trigun
3               Witch Hunter Robin      Witch Hunter Robin
4                   Bouken Ou Beet  Beet the Vandel Buster


In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over single words and adjacent word pairs of the cleaned titles.
vectorizer = TfidfVectorizer(ngram_range=(1, 2))

# NOTE(review): fitted on the full `anime` frame, not `anime_cleaned`, so the
# rows of `tfidf` line up with `anime`.
tfidf = vectorizer.fit_transform(anime.loc[:, "clean_title"])


In [14]:
import pandas as pd
import matplotlib.pyplot as plt
import re
# Read the catalogue and expose its `anime_id` column under the name `id`.
anime = pd.read_csv("series-dataset-2023.csv").rename(columns={'anime_id': 'id'})

anime.head()

Unnamed: 0,id,Name,English name,Other name,Score,Genres,Synopsis,Type,Episodes,Aired,...,Studios,Source,Duration,Rating,Rank,Popularity,Favorites,Scored By,Members,Image URL
0,1,Cowboy Bebop,Cowboy Bebop,カウボーイビバップ,8.75,"Action, Award Winning, Sci-Fi","Crime is timeless. By the year 2071, humanity ...",TV,26.0,"Apr 3, 1998 to Apr 24, 1999",...,Sunrise,Original,24 min per ep,R - 17+ (violence & profanity),41.0,43,78525,914193.0,1771505,https://cdn.myanimelist.net/images/anime/4/196...
1,5,Cowboy Bebop: Tengoku no Tobira,Cowboy Bebop: The Movie,カウボーイビバップ 天国の扉,8.38,"Action, Sci-Fi","Another day, another bounty—such is the life o...",Movie,1.0,"Sep 1, 2001",...,Bones,Original,1 hr 55 min,R - 17+ (violence & profanity),189.0,602,1448,206248.0,360978,https://cdn.myanimelist.net/images/anime/1439/...
2,6,Trigun,Trigun,トライガン,8.22,"Action, Adventure, Sci-Fi","Vash the Stampede is the man with a $$60,000,0...",TV,26.0,"Apr 1, 1998 to Sep 30, 1998",...,Madhouse,Manga,24 min per ep,PG-13 - Teens 13 or older,328.0,246,15035,356739.0,727252,https://cdn.myanimelist.net/images/anime/7/203...
3,7,Witch Hunter Robin,Witch Hunter Robin,Witch Hunter ROBIN (ウイッチハンターロビン),7.25,"Action, Drama, Mystery, Supernatural",Robin Sena is a powerful craft user drafted in...,TV,26.0,"Jul 3, 2002 to Dec 25, 2002",...,Sunrise,Original,25 min per ep,PG-13 - Teens 13 or older,2764.0,1795,613,42829.0,111931,https://cdn.myanimelist.net/images/anime/10/19...
4,8,Bouken Ou Beet,Beet the Vandel Buster,冒険王ビィト,6.94,"Adventure, Fantasy, Supernatural",It is the dark century and the people are suff...,TV,52.0,"Sep 30, 2004 to Sep 29, 2005",...,Toei Animation,Manga,23 min per ep,PG - Children,4240.0,5126,14,6413.0,15001,https://cdn.myanimelist.net/images/anime/7/215...


In [15]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np


def search(name, regex=False):
    """Find catalogue rows whose ``Name`` contains ``name``, ignoring case.

    Parameters
    ----------
    name : str
        Text to look for inside the ``Name`` column of the global ``anime``
        frame.
    regex : bool, default False
        When False the text is matched literally, so characters such as
        ``(``, ``[`` or ``+`` typed by a user cannot raise ``re.error``.
        Pass True to have ``name`` interpreted as a regular expression.

    Returns
    -------
    pandas.DataFrame
        The ``id`` and ``English name`` columns of the matching rows. The
        frame is empty (but keeps those two columns) when nothing matches,
        when ``anime`` is not defined, or when a required column is absent;
        a short message is printed in each of those cases.
    """
    result_columns = ['id', 'English name']

    # Ensure 'anime' DataFrame has been defined before touching it.
    if 'anime' not in globals():
        print("The 'anime' DataFrame is not defined.")
        return pd.DataFrame(columns=result_columns)

    # Check that every column read below is present.
    if {'Name', *result_columns} - set(anime.columns):
        print("The 'anime' DataFrame does not contain the required columns.")
        return pd.DataFrame(columns=result_columns)

    # Case-insensitive containment test; rows with a missing Name never match.
    matches = anime['Name'].str.contains(name, case=False, na=False, regex=regex)
    results = anime.loc[matches, result_columns]

    if results.empty:
        print("No results found.")
    return results


In [16]:
import ipywidgets as widgets
from IPython.display import display

# Free-text box in which the title to look up is typed.
anime_input = widgets.Text(value='', description='Anime Name:', disabled=False)

# Output area that the change handler clears and redraws.
anime_list = widgets.Output()

def on_type(data):
    """Redraw the search results for the text currently in the input box.

    ``data`` is the change notification passed by the widget; only its
    ``"new"`` entry (the current text) is read. The output area is cleared
    on every call, and the matches returned by ``search`` are displayed
    when there are any.
    """
    with anime_list:
        anime_list.clear_output()
        typed = data["new"]
        # Nothing typed: leave the output area blank.
        if len(typed) == 0:
            return
        matches = search(typed)
        if matches.empty:
            return
        # Show the matching ids together with their English names.
        display(matches)

# Call on_type whenever the text box's value changes.
anime_input.observe(handler=on_type, names='value')

# Render the input box with its result area underneath.
display(anime_input, anime_list)

Text(value='', description='Anime Name:')

Output()

In [17]:

# Reference title id used by the exploratory cells that follow.
animeID = 21

# Read the user ratings and rename `anime_id` to `id`, the key name the
# catalogue frame also uses.
ratings = pd.read_csv("rating.csv").rename(columns={'anime_id': 'id'})

# Catalogue row(s) for the reference title.
animes = anime.loc[anime["id"] == animeID]
ratings.head()


Unnamed: 0,user_id,id,rating
0,1,20,-1.0
1,1,24,-1.0
2,1,79,-1.0
3,1,226,-1.0
4,1,241,-1.0


In [18]:
# Users who rated the reference title above 7.
# NOTE(review): find_similar_anime uses `> 4` for the same filter, and the
# table rendered at the end of this exploration is empty -- confirm the
# rating scale before relying on the 7 used here.
similar_users = ratings.loc[
    (ratings["id"] == animeID) & (ratings["rating"] > 7), "user_id"
].unique()

# Ids of every title those users rated above 7.
similar_user_recs = ratings.loc[
    ratings["user_id"].isin(similar_users) & (ratings["rating"] > 7), "id"
]

# Per title: number of such ratings divided by the number of similar users.
similar_user_recs = similar_user_recs.value_counts().div(len(similar_users))

# Keep only titles whose share exceeds 10%.
similar_user_recs = similar_user_recs.loc[similar_user_recs > 0.1]



In [19]:
# Ratings above 7, from any user, for the titles shortlisted above.
all_users = ratings.loc[
    ratings["id"].isin(similar_user_recs.index) & (ratings["rating"] > 7)
]

# Per title: number of such ratings divided by the number of distinct users.
all_user_recs = all_users["id"].value_counts().div(len(all_users["user_id"].unique()))

In [20]:
# Put both shares side by side, one row per title id.
rec_percentages = pd.concat(
    [similar_user_recs, all_user_recs], axis=1, keys=["similar", "all"]
)



In [21]:
# score = share among similar users divided by share among all users;
# rank the titles from the highest score down.
rec_percentages = rec_percentages.assign(
    score=rec_percentages["similar"].div(rec_percentages["all"])
).sort_values(by="score", ascending=False)

# Attach catalogue details to the ten best-scoring titles.
rec_percentages.iloc[:10].merge(anime, left_index=True, right_on="id")



Unnamed: 0,similar,all,score,id,Name,English name,Other name,Score,Genres,Synopsis,...,Studios,Source,Duration,Rating,Rank,Popularity,Favorites,Scored By,Members,Image URL


In [22]:
def find_similar_anime(id, min_rating=4, min_share=.10, top_n=10):
    """Recommend titles liked by the users who liked the title ``id``.

    Reads the global ``ratings`` frame (columns ``user_id``, ``id``,
    ``rating``) and the global ``anime`` catalogue (column ``id``).

    Parameters
    ----------
    id : scalar
        Value of ``ratings["id"]`` identifying the reference title.
    min_rating : number, default 4
        A rating counts as a "like" only when it is strictly greater than
        this value.
    min_share : float, default 0.10
        A title is kept only when strictly more than this fraction of the
        similar users liked it.
    top_n : int, default 10
        Maximum number of titles returned.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows with the columns ``similar`` (share of similar
        users who liked the title), ``all`` (share of all users who liked
        any shortlisted title) and ``score`` (``similar / all``), merged
        with the matching ``anime`` rows and ordered by ``similar``,
        highest first.
    """
    # Apply the rating filter once; every step below needs only "liked" rows.
    liked = ratings[ratings["rating"] > min_rating]

    # Users who liked the reference title, and everything else they liked.
    similar_users = liked.loc[liked["id"] == id, "user_id"].unique()
    similar_user_recs = liked.loc[liked["user_id"].isin(similar_users), "id"]
    similar_user_recs = similar_user_recs.value_counts() / len(similar_users)
    similar_user_recs = similar_user_recs[similar_user_recs > min_share]

    # How often the same titles are liked across all users.
    all_users = liked[liked["id"].isin(similar_user_recs.index)]
    all_user_recs = all_users["id"].value_counts() / len(all_users["user_id"].unique())

    rec_percentages = pd.concat([similar_user_recs, all_user_recs], axis=1)
    rec_percentages.columns = ["similar", "all"]
    rec_percentages["score"] = rec_percentages["similar"] / rec_percentages["all"]

    # NOTE(review): `score` is computed but the ordering uses `similar`, while
    # the exploratory cell ranks by `score` -- confirm which is intended.
    rec_percentages = rec_percentages.sort_values("similar", ascending=False)
    return rec_percentages.head(top_n).merge(anime, left_index=True, right_on="id")


In [23]:
import ipywidgets as widgets
from IPython.display import display

# Free-text box in which the title to get recommendations for is typed.
movie_name_input = widgets.Text(value='', description='Show :', disabled=False)

# Output area that the change handler clears and redraws.
recommendation_list = widgets.Output()

def on_type(data):
    """Redraw the recommendations for the text currently in the input box.

    ``data`` is the change notification passed by the widget; only its
    ``"new"`` entry (the current text) is read. The output area is cleared
    on every call. Once more than five characters have been typed, the
    first row returned by ``search`` is taken as the reference title and
    the result of ``find_similar_anime`` for it is displayed.
    """
    with recommendation_list:
        recommendation_list.clear_output()
        name = data["new"]
        # Wait until more than five characters are typed before querying.
        if len(name) <= 5:
            return
        results = search(name)
        # search() returns an empty frame (and prints a message) when nothing
        # matches; indexing row 0 of it would raise IndexError.
        if results.empty:
            return
        animeID = results.iloc[0]["id"]
        display(find_similar_anime(animeID))

# Call on_type whenever the text box's value changes.
movie_name_input.observe(handler=on_type, names='value')

# Render the input box with its recommendation area underneath.
display(movie_name_input, recommendation_list)


Text(value='', description='Show :')

Output()