<a href="https://colab.research.google.com/github/yechika/latihan_anime/blob/main/scikit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split
import pickle
import numpy as np

In [2]:
# Load the raw anime catalogue. If the CSV is missing we still print the
# friendly message, but then re-raise: the cleaning steps below need `df`, and
# continuing would either fail with a confusing NameError or silently reuse a
# stale `df` left in the kernel by an earlier run.
try:
    df = pd.read_csv('anime-dataset-2023.csv')
except FileNotFoundError:
    print("Error: File 'anime-dataset-2023.csv' tidak ditemukan.")
    raise
print("Dataset berhasil dimuat.")

# Columns kept for the recommender: the title, the text fields used for
# TF-IDF, and three numeric fields.
feature_cols = [
    'Name', 'Genres', 'Synopsis', 'Type', 'Studios', 'Source',
    'Score', 'Popularity', 'Favorites'
]
text_cols = ['Genres', 'Synopsis', 'Type', 'Studios', 'Source']
numeric_cols = ['Score', 'Popularity', 'Favorites']

# Derive a fresh frame instead of mutating in place so re-running the cell is
# idempotent: keep the first row per title, then renumber rows 0..n-1 so that
# row labels and row positions coincide.
df_clean = (
    df[feature_cols]
    .drop_duplicates(subset=['Name'])
    .reset_index(drop=True)
)

# Missing text becomes an empty string so later string concatenation and
# vectorisation never see NaN.
df_clean[text_cols] = df_clean[text_cols].fillna('')

# Values that cannot be parsed as numbers are coerced to NaN and then,
# together with genuinely missing values, replaced by 0.
df_clean[numeric_cols] = (
    df_clean[numeric_cols]
    .apply(pd.to_numeric, errors='coerce')
    .fillna(0)
)

print(f"Jumlah data setelah dibersihkan: {len(df_clean)}")
df_clean.head()

Dataset berhasil dimuat.
Jumlah data setelah dibersihkan: 24901


Unnamed: 0,Name,Genres,Synopsis,Type,Studios,Source,Score,Popularity,Favorites
0,Cowboy Bebop,"Action, Award Winning, Sci-Fi","Crime is timeless. By the year 2071, humanity ...",TV,Sunrise,Original,8.75,43,78525
1,Cowboy Bebop: Tengoku no Tobira,"Action, Sci-Fi","Another day, another bounty—such is the life o...",Movie,Bones,Original,8.38,602,1448
2,Trigun,"Action, Adventure, Sci-Fi","Vash the Stampede is the man with a $$60,000,0...",TV,Madhouse,Manga,8.22,246,15035
3,Witch Hunter Robin,"Action, Drama, Mystery, Supernatural",Robin Sena is a powerful craft user drafted in...,TV,Sunrise,Original,7.25,1795,613
4,Bouken Ou Beet,"Adventure, Fantasy, Supernatural",It is the dark century and the people are suff...,TV,Toei Animation,Manga,6.94,5126,14


In [3]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse import hstack

# Synopsis text -> TF-IDF vectors, English stop words removed, vocabulary
# capped at 5000 terms.
tfidf_synopsis = TfidfVectorizer(max_features=5000, stop_words='english')
synopsis_matrix = tfidf_synopsis.fit_transform(df_clean['Synopsis'])

# Genres, studios, source and type are joined with single spaces into one
# "metadata" string per row (stored as a new column on df_clean) and
# vectorised with a second, uncapped TF-IDF model.
df_clean['metadata'] = df_clean['Genres'].str.cat(
    [df_clean['Studios'], df_clean['Source'], df_clean['Type']],
    sep=' ',
)
tfidf_metadata = TfidfVectorizer(stop_words='english')
metadata_matrix = tfidf_metadata.fit_transform(df_clean['metadata'])

# Numeric columns are min-max scaled before being appended to the text
# features.
scaler = MinMaxScaler()
numeric_features = scaler.fit_transform(
    df_clean[['Score', 'Popularity', 'Favorites']]
)

# Column-wise concatenation: synopsis terms, metadata terms, numeric columns.
feature_matrix = hstack((synopsis_matrix, metadata_matrix, numeric_features))

print("Bentuk matriks fitur gabungan:", feature_matrix.shape)

Bentuk matriks fitur gabungan: (24901, 6201)


In [4]:
from sklearn.metrics.pairwise import cosine_similarity

# Cosine similarity of every row of the feature matrix against every other
# row. With the second argument omitted, scikit-learn compares the matrix
# with itself. The result has one row and one column per anime, so its size
# grows quadratically with the catalogue.
cosine_sim = cosine_similarity(feature_matrix)

print("Bentuk matriks Cosine Similarity:", cosine_sim.shape)

Bentuk matriks Cosine Similarity: (24901, 24901)


In [5]:
# Title -> row-label lookup for the cleaned catalogue. Kept as a module-level
# name for any other code that reads it; get_recommendations resolves titles
# from the dataframe it is given so that a caller-supplied frame is honoured.
indices = pd.Series(df_clean.index, index=df_clean['Name']).drop_duplicates()

def get_recommendations(title, cosine_sim_matrix=cosine_sim, dataframe=df_clean, top_n=10):
    """
    Return the titles most similar to ``title``.

    Parameters
    ----------
    title : str
        Anime title to look up. An exact match on the ``Name`` column is
        tried first; otherwise the first row whose name contains ``title``
        (case-insensitive, literal substring) is used and a notice is printed.
    cosine_sim_matrix : array-like
        Square similarity matrix whose row/column order matches the row
        order of ``dataframe``.
    dataframe : pandas.DataFrame
        Catalogue with a ``Name`` column.
    top_n : int, default 10
        Number of recommendations to return.

    Returns
    -------
    pandas.Series or str
        The ``Name`` values of the ``top_n`` most similar rows, ordered from
        most to least similar, or a "not found" message string when no title
        matches.
    """
    names = dataframe['Name']

    # Positional lookup: the similarity matrix and the final .iloc are both
    # position-based, so work with positions rather than index labels.
    exact_positions = np.flatnonzero((names == title).to_numpy(dtype=bool))
    if len(exact_positions) > 0:
        idx = int(exact_positions[0])
    else:
        # regex=False: titles often contain characters such as "[", "(", "?"
        # or "+" that would otherwise be interpreted as a regular expression.
        # na=False: rows with a missing name simply do not match.
        partial_mask = names.str.contains(title, case=False, regex=False, na=False)
        partial_positions = np.flatnonzero(partial_mask.to_numpy(dtype=bool))
        if len(partial_positions) == 0:
            return f"Anime dengan judul '{title}' tidak ditemukan."
        idx = int(partial_positions[0])
        title = names.iloc[idx]
        print(f"Mungkin maksud Anda: '{title}'? Menampilkan hasil untuk judul ini.")

    scores = np.asarray(cosine_sim_matrix[idx]).ravel()

    # Stable descending sort: ties keep ascending row order.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the query row explicitly instead of assuming it sorts first; with
    # tied scores it may not, which would leak the query into its own results.
    ranked = ranked[ranked != idx][:top_n]

    return names.iloc[ranked]

print("Fungsi rekomendasi siap digunakan.")

Fungsi rekomendasi siap digunakan.


In [23]:
# Example query: print the recommendations returned for the title 'One Piece'.
print(get_recommendations('One Piece'))

13409    One Piece: Episode of East Blue - Luffy to 4-n...
4038                         One Piece: Romance Dawn Story
14699                         One Piece Movie 14: Stampede
3514                          One Piece Film: Strong World
6456                                Hunter x Hunter (2011)
1128     One Piece: Oounabara ni Hirake! Dekkai Dekkai ...
6823                                     One Piece Film: Z
734                                          Dragon Ball Z
3961                      Fullmetal Alchemist: Brotherhood
430                                     One Piece Movie 01
Name: Name, dtype: object


In [8]:
import pickle

# Bundle the cleaned catalogue and the full similarity matrix under the keys
# 'anime_list' and 'similarity_matrix'.
data_to_export = dict(
    anime_list=df_clean,
    similarity_matrix=cosine_sim,
)

# Serialise the bundle to a single pickle file in the working directory.
with open('anime_recommender.pkl', mode='wb') as f:
    pickle.dump(data_to_export, f)

print("Data dan matriks similarity berhasil diekspor ke 'anime_recommender.pkl'")

Data dan matriks similarity berhasil diekspor ke 'anime_recommender.pkl'
