## Modeling

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Load the preprocessed dataset. Later cells read the columns 'Genre',
# 'Movie Name', 'Reviewer', 'Score' and 'Sentiment Score' from it
# (presumably one row per review -- TODO confirm against the CSV).
data = pd.read_csv('/content/proses_pml.csv')

# TF-IDF vectorization of the 'Genre' column. Missing genres are treated as
# empty documents so a single NaN cell does not abort the fit.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(data['Genre'].fillna(''))

# Pairwise cosine similarity between every pair of rows of `data`
# (dense, n_rows x n_rows).
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Title -> row lookup used by the content-based recommender.
# Series.drop_duplicates() compares *values* (the row index, which is already
# unique), so it never removed repeated titles. Deduplicate on the index
# labels instead, keeping the first row of every title.
indices = pd.Series(data.index, index=data['Movie Name'])
indices = indices[~indices.index.duplicated(keep='first')]

def content_based_recommendations(user_id=None, n_recommendations=3):
    """Recommend movies whose genre vectors are closest to a reference set.

    Args:
        user_id: Reviewer identifier. When given, the reference set is every
            row of ``data`` reviewed by that user. When falsy, the first row
            of ``data`` is used as the single reference.
        n_recommendations: Maximum number of distinct titles to return.

    Returns:
        A ``pandas.Series`` slice of ``data['Movie Name']`` holding the
        recommended titles in descending similarity order, or an error
        message string when ``user_id`` has no reviews in ``data``.
    """
    titles = data['Movie Name'].to_numpy()

    if user_id:
        # Positional mask of the rows this user reviewed; positions (not index
        # labels) are what address the rows/columns of `cosine_sim`.
        reviewed = (data['Reviewer'] == user_id).to_numpy()
        if not reviewed.any():
            return f"Pengguna dengan ID '{user_id}' tidak memiliki review dalam database."
        reference_rows = reviewed.nonzero()[0]
    else:
        if len(titles) == 0:
            return data['Movie Name'].iloc[[]]
        # No user given: fall back to the first row as the reference.
        reference_rows = [0]

    # Titles the recommendation must not repeat: the reference titles first,
    # then every title already picked below.
    seen_titles = set(titles[reference_rows])

    # Aggregate similarity of every row to the whole reference set.
    scores = cosine_sim[reference_rows].sum(axis=0)

    # Stable descending order keeps the original row order among ties.
    ranking = (-scores).argsort(kind='stable')

    # Skipping by title (instead of blindly dropping the top-ranked row) keeps
    # reference titles out of the result even when scores are tied or summed,
    # and prevents the same title from being returned more than once.
    picked_rows = []
    for row in ranking:
        if len(picked_rows) >= n_recommendations:
            break
        title = titles[row]
        if title in seen_titles:
            continue
        seen_titles.add(title)
        picked_rows.append(row)

    return data['Movie Name'].iloc[picked_rows]

# Ask for a reviewer ID; an empty answer selects the non-personalised fallback.
user_id_input = input("Masukkan ID / Username: ").strip()

if not user_id_input:
    recommendations = content_based_recommendations()
else:
    print(f"ID / Username: {user_id_input} diterima. Menyesuaikan rekomendasi...")
    recommendations = content_based_recommendations(user_id=user_id_input)

# Header line followed by the recommendation result on the next line.
print("Rekomendasi:", recommendations, sep="\n")

# Evaluasi model: Menghitung Mean Cosine Similarity
def evaluate_model():
    """Return the mean cosine similarity over the ranked neighbours of all rows.

    For every row position of ``data``, the matching row of ``cosine_sim`` is
    sorted in descending order, the top-ranked score is skipped, and the next
    three scores are collected. The result is the arithmetic mean of every
    collected score.
    """
    collected = [
        score
        for row in range(len(data))
        for score in sorted(cosine_sim[row], reverse=True)[1:4]
    ]
    return sum(collected) / len(collected)

# Run the model evaluation and report the score with four decimal places.
mean_similarity = evaluate_model()
print(f"Mean Cosine Similarity untuk rekomendasi: {mean_similarity:.4f}")

Masukkan ID / Username: JPV852
ID / Username: JPV852 diterima. Menyesuaikan rekomendasi...
Rekomendasi:
13579    Extinction
11119     Peninsula
18032    Shark Bait
Name: Movie Name, dtype: object
Mean Cosine Similarity untuk rekomendasi: 0.9940


In [None]:
# %pip installs into the environment of the running kernel; the version is
# pinned to the release this notebook was last executed with.
%pip install -q scikit-surprise==1.1.4

Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/154.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m154.4/154.4 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (pyproject.toml) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.4-cp310-cp310-linux_x86_64.whl size=2357276 sha256=0f0925374dd6ebe975d9d79bba9a569c36bda4d3adccf642fe2d8b4a706078f3
  Stored in directory: /root/.cache/pip/wheels/4b/3f/df/6acbf0a40397d9bf3ff97f582cc22fb9ce66adde75bc71fd54
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Succ

In [None]:
from surprise import SVD, Dataset, Reader
from surprise import PredictionImpossible
from surprise.model_selection import KFold
from surprise.model_selection import cross_validate

# Fixed seed so the cross-validation folds and the SVD factor initialisation
# are the same on every Restart & Run All.
RANDOM_STATE = 42

# Dataset for Surprise: (user, item, rating) triples taken from `data`.
reader = Reader(rating_scale=(0, 100))  # adjust to the rating scale of 'Score'
collab_data = Dataset.load_from_df(data[['Reviewer', 'Movie Name', 'Score']], reader)

# SVD (Singular Value Decomposition) model, evaluated with seeded 5-fold CV.
svd_model = SVD(random_state=RANDOM_STATE)
cross_validate(
    svd_model,
    collab_data,
    cv=KFold(n_splits=5, random_state=RANDOM_STATE, shuffle=True),
    verbose=True,
)

# Train the final model on the full dataset.
trainset = collab_data.build_full_trainset()
svd_model.fit(trainset)

# Fungsi Collaborative Recommendations
def collaborative_recommendations(user_id, n_recommendations=3):
    """Recommend unseen movies with the highest rating predicted by ``svd_model``.

    Args:
        user_id: Reviewer identifier as stored in ``data['Reviewer']``.
        n_recommendations: Maximum number of titles to return.

    Returns:
        A list of movie titles ordered by descending predicted rating. Titles
        the user already reviewed are excluded, as are titles whose
        prediction the model flagged as impossible.
    """
    # Set membership keeps the "already watched" check O(1) per movie.
    watched = set(data.loc[data['Reviewer'] == user_id, 'Movie Name'])

    predictions = []
    for movie in data['Movie Name'].unique():
        if movie in watched:
            continue
        prediction = svd_model.predict(user_id, movie)
        # predict() handles PredictionImpossible internally and reports it via
        # details['was_impossible'], so check the flag rather than catching
        # an exception that never reaches the caller.
        if prediction.details.get('was_impossible', False):
            continue
        predictions.append((movie, prediction.est))

    # list.sort is stable: ties keep the order of first appearance in `data`.
    predictions.sort(key=lambda pair: pair[1], reverse=True)
    # Clamp so a negative count yields an empty list, not a tail-trimmed slice.
    return [movie for movie, _ in predictions[:max(n_recommendations, 0)]]

# Read the reviewer ID and fetch the collaborative recommendations for it.
user_id = input("Masukkan ID / Username: ").strip()
recommendations = collaborative_recommendations(user_id)

# Print a numbered list, or a notice when nothing could be recommended.
if recommendations:
    print(f"Rekomendasi untuk '{user_id}':")
    for i, movie in enumerate(recommendations, start=1):
        print(f"{i}. {movie}")
else:
    print(f"Tidak ada rekomendasi untuk user '{user_id}'.")


Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    19.4999 19.3889 19.0137 19.1788 18.8168 19.1796 0.2472  
MAE (testset)     12.7357 12.6741 12.5178 12.6979 12.2379 12.5727 0.1831  
Fit time          0.32    0.37    0.31    0.49    0.65    0.43    0.13    
Test time         0.02    0.03    0.02    0.04    0.04    0.03    0.01    
Masukkan ID / Username: JPV852
Rekomendasi untuk 'JPV852':
1. Portrait of a Lady on Fire
2. Top Gun: Maverick
3. Jurassic Park


In [None]:
def hybrid_with_sentiment(user_id, n_recommendations=5):
    """Merge content-based and collaborative picks, ranked by mean sentiment.

    Args:
        user_id: Reviewer identifier passed to both underlying recommenders.
        n_recommendations: Maximum number of titles to return.

    Returns:
        A list of distinct movie titles ordered by the descending mean of
        ``data['Sentiment Score']`` for each title. Titles without any
        sentiment score are ranked last.
    """
    # Content-based picks for this user. The recommender returns an error
    # message string when the user has no reviews; treat that as "no picks".
    content_recs = content_based_recommendations(user_id)
    if isinstance(content_recs, str):
        content_recs = []

    # Collaborative picks for the same user.
    collab_recs = collaborative_recommendations(user_id)

    # Order-preserving de-duplication keeps the result deterministic
    # (a set union would make tie order depend on hash ordering).
    combined_recs = list(dict.fromkeys([*content_recs, *collab_recs]))

    def mean_sentiment(title):
        score = data.loc[data['Movie Name'] == title, 'Sentiment Score'].mean()
        # NaN compares false against everything and would leave the sort
        # order undefined; rank titles without a score last instead.
        return float('-inf') if pd.isna(score) else score

    sentiment_sorted = sorted(combined_recs, key=mean_sentiment, reverse=True)
    return sentiment_sorted[:max(n_recommendations, 0)]


def evaluate_hybrid(user_ids=None):
    """Compute the catalogue coverage of the hybrid recommender.

    Coverage is the percentage of distinct titles in ``data['Movie Name']``
    that appear in the hybrid recommendations of the evaluated users.

    Args:
        user_ids: A reviewer ID or an iterable of reviewer IDs to evaluate.
            When omitted, the notebook-level ``user_id_input`` is used, so
            existing calls without arguments keep working.

    Returns:
        Coverage as a float percentage in the range 0-100; ``0.0`` when the
        dataset contains no titles.
    """
    if user_ids is None:
        # Backward-compatible default: evaluate the last user entered.
        user_ids = [user_id_input]
    elif isinstance(user_ids, str):
        # A bare string is one ID, not an iterable of characters.
        user_ids = [user_ids]

    total_movies = len(data['Movie Name'].unique())
    if total_movies == 0:
        return 0.0

    # Union of every title recommended to any of the evaluated users.
    recommended_movies = set()
    for uid in user_ids:
        recommended_movies.update(hybrid_with_sentiment(uid))

    return len(recommended_movies) / total_movies * 100


# Read the reviewer ID and build the hybrid recommendation list for it.
user_id_input = input("Masukkan ID pengguna: ").strip()
hybrid_recs = hybrid_with_sentiment(user_id_input)

# Show the hybrid recommendations as a numbered list, or a notice when empty.
if hybrid_recs:
    print(f"\nRekomendasi Hybrid untuk user '{user_id_input}':")
    for idx, movie in enumerate(hybrid_recs, start=1):
        print(f"{idx}. {movie}")
else:
    print(f"Tidak ada rekomendasi untuk user '{user_id_input}'.")

# Report the recommendation coverage.
coverage = evaluate_hybrid()
print(f"\nCoverage of Hybrid Recommendations: {coverage:.2f}%")


Masukkan ID pengguna: JPV852

Rekomendasi Hybrid untuk user 'JPV852':
1. Yes Day
2. The Lighthouse
3. Battle: Los Angeles
4. Guardians of the Galaxy
5. Padre no hay mas que uno 3

Coverage of Hybrid Recommendations: 0.05%
