# Import Libraries

In [1]:
# Import Libraries
import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

In [2]:
# Load Dataset
data = pd.read_csv("/content/Tempat-Wisata-Toba-Preprocessing.csv")

# Data Cleaning
# The cleaned frame is built in a single chain so that every step returns a
# fresh DataFrame. Assigning columns on a frame obtained by boolean filtering /
# drop_duplicates (the previous form) can raise SettingWithCopyWarning.
# The order of the steps is unchanged:
#   1. drop the two 'Unnamed' columns (presumably index columns written by
#      earlier to_csv calls -- TODO confirm),
#   2. replace missing review text with a placeholder,
#   3. keep only ratings on the 1-5 scale,
#   4. drop exact duplicate rows,
#   5. normalise text casing and store reviewer ids as strings.
# NOTE(review): the saved preview of train_pivot shows ReviewerId labels such as
# '1.00001e+20', i.e. ids in scientific notation. Distinct reviewers may share
# such a label -- verify against the raw CSV before trusting per-reviewer results.
data_cleaned = (
    data
    .drop(columns=['Unnamed: 0.1', 'Unnamed: 0'])
    .assign(Reviews=lambda frame: frame['Reviews'].fillna('No review'))
    .loc[lambda frame: frame['Rating'].between(1, 5)]
    .drop_duplicates()
    .assign(
        Nama_tempat_wisata=lambda frame: frame['Nama_tempat_wisata'].str.title(),
        Reviews=lambda frame: frame['Reviews'].str.lower(),
        Category=lambda frame: frame['Category'].str.title(),
        ReviewerId=lambda frame: frame['ReviewerId'].astype(str),
    )
)

In [3]:
# Collapse repeated reviews: one mean rating per (place, reviewer) pair
data_grouped = (
    data_cleaned
    .groupby(['Nama_tempat_wisata', 'ReviewerId'])['Rating']
    .mean()
    .reset_index()
)

# Rating matrix with one row per place and one column per reviewer;
# pairs without a rating are stored as 0
df = (
    data_grouped
    .pivot(
        index='Nama_tempat_wisata',
        columns='ReviewerId',
        values='Rating',
    )
    .fillna(0)
)

In [4]:
# Hold out 30% of the (place, reviewer, rating) rows for testing; the fixed
# random_state keeps the split reproducible
train_data, test_data = train_test_split(
    data_grouped,
    test_size=0.3,
    random_state=42,
)

# Turn each split into its own place x reviewer matrix (0 = no rating).
# The two matrices are pivoted independently, so they need not share the same
# rows or columns.
train_pivot, test_pivot = (
    split.pivot(index='Nama_tempat_wisata', columns='ReviewerId', values='Rating').fillna(0)
    for split in (train_data, test_data)
)

In [5]:
# KNN Model
# Brute-force nearest-neighbour search with cosine distance. The model is fitted
# on the rows of train_pivot, so every place is a sample and every reviewer
# column is a feature (item-based similarity).
knn_model = NearestNeighbors(metric='cosine', algorithm='brute')
knn_model.fit(train_pivot)

In [6]:
# Preview the first rows of the training place x reviewer matrix
train_pivot.head()

ReviewerId,1.00001e+20,1.00003e+20,1.00004e+20,1.00005e+20,1.00007e+20,1.00009e+20,1.00011e+20,1.00012e+20,1.00014e+20,1.00015e+20,...,1.18439e+20,1.1843e+20,1.18441e+20,1.18442e+20,1.18443e+20,1.18444e+20,1.18445e+20,1.18446e+20,1.1844e+20,1.18e+20
Nama_tempat_wisata,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Aek Rangat Pangururan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0
Aek Sipangolu Bakkara,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Air Terjun Efrata,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0
Air Terjun Janji,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Air Terjun Pandumaan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
def recommend_items_knn(df, reviewer_id, n_neighbors=5, n_recommendations=5,
                        model=None, item_matrix=None):
    """
    Recommend places a reviewer has not rated yet, using item-based KNN.

    Every unrated place is scored with the similarity-weighted mean of the
    ratings the reviewer gave to that place's nearest neighbours, where the
    similarity of a neighbour is ``1 - distance`` as returned by the model.
    Neighbours the reviewer has not rated are ignored; a place without any
    rated neighbour gets a score of 0.

    Parameters
    ----------
    df : pandas.DataFrame
        Place x reviewer rating matrix (unique place index); 0 means
        "not rated".
    reviewer_id : hashable
        Column label of the reviewer in ``df``.
    n_neighbors : int, default 5
        Neighbours requested per place. Capped at the number of rows of
        ``item_matrix`` because the model cannot return more neighbours than
        it was fitted on.
    n_recommendations : int, default 5
        Maximum number of recommendations returned.
    model : object, optional
        Fitted neighbour model exposing
        ``kneighbors(X, n_neighbors=...) -> (distances, indices)``.
        Defaults to the notebook-level ``knn_model``.
    item_matrix : pandas.DataFrame, optional
        The matrix ``model`` was fitted on; its index maps neighbour positions
        back to place names. Defaults to the notebook-level ``train_pivot``.

    Returns
    -------
    list of (place, predicted_rating)
        Sorted by predicted rating, highest first; empty when the reviewer has
        rated every place in ``df``.

    Raises
    ------
    ValueError
        If ``reviewer_id`` is not a column of ``df``.
    """
    if reviewer_id not in df.columns:
        raise ValueError(f"ReviewerId {reviewer_id} tidak ditemukan dalam data.")

    # Fall back to the notebook globals only when nothing was injected, so the
    # function no longer depends on hidden state when called with explicit
    # arguments.
    if model is None:
        model = knn_model
    if item_matrix is None:
        item_matrix = train_pivot

    # Places the reviewer already rated (index lookup is O(1) per neighbour)
    user_ratings = df[reviewer_id]
    rated = user_ratings[user_ratings > 0]

    # Places still to be scored
    unrated_items = df.index.difference(rated.index)
    if unrated_items.empty:
        return []

    # Requesting more neighbours than fitted items makes the model raise
    k = min(n_neighbors, len(item_matrix))

    # One batched query instead of one model call per place. The rows are
    # aligned to the fitted feature columns; reviewers missing from ``df``
    # count as "not rated" (0) instead of producing NaN features.
    query = df.loc[unrated_items].reindex(columns=item_matrix.columns, fill_value=0)
    distances, indices = model.kneighbors(query, n_neighbors=k)

    recommendations = []
    for item, item_distances, item_indices in zip(unrated_items, distances, indices):
        weighted_sum = 0
        similarity_sum = 0
        for sim_item, distance in zip(item_matrix.index[item_indices], item_distances):
            if sim_item in rated.index:
                similarity = 1 - distance
                weighted_sum += rated.loc[sim_item] * similarity
                similarity_sum += similarity

        predicted_rating = weighted_sum / similarity_sum if similarity_sum > 0 else 0
        recommendations.append((item, predicted_rating))

    # Highest predicted rating first (stable sort keeps ties in index order)
    recommendations.sort(key=lambda pair: pair[1], reverse=True)

    return recommendations[:n_recommendations]

# Align df's columns with train_pivot: reviewer columns that are not in
# train_pivot are dropped, and any train_pivot column missing from df is added
# filled with 0
df = df.reindex(columns=train_pivot.columns, fill_value=0)

# Try the recommender for one specific reviewer
reviewer_id = '1.00003e+20'  # Replace with a valid ReviewerId
recommended_items = recommend_items_knn(df, reviewer_id, n_neighbors=5, n_recommendations=5)

# Show the result
# NOTE(review): the output saved with this cell prefixes every line with
# "Item: ", which this f-string does not emit, so the saved output appears to
# come from an earlier version of the cell -- re-run the cell to refresh it.
print("Rekomendasi untuk Reviewer:", reviewer_id)
for item, rating in recommended_items:
    print(f"{item}, Rating: {rating:.2f}")

Rekomendasi untuk Reviewer: 1.00003e+20
Item: Air Terjun Taman Eden 100, Rating: 5.00
Item: Taman Bunga Sapo Juma, Rating: 5.00
Item: Air Terjun Efrata, Rating: 4.00
Item: Bukit Beta Tuk-Tuk, Rating: 4.00
Item: Desa Wisata Sigapiton, Rating: 4.00


In [None]:
def evaluate_model(train_pivot, test_pivot, knn_model, n_neighbors=5):
    """
    Evaluate the item-based KNN predictor with MSE, RMSE and MAE.

    For every held-out rating (value > 0) in ``test_pivot`` a prediction is
    built from the training data: the similarity-weighted mean of the ratings
    the same reviewer gave to the nearest neighbours of the place, with
    similarity ``1 - distance``. Only neighbours the reviewer actually rated
    in ``train_pivot`` (rating > 0) contribute, matching how
    ``recommend_items_knn`` scores places; a 0 in the matrix means "not rated"
    and must not be averaged in as if it were a rating.

    The prediction is 0 when no estimate is possible: the place is missing
    from ``train_pivot``, the reviewer is missing from ``train_pivot``, or the
    reviewer rated none of the neighbours.

    Parameters
    ----------
    train_pivot : pandas.DataFrame
        Place x reviewer training matrix the model was fitted on (0 = not
        rated).
    test_pivot : pandas.DataFrame
        Place x reviewer matrix with the held-out ratings (0 = not rated).
    knn_model : object
        Fitted neighbour model exposing
        ``kneighbors(X, n_neighbors=...) -> (distances, indices)``.
    n_neighbors : int, default 5
        Neighbours requested per place; capped at the number of rows of
        ``train_pivot``.

    Returns
    -------
    tuple of (mse, rmse, mae)

    Raises
    ------
    ValueError
        If ``test_pivot`` contains no rating greater than 0.
    """
    actual_ratings = []
    predicted_ratings = []

    # The model cannot return more neighbours than it was fitted on
    k = min(n_neighbors, len(train_pivot))

    for item in test_pivot.index:
        item_row = test_pivot.loc[item]
        held_out = item_row[item_row > 0]  # only cells that carry a rating
        if held_out.empty:
            continue

        # Neighbours depend on the place only, so query the model once per
        # place instead of once per rating. A place that is absent from the
        # training matrix has no neighbours (explicit check instead of a bare
        # ``except`` that would also hide genuine errors).
        if item in train_pivot.index:
            distances, indices = knn_model.kneighbors(train_pivot.loc[[item]], n_neighbors=k)
            neighbours = list(zip(train_pivot.index[indices.flatten()],
                                  1 - distances.flatten()))
        else:
            neighbours = []

        for reviewer_id, actual_rating in held_out.items():
            weighted_sum = 0
            similarity_sum = 0
            if reviewer_id in train_pivot.columns:
                for sim_item, similarity in neighbours:
                    rating = train_pivot.loc[sim_item, reviewer_id]
                    # Skip neighbours the reviewer has not rated. This also
                    # skips the evaluated place itself, whose training cell is
                    # 0 when the rating is held out.
                    if rating > 0:
                        weighted_sum += rating * similarity
                        similarity_sum += similarity

            predicted_rating = weighted_sum / similarity_sum if similarity_sum > 0 else 0

            actual_ratings.append(actual_rating)
            predicted_ratings.append(predicted_rating)

    if not actual_ratings:
        raise ValueError("test_pivot tidak memiliki rating > 0 untuk dievaluasi.")

    # Compute the evaluation metrics
    mse = mean_squared_error(actual_ratings, predicted_ratings)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual_ratings, predicted_ratings)

    return mse, rmse, mae

# Evaluate the model: score the held-out ratings in test_pivot against
# predictions derived from train_pivot
mse, rmse, mae = evaluate_model(train_pivot, test_pivot, knn_model, n_neighbors=5)

# Report each error metric rounded to two decimals
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"Mean Absolute Error (MAE): {mae:.2f}")