In [1]:
import pandas as pd
import numpy as np
# Libraries for Text Preprocessing
import re
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Libraries for Visualization
import wordcloud
import matplotlib.pyplot as plt
import plotly.express as px
# Libraries for Model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
import neattext.functions as nfx
from keras.models import load_model

# Module-level Keras tokenizer, created unfitted here; a later cell fits it on
# the cleaned evaluation text and uses it to encode that same text.
# NOTE(review): the .h5 models loaded further down were presumably trained with
# their own fitted tokenizer; a freshly fitted one may assign different word
# indices — confirm whether the training tokenizer should be loaded instead.
tokenizer=Tokenizer()

In [2]:
# Read the two-aspect validation workbook into a DataFrame and preview its
# last rows as the cell output.
validation_path = './data_evaluation/validation_2_aspect.xlsx'
data_set = pd.read_excel(validation_path)
data_set.tail()

Unnamed: 0,no,aspek,text,makanan,fasilitas,pelayanan,kamar,harga,lokasi
76,77,,Saya sangat puas dengan pelayanan yang diberik...,0,0,3,0,5,0
77,78,,Pelayanan yang prima dengan harga yang terjang...,0,0,3,0,5,0
78,79,Pelayanan - Lokasi,"Pelayanan di hotel ini sangat baik, dan lokasi...",0,0,3,0,0,6
79,80,,"Staf hotel sangat ramah dan siap membantu, ser...",0,0,3,0,0,6
80,81,,Kami sangat puas dengan pelayanan yang diberik...,0,0,3,0,0,6


In [3]:
def cleaning(text):
    """Normalise one review string before tokenisation.

    Steps, in order: line breaks become spaces, hashtags and numbers are
    removed via neattext, every run of characters outside ``[0-9a-zA-Z]``
    collapses to a single space, and surrounding whitespace is stripped.

    Args:
        text: The raw review text (a ``str``).

    Returns:
        The cleaned string, without leading or trailing whitespace.
    """
    text = re.sub(r'\n', ' ', text)  # replace line breaks with spaces
    text = nfx.remove_hashtags(text)  # drop hashtags
    text = nfx.remove_numbers(text)  # drop numbers
    # Collapse every run of non-alphanumeric characters into one space.
    text = re.sub('[^0-9a-zA-Z]+', ' ', text)
    # Strip last: the substitution above turns leading/trailing punctuation
    # into spaces, so stripping any earlier would leave them in the result.
    return text.strip()

def casefolding(text):
    """Return ``text`` with every character converted to lowercase."""
    lowered = text.lower()
    return lowered

# Lowercase first, then clean the lowercased text. cleaning() must consume the
# case-folded series rather than the raw 'text' column, otherwise the
# lowercasing step is overwritten and has no effect on 'content_cleaned'.
data_set['content_cleaned'] = data_set['text'].apply(casefolding).apply(cleaning)

In [4]:
# Encode the cleaned evaluation text as fixed-length integer sequences.
# NOTE(review): the vocabulary is fitted on the evaluation text itself; the
# saved models presumably expect the training-time word indices — confirm.
cleaned_texts = data_set['content_cleaned']
tokenizer.fit_on_texts(cleaned_texts)
encoded_texts = tokenizer.texts_to_sequences(cleaned_texts)

# Pad at the end of each sequence to 31 tokens.
# NOTE(review): 31 presumably matches the models' input length — confirm.
X_validasi = pad_sequences(encoded_texts, maxlen=31, padding='post')

print(X_validasi)

[[15  1 41 ...  0  0  0]
 [ 8  2 24 ...  0  0  0]
 [15  5  6 ... 36 64 33]
 ...
 [14  5  6 ...  0  0  0]
 [82  6  2 ...  0  0  0]
 [22  2 24 ...  0  0  0]]


In [15]:
# Saved single-aspect classifiers; their order here fixes the column order of
# the prediction matrix built below.
model_filenames = ['aspek_fasilitas.h5', 'aspek_makanan.h5']
models = [load_model(path) for path in model_filenames]

# Run every model over the whole evaluation set:
# one row per sample, one column per model.
predictions = np.zeros((X_validasi.shape[0], len(models)))

for col, clf in enumerate(models):
    preds = clf.predict(X_validasi)
    # Threshold the scores at 0.5 and keep the first output of every sample.
    preds_binary = (preds > 0.5).astype(int)
    predictions[:, col] = preds_binary[:, 0]

prediction_aspect_result = predictions.astype(int)

In [16]:
# Aspect codes used in the sheet:
# makanan=1, fasilitas=2, pelayanan=3, kamar=4, harga=5, lokasi=6
# The column order must match the order in which the models were stacked into
# prediction_aspect_result (fasilitas first, then makanan).
new_df = data_set[['fasilitas', 'makanan']]

# Any non-zero code means the aspect is present (1); zero means absent (0).
y_true = (new_df.to_numpy() != 0).astype(int)
actual_aspect = y_true.tolist()
y_pred = np.asarray(prediction_aspect_result)

# Top-1 accuracy, kept for comparison with earlier runs.
# Caveat: argmax over 0/1 indicator rows returns index 0 for [0, 0], [1, 0]
# and [1, 1] alike, so rows that differ can still count as a match. Read the
# multi-label metrics below for the real agreement between y_pred and y_true.
y_pred_top1 = np.argmax(y_pred, axis=1)
top1_accuracy = np.mean(np.equal(y_pred_top1, np.argmax(y_true, axis=1)))

print("Top-1 Accuracy:", top1_accuracy)

# Multi-label metrics that compare the indicator values directly.
matches = y_pred == y_true
# Share of samples whose whole label row is predicted correctly.
exact_match_accuracy = float(np.mean(np.all(matches, axis=1)))
# Share of correct predictions for each aspect column separately.
per_aspect_accuracy = dict(zip(new_df.columns, np.mean(matches, axis=0).tolist()))

print("Exact-match accuracy:", exact_match_accuracy)
print("Per-aspect accuracy:", per_aspect_accuracy)

Top-1 Accuracy: 0.5308641975308642


In [17]:
# Accuracy of the predictions at column index 1 only (the second column of
# prediction_aspect_result / actual_aspect); the other column is not scored.

# 1 where prediction and label agree for a sample, 0 otherwise.
result = [
    int(pred_row[1] == true_row[1])
    for pred_row, true_row in zip(prediction_aspect_result, actual_aspect)
]

print(result)

# Number of matching samples, the sample count, and their ratio.
count_ones = sum(result)
total_data = len(result)
top1_accuracy = count_ones / total_data

# Express the ratio as a percentage for the report line.
top1_accuracy_percentage = top1_accuracy * 100

print("Top-1 Accuracy: {:.2f}%".format(top1_accuracy_percentage))



[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Top-1 Accuracy: 34.57%
