In [1]:
import pandas as pd
import numpy as np
# Libraries for Text Preprocessing
import re
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Libraries for Model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
import neattext.functions as nfx

In [2]:
# Read the validation spreadsheet into a DataFrame and preview the first rows.
validation_path = './data_evaluation/validation_mini (5).xlsx'
data_set = pd.read_excel(validation_path)
data_set.head(20)

Unnamed: 0,text,makanan,fasilitas,pelayanan,kamar,harga,lokasi
0,Hotel menawarkan harga yang terjangkau tanpa m...,0,2,3,0,5,0
1,Saya sangat terkesan dengan hotel ini karena h...,0,2,3,0,5,0
2,aneh sekali,0,0,0,0,0,0
3,rumah besar sepi sekaligus menyeramkan,0,0,0,0,0,0
4,"Kamar yg bersih, fasilitas oke lokasi dekat dg...",0,2,0,4,0,6
5,"Staffnya ramah dan sigap, untuk menu makanan j...",1,0,3,4,0,0
6,"Pelayanan oke banget, staff nya ramah, parkira...",1,2,3,0,0,0
7,"Parkiran terbatas sepertinya, pelayanan ramah,...",0,2,3,4,0,0
8,"Menyenangkan nginep the 101 yogyakarta, kamarn...",0,0,3,4,0,6
9,"Pelayanan sgt bagus, hotel bersih dan menu sar...",1,2,3,0,0,0


In [3]:
# Keras text tokenizer, created with its default settings.
tokenizer = Tokenizer()

def cleaning(text):
    """Strip noise from a review string before tokenisation.

    Steps, in order: newlines become spaces, hashtags and numbers are removed
    through the neattext helpers, every run of non-alphanumeric characters is
    collapsed to a single space, and surrounding whitespace is trimmed.

    Args:
        text: Raw review text. Non-string values (e.g. NaN coming from an
            empty cell) are treated as empty text.

    Returns:
        The cleaned string, without leading or trailing whitespace.
    """
    if not isinstance(text, str):
        # Missing values cannot be passed to re.sub; map them to empty text.
        return ''
    text = re.sub(r'\n', ' ', text)  # replace newlines with spaces
    text = nfx.remove_hashtags(text)  # drop hashtags
    text = nfx.remove_numbers(text)  # drop numbers
    text = re.sub('[^0-9a-zA-Z]+', ' ', text)  # anything non-alphanumeric -> one space
    # Trim last: the substitution above turns leading/trailing punctuation
    # into spaces, so stripping earlier would leave whitespace at the edges.
    return text.strip()

def casefolding(text):
    """Return ``text`` converted to lower case."""
    lowered = text.lower()
    return lowered

# Normalise the raw review text: lower-case first, then clean. The cleaning
# step must consume the case-folded text; applying both steps to the raw
# `text` column separately would overwrite (and so discard) the case folding.
data_set['content_cleaned'] = data_set['text'].apply(casefolding).apply(cleaning)

# NOTE(review): this fits a brand-new vocabulary on the validation texts. The
# word indices fed to the saved models later in the notebook are only
# meaningful if they match the tokenizer used at training time -- confirm, and
# prefer loading the training tokenizer over re-fitting here.
tokenizer.fit_on_texts(data_set['content_cleaned'])
X_validasi = tokenizer.texts_to_sequences(data_set['content_cleaned'])
# Bring every sequence to length 31; padding zeros are appended at the end.
X_validasi = pad_sequences(X_validasi, maxlen=31, padding='post')

print(X_validasi)

[[  7  29   4   5  30  52  53   2   1   8  22  15   3   1  54   1   8   5
   31  15  55   0   0   0   0   0   0   0   0   0   0]
 [ 32  15  56   9   7  57  58   4   5  30  59  60  29   2   5  33  61  22
   15  62   1  63  23  64  65  66   5  34   0   0   0]
 [ 67  68   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 69  35  70  71  72   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 11  16  12   8  24  25  17  36  37  18  38   2  16   3   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 73   3   1  74  19  26  18  39  23  13   9  75  76  10  12   1  34   0
    0   0   0   0   0   0   0   0   0   0   0   0   0]
 [  2  24  77  78  79   3  40  80  81  20  41   1  27   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0]
 [ 40  42  82   2   3  10  24  83   6  43  35   0   0   0   0   0   0   0
    0   0   0   0   0  

In [52]:
from keras.models import load_model

# Ground-truth labels: the three aspect columns, in the same order as the
# model files listed below, binarised to 1 where the value is non-zero.
new_df = data_set[['harga', 'fasilitas', 'pelayanan']]
actual_aspect = data = [
    [int(val != 0.0) for val in row]
    for row in new_df.values.tolist()
]

# Saved models, one file per aspect column (presumably one binary classifier
# each -- inferred from the file names and the 0.5 threshold below).
model_filenames = ['aspek_harga.h5', 'aspek_fasilitas.h5', 'aspek_pelayanan.h5']  # List of models
models = [load_model(path) for path in model_filenames]

# Run every model over the whole validation set; column k of `predictions`
# holds the thresholded output of models[k].
predictions = np.zeros((X_validasi.shape[0], len(models)))

for col, aspect_model in enumerate(models):
    scores = aspect_model.predict(X_validasi)
    # Threshold at 0.5 and keep the first output value of each row.
    predictions[:, col] = np.where(scores > 0.5, 1, 0)[:, 0]

prediction_aspect_result = predictions.astype(int)

print(prediction_aspect_result)

[[1 0 1]
 [1 0 0]
 [0 0 0]
 [0 1 0]
 [1 1 1]
 [1 1 1]
 [1 1 0]
 [1 1 1]
 [1 1 0]
 [1 1 1]
 [1 1 1]
 [1 1 0]
 [1 0 0]
 [1 1 1]
 [1 0 0]
 [0 1 0]
 [1 1 1]
 [1 1 0]
 [1 1 1]]


In [53]:
# "Top-1" accuracy as computed in this cell: the share of rows where the
# prediction and the label agree in column index 1.
# NOTE(review): only that single column is compared -- confirm this is the
# intended definition of top-1 accuracy.

# 1 when the row's column-1 prediction equals its label, otherwise 0
result = [
    1 if predicted_row[1] == actual_aspect[row_idx][1] else 0
    for row_idx, predicted_row in enumerate(prediction_aspect_result)
]

print(result)

# Number of matching rows (every entry of `result` is 0 or 1)
count_ones = sum(result)

# Fraction of matching rows over all rows
total_data = len(result)
top1_accuracy = count_ones / total_data

# Express the fraction as a percentage
top1_accuracy_percentage = 100 * top1_accuracy

print("Top-1 Accuracy: {:.2f}%".format(top1_accuracy_percentage))



[0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1]
Top-1 Accuracy: 52.63%


In [55]:
list_a = prediction_aspect_result
list_b = actual_aspect

# A row scores 1 when at least two of its aspect predictions equal the labels.
result = []
for row_idx, predicted_row in enumerate(list_a):
    matches = sum(p == t for p, t in zip(predicted_row, list_b[row_idx]))
    result.append(1 if matches >= 2 else 0)

print(result)

# Number of rows that met the "at least two matches" criterion
count_ones = sum(result)

# Fraction of such rows over all rows
# NOTE(review): the `top1_*` variable names hold the top-2 figures here.
total_data = len(result)
top1_accuracy = count_ones / total_data

# Express the fraction as a percentage
top1_accuracy_percentage = 100 * top1_accuracy

print("Top-2 Accuracy: {:.2f}%".format(top1_accuracy_percentage))

[1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1]
Top-2 Accuracy: 57.89%
