In [1]:
import pandas as pd
import numpy as np
# Libraries for Text Preprocessing
import re
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn import metrics
import seaborn as sns
import neattext.functions as nfx
from keras.models import load_model

tokenizer=Tokenizer()

In [2]:
data_set = pd.read_excel('validation_2_aspect.xlsx')
data_set.tail()

Unnamed: 0,no,aspek,text,makanan,fasilitas,pelayanan,kamar,harga,lokasi
76,77.0,,Saya sangat puas dengan pelayanan yang diberik...,0.0,0.0,3.0,0.0,5.0,0.0
77,78.0,,Pelayanan yang prima dengan harga yang terjang...,0.0,0.0,3.0,0.0,5.0,0.0
78,79.0,Pelayanan - Lokasi,"Pelayanan di hotel ini sangat baik, dan lokasi...",0.0,0.0,3.0,0.0,0.0,6.0
79,80.0,,"Staf hotel sangat ramah dan siap membantu, ser...",0.0,0.0,3.0,0.0,0.0,6.0
80,81.0,,Kami sangat puas dengan pelayanan yang diberik...,0.0,0.0,3.0,0.0,0.0,6.0


In [3]:
def cleaning(text):
    text = re.sub(r'\n',' ',text) # Hapus \n (enter)
    text = nfx.remove_hashtags(text) # Hapus hashtags
    text = nfx.remove_numbers(text) # Hapus number
    text = text.strip() # Hapus Whitespace
    text = re.sub('[^0-9a-zA-Z]+', ' ', text) # Hapus karakter selain alfabet dan angka
    return text

def casefolding(text):
    return text.lower()

data_set['content_cleaned'] = data_set['text'].apply(casefolding)
data_set['content_cleaned'] = data_set['text'].apply(cleaning)

In [4]:
tokenizer.fit_on_texts(data_set['content_cleaned'])
X_validasi=tokenizer.texts_to_sequences(data_set['content_cleaned'])
X_validasi=pad_sequences(X_validasi, maxlen=31, padding='post')

print(X_validasi)

[[15  1 41 ...  0  0  0]
 [ 8  2 24 ...  0  0  0]
 [15  5  6 ... 36 64 33]
 ...
 [14  5  6 ...  0  0  0]
 [82  6  2 ...  0  0  0]
 [22  2 24 ...  0  0  0]]


In [5]:
model_filenames = ['aspek_fasilitas.h5', 'aspek_kamar.h5']  # List of models
models = []
for filename in model_filenames:
    model = load_model(filename)
    models.append(model)


# Lakukan prediksi pada seluruh data test menggunakan setiap model
predictions = np.zeros((X_validasi.shape[0], len(models)))

for i, model in enumerate(models):
    preds = model.predict(X_validasi)
#     print(preds)
    preds_binary = np.where(preds > 0.5, 1, 0)
    for j, pred in enumerate(preds_binary):
        predictions[j][i] = pred[0]
        
prediction_aspect_result = predictions.astype(int)
# print(prediction_aspect_result)

In [6]:
#makanan=1,fasilitas=2,pelayanan=3,kamar=4,harga=5,lokasi=6 
new_df = data_set[['fasilitas', 'kamar']]
actual_aspect = new_df.values.tolist()
actual_aspect = data = [[1 if val != 0.0 else 0 for val in row] for row in actual_aspect]

y_pred = np.array(prediction_aspect_result)
y_true = np.array(actual_aspect)

# Menghitung Top-1 Accuracy
y_pred_top1 = np.argmax(y_pred, axis=1)
top1_accuracy = np.mean(np.equal(y_pred_top1, np.argmax(y_true, axis=1)))

print("Top-1 Accuracy:", top1_accuracy)

Top-1 Accuracy: 0.7901234567901234


In [7]:
data = [[1, 1], [2, 2]]
new_prediction_aspect_result = pd.DataFrame(prediction_aspect_result)
print(new_prediction_aspect_result)

    0  1
0   0  0
1   1  1
2   0  0
3   0  0
4   0  1
.. .. ..
76  1  0
77  1  1
78  1  1
79  1  1
80  1  1

[81 rows x 2 columns]


In [10]:
prediction_result_convert = pd.DataFrame({
    'original_text':data_set['text'],
    'text_cleaned':data_set['content_cleaned'],
    'actual_1': data_set['lokasi'],
    'actual_2': data_set['kamar'],
    'predicted_1': new_prediction_aspect_result[0],
    'predicted_2': new_prediction_aspect_result[1]
})
prediction_result_convert.tail(20)

Unnamed: 0,original_text,text_cleaned,actual_1,actual_2,predicted_1,predicted_2
61,Kami sangat terkesan dengan pelayanan yang ram...,Kami sangat terkesan dengan pelayanan yang ram...,0.0,0.0,0,0
62,Pelayanan yang sangat baik dan fasilitas yang ...,Pelayanan yang sangat baik dan fasilitas yang ...,0.0,0.0,0,1
63,"Harga kamar di hotel ini sangat terjangkau, me...",Harga kamar di hotel ini sangat terjangkau mem...,0.0,4.0,0,0
64,Saya sangat terkesan dengan kualitas kamar yan...,Saya sangat terkesan dengan kualitas kamar yan...,0.0,4.0,1,1
65,Harga kamar yang ditawarkan sangat sebanding d...,Harga kamar yang ditawarkan sangat sebanding d...,0.0,4.0,0,0
66,Lokasi hotel ini sangat strategis dan kamar ya...,Lokasi hotel ini sangat strategis dan kamar ya...,6.0,4.0,1,1
67,Lokasi hotel ini sangat dekat dengan transport...,Lokasi hotel ini sangat dekat dengan transport...,6.0,4.0,1,1
68,Lokasi hotel yang terletak di pusat kota sanga...,Lokasi hotel yang terletak di pusat kota sanga...,6.0,4.0,1,1
69,Pelayanan di hotel ini sangat baik dan kamar y...,Pelayanan di hotel ini sangat baik dan kamar y...,0.0,4.0,1,1
70,"Staf hotel sangat ramah dan membantu, membuat ...",Staf hotel sangat ramah dan membantu membuat p...,0.0,4.0,1,1


In [11]:
prediction_result_convert.to_csv('result_2_aspect.csv', index=False)