In [1]:
import pandas as pd
import numpy as np
# Libraries for Text Preprocessing
import re
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

import neattext.functions as nfx
from keras.models import load_model

from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Read the validation workbook into a DataFrame; the path is relative to the
# notebook's working directory.
validation_workbook = './data_evaluation/validation_sentimen.xlsx'
data_validation = pd.read_excel(validation_workbook)

def cleaning(text):
    """Normalise one raw review string before tokenisation.

    Steps, in order:
      1. every newline character is replaced by a space;
      2. the text is passed through ``nfx.remove_numbers``;
      3. each run of characters other than ASCII letters/digits is replaced
         by a single space (digits are allowed by this pattern, so any digit
         that survives step 2 is kept).

    Hashtag removal and outer-whitespace stripping are currently disabled.

    Args:
        text: the raw review text (a ``str``).

    Returns:
        The cleaned string. Leading/trailing spaces are not stripped.
    """
    without_newlines = re.sub(r'\n', ' ', text)
    without_numbers = nfx.remove_numbers(without_newlines)
    return re.sub('[^0-9a-zA-Z]+', ' ', without_numbers)

def casefolding(text):
    """Return ``text`` converted to lowercase via ``str.lower``."""
    lowered = text.lower()
    return lowered

# Run every review through cleaning() and then casefolding(), storing the
# result in a new column alongside the raw 'Text' column.
cleaned_reviews = data_validation['Text'].apply(cleaning)
data_validation['content_cleaned'] = cleaned_reviews.apply(casefolding)

# Preview the first rows, including the new column.
data_validation.head()

Unnamed: 0,Text,makanan,fasilitas,pelayanan,kamar,harga,lokasi,Sentimen,content_cleaned
0,"Kamar yg bersih, fasilitas oke lokasi dekat dg...",0,1,1,1,0,0,1,kamar yg bersih fasilitas oke lokasi dekat dg ...
1,"Staffnya ramah dan sigap, untuk menu makanan j...",1,0,1,1,0,0,1,staffnya ramah dan sigap untuk menu makanan ju...
2,Super duper ramah all staff.nya. Hotelnya bers...,0,1,1,0,0,0,1,super duper ramah all staff nya hotelnya bersi...
3,Sarapan sahurnya juga enak banget rasanya. Lok...,1,0,0,0,0,1,1,sarapan sahurnya juga enak banget rasanya loka...
4,Lokasi hotel dekat dengan jalan raya dan ramai...,0,0,0,1,0,1,1,lokasi hotel dekat dengan jalan raya dan ramai...


In [3]:
# Build a word index from the cleaned reviews and encode each review as a
# sequence of integer word ids.
# NOTE(review): this tokenizer is fitted on the validation texts themselves.
# The .h5 models loaded later in this notebook were saved elsewhere; if they
# were trained with a different tokenizer, these word ids will not line up
# with the vocabulary those models expect. Confirm whether the tokenizer used
# at training time should be loaded here instead.
tokenizer = Tokenizer()
cleaned_texts = data_validation['content_cleaned']
tokenizer.fit_on_texts(cleaned_texts)
sequences = tokenizer.texts_to_sequences(cleaned_texts)

# Pad at the end so every sequence has length 31 (maxlen=31, padding='post').
X = pad_sequences(sequences, maxlen=31, padding='post')

In [18]:
# Saved Keras models to evaluate. Column i of `predictions` holds the result
# of model_filenames[i].
model_filenames = ['./aspect_sentiment_models/harga_aspek_dan_sentiment.h5', './aspect_sentiment_models/makanan_aspek_dan_sentiment.h5']  # List of models
# NOTE(review): not used anywhere in this cell; kept in case a later cell reads it.
prediction_sentiments = []
# Cut-off above which a model output is counted as a positive (1) prediction.
threshold = 0.5

# Load every model once, in the same order as model_filenames.
models = [load_model(filename) for filename in model_filenames]

# One row per sample in X, one column per model.
predictions = np.zeros((X.shape[0], len(models)))

for i, model in enumerate(models):
    preds = model.predict(X)
    print(preds)
    # Binarise with the shared `threshold` (previously a repeated literal 0.5,
    # so editing `threshold` had no effect).
    preds_binary = np.where(preds > threshold, 1, 0)
    # Keep only the first output column of each model; any further output
    # columns are not stored here.
    predictions[:, i] = preds_binary[:, 0]

prediction_aspect_result = predictions.astype('int')

# Number of samples that were scored.
len(prediction_aspect_result)

[[0.9624544  0.5557864 ]
 [0.98488796 0.6153778 ]
 [0.98347604 0.6059614 ]
 [0.9834391  0.6061817 ]
 [0.9846739  0.6142961 ]
 [0.98429745 0.6103061 ]
 [0.984555   0.61265004]
 [0.9844947  0.61164725]
 [0.9847679  0.61396474]
 [0.984684   0.6134709 ]
 [0.9845885  0.6130074 ]
 [0.9845377  0.6116848 ]
 [0.9849175  0.61401504]
 [0.9846618  0.61338437]
 [0.9847976  0.61361474]
 [0.9849217  0.6143162 ]
 [0.9847894  0.61437905]
 [0.98474425 0.61296797]
 [0.9844886  0.6116525 ]
 [0.98491216 0.6146327 ]
 [0.11323678 0.5327538 ]
 [0.98456883 0.6120562 ]
 [0.98487747 0.6143762 ]
 [0.9842218  0.61073923]
 [0.9829118  0.6030688 ]
 [0.9774457  0.5851097 ]
 [0.9844308  0.6110754 ]
 [0.9849448  0.61418223]
 [0.9848379  0.61373544]
 [0.9837384  0.6075618 ]
 [0.98452455 0.61159843]
 [0.9848683  0.6138047 ]
 [0.98488355 0.613826  ]
 [0.9833573  0.6051613 ]
 [0.09451821 0.53669304]
 [0.9846266  0.6123421 ]
 [0.98310244 0.6044698 ]
 [0.98451066 0.6113884 ]
 [0.98458457 0.61193645]
 [0.983792   0.6076999 ]


68

In [5]:
# Place entries 0, 1 and 2 of prediction_aspect_result side by side as the
# columns of a new array.
# NOTE(review): prediction_aspect_result is built as (n_samples, n_models), so
# these indices select the first three samples rather than per-model columns,
# and the result is an (n_models, 3) array. Confirm that this is the intended
# layout.
first_three_rows = [prediction_aspect_result[k] for k in range(3)]
matrix = np.column_stack(first_three_rows)

print(matrix)

[[1 1 1]
 [1 1 1]]
