In [12]:
import pandas as pd
import numpy as np
# Libraries for Text Preprocessing
import re
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Libraries for Visualization
import wordcloud
import matplotlib.pyplot as plt
import plotly.express as px
# Libraries for Model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from sklearn import metrics
import tensorflow as tf

# Load the labelled review data from the Excel workbook; the path is relative to the
# notebook's working directory.
data_set = pd.read_excel('aspek makanan.xlsx')
def remove_underscored(text):
    """Return ``text`` with every underscore replaced by a single space."""
    return text.replace('_', ' ')

# Derive the "clean" column by splitting underscore-joined tokens (e.g. "tidak_sempat")
# back into separate words, then preview the last rows.
data_set["clean"] = data_set["clean text"].apply(remove_underscored)
data_set.tail()

Unnamed: 0,clean text,aspect,sentiment,clean
1187,kondisi menyedihkan,lainnya,negative,kondisi menyedihkan
1188,anak kelaperan tunggu makanan datang,lainnya,negative,anak kelaperan tunggu makanan datang
1189,tidak_sempat menikmati sarapan,lainnya,negative,tidak sempat menikmati sarapan
1190,kecewa helm motor dicuri,lainnya,negative,kecewa helm motor dicuri
1191,tidak_ada kompensasi ganti rugi pihak hotel,lainnya,negative,tidak ada kompensasi ganti rugi pihak hotel


In [13]:
# Fit the vocabulary on the cleaned reviews; num_words=1500 caps how many of the most
# frequent words are kept when texts are encoded.
tokenizer = Tokenizer(num_words=1500)
texts = data_set["clean"].values
tokenizer.fit_on_texts(texts)

# Encode every review as a list of word indices, then bring all of them to a fixed
# length of 31 with zeros appended at the end.
sequences = tokenizer.texts_to_sequences(texts)
X = pad_sequences(sequences, maxlen=31, padding='post')

print(data_set["clean"])
print(X)
print(X.shape)
len(X)

0                            resto baik ada rooftop
1              ada kafe atas pemandangan kota jogja
2                              resto rooftop nyaman
3                          rooftop resto nuansa oke
4                               rooftop kafe nyaman
                           ...                     
1187                            kondisi menyedihkan
1188           anak kelaperan tunggu makanan datang
1189                 tidak sempat menikmati sarapan
1190                       kecewa helm motor dicuri
1191    tidak ada kompensasi ganti rugi pihak hotel
Name: clean, Length: 1192, dtype: object
[[ 12  18   5 ...   0   0   0]
 [  5 338 145 ...   0   0   0]
 [ 12 127  15 ...   0   0   0]
 ...
 [  3 519 168 ...   0   0   0]
 [ 52 513 335 ...   0   0   0]
 [  3   5 517 ...   0   0   0]]
(1192, 31)


1192

In [14]:
# One-hot encode the aspect label into one indicator column per aspect value.
y = pd.get_dummies(data_set[["aspect"]])

# Fix the column order so that "makanan" is column 0 and "lainnya" is column 1.
new_column_order = ['aspect_makanan', 'aspect_lainnya']
y = y[new_column_order]

# Hold out 20% of the rows for testing; random_state pins the split.
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
for label, split in (
    ("X Train: ", x_train),
    ("Y Train: ", y_train),
    ("X Test: ", x_test),
    ("Y Test: ", y_test),
):
    print(label, split.shape)
print(y)

X Train:  (953, 31)
Y Train:  (953, 2)
X Test:  (239, 31)
Y Test:  (239, 2)
      aspect_makanan  aspect_lainnya
0                  1               0
1                  1               0
2                  1               0
3                  1               0
4                  1               0
...              ...             ...
1187               0               1
1188               0               1
1189               0               1
1190               0               1
1191               0               1

[1192 rows x 2 columns]


In [31]:
# Seed TensorFlow so weight initialisation and dropout are repeatable between runs
# (best effort: some ops may still be non-deterministic).
tf.random.set_seed(42)

model = Sequential()
# 1500 and 31 mirror the Tokenizer(num_words=...) and pad_sequences(maxlen=...) values
# used to build X.
model.add(Embedding(1500, 32, input_length=31))
model.add(SpatialDropout1D(0.8))
model.add(LSTM(64, dropout=0.4, recurrent_dropout=0.2))
# Two softmax units: one per one-hot label column (aspect_makanan, aspect_lainnya).
model.add(Dense(2, activation='softmax'))
# A softmax output trained against one-hot targets pairs with categorical cross-entropy;
# binary cross-entropy is meant for independent sigmoid outputs.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
# NOTE: the held-out test split is also used as the validation set during training.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_test, y_test)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [39]:
review_test = ["makanan dan minuman enak semua deh"]
# Encode the review the same way as the training data: same tokenizer, same padded length.
review_tok = tokenizer.texts_to_sequences(review_test)
review_pad = pad_sequences(review_tok, maxlen=31, padding='post')

# The model's final layer is already a softmax, so predict() returns class probabilities.
# Applying softmax again would only squash them toward 0.5, so the values are used as-is.
probabilities = model.predict(review_pad).flatten()

# Convert probabilities to binary class labels (1 where probability >= threshold).
threshold = 0.5
prediction = np.where(probabilities < threshold, 0, 1)

print("prediction: ", prediction)

prediction:  [1 0]


In [34]:
# Persist the trained model to disk under the .h5 filename that the multi-model
# prediction cell loads.
model.save('aspek_makanan_4.h5')

In [38]:
test_data = ['menu makanan bervariasi di restoran']
# Encode the sentence with the notebook's tokenizer and pad it to the 31-token length.
test_data_tok = tokenizer.texts_to_sequences(test_data)
test_data_pad = pad_sequences(test_data_tok, maxlen=31, padding='post')

models = ['aspek_makanan_4.h5', 'aspek_harga_3.h5']  # List of model filenames
threshold = 0.5  # not referenced anywhere in this cell
predictions = []

# Reload each saved model in turn and score the same padded sentence with it.
# NOTE(review): every model is fed input encoded by this notebook's tokenizer; confirm
# that 'aspek_harga_3.h5' was trained with the same vocabulary.
for model_filename in models:
    model_all = tf.keras.models.load_model(model_filename)
    predictions.append(model_all.predict(test_data_pad))

# Display the predictions
for model_filename, model_predictions in zip(models, predictions):
    print(f"Predictions from {model_filename}:")
    print(model_predictions)
    print()

Predictions from aspek_makanan_4.h5:
[[0.96988285 0.03011714]]

Predictions from aspek_harga_3.h5:
[[9.9962735e-01 3.7268706e-04]]

