In [1]:
import pandas as pd
import numpy as np
# Libraries for Text Preprocessing
import re
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
# Libraries for Visualization
import wordcloud
import matplotlib.pyplot as plt
import plotly.express as px
# Libraries for Model
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Embedding, LSTM, SpatialDropout1D
from sklearn.model_selection import train_test_split
from sklearn import metrics
import tensorflow as tf

# Read the review dataset from the Excel workbook that sits next to the notebook.
dataset_path = 'aspek lokasi.xlsx'
data_set = pd.read_excel(dataset_path)
def remove_underscored(text):
    """Return ``text`` with every underscore replaced by a single space.

    Args:
        text: The string to normalise.

    Returns:
        A new string of the same length in which each ``'_'`` has become
        ``' '``; all other characters are left as they are.
    """
    return text.replace('_', ' ')

# Derive the "clean" column: the "clean text" entries with underscores
# turned into spaces, then preview the first rows.
data_set["clean"] = data_set["clean text"].apply(remove_underscored)
data_set.head()

Unnamed: 0,clean text,aspect,sentiment,clean
0,pemandangan nyaman,lokasi,positive,pemandangan nyaman
1,pemandangan kota baik,lokasi,positive,pemandangan kota baik
2,hotel nyaman sunyi,lokasi,positive,hotel nyaman sunyi
3,nyaman suasana tenang,lokasi,positive,nyaman suasana tenang
4,tempat sejuk,lokasi,positive,tempat sejuk


In [2]:
# Build the word index from the cleaned reviews (vocabulary capped with
# num_words=1500).
tokenizer = Tokenizer(num_words=1500)
clean_texts = data_set["clean"].values
tokenizer.fit_on_texts(clean_texts)

# Encode each review as a list of integer word ids, then pad with zeros at
# the end so every row has a fixed length of 31.
X = pad_sequences(
    tokenizer.texts_to_sequences(clean_texts),
    maxlen=31,
    padding='post',
)

# Inspect the input texts, the encoded matrix and its dimensions.
print(data_set["clean"])
print(X)
print(X.shape)
len(X)

0                           pemandangan nyaman
1                        pemandangan kota baik
2                           hotel nyaman sunyi
3                        nyaman suasana tenang
4                                 tempat sejuk
                         ...                  
1181           cukup sekali menginap hotel ini
1182                        tidak akan kembali
1183    tampilan gambar tidak sesuai kenyataan
1184             pikir seribu kali nginep lagi
1185                     tidak berharap banyak
Name: clean, Length: 1186, dtype: object
[[  27   12    0 ...    0    0    0]
 [  27   10   37 ...    0    0    0]
 [   2   12  173 ...    0    0    0]
 ...
 [1092 1093    5 ...    0    0    0]
 [1095 1096  322 ...    0    0    0]
 [   5 1097   19 ...    0    0    0]]
(1186, 31)


1186

In [3]:
# Target column order: position 0 is "aspect_lokasi", position 1 is
# "aspect_lainnya".
new_column_order = ['aspect_lokasi', 'aspect_lainnya']

# One-hot encode the "aspect" label and arrange the columns in that order.
y = pd.get_dummies(data_set[["aspect"]]).loc[:, new_column_order]

# Hold out 20% of the rows; random_state=42 keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(y)

      aspect_lokasi  aspect_lainnya
0                 1               0
1                 1               0
2                 1               0
3                 1               0
4                 1               0
...             ...             ...
1181              0               1
1182              0               1
1183              0               1
1184              0               1
1185              0               1

[1186 rows x 2 columns]


In [4]:
# Seed TensorFlow so weight initialisation and dropout masks are repeatable
# on a fresh "Restart & Run All" (the train/test split is already seeded
# with random_state=42).
tf.random.set_seed(42)

model = Sequential()
# mask_zero=True flags the 0 ids appended by padding='post' so the LSTM skips
# them; without the mask the LSTM's final state is computed after a long run
# of padding steps instead of right after the last real word.
model.add(Embedding(1500, 32, input_length=31, mask_zero=True))
# NOTE(review): 0.8 drops 80% of the embedding channels during training,
# which is unusually aggressive. Confirm this rate is intended.
model.add(SpatialDropout1D(0.8))
model.add(LSTM(64, dropout=0.4, recurrent_dropout=0.2))
# Two-unit softmax over the one-hot targets [aspect_lokasi, aspect_lainnya].
model.add(Dense(2, activation='softmax'))
# categorical_crossentropy is the loss that pairs with a softmax output and
# one-hot labels; binary_crossentropy is meant for independent sigmoid units.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])

# Train for 10 epochs; the held-out split doubles as the validation set, so
# the reported validation metrics are not an independent test estimate.
history = model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(x_test, y_test)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [5]:
review_test = ["ngaco"]

# Encode the review the same way as the training data: same fitted
# tokenizer, same post-padding to length 31.
review_tok = tokenizer.texts_to_sequences(review_test)
review_pad = pad_sequences(review_tok, maxlen=31, padding='post')

# The model's last layer is already a softmax, so predict() returns class
# probabilities in the order [aspect_lokasi, aspect_lainnya]. They must not
# be passed through softmax a second time: that would squash them towards
# 0.5 and they would no longer be the model's probabilities.
prediction = model.predict(review_pad).flatten()

# Convert probabilities to binary class labels
threshold = 0.5
prediction = tf.where(prediction < threshold, 0, 1)

print("prediction: ", prediction.numpy())

prediction:  [0 1]


In [6]:
# Write the trained model to an HDF5 file in the working directory.
# NOTE(review): only the model is saved here; the fitted tokenizer is not.
# Confirm the word index is persisted elsewhere if inference runs in
# another process.
model_path = 'aspek_lokasi_4.h5'
model.save(model_path)