In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from konlpy.tag import Mecab
from tensorflow.keras.preprocessing.text import Tokenizer, text_to_word_sequence
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Flatten, Embedding
from tensorflow.keras.utils import to_categorical

In [None]:
# Read the review test CSV into a DataFrame and preview its first two rows.
test_data = pd.read_csv(
    filepath_or_buffer="../05machine_learning/data/bank_app_reviews_test.csv"
)
test_data.head(n=2)

In [None]:
import re

def clean_text(text):
    """Normalize a raw review string.

    Removes every character that is not a Hangul syllable, an ASCII letter,
    a digit or whitespace, collapses each run of whitespace into a single
    space, and strips leading/trailing whitespace.

    Args:
        text: The raw review. Missing values (``None`` or a float NaN, which is
            how pandas represents an empty CSV cell) are treated as an empty
            review; any other non-string value is converted with ``str()``.

    Returns:
        The cleaned string (``""`` for missing input).
    """
    # NaN is the only float that is not equal to itself.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)
    # Keep Hangul, English letters, digits and whitespace only.
    cleaned = re.sub(r'[^가-힣a-zA-Z0-9\s]', '', text)
    # Collapse consecutive whitespace into one space.
    cleaned = re.sub(r'\s+', ' ', cleaned)
    return cleaned.strip()

In [None]:
# Clean every review in place with clean_text, then show the cleaned column.
test_data['사용자리뷰'] = test_data['사용자리뷰'].map(clean_text)
test_data['사용자리뷰']

In [None]:
# Binary sentiment label: 1 when the rating in '평점' is 4 or higher, else 0.
# The comparison is vectorized instead of running a Python lambda per row; a
# missing rating compares False and therefore maps to 0, as the lambda did.
test_data['is_good'] = (test_data['평점'] >= 4).astype('int64')
test_data['is_good']

In [None]:
# Instantiate the KoNLPy Mecab tagger; its morphs() method is used below to
# split each review into tokens.
mecab = Mecab()

In [None]:
# Split every cleaned review into tokens with mecab.morphs (one result per row).
tokenized_docs = test_data['사용자리뷰'].map(mecab.morphs)

In [None]:
# Sanity check: show the tokens produced for the row labelled 0.
tokenized_docs[0]

# train에서 사용했던 tokenizer를 불러와서 정수 인코딩 (단어 → 정수 인덱스 시퀀스)

In [None]:
import joblib

In [None]:
# Load the tokenizer persisted with joblib (per the heading above, the one used
# for training) so the test text is encoded with the same vocabulary.
# SECURITY: joblib.load unpickles the file and can execute arbitrary code; only
# load artifacts from a trusted source.
# NOTE(review): the file name reads "tokeizer" — presumably a typo for
# "tokenizer". It has to match the name the training notebook saved, so it is
# left unchanged here — confirm.
token = joblib.load("./model/bank_app_tokeizer.joblib")

In [None]:
# Encode each tokenized review with the loaded tokenizer's texts_to_sequences,
# then print the first encoded review as a spot check.
x = token.texts_to_sequences(tokenized_docs)
print(x[0])

# train에서 사용했던 패딩 길이(모델에 넣을 컬럼 수)

In [None]:
# Load the padding length saved with joblib (per the heading above, the length
# used for training) and print it.
# SECURITY: joblib.load unpickles the file; only load trusted artifacts.
max_length = joblib.load("./model/bank_app_max_length.joblib")
print(max_length)

In [None]:
# Bring every encoded review to exactly max_length entries, padding at the end
# (padding='post'), then print the second padded row as a spot check.
# NOTE(review): `truncating` is left at its default — confirm the training
# notebook used the same padding/truncating settings.
X_padded = pad_sequences(x, maxlen=max_length, padding='post')
print(X_padded[1])

In [None]:
# Sanity check: length of one padded row (should match the max_length printed above).
len(X_padded[1])

In [None]:
# Ground-truth binary labels for the test set; displayed for inspection.
y = test_data['is_good']
y

# 모델 불러와서 예측하고 결과 비교하기

In [None]:
# Load the three saved Keras models that are compared below.
# NOTE(review): judging by the file names these are a bidirectional RNN, an
# LSTM+CNN model and an attention model — confirm against the training notebook.
birnn_best = load_model("./model/bank_app_review_birnn.keras")
cnn_lstm_best = load_model("./model/bank_app_review_lstm_cnn.keras")
attn_best = load_model("./model/bank_app_review_attn_model.keras")

In [None]:
# Run each model on the padded test sequences.
# NOTE(review): `cnn_latm_pred` looks like a typo for `cnn_lstm_pred`. The next
# cell reads this exact name, so both cells must be renamed together.
birnn_pred = birnn_best.predict(X_padded)
cnn_latm_pred = cnn_lstm_best.predict(X_padded)
attn_pred = attn_best.predict(X_padded)

In [None]:
# Wrap each predict() output in a DataFrame so it can be joined to the labels.
# Note that birnn_pred and attn_pred are rebound here from the raw predict()
# output to DataFrames.
birnn_pred = pd.DataFrame(birnn_pred)
cnn_lstm_pred = pd.DataFrame(cnn_latm_pred)
attn_pred = pd.DataFrame(attn_pred)

In [None]:
# Display the labels again before converting them to a DataFrame.
y

In [None]:
# Rebind y to a DataFrame so that DataFrame.join can be used in the next cell.
y = pd.DataFrame(y)

In [None]:
# Put the true label next to each model's prediction. DataFrame.join aligns on
# the index, so rows line up only while both sides share the same row index.
# NOTE(review): no rows are dropped or re-indexed in the cells shown, so the
# label and prediction frames should both carry the default 0..n-1 index — confirm.
birnn_result = y.join(birnn_pred)
cnn_lstm_result = y.join(cnn_lstm_pred)
attn_pred_result = y.join(attn_pred)

In [None]:
# Turn each model's output in column 0 into a hard 0/1 label with a 0.5 cut-off
# (strictly greater than 0.5 -> 1, otherwise 0).
# The column is replaced (`frame[0] = ...`) instead of written through
# `.loc[:, 0]`: pandas >= 2.0 performs `.loc[:, col] = values` in place and keeps
# the float dtype of the model output, so the labels would stay 0.0/1.0 and be
# reported as float classes. The comparison is also vectorized rather than
# calling a Python lambda per row.
birnn_result[0] = (birnn_result[0] > 0.5).astype('int64')
cnn_lstm_result[0] = (cnn_lstm_result[0] > 0.5).astype('int64')
attn_pred_result[0] = (attn_pred_result[0] > 0.5).astype('int64')


In [None]:
# Show true labels ('is_good') next to the thresholded birnn_best predictions (column 0).
birnn_result

In [None]:
# Show true labels ('is_good') next to the thresholded cnn_lstm_best predictions (column 0).
cnn_lstm_result

In [None]:
# Show true labels ('is_good') next to the thresholded attn_best predictions (column 0).
attn_pred_result

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Precision / recall / F1 summary for birnn_best on the test set.
print(
    classification_report(
        y_true=birnn_result['is_good'],
        y_pred=birnn_result[0],
    )
)

In [None]:
# Precision / recall / F1 summary for cnn_lstm_best on the test set.
print(
    classification_report(
        y_true=cnn_lstm_result['is_good'],
        y_pred=cnn_lstm_result[0],
    )
)

In [None]:
# Precision / recall / F1 summary for attn_best on the test set.
print(
    classification_report(
        y_true=attn_pred_result['is_good'],
        y_pred=attn_pred_result[0],
    )
)

In [None]:
# Display the attention model's label/prediction table again (same frame as shown earlier).
attn_pred_result

# evaluate

In [None]:
%%time
# Evaluate birnn_best on the padded test sequences against the is_good labels.
birnn_best.evaluate(x=X_padded, y=test_data['is_good'])

In [None]:
%%time
# Evaluate cnn_lstm_best on the padded test sequences against the is_good labels.
cnn_lstm_best.evaluate(x=X_padded, y=test_data['is_good'])

In [None]:
%%time
# Evaluate attn_best on the padded test sequences against the is_good labels.
attn_best.evaluate(x=X_padded, y=test_data['is_good'])