# 실습 5: 영화 리뷰 감성 분석 (LSTM)

**목표**: IMDB 리뷰 긍정/부정 분류 85%+

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

In [2]:
# 데이터 로드
VOCAB_SIZE = 10000
MAX_LEN = 200

(X_train, y_train), (X_test, y_test) = keras.datasets.imdb.load_data(num_words=VOCAB_SIZE)
X_train = keras.preprocessing.sequence.pad_sequences(X_train, maxlen=MAX_LEN)
X_test = keras.preprocessing.sequence.pad_sequences(X_test, maxlen=MAX_LEN)

print(f"학습: {X_train.shape}, 테스트: {X_test.shape}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
[1m17464789/17464789[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
학습: (25000, 200), 테스트: (25000, 200)


In [3]:
# LSTM 모델
model = keras.Sequential([
    layers.Embedding(VOCAB_SIZE, 128),
    layers.LSTM(64, dropout=0.2),
    layers.Dense(1, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()

In [4]:
# 학습
history = model.fit(X_train, y_train, epochs=5, batch_size=64, validation_split=0.2)

Epoch 1/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 15ms/step - accuracy: 0.7022 - loss: 0.5548 - val_accuracy: 0.8630 - val_loss: 0.3265
Epoch 2/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.8991 - loss: 0.2588 - val_accuracy: 0.8654 - val_loss: 0.3177
Epoch 3/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.9334 - loss: 0.1807 - val_accuracy: 0.8656 - val_loss: 0.3321
Epoch 4/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 13ms/step - accuracy: 0.9478 - loss: 0.1523 - val_accuracy: 0.8680 - val_loss: 0.3413
Epoch 5/5
[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.9617 - loss: 0.1049 - val_accuracy: 0.8508 - val_loss: 0.4221


In [5]:
# 평가
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
print(f"테스트 정확도: {test_acc:.2%}")

테스트 정확도: 84.41%


In [6]:
# 직접 테스트
word_index = keras.datasets.imdb.get_word_index()

def predict_sentiment(text):
    words = text.lower().split()
    encoded = [word_index.get(w, 2) + 3 for w in words]
    encoded = encoded[:MAX_LEN]
    padded = keras.preprocessing.sequence.pad_sequences([encoded], maxlen=MAX_LEN)
    pred = model.predict(padded, verbose=0)[0][0]
    sentiment = "긍정" if pred > 0.5 else "부정"
    return f"{sentiment} ({pred:.1%})"

print(predict_sentiment("This movie is amazing and wonderful"))
print(predict_sentiment("Terrible movie waste of time"))

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb_word_index.json
[1m1641221/1641221[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
긍정 (92.6%)
부정 (0.7%)


In [9]:
user_input = input("감성을 분석할 문장을 입력하세요: ")
result = predict_sentiment(user_input)
print(f"입력하신 문장은: '{user_input}'")
print(f"감성 분석 결과: {result}")

감성을 분석할 문장을 입력하세요: 영화가 참 재밌네요
입력하신 문장은: '영화가 참 재밌네요'
감성 분석 결과: 부정 (48.0%)
