In [1]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Example dataset
conversations = [
 ["Hello", "Hi there!"],
 ["How are you?", "I'm doing well, thanks."],
 ["What's your name?", "I'm a chatbot."]
]
token = Tokenizer()
token.fit_on_texts(conversations)
print(token.word_index)

sequences = token.texts_to_sequences(conversations)
print(sequences)

user_input = 'How are you?'
input_seq = token.texts_to_sequences([user_input])
print("Input Seq: ", input_seq)
exit(0)

max_sequence_len = max([len(seq) for seq in sequences])
print("Max Input Len: ", max_sequence_len)

print("==================: ", len(sequences))
X = []; y = []
for idx in range(len(sequences)):
  X.append([sequences[idx][0]])
  y.append([sequences[idx][1]])
print(X)
print(y)
word_size = len(token.word_index) + 1
print("Size: ", word_size)

# 모델의 설정
model = Sequential()
model.add( Embedding(word_size, 64, input_length=max_sequence_len, mask_zero=True) )
model.add( LSTM(100, return_sequences=True) )
model.add( Dense(word_size, activation='softmax') )

# 모델의 컴파일
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X, y, epochs=50, verbose=1)

def generate_response(input_text):
  input_seq = token.texts_to_sequences([[input_text]])
  print("input seq: ", input_seq)
  predicted_output = model.predict(input_seq)
  print("Predict Output: ", predicted_output)

  predicted_word_index = tf.argmax(predicted_output, axis=-1).numpy()
  print("Predict Index: ", predicted_word_index)
  response = token.sequences_to_texts(predicted_word_index)
  return response[0]

# Test the chatbot
user_input = "how are you?"
response = generate_response(user_input)
print(f"User: {user_input}")
print(f"Chatbot: {response}")

{'hello': 1, 'hi there!': 2, 'how are you?': 3, "i'm doing well, thanks.": 4, "what's your name?": 5, "i'm a chatbot.": 6}
[[1, 2], [3, 4], [5, 6]]
Input Seq:  [[]]
Max Input Len:  2
[[1], [3], [5]]
[[2], [4], [6]]
Size:  7
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
input seq:  [[3]]
Predict Output:  [[[0.1036343  0.09686843 0.14839494 0.09731966 0.3061382  0.09633213
   0.15131234]]]
Predict Index:  [[4]]
User: how are you?
Chatbot: i'm doing well