In [3]:
!pip install tensorflow numpy pandas nltk




In [4]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

ModuleNotFoundError: No module named 'tensorflow'

In [None]:
# **1. Load Dataset**
dataset_path = "data/dataset.csv"  # Sesuaikan dengan lokasi dataset Anda
if not os.path.exists(dataset_path):
    raise FileNotFoundError(f"Dataset tidak ditemukan di path {dataset_path}")

# Baca dataset
data = pd.read_csv(dataset_path)

# Pastikan dataset memiliki kolom yang sesuai
if 'question' not in data.columns or 'answer' not in data.columns:
    raise ValueError("Dataset harus memiliki kolom 'question' dan 'answer'")

questions = data['question'].values
answers = data['answer'].values


In [None]:
# **2. Tokenisasi Data**
tokenizer = Tokenizer()
tokenizer.fit_on_texts(np.concatenate([questions, answers]))
vocab_size = len(tokenizer.word_index) + 1

# Konversi pertanyaan ke urutan token
input_sequences = tokenizer.texts_to_sequences(questions)
max_len = max(len(seq) for seq in input_sequences)  # Panjang maksimum sekuens
input_sequences = pad_sequences(input_sequences, maxlen=max_len, padding='post')

# Konversi jawaban ke urutan token
output_sequences = tokenizer.texts_to_sequences(answers)
output_sequences = pad_sequences(output_sequences, maxlen=max_len, padding='post')

# Karena kita menggunakan sparse_categorical_crossentropy, output hanya berupa 1 dimensi
output_sequences = np.array(output_sequences)[:, -1]  # Ambil hanya token terakhir

In [None]:
# Make sure vocab_size and max_len are defined correctly
print("vocab_size:", vocab_size)  # Make sure it's > 1
print("max_len:", max_len)        # Make sure it's > 0

# Initialize model
model = Sequential([
    Embedding(vocab_size, 128, input_length=max_len),  # Embedding layer
    LSTM(256, return_sequences=False),  # LSTM layer
    Dense(128, activation='relu'),      # Dense layer
    Dense(vocab_size, activation='softmax')  # Output layer
])

# Build model explicitly with input shape
model.build(input_shape=(None, max_len))  # Explicitly set input shape

# Compile the model
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Check model summary
model.summary()


vocab_size: 197
max_len: 16




In [None]:
# **4. Training Model**
model.fit(input_sequences, output_sequences, epochs=50, batch_size=32)

Epoch 1/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - accuracy: 0.0000e+00 - loss: 5.2781
Epoch 2/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.7600 - loss: 5.2210
Epoch 3/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 105ms/step - accuracy: 0.7600 - loss: 5.1488
Epoch 4/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - accuracy: 0.7600 - loss: 5.0307
Epoch 5/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 114ms/step - accuracy: 0.7600 - loss: 4.8291
Epoch 6/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 117ms/step - accuracy: 0.7600 - loss: 4.4757
Epoch 7/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 101ms/step - accuracy: 0.7600 - loss: 3.8529
Epoch 8/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 103ms/step - accuracy: 0.7600 - loss: 2.8511
Epoch 9/50
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[3

<keras.src.callbacks.history.History at 0x26d9d216860>

In [None]:
# **5. Fungsi Chatbot**
def generate_response(input_text):
    tokenized_input = tokenizer.texts_to_sequences([input_text])
    padded_input = pad_sequences(tokenized_input, maxlen=max_len, padding='post')
    prediction = model.predict(padded_input, verbose=0)
    response_index = np.argmax(prediction, axis=1)[0]
    # Cari kata dari indeks
    for word, index in tokenizer.word_index.items():
        if index == response_index:
            return word
    return "Maaf, saya tidak mengerti."


In [None]:
# **6. Uji Chatbot**
while True:
    user_input = input("Anda: ")
    if user_input.lower() == 'exit':
        print("Chatbot: Terima kasih! Sampai jumpa.")
        break 
    response = generate_response(user_input)
    print("Chatbot:", response)

Chatbot: Terima kasih! Sampai jumpa.
