In [1]:
# Libraries needed for NLP
import nltk
# Fetch the 'punkt' tokenizer data that nltk.word_tokenize relies on in later cells.
nltk.download('punkt')
from nltk.stem import PorterStemmer
# Single stemmer instance shared by every cell that normalises tokens.
# NOTE(review): the intent patterns are Indonesian while PorterStemmer is presumably
# English-oriented — confirm this stemmer is the intended choice.
stemmer = PorterStemmer()

# Libraries needed for Tensorflow processing
import tensorflow as tf
import numpy as np
import random
import json

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Lenovo\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt.zip.


In [2]:
# import our chat-bot intents file
# JSON text is UTF-8 by specification. Without an explicit encoding, open() falls
# back to the platform locale, which can mis-decode or reject non-ASCII characters.
with open('intents.json', encoding='utf-8') as json_data:
    intents = json.load(json_data)

In [3]:
# Echo the loaded intents so the structure (tag / patterns / responses) can be inspected.
# NOTE(review): this prints the entire dictionary into the cell output; consider
# previewing only a slice such as intents['intents'][:2].
intents

{'intents': [{'tag': 'Salam_Pagi',
   'patterns': ['Selamat Pagi', 'Pagi'],
   'responses': ['Pagi! Ada yang bisa CuBot bantu?',
    'Selamat Pagi! Bagaimana harimu?',
    'Pagi! Bagaimana kegiatanmu?',
    'Pagi! Apa yang sedang kamu pikirkan?',
    'Selamat Pagi! Semoga hari ini penuh kebahagiaan.',
    'Hai! Selamat Pagi! Apa yang bisa CuBot lakukan untukmu?',
    'Pagi yang indah! Bagaimana kabarmu hari ini?',
    'Selamat Pagi! Ada yang spesial yang ingin kamu lakukan hari ini?',
    'Pagi yang segar! Ada yang perlu didiskusikan?']},
  {'tag': 'Salam_Siang',
   'patterns': ['Selamat Siang', 'Siang'],
   'responses': ['Selamat Siang! Ada yang ingin kamu ceritakan?',
    'Siang! Bagaimana kegiatanmu hari ini?',
    'Selamat Siang! Semoga makan siangmu lezat.',
    'Siang! Apa yang bisa CuBot bantu?',
    'Siang! Ada yang ingin kamu diskusikan?',
    'Selamat Siang! Bagaimana rencanamu hari ini?',
    'Hai! Selamat Siang! Apa yang bisa CuBot lakukan untukmu?',
    'Siang yang menyena

In [4]:
# Accumulators filled while walking the intents file:
#   words     - every token seen in any pattern (duplicates kept for now)
#   classes   - distinct intent tags, in first-seen order
#   documents - (token list, tag) pairs, one per pattern
words, classes, documents = [], [], []
# punctuation tokens dropped later when the vocabulary is built
ignore = ['?', '!', ',']

for intent in intents['intents']:
    tag = intent['tag']
    for pattern in intent['patterns']:
        # split the pattern sentence into tokens
        tokens = nltk.word_tokenize(pattern)
        words += tokens
        documents.append((tokens, tag))
        # a tag is registered only once, the first time one of its patterns is seen
        if tag not in classes:
            classes.append(tag)

In [5]:
# Build the vocabulary: drop ignored punctuation, lower-case and stem every token,
# then keep each stem once in sorted order.
unique_stems = {stemmer.stem(token.lower()) for token in words if token not in ignore}
words = sorted(unique_stems)

# Tags are de-duplicated and sorted the same way so their indices are stable.
classes = sorted(set(classes))

print(f"{len(documents)} documents")
print(f"{len(classes)} classes {classes}")
print(f"{len(words)} unique stemmed words {words}")

170 documents
24 classes ['Berpisah', 'CuBot_Berterimakasih_Komentar', 'CuBot_Minta_Komentar', 'Default', 'Kabar_CuBot', 'Komentar', 'Pola_Tidak_Ditemukan', 'Respon_Umum_1', 'Respon_Umum_10', 'Respon_Umum_11', 'Respon_Umum_2', 'Respon_Umum_3', 'Respon_Umum_4', 'Respon_Umum_5', 'Respon_Umum_6', 'Respon_Umum_7', 'Respon_Umum_8', 'Respon_Umum_9', 'Salam_Malam', 'Salam_Pagi', 'Salam_Siang', 'Salam_Sore', 'Salam_Umum', 'Terimakasih_CuBot']
180 unique stemmed words ['.', 'ada', 'adalah', 'akan', 'aku', 'aman', 'apa', 'apakah', 'arah', 'asa', 'asmara', 'ata', 'bagaimana', 'bahagia', 'baik', 'baik-baik', 'banyak', 'begitu', 'benar-benar', 'berarti', 'berdaya', 'beri', 'berikan', 'berjalan', 'berpisah', 'bersedih', 'bersyukur', 'bertemu', 'beruntung', 'biasa', 'bingung', 'bisa', 'bye', 'cema', 'cinta', 'cubot', 'cukup', 'dadah', 'dan', 'dari', 'deh', 'dengan', 'depan', 'dicintai', 'dihargai', 'dimabuk', 'dong', 'dulu', 'enak', 'frustasi', 'gelisah', 'gembira', 'geram', 'hai', 'hallo', 'halo', '

In [6]:
# Create the training data: one [bag-of-words, one-hot tag] pair per pattern.
training = []
# Template row of zeros (one slot per tag); copied for every document.
output_empty = [0] * len(classes)

for doc in documents:
    # doc is (tokens, tag). Stem/lower-case the tokens exactly as the vocabulary
    # was built, and keep them in a set so each vocabulary lookup is O(1).
    pattern_stems = {stemmer.stem(word.lower()) for word in doc[0]}
    # 1 where the vocabulary word occurs in this pattern, 0 elsewhere
    bag = [1 if w in pattern_stems else 0 for w in words]

    # output is '1' for the current tag and '0' for all other tags
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

# Shuffle the pairs together so features and labels stay aligned.
random.shuffle(training)

# Split the pairs directly instead of round-tripping through an object-dtype
# NumPy array: that conversion silently produced object-dtype rows whenever the
# vocabulary size happened to equal the number of classes.
train_x = [bag for bag, _ in training]
train_y = [row for _, row in training]

In [7]:
# Small feed-forward classifier: bag-of-words vector in, one probability per intent tag out.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(20, input_shape=(len(train_x[0]),), activation='relu'))
# The regularizer is taken from tf.keras rather than the standalone `keras` package
# so that every component of the model comes from the same Keras implementation.
# NOTE(review): this layer has no activation, i.e. it is a linear projection with an
# L2 weight penalty — confirm that is intended.
model.add(tf.keras.layers.Dense(10, kernel_regularizer=tf.keras.regularizers.l2(0.01)))
# softmax over the tags pairs with the categorical cross-entropy loss below
model.add(tf.keras.layers.Dense(len(train_y[0]), activation='softmax'))
model.compile(tf.keras.optimizers.Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

In [8]:
# Train on the full bag-of-words matrix and its one-hot labels.
model.fit(
    x=np.array(train_x),
    y=np.array(train_y),
    batch_size=8,
    epochs=500,
    verbose=1,
)
# NOTE(review): despite the ".pkl" name this is written by Keras' own saver, not
# pickle (the log reports assets under model.pkl\assets). The name is kept because
# the reload cell reads the same path.
model.save("model.pkl")

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

INFO:tensorflow:Assets written to: model.pkl\assets


In [9]:
import pickle
# Persist the vocabulary and tag list so the chat cells can rebuild input vectors
# and decode predictions. The context manager guarantees the file is flushed and
# closed, which the bare open() call did not.
with open("training_data", "wb") as training_file:
    pickle.dump({'words': words, 'classes': classes}, training_file)

In [10]:
# Reload the trained network through tf.keras — the same API that built and saved
# it — instead of the standalone `keras` package, so save and load cannot diverge.
model = tf.keras.models.load_model("model.pkl")

In [11]:
# restoring all the data structures
# NOTE: pickle.load can execute arbitrary code, so only load a file that this
# notebook wrote itself. The context manager closes the handle after reading.
with open("training_data", "rb") as training_file:
    data = pickle.load(training_file)
words = data['words']
classes = data['classes']

In [12]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and stemmed.

    Tokens are produced by nltk.word_tokenize and normalised with the shared
    module-level `stemmer`; order and duplicates are preserved.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

# returning bag of words array: 0 or 1 for each word in the bag that exists in the sentence
def bow(sentence, words):
    """Encode `sentence` as a binary bag-of-words vector over `words`.

    Args:
        sentence: raw text to encode.
        words: vocabulary list; position i of the result corresponds to words[i].

    Returns:
        A NumPy array of len(words) integers, 1 where the vocabulary word appears
        among the stemmed tokens of `sentence` and 0 elsewhere.
    """
    # A set makes each vocabulary lookup O(1) instead of rescanning the whole
    # vocabulary once per sentence token.
    sentence_stems = set(clean_up_sentence(sentence))
    return np.array([1 if w in sentence_stems else 0 for w in words])

In [15]:
import os
ERROR_THRESHOLD = 0.30
def classify(sentence):
    """Predict the intent tags for `sentence`.

    The sentence is encoded with bow() against the module-level `words`, scored
    by the module-level `model`, and every tag whose probability exceeds
    ERROR_THRESHOLD is kept.

    Returns:
        A list of (tag, probability) tuples sorted from most to least probable;
        empty when no tag clears the threshold.
    """
    bag = bow(sentence, words)
    # verbose=0 keeps Keras from printing a progress bar into the chat transcript
    # on every single prediction.
    probabilities = model.predict(np.array([bag]), verbose=0)[0]
    # keep only confident predictions, strongest first
    ranked = sorted(
        ((index, prob) for index, prob in enumerate(probabilities) if prob > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [(classes[index], prob) for index, prob in ranked]

def save_comment(username, comment):
    """Append `comment` as one line to Comments/<username>_comments.txt.

    Args:
        username: name typed by the user; used to build the file name.
        comment: text to append; a trailing newline is added.

    The Comments folder is created on demand. Because `username` is free-form
    user input, path separators, control characters and characters that are not
    allowed in Windows file names are replaced with '_' so the file always lands
    inside the Comments folder. Ordinary names are left unchanged.
    """
    folder_name = 'Comments'
    # exist_ok avoids the check-then-create race of os.path.exists + os.makedirs
    os.makedirs(folder_name, exist_ok=True)

    unsafe_chars = set('\\/:*?"<>|')
    safe_name = ''.join(
        '_' if (ch in unsafe_chars or ord(ch) < 32) else ch
        for ch in username
    )

    filename = os.path.join(folder_name, f"{safe_name}_comments.txt")
    # explicit UTF-8 so non-ASCII comments are stored the same way on every platform
    with open(filename, 'a', encoding='utf-8') as file:
        file.write(comment + '\n')

In [16]:
user_name = input("Masukkan nama Anda: ")

# Bot label padded to the width of the user's name so the colons line up.
BOT_LABEL = f"{'CuBot':<{len(user_name)}}"


def _pick_response(tag):
    """Return a random response of the intent called `tag`, or None if no such intent exists."""
    for intent in intents['intents']:
        if intent['tag'] == tag:
            return random.choice(intent['responses'])
    return None


def _say(tag):
    """Print a random response of intent `tag` as CuBot; print nothing if the tag is unknown."""
    response = _pick_response(tag)
    if response is not None:
        print(f"{BOT_LABEL}: {response}")


# Opening greeting. The 'Default' intent is looked up directly: the previous
# version re-classified a fixed sentence in a loop until the model answered
# 'Default', which never terminates if the model predicts anything else.
_say('Default')

exited = False
while not exited:
    user_input = input(f"{user_name}: ")
    results = classify(user_input)
    # classify() returns matches strongest-first; an empty list means nothing
    # cleared the threshold, which is handled as "pattern not found".
    top_tag = results[0][0] if results else 'Pola_Tidak_Ditemukan'

    if top_tag == 'Berpisah':
        # Farewell flow: say goodbye, ask for feedback, store it, thank the user.
        _say('Berpisah')
        feedback_prompt = _pick_response('CuBot_Minta_Komentar')
        if feedback_prompt is not None:
            comment = input(f"{BOT_LABEL}: {feedback_prompt}\nKomentar Anda terkait CuBot: ")
            save_comment(user_name, comment)
        _say('CuBot_Berterimakasih_Komentar')
        # The session ends after the farewell even if the thank-you intent is missing.
        exited = True
    else:
        _say(top_tag)

CuBot  : Halo! Aku CuBot, teman curhat yang selalu punya telinga untuk mendengarkan. Ada yang bisa CuBot bantu hari ini?
CuBot  : Hallo! Ada yang ingin kamu tanyakan?
CuBot  : Terima kasih sudah mampir! Semoga harimu menyenangkan.
CuBot  : CuBot senang bisa mendengarkan ceritamu! Terima kasih sudah mempercayakan cerita dan komentarmu kepada CuBot.
