In [83]:
import re
import string
import json
import torch
import random
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import evaluation
from sentence_transformers import util
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, SentencesDataset, InputExample, losses

In [84]:
def load_intent_data(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

In [85]:
intent_data = load_intent_data('Dataset Chatbot.json')

In [86]:
train_examples = [InputExample(texts=[pattern], label=intent['tag']) for intent in intent_data['intents'] for pattern in intent['patterns']]

In [87]:
tag_to_label = {intent['tag']: idx for idx, intent in enumerate(intent_data['intents'])}

In [88]:
train_examples = []
for intent in intent_data['intents']:
    for pattern in intent['patterns']:
        label = tag_to_label[intent['tag']]   # Mengonversi tag menjadi label numerik
        train_examples.append(InputExample(texts=[pattern, pattern], label=float(label))) # Mengonversi label ke float

In [89]:
# Create a model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Define a batch size
batch_size = 32

# Define a dataloader
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=batch_size)

# Define a loss function
train_loss = losses.CosineSimilarityLoss(model)

# Training the model
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=25, save_best_model=True, output_path='model/')

Epoch:   0%|          | 0/25 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

In [90]:
def match_intent(input_token, intent_data, model):
    input_embeddings = model.encode(input_token, convert_to_tensor=True)

    best_match = None
    best_similarity = -1  # Perhatikan bahwa similarity akan menjadi nilai -1 hingga 1.

    for intent in intent_data['intents']:
        for pattern in intent['patterns']:
            pattern_embedding = model.encode(pattern, convert_to_tensor=True)
            similarity = util.pytorch_cos_sim(input_embeddings, pattern_embedding)[0].item()

            if similarity > best_similarity:
                best_similarity = similarity
                best_match = (intent, pattern, similarity)

    return best_match

In [91]:
model = SentenceTransformer('model')

In [92]:
while True:
    try:
        input_user = input("You: ")
        if input_user in ("quit", "exit"):
            print("masuk break")
            break
        best_match = match_intent(input_user, intent_data, model)
        if best_match is not None:
            matched_intent, matched_pattern, similarity = best_match
            if similarity >= 0.8:
                print(f"Input user: {input_user}")
                print(f"Token input cocok dengan intent: {matched_intent['tag']}")
                print(f"Pola terbaik: {matched_pattern}")
                print(f"Kemiripan: {similarity * 100:.2f}%")
                print("Bot : ", random.choice(matched_intent['response']))
            else:
                print("Mohon maaf chatbot tidak mengerti instruksi dari anda. Mohon berikan instruksi ulang atau berikan instruksi lain.")
        else:
            print("Mohon maaf chatbot tidak mengerti instruksi dari anda. Mohon berikan instruksi ulang atau berikan instruksi lain.")
    except Exception as e:
        print("Terjadi kesalahan:", e)

Input user: halo
Token input cocok dengan intent: greetings
Pola terbaik: halo
Kemiripan: 100.00%
Bot :  Halo, selamat datang di fitur chatbot Tanggap. Ada yang bisa saya bantu?
Input user: fasilitas sekolah ada yang rusak
Token input cocok dengan intent: pengaduan-fasilitas-sekolah
Pola terbaik: fasilitas sekolah tidak berfungsi
Kemiripan: 98.68%
Bot :  Terima kasih atas pertanyaan atau keluhan Anda mengenai fasilitas di sekolah. Untuk informasi lebih lanjut atau bantuan terkait fasilitas sekolah, Anda dapat menghubungi Customer Service kami melalui tombol yang tersedia. Jika Anda ingin membuat laporan pengaduan, mohon sertakan deskripsi detail seperti alamat, foto, dan informasi lebih lanjut mengenai kondisi yang Anda alami. Kami akan berusaha membantu Anda secepat mungkin. Terima kasih.
Input user: administrasi kecamatan buruk
Token input cocok dengan intent: administrasi-sekolah
Pola terbaik: administrasi tidak ada kemajuan
Kemiripan: 99.02%
Bot :  Terima kasih atas pertanyaan atau