In [1]:
import re
import string
import json
import torch
import random
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import evaluation
from sentence_transformers import util
from torch.utils.data import DataLoader
from sentence_transformers import SentenceTransformer, SentencesDataset, InputExample, losses



In [2]:
def load_intent_data(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

In [3]:
intent_data = load_intent_data('Dataset Chatbot.json')

In [4]:
tag_to_label = {intent['tag']: idx for idx, intent in enumerate(intent_data['intents'])}

In [5]:
train_examples = []
for intent in intent_data['intents']:
    for pattern in intent['patterns']:
        label = tag_to_label[intent['tag']]   # Mengonversi tag menjadi label numerik
        train_examples.append(InputExample(texts=[pattern, pattern], label=float(label))) # Mengonversi label ke float

In [6]:
# Create a model
model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

# Define a batch size
batch_size = 32

# Define a dataloader
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=batch_size)

# Define a loss function
train_loss = losses.CosineSimilarityLoss(model)

# Training the model
model.fit(train_objectives=[(train_dataloader, train_loss)], epochs=25, save_best_model=True, output_path='model/')

Epoch:   0%|          | 0/25 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

Iteration:   0%|          | 0/22 [00:00<?, ?it/s]

In [7]:
def match_intent(input_token, intent_data, model):
    input_embeddings = model.encode(input_token, convert_to_tensor=True)

    best_match = None
    best_similarity = -1  # Perhatikan bahwa similarity akan menjadi nilai -1 hingga 1.

    for intent in intent_data['intents']:
        for pattern in intent['patterns']:
            pattern_embedding = model.encode(pattern, convert_to_tensor=True)
            similarity = util.pytorch_cos_sim(input_embeddings, pattern_embedding)[0].item()

            if similarity > best_similarity:
                best_similarity = similarity
                best_match = (intent, pattern, similarity)

    return best_match

In [8]:
model = SentenceTransformer('model')

In [9]:
while True:
    try:
        input_user = input("You: ")
        if input_user in ("quit", "exit"):
            print("masuk break")
            break
        best_match = match_intent(input_user, intent_data, model)
        if best_match is not None:
            matched_intent, matched_pattern, similarity = best_match
            if similarity >= 0.8:
                print(f"Input user: {input_user}")
                print(f"Token input cocok dengan intent: {matched_intent['tag']}")
                print(f"Pola terbaik: {matched_pattern}")
                print(f"Kemiripan: {similarity * 100:.2f}%")
                print("Bot : ", random.choice(matched_intent['response']))
            else:
                print("Mohon maaf chatbot tidak mengerti instruksi dari anda. Mohon berikan instruksi ulang atau berikan instruksi lain.")
        else:
            print("Mohon maaf chatbot tidak mengerti instruksi dari anda. Mohon berikan instruksi ulang atau berikan instruksi lain.")
    except Exception as e:
        print("Terjadi kesalahan:", e)

tensor([ 6.0677e-02,  1.1360e+00,  1.8714e-01, -5.8197e-03, -6.4986e-01,
        -4.2507e-01,  8.0370e-01, -4.1109e-01,  5.2938e-01, -1.7792e-01,
         4.8003e-01, -7.6061e-01,  2.5018e-01, -1.6979e-01,  7.8725e-01,
        -1.3629e-01, -5.6345e-02,  1.7577e-01, -4.5617e-01, -5.3044e-02,
         6.2468e-02,  2.6787e-01,  1.4820e-01,  8.9380e-02,  4.0076e-02,
        -8.0694e-02, -3.9750e-01,  7.9232e-02,  2.2385e-01, -6.3609e-01,
         1.3482e-01,  9.5486e-02,  2.9533e-01,  4.3064e-02, -3.0380e-01,
         1.0151e+00, -2.8465e-02,  1.6090e-01, -5.6772e-01,  5.5935e-03,
         3.5345e-01, -1.4155e-01,  2.7630e-02, -2.8739e-01, -4.5375e-02,
        -1.0867e-01, -4.2615e-01, -1.2192e-01, -1.3298e-01,  1.3708e-01,
        -5.7365e-01, -1.3820e-01,  3.1823e-01,  1.7995e-01,  6.8687e-01,
         2.4904e-02,  5.3244e-01,  9.2128e-01,  3.0548e-01, -5.7254e-01,
         4.4847e-02,  1.1831e-01, -1.1274e+00,  5.4294e-01,  5.0628e-01,
        -4.9272e-01,  2.9050e-02, -5.7895e-01, -7.1