In [52]:
from semantic_router import Route
from semantic_router.encoders import OpenAIEncoder
from semantic_router import RouteLayer
import pandas as pd
from sklearn.model_selection import train_test_split

In [53]:
description_dict = {
    "creating_flashcards": "The user wants to generate flashcards based on provided information",
    "motivational_support": "The user needs motivational support to continue on with their studies",
    "recommendations_and_learning_resources": "the user needs some recomendations of resources for their studying",
    "update_user_info": "The user wants to update/read/delete their personal information",
    "aurora_related": "The user wants to know more about Aurora, the Chatbot",
    "study_planning": "The user wants to have a study plan based on their preferences",
    "generate_citation": "The user want to have a citation to a givent text from possible documents",
    "summarize_file": "The user wants to recieve the summarised text from the provided file",
    "creating_quizzes": "The user wants to receive a quizz based on provided information",
    "send_email": "The user wants to send an email to someone",
    "None": "The users talks about unrelated topics"
}

In [54]:
# data import and split
df_loaded = pd.read_json("files/intentions.json") 
X = df_loaded[['Id','Message']]
y = df_loaded['Intention'].to_list()

In [55]:
# Split the dataset with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=0, stratify=y)
# Replace "None" with None
y_train = [None if i == "None" else i for i in y_train]
y_test = [None if i == "None" else i for i in y_test]
# Set the column Id as the index
X_train.set_index('Id', inplace=True)
X_test.set_index('Id', inplace=True)

In [56]:
messages_dict = {}
for message, label in zip(X_train["Message"], y_train):
    if label is not None and label in messages_dict.values():
        messages_dict[label].append(message)
    elif label is not None:
        messages_dict[label] = [message]

In [57]:
routes = []
for label, message_list in zip(messages_dict.keys(),
                               messages_dict.values()):
    routes.append(
        Route(
            name=label,
            description=description_dict[label], 
            utterances=message_list
        )
    )

In [58]:
encoder = OpenAIEncoder()
oa_rl = RouteLayer(encoder=encoder, routes=routes, aggregation="mean")

# Initialize a dictionary to store the accuracy for each intention
intention_accuracy = {}
intention_counts = {}

# Total correct predictions and examples for overall accuracy
total_correct = 0
total_examples = 0

# Iterate through each unique intention in y_test
for intention in set(y_test):
    # Filter messages and labels for the current intention
    indices = [i for i, y in enumerate(y_test) if y == intention]
    X_subset = [X_test["Message"].iloc[i] for i in indices]
    y_subset = [y_test[i] for i in indices]
    
    # Evaluate the accuracy for the current intention
    if len(y_subset) > 0:
        accuracy = oa_rl.evaluate(X=X_subset, y=y_subset)
        intention_accuracy[intention] = accuracy
        intention_counts[intention] = len(y_subset)
        
        # Update overall correct predictions and examples
        total_correct += int(accuracy * len(y_subset))
        total_examples += len(y_subset)
    else:
        intention_accuracy[intention] = None
        intention_counts[intention] = 0

# Print accuracy per intention
for intention, accuracy in intention_accuracy.items():
    if accuracy is not None:
        print(f"Intention '{intention}': {accuracy * 100:.2f}% ({intention_counts[intention]} examples)")
    else:
        print(f"Intention '{intention}': No examples in the test set.")

# Calculate and print overall accuracy
overall_accuracy = total_correct / total_examples if total_examples > 0 else 0
print(f"\nOverall Accuracy: {overall_accuracy * 100:.2f}%")


Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.57it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.48it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.31it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.34it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.56it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.53it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  2.26it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.50it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.47it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.34it/s]
Generating embeddings: 100%|██████████| 1/1 [00:00<00:00,  1.49it/s]

Intention 'motivational_support': 75.00% (8 examples)
Intention 'update_user_info': 85.71% (7 examples)
Intention 'recommendations_and_learning_resources': 85.71% (7 examples)
Intention 'study_planning': 75.00% (8 examples)
Intention 'generate_citation': 100.00% (7 examples)
Intention 'send_email': 100.00% (8 examples)
Intention 'None': 100.00% (8 examples)
Intention 'aurora_related': 100.00% (8 examples)
Intention 'creating_quizzes': 85.71% (7 examples)
Intention 'summarize_file': 71.43% (7 examples)
Intention 'creating_flashcards': 87.50% (8 examples)

Overall Accuracy: 87.95%





In [59]:
oa_rl.to_json("files/layer.json")

[32m2025-01-04 17:42:58 INFO semantic_router.utils.logger Saving route config to files/layer.json[0m


In [60]:
# those all are check
print(oa_rl.retrieve_multiple_routes("Я люблю сосиски і квізи"), "\n")
print(oa_rl("Я люблю сосиски і квізи").name, "\n")
route_thresholds = oa_rl.get_thresholds()
print("New route thresholds:", route_thresholds, "\n")

for (index, row), label in zip(X_test.iterrows(), y_test):
    message = row["Message"]
    prediction = oa_rl(message)

    if prediction.name == label:
        continue
    else:
        print(f"Id: {index}")
        print(f"Message: {message}")
        print(f"True Route: {label}, Predicted Route: {prediction.name}")
        print()

[] 

None 

New route thresholds: {'motivational_support': 0.3, 'update_user_info': 0.3, 'recommendations_and_learning_resources': 0.3, 'creating_quizzes': 0.3, 'send_email': 0.3, 'generate_citation': 0.3, 'creating_flashcards': 0.3, 'aurora_related': 0.3, 'study_planning': 0.3, 'summarize_file': 0.3} 

Id: 277
Message: Can you summarize this article on programming best practices?
True Route: summarize_file, Predicted Route: aurora_related

Id: 367
Message: I don’t feel motivated at all.
True Route: motivational_support, Predicted Route: None

Id: 352
Message: I am so lost on Text Mining.
True Route: motivational_support, Predicted Route: generate_citation

Id: 144
Message: Prepare flashcards on computer vision concepts.
True Route: creating_flashcards, Predicted Route: recommendations_and_learning_resources

Id: 534
Message: Can you create a plan to prepare for my Python programming test?
True Route: study_planning, Predicted Route: creating_quizzes

Id: 401
Message: What are the reco