# Testing

## Robustness Testing
We are going to check using Giskard our code if it is resilient to query variations.

In [53]:
# We will create test set to test for robustness of text processing functions
# The test set will include:
# Misspelling
# Synonyms
# Abbreviations
# Acronyms
# Unexpected capitalisation, emojis, special characters
# Extra whitespace

# Using benefits documents and RAG

# Test with sample questions
test_questions = [
    "What are different gym programs?",
    "Will my son have gym discount as well?",
    "I am a full-time employee. Am I eligable for gym benefit?",
    "During parental leave, am I still insured?",
    "I want to go on vacation. How much notice do I need to give?",
    "I haven't used my vacation days. Can I carry them over to next year?",
    "Can I work remotely from another country?",
    "What is the process for requesting time off?"]

# Test with misspellings
misspelled_questions = [
    "What are diffrent gym programs?",
    "Will my son have gym discout as well?",
    "I am a full-time employee. Am I eligable for gym benfit?",
    "During parental leave, am I still insurerd?",
    "I want to go on vacaton. How much notice do I need to give?",
    "I haven't used my vacaton days. Can I carry them over to next year?",
    "Can I work remotly from another country?"]

# Test with synonyms
synonym_questions = [
    "What are various gym programs?",
    "Will my son have gym perks as well?",
    "I am a full-time employee. Am I qualified for gym benefit?",
    "During parental leave, am I still covered?",
    "I want to go on holiday. How much notice do I need to give?",
    "I haven't used my holiday days. Can I carry them over to next year?",
    "Can I work from another country?",
    "What is the procedure for requesting time off?"]

# Test with abbreviations and acronyms
abbreviation_questions = [
    "What are diff gym programs?",
    "Will my son have gym disc as well?",
    "I am a FTE. Am I eligable for gym benefit?",
    "During parental leave, am I still insured?",
    "I want to go on vacay. How much notice do I need to give?",
    "I haven't used my vacay days. Can I carry them over to next year?"]

# Test with unexpected capitalisation, emojis, special characters
special_char_questions = [
    "WHAT are different GYM programs?",
    "Will my son have gym discount as well? 😊",
    "I am a full-time employee!!! Am I eligable for gym benefit???",
    "During parental leave, am I still insured???",
    "I want to go on vacation... How much notice do I need to give???",
    "I haven't used my vacation days!!! Can I carry them over to next year???",
    "Can I work remotely from another country???",
    "What is the process for requesting time off???"]

# Test with extra whitespace
whitespace_questions = [
    "What are    different gym programs?",
    "Will my son have gym discount    as well?",
    "I am a full-time employee.    Am I eligable for gym benefit?",
    "During parental leave, am I still insured?   ",
    "I want to go on vacation.    How much notice do I need to give?",
    "I haven't used my vacation days.   Can I carry them over to next year?",
    "Can I work remotely from another country?   ",
    "What is the process for requesting time off?   "]

# Combine all test questions into a single list
all_test_questions = (test_questions + misspelled_questions + synonym_questions +
                      abbreviation_questions + special_char_questions +
                      whitespace_questions)



In [54]:
import os
import re
import string
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain_core.documents import Document
from typing import List
from langchain_community.document_loaders import TextLoader, Docx2txtLoader, CSVLoader
from rag import clean_text, load_files
# Load environment variables from .env file
load_dotenv()

# Path to the documents
dir_path = 'assets/documents/'

files = [f for f in os.listdir(dir_path) if os.path.isfile(os.path.join(dir_path, f))]

# Collect all loaded documents
all_documents = []
for filename in files:
    full_path = os.path.join(dir_path, filename)
    try:
        docs = load_files(full_path)
        all_documents.extend(docs)
        print(f"Loaded & cleaned {filename}")
    except ValueError as e:
        print(e)

# We start by splitting the document into sections for later text preprocessing
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Suppose `documents` is what you loaded from load_files()
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # max characters per chunk
    chunk_overlap=200,  # overlap between chunks (keeps context)
)

split_docs = splitter.split_documents(all_documents)


Loaded & cleaned tuition-reimbursement-policy.pdf
Loaded & cleaned health-insurance-policy.pdf
Loaded & cleaned work-from-home-policy.pdf
Loaded & cleaned gym-policy.pdf
Loaded & cleaned vacation-policy.pdf
Loaded & cleaned 401k-retirement-policy.pdf
Loaded & cleaned life-insurance-policy.pdf
Loaded & cleaned childcare-policy.pdf


In [55]:
import pandas as pd

# all_documents = list of LangChain Document objects
# Each Document should have page_content and optional metadata

knowledge_base_df = pd.DataFrame([node.page_content for node in split_docs], columns=["text"])
knowledge_base = KnowledgeBase(knowledge_base_df)

testset = generate_testset(
    knowledge_base, num_questions=50, agent_description="A chatbot answering questions about the benefits documents"
)

2025-08-25 16:29:38,599 pid:44316 MainThread giskard.rag  INFO     Finding topics in the knowledge base.
2025-08-25 16:29:48,257 pid:44316 MainThread giskard.rag  INFO     Found 9 topics in the knowledge base.


Generating questions: 100%|██████████| 50/50 [02:12<00:00,  2.65s/it]


In [None]:
# Save the testset
testset.save("generated_testset.jsonl")
from giskard.rag import QATestset
# Load the testset
testset = QATestset.load("generated_testset.jsonl")

In [57]:
testset.to_pandas()

Unnamed: 0_level_0,question,reference_answer,reference_context,conversation_history,metadata
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
91353e43-87a6-4a7e-8cc0-fe43f420a800,How can employees request specific fitness fac...,Employees can submit requests for specific fac...,Document 71: convenient to my home or work loc...,[],"{'question_type': 'simple', 'seed_document_id'..."
6ca2a6f3-dde5-41cb-af9f-8171dc220f74,What happens to my gym membership if I leave T...,Corporate discount rates end on your last day ...,"Document 69: facility tier. for example, a typ...",[],"{'question_type': 'simple', 'seed_document_id'..."
98040f1b-ca99-44ba-9987-1f514f514de6,What fitness benefits are available to Techlan...,Techlance employees and their families have co...,Document 58: 18 receive an additional 10% disc...,[],"{'question_type': 'simple', 'seed_document_id'..."
50ab1d3e-f676-4fd7-b203-72507166e0d9,How much notice will employees receive for sig...,Employees will receive 30 days advance notice ...,Document 145: for ﬁnding specialized care prov...,[],"{'question_type': 'simple', 'seed_document_id'..."
a0e6b99c-1b0e-46c4-b2ea-6cbc0cf84f81,What benefits does the wellness program offer ...,The wellness program offers opportunities to e...,"Document 27: after age 65, you can withdraw hs...",[],"{'question_type': 'simple', 'seed_document_id'..."
ab2bbd36-9c01-496f-af43-9826615ae588,What is the monthly reimbursement limit for in...,Internet service is reimbursed up to $75 per m...,Document 45: the costs of remote work. interne...,[],"{'question_type': 'simple', 'seed_document_id'..."
958e77e3-064e-4969-afc1-017fc6b029e7,What is the lifetime maximum benefit for emplo...,The lifetime maximum benefit for employee educ...,"Document 5: are covered up to $3,000 per year,...",[],"{'question_type': 'simple', 'seed_document_id'..."
2a4a95ca-42ba-408f-a573-beb621ad4026,What should be done if a primary beneficiary p...,It's wise to name contingent beneficiaries who...,Document 117: and these percentages must total...,[],"{'question_type': 'simple', 'seed_document_id'..."
bf0eddba-609b-410e-9437-158ea750867c,What are the repayment conditions if an employ...,Employees leaving within the first six months ...,Document 10: may result in overlapping commitm...,[],"{'question_type': 'simple', 'seed_document_id'..."
e4f558f1-971a-407a-b1fe-c10d38f80082,Under what conditions does Techlance offer res...,"Techlance provides backup care options, helps ...",Document 142: backup care options and helps em...,[],"{'question_type': 'complex', 'seed_document_id..."


In [74]:
from openai import OpenAI
from rag import load_retriever_from_collection, expand_query, retrieve_with_expanded_queries

client = OpenAI()

# Load retriever once
retriever = load_retriever_from_collection(
    collection_name="benefits_collection",
    score_threshold=0.6,
    top_k=3
)

def policy_explainer(query: str, metadata_filter: dict = None) -> str:
    """
    RAG + LLM function to generate employee-friendly answers.
    """
    # Use metadata filter if provided
    if metadata_filter:
        retriever_with_filter = load_retriever_from_collection(
            collection_name="benefits_collection",
            score_threshold=0.4,
            top_k=3,
            metadata_filter=metadata_filter
        )
        docs = retriever_with_filter.get_relevant_documents(query)
    else:
        docs = retriever.get_relevant_documents(query)

    if not docs:
        return "No relevant information found."

    context = "\n\n".join(doc.page_content for doc in docs[:3])

    messages = [
        {"role": "system", "content": "You are an assistant summarizing company policies into employee-friendly explanations. Use only the provided context to answer the question. If the context does not contain the answer, respond with a suggestion to contact the employees manager or HR for guidance."},
        {"role": "user", "content": query + "\n" + context}
    ]

    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages,
            temperature=0.2,
            max_tokens=500,
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return "Error generating response."


In [75]:
# Define answer function for RAGET evaluation
from giskard.rag import AgentAnswer, evaluate

def answer_fn(question: str, history: list = None) -> AgentAnswer:
    return policy_explainer(question)

try:
    rag_report = evaluate(
        answer_fn,
        testset=testset,
        knowledge_base=knowledge_base
    )

except Exception as e:
    print(f"Error during evaluation: {e}")
    rag_report = None

Asking questions to the agent: 100%|██████████| 50/50 [02:54<00:00,  3.49s/it]
CorrectnessMetric evaluation: 100%|██████████| 50/50 [00:45<00:00,  1.11it/s]


In [None]:
rag_report.save("report/rag_evaluation_report_change_retrieval")
display(rag_report)

In [None]:
from rag import load_chroma_collection, load_retriever_from_collection, load_retriever_with_metadata_from_collection
from openai import OpenAI

client = OpenAI()

def append_message(messages, role, content):
    """Append a message to the conversation history."""
    messages.append({"role": role, "content": content})
    return messages

def get_response(messages):
    """
    Generate a response using the chat completions API
    """
    try:
        response = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=messages
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"Error generating response: {e}")
        return None

retriever = load_retriever_from_collection(
    collection_name="benefits_collection",
    score_threshold=0.6,
    top_k=3
)
    
# Initialize conversation
messages = [
    {"role": "system", "content": "You are an assistant summarizing policies into employee-friendly explanations."}
]
while True:
    # Let the user ask a question
    query = input()

    # Get the most similar answer from the RAG
    rag_results = retriever.get_relevant_documents(query)
    rag_result = rag_results[0].page_content # Get the most similar one

    # Concatenate the user query with the rag response
    final_query = query + "\n" + rag_result

    # Append user message
    messages = append_message(messages, "user", final_query)

    # Get response from the chatbot and add it to the message to continue the context
    response = get_response(messages)
    messages = append_message(messages, "assistant", response)
    print(response+"\n")