In [2]:
import os
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file using 'from dotenv import find_dotenv, load_dotenv'
load_dotenv(find_dotenv(filename='SURF-Project_Optimizing-PerunaBot/setup/.env'))

openai_api_key = os.environ['OPENAI_API_KEY']

In [3]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
from langchain_community.document_loaders import PyPDFLoader

# file paths of PDFs to be used
pdf_paths = ['../Data/Evaluation Data/Southern Methodist University - 2023-2024 Undergraduate Catalog from About SMU to Right to Know.pdf',
             '../Data/Evaluation Data/Important University Resources from SMU Student Handbook 23-24.pdf',
             '../Data/Evaluation Data/Important SMU Numbers and Websites.pdf'
             ]

# Function to load PDFs using LangChain's PyPDFLoader
def load_pdfs_with_langchain(pdf_paths):
    documents = []
    for path in pdf_paths:
        try:
            # Use LangChain's PyPDFLoader to load the PDF
            loader = PyPDFLoader(path)
            # Load and pase the PDF into document instances
            pdf_doc = loader.load()
            # Insert the parsed PDF documents into the documents list
            documents.extend(pdf_doc)
        except Exception as e:
            print(f"Error loading {path}: {e}")
    return documents

# Load PDF documents using the function
evaluation_pdf_docs = load_pdfs_with_langchain(pdf_paths)

print(len(evaluation_pdf_docs))
print(evaluation_pdf_docs[0].page_content[0:100])
print(evaluation_pdf_docs[7].metadata)

93
14 
 About SMU  
The Vision of Southern Methodist University  
To create and impart knowledge that w
{'source': '../Data/Evaluation Data/Southern Methodist University - 2023-2024 Undergraduate Catalog from About SMU to Right to Know.pdf', 'page': 7}


In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100, 
                                                length_function=len, add_start_index=True)  

# Split the PDF documents into chunks using the text splitter
split_evaluation_pdf_docs = text_splitter.split_documents(evaluation_pdf_docs)
print(len(split_evaluation_pdf_docs))
print(split_evaluation_pdf_docs[0].page_content)
print(split_evaluation_pdf_docs[1].page_content)


395
14 
 About SMU  
The Vision of Southern Methodist University  
To create and impart knowledge that will shape citizens who contribute to their communities and lead their 
professions in a global society.  
The Mission of Southern Methodist University  
Southern Methodist University will create, expand and impart knowledge through teaching, research and service, 
shaping world changers who contribute to their communities and excel in their professions in a global society. 
Among its faculty, students and st aff, the University will cultivate principled thought, develop intellectual skills and 
promote an environment emphasizing individual dignity and worth. SMU affirms its historical commitment to academic freedom and open inquiry, to moral and ethical values,  and to its United Methodist heritage.  
Southern Methodist University  
As a private, comprehensive university enriched by its United Methodist heritage and its partnership with the Dallas
Metroplex, Southern Methodist Univer

In [5]:
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

#generator with openai models
generator_llm = ChatOpenAI(model="gpt-3.5-turbo")
critic_llm= ChatOpenAI(model="gpt-3.5-turbo")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(
     generator_llm=generator_llm,
    critic_llm=critic_llm,
    embeddings=embeddings, 
)


In [None]:

# Generate testset
testset_1 = generator.generate_with_langchain_docs(split_evaluation_pdf_docs, test_size=10, distributions = {simple: 0.5, reasoning: 0.3, multi_context: 0.2})

In [8]:
test_df_1 = testset_1.to_pandas()
test_df_1.to_csv('../Data/Evaluation Data/ragas_testset_1.csv', index=False)

In [None]:
# Generate testset
testset_2 = generator.generate_with_langchain_docs(split_evaluation_pdf_docs, test_size=10, distributions = {simple: 0.3, reasoning: 0.5, multi_context: 0.2})

In [7]:

test_df_2 = testset_2.to_pandas()
test_df_2.to_csv('../Data/Evaluation Data/ragas_testset_2.csv', index=False)

In [1]:
import pandas as pd

csv_paths = ['../Data/Evaluation Data/ragas_testset_1.csv', '../Data/Evaluation Data/ragas_testset_2.csv']

# Read each CSV file and concatenate them into one dataframe
combined_testset = pd.concat([pd.read_csv(path) for path in csv_paths])

combined_testset

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What are the primary considerations for merit ...,['• High academic accomplishments or achieveme...,High academic accomplishments or achievement i...,simple,[{'source': '../Data/Evaluation Data/Southern ...,True
1,Why is it important for students to obtain pre...,"[""may change the designation of the courses. S...",It is important for students to obtain pre-app...,simple,[{'source': '../Data/Evaluation Data/Southern ...,True
2,What information is Southern Methodist Univers...,['97 \n Right to Know \nSouthern Methodist Un...,Southern Methodist University is required to p...,simple,[{'source': '../Data/Evaluation Data/Southern ...,True
3,How does the Critical Reasoning component of t...,['comprises approximately one -third of the ba...,The Critical Reasoning component of the baccal...,simple,[{'source': '../Data/Evaluation Data/Southern ...,True
4,What is the purpose of the entrance examinatio...,['23 \n Music. All entering undergraduate stu...,The purpose of the entrance examinations in wr...,simple,[{'source': '../Data/Evaluation Data/Southern ...,True
5,How does the Common Curriculum meet the LAI gr...,"['Analysis and Interpretation (LAI), Philosoph...",The answer to given question is not present in...,reasoning,[{'source': '../Data/Evaluation Data/Southern ...,True
6,What exception applies to credit hours enrollm...,['completion of the degree. \nLyle School of ...,During the term in which a student is to gradu...,reasoning,[{'source': '../Data/Evaluation Data/Southern ...,True
7,What pre-term programs assist new and readmitt...,['instructions at https://www. smu.edu/Enrollm...,To assist new and readmitted students in makin...,multi_context,[{'source': '../Data/Evaluation Data/Southern ...,True
8,What are the payment terms for room charges at...,['deposit. These fees are nonrefundable. Notif...,Room charges for the fall term are billed and ...,multi_context,[{'source': '../Data/Evaluation Data/Southern ...,True
0,What information is provided by Southern Metho...,['97 \n Right to Know \nSouthern Methodist Un...,Southern Methodist University provides informa...,simple,[{'source': '../Data/Evaluation Data/Southern ...,True


In [7]:
combined_testset.to_csv('../Data/Evaluation Data/combined_ragas_testset.csv', index=False)

In [2]:

test_question_type = combined_testset["evolution_type"].tolist()
test_questions = combined_testset["question"].tolist()
test_contexts = combined_testset["contexts"].tolist()
test_ground_truths = combined_testset["ground_truth"].tolist()

In [3]:
from datasets import Dataset

def run_chain_over_dataset(chain):
    answers = []
    contexts = []

    for question in test_questions:
        response = chain.invoke({"question": question})
        answers.append(response["output"])

        context = response["context"]
        contexts.append([doc.page_content for doc in context])

    response_dataset = Dataset.from_dict({
        "question_type": test_question_type,
        "question": test_questions,
        "ground_truths": test_ground_truths,
        "answer": answers, 
        "contexts": contexts
        })
    return response_dataset

In [5]:
from OG_PerunaBot_chain import Original_PerunaBot_eval_chain
from chain_0 import base_retriever_eval_chain_0
from chain_1 import parent_retriever_eval_chain_1
from chain_2 import ensemble_retriever_eval_chain_2

In [8]:
#langsmith
import os
from dotenv import load_dotenv, find_dotenv

# Load environment variables from the .env file using 'from dotenv import find_dotenv, load_dotenv'
load_dotenv(find_dotenv(filename='SURF-Project_Optimizing-PerunaBot/setup/.env'))

from langsmith import Client
import os
from langchain_openai import ChatOpenAI

langsmith_api_key = os.environ["LANGSMITH_API_KEY"]
langchain_endpoint = os.environ["LANGCHAIN_ENDPOINT"]
langsmith_project = os.environ["LANGCHAIN_PROJECT"]
os.environ["LANGCHAIN_TRACING_V2"]

# Initialize LangSmith Client using 'from langsmith import Client'
langsmith_client = Client(api_url=langchain_endpoint, api_key=langsmith_api_key)

llm = ChatOpenAI(model="gpt-3.5-turbo")
llm.invoke("Hello, world!")


true


In [9]:
OG_PerunaBot_eval = run_chain_over_dataset(Original_PerunaBot_eval_chain)
OG_PerunaBot_eval[0]

{'question_type': 'simple',
 'question': 'What are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU?',
 'ground_truth': 'High academic accomplishments or achievement in the arts, National Merit finalists, International Baccalaureate Diploma recipients, dependent children and spouses of ordained United Methodist ministers engaged in full-time, church-related vocations, and Texas residents are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU.',
 'chain answer': 'The primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU include completing the SMU Application for Admission by January 15, submitting the Free Application for Federal Student Aid (FAFSA) at www.fafsa.ed.gov, and the CSS/Financial Aid Profile at www.collegeboard.org. SMU also offers merit-based scholarships and grant programs to students who 

In [10]:
PerunaBot_0_eval = run_chain_over_dataset(base_retriever_eval_chain_0)
PerunaBot_0_eval[0]

{'question_type': 'simple',
 'question': 'What are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU?',
 'ground_truth': 'High academic accomplishments or achievement in the arts, National Merit finalists, International Baccalaureate Diploma recipients, dependent children and spouses of ordained United Methodist ministers engaged in full-time, church-related vocations, and Texas residents are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU.',
 'chain answer': 'National Merit finalists at SMU are considered for a range of merit scholarships and need-based financial aid. To receive primary consideration for all SMU merit scholarships and other aid, students should:\n\n1. Complete the SMU Application for Admission by January 15.\n2. Submit the Free Application for Federal Student Aid (FAFSA) at [www.fafsa.ed.gov](http://www.fafsa.ed.gov) and the CSS/Financial

In [11]:
PerunaBot_1_eval = run_chain_over_dataset(parent_retriever_eval_chain_1)
PerunaBot_1_eval[0]

{'question_type': 'simple',
 'question': 'What are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU?',
 'ground_truth': 'High academic accomplishments or achievement in the arts, National Merit finalists, International Baccalaureate Diploma recipients, dependent children and spouses of ordained United Methodist ministers engaged in full-time, church-related vocations, and Texas residents are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU.',
 'chain answer': 'National Merit finalists at SMU are considered for merit-based scholarships and need-based financial aid based on several criteria. For merit scholarships, SMU evaluates high academic accomplishments or achievements in the arts, and specifically recognizes National Merit finalists. To receive primary consideration for all SMU merit scholarships and other aid, students should:\n\n1. Complete the SMU A

In [12]:
PerunaBot_2_eval = run_chain_over_dataset(ensemble_retriever_eval_chain_2)
PerunaBot_2_eval[0]

{'question_type': 'simple',
 'question': 'What are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU?',
 'ground_truth': 'High academic accomplishments or achievement in the arts, National Merit finalists, International Baccalaureate Diploma recipients, dependent children and spouses of ordained United Methodist ministers engaged in full-time, church-related vocations, and Texas residents are the primary considerations for merit scholarships and need-based financial aid for National Merit finalists at SMU.',
 'chain answer': 'Primary consideration for merit scholarships and need-based financial aid at SMU for National Merit finalists includes the following criteria:\n\n1. **Complete the SMU Admission Application**: Submit the application along with all supporting materials by January 15.\n2. **File the FAFSA and CSSProfile**: For Early Decision/Early Action, these should be filed by November 1. For Regular Admission, they

In [54]:
from ragas import evaluate
from ragas.metrics import (
    Faithfulness,
    AnswerRelevancy,
    AnswerCorrectness,
    ContextRecall,
    ContextPrecision,
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision
)

metrics = [
    Faithfulness,
    AnswerRelevancy,
    AnswerCorrectness,
    ContextRecall,
    ContextPrecision
]

metrics = [
    faithfulness,
    answer_relevancy,
    answer_correctness,
    context_recall,
    context_precision
]

In [39]:
OG_PerunaBot_eval_results = evaluate(OG_PerunaBot_eval, metrics)

Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

In [40]:
PerunaBot_0_eval_results = evaluate(PerunaBot_0_eval, metrics)

Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

In [55]:
PerunaBot_1_eval_results = evaluate(PerunaBot_1_eval, metrics)


Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

In [57]:
PerunaBot_eval_2_results = evaluate(PerunaBot_2_eval, metrics)

Evaluating:   0%|          | 0/80 [00:00<?, ?it/s]

In [75]:
OG_PerunaBot_eval_results

{'faithfulness': 0.8326, 'answer_relevancy': 0.9247, 'answer_correctness': 0.5257, 'context_recall': 0.7542, 'context_precision': 0.8351}

In [77]:
PerunaBot_0_eval_results

{'faithfulness': 0.7731, 'answer_relevancy': 0.8430, 'answer_correctness': 0.6438, 'context_recall': 0.8155, 'context_precision': 0.7977}

In [79]:
PerunaBot_1_eval_results

{'faithfulness': 0.7268, 'answer_relevancy': 0.8399, 'answer_correctness': 0.6372, 'context_recall': 0.7426, 'context_precision': 0.8038}

In [82]:
PerunaBot_eval_2_results

{'faithfulness': 0.8323, 'answer_relevancy': 0.8491, 'answer_correctness': 0.6134, 'context_recall': 0.6771, 'context_precision': 0.8044}

In [100]:
OG_PerunaBot_eval_results_df = OG_PerunaBot_eval_results.to_pandas()
PerunaBot_0_eval_results_df = PerunaBot_0_eval_results.to_pandas()
PerunaBot_1_eval_results_df = PerunaBot_1_eval_results.to_pandas()
PerunaBot_2_eval_results_df = PerunaBot_eval_2_results.to_pandas()

In [106]:
import pandas as pd

new_df_0 = pd.DataFrame(list(OG_PerunaBot_eval_results.items()), columns=['Metric', 'OG_PerunaBot'])
new_df_1 = pd.DataFrame(list(PerunaBot_0_eval_results.items()), columns=['Metric', 'PerunaBot_0'])
new_df_2 = pd.DataFrame(list(PerunaBot_1_eval_results.items()), columns=['Metric', 'PerunaBot_1'])
new_df_3 = pd.DataFrame(list(PerunaBot_eval_2_results.items()), columns=['Metric', 'PerunaBot_2'])

for df in [new_df_1, new_df_2, new_df_3]:
    df.drop('Metric', axis=1, inplace=True)

total_results_comparison = pd.concat([new_df_0, new_df_1, new_df_2, new_df_3], axis=1)
total_results_comparison

Unnamed: 0,Metric,OG_PerunaBot,PerunaBot_0,PerunaBot_1,PerunaBot_2
0,faithfulness,0.832589,0.773087,0.726772,0.832339
1,answer_relevancy,0.924686,0.842953,0.839893,0.849084
2,answer_correctness,0.525716,0.643834,0.637232,0.613385
3,context_recall,0.754167,0.815476,0.74256,0.677083
4,context_precision,0.835069,0.797671,0.803812,0.804449


In [119]:
# List of dataframes and their names
dataframes = [
    ("Final Results Comparison", total_results_comparison),
    ("OG PerunaBot Evaluation Results", OG_PerunaBot_eval_results_df),
    ("PerunaBot 0 Evaluation Results", PerunaBot_0_eval_results_df),
    ("PerunaBot 1 Evaluation Results", PerunaBot_1_eval_results_df),
    ("PerunaBot 2 Evaluation Results", PerunaBot_2_eval_results_df)
]

# Create the CSV file with utf-8 encoding
with open('../Data/Evaluation Data/1st RAGAS Evaluation 7_25_2024.csv', 'w', encoding='utf-8') as f:
    for name, df in dataframes:
        # Write the name of the dataframe
        f.write(name + "\n")
        # Write the dataframe to csv format
        df.to_csv(f, index=False)
        # Add an empty line after each dataframe if not the last one
        if name != dataframes[-1][0]:
            f.write("\n")