In [20]:
import pandas as pd 
import os, warnings
from pathlib import Path
from dotenv import load_dotenv
from IPython.display import display, Markdown

#Local
from config import Config
from data_processing import data_processing
from utils import remove_dirs, check_and_create_dirs
from question_generator import question_generator
from evaluation import plot


# Settings
# Silence all Python warnings for the session so library noise does not clutter cell output.
warnings.filterwarnings("ignore")
# pd.set_option('display.max_colwidth', None)  # uncomment to stop pandas truncating long text columns

# Configuration
# Load variables from a .env file (if present) into the process environment.
load_dotenv()
api_key = os.getenv("PERPLEXITY_API_KEY")
# Fail fast: os.getenv returns None when the variable is missing, and the key is passed
# to data processing and evaluation later on, where a missing key is harder to diagnose.
if not api_key:
    raise RuntimeError(
        "PERPLEXITY_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

# 1. Initialization

In [21]:
print('INITIALIZING SESSION')

# Session settings, collected in one mapping and passed to Config as keyword arguments.
session_settings = {
    "project_name": "session_1",   # name of the project
    "input_dir": "data",           # input directory containing the data files
    "api_key": api_key,            # API key for Perplexity
    "n_questions_per_file": 1,     # one question per input file (original note: there are 10 files)
    "n_page_summary": 3,           # number of pages to summarize
    "chunk_size": 5000,
    "chunk_overlap": 500,
}
cfg = Config(**session_settings)

# remove_dirs(cfg)  # uncomment to delete the output directories if they exist
check_and_create_dirs(cfg)


INITIALIZING SESSION
Folder 'session_1' already exists.


In [22]:
print("USER'S RAG MODEL EXAMPLE")

# Short description of the example model, rendered as Markdown below the banner.
model_description = "The user having a FAISS-based retrieval-augmented system that chunks PDFs, embeds them with all-mpnet-base-v2, and queries them using the Sonar LLM for question-answering."
display(Markdown(model_description))

# Build the user's question-answering chain; `qa_chain` is used later to answer the generated questions.
from user_models.qa import create_unified_chain
qa_chain = create_unified_chain()

USER'S RAG MODEL EXAMPLE


The user having a FAISS-based retrieval-augmented system that chunks PDFs, embeds them with all-mpnet-base-v2, and queries them using the Sonar LLM for question-answering.

Loading existing vector store for model_1...
Vector store loaded from: faiss_index_open
Using standard retriever
QA chain created successfully for model_1


# QUESTION GENERATION

In [23]:
# I. Question generation

# Step 1: process and chunk the input data described by `cfg`.
print('DATA PROCESSING AND CHUNKING ...')
data_processing(api_key, cfg, verbose = False)

# Step 2: generate the questions and persist them to the configured question file.
print('QUESTION GENERATION ...')
df = question_generator(cfg, verbose = False)
df.to_csv(cfg.question_file, index = False)

# Step 3: preview the second and third generated questions.
# A slice is used instead of fixed indices so the cell does not raise IndexError
# when fewer than three questions were generated (e.g. a smaller input set).
print('QUESTION EXAMPLE:')
questions = df['question'].to_list()
for example in questions[1:3]:
    display(Markdown(example))

DATA PROCESSING AND CHUNKING ...
QUESTION GENERATION ...
QUESTION EXAMPLE:


What percentage of Cisco's U.S. employee base in fiscal year 2023 was comprised of Asian, African American/Black, and Hispanic/Latino ethnicities?

For Netflix, Inc., during the fiscal year ended December 31, 2024, what categories of employees are exempt from trading window restrictions under the Insider Trading Policy?

# GENERATE ANSWER FROM USER MODEL

(Should take about 7 seconds per question.)

In [24]:
# Ask the user's QA chain every generated question, in order, keeping only the
# 'result' field of each response, then store the answers alongside the questions.
answers = [qa_chain.invoke(query)['result'] for query in df['question']]
df['answer'] = answers

# EVALUATION

In [26]:
# Evaluation helpers: scoring here, plus the summary/plot helpers used in the cells below.
from evaluation import (
    evaluate_answers,
    radar_plot,
    statistics,
)

print('EVALUATION ...')
# Score the question/answer frame; the result is inspected in the following cells.
final_df = evaluate_answers(api_key, df, verbose=False)

EVALUATION ...


KeyError: 'factual_correctness_score'

In [None]:
# Display the evaluated frame returned by `evaluate_answers`.
# NOTE(review): the saved output of the evaluation cell shows a KeyError, so the table
# saved under this cell appears to come from an earlier successful run — re-run on a
# fresh kernel to confirm the two agree.
final_df

Unnamed: 0,question,factual_correctness_score,completeness_score,clarity_score,overall_score
0,What were the key financial accomplishments of...,5,5,5,5.0
1,What percentage of Cisco's U.S. employee base ...,5,5,5,5.0
2,"For Netflix, Inc., during the fiscal year ende...",5,5,5,5.0
3,What were the factors that made auditing Amazo...,4,3,4,3.7
4,Here is a question based strictly on the provi...,3,3,4,3.2
5,How does NVIDIA Corporation's expansion of sup...,4,4,5,4.2
6,"How might incidents related to safety threats,...",3,2,4,2.9
7,"Based on the provided chunk about Reddit, Inc....",5,5,5,5.0
8,What are the key risks and operational challen...,2,2,4,2.4
9,What potential impact could Apple's inability ...,4,4,5,4.2


In [None]:
# Summarize the evaluation results with the project's `statistics` helper
# (the saved output shows count/mean/std/min/median/max for each score column).
statistics(final_df)

Unnamed: 0,count,mean,std,min,median,max
factual_correctness_score,10.0,4.2,1.23,2.0,5.0,5.0
completeness_score,10.0,4.0,1.25,2.0,4.5,5.0
clarity_score,10.0,4.7,0.67,3.0,5.0,5.0
overall_score,10.0,4.24,1.09,2.2,4.85,5.0


In [None]:
# Draw a radar plot from the evaluation results.
# NOTE(review): presumably one axis per score column — confirm in `evaluation.radar_plot`.
radar_plot(final_df)

In [None]:
# `overall_histogram` is not imported or defined anywhere else in this notebook, so this
# cell raised NameError on a fresh kernel. Import it here before use.
# NOTE(review): assumed to live in `evaluation` next to `radar_plot` and `statistics` —
# confirm the module, and consider moving the import to the top import cell.
from evaluation import overall_histogram

# Plot the evaluation results as a histogram.
overall_histogram(final_df)