In [1]:
import ragas.metrics as m
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Load variables from a local .env file into the process environment.
load_dotenv()

# Both values are read from the environment and later handed to
# LLM_Relation_Extractor as its first two arguments.
link = os.getenv('dsa_2214')
token = os.getenv('team_token')

# os.environ only accepts str values, so a missing token would otherwise fail
# with an opaque "str expected, not NoneType" TypeError. Fail early with a
# message that says what to fix instead.
if not token:
    raise RuntimeError(
        "Environment variable 'team_token' is not set; add it to your .env "
        "file or export it before running this notebook."
    )
os.environ['OPENAI_API_KEY'] = token

# Chapter titles, in order. The trailing comment on each entry is its 0-based
# list index, which is what slices such as chapters[6:10] refer to.
chapters = [
    "Data Structures and Algorithms",        # 0
    "Mathematical Preliminaries",            # 1
    "Algorithm Analysis",                    # 2
    "Lists, Stacks, and Queues",             # 3
    "Binary Trees",                          # 4
    "Non-Binary Trees",                      # 5
    "Internal Sorting",                      # 6
    "File Processing and External Sorting",  # 7
    "Searching",                             # 8
    "Indexing",                              # 9
    "Graphs",                                # 10
    "Lists and Arrays Revisited",            # 11
    "Advanced Tree Structures",              # 12
    "Analysis Techniques",                   # 13
    "Lower Bounds",                          # 14
    "Patterns of Algorithms",                # 15
    "Limits to Computation",                 # 16
]

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Build the reference concept text from data/sorting.txt.
# Each line of the file holds concepts separated by '->'.

# NOTE(review): presumably one reference string per evaluated sample
# (chapters[6:10] has 4 entries) — confirm before changing the chapter slice.
REFERENCE_COPIES = 4

# Explicit encoding so the result does not depend on the platform default.
with open('data/sorting.txt', 'r', encoding='utf-8') as f:
    # Strip every token so 'a -> b' and 'a->b' yield the same concepts;
    # without this, 'b' and ' b' would be kept as two distinct entries.
    stripped_tokens = (
        token.strip()
        for line in f
        for token in line.split('->')
    )
    # Drop empty tokens (blank lines, stray separators), then de-duplicate.
    # dict.fromkeys keeps first-seen order and avoids the quadratic
    # "x not in list" scan.
    unique_concepts = list(dict.fromkeys(tok for tok in stripped_tokens if tok))

# The same space-joined concept string, repeated; this list is what gets
# passed to rs_extractor.evaluate() as the reference argument.
actual_concepts = [' '.join(unique_concepts)] * REFERENCE_COPIES

In [2]:
from src.extractor import LLM_Relation_Extractor

# Restrict the extractor to the sorting/searching portion of `chapters`:
# list indices 6-9, i.e. 'Internal Sorting' through 'Indexing'.
selected_chapters = chapters[6:10]
rs_extractor = LLM_Relation_Extractor(link, token, selected_chapters, 'bibliography')



In [4]:
# Run concept identification on the extractor built in the previous cell.
# The result is later passed to rs_extractor.evaluate() as its first argument.
concepts = rs_extractor.identify_concepts()

In [12]:
print('Results using vector store similarity search...')

# Evaluator models: a temperature-0 chat model and OpenAI embeddings, each
# wrapped in the corresponding ragas Langchain adapter.
eval_llm = LangchainLLMWrapper(langchain_llm=ChatOpenAI(temperature=0))
# NOTE(review): eval_embeddings is not referenced anywhere else in this cell —
# confirm whether the embedding-based metrics were meant to receive it.
eval_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# Metrics to score. Only LLMContextRecall is given eval_llm explicitly; the
# others are constructed with their defaults.
ragas_metrics = [
    m.Faithfulness(),
    m.LLMContextRecall(llm=eval_llm),
    m.ContextEntityRecall(),
    m.NoiseSensitivity(),
    m.ResponseRelevancy(),
    m.SemanticSimilarity(),
]

# Score the extracted concepts against the reference concept strings.
samples = rs_extractor.evaluate(concepts, actual_concepts, metrics=ragas_metrics)

Results using vector store similarity search...


Evaluating: 100%|██████████| 24/24 [01:25<00:00,  3.56s/it]

{'faithfulness': 0.9487, 'context_recall': 0.9375, 'context_entity_recall': 0.0288, 'noise_sensitivity_relevant': 0.4987, 'answer_relevancy': 0.8501, 'semantic_similarity': 0.8099}



