In [1]:
import ragas.metrics as m
import os
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

load_dotenv()
link = os.getenv('dsa_2214')
token = os.getenv('team_token')
os.environ['OPENAI_API_KEY'] = token

chapters = [
    'Data Structures and Algorithms',
    'Mathematical Preliminaries',
    'Algorithm Analysis',
    'Lists, Stacks, and Queues',
    'Binary Trees',
    'Non-Binary Trees',
    'Internal Sorting',
    'File Processing and External Sorting',
    'Searching',
    'Indexing',
    'Graphs',
    'Lists and Arrays Revisited',
    'Advanced Tree Structures',
    'Analysis Techniques',
    'Lower Bounds',
    'Patterns of Algorithms',
    'Limits to Computation',
]

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
actual_concepts = []
with open('data/sorting.txt', 'r') as f:
    lines = f.readlines()
    for line in lines:
        # print(line.strip())
        words = line.strip().split('->')
        # print(concepts)
        for concept in words:
            if concept not in actual_concepts:
                actual_concepts.append(concept)

actual_concepts = [' '.join(actual_concepts)] * 4
# actual_concepts

In [2]:
from src.extractor import LLM_Relation_Extractor

# only considering chapters 6 - 10 (sorting and searching)
rs_extractor = LLM_Relation_Extractor(link, token, chapters[6:10], 'bibliography')

In [3]:
terms = rs_extractor.identify_key_terms(chapters, 10)

In [4]:
concepts = rs_extractor.identify_concepts()

In [5]:
' '.join(concepts[0])

'1. Comparison of sorting algorithms based on performance metrics such as time complexity and space complexity. 2. Understanding the trade-offs between different sorting algorithms in terms of efficiency and implementation complexity. 3. Analysis of lower bounds for sorting algorithms to determine the best possible efficiency for solving the sorting problem. 4. Empirical comparison of sorting algorithms to determine the fastest algorithm for different input sizes and data distributions. 5. Optimization techniques for sorting algorithms, such as optimizing Quicksort by combining it with Insertion Sort for small arrays. 6. Consideration of key types and their impact on the implementation and efficiency of sorting algorithms, such as Radix Sort for different key types. 7. Theoretical analysis of sorting algorithms in terms of key comparison operations and their impact on algorithm efficiency. 8. Implementation considerations for sorting algorithms, such as using stacks to optimize recursi

In [12]:
print('Results using vector store similarity search...')
eval_llm = LangchainLLMWrapper(langchain_llm = ChatOpenAI(temperature = 0))
eval_embeddings = LangchainEmbeddingsWrapper(OpenAIEmbeddings())

# for i in range(5):
samples = rs_extractor.evaluate(concepts, 
                                actual_concepts,
                                metrics = [m.Faithfulness(), 
                                            # m.LLMContextPrecisionWithReference(), 
                                            m.LLMContextRecall(llm = eval_llm),
                                            m.ContextEntityRecall(),
                                            m.NoiseSensitivity(),
                                            m.ResponseRelevancy(),
                                            m.SemanticSimilarity()])

Results using vector store similarity search...


Evaluating: 100%|██████████| 24/24 [01:25<00:00,  3.56s/it]

{'faithfulness': 0.9487, 'context_recall': 0.9375, 'context_entity_recall': 0.0288, 'noise_sensitivity_relevant': 0.4987, 'answer_relevancy': 0.8501, 'semantic_similarity': 0.8099}





In [3]:
rs_extractor.identify_main_topics()

['1. Introduction to Data Structures and Algorithm Analysis :: This topic is likely to be covered as it sets the foundation for understanding the rest of the textbook.',
 '',
 '2. Algorithm Analysis :: Understanding how to analyze algorithms is crucial for evaluating their efficiency and performance.',
 '',
 '3. Lists, Stacks, and Queues :: These are fundamental data structures that are commonly used in programming and should be covered in a textbook on data structures and algorithms.',
 '',
 '4. Trees :: Trees are another important data structure that is widely used in computer science and programming.',
 '',
 '5. Hash Tables :: Hash tables are essential for efficient data retrieval and storage, making them a key topic in data structures.',
 '',
 '6. Sorting and Searching :: Sorting and searching algorithms are fundamental to computer science and programming, so they are likely to be covered in this textbook.',
 '',
 '7. Graphs and Graph Algorithms :: Graphs are used to model relation

Zero shot flow (new methodology)

1. Extract main concepts from the given resource(s)
2. Extract key terms associated each main concepts 
3. Identify term-term is-a relationship, which build up a terminology
4. Identify non-taxonomic relations between terms
5. Extract learning outcomes associated each main concepts 
6. Extract learning outcomes associated key terms
7. Build a knowledge graph and visualize it

Evaluate step 1, 2, 5, 6

In [4]:
rs_extractor.extract_terminology(terms)

'Based on the provided list of terms and their descriptions, the corresponding terminologies from the textbook "Data Structures And Algorithm Analysis in Java" by Clifford A. Shaffer are as follows:\n\n1. Graphs :: Graphs\n2. Binary Trees :: Binary Trees\n3. Lists, Stacks, and Queues :: Lists, Stacks, and Queues\n4. Algorithm Analysis :: Algorithm Analysis\n5. Searching :: Searching\n6. Internal Sorting :: Sorting\n7. Advanced Tree Structures :: Advanced Tree Structures\n8. File Processing and External Sorting :: File Processing and External Sorting\n9. Mathematical Preliminaries :: Mathematical Preliminaries\n10. Analysis Techniques :: Analysis Techniques\n\nThese terms and concepts are discussed in the textbook and are essential for understanding data structures and algorithm analysis.'

In [13]:
terms

['1. Graphs :: 0.95 :: 0.05 :: Graphs are fundamental data structures used to represent relationships between objects. They are widely used in various applications such as social networks, transportation systems, and computer networks.',
 '2. Binary Trees :: 0.92 :: 0.08 :: Binary trees are hierarchical data structures where each node has at most two children. They are essential for efficient searching and sorting algorithms like binary search trees.',
 '3. Lists, Stacks, and Queues :: 0.90 :: 0.10 :: These are basic data structures used to store and manipulate collections of items. Lists allow flexible insertion and deletion, stacks follow Last In First Out (LIFO) principle, and queues follow First In First Out (FIFO) principle.',
 '4. Algorithm Analysis :: 0.88 :: 0.12 :: Algorithm analysis involves evaluating the efficiency and performance of algorithms in terms of time and space complexity. It helps in comparing different algorithms and selecting the most suitable one for a given p

In [14]:
[item.split(" :: ")[0].split(" ")[1] for item in terms]

['Graphs',
 'Binary',
 'Lists,',
 'Algorithm',
 'Searching',
 'Internal',
 'Advanced',
 'File',
 'Mathematical',
 'Analysis']