In [4]:
from langchain.llms import OpenAI
import os
from dotenv import load_dotenv
import matplotlib.pyplot as plt
import graphviz
from IPython.display import display, Image 
import re

In [5]:
import warnings
# Suppress every warning for the rest of the session (no category or module filter is given)
warnings.filterwarnings("ignore")

In [6]:
# Load the variables defined in the local .env file into the process environment
load_dotenv()

# You can replace os.getenv('team_token') with a string containing the token; reading it from the
# environment simply keeps the secret out of the notebook.
team_token = os.getenv('team_token')
if not team_token:
    # os.environ only accepts str values, so assigning a missing (None) token would fail with an opaque TypeError
    raise RuntimeError("Environment variable 'team_token' is not set; define it in your .env file.")
os.environ['OPENAI_API_KEY'] = team_token

In [7]:
# LLM wrapper that every prompt in this notebook is sent through
llm = OpenAI()
# Textbook reference read from the environment; it is interpolated into the prompts below
# (presumably a link, since it is passed as `link` to the extractor -- confirm)
dsa_2214 = os.getenv('dsa_2214')

# Try out class as well
from relation_algorithms.relation_extraction_functions import LLM_Relation_Extractor
extractor = LLM_Relation_Extractor(link = dsa_2214, token = os.getenv('team_token'))

# Map 'Chapter N' -> chapter title. Titles are listed in chapter order, so numbering starts at 1.
chapters = {
    f'Chapter {number}': title
    for number, title in enumerate(
        (
            'Data Structures and Algorithms',
            'Mathematical Preliminaries',
            'Algorithm Analysis',
            'Lists, Stacks, and Queues',
            'Binary Trees',
            'Non-Binary Trees',
            'Internal Sorting',
            'File Processing and External Sorting',
            'Searching',
            'Indexing',
            'Graphs',
            'Lists and Arrays Revisited',
            'Advanced Tree Structures',
            'Analysis Techniques',
            'Lower Bounds',
            'Patterns of Algorithms',
            'Limits to Computation',
        ),
        start = 1,
    )
}

In [8]:

# Identify learning concepts and outcomes using llm
learning_concepts_list = []
learning_outcomes_list = []

# Iterate through the chapters, using each chapter id and name to ask the llm about the textbook
for chapter, chapter_name in chapters.items():
    learning_concept = llm(f"Please identify the main learning concepts given {chapter}, the chapter name is {chapter_name}. Here is the textbook in which to retrieve them: {dsa_2214}")
    # Replace any character that is not a letter, whitespace, comma, period, exclamation, or question mark with an empty string.
    # The pattern is a raw string so that \s and \. reach the regex engine instead of being parsed as (invalid) string escapes.
    learning_concept = re.sub(r'[^a-zA-Z\s\.,!?]', '', learning_concept)
    # Append concept to the learning concepts list
    learning_concepts_list.append(learning_concept)

    # Same request and clean-up for the learning outcomes of the chapter
    learning_outcome = llm(f"Please identify the main learning outcomes given {chapter}, the chapter name is {chapter_name}. Here is the textbook in which to retrieve them: {dsa_2214}")
    learning_outcome = re.sub(r'[^a-zA-Z\s\.,!?]', '', learning_outcome)
    learning_outcomes_list.append(learning_outcome)

In [9]:
common_concepts_dict = {}
# common_outcomes_dict = {}

# Initialize each chapter name as a key in the dictionary, with a tuple of two lists as the value.
# The first list collects the results of the first run and the second list those of the second run
for name in chapters.values():
    common_concepts_dict[name] = ([], [])
    # common_outcomes_dict[name] = ([], [])

# Run 5 times and compare results, do this twice 
for i in range(2):
    for j in range(5):
        for chapter, chapter_name in chapters.items():
            # Ask llm for learning concepts for the current chapter, replace any characters that are not letters, whitespace, or punctuation with an empty string.
            # The pattern is a raw string so that \s and \. are regex escapes rather than (invalid) string escapes.
            learning_concept = llm(f"Please identify the main learning concepts given {chapter}, the chapter name is {chapter_name}. Here is the textbook in which to retrieve them: {dsa_2214}. Please limit your answer to 20 concepts and provide them in a list format.")
            learning_concept = re.sub(r'[^a-zA-Z\s\.,!?]', '', learning_concept)

            # Append the concepts to the current list (1st or 2nd) depending on the value of i.
            # [2:] skips the first two entries of the split, which are expected to be empty strings caused by leading new line characters
            # NOTE(review): if a response does not start with two new lines, this slice drops real concepts -- confirm against actual responses
            # Only append learning concepts, ignore empty string ('') entries
            common_concepts_dict[chapter_name][i].append([concept for concept in learning_concept.split('\n')[2:] if concept != ''])
    
            # learning_outcome = llm(f"Please identify the main learning outcomes given {chapter}, the chapter name is {chapter_name}. Here is the textbook in which to retrieve them: {dsa_2214}. Please limit your answer to 20 outcomes and provide them in a list format.")
            # learning_outcome = re.sub(re.compile('[^a-zA-Z\s\.,!?]'), '', learning_outcome)
            # common_outcomes_dict[chapter_name][i].append([outcome for outcome in learning_outcome.split('\n')[2:] if outcome != ''])


In [10]:
in_common_concepts = {}
# in_common_outcomes = {}

# Data structure: chapter name -> list that receives one entry per run
for key in chapters.values():
    in_common_concepts[key] = []
    # in_common_outcomes[key] = []

first_key = list(common_concepts_dict.keys())[0]

# Run once per run-list (twice), pass all of the lists holding the concepts for that chapter to the llm and ask what's in common. 
# Append as a list, so each value ends up as a list of two lists: [[], []]
for idx in range(len(common_concepts_dict[first_key])):
    for chapter_name, chapter_concepts in common_concepts_dict.items():
        # Join however many concept lists were collected for this run instead of hard-coding indices 0-4;
        # with five lists this yields exactly the same prompt text as before.
        concept_lists = ', '.join(str(concept_list) for concept_list in chapter_concepts[idx])
        content = llm(f"Please identify the common concepts between these lists of concepts for chapter {chapter_name}? {concept_lists}? Please limit your response to 20 concepts in a list format.")
        # [2:] skips the first two entries of the split response (expected to be empty because of leading new lines)
        in_common_concepts[chapter_name].append(content.split('\n')[2:])
    
    # for chapter_name, chapter_outcomes in common_outcomes_dict.items():
    #     content = llm(f"Please identify the common learning outcomes between these lists for chapter {chapter_name}? {chapter_outcomes[idx][0]}, {chapter_outcomes[idx][1]}, {chapter_outcomes[idx][2]}, {chapter_outcomes[idx][3]}, {chapter_outcomes[idx][4]}? Please limit your response to 20 outcomes in a list format.")
    #     in_common_outcomes[chapter_name].append(content.split('\n')[2:])


In [11]:
final_common_concept_dict = {}
final_common_outcome_dict = {}

# For every chapter, hand both run results to the llm and keep what it reports as common to the two.
# The prompt asks for the top nine concepts to try and match the length of the true values list used for evaluation
for key, run_lists in in_common_concepts.items():
    content = llm(f"Please identify the common concepts between these two lists: {run_lists[0]}, {run_lists[1]}. Please only include the top NINE most important concepts.")

    # Skip the first two entries of the split response, then keep only lines that are not blank
    kept_lines = []
    for line in content.split('\n')[2:]:
        if line.strip():
            kept_lines.append(line)
    final_common_concept_dict[key] = kept_lines

    # content = llm(f"Please identify the common learning outcomes between these two lists: {in_common_outcomes[key][0]}, {in_common_outcomes[key][1]}")
    # final_common_outcome_dict[key] = [outcome for outcome in content.split('\n')[2:] if outcome.strip() != '']


In [None]:
import warnings
warnings.filterwarnings('ignore')

keys = list(final_common_concept_dict.keys())
# Chapter name -> list of earlier chapter names recorded as its prerequisites; every chapter starts with none
dependencies = {key: [] for key in keys}

# Iterate through the set of keys (chapters)
for i in range(len(keys)):
    # Grab current concepts list from the current chapter
    current_concept = final_common_concept_dict[keys[i]]
    for j in range(i + 1, len(keys)):
        # Grab next concepts list from a later chapter, compare the two and ask whether the second has a prerequisite for the first.
        # If there is a prerequisite, append the first chapter name to the second chapter's list of dependencies
        next_concept = final_common_concept_dict[keys[j]]
        content = llm(f"Please identify if this list of concepts: {next_concept} has a prerequisite for this list of concepts: {current_concept}. If there is NO prerequisite, please respond with 'No' and 'No' only.")

        # The reply counts as negative when its first word is "No", whatever the case or trailing punctuation
        # ("No", "No.", "no, because ..."). An exact comparison against 'No' misread "No." as a positive answer.
        if not re.match(r'\s*no\b', content, flags = re.IGNORECASE):
            dependencies[keys[j]].append(keys[i])
            

In [26]:
from ragas import evaluate, SingleTurnSample
from ragas.metrics import AnswerCorrectness, SemanticSimilarity, FactualCorrectness
from ragas.dataset_schema import EvaluationDataset

# Collect the unique ground-truth terms in first-seen order; every line of the file is split on '->'
true_values = []
with open('S2_2214_ontology_knowledge_terms.txt', 'r') as data:
    for line in data:
        for word in line.split('->'):
            word = word.strip()
            # Blank lines and dangling arrows produce empty strings, which are not real terms
            if word and word not in true_values:
                true_values.append(word)


# Collect the unique predicted concepts across all chapters in first-seen order
predictions = []
for concept_list in final_common_concept_dict.values():
    for concept in concept_list:
        # Remove list numbering such as "1. " (also when it is indented) and surrounding whitespace,
        # so the same concept is not kept twice because of formatting differences
        concept = re.sub(r'^\s*\d+\.\s*', '', concept).strip()
        if concept and concept not in predictions:
            predictions.append(concept)

In [29]:
from relation_algorithms.relation_extraction_functions import LLM_Relation_Extractor
# extractor.validate(final_common_concept_dict)


# from sklearn.metrics import precision_score, recall_score, f1_score
import random

# Finish metric scores 
# When there are more predictions than true values, randomly drop the surplus so both lists have the same length.
# random.sample draws DISTINCT indices; random.choices draws with replacement, so it could repeat an index and remove too few items.
# When predictions are not longer than the true values nothing is dropped: removing predictions would only widen
# the gap, and zip() below stops at the shorter list anyway.
surplus = max(len(predictions) - len(true_values), 0)
random_indices = set(random.sample(range(len(predictions)), k = surplus))
predictions = [prediction for idx, prediction in enumerate(predictions) if idx not in random_indices]


# Pair every true value with the prediction at the same position and wrap the pair in an evaluation sample
# NOTE(review): the pairing is purely positional, so a true value and its prediction are not necessarily about the same concept
samples = []
for true_v, pred in zip(true_values, predictions):
    samples.append(SingleTurnSample(
        # NOTE(review): chapter and chapter_name are whatever the last chapter loop left behind, so every sample gets the same prompt
        user_input = f'Please identify the main learning concepts given {chapter}, the chapter name is {chapter_name}.',
        # The prediction and the ground truth were previously never attached to the sample
        response = pred,
        reference = true_v
    ))



# I have no idea why the scores are 0 and how to fix it :(

# print(f'Precision Score: {precision_score(true_values, predictions, average = "micro")}')
# print(f'Recall Score: {recall_score(true_values, predictions, average = "micro")}')
# print(f'F1 Score: {f1_score(true_values, predictions, average = "micro")}')

In [None]:
import graphlib
# Order the chapters topologically (a chapter comes after the chapters listed as its dependencies) before drawing the hypergraph
sorted_dependencies = tuple(graphlib.TopologicalSorter(graph = dependencies).static_order())
print(sorted_dependencies)

In [None]:
from hypernetx import draw, Hypergraph

# Rebuild the dependency mapping as a dictionary whose keys follow the topologically sorted chapter order
temp = sorted_dependencies
sorted_dependencies = {chapter_title: dependencies[chapter_title] for chapter_title in temp}

print(sorted_dependencies)

# Plot hypergraph 
hypergraph = Hypergraph(sorted_dependencies)
draw(hypergraph)
plt.title('Hypergraph')
plt.show()

In [None]:
from py3plex.core import multinet

# Create multi layer network
multi_graph = multinet.multi_layer_network(network_type = "multiplex")

# Every chapter name becomes a node (the name fills both the "source" and the "type" field);
# each chapter it depends on becomes the target of an edge starting at that node
for chapter_title, prerequisite_titles in sorted_dependencies.items():
    multi_graph.add_nodes({"source": chapter_title, "type": chapter_title})
    for prerequisite_title in prerequisite_titles:
        multi_graph.add_edges(
            {
                "source": chapter_title,
                "target": prerequisite_title,
                "source_type": chapter_title,
                "target_type": prerequisite_title,
            },
            input_type = "dict",
        )

# Plot multigraph 
multi_graph.visualize_network(style = "diagonal")
plt.title("Multilayered Dependency Graph")
plt.show()

In [None]:
# Associate every chapter title with the concepts and outcomes collected for it (matched by position)
learning_concept_graph = {
    name: (learning_concepts_list[idx], learning_outcomes_list[idx])
    for idx, name in enumerate(chapters.values())
}

# Resulting structure:
# chapter title -> (learning concept(s), learning outcome(s)), i.e. concepts at index 0 and outcomes at index 1
learning_concept_graph

In [None]:
association_dict = {}

for name in chapters.values():
    association_dict[name] = []

# Materialize the titles and their (concepts, outcomes) tuples once instead of rebuilding both lists on every iteration
chapter_titles = list(learning_concept_graph.keys())
chapter_tuples = list(learning_concept_graph.values())

# Build associations algorithm: compare the concepts of every chapter with those of every later chapter
for i in range(len(chapter_tuples)):
    current_tuple = chapter_tuples[i]
    for j in range(i + 1, len(chapter_tuples)):
        next_tuple = chapter_tuples[j]

        # Ask llm if there is an association between the two concepts 
        new_association = llm(f"Please identify if there is an association between this concept: {current_tuple[0]}, and this other concept: {next_tuple[0]}. If there is NO association, please start your response with 'No' and 'No' only.")
        # Raw string so that \s and \. are regex escapes rather than (invalid) string escapes
        new_association = re.sub(r'[^a-zA-Z\s\.,!?]', '', new_association)
        # Only add associations to the graph. The reply is negative when its first word is "No", whatever the case or
        # trailing punctuation; an exact comparison against 'No' treated replies such as "No." as an association.
        if not re.match(r'\s*no\b', new_association, flags = re.IGNORECASE):
            association_dict[chapter_titles[i]].append(chapter_titles[j])


In [None]:
# Display the mapping of each chapter title to the chapter titles associated with it
association_dict

In [None]:
# Render the learning concept associations as a directed graph
graph = graphviz.Digraph(name = "Learning Concept Associations")

# One node per chapter, plus one edge towards every chapter it is associated with
for source_chapter, associated_chapters in association_dict.items():
    graph.node(name = source_chapter)
    for target_chapter in associated_chapters:
        graph.edge(source_chapter, target_chapter)

# Render to PNG bytes and show the image inline
png_bytes = graph.pipe(format = "png", renderer = "cairo")
display(Image(png_bytes))

In [None]:
from pyvis.network import Network

graph = Network(notebook = True, cdn_resources = "remote")

graph.toggle_physics(False)

# Showing all interactivity options, but can be parameterized to only include some
graph.show_buttons()

# Give every chapter a numeric node id, counting from 1 in dictionary order
node_id_dict = {
    name: node_id
    for node_id, name in enumerate(learning_concept_graph.keys(), start = 1)
}

# Add one node per chapter; its title text combines the chapter's concepts and outcomes
for chapter_name, chapter_id in node_id_dict.items():
    chapter_entry = learning_concept_graph[chapter_name]
    node_title = "Main Learning Concepts: " + chapter_entry[0] + "\n" + "Main Learning Outcomes:" + chapter_entry[1]
    graph.add_node(n_id = chapter_id, label = chapter_name, title = node_title)

# Connect every chapter to the chapters it is associated with
for source_chapter, associated_chapters in association_dict.items():
    for target_chapter in associated_chapters:
        graph.add_edge(node_id_dict[source_chapter], node_id_dict[target_chapter])

graph.show(name = "interactive.html")

In [None]:
# Ask llm to identify ten main topics from the textbook, split the response into lines, and print one line per row
topics = llm(f"Can you give 10 main topics from this textbook? {dsa_2214}").split('\n')
print(*topics, sep = '\n')

In [None]:
# Give every topic (skipping the first two response lines) its own empty list of associated topics
main_concept_graph = {topic: [] for topic in topics[2:]}
main_concept_graph.keys()

In [None]:
# Ask llm for the associations between the main topics, 
# if there is an association add the associated topic to the other topic's list of associations in the dictionary:
# topic : [topic 1, topic 2, ..., topic n ]

# Indices start at 2 because the first two response lines are skipped, matching the keys of main_concept_graph
for i in range(2, len(topics)):
    for j in range(2, len(topics)):
        if i != j:
            association = llm(f'Are there any associations between {topics[i]} and {topics[j]}? If there are NOT, please respond with "No" and "No" only.')
            # Raw string so that \s and \. are regex escapes rather than (invalid) string escapes
            association = re.sub(r'[^a-zA-Z\s\.,!?]', '', association)
            # An empty reply is no evidence of an association (and used to raise IndexError on split()[0]).
            # Otherwise the reply is negative when its first word is "No", whatever the case or trailing punctuation.
            if association.strip() and not re.match(r'\s*no\b', association, flags = re.IGNORECASE):
                main_concept_graph[topics[i]].append(topics[j])
main_concept_graph

In [None]:
# Plot directed graph of topic associations
main_graph = graphviz.Digraph(name = "Main Concept Graph")

# Register every topic as a node first
for topic_name in main_concept_graph:
    main_graph.node(name = topic_name)

# Then add an edge from each topic to every topic recorded as associated with it
for topic_name, associated_topics in main_concept_graph.items():
    for associated_topic in associated_topics:
        main_graph.edge(topic_name, associated_topic)

# Render to PNG bytes and show the image inline
main_png = main_graph.pipe(format = "png", renderer = "cairo")
display(Image(main_png))