In [None]:
! pip install openai langchain
! pip install llama_index llama-index-embeddings-huggingface

In [None]:
from IPython.display import HTML, display

def set_css(info=None):
  """Inject a <style> block so that <pre> output wraps long lines.

  Registered below as a ``pre_run_cell`` callback, so it is displayed before
  each cell runs.

  Args:
    info: Execution info that IPython may pass to ``pre_run_cell`` callbacks.
      It is accepted only so the callback works whether or not IPython
      supplies it, and is otherwise ignored.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))

# NOTE: re-running this cell defines a new function object and registers it again.
get_ipython().events.register('pre_run_cell', set_css)

In [None]:
# Enable the autoreload extension in mode 2 so edits to imported local modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Mount Google Drive at /content/drive; the input PDF is read from it below.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
from google.colab import userdata

# Copy the secrets stored in Colab's user data into environment variables.
# The Hugging Face secret is stored under the name 'HF_TOKEN' but is exported
# as HUGGINGFACE_API_KEY.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
os.environ["HUGGINGFACE_API_KEY"] = userdata.get('HF_TOKEN')

def get_openai_api_key():
    """Return the value of the OPENAI_API_KEY environment variable, or None if unset."""
    return os.environ.get("OPENAI_API_KEY")


def get_hf_api_key():
    """Return the value of the HUGGINGFACE_API_KEY environment variable, or None if unset."""
    return os.environ.get("HUGGINGFACE_API_KEY")

In [None]:
# Folder that holds the source PDF. The path is relative, so it is resolved
# against the current working directory.
# NOTE(review): presumably the working directory is /content, where the drive is mounted; confirm.
data_path = 'drive/My Drive/DATA/advanced_RAG/'

In [None]:
from llama_index.core import SimpleDirectoryReader

# Load the e-book PDF. The result is a list of Document objects
# (41 of them in the recorded output below).
input_files = [data_path + "eBook-How-to-Build-a-Career-in-AI.pdf"]
documents = SimpleDirectoryReader(input_files=input_files).load_data()

In [None]:
# Inspect what the reader returned: container type, number of documents,
# and the type and printed form of the first document.
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

41 

<class 'llama_index.core.schema.Document'>
Doc ID: 344fd3c2-18ac-43cf-8ff1-c05dd253369f
Text: PAGE 1Founder, DeepLearning.AICollected Insights from Andrew Ng
How to  Build Your Career in AIA Simple Guide


## COMBINE ALL LOADED DOCUMENTS INTO A SINGLE DOCUMENT:
This makes the text more amenable to blending during retrieval:

In [None]:
from llama_index.core import Document

# Concatenate the text of every loaded document, separated by blank lines,
# into one Document.
document = Document(text="\n\n".join([doc.text for doc in documents]))

# The HierarchicalNodeParser:

In [None]:
from llama_index.core.node_parser import HierarchicalNodeParser

# Create the hierarchical node parser with explicit chunk sizes, one per
# hierarchy level, listed from largest to smallest; all other settings are defaults.
# NOTE(review): chunk sizes are presumably measured in tokens — confirm.
chunk_sizes = [2048, 512, 128]
node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)

nodes = node_parser.get_nodes_from_documents([document]) # returns all nodes (root, intermediate & leaf)

In [None]:
# Total number of nodes across all hierarchy levels.
len(nodes)

144

In [None]:
from llama_index.core.node_parser import get_leaf_nodes, get_root_nodes

# Extract the leaf nodes and the root nodes from the full node list.
leaf_nodes = get_leaf_nodes(nodes)
root_nodes = get_root_nodes(nodes)

# Show how many nodes fall in each group.
len(leaf_nodes), len(root_nodes)

(112, 6)

In [None]:
# Print the text of one leaf node (index 30) as an example.
print(leaf_nodes[30].text)

But this became less important as numerical linear algebra libraries matured.
Deep learning is still an emerging technology, so when you train a neural network and the 
optimization algorithm struggles to converge, understanding the math behind gradient 
descent, momentum, and the Adam  optimization algorithm will help you make better decisions. 
Similarly, if your neural network does something funny — say, it makes bad predictions on 
images of a certain resolution, but not others — understanding the math behind neural network 
architectures puts you in a better position to figure out what to do.
Of course, I also encourage learning driven by curiosity.


In [None]:
# Map every node's id to the node so related nodes can be resolved by id.
nodes_by_id = {node.node_id: node for node in nodes}

# Look up the parent of the example leaf node and print the parent's text.
parent_node = nodes_by_id[leaf_nodes[30].parent_node.node_id]
print(parent_node.text)

PAGE 12Should You 
Learn Math to 
Get a Job in AI? CHAPTER 3
LEARNING

PAGE 13Should you Learn Math to Get a Job in AI? CHAPTER 3
Is math a foundational skill for AI? It’s always nice to know more math! But there’s so much to 
learn that, realistically, it’s necessary to prioritize. Here’s how you might go about strengthening 
your math background.
To figure out what’s important to know, I find it useful to ask what you need to know to make 
the decisions required for the work you want to do. At DeepLearning.AI, we frequently ask, 
“What does someone need to know to accomplish their goals?” The goal might be building a 
machine learning model, architecting a system, or passing a job interview.
Understanding the math behind algorithms you use is often helpful, since it enables you to 
debug them. But the depth of knowledge that’s useful changes over time. As machine learning 
techniques mature and become more reliable and turnkey, they require less debugging, and a 
shallower understand

In [None]:
# The parent's children are references (RelatedNodeInfo, see the output below),
# not the node objects themselves.
parent_node.child_nodes

[RelatedNodeInfo(node_id='3d5941f8-2feb-4097-8003-adb5efe8926a', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='0b055b41cd232263958cb198e3e546c1cd576221d83ac18f918f053a9763fc4e'),
 RelatedNodeInfo(node_id='bbee0bdf-e103-4d80-ad32-0a1e46de2f9c', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='43f12cfb773774246e3b4cb4763280b0eadf866dc0be37c4f6ccf42b0dbc0632'),
 RelatedNodeInfo(node_id='48364374-6911-4389-846b-bce4d47edf14', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='1c0d8b1af9f2cf385841b8732d6f33b8002f3b33a9f30fa08392d58e0f59b3b8'),
 RelatedNodeInfo(node_id='a3cb8a4f-f9a3-4250-b0e3-4118f2d67bd2', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='c8fbfe10a099b84f259c3f554384b093c527edd01d1057c3048324db73adf8bd'),
 RelatedNodeInfo(node_id='cc161721-894d-47dd-9686-4504a2d4f923', node_type=<ObjectType.TEXT: '1'>, metadata={}, hash='9d48d41905bd495d1bbf0d5367c9020e14e8ff4aa4f7f91a701468e78706c961')]

In [None]:
# Resolve each child reference to its node through nodes_by_id and print it,
# followed by a separator line.
for child_node in parent_node.child_nodes:
    print(nodes_by_id[child_node.node_id])
    print()
    print('----------')

Node ID: 3d5941f8-2feb-4097-8003-adb5efe8926a
Text: PAGE 12Should You  Learn Math to  Get a Job in AI? CHAPTER 3
LEARNING  PAGE 13Should you Learn Math to Get a Job in AI? CHAPTER 3
Is math a foundational skill for AI? It’s always nice to know more
math! But there’s so much to  learn that, realistically, it’s
necessary to prioritize. Here’s how you might go about strengthening
your math background.

----------
Node ID: bbee0bdf-e103-4d80-ad32-0a1e46de2f9c
Text: Here’s how you might go about strengthening  your math
background. To figure out what’s important to know, I find it useful
to ask what you need to know to make  the decisions required for the
work you want to do. At DeepLearning.AI, we frequently ask,  “What
does someone need to know to accomplish their goals?” The goal might
be building a  mac...

----------
Node ID: 48364374-6911-4389-846b-bce4d47edf14
Text: But the depth of knowledge that’s useful changes over time. As
machine learning  techniques mature and become more reli

### Building the index

In [None]:
from llama_index.llms.openai import OpenAI
from llama_index.core import ServiceContext

# LLM passed into the service context below.
llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

# Bundle the LLM, the local embedding model and the hierarchical node parser
# into one service context that the index cells pass as `service_context`.
# NOTE(review): the recorded cell output echoes this call, which looks like a
# deprecation warning — confirm whether ServiceContext should be replaced.
auto_merging_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    node_parser=node_parser,
)

  auto_merging_context = ServiceContext.from_defaults(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
from llama_index.core import VectorStoreIndex, StorageContext


# Put ALL nodes (root, intermediate and leaf) into the docstore.
storage_context = StorageContext.from_defaults()
storage_context.docstore.add_documents(nodes)

# Build the vector index from the leaf nodes only, sharing the storage context
# that holds every node.
automerging_index = VectorStoreIndex(
    leaf_nodes,
    storage_context=storage_context,
    service_context=auto_merging_context
)

# Save the index to ./merging_index. This cell rebuilds and persists
# unconditionally each time it runs.
automerging_index.storage_context.persist(persist_dir="./merging_index")

In [None]:
# Optional alternative to the cell above: reuse a persisted index if one exists.
# If ./merging_index is missing, the index is built and persisted;
# otherwise it is loaded from that directory.
# Note: if the previous cell has already run, the directory exists and this
# cell always takes the load branch.

import os
from llama_index.core import load_index_from_storage

if not os.path.exists("./merging_index"):
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    automerging_index = VectorStoreIndex(
            leaf_nodes,
            storage_context=storage_context,
            service_context=auto_merging_context
        )

    automerging_index.storage_context.persist(persist_dir="./merging_index")
else:
    automerging_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./merging_index"),
        service_context=auto_merging_context
    )


### Defining the retriever and running the query engine

In [None]:
from llama_index.core.retrievers import AutoMergingRetriever
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core.query_engine import RetrieverQueryEngine

# Base retriever: similarity search over the index, returning the top 12 nodes.
automerging_retriever = automerging_index.as_retriever(
    similarity_top_k=12
)

# Wrap the base retriever so retrieved nodes can be merged into their parent
# nodes, which are looked up through the index's storage context.
retriever = AutoMergingRetriever(
    automerging_retriever,
    automerging_index.storage_context,
    verbose=True
)

# Rerank the retrieved nodes and keep the top 6.
rerank = SentenceTransformerRerank(top_n=6, model="BAAI/bge-reranker-base")

# Build the engine on the AutoMergingRetriever. Passing the base retriever here
# would leave `retriever` unused and skip auto-merging.
auto_merging_engine = RetrieverQueryEngine.from_args(
    retriever, node_postprocessors=[rerank]
)

In [None]:
# Run one example question through the query engine.
auto_merging_response = auto_merging_engine.query(
    "What is the importance of networking in AI?"
)

In [None]:
from llama_index.core.response.notebook_utils import display_response

# Render the engine's response in the notebook.
display_response(auto_merging_response)

**`Final Response:`** Networking in AI is crucial as it allows individuals to build a strong professional community that can provide valuable information, support, and opportunities. By connecting with others in the field, individuals can receive help, advice, and referrals to potential employers. Additionally, networking can help individuals stay updated on the latest trends and developments in AI, foster collaborations, and create a supportive environment for personal and professional growth.

## TruLens Evaluation

In [None]:
from trulens_eval import Tru

# Reset the TruLens database before the experiments below.
# NOTE(review): presumably this discards previously recorded results — confirm.
Tru().reset_database()

### Two layers

In [None]:
# build_automerging_index is not defined anywhere in this notebook, so import it
# explicitly; otherwise a fresh kernel fails here with NameError.
# NOTE(review): assumes the local utils module exposes it alongside
# get_prebuilt_trulens_recorder — confirm.
from utils import build_automerging_index

# Two-level hierarchy: chunk sizes 2048 and 512, persisted under merging_index_0.
auto_merging_index_0 = build_automerging_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index_0",
    chunk_sizes=[2048,512],
)

In [None]:
# get_automerging_query_engine is not defined anywhere in this notebook, so import
# it explicitly; otherwise a fresh kernel fails here with NameError.
# NOTE(review): assumes the local utils module exposes it alongside
# get_prebuilt_trulens_recorder — confirm.
from utils import get_automerging_query_engine

# Query engine over the two-level index: retrieve 12 nodes, keep 6 after reranking.
auto_merging_engine_0 = get_automerging_query_engine(
    auto_merging_index_0,
    similarity_top_k=12,
    rerank_top_n=6,
)

In [None]:
from utils import get_prebuilt_trulens_recorder

# Create a TruLens recorder for the two-level engine, registered under app id 'app_0'.
tru_recorder = get_prebuilt_trulens_recorder(
    auto_merging_engine_0,
    app_id ='app_0'
)

In [None]:
# Load the evaluation questions, one question per line of the text file.
eval_questions = []
with open('generated_questions.text', 'r') as file:
    for line in file:
        # Strip the trailing newline and any surrounding whitespace.
        item = line.strip()
        # Skip blank lines so that no empty question is sent to the query engine.
        if item:
            eval_questions.append(item)

In [None]:
def run_evals(eval_questions, tru_recorder, query_engine):
    """Run every evaluation question through the query engine while recording.

    Args:
        eval_questions: Iterable of question strings.
        tru_recorder: Context manager that is entered once per question, so
            each query runs inside its own recording context.
        query_engine: Object exposing ``query(question)``.

    Returns:
        None. The responses are not collected here; the function is called
        only for what the recorder captures.
    """
    for question in eval_questions:
        with tru_recorder:
            query_engine.query(question)

In [None]:
# Run all evaluation questions against the two-level engine under the 'app_0' recorder.
run_evals(eval_questions, tru_recorder, auto_merging_engine_0)

In [None]:
from trulens_eval import Tru

# Show the leaderboard of recorded results.
# NOTE(review): presumably an empty app_ids list means "all apps" — confirm.
Tru().get_leaderboard(app_ids=[])

In [None]:
# Launch the TruLens dashboard to browse the recorded results.
Tru().run_dashboard()

### Three layers

In [None]:
# Three-level hierarchy: chunk sizes 2048, 512 and 128, persisted under merging_index_1.
# NOTE(review): build_automerging_index is neither defined nor imported in this
# cell; it must already be in scope from elsewhere.
auto_merging_index_1 = build_automerging_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index_1",
    chunk_sizes=[2048,512,128],
)

In [None]:
# Query engine over the three-level index, using the same retrieval settings as
# the two-level run (top 12 retrieved, top 6 after reranking).
# NOTE(review): get_automerging_query_engine is neither defined nor imported in
# this cell; it must already be in scope from elsewhere.
auto_merging_engine_1 = get_automerging_query_engine(
    auto_merging_index_1,
    similarity_top_k=12,
    rerank_top_n=6,
)


In [None]:
# Create a recorder for the three-level engine under app id 'app_1'.
# This rebinds `tru_recorder`, replacing the 'app_0' recorder created earlier.
tru_recorder = get_prebuilt_trulens_recorder(
    auto_merging_engine_1,
    app_id ='app_1'
)

In [None]:
# Run the same evaluation questions against the three-level engine under the 'app_1' recorder.
run_evals(eval_questions, tru_recorder, auto_merging_engine_1)

In [None]:
from trulens_eval import Tru

# Show the leaderboard again now that the three-level run has been recorded.
# NOTE(review): presumably an empty app_ids list means "all apps" — confirm.
Tru().get_leaderboard(app_ids=[])

In [None]:
# Launch the TruLens dashboard to compare the recorded runs.
Tru().run_dashboard()