# SETTINGS:

In [None]:
! pip install openai langchain
! pip install llama_index llama-index-embeddings-huggingface
! pip install trulens-eval

In [1]:
from IPython.display import HTML, display

def set_css(*_args, **_kwargs):
  """Inject CSS so long lines inside `<pre>` output wrap instead of overflowing.

  This function is registered below as an IPython ``pre_run_cell`` callback.
  IPython may invoke such callbacks with an ``info`` argument, so positional
  and keyword arguments are accepted and ignored; a plain ``set_css()`` call
  keeps working as before.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))

# Re-apply the style right before every cell execution.
get_ipython().events.register('pre_run_cell', set_css)

In [2]:
# Enable the autoreload extension in mode 2 so imported modules are reloaded
# before each cell runs (edits to local .py files apply without a restart).
%load_ext autoreload
%autoreload 2

In [3]:
# Colab only: mount Google Drive at /content/drive so files stored there can be read.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# SET ENVIRONMENT VARIABLES:

In [4]:
import os
from google.colab import userdata

# Copy secrets from Colab's `userdata` store into environment variables for this session.
os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
# NOTE(review): the Colab secret is named HF_TOKEN but is exported here as
# HUGGINGFACE_API_KEY; nothing in this notebook reads that variable directly —
# confirm which variable name the Hugging Face libraries actually expect.
os.environ["HUGGINGFACE_API_KEY"] = userdata.get('HF_TOKEN')

## TRULENS UTIL FUNCS:

In [5]:
import numpy as np
from trulens_eval import (Feedback, TruLlama, OpenAI)
from trulens_eval.feedback import Groundedness
import nest_asyncio

# Allow nested asyncio event loops (the notebook kernel already runs one).
nest_asyncio.apply()
# TruLens feedback provider. This is the `OpenAI` class imported from
# trulens_eval in this cell; note the variable name shadows the `openai` package name.
openai = OpenAI()

# Feedback function between the QUERY and the generated ANSWER:
ans_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name="Answer Relevance")
    .on_input()  # the user query
    .on_output() # the generated answer
)

# Feedback function between the QUERY and the RETRIEVED DOCS.
# Scores are produced per retrieved node and averaged with np.mean.
# NOTE(review): this reuses the same `relevance_with_cot_reasons` scorer as
# Answer Relevance — confirm whether a context-specific scorer is intended.
context_relevance = (
    Feedback(openai.relevance_with_cot_reasons, name = "Context Relevance")
    .on_input()                                   # the user query
    .on(TruLlama.select_source_nodes().node.text) # text of each retrieved node
    .aggregate(np.mean)
)

# Feedback function between the RETRIEVED DOCS and the generated ANSWER:
grounded = Groundedness(groundedness_provider=openai)
groundedness = (
    Feedback(grounded.groundedness_measure_with_cot_reasons, name="Groundedness")
        .on(TruLlama.select_source_nodes().node.text) # text of each retrieved node
        .on_output()                                  # the generated answer
        .aggregate(grounded.grounded_statements_aggregator)
)

# Default feedback set; get_prebuilt_trulens_recorder reads this module-level list.
feedbacks = [ans_relevance, context_relevance, groundedness]

def get_trulens_recorder(query_engine, feedbacks, app_id):
    """Wrap `query_engine` in a `TruLlama` recorder.

    Args:
        query_engine: The query engine to instrument.
        feedbacks: Feedback functions handed to the recorder.
        app_id: Identifier forwarded to `TruLlama` as `app_id`.

    Returns:
        The newly constructed `TruLlama` instance.
    """
    return TruLlama(query_engine, app_id=app_id, feedbacks=feedbacks)

def get_prebuilt_trulens_recorder(query_engine, app_id):
    """Wrap `query_engine` in a `TruLlama` recorder using the default feedbacks.

    The module-level `feedbacks` list is looked up at call time and passed on.

    Args:
        query_engine: The query engine to instrument.
        app_id: Identifier forwarded to `TruLlama` as `app_id`.

    Returns:
        The newly constructed `TruLlama` instance.
    """
    # Delegate to the general helper instead of duplicating the construction.
    return get_trulens_recorder(query_engine, feedbacks, app_id)

[nltk_data] Downloading package stopwords to
[nltk_data]     /usr/local/lib/python3.10/dist-
[nltk_data]     packages/llama_index/legacy/_static/nltk_cache...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to
[nltk_data]     /usr/local/lib/python3.10/dist-
[nltk_data]     packages/llama_index/legacy/_static/nltk_cache...
[nltk_data]   Unzipping tokenizers/punkt.zip.


✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.app.query.rets.source_nodes[:].node.text .


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


✅ In Groundedness, input source will be set to __record__.app.query.rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


### LLAMA INDEX - SENTENCE WINDOW RETRIEVAL - UTIL FUNCS

In [6]:
from llama_index.core import ServiceContext, VectorStoreIndex, StorageContext
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.indices.postprocessor import MetadataReplacementPostProcessor
from llama_index.core.indices.postprocessor import SentenceTransformerRerank
from llama_index.core import load_index_from_storage
import os


def build_sentence_window_index(
    document, llm,
    embed_model="local:BAAI/bge-small-en-v1.5", # "local:BAAI/bge-large-en-v1.5" is also available
    save_dir="sentence_window_index",
    window_size=3):
    """Build a sentence-window vector index, or load it from `save_dir`.

    Args:
        document: The single document to index (it is wrapped in a list
            before being passed to `VectorStoreIndex.from_documents`).
        llm: LLM object placed into the service context.
        embed_model: Embedding model specifier placed into the service context.
        save_dir: Directory the index is persisted to and loaded from.
        window_size: Window size passed to the sentence-window node parser.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        The index, freshly built or loaded from storage.

    Note:
        If `save_dir` already exists, the index is loaded from it and
        `document` is not read at all. A changed `document` or `window_size`
        therefore has no effect on the stored nodes until `save_dir` is
        removed or a different `save_dir` is used.
    """
    # Sentence-window node parser: the window and the original sentence are
    # stored in node metadata under the keys given below.
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )

    # NOTE(review): the notebook output echoes this call as a warning, which
    # suggests `ServiceContext` is deprecated in the installed llama-index
    # version — confirm, and migrate when versions are pinned.
    sentence_context = ServiceContext.from_defaults(
        llm=llm,
        embed_model=embed_model,
        node_parser=node_parser,
    )

    # Because the node parser is part of the service context, building the
    # index from documents is expected to:
    # 1. Split the source document into sentences.
    # 2. Augment each sentence with its surrounding context window.
    # 3. Embed it.
    # 4. Load it into the vector store.
    if not os.path.exists(save_dir):
        sentence_window_index = VectorStoreIndex.from_documents(
            [document], service_context=sentence_context
        )
        # Persist so later runs can take the cheaper load path below.
        sentence_window_index.storage_context.persist(persist_dir=save_dir)
    else:
        sentence_window_index = load_index_from_storage(
            StorageContext.from_defaults(persist_dir=save_dir),
            service_context=sentence_context,
        )

    return sentence_window_index


def get_sentence_window_query_engine(
    sentence_window_index,
    similarity_top_k=6,
    rerank_top_n=2,
    rerank_model="BAAI/bge-reranker-base"):
    """Create a query engine over a sentence-window index.

    Args:
        sentence_window_index: Index to query; its `as_query_engine` is called.
        similarity_top_k: Forwarded to `as_query_engine` as `similarity_top_k`.
        rerank_top_n: `top_n` given to the reranker.
        rerank_model: Model name given to `SentenceTransformerRerank`.
            Defaults to the value that was previously hard-coded.

    Returns:
        The query engine returned by `as_query_engine`.
    """
    # Post-processors are passed in this order: metadata replacement keyed on
    # "window" first, then the reranker.
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(
        top_n=rerank_top_n,
        model=rerank_model,
    )

    sentence_window_engine = sentence_window_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[postproc, rerank]
    )
    return sentence_window_engine

# 1. LOAD THE SOURCE DOCUMENT (PDF):

In [7]:
# Folder holding the input files, inside the mounted Google Drive.
# NOTE(review): this is a relative path, so it resolves only when the working
# directory is the parent of the `drive` mount point (/content) — confirm.
data_path = 'drive/My Drive/DATA/advanced_RAG/'

In [8]:
from llama_index.core import SimpleDirectoryReader

# Read the PDF into `documents`; later cells iterate over it and join each item's `.text`.
# Presumably the reader yields one item per page — TODO confirm.
documents = SimpleDirectoryReader(
    input_files=[data_path + "eBook-How-to-Build-a-Career-in-AI.pdf"]
).load_data()

# A. SENTENCE WINDOW RAG PIPELINE:

### 1. JOIN ALL PAGES INTO A SINGLE DOCUMENT:



In [9]:
from llama_index.core import Document

# Concatenate the text of every loaded item (blank line between them) into one Document.
document = Document(text="\n\n".join([doc.text for doc in documents]))

In [10]:
# Sanity check: display the type of the merged object.
type(document)

### 2. CREATE AN INDEX (VECTOR db):

In [11]:
from llama_index.llms.openai import OpenAI

# LLM passed to build_sentence_window_index below.
# NOTE: `OpenAI` here is the llama-index class imported just above; that import
# rebinds the name `OpenAI` that was earlier imported from trulens_eval.
llm = OpenAI()

In [12]:
# Build the sentence-window index, or load it if the `sentence_index` directory
# already exists (in which case `document` is not re-indexed).
# NOTE(review): `save_dir` is relative to the working directory, not the Drive
# folder — confirm that this is where the index should be persisted.
sentence_index = build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="sentence_index"
)

# Query engine created with the helper's default retrieval and rerank settings.
query_engine = get_sentence_window_query_engine(sentence_index)

  sentence_context = ServiceContext.from_defaults(


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/94.8k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/443 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/279 [00:00<?, ?B/s]

### 3. QUERY THE INDEX:

In [13]:
# Ad-hoc query against the engine; only the answer text (`response.response`) is printed.
query = "What are steps to take when finding projects to build your experience?"
response = query_engine.query(query)

print(response.response)

Identify a business problem (not an AI problem) and find a domain expert to ask about the top three things they wish worked better. If something interests you, pursue learning it regardless of its immediate usefulness, as it may lead to a creative spark or technical breakthrough.


In [15]:
# Second ad-hoc query; `query` and `response` from the previous cell are overwritten.
query = "What does the document say about Machine Learning?"
response = query_engine.query(query)

print(response.response)

The document emphasizes the importance of understanding foundational machine learning skills such as linear regression, logistic regression, neural networks, decision trees, clustering, and anomaly detection. It also highlights the significance of grasping core concepts like bias/variance, cost functions, regularization, optimization algorithms, and error analysis. Additionally, it mentions the increasing importance of deep learning in the field of machine learning, stressing the value of knowledge in neural networks, hyperparameter tuning, convolutional networks, sequence models, and transformers.


In [17]:
# Third ad-hoc query; `query` and `response` are overwritten again.
query = "WHich math topics are important for understanding AI"
response = query_engine.query(query)

print(response.response)

Math topics that are important for understanding AI include linear algebra, calculus, probability and statistics.


# B. EVALUATION USING TRULENS:

#### 1. CURATE A SET OF EVALUATION QUESTIONS:

In [18]:
# Questions sent through the query engine during the TruLens evaluation run.
eval_questions = [
    "What are the keys to building a career in AI?",
    "How can teamwork contribute to success in AI?",
    "What is the importance of networking in AI?",
    "What are some good habits to develop for a successful career?",
    "How can altruism be beneficial in building a career?",
    "What is imposter syndrome and how does it relate to AI?",
    "Who are some accomplished individuals who have experienced imposter syndrome?",
    "What is the first step to becoming good at AI?",
    "What are some common challenges in AI?",
    "Is it normal to find parts of AI challenging?",
    "What is the right AI job for me?",
]

#### 2. TRULENS RECORDER:

In [20]:
from trulens_eval import Tru
# TruLens handle used below for the leaderboard and the dashboard.
tru = Tru()

# Start from a clean slate.
# NOTE(review): this presumably discards previously recorded results — confirm before re-running.
tru.reset_database()

# Recorder around the sentence-window engine, using the default `feedbacks` list.
tru_recorder_sentence_window = get_prebuilt_trulens_recorder(
    query_engine,
    app_id = "Sentence Window Query Engine"
)

In [22]:
# Send every evaluation question through the engine inside the recorder's
# context manager; presumably this is what records each call and triggers the
# feedback functions. `response` is overwritten on each iteration and not used
# afterwards in this cell, and `recording` is never read.
for question in eval_questions:
    with tru_recorder_sentence_window as recording:
        response = query_engine.query(question)

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/3 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

Groundedness per statement in source:   0%|          | 0/1 [00:00<?, ?it/s]

#### 3. PERFORMANCE OVERVIEW:

In [23]:
# Leaderboard restricted to this app_id; the output below shows the three
# feedback scores plus latency and total_cost columns.
sentence_window_perf = tru.get_leaderboard(app_ids=["Sentence Window Query Engine"])
sentence_window_perf

Unnamed: 0_level_0,Answer Relevance,Context Relevance,Groundedness,latency,total_cost
app_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Sentence Window Query Engine,0.881818,0.436364,0.656061,12.727273,0.000904


In [None]:
# Start the TruLens dashboard; per the original note it launches on http://localhost:8501/.
# NOTE(review): on Colab that address may not be reachable from the browser —
# confirm the URL reported by this call.
tru.run_dashboard()