# Lesson 3: Sentence Window Retrieval

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
import utils

import os
import openai
openai.api_key = utils.get_openai_api_key()

✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input response will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
✅ In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .


In [4]:
#from llama_index import SimpleDirectoryReader
from llama_index.core import SimpleDirectoryReader


documents = SimpleDirectoryReader(
    input_files=["./eBook-How-to-Build-a-Career-in-AI.pdf"]
).load_data()

In [5]:
print(type(documents), "\n")
print(len(documents), "\n")
print(type(documents[0]))
print(documents[0])

<class 'list'> 

41 

<class 'llama_index.core.schema.Document'>
Doc ID: a5779b9a-e3cf-4202-bdb5-742a58016431
Text: PAGE 1 Founder, DeepLearning.AI Collected Insights from Andrew
Ng How to  Build Your Career in AI A Simple Guide


In [6]:
#from llama_index import Document
from llama_index.core import Document


document = Document(text="\n\n".join([doc.text for doc in documents]))

## Window-sentence retrieval setup

In [69]:
# add at top of file (imports)
from llama_index.core import VectorStoreIndex, StorageContext, load_index_from_storage
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.core.postprocessor import MetadataReplacementPostProcessor, SentenceTransformerRerank
#from llama_index.embeddings.huggingface_openai import HuggingFaceEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
import os


from llama_index.core.node_parser import HierarchicalNodeParser, get_leaf_nodes

def _ensure_embed_model(embed_model):
    # allow passing either a ready embedding object or a "local:MODEL_NAME" string (your current style)
    if hasattr(embed_model, "get_text_embedding"):
        return embed_model
    if isinstance(embed_model, str):
        if embed_model.startswith("local:"):
            return HuggingFaceEmbedding(model_name=embed_model.split("local:", 1)[1])
        else:
            # treat as HF model name
            return HuggingFaceEmbedding(model_name=embed_model)
    raise ValueError("embed_model must be an embedding object or a model name string.")

"""
def build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,       # <--- add this
    save_dir="sentence_index",
):
    # 1) node parsing
    node_parser = SentenceWindowNodeParser.from_defaults(
        #window_size=3,
        window_size=sentence_window_size,   # <--- use it here
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    nodes = node_parser.get_nodes_from_documents([document])

    # 2) ensure embedding object
    embed = _ensure_embed_model(embed_model)

    # 3) build or load index (pass llm/embed directly; no ServiceContext)
    if not os.path.exists(save_dir):
        index = VectorStoreIndex(nodes, llm=llm, embed_model=embed)
        index.storage_context.persist(persist_dir=save_dir)
    else:
        storage = StorageContext.from_defaults(persist_dir=save_dir)
        index = load_index_from_storage(storage, llm=llm, embed_model=embed)

    return index
"""

def build_automerging_index(
    documents,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    save_dir="merging_index",
    chunk_sizes=None,
):
    """Build a hierarchical vector index, or load it if already persisted.

    If ``save_dir`` exists, the index is loaded from it and ``documents`` and
    ``chunk_sizes`` are NOT consulted; delete the directory to force a rebuild
    with different settings. Otherwise the documents are parsed with a
    ``HierarchicalNodeParser``, all parsed nodes are added to the docstore,
    only the leaf nodes are passed to ``VectorStoreIndex``, and the result is
    persisted to ``save_dir``.

    Args:
        documents: Documents handed to the node parser.
        llm: Forwarded as a keyword argument to the index constructor/loader.
            NOTE(review): confirm the installed llama-index version actually
            uses this keyword rather than falling back to a global default.
        embed_model: Embedding object or model-name string (see
            ``_ensure_embed_model``).
        save_dir: Directory used to persist / reload the index.
        chunk_sizes: Chunk sizes for the hierarchy; defaults to
            ``[2048, 512, 128]`` when falsy.

    Returns:
        The built or loaded index.
    """
    embed = _ensure_embed_model(embed_model)

    # Load path: nothing needs to be parsed, so return before the expensive work.
    if os.path.exists(save_dir):
        storage = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(storage, llm=llm, embed_model=embed)

    # Build path: parse the hierarchy and pick out the leaf nodes.
    chunk_sizes = chunk_sizes or [2048, 512, 128]
    node_parser = HierarchicalNodeParser.from_defaults(chunk_sizes=chunk_sizes)
    nodes = node_parser.get_nodes_from_documents(documents)
    leaf_nodes = get_leaf_nodes(nodes)

    # Every node (not just the leaves) is stored in the docstore.
    storage_context = StorageContext.from_defaults()
    storage_context.docstore.add_documents(nodes)

    index = VectorStoreIndex(
        leaf_nodes,
        storage_context=storage_context,
        llm=llm,
        embed_model=embed,
    )
    index.storage_context.persist(persist_dir=save_dir)
    return index


In [8]:
#from llama_index.node_parser import SentenceWindowNodeParser

# create the sentence window node parser w/ default settings
node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)

In [9]:
text = "hello. how are you? I am fine!  "

nodes = node_parser.get_nodes_from_documents([Document(text=text)])

In [10]:
print([x.text for x in nodes])

['hello. ', 'how are you? ', 'I am fine!  ']


In [11]:
print(nodes[1].metadata["window"])

hello.  how are you?  I am fine!  


In [12]:
text = "hello. foo bar. cat dog. mouse"

nodes = node_parser.get_nodes_from_documents([Document(text=text)])

In [13]:
print([x.text for x in nodes])

['hello. ', 'foo bar. ', 'cat dog. ', 'mouse']


In [14]:
print(nodes[0].metadata["window"])

hello.  foo bar.  cat dog.  mouse


### Building the index

In [15]:
#from llama_index.llms import OpenAI
from llama_index.llms.openai import OpenAI


llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)

In [19]:
"""
#from llama_index import ServiceContext
from llama_index.core import ServiceContext


sentence_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    # embed_model="local:BAAI/bge-large-en-v1.5"
    node_parser=node_parser,
)
"""

'\n#from llama_index import ServiceContext\nfrom llama_index.core import ServiceContext\n\n\nsentence_context = ServiceContext.from_defaults(\n    llm=llm,\n    embed_model="local:BAAI/bge-small-en-v1.5",\n    # embed_model="local:BAAI/bge-large-en-v1.5"\n    node_parser=node_parser,\n)\n'

In [20]:
"""
from llama_index import VectorStoreIndex

sentence_index = VectorStoreIndex.from_documents(
    [document], service_context=sentence_context
)
"""

'\nfrom llama_index import VectorStoreIndex\n\nsentence_index = VectorStoreIndex.from_documents(\n    [document], service_context=sentence_context\n)\n'

In [21]:
from llama_index.core import VectorStoreIndex, Settings
from llama_index.core.node_parser import SentenceWindowNodeParser
from llama_index.llms.openai import OpenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Global defaults
Settings.llm = OpenAI(model="gpt-3.5-turbo", temperature=0.1)
Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

node_parser = SentenceWindowNodeParser.from_defaults(
    window_size=3,
    window_metadata_key="window",
    original_text_metadata_key="original_text",
)
nodes = node_parser.get_nodes_from_documents([document])

# No need to pass llm/embed now
sentence_index = VectorStoreIndex(nodes)
sentence_index.storage_context.persist(persist_dir="./sentence_index")


2025-08-29 16:56:27,949 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-08-29 16:56:30,262 - INFO - 1 prompt is loaded, with the key: query


In [22]:
sentence_index.storage_context.persist(persist_dir="./sentence_index")


In [25]:
"""
# This block of code is optional to check
# if an index file exist, then it will load it
# if not, it will rebuild it

#import os
#from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage
#from llama_index import load_index_from_storage

if not os.path.exists("./sentence_index"):
    sentence_index = VectorStoreIndex.from_documents(
        [document], service_context=sentence_context
    )

    sentence_index.storage_context.persist(persist_dir="./sentence_index")
else:
    sentence_index = load_index_from_storage(
        StorageContext.from_defaults(persist_dir="./sentence_index"),
        service_context=sentence_context
    )
"""

'\n# This block of code is optional to check\n# if an index file exist, then it will load it\n# if not, it will rebuild it\n\n#import os\n#from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage\n#from llama_index import load_index_from_storage\n\nif not os.path.exists("./sentence_index"):\n    sentence_index = VectorStoreIndex.from_documents(\n        [document], service_context=sentence_context\n    )\n\n    sentence_index.storage_context.persist(persist_dir="./sentence_index")\nelse:\n    sentence_index = load_index_from_storage(\n        StorageContext.from_defaults(persist_dir="./sentence_index"),\n        service_context=sentence_context\n    )\n'

### Building the postprocessor

In [27]:
#from llama_index.indices.postprocessor import MetadataReplacementPostProcessor

postproc = MetadataReplacementPostProcessor(
    target_metadata_key="window"
)

In [29]:
#from llama_index.schema import NodeWithScore
from llama_index.core.schema import NodeWithScore

from copy import deepcopy

scored_nodes = [NodeWithScore(node=x, score=1.0) for x in nodes]
nodes_old = [deepcopy(n) for n in nodes]

In [30]:
nodes_old[1].text

'It will \ntransform and improve \nall areas of human life."\n'

In [31]:
replaced_nodes = postproc.postprocess_nodes(scored_nodes)

In [32]:
print(replaced_nodes[1].text)

PAGE 1
Founder, DeepLearning.AI
Collected Insights
from Andrew Ng
How to 
Build
Your
Career
in AI
A Simple Guide


PAGE 2
"AI is the new 
electricity.  It will 
transform and improve 
all areas of human life."
 Andrew Ng

PAGE 3
Table of 
Contents
Introduction: Coding AI is the New Literacy.
 Chapter 1: Three Steps to Career Growth.
 Chapter 2: Learning Technical Skills for a 
Promising AI Career.



### Adding a reranker

In [34]:
#from llama_index.indices.postprocessor import SentenceTransformerRerank

# BAAI/bge-reranker-base
# link: https://huggingface.co/BAAI/bge-reranker-base
rerank = SentenceTransformerRerank(
    top_n=2, model="BAAI/bge-reranker-base"
)

In [39]:
#from llama_index import QueryBundle
from llama_index.core import QueryBundle

#from llama_index.schema import TextNode, NodeWithScore
from llama_index.core.schema import TextNode, NodeWithScore


query = QueryBundle("I want a dog.")

scored_nodes = [
    NodeWithScore(node=TextNode(text="This is a cat"), score=0.6),
    NodeWithScore(node=TextNode(text="This is a dog"), score=0.4),
]

In [40]:
reranked_nodes = rerank.postprocess_nodes(
    scored_nodes, query_bundle=query
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

In [41]:
print([(x.text, x.score) for x in reranked_nodes])

[('This is a dog', np.float32(0.9182743)), ('This is a cat', np.float32(0.00140408))]


### Running the query engine

In [42]:
sentence_window_engine = sentence_index.as_query_engine(
    similarity_top_k=6, node_postprocessors=[postproc, rerank]
)

In [43]:
window_response = sentence_window_engine.query(
    "What are the keys to building a career in AI?"
)

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

2025-08-29 17:08:37,129 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [46]:
#from llama_index.response.notebook_utils import display_response
from llama_index.core.response.notebook_utils import display_response


display_response(window_response)

**`Final Response:`** The keys to building a career in AI involve learning foundational technical skills, working on projects, finding a job, and being part of a supportive community. Additionally, collaborating with others, influencing, and being influenced by others are critical aspects for success in AI career development.

## Putting it all Together

In [59]:
import os
from collections.abc import Sequence

from llama_index.core import (
    Document,
    StorageContext,
    VectorStoreIndex,
    load_index_from_storage,
)
from llama_index.core.node_parser import SentenceWindowNodeParser


# Defined in this cell so the call in the following cell works on a fresh
# kernel (Restart & Run All); it uses the current llama_index.core API, with
# no ServiceContext.
def build_sentence_window_index(
    document,
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    """Build a sentence-window vector index, or load it if already persisted.

    If ``save_dir`` exists, the index is loaded from it and ``document`` and
    ``sentence_window_size`` are NOT consulted; delete the directory to force
    a rebuild with different settings.

    Args:
        document: A single ``Document`` or a sequence of them.
        llm: Forwarded as a keyword argument to the index constructor/loader.
            NOTE(review): confirm the installed llama-index version actually
            uses this keyword rather than falling back to a global default.
        embed_model: Embedding object or model-name string (see
            ``_ensure_embed_model``).
        sentence_window_size: ``window_size`` given to
            ``SentenceWindowNodeParser``.
        save_dir: Directory used to persist / reload the index.

    Returns:
        The built or loaded index.
    """
    embed = _ensure_embed_model(embed_model)

    # Load path: skip parsing entirely when a persisted index is available.
    if os.path.exists(save_dir):
        storage = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(storage, llm=llm, embed_model=embed)

    # Accept either one Document or a list/tuple of Documents.
    if isinstance(document, Sequence) and not isinstance(document, (str, Document)):
        documents = list(document)
    else:
        documents = [document]

    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    nodes = node_parser.get_nodes_from_documents(documents)

    index = VectorStoreIndex(nodes, llm=llm, embed_model=embed)
    index.storage_context.persist(persist_dir=save_dir)
    return index


In [62]:
#from llama_index.llms import OpenAI

#index = build_sentence_window_index(
#    [document],
#    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
#    save_dir="./sentence_index",
#)

index = build_sentence_window_index(
    document,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    save_dir="./sentence_index",
)

Loading llama_index.core.storage.kvstore.simple_kvstore from ./sentence_index/docstore.json.
Loading llama_index.core.storage.kvstore.simple_kvstore from ./sentence_index/index_store.json.


In [53]:
def get_sentence_window_query_engine(
    sentence_index,
    similarity_top_k=6,
    rerank_top_n=2,
    rerank_model="BAAI/bge-reranker-base",
):
    """Create a query engine with window replacement followed by reranking.

    Args:
        sentence_index: Index on which ``as_query_engine`` is called.
        similarity_top_k: Passed to ``as_query_engine`` as ``similarity_top_k``.
        rerank_top_n: ``top_n`` given to the reranker.
        rerank_model: Model name given to ``SentenceTransformerRerank``.

    Returns:
        The query engine returned by ``sentence_index.as_query_engine``.
    """
    # Postprocessors are applied in list order: first the "window" metadata
    # replacement, then the reranker.
    postproc = MetadataReplacementPostProcessor(target_metadata_key="window")
    rerank = SentenceTransformerRerank(top_n=rerank_top_n, model=rerank_model)

    return sentence_index.as_query_engine(
        similarity_top_k=similarity_top_k,
        node_postprocessors=[postproc, rerank],
    )
    
query_engine = get_sentence_window_query_engine(index, similarity_top_k=6)


## TruLens Evaluation

In [54]:
# Load the evaluation questions: one question per line, with surrounding
# whitespace stripped. Blank lines are skipped so that no empty query is
# sent to the engine.
with open('generated_questions.text', 'r') as file:
    eval_questions = [line.strip() for line in file if line.strip()]

In [55]:
from trulens_eval import Tru

def run_evals(eval_questions, tru_recorder, query_engine):
    """Run each evaluation question through ``query_engine`` under the recorder.

    Every query is issued inside its own ``with tru_recorder`` block. The
    responses are not collected or returned.

    Args:
        eval_questions: Iterable of question strings.
        tru_recorder: Context manager entered once per question.
        query_engine: Object whose ``query(question)`` method is called.
    """
    for question in eval_questions:
        with tru_recorder:
            query_engine.query(question)



In [57]:
from utils import get_prebuilt_trulens_recorder

from trulens_eval import Tru

Tru().reset_database()

2025-08-29 17:19:47,177 - INFO - Context impl SQLiteImpl.
2025-08-29 17:19:47,178 - INFO - Will assume non-transactional DDL.
Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]


### Sentence window size = 1

In [65]:
from collections.abc import Sequence
from llama_index.core import Document  # make sure this import exists

def build_sentence_window_index(
    document,                      # can be Document OR Sequence[Document]
    llm,
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index",
):
    """Build a sentence-window vector index, or load it if already persisted.

    If ``save_dir`` exists, the index is loaded from it and ``document`` and
    ``sentence_window_size`` are NOT consulted; use a distinct ``save_dir``
    per window size (or delete the directory) to get a fresh build.

    Args:
        document: A single ``Document`` or a sequence of them.
        llm: Forwarded as a keyword argument to the index constructor/loader.
            NOTE(review): confirm the installed llama-index version actually
            uses this keyword rather than falling back to a global default.
        embed_model: Embedding object or model-name string (see
            ``_ensure_embed_model``).
        sentence_window_size: ``window_size`` given to
            ``SentenceWindowNodeParser``.
        save_dir: Directory used to persist / reload the index.

    Returns:
        The built or loaded index.
    """
    # 1) embedding object (resolved first so a bad value fails on both paths)
    embed = _ensure_embed_model(embed_model)

    # 2) load path: return early, no parsing needed
    if os.path.exists(save_dir):
        storage = StorageContext.from_defaults(persist_dir=save_dir)
        return load_index_from_storage(storage, llm=llm, embed_model=embed)

    # 3) normalize to list[Document]
    if isinstance(document, Sequence) and not isinstance(document, (str, Document)):
        documents = list(document)  # already a list/tuple of Documents
    else:
        documents = [document]      # single Document

    # 4) node parsing
    node_parser = SentenceWindowNodeParser.from_defaults(
        window_size=sentence_window_size,
        window_metadata_key="window",
        original_text_metadata_key="original_text",
    )
    nodes = node_parser.get_nodes_from_documents(documents)

    # 5) build and persist
    index = VectorStoreIndex(nodes, llm=llm, embed_model=embed)
    index.storage_context.persist(persist_dir=save_dir)
    return index


In [66]:
sentence_index_1 = build_sentence_window_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=1,
    save_dir="sentence_index_1",
)

In [67]:
sentence_window_engine_1 = get_sentence_window_query_engine(
    sentence_index_1
)

In [68]:
tru_recorder_1 = get_prebuilt_trulens_recorder(
    sentence_window_engine_1,
    app_id='sentence window engine 1'
)

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.embeddings.multi_modal_base.MultiModalEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base

In [39]:
run_evals(eval_questions, tru_recorder_1, sentence_window_engine_1)

In [40]:
Tru().run_dashboard()

Starting dashboard ...
Config file already exists. Skipping writing process.
Credentials file already exists. Skipping writing process.


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at https://s172-29-126-76p38560.lab-aws-production.deeplearning.ai/ .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>

### Note about the dataset of questions
- Since this evaluation process takes a long time to run, the following file `generated_questions.text` contains one question (the one mentioned in the lecture video).
- If you would like to explore other possible questions, feel free to explore the file directory by clicking on the "Jupyter" logo at the top right of this notebook. You'll see the following `.text` files:

> - `generated_questions_01_05.text`
> - `generated_questions_06_10.text`
> - `generated_questions_11_15.text`
> - `generated_questions_16_20.text`
> - `generated_questions_21_24.text`

Note that running an evaluation on more than one question can take some time, so we recommend choosing one of these files (with 5 questions each) to run and explore the results.

- For evaluating a personal project, an eval set of 20 is reasonable.
- For evaluating business applications, you may need a set of 100+ in order to cover all the use cases thoroughly.
- Note that since API calls can sometimes fail, you may occasionally see null responses, and would want to re-run your evaluations.  So running your evaluations in smaller batches can also help you save time and cost by only re-running the evaluation on the batches with issues.

In [70]:
# Reload the evaluation questions: one question per line, with surrounding
# whitespace stripped. Blank lines are skipped so that no empty query is
# sent to the engine.
with open('generated_questions.text', 'r') as file:
    eval_questions = [line.strip() for line in file if line.strip()]

### Sentence window size = 3

In [71]:
sentence_index_3 = build_sentence_window_index(
    documents,
    llm=OpenAI(model="gpt-3.5-turbo", temperature=0.1),
    embed_model="local:BAAI/bge-small-en-v1.5",
    sentence_window_size=3,
    save_dir="sentence_index_3",
)
sentence_window_engine_3 = get_sentence_window_query_engine(
    sentence_index_3
)

tru_recorder_3 = get_prebuilt_trulens_recorder(
    sentence_window_engine_3,
    app_id='sentence window engine 3'
)

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.embeddings.multi_modal_base.MultiModalEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base

In [72]:
run_evals(eval_questions, tru_recorder_3, sentence_window_engine_3)

In [73]:
Tru().run_dashboard()

Starting dashboard ...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valu…

Dashboard started at http://localhost:65411 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>