In [None]:
%pip install deeplake==3.9.27 langchain openai tiktoken llama-index cohere llama-index-postprocessor-cohere-rerank
%pip install llama-index-vector-stores-deeplake
%pip install llama-index-llms-openai llama-index-question-gen-openai

Collecting deeplake==3.9.27
  Using cached deeplake-3.9.27-py3-none-any.whl
Collecting langchain
  Using cached langchain-0.3.27-py3-none-any.whl.metadata (7.8 kB)
Collecting openai
  Using cached openai-1.105.0-py3-none-any.whl.metadata (29 kB)
Collecting tiktoken
  Using cached tiktoken-0.11.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (6.7 kB)
Collecting llama-index
  Using cached llama_index-0.13.4-py3-none-any.whl.metadata (12 kB)
Collecting cohere
  Using cached cohere-5.17.0-py3-none-any.whl.metadata (3.4 kB)
Collecting numpy<2.0 (from deeplake==3.9.27)
  Using cached numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl.metadata (61 kB)
Collecting pillow~=10.4.0 (from deeplake==3.9.27)
  Using cached pillow-10.4.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (9.2 kB)
Collecting boto3 (from deeplake==3.9.27)
  Using cached boto3-1.40.23-py3-none-any.whl.metadata (6.7 kB)
Collecting click (from deeplake==3.9.27)
  Using cached click-8.2.1-py3-none-any.whl.metadata (2.5 kB)
Collecting path

In [23]:
import os
from dotenv import load_dotenv


load_dotenv("../.env")
assert os.getenv("OPENAI_API_KEY")
assert os.getenv("ACTIVELOOP_TOKEN")
assert os.getenv("COHERE_API_KEY")

In [12]:
!mkdir -p "data/paul_graham/"
!curl "https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt" -o "data/paul_graham/paul_graham_essay.txt"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 75042  100 75042    0     0   546k      0 --:--:-- --:--:-- --:--:--  546k


In [4]:
from llama_index.core import SimpleDirectoryReader


# load documents
documents = SimpleDirectoryReader("./data/paul_graham").load_data()
print(len(documents))

1


In [5]:
from llama_index.core.node_parser import SimpleNodeParser


# chunc documents and create nodes
node_parser = SimpleNodeParser.from_defaults(chunk_size=512, chunk_overlap=64)
nodes = node_parser.get_nodes_from_documents(documents)

print(len(nodes))
print(nodes[0])

43
Node ID: cc36dd9d-b650-4649-b573-1bccd1af4e8b
Text: What I Worked On  February 2021  Before college the two main
things I worked on, outside of school, were writing and programming. I
didn't write essays. I wrote what beginning writers were supposed to
write then, and probably still are: short stories. My stories were
awful. They had hardly any plot, just characters with strong feelings,
which I ...


In [6]:
from llama_index.vector_stores.deeplake import DeepLakeVectorStore


my_activeloop_org_id = "yaroslava"
my_activeloop_dataset_name = "LlamaIndex_paulgraham_essay"
dataset_path = f"hub://{my_activeloop_org_id}/{my_activeloop_dataset_name}"

# Create an index over the documnts
vector_store = DeepLakeVectorStore(dataset_path=dataset_path, overwrite=False)

  import pkg_resources  # type: ignore


Deep Lake Dataset in hub://yaroslava/LlamaIndex_paulgraham_essay already exists, loading from the storage


In [7]:
from llama_index.core import StorageContext


storage_context = StorageContext.from_defaults(vector_store=vector_store)
storage_context.docstore.add_documents(nodes)

In [8]:
from llama_index.core import VectorStoreIndex


vector_index = VectorStoreIndex(nodes, storage_context=storage_context)

2025-09-04 14:48:32,905 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Uploading data to deeplake dataset.


100%|██████████| 43/43 [00:02<00:00, 20.03it/s]
\

Dataset(path='hub://yaroslava/LlamaIndex_paulgraham_essay', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape      dtype  compression
  -------    -------    -------    -------  ------- 
 embedding  embedding  (86, 1536)  float32   None   
    id        text      (86, 1)      str     None   
 metadata     json      (86, 1)      str     None   
   text       text      (86, 1)      str     None   


 

In [9]:
query_engine = vector_index.as_query_engine(streaming=True, similarity_top_k=10)

In [10]:
streaming_response = query_engine.query(
    "What does Paul Graham do?",
)
streaming_response.print_response_stream()

2025-09-04 14:49:25,401 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-09-04 14:49:27,659 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Paul Graham organizes a summer program called the Summer Founders Program where undergraduates start startups instead of taking temporary jobs at tech companies during the summer. He also gives talks on startups and seed funding, and eventually starts an investment firm with partners.

# SubQuestion Query Engine

In [11]:
query_engine = vector_index.as_query_engine(similarity_top_k=10)

In [15]:
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.query_engine import SubQuestionQueryEngine


query_engine_tools = [
    QueryEngineTool(
        query_engine=query_engine,
        metadata=ToolMetadata(
            name="pg_essay",
            description="Paul Graham essay on What I Worked On",
        ),
    ),
]

query_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=query_engine_tools,
    use_async=True,
)

In [18]:
response = query_engine.query(
    "How was Paul Grahams life different before, during, and after YC?"
)

2025-09-04 14:52:23,170 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
2025-09-04 14:52:23,176 - INFO - Retrying request to /embeddings in 0.459244 seconds


Generated 3 sub questions.
[1;3;38;2;237;90;200m[pg_essay] Q: What did Paul Graham work on before Y Combinator?
[0m[1;3;38;2;90;149;237m[pg_essay] Q: What did Paul Graham work on during Y Combinator?
[0m[1;3;38;2;11;159;203m[pg_essay] Q: What did Paul Graham work on after Y Combinator?
[0m

2025-09-04 14:52:23,479 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-09-04 14:52:23,516 - INFO - Retrying request to /chat/completions in 0.455333 seconds
2025-09-04 14:52:23,886 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-09-04 14:52:23,913 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-09-04 14:52:24,748 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[1;3;38;2;90;149;237m[pg_essay] A: During Y Combinator, Paul Graham worked on writing essays, working on YC, and developing a new version of Arc.
[0m

2025-09-04 14:52:25,117 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[1;3;38;2;237;90;200m[pg_essay] A: Before Y Combinator, Paul Graham worked on a new version of Arc in the summer of 2006. This version of Arc was compiled into Scheme, and to test it, he wrote Hacker News. Initially, Hacker News was meant to be a news aggregator for startup founders called Startup News, but it was later changed to Hacker News with a broader topic to engage intellectual curiosity.
[0m

2025-09-04 14:52:27,062 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[1;3;38;2;11;159;203m[pg_essay] A: After Y Combinator, Paul Graham worked on a new version of Arc in the summer of 2006. This version of Arc was compiled into Scheme. To test this new Arc, he created Hacker News, which was originally intended to be a news aggregator for startup founders. However, he later changed the name to Hacker News and broadened the topic to engage intellectual curiosity beyond just startups.
[0m

2025-09-04 14:52:29,007 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [19]:
print( ">>> The final response:\n", response)

>>> The final response:
 Paul Graham's focus before Y Combinator was on developing a new version of Arc and creating Hacker News to test it. During his time at Y Combinator, he worked on writing essays, YC operations, and further developing Arc. After Y Combinator, he continued working on a new version of Arc and the creation of Hacker News, which transitioned from a startup-focused news aggregator to a platform with broader intellectual topics.


# Cohere Rerank

In [None]:
import cohere

# Get your cohere API key on: www.cohere.com
co = cohere.Client(os.environ['COHERE_API_KEY'])

# Example query and passages
query = "What is the capital of the United States?"
documents = [
   "Carson City is the capital city of the American state of Nevada. At the  2010 United States Census, Carson City had a population of 55,274.",
   "The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.",
   "Charlotte Amalie is the capital and largest city of the United States Virgin Islands. It has about 20,000 people. The city is on the island of Saint Thomas.",
   "Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. ",
   "Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.",
   "North Dakota is a state in the United States. 672,591 people lived in North Dakota in the year 2010. The capital and seat of government is Bismarck."
   ]

In [None]:
results = co.rerank(query=query, documents=documents, top_n=3, model='rerank-english-v2.0') # Change top_n to change the number of results returned. If top_n is not passed, all results will be returned.

for idx, r in enumerate(results):
  print(f"Document Rank: {idx + 1}, Document Index: {r.index}")
  print(f"Document: {r.document['text']}")
  print(f"Relevance Score: {r.relevance_score:.2f}")
  print("\n")

Document Rank: 1, Document Index: 3
Document: Washington, D.C. (also known as simply Washington or D.C., and officially as the District of Columbia) is the capital of the United States. It is a federal district. 
Relevance Score: 0.98


Document Rank: 2, Document Index: 1
Document: The Commonwealth of the Northern Mariana Islands is a group of islands in the Pacific Ocean that are a political division controlled by the United States. Its capital is Saipan.
Relevance Score: 0.30


Document Rank: 3, Document Index: 4
Document: Capital punishment (the death penalty) has existed in the United States since before the United States was a country. As of 2017, capital punishment is legal in 30 of the 50 states.
Relevance Score: 0.28




# Cohere in LlamaIndex

In [25]:
import os
from llama_index.postprocessor.cohere_rerank import CohereRerank



cohere_rerank = CohereRerank(api_key=os.environ.get("COHERE_API_KEY"), top_n=2)

In [26]:
query_engine = vector_index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[cohere_rerank],
)

In [27]:
response = query_engine.query(
    "What did Sam Altman do in this essay?",
)
print(response)

2025-09-04 15:27:29,513 - INFO - HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
2025-09-04 15:27:29,920 - INFO - HTTP Request: POST https://api.cohere.com/v1/rerank "HTTP/1.1 200 OK"
2025-09-04 15:27:31,403 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


Sam Altman was involved in the decision-making process to reorganize Y Combinator (YC) and eventually agreed to become the president of YC starting with the winter 2014 batch.
