In [16]:
# Install into the running kernel's environment (%pip rather than a shell pip).
# langchain-google-genai is included because the Gemini chat model used for
# generation is imported from it.
# NOTE(review): versions are unpinned — pin them once a working set is known.
%pip install -q langchain_community tiktoken langchain-openai langchainhub chromadb langchain sentence-transformers langchain-google-genai

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable


In [2]:
import getpass
import os

def _ensure_env(name, *aliases):
    """Make sure ``os.environ[name]`` holds a non-empty value.

    Each alias is checked first (in the order given), then ``name`` itself.
    If none of them is set, the value is requested with a hidden ``getpass``
    prompt, so a missing secret no longer aborts the cell with a KeyError and
    never has to be written into the notebook.
    """
    for candidate in (*aliases, name):
        value = os.environ.get(candidate)
        if value:
            break
    else:
        value = getpass.getpass(f'{name}: ')
    os.environ[name] = value


# LangSmith tracing configuration.
# NOTE(review): the flag is spelled in lowercase here because LangSmith's
# documentation uses 'true'; confirm how the installed client parses it.
os.environ['LANGCHAIN_TRACING_V2'] = 'true'
os.environ['LANGSMITH_ENDPOINT'] = 'https://api.smith.langchain.com'
# The key is taken from LANGSMITH_API_KEY_2 when present, otherwise from an
# already-exported LANGSMITH_API_KEY.
_ensure_env('LANGSMITH_API_KEY', 'LANGSMITH_API_KEY_2')

# Model provider keys: must be present before the cells that build the LLM.
_ensure_env('OPENAI_API_KEY')
_ensure_env('GEMINI_API_KEY')

In [3]:
question = 'Summarise the content given'

# Read the whole source document into memory. The encoding is pinned so the
# decoded text (and therefore its token count) does not depend on the
# platform's default locale.
# NOTE(review): assumes smallContent.txt is UTF-8 encoded — confirm.
with open('smallContent.txt', 'r', encoding='utf-8') as f:
    data = f.read()

# to approximate tokens
import tiktoken

def count_tokens(content, encoding_name='cl100k_base'):
    """Return how many tokens `content` occupies under a tiktoken encoding.

    content: the text to measure.
    encoding_name: name of the tiktoken encoding to look up
        (defaults to 'cl100k_base').
    """
    token_ids = tiktoken.get_encoding(encoding_name).encode(content)
    return len(token_ids)

# Print the approximate token footprint of the prompt, then of the document.
# `tokens` ends up holding the document's count, as the last value assigned.
for label, text in (("question", question), ("content", data)):
    tokens = count_tokens(text)
    print(f"{label}: {tokens}")


question: 6
content: 787


In [4]:
# split
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the document into overlapping chunks for embedding.
# NOTE(review): chunk_size / chunk_overlap are presumably measured in
# characters (the splitter's default length function), not tokens — confirm.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=10,
)
splits = text_splitter.create_documents(texts=[data])

# Spot-check the second chunk (index 1) to see what a split looks like.
print(splits[1])


page_content='He was too much absorbed with his own thoughts to give any immediate
     answer to my remonstrance. He leaned upon his hand, with his untasted
     breakfast before him, and he stared at the slip of paper which he had
     just drawn from its envelope. Then he took the envelope itself, held
     it up to the light, and very carefully studied both the exterior and
     the flap.'


In [5]:
# embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings

# Sentence-transformers model used to embed the chunks, run on the CPU with
# normalised output vectors.
# NOTE(review): the e5 model card recommends prefixing inputs with
# "query: " / "passage: "; nothing here adds those prefixes — confirm whether
# that matters for retrieval quality.
model_name = 'intfloat/e5-large'
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=dict(device='cpu'),
    encode_kwargs=dict(normalize_embeddings=True),
)
print(embeddings)

  embeddings = HuggingFaceEmbeddings(
2025-11-03 11:08:29.512654: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-11-03 11:08:29.540632: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-03 11:08:29.540672: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-03 11:08:29.540706: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-03 11:08:29.548809: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
T

client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False, 'architecture': 'BertModel'})
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
) model_name='intfloat/e5-large' cache_folder=None model_kwargs={'device': 'cpu'} encode_kwargs={'normalize_embeddings': True} multi_process=False show_progress=False


In [6]:
# retriever
from langchain_community.vectorstores import Chroma

# On-disk location of the Chroma collection; it survives kernel restarts.
persist_directory = './chroma_e5_db'

# Embed the chunks and store them in the persisted collection.
# Each chunk gets a stable, position-based id so that re-running this cell
# rewrites the same records instead of appending another copy of every chunk
# to the existing directory.
# NOTE(review): relies on Chroma upserting when an id already exists — confirm
# with the installed version. A directory populated by earlier runs (random
# ids) still holds those older copies; delete it once to start clean.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
    ids=[f'chunk-{i}' for i in range(len(splits))],
    persist_directory=persist_directory,
)



In [7]:
# Retrieval and Generation

# Retrieve the 3 most similar chunks per query. The result count has to be
# passed as search_kwargs={"k": ...}; a bare `kwargs=` argument is not a
# search option.
retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

# Prompt template (pulled from the LangChain Hub)
from langchain import hub

# Hub reference of the prompt template; the chain built later feeds it a
# `context` and a `question` value.
rag_prompt_ref = "rlm/rag-prompt"
prompt = hub.pull(rag_prompt_ref)

# LLM
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model that writes the final answer.
# The API key is read from the environment rather than written here;
# temperature is 0, no explicit output-token cap or timeout is set, and
# max_retries is 2.
llm = ChatGoogleGenerativeAI(
    google_api_key=os.environ["GEMINI_API_KEY"],
    model="gemini-2.5-flash",
    temperature=0,
    max_retries=2,
    max_tokens=None,
    timeout=None,
)


In [8]:
# post processing - generation
def format_docs(docs):
    """Concatenate the `page_content` of each document into one string.

    docs: iterable of objects exposing a `page_content` attribute.
    Returns the contents in their original order, separated by a blank line.
    """
    separator = "\n\n"
    contents = [doc.page_content for doc in docs]
    return separator.join(contents)

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# Inputs for the prompt: the question goes to the retriever, whose documents
# are flattened to text by format_docs, and is also passed through unchanged.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: gather inputs -> fill the prompt -> call the model ->
# reduce the model's reply to a plain string.
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

# Question
rag_chain.invoke("Who is Porlock?")


'Porlock is a nom-de-plume, not his real name, representing a shifty and evasive personality. He is difficult to trace and has openly stated that his name is not his own. Porlock is important not for himself, but for his connection to a "great man."'