In [1]:
#Data Ingestion
from langchain_community.document_loaders import TextLoader

# Load the plain-text speech file through LangChain's TextLoader.
speech_path = "speech.txt"
loader = TextLoader(speech_path)
text_documents = loader.load()

In [2]:
import os
from dotenv import load_dotenv

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

os.environ["LANGCHAIN_TRACING_V2"] = "true"

# Fail fast with a readable message when a required key is absent. Assigning
# os.getenv(name) back into os.environ would instead raise an opaque
# "TypeError: str expected, not NoneType" for a missing variable, and is a
# no-op when the variable is already set.
_required_keys = ("LANGCHAIN_API_KEY", "GEMINI_API_KEY")
_missing_keys = [name for name in _required_keys if not os.getenv(name)]
if _missing_keys:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_keys)
        + ". Define them in the environment or in a .env file."
    )

In [3]:
#web based loader 

from langchain_community.document_loaders import WebBaseLoader
import bs4

# Hand BeautifulSoup a SoupStrainer keyed on these CSS class names via `parse_only`.
wanted_classes = ("post-title", "post-header", "post-content")
strainer = bs4.SoupStrainer(class_=wanted_classes)

loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": strainer},
)

text_documents = loader.load()

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [4]:
# PDF based loader

from langchain_community.document_loaders import PyPDFLoader

# Load the PDF; the resulting documents are kept in `docs` for splitting.
pdf_path = 'attention.pdf'
loader = PyPDFLoader(pdf_path)
docs = loader.load()

In [5]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitter settings gathered in one place so they are easy to tune.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Break the loaded PDF documents into overlapping chunks.
documents = text_splitter.split_documents(docs)

In [6]:
# Preview the first five chunks returned by the splitter.
documents[:5]

[Document(metadata={'source': 'attention.pdf', 'page': 0}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Tran

In [7]:
#vector embedding and vector store

import google.generativeai as genai
from langchain_community.vectorstores import Chroma
from langchain.embeddings.base import Embeddings

# Configure the Google Generative AI client with the key held in the environment.
_gemini_key = os.environ["GEMINI_API_KEY"]
genai.configure(api_key=_gemini_key)

# Define a custom embedding function using Google Generative AI
class GoogleGenAIEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by ``genai.embed_content``.

    Args:
        model: Embedding model name forwarded to ``genai.embed_content``.
            Defaults to ``"models/text-embedding-004"`` so that calling
            ``GoogleGenAIEmbeddings()`` with no arguments keeps working.
    """

    def __init__(self, model="models/text-embedding-004"):
        # Stored once so both embed methods always use the same model.
        self.model = model

    def embed_documents(self, texts):
        """Return one embedding per entry of ``texts``, in input order.

        Issues one ``embed_content`` request per text by delegating to
        ``embed_query``; an empty ``texts`` yields an empty list.
        """
        return [self.embed_query(text) for text in texts]

    def embed_query(self, text):
        """Return the ``"embedding"`` field of the ``embed_content`` response for ``text``."""
        return genai.embed_content(model=self.model, content=text)["embedding"]

# Build the embedding adapter used by the vector stores.
google_embeddings = GoogleGenAIEmbeddings()

# Index only the first 20 chunks in a Chroma vector store.
db = Chroma.from_documents(
    documents=documents[:20],
    embedding=google_embeddings,
)


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
#Vector database

# Run a similarity search against the Chroma store and display the hits.
query = "what is attention is all you need"
result = db.similarity_search(query=query)
result

[Document(metadata={'page': 0, 'source': 'attention.pdf'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose a new simple network architecture, the Tran

In [9]:
#FAISS vector database 

from langchain_community.vectorstores import FAISS

# Index the same first 20 chunks in a FAISS store, reusing the embedding adapter.
db1 = FAISS.from_documents(
    documents=documents[:20],
    embedding=google_embeddings,
)

In [10]:
# Run a similarity search against the FAISS store and display the hits.
faiss_query = "What is attention is all you need"
result1 = db1.similarity_search(query=faiss_query)
result1

[Document(id='bda4be52-597a-4de5-b033-3e37f87c23bf', metadata={'source': 'attention.pdf', 'page': 0}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszkaiser@google.com\nIllia Polosukhin∗ ‡\nillia.polosukhin@gmail.com\nAbstract\nThe dominant sequence transduction models are based on complex recurrent or\nconvolutional neural networks that include an encoder and a decoder. The best\nperforming models also connect the encoder and decoder through an attention\nmechanism. We propose 

In [35]:
# Read the Gemini key from the environment instead of embedding it in the
# notebook. SECURITY: a literal key must never be committed; any key that was
# ever stored in a notebook or its history should be revoked and reissued.
api_key = os.environ["GEMINI_API_KEY"]
genai.configure(api_key=api_key)


In [36]:
from langchain_google_genai import GoogleGenerativeAI
# Gemini LLM that answers from the retrieved context.
llm = GoogleGenerativeAI(
    model="gemini-2.0-flash-exp",
    google_api_key=api_key,
)

In [39]:
from langchain_core.prompts import ChatPromptTemplate
# Template text is kept verbatim; {context} and {input} are its two placeholders.
_rag_template = '''
                            Answer the following questions only based on the following context.
                            Think step by step before providing a detailed answer. 
                            I will tip you $1000 if the user finds the answer helpful.
                            <context>
                            {context}
                            </context>
                            Question : {input}
                            '''
prompt = ChatPromptTemplate.from_template(_rag_template)

In [40]:
# Create stuff document chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the prompt into a "stuff documents" chain.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [41]:
# The retriever only needs to fetch data from the vector store, not store it.

retriever = db1.as_retriever()

In [42]:
from langchain.chains import create_retrieval_chain
# Wire the retriever to the document chain.
retrieva_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)

In [43]:
# Send the question through the retrieval chain and display its full response.
chain_request = {"input": "An attention function can be described as mapping a query"}
retrieva_chain.invoke(chain_request)

{'input': 'An attention function can be described as mapping a query',
 'context': [Document(id='7db96f5b-67f7-4979-a4b8-f698295fa67f', metadata={'source': 'attention.pdf', 'page': 2}, page_content='3.2 Attention\nAn attention function can be described as mapping a query and a set of key-value pairs to an output,\nwhere the query, keys, values, and output are all vectors. The output is computed as a weighted sum\n3'),
  Document(id='09fe6590-d942-420e-b796-cd184651951e', metadata={'source': 'attention.pdf', 'page': 3}, page_content='Scaled Dot-Product Attention\n Multi-Head Attention\nFigure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several\nattention layers running in parallel.\nof the values, where the weight assigned to each value is computed by a compatibility function of the\nquery with the corresponding key.\n3.2.1 Scaled Dot-Product Attention\nWe call our particular attention "Scaled Dot-Product Attention" (Figure 2). The input consists of\