In [18]:
### Data Ingestion
# Data ingestion is the process of collecting and processing data from various sources. In the context of IoT,
# data ingestion involves collecting data from sensors, devices, and other sources. This data is then processed and
# stored in a database or data warehouse for further analysis.
from langchain_community.document_loaders import TextLoader
loader = TextLoader('speech.txt')
text_documents = loader.load()
text_documents

[Document(metadata={'source': 'speech.txt'}, page_content='langchain_openai\nlangchain_core\npython-dotenv\nstreamlit\nlangchain_community\nlangserve\nfastapi\nuvicorn\nsse_starlette')]

In [19]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY'] = os.getenv("OPENAI_API_KEY")

In [20]:
# Web based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

loader = WebBaseLoader(web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
                        bs_kwargs=dict(parse_only=bs4.SoupStrainer(
                            class_=("post-title", "post_content", "post_header")
                        )),)
text_document = loader.load()
text_document

[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n      LLM Powered Autonomous Agents\n    ')]

In [21]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("attention.pdf")
docs = loader.load()


In [22]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
documents = text_splitter.split_documents(docs)
documents[:5]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2024-04-10T21:11:43+00:00', 'author': '', 'keywords': '', 'moddate': '2024-04-10T21:11:43+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': 'attention.pdf', 'total_pages': 15, 'page': 0, 'page_label': '1'}, page_content='Provided proper attribution is provided, Google hereby grants permission to\nreproduce the tables and figures in this paper solely for use in journalistic or\nscholarly works.\nAttention Is All You Need\nAshish Vaswani∗\nGoogle Brain\navaswani@google.com\nNoam Shazeer∗\nGoogle Brain\nnoam@google.com\nNiki Parmar∗\nGoogle Research\nnikip@google.com\nJakob Uszkoreit∗\nGoogle Research\nusz@google.com\nLlion Jones∗\nGoogle Research\nllion@google.com\nAidan N. Gomez∗ †\nUniversity of Toronto\naidan@cs.toronto.edu\nŁukasz Kaiser∗\nGoogle Brain\nlukaszk

In [24]:
## Vector Embedding and Vector Store

from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

os.environ["ANONYMIZED_TELEMETRY"] = "False"
db = Chroma.from_documents(documents[:15], OpenAIEmbeddings(), persist_directory="./chroma_store")

Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


In [29]:
query = "An attention function can be described as mapping a query"
result = db.similarity_search(query)
result[0].page_content

'Scaled Dot-Product Attention\n Multi-Head Attention\nFigure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several\nattention layers running in parallel.\nof the values, where the weight assigned to each value is computed by a compatibility function of the\nquery with the corresponding key.\n3.2.1 Scaled Dot-Product Attention\nWe call our particular attention "Scaled Dot-Product Attention" (Figure 2). The input consists of\nqueries and keys of dimension dk, and values of dimension dv. We compute the dot products of the\nquery with all keys, divide each by √dk, and apply a softmax function to obtain the weights on the\nvalues.\nIn practice, we compute the attention function on a set of queries simultaneously, packed together\ninto a matrix Q. The keys and values are also packed together into matrices K and V . We compute\nthe matrix of outputs as:\nAttention(Q, K, V) = softmax(QKT\n√dk\n)V (1)'

In [32]:
## Faiss Vector Database
from langchain_community.vectorstores import FAISS
db1 = FAISS.from_documents(docs[:30], OpenAIEmbeddings())

In [31]:
query = "An attention function can be described as mapping a query"
result = db1.similarity_search(query)
result[0].page_content

'Scaled Dot-Product Attention\n Multi-Head Attention\nFigure 2: (left) Scaled Dot-Product Attention. (right) Multi-Head Attention consists of several\nattention layers running in parallel.\nof the values, where the weight assigned to each value is computed by a compatibility function of the\nquery with the corresponding key.\n3.2.1 Scaled Dot-Product Attention\nWe call our particular attention "Scaled Dot-Product Attention" (Figure 2). The input consists of\nqueries and keys of dimension dk, and values of dimension dv. We compute the dot products of the\nquery with all keys, divide each by √dk, and apply a softmax function to obtain the weights on the\nvalues.\nIn practice, we compute the attention function on a set of queries simultaneously, packed together\ninto a matrix Q. The keys and values are also packed together into matrices K and V . We compute\nthe matrix of outputs as:\nAttention(Q, K, V) = softmax(QKT\n√dk\n)V (1)\nThe two most commonly used attention functions are additiv

In [33]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer.
I will tip you $1000 if the user finds the answer helpful.
<context>
{context}
</context>
Question: {input}
""")

In [34]:
# Chain Introduction 
# Created Stuffed Document Chain
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4")
from langchain.chains.combine_documents import create_stuff_documents_chain
document_chain = create_stuff_documents_chain(llm, prompt) 

In [35]:
# Retriever
retriever = db1.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7ff698381990>, search_kwargs={})

In [36]:
# Retrieval Chain
from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [38]:
response = retrieval_chain.invoke({
    "input": "An attention function can be described as mapping a query"
})
response['answer']

'An attention function can be described as mapping a query and a set of key-value pairs to an output. The output is a weighted sum of the values, where the weight assigned to each value is computed by a compatibility function of the query with the corresponding key. In Scaled Dot-Product Attention, the input consists of queries and keys of dimension dk, and values of dimension dv. The compatibility function computes the dot products of the query with all keys, divides each by √dk, and applies a softmax function to obtain the weights on the values. In Multi-Head Attention, instead of performing a single attention function, it linearly projects the queries, keys and values several times with different learned linear projections to different dimensions. This process is performed in parallel for each projected version, yielding a certain dimensional output values which are then concatenated and once again projected.'