In [5]:
# DATA INGESTION
# Load a local UTF-8 text file into a list of LangChain Document objects.
from langchain_community.document_loaders import TextLoader

loader = TextLoader(
    "speech.txt",
    encoding="utf-8",
)
textdocuments = loader.load()

# Show the loaded documents (metadata plus page_content).
print(textdocuments)

[Document(metadata={'source': 'speech.txt'}, page_content='Martin Luther King Jr. — "I Have a Dream"\n“I have a dream that one day every valley shall be exalted,\nevery hill and mountain shall be made low,\nthe rough places will be made plain,\nand the crooked places will be made straight;\nand the glory of the Lord shall be revealed and all flesh shall see it together.”\n\n“I have a dream that my four little children will one day live in a nation\nwhere they will not be judged by the color of their skin\nbut by the content of their character.”')]


In [None]:
import os
from dotenv import load_dotenv

# Populate the process environment via python-dotenv before reading any keys.
load_dotenv()

# LangSmith tracing configuration.
os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_PROJECT"] = "llm_translator"

# os.environ accepts only str values, so `os.environ[k] = os.getenv(...)`
# raises TypeError when the source variable is unset. Copy the LangSmith key
# from LANGCHAIN_API_KEY only when that variable is actually available.
langsmith_api_key = os.getenv("LANGCHAIN_API_KEY")
if langsmith_api_key is not None:
    os.environ["LANGSMITH_API_KEY"] = langsmith_api_key

# Assigning OPENAI_API_KEY / GOOGLE_API_KEY to their own values is a no-op
# when they are set and a TypeError when they are not, so report the missing
# names instead of crashing the cell. Only names are printed, never values.
missing_keys = [
    name
    for name in ("LANGCHAIN_API_KEY", "OPENAI_API_KEY", "GOOGLE_API_KEY")
    if os.getenv(name) is None
]
if missing_keys:
    print("Missing environment variables:", ", ".join(missing_keys))

In [40]:
# WEB BASED LOADERS
# Load the content of a web page as LangChain Document objects.
from langchain_community.document_loaders import WebBaseLoader

loader = WebBaseLoader(
    web_path="https://en.wikipedia.org/wiki/Natural_language_processing",
)
text_doc = loader.load()

# Prints the whole document list, including the full extracted page text.
print(text_doc)

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Natural_language_processing', 'title': 'Natural language processing - Wikipedia', 'language': 'en'}, page_content='\n\n\n\nNatural language processing - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload fileSpecial pages\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to 

In [None]:

from langchain_community.document_loaders import PyPDFLoader

# Load the PDF into a list of Document objects.
loader = PyPDFLoader("test.pdf")
text = loader.load()

# Join the text of every loaded document into one newline-separated string.
page_texts = [doc.page_content for doc in text]
all_text = "\n".join(page_texts)



from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded PDF documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(text)


# Vector embeddings and vector store

from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model settings: which model to use, the device it is loaded on,
# and the encode option that leaves embeddings un-normalized.
model_name = "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=False)

embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore
from uuid import uuid4

# Create FAISS index
# The index is sized from the embedding length, measured by embedding a
# throwaway string. Use the built-in len() rather than calling __len__().
embedding_dim = len(embeddings.embed_query("test"))
index = faiss.IndexFlatL2(embedding_dim)

# Start from an empty store: no documents and no index-to-id mapping yet.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

# One random UUID string per chunk, passed as the document ids.
uuids = [str(uuid4()) for _ in chunks]
vector_store.add_documents(documents=chunks, ids=uuids)

# Save the populated vector store locally under "my_faiss_index".
vector_store.save_local("my_faiss_index")


## Query
# Run a similarity search against the vector store and show the first hit.
query = "Normal Distribution"
res = vector_store.similarity_search(query=query)

top_hit = res[0]
print(top_hit.page_content)


Normal Distribution [4] 
Example 2: According to a study by A.C. Neilson,
children between 2 and 5 years of age watch an
average of 25 hours of television per week. Assume the
variable is approximately normally distributed with a
standard deviation of 2. If a child is selected at
random, find the probability that the child watched
more than 27 hours of television per week.
Dr. Faisal Bukhari, PU, Lahore 9


In [45]:
from langchain.chat_models import init_chat_model
from langchain.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Chat model requested from the "groq" provider.
# NOTE(review): no Groq API key is set explicitly in the visible cells;
# presumably it comes from the environment/.env file - confirm.
# NOTE(review): confirm this model id is still served by the provider.
llm = init_chat_model("llama3-8b-8192", model_provider="groq")

# Prompt with two placeholders: {context} for the retrieved documents and
# {input} for the user's question.
prompt = ChatPromptTemplate.from_template(
    "Answer the following questions with respect to the provided context only "
    "with step by step reasoning . <context> {context} </context> "
    "Question: {input}"
)

# Chain that feeds documents into the prompt and sends it to the model.
document_chain = create_stuff_documents_chain(llm, prompt)

# Wrap the vector store as a retriever and combine it with the document chain.
retriever = vector_store.as_retriever()
retrieval_chain = create_retrieval_chain(retriever, document_chain)

response = retrieval_chain.invoke({"input": "answer of example 2"})

print(response["answer"])


Based on the provided context, we are asked to answer the solution of Example 2.

Example 2 is labeled as "Normal Distribution [2]". We assume that this is a normal distribution problem.

However, there is no specific problem or question provided for Example 2. Therefore, we cannot provide a solution for Example 2.

If you could provide the specific problem or question, I would be happy to help you with the solution.
