<a href="https://colab.research.google.com/github/y-pred/Langchain/blob/main/Chatbot_Trained_on_Own_Datasource.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain -q
!pip install openai -q
!pip install chromaDB -q
!pip install unstructured -q
!pip install -U sentence-transformers
!pip install transformers -q

In [2]:
import os
import langchain
from langchain.llms import OpenAI
os.environ['OPENAI_API_KEY'] = 'OPENAI API KEY'

In [None]:
llm = OpenAI(temperature=0.2)

###Document Indexing

In [None]:
from langchain.document_loaders import UnstructuredURLLoader
loaders = UnstructuredURLLoader(urls=[
    "https://en.wikipedia.org/wiki/Kanpur",
    "https://travel.india.com/guide/destination/discover-the-top-6-must-try-restaurants-in-kanpur-6832444/",
    "https://www.tripoto.com/uttar-pradesh/trips/kanpur-beyond-stereotypes-the-city-with-a-little-of-everything-5dad96a31a569"
])

data = loaders.load()
len(data)

###Text Splitting

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=100
)
chunks = text_splitter.split_documents(data)

In [None]:
!pip install tiktoken

###Embeddings using Sentence transformer

In [None]:
from langchain.embeddings import SentenceTransformerEmbeddings
embeddings = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")


In [8]:
from langchain.vectorstores import Chroma
db = Chroma.from_documents(chunks,embeddings)

In [9]:
query = 'What was the former name of Kanpur?'
docs = db.similarity_search(query)
print(docs[0].page_content)

Kanpur (

/kɑːnˈpʊər/

ⓘ), formerly anglicized as

Cawnpore, is a large industrial city located in the central-western part of the state of

Uttar Pradesh, India. Founded in year 1207, Kanpur became one of the most important commercial and military stations of

British India. Kanpur is also the financial capital of Uttar Pradesh and part of upcoming Bundelkhand Industrial Development Authority i.e. BIDA Nestled on the banks of


In [10]:
docs_with_score = db.similarity_search_with_score(query, k=3)
docs_with_score

[(Document(page_content='Kanpur (\n\n/kɑːnˈpʊər/\n\nⓘ), formerly anglicized as\n\nCawnpore, is a large industrial city located in the central-western part of the state of\n\nUttar Pradesh, India. Founded in year 1207, Kanpur became one of the most important commercial and military stations of\n\nBritish India. Kanpur is also the financial capital of Uttar Pradesh and part of upcoming Bundelkhand Industrial Development Authority i.e. BIDA Nestled on the banks of', metadata={'source': 'https://en.wikipedia.org/wiki/Kanpur'}),
  0.4760293960571289),
 (Document(page_content='It is the 12th most populous city and the 11th most populous urban agglomeration in India (Census of India, 2011). Kanpur was an important British garrison town until 1947, when India gained independence. The urban district of Kanpur Nagar serves as the headquarters of the Kanpur Division, Kanpur Range and Kanpur Zone.\n\nSome of the more popular places in Kanpur include J. K. Temple, Kanpur Central, ZSquare Mall, Atal

###Retrival QA

In [13]:
#Use retrivalQA for retriving relevant parts of the data
from langchain.chains import RetrievalQA
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 2})
res = retriever.get_relevant_documents("Former name of Kanpur")
res

[Document(page_content='Kanpur (\n\n/kɑːnˈpʊər/\n\nⓘ), formerly anglicized as\n\nCawnpore, is a large industrial city located in the central-western part of the state of\n\nUttar Pradesh, India. Founded in year 1207, Kanpur became one of the most important commercial and military stations of\n\nBritish India. Kanpur is also the financial capital of Uttar Pradesh and part of upcoming Bundelkhand Industrial Development Authority i.e. BIDA Nestled on the banks of', metadata={'source': 'https://en.wikipedia.org/wiki/Kanpur'}),
 Document(page_content='Main article: \n\nList of people from Kanpur\n\nLala Kamlapat Singhania, industrialist\n\nPadampat Singhania, industrialist\n\nKuldeep Yadav, cricketer\n\nHarish-Chandra, mathematician\n\nGiriraj Kishore, novelist\n\nIrshad Mirza, industrialist\n\nGaurav Khanna, actor\n\nSee also[edit]\n\nKanpur Dehat (Lok Sabha constituency)\n\nList of cities in Uttar Pradesh\n\nList of engineering colleges in Kanpur\n\nRenamed places in Kanpur\n\nList of twi

###ConversationalRetrivalChain

In [15]:
#Using conversationalretrivalChain since this has memory inbuilt

from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain

chat = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
#matching_docs = db.similarity_search(query)

chain = ConversationalRetrievalChain.from_llm(llm=chat, retriever=retriever)

response = chain({"question":'Which year did Kanpur come into existance?', "chat_history":[]})

#response = chain({"question":'Which year this was done??',"chat_history":[('Who created tge City of Kanpur?','Raja Kanh Deo of the Kanhpuriya clan of Rajputs established the city of Kanhpur and made it his capital, which later came to be known as Kanpur.')]})
response

{'question': 'Which year did Kanpur come into existance?',
 'chat_history': [],
 'answer': 'Kanpur was founded in the year 1207.'}

In [None]:
response['answer']

'The city of Kanpur was established in the year 1207 by Raja Kanh Deo of the Kanhpuriya clan of Rajputs.'

Filling up chat history so that we can understand how chat history is populated

In [16]:
response = chain({"question":'Which industries are in Kanpur?',"chat_history":[('Which year did Kanpur come into existance?','The city of Kanpur was established in the year 1207 by Raja Kanh Deo of the Kanhpuriya clan of Rajputs.')]})

In [17]:
response

{'question': 'Which industries are in Kanpur?',
 'chat_history': [('Which year did Kanpur come into existance?',
   'The city of Kanpur was established in the year 1207 by Raja Kanh Deo of the Kanhpuriya clan of Rajputs.')],
 'answer': 'Kanpur is known for its leather and textile industries. Additionally, the city has a significant presence in the chemical, fertilizer, and engineering industries.'}

In [24]:
response = chain({"question":'What is the most famous tourist spots in Kanpur?',"chat_history":[('Which year did Kanpur come into existance?','The city of Kanpur was established in the year 1207 by Raja Kanh Deo of the Kanhpuriya clan of Rajputs.','Which industries are in Kanpur?','Kanpur is known for its leather and textile industries. Additionally, the city has a significant presence in the chemical, fertilizer, and engineering industries.')]})
response

{'question': 'What is the most famous tourist spots in Kanpur?',
 'chat_history': [('Which year did Kanpur come into existance?',
   'The city of Kanpur was established in the year 1207 by Raja Kanh Deo of the Kanhpuriya clan of Rajputs.',
   'Which industries are in Kanpur?',
   'Kanpur is known for its leather and textile industries. Additionally, the city has a significant presence in the chemical, fertilizer, and engineering industries.')],
 'answer': 'Some of the most famous tourist spots in Kanpur are:\n\n1. Allen Forest Zoo\n2. Phool Bagh\n3. Kanpur Memorial Church\n4. Moti Jheel\n5. Blue World Theme Park\n6. J.K. Temple\n7. Bithoor\n\nThese are just a few of the popular attractions in Kanpur that visitors often enjoy exploring.'}