In [None]:
!pip install langchain selenium unstructured openai deeplake tiktoken

In [7]:
import os
os.environ['OPENAI_API_KEY'] = ""
os.environ['ACTIVELOOP_API_KEY'] = ""

In [9]:
!activeloop login -t your_activeloop key

Successfully logged in to Activeloop.


In [10]:
# Import the necessary libraries
from langchain import OpenAI
from langchain.vectorstores import DeepLake
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain import PromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain.document_loaders import SeleniumURLLoader

In [11]:
# URLs from where the data will be fetched
urls = [
    "https://timesofindia.indiatimes.com/india/chandrayaan-3-news-live-updates-isro-lunar-mission-mooon-landing-time-site-telecast-vikram-lander-photos/liveblog/102929797.cms",
    "https://www.hindustantimes.com/india-news/chandrayaan-3-last-20-minutes-of-vikram-landers-journey-to-be-most-crucial-101692691970317.html",
    "https://www.gadgets360.com/chandrayaan/features/chandrayaan-3-live-update-landing-date-time-august-23-latest-news-videos-4317977",
    "https://www.abplive.com/states/up-uk/up-schools-will-open-for-one-hour-on-evening-of-august-23-decision-taken-regarding-chandrayaan-3-mission-2478624",
    "https://www.space.com/chandrayaan-3-indian-moon-mission-rover"
    "https://www.wionews.com/india-news/chandrayaan-3-landing-on-aug-27-does-india-have-plan-b-for-the-lunar-mission-627807"
]

### Split the documents into chunks & compute their embeddings


In [12]:
## Dataloader
dataloader = SeleniumURLLoader(urls=urls)
data_load = dataloader.load()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [None]:
## Text Splitter
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
split = splitter.split_documents(data_load)




Deep Lake Dataset in hub://vishnusharma7/Customer_Support_chatbot already exists, loading from the storage


In [13]:
## Embeddings
embeddings = OpenAIEmbeddings(model='text-embedding-ada-002')

activeloop_id = "vishnusharma7"
activeloop_dataset = "Customer_Support_chatbot"
dataset_path = f'hub://{activeloop_id}/{activeloop_dataset}'

db = DeepLake(dataset_path=dataset_path, embedding=embeddings)

Deep Lake Dataset in hub://vishnusharma7/Customer_Support_chatbot already exists, loading from the storage


In [None]:
# Uploading the data to vector databases
db.add_documents(split)

Creating embedding data: 100%|██████████| 15/15 [04:03<00:00, 16.25s/it]
\

Dataset(path='hub://vishnusharma7/Customer_Support_chatbot', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype       shape      dtype  compression
  -------    -------     -------    -------  ------- 
 embedding  embedding  (166, 1536)  float32   None   
    id        text      (166, 1)      str     None   
 metadata     json      (166, 1)      str     None   
   text       text      (166, 1)      str     None   


 

['e547aa1e-4700-11ee-8a7c-0242ac1c000c',
 'e547abae-4700-11ee-8a7c-0242ac1c000c',
 'e547ac6c-4700-11ee-8a7c-0242ac1c000c',
 'e547ad0c-4700-11ee-8a7c-0242ac1c000c',
 'e547ada2-4700-11ee-8a7c-0242ac1c000c',
 'e547af0a-4700-11ee-8a7c-0242ac1c000c',
 'e547afdc-4700-11ee-8a7c-0242ac1c000c',
 'e547b072-4700-11ee-8a7c-0242ac1c000c',
 'e547b0fe-4700-11ee-8a7c-0242ac1c000c',
 'e547b180-4700-11ee-8a7c-0242ac1c000c',
 'e547b216-4700-11ee-8a7c-0242ac1c000c',
 'e547b31a-4700-11ee-8a7c-0242ac1c000c',
 'e547b3a6-4700-11ee-8a7c-0242ac1c000c',
 'e547b464-4700-11ee-8a7c-0242ac1c000c',
 'e547b4fa-4700-11ee-8a7c-0242ac1c000c',
 'e547b590-4700-11ee-8a7c-0242ac1c000c',
 'e547b61c-4700-11ee-8a7c-0242ac1c000c',
 'e547b69e-4700-11ee-8a7c-0242ac1c000c',
 'e547b716-4700-11ee-8a7c-0242ac1c000c',
 'e547b77a-4700-11ee-8a7c-0242ac1c000c',
 'e547b7e8-4700-11ee-8a7c-0242ac1c000c',
 'e547b856-4700-11ee-8a7c-0242ac1c000c',
 'e547b8ce-4700-11ee-8a7c-0242ac1c000c',
 'e547b95a-4700-11ee-8a7c-0242ac1c000c',
 'e547b9f0-4700-

In [14]:
# Inference
query = "When is chandrayaan-3 is going to land on moon?"
ans = db.similarity_search(query)

print(ans[0].page_content)

As quoted by news agency PTI, Director Desai said: "On August 23, two hours before Chandrayaan-3 lands on the Moon, we will decide on whether or not it will be appropriate to land it at that time based on the health of the lander module and the conditions on the moon."

"In case, if any factor appears to be not favourable, then we will land the module on the moon on August 27. No problem should occur and we will be able to land the module on August 23," Desai added.

In the latest update, ISRO shared pictures of the lunar far side captured by the Lander Hazard Detection and Avoidance Camera (LHDAC). The Indian space agency also thanked everyone for the wishes and positivity for the mission as the Indian space agency said that Chandrayaan-3 is set to land on the Moon on August 23 around 6:04 pm IST.


### Crafting a Prompt

In [16]:
template = """ You are a extraordinary customer support chatbot that answer questions.

You know the context information which will be entered via {context} input.

You have to answer the questions based on the context information, You are not allowed to invent answes by yourself.

Question = {query}

Answer:

"""

prompt = PromptTemplate(
    input_variables = ["context", "query"],
    template=template
)

query = "What is chandrayaan-3's mission?"

docs = db.similarity_search(query)

retrieved_docs = [doc.page_content for doc in docs]

context = "\n\n".join(retrieved_docs)

final_prompt = prompt.format(context=context, query=query)


In [17]:
llm = OpenAI(model='text-davinci-003', temperature=0)
answer = llm(final_prompt)

print(answer)

Chandrayaan-3's mission is to traverse a distance of approximately half a kilometre on the moon's surface over the next 12 days, capturing vital images and data.
