### Import necessary libraries

In [11]:
import llama_index
from llama_index.embeddings import LangchainEmbedding
from langchain.embeddings import huggingface as lhf
import dotenv
from llama_index import download_loader
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LangchainEmbedding, ServiceContext
from llama_index.node_parser import SimpleNodeParser
from llama_index.llms import OpenAI, HuggingFaceLLM

from pathlib import Path
import os
import openai

# Load the .env file first: the API key may be defined only there, and
# os.getenv() sees it only after load_dotenv() has populated the environment.
dotenv.load_dotenv('RAG/.env')

# Hand the key to the openai client explicitly.
openai.api_key = os.getenv("OPENAI_API_KEY")

True

In [2]:
# from script import main

# index = main()

### Set the service context

In [3]:
# Embeddings: a HuggingFace sentence-transformers model wrapped for llama_index.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)
embed_model = LangchainEmbedding(hf_embeddings)

# Generation: OpenAI model queried with temperature 0.
llm = OpenAI(model="text-davinci-002", temperature=0)

# Bundle both so the index and chat engines below share one configuration.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
)



  from .autonotebook import tqdm as notebook_tqdm


### Load the data

In [4]:
# Fetch the MarkdownReader loader class and instantiate it.
MarkdownReader = download_loader("MarkdownReader")
loader = MarkdownReader()

# The markdown file location comes from the PATH_DOC environment variable.
path = os.getenv("PATH_DOC")
if not path:
    # Fail early with a clear message instead of the opaque TypeError that
    # Path(None) would raise.
    raise RuntimeError(
        "PATH_DOC is not set; define it in the environment or in the .env file"
    )

documents = loader.load_data(file=Path(path))



In [5]:
# Same load as the previous cell: read PATH_DOC again and re-load the file
# with the existing `loader`.
path = os.getenv("PATH_DOC")
doc_path = Path(path)
documents = loader.load_data(file=doc_path)



In [6]:

# Node parser created with its default settings.
parser = SimpleNodeParser()

# Turn the loaded documents into nodes; these are what the index is built from.
nodes = parser.get_nodes_from_documents(documents)



### Create the index and retriever

In [7]:
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.indices.vector_store.retrievers import VectorIndexRetriever
from llama_index.vector_stores.types import ExactMatchFilter, MetadataFilters

# Build the vector index from the parsed nodes, using the shared service
# context (embeddings are generated here; a progress bar is shown).
index = VectorStoreIndex(
    nodes=nodes,
    service_context=service_context,
    show_progress=True,
)

# Retriever over the index, limited to the 3 most similar nodes per query.
# `alpha` and `doc_ids` are passed explicitly as None.
# Metadata filtering is currently disabled; the previously tried option was:
#   filters=[ExactMatchFilter(key="name", value="paul graham")]
retriever = VectorIndexRetriever(
    index=index,
    vector_store_query_mode="default",
    similarity_top_k=3,
    alpha=None,
    doc_ids=None,
)


Generating embeddings:   0%|          | 0/26 [00:00<?, ?it/s]

Generating embeddings: 100%|██████████| 26/26 [00:20<00:00,  1.26it/s]


In [8]:
from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine

# Build the response synthesizer from the shared service context so answers
# are generated by the `llm` configured above rather than by library defaults.
response_synthesizer = get_response_synthesizer(service_context=service_context)

# Query engine = retriever (fetches the relevant nodes) + synthesizer (writes the answer).
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
)


### Experiments with chat and query

In [12]:
# One-shot question sent through the retriever query engine.
aadhaar_question = "Help me apply for a aadhar card"
response = query_engine.query(aadhaar_question)

In [13]:
# Display the full Response object (answer text plus the retrieved source nodes).
response

Response(response='\nUnfortunately, we do not provide assistance with applying for an Aadhaar card. You can visit the official website of the Unique Identification Authority of India (UIDAI) to apply for an Aadhaar card.', source_nodes=[NodeWithScore(node=TextNode(id_='6582bad4-13f4-4ff4-bf91-6f8cdb7ac24f', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='a111b233-afb7-4b2e-8641-14ef83a25113', node_type=None, metadata={}, hash='69097c1ac8f6bcf026ec06bd537ec32b6b59464fabe28540d4baa4e52d3d8629')}, hash='1c1ed016b92f68cadc3eeb87a617d8e0df32d9dfb7f5fe4627f345a9adab0fb8', text='- Go to ABC app\n- Navigate to Services > NRI PAN Card > Link PAN with Aadhaar\n- Request reissue the required PAN card and make the payment\n- Our team will reach out to you for the required documents\n\nAlternatively, you can also initiate the process on WhatsApp as well.', start_char_idx=0, end_char_

In [None]:
# Score attached to the first source node of the response.
first_source = response.source_nodes[0]
first_source.score

0.49417061783879623

In [None]:
# NOTE(review): this rebinds `query_engine` from the RetrieverQueryEngine built
# earlier to a chat engine; the cells below call `.chat()` on it. A later cell
# creates the same engine under the name `chat_engine`.
query_engine = index.as_chat_engine(
    chat_mode="react",
    service_context=service_context,
    verbose=True,
)


In [None]:
# First chat message; `query_engine` here is the chat engine created in the previous cell.
query_engine.chat("What documents do I need for a pan card?")

In [None]:
# Second message sent to the same chat engine object.
query_engine.chat("What utility bills are accepted?")

[38;5;200m[1;3mResponse:  Utility bills accepted for PAN card include electricity bills, water bills, telephone bills, gas bills, and broadband bills.
[0m

AgentChatResponse(response=' Utility bills accepted for PAN card include electricity bills, water bills, telephone bills, gas bills, and broadband bills.', sources=[])

In [None]:
import torch

# To use an open-source model for generation instead of OpenAI, the snippet
# below can be used. It is wrapped in a triple-quoted string, so it is NOT
# executed as written; remove the quotes to run it.
# Current problem: stuck at downloading the model "shards".
"""
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    generate_kwargs={"temperature": 0.7, "do_sample": False},
    tokenizer_name="StabilityAI/stablelm-tuned-alpha-3b",
    model_name="StabilityAI/stablelm-tuned-alpha-3b",
    device_map="auto",
    stopping_ids=[50278, 50279, 50277, 1, 0],
    tokenizer_kwargs={"max_length": 4096},
    # uncomment this if using CUDA to reduce memory usage
    model_kwargs={"torch_dtype": torch.float16}
)
service_context = ServiceContext.from_defaults(chunk_size=1024, llm=llm)
"""

In [None]:
# Chat engine over the index in "react" mode, built with the current
# service context; verbose=True prints the intermediate steps.
chat_engine = index.as_chat_engine(
    chat_mode="react",
    verbose=True,
    service_context=service_context,
)

In [None]:
# Ask the chat engine a question; the verbose trace and the final response are shown below.
chat_engine.chat("What documents do I need for a pan card?")

[38;5;200m[1;3mThought: I need to use a tool to help me answer the question.
Action: query_engine_tool
Action Input: {'input': 'What documents do I need for a pan card?'}
[0m[36;1m[1;3mObservation: 
If you have an Aadhaar card, no other document is required. 

If you don't have an Aadhaar card, you will need:
- Passport (Any Country) / OCI Card
- Passport Size Photograph
- Overseas address proof with zip code (Supporting documents - Indian NRO/NRE Account statement or Overseas bank statement or Utility bill)
[0m[38;5;200m[1;3mResponse: To apply for a PAN card, you will need an Aadhaar card or any of the following documents: Passport (Any Country) / OCI Card, Passport Size Photograph, and Overseas address proof with zip code (Supporting documents - Indian NRO/NRE Account statement or Overseas bank statement or Utility bill).
[0m

AgentChatResponse(response='To apply for a PAN card, you will need an Aadhaar card or any of the following documents: Passport (Any Country) / OCI Card, Passport Size Photograph, and Overseas address proof with zip code (Supporting documents - Indian NRO/NRE Account statement or Overseas bank statement or Utility bill).', sources=[])