<a href="https://colab.research.google.com/github/vnshtiwari/test_collab_nb/blob/main/RAG_insurance.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langchain faiss-cpu sentence-transformers huggingface_hub langchain_community
!pip install google-generativeai

from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
from google.colab import userdata

# Step 1: Define the URLs whose content will be indexed for retrieval
DATA_URL = [
    "https://www.acko.com/life-insurance/",
    "https://www.acko.com/health-insurance/for-parents/",
    "https://www.acko.com/car-insurance/",
    "https://www.acko.com/two-wheeler-insurance/"
]

# Step 2: Load web content from every URL in DATA_URL
loader = WebBaseLoader(DATA_URL)
documents = loader.load()

# Step 3: Split into chunks of up to 1000 characters; the 200-character
# overlap keeps text that straddles a chunk boundary present in both chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
docs = splitter.split_documents(documents)

# Step 4: Embed the chunks with a Hugging Face sentence-transformers model
# and index them in a FAISS vector store
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(docs, embedding_model)

# Step 5: Google Generative AI LLM
# The API key is read from the Colab secret named GOOGLE_API_KEY instead of
# being hard-coded in the notebook. Add the secret in Colab's "Secrets" panel
# and grant this notebook access to it before running this cell.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash-latest",
    google_api_key=userdata.get("GOOGLE_API_KEY"),
)


# Step 6: RetrievalQA chain that answers using chunks fetched by the retriever
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# Step 7: Ask a question
query = "What are the benefits of life insurance from ACKO?"
response = qa_chain.invoke({"query": query})

# The chain's answer text is stored under the 'result' key of the response
print("Query:", query)
print("Response:", response['result'])