<a href="https://colab.research.google.com/github/zaidrj/RAG/blob/main/RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [51]:
%pip install -qU langchain-pinecone langchain-google-genai

In [52]:
from google.colab import userdata
userdata.get('PINECONE_API_KEY')

from pinecone import Pinecone, ServerlessSpec

pinecone_api_key = userdata.get('PINECONE_API_KEY')

pc = Pinecone(api_key=pinecone_api_key)

In [53]:
from google.colab import userdata
GOOGLE_API_KEY= userdata.get('GOOGLE_API_KEY')

In [56]:
import time

index_name = "langchain-rag-project"

pc.create_index(
        name=index_name,
        dimension=768,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
)

index = pc.Index(index_name)

In [57]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import os

os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004"
)

In [66]:
vector = embeddings.embed_query("hello")

In [67]:
vector[:5]

[0.048244964331388474,
 0.011776605620980263,
 -0.01155206747353077,
 -0.018164537847042084,
 -0.002611024072393775]

In [60]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [61]:
# Dummy_Data_Save

from uuid import uuid4

from langchain_core.documents import Document

document_1 = Document(
    page_content="I had chocalate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]

In [62]:
uuids = [str(uuid4()) for _ in range(len(documents))]

vector_store.add_documents(documents=documents, ids=uuids)

['1444b0f9-85c8-41cd-b5c9-29a7639e4dbb',
 'f1764352-422e-405b-bc4e-fd2220489565',
 '3f4ae522-fa8f-4d26-a004-c3ddb1be40fd',
 '2d3c7bbf-37a4-4caf-888a-40c21272bc4c',
 'e927aaa0-1ad1-40f0-9ab7-40025e2e0176',
 'afd93e6f-b084-442f-b3c3-33127d62d441',
 '97e6f920-f0a2-4dad-a456-05c72b7184d5',
 'ec5b210b-456e-4497-9cb9-3e03c176863c',
 'b062d89c-194b-4c5b-b708-b71c8be2ba74',
 '5f69c783-e0db-4a7b-8bb6-008890a1db56']

In [63]:
# Data_Retreival
results = vector_store.similarity_search(
    "LangChain provides abstractions to make working with LLMs easy",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"* {res.page_content} [{res.metadata}]")

In [64]:
results = vector_store.similarity_search_with_score(
    "Will it be hot tomorrow?"
)
for res, score in results:
    print(f"* [SIM={score:3f}] {res.page_content} [{res.metadata}]")

In [69]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash"
)

In [73]:
def answer_question(question: str) -> str:
    vector_results = vector_store.similarity_search(question, k=1)
    final_Result = llm.invoke(f"Answer this question of user: {question}, here is reference {vector_results}",)
    return final_Result

In [80]:
answer = answer_question("what is the source for this : The top 10 soccer players in the world right now")
print(answer.content)

Based on the provided information, the source for the statement "The top 10 soccer players in the world right now" is a website.
