In [None]:
import os
from langchain.llms import HuggingFaceHub
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Pinecone as PineconeVectorStore
from pinecone import Pinecone  # ✅ Correct import for Pinecone
from langchain.prompts import PromptTemplate
from langchain.memory import ConversationBufferMemory
from langchain.chains import RetrievalQA
from dotenv import load_dotenv

# ✅ Populate the process environment via python-dotenv before reading any setting
load_dotenv()

# ✅ The Hugging Face token is mandatory: abort right away when it is unset or empty
HUGGINGFACE_API_KEY = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
if not HUGGINGFACE_API_KEY:
    raise ValueError("❌ Missing Hugging Face API Key. Set it as HUGGINGFACEHUB_API_TOKEN")

# ✅ LLM handle for DeepSeek-R1, reached through the Hugging Face Hub wrapper
# NOTE(review): "max_length" is forwarded unchanged in model_kwargs; confirm it is
# the length parameter this model's inference task actually honours.
llm = HuggingFaceHub(
    huggingfacehub_api_token=HUGGINGFACE_API_KEY,
    repo_id="deepseek-ai/DeepSeek-R1",
    model_kwargs=dict(temperature=0.5, max_length=2048),
)

# ✅ Ensure the Pinecone API key is set (fail fast, like every other required setting)
if not os.getenv("PINECONE_API_KEY"):
    raise ValueError("❌ Missing Pinecone API Key. Set it as PINECONE_API_KEY")

# ✅ Initialize Pinecone Client
pinecone_client = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

# ✅ Ensure index name is set
index_name = os.getenv("PINECONE_INDEX")
if not index_name:
    raise ValueError("❌ PINECONE_INDEX is not set. Check your .env file.")

# ✅ Ensure the index exists before using it, so a typo in PINECONE_INDEX is
#    reported here instead of surfacing later during the first query
existing_indexes = [idx["name"] for idx in pinecone_client.list_indexes()]
if index_name not in existing_indexes:
    raise ValueError(f"❌ Index '{index_name}' does not exist in Pinecone. Please create it first.")

# ✅ Initialize Pinecone Index
index = pinecone_client.Index(index_name)

# ✅ Initialize the same embedding model used for storing embeddings
# HuggingFaceBgeEmbeddings prepends a BGE-specific instruction sentence to every
# query by default. all-MiniLM-L6-v2 is not a BGE model, and (assuming the
# documents were indexed without such a prefix, which is this class's default for
# documents) the extra sentence only skews query vectors away from the stored
# ones. An empty query_instruction keeps queries and documents comparable.
huggingface_embeddings = HuggingFaceBgeEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True},
    query_instruction="",
)

# ✅ Wrap the already-initialized Pinecone index as a LangChain vector store;
#    "text" is the key under which each chunk's text was stored
vectorstore = PineconeVectorStore(
    text_key="text",
    embedding=huggingface_embeddings,
    index=index,
)

# ✅ Similarity-search retriever that hands back the 3 closest chunks per query
retriever = vectorstore.as_retriever(
    search_kwargs=dict(k=3),
    search_type="similarity",
)

# ✅ Define the Prompt Template for RAG-based Q&A
# The template exposes three variables:
#   {context}      - the retrieved chunks stuffed in by the "stuff" chain
#   {chat_history} - earlier turns supplied by the memory buffer below
#   {question}     - the current user question
# {chat_history} must appear here: the memory writes its buffer under that key,
# and a variable the prompt does not declare is never sent to the LLM.
prompt_template = """
You are a auditor expert in analyzing call center transcription calls.

Instructions:
1. Answer based only on the context (delimited by <ctx> </ctx>).
2. If the answer is not in the context, respond with "I don't have this information."
3. Provide only the relevant answer to the user's question.
4. Use the chat history (delimited by <hs> </hs>) only to resolve references to earlier questions.

-------
<ctx>
{context}
</ctx>
-------
<hs>
{chat_history}
</hs>
-------

Question: {question}
Useful Answer:
"""

PROMPT = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "chat_history", "question"],
)

# ✅ Memory buffer to handle multiple interactions
# memory_key must match the {chat_history} placeholder in the prompt; input_key
# tells the memory which chain input is the user's turn.
# NOTE: the buffer grows with every call, so long sessions enlarge the prompt.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question"
)

# ✅ Define Retrieval-Based Q&A Chain
# "stuff" concatenates all retrieved chunks into {context}; the source documents
# are returned alongside the answer under response["source_documents"].
retrievalQA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT, "memory": memory}
)

In [None]:
# ✅ Look up a single figure from one call and print only the generated answer
query = "What was the remaining balance in call_004?"
response = retrievalQA.invoke(dict(query=query))

print(response["result"])

In [None]:
# ✅ Keyword-style question across calls. Alternative queries to try:
#    "Which calls mention the word 'extensions'?"
#    "Which calls mention  the word 'payment plan'?"
query = "Which calls mention the word 'confirmation'?"
response = retrievalQA.invoke(dict(query=query))

print(response["result"])

In [None]:
# ✅ Topic-based question that asks for the matching call ID
query = "is there any call that talk about loosing a job? give me the specific call ID"
response = retrievalQA.invoke(dict(query=query))

print(response["result"])

In [None]:
# ✅ Summarize one call. Alternative query to try:
#    "give me a summary from the call_004"
query = "give me a summary from the call_003"

response = retrievalQA.invoke(dict(query=query))

print(response["result"])

In [None]:
# ✅ Find the call associated with a caller's name
query = "what call did John call?"

response = retrievalQA.invoke(dict(query=query))

print(response["result"])