In [None]:
import os
import streamlit as st 
import pickle
import time
from dotenv import load_dotenv # Recommended for managing API keys securely

# HuggingFace specific integrations
from langchain_huggingface import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings
# Retrival QA
from langchain_classic.chains import RetrievalQAWithSourcesChain

# Text splitting 
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Document loaders and vector stores 
from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_community.vectorstores import FAISS


In [2]:
# Load environment variables from .env file
load_dotenv()

True

In [3]:
# Choose a model (Mistral 7B is an excellent open-source choice)
repo_id = "mistralai/Mistral-7B-Instruct-v0.2"

In [4]:
# Instantiate the new LLM
llm_hf = HuggingFaceEndpoint(
    repo_id=repo_id,
    temperature=0.9,  # Lower temperature for factual RAG
    max_new_tokens=512
)

**Load data** 

In [5]:
loaders = UnstructuredURLLoader(urls=[
    "https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html",
    "https://www.moneycontrol.com/news/business/tata-motors-launches-punch-icng-price-starts-at-rs-7-1-lakh-11098751.html"
])

data = loaders.load()
len(data)

2

**Split data to create chunks**

In [6]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap = 200
)

# As data is of type documents we can directly use split_documents over split_text in order to get the chunks.
docs = text_splitter.split_documents(data)

In [7]:
len(docs)

18

In [8]:
docs

[Document(metadata={'source': 'https://www.moneycontrol.com/news/business/markets/wall-street-rises-as-tesla-soars-on-ai-optimism-11351111.html'}, page_content='English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nNetwork 18\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nMoneycontrol\n\nGo PRO NowPRO\n\nMoneycontrol PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessMarketsWall Street rises as Tesla soars on AI optimism\n\nTrending Topi

In [9]:
page_contents_list = [doc.page_content for doc in docs]
page_contents_list

['English\n\nHindi\n\nGujarati\n\nSpecials\n\nHello, Login\n\nHello, Login\n\nLog-inor Sign-Up\n\nMy Account\n\nMy Profile\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nMy Profile\n\nMy PRO\n\nMy Portfolio\n\nMy Watchlist\n\nMy Alerts\n\nMy Messages\n\nPrice Alerts\n\nLogout\n\nLoans up to ₹50 LAKHS\n\nFixed Deposits\n\nCredit CardsLifetime Free\n\nCredit Score\n\nChat with Us\n\nDownload App\n\nFollow us on:\n\nNetwork 18\n\nGo Ad-Free\n\nMy Alerts\n\n>->MC_ENG_DESKTOP/MC_ENG_NEWS/MC_ENG_MARKETS_AS/MC_ENG_ROS_NWS_MKTS_AS_ATF_728\n\nMoneycontrol\n\nGo PRO NowPRO\n\nMoneycontrol PRO\n\nAdvertisement\n\nRemove Ad\n\nBusiness\n\nMarkets\n\nStocks\n\nEconomy\n\nCompanies\n\nTrends\n\nIPO\n\nOpinion\n\nEV Special\n\nHomeNewsBusinessMarketsWall Street rises as Tesla soars on AI optimism\n\nTrending Topics\n\nSensex Today\n\nTata Motors PV Shares\n\nSpiceJet Shares\n\nAlembic Pharma Shares\n\nIRB Infrastructure Shares\n\nWall Street rises as Tesla soars on AI 

**Create embeddings for these chunks and save them to FAISS index**

In [10]:
# Create the embeddings of the chunks using openAIEmbeddings
embeddings = HuggingFaceEmbeddings()

# Pass the documents and embeddings inorder to create FAISS vector index
vectorindex_local = FAISS.from_texts(page_contents_list, embeddings)

In [11]:
# Storing vector index created in local (optional, for persistence)
file_path = "vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump(vectorindex_local, f)

if os.path.exists(file_path):
    with open(file_path, "rb") as f:
        vectorIndex = pickle.load(f)

**Retrieve similar embeddings for a given question and call LLM to retrieve final answer**

In [12]:
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm_hf, retriever=vectorIndex.as_retriever())
chain



In [13]:
query = "what is the price of Tiago iCNG?"
# query = "what are the main features of punch iCNG?"

chain({"question": query}, return_only_outputs=True)

  chain({"question": query}, return_only_outputs=True)


ValueError: You must provide an api_key to work with featherless-ai API or log in with `hf auth login`.