In [2]:
# importing libraries
import os
import pickle
import time

import langchain
import streamlit as st
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.globals import set_debug
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_ollama import ChatOllama, OllamaEmbeddings

In [3]:
# Ollama-backed chat model that the QA chain will use to generate answers.
llm = ChatOllama(model='gemma3:1b')

# News articles that form the knowledge base for this notebook.
article_urls = [
    "https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html",
    "https://www.moneycontrol.com/news/business/markets/market-corrects-post-rbi-ups-inflation-forecast-icrr-bet-on-these-top-10-rate-sensitive-stocks-ideas-11142611.html",
]
loader = UnstructuredURLLoader(urls=article_urls)

# Fetch the pages into Document objects and show how many were loaded.
data = loader.load()
len(data)

2

In [4]:
# Chunking parameters: size of each chunk and the overlap kept between
# neighbouring chunks.
splitter_options = {"chunk_size": 1000, "chunk_overlap": 200}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# `data` already holds Document objects, so split_documents can be used
# directly instead of extracting raw strings for split_text.
docs = text_splitter.split_documents(data)
len(docs)

31

In [5]:
# Peek at the second chunk to eyeball what the splitter produced.
docs[1]

Document(metadata={'source': 'https://www.moneycontrol.com/news/business/banks/hdfc-bank-re-appoints-sanmoy-chakrabarti-as-chief-risk-officer-11259771.html'}, page_content='Trending Topics\n\nSensex LiveJSW Cement Share PriceAll time plastics share priceSawaliya Food share priceRegaal Resources IPO GMP\n\nHDFC Bank re-appoints Sanmoy Chakrabarti as Chief Risk Officer\n\nChakrabarti has been appointed for a period of five years from December 14, 2023 to December 13, 2028.\n\nMoneycontrol News\n\nAugust 25, 2023 / 18:54 IST\n\nHDFC BANK\n\nHDFC Bank\n\nWatchlist\n\nPortfolio\n\nMessage\n\nSet Alert\n\nlive\n\nbselive\n\nnselive\n\nVolume\n\nTodays L/H\n\nMore\n\nHDFC Bank, the country\'s largest private sector lender, said on August 25 that its board approved the re-appointment of Sanmoy Chakrabarti as the Chief Risk Officer (CRO).\n\nAccording to the bank\'s filing to the stock exchanges, Chakrabarti has been re-appointed for a period of five years.\n\nStory continues below Advertisemen

In [6]:
# Embedding model (served through Ollama) used to vectorise the chunks.
embedding_model_name = 'mxbai-embed-large:latest'
embeddings = OllamaEmbeddings(model=embedding_model_name)

# Embed every chunk and build a FAISS index over the resulting vectors.
vector_db = FAISS.from_documents(documents=docs, embedding=embeddings)

In [7]:
# Persist the freshly built FAISS index to a local folder so it can be
# reloaded later.
save_directory = "my_faiss_index"
vector_db.save_local(folder_path=save_directory)

In [8]:
# Reload the persisted index from disk with the same embedding object that
# built it.
# NOTE: allow_dangerous_deserialization=True opts in to pickle-based loading.
# That is acceptable here only because this notebook wrote the folder itself;
# never enable it for an index obtained from an untrusted source.
loaded_vector_db = FAISS.load_local(
    folder_path=save_directory,
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

In [12]:
# Build the retriever from the index that was reloaded from disk, so the
# save/load round-trip is actually exercised by the QA chain (previously the
# in-memory `vector_db` was used and `loaded_vector_db` was never read).
retriever = loaded_vector_db.as_retriever()

# Retrieval chain for question answering; its result carries both an
# 'answer' and a 'sources' field.
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
chain



In [17]:
# Question to ask against the indexed articles.
query = "tell me about Bank of Maharashtra"

# Turn on verbose tracing of every chain/LLM step. set_debug() is the
# supported switch; assigning `langchain.debug` directly is the deprecated form.
set_debug(True)

# Chain.__call__ is deprecated; invoke() is the supported entry point and
# accepts the same return_only_outputs flag, so only the chain outputs
# ('answer' and 'sources') are returned.
chain.invoke({"question": query}, return_only_outputs=True)

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "tell me about Bank of Maharashtra"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Additionally, the price remains above the 100-period EMA and the Ichimoku Indicator, supporting the uptrend. The RSI momentum indicator is also trending, with new highs, confirming the trend.\n\nIn light of these technical factors, a long position could be considered at Rs 3,059 or a potential decline to Rs 3,050, with a target price of Rs 3,500. However, this bullish view would be invalidated if the stock closes below the support level of Rs 2,850.\n\nImage810082023\n\nBank of Maharashtra

{'answer': 'Bank of Maharashtra is a financial institution.\n', 'sources': ''}