In [114]:
import os
import nltk
import numpy as np
import langchain
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain.document_loaders import UnstructuredURLLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore import InMemoryDocstore
from langchain.vectorstores import FAISS
from faiss import IndexFlatL2
from sentence_transformers import SentenceTransformer

In [115]:
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Tanay\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [116]:
load_dotenv()

True

In [117]:
API_KEY = os.getenv("GROQ_API_KEY")

In [118]:
llm = ChatGroq(
    model="llama-3.3-70b-versatile",
    temperature=0.3,
    api_key=API_KEY
)

sample_query = llm.invoke("What is the capital of France?")
print(sample_query.content)

[32;1m[1;3m[llm/start][0m [1m[llm:ChatGroq] Entering LLM run with input:
[0m{
  "prompts": [
    "Human: What is the capital of France?"
  ]
}
[36;1m[1;3m[llm/end][0m [1m[llm:ChatGroq] [378ms] Exiting LLM run with output:
[0m{
  "generations": [
    [
      {
        "text": "The capital of France is Paris.",
        "generation_info": {
          "finish_reason": "stop",
          "logprobs": null
        },
        "type": "ChatGeneration",
        "message": {
          "lc": 1,
          "type": "constructor",
          "id": [
            "langchain",
            "schema",
            "messages",
            "AIMessage"
          ],
          "kwargs": {
            "content": "The capital of France is Paris.",
            "response_metadata": {
              "token_usage": {
                "completion_tokens": 8,
                "prompt_tokens": 42,
                "total_tokens": 50,
                "completion_time": 0.029090909,
                "prompt_time": 0.0046

In [119]:
loader = UnstructuredURLLoader(urls=["https://www.bbc.com/news/articles/cz7vlezv05no",
                                     "https://timesofindia.indiatimes.com/business/india-business/stock-market-today-bse-sensex-nifty50-march-06-2025-dalal-street-indian-equities-global-markets-trump-tariff/articleshow/118748289.cms",
                                     "https://www.thehindu.com/business/markets/rupee-falls-6-paise-to-settle-at-8712-against-us-dollar/article69298165.ece"])

article_result = loader.load()
print(article_result[0].page_content)

Boom to gloom: India middle-class jitters amid trillion-dollar market rout

1 day ago

Soutik Biswas and Nikhil Inamdar

BBC News•@soutikBBC

Two years ago, on his bank adviser's suggestion, Rajesh Kumar pulled out his savings - fixed deposits included - and shifted to mutual funds, stocks and bonds.

With India's stock market booming, Mr Kumar, a Bihar-based engineer, joined millions investing in publicly traded companies. Six years ago, only one in 14 Indian households channelled their savings into the stock market - now, it's one in five.

But the tide has turned.

For six months, India's markets have slid as foreign investors pulled out, valuations remained high, earnings weakened and global capital shifted to China - wiping out $900bn in investor value since their September peak. While the decline began before US President Donald Trump's tariff announcements, they have now become a bigger drag as more details emerge.

India's benchmark Nifty 50 share index, which tracks the countr

In [120]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=200
)

docs = text_splitter.split_documents(article_result)
print("No of chunks:", len(docs))
print(docs[5].page_content)

No of chunks: 57
"I've put 80% of my savings into mutual funds, keeping just 20% in the bank. Now my adviser warns me - Don't check your investments for six months, unless you want a heart attack!"

For now, Mr Sircar isn't entirely sure if moving his retirement fund into the stock market was the right decision. "I'm both ignorant and confident," he says with wry candour. "Ignorant about what's happening and why the market is reacting this way, yet confident because Instagram 'experts' make investing sound like a fast track to millions. At the same time, I know I might be caught in a web of deception and hype."


In [121]:
encoder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
embeddings = encoder.encode([doc.page_content for doc in docs])

In [122]:
print(embeddings[0])
embeddings = np.array(embeddings, dtype=np.float32)

[ 5.54088950e-02 -4.24845107e-02 -8.33748840e-03  2.00059526e-02
  8.10002238e-02 -9.23456531e-03  5.62588684e-02  2.56380085e-02
 -7.20573813e-02 -2.44018175e-02 -4.08077128e-02  1.73314344e-02
 -1.64087582e-02 -7.23519996e-02  4.71766219e-02 -1.94639936e-02
 -2.36441437e-02 -5.64711392e-02  1.81306489e-02  9.86666419e-03
 -5.72498031e-02 -3.65117155e-02 -4.96529825e-02  7.15679396e-03
  8.59426260e-02  4.72911410e-02  7.14928061e-02 -1.03446692e-01
 -7.15557486e-02  1.32678421e-02  2.98180450e-02  9.23067555e-02
 -2.53876988e-02  1.75135825e-02  1.55693004e-02  4.15143147e-02
 -1.11587846e-03  1.34580836e-01  9.07916650e-02 -7.27653801e-02
  2.38380805e-02 -6.42763898e-02 -1.72029180e-03 -5.86084314e-02
  4.22138311e-02 -1.03048971e-02  4.34496328e-02 -4.23496123e-03
  5.20460531e-02 -5.46844155e-02 -5.81303053e-02 -2.49505043e-02
 -1.04057360e-02 -8.11441708e-03 -2.54733171e-02  1.04068760e-02
  2.47209202e-02 -2.89747398e-03  2.36234386e-02  8.20292458e-02
 -1.57979932e-02 -5.44469

In [123]:
# vector_index = FAISS.from_documents(docs, embeddings)
vector_index = IndexFlatL2(embeddings.shape[1])
vector_index.add(embeddings)

In [124]:
index_to_docstore_id = {i: str(i) for i in range(len(docs))}
docstore = InMemoryDocstore({str(i): doc for i, doc in enumerate(docs)})

vector_store = FAISS(index=vector_index, 
                      embedding_function=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
                      docstore=docstore,
                      index_to_docstore_id=index_to_docstore_id)

In [125]:
chain = RetrievalQAWithSourcesChain.from_llm(
    llm = llm,
    retriever = vector_store.as_retriever()
)

In [127]:
langchain.debug = True
query = "Summarize sbout Mr. Sircar from the articles"
result = chain.invoke({"question":query})
print(result["answer"])
print(result["sources"])

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain] Entering Chain run with input:
[0m{
  "question": "Summarize sbout Mr. Sircar from the articles"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQAWithSourcesChain > chain:MapReduceDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "input_list": [
    {
      "context": "Mr Sircar says he was drawn to the markets by TV shows hyping stocks and excited chatter in WhatsApp groups. \"The TV anchors talk up the market and people in my WhatsApp group boast about their stock market gains,\" he says.\n\nIn his sprawling apartment complex, even teenagers discuss investments - in fact, during a badminton game, a teenager gave him a hot tip on a telecom stock. \"When you hear all this around you, you start thinking - why not give it a shot? So I did, and t