In [24]:
import os
from bs4 import BeautifulSoup
import requests

# Read the Groq API key from the environment. The value is only read, never
# written back: os.environ accepts str values only, so assigning the result of
# os.getenv() to it raises TypeError whenever the variable is missing.
groq_api_key = os.getenv('GROQ_API_KEY')
if not groq_api_key:
    # Surface the misconfiguration here instead of failing later inside the LLM client.
    print("Warning: GROQ_API_KEY is not set in the environment.")

In [25]:
def scrape_website(url, timeout=10):
    """Download a web page and return its longer text blocks.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        list[str]: Stripped text of every <p>, <div> and <span> element whose
        text is longer than 50 characters, in document order, with exact
        duplicate strings removed.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (4xx/5xx).
    """
    response = requests.get(url, headers={'User-Agent': 'Mozilla/5.0'}, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of scraping an error page as if it were content.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # we have to scrape the text from p, div, span........
    # Nested wrappers (a <div> around a single <div>/<p>) yield the exact same
    # string more than once; keep only the first occurrence of each string.
    seen = set()
    paragraphs = []
    for el in soup.find_all(['p', 'div', 'span']):
        text = el.get_text(strip=True)
        if len(text) > 50 and text not in seen:
            seen.add(text)
            paragraphs.append(text)

    print(f"Total number of  paragraphs scraped: {len(paragraphs)}")
    return paragraphs



In [26]:
# Scrape the Investopedia "stock" definition page and preview the first five
# extracted text blocks.
texts = scrape_website("https://www.investopedia.com/terms/s/stock.asp")
texts[:5]


Total number of  paragraphs scraped: 181


['InvestopediaTradeSearchPlease fill out this field.Newsletters',
 'SearchPlease fill out this field.NewsNewsMarketsCompaniesEarningsCD RatesMortgage RatesEconomyGovernmentCryptoLive Markets NewsPersonal FinanceView AllInvestingInvestingStocksCryptocurrencyBondsETFsOptions and DerivativesCommoditiesTradingAutomated InvestingBrokersFundamental AnalysisMarketsView AllSimulatorSimulatorLogin / PortfolioTradeResearchMy GamesLeaderboardBankingBankingSavings AccountsCertificates of Deposit (CDs)Money Market AccountsChecking AccountsView AllPersonal FinancePersonal FinanceBudgeting and SavingPersonal LoansInsuranceMortgagesCredit and DebtStudent LoansTaxesCredit CardsFinancial LiteracyRetirementView AllEconomyEconomyGovernment and PolicyMonetary PolicyFiscal PolicyEconomicsView AllReviewsReviewsBest Online BrokersBest Crypto ExchangesBest Savings RatesBest CD RatesBest Life InsuranceBest Mortgage RatesBest Robo-AdvisorsBest Personal LoansBest Debt Relief CompaniesView AllNewslettersFollow Us'

In [27]:
def is_financial(text):
    """Tell whether ``text`` mentions any finance keyword.

    Matching is a case-insensitive substring test, so a keyword also matches
    inside longer words (e.g. "bank" in "banking").

    Args:
        text: String to inspect.

    Returns:
        bool: True if at least one keyword occurs in ``text``, else False.
    """
    lowered = text.lower()
    for keyword in ("stock", "finance", "investment", "revenue", "profit",
                    "loss", "bank", "economy", "market"):
        if keyword in lowered:
            return True
    return False

def filter_financial_texts(texts):
    """Keep only the entries of ``texts`` that ``is_financial`` accepts.

    Prints how many entries were kept out of the total.

    Args:
        texts: Sized collection of strings.

    Returns:
        list[str]: The accepted entries, in their original order.
    """
    kept = []
    for paragraph in texts:
        if is_financial(paragraph):
            kept.append(paragraph)

    total = len(texts)
    print(f"No. of Financial paragraphs are found : {len(kept)} / {total}")
    return kept


In [28]:
# Keep only the scraped blocks that contain a finance keyword, then preview
# the first three of them.
financial_texts = filter_financial_texts(texts)
financial_texts[:3]


No. of Financial paragraphs are found : 130 / 181


['SearchPlease fill out this field.NewsNewsMarketsCompaniesEarningsCD RatesMortgage RatesEconomyGovernmentCryptoLive Markets NewsPersonal FinanceView AllInvestingInvestingStocksCryptocurrencyBondsETFsOptions and DerivativesCommoditiesTradingAutomated InvestingBrokersFundamental AnalysisMarketsView AllSimulatorSimulatorLogin / PortfolioTradeResearchMy GamesLeaderboardBankingBankingSavings AccountsCertificates of Deposit (CDs)Money Market AccountsChecking AccountsView AllPersonal FinancePersonal FinanceBudgeting and SavingPersonal LoansInsuranceMortgagesCredit and DebtStudent LoansTaxesCredit CardsFinancial LiteracyRetirementView AllEconomyEconomyGovernment and PolicyMonetary PolicyFiscal PolicyEconomicsView AllReviewsReviewsBest Online BrokersBest Crypto ExchangesBest Savings RatesBest CD RatesBest Life InsuranceBest Mortgage RatesBest Robo-AdvisorsBest Personal LoansBest Debt Relief CompaniesView AllNewslettersFollow Us',
 'NewsMarketsCompaniesEarningsCD RatesMortgage RatesEconomyGover

In [29]:
# Print the first filtered block in full (as a one-element list).
print(financial_texts[:1])

['SearchPlease fill out this field.NewsNewsMarketsCompaniesEarningsCD RatesMortgage RatesEconomyGovernmentCryptoLive Markets NewsPersonal FinanceView AllInvestingInvestingStocksCryptocurrencyBondsETFsOptions and DerivativesCommoditiesTradingAutomated InvestingBrokersFundamental AnalysisMarketsView AllSimulatorSimulatorLogin / PortfolioTradeResearchMy GamesLeaderboardBankingBankingSavings AccountsCertificates of Deposit (CDs)Money Market AccountsChecking AccountsView AllPersonal FinancePersonal FinanceBudgeting and SavingPersonal LoansInsuranceMortgagesCredit and DebtStudent LoansTaxesCredit CardsFinancial LiteracyRetirementView AllEconomyEconomyGovernment and PolicyMonetary PolicyFiscal PolicyEconomicsView AllReviewsReviewsBest Online BrokersBest Crypto ExchangesBest Savings RatesBest CD RatesBest Life InsuranceBest Mortgage RatesBest Robo-AdvisorsBest Personal LoansBest Debt Relief CompaniesView AllNewslettersFollow Us']


In [30]:
from sentence_transformers import SentenceTransformer
from langchain_community.vectorstores import Chroma
# from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings


def create_vector_db(financial_texts, persist_directory="./finance_db"):
    """Embed the given texts and store them in a Chroma vector store.

    The store is written to ``persist_directory``. The default matches the
    directory that ``create_rag_pipeline`` opens, so the pipeline retrieves
    from the texts indexed here rather than from an empty or stale store.

    Args:
        financial_texts: Iterable of strings to embed and index.
        persist_directory: Directory the Chroma store is written to. Pass
            ``None`` to build an in-memory store only.

    Returns:
        Chroma: The populated vector store.
    """
    import hashlib

    # Drop exact duplicates (order preserved): they add nothing to retrieval
    # and would collide on the content-derived ids below.
    unique_texts = list(dict.fromkeys(financial_texts))

    # Ids derived from the content make the ids stable across runs, so
    # re-running this cell is expected to overwrite existing entries instead
    # of appending copies — NOTE(review): relies on Chroma adding by id
    # (upsert); confirm against the installed langchain/chromadb versions.
    ids = [hashlib.sha256(text.encode("utf-8")).hexdigest() for text in unique_texts]

    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    db = Chroma.from_texts(
        unique_texts,
        embedding=embeddings,
        ids=ids,
        persist_directory=persist_directory,
    )
    print("Vector DB created successfully.")
    return db


In [31]:
# sample_texts = financial_texts[:1]
# db = create_vector_db(sample_texts)

In [32]:
# Embed and index every filtered text block.
db = create_vector_db(financial_texts)

Vector DB created successfully.


In [33]:
from langchain_groq import ChatGroq
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableSequence

# Creating the RAG pipeline
def create_rag_pipeline(vector_db=None, persist_directory="./finance_db",
                        model="llama-3.1-8b-instant", k=3):
    """Build the retrieval-augmented question-answering chain.

    The chain takes ``{"question": <str>}``, retrieves the ``k`` most similar
    documents from the vector store, fills the prompt with them, calls the
    Groq chat model and returns the answer as a plain string.

    Args:
        vector_db: An existing vector store to retrieve from (for example the
            object returned by ``create_vector_db``). When ``None``, the
            Chroma store persisted at ``persist_directory`` is opened.
        persist_directory: Location of the persisted Chroma store; used only
            when ``vector_db`` is ``None``.
        model: Name of the Groq chat model.
        k: Number of documents retrieved per question.

    Returns:
        A runnable chain; call ``.invoke({"question": ...})`` to get a string.
    """
    from langchain_core.runnables import RunnableLambda

    llm = ChatGroq(model=model, api_key=groq_api_key)

    if vector_db is None:
        # Same embedding model as create_vector_db, so query vectors are
        # comparable with the stored document vectors.
        embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vector_db = Chroma(
            persist_directory=persist_directory,
            embedding_function=embeddings
        )

    retriever = vector_db.as_retriever(search_kwargs={"k": k})

    #Prompt template....
    prompt = ChatPromptTemplate.from_template("""
    You are a financial expert. Use only the following context to answer user queries.
    If the query is unrelated to finance, politely say:
    "Sorry, I can only answer questions related to finance."

    Context:
    {context}

    Question:
    {question}
    """)

    #retriever function....
    def retrieve_docs(inputs):
        """Fetch documents for the question and shape the prompt variables."""
        query = inputs["question"]
        docs = retriever.invoke(query)
        return {"context": "\n\n".join([d.page_content for d in docs]), "question": query}

    #RAG Pipeline....
    # retrieve_docs already reads "question" from the input dict, so it can be
    # the first step of the chain; wrapping it makes it an explicit Runnable.
    rag_pipeline = (
        RunnableLambda(retrieve_docs)
        | prompt
        | llm
        | StrOutputParser()
    )

    print("RAG pipeline creatod successfully......")
    return rag_pipeline



In [34]:
# Build the question-answering chain used by ask_bot.
# NOTE(review): called without arguments, create_rag_pipeline opens the Chroma
# store at ./finance_db; it does not receive the `db` object built above.
qa = create_rag_pipeline()

RAG pipeline creatod successfully......


In [35]:
# Query function: keyword guard in front of the RAG pipeline.
def ask_bot(query):
    """Answer a user question, refusing obviously off-topic ones up front.

    A question is refused without calling the pipeline when one of its words
    starts with a blocked keyword (case-insensitive), so "movies" is blocked
    by "movie". Keywords are anchored at word starts so that finance questions
    are not refused merely because a word contains a keyword in the middle
    (e.g. "transports" or "passports" contain "sports").

    Args:
        query: The user's question.

    Returns:
        str: A fixed refusal message, or the answer produced by ``qa``.
    """
    import re

    non_financial_keywords = ["cricket", "movie", "food", "sports", "weather", "music","politics","history","geography","travel","health","fitness","entertainment","gaming"]
    # \b anchors the alternation at the beginning of a word only.
    blocked_pattern = r"\b(?:" + "|".join(re.escape(k) for k in non_financial_keywords) + r")"
    if re.search(blocked_pattern, query.lower()):
        return "Sorry, I can only answer finance related questions.Please ask a finance-related question."

    response = qa.invoke({"question": query})
    return response


In [36]:
# Smoke test: a plainly financial question.
query = "What is a stock?"
print("Q:", query)
print("A:", ask_bot(query))

Q: What is a stock?
A: A stock, also known as an equity security, represents ownership in a company. When you buy a stock, you're essentially buying a small portion of that company's assets and profits. Stocks are traded on stock exchanges, such as the New York Stock Exchange (NYSE) or the NASDAQ.

Think of it like owning a small piece of a pizza. If the pizza shop makes more money, the value of your slice of pizza (or stock) might increase. You can then sell your stock for a profit, but if the company performs poorly, the value of your stock might go down.

There are different types of stocks, including:

1. Common stock: This is the most common type of stock, which represents ownership in a company and gives shareholders voting rights.
2. Preferred stock: This type of stock has a higher claim on assets and dividends than common stock, but typically doesn't come with voting rights.

Understanding stocks is crucial for building a solid investment portfolio and achieving long-term finan

In [37]:
# Off-topic question containing none of ask_bot's blocked keywords, so it
# reaches the pipeline and any refusal has to come from the prompt.
query = "What is the chances of winning womens world cup by womens team?"
print("Q:", query)
print("A:", ask_bot(query))


Q: What is the chances of winning womens world cup by womens team?
A: Sorry, I can only answer questions related to finance.


In [38]:
# A broader personal-finance question.
query = "What do you mean financial freedom and how can I achieve it?"
print("Q:", query)
print("A:", ask_bot(query))

Q: What do you mean financial freedom and how can I achieve it?
A: Financial freedom is the state of having sufficient wealth and financial stability to live a life without financial stress, constraints, or worries. It's a goal many people strive for, and it can be achieved by adopting a few key strategies.

To achieve financial freedom, consider the following steps:

1. **Create a budget and track expenses**: Understand where your money is going and make conscious decisions about how you spend it.
2. **Live below your means**: Spend less than you earn and save or invest the difference.
3. **Invest for the future**: Grow your wealth through investments, such as stocks, real estate, or a retirement account.
4. **Pay off high-interest debt**: Focus on paying off high-interest loans or credit cards to free up more money in your budget.
5. **Build multiple income streams**: Diversify your income sources to reduce financial risk and increase your overall wealth.
6. **Develop a long-term pla