In [15]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/finance-data/Original_data.csv
/kaggle/input/finance-data/Finance_data.csv


In [45]:
# Install all required libraries
!pip install -q langchain langchain-community langchain-core transformers langchain-text-splitters
!pip install -qU sentence-transformers chromadb bitsandbytes

In [47]:
# Install all required libraries
!pip install -q langchain langchain-community langchain-core transformers sentence-transformers chromadb bitsandbytes

# Import necessary libraries
import pandas as pd
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter  # Correct module import
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

In [48]:
# Load data
data = pd.read_csv("/kaggle/input/finance-data/Finance_data.csv")
data_fin = data.to_dict(orient='records')

In [49]:
# Convert the data to prompt-response format
prompt_response_data = []
for entry in data_fin:
    prompt = f"I'm a {entry['age']}-year-old {entry['gender']} looking to invest in {entry['Avenue']} for {entry['Purpose']} over the next {entry['Duration']}. What are my options?"
    response = (
        f"Based on your preferences, here are your investment options:\n"
        f"- Mutual Funds: {entry['Mutual_Funds']}\n"
        f"- Equity Market: {entry['Equity_Market']}\n"
        f"- Debentures: {entry['Debentures']}\n"
        f"- Government Bonds: {entry['Government_Bonds']}\n"
        f"- Fixed Deposits: {entry['Fixed_Deposits']}\n"
        f"- PPF: {entry['PPF']}\n"
        f"- Gold: {entry['Gold']}\n"
        f"Factors considered: {entry['Factor']}\n"
        f"Objective: {entry['Objective']}\n"
        f"Expected returns: {entry['Expect']}\n"
        f"Investment monitoring: {entry['Invest_Monitor']}\n"
        f"Reasons for choices:\n"
        f"- Equity: {entry['Reason_Equity']}\n"
        f"- Mutual Funds: {entry['Reason_Mutual']}\n"
        f"- Bonds: {entry['Reason_Bonds']}\n"
        f"- Fixed Deposits: {entry['Reason_FD']}\n"
        f"Source of information: {entry['Source']}\n"
    )
    prompt_response_data.append({"prompt": prompt, "response": response})

In [50]:
# Convert into Document format
documents = []
for entry in prompt_response_data:
    combined_text = f"Prompt: {entry['prompt']}\nResponse: {entry['response']}"
    documents.append(Document(page_content=combined_text))

# Split documents using Text Splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

In [51]:
# Set up Vector DB with Chroma
from langchain.vectorstores import Chroma

In [52]:
# Load Hugging Face embedding model
hg_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Create the vector store
persist_directory = "chroma_db"  # Define a directory to store the vector database
vectordb_fin = Chroma.from_documents(
    documents=texts,
    embedding=hg_embeddings,
    persist_directory=persist_directory
)




In [53]:
# Initialize the Hugging Face LLM
HUGGINGFACE_API_KEY = "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"
llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b",
    model_kwargs={"temperature": 0.7},
    huggingfacehub_api_token=HUGGINGFACE_API_KEY
)

# Set up the RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb_fin.as_retriever(),  # Use the retriever from the vector store
    return_source_documents=False
)

# Make the query
query = "I'm a 34-year-old female looking to invest in mutual funds for wealth creation over the next 1-3 years. What are the best mutual fund options for me?"

# Execute the query
result = qa({"query": query})
print(result['result'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Locking Period
Objective: Capital Appreciation
Expected returns: 20%-30%
Investment monitoring: Weekly
Reasons for choices:
- Equity: Dividend

Prompt: I'm a 24-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next 1-3 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 7
- Equity Market: 5
- Debentures: 4
- Government Bonds: 6
- Fixed Deposits: 3
- PPF: 1
- Gold: 2
Factors considered: Risk
Objec

In [29]:
pip install transformers langchain

  pid, fd = os.forkpty()


Note: you may need to restart the kernel to use updated packages.


In [32]:
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

# Set up your Hugging Face API key
HUGGINGFACE_API_KEY = "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"

# Initialize the Hugging Face LLM
llm = HuggingFaceHub(repo_id="tiiuae/falcon-7b", 
                     model_kwargs={"temperature": 0.7}, 
                     huggingfacehub_api_token=HUGGINGFACE_API_KEY)

# Use the retrieval-based QA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever_fin,
    return_source_documents=False
)

# Make a query
result = qa({"query": query})
print(result)

{'query': "I'm a 34-year-old female looking to invest in mutual funds for wealth creation over the next 1-3 years. What are my best options?", 'result': "Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\nPrompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?\nResponse: Based on your preferences, here are your investment options:\n- Mutual Funds: 4\n- Equity Market: 3\n- Debentures: 2\n- Government Bonds: 1\n- Fixed Deposits: 5\n- PPF: 6\n- Gold: 7\nFactors considered: Locking Period\nObjective: Capital Appreciation\nExpected returns: 20%-30%\nInvestment monitoring: Weekly\nReasons for choices:\n- Equity: Dividend\n\nPrompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?\nResponse: Based on your preference

In [35]:
query = "I'm a 34-year-old female looking to invest in mutual funds for wealth creation over the next 1-3 years. What are the best mutual fund options for me?"

In [36]:
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

# Initialize the Hugging Face LLM
llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b",
    model_kwargs={"temperature": 0.7},
    huggingfacehub_api_token="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"
)

# Set up the RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever_fin,
    return_source_documents=False
)

# Make the query
result = qa({"query": query})
print(result['result'])

Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Locking Period
Objective: Capital Appreciation
Expected returns: 20%-30%
Investment monitoring: Weekly
Reasons for choices:
- Equity: Dividend

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Lo

In [37]:
query = "I'm a 34-year-old female looking to invest in mutual funds for wealth creation over the next 1-3 years. What are the best mutual fund options for me?"

In [39]:
from langchain.llms import HuggingFaceHub
from langchain.chains import RetrievalQA

# Initialize the Hugging Face LLM
llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b",
    model_kwargs={"temperature": 0.7},
    huggingfacehub_api_token="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"
)

# Set up the RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever_fin,
    return_source_documents=False
)

# Make the query
result = qa({"query": query})
print(result['result'])


Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Locking Period
Objective: Capital Appreciation
Expected returns: 20%-30%
Investment monitoring: Weekly
Reasons for choices:
- Equity: Dividend

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Lo

In [41]:
query = "I'm a 34-year-old female looking to invest in mutual funds for wealth creation over the next 1-3 years. What are the best mutual fund options for me?"
from langchain.chains import RetrievalQA

# Initialize the LLM
llm = HuggingFaceHub(
    repo_id="tiiuae/falcon-7b",
    model_kwargs={"temperature": 0.7},
    huggingfacehub_api_token="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"
)

# Set up the RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever_fin,  # Ensure this is correctly fetching relevant context
    return_source_documents=False
)

# Execute the query
result = qa({"query": query})
print(result['result'])


Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Locking Period
Objective: Capital Appreciation
Expected returns: 20%-30%
Investment monitoring: Weekly
Reasons for choices:
- Equity: Dividend

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Lo

In [54]:
# Install all required libraries
!pip install -q langchain langchain-community langchain-core transformers langchain-text-splitters
!pip install -qU sentence-transformers chromadb bitsandbytes

# Import necessary libraries
import pandas as pd
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM, BitsAndBytesConfig
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# Load and prepare data
data = pd.read_csv("/kaggle/input/finance-data/Finance_data.csv")
data_fin = data.to_dict(orient='records')

# Convert the data to prompt-response format
prompt_response_data = []
for entry in data_fin:
    prompt = f"I'm a {entry['age']}-year-old {entry['gender']} looking to invest in {entry['Avenue']} for {entry['Purpose']} over the next {entry['Duration']}. What are my options?"
    response = (
        f"Based on your preferences, here are your investment options:\n"
        f"- Mutual Funds: {entry['Mutual_Funds']}\n"
        f"- Equity Market: {entry['Equity_Market']}\n"
        f"- Debentures: {entry['Debentures']}\n"
        f"- Government Bonds: {entry['Government_Bonds']}\n"
        f"- Fixed Deposits: {entry['Fixed_Deposits']}\n"
        f"- PPF: {entry['PPF']}\n"
        f"- Gold: {entry['Gold']}\n"
        f"Factors considered: {entry['Factor']}\n"
        f"Objective: {entry['Objective']}\n"
        f"Expected returns: {entry['Expect']}\n"
        f"Investment monitoring: {entry['Invest_Monitor']}\n"
        f"Reasons for choices:\n"
        f"- Equity: {entry['Reason_Equity']}\n"
        f"- Mutual Funds: {entry['Reason_Mutual']}\n"
        f"- Bonds: {entry['Reason_Bonds']}\n"
        f"- Fixed Deposits: {entry['Reason_FD']}\n"
        f"Source of information: {entry['Source']}\n"
    )
    prompt_response_data.append({"prompt": prompt, "response": response})

# Convert into Document format
documents = []
for entry in prompt_response_data:
    combined_text = f"Prompt: {entry['prompt']}\nResponse: {entry['response']}"
    documents.append(Document(page_content=combined_text))

# Split documents using Text Splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Import Chroma and set up Vector DB
from langchain.vectorstores import Chroma

# Load Hugging Face embedding model
from langchain.embeddings import HuggingFaceEmbeddings

hg_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Directory for storing vector database
persist_directory = 'chroma_db'
vectordb_fin = Chroma.from_documents(
    documents=texts,
    embedding=hg_embeddings,
    persist_directory=persist_directory
)

# Initialize the Hugging Face LLM
from langchain.llms import HuggingFaceHub

HUGGINGFACE_API_KEY = "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"

llm = HuggingFaceHub(repo_id="tiiuae/falcon-7b", 
                     model_kwargs={"temperature": 0.7}, 
                     huggingfacehub_api_token=HUGGINGFACE_API_KEY)

# Set up the RetrievalQA chain
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb_fin.as_retriever(),  # Ensur this is correctly fetching relevant context
    return_source_documents=False
)

# Make a query
query = "I'm a 34-year-old female looking to invest in mutual funds for wealth creation over the next 1-3 years. What are the best mutual fund options for me?"

result = qa({"query": query})
print(result['result'])


  pid, fd = os.forkpty()


Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Locking Period
Objective: Capital Appreciation
Expected returns: 20%-30%
Investment monitoring: Weekly
Reasons for choices:
- Equity: Dividend

Prompt: I'm a 23-year-old Female looking to invest in Mutual Fund for Wealth Creation over the next More than 5 years. What are my options?
Response: Based on your preferences, here are your investment options:
- Mutual Funds: 4
- Equity Market: 3
- Debentures: 2
- Government Bonds: 1
- Fixed Deposits: 5
- PPF: 6
- Gold: 7
Factors considered: Lo