In [1]:
!pip install pdfplumber
!pip install openai==0.28.1
!pip install tiktoken==0.6.0
!pip install langchain==0.1.20
!pip install chromadb==0.5.0
!pip install faiss-cpu
!pip install PyPDF2

Collecting pdfplumber
  Downloading pdfplumber-0.11.4-py3-none-any.whl.metadata (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20231228 (from pdfplumber)
  Downloading pdfminer.six-20231228-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.4-py3-none-any.whl (59 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import openai
import numpy as np
import pandas as pd
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.prompts import ChatPromptTemplate
from langchain.vectorstores import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableLambda, RunnablePassthrough
from openai.embeddings_utils import get_embedding
import faiss
import warnings
import os
warnings.filterwarnings("ignore")
import PyPDF2

In [4]:
# Never store the API key in the notebook. Take it from the environment and,
# when it is not set, ask for it with a prompt that does not echo the input.
from getpass import getpass

openai.api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [6]:
def extract_text_from_pdf(file_path, page_separator=""):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        file_path: Path of the PDF file; it is opened in binary mode.
        page_separator: String inserted between consecutive pages. Defaults
            to "" so the result is the plain page-by-page concatenation.

    Returns:
        One string holding the extracted text of all pages. A page for which
        extraction yields nothing contributes an empty string.
    """
    with open(file_path, 'rb') as pdf_file:
        reader = PyPDF2.PdfReader(pdf_file)
        # Defensive `or ""`: if extract_text() yields None for a page
        # (NOTE(review): possible for text-less pages on some reader
        # versions - confirm), treat it as empty instead of failing with a
        # TypeError on concatenation.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    # Join once rather than growing a string inside the loop.
    return page_separator.join(page_texts)

In [9]:
# Read the whole financial-wellness journal into a single string.
pdf_path = "/content/financial-wellness-journal-english.pdf"
pdf_text = extract_text_from_pdf(pdf_path)

In [11]:
# Sanity check: total number of characters extracted from the PDF.
pdf_char_count = len(pdf_text)
print(pdf_char_count)

80030


In [12]:
# Cut the document into consecutive, non-overlapping windows of at most
# `chunk_size` characters; only the final window can be shorter.
chunk_size = 500
chunk_starts = range(0, len(pdf_text), chunk_size)
chunks = [pdf_text[start:start + chunk_size] for start in chunk_starts]

In [13]:
# Show a bounded preview rather than the whole list: printing every chunk
# writes the full document text into the notebook output.
print(f"{len(chunks)} chunks; showing the first 2:")
print(chunks[:2])

['Financial Wellness Journal \nIntroduction\nGetting started Pre-assessmentTABLE OF CONTENTS\nChapter 1: All About Financial Wellness\nChapter \nintroductionHow FinEd can \nhelp you achieve \nfinancial wellnessThe life stages\nof money Facing these \ndifficulties head onActivity: Visioning \nexercise\nChapter 2: All About Saving\nCase study: Mang \nRafael’s saving \nproblem Common \nmisconceptions \nabout savingWhat are the goals \nof saving?What are the tools \nwe can use to start \nsaving?Where do we save?\nCase st', 'udy: Aling \nYolanda insures her \ndaughter’s future What is insurance? Why do we need \nto insure ourselves \nand our family?What are the \ndifferent kinds\nof insurance?Chapter wrap-up\nChapter 4: All About CreditChapter 3: All About Insurance\nChapter 5: All About Investing\nConclusion3 4\n6 7 8 9 11\n12 13 14 15 20\n23 21 24\n27 28 29 29 31\n33 34 35 36\n38 39 39 4037\n41 42 43 43 46Where should our \nsavings go?What are the \nmethods of \nbudgeting?Chapter wrap-up\

In [14]:
# Call get_embedding once per chunk, keeping chunk order so that
# embeddings[i] belongs to chunks[i].
embeddings = []
for chunk in chunks:
    embeddings.append(get_embedding(chunk, engine="text-embedding-3-small"))

In [15]:
# Vector length of the embeddings, read off the first one. Raises IndexError
# when `embeddings` is an empty list.
embedding_dim = len(embeddings[0])

In [16]:
# Pack the embeddings into one float32 array (one row per chunk); this is the
# array that gets added to the FAISS index.
embeddings_np = np.asarray(embeddings, dtype='float32')

In [17]:
# Create a FAISS IndexFlatL2 sized for vectors of length `embedding_dim`.
# No vectors have been added to it yet.
index = faiss.IndexFlatL2(embedding_dim)

In [18]:
# Empty the index before adding so that re-running this cell does not store
# the same vectors a second time; duplicated rows would make searches return
# ids that are >= len(chunks).
index.reset()
index.add(embeddings_np)

In [19]:
# The question that will be embedded, used for retrieval, and sent to the model.
user_message = "What are the common misconceptions of saving?"

In [20]:
# Embed the question with the same engine name used for the chunks, then
# shape it as a single-row float32 matrix for the index search.
query_embedding = get_embedding(user_message, engine="text-embedding-3-small")
query_embedding_np = np.asarray(query_embedding, dtype='float32').reshape(1, -1)

In [22]:
# How many nearest chunks to retrieve. One 500-character chunk is very little
# context for the model (and the closest match may be a table-of-contents
# fragment), so fetch a few. The request is capped at the number of stored
# vectors because FAISS pads missing results with the id -1.
TOP_K = 3
num_neighbors = max(1, min(TOP_K, index.ntotal))
# Bound to `distances` rather than `_`: assigning to `_` in IPython shadows
# the built-in "last output" variable.
distances, indices = index.search(query_embedding_np, num_neighbors)

In [23]:
# Map the returned ids back to their chunks, skipping any id that does not
# address a chunk. FAISS reports -1 when it finds fewer neighbours than were
# requested, and -1 would otherwise silently select the last chunk (or raise
# IndexError when `chunks` is empty).
retrieved_docs = [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

In [24]:
# Merge the retrieved chunks into one context string, separated by single spaces.
context = ' '.join(retrieved_docs)

In [25]:
# Prompt layout: retrieved context first, then the question, then an open
# "Response:" cue for the model to complete.
structured_prompt = (
    f"Context:\n{context}\n\n"
    f"Query:\n{user_message}\n\n"
    "Response:"
)

In [26]:
# Chat history, seeded with the system message. The chat cell sends it with
# every request and appends each user/assistant turn to it.
struct = [{"role": "system", "content": "You are a helpful Assistant"}]

In [27]:
# Send the history plus the context-augmented prompt as the newest user turn.
request_messages = struct + [{"role": "user", "content": structured_prompt}]
chat = openai.ChatCompletion.create(
    model="gpt-4o-mini",
    messages=request_messages,
    temperature=0.5,
    max_tokens=1500,
    top_p=1,
    frequency_penalty=0,
    presence_penalty=0,
)

# The history stores the bare question, not the context-augmented prompt
# that was actually sent.
user_turn = {"role": "user", "content": user_message}
struct.append(user_turn)

response = chat.choices[0].message.content
assistant_turn = {"role": "assistant", "content": response}
struct.append(assistant_turn)

In [28]:
# Display the model's answer; print() renders its line breaks, unlike the repr.
print(response)

Common misconceptions about saving include:

1. **Low Interest Rates Make Savings Accounts Useless**: Many people believe that because savings accounts typically offer low interest rates, saving in banks is not worthwhile. However, the primary purpose of a savings account is to keep your money secure and easily accessible, rather than to generate significant growth.

2. **You Need a Large Amount to Start Saving**: Some individuals think they need a substantial amount of money to begin saving. In reality, starting with small amounts can still lead to significant savings over time, especially with consistent contributions.

3. **Saving is Only for the Wealthy**: There is a misconception that saving is only for those who already have money. In truth, anyone can save, regardless of income level, and it’s important to develop a saving habit early on.

4. **You Should Only Save After Paying Off Debt**: While paying off high-interest debt is important, it’s also crucial to save simultaneously