In [1]:
%pip install langchain chromadb pypdf2 ollama langchain_community langchain-ollama openai tiktoken

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.


In [9]:
import os

os.environ['OPENAI_API_KEY'] = ''

In [10]:
import PyPDF2

def extract_text_from_pdf(pdf_file):
    reader = PyPDF2.PdfReader(pdf_file)
    text = ""
    for page in reader.pages:
        text += page.extract_text()
    return text


In [11]:
from langchain.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_ollama import OllamaEmbeddings
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama.llms import OllamaLLM

embedding_function = OllamaEmbeddings(model='nomic-embed-text')  
vector_store = Chroma(embedding_function=embedding_function, collection_name="Capital", persist_directory="./chromadb")

def index_pdf_to_chroma(pdf_file, vector_store):
    text = extract_text_from_pdf(pdf_file)
    
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    texts = text_splitter.split_text(text)
    
    docs = [Document(page_content=chunk) for chunk in texts]
    
    vector_store.add_documents(docs)


In [12]:
def retrieve_from_chroma(query, vector_store):
    docs = vector_store.similarity_search(query, k=5) 
    return docs


In [13]:
def generate_answer(query, vector_store, chat_llm):
    relevant_docs = retrieve_from_chroma(query, vector_store)
    context = "\n".join([doc.page_content for doc in relevant_docs])

    prompt = f"Context: {context}\n\nQuestion: {query}\n\nAnswer:"
    response = chat_llm.invoke(prompt)
    return response


In [28]:
def generate_mcqs(query, vector_store, chat_llm):
    relevant_docs = retrieve_from_chroma(query, vector_store)
    context = "\n".join([doc.page_content for doc in relevant_docs])

    prompt = "Context:" + context + """\n\n Generate 5 MCQ type of question in a structure manner as like in JSON format along with the correct option and make sure options are not too long and questions are well defined, example output: [
  {
    "question": "What is the Internal Rate of Return (IRR)?",
    "options": [
      "The discount rate at which the net present value of all cash flows equals zero.",
      "The interest rate that makes the sum of the present value of future cash flows equal to the initial investment.",
      "The ratio of average annual accounting profit to the initial investment cost."
    ],
    "correct": 1
  },
  {
    "question": "What is the Decision Rule for IRR?",
    "options": [
      "If the IRR exceeds the required rate of return or cost of capital, reject the project.",
      "If the IRR equals the required rate of return or cost of capital, accept the project.",
      "If the IRR exceeds the required rate of return or cost of capital, accept the project."
    ],
    "correct": 2
  }]. Additionally make sure about: """ + query
    
    response = chat_llm.invoke(prompt)
    return response

In [29]:
chat_llm = OllamaLLM(model="llama3.2")


In [30]:
pdf_file_path = "./Capital.pdf"
index_pdf_to_chroma(pdf_file_path, vector_store)


In [31]:
query = "Give me hard"
answer = generate_mcqs(query, vector_store, chat_llm)
print("Output:", answer)

Output: Here are five MCQ-style questions in JSON format along with the correct options:

[
  {
    "question": "What is the primary goal of allocating available capital to projects during Capital Budgeting?",
    "options": [
      "To maximize profits",
      "To minimize costs",
      "To allocate available capital to the most valuable projects while staying within budget"
    ],
    "correct": 3
  },
  {
    "question": "What type of capital rationing is imposed by a company's management to maintain control over capital expenditures?",
    "options": [
      "Hard Capital Rationing",
      "Soft Capital Rationing",
      "Both Hard and Soft Capital Rationing"
    ],
    "correct": 2
  },
  {
    "question": "What is the formula for calculating Internal Rate of Return (IRR)?",
    "options": [
      "Trial and error or financial calculators",
      "NPV equals zero",
      "IRR equals the cost of capital"
    ],
    "correct": 1
  },
  {
    "question": "According to the Decision Ru

In [36]:
data = answer[answer.index('[') : answer.rindex(']')+1]

In [38]:
import json

In [39]:
json.loads(data)

[{'question': 'What is the primary goal of allocating available capital to projects during Capital Budgeting?',
  'options': ['To maximize profits',
   'To minimize costs',
   'To allocate available capital to the most valuable projects while staying within budget'],
  'correct': 3},
 {'question': "What type of capital rationing is imposed by a company's management to maintain control over capital expenditures?",
  'options': ['Hard Capital Rationing',
   'Soft Capital Rationing',
   'Both Hard and Soft Capital Rationing'],
  'correct': 2},
 {'question': 'What is the formula for calculating Internal Rate of Return (IRR)?',
  'options': ['Trial and error or financial calculators',
   'NPV equals zero',
   'IRR equals the cost of capital'],
  'correct': 1},
 {'question': 'According to the Decision Rule, what happens if the IRR exceeds the required rate of return or cost of capital?',
  'options': ['Accept the project',
   'Reject the project',
   'Consider alternative options'],
  'corre

In [4]:
!pip3 install openai-whisper

Defaulting to user installation because normal site-packages is not writeable
Collecting openai-whisper
  Using cached openai_whisper-20240930-py3-none-any.whl
Collecting numba
  Using cached numba-0.60.0-cp39-cp39-macosx_11_0_arm64.whl (2.7 MB)
Collecting more-itertools
  Using cached more_itertools-10.5.0-py3-none-any.whl (60 kB)
Collecting llvmlite<0.44,>=0.43.0dev0
  Using cached llvmlite-0.43.0-cp39-cp39-macosx_11_0_arm64.whl (28.8 MB)
Installing collected packages: llvmlite, numba, more-itertools, openai-whisper
Successfully installed llvmlite-0.43.0 more-itertools-10.5.0 numba-0.60.0 openai-whisper-20240930
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [5]:
import whisper

In [6]:

model = whisper.load_model("small")

100%|███████████████████████████████████████| 461M/461M [04:08<00:00, 1.94MiB/s]
  checkpoint = torch.load(fp, map_location=device)


In [11]:
result = model.transcribe("https://studius.s3.ap-south-1.amazonaws.com/cm2ev0gep0001d0e3yyei36nz/cm2ev0gep0001d0e3yyei36nz/2024-10-18T22%3A54%3A45.724Z.mp3")
print(result["text"])



 වවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවවව
