In [5]:
import os 
from dotenv import load_dotenv
from langchain_community.llms import HuggingFaceHub
from langchain_community.document_loaders import PyPDFLoader
from langchain_ollama import OllamaLLM

In [6]:
llm = OllamaLLM(model="llama3")

In [7]:
%pwd

'/Users/YGT/ygt/interview-questions-project/research'

In [8]:
%cd ..

/Users/YGT/ygt/interview-questions-project


In [9]:
from langchain.document_loaders import PyPDFLoader

In [10]:
file_path = "data/SDG.pdf" 
loader = PyPDFLoader(file_path) 
data = loader.load()

In [11]:
len(data)

24

In [12]:
question_gen = "" 
for page in data: 
    question_gen += page.page_content

In [13]:
question_gen

'IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15 yea

In [14]:
from langchain.text_splitter import TokenTextSplitter

In [15]:
splitter_question_gen = TokenTextSplitter(
    encoding_name="cl100k_base",  
    chunk_size=10000,
    chunk_overlap=100
)

In [16]:
chunk_question_gen = splitter_question_gen.split_text(question_gen) 

In [17]:
chunk_question_gen

['IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence that we can succeed. In the past 15 ye

In [18]:
len(chunk_question_gen)

1

In [19]:
type(chunk_question_gen[0])

str

In [20]:
from langchain.docstore.document import Document 

In [21]:
document_question_gen = [Document(page_content=t) for t in chunk_question_gen]

In [22]:
document_question_gen

[Document(metadata={}, page_content='IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD \nCAME TOGETHER TO FACE THE FUTURE.\nAnd what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. \nNot just in some faraway place, but in their own cities and towns and villages.\nThey knew things didn’t have to be this way. They knew we had enough \nfood to feed the world, but that it wasn’t getting shared. They knew there \nwere medicines for HIV and other diseases, but they cost a lot. They knew \nthat earthquakes and floods were inevitable, but that the high death \ntolls were not. \nThey also knew that billions of people worldwide shared their hope for a \nbetter future.\nSo leaders from these countries created a plan called the Sustainable \nDevelopment Goals (SDGs). This set of 17 goals imagines a future just 15 years \noff that would be rid of poverty and hunger, and safe from the worst effects of \nclimate change. It’s an ambitious plan. \nBut there’s ample evidence tha

In [23]:
type(document_question_gen[0]) 

langchain_core.documents.base.Document

In [24]:
splitter_answers_gen = TokenTextSplitter(
    encoding_name="cl100k_base",  
    chunk_size=1000,
    chunk_overlap=100
)

In [25]:
document_answer_gen = splitter_answers_gen.split_documents(document_question_gen) 

In [26]:
len(document_answer_gen)

4

In [31]:
llm_ques_gen_pipeline = OllamaLLM(model="llama3", temperature=0.3)

In [32]:
prompt_template = """
You are a expert at creating questions based on coding materials and documentations.
Your goal is to prepare a coder or programmer for their exam and codding tests. 
You do this by asking questions about the text below: 

----------
{text}
-----------  
Create questions that will prepare the coders or programmers for their tests. 
Make sure not to lose any important information. 

QUESTIONS:
"""

In [33]:
from langchain.prompts import PromptTemplate

In [34]:
PROMPT_QUESTIONS = PromptTemplate(template=prompt_template, input_variables=['text']) 

In [35]:
refine_template = ("""
You are a expert at creating questions based on coding materials and documentations.
Your goal is to prepare a coder or programmer for their exam and codding tests. 
We have recieved some practise questions to a certain extent:{existing_answer}.
We have the option to refine the existing questions or add news ones. 
(only if necessary) with some more context below.
----------
{text}
-----------  
                   Given the new context, refine the original questions in English. 
                   If the context is not helpful, please provide the original questions. 

QUESTIONS:
""")

In [36]:
REFINE_PROMPT_QUESTIONS = PromptTemplate(
    input_variables=["existing_answer", "text"], 
    template=refine_template 
)

In [37]:
from langchain.chains.summarize import load_summarize_chain

In [38]:
ques_gen_chain = load_summarize_chain(llm=llm_ques_gen_pipeline,
                                      chain_type="refine", 
                                      verbose=True, 
                                      question_prompt=PROMPT_QUESTIONS, 
                                      refine_prompt=REFINE_PROMPT_QUESTIONS)

In [39]:
ques = ques_gen_chain.run(document_question_gen) 
print(ques) 

  ques = ques_gen_chain.run(document_question_gen)




[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
You are a expert at creating questions based on coding materials and documentations.
Your goal is to prepare a coder or programmer for their exam and codding tests. 
You do this by asking questions about the text below: 

----------
IN THE YEAR 2015, LEADERS FROM 193 COUNTRIES OF THE WORLD 
CAME TOGETHER TO FACE THE FUTURE.
And what they saw was daunting. Famines. Drought. Wars. Plagues. Poverty. 
Not just in some faraway place, but in their own cities and towns and villages.
They knew things didn’t have to be this way. They knew we had enough 
food to feed the world, but that it wasn’t getting shared. They knew there 
were medicines for HIV and other diseases, but they cost a lot. They knew 
that earthquakes and floods were inevitable, but that the high death 
tolls were not. 
They also knew that billions of people worldwide shared their hope for a 


In [41]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


E0000 00:00:1760451685.469876  125405 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.
E0000 00:00:1760451685.474057  125405 alts_credentials.cc:93] ALTS creds ignored. Not running on GCP and untrusted ALTS is not enabled.


In [42]:
from langchain.vectorstores import FAISS

In [43]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)

In [44]:
vector_store = FAISS.from_documents(document_answer_gen, embeddings_model) 

In [45]:
llm_answer_gen = OllamaLLM(
    model="llama3",
    temperature=0.3
)

In [46]:
ques

'Here are some potential questions based on the text:\n\n**Goal 1: End Extreme Poverty in All Forms**\n\n1. What is the goal of ending extreme poverty, and what was achieved by 2015?\n2. How many people still live on less than $1.25 a day worldwide?\n\n**Goal 2: End Hunger, Achieve Food Security, Improve Nutrition, and Promote Sustainable Agriculture**\n\n3. What has been the progress in reducing hunger over the past 20 years?\n4. What is the goal for achieving food security and sustainable agriculture by 2030?\n\n**Goal 3: Ensure Healthy Lives and Promote Well-being for All at All Ages**\n\n5. What are some of the challenges remaining in ensuring healthy lives, such as preventable child deaths and maternal mortality?\n6. How can we make good health more than just a wish?\n\n**Goal 4: Ensure Inclusive and Equitable Quality Education and Promote Lifelong Learning Opportunities for All**\n\n7. What is the current state of education worldwide, including primary education enrollment rates?

In [47]:
ques_list = ques.split("\n") 
ques_list 

['Here are some potential questions based on the text:',
 '',
 '**Goal 1: End Extreme Poverty in All Forms**',
 '',
 '1. What is the goal of ending extreme poverty, and what was achieved by 2015?',
 '2. How many people still live on less than $1.25 a day worldwide?',
 '',
 '**Goal 2: End Hunger, Achieve Food Security, Improve Nutrition, and Promote Sustainable Agriculture**',
 '',
 '3. What has been the progress in reducing hunger over the past 20 years?',
 '4. What is the goal for achieving food security and sustainable agriculture by 2030?',
 '',
 '**Goal 3: Ensure Healthy Lives and Promote Well-being for All at All Ages**',
 '',
 '5. What are some of the challenges remaining in ensuring healthy lives, such as preventable child deaths and maternal mortality?',
 '6. How can we make good health more than just a wish?',
 '',
 '**Goal 4: Ensure Inclusive and Equitable Quality Education and Promote Lifelong Learning Opportunities for All**',
 '',
 '7. What is the current state of educatio

In [48]:
from langchain.chains import RetrievalQA

In [49]:
answer_generation_chain = RetrievalQA.from_chain_type(llm=llm_answer_gen,
                                                      chain_type="stuff", 
                                                      retriever=vector_store.as_retriever())

In [50]:
for question in ques_list:
    print('question: ', question)

question:  Here are some potential questions based on the text:
question:  
question:  **Goal 1: End Extreme Poverty in All Forms**
question:  
question:  1. What is the goal of ending extreme poverty, and what was achieved by 2015?
question:  2. How many people still live on less than $1.25 a day worldwide?
question:  
question:  **Goal 2: End Hunger, Achieve Food Security, Improve Nutrition, and Promote Sustainable Agriculture**
question:  
question:  3. What has been the progress in reducing hunger over the past 20 years?
question:  4. What is the goal for achieving food security and sustainable agriculture by 2030?
question:  
question:  **Goal 3: Ensure Healthy Lives and Promote Well-being for All at All Ages**
question:  
question:  5. What are some of the challenges remaining in ensuring healthy lives, such as preventable child deaths and maternal mortality?
question:  6. How can we make good health more than just a wish?
question:  
question:  **Goal 4: Ensure Inclusive and Equ

In [51]:
# Answer each question and save to a file 
for question in ques_list: 
    print("Question: ", question) 
    answer = answer_generation_chain.run(question) 
    print("Answer: ", answer)
    print("---------------------------------------------\\n\\n")
    # Save answer to file 
    with open("answes.txt", "a") as f:
        f.write("Question: " + question + "\\n")
        f.write("Answer: " + answer + "\\n") 
        f.write("---------------------------------------------\\n\\n")

Question:  Here are some potential questions based on the text:
Answer:  I don't know. The text does not mention a specific goal or target related to reducing inequality within and among countries. It only mentions that income inequality is a global problem that requires global solutions, but it does not provide further information on how to achieve this goal.
---------------------------------------------\n\n
Question:  
Answer:  I don't know. The text does not provide a specific question, so I cannot attempt to answer it.
---------------------------------------------\n\n
Question:  **Goal 1: End Extreme Poverty in All Forms**
Answer:  I don't know the answer to this question as it is not provided in the given context.
---------------------------------------------\n\n
Question:  
Answer:  I don't know. The text does not provide information about what people can do to help achieve a specific Sustainable Development Goal (SDG). It appears to be a general overview of the SDGs and their im