In [19]:
!pip install --quiet --upgrade langchain langchain-community langchain-chroma

In [22]:
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader

In [24]:
import os
import getpass

In [26]:
!pip install -qU langchain-openai
os.environ["LANGCHAIN_TRACING_V2"] = "false"  
os.environ["OPENAI_API_KEY"] = getpass.getpass()
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model="gpt-4o-mini")

 ········


In [28]:
!pip install pypdf
import PyPDF2

pdf_loader = PyPDFLoader(file_path='Resilient_LosAngeles_Plan.pdf') 
docs = pdf_loader.load()



In [29]:
len(docs[0].page_content)

91

In [32]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
all_splits = text_splitter.split_documents(docs)
len(all_splits)

468

In [34]:
from langchain_chroma import Chroma
from langchain_openai import OpenAIEmbeddings

vectorstore = Chroma.from_documents(documents=all_splits, embedding=OpenAIEmbeddings())

In [35]:
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 6})

retrieved_docs = retriever.invoke("What Climate Threats?")

len(retrieved_docs)

6

In [329]:
print(retrieved_docs[0].page_content)

Recent catastrophes around the world —
hurricanes in the U.S. and the Caribbean, 
earthquakes in Mexico, fires in the western U.S., 
flooding in Nepal and Bangladesh —are serious 
reminders that every community needs focused resilience strategies. 
In Los Angeles, we know a major earthquake is 
a matter of when, not if. And we know that the 
next one could be more than 10 times worse 
than the 1994 Northridge earthquake that 
claimed 57 lives, injured over 9,000, displaced 
tens of thousands from their homes, and caused 
$20 billion in damage. 
And earthquakes aren’t the only threat we face: 
The effects of climate change are worsening, 
and cybercrime is causing more turmoil than 
ever for businesses, governments, and other 
institutions around the world. These threats are emerging quickly, and 
we can’t wait for catastrophes to hit before 
confronting them. We need a comprehensive, 
strategically coordinated approach to  
urban resilience.


In [38]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [40]:
definitions = {
    "Climate Change": "A long-term shift in weather patterns and temperatures, primarily caused by human activities emitting greenhouse gases (GHGs).",
    "Greenhouse Gases (GHGs)": "Atmospheric gases like CO₂, CH₄, N₂O that absorb and emit radiation, leading to the greenhouse effect.",
    "Anthropogenic Emissions": "Emissions of GHGs resulting from human activities such as burning fossil fuels and deforestation.",
    "Climate Impacts": "Consequences of climate-related hazards on natural and human systems, affecting lives, ecosystems, economies, and infrastructure.",
    "Climate Risk": "Potential negative consequences from climate impacts, resulting from the interaction of hazard, exposure, and vulnerability.",
    "Climate Vulnerability": "The degree to which a system is susceptible to harm from climate change and its ability to adapt.",
    "Climate Policies": "Strategies and measures adopted to implement resilience, mitigation, and adaptation options.",
    "Resilience": "The ability of systems to cope with climate hazards by maintaining essential functions and adapting to changes.",
    "Resilience Options": "Strategies to build resilience through policy changes, infrastructure improvements, planning, etc.",
    "Mitigation": "Efforts to reduce or prevent emission of GHGs.",
    "Mitigation Options": "Technologies or practices that contribute to mitigation, like renewable energy or waste minimization.",
    "Adaptation": "Adjusting systems to actual or expected climate changes to minimize harm or exploit beneficial opportunities.",
    "Adaptation Options": "Strategies addressing climate change adaptation, including structural, institutional, ecological, and behavioral measures.",
    "Climate Justice": "Ensuring equitable sharing of the burdens and benefits of climate change impacts.",
    "Maladaptation": "Actions that may increase vulnerability to climate change or diminish resilience.",
    "Scenario": "A plausible description of how the future may develop based on a coherent set of assumptions."
}

In [42]:
normalized_definitions = {key.lower(): value for key, value in definitions.items()}
definitions_context = "\n".join([f"{key}: {value}" for key, value in definitions.items()])


In [71]:
system_prompt = (
    "You are a researcher specializing in extracting climate-related information from climate adaptation and resilience plans."
    "You are provided with a document (referred to as the 'plan') from a state or city in the United States (the 'planning jurisdiction')that has adopted policies addressing climate change, which may pertain to resilience, mitigation, and adaptation."
    "Please provide a detailed answer to the following question and include citations in the format: (Chapter name, Section header if applicable, Page number). If any of these citation variables are missing, write N/A"
    "\n\n"
    "{context}"
    "Definitions:\n"
    f"{definitions_context}"
)

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)
print("Prompt Structure:", prompt)

question_answer_chain = create_stuff_documents_chain(llm, prompt)
rag_chain = create_retrieval_chain(retriever, question_answer_chain)


Prompt Structure: input_variables=['context', 'input'] input_types={} partial_variables={} messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template="You are a researcher specializing in extracting climate-related information from climate adaptation and resilience plans.You are provided with a document (referred to as the 'plan') from a state or city in the United States (the 'planning jurisdiction')that has adopted policies addressing climate change, which may pertain to resilience, mitigation, and adaptation.Please provide a detailed answer to the following question and include citations in the format: (Chapter name, Section header if applicable, Page number). If any of these citation variables are missing, write N/A\n\n{context}Definitions:\nClimate Change: A long-term shift in weather patterns and temperatures, primarily caused by human activities emitting greenhouse gases (GHGs).\nGreenhouse Gases (GHGs)

In [85]:
import pandas as pd
questions_df = pd.read_csv("Question_Prompts.csv")

# Extract the 'Question' column (or whichever column contains your questions)
questions = questions_df['Question'].tolist()

In [87]:
def generate_responses(questions):
    responses = []
    for question in questions:
        try:
            response = rag_chain.invoke({"input": question.lower()}) #generate a response
            
            
            responses.append(f"**{question}**\n{response['answer']}\n\n") #formatting output
        except Exception as e:
            responses.append(f"**{question}**\nError: {str(e)}\n\n")
    return responses

In [89]:
print(questions_df.dtypes)
print(questions_df.head())

Question    object
dtype: object
                                            Question
0  Provide a summary of information available fro...
1  To what extent does the plan address resilienc...
2  Does the plan describe a scenario of climate c...
3  What climate impacts does the plan identify as...
4  For each identified climate impact, summarize ...


In [91]:
Output = "### Los Angeles Plan\n\n"

Output += "## Questions\n"
Output += ''.join(generate_responses(questions))

print(Output)

### Los Angeles Plan

## Questions
**Provide a summary of information available from the plan regarding the planning jurisdiction, including its name, location, population, geography, major industries, per capita income, and similar information.**
The provided excerpt does not contain specific information regarding the planning jurisdiction's name, location, population, geography, major industries, or per capita income. It focuses primarily on climate resilience strategies, planning processes, and community engagement in Los Angeles without providing demographic or economic data about the jurisdiction. Therefore, I am unable to provide a summary of relevant information in that regard. 

If more context or sections of the plan were available, additional details could potentially be extracted. As it stands, the information required is missing. 

Citation: N/A

**To what extent does the plan address resilience, mitigation, and adaptation?**
The plan addresses resilience, mitigation, and a

In [93]:
with open("los_angeles_plan.md", "w") as file:
    file.write(Output)
print("Markdown document created: los_angeles_plan.md")

Markdown document created: los_angeles_plan.md
