In [1]:
import sys
sys.path.append("..")

from dotenv import load_dotenv
_ = load_dotenv("../.env")

In [2]:
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Annotated
from uuid import uuid4

import geopandas as gpd
import pandas as pd
from langchain_core.tools import tool
from langgraph.prebuilt import InjectedState
from langchain_anthropic import ChatAnthropic
from pydantic import BaseModel, Field
from shapely.geometry import shape
from langchain_core.messages import AIMessage, HumanMessage

from zeno.agents.distalert.tool_location import location_tool
from zeno.agents.kba.prompts import KBA_INSIGHTS_PROMPT, KBA_COLUMN_SELECTION_PROMPT

data_dir = Path("../data/kba")
kba = gpd.read_file(data_dir / "kba_merged.gpkg")

sonnet = ChatAnthropic(model="claude-3-5-sonnet-latest")

In [3]:
class KbaDataInput(BaseModel):
    query: str = Field(
        ...,
        description="Name of the location to search for. Can be a city, region, or country name.",
    )


@tool(
    "kba-data-tool",
    args_schema=KbaDataInput,
    return_direct=False,
    response_format="content_and_artifact",
)
def kba_data_tool(
    query: str,
) -> List[Tuple[Optional[str], Optional[Dict[str, Any]]]]:
    """
    Finds data about all Key Biodiversity Areas (KBAs) with in an area of interest.
    """
    print("kba data tool")
    result = location_tool.invoke(
        {
            "name": "location-tool",
            "args": {
                "query": query,
            },
            "id": str(uuid4()),
            "type": "tool_call",
        }
    )  # pass a tool call to return the artifact
    _, artifact = result.content, result.artifact
    aoi_geometry = shape(artifact[0]["geometry"])
    aoi_buffered = aoi_geometry.buffer(0.1)

    kba_within_aoi = kba[kba.geometry.within(aoi_buffered)]

    info = f"Found data of {len(kba_within_aoi)} KBAs within the area of interest: {query}."
    return (info, kba_within_aoi)

In [4]:
dt = kba_data_tool.invoke({
    "type": "tool_call",
    "name": "kba-data-tool",
    "id": str(uuid4()),
    "args": {
        "query": "Odisha"
    }
})

kba data tool


In [5]:
dt.content

'Found data of 8 KBAs within the area of interest: Odisha.'

In [21]:
dt.artifact.head()

Unnamed: 0,area__ha,umd_tree_cover_extent_2000__ha,umd_tree_cover_gain__ha,gfw_forest_carbon_gross_removals_aboveground_2001_2023__Mg_CO2,gfw_forest_carbon_gross_removals_belowground_2001_2023__Mg_CO2,gfw_forest_carbon_gross_removals_2001_2023__Mg_CO2,gfw_forest_carbon_gross_emissions_all_gases_2001_2023__Mg_CO2e,gfw_forest_carbon_net_flux_2001_2023__Mg_CO2e,gfw_aboveground_carbon_stock_2000__Mg_C,gfw_belowground_carbon_stock_2000__Mg_C,...,regions,siteAreaCalculated,sitecode,siteDescription,siteName,siteNameNational,threatsDescription,updatedAt,yearOfAssessment,geometry
3080,44.948501,8.67066,0.0,546.6937,141.891072,688.5848,20.413344,-668.1714,524.1878,135.353,...,Asia,193.456136,18334,Chandaka was declared a sanctuary for the Asia...,Chandaka - Dampara Wildlife Sanctuary,Chandaka - Dampara Wildlife Sanctuary,MAIN THREATS: Agricultural intensification and...,2004-01-01 00:00:00+00:00,2004,"MULTIPOLYGON (((85.56518 20.26727, 85.56525 20..."
3081,60150.38201,36413.32053,93.685847,1307121.0,366172.7984,1673294.0,29013.70295,-1644280.0,3506184.0,971970.5,...,Asia,658.079281,18337,Satkosia Gorge Sanctuary lies on either side o...,Satkosia Gorge Wildlife Sanctuary,Satkosia Gorge Wildlife Sanctuary,MAIN THREATS: Fishing; Poaching; Fragmentation...,2004-01-01 00:00:00+00:00,2004,"MULTIPOLYGON (((84.79079 20.57804, 84.78873 20..."
3082,116459.9794,65696.30976,258.554323,2643922.0,781141.182,3425064.0,199694.8929,-3225369.0,6942960.0,2042322.0,...,Asia,2349.941592,18338,The Simlipal National Park is the most importa...,Simlipal National Park,Simlipal National Park,MAIN THREATS: Poaching; Overgrazing on the fri...,2004-01-01 00:00:00+00:00,2004,"MULTIPOLYGON (((86.04453 21.83424, 86.04556 21..."
3083,63457.30079,17418.58856,77.967738,1105769.0,349917.0532,1455686.0,52095.8681,-1403590.0,1599350.0,505240.1,...,Asia,634.596953,18339,Sunabeda is situated in the Nuapada district o...,Sunabeda Wildlife Sanctuary,Sunabeda Wildlife Sanctuary,MAIN THREATS: Human pressure; Livestock grazin...,2004-01-01 00:00:00+00:00,2004,"MULTIPOLYGON (((82.35715 20.70552, 82.35773 20..."
13779,146177.1227,5008.590506,370.142859,343753.7,112433.3228,456187.0,20133.27195,-436053.7,414924.8,135025.8,...,Asia,1461.806307,46929,,Heerakund Reservoir and Debrigarh Wildlife San...,Heerakund Reservoir and Debrigarh Wildlife San...,,2016-01-01 00:00:00+00:00,2016,"MULTIPOLYGON (((84.01235 21.56453, 83.97388 21..."


In [16]:
class ColumnSelectionOutput(BaseModel):
    columns: List[str] = Field(
        ...,
        description="List of column names relevant to the user query based on knowledge base, user persona, and user query",
    )

column_selection_agent = sonnet.with_structured_output(ColumnSelectionOutput)

column_description = pd.read_csv("../data/kba/kba_column_descriptions.csv")

class KbaInsightsInput(BaseModel):
    question: str = Field(
        ...,
        description="The user's question or query",
    )
@tool(
    "kba-insights-tool",
    args_schema=KbaInsightsInput,
    return_direct=False,
    response_format="content_and_artifact"
)
def kba_insights_tool(question: str):
    """Find insights relevant to the user query for the Key Biodiversity Areas (KBAs)."""
    # kba_within_aoi = state.kba_within_aoi
    # user_persona = state.user_persona
    kba_within_aoi = dt.artifact
    user_persona = "I am a journalist interested in threats & pressures to biodiversity."

    column_selection_prompt = KBA_COLUMN_SELECTION_PROMPT.format(
        user_persona=user_persona,
        question=question,
        dataset_description=column_description.to_csv(index=False),
    )
    columns = column_selection_agent.invoke([AIMessage(content=column_selection_prompt), HumanMessage(content=question)]).columns
    print("COLUMNS", columns, type(columns))

    # add siteName and sitecode to the columns list if they are not already in the list
    if "siteName" not in columns:
        columns.append("siteName")
    if "sitecode" not in columns:
        columns.append("sitecode")
    # remove geometry column if it is in the columns list
    if "geometry" in columns:
        columns.remove("geometry")

    kba_within_aoi_filtered = kba_within_aoi[columns]

    kba_insights_prompt = KBA_INSIGHTS_PROMPT.format(
        user_persona=user_persona,
        question=question,
        dataset_description=column_description[column_description.column.str.contains("|".join(columns))].to_csv(index=False),
        data=kba_within_aoi_filtered.to_csv(index=False),
    )
    print(kba_insights_prompt)

    response = sonnet.invoke(kba_insights_prompt)

    return response, "hello"

In [17]:
it = kba_insights_tool.invoke(
    {
        "type": "tool_call",
        "name": "kba-data-tool",
        "id": str(uuid4()),
        "args": {
            "question": "I would like to understand threats to wildlife species in Odisha"
        }
    }
)

COLUMNS ['threatsDescription', 'additionalBiodiversityValues', 'landUseRegimesAtSite', 'permAg_tcl_2001-2023', 'hardCommodities_tcl_2001-2023', 'settlements_tcl_2001-2023', 'habitatDescription', 'calculatedProtectedArea', 'howIsTheSiteManaged', 'umd_tree_cover_extent_2000__ha', 'umd_tree_cover_gain__ha'] <class 'list'>

You are Keeper Koala 🐨, an expert analyst of Key Biodiversity Areas (KBAs). Your mission is to provide data-driven insights about KBAs while maintaining an engaging, informative tone.

KNOWLEDGE BASE STRUCTURE:
column,description
umd_tree_cover_extent_2000__ha,"Tree cover extent, in hectares, measured in 2000 where tree canopy density is â¥30%. Here âtree coverâ was defined as all vegetation taller than 5 meters in height. âTree coverâ is the biophysical presence of trees and may take the form of natural forests or plantations existing over a range of canopy densities."
umd_tree_cover_gain__ha,Tree cover gain in hectares between 2000 and 2020.
permAg_tcl_2001-2

In [22]:
it

ToolMessage(content='content="G\'day! Keeper Koala here to analyze the threats and pressures facing Key Biodiversity Areas in this dataset. 🐨\\n\\nKey Findings:\\n\\n1. Agricultural Expansion & Land Use Change\\n- Small to large-scale agriculture is a significant pressure, with permanent tree cover loss (permAg_tcl) ranging from 0 to 144.67 hectares across sites\\n- Simlipal National Park shows the highest agricultural pressure with 144.67 hectares lost\\n- Several sites face threats from agricultural intensification and prawn culture conversion\\n\\n2. Infrastructure Development\\n- While hardCommodities_tcl (mining/energy) shows minimal direct impact (near 0 across sites)\\n- Urban expansion particularly threatens Chandaka-Dampara Wildlife Sanctuary, creating human-wildlife conflict with elephants\\n\\n3. Human-Wildlife Conflict\\n- Multiple sites report increasing human-wildlife conflict, particularly with elephants\\n- Chandaka faces severe elephant-human conflict with 65-70 elepha

In [59]:
column_description[column_description.column.str.startswith("umd")]

Unnamed: 0,column,description
1,umd_tree_cover_extent_2000__ha,"Tree cover extent, in hectares, measured in 20..."
2,umd_tree_cover_gain__ha,Tree cover gain in hectares between 2000 and 2...


In [8]:
dt.artifact[['threatsDescription', 'additionalBiodiversityValues', 'country', 'habitatDescription', 'siteName', 'landUseRegimesAtSite', 'permAg_tcl_2001-2023', 'hardCommodities_tcl_2001-2023', 'wildfire_tcl_2001-2023', 'settlements_tcl_2001-2023', 'umd_tree_cover_extent_2000__ha', 'umd_tree_cover_gain__ha', 'sitecode']]

Unnamed: 0,threatsDescription,additionalBiodiversityValues,country,habitatDescription,siteName,landUseRegimesAtSite,permAg_tcl_2001-2023,hardCommodities_tcl_2001-2023,wildfire_tcl_2001-2023,settlements_tcl_2001-2023,umd_tree_cover_extent_2000__ha,umd_tree_cover_gain__ha,sitecode
3080,MAIN THREATS: Agricultural intensification and...,AVIFAUNA: Kumarkhunti reservoir used to hold b...,India,,Chandaka - Dampara Wildlife Sanctuary,nature conservation and research | tourism/rec...,58.443912,0.0,0.0,0.0,8.67066,0.0,18334
3081,MAIN THREATS: Fishing; Poaching; Fragmentation...,AVIFAUNA: Besides the two critically endangere...,India,,Satkosia Gorge Wildlife Sanctuary,nature conservation and research | tourism/rec...,18.384711,0.0,0.0,0.0,36413.32053,93.685847,18337
3082,MAIN THREATS: Poaching; Overgrazing on the fri...,AVIFAUNA: Despite the great importance of Siml...,India,,Simlipal National Park,nature conservation and research | tourism/rec...,144.667301,0.286241,1.430593,0.0,65696.30976,258.554323,18338
3083,MAIN THREATS: Human pressure; Livestock grazin...,AVIFAUNA: Around 200 species of birds have bee...,India,,Sunabeda Wildlife Sanctuary,nature conservation and research | urban/indus...,50.579793,0.0,2.307496,0.0,17418.58856,77.967738,18339
13779,,,India,,Heerakund Reservoir and Debrigarh Wildlife San...,,4.299694,0.0,12.255698,0.0,5008.590506,370.142859,46929
13918,MAIN THREATS: Poaching and illegal trade of bi...,"AVIFAUNA: The marshes around Mangaljodi, and t...",India,,Mangal Jodi,agriculture,0.0,0.0,0.0,0.0,65.654211,11.880393,18336
14334,MAIN THREATS: Indiscriminate fishing; Pollutio...,AVIFAUNA: Chilika Lake in general and Nalabana...,India,,Nalabana Bird Sanctuary (Chilika Lake),fisheries/aquaculture | nature conservation an...,0.870472,0.0,0.0,0.0,205.028896,431.606703,18335
14787,MAIN THREATS: Brackish water prawn culture pon...,AVIFAUNA: Nearly 220 species of birds have bee...,India,,Bhitarkanika Wildlife Sanctuary and National Park,nature conservation and research | tourism/rec...,0.0,0.0,0.0,0.0,0.216214,1.441558,18333
