# Pull Polygons from TerraMatch API

This notebook sets up the process to pull polygon geometries and metadata from the TerraMatch API. The steps for pulling polygons are as follows:
1. Set up configuration and API token
2. 

In [1]:
import requests
import yaml
import json
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from datetime import datetime, timedelta

## Set up token and API URL

In [2]:
# Set up token access
auth_path = '../secrets.yaml'
with open(auth_path) as auth_file:
    auth = yaml.safe_load(auth_file)
headers = {
    'Authorization': f"Bearer {auth['access_token']}"
    }

In [4]:
# TerraMatch API URLs
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?" # Use for testing queries
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?" # Use to pull data for analysis

## API Request

In [5]:
def pull_tm_api_data(url, headers, project_ids):
    '''
    edits to the above function include:
        iterating through list of project ids within func so output is a df with 
        multiple projects
        add project id as a column to support maxar metadata request
        update to last record variable
        added tqdm progress bar  
    '''
    # List to store all retrieved polygon metadata
    results = []
    # Set up a progress bar
    with tqdm(total=len(project_ids), desc="Processing Projects", unit="project") as progress_bar:
        # For every project in the list of project_ids
        for project_id in project_ids:
            # Set parameters with the current project ID
            params = {
                'projectId[]': project_id,
                'polygonStatus[]': 'approved',
                'includeTestProjects': 'false',
                'page[size]': '100'
            }

            last_record = ''
            new_last_record = None  # Ensure it's defined before use

            while True:
                # Send GET request and store the response (polygon geometries & metadata)
                response = requests.get(url, headers=headers, params=params)

                # Check status code
                if response.status_code != 200:
                    raise ValueError(f'Request failed for project {project_id} with status code {response.status_code}')
                
                # Convert the response to a JSON and record the total number of records returned
                response_json = response.json()
                total_records = response_json['meta']['page']['total']

                # Parse response data
                # If there are no polygons for this project
                if total_records == 0:
                    break  # Exit if no data is available (skip to the next project)

                # Loop through each polygon in the response
                for idx in range(0, len(response.json()['data'])):
                    # Extract polygon attributes from each record and store them in dictionary data
                    data = response_json['data'][idx]['attributes']
                    data['poly_id'] = response_json['data'][idx]['meta']['page']['cursor']
                    # Store the project_id in data
                    data['project_id'] = project_id 
                    # Append data ( a dictionary of that project's metadata) in the overall results list
                    results.append(data)

                    # Assign the last cursor only if there are records
                    if idx == (total_records - 1):
                        new_last_record = response_json['data'][idx]['meta']['page']['cursor']

                # Check if there are more pages
                if (len(response.json()['data']) == int(params['page[size]'])):
                    last_record = new_last_record
                    params['page[after]'] = last_record
                else:
                    break  # Exit pagination if no new cursor is found

            progress_bar.update(1) 
    return results

In [6]:
# Create the list of projects to pull
full = pd.read_csv('../projects_all_approved_202501091214.csv')
full = full[(full.framework_key == 'terrafund-landscapes') | (full.framework_key == 'terrafund')]
full.framework_key.value_counts()

framework_key
terrafund               108
terrafund-landscapes     99
Name: count, dtype: int64

In [7]:
full

Unnamed: 0,project_id,project_name,organization_name,organisation_id,country,framework_key,description,status
0,c462918b-47f7-4ed5-99e0-7fec6e342036,"""Nakuru Eco-Reforestation Project""",,1382,KE,terrafund-landscapes,MAIN ACTIVITIES\nJAN - FEB 2024- TREE NURSERY...,approved
1,caae56f9-0bb6-45a2-9d77-ff088b085917,0726 project,,6283,BR,terrafund,org 0726,approved
2,c004619e-c1aa-4f7f-b56b-c8f9b4385d4e,1,,1582,AL,terrafund,1,approved
4,6083e1cf-a636-4c64-9253-ac86cd08f5d7,3SC Production 2.3,,3279,AF,terrafund,3SC Production 2.3,approved
6,617601e0-9839-49fd-b48e-6c07404e7140,Afram Headwaters Restoration Initiative (AHRI),,1358,GH,terrafund-landscapes,1.\tSite Reconnaissance – which has already be...,approved
...,...,...,...,...,...,...,...,...
268,2d1cffcd-300f-4939-a136-310347cf6879,WITHDRAWN: Divine Bamboo Afri 100,,953,UG,terrafund,NOTE: THIS PROJECT HAS WITHDRAWN FROM TERRAFUN...,approved
269,568dc331-b945-41fd-ab57-80a98be57941,Women and Youth-Led Climate Initiative for res...,,1814,KE,terrafund-landscapes,2024\n1. Community sensitization and mobilizat...,approved
270,77fc2c03-5605-45b3-b417-f69d93157215,Women led Community forest Conservation Init...,,2815,KE,terrafund-landscapes,Project Activity \tSummary\n1. Stakeholders in...,approved
271,96c86eae-d4f9-45d8-9780-69c55a9e36e9,World Vision Ethiopia One Tree Planted Project,,1047,ET,terrafund,07/01/2022 to 07/31/2023,approved


In [7]:
# Create a list of project ids to query
ids = list(set(full.project_id))

# Short list for testing purposes
ids = ids[:11]
ids

['e3f2727f-1d76-4f70-a3c7-a6fcc181918a',
 '53f9f5a6-9346-436c-87bb-b5cc86d978c0',
 'a8940698-ff28-456c-b8cd-f7289e612913',
 'c004619e-c1aa-4f7f-b56b-c8f9b4385d4e',
 'c9534124-afcb-4c06-bfdb-d2ade7b82b54',
 '652ba56f-2e75-4735-a0d1-aafebbd940c1',
 '04af9a53-455b-4972-bf23-baa3019919be',
 'caf78b0a-d80a-482e-9c62-842302109a90',
 '4a5aac1e-4ae0-4a54-9a0c-a2f332aabd9f',
 '82dd3a84-2562-4a6f-85d9-f83790daaaba',
 '6d4d7217-175b-4d0c-927d-7596381ddc55']

In [17]:
# If you only want to order data for a subset of project_ids
ids = ['146b6912-62a1-4b58-b027-466dc3295731', '3a860077-df4c-4e95-8fec-41520c551243', '529e1bae-2187-473f-a2a3-17e577720aba']

# DREK
drek = ['9019106b-6e2d-4deb-97a5-2889f976a931']
drek

['9019106b-6e2d-4deb-97a5-2889f976a931']

In [18]:
# Pull polygons from projects in list of ids from TerraMatch API
project_results = pull_tm_api_data(prod_url, headers, drek)

Processing Projects: 100%|██████████| 1/1 [00:00<00:00,  1.19project/s]


In [19]:
# Convert the polygon geometries into a dataframe
project_df = pd.DataFrame(project_results)
project_df.columns = project_df.columns.str.lower()

In [20]:
project_df.head()

Unnamed: 0,name,status,siteid,geometry,plantstart,plantend,practice,targetsys,distr,numtrees,calcarea,indicators,establishmenttreespecies,reportingperiods,poly_id,project_id
0,Degega Community Land,approved,72c8b929-4bdb-4f59-85e3-b3f2e6bd5b23,"{'type': 'Polygon', 'coordinates': [[[39.94405...",2024-10-15,2024-12-20,tree-planting,natural-forest,partial,3000,5.431819,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",0df498fa-804f-4a24-9e1a-73b749595cf3,9019106b-6e2d-4deb-97a5-2889f976a931
1,Wathajir Nanighi C Farm,approved,72c8b929-4bdb-4f59-85e3-b3f2e6bd5b23,"{'type': 'Polygon', 'coordinates': [[[39.87186...",2024-10-15,2024-12-20,tree-planting,agroforest,partial,1000,3.331816,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",da6d461d-f6de-4123-9723-5d20a1d422a0,9019106b-6e2d-4deb-97a5-2889f976a931
2,Kamuthe Farm,approved,72c8b929-4bdb-4f59-85e3-b3f2e6bd5b23,"{'type': 'Polygon', 'coordinates': [[[39.79968...",2024-10-15,2024-12-20,Agroforestry,Tree Planting,Partial,1100,4.384676,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",5ad11d3b-2ed0-4d34-92f9-1126640cb434,9019106b-6e2d-4deb-97a5-2889f976a931
3,Nadhir Farm,approved,72c8b929-4bdb-4f59-85e3-b3f2e6bd5b23,"{'type': 'Polygon', 'coordinates': [[[39.82112...",2024-10-15,2024-12-20,Agroforestry,Tree Planting,Partial,800,3.331821,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",0b048fa9-29da-4ffb-a1e2-2d048648469b,9019106b-6e2d-4deb-97a5-2889f976a931
4,Wathajir Nanighi B Farm,approved,72c8b929-4bdb-4f59-85e3-b3f2e6bd5b23,"{'type': 'Polygon', 'coordinates': [[[39.86125...",2024-10-15,2024-12-20,tree-planting,agroforest,partial,1000,3.331818,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",6b56879d-9c04-464e-8d63-0c6707370138,9019106b-6e2d-4deb-97a5-2889f976a931


In [21]:
len(project_df.columns)
project_df.shape

(19, 16)

In [22]:
# Export the polygon geometries & metadata as a csv
today = datetime.today().strftime('%Y-%m-%d')
# project_df.to_csv(f"../data/tm_api_{today}.csv", index=False) # To the darby-tm-api-pull repo
# project_df.to_csv(f"/home/darby/github_repos/maxar-tools/data/tm_api_{today}.csv", index=False) # To the darby-maxar-tools repo


# TEST PULL
project_df.to_csv(f"../data/tm_api_DREK_2025-02-26.csv", index=False) # To the darby-tm-api-pull repo
#project_df.to_csv(f"/home/darby/github_repos/maxar-tools/data/tm_api_TEST_PROD_NEW.csv", index=False) # To the darby-maxar-tools repo