# Pull Polygons from TerraMatch API

This notebook sets up the process to pull polygon geometries and metadata from the TerraMatch API.

In [1]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
from datetime import datetime
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean

## Set file paths

In [2]:
# Naming convention
run_name = 'cohort2'
run_dir = 'tf_cohort2'

# Today's date
today = datetime.today().strftime('%Y-%m-%d') # Check computer date before running

# Files
approved_projects_file = '../projects_all_approved_202502211226.csv' # List of approved projects (infile)
tm_api_pull_results_file = f'../data/{run_dir}/tm_api_response_prod_{run_name}_{today}.json' # Save a JSON file that stores the results of the TM API pull; read it back in to clean the results (outfile, infile)
polygon_features_file = f'../data/{run_dir}/tm_api_{run_name}_{today}.csv' # Save the cleaned polygon features csv in the terrafund-portfolio-analyses repo (outfile)
polygon_features_maxar_file = f'/home/darby/github_repos/maxar-tools/data/{run_dir}/polygon_data/tm_api_{run_name}_{today}.csv' # Save the cleaned polygon features csv in the maxar-tools repo (outfile)

## Set up token and API URL

In [3]:
# Set up token access
auth_path = '../secrets.yaml'
with open(auth_path) as auth_file:
    auth = yaml.safe_load(auth_file)
headers = {
    'Authorization': f"Bearer {auth['access_token']}"
    }

In [4]:
# TerraMatch API URLs
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?" # Use for testing queries
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?" # Use to pull data for analysis

## Create lists of projects to pull

In [5]:
# Read in list of approved projects (2025-02-21)
full = pd.read_csv(approved_projects_file)
full.shape

(282, 10)

In [6]:
# Create lists of projects by Cohort (and split cohort 1 into projects within the TF landscapes and outside of the TF landscapes)
cohort1 = full[full['cohort'] == 'terrafund']
cohort1_landscapes = cohort1[cohort1['country'].isin(['BI', 'CD', 'RW', 'KE', 'GH'])]
cohort1_non_landscapes = cohort1[~cohort1['country'].isin(['BI', 'CD', 'RW', 'KE', 'GH'])]
cohort2 = full[full['cohort'] == 'terrafund-landscapes']

In [7]:
# Create a list of project ids to query
ids = list(set(cohort2.project_id))
len(ids)

# Create a short list of ids for testing
# ids = ids[1:3]
# ids

94

## Pull polygons from TM API

In [8]:
results = api.pull_wrapper(prod_url, headers, ids, outfile=tm_api_pull_results_file)

Pulling Projects: 100%|██████████| 94/94 [05:20<00:00,  3.41s/project]


Results saved to ../data/tf_cohort2/tm_api_response_prod_cohort2_2025-04-02.json


In [9]:
df = pd.DataFrame(results)

In [10]:
print(len(df))
print(f"df has {df.project_id.nunique()} unique projects")
print(f"df has {df.poly_id.nunique()} unique polygons")
df['project_id'].value_counts()
df.head()

16838
df has 76 unique projects
df has 16838 unique polygons


Unnamed: 0,status,plantStart,calcArea,plantEnd,practice,targetSys,distr,numTrees,name,siteId,indicators,siteName,geometry,establishmentTreeSpecies,reportingPeriods,lightResource,poly_id,project_id
0,approved,2024-12-28,0.881495,2024-12-28,tree-planting,woodlot-or-plantation,full,4000,KAGONYI,67009810-42cc-4171-9849-c69cc7350787,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",WDI-KAGONYI,"{'type': 'Polygon', 'coordinates': [[[29.48052...",[],"[{'dueAt': '2024-07-30T00:00:00.000Z', 'submit...",False,3b65b6e6-fdbe-4f8e-9961-42a75899a342,34bfb500-c388-47f6-9843-1b4dcf7ac70f
1,approved,2024-12-14,8.525688,2024-12-16,tree-planting,agroforest,partial,560,Kagonyi 2,67009810-42cc-4171-9849-c69cc7350787,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",WDI-KAGONYI,"{'type': 'Polygon', 'coordinates': [[[29.47870...",[],"[{'dueAt': '2024-07-30T00:00:00.000Z', 'submit...",False,1d352518-c616-4721-8d29-ea29ce72aa18,34bfb500-c388-47f6-9843-1b4dcf7ac70f
2,approved,2024-12-14,39.6337,2024-12-18,tree-planting,agroforest,partial,9750,Nyabwuya,7b183198-48dc-4319-8e13-bf9268157787,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",WDI-NYABWUYA,"{'type': 'Polygon', 'coordinates': [[[29.48487...",[],"[{'dueAt': '2024-07-30T00:00:00.000Z', 'submit...",False,d1f3ea0e-11f3-4508-aa42-3789f703563c,34bfb500-c388-47f6-9843-1b4dcf7ac70f
3,approved,2024-12-14,24.871274,2024-12-18,tree-planting,agroforest,partial,11250,Karengane 1,873dbf9d-ac28-49dd-936d-16c8a3d534ec,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",WDI-KARENGANE,"{'type': 'Polygon', 'coordinates': [[[29.49131...",[],"[{'dueAt': '2024-07-30T00:00:00.000Z', 'submit...",False,9662ba01-18d4-4eba-8815-56bb17d0b2db,34bfb500-c388-47f6-9843-1b4dcf7ac70f
4,approved,2024-12-14,5.493717,2024-12-17,tree-planting,agroforest,partial,6000,Karengane 3,873dbf9d-ac28-49dd-936d-16c8a3d534ec,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",WDI-KARENGANE,"{'type': 'Polygon', 'coordinates': [[[29.49035...",[],"[{'dueAt': '2024-07-30T00:00:00.000Z', 'submit...",False,4581dd57-7270-4b0f-bae0-7fbf3d670299,34bfb500-c388-47f6-9843-1b4dcf7ac70f


## Clean Attributes

In [11]:
# Load the saved JSON file
with open(tm_api_pull_results_file, 'r') as file:
    project_results = json.load(file)

In [12]:
# Clean the csv and transform it into a dataframe
## Identifies and converts invalid plantstart and plantend dates to NaT
## Saves one copy of the polygon features csv to the terrafund-portfolio-analysis repo and one to the maxar-tools repo
clean_api = clean.process_tm_api_results(project_results,
                                         outfile1 = polygon_features_file,
                                         outfile2 = polygon_features_maxar_file)

  affected_rows.loc[is_feb_29] = non_leap_years
  affected_rows.loc[is_feb_29] = non_leap_years


Number of rows missing a 'plantstart' date: 1/16838
Number of rows missing a 'plantend' date: 1/16838
⚠️ Total rows missing start and end plant date: 0
⚠️ Total projects with at least 1 polygon missing 'plantstart': 1
⚠️ Total polygons missing 'plantstart': 1
There are 0 projects with ALL polygons missing plantstart.
Projects with SOME polygons missing plantstart: 1
