# Pull Polygons from TerraMatch API

This notebook pulls polygon geometries and metadata from the TerraMatch API, then cleans the results and saves the polygon features as CSV files.

In [1]:
# Standard library
import json
import sys

# Third-party
import pandas as pd
import yaml
from tqdm import tqdm

# Put the project's ../src/ directory on the import path BEFORE importing any
# project-local module, so this cell works on a fresh kernel (Restart & Run All).
sys.path.append('../src/')

# Project-local helpers
# NOTE(review): tm_api_utils presumably lives in ../src/ next to api_utils and
# process_tm_api_results -- TODO confirm. Importing it after the path is
# extended is harmless if it is an installed package instead.
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
import api_utils as api
import process_tm_api_results as clean

## Set file paths

In [2]:
# Files used by this notebook (paths are relative to the notebook's directory
# unless they are absolute).

# Infile: list of approved projects
approved_projects_file = '../projects_all_approved_202502211226.csv'

# Outfile, then infile: JSON file that stores the results of the TM API pull;
# it is read back in later to clean the results
tm_api_pull_results_file = '../data/tf_cohort1/tm_api_response_prod.json'

# Outfile: cleaned polygon features csv, saved in the
# terrafund-portfolio-analyses repo
polygon_features_file = '../data/tf_cohort1/tm_api_cohort1_2025-04-01.csv'

# Outfile: cleaned polygon features csv, saved in the maxar-tools repo
# NOTE(review): absolute, user-specific path -- will not resolve on another
# machine; consider deriving it from a configurable base directory.
polygon_features_maxar_file = '/home/darby/github_repos/maxar-tools/data/tf_cohort1/polygon_data/tm_api_cohort1_2025-04-01.csv'

## Set up token and API URL

In [3]:
# Read the API access token from the local secrets file and build the
# Authorization header that is sent with every TerraMatch API request.
auth_path = '../secrets.yaml'
with open(auth_path) as secrets_fh:
    # safe_load parses plain YAML only; the file must provide an
    # 'access_token' key (a missing key raises KeyError below).
    auth = yaml.safe_load(secrets_fh)

headers = dict(Authorization=f"Bearer {auth['access_token']}")

In [4]:
# TerraMatch API URLs for the sitePolygons endpoint.
# Each URL ends with '?' as written here.

# Production: use to pull data for analysis
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?"

# Staging: use for testing queries
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?"

## Create lists of projects to pull

In [5]:
# Load the list of approved projects (snapshot dated 2025-02-21) into a
# dataframe, then display its (rows, columns) shape as a quick sanity check.
full = pd.read_csv(filepath_or_buffer=approved_projects_file)
full.shape

(282, 10)

In [6]:
# Split the approved projects by cohort. Cohort 1 is further divided into
# projects inside the TF landscapes and projects outside of them, based on the
# project's country code.
landscape_countries = ['BI', 'CD', 'RW', 'KE', 'GH']

cohort1 = full.loc[full['cohort'] == 'terrafund']

# Boolean mask computed once and reused for both halves of the split
in_landscape = cohort1['country'].isin(landscape_countries)
cohort1_landscapes = cohort1.loc[in_landscape]
cohort1_non_landscapes = cohort1.loc[~in_landscape]

cohort2 = full.loc[full['cohort'] == 'terrafund-landscapes']

In [8]:
# Create the list of unique project ids to query.
# drop_duplicates() keeps the ids in first-seen dataframe order, so the list
# is identical on every run. (list(set(...)) orders strings by their
# randomised hash, which made the test slice below pick different projects
# after each kernel restart.)
all_ids = cohort1['project_id'].drop_duplicates().tolist()

# Create a short list of ids for testing.
# `ids` is what the pull cell consumes, so only this subset is pulled;
# set `ids = all_ids` to pull every cohort 1 project.
ids = all_ids[1:3]
ids

['449adf55-f6f8-4f17-97d3-ab6f6bf6676d',
 'cf106374-3dd4-401d-80ba-25b70247381a']

## Pull polygons from TM API

In [9]:
# Query the production API for the projects in `ids`; the helper is also
# given the JSON path so the raw results can be reloaded later.
results = api.pull_wrapper(
    prod_url,
    headers,
    ids,
    outfile=tm_api_pull_results_file,
)

Pulling Projects: 100%|██████████| 2/2 [00:05<00:00,  2.75s/project]

Results saved to ../data/tf_cohort1/tm_api_response_prod.json





In [10]:
# Tabulate the raw pull results for a quick visual inspection
df = pd.DataFrame(data=results)

In [11]:
# Report how many rows were pulled, then preview the first rows
print(df.shape[0])
df.head()

7


Unnamed: 0,status,plantStart,calcArea,plantEnd,practice,targetSys,distr,numTrees,name,siteId,indicators,siteName,geometry,establishmentTreeSpecies,reportingPeriods,lightResource,poly_id,project_id
0,approved,2023-06-20,36.080068,2024-09-30,tree-planting,agroforest,full,10450.0,Parcelle Pissa 1,c7da0650-dcbb-49b4-831c-af5da6fb9a96,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Pissa Centre - EEDD,"{'type': 'Polygon', 'coordinates': [[[18.17574...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",False,0cf9fc3e-43d5-424d-a998-f66cc34e86b0,449adf55-f6f8-4f17-97d3-ab6f6bf6676d
1,approved,2023-06-15,25.381693,2024-09-30,tree-planting,agroforest,full,6000.0,Parcelle Sakoulou 1,39aa58ad-4dfe-4c1b-8636-e0698b44b1e0,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Site de grande culture de SAKOULOU - EEDD,"{'type': 'Polygon', 'coordinates': [[[18.24070...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,02faceea-68cb-47c5-b25a-e39771c9e8d9,449adf55-f6f8-4f17-97d3-ab6f6bf6676d
2,approved,2023-06-20,47.686425,2024-09-30,tree-planting,agroforest,full,19000.0,Parcelle Boyali 1,c003b109-d89f-4fce-90a7-ec83a45f2492,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Boyali - EEDD,"{'type': 'Polygon', 'coordinates': [[[18.22819...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,fdfc1a6b-3ab3-4642-b91b-9584d4207cf8,449adf55-f6f8-4f17-97d3-ab6f6bf6676d
3,approved,2024-06-15,96.187621,2024-09-30,tree-planting,agroforest,full,36000.0,Parcelle Boyali,c003b109-d89f-4fce-90a7-ec83a45f2492,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Boyali - EEDD,"{'type': 'Polygon', 'coordinates': [[[18.22240...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,2210ca48-6683-4c8b-8967-bf4647c4651b,449adf55-f6f8-4f17-97d3-ab6f6bf6676d
4,approved,2024-06-15,95.372916,2024-09-30,tree-planting,agroforest,full,45000.0,Parcelle Sakoulou,39aa58ad-4dfe-4c1b-8636-e0698b44b1e0,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Site de grande culture de SAKOULOU - EEDD,"{'type': 'Polygon', 'coordinates': [[[18.24229...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,31ab5498-d4c9-4f78-9f88-c242daf97aa8,449adf55-f6f8-4f17-97d3-ab6f6bf6676d


## Clean Attributes

In [12]:
# Re-read the raw API results from the JSON file written by the pull step,
# so cleaning can be re-run without querying the API again.
with open(tm_api_pull_results_file) as results_fh:
    project_results = json.load(results_fh)

In [13]:
# Clean the API results loaded from the JSON file and transform them into a dataframe.
#  - Identifies and converts invalid plantstart and plantend dates to NaT
#  - Saves one copy of the polygon features csv to the
#    terrafund-portfolio-analysis repo and one to the maxar-tools repo
clean_api = clean.process_tm_api_results(
    project_results,
    outfile1=polygon_features_file,
    outfile2=polygon_features_maxar_file,
)

Number of rows missing a 'plantstart' date: 0/7
Number of rows missing a 'plantend' date: 1/7
⚠️ Total rows missing start and end plant date: 0
⚠️ Total projects with at least 1 polygon missing 'plantstart': 0
⚠️ Total polygons missing 'plantstart': 0
There are 0 projects with ALL polygons missing plantstart.
Projects with SOME polygons missing plantstart: 0


  affected_rows.loc[is_feb_29] = non_leap_years
  affected_rows.loc[is_feb_29] = non_leap_years
