# Pull Polygons from TerraMatch API

This notebook sets up the process to pull polygon geometries and metadata from the TerraMatch API.

In [1]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean

## Params

In [2]:
polygon_feats = '../data/tm_api_TEST_2025-04-01.csv'
polygon_feats_maxar_repo = '../data/tm_api_TEST_copy_for_maxar_repo.csv' # change this once testing done


## Set up token and API URL

In [4]:
# Set up token access
auth_path = '../secrets.yaml'
with open(auth_path) as auth_file:
    auth = yaml.safe_load(auth_file)
headers = {
    'Authorization': f"Bearer {auth['access_token']}"
    }

In [6]:
# TerraMatch API URLs
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?" # Use for testing queries
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?" # Use to pull data for analysis

## Create lists of projects to pull

In [7]:
# Read in list of approved projects (2025-02-21)
full = pd.read_csv('../projects_all_approved_202502211226.csv')
full.shape

(282, 10)

In [8]:
# Create lists of projects by Cohort (and split cohort 1 into projects within the TF landscapes and outside of the TF landscapes)
cohort1 = full[full['cohort'] == 'terrafund']
cohort1_landscapes = cohort1[cohort1['country'].isin(['BI', 'CD', 'RW', 'KE', 'GH'])]
cohort1_non_landscapes = cohort1[~cohort1['country'].isin(['BI', 'CD', 'RW', 'KE', 'GH'])]
cohort2 = full[full['cohort'] == 'terrafund-landscapes']

In [9]:
# Create a list of project ids to query
ids = list(set(cohort1.project_id))

# Create a short list of ids for testing
ids = ids[:10]
ids

['128c2ac7-dd30-4093-a079-6e9111aa3351',
 'cf16b937-a02b-4691-b816-28669ec348f2',
 '389aad5b-6577-4cea-bf9f-446dcfd94966',
 '9019106b-6e2d-4deb-97a5-2889f976a931',
 '16b297b3-30a3-4624-bcc9-4333919f66fc',
 '802bb88c-5eb5-4ce2-836f-19bc8e0ddfc4',
 '814fc561-d635-423d-ab9d-30ce02a81172',
 '246a0906-7e03-4b57-bf71-b697b333509f',
 '529e1bae-2187-473f-a2a3-17e577720aba',
 '33274073-8a4e-4eca-8b97-0e8da3833105']

## Pull polygons from TM API

In [10]:
results = api.pull_wrapper(prod_url, headers, ids, outfile='../data/tm_api_response_prod.json')

Pulling Projects: 100%|██████████| 10/10 [00:29<00:00,  2.96s/project]


Results saved to ../data/tm_api_response_prod.json


In [13]:
df = pd.DataFrame(results)

In [14]:
print(len(df))
df.head()

699


Unnamed: 0,status,plantStart,calcArea,plantEnd,practice,targetSys,distr,numTrees,name,siteId,indicators,siteName,geometry,establishmentTreeSpecies,reportingPeriods,lightResource,poly_id,project_id
0,approved,2022-05-05,31.778331,,tree-planting,agroforest,,,Feature 1,c99de88c-5b26-4e08-9ff0-25377d60a094,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Mwenga Centre - CCAO,"{'type': 'Polygon', 'coordinates': [[[28.42796...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,88c1d83b-bb07-45c7-9b39-1c28e7978229,128c2ac7-dd30-4093-a079-6e9111aa3351
1,approved,2022-05-05,40.951329,,tree-planting,agroforest,,,Feature 5 (new),0c27bce9-6391-4292-b598-3b199acc4469,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Kabukimba - CCAO,"{'type': 'Polygon', 'coordinates': [[[28.46915...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,5e7a0c9a-a54d-42e0-a7d9-a831683cc1af,128c2ac7-dd30-4093-a079-6e9111aa3351
2,approved,2022-05-05,177.997992,,tree-planting,agroforest,,,Feature 2,2f52ba7e-9e8f-46f7-92ec-2c143a508c25,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Irangi - CCAO,"{'type': 'Polygon', 'coordinates': [[[28.51420...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,df3615e5-9439-45ee-8c82-15a439910c1f,128c2ac7-dd30-4093-a079-6e9111aa3351
3,approved,2022-05-05,59.226822,,tree-planting,agroforest,,,Feature 3 (new),f2a96acb-969d-42f4-a112-5394b093eb21,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Kasika - CCAO,"{'type': 'Polygon', 'coordinates': [[[28.53504...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,0ae358bd-bc5c-497a-a986-22e105950985,128c2ac7-dd30-4093-a079-6e9111aa3351
4,approved,2022-05-05,137.410293,,tree-planting,agroforest,,,Feature 4 (new),7b617ba0-e9cc-49de-98ac-0bab24cfdcdc,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",Kalambi - CCAO,"{'type': 'Polygon', 'coordinates': [[[28.44113...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",False,33bdde42-022d-45ab-84b8-fb96a4e21c84,128c2ac7-dd30-4093-a079-6e9111aa3351


## Clean Attributes

In [11]:
# Load the saved JSON file
with open("../data/tm_api_response_prod.json", 'r') as file:
    project_results = json.load(file)

In [12]:
# Clean the csv and transform it into a dataframe
## Identifies and converts invalid plantstart and plantend dates to NaT
clean_api = clean.process_tm_api_results(project_results,
                                         outfile1 = polygon_feats,
                                         outfile2 = polygon_feats_maxar_repo)

Number of rows missing a 'plantstart' date: 48/699
Number of rows missing a 'plantend' date: 511/699
⚠️ Total rows missing start and end plant date: 0
⚠️ Total projects with at least 1 polygon missing 'plantstart': 2
⚠️ Total polygons missing 'plantstart': 48
There are 0 projects with ALL polygons missing plantstart.
Projects with SOME polygons missing plantstart: 2


  affected_rows.loc[is_feb_29] = non_leap_years
  affected_rows.loc[is_feb_29] = non_leap_years
