# Pull PPC Polygons from TerraMatch API

This notebook sets up the process to pull PPC polygon geometries and metadata from the TerraMatch API.

In [1]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
from datetime import datetime
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean
import geospatial_utils_NEW as geo

## Set file paths

In [2]:
# Naming convention
run_name = 'ppc_2025_tree_count_elig'
#run_dir = 'ppc_batch2'

# Today's date
today = datetime.today().strftime('%Y-%m-%d') # Check computer date before running

## Input Files
# List of all approved projects on TerraMatch
approved_projects_file = '../projects_all_approved_202502211226.csv'

# PPC Batch 2 Project List
batch2_projects = "../data/ppc/ppc_2025_batch2_project_list.csv"

# PPC Prospective Tree Count (Group 1) Projects
#tree_count_group1_file = '/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/ppc_tree_count_projects_group1_20250509.csv'
tree_count_file = '~/github_repos/tf-biophysical-monitoring/data/ppc/ppc_2025_potential_tree_count_projects_2025-07-16.csv'

## Output Files
# A JSON file that stores the results of the TM API pull; we'll read it back in to clean the results (outfile, infile)
tm_api_pull_results_file = f'/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_response_prod_{run_name}_{today}.json'

# The cleaned polygon features csv
polygon_features_file = f'/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_{run_name}_{today}.csv' 

## Read in files

In [3]:
# List of all approved projects on TerraMatch
approved_projects_df = pd.read_csv(approved_projects_file)

# PPC Prospective Tree Count (Group 1) Projects
#tree_count_df = pd.read_csv(tree_count_group1_file)
tree_count_df = pd.read_csv(tree_count_file)

# PPC 2025 Batch 2 Projects
#batch2_projects_df = pd.read_csv(batch2_projects)

## Set up token and API URL

In [5]:
# Set up token access
auth_path = '../secrets.yaml'
with open(auth_path) as auth_file:
    auth = yaml.safe_load(auth_file)
headers = {
    'Authorization': f"Bearer {auth['access_token']}"
    }

In [7]:
# TerraMatch API URLs
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?" # Use for testing queries
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?" # Use to pull data for analysis

## Create list of projects to pull

#### Pull Projects from group list (Batch 1, prospective tree count group 1, etc.)

In [8]:
# Make a list of the unique project_ids from the Batch 1 projects
#tree_count_proj_ids = list(tree_count_df.project_id.unique())
#batch2_proj_ids = list(batch2_project_list['project_id'].unique())
# batch2_proj_ids = ['02b3119e-9505-4dba-b58d-f2a967b71ef9', '5e8a3c5e-7a28-4ff4-be07-f950361f56b2', '5bb542b2-0efb-4b52-841f-2b5898f533b8',
#                     'ad149677-7ee0-479c-8d23-aa8c3bf58532', '7e7d390b-1894-4a1b-acc2-c531f213c1ca', 'd2c2a1fe-c5e8-435a-b865-00dce7a9809f',
#                     'c8ef8d8e-a75a-46f4-88d4-8057ed5a50f8', 'e4108d7a-58d8-4604-8dd8-2f95c9c181d5']

# Potential tree count projects list from Asana: https://app.asana.com/1/25496124013636/project/1208493878648584/task/1210412708479452?focus=true
tree_count_proj_ids = list(tree_count_df['project_id'].unique())
tree_count_proj_ids

['244eaf7e-e109-47b2-b84e-9ebe24508391',
 '5e8a3c5e-7a28-4ff4-be07-f950361f56b2',
 'e4108d7a-58d8-4604-8dd8-2f95c9c181d5',
 'd2c2a1fe-c5e8-435a-b865-00dce7a9809f']

#### OR Pull projects from list of all approved polygons

In [24]:
# Filter the list of all approved projects by cohort ('ppc'), project_id (the Batch 1 list), 
ppc = approved_projects_df[approved_projects_df['cohort'] == 'ppc']

# Filter to just the batch 1 projects list
batch2 = ppc[ppc['project_id'].isin(batch2_proj_ids)]

## Pull polygons from TM API

In [9]:
results = api.pull_wrapper(prod_url, headers, tree_count_proj_ids, outfile=tm_api_pull_results_file)

Pulling Projects: 100%|██████████| 4/4 [00:06<00:00,  1.72s/project]

Results saved to /home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_response_prod_ppc_2025_tree_count_elig_2025-07-16.json





In [10]:
df = pd.DataFrame(results)

In [11]:
print(len(df))
print(f"df has {df.project_id.nunique()} unique projects")
print(f"df has {df.poly_id.nunique()} unique polygons")
df['project_id'].value_counts()
df.head(2)

201
df has 4 unique projects
df has 201 unique polygons


Unnamed: 0,status,polygonUuid,plantStart,calcArea,lat,long,practice,targetSys,distr,numTrees,...,projectId,projectShortName,indicators,siteName,geometry,establishmentTreeSpecies,reportingPeriods,lightResource,poly_id,project_id
0,approved,77aca476-0d2b-4d73-b792-1e0853a2aa07,2022-03-15,2.771125,-2.560058,-45.786027,tree-planting,silvopasture,full,,...,244eaf7e-e109-47b2-b84e-9ebe24508391,,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",ALimpa,"{'type': 'Polygon', 'coordinates': [[[-45.7849...","[{'name': 'Psidium guajava', 'amount': 0}, {'n...","[{'dueAt': '2022-04-01T12:00:00.000Z', 'submit...",False,67931b60-9df4-45cd-818e-ff7ddfaf11fc,244eaf7e-e109-47b2-b84e-9ebe24508391
1,approved,475eaf2f-55c8-4f59-b762-1e57c8b84052,2022-03-15,1.10342,-2.559621,-45.670952,tree-planting,agroforest,full,,...,244eaf7e-e109-47b2-b84e-9ebe24508391,,"[{'indicatorSlug': 'treeCover', 'yearOfAnalysi...",QuadraXXIII,"{'type': 'Polygon', 'coordinates': [[[-45.6703...","[{'name': 'Psidium guajava', 'amount': 0}, {'n...","[{'dueAt': '2022-04-01T12:00:00.000Z', 'submit...",False,ecf4d885-4b74-45f0-b6b0-21cc82820fa2,244eaf7e-e109-47b2-b84e-9ebe24508391


In [13]:
# Check for NA values
#df.isna().sum()

## Clean attributes and save as csv

In [14]:
# Load the saved JSON file
with open(tm_api_pull_results_file, 'r') as file:
    project_results = json.load(file)

In [15]:
# Clean the csv and transform it into a dataframe
## Identifies and converts invalid plantstart and plantend dates to NaT
## Saves one copy of the polygon features csv to the terrafund-portfolio-analysis repo and one to the maxar-tools repo
clean_api = clean.process_tm_api_results(project_results,
                                         '2021-01-01',
                                         outfile1 = polygon_features_file,
                                         outfile2 = None)

Number of rows missing a 'plantstart' date: 0/201
Number of rows missing a 'plantend' date: 201/201
⚠️ Total rows missing start and end plant date: 0
⚠️ Total projects with at least 1 polygon missing 'plantstart': 0
⚠️ Total polygons missing 'plantstart': 0
There are 0 projects with ALL polygons missing plantstart.
Projects with SOME polygons missing plantstart: 0


  affected_rows.loc[is_feb_29] = non_leap_years
  affected_rows.loc[is_feb_29] = non_leap_years


#### Check Processed CSV

In [40]:
tc_df = pd.read_csv('/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_ppc_tree_count_group1_2025-05-09.csv')

In [41]:
print(len(tc_df))
print(f"df has {tc_df.project_id.nunique()} unique projects")
print(f"df has {tc_df.poly_id.nunique()} unique polygons")
tc_df['project_id'].value_counts()
tc_df.head()

196
df has 4 unique projects
df has 196 unique polygons


Unnamed: 0,project_id,poly_id,site_id,geometry,plantstart,plantend,practice,target_sys,dist,project_phase
0,244eaf7e-e109-47b2-b84e-9ebe24508391,67931b60-9df4-45cd-818e-ff7ddfaf11fc,b9bdfd4f-7eaa-41c9-b0b6-761d3b449628,"{'type': 'Polygon', 'coordinates': [[[-45.7849...",2022-03-15,,tree-planting,silvopasture,full,
1,244eaf7e-e109-47b2-b84e-9ebe24508391,ecf4d885-4b74-45f0-b6b0-21cc82820fa2,060e7825-e8f6-4fc6-8879-c387e5431194,"{'type': 'Polygon', 'coordinates': [[[-45.6703...",2022-03-15,,tree-planting,agroforest,full,
2,244eaf7e-e109-47b2-b84e-9ebe24508391,b843c681-acb8-4e71-a488-df238e875766,b9bdfd4f-7eaa-41c9-b0b6-761d3b449628,"{'type': 'Polygon', 'coordinates': [[[-45.7782...",2022-03-15,,tree-planting,silvopasture,full,
3,244eaf7e-e109-47b2-b84e-9ebe24508391,0fb77a06-012a-4743-a5f2-2ca8eb40abed,0f8a4317-b4c1-4c89-86a0-ef714e334e1f,"{'type': 'Polygon', 'coordinates': [[[-45.8229...",2022-03-15,,tree-planting,agroforest,full,
4,244eaf7e-e109-47b2-b84e-9ebe24508391,bb119f02-dbc3-4cf4-a688-b248107f3b18,0f8a4317-b4c1-4c89-86a0-ef714e334e1f,"{'type': 'Polygon', 'coordinates': [[[-45.8486...",2022-03-15,,tree-planting,agroforest,full,


In [43]:
tc_df.isna().sum()

project_id         0
poly_id            0
site_id            0
geometry           0
plantstart         0
plantend         190
practice           0
target_sys         0
dist               0
project_phase    196
dtype: int64