# Pull PPC Polygons from TerraMatch API

This notebook sets up the process to pull PPC polygon geometries and metadata from the TerraMatch API.

In [1]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
from datetime import datetime
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean
import geospatial_utils_NEW as geo

## Set file paths

In [2]:
# Naming convention
run_name = 'ppc_tree_count_group1'
#run_dir = 'ppc_batch1'

# Today's date
today = datetime.today().strftime('%Y-%m-%d') # Check computer date before running

## Input Files
# List of all approved projects on TerraMatch
approved_projects_file = '../projects_all_approved_202502211226.csv'

# PPC Prospective Tree Count (Group 1) Projects
tree_count_group1_file = '/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/ppc_tree_count_projects_group1_20250509.csv'

## Output Files
# A JSON file that stores the results of the TM API pull; we'll read it back in to clean the results (outfile, infile)
tm_api_pull_results_file = f'/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_response_prod_{run_name}_{today}.json'

# The cleaned polygon features csv
polygon_features_file = f'/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_{run_name}_{today}.csv' 

## Read in files

In [3]:
# List of all approved projects on TerraMatch
approved_projects_df = pd.read_csv(approved_projects_file)

# PPC Prospective Tree Count (Group 1) Projects
tree_count_df = pd.read_csv(tree_count_group1_file)

## Set up token and API URL

In [4]:
# Set up token access
auth_path = '../secrets.yaml'
with open(auth_path) as auth_file:
    auth = yaml.safe_load(auth_file)
headers = {
    'Authorization': f"Bearer {auth['access_token']}"
    }

In [5]:
# TerraMatch API URLs
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?" # Use for testing queries
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?" # Use to pull data for analysis

## Create list of projects to pull

#### Pull Projects from group list (Batch 1, prospective tree count group 1, etc.)

In [6]:
# Make a list of the unique project_ids from the Batch 1 projects
tree_count_proj_ids = list(tree_count_df.project_id.unique())

In [7]:
tree_count_proj_ids

['244eaf7e-e109-47b2-b84e-9ebe24508391',
 '5e8a3c5e-7a28-4ff4-be07-f950361f56b2',
 'e4108d7a-58d8-4604-8dd8-2f95c9c181d5',
 '1977b649-908c-46c3-836d-f4f6485427c2']

#### OR Pull projects from list of all approved polygons

In [None]:
# # Filter the list of all approved projects by cohort ('ppc'), project_id (the Batch 1 list), 
# ppc = approved_projects_df[approved_projects_df['cohort'] == 'ppc']

# # Filter to just the batch 1 projects list
# batch1 = ppc[ppc['project_id'].isin(batch_1_proj_ids)]

## Pull polygons from TM API

In [8]:
results = api.pull_wrapper(prod_url, headers, tree_count_proj_ids, outfile=tm_api_pull_results_file)

Pulling Projects: 100%|██████████| 4/4 [00:11<00:00,  2.85s/project]

Results saved to /home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_response_prod_ppc_tree_count_group1_2025-05-09.json





In [9]:
df = pd.DataFrame(results)

In [11]:
print(len(df))
print(f"df has {df.project_id.nunique()} unique projects")
print(df.project_id.unique())
print(f"df has {df.poly_id.nunique()} unique polygons")
df['project_id'].value_counts()
df.head(2)

196
df has 4 unique projects
['244eaf7e-e109-47b2-b84e-9ebe24508391'
 '5e8a3c5e-7a28-4ff4-be07-f950361f56b2'
 'e4108d7a-58d8-4604-8dd8-2f95c9c181d5'
 '1977b649-908c-46c3-836d-f4f6485427c2']
df has 196 unique polygons


Unnamed: 0,status,plantStart,calcArea,plantEnd,practice,targetSys,distr,numTrees,name,siteId,projectId,indicators,siteName,geometry,establishmentTreeSpecies,reportingPeriods,lightResource,poly_id,project_id
0,approved,2022-03-15,2.771125,,tree-planting,silvopasture,full,,34-4 (new),b9bdfd4f-7eaa-41c9-b0b6-761d3b449628,244eaf7e-e109-47b2-b84e-9ebe24508391,"[{'indicatorSlug': 'treeCoverLoss', 'yearOfAna...",ALimpa,"{'type': 'Polygon', 'coordinates': [[[-45.7849...","[{'name': 'Psidium guajava', 'amount': None}, ...","[{'dueAt': '2022-04-01T12:00:00.000Z', 'submit...",False,67931b60-9df4-45cd-818e-ff7ddfaf11fc,244eaf7e-e109-47b2-b84e-9ebe24508391
1,approved,2022-03-15,1.10342,,tree-planting,agroforest,full,,25-15 (new),060e7825-e8f6-4fc6-8879-c387e5431194,244eaf7e-e109-47b2-b84e-9ebe24508391,"[{'indicatorSlug': 'treeCoverLoss', 'yearOfAna...",QuadraXXIII,"{'type': 'Polygon', 'coordinates': [[[-45.6703...","[{'name': 'Psidium guajava', 'amount': None}, ...","[{'dueAt': '2022-04-01T12:00:00.000Z', 'submit...",False,ecf4d885-4b74-45f0-b6b0-21cc82820fa2,244eaf7e-e109-47b2-b84e-9ebe24508391


In [12]:
df[df['poly_id'] == '1270c4aa-6121-414d-bcc7-3c02909b72ae']

Unnamed: 0,status,plantStart,calcArea,plantEnd,practice,targetSys,distr,numTrees,name,siteId,projectId,indicators,siteName,geometry,establishmentTreeSpecies,reportingPeriods,lightResource,poly_id,project_id
86,approved,2021-11-01,25.482124,,assisted-natural-regeneration,natural-forest,full,0.0,3501,cf9808cf-2737-4cd9-9d19-cc2219f6fbe5,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,"[{'indicatorSlug': 'treeCoverLoss', 'yearOfAna...",CES Rioterra - RESEX D,"{'type': 'Polygon', 'coordinates': [[[-62.2603...","[{'name': '', 'amount': None}]","[{'dueAt': '2023-02-03T05:00:00.000Z', 'submit...",False,1270c4aa-6121-414d-bcc7-3c02909b72ae,5e8a3c5e-7a28-4ff4-be07-f950361f56b2


## Clean attributes and save as csv

In [13]:
# Load the saved JSON file
with open(tm_api_pull_results_file, 'r') as file:
    project_results = json.load(file)

In [16]:
# Clean the csv and transform it into a dataframe
## Identifies and converts invalid plantstart and plantend dates to NaT
## Saves one copy of the polygon features csv to the terrafund-portfolio-analysis repo and one to the maxar-tools repo
clean_api = clean.process_tm_api_results(project_results,
                                         '2021-01-01',
                                         outfile1 = polygon_features_file,
                                         outfile2 = None)

Number of rows missing a 'plantstart' date: 0/196
Number of rows missing a 'plantend' date: 190/196
⚠️ Total rows missing start and end plant date: 0
⚠️ Total projects with at least 1 polygon missing 'plantstart': 0
⚠️ Total polygons missing 'plantstart': 0
There are 0 projects with ALL polygons missing plantstart.
Projects with SOME polygons missing plantstart: 0


  affected_rows.loc[is_feb_29] = non_leap_years
  affected_rows.loc[is_feb_29] = non_leap_years


In [17]:
tc_df = pd.read_csv('/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/tm_api_ppc_tree_count_group1_2025-05-09.csv')

In [18]:
print(len(tc_df))
print(f"df has {tc_df.project_id.nunique()} unique projects")
print(f"df has {tc_df.poly_id.nunique()} unique polygons")
tc_df['project_id'].value_counts()
tc_df.head()

196
df has 4 unique projects
df has 196 unique polygons


Unnamed: 0,project_id,poly_id,site_id,geometry,plantstart,plantend,practice,target_sys,dist,project_phase
0,244eaf7e-e109-47b2-b84e-9ebe24508391,67931b60-9df4-45cd-818e-ff7ddfaf11fc,b9bdfd4f-7eaa-41c9-b0b6-761d3b449628,"{'type': 'Polygon', 'coordinates': [[[-45.7849...",2022-03-15,,tree-planting,silvopasture,full,
1,244eaf7e-e109-47b2-b84e-9ebe24508391,ecf4d885-4b74-45f0-b6b0-21cc82820fa2,060e7825-e8f6-4fc6-8879-c387e5431194,"{'type': 'Polygon', 'coordinates': [[[-45.6703...",2022-03-15,,tree-planting,agroforest,full,
2,244eaf7e-e109-47b2-b84e-9ebe24508391,b843c681-acb8-4e71-a488-df238e875766,b9bdfd4f-7eaa-41c9-b0b6-761d3b449628,"{'type': 'Polygon', 'coordinates': [[[-45.7782...",2022-03-15,,tree-planting,silvopasture,full,
3,244eaf7e-e109-47b2-b84e-9ebe24508391,0fb77a06-012a-4743-a5f2-2ca8eb40abed,0f8a4317-b4c1-4c89-86a0-ef714e334e1f,"{'type': 'Polygon', 'coordinates': [[[-45.8229...",2022-03-15,,tree-planting,agroforest,full,
4,244eaf7e-e109-47b2-b84e-9ebe24508391,bb119f02-dbc3-4cf4-a688-b248107f3b18,0f8a4317-b4c1-4c89-86a0-ef714e334e1f,"{'type': 'Polygon', 'coordinates': [[[-45.8486...",2022-03-15,,tree-planting,agroforest,full,


In [19]:
tc_df.isna().sum()

project_id         0
poly_id            0
site_id            0
geometry           0
plantstart         0
plantend         190
practice           0
target_sys         0
dist               0
project_phase    196
dtype: int64

In [20]:
tc_df[tc_df['plantstart'].isna()]

Unnamed: 0,project_id,poly_id,site_id,geometry,plantstart,plantend,practice,target_sys,dist,project_phase


In [21]:
tc_df[tc_df['project_id'] == '5e8a3c5e-7a28-4ff4-be07-f950361f56b2']

Unnamed: 0,project_id,poly_id,site_id,geometry,plantstart,plantend,practice,target_sys,dist,project_phase
84,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,72572a5b-bd1d-457a-b3cc-4921ffa2df13,62f04628-9b2a-4b74-bb1c-b722569f4a84,"{'type': 'Polygon', 'coordinates': [[[-64.0688...",2022-12-28,,"assisted-natural-regeneration,tree-planting",natural-forest,full,
85,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,b7781db4-c2a9-4243-bee5-23633b410bdc,5157ea50-23ae-4d55-886a-5781375cb923,"{'type': 'Polygon', 'coordinates': [[[-64.0755...",2022-12-28,,tree-planting,natural-forest,full,
86,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,1270c4aa-6121-414d-bcc7-3c02909b72ae,cf9808cf-2737-4cd9-9d19-cc2219f6fbe5,"{'type': 'Polygon', 'coordinates': [[[-62.2603...",2021-11-01,,assisted-natural-regeneration,natural-forest,full,
87,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,8c54dc85-1209-46a2-9255-f0eea7ce600b,24b0b754-cf5e-42e5-9092-677db722ab22,"{'type': 'Polygon', 'coordinates': [[[-62.2721...",2021-11-01,,"assisted-natural-regeneration,tree-planting",natural-forest,"full,partial",
88,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,211b29b6-ab19-4e9e-8367-39a7a65728b5,ecbe807b-a6d1-4b72-b540-2e2d1c484020,"{'type': 'Polygon', 'coordinates': [[[-62.2760...",2021-11-01,2021-11-01,assisted-natural-regeneration,natural-forest,full,
89,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,cab336c2-cdc5-4b8e-850c-15c4c84ea99b,c41450e7-55e2-4ee8-ad57-6700bff61ba0,"{'type': 'Polygon', 'coordinates': [[[-62.3243...",2021-12-01,2021-12-31,"assisted-natural-regeneration, tree-planting",natural-forest,full,
90,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,a80892e5-0d0c-4481-bb17-cd4a25df6c52,c41450e7-55e2-4ee8-ad57-6700bff61ba0,"{'type': 'Polygon', 'coordinates': [[[-62.3333...",2021-12-01,2021-12-31,tree-planting,natural-forest,partial,


In [22]:
tc_df[tc_df['poly_id'] == '1270c4aa-6121-414d-bcc7-3c02909b72ae']

Unnamed: 0,project_id,poly_id,site_id,geometry,plantstart,plantend,practice,target_sys,dist,project_phase
86,5e8a3c5e-7a28-4ff4-be07-f950361f56b2,1270c4aa-6121-414d-bcc7-3c02909b72ae,cf9808cf-2737-4cd9-9d19-cc2219f6fbe5,"{'type': 'Polygon', 'coordinates': [[[-62.2603...",2021-11-01,,assisted-natural-regeneration,natural-forest,full,
