# Pull Polygons from TerraMatch API (Simple)

This notebook pulls polygon geometries and metadata from the TerraMatch API, saves the raw JSON response, and parses it into a table of polygon features.

Updated to include indicators and to simplify preprocessing.

In [1]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
from datetime import datetime
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean

## Set file paths

In [2]:
## PARAMS
# Naming convention used to label this run
run_name = 'ppc_2025_batch1'
run_dir = 'ppc'

# Date stamp for this run (YYYY-MM-DD)
# Check computer date before running (if out of sync, run sudo hwclock -s)
today = f'{datetime.today():%Y-%m-%d}'

In [3]:
## FILES
# --- Inputs ---
# Approved-projects list (CSV)
approved_projects_file = "../projects_all_approved_202502211226.csv"

# Projects in PPC 2025 Batch 1
# NOTE(review): absolute path under one user's home directory; it will not resolve on
# other machines. Consider a path relative to the repository once its location is confirmed.
ppc_batch1_file = "/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/ppc_batch1_projects_20250501.csv"

# --- Outputs ---
# Raw TM API response (JSON): written by the pull step, then read back in for cleaning
tm_api_pull_results_file = f"../data/{run_dir}/tm_api_response_prod_{run_name}_{today}.json"

# Parsed TM API results (CSV)
polygon_features_file = f"../data/{run_dir}/tm_api_{run_name}_{today}.csv"

## Set up token & API URL

In [4]:
# Set up token access
# The bearer token is read from a YAML secrets file rather than hardcoded in the notebook.
auth_path = '../secrets.yaml'
with open(auth_path, encoding='utf-8') as auth_file:
    auth = yaml.safe_load(auth_file)

# Fail fast with a clear message: safe_load returns None for an empty file, and a
# null/blank token would otherwise be formatted into the literal header "Bearer None".
access_token = auth.get('access_token') if isinstance(auth, dict) else None
if not access_token:
    raise ValueError(f"No 'access_token' found in {auth_path}")

headers = {
    'Authorization': f"Bearer {access_token}"
    }

In [5]:
# TerraMatch API URLs (sitePolygons endpoint)
# Staging: use for testing queries
staging_url = 'https://api-staging.terramatch.org/research/v3/sitePolygons?'
# Production: use to pull data for analysis
prod_url = 'https://api.terramatch.org/research/v3/sitePolygons?'

## Create list of projects to pull

In [6]:
# Read in list of approved projects (2025-02-21)
# `full` is the unfiltered table; the cohort subsets are derived from it by filtering on 'cohort'
full = pd.read_csv(approved_projects_file)

In [7]:
# Split the approved projects by cohort. Cohort 1 is further divided into projects
# inside vs. outside the TF landscapes, identified here by country code.
tf_landscape_countries = ['BI', 'CD', 'RW', 'KE', 'GH']
cohort_labels = full['cohort']

cohort1 = full[cohort_labels == 'terrafund']
# One boolean mask, reused (and negated) for both cohort 1 subsets
in_tf_landscape = cohort1['country'].isin(tf_landscape_countries)
cohort1_landscapes = cohort1[in_tf_landscape]
cohort1_non_landscapes = cohort1[~in_tf_landscape]

cohort2 = full[cohort_labels == 'terrafund-landscapes']

ppc = full[cohort_labels == 'ppc']

In [8]:
# Create a list of project ids to query

# Short list of ids for quick test pulls. Kept under its own name so that it is not
# silently overwritten by the batch list below; set `ids = test_ids` to use it.
test_ids = [
    '244eaf7e-e109-47b2-b84e-9ebe24508391',
    '24d8c9a2-b8ef-481c-930b-78c9aeaf239e',
    'f17dd6cf-8187-4edd-895e-07013d4990c9',
    '1115dda6-0165-4099-b52f-0ac53595c3a9',
    '465f543e-d53a-4356-ae8d-9790aa42d30e',
    'ad149677-7ee0-479c-8d23-aa8c3bf58532',
    '1977b649-908c-46c3-836d-f4f6485427c2',
    '6d9089aa-2a6f-4dc0-8064-32c5b67ffed6',
]

# Project ids for this run come from the PPC batch 1 file.
# (To pull a whole cohort instead, build the list from e.g. cohort1['project_id'].)
ppc_batch1 = pd.read_csv(ppc_batch1_file)

# unique() keeps first-appearance order; dropna() stops a blank project_id cell
# from being passed on to the API pull as NaN.
ids = ppc_batch1['project_id'].dropna().unique().tolist()
ids

['244eaf7e-e109-47b2-b84e-9ebe24508391',
 '24d8c9a2-b8ef-481c-930b-78c9aeaf239e',
 'f17dd6cf-8187-4edd-895e-07013d4990c9',
 '1115dda6-0165-4099-b52f-0ac53595c3a9',
 '465f543e-d53a-4356-ae8d-9790aa42d30e',
 'ad149677-7ee0-479c-8d23-aa8c3bf58532',
 '1977b649-908c-46c3-836d-f4f6485427c2',
 '6d9089aa-2a6f-4dc0-8064-32c5b67ffed6']

## Pull projects from TerraMatch API

In [9]:
# Pull data for every project id from the production endpoint.
# `outfile` is passed so the raw response is also saved to disk for later re-loading.
results = api.pull_wrapper(
    prod_url,
    headers,
    ids,
    outfile=tm_api_pull_results_file,
)

Pulling Projects: 100%|██████████| 8/8 [00:20<00:00,  2.60s/project]


Results saved to ../data/ppc/tm_api_response_prod_ppc_2025_batch1_2025-05-30.json


## Parse and save the API output

In [10]:
# Read the saved API response back in from disk
with open(tm_api_pull_results_file) as results_fh:
    results = json.load(results_fh)

In [11]:
# Turn the JSON response into a dataframe of selected fields, with indicator parsing enabled.
# `outfile` is passed so the parsed table is presumably also written to polygon_features_file.
results_df = api.parse_tm_api_results(
    results,
    outfile=polygon_features_file,
    parse_indicators=True,
)

In [12]:
# Preview the first two rows to sanity-check the parsed columns
results_df.head(2)

Unnamed: 0,project_id,poly_id,site_id,geometry,plantstart,plantend,practice,target_sys,dist,project_phase,tree_cover_loss,tree_cover_loss_fires,restoration_by_strategy,restoration_by_land_use
0,244eaf7e-e109-47b2-b84e-9ebe24508391,67931b60-9df4-45cd-818e-ff7ddfaf11fc,b9bdfd4f-7eaa-41c9-b0b6-761d3b449628,"{'type': 'Polygon', 'coordinates': [[[-45.7849...",2022-03-15,,tree-planting,silvopasture,full,,"{'indicatorSlug': 'treeCoverLoss', 'yearOfAnal...","{'indicatorSlug': 'treeCoverLossFires', 'yearO...","{'indicatorSlug': 'restorationByStrategy', 'ye...","{'indicatorSlug': 'restorationByLandUse', 'yea..."
1,244eaf7e-e109-47b2-b84e-9ebe24508391,ecf4d885-4b74-45f0-b6b0-21cc82820fa2,060e7825-e8f6-4fc6-8879-c387e5431194,"{'type': 'Polygon', 'coordinates': [[[-45.6703...",2022-03-15,,tree-planting,agroforest,full,,"{'indicatorSlug': 'treeCoverLoss', 'yearOfAnal...","{'indicatorSlug': 'treeCoverLossFires', 'yearO...","{'indicatorSlug': 'restorationByStrategy', 'ye...","{'indicatorSlug': 'restorationByLandUse', 'yea..."
