# Pull Polygons from TerraMatch API (Simple)

This notebook sets up the process to pull polygon geometries and metadata from the TerraMatch API.

Updated to include indicators and to simplify preprocessing.

In [1]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
from datetime import datetime
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean

## Set file paths

In [2]:
## PARAMS
# Labels used to build the output path: run_dir is the sub-folder under ../data/
# and run_name is embedded in the output file name.
run_name, run_dir = 'ppc_test', 'test'

# Date stamp (YYYY-MM-DD) taken from the local system clock -- check the computer's date before running
today = f'{datetime.today():%Y-%m-%d}'

In [3]:
## FILES
# CSV listing the approved projects
approved_projects_file = '../projects_all_approved_202502211226.csv'

# JSON file holding the raw TM API pull. The pull step writes it (outfile) and
# it is read back in afterwards to clean the results (infile).
results_filename = f'tm_api_response_prod_{run_name}_{today}.json'
tm_api_pull_results_file = f'../data/{run_dir}/{results_filename}'


## Set up token & API URL

In [4]:
# Set up token access: read the access token from the local secrets file and
# build the Authorization header that is passed to the API pull.
auth_path = '../secrets.yaml'
with open(auth_path) as secrets_fh:
    auth = yaml.safe_load(secrets_fh)

access_token = auth['access_token']
headers = {'Authorization': f"Bearer {access_token}"}

In [5]:
# TerraMatch API URLs for the research v3 sitePolygons endpoint.
# Both end in "?" -- presumably the pull helper appends query parameters; confirm in api_utils.
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?" # Staging: use for testing queries
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?" # Production: use to pull data for analysis

## Create list of projects to pull

In [6]:
# Read in the list of approved projects (export dated 2025-02-21)
full = pd.read_csv(approved_projects_file)

# Fail fast if the export lacks a column that the cohort filters and the
# project id list depend on, rather than hitting a KeyError in a later cell.
required_cols = {'project_id', 'cohort', 'country'}
missing_cols = required_cols - set(full.columns)
if missing_cols:
    raise ValueError(
        f"{approved_projects_file} is missing expected column(s): {sorted(missing_cols)}"
    )

In [7]:
# Split the approved projects by cohort. Cohort 1 ('terrafund') is further divided into
# projects inside the TF landscapes and projects outside of them, based on country code.
tf_landscape_countries = ['BI', 'CD', 'RW', 'KE', 'GH']

cohort1 = full.loc[full['cohort'].eq('terrafund')]
in_tf_landscape = cohort1['country'].isin(tf_landscape_countries)
cohort1_landscapes = cohort1.loc[in_tf_landscape]
cohort1_non_landscapes = cohort1.loc[~in_tf_landscape]

cohort2 = full.loc[full['cohort'].eq('terrafund-landscapes')]
ppc = full.loc[full['cohort'].eq('ppc')]

In [8]:
# Create a list of project ids to query.
# drop_duplicates() keeps first-seen order, so the list is the same on every run
# (the order of list(set(...)) changes between kernel sessions because str hashing is randomized).
ppc_ids = ppc['project_id'].drop_duplicates().tolist()

# Short lists of ids for testing
test_ids_pair = ['465f543e-d53a-4356-ae8d-9790aa42d30e', '1977b649-908c-46c3-836d-f4f6485427c2']
test_ids_single = ['1977b649-908c-46c3-836d-f4f6485427c2']

# Select the list to pull exactly once instead of overwriting `ids` repeatedly.
# Switch to `ppc_ids` for a full PPC pull, or `test_ids_pair` for a two-project test.
ids = test_ids_single
ids

['1977b649-908c-46c3-836d-f4f6485427c2']

## Pull projects from TerraMatch API

In [9]:
# Pull data from the production API for each project id in `ids`.
# The cell output reports that the results are saved to `tm_api_pull_results_file`.
# NOTE(review): this test run (run_name = 'ppc_test') queries prod_url, not staging_url -- confirm that is intended.
results = api.pull_wrapper(prod_url, headers, ids, outfile=tm_api_pull_results_file)

Pulling Projects: 100%|██████████| 1/1 [00:00<00:00,  1.26project/s]

Results saved to ../data/test/tm_api_response_prod_ppc_test_2025-05-16.json





In [12]:
# Read the saved API response back in from the JSON results file
with open(tm_api_pull_results_file) as results_fh:
    project_results = json.load(results_fh)

In [None]:
# NOTE(review): commented-out reference copy, not executed. It looks stale: the DataFrame
# returned by api.parse_tm_api_results in this notebook has a `restoration_by_strategy` column,
# not `restoration_by_strat`. Presumably the active mapping lives in api_utils -- confirm, then remove this cell.
# DEFAULT_INDICATOR_MAPPING = {
#         'treeCover': 'tree_cover',
#         'treeCoverLoss': 'tree_cover_loss',
#         'treeCoverLossFires': 'tree_cover_loss_fires',
#         'restorationByStrategy': 'restoration_by_strat',
#         'restorationByLandUse': 'restoration_by_land_use'
#         }

In [None]:
# def parse_tm_api_results(results, outfile, parse_indicators=False, indicator_mapping=None):
#     """
#     Converts TerraMatch API results JSON into a structured DataFrame with selected fields.
    
#     Args:
#         results (list): Raw JSON results from the API (list of dicts)
#         outfile (str): Path to save cleaned CSV output
#         parse_indicators (bool): Include indicators columns in the final DataFrame
#         indicator_mapping (dict[str, str]): Required if parse_indicators = True. Dictionary used to map indicatorSlug names to desired column names. Keys should be the indicatorSlug keys
#           within the results dictionary. Values should be the desired column name in the final DataFrame.
#             Example:
#                 {
#                 'indicatorSlug': 'indicator_col_name',
#                 'treeCover': 'tree_cover',
#                 'treeCoverLoss': 'tree_cover_loss'
#                 }

#     Returns:
#         final_df (pd.DataFrame): Structured dataframe with selected fields 
#     """
#     extracted_data = []

#     # Iterate over each feature in the results JSON to extract polygon information
#     for feature in results: 
#         # Basic attributes
#         row_data = {
#             'project_id': feature.get('project_id'),
#             'poly_id': feature.get('poly_id'),
#             'site_id': feature.get('siteId'),
#             'geometry': feature.get('geometry'),
#             'plantstart': feature.get('plantStart'),
#             'plantend': feature.get('plantEnd'),
#             'practice': feature.get('practice'),
#             'target_sys': feature.get('targetSys'),
#             'dist': feature.get('distr'),
#             'project_phase': feature.get('projectPhase', '')  # default if missing
#         }

#         # Optionally parse the 'indicators' list into separate columns
#         if parse_indicators:
#             if indicator_mapping is None:
#                 indicator_mapping = DEFAULT_INDICATOR_MAPPING
#             elif not isinstance(indicator_mapping, dict):
#                 raise ValueError("indicator_mapping must be provided as a dictionary.")
            
#             # Get the value associated with the 'indicators' key
#             indicators = feature.get('indicators', [])
#             # For each indicator dictionary
#             for indicator in indicators:
#                 slug = indicator.get('indicatorSlug')
#                 if slug in indicator_mapping:
#                     col_name = indicator_mapping[slug]
#                     row_data[col_name] = indicator  # Keep full dictionary

#         extracted_data.append(row_data)

#     final_df = pd.DataFrame(extracted_data)
    
#     # Save results
#     final_df.to_csv(outfile, index=False)

#     return final_df

In [16]:
# Parse the raw API results into a DataFrame. With parse_indicators=True the output includes
# indicator columns (e.g. tree_cover_loss, restoration_by_strategy), each holding the full indicator dict.
# outfile=None -- presumably skips writing a CSV; confirm in api_utils.parse_tm_api_results.
test = api.parse_tm_api_results(project_results, outfile=None, parse_indicators=True)
test

Unnamed: 0,project_id,poly_id,site_id,geometry,plantstart,plantend,practice,target_sys,dist,project_phase,tree_cover_loss,tree_cover_loss_fires,restoration_by_strategy,restoration_by_land_use
0,1977b649-908c-46c3-836d-f4f6485427c2,0e9b287a-fd4f-4eb1-8665-2e9e9cc269fc,a7b60544-61e1-42de-8a3e-8d88e459d1eb,"{'type': 'Polygon', 'coordinates': [[[-40.1244...",2024-12-01,2025-01-31,direct-seeding,natural-forest,partial,,"{'indicatorSlug': 'treeCoverLoss', 'yearOfAnal...","{'indicatorSlug': 'treeCoverLossFires', 'yearO...","{'indicatorSlug': 'restorationByStrategy', 'ye...","{'indicatorSlug': 'restorationByLandUse', 'yea..."
1,1977b649-908c-46c3-836d-f4f6485427c2,2005a901-3d5d-4591-805a-b765a82b995b,a7b60544-61e1-42de-8a3e-8d88e459d1eb,"{'type': 'Polygon', 'coordinates': [[[-40.1193...",2024-12-01,2025-01-31,direct-seeding,natural-forest,partial,,"{'indicatorSlug': 'treeCoverLoss', 'yearOfAnal...","{'indicatorSlug': 'treeCoverLossFires', 'yearO...","{'indicatorSlug': 'restorationByStrategy', 'ye...","{'indicatorSlug': 'restorationByLandUse', 'yea..."
