# Pull Polygons from TerraMatch API (Simple)

This notebook sets up the process to pull polygon geometries and metadata from the TerraMatch API.

Updated to include indicators and to simplify preprocessing.

In [None]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
from datetime import datetime
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean # unused in this notebook
import geospatial_utils_NEW as geo

## Set file paths

In [None]:
## PARAMS
# Naming convention: run_dir is the sub-folder of ../data used for outputs and
# run_name is embedded in every output file name.
run_dir, run_name = 'test', 'filters_test'

# Today's date as YYYY-MM-DD, used to stamp output file names.
# Check computer date before running (if out of sync, run sudo hwclock -s)
today = format(datetime.today(), '%Y-%m-%d')

In [None]:
## FILES
# ---- Inputs ----
# Approved projects list
approved_projects_file = '../projects_all_approved_202502211226.csv'

# PPC 2025 Batch 1 projects (currently disabled)
#ppc_batch1_file = '/home/darby/github_repos/tf-biophysical-monitoring/data/ppc/ppc_batch1_projects_20250501.csv'

# Maxar spend-down tracker
maxar_spenddown_tracker = '../data/maxar_spenddown/maxar_spend_down_tracker.csv'

# ---- Outputs (named from run_dir, run_name and today) ----
# Raw TM API response stored as JSON; it is the outfile of the pull and the
# infile that is read back in to clean the results
tm_api_pull_results_file = f'../data/{run_dir}/tm_api_response_prod_{run_name}_{today}.json'

# Results of the TM API pull saved as CSV
polygon_features_file = f'../data/{run_dir}/tm_api_{run_name}_{today}.csv'

## Set up token & API URL

In [None]:
# Set up token access: read the API access token from the local secrets file
# and build the request headers used for the TerraMatch API pull.
auth_path = '../secrets.yaml'
# Pass the encoding explicitly; otherwise open() falls back to the platform
# locale encoding, which is not guaranteed to be UTF-8.
with open(auth_path, encoding='utf-8') as auth_file:
    auth = yaml.safe_load(auth_file)
# The secrets file must provide a top-level 'access_token' key.
headers = {
    'Authorization': f"Bearer {auth['access_token']}"
    }

In [None]:
# TerraMatch API URLs (sitePolygons endpoint)
# Staging: use for testing queries
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?"
# Production: use to pull data for analysis
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?"

## Create list of projects to pull

In [None]:
# Load the approved-projects list (2025-02-21) into a dataframe
full = pd.read_csv(filepath_or_buffer=approved_projects_file)

In [None]:
# # Create lists of projects by Cohort (and split cohort 1 into projects within the TF landscapes and outside of the TF landscapes)
# cohort1 = full[full['cohort'] == 'terrafund']
# cohort1_landscapes = cohort1[cohort1['country'].isin(['BI', 'CD', 'RW', 'KE', 'GH'])]
# cohort1_non_landscapes = cohort1[~cohort1['country'].isin(['BI', 'CD', 'RW', 'KE', 'GH'])]
# cohort2 = full[full['cohort'] == 'terrafund-landscapes']

# ppc = full[full['cohort'] == 'ppc']

In [None]:
# # Create a list of project ids to query
# ids = list(set(cohort2.project_id))
# #ids = ids[:3]
# print(len(ids))

# Create a short list of ids for testing 
# ids = ['449adf55-f6f8-4f17-97d3-ab6f6bf6676d', '9019106b-6e2d-4deb-97a5-2889f976a931', '39871658-bff0-49c2-aa20-ccac0b03a2c2', 'bad12444-7180-4b29-a14c-d2b4305b7f52',
#        'd5e0a4ff-8601-45d0-9020-8c104e5ea508', '24fc33cb-53ad-4383-82ca-f6e2ac3fd143', '96c86eae-d4f9-45d8-9780-69c55a9e36e9', '8a112e82-e191-44ad-b306-2578c064104b',
#        'f449aef3-4453-42c9-b542-57acc7c2e5eb', '62043c88-f03d-475e-ac9c-2f057536e2a8', 'e4fe2fa4-6869-4c1e-9347-ba9b135306f5', 'b7f26543-0ddb-4d10-a215-abfc093b0ed0',
#        'ed1cadff-e20f-43a7-8627-aee10f48cc7a']
# len(ids)

# Project id(s) queried by this run
ids = [
    'ad149677-7ee0-479c-8d23-aa8c3bf58532',
]

## Pull projects from TerraMatch API

In [None]:
# Pull from the production endpoint for the project ids selected above.
# NOTE(review): modified_since presumably limits the pull to records changed
# on or after that date, and outfile is presumably where the raw response is
# written -- confirm against api.pull_wrapper.
results = api.pull_wrapper(
    prod_url,
    headers,
    ids,
    modified_since='2025-09-09',
    outfile=tm_api_pull_results_file,
)

## Parse and save the API output

In [None]:
# Load the saved JSON file so the results can be parsed without querying the
# API again.
# Read as UTF-8 explicitly: JSON is UTF-8 by specification, while open()
# would otherwise fall back to the platform locale encoding.
with open(tm_api_pull_results_file, 'r', encoding='utf-8') as file:
    results = json.load(file)

In [None]:
# Turn the JSON output into a dataframe with selected fields; indicator
# parsing is switched on and polygon_features_file is passed as the outfile.
results_df = api.parse_tm_api_results(
    results,
    outfile=polygon_features_file,
    parse_indicators=True,
)

In [None]:
# Optional: also export the dataframe as a GeoJSON, using the same naming
# pattern as the CSV output
geo.df_to_geojson(
    df=results_df,
    geometry_col='geometry',
    output_path=f'../data/{run_dir}/tm_api_{run_name}_{today}.geojson',
    crs='EPSG:4326',
)