# Pull Polygons from TerraMatch API

This notebook pulls polygon geometries and metadata from the TerraMatch API. The steps for pulling polygons are as follows:
1. Set up configuration and API token
2. Define the API request function
3. Create the list of projects to pull
4. Pull the polygons and export them to a CSV

In [None]:
import requests
import yaml
import json
import pandas as pd
import geopandas as gpd
from tqdm import tqdm
from datetime import datetime, timedelta

## Set up token and API URL

In [None]:
# Read the access token from the local secrets file and build the request headers
auth_path = '../secrets.yaml'
with open(auth_path) as auth_file:
    auth = yaml.safe_load(auth_file)
# The token is sent as a Bearer token in the Authorization header
headers = {'Authorization': f"Bearer {auth['access_token']}"}

In [None]:
# TerraMatch API URLs for the sitePolygons endpoint.
# The query parameters are not part of these strings; they are passed
# separately through `params` when the request is sent.
staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?" # Use for testing queries
prod_url = "https://api.terramatch.org/research/v3/sitePolygons?" # Use to pull data for analysis

## API Request

In [None]:
# Define function to pull TM API data
def pull_tm_api_data(url, headers, project_ids):
    """
    Pull approved site polygons for a list of projects from the TerraMatch API.

    Each project is queried separately. Results are read page by page
    (100 records per page) using the cursor of the last record on a page as
    the 'page[after]' value of the next request, until a page comes back
    that is not full.

    Args:
        url: URL of the sitePolygons endpoint (staging or production).
        headers: Request headers, including the Authorization bearer token.
        project_ids: List of project ids to query.

    Returns:
        A list of dictionaries, one per polygon. Each dictionary is the
        record's 'attributes' with two keys added: 'poly_id' (the record's
        paging cursor) and 'project_id' (the project that was queried).

    Raises:
        ValueError: If any request returns a status code other than 200.
    """
    page_size = 100
    # List to store all retrieved polygon metadata
    results = []
    # Set up a progress bar (one tick per project)
    with tqdm(total=len(project_ids), desc="Processing Projects", unit="project") as progress_bar:
        for project_id in project_ids:
            # Fresh parameters per project so no cursor carries over
            params = {
                'projectId[]': project_id,
                'polygonStatus[]': 'approved',
                'includeTestProjects': 'false',
                'page[size]': str(page_size),
            }

            while True:
                # Send GET request and store the response (polygon geometries & metadata)
                response = requests.get(url, headers=headers, params=params)

                # Check status code
                if response.status_code != 200:
                    raise ValueError(f'Request failed for project {project_id} with status code {response.status_code}')

                # Parse the body once per page
                records = response.json()['data']

                # No polygons (left) for this project: move on to the next one
                if not records:
                    break

                for record in records:
                    # Polygon attributes plus the ids needed downstream
                    data = record['attributes']
                    data['poly_id'] = record['meta']['page']['cursor']
                    data['project_id'] = project_id
                    results.append(data)

                # A page that is not full is the last page
                if len(records) != page_size:
                    break

                # Continue after the last record of THIS page. The cursor must
                # come from the page itself: a missing (None) cursor is dropped
                # from the query string and the first page would be fetched
                # again forever.
                params['page[after]'] = records[-1]['meta']['page']['cursor']

            progress_bar.update(1)
    return results

## Create lists of projects to pull

In [None]:
# Read in the list of approved projects (CSV export dated 2025-02-21)
full = pd.read_csv('../projects_all_approved_202502211226.csv')
# Show (rows, columns) as a quick check that the file loaded
full.shape

In [None]:
# Split the approved projects by cohort. Cohort 1 ('terrafund') is further
# divided into projects inside and outside of the TF landscape countries.
landscape_countries = ['BI', 'CD', 'RW', 'KE', 'GH']

cohort1 = full[full['cohort'] == 'terrafund']
in_landscape = cohort1['country'].isin(landscape_countries)
cohort1_landscapes = cohort1[in_landscape]
cohort1_non_landscapes = cohort1[~in_landscape]

cohort2 = full[full['cohort'] == 'terrafund-landscapes']

In [None]:
# Create a list of unique project ids to query.
# unique() keeps the ids in order of first appearance, so the list (and any
# slice taken from it) is the same on every run; a set would order the ids
# arbitrarily and differently between sessions.
ids = cohort1['project_id'].unique().tolist()

In [None]:
# Create a short list of ids for testing: keep only the first 11 project ids.
# NOTE: this overwrites `ids`; rebuild the full list before a complete pull.
ids = ids[:11]
ids

In [None]:
# Create a list of ids by specifying project_ids
# BirdLife International
#birdlife = ['36504a4e-f7a3-4963-9ff2-9aa9982cf990']

In [None]:
# Pull polygons for the projects in `ids` from the TerraMatch API (production URL).
# The result is a list with one dictionary per polygon.
project_results = pull_tm_api_data(prod_url, headers, ids)

In [None]:
# Convert the list of polygon records (one dictionary each) into a dataframe
project_df = pd.DataFrame(project_results)
# Lowercase all column names
project_df.columns = project_df.columns.str.lower()

In [None]:
# Preview the first rows of the pulled polygon data
project_df.head()

In [None]:
# Check the size of the dataframe: number of columns, then (rows, columns)
len(project_df.columns)
project_df.shape

In [None]:
# Export the polygon geometries & metadata as a csv
# Date stamp (YYYY-MM-DD) used by the dated export paths below
today = f"{datetime.today():%Y-%m-%d}"
# project_df.to_csv(f"../data/tm_api_{today}.csv", index=False) # To the darby-tm-api-pull repo
# project_df.to_csv(f"/home/darby/github_repos/maxar-tools/data/tm_api_{today}.csv", index=False) # To the darby-maxar-tools repo


# TEST PULL
# Fixed file name (no date stamp) for the test export
project_df.to_csv("../data/tm_api_DREK_2025-02-26.csv", index=False) # To the darby-tm-api-pull repo
#project_df.to_csv(f"/home/darby/github_repos/maxar-tools/data/tm_api_TEST_PROD_NEW.csv", index=False) # To the darby-maxar-tools repo