# Finalize Polygons for PPC 2025 Batch 1 Indicators Analysis 
### (Remove Non-Batch 1 Polygons)

In [1]:
import json
import os
import sys
from datetime import datetime

import pandas as pd
import yaml
from tqdm import tqdm

# Project-local modules live in ../src/, so extend the import path before importing them.
sys.path.append('../src/')
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
import api_utils as api
import process_tm_api_results as clean

In [2]:
# Run date as a 'YYYY-MM-DD' string, taken from the system clock.
# Check the computer date before running (if out of sync, run sudo hwclock -s).
today = f"{datetime.today():%Y-%m-%d}"

In [4]:
# Load the 2025 Batch 1 polygons as pulled from TM on 06/02/25.
# In this version the polygons from the GBM PPC project's Makongo Forest Est 2022 site
# have already been removed (that site was left out of batch 1 because of confusion
# about its plantstart date).
df = pd.read_csv(filepath_or_buffer='../data/ppc/tm_api_ppc_2025_batch1_2025-06-02.csv')

In [5]:
# Parse plantstart as a datetime and derive the planting year from it.
# errors='coerce' turns unparseable values into NaT, so those rows get a missing year.
df = df.assign(
    plantstart=lambda frame: pd.to_datetime(frame['plantstart'], errors='coerce'),
    # Evaluated after the line above, so it reads the already-parsed column.
    plantstart_yr=lambda frame: frame['plantstart'].dt.year,
)

In [10]:
# Number of polygons per project in the loaded data, ordered by project_id
df['project_id'].value_counts(sort=False).sort_index()

project_id
1115dda6-0165-4099-b52f-0ac53595c3a9     15
1977b649-908c-46c3-836d-f4f6485427c2      2
244eaf7e-e109-47b2-b84e-9ebe24508391     84
24d8c9a2-b8ef-481c-930b-78c9aeaf239e     42
465f543e-d53a-4356-ae8d-9790aa42d30e      3
6d9089aa-2a6f-4dc0-8064-32c5b67ffed6    653
ad149677-7ee0-479c-8d23-aa8c3bf58532     80
f17dd6cf-8187-4edd-895e-07013d4990c9     20
Name: count, dtype: int64

In [7]:
# Batch 1 criteria: for each project_id, the plantstart years that belong to the batch 1 task.
batch1_criteria = {
    # EMA Maranhão
    '244eaf7e-e109-47b2-b84e-9ebe24508391': [2022],
    # Puerto Princesa
    '24d8c9a2-b8ef-481c-930b-78c9aeaf239e': [2022, 2023],
    # UAE
    'f17dd6cf-8187-4edd-895e-07013d4990c9': [2022, 2023, 2024],
    # GANB
    '1115dda6-0165-4099-b52f-0ac53595c3a9': [2021],
    # Ciclos Flagship
    '465f543e-d53a-4356-ae8d-9790aa42d30e': [2023],
    # Trees for Villagers
    'ad149677-7ee0-479c-8d23-aa8c3bf58532': [2023],
    # CEPAN
    '1977b649-908c-46c3-836d-f4f6485427c2': [2024],
    # GBM
    '6d9089aa-2a6f-4dc0-8064-32c5b67ffed6': [2023, 2024],
}


In [8]:
# Keep only polygons whose plantstart year is on the batch 1 list for their project.
#
# Membership is tested pairwise over the two columns rather than with a row-wise
# df.apply(axis=1): no per-row Series is built, and the result is always a boolean
# Series aligned to df.index (DataFrame.apply(axis=1) hands back a DataFrame, not a
# mask, when df has no rows).
#
# Rows whose project_id is not a key of batch1_criteria, or whose plantstart_yr is
# missing (NaN), match nothing and are therefore dropped.
mask = pd.Series(
    [
        year in batch1_criteria.get(project, [])
        for project, year in zip(df['project_id'], df['plantstart_yr'])
    ],
    index=df.index,
    dtype=bool,
)

# Filter the dataframe; copy so df_batch1 is independent of df
df_batch1 = df[mask].copy()

In [9]:
# Per-project polygon counts of the unfiltered frame (the filter writes to df_batch1, not df)
df.loc[:, 'project_id'].value_counts().sort_index()

project_id
1115dda6-0165-4099-b52f-0ac53595c3a9     15
1977b649-908c-46c3-836d-f4f6485427c2      2
244eaf7e-e109-47b2-b84e-9ebe24508391     84
24d8c9a2-b8ef-481c-930b-78c9aeaf239e     42
465f543e-d53a-4356-ae8d-9790aa42d30e      3
6d9089aa-2a6f-4dc0-8064-32c5b67ffed6    653
ad149677-7ee0-479c-8d23-aa8c3bf58532     80
f17dd6cf-8187-4edd-895e-07013d4990c9     20
Name: count, dtype: int64

In [11]:
# Per-project polygon counts after filtering to the batch 1 plantstart years.
# NOTE(review): in the recorded output, project 6d9089aa-... goes from 653 to 7 polygons
# and ad149677-... from 80 to 6 -- confirm these drops are expected before using the file.
df_batch1['project_id'].value_counts(sort=False).sort_index()

project_id
1115dda6-0165-4099-b52f-0ac53595c3a9    15
1977b649-908c-46c3-836d-f4f6485427c2     2
244eaf7e-e109-47b2-b84e-9ebe24508391    84
24d8c9a2-b8ef-481c-930b-78c9aeaf239e    42
465f543e-d53a-4356-ae8d-9790aa42d30e     3
6d9089aa-2a6f-4dc0-8064-32c5b67ffed6     7
ad149677-7ee0-479c-8d23-aa8c3bf58532     6
f17dd6cf-8187-4edd-895e-07013d4990c9    20
Name: count, dtype: int64

In [None]:
# Save the filtered polygons as a CSV file.
## Contents: the polygons as pulled from the TM API on 6/2/25, minus the 3 Makongo Forest
## Est 2022 Site polygons, minus every polygon whose plantstart year is not part of the
## batch 1 task.

# Written to the terrafund-portfolio-analysis repo (path is relative to this notebook)
df_batch1.to_csv(
    path_or_buf='../data/ppc/tm_api_ppc_2025_batch1_task_yrs_2025-06-13.csv',
    index=False,
)

In [None]:
# Save a second copy to the tf-biophysical-monitoring repo (to use with the CI spreadsheet
# builder notebook).
# The location of that checkout is machine-specific, so it can be overridden with the
# TF_BIOPHYSICAL_MONITORING_DIR environment variable; when the variable is unset, the
# original hardcoded location is used and the output path is unchanged.
tf_biophysical_repo = os.environ.get(
    'TF_BIOPHYSICAL_MONITORING_DIR',
    '/home/darby/github_repos/tf-biophysical-monitoring',
)
df_batch1.to_csv(
    os.path.join(tf_biophysical_repo, 'data/ppc/tm_api_ppc_2025_batch1_task_yrs_2025-06-13.csv'),
    index=False,
)