# Finalize Polygons for PPC 2025 Batch 1 Indicators Analysis 
### (Remove Non-Batch 1 Polygons)

In [1]:
import yaml
import pandas as pd
from tm_api_utils import pull_tm_api_data, patch_tm_api_data
from tqdm import tqdm
import json
import sys
from datetime import datetime
sys.path.append('../src/')
import api_utils as api
import process_tm_api_results as clean

In [2]:
# Today's date as a YYYY-MM-DD string, taken from the local system clock.
# Check the computer date before running (if out of sync, run `sudo hwclock -s`).
today = f"{datetime.today():%Y-%m-%d}"

In [3]:
# Load the 2025 Batch 1 polygons previously pulled from TM. In this version of the file, the polygons
# from the GBM PPC project's Makongo Forest Est 2022 site have already been removed (that site was
# left out of batch 1 because of confusion about its plantstart date).
df = pd.read_csv(filepath_or_buffer='../data/ppc/tm_api_ppc_2025_batch1_2025-06-02.csv')

In [17]:
# Parse plantstart once; errors='coerce' turns values that cannot be parsed into NaT
plantstart_parsed = pd.to_datetime(df['plantstart'], errors='coerce')
df['plantstart'] = plantstart_parsed

# Planting year of each polygon, taken from the parsed dates
df['plantstart_yr'] = plantstart_parsed.dt.year

In [18]:
# Number of polygons contributed by each project, largest first
df['project_id'].value_counts(sort=True, ascending=False)

project_id
6d9089aa-2a6f-4dc0-8064-32c5b67ffed6    653
244eaf7e-e109-47b2-b84e-9ebe24508391     84
ad149677-7ee0-479c-8d23-aa8c3bf58532     80
24d8c9a2-b8ef-481c-930b-78c9aeaf239e     42
f17dd6cf-8187-4edd-895e-07013d4990c9     20
1115dda6-0165-4099-b52f-0ac53595c3a9     15
465f543e-d53a-4356-ae8d-9790aa42d30e      3
1977b649-908c-46c3-836d-f4f6485427c2      2
Name: count, dtype: int64

In [20]:
# Batch 1 criteria: maps each project_id to the list of plantstart years allowed for that project.
# The project each id refers to is noted above its entry.
batch1_criteria = {
    # EMA Maranhão
    '244eaf7e-e109-47b2-b84e-9ebe24508391': [2022],
    # Puerto Princesa
    '24d8c9a2-b8ef-481c-930b-78c9aeaf239e': [2022, 2023],
    # UAE
    'f17dd6cf-8187-4edd-895e-07013d4990c9': [2022, 2023, 2024],
    # GANB
    '1115dda6-0165-4099-b52f-0ac53595c3a9': [2021],
    # Ciclos Flagship
    '465f543e-d53a-4356-ae8d-9790aa42d30e': [2023],
    # Trees for Villagers
    'ad149677-7ee0-479c-8d23-aa8c3bf58532': [2023],
    # CEPAN
    '1977b649-908c-46c3-836d-f4f6485427c2': [2024],
    # GBM
    '6d9089aa-2a6f-4dc0-8064-32c5b67ffed6': [2023, 2024],
}


In [None]:
# Filter polygons by year: keep a polygon only if its plantstart_yr is one of the years that
# batch1_criteria lists for its project_id. Polygons with a missing plantstart_yr, or whose
# project_id is not a key of batch1_criteria, never match and are therefore dropped.
#
# The mask is built with vectorized comparisons (one pass per project) rather than a row-wise
# df.apply(..., axis=1). Besides avoiding a Python-level loop over every row, this guarantees the
# mask is a boolean Series aligned to df even when df has no rows (row-wise apply on an empty
# frame returns a DataFrame instead of a Series).
mask = pd.Series(False, index=df.index)
for project_id, allowed_years in batch1_criteria.items():
    in_project = df['project_id'].eq(project_id)
    in_allowed_year = df['plantstart_yr'].isin(allowed_years)
    mask = mask | (in_project & in_allowed_year)

# Filter the dataframe; .copy() makes df_batch1 independent of df
df_batch1 = df[mask].copy()