In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import json
from shapely.geometry import shape, mapping
import sys
sys.path.append('../src/')
import geospatial_utils_NEW as utils
import analyze_img_coverage as analyze
import image_coverage as img_cover

In [4]:
# Load the input tables for the TF Cohort 1 imagery-coverage review

# Approved projects (this table supplies each project's country code)
approved_projects = pd.read_csv('../projects_all_approved_202502211226.csv')

# Baseline results: polygon-level coverage, low-coverage polygons, project-level coverage
baseline_poly = pd.read_csv('../data/tf_cohort1/results/baseline/polygon_imagery_coverage_cohort1_2025-04-02.csv')
baseline_low_cov = pd.read_csv('../data/tf_cohort1/results/baseline/low_coverage_polygons_cohort1_2025-04-02.csv')
baseline_proj = pd.read_csv('../data/tf_cohort1/results/baseline/project_imagery_coverage_cohort1_2025-04-02.csv')

# Year-2 (early verification) results: same three tables
ev_poly = pd.read_csv('../data/tf_cohort1/results/year_2/polygon_imagery_coverage_cohort1_2025-04-02.csv')
ev_low_cov = pd.read_csv('../data/tf_cohort1/results/year_2/low_coverage_polygons_cohort1_2025-04-02.csv')
ev_proj = pd.read_csv('../data/tf_cohort1/results/year_2/project_imagery_coverage_cohort1_2025-04-02.csv')

### Check # Low-Coverage Polygons

In [32]:
# Cohort size: distinct polygon ids in the baseline polygon-level table
num_poly = baseline_poly['poly_id'].nunique()
print('# Total TF Cohort 1 Polygons:', num_poly)
print('')

# --- Baseline: low-coverage polygons, split by how many images are available ---
num_base_low_cov = baseline_low_cov['poly_id'].nunique()
num_base_no_img = int(baseline_low_cov['num_images'].eq(0).sum())
num_base_1_img = int(baseline_low_cov['num_images'].eq(1).sum())
num_base_mult_img = int(baseline_low_cov['num_images'].gt(1).sum())

print('# Poly wi/ Low Cover at Baseline:', num_base_low_cov)
print(f'{(num_base_low_cov/num_poly):.1%} of polygons have low image cover at baseline')
print(f'Of low cover at baseline polygons, {num_base_no_img} have 0 available images.')
print(f'    - This is {num_base_no_img/num_base_low_cov:.1%} of low-coverage polygons.')
print(f'    - This is {num_base_no_img/num_poly:.1%} of all polygons.')
print(f'Of low cover at baseline polygons, {num_base_1_img} have 1 available image (the low-cover image).')
print(f'    - This is {num_base_1_img/num_base_low_cov:.1%} of low-coverage polygons.')
print(f'    - This is {num_base_1_img/num_poly:.1%} of all polygons.')
print(f'Of low cover at baseline polygons, {num_base_mult_img} have > 1 available images (possible other images to select from).')
print(f'    - This is {num_base_mult_img/num_base_low_cov:.1%} of low-coverage polygons.')
print(f'    - This is {num_base_mult_img/num_poly:.1%} of all polygons.')
print('')

# --- EV (1-year post-planting): same breakdown on the year-2 low-coverage table ---
num_ev_low_cov = ev_low_cov['poly_id'].nunique()
num_ev_no_img = int(ev_low_cov['num_images'].eq(0).sum())
num_ev_1_img = int(ev_low_cov['num_images'].eq(1).sum())
num_ev_mult_img = int(ev_low_cov['num_images'].gt(1).sum())

print('# Poly wi/ Low Cover at EV:', num_ev_low_cov)
print(f'{(num_ev_low_cov/num_poly):.1%} of polygons have low image cover at EV')
print(f'Of low cover at EV polygons, {num_ev_no_img} have 0 available images.')
print(f'    - This is {num_ev_no_img/num_ev_low_cov:.1%} of low-coverage polygons.')
print(f'    - This is {num_ev_no_img/num_poly:.1%} of all polygons.')
print(f'Of low cover at EV polygons, {num_ev_1_img} have 1 available image (the low-cover image).')
print(f'    - This is {num_ev_1_img/num_ev_low_cov:.1%} of low-coverage polygons.')
print(f'    - This is {num_ev_1_img/num_poly:.1%} of all polygons.')
print(f'Of low cover at EV polygons, {num_ev_mult_img} have > 1 available images (possible other images to select from).')
print(f'    - This is {num_ev_mult_img/num_ev_low_cov:.1%} of low-coverage polygons.')
print(f'    - This is {num_ev_mult_img/num_poly:.1%} of all polygons.')

# Total TF Cohort 1 Polygons: 13537

# Poly wi/ Low Cover at Baseline: 6793
50.2% of polygons have low image cover at baseline
Of low cover at baseline polygons, 6556 have 0 available images.
    - This is 96.5% of low-coverage polygons.
    - This is 48.4% of all polygons.
Of low cover at baseline polygons, 96 have 1 available image (the low-cover image).
    - This is 1.4% of low-coverage polygons.
    - This is 0.7% of all polygons.
Of low cover at baseline polygons, 141 have > 1 available images (possible other images to select from).
    - This is 2.1% of low-coverage polygons.
    - This is 1.0% of all polygons.

# Poly wi/ Low Cover at EV: 7017
51.8% of polygons have low image cover at EV
Of low cover at EV polygons, 6738 have 0 available images.
    - This is 96.0% of low-coverage polygons.
    - This is 49.8% of all polygons.
Of low cover at EV polygons, 148 have 1 available image (the low-cover image).
    - This is 2.1% of low-coverage polygons.
    - This is 1.1% of all pol

In [36]:
def calc_low_cover_poly_stats(all_polygons_df, low_cover_polygons_df, analysis_period):
    """
    Print the number and share of polygons with no or low image coverage for one analysis period.

    Low coverage is defined as an image covering < 50% of the polygon. Low-coverage
    polygons are further split by how many images are available for them (0, exactly 1,
    or more than 1). Shares are reported relative to the low-coverage polygons and
    relative to all polygons. When a denominator is zero (an empty table) the share is
    printed as 'n/a' instead of raising ZeroDivisionError.

    Args:
    - all_polygons_df (DataFrame): All polygons in the analysis. Must have 1 row for each
      unique polygon and a unique polygon ID column called 'poly_id'.
    - low_cover_polygons_df (DataFrame): All low cover polygons in the analysis. Must include
      a 'poly_id' column and a 'num_images' column containing the # of available Maxar
      images for that polygon.
    - analysis_period (str): The time period for this analysis (e.g. Baseline or EV).
      Used as-is in the header line and lower-cased in the remaining lines.

    Returns:
    - None. The statistics are written to stdout.
    """
    def _share(count, total):
        # Format count/total as a percentage; an empty table gives total == 0.
        return f'{count/total:.1%}' if total else 'n/a'

    period = analysis_period.lower()
    num_images = low_cover_polygons_df['num_images']

    # Totals are counted as distinct polygon ids; the per-category counts below are
    # row counts of the low-coverage table.
    num_poly = all_polygons_df['poly_id'].nunique()
    num_low_cov = low_cover_polygons_df['poly_id'].nunique()

    breakdown = [
        (int((num_images == 0).sum()), '0 available images.'),
        (int((num_images == 1).sum()), '1 available image (the low-cover image).'),
        (int((num_images > 1).sum()), '> 1 available images (possible other images to select from).'),
    ]

    print(f'# Poly wi/ Low Cover at {analysis_period}:', num_low_cov)
    print(f'{_share(num_low_cov, num_poly)} of polygons have low image cover at {period}')
    for count, description in breakdown:
        print(f'Of low cover at {period} polygons, {count} have {description}')
        print(f'    - This is {_share(count, num_low_cov)} of low-coverage polygons.')
        print(f'    - This is {_share(count, num_poly)} of all polygons.')
    print('')

In [39]:
# Print the low-coverage summary for each analysis period with the helper defined above.
# Each call pairs that period's polygon-level table with its low-coverage table.
calc_low_cover_poly_stats(baseline_poly, baseline_low_cov, 'Baseline')
calc_low_cover_poly_stats(ev_poly, ev_low_cov, 'Early Verification')

# Poly wi/ Low Cover at Baseline: 6793
50.2% of polygons have low image cover at baseline
Of low cover at baseline polygons, 6556 have 0 available images.
    - This is 96.5% of low-coverage polygons.
    - This is 48.4% of all polygons.
Of low cover at baseline polygons, 96 have 1 available image (the low-cover image).
    - This is 1.4% of low-coverage polygons.
    - This is 0.7% of all polygons.
Of low cover at baseline polygons, 141 have > 1 available images (possible other images to select from).
    - This is 2.1% of low-coverage polygons.
    - This is 1.0% of all polygons.

# Poly wi/ Low Cover at Early Verification: 7017
51.8% of polygons have low image cover at early verification
Of low cover at early verification polygons, 6738 have 0 available images.
    - This is 96.0% of low-coverage polygons.
    - This is 49.8% of all polygons.
Of low cover at early verification polygons, 148 have 1 available image (the low-cover image).
    - This is 2.1% of low-coverage polygons.
  

In [46]:
# Baseline low-coverage rows with a non-zero overlap area and more than one available image.
# The two conditions are combined with the boolean `&` operator and the mask is built once,
# then reused for both the count and the displayed rows.
partial_overlap_multi_img = (baseline_low_cov['overlap_area_ha'] > 0) & (baseline_low_cov['num_images'] > 1)
print(len(baseline_low_cov[partial_overlap_multi_img]))
baseline_low_cov[partial_overlap_multi_img]

37


Unnamed: 0,poly_id,project_id,best_image,num_images,poly_area_ha,overlap_area_ha,percent_img_cover
473,20229368-bd60-4b95-b8e0-66cf9fbdec84,943bb150-f1b7-4ad2-bb9e-60a559df2ebd,Maxar WV02 Image 10300100E24B5B00,3,0.159798,0.01817,11.370409
756,1d3bcc02-1007-4ba4-b574-8332ee9ee0d1,943bb150-f1b7-4ad2-bb9e-60a559df2ebd,Maxar WV02 Image 10300100D3A41900,2,0.836483,0.401011,47.940116
761,30b394c5-a489-44a2-9022-40280177882b,943bb150-f1b7-4ad2-bb9e-60a559df2ebd,Maxar WV02 Image 10300100D3A41900,3,0.612705,0.197755,32.275818
776,d0ca0c51-7682-4894-bce8-61ba458381a0,943bb150-f1b7-4ad2-bb9e-60a559df2ebd,Maxar WV02 Image 10300100D1D6C800,3,0.640151,0.073561,11.491159
917,c9ebeb2d-9857-4c4e-b909-86df05bbc788,943bb150-f1b7-4ad2-bb9e-60a559df2ebd,Maxar WV02 Image 10300100D28B4C00,3,0.837122,0.221544,26.464954
945,c14b1768-3516-4ce4-be63-4eece173f48c,943bb150-f1b7-4ad2-bb9e-60a559df2ebd,Maxar WV02 Image 10300100D136D700,4,0.83648,0.344294,41.159883
1012,02faceea-68cb-47c5-b25a-e39771c9e8d9,449adf55-f6f8-4f17-97d3-ab6f6bf6676d,Maxar WV03 Image 104001007F651000,2,25.251827,1.820769,7.210445
1900,f992189b-c606-4ed6-909e-ac1dd7ec293e,f9dc4f5a-503b-45e5-baf0-0162f7bdb891,Maxar WV03 Image 104001007511D300,4,1.602185,0.044434,2.773339
2146,67ba344a-688d-49a1-8d15-49783e7d6703,f9dc4f5a-503b-45e5-baf0-0162f7bdb891,Maxar WV02 Image 10300100D0324100,2,1.17138,0.000794,0.067826
2462,895b9d69-2f75-41d7-b11e-5cf5fa266762,4124a403-45ca-4e48-be6f-6c15b3b48617,Maxar WV02 Image 10300100EC12C200,2,0.340314,0.082694,24.299259


In [3]:
# Merge the country codes from the list of approved projects into the image-availability tables.
# A 'country' column left over from an earlier run of this cell is dropped first, so re-running
# the cell does not produce 'country_x' / 'country_y' columns that break later 'country' lookups.
project_countries = approved_projects[['project_id', 'country']]

# Polygon-level
baseline_poly = baseline_poly.drop(columns='country', errors='ignore').merge(project_countries, on='project_id', how='left')
ev_poly = ev_poly.drop(columns='country', errors='ignore').merge(project_countries, on='project_id', how='left')

# Project-level
baseline_proj = baseline_proj.drop(columns='country', errors='ignore').merge(project_countries, on='project_id', how='left')
ev_proj = ev_proj.drop(columns='country', errors='ignore').merge(project_countries, on='project_id', how='left')

In [None]:
# Restrict the coverage tables to the TF focus landscape countries
landscape_countries = ['BI', 'CD', 'GH', 'KE', 'RW']

# Polygon-level subsets
baseline_poly_landscapes = baseline_poly.loc[baseline_poly['country'].isin(landscape_countries)]
ev_poly_landscapes = ev_poly.loc[ev_poly['country'].isin(landscape_countries)]

# Project-level subsets
baseline_proj_landscapes = baseline_proj.loc[baseline_proj['country'].isin(landscape_countries)]
ev_proj_landscapes = ev_proj.loc[ev_proj['country'].isin(landscape_countries)]

# Report project counts before and after the landscape filter, per period
print(f"There are {len(baseline_proj)} projects with imagery at baseline.")
print(f"There are {len(baseline_proj_landscapes)} projects in the TF focus landscapes with imagery at baseline.")
print()
print(f"There are {len(ev_proj)} projects with imagery 1 year+ post-plantstart.")
print(f"There are {len(ev_proj_landscapes)} projects in the TF focus landscapes with imagery 1 year+ post-plantstart.")

In [None]:
## FILTER BY COVERAGE THRESHOLD

# Coverage threshold, compared against the 'total_percent_area_covered' column
thresh = 50

# Keep landscape projects with >= thresh % coverage in each period
baseline_landscapes_thresh = baseline_proj_landscapes[baseline_proj_landscapes['total_percent_area_covered'] >= thresh]
ev_landscapes_thresh = ev_proj_landscapes[ev_proj_landscapes['total_percent_area_covered'] >= thresh]

# Print individual results
print(f"There are {len(baseline_landscapes_thresh)} projects with >={thresh}% coverage at baseline.")
print(f"There are {len(ev_landscapes_thresh)} projects with >={thresh}% coverage 1 year+ post-planting")
print()

# Project ids that meet the threshold in both periods
common_project_ids_thresh = set(baseline_landscapes_thresh['project_id']).intersection(ev_landscapes_thresh['project_id'])

# Retrieve details of common projects
common_projects_baseline_thresh = baseline_landscapes_thresh[baseline_landscapes_thresh['project_id'].isin(common_project_ids_thresh)]
common_projects_ev_thresh = ev_landscapes_thresh[ev_landscapes_thresh['project_id'].isin(common_project_ids_thresh)]

# Display results
print(f"There are {len(common_project_ids_thresh)} projects with >= {thresh}% coverage at both baseline and 1-year post-plantstart")
# Sorted so the printed list is identical on every run: the iteration order of a set of
# strings changes between kernel sessions. key=str keeps the sort safe for mixed value types.
print(sorted(common_project_ids_thresh, key=str))

In [None]:
# Side-by-side baseline vs. early-verification coverage for the projects that met
# the threshold in both periods (one row per shared project_id)
merged_high_cov = pd.merge(
    common_projects_baseline_thresh[['project_id', 'total_percent_area_covered']],
    common_projects_ev_thresh[['project_id', 'total_percent_area_covered']],
    on='project_id',
    suffixes=('_baseline', '_ev'),
)
print(f"\n Coverage Comparison from Baseline to Early Verification for Projects with {thresh}% Coverage:")
merged_high_cov

In [None]:
# Baseline vs. early-verification coverage for ALL landscape projects present in both periods

# Project ids found in both project-level landscape tables
common_project_ids = set(baseline_proj_landscapes['project_id']).intersection(ev_proj_landscapes['project_id'])

# Rows of each table that belong to those shared projects
common_projects_baseline = baseline_proj_landscapes.loc[baseline_proj_landscapes['project_id'].isin(common_project_ids)]
common_projects_ev = ev_proj_landscapes.loc[ev_proj_landscapes['project_id'].isin(common_project_ids)]

# One row per project, with a coverage column for each period
merged = pd.merge(
    common_projects_baseline[['project_id', 'total_percent_area_covered']],
    common_projects_ev[['project_id', 'total_percent_area_covered']],
    on='project_id',
    suffixes=('_baseline', '_ev'),
)

# Save the comparison table
merged.to_csv('../tf_cohort1_landscapes_baseline_ev_pct_cover_comparison.csv', index=False)

print("\n Coverage Comparison from Baseline to Early Verification for All Projects in Landscapes:")
print(len(merged))
merged

In [None]:
# Load the TerraFund Cohort 1 polygon export, report its row count, and preview it
polygons = pd.read_csv('../data/tf_cohort1/tm_api_cohort1_2025-04-02.csv')
print(polygons.shape[0])
polygons.head()

In [None]:
# Peek at the first 8 distinct plantstart values, printing 'nan' for missing ones.
l = list(polygons.plantstart.unique())[0:8]
value = float('nan')
print(type(value))
for i in l:
    # NaN never compares equal to anything (not even itself), so `i == value` could never
    # be True and the 'nan' branch was unreachable. pd.isna is the reliable missing-value test.
    if pd.isna(i):
        print('nan')
    else:
        print(i)

In [None]:
# Subset the cohort polygons to the ARCOS project (selected by its project_id);
# .copy() gives an independent frame rather than a view of `polygons`
arcos = polygons.loc[polygons['project_id'] == 'bbd88e69-cd85-429e-bebf-6234bf82dbb3'].copy()
print(arcos.shape[0])
arcos.head()

In [None]:
# Inspect the 'geometry' value of the first ARCOS polygon row
arcos['geometry'].iloc[0]

In [None]:
# Export ARCOS polygons as geoJSON
# (delegates to the project helper utils.df_to_geojson, which is given the output path)
utils.df_to_geojson(arcos, output_path='../arcos_polygons_2025-04-03.geojson')

### Calculating the % of Polygons with >X% Coverage for Both Baseline & EV

In [None]:
# Preview both polygon-level coverage tables.
# Only the last expression of a cell is rendered automatically, so the baseline table is
# shown explicitly with display(); head() keeps the preview short.
display(baseline_poly.head())
ev_poly.head()

In [None]:
def compare_polygon_coverage(baseline_df, ev_df, threshold):
    """
    Summarize, per project, how many polygons meet a coverage threshold in both periods.

    Only polygons present in both tables (inner join on 'poly_id') are considered.

    Args:
    - baseline_df (DataFrame): Baseline table with 'poly_id', 'project_id' and
      'percent_img_cover' columns.
    - ev_df (DataFrame): Early-verification table with 'poly_id' and 'percent_img_cover' columns.
    - threshold (number): Minimum 'percent_img_cover' a polygon must reach (>=) in both periods.

    Returns:
    - DataFrame with one row per project_id and the columns 'total_polygons' (shared polygons),
      'polygons_high_both' (shared polygons meeting the threshold in both periods) and
      'percent_polygons_high_both' (the ratio of the two, times 100).
    """
    # Keep only the columns needed and give each period's coverage a distinct name
    baseline_cover = baseline_df[['poly_id', 'project_id', 'percent_img_cover']].rename(
        columns={'percent_img_cover': 'base_pct_img_cover'}
    )
    ev_cover = ev_df[['poly_id', 'percent_img_cover']].rename(
        columns={'percent_img_cover': 'ev_pct_img_cover'}
    )

    # Pair up the two periods for polygons that appear in both tables
    paired = pd.merge(baseline_cover, ev_cover, on='poly_id', how='inner')

    # Flag polygons that reach the threshold at baseline AND at early verification
    meets_baseline = paired['base_pct_img_cover'] >= threshold
    meets_ev = paired['ev_pct_img_cover'] >= threshold
    paired['both_high'] = meets_baseline & meets_ev

    # Per project: number of shared polygons and number flagged above
    per_project = paired.groupby('project_id').agg(
        total_polygons=('poly_id', 'count'),
        polygons_high_both=('both_high', 'sum'),
    )
    per_project = per_project.reset_index()

    # Share of shared polygons that meet the threshold in both periods, in percent
    per_project['percent_polygons_high_both'] = (
        per_project['polygons_high_both'] / per_project['total_polygons'] * 100
    )
    return per_project

In [None]:
# Per-project share of landscape polygons meeting the threshold (10) in both periods,
# ordered from the highest share to the lowest
both_high_poly_cover = (
    compare_polygon_coverage(baseline_poly_landscapes, ev_poly_landscapes, 10)
    .sort_values(by='percent_polygons_high_both', ascending=False)
)
print(len(both_high_poly_cover))
both_high_poly_cover

In [None]:
# Save the per-project summary of polygons meeting the threshold in both periods.
# NOTE(review): the file name says "gte_70_pct", but the compare_polygon_coverage call that
# builds both_high_poly_cover passes a threshold of 10 — confirm which value is intended.
both_high_poly_cover.to_csv('../poly_wi_gte_70_pct_cover_base_ev.csv', index=False)

### Calculating Overlap in Actual Imagery Coverage Between Baseline and EV Imagery Area 

In [4]:
### 1. LOAD POLYGON AND IMAGE DATA FOR COHORT 1 (ALL TIME PERIODS) ###
# maxar_df: combined image-availability table for cohort 1
# poly_df: cohort 1 polygon export (the same file path that `polygons` is read from earlier in this notebook)
maxar_df = pd.read_csv('../data/tf_cohort1/imagery_availability/comb_img_availability_cohort1_2025-04-02.csv')
poly_df = pd.read_csv('../data/tf_cohort1/tm_api_cohort1_2025-04-02.csv')

In [5]:
### 2. PREPROCESS POLYGON AND IMAGE DATA ###
# Run the project's preprocessing helpers (image_coverage module) on the raw tables.
# With debug=True the helpers print a processing summary (see the cell output).
poly_gdf = img_cover.preprocess_polygons(poly_df, debug=True)
maxar_gdf = img_cover.preprocess_images(maxar_df, debug=True)

Processing polygon data...
Cleaning geometries...

🧾 Geometry Cleaning Summary:
  ➤ Total geometries processed: 13537
  ➤ Invalid geometries:         0
  ➤ Repaired with buffer(0):    0
  ➤ Dropped:                    0
  ✅ Final valid polygons:       13537

Processing Maxar image data...
There are 175641 images for 12168 polygons in 78 projects in this dataset.


In [6]:
# Preview the preprocessed polygon table
poly_gdf.head()

Unnamed: 0,project_id,poly_id,site_id,poly_geom,plantstart,plantend,practice,target_sys,dist,project_phase
0,389aad5b-6577-4cea-bf9f-446dcfd94966,a40e322b-42ff-4008-8407-e611b170a39c,ec236c6f-214d-46dc-af61-6c1aa44530fc,"POLYGON ((31.39719 1.51653, 31.3972 1.51652, 3...",2022-01-08,,tree-planting,agroforest,,
1,389aad5b-6577-4cea-bf9f-446dcfd94966,9dcccf42-cd63-471b-a251-abd1009fb819,ec236c6f-214d-46dc-af61-6c1aa44530fc,"POLYGON ((31.39611 1.51079, 31.39611 1.51075, ...",2022-01-08,,tree-planting,agroforest,,
2,389aad5b-6577-4cea-bf9f-446dcfd94966,32199fa7-3ed0-4222-8f5d-fc21ac8e2e16,ec236c6f-214d-46dc-af61-6c1aa44530fc,"POLYGON ((31.08733 1.41471, 31.08732 1.4148, 3...",2022-01-08,,tree-planting,agroforest,,
3,389aad5b-6577-4cea-bf9f-446dcfd94966,9e64aa1c-dd3a-4b62-8e82-e7a64bbe3131,ec236c6f-214d-46dc-af61-6c1aa44530fc,"POLYGON ((31.39622 1.51093, 31.39622 1.51093, ...",2022-01-08,,tree-planting,agroforest,,
4,389aad5b-6577-4cea-bf9f-446dcfd94966,65be56b4-31ad-443d-98fa-04818907a246,ec236c6f-214d-46dc-af61-6c1aa44530fc,"POLYGON ((31.39411 1.53001, 31.39402 1.52995, ...",2022-01-08,,tree-planting,agroforest,,


In [9]:
# Preview the preprocessed Maxar image table
maxar_gdf.head()

Unnamed: 0,img_id,title,project_id,poly_id,img_date,area:cloud_cover_percentage,eo:cloud_cover,area:avg_off_nadir_angle,view:sun_elevation,img_geom
0,10400100A3954600,Maxar WV03 Image 10400100A3954600,57589518-68da-40e0-b211-bf6d64eccca7,ff18fd70-e1c6-480c-99a2-9ba98794804b,2025-01-16 09:33:51.140959,0.0,1.065642,2.218095,54.80353,"POLYGON ((13.87831 4.77328, 13.76055 4.77314, ..."
1,104001009E799200,Maxar WV03 Image 104001009E799200,57589518-68da-40e0-b211-bf6d64eccca7,ff18fd70-e1c6-480c-99a2-9ba98794804b,2024-11-27 09:30:05.589714,0.0,0.001014,16.905159,57.238293,"POLYGON ((13.85451 4.66878, 13.72715 4.67526, ..."
2,10400100A0B34F00,Maxar WV03 Image 10400100A0B34F00,57589518-68da-40e0-b211-bf6d64eccca7,ff18fd70-e1c6-480c-99a2-9ba98794804b,2024-11-15 09:34:32.202146,100.0,99.099001,8.354685,60.343291,"POLYGON ((13.84236 4.6707, 13.72192 4.66965, 1..."
3,10300100DF837900,Maxar WV02 Image 10300100DF837900,57589518-68da-40e0-b211-bf6d64eccca7,ff18fd70-e1c6-480c-99a2-9ba98794804b,2022-12-15 09:32:59.194394,20.034193,43.241046,22.346207,55.053746,"POLYGON ((13.7975 4.76311, 13.79754 4.7517, 13..."
4,10300100DEC10000,Maxar WV02 Image 10300100DEC10000,57589518-68da-40e0-b211-bf6d64eccca7,ff18fd70-e1c6-480c-99a2-9ba98794804b,2022-11-18 09:29:41.355872,52.250822,69.032079,22.391607,59.376543,"POLYGON ((13.79359 5.05278, 13.79359 5.05186, ..."


In [7]:
### 3. MERGE POLYGON METADATA INTO IMAGE DATA ###
# The helper returns two values: the merged table and a second value stored as
# missing_polygons_list (per the debug output, the polygons that have no Maxar images).
merged_gdf, missing_polygons_list = img_cover.merge_polygons_images(maxar_gdf, poly_gdf, debug=True)

Merging polygon metadata into image data...
Total images in img_gdf: 175641
Total polygons in poly_gdf: 13537
Number of polygons removed from merged dataset due to invalid (unfixable) geometries: 0
Number of rows removed from image dataset because their polygons had invalid (unfixable) geometries: 0
Total rows in merged dataset: 175641
Unique polygons in merged dataset: 12168
1369 polygons were dropped from the merged dataset because they have no Maxar images
Polygons without images (dropped at this stage): [('55e31b6a-5d76-4fc1-96f3-768f9df86e08', 'f449aef3-4453-42c9-b542-57acc7c2e5eb'), ('b5398eda-cb07-41ee-87b7-e9c1152ff7b1', 'f449aef3-4453-42c9-b542-57acc7c2e5eb'), ('402cd902-35a7-4fb7-82f5-2f73af62b450', 'f449aef3-4453-42c9-b542-57acc7c2e5eb'), ('5cd5902c-c20c-4058-aee4-2dbd1b4ad9e4', 'b7f26543-0ddb-4d10-a215-abfc093b0ed0'), ('5b9d7706-ee3d-4a74-9779-523a77a9d1c2', 'f9dc4f5a-503b-45e5-baf0-0162f7bdb891'), ('cbf29060-c709-47f9-9b47-bd4a2da236f2', 'f9dc4f5a-503b-45e5-baf0-0162f7bdb8

In [10]:
### 4. DO INITIAL HARD FILTER OF IMAGES (INCLUDES DATE RANGE) ###
# The same merged image table is filtered twice, once per analysis period. The two filter
# sets differ only in 'date_range'.
# NOTE(review): 'date_range' looks like a (start, end) offset in days relative to plantstart —
# confirm against img_cover.filter_images.

# For Baseline
# Set filters
base_filters = {
    'cloud_cover': 50,          # Remove images with >50% cloud cover
    'off_nadir': 30,            # Remove images with >30° off-nadir angle
    'sun_elevation': 30,        # Keep only images where sun elevation >30°
    'date_range': (-366, 0),    # Date range of 1 year before plantstart (baseline)
    'img_count': 1,             # Threshold for identifying image availability (REASSESS)
}
# Filter gdf
base_img_gdf_filtered = img_cover.filter_images(merged_gdf, base_filters, debug=True)

# For early verification (1 year+ post plantstart)
# Set filters
ev_filters = {
    'cloud_cover': 50,          # Remove images with >50% cloud cover
    'off_nadir': 30,            # Remove images with >30° off-nadir angle
    'sun_elevation': 30,        # Keep only images where sun elevation >30°
    'date_range': (365, 9999),    # Date range of 1 year post-plantstart through today (upper bound of maxar_md dataset is today's date) (year_2)
    'img_count': 1,             # Threshold for identifying image availability (REASSESS)
}
# Filter gdf
ev_img_gdf_filtered = img_cover.filter_images(merged_gdf, ev_filters, debug=True)

# Print results: image counts before/after filtering and polygons left with at least one image
print(f"Total images before filtering: {len(merged_gdf)}")
print()
print('BASELINE:')
print(f"Total images after filtering: {len(base_img_gdf_filtered)}")
print(f"Polygons with at least one valid image: {base_img_gdf_filtered['poly_id'].nunique()}")
print()
print('EARLY VERIFICATION:')
print(f"Total images after filtering: {len(ev_img_gdf_filtered)}")
print(f"Polygons with at least one valid image: {ev_img_gdf_filtered['poly_id'].nunique()}")

Total images before filtering: 175641
Total images after filtering: 14032
Polygons with at least one valid filtered image: 6981
Total images before filtering: 175641
Total images after filtering: 17151
Polygons with at least one valid filtered image: 6799
Total images before filtering: 175641

BASELINE:
Total images after filtering: 14032
Polygons with at least one valid image: 6981

EARLY VERIFICATION:
Total images after filtering: 17151
Polygons with at least one valid image: 6799


In [None]:
### 5. COMPUTE POLYGON-LEVEL IMAGERY COVERAGE ###
#### BASELINE ###
# Initialize storage for results & low-coverage polygons list
base_low_img_coverage_log = []
base_results = []

# Iterate through all polygons and compute imagery coverage per polygon,
# using the baseline-filtered images.
# NOTE(review): the log list is passed into the helper, presumably so it can record
# low-coverage polygons there — confirm in image_coverage.compute_polygon_image_coverage.
for poly_id, project_id in zip(poly_gdf['poly_id'], poly_gdf['project_id']):
    result = img_cover.compute_polygon_image_coverage(poly_id, project_id, poly_gdf, base_img_gdf_filtered, base_low_img_coverage_log)
    base_results.append(result)

# Convert the results to a DataFrame (one row per polygon, in poly_gdf order)
base_results_df = pd.DataFrame(base_results, columns=['poly_id', 'project_id', 'best_image', 'num_images',
                                            'poly_area_ha', 'overlap_area_ha', 'percent_img_cover'])
# Replace missing best_image values with the literal string "None"
base_results_df['best_image'] = base_results_df['best_image'].fillna("None")

In [None]:
# Row count and preview of the baseline polygon-level coverage results
print(len(base_results_df))
base_results_df.head()

In [None]:
### EARLY VERIFICATION ###
### 5. COMPUTE POLYGON-LEVEL IMAGERY COVERAGE ###
# Initialize storage for results & low-coverage polygons list
ev_low_img_coverage_log = []
ev_results = []

# Iterate through all polygons and compute imagery coverage per polygon,
# using the early-verification-filtered images.
# NOTE(review): the log list is passed into the helper, presumably so it can record
# low-coverage polygons there — confirm in image_coverage.compute_polygon_image_coverage.
for poly_id, project_id in zip(poly_gdf['poly_id'], poly_gdf['project_id']):
    result = img_cover.compute_polygon_image_coverage(poly_id, project_id, poly_gdf, ev_img_gdf_filtered, ev_low_img_coverage_log)
    ev_results.append(result)

# Convert the results to a DataFrame (one row per polygon, in poly_gdf order)
ev_results_df = pd.DataFrame(ev_results, columns=['poly_id', 'project_id', 'best_image', 'num_images',
                                            'poly_area_ha', 'overlap_area_ha', 'percent_img_cover'])
# Replace missing best_image values with the literal string "None"
ev_results_df['best_image'] = ev_results_df['best_image'].fillna("None")

In [None]:
# Preview the early-verification polygon-level coverage results
ev_results_df.head()