# Analyze Maxar Image Availability

Takes in CSVs of:
- All polygon features
- All Maxar images available for those polygons
- Filtered image availability per polygon
- Filtered image availability per project (aggregated)
- Low coverage polygons

In [1]:
import pandas as pd
import geopandas as gpd
import sys
sys.path.append('../src/')
import image_coverage as img_cover
import analyze_img_coverage as analyze
from datetime import datetime, timedelta

## Set file paths

In [2]:
# File paths
# Every cohort file lives under the same data directory and carries the same
# cohort/date tag in its name, so both are defined once here; switching to a
# different export only requires editing these constants.
COHORT_DIR = '../data/tf_cohort1'
RUN_TAG = 'cohort1_2025-04-02'
RESULTS_DIR = f'{COHORT_DIR}/results'

feats = f'{COHORT_DIR}/tm_api_{RUN_TAG}.csv' # Polygon metadata & geometries from TM API
maxar_md = f'{COHORT_DIR}/imagery_availability/comb_img_availability_{RUN_TAG}.csv' # Metadata for Maxar images corresponding to polygons
approved_projects = '../projects_all_approved_202502211226.csv' # List of approved projects (with country codes)

# Image availability
# Baseline
poly_img_avail_base = f'{RESULTS_DIR}/baseline/polygon_imagery_coverage_{RUN_TAG}.csv'
low_cov_poly_base = f'{RESULTS_DIR}/baseline/low_coverage_polygons_{RUN_TAG}.csv'
proj_img_avail_base = f'{RESULTS_DIR}/baseline/project_imagery_coverage_{RUN_TAG}.csv'

# Early Verification (results are stored in the year_2 folder)
poly_img_avail_ev = f'{RESULTS_DIR}/year_2/polygon_imagery_coverage_{RUN_TAG}.csv'
low_cov_poly_ev = f'{RESULTS_DIR}/year_2/low_coverage_polygons_{RUN_TAG}.csv'
proj_img_avail_ev = f'{RESULTS_DIR}/year_2/project_imagery_coverage_{RUN_TAG}.csv'

## Read in files

In [3]:
# Load every input CSV into its own DataFrame.
approved_projects_df = pd.read_csv(approved_projects)  # approved projects with country codes
poly_df = pd.read_csv(feats)                           # polygon feature data
maxar_df = pd.read_csv(maxar_md)                       # Maxar image metadata

# Image availability results, read in the order:
# polygon-level, low-coverage polygons, project-level.
# Baseline
poly_img_avail_base_df, low_cov_poly_base_df, proj_img_avail_base_df = [
    pd.read_csv(csv_path)
    for csv_path in (poly_img_avail_base, low_cov_poly_base, proj_img_avail_base)
]

# Early Verification
poly_img_avail_ev_df, low_cov_poly_ev_df, proj_img_avail_ev_df = [
    pd.read_csv(csv_path)
    for csv_path in (poly_img_avail_ev, low_cov_poly_ev, proj_img_avail_ev)
]

## Preprocess polygon and maxar image data

In [4]:
# Run the project's preprocessing helpers on the raw polygon and Maxar image tables.
# debug=True presumably enables the summary counts printed in this cell's output
# (invalid geometries, unique polygons/projects, image totals) - confirm in image_coverage.
# NOTE(review): the *_gdf names suggest GeoDataFrames are returned - confirm.
poly_gdf = img_cover.preprocess_polygons(poly_df, debug=True)
maxar_gdf = img_cover.preprocess_images(maxar_df, debug=True)

Processing polygon data...
There are 0 polygons with invalid geometries.
All invalid geometries were fixed successfully.
There are 13537 unique polygons for 80 projects in this dataset.
Processing Maxar image data...
There are 175641 images for 12168 polygons in 78 projects in this dataset.


## Merge in the country codes

In [None]:
# NOTE(review): this country-code merge is currently disabled - every statement in
# the cell is commented out, so approved_projects_df is not joined to any table here.
# Either re-enable it (if a 'country' column is needed for filtering) or delete the cell.
# # Polygon feature data
# poly_gdf = poly_gdf.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')

# # Maxar metadata
# maxar_gdf = maxar_gdf.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')

# # Baseline
# poly_img_avail_base_df = poly_img_avail_base_df.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')
# low_cov_poly_base_df = low_cov_poly_base_df.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')
# proj_img_avail_base_df = proj_img_avail_base_df.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')

# # Early verification
# poly_img_avail_ev_df = poly_img_avail_ev_df.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')
# low_cov_poly_ev_df = low_cov_poly_ev_df.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')
# proj_img_avail_ev_df = proj_img_avail_ev_df.merge(approved_projects_df[['project_id', 'country']], on='project_id', how='left')

## Filter by landscape countries (optional)

## For now - filter by Jessica's shortlist of landscape projects

In [5]:
# Project IDs on the landscape-project shortlist (one per line for easier diffs)
shortlist_ids = [
    'bbd88e69-cd85-429e-bebf-6234bf82dbb3',
    '47118e50-d4d1-4ba0-8094-59cfa441dbb0',
    '36504a4e-f7a3-4963-9ff2-9aa9982cf990',
    'abdb9d09-7c55-4e26-8961-1aa26e991bbc',
    'd6481438-9603-4c68-b152-6586ed825b0a',
    'cf16b937-a02b-4691-b816-28669ec348f2',
    'e4fe2fa4-6869-4c1e-9347-ba9b135306f5',
    '9100baf3-9ac4-4db5-85a7-bc12b236a370',
    '243f93d2-0d4b-4dac-8b23-997e6528dc8e',
    '529e1bae-2187-473f-a2a3-17e577720aba',
]

In [6]:
# Restrict every table to the shortlisted project IDs.
def _keep_shortlisted(frame):
    """Return the rows of `frame` whose 'project_id' is in `shortlist_ids`."""
    return frame[frame['project_id'].isin(shortlist_ids)]


# Polygon feature data and Maxar metadata
poly_gdf = _keep_shortlisted(poly_gdf)
maxar_gdf = _keep_shortlisted(maxar_gdf)

# Baseline image availability
poly_img_avail_base_df = _keep_shortlisted(poly_img_avail_base_df)
low_cov_poly_base_df = _keep_shortlisted(low_cov_poly_base_df)
proj_img_avail_base_df = _keep_shortlisted(proj_img_avail_base_df)

# Early verification image availability
poly_img_avail_ev_df = _keep_shortlisted(poly_img_avail_ev_df)
low_cov_poly_ev_df = _keep_shortlisted(low_cov_poly_ev_df)
proj_img_avail_ev_df = _keep_shortlisted(proj_img_avail_ev_df)

In [7]:
# Merge polygon metadata into the Maxar image records (shortlisted projects only).
# The helper returns the merged table plus a second value; per the debug output this
# lists polygons that have no Maxar images, apparently as (poly_id, project_id)
# pairs - confirm in image_coverage.
merged_gdf, missing_polygons_list = img_cover.merge_polygons_images(maxar_gdf, poly_gdf, debug=True)

Merging polygon metadata into image data...
Total images in img_gdf: 98871
Total polygons in poly_gdf: 2229
Total rows in merged dataset: 98871
Unique polygons in merged dataset: 2199
30 polygons were dropped from the merged dataset because they have no Maxar images
Polygons without images (dropped at this stage): [('45b28e8c-5c0b-4f35-b147-5b40f40c48f3', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('d3975c6b-ddca-42ad-bd26-af20a4fadb65', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('6cb05973-5b3c-46a1-8db2-9d22c5b247cf', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('194f9bec-eea1-4c05-b4eb-7294128dd44a', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('5263be89-2f86-499c-960b-471fe044f01e', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('ad8c155c-7d14-4d37-a604-6af7c175a1a7', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('bffad4c6-14c4-4092-a137-b05130f98dca', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('f9d8436c-3635-47c0-b663-0ecef7366147', '47118e50-d4d1-4ba0-8094-59cfa441dbb0'), ('e3118e4b-f07e-49e4-b3a8-1

In [8]:
# Merge plantstart into poly_img_avail_ev_df (left join on poly_id, so every
# EV availability row is kept).
# Any plantstart column left over from a previous run of this cell is dropped
# first: without that, re-running would merge a second copy and pandas would
# emit plantstart_x / plantstart_y, breaking the later ['plantstart'] lookups.
poly_img_avail_ev_df = (
    poly_img_avail_ev_df
    .drop(columns='plantstart', errors='ignore')
    .merge(poly_gdf[['poly_id', 'plantstart']], on='poly_id', how='left')
)

In [None]:
# Percentage of polygons per project that have a non-null plantstart
(
    poly_img_avail_ev_df['plantstart']
    .notna()
    .groupby(poly_img_avail_ev_df['project_id'])
    .mean()
    .mul(100)
    .reset_index(name='pct_valid_plantstart')
)

In [None]:
# Flag polygons whose planting started more than two years before today, then
# report the percentage of flagged polygons per project.
today = pd.Timestamp.today()
two_years_ago = today - pd.DateOffset(years=2)
print(today)
print(two_years_ago)

# Vectorised comparison instead of a row-wise apply.
# NOTE(review): assumes plantstart is a datetime column; missing values (NaT)
# then compare as False and count as "not planted 2+ years ago" - confirm.
# The column holds a per-polygon boolean flag despite its 'pct_' name; the name
# is kept so anything reading this column keeps working.
poly_img_avail_ev_df['pct_pltstrt_2+yrs_ago'] = poly_img_avail_ev_df['plantstart'] < two_years_ago
poly_img_avail_ev_df.groupby('project_id')['pct_pltstrt_2+yrs_ago'].mean()*100

In [9]:
# Build the per-project planting / early-verification summary with the project's helper.
# Per the displayed result it has one row per project with the columns total_poly,
# %_poly_valid_plantstart, %_poly_planted_2yr_ago and %_poly_ev_img.
# NOTE(review): the helper also prints a timestamp (see output), presumably the
# reference date for the 2-year cutoff - confirm in analyze_img_coverage.
summary_df = analyze.summarize_project_planting_and_ev(poly_img_avail_ev_df)
summary_df

2025-04-09 13:39:31.492818


Unnamed: 0,project_id,total_poly,%_poly_valid_plantstart,%_poly_planted_2yr_ago,%_poly_ev_img
0,243f93d2-0d4b-4dac-8b23-997e6528dc8e,1,100.0,100.0,0.0
1,36504a4e-f7a3-4963-9ff2-9aa9982cf990,1541,100.0,15.444517,41.207008
2,47118e50-d4d1-4ba0-8094-59cfa441dbb0,118,100.0,29.661017,11.016949
3,529e1bae-2187-473f-a2a3-17e577720aba,2,100.0,100.0,0.0
4,9100baf3-9ac4-4db5-85a7-bc12b236a370,6,100.0,16.666667,16.666667
5,abdb9d09-7c55-4e26-8961-1aa26e991bbc,10,100.0,30.0,30.0
6,bbd88e69-cd85-429e-bebf-6234bf82dbb3,79,100.0,82.278481,84.810127
7,cf16b937-a02b-4691-b816-28669ec348f2,23,100.0,100.0,86.956522
8,d6481438-9603-4c68-b152-6586ed825b0a,447,100.0,59.50783,24.384787
9,e4fe2fa4-6869-4c1e-9347-ba9b135306f5,2,100.0,0.0,50.0


# Cohort-Level Image Availability Analysis

In [None]:
# Preview the baseline project-level image availability table.
# (The original line ended in a dangling '.', which is a SyntaxError and stops
# Restart & Run All at this cell.)
proj_img_avail_base_df.head()

## Calculating Overlap in Actual Imagery Coverage Between Baseline and EV Imagery Area 

In [None]:
# TODO (planned steps for this section):
# Start with poly_gdf and maxar_gdf
# Preprocess them
# Merge them into merged_gdf

# Do an initial hard filter on cloud cover, sun elevation angle, off-nadir angle, and date range for BOTH baseline & EV in the same notebook

# Compute the polygon-level imagery coverage
