# Analyze Maxar Image Availability

Takes in CSVs of:
- All polygon features
- All Maxar images available for those polygons
- Filtered image availability per polygon
- Filtered image availability per project (aggregated)
- Low coverage polygons

In [None]:
import pandas as pd
import geopandas as gpd
import sys
sys.path.append('../src/')
import image_coverage as img_cover
import analyze_img_coverage as analyze
from datetime import datetime, timedelta

## Set file paths

In [None]:
# ---- Input file locations (relative to this notebook) ----

# Polygon metadata and geometries exported from the TM API
feats = '../data/tf_cohort1/tm_api_cohort1_2025-04-02.csv'

# Maxar image metadata for the images matched to those polygons
maxar_md = '../data/tf_cohort1/imagery_availability/comb_img_availability_cohort1_2025-04-02.csv'

# Approved projects, including each project's country code
approved_projects = '../projects_all_approved_202502211226.csv'

# ---- Image availability results: baseline ----
poly_img_avail_base = '../data/tf_cohort1/results/baseline/polygon_imagery_coverage_cohort1_2025-04-02.csv'
low_cov_poly_base = '../data/tf_cohort1/results/baseline/low_coverage_polygons_cohort1_2025-04-02.csv'
proj_img_avail_base = '../data/tf_cohort1/results/baseline/project_imagery_coverage_cohort1_2025-04-02.csv'

# ---- Image availability results: early verification (stored under year_2) ----
poly_img_avail_ev = '../data/tf_cohort1/results/year_2/polygon_imagery_coverage_cohort1_2025-04-02.csv'
low_cov_poly_ev = '../data/tf_cohort1/results/year_2/low_coverage_polygons_cohort1_2025-04-02.csv'
proj_img_avail_ev = '../data/tf_cohort1/results/year_2/project_imagery_coverage_cohort1_2025-04-02.csv'

## Read in files

In [None]:
# Reference tables: approved projects (with country codes), polygon
# features, and Maxar image metadata.
approved_projects_df = pd.read_csv(approved_projects)
poly_df = pd.read_csv(feats)
maxar_df = pd.read_csv(maxar_md)

# Baseline image availability: per-polygon, low-coverage polygons, per-project.
poly_img_avail_base_df, low_cov_poly_base_df, proj_img_avail_base_df = [
    pd.read_csv(csv_path)
    for csv_path in (poly_img_avail_base, low_cov_poly_base, proj_img_avail_base)
]

# Early verification image availability, same three tables.
poly_img_avail_ev_df, low_cov_poly_ev_df, proj_img_avail_ev_df = [
    pd.read_csv(csv_path)
    for csv_path in (poly_img_avail_ev, low_cov_poly_ev, proj_img_avail_ev)
]

In [None]:
# Preview the first two rows of the Maxar metadata table as a quick sanity check.
maxar_df.head(2)

## Preprocess polygon and maxar image data

In [None]:
# Run the project's preprocessing helpers on the raw polygon and Maxar tables.
# NOTE(review): the *_gdf names suggest these return GeoDataFrames, and
# debug=True presumably enables verbose output — confirm in image_coverage.
poly_gdf = img_cover.preprocess_polygons(poly_df, debug=True)
maxar_gdf = img_cover.preprocess_images(maxar_df, debug=True)

## Merge in the country codes

In [None]:
# Attach each project's country code to every table. A left join on
# project_id keeps all existing rows; projects that are absent from the
# approved-projects list end up with a null country.
country_lookup = approved_projects_df[['project_id', 'country']]
merge_kwargs = {'on': 'project_id', 'how': 'left'}

# Polygon features and Maxar metadata
poly_gdf = poly_gdf.merge(country_lookup, **merge_kwargs)
maxar_gdf = maxar_gdf.merge(country_lookup, **merge_kwargs)

# Baseline image availability tables
poly_img_avail_base_df = poly_img_avail_base_df.merge(country_lookup, **merge_kwargs)
low_cov_poly_base_df = low_cov_poly_base_df.merge(country_lookup, **merge_kwargs)
proj_img_avail_base_df = proj_img_avail_base_df.merge(country_lookup, **merge_kwargs)

# Early verification image availability tables
poly_img_avail_ev_df = poly_img_avail_ev_df.merge(country_lookup, **merge_kwargs)
low_cov_poly_ev_df = low_cov_poly_ev_df.merge(country_lookup, **merge_kwargs)
proj_img_avail_ev_df = proj_img_avail_ev_df.merge(country_lookup, **merge_kwargs)

## Filter by landscape countries (optional)

## For now - filter by Jessica's shortlist of landscape projects

In [None]:
# Project IDs on the landscape shortlist (one UUID per line for easy diffing)
shortlist_ids = [
    'bbd88e69-cd85-429e-bebf-6234bf82dbb3',
    '47118e50-d4d1-4ba0-8094-59cfa441dbb0',
    '36504a4e-f7a3-4963-9ff2-9aa9982cf990',
    'abdb9d09-7c55-4e26-8961-1aa26e991bbc',
    'd6481438-9603-4c68-b152-6586ed825b0a',
    'cf16b937-a02b-4691-b816-28669ec348f2',
    'e4fe2fa4-6869-4c1e-9347-ba9b135306f5',
    '9100baf3-9ac4-4db5-85a7-bc12b236a370',
    '243f93d2-0d4b-4dac-8b23-997e6528dc8e',
    '529e1bae-2187-473f-a2a3-17e577720aba',
]

In [None]:
def _keep_shortlisted(table):
    """Return only the rows of *table* whose project_id is in shortlist_ids."""
    on_shortlist = table['project_id'].isin(shortlist_ids)
    return table[on_shortlist]


# Restrict every table to the shortlisted projects.

# Polygon features and Maxar metadata
poly_gdf = _keep_shortlisted(poly_gdf)
maxar_gdf = _keep_shortlisted(maxar_gdf)

# Baseline image availability tables
poly_img_avail_base_df = _keep_shortlisted(poly_img_avail_base_df)
low_cov_poly_base_df = _keep_shortlisted(low_cov_poly_base_df)
proj_img_avail_base_df = _keep_shortlisted(proj_img_avail_base_df)

# Early verification image availability tables
poly_img_avail_ev_df = _keep_shortlisted(poly_img_avail_ev_df)
low_cov_poly_ev_df = _keep_shortlisted(low_cov_poly_ev_df)
proj_img_avail_ev_df = _keep_shortlisted(proj_img_avail_ev_df)

In [None]:
# Combine the (shortlist-filtered) Maxar image table with the polygon table.
# The helper returns the merged table plus a second value stored as
# missing_polygons_list (presumably polygons with no matching image — confirm
# in image_coverage).
# NOTE(review): the images are passed before the polygons even though the
# helper's name lists polygons first — confirm the expected argument order.
merged_gdf, missing_polygons_list = img_cover.merge_polygons_images(maxar_gdf, poly_gdf, debug=True)

In [None]:
# Bring each polygon's plantstart value into the early-verification polygon
# table. The left join on poly_id keeps every EV row, matched or not.
plantstart_by_poly = poly_gdf[['poly_id', 'plantstart']]
poly_img_avail_ev_df = poly_img_avail_ev_df.merge(
    plantstart_by_poly,
    on='poly_id',
    how='left',
)

In [None]:
def _pct_non_null(values):
    """Return the percentage (0-100) of non-null entries in *values*."""
    return values.notna().mean()*100


# Per project: share of polygons that have a non-null plantstart.
(
    poly_img_avail_ev_df
    .groupby('project_id')['plantstart']
    .apply(_pct_non_null)
    .reset_index(name='pct_valid_plantstart')
)

In [None]:
# Cutoff: two calendar years before the moment this cell is run.
today = pd.Timestamp.today()
two_years_ago = today - pd.DateOffset(years=2)
print(today)
print(two_years_ago)

# Flag each polygon whose plantstart falls before the cutoff, using one
# vectorized comparison instead of a per-row apply.
# NOTE: despite the 'pct_' prefix this column holds per-polygon booleans; the
# name is kept unchanged because later cells receive this frame.
# NOTE(review): assumes plantstart is datetime-typed, in which case missing
# values compare as False and count as "not 2+ years ago" — confirm.
poly_img_avail_ev_df['pct_pltstrt_2+yrs_ago'] = poly_img_avail_ev_df['plantstart'] < two_years_ago

# Per project: percentage of polygons planted more than two years ago
# (the mean of the boolean flag, scaled to 0-100). Shown as the cell output.
poly_img_avail_ev_df.groupby('project_id')['pct_pltstrt_2+yrs_ago'].mean()*100

In [None]:
# Build the summary via the project helper from the early-verification
# polygon table (which now carries plantstart and the 2+ years flag), then
# preview its first two rows.
# NOTE(review): which columns the helper reads is not visible here — confirm
# in analyze_img_coverage before renaming any column on this frame.
summary_df = analyze.summarize_project_planting_and_ev(poly_img_avail_ev_df)
summary_df.head(2)

# Cohort-Level Image Availability Analysis

In [None]:
# Preview the baseline project-level image availability table.
# (The original line ended in a dangling '.', which is a SyntaxError; it is
# completed as a two-row preview to match the other preview cells.)
proj_img_avail_base_df.head(2)

## Calculating Overlap in Actual Imagery Coverage Between Baseline and EV Imagery Area 

In [None]:
# Start with poly_gdf and maxar_gdf
# Preprocess them
# Merge them into merged_gdf

# Do initial hard filter of cloud cover, sun elevation angle, off nadir angle, and date range for BOTH baseline & EV in same notebook

# Compute the polygon-level imagery coverage
