# Maxar Image Availability Analysis

The Maxar image availability workflow takes as input a list of TerraFund project ids and returns as output a csv listing every project and how much of that project’s area has Maxar imagery coverage.

#### Workflow:
1. Pull info on project characteristics for the entire portfolio using the TerraMatch API
    - Repo/notebook: terrafund-portfolio-analysis/tm-api.ipynb
    - Input: list of TerraFund project IDs
    - Output: csv of all project features
2. Using the TM API csv, pull Maxar metadata
    - Repo/notebook: maxar-tools/decision-tree-metadata.ipynb and maxar-tools/src/decision_tree.py (TODO: confirm; this may need to change because of additions to the acquire_metadata function)
    - Input: csv of project features
    - Output: csv of maxar metadata
3. Create imagery features (??)
    - Repo/notebook: terrafund-portfolio-analysis/maxar-img-avail.py
    - Input: csv of maxar metadata and csv of TM project features
    - Output: csv of project features and percent imagery coverage
4. Identify projects with 100% imagery coverage


In [1]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import shape
from shapely.geometry import Polygon, Point
import ast
from datetime import datetime, timedelta
import re
import os
import math
import requests
import yaml
import json
import sys
sys.path.append('../src/')
import image_availability as img
import process_api_results as clean
import decision_trees as tree
import tm_api_utils as api_request

%load_ext autoreload
%autoreload 2

### Parameters

In [2]:
# File paths
tm_auth_path = '../secrets.yaml'
tm_staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?"                 # Use for testing queries
tm_prod_url = "https://api.terramatch.org/research/v3/sitePolygons?"                            # Use to pull data for analysis
approved_projects = '../terrafund-portfolio-analyses/projects_all_approved_202501091214.csv'    # List of projects with approved polygons
feats = '../data/tm_api_TEST.csv'                                                               # Polygon metadata & geometries from TM API
maxar_feats = '/home/darby/github_repos/maxar-tools/data/tm_api_TEST.csv'                       # Polygon metadata & geometries from TM API saved to maxar-tools repo. NOTE(review): absolute, machine-specific path
maxar_md = '../data/imagery_availability/comb_img_availability_2025-02-25.csv'                  # Metadata for Maxar images corresponding to polygons

# Define thresholds
cloud_thresh = 50             # Cloud cover cutoff (%): images are kept only when 'area:cloud_cover_percentage' < cloud_thresh
img_count = 1                 # Threshold for identifying image availability
baseline_range = (-366, 0)    # Baseline window, in days relative to plantstart (the year before planting, up to and including the plantstart date)
ev_range = (730, 1095)        # Early verification window, in days after plantstart (2-3 years)

### Load & Preprocess Data
Inputs: 
- TM API csv
- Maxar metadata csv

In [None]:
# # Load TM API polygons & convert to dataframe
# polygons = pd.read_csv(feats)
# polygons.columns = polygons.columns.str.lower()    # Enforce lowercase column names
# poly_df = pd.DataFrame(polygons)
# poly_df.columns

# # Rename columns
# poly_df = poly_df.rename(columns={'name': 'poly_name','geometry': 'poly_geom'})

# # Convert 'plantstart' column to a datetime
# poly_df['plantstart'] = pd.to_datetime(poly_df['plantstart'], errors='coerce')

In [3]:
# Load the TM API polygon export and lowercase the column names so later
# lookups do not depend on the casing used by the export
polygons = pd.read_csv(feats)
polygons.columns = polygons.columns.str.lower()

# Rename 'name' and 'geometry' to polygon-specific column names
poly_df = polygons.rename(columns={'name': 'poly_name', 'geometry': 'poly_geom'})

# Parse 'plantstart' as a datetime; values that cannot be parsed become NaT
poly_df['plantstart'] = pd.to_datetime(poly_df['plantstart'], errors='coerce')

# Build Shapely geometries from 'poly_geom' in a single pass: string values are
# parsed into Python dictionaries with ast.literal_eval first, anything else is
# handed to shape() directly. (The previous second `.apply(shape)` pass only
# re-wrapped geometries that were already Shapely objects.)
poly_df['poly_geom'] = poly_df['poly_geom'].apply(
    lambda x: shape(ast.literal_eval(x)) if isinstance(x, str) else shape(x)
)

# Wrap in a GeoDataFrame using 'poly_geom' as the active geometry column
poly_gdf = gpd.GeoDataFrame(poly_df, geometry='poly_geom', crs="EPSG:4326")

In [23]:
# Sanity check on the polygon GeoDataFrame: dimensions and number of distinct polygon ids
print(poly_gdf.shape)
# NOTE(review): a notebook cell only renders its last expression, so this head() call displays nothing here
poly_gdf.head()
# A unique count lower than the row count means some poly_id values occur more than once
len(poly_gdf['poly_id'].unique())

(20, 16)


16

In [5]:
# Load Maxar images metadata and convert to a GeoDataFrame
images = pd.read_csv(maxar_md)

# Select relevent columns
img_df = images[['title', 'project_id', 'poly_id', 'datetime', 'area:cloud_cover_percentage', 'eo:cloud_cover', 'area:avg_off_nadir_angle', 'img_geom']]

# Convert 'datetime' column to a datetime and rename
img_df.loc[:, 'datetime'] = pd.to_datetime(img_df['datetime'], format='%Y-%m-%dT%H:%M:%S.%fZ', errors='coerce') # Convert to datetime type
img_df.loc[:, 'datetime'] = img_df['datetime'].apply(lambda x: x.replace(tzinfo=None) if pd.notna(x) else x)    # Remove time zone info
img_df = img_df.rename(columns={'datetime': 'img_date'})                                                        # Rename 'datetime' column 'img_date'

# Convert stringified 'poly_geom' dictionaries into real dictionaries
img_df['img_geom'] = img_df['img_geom'].apply(lambda x: shape(ast.literal_eval(x)) if isinstance(x, str) else shape(x))

# Convert 'img_geom' (image footprint geometries) from WKT to Shapely objects
#poly_df['poly_geom'] = poly_df['poly_geom'].apply(shape)
img_df['img_geom'] = img_df['img_geom'].apply(shape)

# Convert DataFrame to GeoDataFrame
img_gdf = gpd.GeoDataFrame(img_df, geometry='img_geom', crs="EPSG:4326")

In [6]:
# Sanity check on the image GeoDataFrame: print its dimensions, then preview the first rows
print(img_gdf.shape)
img_gdf.head()

(229, 8)


Unnamed: 0,title,project_id,poly_id,img_date,area:cloud_cover_percentage,eo:cloud_cover,area:avg_off_nadir_angle,img_geom
0,Maxar WV03 Image 104001009309D800,529e1bae-2187-473f-a2a3-17e577720aba,e41e8d8a-efa3-4626-bbfe-5af48f23b6da,2024-04-14 10:26:14.756267,77.850805,77.320853,9.770739,"POLYGON ((0.44586 6.82522, 0.44583 6.82066, 0...."
1,Maxar WV02 Image 10300100E2B08800,529e1bae-2187-473f-a2a3-17e577720aba,e41e8d8a-efa3-4626-bbfe-5af48f23b6da,2023-02-14 10:31:15.104494,0.0,10.048596,14.278868,"POLYGON ((0.43586 6.81923, 0.43536 6.78056, 0...."
2,Maxar WV02 Image 10300100DF537E00,529e1bae-2187-473f-a2a3-17e577720aba,e41e8d8a-efa3-4626-bbfe-5af48f23b6da,2022-12-16 10:34:07.495606,38.283611,35.559627,21.58851,"POLYGON ((0.50385 7.02087, 0.34321 7.01415, 0...."
3,Maxar WV03 Image 104001009309D800,529e1bae-2187-473f-a2a3-17e577720aba,0b35ba48-a92c-48da-83d8-c21ddcdc3c0f,2024-04-14 10:26:14.756267,71.228748,77.320853,9.770739,"POLYGON ((0.44586 6.82522, 0.44583 6.82066, 0...."
4,Maxar WV02 Image 10300100E2B08800,529e1bae-2187-473f-a2a3-17e577720aba,0b35ba48-a92c-48da-83d8-c21ddcdc3c0f,2023-02-14 10:31:15.104494,0.0,10.048596,14.278868,"POLYGON ((0.43586 6.81923, 0.43536 6.78056, 0...."


### Merge Images with Polygons
Inputs:
- poly_gdf: geodataframe of polygon metadata
- img_gdf: geodataframe of maxar image metadata

Outputs:
- merged_gdf: merged geodataframe of maxar image metadata + associated polygon metadata

In [7]:
# Merge the image data with the polygon data (preserving image data rows and adding associated polygon attributes)
merged_gdf = img_gdf.merge(poly_gdf, on=['project_id', 'poly_id'], how='left')

# Ensure correct datetime format
merged_gdf['plantstart'] = pd.to_datetime(merged_gdf['plantstart'], errors='coerce')
merged_gdf['img_date'] = pd.to_datetime(merged_gdf['img_date'], errors='coerce')

In [8]:
# Inspect the merge result: dimensions, column names, then a preview of the first rows.
# The row count should equal img_gdf's; a larger count means the polygon table had repeated merge keys.
print(merged_gdf.shape)
print(merged_gdf.columns)
merged_gdf.head()


(293, 22)
Index(['title', 'project_id', 'poly_id', 'img_date',
       'area:cloud_cover_percentage', 'eo:cloud_cover',
       'area:avg_off_nadir_angle', 'img_geom', 'poly_name', 'status', 'siteid',
       'poly_geom', 'plantstart', 'plantend', 'practice', 'targetsys', 'distr',
       'numtrees', 'calcarea', 'indicators', 'establishmenttreespecies',
       'reportingperiods'],
      dtype='object')


Unnamed: 0,title,project_id,poly_id,img_date,area:cloud_cover_percentage,eo:cloud_cover,area:avg_off_nadir_angle,img_geom,poly_name,status,...,plantstart,plantend,practice,targetsys,distr,numtrees,calcarea,indicators,establishmenttreespecies,reportingperiods
0,Maxar WV03 Image 104001009309D800,529e1bae-2187-473f-a2a3-17e577720aba,e41e8d8a-efa3-4626-bbfe-5af48f23b6da,2024-04-14 10:26:14.756267,77.850805,77.320853,9.770739,"POLYGON ((0.44586 6.82522, 0.44583 6.82066, 0....",OESR Feature 2,approved,...,2023-01-14,9/29/2023,tree-planting,natural-forest,full,68000.0,70.538672,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit..."
1,Maxar WV02 Image 10300100E2B08800,529e1bae-2187-473f-a2a3-17e577720aba,e41e8d8a-efa3-4626-bbfe-5af48f23b6da,2023-02-14 10:31:15.104494,0.0,10.048596,14.278868,"POLYGON ((0.43586 6.81923, 0.43536 6.78056, 0....",OESR Feature 2,approved,...,2023-01-14,9/29/2023,tree-planting,natural-forest,full,68000.0,70.538672,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit..."
2,Maxar WV02 Image 10300100DF537E00,529e1bae-2187-473f-a2a3-17e577720aba,e41e8d8a-efa3-4626-bbfe-5af48f23b6da,2022-12-16 10:34:07.495606,38.283611,35.559627,21.58851,"POLYGON ((0.50385 7.02087, 0.34321 7.01415, 0....",OESR Feature 2,approved,...,2023-01-14,9/29/2023,tree-planting,natural-forest,full,68000.0,70.538672,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit..."
3,Maxar WV03 Image 104001009309D800,529e1bae-2187-473f-a2a3-17e577720aba,0b35ba48-a92c-48da-83d8-c21ddcdc3c0f,2024-04-14 10:26:14.756267,71.228748,77.320853,9.770739,"POLYGON ((0.44586 6.82522, 0.44583 6.82066, 0....",OESR Feature 1,approved,...,2022-10-14,10/16/2022,tree-planting,agroforest,full,2000.0,1.491618,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit..."
4,Maxar WV02 Image 10300100E2B08800,529e1bae-2187-473f-a2a3-17e577720aba,0b35ba48-a92c-48da-83d8-c21ddcdc3c0f,2023-02-14 10:31:15.104494,0.0,10.048596,14.278868,"POLYGON ((0.43586 6.81923, 0.43536 6.78056, 0....",OESR Feature 1,approved,...,2022-10-14,10/16/2022,tree-planting,agroforest,full,2000.0,1.491618,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit..."


### Filter Images Based on Constraints
Inputs:
- merged_gdf: merged geodataframe of maxar image metadata + associated polygon metadata

Outputs:
- img_gdf_filtered: a filtered version of the merged geodataframe of maxar image metadata + associated polygon metadata

In [9]:
# Create a date differential column
merged_gdf['date_diff'] = (merged_gdf['img_date'] - merged_gdf['plantstart']).dt.days

# Filter to retain only images within the desired time range and cloud cover parameters
img_gdf_filtered = merged_gdf[
    (merged_gdf['date_diff'] >= baseline_range[0]) &
    (merged_gdf['date_diff'] <= baseline_range[1]) &
    (merged_gdf['area:cloud_cover_percentage'] < cloud_thresh)
].copy()    # Copy to avoid SettingWithCopyWarning

In [19]:
# Number of distinct polygons that have at least one image row in the merged table
print('merged_gdf Unique Polygons:', len(merged_gdf['poly_id'].unique()))
# Image rows per polygon (before any date or cloud filtering)
merged_gdf['poly_id'].value_counts()

merged_gdf Unique Polygons: 16


poly_id
c9b59851-e4b7-4271-ac99-f4e601f86e85    36
e18c2562-7f73-4fd2-a361-a6eee01ed71a    36
4d13b994-be20-4392-9f0f-68709607e96b    30
e7e42658-360a-4452-8be4-60ea8d1ef0e7    26
f6871a61-a766-451a-be90-086219616cef    18
9e745667-0701-434a-8ecb-d917fe2bcf29    18
1cbca6da-0024-47dc-bb3a-06f8727d1cd6    17
410696dc-9579-4412-9c7b-55194cb1867c    17
0b9ef620-327a-4be2-8b0c-50ec0fa06788    17
212d5966-2c94-4db7-98e9-4847cfdc4215    17
8bc43765-9e53-4702-ba97-13005b806126    17
e7223a4d-68c6-4d32-b140-f871dec62bd3    17
9e508b07-4534-4e04-bb5b-bb0d3734a796    16
a91435c7-a179-4c1d-9891-de0fe1741654     5
0b35ba48-a92c-48da-83d8-c21ddcdc3c0f     3
e41e8d8a-efa3-4626-bbfe-5af48f23b6da     3
Name: count, dtype: int64

In [10]:
# Dimensions of the filtered table, then the distribution of cloud cover values that passed the filter
print(img_gdf_filtered.shape)
img_gdf_filtered['area:cloud_cover_percentage'].value_counts()

(38, 23)


area:cloud_cover_percentage
0.000000     27
38.283611     1
9.133758      1
1.945866      1
13.303457     1
25.365812     1
1.288262      1
15.639313     1
42.054866     1
29.062129     1
14.068180     1
40.883526     1
Name: count, dtype: int64

In [20]:
# Number of distinct polygons that still have at least one image after filtering
print('img_gdf_filtered Unique Polygons:', len(img_gdf_filtered['poly_id'].unique()))
# Remaining image rows per polygon
img_gdf_filtered['poly_id'].value_counts()

img_gdf_filtered Unique Polygons: 15


poly_id
4d13b994-be20-4392-9f0f-68709607e96b    4
e18c2562-7f73-4fd2-a361-a6eee01ed71a    4
e7e42658-360a-4452-8be4-60ea8d1ef0e7    4
c9b59851-e4b7-4271-ac99-f4e601f86e85    4
1cbca6da-0024-47dc-bb3a-06f8727d1cd6    3
212d5966-2c94-4db7-98e9-4847cfdc4215    3
f6871a61-a766-451a-be90-086219616cef    2
410696dc-9579-4412-9c7b-55194cb1867c    2
e7223a4d-68c6-4d32-b140-f871dec62bd3    2
9e745667-0701-434a-8ecb-d917fe2bcf29    2
9e508b07-4534-4e04-bb5b-bb0d3734a796    2
8bc43765-9e53-4702-ba97-13005b806126    2
0b9ef620-327a-4be2-8b0c-50ec0fa06788    2
e41e8d8a-efa3-4626-bbfe-5af48f23b6da    1
a91435c7-a179-4c1d-9891-de0fe1741654    1
Name: count, dtype: int64

### Compute Coverage for Each Polygon
Input:
- poly_gdf: geodataframe of polygon metadata
- img_gdf_filtered: a filtered version of the merged geodataframe of maxar image metadata + associated polygon metadata

Output:
- csv of percent imagery coverage by project

In [None]:
# Empty list to hold results
results = []

for i, polygon in poly_gdf.iterrows():
    poly_id = polygon['poly_id']
    poly_geom = polygon['poly_geom'] # Geometry column
    poly_area = polygon['calcarea']  # Calculated area of the polygon

    # Create a filtered GeoDataFrame that contains only images that intersect the polygon
    