In [2]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import re
import os
import math
import requests
import yaml
import json
import sys
sys.path.append('../src/')
import image_availability as img
import process_api_results as clean
import decision_trees as tree
import tm_api_utils as api_request

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


See the [TM API documentation](https://api-staging.terramatch.org/research-service/documentation/api#/default/) for the full set of search parameters.



# Decision Tree

     
**Workflow**
1. Pull info on project characteristics for the entire portfolio using the TM API
    - repo/notebook: `terrafund-portfolio-analyses/tm-api.ipynb`
    - input: list of project ids
    - output: csv of all project features
2. Using the TM API csv, pull Maxar metadata
    - repo/notebook: `maxar-tools/decision-tree-metadata.ipynb` and `maxar-tools/src/decision_tree.py`
    - input: csv of project features
    - output: csv of Maxar metadata
3. Create imagery features using `image_availability.py` (TODO: confirm what input format this requires)
    - repo/notebook: `terrafund-portfolio-analyses/decision-tree.ipynb`
    - input: csv of Maxar metadata and csv of project features
    - output: csv of project features & image counts
4. Run decision tree
    - input: master csv
    - output: decisions csv

# PARAMS

In [25]:
# --- Credentials and TerraMatch API endpoints ---
tm_auth_path = '../secrets.yaml'
# Staging endpoint: use for testing queries.
tm_staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?"
# Production endpoint: use to pull data for analysis.
tm_prod_url = "https://api.terramatch.org/research/v3/sitePolygons?"

# --- Data files read / written by this notebook ---
approved_projects = '../projects_all_approved_202501091214.csv'
maxar_md = "../data/imagery_availability/comb_img_availability_2025-02-10.csv"
feats = '../data/tm_api_021025.csv'

# --- Decision tree thresholds ---
# Threshold for identifying open vs closed canopy projects.
canopy_threshold = 40
# Threshold for identifying image quality.
cloud_thresh = 50
# Threshold for identifying image availability.
img_count = 1
# Baseline window (1 year before plant start date).
baseline_range = (-365, 0)
# EV window (2-3 years after plant start date).
ev_range = (730, 1095)

## Gather Projects & Attributes
Uses the TerraMatch API to download project features for a provided set of project ids.

In [26]:
# Read the API credentials from the YAML secrets file and build the
# bearer-token header passed to the TM API request helper.
with open(tm_auth_path) as secrets_file:
    auth = yaml.safe_load(secrets_file)

headers = dict(Authorization=f"Bearer {auth['access_token']}")

In [27]:
# Load the approved-project list and keep only the two TerraFund frameworks,
# then show how many projects fall under each.
terrafund_frameworks = ['terrafund-landscapes', 'terrafund']
full = pd.read_csv(approved_projects)
full = full[full['framework_key'].isin(terrafund_frameworks)]
full['framework_key'].value_counts()

terrafund               108
terrafund-landscapes     99
Name: framework_key, dtype: int64

In [28]:
# Unique project ids in first-seen (file) order.
# drop_duplicates() keeps the order stable across kernel restarts, whereas
# list(set(...)) orders strings by their per-session hash, so the sequence of
# API requests (and any order-dependent output) changed from run to run.
ids = full['project_id'].drop_duplicates().tolist()
len(ids)

207

In [29]:
# Request TerraMatch API data for every project id in `ids`
# (the recorded run took ~20 minutes for 207 projects).
# NOTE(review): this passes the staging URL, while the PARAMS cell labels
# staging as "use for testing queries" and prod as the endpoint for pulling
# analysis data — confirm which endpoint is intended here.
project_results = api_request.pull_tm_api_data(tm_staging_url, headers, ids)

Processing Projects: 100%|████████████████████████████████████████████████████████████████████| 207/207 [20:07<00:00,  5.84s/project]


## Clean Attributes
Performs a series of cleaning steps to correctly format the API output.

In [41]:
# Load the saved TM API response from disk; this re-binds `project_results`.
with open("../data/tm_api_response.json") as response_file:
    project_results = json.load(response_file)

In [42]:
# Clean the raw API results and pass two output paths for the cleaned table:
#   outfile1 -> this repo (`feats`)
#   outfile2 -> the `data/` folder of a local maxar-tools checkout, same file name
# The maxar-tools location comes from the MAXAR_TOOLS_DIR environment variable
# (default: ~/github/maxar-tools) instead of a hardcoded absolute per-user path,
# so the notebook also runs on other machines. The file name is derived from
# `feats` so the two copies cannot drift apart.
maxar_tools_dir = os.environ.get('MAXAR_TOOLS_DIR',
                                 os.path.expanduser('~/github/maxar-tools'))
clean_api = clean.process_tm_api_results(
    project_results,
    outfile1=feats,
    outfile2=os.path.join(maxar_tools_dir, 'data', os.path.basename(feats)),  # save to maxar-tools repo
)

Number of rows missing a 'plantstart' date: 630/16251
Number of rows missing a 'plantend' date: 6835/16251
⚠️ Total rows missing start and end plant date: 560
⚠️ Total projects missing 'plantstart': 18
⚠️ Total polygons missing 'plantstart': 626
Projects fully removed: 0
Projects partially affected: 18


In [43]:
# Summarize the cleaned API table: column dtypes and non-null counts.
clean_api.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 15621 entries, 0 to 16250
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   project_id  15621 non-null  object        
 1   poly_id     15621 non-null  object        
 2   site_id     15621 non-null  object        
 3   geometry    15621 non-null  object        
 4   plantstart  15621 non-null  datetime64[ns]
 5   plantend    9346 non-null   datetime64[ns]
 6   ttc_2023    1 non-null      float64       
dtypes: datetime64[ns](2), float64(1), object(4)
memory usage: 976.3+ KB


In [None]:
# TODO: why are there only 127 projects after pulling from the API?
# TODO: are there duplicated polygons in the dataframe? Each row should be a single polygon.
# TODO: what is happening with the TTC stats?

In [35]:
# Show only the rows that have a ttc_2023 value.
clean_api.loc[clean_api['ttc_2023'].notna()]

Unnamed: 0,project_id,poly_id,site_id,plantstart,plantend,ttc_2023
6078,389aad5b-6577-4cea-bf9f-446dcfd94966,a40e322b-42ff-4008-8407-e611b170a39c,ec236c6f-214d-46dc-af61-6c1aa44530fc,2022-01-08,NaT,90.0


In [None]:
# Checking missing ttc: load the projects/tiles JSON and count its top-level keys.
with open('../data/terrafund_projects_tiles.json') as tiles_file:
    missing_ttc_json = json.load(tiles_file)

missing_ids = [tile_key for tile_key in missing_ttc_json.keys()]
len(missing_ids)

## Gather image metadata
This step is performed in the [maxar-tools repo](https://github.com/wri/maxar-tools/tree/jessica_meta).

## Create image features

In [64]:
## Image features
# Load the project feature table and the Maxar image metadata from CSV.
proj_df, img_df = pd.read_csv(feats), pd.read_csv(maxar_md)

# Build the image-availability table from the project features, the image
# metadata, the baseline / EV windows and the cloud threshold.
main = img.analyze_image_availability(
    proj_df, img_df, baseline_range, ev_range, cloud_thresh
)

  img_df['datetime'] = pd.to_datetime(img_df['datetime'], format='%Y-%m-%dT%H:%M:%S.%fZ', errors='coerce')
  img_df.loc[:, 'datetime'] = img_df['datetime'].apply(lambda x: x.replace(tzinfo=None))


In [65]:
# Display the image-availability result (project features plus the
# baseline_img_count and ev_img_count columns).
main

Unnamed: 0,unnamed: 0,name,status,siteid,geometry,plantstart,plantend,practice,targetsys,distr,numtrees,calcarea,indicators,establishmenttreespecies,reportingperiods,poly_id,project_id,baseline_img_count,ev_img_count
0,0,R12W 11,approved,60aa51dc-65f1-4b1d-be97-a7e962201819,"{'type': 'Polygon', 'coordinates': [[[32.64296...",2023-02-22,2023-03-31,tree-planting,agroforest,full,0.0,0.116644,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",f8575cf2-9b8e-4e93-90de-667b15f8f0c0,6b2dc32e-4414-48e0-9472-ebb49a766c28,2.0,2.0
1,1,R29W11,approved,60aa51dc-65f1-4b1d-be97-a7e962201819,"{'type': 'Polygon', 'coordinates': [[[32.64465...",2024-01-28,2024-09-28,tree-planting,agroforest,full,300.0,1.694570,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",02e33916-6380-4a18-81e7-97d29db84cb5,6b2dc32e-4414-48e0-9472-ebb49a766c28,0.0,2.0
2,2,R39W5,approved,990421d0-2287-4a8c-824a-cbf732199c33,"{'type': 'Polygon', 'coordinates': [[[32.83510...",2024-04-01,2024-07-31,tree-planting,agroforest,full,,0.046533,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",05733383-5ddb-4e2f-8ba9-e5889eacef17,6b2dc32e-4414-48e0-9472-ebb49a766c28,0.0,0.0
3,3,R38W5,approved,990421d0-2287-4a8c-824a-cbf732199c33,"{'type': 'Polygon', 'coordinates': [[[32.83320...",2024-04-01,2024-07-31,tree-planting,agroforest,full,,0.184905,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",e489aba2-b89a-455e-9436-05275bbff778,6b2dc32e-4414-48e0-9472-ebb49a766c28,0.0,0.0
4,4,R33W12,approved,1373d39d-7cca-4382-8518-b57a06df9a02,"{'type': 'Polygon', 'coordinates': [[[32.58095...",2024-06-01,2024-11-30,tree-planting,agroforest,full,240.0,0.264170,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",112e1fd2-60bd-4384-a3f7-67ae9cbcbbcd,6b2dc32e-4414-48e0-9472-ebb49a766c28,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2409,2409,Wilson Muguna Kaulio,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.85101...",2023-08-16,2023-08-23,tree-planting,agroforest,partial,3.0,0.371539,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",a6b4a860-9214-4508-8438-f15fe90f2253,71ffc45f-584a-472c-808d-7715c8d94929,0.0,3.0
2410,2410,Joe Laaria Nkaari,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.86342...",2023-07-28,2023-08-04,tree-planting,agroforest,partial,4.0,0.614006,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",bb056c2b-7bd2-4ba4-96bc-3a8c24a789c9,71ffc45f-584a-472c-808d-7715c8d94929,0.0,3.0
2411,2411,Godfrey Kaigera Manyara,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.84986...",2023-10-12,2023-10-19,tree-planting,agroforest,partial,1.0,0.203293,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",b6ff77f7-3fc6-495c-9842-957d68d42f87,71ffc45f-584a-472c-808d-7715c8d94929,0.0,3.0
2412,2412,Margaret Kiriga,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.85049...",2023-06-19,2023-06-26,tree-planting,agroforest,partial,2.0,0.387531,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",6440ac2f-6ff6-431e-954d-e4d8ff57e81b,71ffc45f-584a-472c-808d-7715c8d94929,0.0,3.0


In [69]:
# Display the project feature table as loaded from `feats`.
proj_df

Unnamed: 0,unnamed: 0,name,status,siteid,geometry,plantstart,plantend,practice,targetsys,distr,numtrees,calcarea,indicators,establishmenttreespecies,reportingperiods,poly_id,project_id
0,0,R12W 11,approved,60aa51dc-65f1-4b1d-be97-a7e962201819,"{'type': 'Polygon', 'coordinates': [[[32.64296...",2023-02-22,2023-03-31,tree-planting,agroforest,full,0.0,0.116644,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",f8575cf2-9b8e-4e93-90de-667b15f8f0c0,6b2dc32e-4414-48e0-9472-ebb49a766c28
1,1,R29W11,approved,60aa51dc-65f1-4b1d-be97-a7e962201819,"{'type': 'Polygon', 'coordinates': [[[32.64465...",2024-01-28,2024-09-28,tree-planting,agroforest,full,300.0,1.694570,[],[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",02e33916-6380-4a18-81e7-97d29db84cb5,6b2dc32e-4414-48e0-9472-ebb49a766c28
2,2,R39W5,approved,990421d0-2287-4a8c-824a-cbf732199c33,"{'type': 'Polygon', 'coordinates': [[[32.83510...",2024-04-01,2024-07-31,tree-planting,agroforest,full,,0.046533,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",05733383-5ddb-4e2f-8ba9-e5889eacef17,6b2dc32e-4414-48e0-9472-ebb49a766c28
3,3,R38W5,approved,990421d0-2287-4a8c-824a-cbf732199c33,"{'type': 'Polygon', 'coordinates': [[[32.83320...",2024-04-01,2024-07-31,tree-planting,agroforest,full,,0.184905,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2023-01-31T00:00:00.000Z', 'submit...",e489aba2-b89a-455e-9436-05275bbff778,6b2dc32e-4414-48e0-9472-ebb49a766c28
4,4,R33W12,approved,1373d39d-7cca-4382-8518-b57a06df9a02,"{'type': 'Polygon', 'coordinates': [[[32.58095...",2024-06-01,2024-11-30,tree-planting,agroforest,full,240.0,0.264170,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2022-09-30T00:00:00.000Z', 'submit...",112e1fd2-60bd-4384-a3f7-67ae9cbcbbcd,6b2dc32e-4414-48e0-9472-ebb49a766c28
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2409,2409,Wilson Muguna Kaulio,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.85101...",2023-08-16,2023-08-23,tree-planting,agroforest,partial,3.0,0.371539,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",a6b4a860-9214-4508-8438-f15fe90f2253,71ffc45f-584a-472c-808d-7715c8d94929
2410,2410,Joe Laaria Nkaari,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.86342...",2023-07-28,2023-08-04,tree-planting,agroforest,partial,4.0,0.614006,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",bb056c2b-7bd2-4ba4-96bc-3a8c24a789c9,71ffc45f-584a-472c-808d-7715c8d94929
2411,2411,Godfrey Kaigera Manyara,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.84986...",2023-10-12,2023-10-19,tree-planting,agroforest,partial,1.0,0.203293,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",b6ff77f7-3fc6-495c-9842-957d68d42f87,71ffc45f-584a-472c-808d-7715c8d94929
2412,2412,Margaret Kiriga,approved,a5798ea5-999a-42ef-9ce7-c6967bf91d0c,"{'type': 'Polygon', 'coordinates': [[[37.85049...",2023-06-19,2023-06-26,tree-planting,agroforest,partial,2.0,0.387531,"[{'indicatorSlug': 'restorationByStrategy', 'y...",[],"[{'dueAt': '2024-01-31T00:00:00.000Z', 'submit...",6440ac2f-6ff6-431e-954d-e4d8ff57e81b,71ffc45f-584a-472c-808d-7715c8d94929


In [4]:
# Load the Maxar image metadata and count metadata rows per project.
df = pd.read_csv(maxar_md)
df['project_id'].value_counts()

20245af8-de10-46d8-8895-1bfa022932ce    4353
a8940698-ff28-456c-b8cd-f7289e612913    2392
5b02c3a0-0f64-4506-8cc5-719dfa8c1641    1158
562fa859-5124-49a5-947f-e1ddb7680e07     866
802bb88c-5eb5-4ce2-836f-19bc8e0ddfc4     845
71ffc45f-584a-472c-808d-7715c8d94929     606
aa0f8df7-b668-48f0-a8ce-bf5558f2a9d6     554
4124a403-45ca-4e48-be6f-6c15b3b48617     507
8cbda7fc-aa17-444b-8425-7047fade708c     416
8a112e82-e191-44ad-b306-2578c064104b     228
30812e57-54e1-4bde-af22-8856baee51d9     215
01918b25-6544-4027-82aa-6703c7c04784     177
96c86eae-d4f9-45d8-9780-69c55a9e36e9     148
bad12444-7180-4b29-a14c-d2b4305b7f52      84
6b2dc32e-4414-48e0-9472-ebb49a766c28      61
76374ef9-c5df-4d62-a28b-92b100b95581      58
744d9613-02a4-4484-be15-0d6a401f0086      57
71706fe4-0e80-4552-8be2-d6f6a55e13d2      41
bf838a8a-ab07-44c1-8f63-3d929782414a      25
576c1769-ed59-406d-ae6f-d70b136d028c      23
05c05359-98f9-4bb3-a7fa-236051940aa6      14
9bdf80ad-4c2d-4292-a21b-95b819f9c4be      11
e4a9f60c-e

In [5]:
# Count metadata rows per polygon.
df['poly_id'].value_counts()

04f2821e-54f3-4a5a-ab57-2eea5c927df1    63
bb064fa6-c72c-4768-a1cb-4bc55a0c32f8    61
e15e410f-7261-43fe-a76d-b2d409e22fe6    53
65c7e59f-efa0-4edb-b874-1d8cad6cb620    35
d7f7c824-759f-4743-a6ec-0e5b64e24c47    30
                                        ..
33c5eee5-43d3-4caa-ac21-fcda02b43cb2     1
eb195285-4113-4969-9fca-505bdd09dacb     1
0ab16e88-31c8-496f-8525-20f5a02a8ec9     1
4dc385b5-d63f-4ce0-a7bf-53af59f626e7     1
346df5b7-ca4b-42d1-b5d1-3a60a1458cce     1
Name: poly_id, Length: 2249, dtype: int64

In [8]:
# Inspect every metadata row for the polygon with the most rows.
df.loc[df['poly_id'].eq('04f2821e-54f3-4a5a-ab57-2eea5c927df1')]

Unnamed: 0,gsd,title,datetime,eo:bands,platform,utc_hour,local_hour,instruments,associations,view:azimuth,...,spacecraft_to_target_azimuth_start,target_to_spacecraft_azimuth_start,target_to_spacecraft_elevation_avg,target_to_spacecraft_elevation_end,target_to_spacecraft_elevation_max,target_to_spacecraft_elevation_min,target_to_spacecraft_elevation_start,project_id,project_na,poly_id
11821,0.579927,Maxar WV02 Image 10300101042B9C00,2024-09-04T11:30:20.772374Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-02,11,10,['VNIR'],[],43.821103,...,41.835365,222.084341,58.873222,59.265828,59.265828,58.347257,58.347257,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
11822,0.609240,Maxar WV02 Image 10300100FE4F4D00,2024-07-11T11:11:46.470294Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-02,11,10,['VNIR'],[],331.089796,...,332.600430,152.365618,55.445704,57.915515,57.915515,52.838052,52.838052,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
11823,0.531817,Maxar WV02 Image 10300100FCC35000,2024-07-11T11:10:38.320767Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-02,11,10,['VNIR'],[],263.718574,...,269.446170,89.082485,64.693352,65.207385,65.207385,64.002416,64.002416,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
11824,0.384379,Maxar WV03 Image 104001009352E100,2024-04-25T11:34:53.895735Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-03,11,10,['VNIR'],[],92.292068,...,103.876564,284.213911,58.848286,58.464375,59.080009,58.464375,58.821167,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
11825,0.383724,Maxar WV03 Image 1040010094173400,2024-03-24T11:34:40.527922Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-03,11,10,['VNIR'],[],78.498741,...,81.602780,261.915384,59.056205,58.789767,59.465231,58.789767,59.465231,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11879,0.586244,Maxar WV02 Image 10300100C38FB800,2021-08-10T11:37:04.190897Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-02,11,10,['VNIR'],[],146.360908,...,145.957630,325.957626,57.783214,57.284030,58.389031,57.284030,58.389031,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
11880,0.545624,Maxar WV02 Image 10300100C3702D00,2021-08-05T11:23:00.361831Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-02,11,10,['VNIR'],[],352.022132,...,351.808600,171.808594,62.865279,63.330505,63.330505,62.229368,62.229368,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
11881,0.386494,Maxar WV03 Image 1040010069707300,2021-07-30T11:35:14.657966Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-03,11,10,['VNIR'],[],48.530989,...,47.838017,227.838017,58.733570,58.632347,58.858125,58.632347,58.858125,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
11882,0.542029,Maxar WV02 Image 10300100C2007900,2021-07-25T11:26:32.845260Z,"[{'name': 'pan', 'center_wavelength': 625}, {'...",worldview-02,11,10,['VNIR'],[],188.733312,...,189.411560,9.411560,63.028658,62.605910,63.603148,62.605910,63.603148,01918b25-6544-4027-82aa-6703c7c04784,York,04f2821e-54f3-4a5a-ab57-2eea5c927df1
