In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
import re
import os
import math
import requests
import yaml
import json
import sys
sys.path.append('../src/')
import image_availability as img
import process_api_results as clean
import decision_trees as tree
import tm_api_utils as api_request

%load_ext autoreload
%autoreload 2

# Decision Tree
The decision tree workflow takes a list of TerraFund project ids as input and returns a csv that assigns a verification method to each polygon within a project. It uses APIs to pull data from two applications. See the [TM API documentation](https://api-staging.terramatch.org/research-service/documentation/api#/default/) for the full set of search parameters.
     
**Workflow**
1. Pull info on project characteristics for entire portfolio using TM API
    - repo/notebook: `terrafund-portfolio-analyses/tm-api.ipynb`
    - input: list of project ids
    - output: csv of all project features
2. Using TM API csv, pull Maxar metadata
    - repo/notebook: `maxar-tools/decision-tree-metadata.ipynb` and `maxar-tools/src/decision_tree.py`
    - input: csv of project features
    - output: csv of maxar metadata
    - **bottleneck:** add functionality to track dropped projects
3. Create imagery features using `image_availability.py`
    - repo/notebook: `terrafund-portfolio-analyses/decision-tree.ipynb`
    - input: csv maxar metadata and csv of project features
    - output: csv of project features & img count
4. Run decision tree
    - input: master csv
    - output: decisions csv

**Bottlenecks:**
- the staging environment is subject to changes, which break the request & cleaning functions
- inconsistent syntax in the `practice` and `target_sys` values
- missing projects
- missing ttc stats

# PARAMS

In [None]:
### Credentials & API endpoints ###
tm_auth_path = '../secrets.yaml'                              # YAML file; must provide an `access_token` entry
tm_staging_url = "https://api-staging.terramatch.org/research/v3/sitePolygons?"       # use for testing queries
tm_prod_url = "https://api.terramatch.org/research/v3/sitePolygons?"                  # Use to pull data for analysis

### Input / output files ###
approved_projects = '../projects_all_approved_202501091214.csv'

# Root of the local maxar-tools checkout. Set the MAXAR_TOOLS_DIR environment
# variable to point at your own clone instead of editing user-specific absolute
# paths in this cell; the fallback keeps the previously hard-coded location.
maxar_tools_dir = os.environ.get('MAXAR_TOOLS_DIR', '/home/darby/github_repos/maxar-tools')

# Maxar image metadata produced in the maxar-tools repo.
#maxar_md = "../data/imagery_availability/comb_img_availability_2025-02-19.csv"
maxar_md = os.path.join(maxar_tools_dir, 'data', 'metadata', 'final', 'comb_img_availability_2025-02-24.csv')

# Cleaned project features: one copy in this repo, one copy inside maxar-tools.
#feats = '../data/tm_api_021925.csv'
feats = '../data/tm_api_TEST.csv'
maxar_feats = os.path.join(maxar_tools_dir, 'data', 'tm_api_TEST.csv')

### Define tree thresholds ###
canopy_threshold = 40                                         # threshold for identifying open vs closed canopy projects
cloud_thresh = 50                                             # threshold for identifying image quality
img_count = 1                                                 # threshold for identifying image availability
baseline_range = (-365, 0)                                    # baseline window (1 year before plant start date)
ev_range = (730, 1095)                                        # EV window (2-3 years after plant start date)

## Gather Projects & Attributes
Uses the TerraMatch API to download project features for a provided set of project ids.

In [None]:
# Load the TerraMatch credentials from the secrets YAML file.
with open(tm_auth_path) as auth_file:
    auth = yaml.safe_load(auth_file)

# Bearer-token header sent with every TM API request.
headers = {'Authorization': f"Bearer {auth['access_token']}"}

In [None]:
# Restrict the approved-projects export to the two TerraFund frameworks.
terrafund_frameworks = ['terrafund-landscapes', 'terrafund']
full = (
    pd.read_csv(approved_projects)
    .loc[lambda projects: projects.framework_key.isin(terrafund_frameworks)]
)
display(full.framework_key.value_counts())

# De-duplicated project ids that will be requested from the API.
ids = list(set(full.project_id))
print(len(ids))

In [None]:
# Query the production TerraMatch endpoint for every project id; the helper is
# also given an outfile path for the raw JSON response.
project_results = api_request.pull_tm_api_data(
    tm_prod_url,
    headers,
    ids,
    outfile="../data/tm_api_response_prod_TEST.json",
)

## Clean Attributes
Performs a series of cleaning steps to correctly format the API output.

In [None]:
# Reload the raw API response from disk (TEST run).
# For the full production pull, read "../data/tm_api_response_prod.json" instead.
response_path = "../data/tm_api_response_prod_TEST.json"
with open(response_path, "r") as file:
    project_results = json.load(file)

In [None]:
# Clean the raw API response; the two outfile arguments are the copy kept in
# this repo and the copy saved to the maxar-tools repo.
clean_api = clean.process_tm_api_results(
    project_results,
    outfile1=feats,
    outfile2=maxar_feats,  # save to maxar-tools repo
)

In [None]:
# Summarise the cleaned API output (assumes clean_api is a pandas DataFrame — TODO confirm).
clean_api.info()

In [None]:
# Number of distinct project ids present in the cleaned output.
clean_api.project_id.nunique()

In [None]:
# Tally how often each target_sys value occurs in the cleaned output.
clean_api.target_sys.value_counts()

In [None]:
# Tally how often each practice value occurs in the cleaned output.
clean_api.practice.value_counts()

In [None]:
# Check for missing TTC: load the project-tiles JSON and count its top-level keys.
tiles_path = '../data/terrafund_projects_tiles.json'
with open(tiles_path, 'r') as file:
    missing_ttc_json = json.load(file)

missing_ids = [key for key in missing_ttc_json.keys()]
len(missing_ids)

## Gather image metadata
This step is performed in the [maxar-tools repo](https://github.com/wri/maxar-tools/tree/jessica_meta).

## Create image features

In [None]:
## Image features
# Inputs: the cleaned project features and the Maxar image metadata.
proj_df = pd.read_csv(feats)
img_df = pd.read_csv(maxar_md)

# Build the imagery features from both tables using the baseline / EV windows
# and the cloud-cover threshold defined in the params cell.
main = img.analyze_image_availability(
    proj_df,
    img_df,
    baseline_range,
    ev_range,
    cloud_thresh,
)

In [None]:
# Summarise the image-availability result (assumes main is a pandas DataFrame — TODO confirm).
main.info()

In [None]:
# Distribution of baseline image counts, sorted by count ascending.
# Output reads: image count on the left, number of rows with that count on the
# right (rows are presumably polygons — verify against analyze_image_availability).
main.baseline_img_count.value_counts().sort_index(ascending=True)