In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sys
sys.path.append('../src/')
import visualizations as vis
import brownbag as b

%load_ext autoreload
%autoreload 2

In [None]:
# Projects we've already done data production for -- 11 remote, 6 field.
remote_done = [
    "CAM_22_FAP",
    "LBR_22_SCNL",
    "MDG_22_DWCT",
    "MLW_22_WFZ",
    "RWA_22_ARCOS",
    "RWA_22_ICRAF",
    "RWA_22_BIRDLIFE",
    "SLE_22_YARDO",
    "TAZ_22_SJT",
    "TGO_22_MAPTO",
    "TGO_22_PERJAT",
]  # 11 entries

field_done = [
    "GHA_22_INEC",
    "GHA_22_PADO",
    "KEN_22_GREENPOT",
    "GHA_22_ECOCARE",
    "GHA_22_GGV",
    "KEN_22_DNRC",
]  # 6 entries

mangrove_done = [
    "GHA_22_FOTE",
    "MOZ_22_ISTITUTO",
    "TAZ_22_FT",
    "GHA_22_HMPOANO",
]  # 4 entries

# NOTE(review): "TGO_PADES" is the only name here without the "_22_" infix --
# confirm it matches the `project_name` values in the results files.
review_req = [
    "TGO_PADES",
    "SLE_22_EFA",
    "NIG_22_ARAMD",
    "KEN_22_FT",
    "GIN_22_AMSPM",
    "DRC_22_PWP",
    "BUR_22_PWP",
    "MLI_22_SAHELECO",
]  # 8 entries

remote_upcoming = [
    "CAR_22_EEDD",
    "KEN_22_DREK",
    "RWA_22_SE",
    "UGA_22_SWAGEN",
    "UGA_22_PANR",
    "NIG_22_NCF",
    "ETH_22_WVE",
    "ETH_22_SUNARMA",
    "RWA_22_RECOR",
    "CIV_22_SOSFORETS",
    "KEN_22_KENVO",
    "NIG_22_IITA",
    "TGO_22_ADHD",
]  # 13 entries


## Notes on Findings

Experiments
- Shifting the canopy cover threshold from 40% to 60% moved 19 polygons from weak remote to strong remote, but had no significant effect on the proportion allocated to field.

Other updates / findings
- 13 projects contain multiple practices in a single polygon. 4 projects were updated where a single row contained a multi-practice entry. Some projects are all multi-practice, so there is no clear solution.
- EV will use baseline canopy if the 2-year mark has passed (TTC not yet run for EV).
  
In what scenarios will the decision tree be unable to provide a result?
- If more than one practice is being used (e.g. `assisted-natural-regeneration,tree-planting`)
- 228 polygons are flagged with `review required`, either because multiple `practice` values are assigned to a single polygon (159 polygons) or because canopy cover at baseline is missing (69 polygons).
  

In [None]:
# Decision-tree outputs for Cohort 1, one CSV per experiment run.
df1 = pd.read_csv("../data/results/dtree_output_c1_07-14-2025_exp1.csv")
df2 = pd.read_csv("../data/results/dtree_output_c1_07-14-2025_exp2.csv")

# Later cells read a frame named `df`, but no visible cell assigns it, so a
# fresh kernel (Restart & Run All) fails with NameError on first use.
# NOTE(review): binding it to the exp1 results is an assumption -- confirm which
# experiment the `df` cells were originally run against.
df = df1

In [None]:
vis.portfolio_breakdown(df1, title="Cohort 1 Decisions")

In [None]:
vis.NEW_portfolio_breakdown(df)

In [None]:
# Union of the three *_done project lists.
done = set().union(remote_done, field_done, mangrove_done)
# Rows for every project that is not in that union.
c1_remain = df.loc[~df['project_name'].isin(done)]
print(len(done), len(c1_remain['project_name'].unique()))

In [None]:
# Remaining field verification requirements for Cohort 1:
# drop every project that is already done or is queued for remote verification.
remove = set().union(remote_done, field_done, mangrove_done, remote_upcoming)
c1_field = df.loc[~df['project_name'].isin(remove)]

In [None]:
len(c1_field.project_name.unique())

## Remote Upcoming
- For the polygons that are not strong remote, does anything indicate we need to adjust the criteria?

In [None]:
# Rows for the projects listed in `remote_upcoming`, charted by decision mix.
rm = df[df['project_name'].isin(remote_upcoming)]
vis.plot_decision_proportions(
    rm,
    sort_by='strong remote',
    # Count comes from the list itself so the title cannot drift out of sync
    # when projects are added to or removed from `remote_upcoming`.
    title=f"{len(remote_upcoming)} Qualified for Remote Verification",
)

## Compare baseline & EV decisions

In [None]:
# Experiment 4 decision-tree output.
df4 = pd.read_csv("../data/results/dtree_output_c1_07-14-2025_exp4.csv")

# Project-name groups used to slice df4.
field = set(field_done)
remote = set(remote_done)
review = set(mangrove_done).union(review_req)
done = remote | field | review  # same members as the four source lists combined

# One frame per group; "remaining" is everything outside `done`.
c1_field = df4.loc[df4['project_name'].isin(field)]
c1_remote = df4.loc[df4['project_name'].isin(remote)]
c1_remaining = df4.loc[~df4['project_name'].isin(done)]
c1_review = df4.loc[df4['project_name'].isin(review)]

In [None]:
print(len(c1_remaining.project_name.unique()))

In [None]:
# Decision mix for the completed field projects.
# Figure height scales with the number of distinct project_id values in all of
# df4, not only those in the plotted subset.
vis.plot_decision_proportions(
    c1_field,
    title="COMPLETED - FIELD",
    sort_by="strong field",
    figsize=(24, df4["project_id"].nunique(dropna=False) * 0.1),
    group_height=0.8,
)

In [None]:
# Decision mix for the completed remote projects.
# Figure height scales with the number of distinct project_id values in all of
# df4, not only those in the plotted subset.
vis.plot_decision_proportions(
    c1_remote,
    title="COMPLETED - REMOTE",
    sort_by=["strong remote"],
    figsize=(24, df4["project_id"].nunique(dropna=False) * 0.15),
    group_height=0.8,
)

In [None]:
# Decision mix for projects outside the done / review groups.
# Figure height scales with the number of distinct project_id values in all of
# df4, not only those in the plotted subset.
vis.plot_decision_proportions(
    c1_remaining,
    title="REMAINING",
    sort_by=["strong remote", "weak remote"],
    threshold=0.8,
    figsize=(24, df4["project_id"].nunique(dropna=False) * 0.4),
    group_height=0.8,
)

# Check results

In [None]:
## resolve practice

df1.practice.value_counts()

In [None]:
# Rows whose `practice` contains a comma, i.e. more than one practice on a
# single polygon (e.g. `assisted-natural-regeneration,tree-planting`).
# na=False: a missing practice would otherwise put NaN into the mask, and
# boolean indexing with a NaN-containing mask raises.
# regex=False: the comma is a literal, no pattern matching is needed.
multi_practice_df = df1[df1['practice'].str.contains(',', regex=False, na=False)]
multi_practice_df.decision.value_counts()

In [None]:
multi_practice_df.project_name.value_counts()

In [None]:
len(multi_practice_df.project_name.unique())

In [None]:
prj = df1[df1.project_name == 'DRC_22_PWP']
prj
#prj.to_csv('../data/surudev.csv')

In [None]:
df.target_sys.value_counts()

In [None]:
df.decision.value_counts()

In [None]:
len(df.project_id.unique()), len(df.project_name.unique())

In [None]:
df.head()

In [None]:
# Rows the decision tree flagged as 'review required'.
review = df.loc[df['decision'].eq('review required')]

In [None]:
#review[review['baseline_canopy'].isna()]

In [None]:
review.baseline_canopy.value_counts(dropna=False)

In [None]:
# Split the review rows by their `baseline_canopy` value.
review_open = review.loc[review['baseline_canopy'].eq('open')]
review_closed = review.loc[review['baseline_canopy'].eq('closed')]

In [None]:
review_open.practice.value_counts()

In [None]:
review_closed

In [None]:
review_closed.slope.value_counts(dropna=False)

In [None]:
review_closed.info()

In [None]:
df.slope.value_counts(dropna=False)

In [None]:
# Review rows whose practice is exactly 'tree-planting'.
review_tree = review.loc[review['practice'].eq('tree-planting')]

In [None]:
review_tree.baseline_canopy.value_counts(dropna=False)


In [None]:
review_tree.shape

In [None]:
review_tree

In [None]:
# Review rows whose practice is exactly 'direct-seeding'.
review_seeding = review.loc[review['practice'].eq('direct-seeding')]

In [None]:
review_seeding.baseline_canopy.value_counts(dropna=False)

In [None]:
review.baseline_canopy.value_counts(dropna=False)

## Brownbag

In [None]:
# Experiment 4 decision-tree output for the Brownbag figures.
df4 = pd.read_csv("../data/results/dtree_output_c1_07-14-2025_exp4.csv")

# Project-name groups used to slice df4.
field = set(field_done)
remote = set(remote_done)
review = set(mangrove_done).union(review_req)
done = remote | field | review  # same members as the four source lists combined

# One frame per group; "remaining" is everything outside `done`.
c1_field = df4.loc[df4['project_name'].isin(field)]
c1_remote = df4.loc[df4['project_name'].isin(remote)]
c1_remaining = df4.loc[~df4['project_name'].isin(done)]
c1_review = df4.loc[df4['project_name'].isin(review)]

In [None]:
b.plot_risk_map(c1_field, (10,10));

In [None]:
# vis.plot_decision_proportions_faceted(c1_remaining, 
#                                       ncols=4, 
#                                       figsize=(44, 40), 
#                                       title="Decision proportions by project", 
#                                       annotate=True,   # set True to show labels
#                                       label_min_prop=0.25)

In [None]:
# balanced
# vis.plot_decision_proportions_faceted(c1_remaining, ncols=6, figsize=(19, 14), title="Decision proportions by project",
#     annotate=True
# )