# Estimate Maxar Image Costs from Best Image Per Polygon CSV

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import math

In [None]:
# Run identifiers: together they select the results folder and the input file name
run_dir = 'ppc_tree_count_elig'
run_name = 'ppc_2025_tree_count_elig_round3_ciclos_2024'
analysis = 'baseline'

# Date stamp (YYYY-MM-DD) taken from the current local date; it is embedded
# in the input file name below
now = datetime.today()
today = now.strftime('%Y-%m-%d')

# Location of the best-image-per-polygon CSV for this run/analysis/date
imgs_path = f"../data/{run_dir}/results/{analysis}/polygon_imagery_coverage_{run_name}_{analysis}_{today}.csv"

In [None]:
# Read the best-image-per-polygon table from the path configured above
df = pd.read_csv(filepath_or_buffer=imgs_path)

In [None]:
# One row per (project_id, best_image) pair: how many polygons the image
# covers and the summed polygon overlap area in hectares
per_image_spec = {
    'num_polys': ('poly_id', 'count'),
    'total_overlap_ha': ('overlap_area_ha', 'sum'),
}
aggregated_df = df.groupby(['project_id', 'best_image'], as_index=False).agg(**per_image_spec)

# Estimated order area, following the Maxar buffering rules:
#   1. every order is at least 100 ha
poly_counts = aggregated_df['num_polys']
base = np.maximum(aggregated_df['total_overlap_ha'], 100)
#   2. when an image covers more than one polygon, add 10 ha per polygon
#      to account for the linestrings connecting them
has_multiple_polys = poly_counts > 1
connect = np.where(has_multiple_polys, poly_counts * 10, 0)
#   3. round the result up to the next multiple of 100 ha
hundreds_needed = np.ceil((base + connect) / 100.0)
aggregated_df['order_area'] = (hundreds_needed * 100).astype(int)

# Cost is a flat rate of 0.225 per hectare of estimated order area
aggregated_df['cost'] = aggregated_df['order_area'].mul(0.225)

In [None]:
# Display the per-project, per-image table (polygon count, overlap area, order area, cost)
aggregated_df

In [None]:
# Collapse the per-image rows into one row per project by summing the
# estimated order area and the cost
project_rollup_spec = {
    'total_order_area_ha': ('order_area', 'sum'),
    'total_cost': ('cost', 'sum'),
}
rows_by_project = aggregated_df.groupby('project_id', as_index=False)
project_totals = rows_by_project.agg(**project_rollup_spec)

# Show the project-level table
project_totals

In [None]:
# Overall totals across all projects: sum the estimated order area and the
# cost over every (project, image) row
overall_total_area = aggregated_df['order_area'].sum()
total_cost = aggregated_df['cost'].sum()

# Report the grand totals. The area line prints `overall_total_area`, the
# variable assigned above (the previous name `total_order_area` was never
# defined and raised a NameError).
print(f"Total order area: {overall_total_area} ha")
print(f"Total cost: ${total_cost}")