In [2]:
import geopandas as gpd
import pandas as pd
import sys
import json
import numpy as np
import rasterio as rs
from rasterio.mask import mask
from rasterio.warp import calculate_default_transform, reproject, Resampling
#from rasterstats import zonal_stats

sys.path.append('../../src/')
from evaluation import error_adjustments as err

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
## background has higher recall, area is underrepresented
## monoculture has higher recall, area is underrepresented
## agroforestry has higher precision, area is overrepresented
## natural has higher precision, area is overrepresented

In [3]:
def reproject_to_meters(input_raster, output_raster, target_crs='EPSG:3857'):
    '''
    Reproject every band of a raster into another CRS and write the
    result to a new file.

    Parameters:
    - input_raster (str): Path of the raster to read
    - output_raster (str): Path of the reprojected raster to write
    - target_crs (str): CRS of the output raster (default 'EPSG:3857')

    The output grid (transform, width, height) is the default one
    computed by rasterio for the source extent in the target CRS.
    Nearest-neighbour resampling is used, so output pixels only ever
    take values that exist in the input.
    '''
    with rs.open(input_raster) as source:
        dst_transform, dst_width, dst_height = calculate_default_transform(
            source.crs, target_crs, source.width, source.height, *source.bounds
        )

        # Start from the source metadata and override only the
        # georeferencing fields that change with the new CRS.
        out_meta = {
            **source.meta,
            'crs': target_crs,
            'transform': dst_transform,
            'width': dst_width,
            'height': dst_height,
        }

        with rs.open(output_raster, 'w', **out_meta) as sink:
            for band_idx in range(1, source.count + 1):
                reproject(
                    source=rs.band(source, band_idx),
                    destination=rs.band(sink, band_idx),
                    src_transform=source.transform,
                    src_crs=source.crs,
                    dst_transform=dst_transform,
                    dst_crs=target_crs,
                    resampling=Resampling.nearest
                )

In [4]:
def calculate_adjusted_area(repr_raster, prj_districts, error_dict, outfile):
    '''
    Calculates the area (ha) of each land use class in each district and
    scales the result by a per-class error adjustment factor.

    Parameters:
    - repr_raster (str): Path to the classified raster. Band 1 is read as
      class codes (0, 1, 2, 3 and 255 are given readable column names).
      Pixel area is taken from the raster transform and divided by
      10,000, so the raster must already be in a CRS whose units are meters.
    - prj_districts: Table of districts iterated with `.iterrows()`; each
      row must provide `geometry` and `ADM2_EN`. Presumably a GeoDataFrame
      in the same CRS as the raster -- TODO confirm against caller.
    - error_dict (dict): Maps a column name (e.g. "Monoculture") to a dict.
      Entries that contain an 'adj' key multiply that column by
      stats['adj']; all other entries are ignored.
    - outfile (str): Path of the CSV written with the final table.

    Returns:
    - DataFrame with one row per district, one column per class found in
      the raster, and a 'district' column. Adjusted columns are rounded to
      whole numbers, the remaining ones to 2 decimals.

    NOTE(review): rasterio's mask() fills pixels that are inside the
    cropped window but outside the district polygon with the dataset's
    nodata value (or 0 when the dataset declares none), and those pixels
    are counted here as well. Confirm the raster declares nodata=255 so
    the fill lands in "No data" and not in "No vegetation".
    '''

    zonal_stats = []

    with rs.open(repr_raster) as src:
        # Pixel footprint is a property of the raster, not of the district,
        # so work out the hectares-per-pixel factor once.
        pixel_width = src.transform[0]  # X resolution
        pixel_height = -src.transform[4]  # Y resolution
        ha_per_px = (pixel_width * pixel_height) / 10000

        for _, district in prj_districts.iterrows():
            out_image, _ = mask(src, [district.geometry], crop=True)
            unique, counts = np.unique(out_image[0], return_counts=True)

            # Pixel counts per class value -> hectares
            land_use_stats = {k: v * ha_per_px for k, v in zip(unique, counts)}

            # Add district name
            land_use_stats['district'] = district.ADM2_EN
            zonal_stats.append(land_use_stats)

    df = pd.DataFrame(zonal_stats)

    # A class that has no pixels in a district is simply missing from that
    # district's dict and would surface as NaN; its area is 0 ha.
    area_cols = [col for col in df.columns if col != 'district']
    if area_cols:
        df[area_cols] = df[area_cols].fillna(0)

    df = df.round(2).rename(columns={
        0: "No vegetation",
        1: "Monoculture",
        2: "Agroforestry",
        3: "Natural",
        255: "No data"
    })

    # Adjust for error
    for land_use_class, stats in error_dict.items():
        if isinstance(stats, dict) and 'adj' in stats:
            if land_use_class in df.columns:
                df[land_use_class] = (df[land_use_class] * stats['adj']).round()

    df.to_csv(outfile, index=False)
    return df

In [5]:
# Inputs: district polygons plus the prediction mosaic to assess.
# NOTE(review): the two raster paths below are absolute paths on one
# machine; the notebook will not run elsewhere without editing them.
prj_districts = gpd.read_file('../../data/epa_districts/Project_Districts.shp')
input_raster = '/Users/jessica.ertel/github/plantation_classifier/tmp/ghana/preds/mosaic/final_2024-10-29.tif'
repj_raster = '/Users/jessica.ertel/github/plantation_classifier/tmp/ghana/preds/mosaic/reprojected_2024-10-29.tif'

# reproject inputs from degrees to meters for area assessment calcs
# (raster and district polygons must end up in the same CRS)
# NOTE(review): EPSG:3857 (Web Mercator) is not an equal-area projection,
# so pixel areas are inflated away from the equator. Confirm the error is
# acceptable for the study area or switch to an equal-area CRS.
dst_crs = 'EPSG:3857'
reproject_to_meters(input_raster, repj_raster, dst_crs)
prj_districts = prj_districts.to_crs(dst_crs)

# Per-class error statistics; calculate_adjusted_area only uses entries
# that are dicts containing an 'adj' key.
with open("../../data/validation/ci_error_adjustment.json", "r") as f:
    error = json.load(f)

# Computes the per-district table and also writes it to the CSV path given.
area_assess = calculate_adjusted_area(repj_raster, prj_districts, error, '../../data/adj_area_assessment_042125.csv')

In [8]:
# Display the per-district table returned by calculate_adjusted_area
area_assess

Unnamed: 0,No vegetation,Monoculture,Agroforestry,Natural,No data,district
0,7614.0,3299.0,40729.0,13189.0,57926.96,Adansi South
1,5893.0,2016.0,73866.0,11732.0,135822.38,Asante Akim South
2,4294.0,2245.0,50564.0,2351.0,51943.0,Assin North
3,14647.0,15769.0,120730.0,12695.0,168338.32,Atwima Mponua
4,117593.0,16.0,12590.0,1889.0,90072.34,Bawku West
5,4521.0,2287.0,31590.0,9469.0,98417.49,Bosome Freho
6,127505.0,3.0,19542.0,2944.0,195523.07,Builsa South
7,145989.0,1.0,13536.0,15317.0,142670.7,Daffiama Bussie Issa
8,1757.0,1691.0,9392.0,2489.0,40548.41,Juaben Municipal
9,87405.0,39.0,15269.0,2006.0,146231.46,Kasena Nankana West


In [9]:
# def area_assessment_figure(input_f, output_f):
#     '''
#     takes in a csv of area assessment calculations
#     and creates the columns and structure for the 
#     publication table
#     '''
#     north = gpd.read_file('../../data/shapefiles/pd_north.shp')
#     east = gpd.read_file('../../data/shapefiles/pd_east.shp')
#     west = gpd.read_file('../../data/shapefiles/pd_west.shp')
#     df = pd.read_csv(input_f)
#     district_region = {}
#     district_region['north'] = list(north.ADM2_EN)
#     district_region['east'] = list(east.ADM2_EN)
#     district_region['west'] = list(west.ADM2_EN)
#     district_region = {district: region for region, districts in district_region.items() for district in districts}
#     df['region'] = df['district'].map(district_region)
#     df = df.sort_values(by='region')
#     df_pubfigure2 = df[['region', 'district', 'Monoculture', 'Agroforestry', 'Natural']]
#     df_pubfigure2.to_csv(output_f, index=False)
#     return df_pubfigure2

In [15]:
def area_assessment_figure(input_f, output_f, include_summary_row=True):
    '''
    Takes in a CSV of area assessment calculations and creates
    the structured table for publication.

    Each district is tagged with the region ('north', 'east' or 'west')
    whose shapefile lists it, rows are sorted by region, and the
    Monoculture / Agroforestry / Natural columns are rounded to the
    nearest 10 (`round(-1)`).

    Parameters:
    - input_f (str): Path to input CSV. Must contain the columns
      'district', 'Monoculture', 'Agroforestry' and 'Natural'.
    - output_f (str): Path to output CSV
    - include_summary_row (bool): Whether to append one 'TOTAL' row per
      region (region name upper-cased) after the district rows. Totals
      are sums of the already-rounded district values, so they match the
      rows shown in the table.

    Returns:
    - DataFrame with columns region, district, Monoculture,
      Agroforestry, Natural (the same table written to output_f).

    Warns:
    - UserWarning if a district in the CSV is not listed in any of the
      region shapefiles. Such rows keep an empty region and are left out
      of the regional totals.
    '''
    import warnings

    cols_to_round = ['Monoculture', 'Agroforestry', 'Natural']

    # Region name -> shapefile listing that region's districts. Read in
    # this order so a district listed twice keeps the later region.
    region_files = {
        'north': '../../data/shapefiles/pd_north.shp',
        'east': '../../data/shapefiles/pd_east.shp',
        'west': '../../data/shapefiles/pd_west.shp',
    }

    # Map districts to regions
    district_region = {}
    for region, shp_path in region_files.items():
        for district in gpd.read_file(shp_path).ADM2_EN:
            district_region[district] = region

    df = pd.read_csv(input_f)
    df['region'] = df['district'].map(district_region)

    # Name mismatches between the CSV and the shapefiles would otherwise
    # vanish silently from the totals (groupby skips NaN keys).
    unmapped = df.loc[df['region'].isna(), 'district'].tolist()
    if unmapped:
        warnings.warn(
            f"{len(unmapped)} district(s) not found in any region shapefile; "
            f"they are excluded from regional totals: {unmapped}"
        )

    # Sort and round
    df = df.sort_values(by='region')
    df[cols_to_round] = df[cols_to_round].round(-1)

    # Create publication table
    df_pubfigure2 = df[['region', 'district'] + cols_to_round]

    if include_summary_row:
        # Regional totals are only needed for the summary rows
        region_totals = df.groupby('region')[cols_to_round].sum().round(-1)
        summary_rows = [
            {
                'region': region.upper(),
                'district': 'TOTAL',
                **{col: totals[col] for col in cols_to_round},
            }
            for region, totals in region_totals.iterrows()
        ]
        df_pubfigure2 = pd.concat([df_pubfigure2, pd.DataFrame(summary_rows)], ignore_index=True)

    df_pubfigure2.to_csv(output_f, index=False)
    return df_pubfigure2


In [16]:
# Build the publication table from the 042125 area assessment CSV
area_assessment_figure(
    input_f='../../data/adj_area_assessment_042125.csv',
    output_f='../../data/adj_area_assessment_pubfigure_042125.csv',
)

Unnamed: 0,region,district,Monoculture,Agroforestry,Natural
0,east,Adansi South,3300.0,40730.0,13190.0
1,east,Twifo Atti-Morkwa,12990.0,47810.0,20910.0
2,east,Sene West,200.0,41390.0,107820.0
3,east,Sekyere Afram Plains North,450.0,148880.0,75910.0
4,east,Kwahu West,480.0,24940.0,3620.0
5,east,Kwahu South,340.0,23590.0,12880.0
6,east,Kwahu Afram Plains South,210.0,55540.0,47700.0
7,east,Kwahu Afram Plains North,230.0,18400.0,15030.0
8,east,Kwahu East,360.0,22710.0,7610.0
9,east,Asante Akim South,2020.0,73870.0,11730.0


In [11]:
# Build the publication table from the undated area assessment CSV.
# NOTE(review): the output stored under this cell is not rounded to the
# nearest 10, so it appears to come from an earlier version of the
# function -- re-run to refresh.
area_assessment_figure(
    input_f='../../data/adj_area_assessment.csv',
    output_f='../../data/adj_area_assessment_pubfigure.csv',
)

Unnamed: 0,region,district,Monoculture,Agroforestry,Natural
0,east,Adansi South,3395.0,40865.0,13193.0
22,east,Twifo Atti-Morkwa,13374.0,47970.0,20915.0
18,east,Sene West,208.0,41531.0,107855.0
17,east,Sekyere Afram Plains North,461.0,149378.0,75938.0
14,east,Kwahu West,494.0,25021.0,3625.0
13,east,Kwahu South,347.0,23673.0,12885.0
11,east,Kwahu Afram Plains South,213.0,55729.0,47719.0
10,east,Kwahu Afram Plains North,236.0,18462.0,15034.0
12,east,Kwahu East,371.0,22787.0,7612.0
1,east,Asante Akim South,2075.0,74112.0,11736.0


## Other (not currently used)

In [67]:
# Calculate total area of the reprojected raster (just for reference).
# The dataset is opened in a context manager so the file handle is
# released as soon as the needed values have been read.
with rs.open(repj_raster) as src:
    data = src.read(1)
    valid_px = np.sum(data != src.nodata)  # pixels that are not nodata
    pixel_width = src.transform[0]  # X resolution
    pixel_height = -src.transform[4]  # Y resolution
px_size = pixel_width * pixel_height
# Area units follow the raster CRS; m2 assumes a CRS in meters.
total_area_in_m2 = valid_px * (px_size)
total_area_in_ha = total_area_in_m2 / 10000

# sum all area assessments
# drops the last two columns (expected to be the nodata and district columns)
# NOTE(review): `df` is not assigned at notebook level in any cell shown
# here, so this line depends on state from an earlier session -- confirm
# which table it should be.
district_total_area = df.iloc[:, :-2].sum().sum()

round(total_area_in_ha), round(district_total_area)

(5247876, 5247333)

In [70]:
# Combine the agroforestry and monoculture columns into one 'ag_area'
# column, copy 'natural' to 'nat_area' (both added to df in place), then
# keep only the columns needed for the figure table.
df['ag_area'] = df['agroforestry'] + df['monoculture']
df['nat_area'] = df['natural']
df_pubfigure = df.loc[:, ['district', 'ag_area', 'nat_area']]
df_pubfigure

Unnamed: 0,district,ag_area,nat_area
0,Adansi South,54855.55,16628.07
1,Asante Akim South,98196.58,14791.46
2,Assin North,67500.35,2964.08
3,Atwima Mponua,164552.6,16005.69
4,Bawku West,16630.87,2381.25
5,Bosome Freho,42459.31,11938.37
6,Builsa South,25806.98,3711.35
7,Daffiama Bussie Issa,17875.03,19311.11
8,Juaben Municipal,12951.7,3137.66
9,Kasena Nankana West,20175.93,2528.68
