# Tree Cover Statistics: Data Prep & Analysis Pipeline

In [3]:
import os
import rasterio as rs
from rasterio.mask import mask
from rasterio.merge import merge
from rasterio.plot import show, show_hist, adjust_band
from rasterio.enums import Resampling
from rasterio import Affine, MemoryFile
from rasterio.windows import Window

import numpy as np 
import numpy.ma as ma 
import seaborn as sns
import matplotlib as plt
import pyproj
import geopandas as gpd 
import shapely
from shapely.geometry.polygon import Polygon
from shapely.geometry.multipolygon import MultiPolygon
import pandas as pd
import fiona
from contextlib import contextmanager  
from skimage.transform import resize
import math
import urllib.request
import osgeo
from osgeo import gdal
from osgeo import gdalconst
import glob
from copy import copy

%matplotlib inline

In [40]:
rs.__version__

'1.1.5'

In [41]:
gpd.__version__

'0.9.0'

In [37]:
# `plt` is bound to the top-level matplotlib package by the imports cell
# (`import matplotlib as plt`), so this reports the matplotlib version.
plt.__version__

'3.2.1'

# Data Preparation

## Shapefile to Geojson

In [2]:
def shp_to_gjson(country):
    '''
    Converts a country's admin boundary shapefile to GeoJSON.

    Reads the first .shp file (in sorted order) found in '{country}/shapefile/',
    checks that it is in EPSG:4326 and that NAME_1 has no duplicates, then
    writes it to '{country}/{country}_adminboundaries.geojson'.

    Attributes
    ----------
    country : str
        a string indicating the country files to import

    Raises
    ------
    FileNotFoundError
        if the country's shapefile folder contains no .shp file
    AssertionError
        if the CRS is not EPSG:4326 or NAME_1 contains duplicates
    '''
    # sort so the choice is deterministic when several shapefiles are present
    shapefiles = sorted(glob.glob(f'{country}/shapefile/*.shp'))
    if not shapefiles:
        raise FileNotFoundError(f'No .shp file found in {country}/shapefile/.')

    new_shp = gpd.read_file(shapefiles[0])

    # validate before writing so an invalid boundary file is never saved
    assert new_shp.crs == 'epsg:4326', f'{country} shapefile is not in EPSG:4326.'
    assert new_shp.NAME_1.duplicated().sum() == 0, f'{country} has duplicated NAME_1 values.'

    new_shp.to_file(f'{country}/{country}_adminboundaries.geojson', driver='GeoJSON')
    print(f'There are {len(new_shp)} admins in {country}.')
    return None

## Create Hansen Raster

In [22]:
def _hansen_tile_id(y_grid, x_grid):
    '''
    Builds the '<lat><N|S>_<lon><E|W>' part of a Hansen tile file name from
    the tile's grid coordinates, e.g. (10, -90) -> '10N_090W'.
    Zero is labelled N / E; the hemisphere is decided per tile.
    '''
    ns = 'N' if y_grid >= 0 else 'S'
    ew = 'E' if x_grid >= 0 else 'W'
    return f'{str(abs(y_grid)).zfill(2)}{ns}_{str(abs(x_grid)).zfill(3)}{ew}'


def _download_hansen_tile(url, dest):
    '''
    Downloads url to dest. Returns True on success. On a URL/HTTP error it
    prints a notice, removes any partial file and returns False.
    '''
    try:
        urllib.request.urlretrieve(url, dest)
    except urllib.error.URLError as err:
        print(f'Could not download {url}: {err}')
        if os.path.exists(dest):
            os.remove(dest)
        return False
    return True


def create_hansen_tif(country,
                      cover_dir='/Users/jessicaertel/wri/restoration-mapper/notebooks/analysis/hansen_treecover2010/',
                      loss_dir='/Users/jessicaertel/wri/restoration-mapper/notebooks/analysis/hansen_lossyear2020/'):
    '''
    Identifies the latitude and longitude coordinates for a country
    to download Hansen tree cover and tree cover loss tif files.
    Combines the downloaded tiles into one tree cover tif and one loss tif
    in the country's folder, then deletes the downloaded tiles.

    Attributes
    ----------
    country : str
        a string indicating the country files to import
    cover_dir : str
        folder the tree cover tiles are downloaded to (created if missing);
        defaults to the location originally hard-coded in this notebook
    loss_dir : str
        folder the loss year tiles are downloaded to (created if missing);
        defaults to the location originally hard-coded in this notebook

    Raises
    ------
    RuntimeError
        if no tile pair could be downloaded for the country
    '''
    gdal.UseExceptions()
    shapefile = gpd.read_file(f'{country}/{country}_adminboundaries.geojson')

    # identify min/max bounds for the country
    bounds = shapefile.geometry.bounds
    min_x = bounds.minx.min()
    min_y = bounds.miny.min()
    max_x = bounds.maxx.max()
    max_y = bounds.maxy.max()

    # identify the lowest and highest 10 lat/lon increments for the country
    # tiles are indexed on their top left corner: x rounds down, y rounds up
    lower_x = math.floor(min_x / 10) * 10
    lower_y = math.ceil(min_y / 10) * 10
    upper_x = math.ceil(max_x / 10) * 10
    upper_y = math.ceil(max_y / 10) * 10

    # NOTE: this log line keeps its historical wording (the lon/lat labels are
    # swapped) and reflects the lower bounds only. Tile names below compute
    # the hemisphere for every tile individually.
    lon = 'N' if lower_y >= 0 else 'S'
    lat = 'E' if lower_x >= 0 else 'W'
    print(f'{country} has lon {lon} and lat {lat}.')

    os.makedirs(cover_dir, exist_ok=True)
    os.makedirs(loss_dir, exist_ok=True)

    # tiles that were downloaded successfully (always kept as cover/loss pairs)
    tree_tifs = []
    loss_tifs = []

    print('Downloading files from GLAD...')

    for x_grid in range(lower_x, upper_x, 10):
        for y_grid in range(lower_y, upper_y + 10, 10):

            tile = f'{_hansen_tile_id(y_grid, x_grid)}.tif'

            # download tree cover and loss files from UMD
            cover_url = f'https://glad.umd.edu/Potapov/TCC_2010/treecover2010_{tile}'
            loss_url = ('https://storage.googleapis.com/earthenginepartners-hansen/'
                        f'GFC-2020-v1.8/Hansen_GFC-2020-v1.8_lossyear_{tile}')
            cover_dest = os.path.join(cover_dir, tile)
            loss_dest = os.path.join(loss_dir, tile)

            got_cover = _download_hansen_tile(cover_url, cover_dest)
            got_loss = got_cover and _download_hansen_tile(loss_url, loss_dest)

            if not (got_cover and got_loss):
                print(f'Files were not downloaded.')
                # drop a cover tile without a matching loss tile so both
                # mosaics are built from the same set of tiles
                if got_cover:
                    os.remove(cover_dest)
                continue

            tree_tifs.append(cover_dest)
            loss_tifs.append(loss_dest)

    if not tree_tifs:
        raise RuntimeError(f'No Hansen tiles could be downloaded for {country}.')

    # open vrts and convert to a single .tif
    # FLAG -- adding tfw=yes increased file size significantly
    translateoptions = gdal.TranslateOptions(format='Gtiff',
                                              outputSRS='EPSG:4326',
                                              outputType=gdal.GDT_Byte,
                                              noData=255,
                                              creationOptions=['COMPRESS=LZW'],
                                              resampleAlg='nearest')

    for product, tifs in (('treecover2010', tree_tifs), ('loss2020', loss_tifs)):
        vrt_path = f'{country}/{country}_hansen_{product}.vrt'
        tif_path = f'{country}/{country}_hansen_{product}.tif'

        # convert the tiles into a virtual raster tile; the returned dataset is
        # not kept, so it is released (and written) before being re-opened
        gdal.BuildVRT(vrt_path, tifs)

        source = gdal.Open(vrt_path)
        ds = gdal.Translate(tif_path, source, options=translateoptions)

        # release the datasets so the tif is flushed before the vrt is removed
        ds = None
        source = None
        os.remove(vrt_path)

    assert os.path.exists(f'{country}/{country}_hansen_treecover2010.tif')
    assert os.path.exists(f'{country}/{country}_hansen_loss2020.tif')

    # if new files are properly created, delete what is not needed
    for file in tree_tifs + loss_tifs:
        os.remove(file)

    print('Hansen raster built.')
    return None

## Remove loss

In [4]:
def remove_loss(country):
    '''
    Takes in a country name to import hansen tree cover and loss tifs. Updates
    tree cover to 0 if loss was detected between 2011-2020 (loss values 11-20).
    Saves the updated tif as '{country}_hansen_treecover2010_wloss.tif' in the
    country's folder and deletes the original tree cover and loss tifs.

    Attributes
    ----------
    country : str
        a string indicating the country files to import

    Raises
    ------
    AssertionError
        if either raster has an unexpected dtype, shape or value range, or if
        the two rasters do not share the same shape
    '''
    gdal.UseExceptions()
    cover_path = f'{country}/{country}_hansen_treecover2010.tif'
    loss_path = f'{country}/{country}_hansen_loss2020.tif'

    # read inside context managers so the files are closed before deletion
    with rs.open(cover_path) as cover_src:
        hansen_cover = cover_src.read(1)
        out_meta = cover_src.meta
    with rs.open(loss_path) as loss_src:
        hansen_loss = loss_src.read(1)

    # assert raster shape, datatype and max/min values
    assert hansen_cover.dtype == 'uint8'
    assert hansen_cover.ndim == 2 and hansen_cover.size > 0
    assert hansen_cover.max() <= 100 and hansen_cover.min() >= 0
    assert hansen_loss.dtype == 'uint8'
    assert hansen_loss.ndim == 2 and hansen_loss.size > 0
    assert hansen_loss.max() <= 20 and hansen_loss.min() >= 0
    # the per-pixel comparison below needs both rasters on the same grid
    assert hansen_cover.shape == hansen_loss.shape

    # If there was loss between 2011-2020 (values between 11-20), make them 0 in tree cover
    hansen_cover_new = np.where((hansen_loss >= 11) & (hansen_loss <= 20), 0, hansen_cover)

    # check bin counts after loss removed
    print(f'{(np.sum(hansen_cover > 0)) - (np.sum(hansen_cover_new > 0))} pixels converted to loss.')

    # save updated raster
    out_meta.update({'driver': 'GTiff',
                     'dtype': 'uint8',
                     'height': hansen_cover_new.shape[0],
                     'width': hansen_cover_new.shape[1],
                     'count': 1,
                     'compress': 'lzw'})
    outpath = f'{country}/{country}_hansen_treecover2010_wloss.tif'
    with rs.open(outpath, 'w', **out_meta) as dest:
        dest.write(hansen_cover_new, 1)

    # remove original hansen tree cover and loss files
    os.remove(cover_path)
    os.remove(loss_path)

    return None

## Pad TOF Raster

In [5]:
def pad_tof_raster(country):

    '''
    Increases the extent of '{country}_tof.tif' to cover the bounds of the
    country's admin boundaries (rounded outward to 0.1 degrees) and fills the
    added area with the no data value (255). Writes the result to
    '{country}_tof_padded.tif' together with a .tfw world file.

    Attributes
    ----------
    country : str
        a string indicating the country files to import
    '''
    # raise on GDAL failures instead of returning None, as the other steps do
    gdal.UseExceptions()

    shapefile = gpd.read_file(f'{country}/{country}_adminboundaries.geojson')

    # identify min/max bounds for the country
    bounds = shapefile.geometry.bounds
    min_x = bounds.minx.min()
    min_y = bounds.miny.min()
    max_x = bounds.maxx.max()
    max_y = bounds.maxy.max()

    # round outward to .1 lat/lon so the padded extent always contains the country
    lower_x = math.floor(min_x * 10) / 10
    lower_y = math.floor(min_y * 10) / 10
    upper_x = math.ceil(max_x * 10) / 10
    upper_y = math.ceil(max_y * 10) / 10

    # create tif with new output bounds, filled with no data value
    warp_options = gdal.WarpOptions(format='Gtiff',
                                    dstSRS='EPSG:4326',
                                    dstNodata=255,
                                    outputBounds=[lower_x, lower_y, upper_x, upper_y],
                                    resampleAlg='near',
                                    outputType=osgeo.gdalconst.GDT_Byte,
                                    creationOptions=['TFW=YES', 'COMPRESS=LZW'])

    ds = gdal.Warp(f'{country}/{country}_tof_padded.tif',
                   f'{country}/{country}_tof.tif',
                   options=warp_options)

    # release the dataset so the padded tif is fully written to disk
    ds = None

    return None

## Clip Rasters by Admin Boundary

In [29]:
def create_clippings(country, esa_path='ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7.tif'):
    '''
    Takes in a country name to import tof/hansen/esa rasters and masks out
    administrative boundaries based on the shapefile. Saves the exploded
    shapefile as a geojson with polygons split/numbered for each admin boundary.
    Saves clipped rasters as individual files in the country's "clipped_rasters"
    folder. Deletes the padded tof raster (and its .tfw) and the Hansen raster.

    Attributes
    ----------
    country : str
        a string indicating the country files to import
    esa_path : str
        path to the ESA land cover raster; defaults to the file name
        originally hard-coded in this notebook
    '''

    for folder in ('hansen', 'tof', 'esa'):
        os.makedirs(f'{country}/clipped_rasters/{folder}', exist_ok=True)

    tof_path = f'{country}/{country}_tof_padded.tif'
    hansen_path = f'{country}/{country}_hansen_treecover2010_wloss.tif'

    orig_shapefile = gpd.read_file(f'{country}/{country}_adminboundaries.geojson')

    # preprocess shapefile from multipolygon to single
    if (orig_shapefile.geom_type == 'MultiPolygon').any():
        # drop whatever index explode() produces; only row order matters here
        shapefile = orig_shapefile.explode().reset_index(drop=True)

        # add integer to admin name if multi polys
        is_split = shapefile.NAME_1.duplicated(keep=False)
        part_number = shapefile.groupby('NAME_1').cumcount().add(1).astype(str)
        shapefile['NAME_1'] = np.where(is_split,
                                       shapefile.NAME_1 + part_number,
                                       shapefile.NAME_1)

    # if no multi polys save original shapefile under new name
    else:
        shapefile = orig_shapefile
        print(f'No MultiPolygons in {country}.')

    shapefile.to_file(f'{country}/{country}_adminboundaries_exp.geojson', driver='GeoJSON')

    def mask_raster(polygon, admin, raster, folder):
        # clip the raster to one polygon; pixels outside the polygon are set to 0
        out_img, out_transform = mask(dataset=raster, shapes=[polygon], crop=True, nodata=0)
        out_meta = raster.meta
        out_meta.update({'driver': 'GTiff',
                         'dtype': 'uint8',
                         'height': out_img.shape[1],
                         'width': out_img.shape[2],
                         'transform': out_transform})
        outpath = f'{country}/clipped_rasters/{folder}/{admin}.tif'
        with rs.open(outpath, 'w', **out_meta) as dest:
            dest.write(out_img)
        return None

    # keep the rasters open only while clipping so they are closed before deletion
    with rs.open(tof_path) as tof_raster, \
            rs.open(hansen_path) as hansen_raster, \
            rs.open(esa_path) as esa_raster:
        for polygon, admin in zip(shapefile.geometry, shapefile.NAME_1):
            mask_raster(polygon, admin, tof_raster, 'tof')
            mask_raster(polygon, admin, hansen_raster, 'hansen')
            mask_raster(polygon, admin, esa_raster, 'esa')

    # delete Tof and Hansen files once clippings created
    os.remove(hansen_path)
    os.remove(tof_path)
    os.remove(f'{country}/{country}_tof_padded.tfw')

    print(f"{country}'s rasters clipped and saved.")
    return None

In [30]:
create_clippings('Burundi')

No MultiPolygons in Burundi.
Burundi's rasters clipped and saved.


## Resample to Match Resolution

In [7]:
def match_extent_and_res(source, reference, out_filename, tof=False, esa=False):

    '''
    Resamples source onto the grid of reference using nearest neighbour
    interpolation and writes a single band Byte GeoTIFF to out_filename.

    The output always uses the reference geotransform. Its width/height and
    projection depend on the flags below.

    Attributes
    ----------
    source : str
        path of the raster to resample
    reference : str
        path of the raster whose grid is matched
    out_filename : str
        path of the GeoTIFF to create
    tof : bool
        if True the output keeps the width/height of source instead of
        taking them from reference
    esa : bool
        if True the output keeps the projection of source instead of
        taking it from reference

    Raises
    ------
    FileNotFoundError
        if source or reference cannot be opened by GDAL
    '''

    # set up the source file
    src = gdal.Open(source, gdalconst.GA_ReadOnly)
    if src is None:
        raise FileNotFoundError(f'Could not open source raster: {source}')
    src_proj = src.GetProjection()

    # set up the reference file
    ref_ds = gdal.Open(reference, gdalconst.GA_ReadOnly)
    if ref_ds is None:
        raise FileNotFoundError(f'Could not open reference raster: {reference}')
    ref_proj = ref_ds.GetProjection()
    ref_geotrans = ref_ds.GetGeoTransform()

    # create height/width for the interpolation (ref dataset except for tof)
    width = ref_ds.RasterXSize if not tof else src.RasterXSize
    height = ref_ds.RasterYSize if not tof else src.RasterYSize

    out = gdal.GetDriverByName('GTiff').Create(out_filename, width, height, 1, gdalconst.GDT_Byte)

    # for esa keep the source projection; the geotransform still comes from the reference
    if esa:
        ref_proj = src_proj

    # set geotrans and proj for the out file
    out.SetGeoTransform(ref_geotrans)
    out.SetProjection(ref_proj)

    interpolation = gdalconst.GRA_NearestNeighbour
    gdal.ReprojectImage(src, out, src_proj, ref_proj, interpolation)

    # flush and release the datasets so the output is complete on disk
    # before any later step re-opens it
    out.FlushCache()
    out = None
    src = None
    ref_ds = None

    return None

In [8]:
def apply_extent_res(country):

    '''
    Runs match_extent_and_res() on the clipped esa, tof and hansen rasters of
    every admin listed in the country's exploded admin boundaries geojson.
    Results are written to the country's "resampled_rasters" folder.

    Attributes
    ----------
    country : str
        a string indicating the country files to import
    '''

    clipped = f'{country}/clipped_rasters'
    resampled = f'{country}/resampled_rasters'

    # make sure an output folder exists for each dataset
    for dataset in ('hansen', 'tof', 'esa'):
        target = f'{resampled}/{dataset}'
        if not os.path.exists(target):
            os.makedirs(target)

    # import new shapefile containing only polygons
    exploded = gpd.read_file(f'{country}/{country}_adminboundaries_exp.geojson')

    for admin in list(exploded.NAME_1):
        tif = f'{admin}.tif'

        # esa first: source is clipped esa, reference is clipped tof
        match_extent_and_res(f'{clipped}/esa/{tif}',
                             f'{clipped}/tof/{tif}',
                             f'{resampled}/esa/{tif}',
                             tof=False,
                             esa=True)

        # tof next, using the freshly resampled esa as reference
        match_extent_and_res(f'{clipped}/tof/{tif}',
                             f'{resampled}/esa/{tif}',
                             f'{resampled}/tof/{tif}',
                             tof=True,
                             esa=False)

        # hansen last, also against the resampled esa
        match_extent_and_res(f'{clipped}/hansen/{tif}',
                             f'{resampled}/esa/{tif}',
                             f'{resampled}/hansen/{tif}',
                             tof=False,
                             esa=False)

    return None

## Merge Admin Polygons

In [9]:
def merge_polygons(country):
    '''
    Takes in a country to iterate through the resampled rasters and identify
    which admin boundaries were split into numbered polygons (ex: Puntarenas1,
    Puntarenas2). Combines the individual files into one for the admin
    district, then deletes the individual files.

    Attributes
    ----------
    country : str
        a string indicating the country files to import
    '''

    shapefile = gpd.read_file(f'{country}/{country}_adminboundaries_exp.geojson')

    # collect the admins that need to be merged (name ends in a polygon number)
    # NOTE(review): an unsplit admin whose real name ends in a digit cannot be
    # told apart from a numbered polygon here -- confirm no such names exist.
    split_admins = set()
    for admin in shapefile.NAME_1:
        base_name = admin.rstrip('0123456789')
        if base_name != admin:
            split_admins.add(base_name)

    no_ints = sorted(split_admins)
    print(f'{len(no_ints)} admins will be merged: {no_ints}')

    datasets = ['tof', 'hansen', 'esa']
    # one, two or three trailing digits (ex: Puntarenas1.tif ... Puntarenas123.tif)
    digit_patterns = ['[0-9]', '[0-9][0-9]', '[0-9][0-9][0-9]']

    for data in datasets:
        folder = f'{country}/resampled_rasters/{data}'

        for admin_2 in no_ints:

            # escape the literal part so special characters in names are not globbed
            stem = glob.escape(f'{folder}/{admin_2}')
            paths_to_merge = []
            for digits in digit_patterns:
                paths_to_merge.extend(sorted(glob.glob(f'{stem}{digits}.tif')))

            if len(paths_to_merge) < 1:
                print(f'No files to merge in {data}.')
                continue

            # merge() needs open dataset readers; always close them afterwards
            files_to_merge = []
            try:
                for path in paths_to_merge:
                    files_to_merge.append(rs.open(path))

                mosaic, out_transform = merge(files_to_merge)
                out_meta = files_to_merge[-1].meta.copy()
            finally:
                for src in files_to_merge:
                    src.close()

            outpath = f'{folder}/{admin_2}.tif'
            out_meta.update({'driver': "GTiff",
                             'dtype': 'uint8',
                             'height': mosaic.shape[1],
                             'width': mosaic.shape[2],
                             'transform': out_transform})

            with rs.open(outpath, "w", **out_meta) as dest:
                dest.write(mosaic)

            # delete the old separated tifs
            for path in paths_to_merge:
                os.remove(path)

    return None

# Calculate Statistics

In [4]:
def reshape_to_4d(raster):

    '''
    Trims a 2D array so both dimensions are multiples of 10 (extra rows and
    columns at the end are dropped) and reshapes it into 10x10 blocks.

    Attributes
    ----------
    raster : array
        2D tree cover array to be reshaped

    Returns an array of shape (rows // 10, 10, cols // 10, 10), where
    element [i, r, j, c] is raster[10 * i + r, 10 * j + c].
    '''

    block = 10

    # number of complete blocks along each axis
    n_block_rows = raster.shape[0] // block
    n_block_cols = raster.shape[1] // block

    # drop the incomplete blocks at the bottom and right edges
    trimmed = raster[:n_block_rows * block, :n_block_cols * block]

    return np.reshape(trimmed, (n_block_rows, block, n_block_cols, block))


In [14]:
def _class_tree_cover(binned, esa, cover):
    '''
    Masks a binned tree cover array to a single land cover class.

    Returns a tuple of: the masked copy (NaN where esa != cover or where the
    value is the 255 no data value), the mean of the reshaped masked array,
    and the mean of every 10x10 pixel block.
    '''
    masked = binned.copy()
    masked[esa != cover] = np.nan
    masked[masked == 255] = np.nan

    blocks = reshape_to_4d(masked)
    return masked, np.nanmean(blocks), np.nanmean(blocks, axis=(1, 3))


def calculate_stats(country):

    '''
    Takes in a country to import appropriate tof/hansen/esa rasters. Writes a csv
    ('{country}/stats/{country}_statistics.csv') with statistics per
    administrative district, per land cover class and per tree cover threshold.

    Attributes
    ----------
    country : str
        a string indicating the country files to import

    Raises
    ------
    ValueError
        if the three rasters of an admin do not share the same shape, or if the
        sampled area of a land cover class exceeds its total area
    '''

    if not os.path.exists(f'{country}/stats'):
        os.makedirs(f'{country}/stats')

    columns = ['country', 'admin', 'esa_id', 'esa_class',
               'esa_sampled_ha', 'esa_total_ha', 'tree_cover_class',
               'tof_ha', 'hans_ha', 'tof_mean', 'hans_mean']

    # map ESA id numbers to lcc labels
    esa_legend = {0: 'No Data',
                10: 'Cropland, rainfed',
                11: 'Cropland, rainfed, herbaceous cover',
                20: 'Cropland, irrigated or post-flooding',
                30: 'Mosaic cropland / natural vegetation',
                40: 'Mosaic natural vegetation / cropland',
                50: 'Tree cover, broadleaved, evergreen',
                60: 'Tree cover, broadleaved, deciduous',
                70: 'Tree cover, needleleaved, evergreen',
                80: 'Tree cover, needleleaved, deciduous',
                90: 'Tree cover, mixed leaf type',
                100: 'Mosaic tree and shrub / herbaceous cover',
                110: 'Mosaic herbaceous cover / tree and shrub',
                120: 'Shrubland',
                130: 'Grassland',
                140: 'Lichens and mosses',
                150: 'Sparse vegetation',
                160: 'Tree cover, flooded, fresh or brakish water',
                170: 'Tree cover, flooded, saline water',
                180: 'Shrub or herbaceous cover, flooded, fresh/saline/brakish water',
                190: 'Urban areas',
                200: 'Bare areas',
                210: 'Water bodies',
                220: 'Permanent snow and ice'}

    # tree cover thresholds (0-10, 10-20, ..., 90-100)
    lower_rng = list(range(0, 100, 10))
    upper_rng = list(range(10, 110, 10))

    # only raster files; skips notebook checkpoints and other stray entries
    folder_contents = sorted(f for f in os.listdir(f'{country}/resampled_rasters/tof')
                             if f.endswith('.tif'))

    # rows are collected here and turned into a dataframe once at the end
    records = []

    # iterate through the admins
    for counter, file in enumerate(folder_contents, start=1):

        with rs.open(f'{country}/resampled_rasters/tof/{file}') as src:
            tof = src.read(1).astype(np.float32)
        with rs.open(f'{country}/resampled_rasters/hansen/{file}') as src:
            hans = src.read(1).astype(np.float32)
        with rs.open(f'{country}/resampled_rasters/esa/{file}') as src:
            esa = src.read(1).astype(np.float32)

        # the per-class masks below index tof/hansen with the esa grid
        if not (tof.shape == hans.shape == esa.shape):
            raise ValueError(f'Raster shapes differ for {file}: '
                             f'tof {tof.shape}, hansen {hans.shape}, esa {esa.shape}.')

        # convert values to their median for binning
        # (100 and the 255 no data value fall outside every range and are kept)
        for lower, upper in zip(lower_rng, upper_rng):
            tof[(tof >= lower) & (tof < upper)] = lower + 4.5
            hans[(hans >= lower) & (hans < upper)] = lower + 4.5

        # iterate through the land cover classes
        for cover in np.unique(esa):

            tof_class, tof_class_mean, tof_class_mean_per_ha = _class_tree_cover(tof, esa, cover)
            _, hans_class_mean, hans_class_mean_per_ha = _class_tree_cover(hans, esa, cover)

            # area of lc sampled (tof is NOT null) and total area (esa raster
            # equals cover), counted in pixels and divided by 100
            lc_sampled = np.sum(~np.isnan(tof_class)) / 100
            lc_total = np.count_nonzero(esa == cover) / 100

            # check for erroneous calculations
            if lc_sampled > lc_total:
                raise ValueError(f'Sampled area is greater than total area for land cover {cover} in {file}.')

            # iterate through the thresholds (0-10, 10-20, 20-30)
            # NOTE(review): a block whose mean is exactly 100 matches none of
            # these ranges (the last one is >= 90 and < 100) -- confirm intended.
            for lower, upper in zip(lower_rng, upper_rng):

                # count the 10x10 blocks whose mean falls in that threshold
                tof_bin = np.sum((tof_class_mean_per_ha >= lower) & (tof_class_mean_per_ha < upper))
                hans_bin = np.sum((hans_class_mean_per_ha >= lower) & (hans_class_mean_per_ha < upper))
                bin_name = (f'{str(lower)}-{str(upper - 1)}')

                records.append({'country': country,
                                'admin': os.path.splitext(file)[0],
                                'esa_id': cover,
                                'esa_class': esa_legend.get(int(cover)),
                                'esa_sampled_ha': lc_sampled,
                                'esa_total_ha': lc_total,
                                'tree_cover_class': bin_name,
                                'tof_ha': tof_bin,
                                'hans_ha': hans_bin,
                                'tof_mean': tof_class_mean,
                                'hans_mean': hans_class_mean})

        if counter % 3 == 0:
            print(f'{counter}/{len(folder_contents)} admins processed...')

    df = pd.DataFrame(records, columns=columns)
    df.to_csv(f'{country}/stats/{country}_statistics.csv', index=False)

    return None

# Test Pipeline

In [12]:
def execute_pipe(country):
    '''
    Runs the full pipeline for one country: the data preparation steps in
    order, followed by the statistics calculation. Prints a progress message
    before each step.

    Attributes
    ----------
    country : str
        a string indicating the country files to import

    Returns the string 'Analysis complete.'
    '''
    # (progress message, step) pairs; each step is looked up only when it runs
    preparation = [
        ('Converting shapefile to geojson...', lambda: shp_to_gjson(country)),
        ('Building Hansen tree cover raster...', lambda: create_hansen_tif(country)),
        ('Removing tree cover loss...', lambda: remove_loss(country)),
        ('Padding tof raster...', lambda: pad_tof_raster(country)),
        ('Clipping rasters by admin boundary...', lambda: create_clippings(country)),
        ('Resampling to match raster extents and resolutions...', lambda: apply_extent_res(country)),
        ('Merging admins containing multiple polygons...', lambda: merge_polygons(country)),
    ]

    for message, run_step in preparation:
        print(message)
        run_step()

    print('Data preparation complete.')

    print('Calculating statistics...')
    calculate_stats(country)

    return 'Analysis complete.'
    

In [35]:
execute_pipe('Belize')

Building Hansen tree cover raster...
Belize has lon N and lat W.
Beginning file download...
Building the tif...
Hansen raster built.
Removing tree cover loss...
35697769 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Resampling to match raster extents and resolutions...
Merging admins with multiple polygons...
4 admins will be merged: ['Corozal', 'Stann Creek', 'Toledo', 'Belize']


'Data preparation complete.'

In [81]:
execute_pipe('Costa Rica')

Building Hansen tree cover raster...
Costa Rica has lon N and lat W.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
38050442 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Costa Rica's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins with multiple polygons...
3 admins will be merged: ['Puntarenas', 'Limón', 'Guanacaste']


'Data preparation complete.'

In [74]:
execute_pipe('El Salvador')

Building Hansen tree cover raster...
El Salvador has lon N and lat W.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
59400753 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
El Salvador's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins with multiple polygons...
3 admins will be merged: ['Usulután', 'La Paz', 'La Unión']


'Data preparation complete.'

In [41]:
execute_pipe('Guatemala')

Building Hansen tree cover raster...
Guatemala has lon N and lat W.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
59400753 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Guatemala's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins with multiple polygons...
3 admins will be merged: ['Jutiapa', 'Escuintla', 'Izabal']


'Data preparation complete.'

In [42]:
execute_pipe('Honduras')

Building Hansen tree cover raster...
Honduras has lon N and lat W.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
35697769 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Honduras's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins with multiple polygons...
7 admins will be merged: ['Colón', 'Islas de la Bahía', 'Valle', 'Cortés', 'Choluteca', 'Atlántida', 'Gracias a Dios']


'Data preparation complete.'

In [75]:
execute_pipe('Nicaragua')

Building Hansen tree cover raster...
Nicaragua has lon N and lat W.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
35697769 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Nicaragua's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins with multiple polygons...
8 admins will be merged: ['Atlántico Norte', 'Río San Juan', 'Rivas', 'Granada', 'Chinandega', 'Chontales', 'León', 'Atlántico Sur']


'Data preparation complete.'

In [51]:
execute_pipe('Panama')

Building Hansen tree cover raster...
Panama has lon N and lat W.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
38598272 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Panama's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins containing multiple polygons...
12 admins will be merged: ['Emberá', 'Panamá Oeste', 'Coclé', 'Darién', 'Colón', 'Kuna Yala', 'Panamá', 'Chiriquí', 'Veraguas', 'Bocas del Toro', 'Los Santos', 'Ngöbe Buglé']
Data preparation complete.
Calculating statistics...




5/13 admins processed...
10/13 admins processed...


'Analysis complete.'

In [48]:
execute_pipe('Gambia')

Converting shapefile to geojson...
There are 6 admins in Gambia.
Building Hansen tree cover raster...
Gambia has lon N and lat W.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
21982618 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Gambia's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins containing multiple polygons...
3 admins will be merged: ['Banjul', 'North Bank', 'Western']
Data preparation complete.
Calculating statistics...




3/6 admins processed...
6/6 admins processed...


'Analysis complete.'

In [31]:
execute_pipe('Burundi')

Converting shapefile to geojson...
There are 17 admins in Burundi.
Building Hansen tree cover raster...
Burundi has lon N and lat E.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
96869725 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
No MultiPolygons in Burundi.
Burundi's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins containing multiple polygons...
0 admins will be merged: []
Data preparation complete.
Calculating statistics...




3/17 admins processed...
6/17 admins processed...
9/17 admins processed...
12/17 admins processed...
15/17 admins processed...


'Analysis complete.'

In [33]:
# Run the pipeline for Rwanda; the log below starts at the shapefile conversion and ends with the statistics stage.
# NOTE(review): execute_pipe is not defined in this part of the notebook — presumably an earlier cell.
execute_pipe('Rwanda')

Converting shapefile to geojson...
There are 5 admins in Rwanda.
Building Hansen tree cover raster...
Rwanda has lon N and lat E.
Downloading files from GLAD...
Hansen raster built.
Removing tree cover loss...
96869725 pixels converted to loss.
Padding tof raster...
Clipping rasters by admin boundary...
Rwanda's rasters clipped and saved.
Resampling to match raster extents and resolutions...
Merging admins containing multiple polygons...
1 admins will be merged: ['Iburengerazuba']
Data preparation complete.
Calculating statistics...




3/5 admins processed...


'Analysis complete.'

## Sanity Checks

In [None]:
# Check ESA no data land cover class: load band 1 of the ESA and TOF
# Sonsonate rasters. The context managers close each dataset as soon as its
# pixels are in memory, instead of leaving the file handles open in the kernel.
with rs.open('El Salvador/resampled_rasters/esa/Sonsonate.tif') as esa_src:
    sons_esa = esa_src.read(1)
with rs.open('El Salvador/resampled_rasters/tof/Sonsonate.tif') as tof_src:
    sons_tof = tof_src.read(1)

In [36]:
# check vals for Costa Rica admin
# NOTE(review): `currid_tof` is not assigned in any visible cell, so this relies on
# leftover kernel state — presumably band 1 of a Costa Rica TOF raster; confirm.
np.unique(currid_tof)

array([  0,  20,  40,  60,  80, 100], dtype=uint8)

In [37]:
# check vals for El Salvador admin
# NOTE(review): `apaneca_tof` is not assigned in any visible cell, so this relies on
# leftover kernel state — presumably band 1 of an El Salvador TOF raster; confirm.
np.unique(apaneca_tof)

array([ 0, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
       36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52,
       53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
       70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86,
       87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], dtype=uint8)

In [64]:
# Compare the average of the mean tree cover columns across the three
# El Salvador statistics files. statistics01 stores one mean per admin, while
# statistics02/03 store one mean per admin per land cover type, so the
# statistics01 averages are expected to differ from the other two.

es1, es2, es3 = [
    pd.read_csv(csv_path)
    for csv_path in (
        'El Salvador/stats/El Salvador_statistics01.csv',
        'El Salvador/stats/El Salvador_statistics02.csv',
        'El Salvador/stats/El Salvador_statistics03.csv',
    )
]

# (heading, dataframe, TOF column, Hansen column) for each file
es_reports = (
    ('Statistics01:', es1, 'tof_mean_tc', 'hans_mean_tc'),
    ('Statistics02:', es2, 'tof_mean_tc_lc', 'hans_mean_tc_lc'),
    ('Statistics03:', es3, 'tof_mean_tc_lc', 'hans_mean_tc_lc'),
)
for heading, frame, tof_col, hans_col in es_reports:
    print(heading)
    tof_avg = frame[tof_col].mean()
    hans_avg = frame[hans_col].mean()
    print(f'TOF avg in ES: {tof_avg}, Hans avg in ES: {hans_avg}')

Statistics01:
TOF avg in ES: 16.18607142857143, Hans avg in ES: 17.820428571428568
Statistics02:
TOF avg in ES: 23.421297258496576, Hans avg in ES: 17.658193335813635
Statistics03:
TOF avg in ES: 23.421297258496576, Hans avg in ES: 17.658193335813635


In [65]:
# Same comparison for Costa Rica: average the mean tree cover columns of each
# statistics file and print the TOF and Hansen figures side by side.
cr1, cr2, cr3 = [
    pd.read_csv(csv_path)
    for csv_path in (
        'Costa Rica/stats/Costa Rica_statistics01.csv',
        'Costa Rica/stats/Costa Rica_statistics02.csv',
        'Costa Rica/stats/Costa Rica_statistics03.csv',
    )
]

# (heading, dataframe, TOF column, Hansen column) for each file
cr_reports = (
    ('Statistics01:', cr1, 'tof_mean_tc', 'hans_mean_tc'),
    ('Statistics02:', cr2, 'tof_mean_tc_lc', 'hans_mean_tc_lc'),
    ('Statistics03:', cr3, 'tof_mean_tc_lc', 'hans_mean_tc_lc'),
)
for heading, frame, tof_col, hans_col in cr_reports:
    print(heading)
    tof_avg = frame[tof_col].mean()
    hans_avg = frame[hans_col].mean()
    print(f'TOF avg in CR: {tof_avg}, Hans avg in CR: {hans_avg}')

Statistics01:
TOF avg in CR: 7.111285714285714, Hans avg in CR: 25.302999999999997
Statistics02:
TOF avg in CR: 16.921263565891472, Hans avg in CR: 32.848781838316725
Statistics03:
TOF avg in CR: 16.921263565891472, Hans avg in CR: 32.848781838316725


In [12]:
# ES statistics for 2nd function not calculating correctly
# Average the TOF and Hansen per-class means within each IPCC class.
# NOTE(review): `es_above` is not assigned in any visible cell — presumably the
# El Salvador statistics02 dataframe; confirm.
es_ipcc_columns = ['tof_mean_tc_lc', 'hans_mean_tc_lc', 'ipcc_class']
es_ipcc_grouped = es_above[es_ipcc_columns].groupby('ipcc_class')
es_ipcc_means = es_ipcc_grouped.mean().reset_index()

In [13]:
# Display the grouped means (one row per IPCC class).
es_ipcc_means

Unnamed: 0,ipcc_class,tof_mean_tc_lc,hans_mean_tc_lc
0,Agriculture,29.250907,17.826823
1,Forest,32.203412,17.515225
2,Grassland,15.547973,18.052502
3,Other,9.602041,17.427191
4,Settlement,23.999687,17.826823
5,Wetland,14.905611,17.346923


In [14]:
# Average the TOF and Hansen per-class means within each ESA class.
# NOTE(review): `es_above` is not assigned in any visible cell — confirm where it is loaded.
es_esa_columns = ['tof_mean_tc_lc', 'hans_mean_tc_lc', 'esa_class']
es_esa_grouped = es_above[es_esa_columns].groupby('esa_class')
es_esa_means = es_esa_grouped.mean().reset_index()

In [17]:
# Average the TOF and Hansen per-class means within each IPCC class.
# NOTE(review): `cr_above` is not assigned in any visible cell — presumably the
# Costa Rica statistics02 dataframe; confirm.
cr_ipcc_columns = ['tof_mean_tc_lc', 'hans_mean_tc_lc', 'ipcc_class']
cr_ipcc_grouped = cr_above[cr_ipcc_columns].groupby('ipcc_class')
cr_ipcc_means = cr_ipcc_grouped.mean().reset_index()

In [18]:
# Display the grouped means (one row per IPCC class).
cr_ipcc_means

Unnamed: 0,ipcc_class,tof_mean_tc_lc,hans_mean_tc_lc
0,Agriculture,21.557142,29.621502
1,Forest,21.932651,53.47892
2,Grassland,14.237551,25.316936
3,Other,2.697094,17.30521
4,Settlement,14.240675,10.02426
5,Wetland,7.563579,19.563395


In [19]:
# Average the TOF and Hansen per-class means within each ESA class.
# NOTE(review): `cr_above` is not assigned in any visible cell — confirm where it is loaded.
cr_esa_columns = ['tof_mean_tc_lc', 'hans_mean_tc_lc', 'esa_class']
cr_esa_grouped = cr_above[cr_esa_columns].groupby('esa_class')
cr_esa_means = cr_esa_grouped.mean().reset_index()

## Old Code

In [None]:
def stats_01(country):
    
    '''
    Calculates per-admin tree cover statistics for the TOF and Hansen rasters.

    For every raster in the country's resampled_rasters/tof folder (and the Hansen
    raster with the same file name), calculates the mean tree cover of the admin 
    and the number of hectares whose mean tree cover falls within each of five 
    20% bins (0-20%, 20-40%, 40-60%, 60-80%, 80-100%). The statistics are saved 
    as a csv file in the country's stats folder.

    Bins are half-open ([0, 20), [20, 40), ...) and the last bin is closed 
    ([80, 100]), so a hectare with a fractional mean such as 19.5 is counted 
    in 0-20 instead of falling between two bins.
    
    Attributes
    ----------
    country : str
        a string indicating the country files to import

    Returns
    -------
    None
        the statistics are written to {country}/stats/{country}_statistics01.csv
    '''
    
    os.makedirs(f'{country}/stats', exist_ok=True)
    
    bins = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100)]
    columns = (['admin', 'tof_mean_tc', 'hans_mean_tc']
               + [f'tof_{lower}_{upper}' for lower, upper in bins]
               + [f'hans_{lower}_{upper}' for lower, upper in bins])

    def read_band(folder, file):
        # the context manager closes the dataset once the band is in memory
        with rs.open(f'{country}/resampled_rasters/{folder}/{file}') as src:
            return src.read(1)

    def count_per_bin(mean_per_ha, prefix):
        # number of hectares whose mean tree cover falls in each 20% bin
        counts = {}
        for lower, upper in bins:
            if upper == 100:
                in_bin = (mean_per_ha >= lower) & (mean_per_ha <= upper)
            else:
                in_bin = (mean_per_ha >= lower) & (mean_per_ha < upper)
            counts[f'{prefix}_{lower}_{upper}'] = np.sum(in_bin)
        return counts

    # one record per admin; the dataframe is built once after the loop
    records = []
    
    for file in [f for f in os.listdir(f'{country}/resampled_rasters/tof') if f != '.ipynb_checkpoints']:
        
        tof = read_band('tof', file)
        hansen = read_band('hansen', file)

        # reshape to 10x10 grid - ex: (88, 10, 63, 10)
        tof_reshaped = reshape_to_4d(tof)
        hansen_reshaped = reshape_to_4d(hansen)

        # calculate mean tree cover for each hectare
        tof_mean_per_ha = np.mean(tof_reshaped, axis=(1,3))
        hansen_mean_per_ha = np.mean(hansen_reshaped, axis=(1,3))

        # file[:-4] drops the file extension to get the admin name
        record = {'admin': file[:-4],
                  # mean tree cover for the whole admin boundary
                  'tof_mean_tc': round(np.mean(tof_reshaped), 3),
                  'hans_mean_tc': round(np.mean(hansen_reshaped), 3)}
        record.update(count_per_bin(tof_mean_per_ha, 'tof'))
        record.update(count_per_bin(hansen_mean_per_ha, 'hans'))
        records.append(record)

    # save to csv once, after every admin has been processed
    tree_cover = pd.DataFrame(records, columns=columns)
    tree_cover.to_csv(f'{country}/stats/{country}_statistics01.csv', index=False)
    
    return None

In [None]:
def stats_02_03(country):
    '''
    Calculates tree cover statistics per ESA land cover class within each admin.

    For every raster in the country's resampled_rasters/tof folder (together with 
    the Hansen and ESA rasters of the same file name), tree cover values are first 
    converted to the median of their bin. Then, for each ESA land cover class 
    present in the admin:
      - the mean tree cover of the class is calculated for TOF and Hansen
      - the hectares with mean tree cover above each 10% threshold 
        (10+, 20+ ... 90+) are counted
      - the hectares with mean tree cover within each 20% bin 
        (0-20%, 20-40%, 40-60%, 60-80%, 80-100%) are counted
    Each row is labelled with its ESA class name and its IPCC class. The 
    "above" statistics are saved as {country}_statistics02_new.csv and the 
    "between" statistics as {country}_statistics03_new.csv in the stats folder.

    The 20% bins are half-open ([0, 20), [20, 40), ...) with a closed last bin 
    ([80, 100]) so that fractional hectare means are not dropped between bins.
    
    Attributes
    ----------
    country : str
        a string indicating the country files to import

    Returns
    -------
    None
    '''
    
    # map ESA id numbers to lcc labels
    esa_legend = {0: 'No Data',
                  10: 'Cropland, rainfed',
                  11: 'Cropland, rainfed, herbaceous cover',
                  20: 'Cropland, irrigated or post-flooding',
                  30: 'Mosaic cropland (>50%) / natural vegetation (tree, shrub, herbaceous cover)(<50%)',
                  40: 'Mosaic natural vegetation (tree, shrub, herbaceous cover) (>50%) / cropland (<50%)',
                  50: 'Tree cover, broadleaved, evergreen, closed to open (>15%)',
                  60: 'Tree cover, broadleaved, deciduous, closed to open (>15%)',
                  70: 'Tree cover, needleleaved, evergreen, closed to open (>15%)',
                  80: 'Tree cover, needleleaved, deciduous, closed to open (>15%)',
                  90: 'Tree cover, mixed leaf type (broadleaved and needleleaved)',
                  100: 'Mosaic tree and shrub (>50%) / herbaceous cover (<50%)',
                  110: 'Mosaic herbaceous cover (>50%) / tree and shrub (<50%)',
                  120: 'Shrubland',
                  130: 'Grassland',
                  140: 'Lichens and mosses',
                  150: 'Sparse vegetation (tree, shrub, herbaceous cover) (<15%)',
                  160: 'Tree cover, flooded, fresh or brakish water',
                  170: 'Tree cover, flooded, saline water',
                  180: 'Shrub or herbaceous cover, flooded, fresh/saline/brakish water',
                  190: 'Urban areas',
                  200: 'Bare areas',
                  210: 'Water bodies',
                  220: 'Permanent snow and ice'}

    # map ESA id numbers to ipcc labels
    ipcc = {0: 'Other',
            10: 'Agriculture',
            11: 'Agriculture',
            20: 'Agriculture',
            30: 'Agriculture',
            40: 'Agriculture',
            50: 'Forest',
            60: 'Forest',
            70: 'Forest',
            80: 'Forest',
            90: 'Forest',
            100: 'Forest',
            110: 'Grassland',
            120: 'Other',
            130: 'Grassland',
            140: 'Other',
            150: 'Other',
            160: 'Forest',
            170: 'Forest',
            180: 'Wetland',
            190: 'Settlement',
            200: 'Other',
            210: 'Other',
            220: 'Other'}

    thresholds = range(10, 100, 10)
    bins = [(0, 20), (20, 40), (40, 60), (60, 80), (80, 100)]

    # column order of the two output files
    id_cols = ['admin', 'esa_id', 'esa_class', 'ipcc_class', 'tof_mean_tc_lc', 'hans_mean_tc_lc']
    above_cols = id_cols + [f'{src}_{t}+' for src in ('tof', 'hans') for t in thresholds]
    btw_cols = id_cols + [f'{src}_{lower}_{upper}' for src in ('tof', 'hans') for lower, upper in bins]

    def read_band(folder, file):
        # the context manager closes the dataset once the band is in memory
        with rs.open(f'{country}/resampled_rasters/{folder}/{file}') as src:
            return src.read(1).astype(np.float32)

    def class_means(raster, esa, cover):
        # change all values that are not equal to the lcc to NaN including no data vals,
        # then reshape to a 10x10 grid and return (mean for the lcc, mean for the lcc per hectare)
        # note that runtime warning: mean of empty slice indicates array has nothing but nan values
        masked = raster.copy()
        masked[esa != cover] = np.nan
        masked[masked == 255] = np.nan
        reshaped = reshape_to_4d(masked)
        return np.nanmean(reshaped), np.nanmean(reshaped, axis=(1, 3))

    def count_above(mean_per_ha, prefix):
        # num of hectares above each 10% threshold (NaN hectares compare False)
        return {f'{prefix}_{t}+': np.sum(mean_per_ha > t) for t in thresholds}

    def count_between(mean_per_ha, prefix):
        # num of hectares within each 20% bin
        counts = {}
        for lower, upper in bins:
            if upper == 100:
                in_bin = (mean_per_ha >= lower) & (mean_per_ha <= upper)
            else:
                in_bin = (mean_per_ha >= lower) & (mean_per_ha < upper)
            counts[f'{prefix}_{lower}_{upper}'] = np.sum(in_bin)
        return counts

    os.makedirs(f'{country}/stats', exist_ok=True)

    folder_contents = [f for f in os.listdir(f'{country}/resampled_rasters/tof') if f != '.ipynb_checkpoints']

    # one record per (admin, land cover class); dataframes are built once after the loop
    above_records = []
    btw_records = []

    # iterate over every admin (previously limited to the first file by a leftover [:1] slice)
    for counter, file in enumerate(folder_contents, start=1):

        tof = read_band('tof', file)
        hans = read_band('hansen', file)
        esa = read_band('esa', file)

        # convert values to their median for binning
        # NOTE(review): lower_rng / upper_rng are not defined in this function and are
        # read from the notebook's global scope — confirm they are set before calling
        for lower, upper in zip(lower_rng, upper_rng):
            tof[(tof >= lower) & (tof < upper)] = lower + 4.5
            hans[(hans >= lower) & (hans < upper)] = lower + 4.5

        # identify the lccs for that admin district
        for cover in np.unique(esa):

            tof_class_mean, tof_class_mean_per_ha = class_means(tof, esa, cover)
            hans_class_mean, hans_class_mean_per_ha = class_means(hans, esa, cover)

            # file[:-4] drops the file extension to get the admin name
            shared = {'admin': file[:-4],
                      'esa_id': cover,
                      'tof_mean_tc_lc': tof_class_mean,
                      'hans_mean_tc_lc': hans_class_mean}

            above_records.append({**shared,
                                  **count_above(tof_class_mean_per_ha, 'tof'),
                                  **count_above(hans_class_mean_per_ha, 'hans')})
            btw_records.append({**shared,
                                **count_between(tof_class_mean_per_ha, 'tof'),
                                **count_between(hans_class_mean_per_ha, 'hans')})

        if counter % 2 == 0:
            print(f'{counter}/{len(folder_contents)} admins processed...')

    ipcc_above = pd.DataFrame(above_records, columns=above_cols)
    ipcc_btw = pd.DataFrame(btw_records, columns=btw_cols)

    # label every row with its ESA class name and IPCC class
    for frame in (ipcc_above, ipcc_btw):
        frame['esa_class'] = frame['esa_id'].map(esa_legend)
        frame['ipcc_class'] = frame['esa_id'].map(ipcc)

    # save to csv      
    ipcc_above.to_csv(f'{country}/stats/{country}_statistics02_new.csv', index=False)
    ipcc_btw.to_csv(f'{country}/stats/{country}_statistics03_new.csv', index=False)
                                
    return None

In [None]:
# Scratch cell: collapse pixel values to the midpoint of their 10% band, then
# count hectares per 10% range.
# NOTE(review): `tof`, `hans`, `tof_class_mean_per_ha` and `hans_class_mean_per_ha`
# are not assigned in this cell and must already exist in the kernel.

# Both bounds of every band are exclusive (start < value < start + 9), so values
# exactly equal to a band edge (10, 19, 20, ...) are left unchanged.
# TOF bands start at 10; there is no 0-9 band for TOF.
for band_start in range(10, 100, 10):
    in_band = (tof > band_start) & (tof < band_start + 9)
    tof[in_band] = band_start + 4.5

# Hansen bands start at 0 (a value of exactly 0 is excluded by the strict bound).
for band_start in range(0, 100, 10):
    in_band = (hans > band_start) & (hans < band_start + 9)
    hans[in_band] = band_start + 4.5


def _hectares_between(mean_per_ha, lower, upper):
    '''Counts the entries of mean_per_ha that lie in the closed range [lower, upper].'''
    return np.sum((mean_per_ha >= lower) & (mean_per_ha <= upper))


# closed ranges used for the per-hectare counts; the last one runs up to 100
_count_ranges = [(0, 9), (10, 19), (20, 29), (30, 39), (40, 49),
                 (50, 59), (60, 69), (70, 79), (80, 89), (90, 100)]

(tof_0_10, tof_10_20, tof_20_30, tof_30_40, tof_40_50,
 tof_50_60, tof_60_70, tof_70_80, tof_80_90, tof_90_100) = [
    _hectares_between(tof_class_mean_per_ha, lower, upper)
    for lower, upper in _count_ranges
]

(hans_0_10, hans_10_20, hans_20_30, hans_30_40, hans_40_50,
 hans_50_60, hans_60_70, hans_70_80, hans_80_90, hans_90_100) = [
    _hectares_between(hans_class_mean_per_ha, lower, upper)
    for lower, upper in _count_ranges
]

In [None]:
# Sketch of the intended call order for matching the three rasters.
# NOTE(review): this cell reads as pseudo-code — `esa_out.tif`, `tof_out.tif` and
# `hansen_out.tif` are unquoted, so Python would evaluate them as attribute lookups
# rather than file names, and `match_raster_extent_and_res` is not defined in the
# visible part of the notebook. Confirm before running.

# ESA bounding box stays same, becomes same shape as TOF
# esa_out.tif has the proper resolution
match_raster_extent_and_res(esa, tof, esa_out.tif, tof=False, esa=True) 

# TOF bounding box becomes ESA, TOF stays same shape
# tof=true will not resample tof
match_raster_extent_and_res(tof, esa_out, tof_out.tif, tof=True, esa=False) 

# Hansen bounding box becomes TOF/ESA, Hansen shape becomes TOF/ESA
match_raster_extent_and_res(hansen, esa_out, hansen_out.tif, tof=False, esa=False) 

In [8]:
def resample_resize(country):
    
    '''
    Takes in a country name to import its clipped rasters and brings the ESA,
    Hansen and TOF rasters of every admin onto the extent of the ESA raster. 
    Returns the new rasters as individual files in the country's 
    "resampled_rasters" folder. 

    For each admin in {country}_adminboundaries_exp.geojson:
      - ESA is upsampled by a factor of 30 (nearest neighbour)
      - Hansen is cropped to the bounds of the upsampled ESA raster and the 
        cropped raster is then upsampled by a factor of 3 (nearest neighbour)
      - TOF is cropped to the bounds of the upsampled ESA raster (no resample)

    Upsampling multiplies the raster dimensions by the scale factor and 
    divides the pixel size by it.

        i.e. given dimensions of (512, 512) and a scale of 2, the resampled 
        raster would have dimensions of (1024, 1024) and half the pixel size
        
    Attributes
    ----------
    country : str
        a string indicating the country files to import

    Returns
    -------
    None
    '''

    def raster_shape(path):
        # (height, width) of a raster, closing the dataset right away
        with rs.open(path) as src:
            return src.shape

    def upsample(src_path, out_path, scale):
        # Upsamples a single-band uint8 raster by an integer scale and writes it to 
        # out_path. The source is fully read and closed before the output is opened, 
        # so src_path and out_path may be the same file. Returns the new shape.
        with rs.open(src_path) as src:
            height = int(src.height)
            width = int(src.width)

            # resample data to target shape -- use Resampling.nearest not Resampling.bilinear
            resampled = src.read(out_shape=(src.count, (height * scale), (width * scale)),
                                 resampling=Resampling.nearest)

            # removes extra index
            resampled = resampled.squeeze()

            # scale image transform
            transform = src.transform * src.transform.scale((width / resampled.shape[-1]),
                                                            (height / resampled.shape[-2]))

        # assert raster shape, datatype and max/min values
        assert resampled.dtype == 'uint8'
        assert resampled.shape != (0, ) and len(resampled.shape) <= 2
        assert resampled.max() <= 255 and resampled.min() >= 0

        with rs.open(out_path, 'w', 
                     driver='GTiff',
                     height=resampled.shape[0], 
                     width=resampled.shape[1], 
                     count=1,
                     dtype="uint8",
                     crs='+proj=longlat +datum=WGS84 +no_defs',
                     transform=transform,
                     compress='lzw') as dst:
            dst.write(resampled, 1)

        return resampled.shape

    def crop_to_bounds(src_path, out_path, bounds):
        # Crops src_path to the given bounds with gdal translate.
        # projWin has to be part of the TranslateOptions object: when a prebuilt 
        # options object is passed to gdal.Translate, other keyword arguments are 
        # ignored. projWin is ordered [ulx, uly, lrx, lry].
        translateoptions = gdal.TranslateOptions(format='Gtiff', 
                                                 outputSRS='EPSG:4326',
                                                 outputType=gdal.GDT_Byte,
                                                 noData=255,
                                                 creationOptions=['COMPRESS=LZW'],
                                                 resampleAlg='nearest',
                                                 projWin=[bounds.left, bounds.top, 
                                                          bounds.right, bounds.bottom])
        source = gdal.Open(src_path)
        ds = gdal.Translate(out_path, source, options=translateoptions)
        # dereference the datasets so gdal flushes the output to disk
        ds = None
        source = None

    for folder in ('hansen', 'tof', 'esa'):
        os.makedirs(f'{country}/resampled_rasters/{folder}', exist_ok=True)
    
    # import new shapefile containing only polygons
    shapefile = gpd.read_file(f'{country}/{country}_adminboundaries_exp.geojson')
    admin_boundaries = list(shapefile.NAME_1)
    
    # process every admin (previously limited to the first one by a leftover [:1] slice)
    for admin in admin_boundaries:
        print(admin)

        esa_raster = f'{country}/clipped_rasters/esa/{admin}.tif'
        esa_outpath = f'{country}/resampled_rasters/esa/{admin}.tif'
        hans_raster = f'{country}/clipped_rasters/hansen/{admin}.tif'
        hans_outpath = f'{country}/resampled_rasters/hansen/{admin}.tif'
        tof_raster = f'{country}/clipped_rasters/tof/{admin}.tif'
        tof_outpath = f'{country}/resampled_rasters/tof/{admin}.tif'
         
        # Resample ESA
        print(f'ESA original: {raster_shape(esa_raster)}')
        esa_shape = upsample(esa_raster, esa_outpath, scale=30)
        print(f'ESA after resample: {esa_shape}')
        
        # bounds of the resampled ESA raster are the crop window for this admin
        with rs.open(esa_outpath) as esa:
            esa_bounds = esa.bounds
        
        # Crop Hansen to ESA bounds 
        print(f'Hansen original: {raster_shape(hans_raster)}')
        crop_to_bounds(hans_raster, hans_outpath, esa_bounds)
        print(f'Hansen after gdal translate: {raster_shape(hans_outpath)}')
        
        # Resample the cropped hansen raster in place
        hans_shape = upsample(hans_outpath, hans_outpath, scale=3)
        print(f'Hansen after resample: {hans_shape}')
        
        # Crop TOF to ESA bounds (move to resampled folder, skip resample)
        crop_to_bounds(tof_raster, tof_outpath, esa_bounds)
        print(f'TOF original: {raster_shape(tof_raster)}')
        print(f'TOF after gdal translate: {raster_shape(tof_outpath)}')
        print(' ')
    return None


In [None]:
def match_extent_and_res(country):
    
    '''
    Matches the projection, bounding box, and dimensions of the TOF and Hansen
    rasters (source) to the ESA raster (reference) for every admin boundary.
    
    For each admin the ESA raster is first written to the resampled folder in 
    the TOF projection. That output then becomes the reference grid: TOF and 
    Hansen are warped (nearest neighbour) into new single-band byte GeoTIFFs 
    that share its geotransform, projection, width and height.
    
    Attributes
    ----------
    country : str
        a string indicating the country files to import
    ''' 
   
    # import new shapefile containing only polygons
    shapefile = gpd.read_file(f'{country}/{country}_adminboundaries_exp.geojson')
    admin_boundaries = list(shapefile.NAME_1)
    
    # GTiff Create() needs the output folders to exist
    for source_name in ('tof', 'hansen', 'esa'):
        os.makedirs(f'{country}/resampled_rasters/{source_name}', exist_ok=True)
    
    gtiff = gdal.GetDriverByName('GTiff')
    # nearest neighbour keeps the categorical / binned pixel values intact
    interpolation = gdalconst.GRA_NearestNeighbour
    
    for admin in admin_boundaries:
        
        # import, get the projection for each dataset
        tof = gdal.Open(f'{country}/clipped_rasters/tof/{admin}.tif', gdalconst.GA_ReadOnly)     
        tof_proj = tof.GetProjection()
        tof_outpath = f'{country}/resampled_rasters/tof/{admin}.tif'
        
        hans = gdal.Open(f'{country}/clipped_rasters/hansen/{admin}.tif', gdalconst.GA_ReadOnly)
        hans_proj = hans.GetProjection()
        hans_outpath = f'{country}/resampled_rasters/hansen/{admin}.tif'
        
        esa = gdal.Open(f'{country}/clipped_rasters/esa/{admin}.tif', gdalconst.GA_ReadOnly)
        esa_proj = esa.GetProjection()
        esa_outpath = f'{country}/resampled_rasters/esa/{admin}.tif'
                
        # write esa into the tof projection; ReprojectImage needs an open
        # destination dataset (not a file path) with its georeferencing set
        # NOTE(review): the grid keeps ESA's native width/height, so no 10m 
        # upsample happens here -- confirm whether one is expected
        esa_out = gtiff.Create(esa_outpath, esa.RasterXSize, esa.RasterYSize, 1, gdalconst.GDT_Byte)
        esa_out.SetGeoTransform(esa.GetGeoTransform())
        esa_out.SetProjection(tof_proj)
        gdal.ReprojectImage(esa, esa_out, esa_proj, tof_proj, interpolation)
        esa_out = None  # dereference to flush the file to disk before reopening

        # set reference as resampled esa
        esa_resampled = gdal.Open(esa_outpath, gdalconst.GA_ReadOnly)
        esa_proj = esa_resampled.GetProjection()
        esa_geotrans = esa_resampled.GetGeoTransform()
        width = esa_resampled.RasterXSize
        height = esa_resampled.RasterYSize
        
        # tof transform: create the destination on the reference grid, then warp into it
        tof_out = gtiff.Create(tof_outpath, width, height, 1, gdalconst.GDT_Byte)
        tof_out.SetGeoTransform(esa_geotrans)
        tof_out.SetProjection(esa_proj)
        gdal.ReprojectImage(tof, tof_out, tof_proj, esa_proj, interpolation)
        
        # Hansen transform: the destination must exist before it is warped into
        hans_out = gtiff.Create(hans_outpath, width, height, 1, gdalconst.GDT_Byte)
        hans_out.SetGeoTransform(esa_geotrans)
        hans_out.SetProjection(esa_proj)
        gdal.ReprojectImage(hans, hans_out, hans_proj, esa_proj, interpolation)
        
        # dereference every dataset so GDAL flushes and closes the files
        tof_out = hans_out = esa_resampled = None
        tof = hans = esa = None
                
    return None

def _raster_shape(path):
    '''Returns the (rows, cols) shape of the raster at path and closes the file.'''
    with rs.open(path) as src:
        return src.shape


def _upsample_raster(src_path, dst_path, scale):
    '''
    Reads the raster at src_path at `scale` times its row/column count 
    (nearest neighbour) and writes it as a single-band uint8 GeoTIFF to 
    dst_path. The source is fully read and closed before the destination is 
    opened, so src_path and dst_path may be the same file.
    
    Returns a tuple (original_shape, resampled_shape).
    '''
    with rs.open(src_path) as src:
        original_shape = src.shape
        height = int(src.height)
        width = int(src.width)
        
        # resample data to new resolution -- nearest keeps class/bin values intact
        resampled = src.read(out_shape=(src.count, (height * scale), (width * scale)),
                             resampling=Resampling.nearest)
        
        # removes extra (band) index
        resampled = resampled.squeeze()
        
        # scale image transform so the finer pixels cover the same extent
        transform = src.transform * src.transform.scale((width / resampled.shape[-1]),
                                                        (height / resampled.shape[-2]))
    
    # assert raster shape, datatype and max/min values
    assert resampled.dtype == 'uint8'
    assert resampled.shape != (0, ) and len(resampled.shape) <= 2
    assert resampled.max() <= 255 and resampled.min() >= 0
    
    with rs.open(dst_path, 'w', 
                 driver='GTiff',
                 height=resampled.shape[0], 
                 width=resampled.shape[1], 
                 count=1,
                 dtype="uint8",
                 crs='+proj=longlat +datum=WGS84 +no_defs',
                 transform=transform,
                 compress='lzw') as dst:
        dst.write(resampled, 1)
    
    return original_shape, tuple(resampled.shape)


def _translate_raster(src_path, dst_path, options):
    '''Runs gdal.Translate from src_path to dst_path and flushes the output.'''
    source = gdal.Open(src_path)
    if source is None:
        raise FileNotFoundError(f'GDAL could not open {src_path}')
    ds = gdal.Translate(dst_path, source, options=options)
    # dereference to flush the output and release the input
    ds = None
    source = None


def resample_resize(country, limit=1):
    
    '''
    Takes in a country name to import a clipped raster and resamples the raster
    to convert it to higher resolution. Crops or pads TOF/Hansen rasters to match
    the bounds of the resampled ESA raster. Returns the new rasters as individual 
    files in the country's "resampled_rasters" folder. 

    Resampling here multiplies the raster dimensions by the scale factor and 
    divides the pixel size by it, so a scale >1 is an upsample.

        i.e. given dimensions of (22, 26) and a scale of 30, the resampled 
        raster has dimensions of (660, 780) covering the same extent
        
    ESA is upsampled by 30 and Hansen by 3; TOF is only cropped.
        
    Attributes
    ----------
    country : str
        a string indicating the country files to import
    limit : int or None
        number of admin boundaries to process, in shapefile order. Defaults 
        to 1 (only the first admin); pass None to process all of them.
    '''
    
    for source_name in ('hansen', 'tof', 'esa'):
        os.makedirs(f'{country}/resampled_rasters/{source_name}', exist_ok=True)
    
    # import new shapefile containing only polygons
    shapefile = gpd.read_file(f'{country}/{country}_adminboundaries_exp.geojson')
    admin_boundaries = list(shapefile.NAME_1)
    if limit is not None:
        admin_boundaries = admin_boundaries[:limit]
    
    for admin in admin_boundaries:
        print(admin)
         
        # Resample ESA
        esa_inpath = f'{country}/clipped_rasters/esa/{admin}.tif'
        esa_outpath = f'{country}/resampled_rasters/esa/{admin}.tif'
        esa_before, esa_after = _upsample_raster(esa_inpath, esa_outpath, 30)
        print(f'ESA original: {esa_before}')
        print(f'ESA after resample: {esa_after}')
        
        # define parameters for gdal translate
        with rs.open(esa_outpath) as esa:
            esa_bounds = esa.bounds
        
        # projWin has to live inside the options object: gdal.Translate ignores
        # every other keyword once `options` is a TranslateOptions instance.
        # Order is [ulx, uly, lrx, lry], i.e. left, top, right, bottom.
        translateoptions = gdal.TranslateOptions(format='Gtiff', 
                                                  outputSRS='EPSG:4326',
                                                  outputType=gdal.GDT_Byte,
                                                  noData=255,
                                                  creationOptions=['COMPRESS=LZW'],
                                                  resampleAlg='nearest',
                                                  projWin=[esa_bounds.left, 
                                                           esa_bounds.top, 
                                                           esa_bounds.right, 
                                                           esa_bounds.bottom])
        
        # Crop Hansen to ESA bounds 
        hans_raster = f'{country}/clipped_rasters/hansen/{admin}.tif'
        hans_outpath = f'{country}/resampled_rasters/hansen/{admin}.tif'
        print(f'Hansen original: {_raster_shape(hans_raster)}')
        _translate_raster(hans_raster, hans_outpath, translateoptions)
        print(f'Hansen after gdal translate: {_raster_shape(hans_outpath)}')
        
        # Resample hansen -- starts from the cropped file so the crop is kept
        _, hans_after = _upsample_raster(hans_outpath, hans_outpath, 3)
        print(f'Hansen after resample: {hans_after}')
        
        # Crop TOF to ESA bounds (move to resampled folder, skip resample)
        tof_raster = f'{country}/clipped_rasters/tof/{admin}.tif'
        tof_outpath = f'{country}/resampled_rasters/tof/{admin}.tif'
        _translate_raster(tof_raster, tof_outpath, translateoptions)
        print(f'TOF original: {_raster_shape(tof_raster)}')
        print(f'TOF after gdal translate: {_raster_shape(tof_outpath)}')
        print(' ')
    return None


In [40]:
# toy tree-cover values used to try out the bin-shifting logic
tof = np.array([88, 84, 80, 10, 12, 17, 91, 42, 20, 23, 26, 29, 31, 39, 90, 92, 97], dtype='float32')

In [51]:
# (valid pixel count, NaN pixel count) for tof_class
(np.count_nonzero(np.logical_not(np.isnan(tof_class))),
 np.count_nonzero(np.isnan(tof_class)))

(4093998, 14244852)

In [41]:
# NOTE(review): `data` is not defined in the surrounding cells -- presumably left over
# from an earlier session; confirm before relying on this cell.
# The two updates run in order, so a value that lands exactly on 90 after the first
# line is shifted again by the second.
data[data < 90] += 4.5
data[data == 90] += 5


tof[(tof > 90) & (tof < 99)] += 4.5 # adds 4.5 to values strictly between 90 and 99 (e.g. 91 becomes 95.5)
tof[tof == 90] += 5  # adds 5 to values equal to 90, making them 95
tof

array([88., 84., 80., 10., 12., 17., 91., 42., 20., 23., 26., 29., 31.,
       39., 95., 92., 97.], dtype=float32)

In [43]:
# compare the mean of raw values with the mean after collapsing them all to 24.5
test1 = np.array([21, 24, 25, 28, 29], dtype='float32')
test2 = np.full(5, 24.5)
(test1.mean(), test2.mean())

(25.4, 24.5)

In [5]:
tof = np.array([0, 1, 2, 5, 6, 3, 8, 9, 10, 10, 2, 4, 6, 7, 8], dtype='float32')

# values strictly between 0 and 10 collapse to 4.5; zeros are left alone
tof[np.logical_and(tof > 0, tof < 10)] = 4.5
# values equal to 10 are set to 5
tof[np.equal(tof, 10)] = 5
tof

array([0. , 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 4.5, 5. , 5. , 4.5, 4.5, 4.5,
       4.5, 4.5], dtype=float32)

In [28]:
belize_tof_files = [name for name in os.listdir('Belize/resampled_rasters/tof')
                    if name != '.ipynb_checkpoints']

# only the first raster is processed while experimenting
for file in belize_tof_files[:1]:

    # cast to float32 so no-data cells can be replaced with NaN
    with rs.open(f'Belize/resampled_rasters/tof/{file}') as tof_src:
        tof = tof_src.read(1).astype(np.float32)
    with rs.open(f'Belize/resampled_rasters/hansen/{file}') as hans_src:
        hans = hans_src.read(1).astype(np.float32)
    with rs.open(f'Belize/resampled_rasters/esa/{file}') as esa_src:
        esa = esa_src.read(1).astype(np.float32)

    # recode ESA value 0 to 2
    np.putmask(esa, esa == 0.0, 2.0)

    # bring esa/hansen onto the tof grid (order=0 is nearest neighbour)
    print('Resizing...')
    if esa.shape != tof.shape:
        esa = resize(esa, tof.shape, order=0, preserve_range=True)

    if hans.shape != tof.shape:
        hans = resize(hans, tof.shape, order=0, preserve_range=True)

    # land cover classes present in this admin district
    esa_classes = np.unique(esa)

    for cover in esa_classes[:1]:

        # blank out (NaN) every tof pixel that belongs to a different class
        print(f'Filtering to {cover} for {file}...')
        tof_class_new = tof.copy()
        np.putmask(tof_class_new, esa != cover, np.nan)

Resizing...
Filtering to 2.0 for Toledo.tif...


In [16]:
# tabulate each distinct tof_class_new value next to its pixel count
unique, counts = np.unique(tof_class_new, return_counts=True)
frequencies = np.column_stack((unique, counts))

In [18]:
# preview the first ten (value, count) rows
frequencies[:10]

array([[0.00000e+00, 1.03472e+05],
       [2.00000e+01, 1.45800e+03],
       [4.00000e+01, 5.77000e+03],
       [6.00000e+01, 4.12600e+03],
       [8.00000e+01, 5.88300e+03],
       [1.00000e+02, 6.60200e+03],
       [        nan, 1.00000e+00],
       [        nan, 1.00000e+00],
       [        nan, 1.00000e+00],
       [        nan, 1.00000e+00]])

In [23]:
# value counts for the Toledo tof raster as stored on disk (no float/NaN conversion)
with rs.open('Belize/resampled_rasters/tof/Toledo.tif') as toledo_src:
    tof = toledo_src.read(1)
unique, counts = np.unique(tof, return_counts=True)
toledo_tof = np.column_stack((unique, counts))
toledo_tof

array([[       0, 93976598],
       [      20,    47917],
       [      40,   194512],
       [      60,   160780],
       [      80,   296476],
       [     100,  1512940]])

In [5]:
# read band 1 of each clipped Puntarenas1 raster and compare their shapes
tof, hans, esa = (rs.open(f'Costa Rica/clipped_rasters/{source}/Puntarenas1.tif').read(1)
                  for source in ('tof', 'hansen', 'esa'))
print('esa: {}, tof: {}, hans: {}'.format(esa.shape, tof.shape, hans.shape))

esa: (22, 26), tof: (638, 746), hans: (232, 271)


In [6]:
# esa shape scaled by 30, tof shape as-is, hansen shape scaled by 3
print(tuple(dim * 30 for dim in (22, 26)), (638, 746), tuple(dim * 3 for dim in (232, 271)))


(660, 780) (638, 746) (696, 813)


In [82]:
# shape of the resampled Apaneca tof raster
with rs.open('El Salvador/resampled_rasters/tof/Apaneca.tif') as apaneca_src:
    tof = apaneca_src.read(1)
tof.shape

(884, 631)

In [84]:
# same raster as float32, with the 255 value swapped for NaN; the shape is unchanged
with rs.open('El Salvador/resampled_rasters/tof/Apaneca.tif') as apaneca_src:
    tof = apaneca_src.read(1).astype(np.float32)
np.putmask(tof, tof == 255, np.nan)
tof.shape

(884, 631)

In [6]:
## EXAMPLE CODE
# Counts hectares above 10% canopy cover for one admin and writes two rasters:
# the per-hectare mean tree cover and a binary mask of it.

# read everything needed inside the context manager so the file gets closed
with rs.open('El Salvador/resampled_rasters/tof/Apaneca.tif') as tof_data:
    width = tof_data.shape[1]
    height = tof_data.shape[0]
    bounds = tof_data.bounds
    tof_array = tof_data.read(1)[:13590, :25810].astype(np.float32)

# non contiguous hectares: count qualifying pixels, 100 pixels per hectare
tof_array[tof_array == 255] = np.nan
print(f"There are {np.sum(tof_array > 10) / 100} non-contiguous hectares above 10% canopy cover")

# contiguous hectares
# reshape_for_stats is defined elsewhere in this notebook; it must return a 4D
# array for the mean over axes (1, 3) below to work
tof_array = reshape_for_stats(tof_array) #there was another reshape that didn't work
tof_array = np.nanmean(tof_array, axis = (1, 3))
# blocks with no valid pixel average to NaN; zero them explicitly because
# casting NaN to uint8 is undefined
tof_array[np.isnan(tof_array)] = 0
tof_array = (np.floor(tof_array)).astype(np.uint8)
print(f"There are {np.sum(tof_array > 10)} contiguous hectares above 10% canopy cover")

transform = rs.transform.from_bounds(west = bounds[0], 
                                     south = bounds[1],
                                     east = bounds[2], 
                                     north = bounds[3],
                                     width = tof_array.shape[1], 
                                     height = tof_array.shape[0])

# NOTE(review): the binary mask uses >= 10 while the counts above use > 10 -- confirm
# which threshold is intended
output_rasters = {"el salvador-one-hectare-treecover.tif": np.array(tof_array),
                  "el salvador-one-hectare-binary.tif": np.array(tof_array >= 10).astype(np.uint8)}

for output_name, output_array in output_rasters.items():
    with rs.open(output_name, 'w', 
                 driver='GTiff',
                 height=tof_array.shape[0], 
                 width=tof_array.shape[1], 
                 count=1,
                 dtype="uint8",
                 crs='+proj=longlat +datum=WGS84 +no_defs',
                 transform=transform) as new_dataset:
        new_dataset.write(output_array, 1)

There are 3557.54 non-contiguous hectares above 10% canopy cover
There are 3709 contiguous hectares above 10% canopy cover


In [19]:
# load the El Salvador admin boundaries
jurisdictions = gpd.read_file('El Salvador/El Salvador_admin.geojson')

In [4]:
# load the Costa Rica admin boundaries and preview the first rows
cr_admins = gpd.read_file('Costa Rica/cri_admbnda_adm2_2020.shp')
cr_admins.head()

Unnamed: 0,ADM0_PCODE,ADM0_ES,ADM1_PCODE,ADM1_ES,ADM2_REF,ADM2_PCODE,ADM2_ES,geometry
0,CR,Costa Rica,CR03,Guanacaste,Guanacaste,CR0301,Abangares,"MULTIPOLYGON (((375722.282 1121031.337, 375696..."
1,CR,Costa Rica,CR07,San José,San José,CR0701,Acosta,"POLYGON ((482255.682 1090408.565, 482281.722 1..."
2,CR,Costa Rica,CR06,Puntarenas,Puntarenas,CR0601,Aguirre,"MULTIPOLYGON (((486711.014 1033380.147, 486700..."
3,CR,Costa Rica,CR01,Alajuela,Alajuela,CR0101,Alajuela,"POLYGON ((482377.488 1151480.958, 482376.471 1..."
4,CR,Costa Rica,CR07,San José,San José,CR0702,Alajuelita,"POLYGON ((487024.603 1098008.676, 487070.576 1..."


In [5]:
# reproject the shapefile to EPSG:4326 (lon/lat degrees)
# the CRS string must not contain literal quote characters
cr_admins = cr_admins.to_crs('EPSG:4326')

In [30]:
# per-geometry bounding boxes (minx, miny, maxx, maxy), one row per admin
cr_bounds = cr_admins.geometry.bounds
cr_bounds

Unnamed: 0,minx,miny,maxx,maxy
0,-85.225659,10.109038,-84.827092,10.389386
1,-84.341693,9.622506,-84.132088,9.861633
2,-84.253801,9.254671,-83.840040,9.572033
3,-84.367680,9.914267,-84.160955,10.413694
4,-84.145894,9.847564,-84.084156,9.931675
...,...,...,...,...
76,-84.592696,9.569590,-84.403502,9.925489
77,-85.449169,10.670787,-84.880578,11.065570
78,-84.362149,10.048149,-84.226587,10.276312
79,-84.034999,9.964382,-83.858583,10.190042


In [28]:
# overall extent: smallest minimum and largest maximum across all admin bounding boxes
bounds = jurisdictions.geometry.bounds
min_x, min_y = bounds.minx.min(), bounds.miny.min()
max_x, max_y = bounds.maxx.max(), bounds.maxy.max()
(min_x, min_y, max_x, max_y)

(-90.03624725299994, 13.417916298000023, -88.95311737099996, 13.99701213800006)

In [43]:
# round each bound to a multiple of 10 degrees
# NOTE(review): lower_y is rounded up like upper_y, while lower_x is rounded down --
# presumably deliberate; confirm
lower_x = 10 * math.floor(min_x / 10)
lower_y = 10 * math.ceil(min_y / 10)
upper_y = 10 * math.ceil(max_y / 10)
upper_x = 10 * math.ceil(max_x / 10)

In [44]:
# show the rounded bounds
lower_x, lower_y, upper_x, upper_y

(-100, 20, -80, 20)

In [52]:
# x values in steps of 10; upper_x itself is excluded
for x in range(lower_x, upper_x, 10):
    print(x)

-100
-90


In [53]:
# y values in steps of 10; the +10 makes upper_y itself included
for x in range(lower_y, upper_y + 10, 10):
    print(x)

20


In [21]:
# lower_x = np.absolute(math.floor(min_x / 10) * 10)
# lower_y = np.absolute(math.floor(min_y / 10) * 10)
# upper_y = np.absolute(math.ceil(max_y / 10) * 10)
# upper_x = np.absolute(math.ceil(max_x / 10) * 10)
# lower_x, lower_y, upper_x, upper_y

In [75]:
# from Appdx 1 of ESA product user guide
_ESA_LEGEND = {0: 'No Data',
               10: 'Cropland, rainfed',
               11: 'Cropland, rainfed, herbaceous cover',
               20: 'Cropland, irrigated or post-flooding',
               30: 'Mosaic cropland (>50%) / natural vegetation (tree, shrub, herbaceous cover)(<50%)',
               40: 'Mosaic natural vegetation (tree, shrub, herbaceous cover) (>50%) / cropland (<50%)',
               50: 'Tree cover, broadleaved, evergreen, closed to open (>15%)',
               60: 'Tree cover, broadleaved, deciduous, closed to open (>15%)',
               70: 'Tree cover, needleleaved, evergreen, closed to open (>15%)',
               80: 'Tree cover, needleleaved, deciduous, closed to open (>15%)',
               90: 'Tree cover, mixed leaf type (broadleaved and needleleaved)',
               100: 'Mosaic tree and shrub (>50%) / herbaceous cover (<50%)',
               110: 'Mosaic herbaceous cover (>50%) / tree and shrub (<50%)',
               120: 'Shrubland',
               130: 'Grassland',
               140: 'Lichens and mosses',
               150: 'Sparse vegetation (tree, shrub, herbaceous cover) (<15%)',
               160: 'Tree cover, flooded, fresh or brakish water',
               170: 'Tree cover, flooded, saline water',
               180: 'Shrub or herbaceous cover, flooded, fresh/saline/brakish water',
               190: 'Urban areas',
               200: 'Bare areas',
               210: 'Water bodies',
               220: 'Permanent snow and ice'}

# ESA land cover id -> IPCC land cover class
_ESA_TO_IPCC = {0: 'Other',
                10: 'Agriculture',
                11: 'Agriculture',
                20: 'Agriculture',
                30: 'Agriculture',
                40: 'Agriculture',
                50: 'Forest',
                60: 'Forest',
                70: 'Forest',
                80: 'Forest',
                90: 'Forest',
                100: 'Forest',
                110: 'Grassland',
                120: 'Other',
                130: 'Grassland',
                140: 'Other',
                150: 'Other',
                160: 'Forest',
                170: 'Forest',
                180: 'Wetland',
                190: 'Settlement',
                200: 'Other',
                210: 'Other',
                220: 'Other'}


def _read_first_band(path):
    '''Reads band 1 of the raster at path and closes the file.'''
    with rs.open(path) as src:
        return src.read(1)


def calculate_stats_esa_lc(country, shapefile):
    
    '''
    Takes in a country name to import tof/hansen/esa rasters and calculates mean 
    tree cover and the number of hectares above 10% canopy cover per 
    administrative boundary and ESA land cover class. Returns a pandas 
    dataframe with one row per (admin, land cover class) pair.
    
    Attributes
    ----------
    country : str
        a string indicating the country files to import
    shapefile : geopandas GeoDataFrame
        admin boundaries; only its NAME_1 column is used, to find the
        '{admin}.tif' rasters in the country's "resampled_rasters" folders
        
    Returns
    -------
    tree_cover : pandas DataFrame
        columns: admin, esa_id, lc_class, ipcc_class, tof_mean_tc, 
        tof_total_ha, hansen_mean_tc, hansen_total_ha, tof_hans 
        (tof_total_ha minus hansen_total_ha)
    '''
    
    columns = ['admin', 
               'esa_id', 
               'lc_class',
               'ipcc_class',
               'tof_mean_tc',
               'tof_total_ha', 
               'hansen_mean_tc',
               'hansen_total_ha',
               'tof_hans']
    
    # rows are collected in a list and turned into a dataframe once at the end;
    # growing a dataframe row by row is quadratic and DataFrame.append no 
    # longer exists in pandas 2
    records = []
    
    admin_boundaries = list(shapefile.NAME_1)
    
    for admin in admin_boundaries:
        
        esa = _read_first_band(f'{country}/resampled_rasters/esa/{admin}.tif')
        tof = _read_first_band(f'{country}/resampled_rasters/tof/{admin}.tif')
        hansen = _read_first_band(f'{country}/resampled_rasters/hansen/{admin}.tif')
        
        # force esa/hansen to have the same shape as tof (order=0: nearest neighbour)
        if esa.shape != tof.shape:
            esa = resize(esa, tof.shape, order=0, preserve_range=True)
        
        if hansen.shape != tof.shape:
            hansen = resize(hansen, tof.shape, order=0, preserve_range=True)
            
        # reshape_for_stats is a helper defined elsewhere in this notebook
        esa = reshape_for_stats(esa)
        tof = reshape_for_stats(tof)
        hansen = reshape_for_stats(hansen)
        
        # get a list of land cover classes in that jurisdiction
        esa_classes = np.unique(esa)
   
        for cover in esa_classes:
        
            in_class = esa == cover
            tof_class = tof[in_class]
            hansen_class = hansen[in_class]
            
            # calculate mean tree cover for each lc class in the jurisdiction
            tof_tc_by_class = round(np.mean(tof_class), 3)
            hansen_tc_by_class = round(np.mean(hansen_class), 3)
            
            # calculate number of hectares in each jurisdiction above 10% canopy cover per class
            # get sum of 10m pixels above 10% and divide by 100 to convert to non-contiguous hectares
            tof_ha_over10 = np.sum(tof_class > 10.0) / 100 
            hansen_ha_over10 = np.sum(hansen_class > 10.0) / 100
            
            records.append({'admin': admin, 
                            'esa_id': cover,
                            'tof_mean_tc': tof_tc_by_class,
                            'tof_total_ha': tof_ha_over10,
                            'hansen_mean_tc': hansen_tc_by_class,
                            'hansen_total_ha': hansen_ha_over10})
    
    tree_cover = pd.DataFrame(records, columns=columns)
    
    # label every row once, after all admins have been collected
    tree_cover['lc_class'] = tree_cover['esa_id'].map(_ESA_LEGEND)
    tree_cover['ipcc_class'] = tree_cover['esa_id'].map(_ESA_TO_IPCC)
    tree_cover['tof_hans'] = tree_cover['tof_total_ha'] - tree_cover['hansen_total_ha']
        
    print(f'{country} has {tree_cover.esa_id.nunique()} land cover classes.')   
    return tree_cover

In [37]:
shapefile = gpd.read_file(f'insert new str')
admin_boundaries_all = list(shapefile.NAME_1)

# build the distinct admin names that remain once numeric suffixes are stripped;
# dropping every digit handles one- and two-digit suffixes alike
no_ints = []
for admin in admin_boundaries_all:
    # names without any digit are not split admins, skip them
    if not any(map(str.isdigit, admin)):
        continue
    clean_admin = ''.join(filter(lambda char: not char.isdigit(), admin))
    no_ints.append(clean_admin)
# de-duplicate (ordering is not preserved)
no_ints = list(set(no_ints))    

In [50]:
for admin_2 in no_ints:
    # gather list of files for that admin (ex: Puntarenas1.tif, Puntarenas2.tif, Puntarenas3.tif)
    files_to_merge = [] # this needs to be in dataset reader mode
    files_to_delete = [] # just a string of the file name
    # '?' matches exactly one character, so one- and two-digit suffixes need their own
    # pattern; the two result lists are concatenated because `and` would yield only one of them
    admin_paths = (glob.glob(f'Costa Rica/resampled_rasters/tof/{admin_2}?.tif')
                   + glob.glob(f'Costa Rica/resampled_rasters/tof/{admin_2}??.tif'))
    for path in admin_paths:
        filename = os.path.basename(path) 
#         files_to_delete.append(filename)
#         src = rs.open(f'{country}/resampled_rasters/{data}/{filename}')
#         files_to_merge.append(src)

In [None]:
# count the digit characters in `s` (every non-digit is ignored); uses str.isdigit,
# the same test the admin-name cleaning above relies on
sum(char.isdigit() for char in s)

In [111]:
# file names of the Acosta hansen rasters that carry a single-character suffix
acosta_pattern = 'Costa Rica/resampled_rasters/hansen/Acosta?.tif'
files_to_merge = [os.path.basename(path) for path in glob.glob(acosta_pattern)]
print(files_to_merge)

[]
