In [79]:
import os
os.environ['USE_PYGEOS'] = '0'   # Suppresses some warning about geopandas
import geopandas as gpd

# scipy basics
import numpy as np
import botocore
from osgeo import gdal      # Necessary to do this import to get rasterio to import
import rasterio as rio
import rasterio.features

import time

# dask/parallelization libraries
import coiled
import dask
import dask.array as dar
from dask.distributed import Client, LocalCluster, futures_of
import rioxarray
import xarray as xr

<font size="6">Making cloud and local clusters</font> 

In [80]:
# Request a Coiled (cloud) dask cluster: 25 workers in us-east-2, spot instances with
# fallback, shut down automatically after 20 idle minutes.
# NOTE(review): the client summary shown further down reports fewer workers than requested here.
coiled_cluster = coiled.Cluster(
    n_workers=25,
    use_best_zone=True, 
    compute_purchase_option="spot_with_fallback",
    idle_timeout="20 minutes",
    region="us-east-2",
    # name="DGibbs Europe height flux model", 
    account='jterry64'   # Necessary to use the AWS environment that Justin set up in Coiled
)

Output()

Output()

In [81]:
# Coiled cluster (cloud run)
# Get a dask client connected to the Coiled cluster; the bare expression on the last line
# displays the client/cluster summary as the cell output.
coiled_client = coiled_cluster.get_client()
coiled_client

0,1
Connection method: Cluster object,Cluster type: coiled.Cluster
Dashboard: https://cluster-yhocq.dask.host/FQ2-MRMsT85J2azc/status,

0,1
Dashboard: https://cluster-yhocq.dask.host/FQ2-MRMsT85J2azc/status,Workers: 23
Total threads: 92,Total memory: 340.65 GiB

0,1
Comm: tls://10.1.34.122:8786,Workers: 23
Dashboard: http://10.1.34.122:8787/status,Total threads: 92
Started: Just now,Total memory: 340.65 GiB

0,1
Comm: tls://10.1.45.244:39181,Total threads: 4
Dashboard: http://10.1.45.244:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.45.244:36697,
Local directory: /scratch/dask-scratch-space/worker-d10x0sqh,Local directory: /scratch/dask-scratch-space/worker-d10x0sqh

0,1
Comm: tls://10.1.45.152:36725,Total threads: 4
Dashboard: http://10.1.45.152:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.45.152:46213,
Local directory: /scratch/dask-scratch-space/worker-jd7jdusn,Local directory: /scratch/dask-scratch-space/worker-jd7jdusn

0,1
Comm: tls://10.1.45.158:36611,Total threads: 4
Dashboard: http://10.1.45.158:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.45.158:43131,
Local directory: /scratch/dask-scratch-space/worker-choowvo3,Local directory: /scratch/dask-scratch-space/worker-choowvo3

0,1
Comm: tls://10.1.41.39:45995,Total threads: 4
Dashboard: http://10.1.41.39:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.41.39:41033,
Local directory: /scratch/dask-scratch-space/worker-65rt168d,Local directory: /scratch/dask-scratch-space/worker-65rt168d

0,1
Comm: tls://10.1.35.36:34769,Total threads: 4
Dashboard: http://10.1.35.36:8787/status,Memory: 14.83 GiB
Nanny: tls://10.1.35.36:33927,
Local directory: /scratch/dask-scratch-space/worker-q5avj81c,Local directory: /scratch/dask-scratch-space/worker-q5avj81c

0,1
Comm: tls://10.1.35.93:38539,Total threads: 4
Dashboard: http://10.1.35.93:8787/status,Memory: 14.83 GiB
Nanny: tls://10.1.35.93:33577,
Local directory: /scratch/dask-scratch-space/worker-wbmbwm1c,Local directory: /scratch/dask-scratch-space/worker-wbmbwm1c

0,1
Comm: tls://10.1.40.83:36945,Total threads: 4
Dashboard: http://10.1.40.83:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.40.83:35635,
Local directory: /scratch/dask-scratch-space/worker-ieylmkwp,Local directory: /scratch/dask-scratch-space/worker-ieylmkwp

0,1
Comm: tls://10.1.34.130:40393,Total threads: 4
Dashboard: http://10.1.34.130:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.34.130:41465,
Local directory: /scratch/dask-scratch-space/worker-cccib6il,Local directory: /scratch/dask-scratch-space/worker-cccib6il

0,1
Comm: tls://10.1.43.161:45507,Total threads: 4
Dashboard: http://10.1.43.161:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.43.161:38197,
Local directory: /scratch/dask-scratch-space/worker-kba6hud8,Local directory: /scratch/dask-scratch-space/worker-kba6hud8

0,1
Comm: tls://10.1.36.39:34337,Total threads: 4
Dashboard: http://10.1.36.39:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.36.39:39013,
Local directory: /scratch/dask-scratch-space/worker-miiehnh2,Local directory: /scratch/dask-scratch-space/worker-miiehnh2

0,1
Comm: tls://10.1.41.71:40747,Total threads: 4
Dashboard: http://10.1.41.71:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.41.71:40793,
Local directory: /scratch/dask-scratch-space/worker-puos15cn,Local directory: /scratch/dask-scratch-space/worker-puos15cn

0,1
Comm: tls://10.1.33.229:37947,Total threads: 4
Dashboard: http://10.1.33.229:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.33.229:36425,
Local directory: /scratch/dask-scratch-space/worker-iyn11qvl,Local directory: /scratch/dask-scratch-space/worker-iyn11qvl

0,1
Comm: tls://10.1.43.224:40329,Total threads: 4
Dashboard: http://10.1.43.224:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.43.224:35441,
Local directory: /scratch/dask-scratch-space/worker-usvx1vgz,Local directory: /scratch/dask-scratch-space/worker-usvx1vgz

0,1
Comm: tls://10.1.37.114:38115,Total threads: 4
Dashboard: http://10.1.37.114:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.37.114:42993,
Local directory: /scratch/dask-scratch-space/worker-x694bo5n,Local directory: /scratch/dask-scratch-space/worker-x694bo5n

0,1
Comm: tls://10.1.32.120:33507,Total threads: 4
Dashboard: http://10.1.32.120:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.32.120:45673,
Local directory: /scratch/dask-scratch-space/worker-4keuelww,Local directory: /scratch/dask-scratch-space/worker-4keuelww

0,1
Comm: tls://10.1.42.141:45289,Total threads: 4
Dashboard: http://10.1.42.141:8787/status,Memory: 14.67 GiB
Nanny: tls://10.1.42.141:32985,
Local directory: /scratch/dask-scratch-space/worker-szwkd5cb,Local directory: /scratch/dask-scratch-space/worker-szwkd5cb

0,1
Comm: tls://10.1.42.161:36731,Total threads: 4
Dashboard: http://10.1.42.161:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.42.161:39037,
Local directory: /scratch/dask-scratch-space/worker-_kbx59m7,Local directory: /scratch/dask-scratch-space/worker-_kbx59m7

0,1
Comm: tls://10.1.35.174:43183,Total threads: 4
Dashboard: http://10.1.35.174:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.35.174:40251,
Local directory: /scratch/dask-scratch-space/worker-pb4jcy2q,Local directory: /scratch/dask-scratch-space/worker-pb4jcy2q

0,1
Comm: tls://10.1.41.189:41483,Total threads: 4
Dashboard: http://10.1.41.189:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.41.189:38993,
Local directory: /scratch/dask-scratch-space/worker-0j74aswg,Local directory: /scratch/dask-scratch-space/worker-0j74aswg

0,1
Comm: tls://10.1.36.30:41841,Total threads: 4
Dashboard: http://10.1.36.30:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.36.30:35063,
Local directory: /scratch/dask-scratch-space/worker-gjqk95om,Local directory: /scratch/dask-scratch-space/worker-gjqk95om

0,1
Comm: tls://10.1.33.206:43147,Total threads: 4
Dashboard: http://10.1.33.206:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.33.206:46253,
Local directory: /scratch/dask-scratch-space/worker-n3_pup8s,Local directory: /scratch/dask-scratch-space/worker-n3_pup8s

0,1
Comm: tls://10.1.32.107:38603,Total threads: 4
Dashboard: http://10.1.32.107:8787/status,Memory: 14.82 GiB
Nanny: tls://10.1.32.107:37123,
Local directory: /scratch/dask-scratch-space/worker-5g77ox0h,Local directory: /scratch/dask-scratch-space/worker-5g77ox0h

0,1
Comm: tls://10.1.43.106:46511,Total threads: 4
Dashboard: http://10.1.43.106:8787/status,Memory: 14.81 GiB
Nanny: tls://10.1.43.106:45663,
Local directory: /scratch/dask-scratch-space/worker-ovxiw036,Local directory: /scratch/dask-scratch-space/worker-ovxiw036


In [None]:
# Local cluster (local run). Client() with no scheduler address starts a LocalCluster on this
# machine, so .compute() runs on local workers only, not on the Coiled cloud cluster.
# NOTE(review): a newly created Client normally registers itself as the default client, so run
# only one of the two client cells (cloud or local) per session — confirm.
local_client = Client()
local_client

<font size="6">Shutting down cloud and local clusters</font> 

In [90]:
# Tear down the Coiled cloud cluster.
# NOTE(review): this cell sits above the Analysis section, so a top-to-bottom "Run All" would
# shut the cluster down before the analysis cells execute; run it last.
coiled_cluster.shutdown()

In [None]:
# Shut down the local client and the local cluster it started.
# NOTE(review): this cell sits above the Analysis section, so a top-to-bottom "Run All" would
# shut the local cluster down before the analysis cells execute; run it last.
local_client.shutdown()

<font size="6">Analysis</font> 

<font size="4">Paths and functions</font>

In [82]:
# Shared S3 locations, local output folder, and run-wide constants

# Root S3 prefix for the GLAD Europe forest height inputs
general_uri = 's3://gfw2-data/forest_change/GLAD_Europe_height_data/'

# Dummy random test data stored under the same root prefix
random_data_uri = general_uri + 'dummy_random_data__20230901/'

# Local folder that exported rasters are written to
local_out_dir = 'C:\\GIS\\Carbon_model_Europe\\outputs\\'

# Run date stamp (YYYYMMDD) used in output file names
timestr = time.strftime('%Y%m%d')

# Tile size in degrees is from the top left of the tile.
# 10 is a full tile. Anything smaller is a subset of that.
tile_size = 10

# Chunk edge length passed to rioxarray when opening rasters
chunk_length = 8000


In [83]:
import rasterio.windows
import rasterio
import numpy as np

def get_tile_dataset_rio(uri, bounds, transform):
    """Read band 1 of the raster at `uri` for the window covering `bounds`.

    Parameters
    ----------
    uri : str
        Path or URI that rasterio can open.
    bounds : sequence of four numbers
        Extent unpacked into rasterio.windows.from_bounds (left, bottom, right, top).
    transform : affine transform
        Transform used to convert `bounds` into a pixel window.
        NOTE(review): this is the caller's transform, so the window is only correct
        when the raster at `uri` shares the caller's grid — confirm for all inputs.

    Returns
    -------
    numpy.ndarray
        The 2-D array read from band 1. If the raster cannot be opened or read
        (rasterio.errors.RasterioIOError), an all-zero float64 array with the
        requested window's shape is returned instead.
    """
    # Build the window up front so the fallback below can match its size.
    window = rasterio.windows.from_bounds(*bounds, transform)
    try:
        with rasterio.open(uri) as ds:
            return ds.read(1, window=window)
    except rasterio.errors.RasterioIOError:
        # Size the zero fallback from the requested window rather than a fixed
        # chunk_length square, so it still lines up with the other layers when a
        # block is smaller than a full chunk.
        fallback_shape = (int(round(window.height)), int(round(window.width)))
        return np.zeros(fallback_shape)
    

<font size="4">Model steps</font>

In [84]:
# Input rasters for tile 50N_010E

# 10x10 degree rasters of actual data: forest height for both years and forest loss detection
forest_height_previous_uri = general_uri + '202307_revision/test_10x10_deg/50N_010E_FH_2020.tif'
forest_height_current_uri = general_uri + '202307_revision/test_10x10_deg/50N_010E_FH_2021.tif'
forest_loss_detection_uri = general_uri + '202307_revision/test_10x10_deg/50N_010E_DFL_2021.tif'

# Auxiliary carbon-model layers for the same tile
driver_uri = "s3://gfw2-data/climate/carbon_model/other_emissions_inputs/tree_cover_loss_drivers/processed/drivers_2022/20230407/50N_010E_tree_cover_loss_driver_processed.tif"
planted_forest_type_uri = "s3://gfw2-data/climate/carbon_model/other_emissions_inputs/planted_forest_type/SDPT_v1/standard/20200730/50N_010E_plantation_type_oilpalm_woodfiber_other_unmasked.tif"
peat_uri = "s3://gfw2-data/climate/carbon_model/other_emissions_inputs/peatlands/processed/20230315/50N_010E_peat_mask_processed.tif"
tclf_uri = "s3://gfw2-data/climate/carbon_model/other_emissions_inputs/tree_cover_loss_fires/20230315/processed/50N_010E_tree_cover_loss_fire_processed.tif"

# Listed in the same order as the positional parameters of classify()
uris = [
    forest_height_previous_uri,
    forest_height_current_uri,
    forest_loss_detection_uri,
    driver_uri,
    planted_forest_type_uri,
    peat_uri,
    tclf_uri,
]

In [85]:
from numba import jit
import numpy as np
import concurrent.futures

@jit(nopython=True)
def classify(forest_height_previous_block, forest_height_current_block, forest_loss_detection_block, driver_block, planted_forest_type_block, peat_block, tclf_block):
    """Assign a forest-state code to every pixel of a block.

    All inputs are 2-D arrays indexed [row, col] over the shape of
    `forest_height_previous_block`. A height of 5 or more counts as forest.

    Codes written to the output:
        4 - forest in both years, on peat
        6 - forest in both years, not on peat; also no forest in either year
            with a truthy driver value (not on peat)
        2 - forest in the previous year only (loss)
        3 - forest in the current year only (gain)
        5 - no forest in either year, on peat
        7 - no forest in either year, tclf > 0
        8 - no forest in either year, planted_forest_type > 0
        0 - none of the above

    NOTE(review): code 6 is produced by two different branches — confirm that
    is intended. `forest_loss_detection_block` is accepted but not consulted.

    Returns
    -------
    numpy.ndarray
        float64 array of state codes with the shape of `forest_height_previous_block`.
    """
    # Only the state array is produced. To return a second array (e.g. emission factors) from
    # map_blocks, see https://dask.discourse.group/t/use-map-blocks-with-function-that-returns-a-tuple/84/7
    forest_states = np.zeros(forest_height_previous_block.shape)

    for row in range(forest_height_previous_block.shape[0]):
        for col in range(forest_height_previous_block.shape[1]):
            forest_height_previous = forest_height_previous_block[row, col]
            forest_height_current = forest_height_current_block[row, col]
            driver = driver_block[row, col]
            planted_forest_type = planted_forest_type_block[row, col]
            peat = peat_block[row, col]
            tclf = tclf_block[row, col]

            if forest_height_previous >= 5 and forest_height_current >= 5:   # maintained
                if peat:
                    forest_states[row, col] = 4
                else:
                    forest_states[row, col] = 6
            elif forest_height_previous >= 5 and forest_height_current < 5:  # loss
                forest_states[row, col] = 2
            elif forest_height_previous < 5 and forest_height_current >= 5:  # gain
                forest_states[row, col] = 3
            elif peat:
                forest_states[row, col] = 5
            elif driver:
                forest_states[row, col] = 6
            elif tclf > 0:
                forest_states[row, col] = 7
            elif planted_forest_type > 0:
                forest_states[row, col] = 8
            else:                                                            # no forest
                forest_states[row, col] = 0

    return forest_states

In [86]:
# Open the previous-year forest height raster as a chunked (dask-backed) DataArray using
# chunk_length as the chunk size, and drop the length-1 "band" dimension.
forest_height_previous = rioxarray.open_rasterio(forest_height_previous_uri, chunks=chunk_length).squeeze("band")
# All-zero array with the same shape, dtype and coordinates as the input.
# NOTE(review): the visible map_blocks call passes forest_height_previous as its template,
# not this variable — confirm whether `template` is still needed.
template = xr.zeros_like(forest_height_previous)

In [87]:
%%time
from numba import jit
import numpy as np
import concurrent.futures
import boto3

"""
Some code that applies the decision tree to decision_tree_ds to make an xarray of forest_states for the previous and current years
"""

# TODO don't map blocks?
def map_blocks(block):
    """Classify one chunk, upload the result to S3, and return it.

    For the extent of `block`, every raster in `uris` is read concurrently,
    the layers are passed to classify() in `uris` order, and the resulting
    forest-state array is written as a DEFLATE-compressed uint8 GeoTIFF and
    uploaded to s3://gfw2-data/climate/forest_states/<bounds>.tif.

    Parameters
    ----------
    block : xarray.DataArray
        One chunk of the template array; only its bounds, transform,
        coordinates and dtype are used.

    Returns
    -------
    xarray.DataArray
        A copy of `block` whose data are the forest-state codes, cast to the
        block's dtype so the result matches the map_blocks template.
    """
    import tempfile

    bounds = block.rio.bounds()
    transform = block.rio.transform()

    # Submit one S3 read per input layer; leaving the `with` block waits for all of them.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(get_tile_dataset_rio, uri, bounds, transform)
            for uri in uris
        ]

    # Collect results in submission order. classify() takes its layers positionally,
    # so they must arrive in `uris` order; as_completed() would yield them in
    # whatever order the downloads happen to finish.
    layers = [future.result() for future in futures]

    forest_states = classify(*layers)

    # Wrap the classification in the block's coordinates so that the classified
    # data (not the input heights) is what gets written and returned.
    classified = block.copy(data=forest_states.astype(block.dtype))

    file_name = "_".join([str(round(x)) for x in bounds])

    # Write to a throwaway directory so uploaded tiles do not pile up on the worker's disk.
    with tempfile.TemporaryDirectory() as tmp_dir:
        local_path = os.path.join(tmp_dir, f"{file_name}.tif")
        classified.rio.to_raster(local_path, compress='DEFLATE', dtype='uint8')

        s3_client = boto3.client("s3")
        s3_client.upload_file(local_path, "gfw2-data", Key=f"climate/forest_states/{file_name}.tif")

    return classified

# for tile in tiles:
#     blocks = get_blocks_from_tile(tile)
#     results = dask.compute(map_blocks, blocks)
#     # "success", "success", "failure"

# Run the map_blocks() function defined above on every chunk of the previous-year height array
# (via the DataArray.map_blocks method) and start the computation with .persist().
# The bare expression on the last line displays the resulting array summary.
forest_states = forest_height_previous.map_blocks(map_blocks, template=forest_height_previous).persist() 
forest_states

CPU times: total: 188 ms
Wall time: 185 ms


Unnamed: 0,Array,Chunk
Bytes,1.49 GiB,61.04 MiB
Shape,"(40000, 40000)","(8000, 8000)"
Dask graph,25 chunks in 1 graph layer,25 chunks in 1 graph layer
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray
"Array Chunk Bytes 1.49 GiB 61.04 MiB Shape (40000, 40000) (8000, 8000) Dask graph 25 chunks in 1 graph layer Data type uint8 numpy.ndarray",40000  40000,

Unnamed: 0,Array,Chunk
Bytes,1.49 GiB,61.04 MiB
Shape,"(40000, 40000)","(8000, 8000)"
Dask graph,25 chunks in 1 graph layer,25 chunks in 1 graph layer
Data type,uint8 numpy.ndarray,uint8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,(),()
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray
Array Chunk Bytes 4 B 4 B Shape () () Dask graph 1 chunks in 1 graph layer Data type int32 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,(),()
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,(),()
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray
Array Chunk Bytes 4 B 4 B Shape () () Dask graph 1 chunks in 1 graph layer Data type int32 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,(),()
Dask graph,1 chunks in 1 graph layer,1 chunks in 1 graph layer
Data type,int32 numpy.ndarray,int32 numpy.ndarray


In [89]:
futures_of(forest_states)

[<Future: error, key: ('band-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce',)>,
 <Future: error, key: ('spatial_ref-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce',)>,
 <Future: error, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 0, 0)>,
 <Future: error, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 0, 1)>,
 <Future: finished, type: numpy.ndarray, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 0, 2)>,
 <Future: finished, type: numpy.ndarray, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 0, 3)>,
 <Future: error, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 0, 4)>,
 <Future: finished, type: numpy.ndarray, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 1, 0)>,
 <Future: error, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 1, 1)>,
 <Future: finished, type: numpy.ndarray, key: ('<this-array>-map_blocks-76403797bfb5c6182f2954fc3ca4a2ce', 1, 2)>,
 <Future: err

2023-10-20 11:45:29,701 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client


In [68]:
# Restart the workers through the Coiled client (see the linked memory notes), then drop the
# notebook's reference to the persisted result.
coiled_client.restart()   # https://distributed.dask.org/en/latest/memory.html
del forest_states

In [None]:
# Preview the bounds-based file-name stem: each bound rounded to an integer and joined with "_".
# NOTE(review): the preceding cell deletes forest_states, so this raises NameError when the
# cells are run in order.
"_".join([str(round(x)) for x in forest_states.rio.bounds()])

In [None]:
# Exports forest state array to raster
# Writes a DEFLATE-compressed uint8 GeoTIFF into local_out_dir, named with the run date (timestr)
# and tile_size.
# NOTE(review): forest_states_corrected is not defined anywhere in the visible notebook —
# confirm where it comes from (forest_states?).

forest_states_corrected.rio.to_raster(f'{local_out_dir}forest_states_2021__{timestr}_{tile_size}_deg.tif', compress='DEFLATE', dtype='uint8')

In [None]:
import shapely.geometry as geometry
import shapely.wkt as wkt

def create_fishnet_grid(min_x, min_y, max_x, max_y, cell_size):
    """Build a grid of square cells covering a bounding box.

    Cells are generated row by row starting at (min_x, min_y). A new cell is
    started for every origin strictly below max_x / max_y, so the last column
    and row may extend past the box when its size is not a multiple of
    `cell_size`.

    Parameters
    ----------
    min_x, min_y, max_x, max_y : float
        Bounding box to cover.
    cell_size : float
        Edge length of each square cell; must be positive.

    Returns
    -------
    geopandas.GeoDataFrame
        One polygon per cell in a "geometry" column, with CRS EPSG:4326.
        Empty when the box has no extent.

    Raises
    ------
    ValueError
        If `cell_size` is not positive (the grid loops would never terminate).
    """
    if cell_size <= 0:
        raise ValueError(f"cell_size must be positive, got {cell_size!r}")

    geom_array = []

    # Cell origins are computed as min + index * cell_size instead of repeated
    # addition, so fractional cell sizes do not accumulate rounding drift.
    row = 0
    y = min_y
    while y < max_y:
        col = 0
        x = min_x
        while x < max_x:
            geom_array.append(
                geometry.Polygon(
                    [
                        (x, y),
                        (x, y + cell_size),
                        (x + cell_size, y + cell_size),
                        (x + cell_size, y),
                        (x, y),
                    ]
                )
            )
            col += 1
            x = min_x + col * cell_size
        row += 1
        y = min_y + row * cell_size

    # Passing geometry/crs explicitly also yields a valid (empty) frame when no cells are produced.
    fishnet = gpd.GeoDataFrame(geometry=geom_array, crs="EPSG:4326")
    return fishnet