<font size="6">Run the organic soils part of the AFOLU model</font> 

<font size="4">Must be run using the utilities_and_variables.ipynb kernel</font> 

In [137]:
import logging

In [138]:
# Configure the root logger: INFO and above, with timestamp, level and message on each line
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [139]:
def calculate_and_upload_drainage(bounds, is_final):
    """Create and upload the organic soil and drainage state rasters for one chunk.

    Steps: check that the tile exists, request every input layer for the chunk,
    check that the chunk has data, sort the layers by dtype into numba typed
    dictionaries, run process_soil, attach output metadata to each output array,
    and hand the result to save_and_upload_small_raster_set.

    Args:
        bounds: Chunk bounds. bounds[0] and bounds[3] are passed to xy_to_tile_id
            (presumably [W, S, E, N] in decimal degrees -- confirm against
            get_chunk_bounds).
        is_final: Passed through to check_for_tile, check_chunk_for_data and
            save_and_upload_small_raster_set. When False, an extra "Waiting for
            requests" message is logged.

    Returns:
        A "Success for ..." string when the chunk was processed, or None when the
        tile does not exist, the chunk has no data, or an exception was caught
        (the exception is logged with its traceback, not re-raised).
    """
    bounds_str = boundstr(bounds)    # String form of chunk bounds
    tile_id = xy_to_tile_id(bounds[0], bounds[3])    # tile_id in YYN/S_XXXE/W
    chunk_length_pixels = calc_chunk_length_pixels(bounds)   # Chunk length in pixels (as opposed to decimal degrees)

    no_data_val = 255
    logging.info(f"Processing tile {tile_id} with bounds {bounds_str}")

    try:
        # Dictionary of layers to download.
        # NOTE(review): "engert" reuses the dadap path, and "osm_roads" and "osm_canals" reuse the grip path.
        # These look like placeholders; confirm the intended S3 locations before relying on the drainage output.
        download_dict = {
            f"{land_cover}_2020": f"s3://gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_classes/2020/40000_pixels/20240205/{tile_id}__IPCC_classes_2020.tif",
            planted_forest_type_layer: f"s3://gfw2-data/climate/carbon_model/other_emissions_inputs/plantation_type/SDPTv2/20230911/{tile_id}_plantation_type_oilpalm_woodfiber_other.tif",
            planted_forest_tree_crop_layer: f"s3://gfw2-data/climate/carbon_model/other_emissions_inputs/plantation_simpleType__planted_forest_tree_crop/SDPTv2/20230911/{tile_id}.tif",
            "peat": f"s3://gfw2-data/climate/carbon_model/other_emissions_inputs/peatlands/processed/20230315/{tile_id}_peat_mask_processed.tif",
            "dadap": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/dadap_density/dadap_{tile_id}.tif",
            "engert": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/dadap_density/dadap_{tile_id}.tif",
            "grip": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/grip_density/grip_density_{tile_id}.tif",
            "osm_roads": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/grip_density/grip_density_{tile_id}.tif",
            "osm_canals": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/grip_density/grip_density_{tile_id}.tif",
        }

        # Checks whether tile exists at all. Doesn't try to download chunk if the tile doesn't exist.
        tile_exists = check_for_tile(download_dict, is_final)

        if tile_exists == 0:
            logging.info(f"Tile {tile_id} does not exist. Skipping.")
            return

        logging.info(f"Tile {tile_id} exists. Proceeding with downloading data.")
        futures = prepare_to_download_chunk(bounds, download_dict, no_data_val)

        if not is_final:
            logging.info(f"Waiting for requests for data in chunk {bounds_str} in {tile_id}: {timestr()}")

        # Waits for requests to come back with data from S3
        layers = {}
        for future in concurrent.futures.as_completed(futures):
            layer = futures[future]
            layers[layer] = future.result()
            logging.info(f"Downloaded data for layer: {layer}")

        data_in_chunk = check_chunk_for_data(layers, f"{land_cover}_", bounds_str, tile_id, no_data_val, is_final)

        if data_in_chunk == 0:
            logging.info(f"No data in chunk {bounds_str}. Skipping.")
            return

        logging.info(f"Data present in chunk {bounds_str}. Proceeding with processing.")

        # One numba typed dictionary per supported input dtype
        typed_dict_uint8 = Dict.empty(
            key_type=types.unicode_type,
            value_type=types.Array(types.uint8, 2, 'C')
        )
        typed_dict_int16 = Dict.empty(
            key_type=types.unicode_type,
            value_type=types.Array(types.int16, 2, 'C')
        )
        typed_dict_float32 = Dict.empty(
            key_type=types.unicode_type,
            value_type=types.Array(types.float32, 2, 'C')
        )
        typed_dict_by_dtype = {
            np.dtype(np.uint8): typed_dict_uint8,
            np.dtype(np.int16): typed_dict_int16,
            np.dtype(np.float32): typed_dict_float32,
        }

        # Validates every layer's dtype before any typed dictionary is filled
        for key, array in layers.items():
            if array.dtype not in typed_dict_by_dtype:
                raise TypeError(f"{key} dtype not in list")

        for key, array in layers.items():
            typed_dict_by_dtype[array.dtype][key] = array

        logging.info(f"Creating drainage map in {bounds_str} in {tile_id}: {timestr()}")
        soil_outputs = process_soil(
            typed_dict_uint8, typed_dict_int16, typed_dict_float32
        )

        # process_soil returns a single dictionary of output arrays. Unpacking that dictionary into two
        # names would bind its keys (strings), not dictionaries, so the result is normalized here.
        # A tuple of dictionaries is accepted as well.
        if isinstance(soil_outputs, tuple):
            output_dicts = soil_outputs
        else:
            output_dicts = (soil_outputs,)

        out_arrays = {}
        for output_dict in output_dicts:
            for key, value in output_dict.items():
                out_arrays[key] = value

        del soil_outputs
        del output_dicts

        # Attaches [array, dtype name, output pattern, period label] to every output
        out_dict_all_dtypes = {}
        for key, value in out_arrays.items():
            data_type = value.dtype.name
            if len(key) > 10 and key[-4:].isdecimal():
                # Key ends in a 10-character "_YYYY_YYYY" style suffix: the pattern is everything before it
                # and the period is the 5 years ending in the final year
                out_pattern = key[:-10]
                year = int(key[-4:])
                period = f'{year-5}_{year}'
            else:
                # Keys without a year suffix (e.g. 'soil', 'state' from process_soil) cannot be sliced
                # and parsed as above, so the whole key is used as the pattern.
                # NOTE(review): "2020" is used as the label because the land cover input is for 2020;
                # confirm that save_and_upload_small_raster_set accepts a single-year label.
                out_pattern = key
                period = "2020"
            out_dict_all_dtypes[key] = [value, data_type, out_pattern, period]

        del out_arrays

        logging.info(f"Saving and uploading rasters for chunk {bounds_str}.")
        save_and_upload_small_raster_set(bounds, chunk_length_pixels, tile_id, bounds_str, out_dict_all_dtypes, is_final)

        del out_dict_all_dtypes

        logging.info(f"Completed processing for chunk {bounds_str}.")
        return f"Success for {bounds_str}: {timestr()}"

    except Exception as e:
        # Best effort per chunk: a failure is reported (with traceback) and None is returned
        # rather than raising out of the function.
        logging.exception(f"Failed processing for {bounds_str}: {str(e)}")
        return None

In [140]:
@jit(nopython=True)
def process_soil(in_dict_uint8, in_dict_int16, in_dict_float32):
    """Classify each pixel's soil type and drainage state.

    Pixel codes written to the outputs:
        soil:  1 = organic (peat == 1), 2 = inorganic (everything else)
        state: 1 = drained, 2 = undrained, 0 = omitted from analysis (inorganic soil)

    An organic pixel is drained when any of the dadap, osm_roads, osm_canals,
    engert or grip layers is greater than 0, or when its land cover class is 5 or 6.
    All other organic pixels are undrained.

    Args:
        in_dict_uint8: Mapping with 'peat', 'land_cover_2020' and 'planted_forest_type' 2D arrays.
        in_dict_int16: Not read by this function.
        in_dict_float32: Mapping with 'dadap', 'osm_roads', 'osm_canals', 'engert' and 'grip' 2D arrays.

    Returns:
        A single dictionary with 'soil' and 'state' uint32 arrays shaped like the peat array.
    """
    peat_block = in_dict_uint8['peat']
    land_cover_block = in_dict_uint8['land_cover_2020']
    # Not used in the classification below; the lookup is kept so the layer is still a required input
    planted_forest_type_block = in_dict_uint8['planted_forest_type']
    dadap_block = in_dict_float32['dadap']
    osm_roads_block = in_dict_float32['osm_roads']
    osm_canals_block = in_dict_float32['osm_canals']
    engert_block = in_dict_float32['engert']
    grip_block = in_dict_float32['grip']

    rows, cols = peat_block.shape

    # Every pixel is assigned in the loop below, so uninitialized arrays are safe here
    soil_block = np.empty((rows, cols), dtype=np.uint32)
    state_block = np.empty((rows, cols), dtype=np.uint32)

    for row in range(rows):
        for col in range(cols):

            # Decision tree node for this pixel; the value is not part of the outputs
            node = 0

            if peat_block[row, col] != 1:
                soil_block[row, col] = 2  # 'inorganic'
                state_block[row, col] = 0  # Omitted from analysis
                continue

            soil_block[row, col] = 1  # 'organic'
            land_cover = land_cover_block[row, col]

            if (dadap_block[row, col] > 0 or osm_roads_block[row, col] > 0
                    or osm_canals_block[row, col] > 0 or engert_block[row, col] > 0
                    or grip_block[row, col] > 0):
                node = accrete_node(node, 1)
                state_block[row, col] = 1  # 'drained'
            elif land_cover == 6 or land_cover == 5:
                node = accrete_node(node, 3)
                state_block[row, col] = 1  # 'drained'
            else:
                node = accrete_node(node, 4)
                state_block[row, col] = 2  # 'undrained'

    # Built directly from the filled arrays (no placeholder arrays allocated first)
    out_dict_uint32 = {
        'soil': soil_block,
        'state': state_block
    }

    return out_dict_uint32


In [None]:
%%time

## Create organic soil and drainage state rasters, one set per chunk

## Area to analyze
## chunk_params arguments: W, S, E, N, chunk size (degrees)
# chunk_params = [-180, -60, 180, 80, 2]  # entire world
# chunk_params = [-10, 40, 20, 70, 1]    # 30x30 deg (70N_010W), 900 chunks

# chunk_params = [-10, 60, 0, 70, 1]    # 10x10 deg (70N_010W), 100 chunks
# chunk_params = [-10, 65, -5, 70, 1]    # 5x5 deg (70N_010W), 25 chunks
# chunk_params = [-10, 68, -8, 70, 1]    # 2x2 deg (70N_010W), 4 chunks
# chunk_params = [-10, 69, -9, 70, 1]    # 1x1 deg (70N_010W), 1 chunk

# chunk_params = [10, 40, 20, 50, 2]    # 10x10 deg (50N_010E), 25 chunks
# chunk_params = [10, 40, 20, 50, 10]    # 10x10 deg (50N_010E), 1 chunk
# chunk_params = [10, 46, 14, 50, 2]   # 4x4 deg, 4 chunks
# chunk_params = [110, -10, 114, -6, 2]   # 4x4 deg, 4 chunks
# chunk_params = [10, 48, 12, 50, 1]   # 2x2 deg, 4 chunks
# chunk_params = [10, 49, 11, 50, 1]   # 1x1 deg, 1 chunk
# chunk_params = [10, 49, 11, 50, 0.5] # 1x1 deg, 4 chunks
# chunk_params = [10, 49.5, 10.5, 50, 0.25] # 0.5x0.5 deg, 4 chunks
# chunk_params = [10, 42, 11, 43, 0.5] # 1x1 deg, 4 chunks (some GLCLU code=254 for ocean and some land, so data should be output)
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data, no fire)
#chunk_params = [15, 41.75, 15.25, 42, 0.25] # 0.25x0.25 deg, 1 chunk (has data with fire)

# # Range of no-data cases for testing
# chunk_params = [20, 69.75, 20.25, 70, 0.25] # 0.25x0.25 deg, 1 chunk (tile exists for GLCLU but not all other inputs, e.g., fire)
# chunk_params = [110, -10, 120, 0, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [110, -20, 120, -10, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [0, 79.75, 0.25, 80, 0.25] # 0.25x0.25 deg, 1 chunk (no 80N_000E tile-- no data)
# chunk_params = [112, -12, 116, -8, 2]   # 2x2 deg, 1 chunk (bottom of Java, has data but mostly ocean)
# chunk_params = [10.875, 41.75, 11, 42, 0.25] # 0.25x0.25 deg, 1 chunk (entirely GLCLU code=255 for ocean, so no actual data-- nothing should be be output)
# chunk_params = [-10, 21.75, -9.75, 22, 0.25] # 0.25x0.25 deg, 1 chunk (has data but entirely desert (fully GLCLU code=0))
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)

# Southern Borneo: a single 0.25x0.25 deg chunk that has data
chunk_params = [112.75, -2.75, 113.0, -2.5, 0.25]

# Splits the requested area into the list of chunk bounds to analyze
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")
# print(chunks)

# Runs covering more than this many chunks use the output file names for final versions of outputs
final_run_min_chunks = 90
is_final = len(chunks) > final_run_min_chunks
if is_final:
    print("Running as final model.")

# Builds the task list lazily (1 task = 1 chunk); nothing is executed yet
delayed_result = [
    dask.delayed(calculate_and_upload_drainage)(chunk, is_final)
    for chunk in chunks
]

# Executes all tasks and shows the per-chunk return values
results = dask.compute(*delayed_result)
results

2024-05-28 15:15:45,306 - INFO - Processing tile 00N_110E with bounds 113_-3_113_-2


Processing 1 chunks


2024-05-28 15:15:45,781 - INFO - Tile id 00N_110E exists. Proceeding.
2024-05-28 15:15:45,783 - INFO - Tile 00N_110E exists. Proceeding with downloading data.
2024-05-28 15:15:45,793 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,849 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,850 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,873 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,882 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,882 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,889 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,890 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
2024-05-28 15:15:45,891 - INFO - Found credentials in sha

Requesting data in chunk 113_-3_113_-2 in 00N_110E: 20240528_15_15_45


2024-05-28 15:18:16,704 - INFO - Waiting for requests for data in chunk 113_-3_113_-2 in 00N_110E: 20240528_15_18_16
2024-05-28 15:18:16,705 - INFO - Downloaded data for layer: dadap
2024-05-28 15:18:16,706 - INFO - Downloaded data for layer: engert
2024-05-28 15:18:16,707 - INFO - Downloaded data for layer: planted_forest_type
2024-05-28 15:18:16,707 - INFO - Downloaded data for layer: osm_canals
2024-05-28 15:18:16,708 - INFO - Downloaded data for layer: grip
2024-05-28 15:18:16,708 - INFO - Downloaded data for layer: peat
2024-05-28 15:18:16,709 - INFO - Downloaded data for layer: land_cover_2020
2024-05-28 15:18:16,710 - INFO - Downloaded data for layer: osm_roads
2024-05-28 15:18:16,710 - INFO - Downloaded data for layer: planted_forest_tree_crop
2024-05-28 15:18:16,711 - INFO - Checking chunk for data in tile 00N_110E with bounds 113_-3_113_-2
2024-05-28 15:18:16,711 - INFO - Checking layer dadap with type <class 'numpy.ndarray'>
2024-05-28 15:18:16,713 - INFO - Checking layer en

In [None]:
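### Open questions
# Why was "Tile id ses_2020 exists. Proceeding." logged? "ses_2020" is not a tile id; it matches the last
# 8 characters of "..._IPCC_classes_2020", so the tile id may be parsed from the wrong part of the
# land cover file name -- TODO confirm in check_for_tile.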


In [None]:
# local_client.shutdown()

In [None]:
# # Example data for testing
# in_dict_uint8 = {
#     'peat': np.random.randint(0, 2, size=(100, 100)).astype(np.uint8),
#     'land_cover_2020': np.random.randint(0, 10, size=(100, 100)).astype(np.uint8),
#     'planted_forest_type': np.random.randint(0, 2, size=(100, 100)).astype(np.uint8),
# }
# in_dict_int16 = {}
# in_dict_float32 = {
#     'dadap': np.random.rand(100, 100).astype(np.float32),
#     'osm_roads': np.random.rand(100, 100).astype(np.float32),
#     'osm_canals': np.random.rand(100, 100).astype(np.float32),
#     'engert': np.random.rand(100, 100).astype(np.float32),
#     'grip': np.random.rand(100, 100).astype(np.float32),
# }
