<font size="6">Create starting non-soil carbon density rasters from WHRC AGB 2000: aboveground carbon, belowground carbon, deadwood carbon, litter carbon</font> 

<font size="4">Must be run using the utilities_and_variables.ipynb kernel</font> 

In [None]:
# Creates initial (year 2000) non-soil carbon pool densities for one chunk:
# aboveground carbon (AGC), belowground carbon (BGC), deadwood carbon, and litter carbon.
# Operates pixel by pixel, so it is compiled with Numba in nopython mode (Python compiled to machine code)
# rather than run as an interpreted loop.
#
# Inputs (one dictionary per array data type, because a dictionary in a Numba function can only hold one data type):
#   in_dict_uint8: not read by this function; kept so the signature matches the typed dictionaries that callers pass.
#   in_dict_int16: must contain "agb_2000", "elevation", "climate_domain", and "continent_ecozone".
#   in_dict_int32: must contain "precipitation".
#   in_dict_float32: must contain "mangrove_agb_2000" and "r_s_ratio".
#   mangrove_C_ratio_array: 2D lookup array. Column 0 is matched against the continent-ecozone code;
#       columns 1, 2, and 3 are used as the BGC, deadwood C, and litter C ratios relative to AGC.
# Output:
#   Dictionary of float32 arrays (same shape as the r_s_ratio chunk) keyed by "<pool pattern>_<first_year>".
# Also reads names that are not arguments (mang_no_data_val, biomass_to_carbon, default_r_s,
# the deadwood/litter ratio constants, the *_dens_pattern names, and first_year), so those must already
# exist at module level when the function is first compiled.
@jit(nopython=True)
def create_starting_C_densities(in_dict_uint8, in_dict_int16, in_dict_int32, in_dict_float32, mangrove_C_ratio_array):

    # Output dictionary, named by output data type.
    # A dictionary in a Numba function cannot have arrays with multiple data types, so each dictionary has to store
    # only one data type, just like the inputs to the function.
    out_dict_float32 = {}

    whrc_agb_2000_block = in_dict_int16["agb_2000"]
    mangrove_agb_2000_block = in_dict_float32["mangrove_agb_2000"]
    elevation_block = in_dict_int16["elevation"]
    climate_domain_block = in_dict_int16["climate_domain"]
    precipitation_block = in_dict_int32["precipitation"]
    r_s_ratio_block = in_dict_float32["r_s_ratio"]
    continent_ecozone_block = in_dict_int16["continent_ecozone"]

    # The output datatype must be specified or the arrays would default to float64.
    # Allocating directly as float32 avoids creating a float64 temporary for each output.
    out_shape = r_s_ratio_block.shape
    agc_2000_out_block = np.zeros(out_shape, np.float32)
    bgc_2000_out_block = np.zeros(out_shape, np.float32)
    deadwood_c_2000_out_block = np.zeros(out_shape, np.float32)
    litter_c_2000_out_block = np.zeros(out_shape, np.float32)

    mangrove_in_chunk = True   # Flag for whether chunk has mangrove in it (defaults to True)

    # Checks if chunk has mangrove data in it by comparing the min and max values of the chunk with the NoData value.
    # The chunk's np.mean was originally compared to NoData instead. That identified non-mangrove chunks
    # for 1000x1000 pixel chunks but not for 2000x2000 pixel chunks under numba:
    # np.mean(mangrove_agb_2000_block) kept equalling ~67 rather than the expected 255 in 2000x2000 pixel chunks.
    # The cause of the incorrect np.mean/np.average for the larger chunks was not determined,
    # but comparing the NoData value to the min and max of the chunk seems to correctly identify chunks without mangroves.
    if ((np.min(mangrove_agb_2000_block) == mang_no_data_val) and (np.max(mangrove_agb_2000_block) == mang_no_data_val)):
        mangrove_in_chunk = False

    # Iterates through all pixels in the chunk
    for row in range(whrc_agb_2000_block.shape[0]):
        for col in range(whrc_agb_2000_block.shape[1]):

            # Input values for this specific cell
            whrc_agb_2000 = whrc_agb_2000_block[row, col]
            mangrove_agb_2000 = mangrove_agb_2000_block[row, col]   # Would be 255 if pixel is NoData
            elevation = elevation_block[row, col]
            climate_domain = climate_domain_block[row, col]
            precipitation = precipitation_block[row, col]
            r_s_ratio = r_s_ratio_block[row, col]
            continent_ecozone = continent_ecozone_block[row, col]

            # Pixel is treated as mangrove only if the chunk has mangrove data and this pixel has a mangrove value.
            # Evaluated once here and reused for both the AGB replacement and the ratio decision tree.
            # NOTE(review): a NoData pixel (255) inside a chunk that does contain mangroves also satisfies "> 0".
            # Confirm that the inputs never mix 255 with real mangrove values in the same chunk.
            is_mangrove_pixel = mangrove_in_chunk and (mangrove_agb_2000 > 0)

            # If mangrove AGB is present, it replaces the non-mangrove AGB
            if is_mangrove_pixel:
                whrc_agb_2000 = mangrove_agb_2000

            # Calculates AGC from AGB. Same calculation for mangroves and non-mangroves.
            agc_2000_out_block[row, col] = whrc_agb_2000 * biomass_to_carbon

            # Ridiculous default BGC, deadwood C, and litter C ratios that will make it very clear if they are being used
            # instead of something being assigned in the decision tree below
            bgc_ratio = -5
            deadwood_c_ratio = -10
            litter_c_ratio = -20

            # Separate branches for assigning BGC, deadwood C, and litter C ratios depending on whether the pixel has mangroves.
            # Calculation of BGC, deadwood C, and litter C are done after the decision tree assigns the ratios.

            # Mangrove carbon pool ratio branch
            # From IPCC 2013 Wetland Supplement
            if is_mangrove_pixel:
                # Looks up the row(s) for this continent-ecozone once and reads all three ratios from the first match.
                # NOTE(review): if the code is missing from the lookup table, no row matches and the [0, n] reads below
                # index an empty array. Numba does not bounds-check by default, so confirm every code that can occur
                # with mangroves is present in the table.
                ratio_rows = mangrove_C_ratio_array[np.where(mangrove_C_ratio_array[:, 0] == continent_ecozone)]
                bgc_ratio = ratio_rows[0, 1]
                deadwood_c_ratio = ratio_rows[0, 2]
                litter_c_ratio = ratio_rows[0, 3]

            # Non-mangrove carbon pool ratio branch
            # Deadwood and litter carbon as fractions of AGC are from
            # https://cdm.unfccc.int/methodologies/ARmethodologies/tools/ar-am-tool-12-v3.0.pdf
            # "Clean Development Mechanism A/R Methodological Tool:
            # Estimation of carbon stocks and change in carbon stocks in dead wood and litter in A/R CDM project activities version 03.0"
            # Tables on pages 18 (deadwood) and 19 (litter).
            # They depend on the climate domain, elevation, and precipitation.
            else:                                                                                       # Non-mangrove

                # If no mapped R:S, uses the global default value instead
                if r_s_ratio == 0:
                    r_s_ratio = default_r_s
                bgc_ratio = r_s_ratio                                                                   # Uses R:S for BGC

                if climate_domain == 1:                                                                 # Tropical
                    if elevation <= 2000:                                                               # Low elevation
                        if precipitation <= 1000:                                                       # Low precipitation or no precip raster
                            deadwood_c_ratio = tropical_low_elev_low_precip_deadwood_c_ratio
                            litter_c_ratio = tropical_low_elev_low_precip_litter_c_ratio
                        elif precipitation <= 1600:                                                     # Medium precipitation (>1000 already established)
                            deadwood_c_ratio = tropical_low_elev_med_precip_deadwood_c_ratio
                            litter_c_ratio = tropical_low_elev_med_precip_litter_c_ratio
                        else:                                                                           # High precipitation
                            deadwood_c_ratio = tropical_low_elev_high_precip_deadwood_c_ratio
                            litter_c_ratio = tropical_low_elev_high_precip_litter_c_ratio
                    else:                                                                               # High elevation
                        deadwood_c_ratio = tropical_high_elev_deadwood_c_ratio
                        litter_c_ratio = tropical_high_elev_litter_c_ratio
                else:                                                                                   # Non-tropical (temperate/boreal)
                    deadwood_c_ratio = non_tropical_deadwood_c_ratio
                    litter_c_ratio = non_tropical_litter_c_ratio

            # Actually calculates BGC, deadwood C, and litter C using the ratios assigned in the above decision tree
            bgc_2000_out_block[row, col] = agc_2000_out_block[row, col] * bgc_ratio
            deadwood_c_2000_out_block[row, col] = agc_2000_out_block[row, col] * deadwood_c_ratio
            litter_c_2000_out_block[row, col] = agc_2000_out_block[row, col] * litter_c_ratio

    # Adds the output arrays to the dictionary with the appropriate data type.
    # No .copy() is needed: each array is allocated inside this call and stored exactly once,
    # so nothing else can overwrite it after it is placed in the dictionary.
    out_dict_float32[f"{agc_dens_pattern}_{first_year}"] = agc_2000_out_block
    out_dict_float32[f"{bgc_dens_pattern}_{first_year}"] = bgc_2000_out_block
    out_dict_float32[f"{deadwood_c_dens_pattern}_{first_year}"] = deadwood_c_2000_out_block
    out_dict_float32[f"{litter_c_dens_pattern}_{first_year}"] = litter_c_2000_out_block

    return out_dict_float32

In [None]:
# Runs the full workflow for one chunk of starting (year 2000) non-soil carbon pools:
# download the input chunks, calculate the carbon densities, and upload the output rasters to s3.
# Returns None if the tile does not exist or the chunk has no data; otherwise returns a success message.
def create_and_upload_starting_C_densities(bounds, is_final, mangrove_C_ratio_array):

    logger = setup_logging()

    bounds_str = boundstr(bounds)                            # Chunk bounds as a string
    tile_id = xy_to_tile_id(bounds[0], bounds[3])            # tile_id in YYN/S_XXXE/W
    chunk_length_pixels = calc_chunk_length_pixels(bounds)   # Chunk length in pixels rather than decimal degrees

    ### Part 1: download chunks and check for data

    # NoData value in mangrove AGB raster. For checking input chunks.
    mang_no_data_val = 255

    # Input layer name: path of the tile for that layer
    download_dict = {
        agb_2000: f"{agb_2000_path}{tile_id}_{agb_2000_pattern}.tif",
        mangrove_agb_2000: f"{mangrove_agb_2000_path}{tile_id}_{mangrove_agb_2000_pattern}.tif",
        elevation: f"{elevation_path}{tile_id}_{elevation_pattern}.tif",
        climate_domain: f"{climate_domain_path}{tile_id}_{climate_domain_pattern}.tif",
        precipitation: f"{precipitation_path}{tile_id}_{precipitation_pattern}.tif",
        r_s_ratio: f"{r_s_ratio_path}{tile_id}_{r_s_ratio_pattern}.tif",
        continent_ecozone: f"{continent_ecozone_path}{tile_id}_{continent_ecozone_pattern}.tif",
    }

    # Stops before requesting any chunk if the tile doesn't exist at all
    if check_for_tile(download_dict, is_final, logger) == 0:
        return

    futures = prepare_to_download_chunk(bounds, download_dict, mang_no_data_val, is_final, logger)

    print_and_log(f"Waiting for requests for data in chunk {bounds_str} in {tile_id}: {timestr()}", is_final, logger)

    # Collects each downloaded layer as its request comes back with data from S3
    layers = {
        futures[finished]: finished.result()
        for finished in concurrent.futures.as_completed(futures)
    }

    # Only the two AGB layers determine whether the chunk has data. Skips the chunk if it has no data in it.
    layers_to_check_for_data = {
        layer_name: layers[layer_name] for layer_name in ('agb_2000', 'mangrove_agb_2000')
    }
    if check_chunk_for_data(layers_to_check_for_data, agb_2000, bounds_str, tile_id, is_final, logger) == 0:
        return

    ### Part 2: Create a separate dictionary for each chunk datatype so that they can be passed to Numba as separate arguments.
    ### Numba functions can accept (and return) dictionaries of arrays as long as each dictionary only has arrays of one data type (e.g., uint8, float32)
    ### Note: need to add new code if inputs with other data types are added

    typed_dict_uint8, typed_dict_int16, typed_dict_int32, typed_dict_float32 = create_typed_dicts(layers)

    ### Part 3: Create starting carbon pool densities

    print_and_log(f"Creating starting C densities for {bounds_str} in {tile_id}: {timestr()}", is_final, logger)

    # AGC, BGC, deadwood C and litter C
    out_dict_float32 = create_starting_C_densities(
        typed_dict_uint8,
        typed_dict_int16,
        typed_dict_int32,
        typed_dict_float32,
        mangrove_C_ratio_array,
    )

    ### Part 4: Save numpy arrays as rasters and upload to s3

    out_no_data_val = 0   # NoData value for output raster (optional)

    # The dictionary returned from the numba function can only hold one data type, which prevents attaching
    # the attributes needed for uploading to s3. So each array is moved into a normal Python dictionary
    # alongside its upload metadata: [array, data type name, output pattern, year].
    rasters_to_upload = {}
    for name, array in out_dict_float32.items():
        pattern_without_year = name[:-5]    # Drops the year (2000) from the end of the string
        rasters_to_upload[name] = [array, array.dtype.name, pattern_without_year, first_year]

    # Clear memory of unneeded arrays
    del out_dict_float32

    save_and_upload_small_raster_set(bounds, chunk_length_pixels, tile_id, bounds_str, rasters_to_upload, is_final, logger, out_no_data_val)

    # Clear memory of unneeded arrays
    del rasters_to_upload

    return f"Success for {bounds_str}: {timestr()}"

In [None]:
%%time

"""
Coiled cluster config notes:
At most points, at least a few workers exceeded 16GB, with most between 8 and 16GB. 
This makes me think that 32GB workers are a good size. 
Running with 40x 32GB r6i.2xlarge workers had 200 tasks running simultaneously. 
"""

## Create carbon density year 2000 2x2 deg rasters 

## Area to analyze
## chunk_params arguments: W, S, E, N, chunk size (degrees)
## Exactly one chunk_params line should be uncommented. The remaining lines are alternative extents kept for testing.
chunk_params = [-180, -60, 180, 80, 2]  # entire world (12600 chunks) (40x 32GB r6i.2xlarge workers= 32 minutes; around 11 Coiled credits and a few dollars of AWS costs)
# chunk_params = [110, -10, 140, 20, 2]    # 30x30 deg (20N_110E, mangroves), 225 chunks (40x 32GB r6i.2xlarge workers=2.5 minutes)
# chunk_params = [-10, 40, 20, 70, 2]    # 30x30 deg (70N_010W, no mangroves), 225 chunks (40x 32GB r6i.2xlarge workers=2.25 minutes)
# chunk_params = [10, 40, 20, 50, 2]   # 10x10 deg (50N_010E), 25 chunks
# chunk_params = [10, 46, 14, 50, 2]   # 4x4 deg (50N_010E), 4 chunks
# chunk_params = [10, 48, 12, 50, 2]   # 2x2 deg (50N_010E), 1 chunk
# chunk_params = [10, 48, 12, 50, 1]   # 2x2 deg (50N_010E), 4 chunks
# chunk_params = [10, 49, 11, 50, 1]   # 1x1 deg (50N_010E), 1 chunk


# chunk_params = [10, 40, 20, 50, 10]    # 10x10 deg (50N_010E), 1 chunk   
# chunk_params = [10, 46, 14, 50, 2]   # 4x4 deg, 4 chunks
# chunk_params = [110, -10, 114, -6, 2]   # 4x4 deg, 4 chunks
# chunk_params = [10, 48, 12, 50, 1]   # 2x2 deg, 4 chunks
# chunk_params = [10, 49, 11, 50, 1]   # 1x1 deg, 1 chunk
# chunk_params = [10, 49, 11, 50, 0.5] # 1x1 deg, 4 chunks
# chunk_params = [10, 49.5, 10.5, 50, 0.25] # 0.5x0.5 deg, 4 chunks
# chunk_params = [10, 42, 11, 43, 0.5] # 1x1 deg, 4 chunks (some GLCLU code=254 for ocean and some land, so data should be output)
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data, no fire)
# chunk_params = [15, 41.75, 15.25, 42, 0.25] # 0.25x0.25 deg, 1 chunk (has data with fire)
# chunk_params = [116, -3, 116.25, -2.75, 0.25] # 0.25x0.25 deg, 1 chunk (has mangroves)


# # Range of no-data cases for testing
# chunk_params = [20, -70, 20.25, -69.75, 0.25] # 0.25x0.25 deg, 1 chunk (tile does not exist)
# chunk_params = [20, 69.75, 20.25, 70, 0.25] # 0.25x0.25 deg, 1 chunk (tile exists for GLCLU but not all other inputs, e.g., fire)
# chunk_params = [110, -10, 120, 0, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [110, -20, 120, -10, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [0, 79.75, 0.25, 80, 0.25] # 0.25x0.25 deg, 1 chunk (no 80N_000E tile-- no data)
# chunk_params = [112, -12, 116, -8, 2]   # 2x2 deg, 1 chunk (bottom of Java, has data but mostly ocean)
# chunk_params = [10.875, 41.75, 11, 42, 0.25] # 0.25x0.25 deg, 1 chunk (entirely GLCLU code=255 for ocean, so no actual data-- nothing should be output)
# chunk_params = [-10, 21.75, -9.75, 22, 0.25] # 0.25x0.25 deg, 1 chunk (has data but entirely desert (fully GLCLU code=0))
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)

# Lookup array of mangrove BGC, deadwood C, and litter C ratios relative to AGC.
# The columns to pull from the spreadsheet tab must be listed explicitly.
mangrove_C_ratio_array = convert_lookup_table_to_array(
    rate_ratio_spreadsheet,
    mangrove_rate_ratio_tab,
    ['gainEcoCon', 'BGC_AGC', 'deadwood_AGC', 'litter_AGC'],
)

# List of chunks to analyze
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")

# Runs with more than 90 chunks use the output file names for final versions of outputs
is_final = len(chunks) > 90
if is_final:
    print("Running as final model.")

# Builds the list of tasks to run (1 task = 1 chunk)
delayed_result = []
for chunk in chunks:
    delayed_result.append(
        dask.delayed(create_and_upload_starting_C_densities)(chunk, is_final, mangrove_C_ratio_array)
    )

# Actually runs analysis
results = dask.compute(*delayed_result)
results

In [None]:
# Retrieves the logs for all workers from the Coiled cluster
logs = coiled_cluster.get_logs()

# Hands the retrieved logs to compile_and_upload_log under the label "C_pools_2000"
compile_and_upload_log(logs, "C_pools_2000")

In [None]:
%%time

## Raster footprint shapefiles for the 2x2 degree (8000 pixel) carbon pool 2000 rasters
## Doesn't use memory. Can be done on 4 GB workers. Only need as many workers as there are folders.
## 12 minutes for global run

# One single-entry dictionary per carbon pool: {s3 folder to process: output shapefile name}
input_dicts = [
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/AGC_density_MgC_ha/2000/8000_pixels/20240729/": "AGC_2000_global"},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/BGC_density_MgC_ha/2000/8000_pixels/20240729/": "BGC_2000_global"},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/deadwood_C_density_MgC_ha/2000/8000_pixels/20240729/": "deadwood_C_2000_global"},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/litter_C_density_MgC_ha/2000/8000_pixels/20240729/": "litter_C_2000_global"},
]

# One delayed footprint task per folder
delayed_result = []
for input_dict in input_dicts:
    delayed_result.append(dask.delayed(make_tile_footprint_shp)(input_dict))

# Actually runs analysis
results = dask.compute(*delayed_result)
results

In [None]:
%%time

## Aggregates the 2x2 degree carbon pool 2000 rasters into 10x10 degree rasters
## Doesn't use much memory. Can be done on 30x 8 GB workers (1 hour).
## 1144 chunks to process took 65 minutes

# One single-entry dictionary per carbon pool: {s3 folder to process: nodata value for the output rasters}
s3_in_folder_dicts = [
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/AGC_density_MgC_ha/2000/8000_pixels/20240729/": 0},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/BGC_density_MgC_ha/2000/8000_pixels/20240729/": 0},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/deadwood_C_density_MgC_ha/2000/8000_pixels/20240729/": 0},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/litter_C_density_MgC_ha/2000/8000_pixels/20240729/": 0},
]

# List of the aggregated 10x10 rasters that will be created
# (list of dictionaries of input s3 folder and output aggregated raster name).
# These are the basis for the tasks.
list_of_s3_name_dicts_total = create_list_for_aggregation(s3_in_folder_dicts)

# # For testing. Limits the number of output rasters
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[0:3]  # First 3 tiles
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[40:41] # 10N_130E; Internal chunks missing and padding needed on right; FID40
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[0:1]  # 00N_000E
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[16:17] # 00N_110E
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[41:42]  # 10S_010E; No padding needed; FID41

# One delayed merge task per aggregated output raster
delayed_result = []
for s3_name_no_data_dict in list_of_s3_name_dicts_total:
    delayed_result.append(dask.delayed(merge_small_tiles_gdal)(s3_name_no_data_dict))

# Actually runs analysis
results = dask.compute(*delayed_result)
results

In [None]:
%%time

## Raster footprint shapefiles for the aggregated 10x10 degree (40000 pixel) carbon pool 2000 rasters
## Doesn't use memory. Can be done on 4 GB workers. Only need as many workers as there are folders.
## Took 1.2 minutes

# One single-entry dictionary per carbon pool: {s3 folder to process: output shapefile name}
input_dicts = [
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/AGC_density_MgC_ha/2000/40000_pixels/20240729/": "AGC_2000__global__10x10"},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/BGC_density_MgC_ha/2000/40000_pixels/20240729/": "BGC_2000__global__10x10"},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/deadwood_C_density_MgC_ha/2000/40000_pixels/20240729/": "deadwood_C_2000__global__10x10"},
    {"gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/litter_C_density_MgC_ha/2000/40000_pixels/20240729/": "litter_C_2000__global__10x10"},
]

# One delayed footprint task per folder
delayed_result = []
for input_dict in input_dicts:
    delayed_result.append(dask.delayed(make_tile_footprint_shp)(input_dict))

# Actually runs analysis
results = dask.compute(*delayed_result)
results