<font size="6">Create starting non-soil carbon density rasters from WHRC AGB 2000: aboveground carbon, belowground carbon, deadwood carbon, litter carbon</font> 

<font size="4">Must be run using the utilities_and_variables.ipynb kernel</font> 

In [30]:
# Function to create initial (year 2000) non-soil carbon pool densities.
# Operates pixel by pixel, so uses Numba (JIT-compiles the Python loop to native machine code).
@jit(nopython=True)
def create_starting_C_densities(in_dict_uint8, in_dict_int16, in_dict_int32, in_dict_float32):
    """Create year-2000 AGC, BGC, deadwood C and litter C density arrays for one chunk.

    Each input dictionary holds arrays of a single data type because a dictionary passed to
    (or returned from) a Numba function cannot mix array data types.

    Args:
        in_dict_uint8: Not read by this function; kept so the signature matches the
            four per-datatype dictionaries supplied by the caller.
        in_dict_int16: Must contain "agb_2000", "elevation" and "climate_domain".
        in_dict_int32: Must contain "precipitation".
        in_dict_float32: Must contain "r_s_ratio". A value of 0 is treated as
            "no mapped R:S" and replaced with the global default_r_s.

    Returns:
        Dictionary of four float32 arrays with the same shape as the AGB block, keyed by
        f"{<pool>_dens_pattern}_{first_year}" for AGC, BGC, deadwood C and litter C.
    """

    # Single output dictionary: every output of this function is float32.
    out_dict_float32 = {}

    agb_2000_block = in_dict_int16["agb_2000"]
    elevation_block = in_dict_int16["elevation"]
    climate_domain_block = in_dict_int16["climate_domain"]
    precipitation_block = in_dict_int32["precipitation"]
    r_s_ratio_block = in_dict_float32["r_s_ratio"]

    # Outputs are allocated explicitly as float32. np.zeros defaults to float64, which would
    # double the memory footprint and would not match the dictionary the arrays are returned in.
    out_shape = agb_2000_block.shape
    agc_2000_out_block = np.zeros(out_shape, dtype=np.float32)
    bgc_2000_out_block = np.zeros(out_shape, dtype=np.float32)
    deadwood_c_2000_out_block = np.zeros(out_shape, dtype=np.float32)
    litter_c_2000_out_block = np.zeros(out_shape, dtype=np.float32)

    # Iterates through all pixels in the chunk
    for row in range(out_shape[0]):
        for col in range(out_shape[1]):

            # Input values for this specific cell
            agb_2000 = agb_2000_block[row, col]
            elevation = elevation_block[row, col]
            climate_domain = climate_domain_block[row, col]
            precipitation = precipitation_block[row, col]
            r_s_ratio = r_s_ratio_block[row, col]

            # If no mapped R:S, uses the global default value instead
            if r_s_ratio == 0:
                r_s_ratio = default_r_s

            # Deadwood and litter carbon as fractions of AGC are from
            # https://cdm.unfccc.int/methodologies/ARmethodologies/tools/ar-am-tool-12-v3.0.pdf
            # "Clean Development Mechanism A/R Methodological Tool:
            # Estimation of carbon stocks and change in carbon stocks in dead wood and litter in A/R CDM project activities version 03.0"
            # Tables on pages 18 (deadwood) and 19 (litter).
            # They depend on the climate domain, elevation, and precipitation.
            if climate_domain == 1:                  # Tropical
                if elevation <= 2000:                # Low elevation
                    if precipitation <= 1000:        # Low precipitation
                        deadwood_frac = 0.02
                        litter_frac = 0.04
                    elif precipitation <= 1600:      # Medium precipitation (>1000 is implied by the branch above)
                        deadwood_frac = 0.01
                        litter_frac = 0.01
                    else:                            # High precipitation
                        deadwood_frac = 0.06
                        litter_frac = 0.01
                else:                                # High elevation
                    deadwood_frac = 0.07
                    litter_frac = 0.01
            else:                                    # Non-tropical (temperate/boreal)
                deadwood_frac = 0.08
                litter_frac = 0.04

            # AGC is kept in a local so the derived pools are computed from the unrounded value
            # and only rounded once when stored into the float32 outputs.
            agc_2000 = agb_2000 * biomass_to_carbon   # Uses basic biomass:carbon ratio for AGC

            agc_2000_out_block[row, col] = agc_2000
            bgc_2000_out_block[row, col] = agc_2000 * r_s_ratio   # Uses R:S for BGC
            deadwood_c_2000_out_block[row, col] = agc_2000 * deadwood_frac
            litter_c_2000_out_block[row, col] = agc_2000 * litter_frac

    # Adds the output arrays to the dictionary.
    # No .copy() is needed: each array is allocated fresh inside this call and is never reused
    # or overwritten afterwards, so copying would only double peak memory.
    out_dict_float32[f"{agc_dens_pattern}_{first_year}"] = agc_2000_out_block
    out_dict_float32[f"{bgc_dens_pattern}_{first_year}"] = bgc_2000_out_block
    out_dict_float32[f"{deadwood_c_dens_pattern}_{first_year}"] = deadwood_c_2000_out_block
    out_dict_float32[f"{litter_c_dens_pattern}_{first_year}"] = litter_c_2000_out_block

    return out_dict_float32

In [28]:
# 
def create_and_upload_starting_C_densities(bounds, is_final):
    """Download the inputs for one chunk, derive its starting carbon densities, and upload them.

    Args:
        bounds: Chunk bounds. bounds[0] and bounds[3] are passed to xy_to_tile_id to
            identify the tile the chunk belongs to.
        is_final: Passed through to the tile check, the chunk data check and the upload
            helper. When true, the "Waiting for requests" progress message is not printed.

    Returns:
        None if check_for_tile or check_chunk_for_data returns 0 (nothing is uploaded);
        otherwise a "Success for ..." string with the chunk bounds and a timestamp.
    """

    bounds_str = boundstr(bounds)    # String form of chunk bounds
    tile_id = xy_to_tile_id(bounds[0], bounds[3])    # tile_id in YYN/S_XXXE/W
    chunk_length_pixels = calc_chunk_length_pixels(bounds)   # Chunk length in pixels (as opposed to decimal degrees)

    no_data_val = 255

    ### Part 1: download chunks and check for data

    # Input layer name -> s3 location of that layer for this tile
    download_dict = {
        agb_2000: f"s3://gfw2-data/climate/WHRC_biomass/WHRC_V4/Processed/{tile_id}_t_aboveground_biomass_ha_2000.tif",
        elevation: f"s3://gfw2-data/climate/carbon_model/inputs_for_carbon_pools/processed/elevation/20190418/{tile_id}_elevation.tif",
        climate_domain: f"s3://gfw2-data/climate/carbon_model/inputs_for_carbon_pools/processed/fao_ecozones_bor_tem_tro/20190418/{tile_id}_fao_ecozones_bor_tem_tro_processed.tif",
        precipitation: f"s3://gfw2-data/climate/carbon_model/inputs_for_carbon_pools/processed/precip/20190418/{tile_id}_precip_mm_annual.tif",
        r_s_ratio: f"s3://gfw2-data/climate/carbon_model/BGB_AGB_ratio/processed/20230216/{tile_id}_BGB_AGB_ratio.tif"
    }

    # Nothing to download if the tile doesn't exist at all
    if check_for_tile(download_dict, is_final) == 0:
        return None

    futures = prepare_to_download_chunk(bounds, download_dict, no_data_val)

    if not is_final:
        print(f"Waiting for requests for data in chunk {bounds_str} in {tile_id}: {timestr()}")

    # Collects each layer as its request comes back with data from S3
    layers = {
        futures[finished]: finished.result()
        for finished in concurrent.futures.as_completed(futures)
    }

    # Skips the chunk if it has no data in it
    if check_chunk_for_data(layers, agb_2000, bounds_str, tile_id, no_data_val, is_final) == 0:
        return None

    ### Part 2: Create a separate dictionary for each chunk datatype so that they can be passed to Numba as separate arguments.
    ### Numba functions can accept (and return) dictionaries of arrays as long as each dictionary only has arrays of one data type (e.g., uint8, float32)
    ### Note: need to add new code if inputs with other data types are added

    uint8_layers, int16_layers, int32_layers, float32_layers = create_typed_dicts(layers)

    ### Part 3: Create starting carbon pool densities

    print(f"Creating starting C densities for {bounds_str} in {tile_id}: {timestr()}")

    # Create AGC, BGC, deadwood C and litter C
    numba_outputs = create_starting_C_densities(
        uint8_layers, int16_layers, int32_layers, float32_layers
    )

    ### Part 4: Save numpy arrays as rasters and upload to s3

    # The dictionary returned by Numba can only hold arrays of one data type, so the arrays are
    # moved into an ordinary Python dictionary where each one can be paired with the metadata
    # needed for uploading: [array, data type name, output pattern, year].
    upload_dict = {}
    for out_name, out_array in numba_outputs.items():
        pattern_without_year = out_name[:-5]    # Drops the year (2000) from the end of the string
        upload_dict[out_name] = [out_array, out_array.dtype.name, pattern_without_year, first_year]

    # Clear memory of unneeded arrays
    del numba_outputs

    save_and_upload_small_raster_set(bounds, chunk_length_pixels, tile_id, bounds_str, upload_dict, is_final)

    # Clear memory of unneeded arrays
    del upload_dict

    return f"Success for {bounds_str}: {timestr()}"

In [38]:
%%time

## Create starting (year 2000) non-soil carbon density rasters, one task per chunk

## Area to analyze: uncomment exactly ONE chunk_params line below before running this cell.
## chunk_params arguments: W, S, E, N, chunk size (degrees)
# chunk_params = [-180, -60, 180, 80, 2]  # entire world
# chunk_params = [-10, 40, 20, 70, 1]    # 30x30 deg (70N_010W), 900 chunks

# chunk_params = [-10, 60, 0, 70, 1]    # 10x10 deg (70N_010W), 100 chunks
# chunk_params = [-10, 65, -5, 70, 1]    # 5x5 deg (70N_010W), 25 chunks
# chunk_params = [-10, 68, -8, 70, 1]    # 2x2 deg (70N_010W), 4 chunks
# chunk_params = [-10, 69, -9, 70, 1]    # 1x1 deg (70N_010W), 1 chunk

# chunk_params = [10, 40, 20, 50, 2]    # 10x10 deg (50N_010E), 25 chunks
# Cannot create 10x10 carbon pools, even with 64 GB machine.
# It reads in all the inputs but fails on the numba calculation step.
# chunk_params = [10, 40, 20, 50, 10]    # 10x10 deg (50N_010E), 1 chunk   
# chunk_params = [10, 46, 14, 50, 2]   # 4x4 deg, 4 chunks
# chunk_params = [110, -10, 114, -6, 2]   # 4x4 deg, 4 chunks
# chunk_params = [10, 48, 12, 50, 1]   # 2x2 deg, 4 chunks
# chunk_params = [10, 49, 11, 50, 1]   # 1x1 deg, 1 chunk
# chunk_params = [10, 49, 11, 50, 0.5] # 1x1 deg, 4 chunks
# chunk_params = [10, 49.5, 10.5, 50, 0.25] # 0.5x0.5 deg, 4 chunks
# chunk_params = [10, 42, 11, 43, 0.5] # 1x1 deg, 4 chunks (some GLCLU code=254 for ocean and some land, so data should be output)
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data, no fire)
# chunk_params = [15, 41.75, 15.25, 42, 0.25] # 0.25x0.25 deg, 1 chunk (has data with fire)

# # Range of no-data cases for testing
# chunk_params = [20, -70, 20.25, -69.75, 0.25] # 0.25x0.25 deg, 1 chunk (tile does not exist)
# chunk_params = [20, 69.75, 20.25, 70, 0.25] # 0.25x0.25 deg, 1 chunk (tile exists for GLCLU but not all other inputs, e.g., fire)
# chunk_params = [110, -10, 120, 0, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [110, -20, 120, -10, 2]    # 10x10 deg (10S_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [0, 79.75, 0.25, 80, 0.25] # 0.25x0.25 deg, 1 chunk (no 80N_000E tile-- no data)
# chunk_params = [112, -12, 116, -8, 2]   # 4x4 deg, 4 chunks (bottom of Java, has data but mostly ocean)
# chunk_params = [10.875, 41.75, 11, 42, 0.25] # 0.25x0.25 deg, 1 chunk (entirely GLCLU code=255 for ocean, so no actual data-- nothing should be be output)
# chunk_params = [-10, 21.75, -9.75, 22, 0.25] # 0.25x0.25 deg, 1 chunk (has data but entirely desert (fully GLCLU code=0))
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)

# Fails fast with a clear message if no area was selected above. Without this check the cell
# either raises a bare NameError on a fresh kernel or silently reuses whatever chunk_params
# a previous run left behind in the kernel.
if "chunk_params" not in globals():
    raise NameError(
        "chunk_params is not defined: uncomment exactly one chunk_params line in this cell before running it."
    )

# Makes list of chunks to analyze
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")
# print(chunks)

# Runs with more chunks than this are treated as final model runs,
# which use the output file names for final versions of outputs.
final_run_min_chunks = 90
is_final = len(chunks) > final_run_min_chunks
if is_final:
    print("Running as final model.")

# Creates list of tasks to run (1 task = 1 chunk)
delayed_result = [dask.delayed(create_and_upload_starting_C_densities)(chunk, is_final) for chunk in chunks]

# Actually runs analysis
results = dask.compute(*delayed_result)
results

Processing 1 chunks
Tile id 50N_010E exists. Proceeding.
Requesting data in chunk 10_40_20_50 in 50N_010E: 20240716_12_42_19
Waiting for requests for data in chunk 10_40_20_50 in 50N_010E: 20240716_12_43_22
Data in chunk 10_40_20_50. Proceeding.
Creating starting C densities for 10_40_20_50 in 50N_010E: 20240716_12_43_27
Tile id 50N_010E exists. Proceeding.
Requesting data in chunk 10_40_20_50 in 50N_010E: 20240716_12_43_45
Waiting for requests for data in chunk 10_40_20_50 in 50N_010E: 20240716_12_44_38
Data in chunk 10_40_20_50. Proceeding.
Creating starting C densities for 10_40_20_50 in 50N_010E: 20240716_12_44_44
Tile id 50N_010E exists. Proceeding.
Requesting data in chunk 10_40_20_50 in 50N_010E: 20240716_12_45_02


KeyboardInterrupt: 