<font size="6">Run the organic soils part of the AFOLU model</font> 

<font size="4">Must be run using the utilities_and_variables.ipynb kernel</font> 

NoData handling now works, but the current solution is not elegant and will be revisited with a better fix. The input datasets also still need to be compressed.

In [38]:
import logging

In [39]:
# Configure logging so that INFO-and-above records are emitted,
# each prefixed with its timestamp and severity level
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

In [40]:
def save_and_upload_small_raster_set(bounds, chunk_length_pixels, tile_id, bounds_str, output_dict, is_final):
    """Write each output array of a chunk to a local GeoTIFF, upload it to s3, and delete the local copy.

    Rasters can't be saved directly to s3, so each array is first written under /tmp,
    uploaded to the gfw2-data bucket, and then removed locally. Outputs are handled
    independently: a failure on one output is logged with its traceback and the
    remaining outputs are still attempted. The local raster is removed whether or
    not the write/upload succeeded, so failed chunks don't accumulate files in /tmp.

    Args:
        bounds: Chunk bounds, unpacked into rasterio.transform.from_bounds.
        chunk_length_pixels: Width and height of the (square) chunk in pixels.
        tile_id: ID of the tile that contains the chunk; used in the output file name.
        bounds_str: String form of the chunk bounds; used in the output file name.
        output_dict: Maps output name -> [array, dtype name, data meaning, year].
            Data meaning and year become folders in the s3 output path.
        is_final: If True, file names carry no timestamp suffix and the
            per-step progress messages are not logged.

    Returns:
        None.
    """
    s3_client = boto3.client("s3")  # Needs to be in the same function as the upload_file call

    transform = rasterio.transform.from_bounds(*bounds, width=chunk_length_pixels, height=chunk_length_pixels)

    file_info = f'{tile_id}__{bounds_str}'

    # For every output file, saves from array to local raster, then to s3.
    # Can't save directly to s3, unfortunately, so need to save locally first.
    for key, value in output_dict.items():
        logging.info(f"Processing output for key: {key}")

        # Only set once the file name is known, so cleanup knows whether there is anything to delete
        local_path = None

        try:
            data_array, data_type, data_meaning, year_out = value[:4]

            logging.info(f"Data type: {data_type}, Data meaning: {data_meaning}, Year out: {year_out}")

            if is_final:
                file_name = f"{file_info}__{key}.tif"
            else:
                logging.info(f"Saving {bounds_str} in {tile_id} for {year_out}: {timestr()}")
                # Non-final outputs get a timestamp so that test runs don't overwrite each other
                file_name = f"{file_info}__{key}__{timestr()}.tif"

            local_path = f"/tmp/{file_name}"

            with rasterio.open(local_path, 'w', driver='GTiff', width=chunk_length_pixels, height=chunk_length_pixels, count=1,
                               dtype=data_type, crs='EPSG:4326', transform=transform, compress='lzw', blockxsize=400, blockysize=400) as dst:
                dst.write(data_array, 1)

            s3_path = f"{s3_out_dir}/{data_meaning}/{year_out}/{chunk_length_pixels}_pixels/{time.strftime('%Y%m%d')}"
            logging.info(f"Saving output to {s3_path}...")

            if not is_final:
                logging.info(f"Uploading {bounds_str} in {tile_id} for {year_out} to {s3_path}: {timestr()}")

            s3_client.upload_file(local_path, "gfw2-data", Key=f"{s3_path}/{file_name}")

            logging.info(f"Successfully processed and uploaded {file_name}")

        except Exception as e:
            # Best effort: one failed output must not stop the others, but keep the traceback for debugging
            logging.exception(f"Error processing key {key} with value {value}: {str(e)}")

        finally:
            # Deletes the local raster, including after a failed write or upload
            if local_path is not None and os.path.exists(local_path):
                try:
                    os.remove(local_path)
                except OSError:
                    logging.warning(f"Could not delete local raster {local_path}", exc_info=True)

    logging.info(f"Completed processing for chunk {bounds_str}.")

In [41]:
def calculate_and_upload_drainage(bounds, is_final):
    """Create and upload the organic soil drainage rasters for one chunk.

    Downloads the input layers for the chunk, classifies every pixel with
    process_soil, and hands the resulting "soil" and "state" arrays to
    save_and_upload_small_raster_set. The chunk is skipped if its tile does not
    exist or if the chunk has no data.

    Any exception raised while processing the chunk is logged with its traceback
    and swallowed, so that one bad chunk does not abort the other chunks of a run.

    Args:
        bounds: Chunk bounds; bounds[0] and bounds[3] are used to look up the tile ID.
        is_final: Passed through to the helper functions and to the upload step,
            where it controls output file naming and progress logging.

    Returns:
        None.
    """
    bounds_str = boundstr(bounds)  # String form of chunk bounds
    tile_id = xy_to_tile_id(bounds[0], bounds[3])  # tile_id in YYN/S_XXXE/W
    chunk_length_pixels = calc_chunk_length_pixels(bounds)  # Chunk length in pixels (as opposed to decimal degrees)

    no_data_val = 255
    logging.info(f"Processing tile {tile_id} with bounds {bounds_str}")

    try:
        # Dictionary of layers to download
        download_dict = {
            f"{land_cover}_2020": f"s3://gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_classes/2020/40000_pixels/20240205/{tile_id}__IPCC_classes_2020.tif",
            planted_forest_type_layer: f"s3://gfw2-data/climate/carbon_model/other_emissions_inputs/plantation_type/SDPTv2/20230911/{tile_id}_plantation_type_oilpalm_woodfiber_other.tif",
            planted_forest_tree_crop_layer: f"s3://gfw2-data/climate/carbon_model/other_emissions_inputs/plantation_simpleType__planted_forest_tree_crop/SDPTv2/20230911/{tile_id}.tif",
            "peat": f"s3://gfw2-data/climate/carbon_model/other_emissions_inputs/peatlands/processed/20230315/{tile_id}_peat_mask_processed.tif",
            "dadap": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/dadap_density/30m/dadap_{tile_id}.tif",
            "engert": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/engert_density/30m/engert_{tile_id}.tif",
            "grip": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/grip_density/30m/grip_density_{tile_id}.tif",
            # TODO: osm_roads currently reads the OSM canals density as a placeholder. Update once roads data is ready.
            "osm_roads": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/osm_canals_density/30m/canals_density_{tile_id}.tif",
            "osm_canals": f"s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/processed/osm_canals_density/30m/canals_density_{tile_id}.tif",
            # TODO: add descals oil palm layer
            # TODO: add extraction mosaic dataset
        }

        # Checks whether tile exists at all. Doesn't try to download chunk if the tile doesn't exist.
        tile_exists = check_for_tile(download_dict, is_final)

        if tile_exists == 0:
            logging.info(f"Tile {tile_id} does not exist. Skipping.")
            return

        logging.info(f"Tile {tile_id} exists. Proceeding with downloading data.")
        futures = prepare_to_download_chunk(bounds, download_dict, no_data_val)

        if not is_final:
            logging.info(f"Waiting for requests for data in chunk {bounds_str} in {tile_id}: {timestr()}")

        # Waits for requests to come back with data from S3
        layers = {}
        for future in concurrent.futures.as_completed(futures):
            layer = futures[future]
            layers[layer] = future.result()
            logging.info(f"Downloaded data for layer: {layer}")

        data_in_chunk = check_chunk_for_data(layers, f"{land_cover}_", bounds_str, tile_id, no_data_val, is_final)

        if data_in_chunk == 0:
            logging.info(f"No data in chunk {bounds_str}. Skipping.")
            return

        logging.info(f"Data present in chunk {bounds_str}. Proceeding with processing.")

        # Every downloaded layer must have one of the supported dtypes
        for key, array in layers.items():
            logging.info(f"Processing layer {key} with dtype {array.dtype}")
            if not any(array.dtype == supported for supported in (np.uint8, np.int16, np.float32)):
                raise TypeError(f"{key} dtype not in list")

        def layer_with_dtype(name, expected_dtype):
            # Returns the downloaded layer, failing with a descriptive message if its dtype
            # is not the one the drainage classification is written for
            array = layers[name]
            if array.dtype != expected_dtype:
                raise TypeError(f"{name} has dtype {array.dtype} but {np.dtype(expected_dtype).name} is required")
            return array

        peat_block = layer_with_dtype("peat", np.uint8)
        land_cover_block = layer_with_dtype(f"{land_cover}_2020", np.uint8)
        planted_forest_type_block = layer_with_dtype(planted_forest_type_layer, np.uint8)
        # planted_forest_tree_crop_block is downloaded but not yet used in the classification
        dadap_block = layer_with_dtype("dadap", np.float32)
        osm_roads_block = layer_with_dtype("osm_roads", np.float32)
        osm_canals_block = layer_with_dtype("osm_canals", np.float32)
        engert_block = layer_with_dtype("engert", np.float32)
        grip_block = layer_with_dtype("grip", np.float32)

        logging.info(f"Creating drainage map in {bounds_str} in {tile_id}: {timestr()}")
        soil_block, state_out = process_soil(
            peat_block, land_cover_block, planted_forest_type_block, dadap_block, osm_roads_block, osm_canals_block, engert_block, grip_block
        )

        year = 2020  # Hardcoded example year, change as needed

        # Output name -> [array, dtype name, data meaning (s3 folder), year], as expected by the upload function
        out_dict_all_dtypes = {
            name: [array, array.dtype.name, name, f'{year}']
            for name, array in {"soil": soil_block, "state": state_out}.items()
        }

        logging.info(f"Saving and uploading rasters for chunk {bounds_str}.")
        save_and_upload_small_raster_set(bounds, chunk_length_pixels, tile_id, bounds_str, out_dict_all_dtypes, is_final)

        logging.info(f"Completed processing for chunk {bounds_str}.")

    except Exception as e:
        # Best effort: a failed chunk must not stop the run, but keep the traceback so the failure can be diagnosed
        logging.exception(f"Failed processing for {bounds_str}: {str(e)}")

In [42]:
@jit(nopython=True)
def accrete_node(combo, new):
    """Shift `combo` one decimal place to the left and add `new`, e.g. (1, 3) -> 13."""
    return combo * 10 + new

In [43]:
@jit(nopython=True)
def process_soil(peat_block, land_cover_block, planted_forest_type_block, dadap_block, osm_roads_block, osm_canals_block, engert_block, grip_block):
    """Classify each pixel as drained (1) or undrained (0) and record which rule decided it.

    The first matching rule determines the pixel's state code:
        2  = not peat (undrained)
        11 = peat with dadap or OSM canal density > 0 (drained)
        12 = peat with engert, grip or OSM road density > 0 (drained)
        13 = peat under cropland or settlement land cover (drained)
        14 = peat under planted forest (drained)
        15 = peat matching none of the above (undrained)
    A value of 255 in the density and planted forest inputs is never counted as a match.

    Returns:
        (soil_block, state_out): two uint32 arrays with the shape of peat_block, holding
        the drained/undrained flag and the state code, respectively.
    """
    n_rows, n_cols = peat_block.shape

    soil_block = np.empty((n_rows, n_cols), dtype=np.uint32)
    state_out = np.empty((n_rows, n_cols), dtype=np.uint32)

    for r in range(n_rows):
        for c in range(n_cols):

            # Anything that isn't peat is undrained and needs no further checks
            if peat_block[r, c] != 1:
                soil_block[r, c] = 0  # 'undrained'
                state_out[r, c] = accrete_node(0, 2)
                continue

            lc_class = land_cover_block[r, c]
            planted = planted_forest_type_block[r, c]
            dadap = dadap_block[r, c]
            roads = osm_roads_block[r, c]
            canals = osm_canals_block[r, c]
            engert = engert_block[r, c]
            grip = grip_block[r, c]

            # Picks the second digit of the state code from the first rule that applies
            if (dadap != 255 and dadap > 0) or (canals != 255 and canals > 0):
                rule = 1
            elif (engert != 255 and engert > 0) or (grip != 255 and grip > 0) or (roads != 255 and roads > 0):
                rule = 2
            elif lc_class == 2 or lc_class == 3:  # 2 = cropland; 3 = settlement
                rule = 3
            elif planted != 255 and planted > 0:  # note that we may need to remap planted forest type for emissions
                rule = 4
            else:
                rule = 5

            # Only the fall-through rule (5) leaves peat undrained
            if rule == 5:
                soil_block[r, c] = 0  # 'undrained'
            else:
                soil_block[r, c] = 1  # 'drained'

            # First digit 1 marks peat; second digit is the rule that matched
            state_out[r, c] = accrete_node(accrete_node(0, 1), rule)

    return soil_block, state_out


In [44]:
%%time

## Create organic soil drainage rasters (drained/undrained status and decision state) for each chunk

## Area to analyze
## chunk_params arguments: W, S, E, N, chunk size (degrees)
# chunk_params = [-180, -60, 180, 80, 2]  # entire world
# chunk_params = [-10, 40, 20, 70, 1]    # 30x30 deg (70N_010W), 900 chunks

# chunk_params = [-10, 60, 0, 70, 1]    # 10x10 deg (70N_010W), 100 chunks
# chunk_params = [-10, 65, -5, 70, 1]    # 5x5 deg (70N_010W), 25 chunks
# chunk_params = [-10, 68, -8, 70, 1]    # 2x2 deg (70N_010W), 4 chunks
# chunk_params = [-10, 69, -9, 70, 1]    # 1x1 deg (70N_010W), 1 chunk

# chunk_params = [10, 40, 20, 50, 2]    # 10x10 deg (50N_010E), 25 chunks
# chunk_params = [10, 40, 20, 50, 10]    # 10x10 deg (50N_010E), 1 chunk
# chunk_params = [10, 46, 14, 50, 2]   # 4x4 deg, 4 chunks
# chunk_params = [110, -10, 114, -6, 2]   # 4x4 deg, 4 chunks
# chunk_params = [10, 48, 12, 50, 1]   # 2x2 deg, 4 chunks
# chunk_params = [10, 49, 11, 50, 1]   # 1x1 deg, 1 chunk
# chunk_params = [10, 49, 11, 50, 0.5] # 1x1 deg, 4 chunks
# chunk_params = [10, 49.5, 10.5, 50, 0.25] # 0.5x0.5 deg, 4 chunks
# chunk_params = [10, 42, 11, 43, 0.5] # 1x1 deg, 4 chunks (some GLCLU code=254 for ocean and some land, so data should be output)
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data, no fire)
#chunk_params = [15, 41.75, 15.25, 42, 0.25] # 0.25x0.25 deg, 1 chunk (has data with fire)

# # Range of no-data cases for testing
# chunk_params = [20, 69.75, 20.25, 70, 0.25] # 0.25x0.25 deg, 1 chunk (tile exists for GLCLU but not all other inputs, e.g., fire)
# chunk_params = [110, -10, 120, 0, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [110, -20, 120, -10, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [0, 79.75, 0.25, 80, 0.25] # 0.25x0.25 deg, 1 chunk (no 80N_000E tile-- no data)
# chunk_params = [112, -12, 116, -8, 2]   # 2x2 deg, 1 chunk (bottom of Java, has data but mostly ocean)
# chunk_params = [10.875, 41.75, 11, 42, 0.25] # 0.25x0.25 deg, 1 chunk (entirely GLCLU code=255 for ocean, so no actual data-- nothing should be output)
# chunk_params = [-10, 21.75, -9.75, 22, 0.25] # 0.25x0.25 deg, 1 chunk (has data but entirely desert (fully GLCLU code=0))
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)

#drainage chunks for testing
# chunk_params = [112.75, -2.75, 113.0, -2.5, 0.25] # 1 chunk, 0.25 degrees has data. Southern Borneo
# chunk_params = [110.0, -10.0, 120.0, 0.0, 2] # tile 00N_110E, Indonesia, 25 2-degree chunks
# chunk_params = [10.0, 0.0, 20.0, 10.0, 2] # tile 10N_010E, Central Africa, 25 2-degree chunks
# chunk_params = [16.0, 2.0, 18.0, 4.0, 2] #tile 10N_010E, Central Africa, 1 2-degree chunk with data
# chunk_params = [114.0, -4.0, 116.0, -2.0, 2] # tile 00N_110E, Southern Borneo, 1 2-degree chunk with data
# chunk_params = [-76.0, -2.0, -74.0, 0.0, 2] #tile 00N_080W, Peru, 1 2-degree chunk with data
chunk_params = [-180, -60, 180, 80, 2]  # entire world

# Splits the area of interest into the list of chunks to analyze
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")
# print(chunks)

# Runs with more than 90 chunks are treated as final runs,
# which use the output file names for final versions of outputs
is_final = len(chunks) > 90
if is_final:
    print("Running as final model.")

# Creates list of tasks to run (1 task = 1 chunk)
drainage_task = dask.delayed(calculate_and_upload_drainage)
delayed_result = [
    drainage_task(chunk, is_final)
    for chunk in chunks
]

# Actually runs analysis. Each task returns None; the outputs of interest are the rasters uploaded to s3.
results = dask.compute(*delayed_result)
results

Processing 12600 chunks
Running as final model.
Requesting data in chunk -68_56_-66_58 in 60N_070W: 20240607_17_16_15
Requesting data in chunk 122_22_124_24 in 30N_120E: 20240607_17_16_15
Requesting data in chunk 12_-4_14_-2 in 00N_010E: 20240607_17_16_15
Requesting data in chunk 128_-18_130_-16 in 10S_120E: 20240607_17_16_15
Requesting data in chunk -92_32_-90_34 in 40N_100W: 20240607_17_16_15
Requesting data in chunk 54_66_56_68 in 70N_050E: 20240607_17_16_15
Requesting data in chunk -16_18_-14_20 in 20N_020W: 20240607_17_16_15
Requesting data in chunk 102_28_104_30 in 30N_100E: 20240607_17_16_15
Requesting data in chunk 14_-40_16_-38 in 30S_010E: 20240607_17_16_15
Requesting data in chunk -96_42_-94_44 in 50N_100W: 20240607_17_16_15
Requesting data in chunk 130_2_132_4 in 10N_130E: 20240607_17_16_15
Requesting data in chunk -102_54_-100_56 in 60N_110W: 20240607_17_16_15
Requesting data in chunk 22_26_24_28 in 30N_020E: 20240607_17_16_15
Requesting data in chunk -60_16_-58_18 in 20N_

(None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [45]:
### questions
# Tile id ses_2020 exists. Proceeding.


In [46]:
# local_client.shutdown()

In [47]:
# # Example data for testing
# in_dict_uint8 = {
#     'peat': np.random.randint(0, 2, size=(100, 100)).astype(np.uint8),
#     'land_cover_2020': np.random.randint(0, 10, size=(100, 100)).astype(np.uint8),
#     'planted_forest_type': np.random.randint(0, 2, size=(100, 100)).astype(np.uint8),
# }
# in_dict_int16 = {}
# in_dict_float32 = {
#     'dadap': np.random.rand(100, 100).astype(np.float32),
#     'osm_roads': np.random.rand(100, 100).astype(np.float32),
#     'osm_canals': np.random.rand(100, 100).astype(np.float32),
#     'engert': np.random.rand(100, 100).astype(np.float32),
#     'grip': np.random.rand(100, 100).astype(np.float32),
# }


2024-06-07 11:55:20,626 - distributed.client - ERROR - Failed to reconnect to scheduler after 30.00 seconds, closing client
2024-06-07 17:13:40,482 - INFO - Found credentials in shared credentials file: ~/.aws/credentials
