<font size="6">Reclassify GLCLU to basic IPCC classes and create change rasters</font> 

<font size="4">Must be run using the utilities_and_variables.ipynb kernel</font> 

In [32]:
# Function to reclassify GLCLU classes to basic IPCC reporting classes.
# Operates on the array/chunk. 
# Classification comes from https://onewri-my.sharepoint.com/:p:/g/personal/david_gibbs_wri_org/EWwyxRfgdeVJi4ezwX7LrfcBjCoqAcjL2jRAZjb_8RU9LQ?e=YUsQiU
def reclassify_to_IPCC(GLCLU_block):
    """
    Reclassify an array of GLCLU class codes into basic IPCC reporting classes.

    GLCLU_block: numpy array (chunk) of GLCLU class codes.

    Returns a new array with the same shape as GLCLU_block (created with np.zeros,
    so numpy's default float dtype) holding the IPCC class value for each pixel.
    Pixels whose GLCLU code is not covered by any rule below keep the value 0.
    """

    # Output array; anything not matched by a rule stays 0
    reclassified = np.zeros(GLCLU_block.shape)

    # Inclusive GLCLU code ranges and the IPCC class each one maps to
    range_rules = (
        (2, 26, grassland),
        (27, 48, forest),
        (100, 101, wetland),
        (102, 126, grassland),
        (127, 148, forest),
        (200, 204, wetland),
        (205, 207, otherland),
    )

    # Individual GLCLU codes and the IPCC class each one maps to
    single_code_rules = (
        (241, otherland),
        (244, cropland),
        (250, settlement),
        (254, otherland),
    )

    # Everything at or below code 1 is other land
    reclassified[GLCLU_block <= 1] = otherland

    for lowest_code, highest_code, ipcc_class in range_rules:
        in_range = (GLCLU_block >= lowest_code) & (GLCLU_block <= highest_code)
        reclassified[in_range] = ipcc_class

    for glclu_code, ipcc_class in single_code_rules:
        reclassified[GLCLU_block == glclu_code] = ipcc_class

    return reclassified

In [33]:
# Function to map basic IPCC change classes.
# Operates pixel by pixel, so uses numba (Python JIT-compiled to machine code).
@jit(nopython=True)
def change_classes_IPCC(IPCC_previous_block, IPCC_current_block):
    """
    Map the change between two arrays of IPCC classes, pixel by pixel.

    IPCC_previous_block: 2D array of IPCC classes for the earlier year.
    IPCC_current_block: 2D array of IPCC classes for the later year (indexed with the
        same row/column positions as IPCC_previous_block).

    Returns an array with the shape of IPCC_previous_block. Pixels that are 0 in both
    inputs get 0; every other pixel gets
    ((previous - 1) * IPCC_class_max_val) + current.
    """

    n_rows = IPCC_previous_block.shape[0]
    n_cols = IPCC_previous_block.shape[1]

    # Starts as all 0s, so pixels that are "no data" in both years need no assignment
    change_codes = np.zeros(IPCC_previous_block.shape)

    # Visits every pixel in the chunk
    for r in range(n_rows):
        for c in range(n_cols):

            before = IPCC_previous_block[r, c]
            after = IPCC_current_block[r, c]

            # Anything that is not 0 in both years gets a change code.
            # NOTE(review): a pixel that is 0 in only one of the two years also goes through
            # the equation -- confirm that is intended.
            if (before != 0) or (after != 0):
                change_codes[r, c] = ((before - 1) * IPCC_class_max_val) + after

    return change_codes

In [77]:
# Downloads input chunks, reclassifies GLCLU classes into IPCC land use reporting classes for each year, and maps changes between classes for consecutive years.
# Chunks are defined by a bounding box. is_final is passed through to the raster save/upload step.
def reclassify_and_map_change_chunk(bounds, is_final):
    """
    Process one chunk: download the land cover layers, reclassify each model year into
    IPCC classes, map the class change between consecutive model years, and save/upload both
    sets of rasters.

    bounds: chunk bounding box. bounds[0] and bounds[3] are used to look up the tile id.
    is_final: passed to save_and_upload_small_raster_set; when truthy, the
        "Waiting for requests" message is also not printed.

    Returns a success message string for the chunk, or None if the tile does not exist
    or the chunk has no data.
    """

    bounds_str = boundstr(bounds)    # String form of chunk bounds
    tile_id = xy_to_tile_id(bounds[0], bounds[3])    # tile_id in YYN/S_XXXE/W
    chunk_length_pixels = calc_chunk_length_pixels(bounds)   # Chunk length in pixels (as opposed to decimal degrees)

    no_data_val = 255


    ### Part 1: download chunks and check for data

    # Dictionary of downloaded layers
    layers = {}

    # NOTE(review): these years are hard-coded, while Parts 2 and 3 iterate over
    # first_year..last_year in steps of 5. They must stay in sync.
    download_dict = {

        "land_cover_2000": f"{composite_LC_uri}/2000/raw/{tile_id}.tif",
        "land_cover_2005": f"{composite_LC_uri}/2005/raw/{tile_id}.tif",
        "land_cover_2010": f"{composite_LC_uri}/2010/raw/{tile_id}.tif",
        "land_cover_2015": f"{composite_LC_uri}/2015/raw/{tile_id}.tif",
        "land_cover_2020": f"{composite_LC_uri}/2020/raw/{tile_id}.tif"
    }

    # Checks whether tile exists at all. Doesn't try to download chunk if the tile doesn't exist.
    tile_exists = check_for_tile(download_dict)

    if tile_exists == 0:
        return

    futures = prepare_to_download_chunk(bounds, download_dict, no_data_val)

    if not is_final:
        dask_print(f"Waiting for requests for data in chunk {bounds_str} in {tile_id}: {timestr()}")

    # Waits for requests to come back with data from S3
    for future in concurrent.futures.as_completed(futures):
        layer = futures[future]
        layers[layer] = future.result()

    # Checks chunk for data. Skips the chunk if it has no data in it.
    data_in_chunk = check_chunk_for_data(layers, "land_cover_", bounds_str, tile_id, no_data_val)

    if data_in_chunk == 0:
        return


    ### Part 2: reclassify GLCLU classes into IPCC reporting classes
    IPCC_class_dict = {}

    # Iterates through model years
    for year in range(first_year, last_year+1, 5):

        dask_print(f"Reclassifying {bounds_str} in {tile_id} for {year}: {timestr()}")

        # Reclassifies GLCLU to 6 IPCC classes
        IPCC_classes = reclassify_to_IPCC(
            layers[f"land_cover_{year}"]
        )

        # Output arrays to upload to s3. Adds new array to dictionary for each year
        IPCC_class_dict[f"IPCC_classes_{year}"] = [IPCC_classes, "uint8", "IPCC_basic_classes", year]

    save_and_upload_small_raster_set(bounds, chunk_length_pixels, tile_id, bounds_str, IPCC_class_dict, is_final)


    ### Part 3: map change between IPCC classes for consecutive model years
    IPCC_change_dict = {}

    # Iterates through model years in a way that change can be calculated
    for year in range(first_year+5, last_year+1, 5):

        dask_print(f"Getting IPCC class change in {bounds_str} in {tile_id} for {year}: {timestr()}")

        # Maps change between IPCC classes.
        # [0] isolates the array from each year's [array, dtype, output name, year] list.
        IPCC_change = change_classes_IPCC(
            IPCC_class_dict[f"IPCC_classes_{year-5}"][0],
            IPCC_class_dict[f"IPCC_classes_{year}"][0]
        )

        # Output files to upload to s3
        IPCC_change_dict[f"IPCC_change_{year-5}_{year}"] = [IPCC_change, "uint8", "IPCC_basic_change", f'{year-5}_{year}']

    save_and_upload_small_raster_set(bounds, chunk_length_pixels, tile_id, bounds_str, IPCC_change_dict, is_final)

    # No explicit `del` of the arrays/dictionaries here: they are local names that are released
    # when the function returns, and deleting a name that was never assigned (e.g. IPCC_change
    # when there is only one model year, so the Part 3 loop never runs) raises UnboundLocalError.

    return f"Success for {bounds_str}: {timestr()}"

In [28]:
%%time

## Create IPCC single-year class and change 2x2 deg rasters

## Area to analyze
## chunk_params arguments: W, S, E, N, chunk size (degrees)
# chunk_params = [-180, -60, 180, 80, 2]  # entire world
# chunk_params = [-10, 40, 20, 70, 1]    # 30x30 deg (70N_010W), 900 chunks
# chunk_params = [10, 40, 20, 50, 2]    # 10x10 deg (50N_010E), 25 chunks
# chunk_params = [10, 40, 20, 50, 10]    # 10x10 deg (50N_010E), 1 chunk
# chunk_params = [10, 46, 14, 50, 2]   # 4x4 deg, 4 chunks
# chunk_params = [110, -10, 114, -6, 2]   # 4x4 deg, 4 chunks
# chunk_params = [10, 48, 12, 50, 1]   # 2x2 deg, 4 chunks
# chunk_params = [10, 49, 11, 50, 1]   # 1x1 deg, 1 chunk
# chunk_params = [10, 49, 11, 50, 0.5] # 1x1 deg, 4 chunks
# chunk_params = [10, 49.5, 10.5, 50, 0.25] # 0.5x0.5 deg, 4 chunks
# chunk_params = [10, 42, 11, 43, 0.5] # 1x1 deg, 4 chunks (some GLCLU code=254 for ocean and some land, so data should be output)
chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)

# # Range of no-data cases for testing
# chunk_params = [110, -10, 120, 0, 2]    # 10x10 deg (00N_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [110, -20, 120, -10, 2]    # 10x10 deg (10S_110E), 25 chunks (all chunks have land and should be output)
# chunk_params = [0, 79.75, 0.25, 80, 0.25] # 0.25x0.25 deg, 1 chunk (no 80N_000E tile-- no data)
# chunk_params = [112, -12, 116, -8, 2]   # 4x4 deg, 4 chunks (bottom of Java, has data but mostly ocean)
# chunk_params = [10.875, 41.75, 11, 42, 0.25] # 0.25x0.25 deg, 1 chunk (entirely GLCLU code=255 for ocean, so no actual data-- nothing should be output)
# chunk_params = [-10, 21.75, -9.75, 22, 0.25] # 0.25x0.25 deg, 1 chunk (has data but entirely desert (fully GLCLU code=0))
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)


# Builds the list of chunk bounds covering the area to analyze
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")

# Runs with more than 30 chunks use the output file names for final versions of outputs
is_final = len(chunks) > 30
if is_final:
    print("Running as final model.")

# Wraps the chunk function once, then builds one task per chunk (1 task = 1 chunk for all years)
process_chunk = dask.delayed(reclassify_and_map_change_chunk)
delayed_result = [process_chunk(chunk, is_final) for chunk in chunks]

# Triggers the actual computation and shows the per-chunk return values
results = dask.compute(*delayed_result)
results

Processing 1 chunks
Tile id 50N_010E exists. Proceeding.
Requesting data in chunk 10_50_10_50 in 50N_010E: 20240228_18_37_51
Waiting for requests for data in chunk 10_50_10_50 in 50N_010E: 20240228_18_38_04
Data in chunk 10_50_10_50. Proceeding.
{'land_cover_2020': array([[244, 244, 244, ..., 250, 250, 250],
       [244, 244, 244, ..., 250, 250, 250],
       [244, 250, 244, ..., 250, 250, 250],
       ...,
       [ 41,  44,  45, ..., 244, 244, 244],
       [ 42,  42,  43, ..., 244, 244, 244],
       [ 43,  45,  43, ..., 244, 244, 244]], dtype=uint8), 'land_cover_2010': array([[244, 244, 244, ..., 250, 250, 250],
       [244, 244, 244, ..., 250, 250, 250],
       [244, 244, 244, ..., 250, 250, 250],
       ...,
       [ 44,  43,  44, ..., 244, 244, 244],
       [ 42,  44,  44, ..., 244, 244, 244],
       [ 45,  45,  46, ..., 244, 244, 244]], dtype=uint8), 'land_cover_2000': array([[244, 244, 244, ..., 250, 250, 250],
       [244, 244, 244, ..., 250, 250, 250],
       [244, 244, 244, ...

('Success for 10_50_10_50: 20240228_18_38_04',)

In [None]:
%%time

## Create raster footprint shapefiles from listed rasters
## Doesn't use memory. Can be done on 4 GB workers. Only need as many workers as there are folders.

# Pieces shared by every folder path
outputs_prefix = "gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs"
run_folder = "8000_pixels/20240205"

# Single-year class rasters and between-year change rasters to index
class_years = ["2000", "2005", "2010", "2015", "2020"]
change_periods = ["2000_2005", "2005_2010", "2010_2015", "2015_2020"]

# One {s3 folder: output shapefile name} dictionary per folder: classes first, then change
class_inputs = [
    {f"{outputs_prefix}/IPCC_basic_classes/{year}/{run_folder}/": f"IPCC_basic_classes_{year}"}
    for year in class_years
]
change_inputs = [
    {f"{outputs_prefix}/IPCC_basic_change/{period}/{run_folder}/": f"IPCC_basic_change_{period}"}
    for period in change_periods
]
input_dicts = class_inputs + change_inputs

# One delayed footprint task per folder
make_footprint = dask.delayed(make_tile_footprint_shp)
delayed_result = [make_footprint(input_dict) for input_dict in input_dicts]

# Triggers the actual computation and shows the per-folder return values
results = dask.compute(*delayed_result)
results

In [None]:
%%time

## Create 10x10 degree rasters aggregated from 2x2 degree rasters
## Doesn't use much memory. Can be done on 30x 8 GB workers (1 hour).
## In this case, it's aggregation of the IPCC single-year class rasters and change rasters

# Pieces shared by every folder path
outputs_prefix = "gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs"
run_folder = "8000_pixels/20240205"

# Single-year class rasters and between-year change rasters to aggregate
class_years = ["2000", "2005", "2010", "2015", "2020"]
change_periods = ["2000_2005", "2005_2010", "2010_2015", "2015_2020"]

# One {s3 folder: nodata value} dictionary per folder: classes first (nodata 0), then change (nodata 255).
# The nodata values are for the output rasters (not currently used).
class_folders = [
    {f"{outputs_prefix}/IPCC_basic_classes/{year}/{run_folder}/": 0}
    for year in class_years
]
change_folders = [
    {f"{outputs_prefix}/IPCC_basic_change/{period}/{run_folder}/": 255}
    for period in change_periods
]
s3_in_folder_dicts = class_folders + change_folders

# Creates the list of aggregated 10x10 rasters that will be created
# (list of dictionaries of input s3 folder and output aggregated raster name).
# These are the basis for the tasks.
list_of_s3_name_dicts_total = create_list_for_aggregation(s3_in_folder_dicts)

# # For testing. Limits the number of output rasters
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[0:40]  # First 40 tiles
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[40:41] # 10N_130E; Internal chunks missing and padding needed on right; FID40
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[0:1]  # 00N_000E; Padding below and left; FID0
# list_of_s3_name_dicts_total = list_of_s3_name_dicts_total[41:42]  # 10S_010E; No padding needed; FID41

# One delayed merge task per aggregated output raster
merge_tiles = dask.delayed(merge_small_tiles_gdal)
delayed_result = [merge_tiles(s3_name_no_data_dict) for s3_name_no_data_dict in list_of_s3_name_dicts_total]

# Triggers the actual computation and shows the per-raster return values
results = dask.compute(*delayed_result)
results

In [22]:
%%time

## Create raster footprint shapefiles from listed rasters
## Doesn't use memory. Can be done on 4 GB workers. Only need as many workers as there are folders.

# Pieces shared by every folder path
outputs_prefix = "gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs"
run_folder = "40000_pixels/20240205"

# Single-year class rasters and between-year change rasters to index
class_years = ["2000", "2005", "2010", "2015", "2020"]
change_periods = ["2000_2005", "2005_2010", "2010_2015", "2015_2020"]

# One {s3 folder: output shapefile name} dictionary per folder: classes first, then change.
# The shapefile name is generated from the same year/period string as the folder, so the two
# cannot drift apart (the 2000_2005 change shapefile was previously misnamed
# "IPCC_basic_change_2000_200__10x10").
class_inputs = [
    {f"{outputs_prefix}/IPCC_basic_classes/{year}/{run_folder}/": f"IPCC_basic_classes_{year}__10x10"}
    for year in class_years
]
change_inputs = [
    {f"{outputs_prefix}/IPCC_basic_change/{period}/{run_folder}/": f"IPCC_basic_change_{period}__10x10"}
    for period in change_periods
]
input_dicts = class_inputs + change_inputs

# Make raster footprint shapefiles from output rasters (one delayed task per folder)
delayed_result = [dask.delayed(make_tile_footprint_shp)(input_dict) for input_dict in input_dicts]

# Actually runs analysis
results = dask.compute(*delayed_result)
results

Making tile index shapefile for: gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_change/2015_2020/40000_pixels/20240205/: 20240215_22_12_57
Making tile index shapefile for: gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_classes/2005/40000_pixels/20240205/: 20240215_22_12_57
Making tile index shapefile for: gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_classes/2015/40000_pixels/20240205/: 20240215_22_12_57
Making tile index shapefile for: gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_classes/2020/40000_pixels/20240205/: 20240215_22_12_57
Making tile index shapefile for: gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_change/2000_2005/40000_pixels/20240205/: 20240215_22_12_57
Making tile index shapefile for: gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_change/2010_2015/40000_pixels/20240205/: 20240215_22_12_57
Making tile index shapefile for: gfw2-data/climate/AFOLU_flux_model/LULUCF/outputs/IPCC_basic_

('Completed: 20240215_22_13_36',
 'Completed: 20240215_22_13_31',
 'Completed: 20240215_22_13_36',
 'Completed: 20240215_22_13_33',
 'Completed: 20240215_22_13_35',
 'Completed: 20240215_22_13_35',
 'Completed: 20240215_22_13_35',
 'Completed: 20240215_22_13_34',
 'Completed: 20240215_22_13_35')