<font size="6">Analysis</font> 

In [14]:
# Function to reclassify GLCLU classes to basic IPCC reporting classes.
# Operates on the array/chunk. 
# Classification comes from https://onewri-my.sharepoint.com/:p:/g/personal/david_gibbs_wri_org/EWwyxRfgdeVJi4ezwX7LrfcBjCoqAcjL2jRAZjb_8RU9LQ?e=YUsQiU
def reclassify_to_IPCC(GLCLU_block):
    """
    Reclassifies an array (chunk) of GLCLU codes into basic IPCC reporting classes.

    GLCLU_block: numpy array of GLCLU land cover codes
    Returns: array with the same shape as GLCLU_block (np.zeros default dtype) holding
             the IPCC class code of each pixel. Codes not covered by any rule below stay 0.
    """

    # Output starts as all 0s; only pixels matching a rule are overwritten
    IPCC_classes = np.zeros(GLCLU_block.shape)

    # Lowest codes: no lower bound on this rule
    IPCC_classes[GLCLU_block <= 1] = otherland

    # Inclusive GLCLU code ranges: (lowest code, highest code, IPCC class)
    code_ranges = (
        (2, 26, grassland),
        (27, 48, forest),
        (100, 101, wetland),
        (102, 126, grassland),
        (127, 148, forest),
        (200, 204, wetland),
        (205, 207, otherland),
    )
    for lowest, highest, IPCC_class in code_ranges:
        in_range = (GLCLU_block >= lowest) & (GLCLU_block <= highest)
        IPCC_classes[in_range] = IPCC_class

    # Individual GLCLU codes: (code, IPCC class)
    single_codes = (
        (241, otherland),
        (244, cropland),
        (250, settlement),
        (254, otherland),
    )
    for code, IPCC_class in single_codes:
        IPCC_classes[GLCLU_block == code] = IPCC_class

    return IPCC_classes

In [15]:
# Function to map basic IPCC change classes.
# Operates pixel by pixel, so uses numba (Python JIT-compiled to machine code).
@jit(nopython=True)
def change_classes_IPCC(IPCC_previous_block, IPCC_current_block):
    """
    Maps the change between IPCC classes of two time steps, pixel by pixel.

    IPCC_previous_block: 2D array of IPCC classes for the earlier year (0 = no data)
    IPCC_current_block: 2D array of IPCC classes for the later year; indexed with the
                        row/col ranges of IPCC_previous_block, so it must be at least as large
    Returns: array with the shape of IPCC_previous_block (np.zeros default dtype) where each pixel is
             ((previous - 1) * IPCC_class_max_val) + current,
             or 0 where either input pixel is 0 (no data).

    NOTE: IPCC_class_max_val is a module-level global; numba treats globals as
    compile-time constants, so changing it requires re-defining this function.
    """

    # Output array of 0s, i.e., every pixel starts as "no data"
    IPCC_change_block = np.zeros(IPCC_previous_block.shape)

    # Iterates through all pixels in the chunk
    for row in range(IPCC_previous_block.shape[0]):
        for col in range(IPCC_previous_block.shape[1]):

            IPCC_previous = IPCC_previous_block[row, col]
            IPCC_current = IPCC_current_block[row, col]

            # A change code is only defined when both years have data.
            # If only one year were "no data" (0), the equation would produce either a negative code
            # (previous == 0) or a code that collides with a valid transition (current == 0),
            # so those pixels are left as 0.
            if (IPCC_previous != 0) and (IPCC_current != 0):
                # Equation to calculate the IPCC change code
                IPCC_change_block[row, col] = ((IPCC_previous - 1) * IPCC_class_max_val) + IPCC_current

    return IPCC_change_block

In [16]:
# Downloads input chunks, reclassifies GLCLU classes into IPCC land use reporting classes for each year, and maps changes between classes for consecutive years.
# Chunks are defined by a bounding box. is_final determines whether the output file names for final versions are used.
def reclassify_and_map_change_chunk(bounds, is_final):
    """
    Processes one chunk: downloads the composite land cover layers, reclassifies each model year
    into IPCC reporting classes, maps the class change between consecutive model years,
    and saves/uploads both sets of rasters.

    bounds: chunk bounding box; bounds[0] and bounds[3] are used to find the tile
            (presumably W, S, E, N order, as in chunk_params -- confirm against get_chunk_bounds)
    is_final: passed through to save_and_upload_raster
    Returns: None if the chunk has no data; otherwise a success message string.
    """

    bounds_str = boundstr(bounds)    # String form of chunk bounds
    tile_id = xy_to_tile_id(bounds[0], bounds[3])    # tile_id in YYN/S_XXXE/W
    chunk_length_pixels = calc_chunk_length_pixels(bounds)   # Chunk length in pixels (as opposed to decimal degrees)

    no_data_val = 255

    # Model years. Used for the downloads, the reclassification, and the change mapping,
    # so that the layers requested always match the layers that are read below.
    model_years = list(range(first_year, last_year+1, 5))


    ### Part 1: download chunks and check for data

    # Dictionary of downloaded layers
    layers = {}

    # One composite land cover layer per model year
    download_dict = {
        f"land_cover_{year}": f"{composite_LC_uri}/{year}/raw/{tile_id}.tif" for year in model_years
    }

    futures = prepare_to_download_chunk(bounds, download_dict, no_data_val)
    dask_print(f"Waiting for requests for data in chunk {bounds_str} in {tile_id}: {timestr()}")

    # Waits for requests to come back with data from S3
    for future in concurrent.futures.as_completed(futures):
        layer = futures[future]
        layers[layer] = future.result()

    # Checks chunk for data; nothing is reclassified or uploaded for chunks without data
    data_in_chunk = check_chunk_for_data(layers, "land_cover_", bounds_str, tile_id, no_data_val)

    if data_in_chunk == 0:
        return


    ### Part 2: reclassify GLCLU classes into IPCC reporting classes
    IPCC_class_dict = {}

    # Iterates through model years
    for year in model_years:

        dask_print(f"Reclassifying {bounds_str} in {tile_id} for {year}: {timestr()}")

        # Reclassifies GLCLU to 6 IPCC classes
        IPCC_classes = reclassify_to_IPCC(
            layers[f"land_cover_{year}"]
        )

        # Output files to upload to s3
        IPCC_class_dict[f"IPCC_classes_{year}"] = [IPCC_classes, "uint8", "IPCC_basic_classes", year]

    save_and_upload_raster(bounds, chunk_length_pixels, tile_id, bounds_str, IPCC_class_dict, is_final)


    ### Part 3: map change between IPCC classes for consecutive model years
    IPCC_change_dict = {}

    # Iterates through model years in a way that change can be calculated
    # (skips the first model year because it has no previous year to compare with)
    for year in model_years[1:]:

        dask_print(f"Getting IPCC class change in {bounds_str} in {tile_id} for {year}: {timestr()}")

        # Maps change between IPCC classes
        IPCC_change = change_classes_IPCC(
            IPCC_class_dict[f"IPCC_classes_{year-5}"][0], # [0] isolates the array from the [array, dtype, output name, year] list
            IPCC_class_dict[f"IPCC_classes_{year}"][0]    # [0] isolates the array from the [array, dtype, output name, year] list
        )

        # Output files to upload to s3
        IPCC_change_dict[f"IPCC_change_{year-5}_{year}"] = [IPCC_change, "uint8", "IPCC_basic_change", f'{year-5}_{year}']

    save_and_upload_raster(bounds, chunk_length_pixels, tile_id, bounds_str, IPCC_change_dict, is_final)

    # No explicit `del` of the arrays/dictionaries: they are local, so they are released when the function returns.
    # Deleting them by name would also raise an error if one of the loops above ran zero times.

    return f"Success for {bounds_str}: {timestr()}"

In [61]:
%%time

# Area to analyze
# chunk_params arguments: W, S, E, N, chunk size (degrees)
# Exactly one chunk_params assignment must be uncommented, otherwise this cell fails with a NameError on a fresh kernel.
# chunk_params = [-180, -50, 180, 80, 2]  # entire world
# chunk_params = [-10, 40, 20, 70, 1]    # 30x30 deg (70N_010W), 900 chunks
# chunk_params = [10, 40, 20, 50, 2]    # 10x10 deg (50N_010E), 25 chunks
# chunk_params = [10, 40, 20, 50, 10]    # 10x10 deg (50N_010E), 1 chunk
# chunk_params = [10, 46, 14, 50, 2]   # 4x4 deg, 4 chunks
# chunk_params = [10, 48, 12, 50, 1]   # 2x2 deg, 4 chunks
# chunk_params = [10, 49, 11, 50, 1]   # 1x1 deg, 1 chunk
# chunk_params = [10, 49, 11, 50, 0.5] # 1x1 deg, 4 chunks
# chunk_params = [10, 49.5, 10.5, 50, 0.25] # 0.5x0.5 deg, 4 chunks
chunk_params = [10, 42, 11, 43, 0.5] # 1x1 deg, 4 chunks (some GLCLU code=254 for ocean and some land, so data should be output)
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)

# # Edge cases with potential nodata issues
# chunk_params = [10, 49.75, 10.25, 50, 0.25] # 0.25x0.25 deg, 1 chunk (has data)
# chunk_params = [0, 79.75, 0.25, 80, 0.25] # 0.25x0.25 deg, 1 chunk (no tile-- no data)
# chunk_params = [-10, 21.75, -9.75, 22, 0.25] # 0.25x0.25 deg, 1 chunk (has data but entirely desert (fully GLCLU code=0))
# chunk_params = [10, 41, 11, 42, 0.5] # 1x1 deg, 4 chunks (entirely GLCLU code=255 for ocean, so no actual data-- nothing should be output)


# Makes list of chunks to analyze
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")

# Determines if the output file names for final versions of outputs should be used.
# Runs with more than 100 chunks are treated as final model runs.
is_final = len(chunks) > 100
if is_final:
    print("Running as final model.")

# Creates list of tasks to run (1 task = 1 chunk for all years)
delayed_result = [dask.delayed(reclassify_and_map_change_chunk)(chunk, is_final) for chunk in chunks]

# Actually runs analysis
results = dask.compute(*delayed_result)
results

Processing 4 chunks
Requesting data in chunk 10_42_11_43 in 50N_010E: 20240202_13_56_24
Requesting data in chunk 10_42_10_42 in 50N_010E: 20240202_13_56_24
Requesting data in chunk 10_42_11_42 in 50N_010E: 20240202_13_56_24
Requesting data in chunk 10_42_10_43 in 50N_010E: 20240202_13_56_24
Waiting for requests for data in chunk 10_42_10_42 in 50N_010E: 20240202_13_57_12
Data in chunk 10_42_10_42. Proceeding.
Reclassifying 10_42_10_42 in 50N_010E for 2000: 20240202_13_57_12
Waiting for requests for data in chunk 10_42_11_42 in 50N_010E: 20240202_13_57_12
Data in chunk 10_42_11_42. Proceeding.
Reclassifying 10_42_11_42 in 50N_010E for 2000: 20240202_13_57_12
Reclassifying 10_42_10_42 in 50N_010E for 2005: 20240202_13_57_13
Waiting for requests for data in chunk 10_42_10_43 in 50N_010E: 20240202_13_57_13
Data in chunk 10_42_10_43. Proceeding.
Reclassifying 10_42_10_43 in 50N_010E for 2000: 20240202_13_57_13
Reclassifying 10_42_11_42 in 50N_010E for 2005: 20240202_13_57_13
Waiting for req

('Success for 10_42_10_42: 20240202_13_57_21',
 'Success for 10_42_11_42: 20240202_13_57_29',
 'Success for 10_42_10_43: 20240202_13_57_24',
 'Success for 10_42_11_43: 20240202_13_57_26')

In [None]:
%%time

# Builds raster footprint shapefiles for the output rasters
# NOTE(review): 2000 is not in `years`, although IPCC_basic_classes rasters are also produced
# for the first model year -- confirm whether a footprint is needed for it.
types = ["IPCC_basic_classes", "IPCC_basic_change"]
years = [2005, 2010, 2015, 2020]

# Creates list of tasks to run (1 task = 1 combination of year and output type)
delayed_result = [
    dask.delayed(make_index_shp)(year, output_type)
    for year in years
    for output_type in types
]

# Runs all the tasks and shows what they returned
results = dask.compute(*delayed_result)
results