The purpose of this notebook is to process the Dadap Canals density raster into 10x10 degree tiles and to set up a template for processing other input datasets. The code needs to check the CRS, projection, and cell size of the dataset and correct them if necessary. The code also needs to export chunks of the input dataset to S3, which can later be merged into 10x10 degree tiles. Known issue: the code currently exports files to S3, but every exported file contains only NoData values.


In [123]:
import rioxarray
import rasterio
import xarray as xr
import pandas as pd
import os
import dask
import boto3
from dask.distributed import Client
import dask.array as da

# scipy basics
import numpy as np
import rasterio
import rasterio.transform
import rasterio.windows
from rasterio.windows import from_bounds
#import geopandas as gpd
import pandas as pd
import rioxarray
import xarray as xr
from rioxarray.merge import merge_arrays


In [124]:
# Raster tile passed to process_dadap_chunk as the grid that the Dadap data is
# reprojected to match (GFW Global Peatlands tile 00N_110E).
template_uri = "s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/raw/GFW_Global_Peatlands/00N_110E.tif"
# Input raster to be chunked: Dadap canal length data ("canal_length_1km").
dadap_uri = "s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/raw/Dadap_SEA_Drainage/canal_length_data/canal_length_1km.tif"
# Base S3 prefix for the organic soils data.
# NOTE(review): not referenced by the cells shown here; process_dadap_chunk reads
# `s3_processed_dir` instead — confirm which one is intended.
s3_base_dir = "s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/"
# Name pattern for the Dadap output.
# NOTE(review): process_dadap_chunk hard-codes the same string rather than using
# this variable — confirm whether they should be linked.
dadap_pattern = "dadap_density"

In [125]:
def get_tile_dataset_rio(uri, bounds, chunk_length_pixels):
    """Read band 1 of a raster inside ``bounds`` as a georeferenced DataArray.

    Args:
        uri: Path or URI of the raster, opened with ``rasterio.open``.
        bounds: Sequence ``(left, bottom, right, top)`` in the raster's CRS,
            unpacked into ``rasterio.windows.from_bounds``.
        chunk_length_pixels: Unused here; kept so existing callers keep working.

    Returns:
        An ``xarray.DataArray`` with ``y``/``x`` cell-centre coordinates and the
        raster's CRS and nodata value written through the ``rio`` accessor, or
        ``None`` when the bounds do not overlap the raster or the read fails.
    """
    bounds_str = boundstr(bounds)
    # Used only when the raster itself declares no nodata value.
    fallback_no_data_val = 3.4028235e+38
    try:
        with rasterio.open(uri) as ds:
            # Clip the window to the raster extent before reading. A
            # non-boundless read clips it anyway, so deriving the transform
            # from the unclipped window would mis-georeference chunks that
            # only partly overlap the raster.
            window = from_bounds(*bounds, ds.transform).crop(ds.height, ds.width)
            if window.width <= 0 or window.height <= 0:
                print(f"No data in chunk {bounds_str}, skipping.")
                return None

            data = ds.read(1, window=window)
            if data.size == 0:  # Skip chunks with no data
                print(f"No data in chunk {bounds_str}, skipping.")
                return None

            transform = ds.window_transform(window)
            n_rows, n_cols = data.shape
            # transform.c / transform.f are the upper-left *corner* of the
            # window; rioxarray treats coordinates as cell centres, so shift
            # by half a cell to avoid a half-pixel offset when reprojecting.
            x_first = transform.c + transform.a / 2
            y_first = transform.f + transform.e / 2
            data_array = xr.DataArray(data, dims=["y", "x"], coords={
                "x": np.linspace(x_first, x_first + transform.a * (n_cols - 1), num=n_cols),
                "y": np.linspace(y_first, y_first + transform.e * (n_rows - 1), num=n_rows),
            })
            data_array.rio.write_crs(ds.crs, inplace=True)
            # Prefer the nodata value declared by the file so real nodata
            # cells are recognised as such downstream.
            no_data_val = ds.nodata if ds.nodata is not None else fallback_no_data_val
            data_array.rio.write_nodata(no_data_val, inplace=True)
            return data_array
    except Exception as e:
        # Best-effort: report the failure and let the caller skip this chunk.
        print(f"Error reading data for bounds {bounds_str}: {e}")
        return None


In [126]:
def process_dadap_chunk(bounds, dadap_uri, template_uri, is_final):
    """Reproject one chunk of the Dadap raster onto the template grid and upload it.

    Args:
        bounds: Chunk bounds; ``bounds[0]`` and ``bounds[3]`` are passed to
            ``xy_to_tile_id`` and the whole sequence to the raster readers.
        dadap_uri: URI of the Dadap raster to read.
        template_uri: URI of the raster whose grid the Dadap data is matched to.
        is_final: Forwarded unchanged to ``save_and_upload_small_raster_set``.

    Returns:
        A status string saying whether the chunk was processed or skipped.
    """
    # NOTE(review): `s3_processed_dir` and `time` are not defined/imported in
    # the cells visible here — confirm they exist in the notebook session.
    print(f"Processing chunk with bounds: {bounds}")
    chunk_length_pixels = calc_chunk_length_pixels(bounds)
    tile_id = xy_to_tile_id(bounds[0], bounds[3])

    # Read the source data first; without it there is nothing to export.
    source_chunk = get_tile_dataset_rio(dadap_uri, bounds, chunk_length_pixels)
    if source_chunk is None:
        return f"Skipped chunk {bounds} due to no data"

    # The template supplies the target grid for the reprojection below.
    grid_chunk = get_tile_dataset_rio(template_uri, bounds, chunk_length_pixels)
    if grid_chunk is None:
        return f"Skipped chunk {bounds} due to no template data"

    matched_chunk = source_chunk.rio.reproject_match(grid_chunk)

    destination = s3_processed_dir
    chunk_label = boundstr(bounds)
    # Single output layer: [data, dtype, name pattern, current year].
    output_layers = {
        "dadap_chunk": [matched_chunk, 'float32', 'dadap_density', time.strftime('%Y')],
    }
    save_and_upload_small_raster_set(
        bounds,
        chunk_length_pixels,
        tile_id,
        chunk_label,
        output_layers,
        destination,
        is_final,
    )

    return f"Processed and uploaded chunk {bounds}"

In [127]:
# Build the list of chunk bounds to analyze.
# Presumably [min_x, min_y, max_x, max_y, chunk size in degrees] — confirm
# against get_chunk_bounds.
chunk_params = [110, -10, 120, 0, 2]  # 1 tile, 25 chunks
# Alternative extents for quick tests:
# chunk_params = [116, -6, 118, -4, 2]  # 1 chunk, has data
# chunk_params = [116, -10, 118, -8, 2]  # 1 chunk
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")

# Runs over more than 30 chunks are flagged as final model runs.
is_final = len(chunks) > 30
if is_final:
    print("Running as final model.")

# Create one lazy dask task per chunk; nothing executes until dask.compute.
_lazy_process_chunk = dask.delayed(process_dadap_chunk)
delayed_result = [
    _lazy_process_chunk(chunk, dadap_uri, template_uri, is_final)
    for chunk in chunks
]

# Run every chunk task and show the per-chunk status messages.
results = dask.compute(*delayed_result)
print(results)

Processing 25 chunks
Processing chunk with bounds: [112, -4, 114, -2]
Processing chunk with bounds: [110, -6, 112, -4]
Processing chunk with bounds: [118, -10, 120, -8]
Processing chunk with bounds: [110, -8, 112, -6]
Processing chunk with bounds: [110, -10, 112, -8]
Processing chunk with bounds: [114, -8, 116, -6]
Processing chunk with bounds: [112, -10, 114, -8]
Processing chunk with bounds: [118, -8, 120, -6]
Processing chunk with bounds: [110, -4, 112, -2]
Processing chunk with bounds: [112, -2, 114, 0]
Processing chunk with bounds: [110, -2, 112, 0]
Processing chunk with bounds: [114, -10, 116, -8]
No data in chunk 118_-8_120_-6, skipping.No data in chunk 110_-8_112_-6, skipping.
No data in chunk 114_-10_116_-8, skipping.

Processing chunk with bounds: [118, -6, 120, -4]
No data in chunk 110_-10_112_-8, skipping.
Processing chunk with bounds: [114, -6, 116, -4]
No data in chunk 112_-10_114_-8, skipping.
Processing chunk with bounds: [116, -6, 118, -4]
No data in chunk 114_-8_116_-