"""
Load dadap density data, resample to correct grid, and reclassify to binary
"""

In [27]:
import os
import tempfile
import time

import boto3
import dask
import dask.array as da
import numpy as np
import pandas as pd
import rasterio
import rioxarray
import xarray as xr
from botocore.exceptions import NoCredentialsError
from dask.distributed import Client


In [28]:
# Raw inputs on S3: the folder of peatland tiles and the single Dadap raster.
peatlands_uri = "s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/raw/GFW_Global_Peatlands/"
dadap_uri = "s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/inputs/raw/Dadap_SEA_Drainage/canal_length_data/canal_length_1km.tif"
s3_base_dir = "s3://gfw2-data/climate/AFOLU_flux_model/organic_soils/"

# Name fragment used for both the output folder and the output file names.
dadap_pattern = "dadap_density"

# Build the output locations from the shared base so the scheme is always a
# well-formed "s3://" (the previous literal was "s3:/", with one slash).
# S3 paths always use "/", so they are joined explicitly rather than with
# os.path.join, whose separator depends on the host OS.
processed_dir = f"{s3_base_dir}inputs/processed"
output_dir = f"{processed_dir}/{dadap_pattern}"

In [29]:
def preprocess_dadap_density():
    """Load the Dadap density raster and match it to a peatlands template tile.

    Reads the raster at ``dadap_uri`` and the ``00N_110E.tif`` tile found under
    ``peatlands_uri``, then calls ``rio.reproject_match`` so the density raster
    is reprojected and resampled onto the template.

    Returns:
        The density raster after ``reproject_match`` against the template tile.
    """
    print("Loading dadap density raster...")
    density = get_dataset(dadap_uri, "dadap_density", template=None)

    # A single peatlands tile serves as the CRS/resolution reference.
    print("Loading template raster for CRS and resolution reference...")
    template = get_tile_dataset(peatlands_uri, "00N_110E.tif", "peatlands", template=None)

    # NOTE(review): reproject_match is called with its default resampling
    # method — confirm that is appropriate for density values.
    print("Reprojecting and resampling dadap density raster...")
    aligned = density.rio.reproject_match(template)
    return aligned

# Built once at module level; clip_and_save_dadap_density reads this global
# when it clips each chunk.
dadap_density = preprocess_dadap_density()

Loading dadap density raster...
Loading template raster for CRS and resolution reference...
Reprojecting and resampling dadap density raster...


In [30]:
def split_s3_uri(uri, default_bucket="gfw2-data"):
    """Split an S3 location into a ``(bucket, key)`` pair.

    Accepts ``s3://bucket/key`` and also tolerates the single-slash form
    ``s3:/bucket/key``. A string without an ``s3:`` scheme is treated as a key
    inside *default_bucket*, with any leading slashes removed.

    Raises:
        ValueError: If the string has an ``s3:`` scheme but no bucket name.
    """
    scheme, colon, remainder = uri.partition(":")
    if colon and scheme.lower() == "s3":
        bucket, _, key = remainder.lstrip("/").partition("/")
        if not bucket:
            raise ValueError(f"S3 URI has no bucket: {uri!r}")
        return bucket, key
    return default_bucket, uri.lstrip("/")


def has_valid_data(values, nodata):
    """Return True when *values* contains at least one cell that is not nodata.

    Args:
        values: Array of raster cell values.
        nodata: The raster's nodata value; may be ``None`` or NaN.
    """
    if nodata is None:
        # No nodata value declared, so every cell counts as data.
        return values.size > 0
    if np.isnan(nodata):
        # NaN never compares equal to anything, so "!=" would always be True.
        return bool(np.any(~np.isnan(values)))
    return bool(np.any(values != nodata))


def clip_and_save_dadap_density(bounds, is_final):
    """Clip the module-level ``dadap_density`` raster to one chunk and upload it to S3.

    The chunk is written to a private temporary directory, uploaded, and then
    removed. Chunks that contain only nodata are skipped. Upload failures are
    reported with ``print`` and do not raise.

    Args:
        bounds: Chunk extent as ``(xmin, ymin, xmax, ymax)``.
        is_final: When true the output is named ``{tile_id}_{dadap_pattern}.tif``;
            otherwise a ``%Y%m%d%H%M%S`` timestamp is appended to the name.

    Returns:
        None.
    """
    xmin, ymin, xmax, ymax = bounds
    tile_id = xy_to_tile_id(xmin, ymax)    # tile_id in YYN/S_XXXE/W

    if is_final:
        output_filename = f"{tile_id}_{dadap_pattern}.tif"
    else:
        output_filename = f"{tile_id}_{dadap_pattern}_{time.strftime('%Y%m%d%H%M%S')}.tif"

    # upload_file wants the bucket and the key separately; passing the whole
    # "s3:/..." string as the key would bury the object under a key that
    # itself starts with "s3:/gfw2-data/".
    # NOTE(review): the key carries no chunk bounds, so chunks that map to the
    # same tile_id share one key when is_final is true — confirm final runs
    # use one chunk per tile.
    bucket, key = split_s3_uri(f"{output_dir.rstrip('/')}/{output_filename}")

    # Clip the raster
    clipped = dadap_density.rio.clip_box(minx=xmin, miny=ymin, maxx=xmax, maxy=ymax)

    if not has_valid_data(clipped.data, clipped.rio.nodata):
        print(f"No valid data in chunk {bounds} for tile {tile_id}. Skipping this chunk.")
        return

    # Each call gets its own scratch directory: chunks in the same tile produce
    # the same file name, and tasks run concurrently under dask.compute.
    with tempfile.TemporaryDirectory() as scratch_dir:
        local_file_path = os.path.join(scratch_dir, output_filename)
        clipped.rio.to_raster(local_file_path)
        print(f"Saved locally: {local_file_path}")

        # Upload to S3 (best effort: failures are reported, not raised)
        try:
            s3 = boto3.client('s3')
            s3.upload_file(local_file_path, bucket, key)
            print(f"Successfully uploaded {output_filename} to S3 at s3://{bucket}/{key}")
        except NoCredentialsError:
            print("Credentials not available for AWS S3.")
        except Exception as e:
            print(f"Failed to upload to S3: {str(e)}")

    # Leaving the "with" block removed the scratch directory and its contents.
    print(f"Deleted local file: {local_file_path}")

In [None]:
# Chunk grid definition. The values look like
# [min_x, min_y, max_x, max_y, chunk_size] — TODO confirm against get_chunk_bounds.
chunk_params = [110, -10, 120, 0, 2]
#chunk_params = [110, -6, 112, -4, 1]

# Expand the parameters into the list of chunk bounds to process
chunks = get_chunk_bounds(chunk_params)
print("Processing", len(chunks), "chunks")

# Runs with more than 30 chunks are treated as final, which switches the
# outputs to their un-timestamped file names
is_final = len(chunks) > 30
if is_final:
    print("Running as final model.")

# Build one lazy task per chunk; nothing executes until dask.compute is called
clip_task = dask.delayed(clip_and_save_dadap_density)
delayed_result = [clip_task(chunk, is_final) for chunk in chunks]

# Execute every task and display the results in the notebook
results = dask.compute(*delayed_result)
results

Processing 25 chunks




No valid data in chunk [112, -10, 114, -8] for tile 00N_110E. Skipping this chunk.
No valid data in chunk [114, -8, 116, -6] for tile 00N_110E. Skipping this chunk.
No valid data in chunk [118, -8, 120, -6] for tile 00N_110E. Skipping this chunk.
No valid data in chunk [116, -10, 118, -8] for tile 00N_110E. Skipping this chunk.
No valid data in chunk [116, -8, 118, -6] for tile 00N_110E. Skipping this chunk.
No valid data in chunk [112, -8, 114, -6] for tile 00N_110E. Skipping this chunk.
No valid data in chunk [110, -8, 112, -6] for tile 00N_110E. Skipping this chunk.
No valid data in chunk [118, -10, 120, -8] for tile 00N_110E. Skipping this chunk.


In [None]:
# # Convert processing to Dask delayed tasks
# tasks = [dask.delayed(process_tiles)(dadap_density, output_dir, [tile]) for tile in tiles]
# results = dask.compute(*tasks)