## Data Preprocessing

We'll start with the Beaufort dataset because we have more confidence that it is complete.
The basic steps here are to rename the files and to reproject them.

In [None]:
geotiffs_to_ingest = []
metas = []

# Visit every region/year folder under the EM24 input tree and collect the
# GeoTIFFs whose names match "dailyslie", along with their metadata.
for region in ice_zones:
    for year in ice_years:
        target = Path(INPUT_FLAT_DIR / "EM24" / region / year)
        tiffs = eda.list_geotiffs(target, str_to_match="dailyslie")
        # Only the second element of the returned pair is kept here.
        _, meta = eda.fetch_all_geotiff_metadata(target, str_to_match="dailyslie")
        geotiffs_to_ingest.extend(tiffs)
        metas.extend(meta)

# Report how many files were found and show the first one as a quick check.
print(len(geotiffs_to_ingest))
geotiffs_to_ingest[0]

In [None]:
def rename(fp: Path) -> Path:
    """Build the standardized output path for a raw input GeoTIFF.

    The region is read from the name of the grandparent directory of ``fp``
    (``Chuk`` or ``Beau``), which selects both the zone name and the output
    directory. The first character of the file name is looked up in
    ``data_sources``; the rest of the text before the first underscore is
    used as the date string.

    Args:
        fp: Path to the input file, expected at ``<region>/<year>/<file>``.

    Returns:
        ``<out_dir>/<zone>_<yyyymmdd>_<source>_slie.tif``, where zone and
        source are lower-cased and spaces in the source become underscores.

    Raises:
        ValueError: If the grandparent directory is neither ``Chuk`` nor
            ``Beau``.
        KeyError: If the leading character of the file name is not a key of
            ``data_sources``.
    """
    region_dir = fp.parent.parent.name
    if region_dir == "Chuk":
        zone = "Chukchi"
        out_dir = DAILY_CHUKCHI_DIR
    elif region_dir == "Beau":
        zone = "Beaufort"
        out_dir = DAILY_BEAUFORT_DIR
    else:
        # Fail loudly here: falling through would only crash a few lines
        # later on the unassigned ``zone``/``out_dir`` with a confusing error.
        raise ValueError(
            f"Cannot determine ice zone for {fp}: "
            f"unrecognized region directory {region_dir!r}"
        )

    fname = fp.name
    # The token before the first underscore is "<source letter><date>".
    yyyymmdd = fname.split("_")[0][1:]

    data_source_indicator = fname[0]
    source_str = data_sources[data_source_indicator].lower().replace(" ", "_")

    new_name = f"{zone.lower()}_{yyyymmdd}_{source_str}_slie.tif"
    return out_dir / new_name

# Preview the output path generated for the first input file.
rename(geotiffs_to_ingest[0])

In [None]:
def reproject_raster(file):
    """Reproject band 1 of a raster to EPSG:3338 and write it under its new name.

    The output location is taken from ``rename(file)``; its parent directory
    is created if it does not exist yet. The output grid (transform, width,
    height) is the default one computed by rasterio for the target CRS.

    Args:
        file: Path of the source raster; it must also be accepted by
            ``rename``.

    Returns:
        The path of the reprojected raster that was written.
    """
    dst_crs = rio.crs.CRS.from_epsg(3338)

    with rio.open(file) as src:
        # compute the new affine transformation, width and height
        warp_transform, width, height = rio.warp.calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds
        )

        # Start from the source profile and override only the georeferencing.
        # No "bounds" entry: bounds are implied by transform/width/height and
        # an extra key would be passed to the driver as a creation option.
        out_profile = src.profile.copy()
        out_profile.update(
            {
                "crs": dst_crs,
                "transform": warp_transform,
                "width": width,
                "height": height,
            }
        )

        # create the new raster file, making sure its folder exists first
        out_file = rename(file)
        out_file.parent.mkdir(parents=True, exist_ok=True)

        with rio.open(out_file, "w", **out_profile) as dst:
            # NOTE(review): only band 1 is reprojected while the band count
            # is copied from the source profile — presumably the inputs are
            # single-band; confirm.
            rio.warp.reproject(
                source=src.read(1),
                destination=rio.band(dst, 1),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=warp_transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest,  # NN is default, but explicit here for easy change or experimentation later
            )

    return out_file


In [None]:
# Reproject every collected GeoTIFF; each call writes one output file.
for tiff in geotiffs_to_ingest:
    reproject_raster(tiff)