# Imports

In [None]:
import os
from glob import glob
from typing import Tuple

import dask.bag
import datacube.utils.geometry as dcug
import numpy as np
import rasterio
import xarray
from dask.diagnostics import ProgressBar
from datacube.utils.cog import write_cog
from distributed import Client
from rasterio import Affine, MemoryFile
from rasterio.enums import Resampling

# Register a dask progress bar globally so dask computations report progress.
pbar = ProgressBar()
pbar.register()

# Directory holding the original CRU-TS NetCDF files.
data_dir = "/media/xultaeculcis/2TB/datasets/cruts/original/"

# Variable codes as they appear in the CRU-TS file names.
tmn, tmx, tmp, pre = "tmn", "tmx", "tmp", "pre"
variables = [tmn, tmx, tmp, pre]

# One NetCDF file name per variable code.
variable_files = [f"cru_ts4.04.1901.2019.{var}.dat.nc" for var in variables]

# Root directory for everything produced by the pre-processing steps.
out_dir = "/media/xultaeculcis/2TB/datasets/cruts/pre-processed/"

full_res_dir, tiles_dir = "full-res", "tiles"
sub_dirs = [full_res_dir, tiles_dir]

# Make sure <out_dir>/<sub-dir>/<variable> exists for every combination.
target_dirs = (
    os.path.join(out_dir, dir_name, var)
    for dir_name in sub_dirs
    for var in variables
)
for target in target_dirs:
    os.makedirs(target, exist_ok=True)

# Convert CRU-TS To COG

In [None]:
def as_cog(var: str) -> None:
    """Export every time step of one CRU-TS variable as a Cloud Optimized GeoTIFF.

    The NetCDF file ``cru_ts4.04.1901.2019.<var>.dat.nc`` is read from the
    module-level ``data_dir``. One file per time index is written to
    ``<out_dir>/<full_res_dir>/<var>/cruts-<var>-<YYYY-MM-DD>.tif``, and
    existing files are overwritten.

    Args:
        var: Name of the data variable inside the dataset; it is also the
            variable code embedded in the input and output file names.
    """
    file_path = os.path.join(data_dir, f"cru_ts4.04.1901.2019.{var}.dat.nc")
    out_path = os.path.join(out_dir, full_res_dir, var)

    # The context manager closes the NetCDF handle even if a write fails;
    # this function runs once per variable inside worker processes, so an
    # unclosed dataset would otherwise leak a file handle each time.
    with xarray.open_dataset(file_path) as ds:
        # `sizes` is the supported dimension-name -> length mapping.
        for i in range(ds.sizes["time"]):
            # get frame at time index i and attach the CRS so it can be
            # written as a georeferenced raster
            arr = dcug.assign_crs(ds[var].isel(time=i), "EPSG:4326")

            # extract the frame's date (day precision) for the file name
            date_str = np.datetime_as_string(arr.time.values, unit="D")

            # Write as Cloud Optimized GeoTIFF
            write_cog(
                geo_im=arr,
                fname=os.path.join(out_path, f"cruts-{var}-{date_str}.tif"),
                overwrite=True,
            )

In [None]:
# Fan the conversion out over the variables: each bag element is one variable
# code, and `as_cog` is applied to each of them when the bag is computed.
variable_bag = dask.bag.from_sequence(variables)
results = variable_bag.map(as_cog).compute()

# Resize WorldClim

In [None]:
import os
from glob import glob
from typing import Tuple

import dask.bag
import datacube.utils.geometry as dcug
import numpy as np
import rasterio
import xarray
from dask.diagnostics import ProgressBar
from datacube.utils.cog import write_cog
from distributed import Client
from rasterio import Affine, MemoryFile
from rasterio.enums import Resampling

# Register a dask progress bar globally so dask computations report progress.
pbar = ProgressBar()
pbar.register()

# NOTE(review): this list is not referenced anywhere in the visible code, and
# (1080, 720) is not 2:1 like the other entries — confirm intent.
resolutions = [(720, 360), (1080, 720), (2160, 1080)]

# Directory with the WorldClim rasters, one sub-directory per variable.
data_dir = "/media/xultaeculcis/2TB/datasets/wc/weather/"
variables = ["tmin", "tmax", "prec"]

# Glob pattern selecting the raster files to process.
pattern = "*.tif"

# Output root and the names of the per-step sub-directories.
out_dir = "/media/xultaeculcis/2TB/datasets/wc/pre-processed/"
resized, tiled = "resized", "tiled"

# (label, scaling factor) pairs: the label names the output directory and the
# factor is what `resize` multiplies the raster width and height by.
resolution_multipliers = [("1x", 1 / 12), ("2x", 1 / 6), ("4x", 1 / 3)]

# Make sure <out_dir>/<variable>/<step>/<label> exists for every combination.
for var in variables:
    for label, _ in resolution_multipliers:
        for step in (resized, tiled):
            os.makedirs(os.path.join(out_dir, var, step, label), exist_ok=True)


def resize(
    file_path: str,
    var: str,
    scaling_factor: float,
    resolution_multiplier: str,
    out_dir: str,
) -> None:
    """Resample one raster by ``scaling_factor`` and save it with the COG driver.

    The result keeps the source file name and is written to
    ``<out_dir>/<var>/<resized>/<resolution_multiplier>/``, where ``resized``
    is the module-level directory name.

    Args:
        file_path: Path of the raster to read.
        var: Variable name; used as an output sub-directory.
        scaling_factor: Multiplier applied to the raster width and height
            (values below 1 shrink the raster). The scaled size is truncated
            to whole pixels.
        resolution_multiplier: Label of the target resolution; used as an
            output sub-directory.
        out_dir: Root output directory.

    Raises:
        ValueError: If ``scaling_factor`` is not positive.
    """
    if scaling_factor <= 0:
        raise ValueError(f"scaling_factor must be positive, got {scaling_factor!r}")

    with rasterio.open(file_path) as raster:
        # Target shape in whole pixels; never collapse a dimension to zero.
        height = max(1, int(raster.height * scaling_factor))
        width = max(1, int(raster.width * scaling_factor))

        # Derive the new pixel size from the shape actually written rather
        # than from `scaling_factor`: when the scaled size had to be
        # truncated the two differ, and using the factor would change the
        # raster's geographic extent. Composing with a scale also keeps any
        # rotation/shear terms consistent.
        transform = raster.transform * Affine.scale(
            raster.width / width, raster.height / height
        )

        profile = raster.profile
        profile.update(transform=transform, driver="COG", height=height, width=width)

        # resample data to target shape while reading
        data = raster.read(
            out_shape=(raster.count, height, width),
            resampling=Resampling.nearest,
        )

    # The source is closed at this point; only the in-memory array and the
    # profile are needed to write the output.
    fname = os.path.join(
        out_dir, var, resized, resolution_multiplier, os.path.basename(file_path)
    )
    with rasterio.open(fname, "w", **profile) as dataset:  # Open as DatasetWriter
        dataset.write(data)


# Run the resizing on a local cluster of 8 single-threaded workers. Leaving
# the `with` block closes the client, including when a computation raises.
with Client(n_workers=8, threads_per_worker=1) as client:
    for var in variables:
        # every raster of this variable, at any depth below its directory
        search_path = os.path.join(data_dir, var, "**", pattern)
        files = sorted(glob(search_path, recursive=True))

        for multiplier, scale in resolution_multipliers:
            file_bag = dask.bag.from_sequence(files, npartitions=1000)
            file_bag.map(resize, var, scale, multiplier, out_dir).compute()