In [45]:
import os
from glob import glob

import numpy as np
import pandas as pd
import rasterio as rio
import xarray as xr
from pre_processing.cruts_config import CRUTSConfig
from rasterio.mask import mask
from tqdm import tqdm

%matplotlib inline

In [20]:
# Corner points of the rectangular clip region, given as ((x_left, y_upper), (x_right, y_lower)).
europe_bbox = ((-16, 84.25), (40.25, 32.875))
(x_left, y_upper), (x_right, y_lower) = europe_bbox
left_upper = [x_left, y_upper]
left_lower = [x_left, y_lower]
right_upper = [x_right, y_upper]
right_lower = [x_right, y_lower]

# Source rasters (one sub-directory per variable) and destination for the clipped copies.
inference_dir = "/media/xultaeculcis/2TB/datasets/cruts/inference"
out_path = "/media/xultaeculcis/2TB/datasets/cruts/inference-europe-extent"

# GeoJSON-like polygon handed to rasterio's mask(); the ring is closed by
# repeating the first corner at the end.
europe_ring = [left_upper, right_upper, right_lower, left_lower, left_upper]
bbox = [{"coordinates": [europe_ring], "type": "Polygon"}]

for var in CRUTSConfig.variables_cts:
    var_out_dir = os.path.join(out_path, var)
    os.makedirs(var_out_dir, exist_ok=True)

    tif_paths = sorted(glob(os.path.join(inference_dir, var, "*.tif")))
    for src_path in tqdm(tif_paths):
        # Clip the raster to the polygon; crop=True also shrinks the output
        # window, so mask() hands back a transform for the smaller array.
        with rio.open(src_path) as src:
            crop, transform = mask(src, bbox, crop=True)
            meta = src.meta

        # Reuse the source metadata, overriding only what the crop changed.
        # The clipped array is indexed (band, row, column).
        height, width = crop.shape[1:3]
        meta.update(driver="GTiff", height=height, width=width, transform=transform)

        # Write the clipped raster under the same file name as the source.
        dst_path = os.path.join(var_out_dir, os.path.basename(src_path))
        with rio.open(dst_path, "w", **meta) as dest:
            dest.write(crop)

100%|██████████| 1428/1428 [00:08<00:00, 160.09it/s]
100%|██████████| 1428/1428 [00:23<00:00, 59.57it/s]
100%|██████████| 1428/1428 [00:18<00:00, 77.07it/s]
100%|██████████| 1428/1428 [00:18<00:00, 75.68it/s]


In [90]:
# Destination directory for the per-variable NetCDF files.
nc_out_path = "/media/xultaeculcis/2TB/datasets/cruts/inference-europe-extent-nc"
os.makedirs(nc_out_path, exist_ok=True)

# Human-readable variable names used in the NetCDF `title` attribute.
var_to_variable = {
    CRUTSConfig.pre: "Precipitation",
    CRUTSConfig.tmn: "Minimum Temperature",
    CRUTSConfig.tmp: "Average Temperature",
    CRUTSConfig.tmx: "Maximum Temperature",
}

# Build one (time, lat, lon) dataset per variable from the cropped GeoTIFFs
# found under `out_path` and write it to `nc_out_path`.
for var in CRUTSConfig.variables_cts:
    # Files are processed in sorted path order, so the time axis follows
    # the file-name ordering.
    fps = sorted(glob(os.path.join(out_path, var, "*.tif")))
    if not fps:
        # Fail with an actionable message instead of an opaque
        # np.concatenate error on an empty list.
        raise FileNotFoundError(
            f"No GeoTIFF files found for variable '{var}' in "
            f"{os.path.join(out_path, var)}"
        )

    timestamps = []
    lat = None
    lon = None
    arrs = []
    for fp in tqdm(fps):
        # Strip only the extension, then keep the last three dash-separated
        # tokens as the timestamp (presumably YYYY-MM-DD -- confirm against
        # the file naming scheme).
        stem = os.path.splitext(os.path.basename(fp))[0]
        timestamps.append("-".join(stem.split("-")[-3:]))

        # NOTE(review): xr.open_rasterio is deprecated in newer xarray
        # releases (rioxarray is the suggested replacement) -- pin xarray or
        # migrate before upgrading.
        # The context manager closes the underlying file as soon as the
        # pixel values have been pulled into memory.
        with xr.open_rasterio(fp) as da:
            if lat is None:
                # The first raster defines the grid for the whole stack.
                lat = da.y.data
                lon = da.x.data
            elif (
                da.y.shape != lat.shape
                or da.x.shape != lon.shape
                or not np.allclose(da.y.data, lat)
                or not np.allclose(da.x.data, lon)
            ):
                # A raster on a different grid would otherwise be labelled
                # with the first file's coordinates.
                raise ValueError(
                    f"Raster grid of {fp} does not match the grid of {fps[0]}"
                )
            # `.values` forces an in-memory NumPy copy before the file closes.
            arrs.append(da.values)

    # Each raster contributes its band axis; stacking along axis 0 turns
    # that axis into the time axis.
    var_data = np.concatenate(arrs, axis=0)
    time = pd.to_datetime(timestamps)
    ds = xr.Dataset(
        data_vars={
            var: (("time", "lat", "lon"), var_data),
        },
        coords={"time": time, "lon": lon, "lat": lat},
        attrs={
            "Conventions": "CF-1.4",
            "title": f"CRU TS4.04 {var_to_variable[var]}",
            "source": "Neural-Downscaling approach.",
            "extent": "Europe. Based on ETRS89.",
        },
    )
    ds.to_netcdf(
        os.path.join(nc_out_path, f"cru_ts4.04.nn.inference.1901.2019.{var}.dat.nc")
    )

100%|██████████| 1428/1428 [00:02<00:00, 479.79it/s]
100%|██████████| 1428/1428 [00:08<00:00, 165.46it/s]
100%|██████████| 1428/1428 [00:04<00:00, 314.23it/s]
100%|██████████| 1428/1428 [00:03<00:00, 467.00it/s]
