In [2]:
import os
from glob import glob

import numpy as np
import pandas as pd
import rasterio as rio
import xarray as xr
from pre_processing.cruts_config import CRUTSConfig
from rasterio.mask import mask
from tqdm import tqdm

In [9]:
# --- Locations on disk ------------------------------------------------------
src_dir_lr = "/media/xultaeculcis/2TB/datasets/cruts/pre-processed/full-res/"
src_dir_hr = "/media/xultaeculcis/2TB/datasets/cruts/inference/"
out_path = "/media/xultaeculcis/2TB/datasets/cruts/pre-processed/europe-extent"

# Names of the variables to process, as declared by the project configuration.
variables = CRUTSConfig.variables_cts

# --- Extents ----------------------------------------------------------------
# Corner pairs of the Europe bounding box for the "lr" and "hr" data.
# NOTE(review): not referenced by the cells visible here - confirm they are
# still needed.
europe_bbox_lr = ((-16, 84), (40.25, 33))
europe_bbox_hr = ((-16, 84.25), (40.25, 32.875))

# Corner points of the crop polygons.
# NOTE(review): presumably [longitude, latitude] in degrees - confirm.
left_upper_lr, right_upper_lr = [-16, 84.375], [40.25, 84.375]
left_lower_lr, right_lower_lr = [-16, 33.125], [40.25, 33.125]

left_upper_hr, right_upper_hr = [-16, 84.375], [40.25, 84.375]
left_lower_hr, right_lower_hr = [-16, 33], [40.25, 33]

# Polygon coordinates: a list holding a single ring that starts and ends on
# the same (upper-left) corner.
lr_polygon = [
    [left_upper_lr, right_upper_lr, right_lower_lr, left_lower_lr, left_upper_lr]
]
hr_polygon = [
    [left_upper_hr, right_upper_hr, right_lower_hr, left_lower_hr, left_upper_hr]
]


def extract_extent(src_dir, variables, polygon, out_dir=None):
    """Crop every GeoTIFF of the given variables to a polygon and save the result.

    For each name in ``variables`` the ``*.tif`` files found in
    ``<src_dir>/<name>/`` are masked with ``polygon`` (using ``crop=True``) and
    written, under their original file name, to ``<out_dir>/<name>/``. The
    output directory of each variable is created if it does not exist.

    Args:
        src_dir: Directory holding one sub-directory of ``*.tif`` files per
            variable.
        variables: Iterable of variable (sub-directory) names to process.
        polygon: Coordinates of a GeoJSON-style ``Polygon`` geometry, i.e. a
            list of rings, each ring being a list of points.
        out_dir: Root directory for the cropped files. When ``None`` (the
            default) the notebook-level ``out_path`` is used, which keeps
            existing three-argument calls working unchanged.

    Returns:
        None. The cropped rasters are written to disk.
    """
    target_root = out_path if out_dir is None else out_dir
    shapes = [{"type": "Polygon", "coordinates": polygon}]

    for var in variables:
        var_out_dir = os.path.join(target_root, var)
        os.makedirs(var_out_dir, exist_ok=True)

        files = sorted(glob(os.path.join(src_dir, var, "*.tif")))
        for fp in tqdm(files):
            with rio.open(fp) as ds:
                crop, transform = mask(ds, shapes, crop=True)
                meta = ds.meta

            # The cropped array has a new size and origin, so the metadata
            # copied from the source has to be adjusted before writing.
            meta.update(
                {
                    "driver": "GTiff",
                    "height": crop.shape[1],
                    "width": crop.shape[2],
                    "transform": transform,
                }
            )

            dst_fp = os.path.join(var_out_dir, os.path.basename(fp))
            with rio.open(dst_fp, "w", **meta) as dest:
                dest.write(crop)


# Crop the rasters found under `src_dir_hr` to the `hr_polygon` outline.
extract_extent(src_dir=src_dir_hr, variables=variables, polygon=hr_polygon)

 15%|█▌        | 221/1428 [00:01<00:08, 150.26it/s]


KeyboardInterrupt: 

In [None]:
nc_out_path = "/media/xultaeculcis/2TB/datasets/cruts/inference-europe-extent-nc"
os.makedirs(nc_out_path, exist_ok=True)

# Human-readable variable names used in the ``title`` attribute of each file.
var_to_variable = {
    CRUTSConfig.pre: "Precipitation",
    CRUTSConfig.tmn: "Minimum Temperature",
    CRUTSConfig.tmp: "Average Temperature",
    CRUTSConfig.tmx: "Maximum Temperature",
}

# For every variable, stack its cropped GeoTIFFs along a time axis and save
# the stack as a single NetCDF file.
for var in CRUTSConfig.variables_cts:
    var_dir = os.path.join(out_path, var)
    fps = sorted(glob(os.path.join(var_dir, "*.tif")))
    if not fps:
        # np.concatenate raises ValueError on an empty list, so a variable
        # whose extraction has not produced any file yet is skipped.
        print(f"No GeoTIFFs found in {var_dir} - skipping '{var}'.")
        continue

    timestamps = []
    arrs = []
    lat = None
    lon = None
    for fp in tqdm(fps):
        # The last three dash-separated tokens of the file stem are joined
        # back together and later parsed as the timestamp.
        stem = os.path.splitext(os.path.basename(fp))[0]
        timestamps.append("-".join(stem.split("-")[-3:]))

        # Open inside a context manager so the raster handle is released
        # once the values have been read into memory.
        with xr.open_rasterio(fp) as da:
            if lat is None:
                # Coordinates are taken from the first file only.
                # NOTE(review): assumes every file shares the same grid.
                lat = da.y.data
                lon = da.x.data
            arrs.append(da.values)

    # Arrays are joined along their first axis, which becomes "time".
    # NOTE(review): assumes each file holds exactly one band - confirm.
    var_data = np.concatenate(arrs, axis=0)
    time = pd.to_datetime(timestamps)
    ds = xr.Dataset(
        data_vars={var: (("time", "lat", "lon"), var_data)},
        coords={"time": time, "lon": lon, "lat": lat},
        attrs={
            "Conventions": "CF-1.4",
            "title": f"CRU TS4.04 {var_to_variable[var]}",
            "source": "Neural-Downscaling approach.",
            "extent": "Europe. Based on ETRS89.",
        },
    )
    ds.to_netcdf(
        os.path.join(nc_out_path, f"cru_ts4.04.nn.inference.1901.2019.{var}.dat.nc")
    )